diff --git a/llvm/lib/Analysis/ScalarEvolution.cpp b/llvm/lib/Analysis/ScalarEvolution.cpp index c20b1bdaf94c7..ce4d4ad7a0ab0 100644 --- a/llvm/lib/Analysis/ScalarEvolution.cpp +++ b/llvm/lib/Analysis/ScalarEvolution.cpp @@ -14952,6 +14952,29 @@ const SCEVAddRecExpr *ScalarEvolution::convertSCEVToAddRecWithPredicates( if (!AddRec) return nullptr; + // Check if any of the transformed predicates is known to be false. In that + // case, it doesn't make sense to convert to a predicated AddRec, as the + // versioned loop will never execute. + for (const SCEVPredicate *Pred : TransformPreds) { + auto *WrapPred = dyn_cast(Pred); + if (!WrapPred || WrapPred->getFlags() != SCEVWrapPredicate::IncrementNSSW) + continue; + + const SCEVAddRecExpr *AddRecToCheck = WrapPred->getExpr(); + const SCEV *ExitCount = getBackedgeTakenCount(AddRecToCheck->getLoop()); + if (isa(ExitCount)) + continue; + + const SCEV *Step = AddRecToCheck->getStepRecurrence(*this); + if (!Step->isOne()) + continue; + + ExitCount = getTruncateOrSignExtend(ExitCount, Step->getType()); + const SCEV *Add = getAddExpr(AddRecToCheck->getStart(), ExitCount); + if (isKnownPredicate(CmpInst::ICMP_SLT, Add, AddRecToCheck->getStart())) + return nullptr; + } + // Since the transformation was successful, we can now transfer the SCEV // predicates. Preds.append(TransformPreds.begin(), TransformPreds.end()); diff --git a/llvm/test/Transforms/LoopVectorize/first-order-recurrence-dead-instructions.ll b/llvm/test/Transforms/LoopVectorize/first-order-recurrence-dead-instructions.ll index 71c2da2681b3d..e6a81b6f9f6d0 100644 --- a/llvm/test/Transforms/LoopVectorize/first-order-recurrence-dead-instructions.ll +++ b/llvm/test/Transforms/LoopVectorize/first-order-recurrence-dead-instructions.ll @@ -6,16 +6,61 @@ define i8 @recurrence_phi_with_same_incoming_values_after_simplifications(i8 %fo ; CHECK-LABEL: define i8 @recurrence_phi_with_same_incoming_values_after_simplifications( ; CHECK-SAME: i8 [[FOR_START:%.*]], ptr [[DST:%.*]]) { ; CHECK-NEXT: [[ENTRY:.*]]: +; CHECK-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] +; CHECK: [[VECTOR_PH]]: +; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i8> poison, i8 [[FOR_START]], i64 0 +; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i8> [[BROADCAST_SPLATINSERT]], <4 x i8> poison, <4 x i32> zeroinitializer +; CHECK-NEXT: [[TMP0:%.*]] = shufflevector <4 x i8> [[BROADCAST_SPLAT]], <4 x i8> [[BROADCAST_SPLAT]], <4 x i32> +; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] +; CHECK: [[VECTOR_BODY]]: +; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] +; CHECK-NEXT: [[OFFSET_IDX:%.*]] = add i32 1, [[INDEX]] +; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[OFFSET_IDX]], 0 +; CHECK-NEXT: [[TMP2:%.*]] = add i32 [[OFFSET_IDX]], 1 +; CHECK-NEXT: [[TMP3:%.*]] = add i32 [[OFFSET_IDX]], 2 +; CHECK-NEXT: [[TMP4:%.*]] = add i32 [[OFFSET_IDX]], 3 +; CHECK-NEXT: [[TMP5:%.*]] = add i32 [[OFFSET_IDX]], 4 +; CHECK-NEXT: [[TMP6:%.*]] = add i32 [[OFFSET_IDX]], 5 +; CHECK-NEXT: [[TMP7:%.*]] = add i32 [[OFFSET_IDX]], 6 +; CHECK-NEXT: [[TMP8:%.*]] = add i32 [[OFFSET_IDX]], 7 +; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i8, ptr [[DST]], i32 [[TMP1]] +; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i8, ptr [[DST]], i32 [[TMP2]] +; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds i8, ptr [[DST]], i32 [[TMP3]] +; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds i8, ptr [[DST]], i32 [[TMP4]] +; CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds i8, ptr [[DST]], i32 [[TMP5]] +; CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds i8, ptr [[DST]], i32 [[TMP6]] +; CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds i8, ptr [[DST]], i32 [[TMP7]] +; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds i8, ptr [[DST]], i32 [[TMP8]] +; CHECK-NEXT: [[TMP17:%.*]] = extractelement <4 x i8> [[TMP0]], i32 0 +; CHECK-NEXT: store i8 [[TMP17]], ptr [[TMP9]], align 1 +; CHECK-NEXT: [[TMP18:%.*]] = extractelement <4 x i8> [[TMP0]], i32 1 +; CHECK-NEXT: store i8 [[TMP18]], ptr [[TMP10]], align 1 +; CHECK-NEXT: [[TMP19:%.*]] = extractelement <4 x i8> [[TMP0]], i32 2 +; CHECK-NEXT: store i8 [[TMP19]], ptr [[TMP11]], align 1 +; CHECK-NEXT: [[TMP20:%.*]] = extractelement <4 x i8> [[TMP0]], i32 3 +; CHECK-NEXT: store i8 [[TMP20]], ptr [[TMP12]], align 1 +; CHECK-NEXT: store i8 [[TMP17]], ptr [[TMP13]], align 1 +; CHECK-NEXT: store i8 [[TMP18]], ptr [[TMP14]], align 1 +; CHECK-NEXT: store i8 [[TMP19]], ptr [[TMP15]], align 1 +; CHECK-NEXT: store i8 [[TMP20]], ptr [[TMP16]], align 1 +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 8 +; CHECK-NEXT: [[TMP21:%.*]] = icmp eq i32 [[INDEX_NEXT]], -8 +; CHECK-NEXT: br i1 [[TMP21]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] +; CHECK: [[MIDDLE_BLOCK]]: +; CHECK-NEXT: br label %[[SCALAR_PH]] +; CHECK: [[SCALAR_PH]]: +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ -7, %[[MIDDLE_BLOCK]] ], [ 1, %[[ENTRY]] ] +; CHECK-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i8 [ [[FOR_START]], %[[MIDDLE_BLOCK]] ], [ [[FOR_START]], %[[ENTRY]] ] ; CHECK-NEXT: br label %[[LOOP:.*]] ; CHECK: [[LOOP]]: -; CHECK-NEXT: [[IV:%.*]] = phi i32 [ 1, %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] -; CHECK-NEXT: [[FOR:%.*]] = phi i8 [ [[FOR_START]], %[[ENTRY]] ], [ [[FOR_NEXT:%.*]], %[[LOOP]] ] +; CHECK-NEXT: [[IV:%.*]] = phi i32 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] +; CHECK-NEXT: [[FOR:%.*]] = phi i8 [ [[SCALAR_RECUR_INIT]], %[[SCALAR_PH]] ], [ [[FOR_NEXT:%.*]], %[[LOOP]] ] ; CHECK-NEXT: [[FOR_NEXT]] = and i8 [[FOR_START]], -1 ; CHECK-NEXT: [[IV_NEXT]] = add i32 [[IV]], 1 ; CHECK-NEXT: [[GEP_DST:%.*]] = getelementptr inbounds i8, ptr [[DST]], i32 [[IV]] ; CHECK-NEXT: store i8 [[FOR]], ptr [[GEP_DST]], align 1 ; CHECK-NEXT: [[EC:%.*]] = icmp eq i32 [[IV_NEXT]], 0 -; CHECK-NEXT: br i1 [[EC]], label %[[EXIT:.*]], label %[[LOOP]] +; CHECK-NEXT: br i1 [[EC]], label %[[EXIT:.*]], label %[[LOOP]], !llvm.loop [[LOOP3:![0-9]+]] ; CHECK: [[EXIT]]: ; CHECK-NEXT: [[FOR_NEXT_LCSSA:%.*]] = phi i8 [ [[FOR_NEXT]], %[[LOOP]] ] ; CHECK-NEXT: ret i8 [[FOR_NEXT_LCSSA]] @@ -61,7 +106,7 @@ define i32 @sink_after_dead_inst(ptr %A.ptr) { ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 8 ; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i16> [[STEP_ADD]], splat (i16 4) ; CHECK-NEXT: [[TMP6:%.*]] = icmp eq i32 [[INDEX_NEXT]], 16 -; CHECK-NEXT: br i1 [[TMP6]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] +; CHECK-NEXT: br i1 [[TMP6]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: ; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT_FOR_PHI:%.*]] = extractelement <4 x i32> [[TMP2]], i32 2 ; CHECK-NEXT: br label %[[FOR_END:.*]] @@ -82,7 +127,7 @@ define i32 @sink_after_dead_inst(ptr %A.ptr) { ; CHECK-NEXT: [[EXT:%.*]] = zext i1 [[B3]] to i32 ; CHECK-NEXT: [[A_GEP:%.*]] = getelementptr i32, ptr [[A_PTR]], i16 [[IV]] ; CHECK-NEXT: store i32 0, ptr [[A_GEP]], align 4 -; CHECK-NEXT: br i1 [[VEC_DEAD]], label %[[FOR_END]], label %[[LOOP]], !llvm.loop [[LOOP3:![0-9]+]] +; CHECK-NEXT: br i1 [[VEC_DEAD]], label %[[FOR_END]], label %[[LOOP]], !llvm.loop [[LOOP5:![0-9]+]] ; CHECK: [[FOR_END]]: ; CHECK-NEXT: [[FOR_LCSSA:%.*]] = phi i32 [ [[FOR]], %[[LOOP]] ], [ [[VECTOR_RECUR_EXTRACT_FOR_PHI]], %[[MIDDLE_BLOCK]] ] ; CHECK-NEXT: ret i32 [[FOR_LCSSA]] @@ -142,7 +187,7 @@ define void @sink_dead_inst(ptr %a) { ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 8 ; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i16> [[STEP_ADD]], splat (i16 4) ; CHECK-NEXT: [[TMP12:%.*]] = icmp eq i32 [[INDEX_NEXT]], 40 -; CHECK-NEXT: br i1 [[TMP12]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] +; CHECK-NEXT: br i1 [[TMP12]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: ; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x i16> [[TMP4]], i32 3 ; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT1:%.*]] = extractelement <4 x i32> [[TMP2]], i32 3 @@ -163,7 +208,7 @@ define void @sink_dead_inst(ptr %a) { ; CHECK-NEXT: [[REC_1_PREV]] = add i16 [[IV_NEXT]], 5 ; CHECK-NEXT: [[GEP:%.*]] = getelementptr i16, ptr [[A]], i16 [[IV]] ; CHECK-NEXT: store i16 [[USE_REC_1]], ptr [[GEP]], align 2 -; CHECK-NEXT: br i1 [[CMP]], label %[[FOR_END:.*]], label %[[FOR_COND]], !llvm.loop [[LOOP5:![0-9]+]] +; CHECK-NEXT: br i1 [[CMP]], label %[[FOR_END:.*]], label %[[FOR_COND]], !llvm.loop [[LOOP7:![0-9]+]] ; CHECK: [[FOR_END]]: ; CHECK-NEXT: ret void ; @@ -205,7 +250,7 @@ define void @unused_recurrence(ptr %a) { ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 8 ; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i16> [[STEP_ADD]], splat (i16 4) ; CHECK-NEXT: [[TMP2:%.*]] = icmp eq i32 [[INDEX_NEXT]], 1024 -; CHECK-NEXT: br i1 [[TMP2]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] +; CHECK-NEXT: br i1 [[TMP2]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: ; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x i16> [[TMP1]], i32 3 ; CHECK-NEXT: br label %[[SCALAR_PH]] @@ -220,7 +265,7 @@ define void @unused_recurrence(ptr %a) { ; CHECK-NEXT: [[IV_NEXT]] = add i16 [[IV]], 1 ; CHECK-NEXT: [[REC_1_PREV]] = add i16 [[IV_NEXT]], 5 ; CHECK-NEXT: [[CMP:%.*]] = icmp eq i16 [[IV]], 1000 -; CHECK-NEXT: br i1 [[CMP]], label %[[FOR_END:.*]], label %[[FOR_COND]], !llvm.loop [[LOOP7:![0-9]+]] +; CHECK-NEXT: br i1 [[CMP]], label %[[FOR_END:.*]], label %[[FOR_COND]], !llvm.loop [[LOOP9:![0-9]+]] ; CHECK: [[FOR_END]]: ; CHECK-NEXT: ret void ;