Skip to content

Commit 99cfa0c

Browse files
fhahn and github-actions[bot]
authored and committed
Automerge: [SCEV] Check if predicate is known false for predicated AddRecs. (#151134)
Similarly to llvm/llvm-project#131538, we can also try to check whether a predicate is known to be false (i.e. the AddRec is known to wrap) given the backedge-taken count. For now, this is only checked directly when we try to create predicated AddRecs. This both helps to avoid spending compile time on optimizations where we know the predicate is false, and can also enable additional vectorization (e.g. by deciding to scalarize memory accesses when we would otherwise try to create a predicated AddRec with a predicate that is always false). The initial version is quite restricted, but can be extended in follow-ups to cover more cases. PR: llvm/llvm-project#151134
2 parents eb85f8a + 36be0bb commit 99cfa0c

File tree

2 files changed

+77
-9
lines changed

2 files changed

+77
-9
lines changed

llvm/lib/Analysis/ScalarEvolution.cpp

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14952,6 +14952,29 @@ const SCEVAddRecExpr *ScalarEvolution::convertSCEVToAddRecWithPredicates(
1495214952
if (!AddRec)
1495314953
return nullptr;
1495414954

14955+
// Check if any of the transformed predicates is known to be false. In that
14956+
// case, it doesn't make sense to convert to a predicated AddRec, as the
14957+
// versioned loop will never execute.
14958+
for (const SCEVPredicate *Pred : TransformPreds) {
14959+
auto *WrapPred = dyn_cast<SCEVWrapPredicate>(Pred);
14960+
if (!WrapPred || WrapPred->getFlags() != SCEVWrapPredicate::IncrementNSSW)
14961+
continue;
14962+
14963+
const SCEVAddRecExpr *AddRecToCheck = WrapPred->getExpr();
14964+
const SCEV *ExitCount = getBackedgeTakenCount(AddRecToCheck->getLoop());
14965+
if (isa<SCEVCouldNotCompute>(ExitCount))
14966+
continue;
14967+
14968+
const SCEV *Step = AddRecToCheck->getStepRecurrence(*this);
14969+
if (!Step->isOne())
14970+
continue;
14971+
14972+
ExitCount = getTruncateOrSignExtend(ExitCount, Step->getType());
14973+
const SCEV *Add = getAddExpr(AddRecToCheck->getStart(), ExitCount);
14974+
if (isKnownPredicate(CmpInst::ICMP_SLT, Add, AddRecToCheck->getStart()))
14975+
return nullptr;
14976+
}
14977+
1495514978
// Since the transformation was successful, we can now transfer the SCEV
1495614979
// predicates.
1495714980
Preds.append(TransformPreds.begin(), TransformPreds.end());

llvm/test/Transforms/LoopVectorize/first-order-recurrence-dead-instructions.ll

Lines changed: 54 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -6,16 +6,61 @@ define i8 @recurrence_phi_with_same_incoming_values_after_simplifications(i8 %fo
66
; CHECK-LABEL: define i8 @recurrence_phi_with_same_incoming_values_after_simplifications(
77
; CHECK-SAME: i8 [[FOR_START:%.*]], ptr [[DST:%.*]]) {
88
; CHECK-NEXT: [[ENTRY:.*]]:
9+
; CHECK-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
10+
; CHECK: [[VECTOR_PH]]:
11+
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i8> poison, i8 [[FOR_START]], i64 0
12+
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i8> [[BROADCAST_SPLATINSERT]], <4 x i8> poison, <4 x i32> zeroinitializer
13+
; CHECK-NEXT: [[TMP0:%.*]] = shufflevector <4 x i8> [[BROADCAST_SPLAT]], <4 x i8> [[BROADCAST_SPLAT]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
14+
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
15+
; CHECK: [[VECTOR_BODY]]:
16+
; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
17+
; CHECK-NEXT: [[OFFSET_IDX:%.*]] = add i32 1, [[INDEX]]
18+
; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[OFFSET_IDX]], 0
19+
; CHECK-NEXT: [[TMP2:%.*]] = add i32 [[OFFSET_IDX]], 1
20+
; CHECK-NEXT: [[TMP3:%.*]] = add i32 [[OFFSET_IDX]], 2
21+
; CHECK-NEXT: [[TMP4:%.*]] = add i32 [[OFFSET_IDX]], 3
22+
; CHECK-NEXT: [[TMP5:%.*]] = add i32 [[OFFSET_IDX]], 4
23+
; CHECK-NEXT: [[TMP6:%.*]] = add i32 [[OFFSET_IDX]], 5
24+
; CHECK-NEXT: [[TMP7:%.*]] = add i32 [[OFFSET_IDX]], 6
25+
; CHECK-NEXT: [[TMP8:%.*]] = add i32 [[OFFSET_IDX]], 7
26+
; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i8, ptr [[DST]], i32 [[TMP1]]
27+
; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i8, ptr [[DST]], i32 [[TMP2]]
28+
; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds i8, ptr [[DST]], i32 [[TMP3]]
29+
; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds i8, ptr [[DST]], i32 [[TMP4]]
30+
; CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds i8, ptr [[DST]], i32 [[TMP5]]
31+
; CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds i8, ptr [[DST]], i32 [[TMP6]]
32+
; CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds i8, ptr [[DST]], i32 [[TMP7]]
33+
; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds i8, ptr [[DST]], i32 [[TMP8]]
34+
; CHECK-NEXT: [[TMP17:%.*]] = extractelement <4 x i8> [[TMP0]], i32 0
35+
; CHECK-NEXT: store i8 [[TMP17]], ptr [[TMP9]], align 1
36+
; CHECK-NEXT: [[TMP18:%.*]] = extractelement <4 x i8> [[TMP0]], i32 1
37+
; CHECK-NEXT: store i8 [[TMP18]], ptr [[TMP10]], align 1
38+
; CHECK-NEXT: [[TMP19:%.*]] = extractelement <4 x i8> [[TMP0]], i32 2
39+
; CHECK-NEXT: store i8 [[TMP19]], ptr [[TMP11]], align 1
40+
; CHECK-NEXT: [[TMP20:%.*]] = extractelement <4 x i8> [[TMP0]], i32 3
41+
; CHECK-NEXT: store i8 [[TMP20]], ptr [[TMP12]], align 1
42+
; CHECK-NEXT: store i8 [[TMP17]], ptr [[TMP13]], align 1
43+
; CHECK-NEXT: store i8 [[TMP18]], ptr [[TMP14]], align 1
44+
; CHECK-NEXT: store i8 [[TMP19]], ptr [[TMP15]], align 1
45+
; CHECK-NEXT: store i8 [[TMP20]], ptr [[TMP16]], align 1
46+
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 8
47+
; CHECK-NEXT: [[TMP21:%.*]] = icmp eq i32 [[INDEX_NEXT]], -8
48+
; CHECK-NEXT: br i1 [[TMP21]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
49+
; CHECK: [[MIDDLE_BLOCK]]:
50+
; CHECK-NEXT: br label %[[SCALAR_PH]]
51+
; CHECK: [[SCALAR_PH]]:
52+
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ -7, %[[MIDDLE_BLOCK]] ], [ 1, %[[ENTRY]] ]
53+
; CHECK-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i8 [ [[FOR_START]], %[[MIDDLE_BLOCK]] ], [ [[FOR_START]], %[[ENTRY]] ]
954
; CHECK-NEXT: br label %[[LOOP:.*]]
1055
; CHECK: [[LOOP]]:
11-
; CHECK-NEXT: [[IV:%.*]] = phi i32 [ 1, %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
12-
; CHECK-NEXT: [[FOR:%.*]] = phi i8 [ [[FOR_START]], %[[ENTRY]] ], [ [[FOR_NEXT:%.*]], %[[LOOP]] ]
56+
; CHECK-NEXT: [[IV:%.*]] = phi i32 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
57+
; CHECK-NEXT: [[FOR:%.*]] = phi i8 [ [[SCALAR_RECUR_INIT]], %[[SCALAR_PH]] ], [ [[FOR_NEXT:%.*]], %[[LOOP]] ]
1358
; CHECK-NEXT: [[FOR_NEXT]] = and i8 [[FOR_START]], -1
1459
; CHECK-NEXT: [[IV_NEXT]] = add i32 [[IV]], 1
1560
; CHECK-NEXT: [[GEP_DST:%.*]] = getelementptr inbounds i8, ptr [[DST]], i32 [[IV]]
1661
; CHECK-NEXT: store i8 [[FOR]], ptr [[GEP_DST]], align 1
1762
; CHECK-NEXT: [[EC:%.*]] = icmp eq i32 [[IV_NEXT]], 0
18-
; CHECK-NEXT: br i1 [[EC]], label %[[EXIT:.*]], label %[[LOOP]]
63+
; CHECK-NEXT: br i1 [[EC]], label %[[EXIT:.*]], label %[[LOOP]], !llvm.loop [[LOOP3:![0-9]+]]
1964
; CHECK: [[EXIT]]:
2065
; CHECK-NEXT: [[FOR_NEXT_LCSSA:%.*]] = phi i8 [ [[FOR_NEXT]], %[[LOOP]] ]
2166
; CHECK-NEXT: ret i8 [[FOR_NEXT_LCSSA]]
@@ -61,7 +106,7 @@ define i32 @sink_after_dead_inst(ptr %A.ptr) {
61106
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 8
62107
; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i16> [[STEP_ADD]], splat (i16 4)
63108
; CHECK-NEXT: [[TMP6:%.*]] = icmp eq i32 [[INDEX_NEXT]], 16
64-
; CHECK-NEXT: br i1 [[TMP6]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
109+
; CHECK-NEXT: br i1 [[TMP6]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
65110
; CHECK: [[MIDDLE_BLOCK]]:
66111
; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT_FOR_PHI:%.*]] = extractelement <4 x i32> [[TMP2]], i32 2
67112
; CHECK-NEXT: br label %[[FOR_END:.*]]
@@ -82,7 +127,7 @@ define i32 @sink_after_dead_inst(ptr %A.ptr) {
82127
; CHECK-NEXT: [[EXT:%.*]] = zext i1 [[B3]] to i32
83128
; CHECK-NEXT: [[A_GEP:%.*]] = getelementptr i32, ptr [[A_PTR]], i16 [[IV]]
84129
; CHECK-NEXT: store i32 0, ptr [[A_GEP]], align 4
85-
; CHECK-NEXT: br i1 [[VEC_DEAD]], label %[[FOR_END]], label %[[LOOP]], !llvm.loop [[LOOP3:![0-9]+]]
130+
; CHECK-NEXT: br i1 [[VEC_DEAD]], label %[[FOR_END]], label %[[LOOP]], !llvm.loop [[LOOP5:![0-9]+]]
86131
; CHECK: [[FOR_END]]:
87132
; CHECK-NEXT: [[FOR_LCSSA:%.*]] = phi i32 [ [[FOR]], %[[LOOP]] ], [ [[VECTOR_RECUR_EXTRACT_FOR_PHI]], %[[MIDDLE_BLOCK]] ]
88133
; CHECK-NEXT: ret i32 [[FOR_LCSSA]]
@@ -142,7 +187,7 @@ define void @sink_dead_inst(ptr %a) {
142187
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 8
143188
; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i16> [[STEP_ADD]], splat (i16 4)
144189
; CHECK-NEXT: [[TMP12:%.*]] = icmp eq i32 [[INDEX_NEXT]], 40
145-
; CHECK-NEXT: br i1 [[TMP12]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
190+
; CHECK-NEXT: br i1 [[TMP12]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
146191
; CHECK: [[MIDDLE_BLOCK]]:
147192
; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x i16> [[TMP4]], i32 3
148193
; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT1:%.*]] = extractelement <4 x i32> [[TMP2]], i32 3
@@ -163,7 +208,7 @@ define void @sink_dead_inst(ptr %a) {
163208
; CHECK-NEXT: [[REC_1_PREV]] = add i16 [[IV_NEXT]], 5
164209
; CHECK-NEXT: [[GEP:%.*]] = getelementptr i16, ptr [[A]], i16 [[IV]]
165210
; CHECK-NEXT: store i16 [[USE_REC_1]], ptr [[GEP]], align 2
166-
; CHECK-NEXT: br i1 [[CMP]], label %[[FOR_END:.*]], label %[[FOR_COND]], !llvm.loop [[LOOP5:![0-9]+]]
211+
; CHECK-NEXT: br i1 [[CMP]], label %[[FOR_END:.*]], label %[[FOR_COND]], !llvm.loop [[LOOP7:![0-9]+]]
167212
; CHECK: [[FOR_END]]:
168213
; CHECK-NEXT: ret void
169214
;
@@ -205,7 +250,7 @@ define void @unused_recurrence(ptr %a) {
205250
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 8
206251
; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i16> [[STEP_ADD]], splat (i16 4)
207252
; CHECK-NEXT: [[TMP2:%.*]] = icmp eq i32 [[INDEX_NEXT]], 1024
208-
; CHECK-NEXT: br i1 [[TMP2]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
253+
; CHECK-NEXT: br i1 [[TMP2]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]]
209254
; CHECK: [[MIDDLE_BLOCK]]:
210255
; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x i16> [[TMP1]], i32 3
211256
; CHECK-NEXT: br label %[[SCALAR_PH]]
@@ -220,7 +265,7 @@ define void @unused_recurrence(ptr %a) {
220265
; CHECK-NEXT: [[IV_NEXT]] = add i16 [[IV]], 1
221266
; CHECK-NEXT: [[REC_1_PREV]] = add i16 [[IV_NEXT]], 5
222267
; CHECK-NEXT: [[CMP:%.*]] = icmp eq i16 [[IV]], 1000
223-
; CHECK-NEXT: br i1 [[CMP]], label %[[FOR_END:.*]], label %[[FOR_COND]], !llvm.loop [[LOOP7:![0-9]+]]
268+
; CHECK-NEXT: br i1 [[CMP]], label %[[FOR_END:.*]], label %[[FOR_COND]], !llvm.loop [[LOOP9:![0-9]+]]
224269
; CHECK: [[FOR_END]]:
225270
; CHECK-NEXT: ret void
226271
;

0 commit comments

Comments
 (0)