Skip to content

Commit 4d823ad

Browse files
committed
[LV] Don't vectorize epilogue with scalable VF if no iterations remain.
Currently we may try to vectorize the epilogue with a scalable VF even if no iterations remain after a main vector loop with a fixed VF. Update selectEpilogueVectorizationFactor to compute the number of remaining iterations up front whenever the main loop VF is fixed, and to exit early if no epilogue iterations remain.
1 parent 9311f38 commit 4d823ad

File tree

4 files changed

+154
-170
lines changed

4 files changed

+154
-170
lines changed

llvm/lib/Transforms/Vectorize/LoopVectorize.cpp

Lines changed: 20 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -4479,6 +4479,25 @@ VectorizationFactor LoopVectorizationPlanner::selectEpilogueVectorizationFactor(
44794479
Type *TCType = Legal->getWidestInductionType();
44804480
const SCEV *RemainingIterations = nullptr;
44814481
unsigned MaxTripCount = 0;
4482+
if (MainLoopVF.isFixed()) {
4483+
const SCEV *TC = vputils::getSCEVExprForVPValue(
4484+
getPlanFor(MainLoopVF).getTripCount(), SE);
4485+
assert(!isa<SCEVCouldNotCompute>(TC) &&
4486+
"Trip count SCEV must be computable");
4487+
RemainingIterations = SE.getURemExpr(
4488+
TC, SE.getConstant(TCType, MainLoopVF.getFixedValue() * IC));
4489+
if (RemainingIterations->isZero())
4490+
return Result;
4491+
4492+
MaxTripCount = MainLoopVF.getFixedValue() * IC - 1;
4493+
if (SE.isKnownPredicate(CmpInst::ICMP_ULT, RemainingIterations,
4494+
SE.getConstant(TCType, MaxTripCount))) {
4495+
MaxTripCount = SE.getUnsignedRangeMax(RemainingIterations).getZExtValue();
4496+
}
4497+
LLVM_DEBUG(dbgs() << "LEV: Maximum Trip Count for Epilogue: "
4498+
<< MaxTripCount << "\n");
4499+
}
4500+
44824501
for (auto &NextVF : ProfitableVFs) {
44834502
// Skip candidate VFs without a corresponding VPlan.
44844503
if (!hasPlanWithVF(NextVF.Width))
@@ -4496,24 +4515,7 @@ VectorizationFactor LoopVectorizationPlanner::selectEpilogueVectorizationFactor(
44964515

44974516
// If NextVF is greater than the number of remaining iterations, the
44984517
// epilogue loop would be dead. Skip such factors.
4499-
if (!MainLoopVF.isScalable() && !NextVF.Width.isScalable()) {
4500-
// TODO: extend to support scalable VFs.
4501-
if (!RemainingIterations) {
4502-
const SCEV *TC = vputils::getSCEVExprForVPValue(
4503-
getPlanFor(NextVF.Width).getTripCount(), SE);
4504-
assert(!isa<SCEVCouldNotCompute>(TC) &&
4505-
"Trip count SCEV must be computable");
4506-
RemainingIterations = SE.getURemExpr(
4507-
TC, SE.getConstant(TCType, MainLoopVF.getFixedValue() * IC));
4508-
MaxTripCount = MainLoopVF.getFixedValue() * IC - 1;
4509-
if (SE.isKnownPredicate(CmpInst::ICMP_ULT, RemainingIterations,
4510-
SE.getConstant(TCType, MaxTripCount))) {
4511-
MaxTripCount =
4512-
SE.getUnsignedRangeMax(RemainingIterations).getZExtValue();
4513-
}
4514-
LLVM_DEBUG(dbgs() << "LEV: Maximum Trip Count for Epilogue: "
4515-
<< MaxTripCount << "\n");
4516-
}
4518+
if (RemainingIterations && !NextVF.Width.isScalable()) {
45174519
if (SE.isKnownPredicate(
45184520
CmpInst::ICMP_UGT,
45194521
SE.getConstant(TCType, NextVF.Width.getFixedValue()),

llvm/test/Transforms/LoopVectorize/AArch64/check-prof-info.ll

Lines changed: 7 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -46,27 +46,17 @@ define void @_Z3foov() {
4646
; CHECK-V2-IC4-LABEL: define void @_Z3foov(
4747
; CHECK-V2-IC4-SAME: ) #[[ATTR0:[0-9]+]] {
4848
; CHECK-V2-IC4: [[VEC_EPILOG_VECTOR_BODY1:.*:]]
49-
; CHECK-V2-IC4: br i1 [[MIN_ITERS_CHECK:%.*]], label %[[VEC_EPILOG_SCALAR_PH:.*]], label %[[VECTOR_MAIN_LOOP_ITER_CHECK:.*]], !prof [[PROF0:![0-9]+]]
50-
; CHECK-V2-IC4: [[VECTOR_MAIN_LOOP_ITER_CHECK]]:
51-
; CHECK-V2-IC4: br i1 false, label %[[VEC_EPILOG_PH:.*]], label %[[VECTOR_PH:.*]], !prof [[PROF0]]
49+
; CHECK-V2-IC4: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]], !prof [[PROF0:![0-9]+]]
5250
; CHECK-V2-IC4: [[VECTOR_PH]]:
5351
; CHECK-V2-IC4: br label %[[VECTOR_BODY:.*]]
5452
; CHECK-V2-IC4: [[VECTOR_BODY]]:
55-
; CHECK-V2-IC4: br i1 [[TMP12:%.*]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !prof [[PROF1:![0-9]+]], !llvm.loop [[LOOP2:![0-9]+]]
53+
; CHECK-V2-IC4: br i1 [[TMP10:%.*]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !prof [[PROF1:![0-9]+]], !llvm.loop [[LOOP2:![0-9]+]]
5654
; CHECK-V2-IC4: [[MIDDLE_BLOCK]]:
57-
; CHECK-V2-IC4: br i1 true, label %[[FOR_COND_CLEANUP:.*]], label %[[VEC_EPILOG_ITER_CHECK:.*]], !prof [[PROF5:![0-9]+]]
58-
; CHECK-V2-IC4: [[VEC_EPILOG_ITER_CHECK]]:
59-
; CHECK-V2-IC4: br i1 [[MIN_EPILOG_ITERS_CHECK:%.*]], label %[[VEC_EPILOG_SCALAR_PH]], label %[[VEC_EPILOG_PH]], !prof [[PROF6:![0-9]+]]
60-
; CHECK-V2-IC4: [[VEC_EPILOG_PH]]:
61-
; CHECK-V2-IC4: br label %[[VEC_EPILOG_VECTOR_BODY:.*]]
62-
; CHECK-V2-IC4: [[VEC_EPILOG_VECTOR_BODY]]:
63-
; CHECK-V2-IC4: br i1 [[TMP23:%.*]], label %[[VEC_EPILOG_MIDDLE_BLOCK:.*]], label %[[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]]
64-
; CHECK-V2-IC4: [[VEC_EPILOG_MIDDLE_BLOCK]]:
65-
; CHECK-V2-IC4: br i1 [[CMP_N:%.*]], label %[[FOR_COND_CLEANUP]], label %[[VEC_EPILOG_SCALAR_PH]], !prof [[PROF8:![0-9]+]]
66-
; CHECK-V2-IC4: [[VEC_EPILOG_SCALAR_PH]]:
55+
; CHECK-V2-IC4: br i1 true, label %[[FOR_COND_CLEANUP:.*]], label %[[SCALAR_PH]], !prof [[PROF5:![0-9]+]]
56+
; CHECK-V2-IC4: [[SCALAR_PH]]:
6757
; CHECK-V2-IC4: br label %[[FOR_BODY:.*]]
6858
; CHECK-V2-IC4: [[FOR_BODY]]:
69-
; CHECK-V2-IC4: br i1 [[EXITCOND:%.*]], label %[[FOR_COND_CLEANUP]], label %[[FOR_BODY]], !prof [[PROF9:![0-9]+]], !llvm.loop [[LOOP10:![0-9]+]]
59+
; CHECK-V2-IC4: br i1 [[EXITCOND:%.*]], label %[[FOR_COND_CLEANUP]], label %[[FOR_BODY]], !prof [[PROF6:![0-9]+]], !llvm.loop [[LOOP7:![0-9]+]]
7060
; CHECK-V2-IC4: [[FOR_COND_CLEANUP]]:
7161
;
7262
entry:
@@ -111,9 +101,6 @@ for.cond.cleanup: ; preds = %for.body
111101
; CHECK-V2-IC4: [[META3]] = !{!"llvm.loop.isvectorized", i32 1}
112102
; CHECK-V2-IC4: [[META4]] = !{!"llvm.loop.unroll.runtime.disable"}
113103
; CHECK-V2-IC4: [[PROF5]] = !{!"branch_weights", i32 1, i32 15}
114-
; CHECK-V2-IC4: [[PROF6]] = !{!"branch_weights", i32 2, i32 0}
115-
; CHECK-V2-IC4: [[LOOP7]] = distinct !{[[LOOP7]], [[META3]], [[META4]]}
116-
; CHECK-V2-IC4: [[PROF8]] = !{!"branch_weights", i32 1, i32 1}
117-
; CHECK-V2-IC4: [[PROF9]] = !{!"branch_weights", i32 0, i32 0}
118-
; CHECK-V2-IC4: [[LOOP10]] = distinct !{[[LOOP10]], [[META4]], [[META3]]}
104+
; CHECK-V2-IC4: [[PROF6]] = !{!"branch_weights", i32 0, i32 0}
105+
; CHECK-V2-IC4: [[LOOP7]] = distinct !{[[LOOP7]], [[META4]], [[META3]]}
119106
;.

llvm/test/Transforms/LoopVectorize/AArch64/partial-reduce-dot-product-epilogue.ll

Lines changed: 7 additions & 67 deletions
Original file line numberDiff line numberDiff line change
@@ -7,11 +7,7 @@ target triple = "aarch64-none-unknown-elf"
77
define i32 @dotp(ptr %a, ptr %b) #0 {
88
; CHECK-LABEL: define i32 @dotp(
99
; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0:[0-9]+]] {
10-
; CHECK-NEXT: iter.check:
11-
; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
12-
; CHECK-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP0]], 4
13-
; CHECK-NEXT: br i1 false, label [[VEC_EPILOG_SCALAR_PH:%.*]], label [[VECTOR_MAIN_LOOP_ITER_CHECK:%.*]]
14-
; CHECK: vector.main.loop.iter.check:
10+
; CHECK-NEXT: entry:
1511
; CHECK-NEXT: br i1 false, label [[VEC_EPILOG_PH:%.*]], label [[VECTOR_PH:%.*]]
1612
; CHECK: vector.ph:
1713
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
@@ -33,64 +29,8 @@ define i32 @dotp(ptr %a, ptr %b) #0 {
3329
; CHECK-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
3430
; CHECK: middle.block:
3531
; CHECK-NEXT: [[TMP11:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[PARTIAL_REDUCE]])
36-
; CHECK-NEXT: br i1 true, label [[FOR_EXIT:%.*]], label [[VEC_EPILOG_ITER_CHECK:%.*]]
37-
; CHECK: vec.epilog.iter.check:
38-
; CHECK-NEXT: [[TMP12:%.*]] = call i64 @llvm.vscale.i64()
39-
; CHECK-NEXT: [[TMP13:%.*]] = mul nuw i64 [[TMP12]], 4
40-
; CHECK-NEXT: [[MIN_EPILOG_ITERS_CHECK:%.*]] = icmp ult i64 0, [[TMP13]]
41-
; CHECK-NEXT: br i1 [[MIN_EPILOG_ITERS_CHECK]], label [[VEC_EPILOG_SCALAR_PH]], label [[VEC_EPILOG_PH]]
42-
; CHECK: vec.epilog.ph:
43-
; CHECK-NEXT: [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i64 [ 1024, [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ]
44-
; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[TMP11]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ]
45-
; CHECK-NEXT: [[TMP14:%.*]] = call i64 @llvm.vscale.i64()
46-
; CHECK-NEXT: [[TMP15:%.*]] = mul nuw i64 [[TMP14]], 4
47-
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP15]]
48-
; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]]
49-
; CHECK-NEXT: [[TMP16:%.*]] = call i64 @llvm.vscale.i64()
50-
; CHECK-NEXT: [[TMP17:%.*]] = mul nuw i64 [[TMP16]], 4
51-
; CHECK-NEXT: [[TMP18:%.*]] = insertelement <vscale x 4 x i32> zeroinitializer, i32 [[BC_MERGE_RDX]], i32 0
52-
; CHECK-NEXT: br label [[VEC_EPILOG_VECTOR_BODY:%.*]]
53-
; CHECK: vec.epilog.vector.body:
54-
; CHECK-NEXT: [[INDEX2:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], [[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT6:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ]
55-
; CHECK-NEXT: [[VEC_PHI3:%.*]] = phi <vscale x 4 x i32> [ [[TMP18]], [[VEC_EPILOG_PH]] ], [ [[TMP27:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ]
56-
; CHECK-NEXT: [[TMP20:%.*]] = getelementptr i8, ptr [[A]], i64 [[INDEX2]]
57-
; CHECK-NEXT: [[TMP21:%.*]] = getelementptr i8, ptr [[TMP20]], i32 0
58-
; CHECK-NEXT: [[WIDE_LOAD4:%.*]] = load <vscale x 4 x i8>, ptr [[TMP21]], align 1
59-
; CHECK-NEXT: [[TMP22:%.*]] = zext <vscale x 4 x i8> [[WIDE_LOAD4]] to <vscale x 4 x i32>
60-
; CHECK-NEXT: [[TMP23:%.*]] = getelementptr i8, ptr [[B]], i64 [[INDEX2]]
61-
; CHECK-NEXT: [[TMP24:%.*]] = getelementptr i8, ptr [[TMP23]], i32 0
62-
; CHECK-NEXT: [[WIDE_LOAD5:%.*]] = load <vscale x 4 x i8>, ptr [[TMP24]], align 1
63-
; CHECK-NEXT: [[TMP25:%.*]] = zext <vscale x 4 x i8> [[WIDE_LOAD5]] to <vscale x 4 x i32>
64-
; CHECK-NEXT: [[TMP26:%.*]] = mul <vscale x 4 x i32> [[TMP25]], [[TMP22]]
65-
; CHECK-NEXT: [[TMP27]] = add <vscale x 4 x i32> [[TMP26]], [[VEC_PHI3]]
66-
; CHECK-NEXT: [[INDEX_NEXT6]] = add nuw i64 [[INDEX2]], [[TMP17]]
67-
; CHECK-NEXT: [[TMP28:%.*]] = icmp eq i64 [[INDEX_NEXT6]], [[N_VEC]]
68-
; CHECK-NEXT: br i1 [[TMP28]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]]
69-
; CHECK: vec.epilog.middle.block:
70-
; CHECK-NEXT: [[TMP29:%.*]] = call i32 @llvm.vector.reduce.add.nxv4i32(<vscale x 4 x i32> [[TMP27]])
71-
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 1024, [[N_VEC]]
72-
; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_EXIT]], label [[VEC_EPILOG_SCALAR_PH]]
73-
; CHECK: vec.epilog.scalar.ph:
74-
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ 1024, [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[ITER_CHECK:%.*]] ]
75-
; CHECK-NEXT: [[BC_MERGE_RDX7:%.*]] = phi i32 [ [[TMP29]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[TMP11]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[ITER_CHECK]] ]
76-
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
77-
; CHECK: for.body:
78-
; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[VEC_EPILOG_SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ]
79-
; CHECK-NEXT: [[ACCUM:%.*]] = phi i32 [ [[BC_MERGE_RDX7]], [[VEC_EPILOG_SCALAR_PH]] ], [ [[ADD:%.*]], [[FOR_BODY]] ]
80-
; CHECK-NEXT: [[GEP_A:%.*]] = getelementptr i8, ptr [[A]], i64 [[IV]]
81-
; CHECK-NEXT: [[LOAD_A:%.*]] = load i8, ptr [[GEP_A]], align 1
82-
; CHECK-NEXT: [[EXT_A:%.*]] = zext i8 [[LOAD_A]] to i32
83-
; CHECK-NEXT: [[GEP_B:%.*]] = getelementptr i8, ptr [[B]], i64 [[IV]]
84-
; CHECK-NEXT: [[LOAD_B:%.*]] = load i8, ptr [[GEP_B]], align 1
85-
; CHECK-NEXT: [[EXT_B:%.*]] = zext i8 [[LOAD_B]] to i32
86-
; CHECK-NEXT: [[MUL:%.*]] = mul i32 [[EXT_B]], [[EXT_A]]
87-
; CHECK-NEXT: [[ADD]] = add i32 [[MUL]], [[ACCUM]]
88-
; CHECK-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1
89-
; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], 1024
90-
; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_EXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
91-
; CHECK: for.exit:
92-
; CHECK-NEXT: [[ADD_LCSSA:%.*]] = phi i32 [ [[ADD]], [[FOR_BODY]] ], [ [[TMP11]], [[MIDDLE_BLOCK]] ], [ [[TMP29]], [[VEC_EPILOG_MIDDLE_BLOCK]] ]
93-
; CHECK-NEXT: ret i32 [[ADD_LCSSA]]
32+
; CHECK-NEXT: br i1 true, label [[FOR_EXIT:%.*]], label [[VEC_EPILOG_PH]]
33+
; CHECK: scalar.ph:
9434
;
9535
entry:
9636
br label %for.body
@@ -142,7 +82,7 @@ define void @dotp_small_epilogue_vf(i64 %idx.neg, i8 %a) #1 {
14282
; CHECK-NEXT: [[PARTIAL_REDUCE]] = call <4 x i32> @llvm.experimental.vector.partial.reduce.add.v4i32.v16i32(<4 x i32> [[VEC_PHI]], <16 x i32> [[TMP4]])
14383
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16
14484
; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[IV_NEXT]]
145-
; CHECK-NEXT: br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]]
85+
; CHECK-NEXT: br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
14686
; CHECK: middle.block:
14787
; CHECK-NEXT: [[TMP6:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[PARTIAL_REDUCE]])
14888
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP0]], [[IV_NEXT]]
@@ -174,7 +114,7 @@ define void @dotp_small_epilogue_vf(i64 %idx.neg, i8 %a) #1 {
174114
; CHECK-NEXT: [[TMP13]] = add <4 x i32> [[TMP14]], [[VEC_PHI9]]
175115
; CHECK-NEXT: [[INDEX_NEXT14]] = add nuw i64 [[INDEX9]], 4
176116
; CHECK-NEXT: [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT14]], [[N_VEC5]]
177-
; CHECK-NEXT: br i1 [[TMP12]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
117+
; CHECK-NEXT: br i1 [[TMP12]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]]
178118
; CHECK: vec.epilog.middle.block:
179119
; CHECK-NEXT: [[TMP15:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP13]])
180120
; CHECK-NEXT: [[CMP_N15:%.*]] = icmp eq i64 [[TMP0]], [[N_VEC5]]
@@ -198,7 +138,7 @@ define void @dotp_small_epilogue_vf(i64 %idx.neg, i8 %a) #1 {
198138
; CHECK-NEXT: [[CMP_IV_NEG:%.*]] = icmp ugt i64 [[IV_NEG]], 0
199139
; CHECK-NEXT: [[CMP_IV:%.*]] = icmp ne i64 [[ACCUM1]], -1
200140
; CHECK-NEXT: [[EXITCOND:%.*]] = and i1 [[CMP_IV_NEG]], [[CMP_IV]]
201-
; CHECK-NEXT: br i1 [[EXITCOND]], label [[WHILE_BODY1]], label [[WHILE_END_LOOPEXIT]], !llvm.loop [[LOOP7:![0-9]+]]
141+
; CHECK-NEXT: br i1 [[EXITCOND]], label [[WHILE_BODY1]], label [[WHILE_END_LOOPEXIT]], !llvm.loop [[LOOP6:![0-9]+]]
202142
; CHECK: while.end.loopexit:
203143
; CHECK-NEXT: [[RESULT:%.*]] = phi i32 [ [[ADD]], [[WHILE_BODY1]] ], [ [[TMP6]], [[MIDDLE_BLOCK]] ], [ [[TMP15]], [[VEC_EPILOG_MIDDLE_BLOCK]] ]
204144
; CHECK-NEXT: ret void
@@ -557,7 +497,7 @@ define i32 @dotp_predicated(i64 %N, ptr %a, ptr %b) {
557497
; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 16
558498
; CHECK-NEXT: [[VEC_IND_NEXT]] = add <16 x i64> [[VEC_IND]], splat (i64 16)
559499
; CHECK-NEXT: [[TMP181:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
560-
; CHECK-NEXT: br i1 [[TMP181]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]]
500+
; CHECK-NEXT: br i1 [[TMP181]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]]
561501
; CHECK: middle.block:
562502
; CHECK-NEXT: [[TMP182:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[PARTIAL_REDUCE]])
563503
; CHECK-NEXT: br label [[EXIT:%.*]]

0 commit comments

Comments
 (0)