Skip to content

Commit a84d41f

Browse files
fhahn authored and mahesh-attarde committed
[LV] Don't vectorize epilogue with scalable VF if no iterations remain. (llvm#149789)
Currently we may try to vectorize the epilogue with a scalable VF, even if there are no remaining iterations after the main vector loop with a fixed VF. Update selectEpilogueVectorizationFactor to always compute the number of remaining iterations and exit early if no epilogue iterations remain. Fixes llvm#149726. PR: llvm#149789.
1 parent a3c6403 commit a84d41f

File tree

5 files changed

+436
-251
lines changed

5 files changed

+436
-251
lines changed

llvm/lib/Transforms/Vectorize/LoopVectorize.cpp

Lines changed: 23 additions & 18 deletions
Original file line number | Diff line number | Diff line change
@@ -4479,6 +4479,28 @@ VectorizationFactor LoopVectorizationPlanner::selectEpilogueVectorizationFactor(
44794479
Type *TCType = Legal->getWidestInductionType();
44804480
const SCEV *RemainingIterations = nullptr;
44814481
unsigned MaxTripCount = 0;
4482+
if (MainLoopVF.isFixed()) {
4483+
// TODO: extend to support scalable VFs.
4484+
const SCEV *TC = vputils::getSCEVExprForVPValue(
4485+
getPlanFor(MainLoopVF).getTripCount(), SE);
4486+
assert(!isa<SCEVCouldNotCompute>(TC) &&
4487+
"Trip count SCEV must be computable");
4488+
RemainingIterations = SE.getURemExpr(
4489+
TC, SE.getConstant(TCType, MainLoopVF.getFixedValue() * IC));
4490+
4491+
// No iterations left to process in the epilogue.
4492+
if (RemainingIterations->isZero())
4493+
return Result;
4494+
4495+
MaxTripCount = MainLoopVF.getFixedValue() * IC - 1;
4496+
if (SE.isKnownPredicate(CmpInst::ICMP_ULT, RemainingIterations,
4497+
SE.getConstant(TCType, MaxTripCount))) {
4498+
MaxTripCount = SE.getUnsignedRangeMax(RemainingIterations).getZExtValue();
4499+
}
4500+
LLVM_DEBUG(dbgs() << "LEV: Maximum Trip Count for Epilogue: "
4501+
<< MaxTripCount << "\n");
4502+
}
4503+
44824504
for (auto &NextVF : ProfitableVFs) {
44834505
// Skip candidate VFs without a corresponding VPlan.
44844506
if (!hasPlanWithVF(NextVF.Width))
@@ -4496,24 +4518,7 @@ VectorizationFactor LoopVectorizationPlanner::selectEpilogueVectorizationFactor(
44964518

44974519
// If NextVF is greater than the number of remaining iterations, the
44984520
// epilogue loop would be dead. Skip such factors.
4499-
if (!MainLoopVF.isScalable() && !NextVF.Width.isScalable()) {
4500-
// TODO: extend to support scalable VFs.
4501-
if (!RemainingIterations) {
4502-
const SCEV *TC = vputils::getSCEVExprForVPValue(
4503-
getPlanFor(NextVF.Width).getTripCount(), SE);
4504-
assert(!isa<SCEVCouldNotCompute>(TC) &&
4505-
"Trip count SCEV must be computable");
4506-
RemainingIterations = SE.getURemExpr(
4507-
TC, SE.getConstant(TCType, MainLoopVF.getFixedValue() * IC));
4508-
MaxTripCount = MainLoopVF.getFixedValue() * IC - 1;
4509-
if (SE.isKnownPredicate(CmpInst::ICMP_ULT, RemainingIterations,
4510-
SE.getConstant(TCType, MaxTripCount))) {
4511-
MaxTripCount =
4512-
SE.getUnsignedRangeMax(RemainingIterations).getZExtValue();
4513-
}
4514-
LLVM_DEBUG(dbgs() << "LEV: Maximum Trip Count for Epilogue: "
4515-
<< MaxTripCount << "\n");
4516-
}
4521+
if (RemainingIterations && !NextVF.Width.isScalable()) {
45174522
if (SE.isKnownPredicate(
45184523
CmpInst::ICMP_UGT,
45194524
SE.getConstant(TCType, NextVF.Width.getFixedValue()),

llvm/test/Transforms/LoopVectorize/AArch64/check-prof-info.ll

Lines changed: 7 additions & 20 deletions
Original file line number | Diff line number | Diff line change
@@ -46,27 +46,17 @@ define void @_Z3foov() {
4646
; CHECK-V2-IC4-LABEL: define void @_Z3foov(
4747
; CHECK-V2-IC4-SAME: ) #[[ATTR0:[0-9]+]] {
4848
; CHECK-V2-IC4: [[VEC_EPILOG_VECTOR_BODY1:.*:]]
49-
; CHECK-V2-IC4: br i1 [[MIN_ITERS_CHECK:%.*]], label %[[VEC_EPILOG_SCALAR_PH:.*]], label %[[VECTOR_MAIN_LOOP_ITER_CHECK:.*]], !prof [[PROF0:![0-9]+]]
50-
; CHECK-V2-IC4: [[VECTOR_MAIN_LOOP_ITER_CHECK]]:
51-
; CHECK-V2-IC4: br i1 false, label %[[VEC_EPILOG_PH:.*]], label %[[VECTOR_PH:.*]], !prof [[PROF0]]
49+
; CHECK-V2-IC4: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]], !prof [[PROF0:![0-9]+]]
5250
; CHECK-V2-IC4: [[VECTOR_PH]]:
5351
; CHECK-V2-IC4: br label %[[VECTOR_BODY:.*]]
5452
; CHECK-V2-IC4: [[VECTOR_BODY]]:
55-
; CHECK-V2-IC4: br i1 [[TMP12:%.*]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !prof [[PROF1:![0-9]+]], !llvm.loop [[LOOP2:![0-9]+]]
53+
; CHECK-V2-IC4: br i1 [[TMP10:%.*]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !prof [[PROF1:![0-9]+]], !llvm.loop [[LOOP2:![0-9]+]]
5654
; CHECK-V2-IC4: [[MIDDLE_BLOCK]]:
57-
; CHECK-V2-IC4: br i1 true, label %[[FOR_COND_CLEANUP:.*]], label %[[VEC_EPILOG_ITER_CHECK:.*]], !prof [[PROF5:![0-9]+]]
58-
; CHECK-V2-IC4: [[VEC_EPILOG_ITER_CHECK]]:
59-
; CHECK-V2-IC4: br i1 [[MIN_EPILOG_ITERS_CHECK:%.*]], label %[[VEC_EPILOG_SCALAR_PH]], label %[[VEC_EPILOG_PH]], !prof [[PROF6:![0-9]+]]
60-
; CHECK-V2-IC4: [[VEC_EPILOG_PH]]:
61-
; CHECK-V2-IC4: br label %[[VEC_EPILOG_VECTOR_BODY:.*]]
62-
; CHECK-V2-IC4: [[VEC_EPILOG_VECTOR_BODY]]:
63-
; CHECK-V2-IC4: br i1 [[TMP23:%.*]], label %[[VEC_EPILOG_MIDDLE_BLOCK:.*]], label %[[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]]
64-
; CHECK-V2-IC4: [[VEC_EPILOG_MIDDLE_BLOCK]]:
65-
; CHECK-V2-IC4: br i1 [[CMP_N:%.*]], label %[[FOR_COND_CLEANUP]], label %[[VEC_EPILOG_SCALAR_PH]], !prof [[PROF8:![0-9]+]]
66-
; CHECK-V2-IC4: [[VEC_EPILOG_SCALAR_PH]]:
55+
; CHECK-V2-IC4: br i1 true, label %[[FOR_COND_CLEANUP:.*]], label %[[SCALAR_PH]], !prof [[PROF5:![0-9]+]]
56+
; CHECK-V2-IC4: [[SCALAR_PH]]:
6757
; CHECK-V2-IC4: br label %[[FOR_BODY:.*]]
6858
; CHECK-V2-IC4: [[FOR_BODY]]:
69-
; CHECK-V2-IC4: br i1 [[EXITCOND:%.*]], label %[[FOR_COND_CLEANUP]], label %[[FOR_BODY]], !prof [[PROF9:![0-9]+]], !llvm.loop [[LOOP10:![0-9]+]]
59+
; CHECK-V2-IC4: br i1 [[EXITCOND:%.*]], label %[[FOR_COND_CLEANUP]], label %[[FOR_BODY]], !prof [[PROF6:![0-9]+]], !llvm.loop [[LOOP7:![0-9]+]]
7060
; CHECK-V2-IC4: [[FOR_COND_CLEANUP]]:
7161
;
7262
entry:
@@ -111,9 +101,6 @@ for.cond.cleanup: ; preds = %for.body
111101
; CHECK-V2-IC4: [[META3]] = !{!"llvm.loop.isvectorized", i32 1}
112102
; CHECK-V2-IC4: [[META4]] = !{!"llvm.loop.unroll.runtime.disable"}
113103
; CHECK-V2-IC4: [[PROF5]] = !{!"branch_weights", i32 1, i32 15}
114-
; CHECK-V2-IC4: [[PROF6]] = !{!"branch_weights", i32 2, i32 0}
115-
; CHECK-V2-IC4: [[LOOP7]] = distinct !{[[LOOP7]], [[META3]], [[META4]]}
116-
; CHECK-V2-IC4: [[PROF8]] = !{!"branch_weights", i32 1, i32 1}
117-
; CHECK-V2-IC4: [[PROF9]] = !{!"branch_weights", i32 0, i32 0}
118-
; CHECK-V2-IC4: [[LOOP10]] = distinct !{[[LOOP10]], [[META4]], [[META3]]}
104+
; CHECK-V2-IC4: [[PROF6]] = !{!"branch_weights", i32 0, i32 0}
105+
; CHECK-V2-IC4: [[LOOP7]] = distinct !{[[LOOP7]], [[META4]], [[META3]]}
119106
;.

0 commit comments

Comments
 (0)