From 16952d72900496868485027a7d5e2c0f5933512c Mon Sep 17 00:00:00 2001 From: Florian Hahn Date: Sat, 26 Jul 2025 16:01:38 +0100 Subject: [PATCH 1/4] [VPlan] Move initial skeleton construction earlier (NFC). Split up the not clearly named prepareForVectorization transform into addInitialSkeleton, which adds the vector preheader, middle and scalar preheader blocks, as well as the canonical induction recipes and sets the trip count. The new transform is run directly after building the plain CFG VPlan initially. The remaining code handling early exits and adding the branch in the middle block is renamed to handleEarlyExitsAndAddMiddleCheck and still runs at the original position. With the code movement, we only have to add the skeleton once to the initial VPlan, and cloning will take care of the rest. It will also enable moving other construction steps to work directly on VPlan0, like adding resume phis. --- .../Transforms/Vectorize/LoopVectorize.cpp | 35 +++++---- .../Vectorize/VPlanConstruction.cpp | 76 ++++++++++--------- .../Transforms/Vectorize/VPlanTransforms.h | 24 +++--- .../Transforms/Vectorize/VPlanTestBase.h | 7 +- 4 files changed, 81 insertions(+), 61 deletions(-) diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp index 6616e61f9bb84..59b276b5096fd 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -8373,8 +8373,18 @@ void LoopVectorizationPlanner::buildVPlansWithVPRecipes(ElementCount MinVF, LVer.prepareNoAliasMetadata(); } - auto MaxVFTimes2 = MaxVF * 2; + // Create initial VPlan skeleton, having a basic block for the pre-header + // which contains SCEV expansions that need to happen before the CFG is + // modified; a basic block for the vector pre-header, followed by a region for + // the vector loop, followed by the middle basic block, connecting to the + // scalar preheader and exit blcoks. 
auto VPlan0 = VPlanTransforms::buildPlainCFG(OrigLoop, *LI); + VPlanTransforms::addInitialSkeleton( + *VPlan0, Legal->getWidestInductionType(), + getDebugLocFromInstOrOperands(Legal->getPrimaryInduction()), PSE, + OrigLoop); + + auto MaxVFTimes2 = MaxVF * 2; for (ElementCount VF = MinVF; ElementCount::isKnownLT(VF, MaxVFTimes2);) { VFRange SubRange = {VF, MaxVFTimes2}; if (auto Plan = tryToBuildVPlanWithVPRecipes( @@ -8615,22 +8625,14 @@ VPlanPtr LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes( // visit each basic block after having visited its predecessor basic blocks. // --------------------------------------------------------------------------- - // Create initial VPlan skeleton, having a basic block for the pre-header - // which contains SCEV expansions that need to happen before the CFG is - // modified; a basic block for the vector pre-header, followed by a region for - // the vector loop, followed by the middle basic block. The skeleton vector - // loop region contains a header and latch basic blocks. 
- bool RequiresScalarEpilogueCheck = LoopVectorizationPlanner::getDecisionAndClampRange( [this](ElementCount VF) { return !CM.requiresScalarEpilogue(VF.isVector()); }, Range); - VPlanTransforms::prepareForVectorization( - *Plan, Legal->getWidestInductionType(), PSE, RequiresScalarEpilogueCheck, - CM.foldTailByMasking(), OrigLoop, - getDebugLocFromInstOrOperands(Legal->getPrimaryInduction()), + VPlanTransforms::handleEarlyExitsAndAddMiddleCheck( + *Plan, RequiresScalarEpilogueCheck, CM.foldTailByMasking(), Legal->hasUncountableEarlyExit(), Range); VPlanTransforms::createLoopRegions(*Plan); VPlanTransforms::createExtractsForLiveOuts(*Plan); @@ -8918,10 +8920,13 @@ VPlanPtr LoopVectorizationPlanner::tryToBuildVPlan(VFRange &Range) { assert(EnableVPlanNativePath && "VPlan-native path is not enabled."); auto Plan = VPlanTransforms::buildPlainCFG(OrigLoop, *LI); - VPlanTransforms::prepareForVectorization( - *Plan, Legal->getWidestInductionType(), PSE, true, false, OrigLoop, - getDebugLocFromInstOrOperands(Legal->getPrimaryInduction()), false, - Range); + + VPlanTransforms::addInitialSkeleton( + *Plan, Legal->getWidestInductionType(), + getDebugLocFromInstOrOperands(Legal->getPrimaryInduction()), PSE, + OrigLoop); + VPlanTransforms::handleEarlyExitsAndAddMiddleCheck(*Plan, true, false, false, + Range); VPlanTransforms::createLoopRegions(*Plan); for (ElementCount VF : Range) diff --git a/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp b/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp index 6c1f53b4eaa24..92b2d017d7a11 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp @@ -459,10 +459,10 @@ static void addCanonicalIVRecipes(VPlan &Plan, VPBasicBlock *HeaderVPBB, LatchDL); } -void VPlanTransforms::prepareForVectorization( - VPlan &Plan, Type *InductionTy, PredicatedScalarEvolution &PSE, - bool RequiresScalarEpilogueCheck, bool TailFolded, Loop *TheLoop, - DebugLoc IVDL, bool 
HasUncountableEarlyExit, VFRange &Range) { +void VPlanTransforms::addInitialSkeleton(VPlan &Plan, Type *InductionTy, + DebugLoc IVDL, + PredicatedScalarEvolution &PSE, + Loop *TheLoop) { VPDominatorTree VPDT; VPDT.recalculate(Plan); @@ -488,12 +488,46 @@ void VPlanTransforms::prepareForVectorization( addCanonicalIVRecipes(Plan, HeaderVPBB, LatchVPBB, InductionTy, IVDL); - [[maybe_unused]] bool HandledUncountableEarlyExit = false; + // Create SCEV and VPValue for the trip count. + // We use the symbolic max backedge-taken-count, which works also when + // vectorizing loops with uncountable early exits. + const SCEV *BackedgeTakenCountSCEV = PSE.getSymbolicMaxBackedgeTakenCount(); + assert(!isa<SCEVCouldNotCompute>(BackedgeTakenCountSCEV) && + "Invalid loop count"); + ScalarEvolution &SE = *PSE.getSE(); + const SCEV *TripCount = SE.getTripCountFromExitCount(BackedgeTakenCountSCEV, + InductionTy, TheLoop); + Plan.setTripCount( + vputils::getOrCreateVPValueForSCEVExpr(Plan, TripCount, SE)); + + VPBasicBlock *ScalarPH = Plan.createVPBasicBlock("scalar.ph"); + VPBlockUtils::connectBlocks(ScalarPH, Plan.getScalarHeader()); + + // The connection order corresponds to the operands of the conditional branch, + // with the middle block already connected to the exit block. + VPBlockUtils::connectBlocks(MiddleVPBB, ScalarPH); + // Also connect the entry block to the scalar preheader. + // TODO: Also introduce a branch recipe together with the minimum trip count + // check. 
+ VPBlockUtils::connectBlocks(Plan.getEntry(), ScalarPH); + Plan.getEntry()->swapSuccessors(); +} + +void VPlanTransforms::handleEarlyExitsAndAddMiddleCheck( + VPlan &Plan, bool RequiresScalarEpilogueCheck, bool TailFolded, + bool HasUncountableEarlyExit, VFRange &Range) { + auto *MiddleVPBB = cast<VPBasicBlock>( + Plan.getScalarHeader()->getSinglePredecessor()->getPredecessors()[0]); + VPBlockBase *HeaderVPB = + Plan.getEntry()->getSuccessors()[1]->getSingleSuccessor(); + auto *LatchVPBB = cast<VPBasicBlock>(HeaderVPB->getPredecessors()[1]); + // Disconnect all early exits from the loop leaving it with a single exit from // the latch. Early exits that are countable are left for a scalar epilog. The // condition of uncountable early exits (currently at most one is supported) // is fused into the latch exit, and used to branch from middle block to the // early exit destination. + [[maybe_unused]] bool HandledUncountableEarlyExit = false; for (VPIRBasicBlock *EB : Plan.getExitBlocks()) { for (VPBlockBase *Pred : to_vector(EB->getPredecessors())) { if (Pred == MiddleVPBB) @@ -502,7 +536,8 @@ void VPlanTransforms::prepareForVectorization( assert(!HandledUncountableEarlyExit && "can handle exactly one uncountable early exit"); handleUncountableEarlyExit(cast<VPBasicBlock>(Pred), EB, Plan, - HeaderVPBB, LatchVPBB, Range); + cast<VPBasicBlock>(HeaderVPB), LatchVPBB, + Range); HandledUncountableEarlyExit = true; } else { for (VPRecipeBase &R : EB->phis()) @@ -516,38 +551,11 @@ void VPlanTransforms::prepareForVectorization( assert((!HasUncountableEarlyExit || HandledUncountableEarlyExit) && "missed an uncountable exit that must be handled"); - // Create SCEV and VPValue for the trip count. - // We use the symbolic max backedge-taken-count, which works also when - // vectorizing loops with uncountable early exits. 
- const SCEV *BackedgeTakenCountSCEV = PSE.getSymbolicMaxBackedgeTakenCount(); - assert(!isa<SCEVCouldNotCompute>(BackedgeTakenCountSCEV) && - "Invalid loop count"); - ScalarEvolution &SE = *PSE.getSE(); - const SCEV *TripCount = SE.getTripCountFromExitCount(BackedgeTakenCountSCEV, - InductionTy, TheLoop); - Plan.setTripCount( - vputils::getOrCreateVPValueForSCEVExpr(Plan, TripCount, SE)); - - VPBasicBlock *ScalarPH = Plan.createVPBasicBlock("scalar.ph"); - VPBlockUtils::connectBlocks(ScalarPH, Plan.getScalarHeader()); - - // The connection order corresponds to the operands of the conditional branch, - // with the middle block already connected to the exit block. - VPBlockUtils::connectBlocks(MiddleVPBB, ScalarPH); - // Also connect the entry block to the scalar preheader. - // TODO: Also introduce a branch recipe together with the minimum trip count - // check. - VPBlockUtils::connectBlocks(Plan.getEntry(), ScalarPH); - Plan.getEntry()->swapSuccessors(); - // If MiddleVPBB has a single successor then the original loop does not exit // via the latch and the single successor must be the scalar preheader. // There's no need to add a runtime check to MiddleVPBB. - if (MiddleVPBB->getNumSuccessors() == 1) { - assert(MiddleVPBB->getSingleSuccessor() == ScalarPH && - "must have ScalarPH as single successor"); + if (MiddleVPBB->getNumSuccessors() == 1) return; - } assert(MiddleVPBB->getNumSuccessors() == 2 && "must have 2 successors"); diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.h b/llvm/lib/Transforms/Vectorize/VPlanTransforms.h index d5af6cd73a4a0..0b24cb00ff73c 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.h +++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.h @@ -58,17 +58,21 @@ struct VPlanTransforms { LoopInfo &LI); /// Prepare the plan for vectorization. It will introduce a dedicated - /// VPBasicBlock for the vector pre-header as well as a VPBasicBlock as exit - /// block of the main vector loop (middle.block). 
If a check is needed to + /// VPBasicBlock for the vector pre-header, a VPBasicBlock as exit + /// block of the main vector loop (middle.block) and a VPBaiscBlock for the + /// scalar preheader. It also adds a canonical IV and its increment, using \p + /// InductionTy and \p IVDL, and creates a VPValue expression for the original + /// trip count. + LLVM_ABI_FOR_TEST static void + addInitialSkeleton(VPlan &Plan, Type *InductionTy, DebugLoc IVDL, + PredicatedScalarEvolution &PSE, Loop *TheLoop); + + /// Update \p Plan to account for all early exits. If a check is needed to /// guard executing the scalar epilogue loop, it will be added to the middle - /// block, together with VPBasicBlocks for the scalar preheader and exit - /// blocks. \p InductionTy is the type of the canonical induction and used for - /// related values, like the trip count expression. It also creates a VPValue - /// expression for the original trip count. - LLVM_ABI_FOR_TEST static void prepareForVectorization( - VPlan &Plan, Type *InductionTy, PredicatedScalarEvolution &PSE, - bool RequiresScalarEpilogueCheck, bool TailFolded, Loop *TheLoop, - DebugLoc IVDL, bool HasUncountableExit, VFRange &Range); + /// block + LLVM_ABI_FOR_TEST static void handleEarlyExitsAndAddMiddleCheck( + VPlan &Plan, bool RequiresScalarEpilogueCheck, bool TailFolded, + bool HasUncountableExit, VFRange &Range); /// Replace loops in \p Plan's flat CFG with VPRegionBlocks, turning \p Plan's /// flat CFG into a hierarchical CFG. 
diff --git a/llvm/unittests/Transforms/Vectorize/VPlanTestBase.h b/llvm/unittests/Transforms/Vectorize/VPlanTestBase.h index 7dfd11a48b595..877394cc22ba2 100644 --- a/llvm/unittests/Transforms/Vectorize/VPlanTestBase.h +++ b/llvm/unittests/Transforms/Vectorize/VPlanTestBase.h @@ -74,8 +74,11 @@ class VPlanTestIRBase : public testing::Test { PredicatedScalarEvolution PSE(*SE, *L); auto Plan = VPlanTransforms::buildPlainCFG(L, *LI); VFRange R(ElementCount::getFixed(1), ElementCount::getFixed(2)); - VPlanTransforms::prepareForVectorization(*Plan, IntegerType::get(*Ctx, 64), - PSE, true, false, L, {}, false, R); + VPlanTransforms::addInitialSkeleton(*Plan, IntegerType::get(*Ctx, 64), {}, + PSE, L); + + VPlanTransforms::handleEarlyExitsAndAddMiddleCheck(*Plan, true, false, + false, R); VPlanTransforms::createLoopRegions(*Plan); return Plan; } From 955881738d906f0bacbc117b8973e21da1533174 Mon Sep 17 00:00:00 2001 From: Florian Hahn Date: Thu, 31 Jul 2025 12:53:11 +0100 Subject: [PATCH 2/4] !fixup address comments, thanks --- llvm/lib/Transforms/Vectorize/LoopVectorize.cpp | 5 +++-- llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp | 7 +++++-- llvm/lib/Transforms/Vectorize/VPlanTransforms.h | 2 +- 3 files changed, 9 insertions(+), 5 deletions(-) diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp index 2bf2d2e3fcbc4..a827a24610dfc 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -8952,8 +8952,9 @@ VPlanPtr LoopVectorizationPlanner::tryToBuildVPlan(VFRange &Range) { *Plan, Legal->getWidestInductionType(), getDebugLocFromInstOrOperands(Legal->getPrimaryInduction()), PSE, OrigLoop); - VPlanTransforms::handleEarlyExitsAndAddMiddleCheck(*Plan, true, false, false, - Range); + VPlanTransforms::handleEarlyExitsAndAddMiddleCheck( + *Plan, /*RequiresScalarEpilogue*/ true, /*TailFolded*/ false, + /*HasUncountableExit*/ false, Range); 
VPlanTransforms::createLoopRegions(*Plan); for (ElementCount VF : Range) diff --git a/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp b/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp index 92b2d017d7a11..07bc21e093ab6 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp @@ -493,7 +493,7 @@ void VPlanTransforms::addInitialSkeleton(VPlan &Plan, Type *InductionTy, // vectorizing loops with uncountable early exits. const SCEV *BackedgeTakenCountSCEV = PSE.getSymbolicMaxBackedgeTakenCount(); assert(!isa<SCEVCouldNotCompute>(BackedgeTakenCountSCEV) && - "Invalid loop count"); + "Invalid backedge-taken count"); ScalarEvolution &SE = *PSE.getSE(); const SCEV *TripCount = SE.getTripCountFromExitCount(BackedgeTakenCountSCEV, InductionTy, TheLoop); @@ -554,8 +554,11 @@ void VPlanTransforms::handleEarlyExitsAndAddMiddleCheck( // If MiddleVPBB has a single successor then the original loop does not exit // via the latch and the single successor must be the scalar preheader. // There's no need to add a runtime check to MiddleVPBB. - if (MiddleVPBB->getNumSuccessors() == 1) + if (MiddleVPBB->getNumSuccessors() == 1) { + assert(MiddleVPBB->getSingleSuccessor() == Plan.getScalarPreheader() && + "must have ScalarPH as single successor"); return; + } assert(MiddleVPBB->getNumSuccessors() == 2 && "must have 2 successors"); diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.h b/llvm/lib/Transforms/Vectorize/VPlanTransforms.h index 9c1e0202ccc02..408a2c4050113 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.h +++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.h @@ -69,7 +69,7 @@ struct VPlanTransforms { /// Update \p Plan to account for all early exits. If a check is needed to /// guard executing the scalar epilogue loop, it will be added to the middle - /// block + /// block. 
LLVM_ABI_FOR_TEST static void handleEarlyExitsAndAddMiddleCheck( VPlan &Plan, bool RequiresScalarEpilogueCheck, bool TailFolded, bool HasUncountableExit, VFRange &Range); From d2a7fce1e7e88b0e4506a3c266fa4914ab7b78b2 Mon Sep 17 00:00:00 2001 From: Florian Hahn Date: Mon, 4 Aug 2025 20:45:27 +0100 Subject: [PATCH 3/4] !fixup address comments, thanks --- .../Transforms/Vectorize/LoopVectorize.cpp | 24 +++++++------------ .../Vectorize/VPlanConstruction.cpp | 21 ++++++++-------- .../Transforms/Vectorize/VPlanTransforms.h | 22 +++++++++-------- .../Transforms/Vectorize/VPlanTestBase.h | 7 +++--- 4 files changed, 34 insertions(+), 40 deletions(-) diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp index 38b69f990b50a..18ad976b2c9e3 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -8403,16 +8403,11 @@ void LoopVectorizationPlanner::buildVPlansWithVPRecipes(ElementCount MinVF, LVer.prepareNoAliasMetadata(); } - // Create initial VPlan skeleton, having a basic block for the pre-header - // which contains SCEV expansions that need to happen before the CFG is - // modified; a basic block for the vector pre-header, followed by a region for - // the vector loop, followed by the middle basic block, connecting to the - // scalar preheader and exit blcoks. - auto VPlan0 = VPlanTransforms::buildPlainCFG(OrigLoop, *LI); - VPlanTransforms::addInitialSkeleton( - *VPlan0, Legal->getWidestInductionType(), - getDebugLocFromInstOrOperands(Legal->getPrimaryInduction()), PSE, - OrigLoop); + // Create initial base VPlan0, to serve as common starting point for all + // candidates built later for specific VF ranges. 
+ auto VPlan0 = VPlanTransforms::buildVPlan0( + OrigLoop, *LI, Legal->getWidestInductionType(), + getDebugLocFromInstOrOperands(Legal->getPrimaryInduction()), PSE); auto MaxVFTimes2 = MaxVF * 2; for (ElementCount VF = MinVF; ElementCount::isKnownLT(VF, MaxVFTimes2);) { @@ -8949,12 +8944,9 @@ VPlanPtr LoopVectorizationPlanner::tryToBuildVPlan(VFRange &Range) { assert(!OrigLoop->isInnermost()); assert(EnableVPlanNativePath && "VPlan-native path is not enabled."); - auto Plan = VPlanTransforms::buildPlainCFG(OrigLoop, *LI); - - VPlanTransforms::addInitialSkeleton( - *Plan, Legal->getWidestInductionType(), - getDebugLocFromInstOrOperands(Legal->getPrimaryInduction()), PSE, - OrigLoop); + auto Plan = VPlanTransforms::buildVPlan0( + OrigLoop, *LI, Legal->getWidestInductionType(), + getDebugLocFromInstOrOperands(Legal->getPrimaryInduction()), PSE); VPlanTransforms::handleEarlyExitsAndAddMiddleCheck( *Plan, /*RequiresScalarEpilogue*/ true, /*TailFolded*/ false, /*HasUncountableExit*/ false, Range); diff --git a/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp b/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp index b94d2ca55ab86..64f278f665546 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp @@ -338,12 +338,6 @@ std::unique_ptr<VPlan> PlainCFGBuilder::buildPlainCFG() { return std::move(Plan); } -std::unique_ptr<VPlan> VPlanTransforms::buildPlainCFG(Loop *TheLoop, - LoopInfo &LI) { - PlainCFGBuilder Builder(TheLoop, &LI); - return Builder.buildPlainCFG(); -} - /// Checks if \p HeaderVPB is a loop header block in the plain CFG; that is, it /// has exactly 2 predecessors (preheader and latch), where the block /// dominates the latch and the preheader dominates the block. 
If it is a @@ -459,10 +453,8 @@ static void addCanonicalIVRecipes(VPlan &Plan, VPBasicBlock *HeaderVPBB, LatchDL); } -void VPlanTransforms::addInitialSkeleton(VPlan &Plan, Type *InductionTy, - DebugLoc IVDL, - PredicatedScalarEvolution &PSE, - Loop *TheLoop) { +static void addInitialSkeleton(VPlan &Plan, Type *InductionTy, DebugLoc IVDL, + PredicatedScalarEvolution &PSE, Loop *TheLoop) { VPDominatorTree VPDT; VPDT.recalculate(Plan); @@ -513,6 +505,15 @@ void VPlanTransforms::addInitialSkeleton(VPlan &Plan, Type *InductionTy, Plan.getEntry()->swapSuccessors(); } +std::unique_ptr<VPlan> +VPlanTransforms::buildVPlan0(Loop *TheLoop, LoopInfo &LI, Type *InductionTy, + DebugLoc IVDL, PredicatedScalarEvolution &PSE) { + PlainCFGBuilder Builder(TheLoop, &LI); + std::unique_ptr<VPlan> VPlan0 = Builder.buildPlainCFG(); + addInitialSkeleton(*VPlan0, InductionTy, IVDL, PSE, TheLoop); + return VPlan0; +} + void VPlanTransforms::handleEarlyExitsAndAddMiddleCheck( VPlan &Plan, bool RequiresScalarEpilogueCheck, bool TailFolded, bool HasUncountableEarlyExit, VFRange &Range) { diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.h b/llvm/lib/Transforms/Vectorize/VPlanTransforms.h index 53fb02501a06b..f8795cc8a5c2f 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.h +++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.h @@ -54,18 +54,20 @@ struct VPlanTransforms { verifyVPlanIsValid(Plan); } - LLVM_ABI_FOR_TEST static std::unique_ptr<VPlan> buildPlainCFG(Loop *TheLoop, - LoopInfo &LI); - - /// Prepare the plan for vectorization. It will introduce a dedicated - /// VPBasicBlock for the vector pre-header, a VPBasicBlock as exit - /// block of the main vector loop (middle.block) and a VPBaiscBlock for the - /// scalar preheader. It also adds a canonical IV and its increment, using \p + /// Create a base VPlan0, serving as the common starting point for all later + /// candidates. 
It consists of an initial plain CFG loop with loop blocks from + /// \p TheLoop being directly translated to VPBasicBlocks with VPInstruction + /// corresponding to the input IR. + /// + /// The created loop is wrapped into an initial skeleton to facilitate + /// vectorization, consisting of a vector pre-header, a exit block for the + /// main vector loop (middle.block) and a new block as preheader of the scalar + /// loop (scalar.ph). It also adds a canonical IV and its increment, using \p /// InductionTy and \p IVDL, and creates a VPValue expression for the original /// trip count. - LLVM_ABI_FOR_TEST static void - addInitialSkeleton(VPlan &Plan, Type *InductionTy, DebugLoc IVDL, - PredicatedScalarEvolution &PSE, Loop *TheLoop); + LLVM_ABI_FOR_TEST static std::unique_ptr<VPlan> + buildVPlan0(Loop *TheLoop, LoopInfo &LI, Type *InductionTy, DebugLoc IVDL, + PredicatedScalarEvolution &PSE); /// Update \p Plan to account for all early exits. If a check is needed to /// guard executing the scalar epilogue loop, it will be added to the middle diff --git a/llvm/unittests/Transforms/Vectorize/VPlanTestBase.h b/llvm/unittests/Transforms/Vectorize/VPlanTestBase.h index 877394cc22ba2..c783c3b28f957 100644 --- a/llvm/unittests/Transforms/Vectorize/VPlanTestBase.h +++ b/llvm/unittests/Transforms/Vectorize/VPlanTestBase.h @@ -72,11 +72,10 @@ class VPlanTestIRBase : public testing::Test { Loop *L = LI->getLoopFor(LoopHeader); PredicatedScalarEvolution PSE(*SE, *L); - auto Plan = VPlanTransforms::buildPlainCFG(L, *LI); - VFRange R(ElementCount::getFixed(1), ElementCount::getFixed(2)); - VPlanTransforms::addInitialSkeleton(*Plan, IntegerType::get(*Ctx, 64), {}, - PSE, L); + auto Plan = VPlanTransforms::buildVPlan0(L, *LI, IntegerType::get(*Ctx, 64), + {}, PSE); + VFRange R(ElementCount::getFixed(1), ElementCount::getFixed(2)); VPlanTransforms::handleEarlyExitsAndAddMiddleCheck(*Plan, true, false, false, R); VPlanTransforms::createLoopRegions(*Plan); From 
b28bb32182a1eb999d1c115e7f967e2a9e483bdd Mon Sep 17 00:00:00 2001 From: Florian Hahn Date: Tue, 5 Aug 2025 14:24:29 +0100 Subject: [PATCH 4/4] !fixup address comments, thanks --- .../Transforms/Vectorize/LoopVectorize.cpp | 16 ++++++++++------ .../Vectorize/VPlanConstruction.cpp | 18 ++++++++++++------ .../Transforms/Vectorize/VPlanTransforms.h | 19 +++++++++++-------- .../Transforms/Vectorize/VPlanTestBase.h | 5 +++-- 4 files changed, 36 insertions(+), 22 deletions(-) diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp index f5e0b0fcee9f7..5b2e41aceece6 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -8690,9 +8690,11 @@ VPlanPtr LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes( return !CM.requiresScalarEpilogue(VF.isVector()); }, Range); - VPlanTransforms::handleEarlyExitsAndAddMiddleCheck( - *Plan, RequiresScalarEpilogueCheck, CM.foldTailByMasking(), - Legal->hasUncountableEarlyExit(), Range); + VPlanTransforms::handleEarlyExits(*Plan, Legal->hasUncountableEarlyExit(), + Range); + VPlanTransforms::addMiddleCheck(*Plan, RequiresScalarEpilogueCheck, + CM.foldTailByMasking()); + VPlanTransforms::createLoopRegions(*Plan); VPlanTransforms::createExtractsForLiveOuts(*Plan); @@ -8981,9 +8983,11 @@ VPlanPtr LoopVectorizationPlanner::tryToBuildVPlan(VFRange &Range) { auto Plan = VPlanTransforms::buildVPlan0( OrigLoop, *LI, Legal->getWidestInductionType(), getDebugLocFromInstOrOperands(Legal->getPrimaryInduction()), PSE); - VPlanTransforms::handleEarlyExitsAndAddMiddleCheck( - *Plan, /*RequiresScalarEpilogue*/ true, /*TailFolded*/ false, - /*HasUncountableExit*/ false, Range); + VPlanTransforms::handleEarlyExits(*Plan, + /*HasUncountableExit*/ false, Range); + VPlanTransforms::addMiddleCheck(*Plan, /*RequiresScalarEpilogue*/ true, + /*TailFolded*/ false); + VPlanTransforms::createLoopRegions(*Plan); for (ElementCount VF : Range) diff --git 
a/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp b/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp index 495b3924d5204..6f36540ed96f5 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp @@ -514,14 +514,13 @@ VPlanTransforms::buildVPlan0(Loop *TheLoop, LoopInfo &LI, Type *InductionTy, return VPlan0; } -void VPlanTransforms::handleEarlyExitsAndAddMiddleCheck( - VPlan &Plan, bool RequiresScalarEpilogueCheck, bool TailFolded, - bool HasUncountableEarlyExit, VFRange &Range) { +void VPlanTransforms::handleEarlyExits(VPlan &Plan, + bool HasUncountableEarlyExit, + VFRange &Range) { auto *MiddleVPBB = cast<VPBasicBlock>( Plan.getScalarHeader()->getSinglePredecessor()->getPredecessors()[0]); - VPBlockBase *HeaderVPB = - Plan.getEntry()->getSuccessors()[1]->getSingleSuccessor(); - auto *LatchVPBB = cast<VPBasicBlock>(HeaderVPB->getPredecessors()[1]); + auto *LatchVPBB = cast<VPBasicBlock>(MiddleVPBB->getSinglePredecessor()); + VPBlockBase *HeaderVPB = cast<VPBasicBlock>(LatchVPBB->getSuccessors()[1]); // Disconnect all early exits from the loop leaving it with a single exit from // the latch. Early exits that are countable are left for a scalar epilog. The @@ -551,7 +550,13 @@ void VPlanTransforms::handleEarlyExitsAndAddMiddleCheck( assert((!HasUncountableEarlyExit || HandledUncountableEarlyExit) && "missed an uncountable exit that must be handled"); +} +void VPlanTransforms::addMiddleCheck(VPlan &Plan, + bool RequiresScalarEpilogueCheck, + bool TailFolded) { + auto *MiddleVPBB = cast<VPBasicBlock>( + Plan.getScalarHeader()->getSinglePredecessor()->getPredecessors()[0]); // If MiddleVPBB has a single successor then the original loop does not exit // via the latch and the single successor must be the scalar preheader. // There's no need to add a runtime check to MiddleVPBB. 
@@ -578,6 +583,7 @@ void VPlanTransforms::handleEarlyExitsAndAddMiddleCheck( // the corresponding compare because they may have ended up with different // line numbers and we want to avoid awkward line stepping while debugging. // E.g., if the compare has got a line number inside the loop. + auto *LatchVPBB = cast<VPBasicBlock>(MiddleVPBB->getSinglePredecessor()); DebugLoc LatchDL = LatchVPBB->getTerminator()->getDebugLoc(); VPBuilder Builder(MiddleVPBB); VPValue *Cmp; diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.h b/llvm/lib/Transforms/Vectorize/VPlanTransforms.h index f8795cc8a5c2f..9c805d2a76287 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.h +++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.h @@ -59,8 +59,8 @@ struct VPlanTransforms { /// \p TheLoop being directly translated to VPBasicBlocks with VPInstruction /// corresponding to the input IR. /// - /// The created loop is wrapped into an initial skeleton to facilitate - /// vectorization, consisting of a vector pre-header, a exit block for the + /// The created loop is wrapped in an initial skeleton to facilitate + /// vectorization, consisting of a vector pre-header, an exit block for the /// main vector loop (middle.block) and a new block as preheader of the scalar /// loop (scalar.ph). It also adds a canonical IV and its increment, using \p /// InductionTy and \p IVDL, and creates a VPValue expression for the original @@ -69,12 +69,15 @@ struct VPlanTransforms { buildVPlan0(Loop *TheLoop, LoopInfo &LI, Type *InductionTy, DebugLoc IVDL, PredicatedScalarEvolution &PSE); - /// Update \p Plan to account for all early exits. If a check is needed to - /// guard executing the scalar epilogue loop, it will be added to the middle - /// block. - LLVM_ABI_FOR_TEST static void handleEarlyExitsAndAddMiddleCheck( - VPlan &Plan, bool RequiresScalarEpilogueCheck, bool TailFolded, - bool HasUncountableExit, VFRange &Range); + /// Update \p Plan to account for all early exits. 
+ LLVM_ABI_FOR_TEST static void + handleEarlyExits(VPlan &Plan, bool HasUncountableExit, VFRange &Range); + + /// If a check is needed to guard executing the scalar epilogue loop, it will + /// be added to the middle block. + LLVM_ABI_FOR_TEST static void addMiddleCheck(VPlan &Plan, + bool RequiresScalarEpilogueCheck, + bool TailFolded); /// Replace loops in \p Plan's flat CFG with VPRegionBlocks, turning \p Plan's /// flat CFG into a hierarchical CFG. diff --git a/llvm/unittests/Transforms/Vectorize/VPlanTestBase.h b/llvm/unittests/Transforms/Vectorize/VPlanTestBase.h index c783c3b28f957..56f685801151a 100644 --- a/llvm/unittests/Transforms/Vectorize/VPlanTestBase.h +++ b/llvm/unittests/Transforms/Vectorize/VPlanTestBase.h @@ -76,8 +76,9 @@ class VPlanTestIRBase : public testing::Test { {}, PSE); VFRange R(ElementCount::getFixed(1), ElementCount::getFixed(2)); - VPlanTransforms::handleEarlyExitsAndAddMiddleCheck(*Plan, true, false, - false, R); + VPlanTransforms::handleEarlyExits(*Plan, false, R); + VPlanTransforms::addMiddleCheck(*Plan, true, false); + VPlanTransforms::createLoopRegions(*Plan); return Plan; }