-
Notifications
You must be signed in to change notification settings - Fork 14.7k
[VPlan] Move initial skeleton construction earlier (NFC). #150848
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
16952d7
f835229
9558817
3b51594
d2a7fce
7a79e1c
b28bb32
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -338,12 +338,6 @@ std::unique_ptr<VPlan> PlainCFGBuilder::buildPlainCFG() { | |
return std::move(Plan); | ||
} | ||
|
||
std::unique_ptr<VPlan> VPlanTransforms::buildPlainCFG(Loop *TheLoop, | ||
LoopInfo &LI) { | ||
PlainCFGBuilder Builder(TheLoop, &LI); | ||
return Builder.buildPlainCFG(); | ||
} | ||
|
||
/// Checks if \p HeaderVPB is a loop header block in the plain CFG; that is, it | ||
/// has exactly 2 predecessors (preheader and latch), where the block | ||
/// dominates the latch and the preheader dominates the block. If it is a | ||
|
@@ -459,10 +453,8 @@ static void addCanonicalIVRecipes(VPlan &Plan, VPBasicBlock *HeaderVPBB, | |
LatchDL); | ||
} | ||
|
||
void VPlanTransforms::prepareForVectorization( | ||
VPlan &Plan, Type *InductionTy, PredicatedScalarEvolution &PSE, | ||
bool RequiresScalarEpilogueCheck, bool TailFolded, Loop *TheLoop, | ||
DebugLoc IVDL, bool HasUncountableEarlyExit, VFRange &Range) { | ||
static void addInitialSkeleton(VPlan &Plan, Type *InductionTy, DebugLoc IVDL, | ||
PredicatedScalarEvolution &PSE, Loop *TheLoop) { | ||
VPDominatorTree VPDT; | ||
VPDT.recalculate(Plan); | ||
|
||
|
@@ -488,12 +480,54 @@ void VPlanTransforms::prepareForVectorization( | |
|
||
addCanonicalIVRecipes(Plan, HeaderVPBB, LatchVPBB, InductionTy, IVDL); | ||
|
||
[[maybe_unused]] bool HandledUncountableEarlyExit = false; | ||
// Create SCEV and VPValue for the trip count. | ||
// We use the symbolic max backedge-taken-count, which works also when | ||
// vectorizing loops with uncountable early exits. | ||
const SCEV *BackedgeTakenCountSCEV = PSE.getSymbolicMaxBackedgeTakenCount(); | ||
assert(!isa<SCEVCouldNotCompute>(BackedgeTakenCountSCEV) && | ||
"Invalid backedge-taken count"); | ||
ScalarEvolution &SE = *PSE.getSE(); | ||
const SCEV *TripCount = SE.getTripCountFromExitCount(BackedgeTakenCountSCEV, | ||
InductionTy, TheLoop); | ||
Plan.setTripCount( | ||
vputils::getOrCreateVPValueForSCEVExpr(Plan, TripCount, SE)); | ||
|
||
VPBasicBlock *ScalarPH = Plan.createVPBasicBlock("scalar.ph"); | ||
VPBlockUtils::connectBlocks(ScalarPH, Plan.getScalarHeader()); | ||
|
||
// The connection order corresponds to the operands of the conditional branch, | ||
// with the middle block already connected to the exit block. | ||
VPBlockUtils::connectBlocks(MiddleVPBB, ScalarPH); | ||
// Also connect the entry block to the scalar preheader. | ||
// TODO: Also introduce a branch recipe together with the minimum trip count | ||
// check. | ||
VPBlockUtils::connectBlocks(Plan.getEntry(), ScalarPH); | ||
Plan.getEntry()->swapSuccessors(); | ||
} | ||
|
||
std::unique_ptr<VPlan> | ||
VPlanTransforms::buildVPlan0(Loop *TheLoop, LoopInfo &LI, Type *InductionTy, | ||
DebugLoc IVDL, PredicatedScalarEvolution &PSE) { | ||
PlainCFGBuilder Builder(TheLoop, &LI); | ||
std::unique_ptr<VPlan> VPlan0 = Builder.buildPlainCFG(); | ||
addInitialSkeleton(*VPlan0, InductionTy, IVDL, PSE, TheLoop); | ||
return VPlan0; | ||
} | ||
|
||
void VPlanTransforms::handleEarlyExits(VPlan &Plan, | ||
bool HasUncountableEarlyExit, | ||
VFRange &Range) { | ||
auto *MiddleVPBB = cast<VPBasicBlock>( | ||
Plan.getScalarHeader()->getSinglePredecessor()->getPredecessors()[0]); | ||
auto *LatchVPBB = cast<VPBasicBlock>(MiddleVPBB->getSinglePredecessor()); | ||
VPBlockBase *HeaderVPB = cast<VPBasicBlock>(LatchVPBB->getSuccessors()[1]); | ||
|
||
// Disconnect all early exits from the loop leaving it with a single exit from | ||
// the latch. Early exits that are countable are left for a scalar epilog. The | ||
// condition of uncountable early exits (currently at most one is supported) | ||
// is fused into the latch exit, and used to branch from middle block to the | ||
// early exit destination. | ||
[[maybe_unused]] bool HandledUncountableEarlyExit = false; | ||
for (VPIRBasicBlock *EB : Plan.getExitBlocks()) { | ||
for (VPBlockBase *Pred : to_vector(EB->getPredecessors())) { | ||
if (Pred == MiddleVPBB) | ||
|
@@ -502,7 +536,8 @@ void VPlanTransforms::prepareForVectorization( | |
assert(!HandledUncountableEarlyExit && | ||
"can handle exactly one uncountable early exit"); | ||
handleUncountableEarlyExit(cast<VPBasicBlock>(Pred), EB, Plan, | ||
HeaderVPBB, LatchVPBB, Range); | ||
cast<VPBasicBlock>(HeaderVPB), LatchVPBB, | ||
Range); | ||
HandledUncountableEarlyExit = true; | ||
} else { | ||
for (VPRecipeBase &R : EB->phis()) | ||
|
@@ -515,36 +550,18 @@ void VPlanTransforms::prepareForVectorization( | |
|
||
assert((!HasUncountableEarlyExit || HandledUncountableEarlyExit) && | ||
"missed an uncountable exit that must be handled"); | ||
} | ||
|
||
// Create SCEV and VPValue for the trip count. | ||
// We use the symbolic max backedge-taken-count, which works also when | ||
// vectorizing loops with uncountable early exits. | ||
const SCEV *BackedgeTakenCountSCEV = PSE.getSymbolicMaxBackedgeTakenCount(); | ||
assert(!isa<SCEVCouldNotCompute>(BackedgeTakenCountSCEV) && | ||
"Invalid loop count"); | ||
ScalarEvolution &SE = *PSE.getSE(); | ||
const SCEV *TripCount = SE.getTripCountFromExitCount(BackedgeTakenCountSCEV, | ||
InductionTy, TheLoop); | ||
Plan.setTripCount( | ||
vputils::getOrCreateVPValueForSCEVExpr(Plan, TripCount, SE)); | ||
|
||
VPBasicBlock *ScalarPH = Plan.createVPBasicBlock("scalar.ph"); | ||
VPBlockUtils::connectBlocks(ScalarPH, Plan.getScalarHeader()); | ||
|
||
// The connection order corresponds to the operands of the conditional branch, | ||
// with the middle block already connected to the exit block. | ||
VPBlockUtils::connectBlocks(MiddleVPBB, ScalarPH); | ||
// Also connect the entry block to the scalar preheader. | ||
// TODO: Also introduce a branch recipe together with the minimum trip count | ||
// check. | ||
VPBlockUtils::connectBlocks(Plan.getEntry(), ScalarPH); | ||
Plan.getEntry()->swapSuccessors(); | ||
|
||
void VPlanTransforms::addMiddleCheck(VPlan &Plan, | ||
bool RequiresScalarEpilogueCheck, | ||
bool TailFolded) { | ||
auto *MiddleVPBB = cast<VPBasicBlock>( | ||
Plan.getScalarHeader()->getSinglePredecessor()->getPredecessors()[0]); | ||
// If MiddleVPBB has a single successor then the original loop does not exit | ||
// via the latch and the single successor must be the scalar preheader. | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Independent/follow-up: this refers to how LV handles loops whose latch does not exit, i.e., by requiring a scalar epilogue. It would be better to handle all such cases consistently - either by wiring the middle block to the scalar preheader only, as done here, or by case 1 below. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Yep, this should be taken care of by consistently emitting a check if VectorTC == TC. This is the most general form; the others are optimizations of cases where we know the check is either always false or always true. |
||
// There's no need to add a runtime check to MiddleVPBB. | ||
if (MiddleVPBB->getNumSuccessors() == 1) { | ||
assert(MiddleVPBB->getSingleSuccessor() == ScalarPH && | ||
assert(MiddleVPBB->getSingleSuccessor() == Plan.getScalarPreheader() && | ||
"must have ScalarPH as single successor"); | ||
return; | ||
} | ||
|
@@ -566,6 +583,7 @@ void VPlanTransforms::prepareForVectorization( | |
// the corresponding compare because they may have ended up with different | ||
// line numbers and we want to avoid awkward line stepping while debugging. | ||
// E.g., if the compare has got a line number inside the loop. | ||
auto *LatchVPBB = cast<VPBasicBlock>(MiddleVPBB->getSinglePredecessor()); | ||
DebugLoc LatchDL = LatchVPBB->getTerminator()->getDebugLoc(); | ||
VPBuilder Builder(MiddleVPBB); | ||
VPValue *Cmp; | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -54,21 +54,30 @@ struct VPlanTransforms { | |
verifyVPlanIsValid(Plan); | ||
} | ||
|
||
LLVM_ABI_FOR_TEST static std::unique_ptr<VPlan> buildPlainCFG(Loop *TheLoop, | ||
LoopInfo &LI); | ||
|
||
/// Prepare the plan for vectorization. It will introduce a dedicated | ||
/// VPBasicBlock for the vector pre-header as well as a VPBasicBlock as exit | ||
/// block of the main vector loop (middle.block). If a check is needed to | ||
/// guard executing the scalar epilogue loop, it will be added to the middle | ||
/// block, together with VPBasicBlocks for the scalar preheader and exit | ||
/// blocks. \p InductionTy is the type of the canonical induction and used for | ||
/// related values, like the trip count expression. It also creates a VPValue | ||
/// expression for the original trip count. | ||
LLVM_ABI_FOR_TEST static void prepareForVectorization( | ||
VPlan &Plan, Type *InductionTy, PredicatedScalarEvolution &PSE, | ||
bool RequiresScalarEpilogueCheck, bool TailFolded, Loop *TheLoop, | ||
DebugLoc IVDL, bool HasUncountableExit, VFRange &Range); | ||
/// Create a base VPlan0, serving as the common starting point for all later | ||
/// candidates. It consists of an initial plain CFG loop with loop blocks from | ||
/// \p TheLoop being directly translated to VPBasicBlocks with VPInstruction | ||
/// corresponding to the input IR. | ||
/// | ||
/// The created loop is wrapped in an initial skeleton to facilitate | ||
/// vectorization, consisting of a vector pre-header, an exit block for the | ||
/// main vector loop (middle.block) and a new block as preheader of the scalar | ||
/// loop (scalar.ph). It also adds a canonical IV and its increment, using \p | ||
/// InductionTy and \p IVDL, and creates a VPValue expression for the original | ||
/// trip count. | ||
LLVM_ABI_FOR_TEST static std::unique_ptr<VPlan> | ||
buildVPlan0(Loop *TheLoop, LoopInfo &LI, Type *InductionTy, DebugLoc IVDL, | ||
PredicatedScalarEvolution &PSE); | ||
|
||
/// Update \p Plan to account for all early exits. | ||
LLVM_ABI_FOR_TEST static void | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. LLVM_ABI_FOR_TEST? |
||
handleEarlyExits(VPlan &Plan, bool HasUncountableExit, VFRange &Range); | ||
|
||
/// If a check is needed to guard executing the scalar epilogue loop, it will | ||
/// be added to the middle block. | ||
LLVM_ABI_FOR_TEST static void addMiddleCheck(VPlan &Plan, | ||
bool RequiresScalarEpilogueCheck, | ||
bool TailFolded); | ||
|
||
/// Replace loops in \p Plan's flat CFG with VPRegionBlocks, turning \p Plan's | ||
/// flat CFG into a hierarchical CFG. | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Should `buildPlainCFG()` be called `buildPlainLoopCFG()` or `buildPlainCFGForLoop()`, as it focuses on building the basic blocks of the loop along with its preheader and exit?
`buildInitialSkeleton()` also builds plain CFG; it can be called `connectPlainLoopCFG()`, as it adds new basic blocks before and after the loop, connecting it(?).
Should a new `buildSkeletalPlan()` call `buildPlainCFG()` followed by `addInitialSkeleton()`? Together they build an initial CFG-based skeleton of VPlan.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Combined into `buildVPlan0`, wdyt? I tried to combine the comments as well as possible.