@@ -2973,7 +2973,6 @@ void InnerLoopVectorizer::fixVectorizedLoop(VPTransformState &State) {
29732973 for (const auto &Entry : Legal->getInductionVars ())
29742974 fixupIVUsers (Entry.first , Entry.second ,
29752975 getOrCreateVectorTripCount (nullptr ), LoopMiddleBlock, State);
2976- fixCSALiveOuts (State, Plan);
29772976 }
29782977
29792978 for (Instruction *PI : PredicatedInstructions)
@@ -8731,13 +8730,18 @@ VPRecipeBuilder::tryToCreateWidenRecipe(Instruction *Instr,
87318730 // directly, enabling more efficient codegen.
87328731 PhiRecipe = new VPFirstOrderRecurrencePHIRecipe (Phi, *StartV);
87338732 } else if (Legal->isCSAPhi (Phi)) {
8734- VPCSAState *State = Plan.getCSAStates ().find (Phi)->second ;
8735- VPValue *InitData = State->getVPInitData ();
8733+ VPValue *InitScalar = Plan.getOrAddLiveIn (
8734+ Phi->getIncomingValueForBlock (OrigLoop->getLoopPreheader ()));
8735+
8736+ // Don't build full CSA for VF=ElementCount::getFixed(1)
8737+ bool IsScalarVF = LoopVectorizationPlanner::getDecisionAndClampRange (
8738+ [&](ElementCount VF) { return VF.isScalar (); }, Range);
8739+
87368740 // When the VF=getFixed(1), InitData is just InitScalar.
8737- if (!InitData)
8738- InitData = State->getVPInitScalar ();
8741+ VPValue *InitData =
8742+ IsScalarVF ? InitScalar
8743+ : getVPValueOrAddLiveIn (PoisonValue::get (Phi->getType ()));
87398744 PhiRecipe = new VPCSAHeaderPHIRecipe (Phi, InitData);
8740- State->setPhiRecipe (cast<VPCSAHeaderPHIRecipe>(PhiRecipe));
87418745 } else {
87428746 llvm_unreachable (
87438747 " can only widen reductions, fixed-order recurrences, and CSAs here" );
@@ -8778,13 +8782,17 @@ VPRecipeBuilder::tryToCreateWidenRecipe(Instruction *Instr,
87788782 return CSADescriptor::isCSASelect (CSA.second , SI);
87798783 });
87808784 if (CSADescIt != Legal->getCSAs ().end ()) {
8781- PHINode *CSAPhi = CSADescIt->first ;
8782- VPCSAState *State = Plan.getCSAStates ().find (CSAPhi)->second ;
8783- VPValue *VPDataPhi = State->getPhiRecipe ();
8784- auto *R = new VPCSADataUpdateRecipe (
8785- SI, {VPDataPhi, Operands[0 ], Operands[1 ], Operands[2 ]});
8786- State->setDataUpdate (R);
8787- return R;
8785+ for (VPRecipeBase &R :
8786+ Plan.getVectorLoopRegion ()->getEntryBasicBlock ()->phis ()) {
8787+ if (auto PhiR = dyn_cast<VPCSAHeaderPHIRecipe>(&R)) {
8788+ if (PhiR->getUnderlyingInstr () == CSADescIt->first ) {
8789+ auto *R = new VPCSADataUpdateRecipe (
8790+ SI, {PhiR, Operands[0 ], Operands[1 ], Operands[2 ]});
8791+ PhiR->setDataUpdate (R);
8792+ return R;
8793+ }
8794+ }
8795+ }
87888796 }
87898797
87908798 return new VPWidenSelectRecipe (
@@ -8799,44 +8807,6 @@ VPRecipeBuilder::tryToCreateWidenRecipe(Instruction *Instr,
87998807 return tryToWiden (Instr, Operands, VPBB);
88008808}
88018809
8802- // / Add CSA Recipes that can occur before each instruction in the input IR
8803- // / is processed and introduced into VPlan.
8804- static void
8805- addCSAPreprocessRecipes (const LoopVectorizationLegality::CSAList &CSAs,
8806- Loop *OrigLoop, VPBasicBlock *PreheaderVPBB,
8807- VPBasicBlock *HeaderVPBB, DebugLoc DL, VFRange &Range,
8808- VPlan &Plan, VPRecipeBuilder &Builder) {
8809-
8810- // Don't build full CSA for VF=ElementCount::getFixed(1)
8811- bool IsScalarVF = LoopVectorizationPlanner::getDecisionAndClampRange (
8812- [&](ElementCount VF) { return VF.isScalar (); }, Range);
8813-
8814- for (const auto &CSA : CSAs) {
8815- VPValue *VPInitScalar = Plan.getOrAddLiveIn (
8816- CSA.first ->getIncomingValueForBlock (OrigLoop->getLoopPreheader ()));
8817-
8818- // Scalar VF builds the scalar version of the loop. In that case,
8819- // no maintenance of mask nor extraction in middle block is needed.
8820- if (IsScalarVF) {
8821- VPCSAState *S = new VPCSAState (VPInitScalar);
8822- Plan.addCSAState (CSA.first , S);
8823- continue ;
8824- }
8825-
8826- VPBuilder PHB (PreheaderVPBB);
8827- auto *VPInitMask = Builder.getVPValueOrAddLiveIn (
8828- ConstantInt::getFalse (Type::getInt1Ty (CSA.first ->getContext ())));
8829- auto *VPInitData =
8830- Builder.getVPValueOrAddLiveIn (PoisonValue::get (CSA.first ->getType ()));
8831-
8832- VPBuilder HB (HeaderVPBB);
8833- auto *VPMaskPhi = HB.createCSAMaskPhi (VPInitMask, DL, " csa.mask.phi" );
8834-
8835- auto *S = new VPCSAState (VPInitScalar, VPInitData, VPMaskPhi);
8836- Plan.addCSAState (CSA.first , S);
8837- }
8838- }
8839-
88408810// / Add CSA Recipes that must occur after each instruction in the input IR
88418811// / is processed and introduced into VPlan.
88428812static void
@@ -8849,60 +8819,57 @@ addCSAPostprocessRecipes(VPRecipeBuilder &RecipeBuilder,
88498819 [&](ElementCount VF) { return VF.isScalar (); }, Range))
88508820 return ;
88518821
8822+ VPBasicBlock *Header = Plan.getVectorLoopRegion ()->getEntryBasicBlock ();
88528823 for (const auto &CSA : CSAs) {
8853- VPCSAState *CSAState = Plan.getCSAStates ().find (CSA.first )->second ;
8854- VPCSADataUpdateRecipe *VPDataUpdate = CSAState->getDataUpdate ();
8824+ // Build the MaskPhi recipe.
8825+ auto *VPInitMask = RecipeBuilder.getVPValueOrAddLiveIn (
8826+ ConstantInt::getFalse (Type::getInt1Ty (CSA.first ->getContext ())));
8827+ VPBuilder B;
8828+ B.setInsertPoint (Header, Header->getFirstNonPhi ());
8829+ auto *VPMaskPhi = B.createCSAMaskPhi (VPInitMask, DL, " csa.mask.phi" );
8830+ B.clearInsertionPoint ();
88558831
8856- assert (VPDataUpdate &&
8857- " VPDataUpdate must have been introduced prior to postprocess" );
8858- assert (CSA.second .getCond () &&
8859- " CSADescriptor must know how to describe the condition" );
88608832 auto GetVPValue = [&](Value *I) {
88618833 return RecipeBuilder.getRecipe (cast<Instruction>(I))->getVPSingleValue ();
88628834 };
8863- VPValue *WidenedCond = GetVPValue (CSA. second . getCond ());
8864- VPValue *VPInitScalar = CSAState-> getVPInitScalar ( );
8835+ VPCSADataUpdateRecipe *VPDataUpdate = cast<VPCSADataUpdateRecipe>(
8836+ cast<VPCSAHeaderPHIRecipe>( GetVPValue (CSA. first ))-> getVPNewData () );
88658837
88668838 // The CSA optimization wants to use a condition such that when it is
88678839 // true, a new value is assigned. However, it is possible that a true lane
88688840 // in WidenedCond corresponds to selection of the initial value instead.
88698841 // In that case, we must use the negation of WidenedCond.
88708842 // i.e. select cond new_val old_val versus select cond.not old_val new_val
8843+ assert (CSA.second .getCond () &&
8844+ " CSADescriptor must know how to describe the condition" );
8845+ VPValue *WidenedCond = GetVPValue (CSA.second .getCond ());
88718846 VPValue *CondToUse = WidenedCond;
8872- VPBuilder B;
88738847 if (cast<SelectInst>(CSA.second .getAssignment ())->getTrueValue () ==
88748848 CSA.first ) {
88758849 auto *VPNotCond = B.createNot (WidenedCond, DL);
8876- VPNotCond->insertBefore (
8877- GetVPValue (CSA.second .getAssignment ())->getDefiningRecipe ());
8850+ VPNotCond->insertBefore (VPDataUpdate);
88788851 CondToUse = VPNotCond;
88798852 }
88808853
8881- auto *VPAnyActive =
8882- B.createAnyActive (CondToUse, DL, " csa.cond.anyactive" );
8883- VPAnyActive->insertBefore (
8884- GetVPValue (CSA.second .getAssignment ())->getDefiningRecipe ());
8854+ auto *VPAnyActive = B.createAnyActive (CondToUse, DL, " csa.cond.anyactive" );
8855+ VPAnyActive->insertBefore (VPDataUpdate);
88858856
8886- auto *VPMaskSel = B.createCSAMaskSel (CondToUse, CSAState-> getVPMaskPhi () ,
8887- VPAnyActive, DL, " csa.mask.sel" );
8857+ auto *VPMaskSel = B.createCSAMaskSel (CondToUse, VPMaskPhi, VPAnyActive, DL ,
8858+ " csa.mask.sel" );
88888859 VPMaskSel->insertAfter (VPAnyActive);
8860+
88898861 VPDataUpdate->setVPNewMaskAndVPAnyActive (VPMaskSel, VPAnyActive);
8862+ VPValue *VPInitScalar = Plan.getOrAddLiveIn (
8863+ CSA.first ->getIncomingValueForBlock (OrigLoop->getLoopPreheader ()));
8864+ SmallVector<PHINode *> PhiToFix;
8865+ for (User *U : VPDataUpdate->getUnderlyingValue ()->users ())
8866+ if (auto *Phi = dyn_cast<PHINode>(U);
8867+ Phi && Phi->getParent () == OrigLoop->getUniqueExitBlock ())
8868+ PhiToFix.emplace_back (Phi);
88908869 VPCSAExtractScalarRecipe *ExtractScalarRecipe =
8891- new VPCSAExtractScalarRecipe ({VPInitScalar, VPMaskSel, VPDataUpdate});
8892-
8870+ new VPCSAExtractScalarRecipe ({VPInitScalar, VPMaskSel, VPDataUpdate},
8871+ PhiToFix);
88938872 MiddleVPBB->insert (ExtractScalarRecipe, MiddleVPBB->getFirstNonPhi ());
8894-
8895- // Update CSAState with new recipes
8896- CSAState->setExtractScalarRecipe (ExtractScalarRecipe);
8897- CSAState->setVPAnyActive (VPAnyActive);
8898-
8899- // Add live out for the CSA. We should be in LCSSA, so we are looking for
8900- // Phi users in the unique exit block of the original updated value.
8901- BasicBlock *OrigExit = OrigLoop->getUniqueExitBlock ();
8902- assert (OrigExit && " Expected a single exit block" );
8903- for (User *U :VPDataUpdate->getUnderlyingValue ()->users ())
8904- if (auto *Phi = dyn_cast<PHINode>(U); Phi && Phi->getParent () == OrigExit)
8905- Plan.addLiveOut (Phi, ExtractScalarRecipe);
89068873 }
89078874}
89088875
@@ -9224,11 +9191,6 @@ LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(VFRange &Range) {
92249191
92259192 VPRecipeBuilder RecipeBuilder (*Plan, OrigLoop, TLI, Legal, CM, PSE, Builder);
92269193
9227- addCSAPreprocessRecipes (Legal->getCSAs (), OrigLoop, Plan->getPreheader (),
9228- Plan->getVectorLoopRegion ()->getEntryBasicBlock (), DL,
9229- Range, *Plan, RecipeBuilder);
9230-
9231-
92329194 // ---------------------------------------------------------------------------
92339195 // Pre-construction: record ingredients whose recipes we'll need to further
92349196 // process after constructing the initial VPlan.
0 commit comments