@@ -2795,25 +2795,44 @@ static VPRecipeBase *optimizeMaskToEVL(VPValue *HeaderMask,
27952795 return new VPWidenLoadEVLRecipe (cast<VPWidenLoadRecipe>(CurRecipe), Addr,
27962796 EVL, Mask);
27972797
2798- if (match (&CurRecipe,
2798+ VPValue *ReversedVal;
2799+ if (match (&CurRecipe, m_Reverse (m_VPValue (ReversedVal))) &&
2800+ match (ReversedVal,
27992801 m_MaskedLoad (m_VPValue (EndPtr), m_RemoveMask (HeaderMask, Mask))) &&
28002802 match (EndPtr, m_VecEndPtr (m_VPValue (Addr), m_Specific (&Plan->getVF ()))) &&
2801- cast<VPWidenLoadRecipe>(CurRecipe).isReverse ())
2802- return new VPWidenLoadEVLRecipe (cast<VPWidenLoadRecipe>(CurRecipe),
2803- AdjustEndPtr (EndPtr), EVL, Mask);
2803+ cast<VPWidenLoadRecipe>(ReversedVal)->isReverse ()) {
2804+ auto *LoadR = new VPWidenLoadEVLRecipe (
2805+ *cast<VPWidenLoadRecipe>(ReversedVal), AdjustEndPtr (EndPtr), EVL, Mask);
2806+ LoadR->insertBefore (&CurRecipe);
2807+ return new VPWidenIntrinsicRecipe (
2808+ Intrinsic::experimental_vp_reverse, {LoadR, Plan->getTrue (), &EVL},
2809+ TypeInfo.inferScalarType (LoadR), {}, {}, DL);
2810+ }
28042811
28052812 if (match (&CurRecipe, m_MaskedStore (m_VPValue (Addr), m_VPValue (),
28062813 m_RemoveMask (HeaderMask, Mask))) &&
28072814 !cast<VPWidenStoreRecipe>(CurRecipe).isReverse ())
28082815 return new VPWidenStoreEVLRecipe (cast<VPWidenStoreRecipe>(CurRecipe), Addr,
28092816 EVL, Mask);
28102817
2811- if (match (&CurRecipe, m_MaskedStore (m_VPValue (EndPtr), m_VPValue (),
2818+ VPValue *StoredVal;
2819+ if (match (&CurRecipe, m_MaskedStore (m_VPValue (EndPtr), m_VPValue (StoredVal),
28122820 m_RemoveMask (HeaderMask, Mask))) &&
28132821 match (EndPtr, m_VecEndPtr (m_VPValue (Addr), m_Specific (&Plan->getVF ()))) &&
2814- cast<VPWidenStoreRecipe>(CurRecipe).isReverse ())
2815- return new VPWidenStoreEVLRecipe (cast<VPWidenStoreRecipe>(CurRecipe),
2816- AdjustEndPtr (EndPtr), EVL, Mask);
2822+ cast<VPWidenStoreRecipe>(CurRecipe).isReverse ()) {
2823+ auto *StoreR = cast<VPWidenStoreRecipe>(&CurRecipe);
2824+ if (match (StoredVal, m_Reverse (m_VPValue (ReversedVal)))) {
2825+ auto *NewReverse = new VPWidenIntrinsicRecipe (
2826+ Intrinsic::experimental_vp_reverse,
2827+ {ReversedVal, Plan->getTrue (), &EVL},
2828+ TypeInfo.inferScalarType (ReversedVal), {}, {},
2829+ cast<VPInstruction>(StoredVal)->getDebugLoc ());
2830+ NewReverse->insertBefore (&CurRecipe);
2831+ return new VPWidenStoreEVLRecipe (cast<VPWidenStoreRecipe>(CurRecipe),
2832+ AdjustEndPtr (EndPtr), NewReverse, EVL,
2833+ Mask);
2834+ }
2835+ }
28172836
28182837 if (auto *Rdx = dyn_cast<VPReductionRecipe>(&CurRecipe))
28192838 if (Rdx->isConditional () &&
@@ -2886,7 +2905,6 @@ static void transformRecipestoEVLRecipes(VPlan &Plan, VPValue &EVL) {
28862905 // contained.
28872906 bool ContainsFORs =
28882907 any_of (Header->phis (), IsaPred<VPFirstOrderRecurrencePHIRecipe>);
2889- VPValue *PrevEVL = nullptr ;
28902908 if (ContainsFORs) {
28912909 // TODO: Use VPInstruction::ExplicitVectorLength to get maximum EVL.
28922910 VPValue *MaxEVL = &Plan.getVF ();
@@ -2897,42 +2915,28 @@ static void transformRecipestoEVLRecipes(VPlan &Plan, VPValue &EVL) {
28972915 TypeInfo.inferScalarType (MaxEVL), DebugLoc::getUnknown ());
28982916
28992917 Builder.setInsertPoint (Header, Header->getFirstNonPhi ());
2900- PrevEVL = Builder.createScalarPhi ({MaxEVL, &EVL}, DebugLoc::getUnknown (),
2901- " prev.evl" );
2902- }
2903-
2904- // Transform the recipes must be converted to vector predication intrinsics
2905- // even if they do not use header mask.
2906- for (VPBasicBlock *VPBB : VPBlockUtils::blocksOnly<VPBasicBlock>(
2907- vp_depth_first_deep (Plan.getVectorLoopRegion ()->getEntry ()))) {
2908- for (VPRecipeBase &R : *VPBB) {
2909- VPWidenIntrinsicRecipe *NewRecipe = nullptr ;
2910- VPValue *V1, *V2;
2911- if (match (&R, m_VPInstruction<VPInstruction::FirstOrderRecurrenceSplice>(
2912- m_VPValue (V1), m_VPValue (V2)))) {
2918+ VPValue *PrevEVL = Builder.createScalarPhi (
2919+ {MaxEVL, &EVL}, DebugLoc::getUnknown (), " prev.evl" );
2920+
2921+ for (VPBasicBlock *VPBB : VPBlockUtils::blocksOnly<VPBasicBlock>(
2922+ vp_depth_first_deep (Plan.getVectorLoopRegion ()->getEntry ()))) {
2923+ for (VPRecipeBase &R : *VPBB) {
2924+ VPValue *V1, *V2;
2925+ if (!match (&R,
2926+ m_VPInstruction<VPInstruction::FirstOrderRecurrenceSplice>(
2927+ m_VPValue (V1), m_VPValue (V2))))
2928+ continue ;
29132929 VPValue *Imm = Plan.getOrAddLiveIn (
29142930 ConstantInt::getSigned (Type::getInt32Ty (Plan.getContext ()), -1 ));
2915- NewRecipe = new VPWidenIntrinsicRecipe (
2931+ VPWidenIntrinsicRecipe *VPSplice = new VPWidenIntrinsicRecipe (
29162932 Intrinsic::experimental_vp_splice,
29172933 {V1, V2, Imm, Plan.getTrue (), PrevEVL, &EVL},
29182934 TypeInfo.inferScalarType (R.getVPSingleValue ()), {}, {},
29192935 R.getDebugLoc ());
2936+ VPSplice->insertBefore (&R);
2937+ R.getVPSingleValue ()->replaceAllUsesWith (VPSplice);
2938+ ToErase.push_back (&R);
29202939 }
2921-
2922- // TODO: Only convert reverse to vp.reverse if it uses the result of
2923- // vp.load, or defines the stored value of vp.store.
2924- if (match (&R, m_Reverse (m_VPValue (V1)))) {
2925- NewRecipe = new VPWidenIntrinsicRecipe (
2926- Intrinsic::experimental_vp_reverse, {V1, Plan.getTrue (), &EVL},
2927- TypeInfo.inferScalarType (R.getVPSingleValue ()), {}, {},
2928- R.getDebugLoc ());
2929- }
2930-
2931- if (!NewRecipe)
2932- continue ;
2933- NewRecipe->insertBefore (&R);
2934- R.getVPSingleValue ()->replaceAllUsesWith (NewRecipe);
2935- ToErase.push_back (&R);
29362940 }
29372941 }
29382942
0 commit comments