@@ -2858,25 +2858,43 @@ static VPRecipeBase *optimizeMaskToEVL(VPValue *HeaderMask,
     return new VPWidenLoadEVLRecipe(cast<VPWidenLoadRecipe>(CurRecipe), Addr,
                                     EVL, Mask);
 
-  if (match(&CurRecipe,
+  VPValue *ReversedVal;
+  if (match(&CurRecipe, m_Reverse(m_VPValue(ReversedVal))) &&
+      match(ReversedVal,
             m_MaskedLoad(m_VPValue(EndPtr), m_RemoveMask(HeaderMask, Mask))) &&
       match(EndPtr, m_VecEndPtr(m_VPValue(Addr), m_Specific(&Plan->getVF()))) &&
-      cast<VPWidenLoadRecipe>(CurRecipe).isReverse())
-    return new VPWidenLoadEVLRecipe(cast<VPWidenLoadRecipe>(CurRecipe),
-                                    AdjustEndPtr(EndPtr), EVL, Mask);
+      cast<VPWidenLoadRecipe>(ReversedVal)->isReverse()) {
+    auto *LoadR = new VPWidenLoadEVLRecipe(
+        *cast<VPWidenLoadRecipe>(ReversedVal), AdjustEndPtr(EndPtr), EVL, Mask);
+    LoadR->insertBefore(&CurRecipe);
+    return new VPWidenIntrinsicRecipe(
+        Intrinsic::experimental_vp_reverse, {LoadR, Plan->getTrue(), &EVL},
+        TypeInfo.inferScalarType(LoadR), {}, {}, DL);
+  }
 
   if (match(&CurRecipe, m_MaskedStore(m_VPValue(Addr), m_VPValue(),
                                       m_RemoveMask(HeaderMask, Mask))) &&
       !cast<VPWidenStoreRecipe>(CurRecipe).isReverse())
     return new VPWidenStoreEVLRecipe(cast<VPWidenStoreRecipe>(CurRecipe), Addr,
                                      EVL, Mask);
 
-  if (match(&CurRecipe, m_MaskedStore(m_VPValue(EndPtr), m_VPValue(),
+  VPValue *StoredVal;
+  if (match(&CurRecipe, m_MaskedStore(m_VPValue(EndPtr), m_VPValue(StoredVal),
                                       m_RemoveMask(HeaderMask, Mask))) &&
       match(EndPtr, m_VecEndPtr(m_VPValue(Addr), m_Specific(&Plan->getVF()))) &&
-      cast<VPWidenStoreRecipe>(CurRecipe).isReverse())
-    return new VPWidenStoreEVLRecipe(cast<VPWidenStoreRecipe>(CurRecipe),
-                                     AdjustEndPtr(EndPtr), EVL, Mask);
+      cast<VPWidenStoreRecipe>(CurRecipe).isReverse()) {
+    if (match(StoredVal, m_Reverse(m_VPValue(ReversedVal)))) {
+      auto *NewReverse = new VPWidenIntrinsicRecipe(
+          Intrinsic::experimental_vp_reverse,
+          {ReversedVal, Plan->getTrue(), &EVL},
+          TypeInfo.inferScalarType(ReversedVal), {}, {},
+          cast<VPInstruction>(StoredVal)->getDebugLoc());
+      NewReverse->insertBefore(&CurRecipe);
+      return new VPWidenStoreEVLRecipe(cast<VPWidenStoreRecipe>(CurRecipe),
+                                       AdjustEndPtr(EndPtr), NewReverse, EVL,
+                                       Mask);
+    }
+  }
 
   if (auto *Rdx = dyn_cast<VPReductionRecipe>(&CurRecipe))
     if (Rdx->isConditional() &&
@@ -2949,7 +2967,6 @@ static void transformRecipestoEVLRecipes(VPlan &Plan, VPValue &EVL) {
   // contained.
   bool ContainsFORs =
       any_of(Header->phis(), IsaPred<VPFirstOrderRecurrencePHIRecipe>);
-  VPValue *PrevEVL = nullptr;
   if (ContainsFORs) {
     // TODO: Use VPInstruction::ExplicitVectorLength to get maximum EVL.
     VPValue *MaxEVL = &Plan.getVF();
@@ -2960,42 +2977,28 @@ static void transformRecipestoEVLRecipes(VPlan &Plan, VPValue &EVL) {
         TypeInfo.inferScalarType(MaxEVL), DebugLoc::getUnknown());
 
     Builder.setInsertPoint(Header, Header->getFirstNonPhi());
-    PrevEVL = Builder.createScalarPhi({MaxEVL, &EVL}, DebugLoc::getUnknown(),
-                                      "prev.evl");
-  }
-
-  // Transform the recipes must be converted to vector predication intrinsics
-  // even if they do not use header mask.
-  for (VPBasicBlock *VPBB : VPBlockUtils::blocksOnly<VPBasicBlock>(
-           vp_depth_first_deep(Plan.getVectorLoopRegion()->getEntry()))) {
-    for (VPRecipeBase &R : *VPBB) {
-      VPWidenIntrinsicRecipe *NewRecipe = nullptr;
-      VPValue *V1, *V2;
-      if (match(&R, m_VPInstruction<VPInstruction::FirstOrderRecurrenceSplice>(
-                        m_VPValue(V1), m_VPValue(V2)))) {
+    VPValue *PrevEVL = Builder.createScalarPhi(
+        {MaxEVL, &EVL}, DebugLoc::getUnknown(), "prev.evl");
+
+    for (VPBasicBlock *VPBB : VPBlockUtils::blocksOnly<VPBasicBlock>(
+             vp_depth_first_deep(Plan.getVectorLoopRegion()->getEntry()))) {
+      for (VPRecipeBase &R : *VPBB) {
+        VPValue *V1, *V2;
+        if (!match(&R,
+                   m_VPInstruction<VPInstruction::FirstOrderRecurrenceSplice>(
+                       m_VPValue(V1), m_VPValue(V2))))
+          continue;
         VPValue *Imm = Plan.getOrAddLiveIn(
             ConstantInt::getSigned(Type::getInt32Ty(Plan.getContext()), -1));
-        NewRecipe = new VPWidenIntrinsicRecipe(
+        VPWidenIntrinsicRecipe *VPSplice = new VPWidenIntrinsicRecipe(
             Intrinsic::experimental_vp_splice,
             {V1, V2, Imm, Plan.getTrue(), PrevEVL, &EVL},
             TypeInfo.inferScalarType(R.getVPSingleValue()), {}, {},
             R.getDebugLoc());
+        VPSplice->insertBefore(&R);
+        R.getVPSingleValue()->replaceAllUsesWith(VPSplice);
+        ToErase.push_back(&R);
       }
-
-      // TODO: Only convert reverse to vp.reverse if it uses the result of
-      // vp.load, or defines the stored value of vp.store.
-      if (match(&R, m_Reverse(m_VPValue(V1)))) {
-        NewRecipe = new VPWidenIntrinsicRecipe(
-            Intrinsic::experimental_vp_reverse, {V1, Plan.getTrue(), &EVL},
-            TypeInfo.inferScalarType(R.getVPSingleValue()), {}, {},
-            R.getDebugLoc());
-      }
-
-      if (!NewRecipe)
-        continue;
-      NewRecipe->insertBefore(&R);
-      R.getVPSingleValue()->replaceAllUsesWith(NewRecipe);
-      ToErase.push_back(&R);
     }
   }
 
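Aside, not part of the patch: a minimal scalar sketch of the vp.reverse-with-EVL semantics the new EVL recipes rely on. llvm.experimental.vp.reverse reverses only the first EVL lanes, which is why the reverse emitted next to the vp.load/vp.store takes the EVL operand rather than reversing the full VF-wide vector. The vpReverse helper below is a hypothetical emulation for illustration only, not an LLVM API.

```cpp
// Conceptual model of vp.reverse(src, all-true mask, evl): lane i of the
// result is src[evl - 1 - i] for i < evl; lanes at or beyond evl are
// unspecified (simply copied through here).
#include <cstddef>
#include <cstdio>
#include <vector>

static std::vector<int> vpReverse(const std::vector<int> &Src, size_t EVL) {
  std::vector<int> Res(Src);
  for (size_t I = 0; I < EVL; ++I)
    Res[I] = Src[EVL - 1 - I];
  return Res;
}

int main() {
  // An 8-lane "register", but only EVL = 5 lanes are active this iteration.
  std::vector<int> Loaded = {14, 13, 12, 11, 10, 0, 0, 0};
  size_t EVL = 5;

  // After the EVL-based load of the reversed address range, the reverse puts
  // the active elements back into ascending-lane order; the inactive tail
  // lanes are never consumed.
  std::vector<int> Rev = vpReverse(Loaded, EVL);
  for (size_t I = 0; I < EVL; ++I)
    std::printf("lane %zu = %d\n", I, Rev[I]);
  return 0;
}
```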