
Commit 766934d

The third conversion method for vp.reverse
It may temporarily lose some performance when EVL tail folding is enabled.
1 parent c81f541 commit 766934d
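
Editorial note (not part of this commit): `llvm.vector.reverse` reverses all VF lanes of a vector, while `llvm.experimental.vp.reverse` reverses only the first EVL lanes, with lanes at or beyond EVL unspecified. Because the vp form depends on the per-iteration EVL, a reverse that previously could sit outside the loop may be re-emitted inside the vector body, which is the temporary cost the message refers to; the RISC-V test diff below shows exactly that movement. The following scalar sketch models the two semantics; the container, `VF`, and `EVL` names are illustrative only, not LLVM code.

// Editorial sketch: scalar model of a full vector.reverse versus an
// EVL-limited vp.reverse with an all-true mask.
#include <cstddef>
#include <iostream>
#include <utility>
#include <vector>

// llvm.vector.reverse: reverse all VF lanes.
std::vector<int> vectorReverse(const std::vector<int> &Lanes) {
  return {Lanes.rbegin(), Lanes.rend()};
}

// llvm.experimental.vp.reverse (all-true mask): reverse only the first EVL
// lanes; lanes at or beyond EVL are unspecified (left untouched here).
std::vector<int> vpReverse(std::vector<int> Lanes, std::size_t EVL) {
  for (std::size_t I = 0; I + 1 < EVL; ++I, --EVL)
    std::swap(Lanes[I], Lanes[EVL - 1]);
  return Lanes;
}

int main() {
  std::vector<int> Lanes = {0, 1, 2, 3, 4, 5, 6, 7}; // VF = 8
  for (int L : vectorReverse(Lanes))
    std::cout << L << ' ';            // 7 6 5 4 3 2 1 0
  std::cout << '\n';
  for (int L : vpReverse(Lanes, 5))   // EVL = 5, e.g. a shorter tail iteration
    std::cout << L << ' ';            // 4 3 2 1 0 5 6 7
  std::cout << '\n';
}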

File tree

3 files changed: +51 −39 lines

llvm/lib/Transforms/Vectorize/VPlan.h

Lines changed: 9 additions & 0 deletions
@@ -3470,6 +3470,15 @@ struct VPWidenStoreEVLRecipe final : public VPWidenMemoryRecipe {
     setMask(Mask);
   }

+  VPWidenStoreEVLRecipe(VPWidenStoreRecipe &S, VPValue *Addr,
+                        VPValue *StoredVal, VPValue &EVL, VPValue *Mask)
+      : VPWidenMemoryRecipe(VPDef::VPWidenStoreEVLSC, S.getIngredient(),
+                            {Addr, StoredVal, &EVL}, S.isConsecutive(),
+                            S.isReverse(), S, S.getDebugLoc()) {
+    assert(isReverse() && "Only reverse access need to set new stored value");
+    setMask(Mask);
+  }
+
   VP_CLASSOF_IMPL(VPDef::VPWidenStoreEVLSC)

   /// Return the address accessed by this recipe.

llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp

Lines changed: 41 additions & 38 deletions
@@ -2858,25 +2858,43 @@ static VPRecipeBase *optimizeMaskToEVL(VPValue *HeaderMask,
     return new VPWidenLoadEVLRecipe(cast<VPWidenLoadRecipe>(CurRecipe), Addr,
                                     EVL, Mask);

-  if (match(&CurRecipe,
+  VPValue *ReversedVal;
+  if (match(&CurRecipe, m_Reverse(m_VPValue(ReversedVal))) &&
+      match(ReversedVal,
            m_MaskedLoad(m_VPValue(EndPtr), m_RemoveMask(HeaderMask, Mask))) &&
       match(EndPtr, m_VecEndPtr(m_VPValue(Addr), m_Specific(&Plan->getVF()))) &&
-      cast<VPWidenLoadRecipe>(CurRecipe).isReverse())
-    return new VPWidenLoadEVLRecipe(cast<VPWidenLoadRecipe>(CurRecipe),
-                                    AdjustEndPtr(EndPtr), EVL, Mask);
+      cast<VPWidenLoadRecipe>(ReversedVal)->isReverse()) {
+    auto *LoadR = new VPWidenLoadEVLRecipe(
+        *cast<VPWidenLoadRecipe>(ReversedVal), AdjustEndPtr(EndPtr), EVL, Mask);
+    LoadR->insertBefore(&CurRecipe);
+    return new VPWidenIntrinsicRecipe(
+        Intrinsic::experimental_vp_reverse, {LoadR, Plan->getTrue(), &EVL},
+        TypeInfo.inferScalarType(LoadR), {}, {}, DL);
+  }

   if (match(&CurRecipe, m_MaskedStore(m_VPValue(Addr), m_VPValue(),
                                       m_RemoveMask(HeaderMask, Mask))) &&
       !cast<VPWidenStoreRecipe>(CurRecipe).isReverse())
     return new VPWidenStoreEVLRecipe(cast<VPWidenStoreRecipe>(CurRecipe), Addr,
                                      EVL, Mask);

-  if (match(&CurRecipe, m_MaskedStore(m_VPValue(EndPtr), m_VPValue(),
+  VPValue *StoredVal;
+  if (match(&CurRecipe, m_MaskedStore(m_VPValue(EndPtr), m_VPValue(StoredVal),
                                       m_RemoveMask(HeaderMask, Mask))) &&
       match(EndPtr, m_VecEndPtr(m_VPValue(Addr), m_Specific(&Plan->getVF()))) &&
-      cast<VPWidenStoreRecipe>(CurRecipe).isReverse())
-    return new VPWidenStoreEVLRecipe(cast<VPWidenStoreRecipe>(CurRecipe),
-                                     AdjustEndPtr(EndPtr), EVL, Mask);
+      cast<VPWidenStoreRecipe>(CurRecipe).isReverse()) {
+    if (match(StoredVal, m_Reverse(m_VPValue(ReversedVal)))) {
+      auto *NewReverse = new VPWidenIntrinsicRecipe(
+          Intrinsic::experimental_vp_reverse,
+          {ReversedVal, Plan->getTrue(), &EVL},
+          TypeInfo.inferScalarType(ReversedVal), {}, {},
+          cast<VPInstruction>(StoredVal)->getDebugLoc());
+      NewReverse->insertBefore(&CurRecipe);
+      return new VPWidenStoreEVLRecipe(cast<VPWidenStoreRecipe>(CurRecipe),
+                                       AdjustEndPtr(EndPtr), NewReverse, EVL,
+                                       Mask);
+    }
+  }

   if (auto *Rdx = dyn_cast<VPReductionRecipe>(&CurRecipe))
     if (Rdx->isConditional() &&
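
Editorial note on the hunk above (not from the commit itself): the reverse load case now matches a reverse of a masked load through the vector-end pointer, emits the contiguous EVL load at the adjusted end pointer, and reverses the loaded value with `experimental.vp.reverse`; the reverse store case symmetrically reverses the stored value first and then emits the contiguous EVL store through the new `VPWidenStoreEVLRecipe` constructor. The scalar sketch below models why a descending access of EVL elements equals a contiguous access at the lowest address touched plus a reverse of those EVL lanes; the pointer and buffer names are illustrative, not LLVM APIs.

// Editorial sketch: a reverse (descending) load of EVL lanes equals a
// contiguous load at the adjusted end pointer followed by a reverse of
// those EVL lanes.
#include <cassert>
#include <cstddef>
#include <vector>

// Lane I reads Base[-I]: how a reverse widened load walks memory.
std::vector<int> reverseLoad(const int *Base, std::size_t EVL) {
  assert(EVL != 0 && "model assumes at least one active lane");
  std::vector<int> Lanes(EVL);
  for (std::size_t I = 0; I < EVL; ++I)
    Lanes[I] = *(Base - I);
  return Lanes;
}

// Contiguous load from the lowest address touched, then reverse the EVL lanes
// (the vp.load + experimental.vp.reverse shape this hunk produces).
std::vector<int> contiguousLoadThenReverse(const int *Base, std::size_t EVL) {
  assert(EVL != 0 && "model assumes at least one active lane");
  const int *AdjustedEndPtr = Base - (EVL - 1);
  std::vector<int> Lanes(AdjustedEndPtr, AdjustedEndPtr + EVL);
  return {Lanes.rbegin(), Lanes.rend()};
}

int main() {
  int Data[8] = {10, 11, 12, 13, 14, 15, 16, 17};
  const int *Base = &Data[6]; // reverse access starting at element 6
  assert(reverseLoad(Base, 4) == contiguousLoadThenReverse(Base, 4));
}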
@@ -2949,7 +2967,6 @@ static void transformRecipestoEVLRecipes(VPlan &Plan, VPValue &EVL) {
   // contained.
   bool ContainsFORs =
       any_of(Header->phis(), IsaPred<VPFirstOrderRecurrencePHIRecipe>);
-  VPValue *PrevEVL = nullptr;
   if (ContainsFORs) {
     // TODO: Use VPInstruction::ExplicitVectorLength to get maximum EVL.
     VPValue *MaxEVL = &Plan.getVF();
@@ -2960,42 +2977,28 @@ static void transformRecipestoEVLRecipes(VPlan &Plan, VPValue &EVL) {
         TypeInfo.inferScalarType(MaxEVL), DebugLoc::getUnknown());

     Builder.setInsertPoint(Header, Header->getFirstNonPhi());
-    PrevEVL = Builder.createScalarPhi({MaxEVL, &EVL}, DebugLoc::getUnknown(),
-                                      "prev.evl");
-  }
-
-  // Transform the recipes must be converted to vector predication intrinsics
-  // even if they do not use header mask.
-  for (VPBasicBlock *VPBB : VPBlockUtils::blocksOnly<VPBasicBlock>(
-           vp_depth_first_deep(Plan.getVectorLoopRegion()->getEntry()))) {
-    for (VPRecipeBase &R : *VPBB) {
-      VPWidenIntrinsicRecipe *NewRecipe = nullptr;
-      VPValue *V1, *V2;
-      if (match(&R, m_VPInstruction<VPInstruction::FirstOrderRecurrenceSplice>(
-                        m_VPValue(V1), m_VPValue(V2)))) {
+    VPValue *PrevEVL = Builder.createScalarPhi(
+        {MaxEVL, &EVL}, DebugLoc::getUnknown(), "prev.evl");
+
+    for (VPBasicBlock *VPBB : VPBlockUtils::blocksOnly<VPBasicBlock>(
+             vp_depth_first_deep(Plan.getVectorLoopRegion()->getEntry()))) {
+      for (VPRecipeBase &R : *VPBB) {
+        VPValue *V1, *V2;
+        if (!match(&R,
+                   m_VPInstruction<VPInstruction::FirstOrderRecurrenceSplice>(
+                       m_VPValue(V1), m_VPValue(V2))))
+          continue;
         VPValue *Imm = Plan.getOrAddLiveIn(
             ConstantInt::getSigned(Type::getInt32Ty(Plan.getContext()), -1));
-        NewRecipe = new VPWidenIntrinsicRecipe(
+        VPWidenIntrinsicRecipe *VPSplice = new VPWidenIntrinsicRecipe(
             Intrinsic::experimental_vp_splice,
             {V1, V2, Imm, Plan.getTrue(), PrevEVL, &EVL},
             TypeInfo.inferScalarType(R.getVPSingleValue()), {}, {},
             R.getDebugLoc());
+        VPSplice->insertBefore(&R);
+        R.getVPSingleValue()->replaceAllUsesWith(VPSplice);
+        ToErase.push_back(&R);
       }
-
-      // TODO: Only convert reverse to vp.reverse if it uses the result of
-      // vp.load, or defines the stored value of vp.store.
-      if (match(&R, m_Reverse(m_VPValue(V1)))) {
-        NewRecipe = new VPWidenIntrinsicRecipe(
-            Intrinsic::experimental_vp_reverse, {V1, Plan.getTrue(), &EVL},
-            TypeInfo.inferScalarType(R.getVPSingleValue()), {}, {},
-            R.getDebugLoc());
-      }
-
-      if (!NewRecipe)
-        continue;
-      NewRecipe->insertBefore(&R);
-      R.getVPSingleValue()->replaceAllUsesWith(NewRecipe);
-      ToErase.push_back(&R);
     }
   }

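
Editorial note on the second half of this file (not from the commit itself): the `FirstOrderRecurrenceSplice` to `experimental.vp.splice` conversion is now performed only when the loop header actually contains first-order recurrence phis, using an early `continue` instead of the `NewRecipe` accumulator, and the old catch-all reverse-to-vp.reverse rewrite is gone because reverses are converted alongside the EVL loads and stores above. For reference, a scalar model of what the recurrence splice (immediate -1) has to produce is sketched below; `PrevEVL`, `EVL`, and the lane vectors are illustrative stand-ins and lanes past EVL are ignored.

// Editorial sketch: first-order recurrence splice with immediate -1.
// Lane 0 of the result is the last active lane of the previous iteration's
// vector; lane I (I >= 1) is lane I - 1 of the current iteration's vector.
#include <cassert>
#include <cstddef>
#include <vector>

std::vector<int> forSplice(const std::vector<int> &Prev, std::size_t PrevEVL,
                           const std::vector<int> &Cur, std::size_t EVL) {
  assert(PrevEVL > 0 && EVL > 0 && "model assumes active lanes");
  std::vector<int> Result(EVL);
  Result[0] = Prev[PrevEVL - 1]; // value carried across the iteration boundary
  for (std::size_t I = 1; I < EVL; ++I)
    Result[I] = Cur[I - 1];      // current values shifted down by one lane
  return Result;
}

int main() {
  // Previous iteration produced {1, 2, 3, 4} with PrevEVL = 4; the current,
  // possibly shorter, iteration produced {5, 6, 7} with EVL = 3.
  std::vector<int> Prev = {1, 2, 3, 4};
  std::vector<int> Cur = {5, 6, 7};
  assert((forSplice(Prev, 4, Cur, 3) == std::vector<int>{4, 5, 6}));
}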

llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-uniform-store.ll

Lines changed: 1 addition & 1 deletion
@@ -15,7 +15,6 @@ define void @lshift_significand(i32 %n, ptr nocapture writeonly %dst) {
 ; CHECK-NEXT: [[TMP0:%.*]] = sub i64 3, [[SPEC_SELECT]]
 ; CHECK-NEXT: br label %[[VECTOR_PH:.*]]
 ; CHECK: [[VECTOR_PH]]:
-; CHECK-NEXT: [[REVERSE:%.*]] = call <vscale x 2 x i64> @llvm.vector.reverse.nxv2i64(<vscale x 2 x i64> zeroinitializer)
 ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
 ; CHECK: [[VECTOR_BODY]]:
 ; CHECK-NEXT: [[EVL_BASED_IV:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], %[[VECTOR_BODY]] ]
@@ -24,6 +23,7 @@ define void @lshift_significand(i32 %n, ptr nocapture writeonly %dst) {
 ; CHECK-NEXT: [[OFFSET_IDX:%.*]] = add i64 [[SPEC_SELECT]], [[EVL_BASED_IV]]
 ; CHECK-NEXT: [[TMP12:%.*]] = sub nuw nsw i64 1, [[OFFSET_IDX]]
 ; CHECK-NEXT: [[ARRAYIDX13:%.*]] = getelementptr i64, ptr [[DST]], i64 [[TMP12]]
+; CHECK-NEXT: [[REVERSE:%.*]] = call <vscale x 2 x i64> @llvm.experimental.vp.reverse.nxv2i64(<vscale x 2 x i64> zeroinitializer, <vscale x 2 x i1> splat (i1 true), i32 [[TMP1]])
 ; CHECK-NEXT: [[TMP4:%.*]] = zext i32 [[TMP1]] to i64
 ; CHECK-NEXT: [[TMP5:%.*]] = mul i64 0, [[TMP4]]
 ; CHECK-NEXT: [[TMP6:%.*]] = sub i64 [[TMP4]], 1
