-
Notifications
You must be signed in to change notification settings - Fork 15.5k
[VPlan] Extract reverse operation for reverse accesses #146525
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
060e5d3
ccfe30b
6e43a13
dadd010
142bd1c
7ceddb5
b144394
b786ff0
c1d2eb8
f1a0443
eb88d09
c81f541
766934d
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -444,6 +444,7 @@ unsigned VPInstruction::getNumOperandsForOpcode(unsigned Opcode) { | |
| case VPInstruction::ExtractPenultimateElement: | ||
| case VPInstruction::Not: | ||
| case VPInstruction::ResumeForEpilogue: | ||
| case VPInstruction::Reverse: | ||
| case VPInstruction::Unpack: | ||
| return 1; | ||
| case Instruction::ICmp: | ||
|
|
@@ -917,6 +918,8 @@ Value *VPInstruction::generate(VPTransformState &State) { | |
| } | ||
| case VPInstruction::ResumeForEpilogue: | ||
| return State.get(getOperand(0), true); | ||
| case VPInstruction::Reverse: | ||
| return Builder.CreateVectorReverse(State.get(getOperand(0)), "reverse"); | ||
| default: | ||
| llvm_unreachable("Unsupported opcode for instruction"); | ||
| } | ||
|
|
@@ -1103,6 +1106,14 @@ InstructionCost VPInstruction::computeCost(ElementCount VF, | |
| I32Ty, {Arg0Ty, I32Ty, I1Ty}); | ||
| return Ctx.TTI.getIntrinsicInstrCost(Attrs, Ctx.CostKind); | ||
| } | ||
| case VPInstruction::Reverse: { | ||
| assert(VF.isVector() && "Reverse operation must be vector type"); | ||
| auto *VectorTy = cast<VectorType>( | ||
| toVectorTy(Ctx.Types.inferScalarType(getOperand(0)), VF)); | ||
| return Ctx.TTI.getShuffleCost(TargetTransformInfo::SK_Reverse, VectorTy, | ||
| VectorTy, /*Mask=*/{}, Ctx.CostKind, | ||
| /*Index=*/0); | ||
| } | ||
| case VPInstruction::ExtractLastLane: { | ||
| // Add on the cost of extracting the element. | ||
| auto *VecTy = toVectorTy(Ctx.Types.inferScalarType(getOperand(0)), VF); | ||
|
|
@@ -1205,6 +1216,7 @@ bool VPInstruction::opcodeMayReadOrWriteFromMemory() const { | |
| case VPInstruction::WidePtrAdd: | ||
| case VPInstruction::StepVector: | ||
| case VPInstruction::ReductionStartVector: | ||
| case VPInstruction::Reverse: | ||
| case VPInstruction::VScale: | ||
| case VPInstruction::Unpack: | ||
| return false; | ||
|
|
@@ -1382,6 +1394,9 @@ void VPInstruction::printRecipe(raw_ostream &O, const Twine &Indent, | |
| case VPInstruction::ResumeForEpilogue: | ||
| O << "resume-for-epilogue"; | ||
| break; | ||
| case VPInstruction::Reverse: | ||
| O << "reverse"; | ||
| break; | ||
| case VPInstruction::Unpack: | ||
| O << "unpack"; | ||
| break; | ||
|
|
@@ -2260,18 +2275,32 @@ InstructionCost VPWidenCastRecipe::computeCost(ElementCount VF, | |
| VPValue *Operand = getOperand(0); | ||
| TTI::CastContextHint CCH = TTI::CastContextHint::None; | ||
| // For Trunc/FPTrunc, get the context from the only user. | ||
| if ((Opcode == Instruction::Trunc || Opcode == Instruction::FPTrunc) && | ||
| !hasMoreThanOneUniqueUser() && getNumUsers() > 0) { | ||
| if (auto *StoreRecipe = dyn_cast<VPRecipeBase>(*user_begin())) | ||
| CCH = ComputeCCH(StoreRecipe); | ||
| if (Opcode == Instruction::Trunc || Opcode == Instruction::FPTrunc) { | ||
| auto GetOnlyUser = [](const VPSingleDefRecipe *R) -> VPRecipeBase * { | ||
| if (R->getNumUsers() == 0 || R->hasMoreThanOneUniqueUser()) | ||
| return nullptr; | ||
| return dyn_cast<VPRecipeBase>(*R->user_begin()); | ||
| }; | ||
|
|
||
| if (VPRecipeBase *Recipe = GetOnlyUser(this)) { | ||
| if (match(Recipe, m_Reverse(m_VPValue()))) | ||
| Recipe = GetOnlyUser(cast<VPInstruction>(Recipe)); | ||
| if (Recipe) | ||
| CCH = ComputeCCH(Recipe); | ||
| } | ||
| } | ||
| // For Z/Sext, get the context from the operand. | ||
| else if (Opcode == Instruction::ZExt || Opcode == Instruction::SExt || | ||
| Opcode == Instruction::FPExt) { | ||
|
Comment on lines
2293
to
2294
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Not sure I understand why the code is guarded by Trunc, FPTrunc, FPExt, ZExt, or SExt opcodes?
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This code is used to determine TTI::CastContextHint. Perhaps for other cast opcodes, there isn’t currently a situation that requires this hint. But I think that’s a separate issue. The change here is just to ensure that CastContextHint::Reversed is still correctly propagated after the reverse operation is separated out. |
||
| if (Operand->isLiveIn()) | ||
| CCH = TTI::CastContextHint::Normal; | ||
| else if (Operand->getDefiningRecipe()) | ||
| CCH = ComputeCCH(Operand->getDefiningRecipe()); | ||
| else if (auto *Recipe = Operand->getDefiningRecipe()) { | ||
| VPValue *ReverseOp; | ||
| if (match(Recipe, m_Reverse(m_VPValue(ReverseOp)))) | ||
| Recipe = ReverseOp->getDefiningRecipe(); | ||
| if (Recipe) | ||
| CCH = ComputeCCH(Recipe); | ||
| } | ||
| } | ||
|
|
||
| auto *SrcTy = | ||
|
|
@@ -3519,12 +3548,7 @@ InstructionCost VPWidenMemoryRecipe::computeCost(ElementCount VF, | |
| Cost += Ctx.TTI.getMemoryOpCost(Opcode, Ty, Alignment, AS, Ctx.CostKind, | ||
| OpInfo, &Ingredient); | ||
| } | ||
| if (!Reverse) | ||
| return Cost; | ||
|
|
||
| return Cost += Ctx.TTI.getShuffleCost( | ||
| TargetTransformInfo::SK_Reverse, cast<VectorType>(Ty), | ||
| cast<VectorType>(Ty), {}, Ctx.CostKind, 0); | ||
| return Cost; | ||
| } | ||
|
|
||
| void VPWidenLoadRecipe::execute(VPTransformState &State) { | ||
|
|
@@ -3555,8 +3579,6 @@ void VPWidenLoadRecipe::execute(VPTransformState &State) { | |
| NewLI = Builder.CreateAlignedLoad(DataTy, Addr, Alignment, "wide.load"); | ||
| } | ||
| applyMetadata(*cast<Instruction>(NewLI)); | ||
| if (Reverse) | ||
| NewLI = Builder.CreateVectorReverse(NewLI, "reverse"); | ||
| State.set(this, NewLI); | ||
| } | ||
|
|
||
|
|
@@ -3611,8 +3633,6 @@ void VPWidenLoadEVLRecipe::execute(VPTransformState &State) { | |
| 0, Attribute::getWithAlignment(NewLI->getContext(), Alignment)); | ||
| applyMetadata(*NewLI); | ||
| Instruction *Res = NewLI; | ||
| if (isReverse()) | ||
| Res = createReverseEVL(Builder, Res, EVL, "vp.reverse"); | ||
| State.set(this, Res); | ||
| } | ||
|
|
||
|
|
@@ -3629,15 +3649,9 @@ InstructionCost VPWidenLoadEVLRecipe::computeCost(ElementCount VF, | |
| Type *Ty = toVectorTy(getLoadStoreType(&Ingredient), VF); | ||
| unsigned AS = cast<PointerType>(Ctx.Types.inferScalarType(getAddr())) | ||
| ->getAddressSpace(); | ||
| InstructionCost Cost = Ctx.TTI.getMemIntrinsicInstrCost( | ||
| return Ctx.TTI.getMemIntrinsicInstrCost( | ||
| MemIntrinsicCostAttributes(Intrinsic::vp_load, Ty, Alignment, AS), | ||
| Ctx.CostKind); | ||
| if (!Reverse) | ||
| return Cost; | ||
|
|
||
| return Cost + Ctx.TTI.getShuffleCost( | ||
| TargetTransformInfo::SK_Reverse, cast<VectorType>(Ty), | ||
| cast<VectorType>(Ty), {}, Ctx.CostKind, 0); | ||
| } | ||
|
|
||
| #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) | ||
|
|
@@ -3666,13 +3680,6 @@ void VPWidenStoreRecipe::execute(VPTransformState &State) { | |
| } | ||
|
|
||
| Value *StoredVal = State.get(StoredVPValue); | ||
| if (isReverse()) { | ||
| // If we store to reverse consecutive memory locations, then we need | ||
| // to reverse the order of elements in the stored value. | ||
| StoredVal = Builder.CreateVectorReverse(StoredVal, "reverse"); | ||
| // We don't want to update the value in the map as it might be used in | ||
| // another expression. So don't call resetVectorValue(StoredVal). | ||
| } | ||
| Value *Addr = State.get(getAddr(), /*IsScalar*/ !CreateScatter); | ||
| Instruction *NewSI = nullptr; | ||
| if (CreateScatter) | ||
|
|
@@ -3701,8 +3708,6 @@ void VPWidenStoreEVLRecipe::execute(VPTransformState &State) { | |
| CallInst *NewSI = nullptr; | ||
| Value *StoredVal = State.get(StoredValue); | ||
| Value *EVL = State.get(getEVL(), VPLane(0)); | ||
| if (isReverse()) | ||
| StoredVal = createReverseEVL(Builder, StoredVal, EVL, "vp.reverse"); | ||
| Value *Mask = nullptr; | ||
| if (VPValue *VPMask = getMask()) { | ||
| Mask = State.get(VPMask); | ||
|
|
@@ -3739,15 +3744,9 @@ InstructionCost VPWidenStoreEVLRecipe::computeCost(ElementCount VF, | |
| Type *Ty = toVectorTy(getLoadStoreType(&Ingredient), VF); | ||
| unsigned AS = cast<PointerType>(Ctx.Types.inferScalarType(getAddr())) | ||
| ->getAddressSpace(); | ||
| InstructionCost Cost = Ctx.TTI.getMemIntrinsicInstrCost( | ||
| return Ctx.TTI.getMemIntrinsicInstrCost( | ||
| MemIntrinsicCostAttributes(Intrinsic::vp_store, Ty, Alignment, AS), | ||
| Ctx.CostKind); | ||
| if (!Reverse) | ||
| return Cost; | ||
|
|
||
| return Cost + Ctx.TTI.getShuffleCost( | ||
| TargetTransformInfo::SK_Reverse, cast<VectorType>(Ty), | ||
| cast<VectorType>(Ty), {}, Ctx.CostKind, 0); | ||
| } | ||
|
|
||
| #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) | ||
|
|
||
| Original file line number | Diff line number | Diff line change | ||||||
|---|---|---|---|---|---|---|---|---|
|
|
@@ -2858,25 +2858,43 @@ static VPRecipeBase *optimizeMaskToEVL(VPValue *HeaderMask, | |||||||
| return new VPWidenLoadEVLRecipe(cast<VPWidenLoadRecipe>(CurRecipe), Addr, | ||||||||
| EVL, Mask); | ||||||||
|
|
||||||||
| if (match(&CurRecipe, | ||||||||
| VPValue *ReversedVal; | ||||||||
| if (match(&CurRecipe, m_Reverse(m_VPValue(ReversedVal))) && | ||||||||
| match(ReversedVal, | ||||||||
| m_MaskedLoad(m_VPValue(EndPtr), m_RemoveMask(HeaderMask, Mask))) && | ||||||||
| match(EndPtr, m_VecEndPtr(m_VPValue(Addr), m_Specific(&Plan->getVF()))) && | ||||||||
| cast<VPWidenLoadRecipe>(CurRecipe).isReverse()) | ||||||||
| return new VPWidenLoadEVLRecipe(cast<VPWidenLoadRecipe>(CurRecipe), | ||||||||
| AdjustEndPtr(EndPtr), EVL, Mask); | ||||||||
| cast<VPWidenLoadRecipe>(ReversedVal)->isReverse()) { | ||||||||
| auto *LoadR = new VPWidenLoadEVLRecipe( | ||||||||
| *cast<VPWidenLoadRecipe>(ReversedVal), AdjustEndPtr(EndPtr), EVL, Mask); | ||||||||
| LoadR->insertBefore(&CurRecipe); | ||||||||
| return new VPWidenIntrinsicRecipe( | ||||||||
| Intrinsic::experimental_vp_reverse, {LoadR, Plan->getTrue(), &EVL}, | ||||||||
| TypeInfo.inferScalarType(LoadR), {}, {}, DL); | ||||||||
| } | ||||||||
|
|
||||||||
| if (match(&CurRecipe, m_MaskedStore(m_VPValue(Addr), m_VPValue(), | ||||||||
| m_RemoveMask(HeaderMask, Mask))) && | ||||||||
| !cast<VPWidenStoreRecipe>(CurRecipe).isReverse()) | ||||||||
| return new VPWidenStoreEVLRecipe(cast<VPWidenStoreRecipe>(CurRecipe), Addr, | ||||||||
| EVL, Mask); | ||||||||
|
|
||||||||
| if (match(&CurRecipe, m_MaskedStore(m_VPValue(EndPtr), m_VPValue(), | ||||||||
| VPValue *StoredVal; | ||||||||
| if (match(&CurRecipe, m_MaskedStore(m_VPValue(EndPtr), m_VPValue(StoredVal), | ||||||||
|
Comment on lines
+2881
to
+2882
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Can this just match the reverse in the same line so you don't need the separate match on line 2886
Suggested change
|
||||||||
| m_RemoveMask(HeaderMask, Mask))) && | ||||||||
| match(EndPtr, m_VecEndPtr(m_VPValue(Addr), m_Specific(&Plan->getVF()))) && | ||||||||
| cast<VPWidenStoreRecipe>(CurRecipe).isReverse()) | ||||||||
| return new VPWidenStoreEVLRecipe(cast<VPWidenStoreRecipe>(CurRecipe), | ||||||||
| AdjustEndPtr(EndPtr), EVL, Mask); | ||||||||
| cast<VPWidenStoreRecipe>(CurRecipe).isReverse()) { | ||||||||
| if (match(StoredVal, m_Reverse(m_VPValue(ReversedVal)))) { | ||||||||
| auto *NewReverse = new VPWidenIntrinsicRecipe( | ||||||||
| Intrinsic::experimental_vp_reverse, | ||||||||
| {ReversedVal, Plan->getTrue(), &EVL}, | ||||||||
| TypeInfo.inferScalarType(ReversedVal), {}, {}, | ||||||||
| cast<VPInstruction>(StoredVal)->getDebugLoc()); | ||||||||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I think you can just use CurRecipe's debugloc, since the original reverse is created with the store's debugloc?
Suggested change
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I don't think we have tests with debug location for EVL transform, would be good to add some. |
||||||||
| NewReverse->insertBefore(&CurRecipe); | ||||||||
| return new VPWidenStoreEVLRecipe(cast<VPWidenStoreRecipe>(CurRecipe), | ||||||||
| AdjustEndPtr(EndPtr), NewReverse, EVL, | ||||||||
| Mask); | ||||||||
| } | ||||||||
| } | ||||||||
|
|
||||||||
| if (auto *Rdx = dyn_cast<VPReductionRecipe>(&CurRecipe)) | ||||||||
| if (Rdx->isConditional() && | ||||||||
|
|
||||||||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
the only addition here is the stored value, right? can we unify to a single constructor, and clarify that
VPWidenStoreRecipeis only used for other fields?