Skip to content

Commit 9913611

Browse files
committed
[VPlan] Extract reverse operation for reverse accesses
1 parent d9f9064 commit 9913611

20 files changed

+193
-121
lines changed

llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1523,6 +1523,12 @@ RISCVTTIImpl::getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
15231523
cast<VectorType>(ICA.getArgTypes()[0]), {}, CostKind,
15241524
0, cast<VectorType>(ICA.getReturnType()));
15251525
}
1526+
case Intrinsic::experimental_vp_reverse: {
1527+
return getShuffleCost(TTI::SK_Reverse,
1528+
cast<VectorType>(ICA.getReturnType()),
1529+
cast<VectorType>(ICA.getArgTypes()[0]), {}, CostKind,
1530+
0, cast<VectorType>(ICA.getReturnType()));
1531+
}
15261532
case Intrinsic::fptoui_sat:
15271533
case Intrinsic::fptosi_sat: {
15281534
InstructionCost Cost = 0;

llvm/lib/Transforms/Vectorize/LoopVectorize.cpp

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8878,6 +8878,10 @@ VPlanPtr LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(
88788878
// bring the VPlan to its final state.
88798879
// ---------------------------------------------------------------------------
88808880

8881+
// Adjust the result of reverse memory accesses.
8882+
VPlanTransforms::runPass(VPlanTransforms::adjustRecipesForReverseAccesses,
8883+
*Plan);
8884+
88818885
// Adjust the recipes for any inloop reductions.
88828886
adjustRecipesForReductions(Plan, RecipeBuilder, Range.Start);
88838887

llvm/lib/Transforms/Vectorize/VPlan.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1003,6 +1003,8 @@ class LLVM_ABI_FOR_TEST VPInstruction : public VPRecipeWithIRFlags,
10031003
// It produces the lane index across all unrolled iterations. Unrolling will
10041004
// add all copies of its original operand as additional operands.
10051005
FirstActiveLane,
1006+
// Returns a reversed vector for the operand.
1007+
Reverse,
10061008

10071009
// The opcodes below are used for VPInstructionWithType.
10081010
//

llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -129,6 +129,7 @@ Type *VPTypeAnalysis::inferScalarTypeForRecipe(const VPInstruction *R) {
129129
case VPInstruction::Broadcast:
130130
case VPInstruction::PtrAdd:
131131
case VPInstruction::WidePtrAdd:
132+
case VPInstruction::Reverse:
132133
// Return the type based on first operand.
133134
return inferScalarType(R->getOperand(0));
134135
case VPInstruction::BranchOnCond:

llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp

Lines changed: 18 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -466,6 +466,7 @@ unsigned VPInstruction::getNumOperandsForOpcode(unsigned Opcode) {
466466
case VPInstruction::ExtractPenultimateElement:
467467
case VPInstruction::FirstActiveLane:
468468
case VPInstruction::Not:
469+
case VPInstruction::Reverse:
469470
return 1;
470471
case Instruction::ICmp:
471472
case Instruction::FCmp:
@@ -922,6 +923,9 @@ Value *VPInstruction::generate(VPTransformState &State) {
922923

923924
return Res;
924925
}
926+
case VPInstruction::Reverse: {
927+
return Builder.CreateVectorReverse(State.get(getOperand(0)), "reverse");
928+
}
925929
default:
926930
llvm_unreachable("Unsupported opcode for instruction");
927931
}
@@ -998,6 +1002,13 @@ InstructionCost VPInstruction::computeCost(ElementCount VF,
9981002
I32Ty, {Arg0Ty, I32Ty, I1Ty});
9991003
return Ctx.TTI.getIntrinsicInstrCost(Attrs, Ctx.CostKind);
10001004
}
1005+
case VPInstruction::Reverse: {
1006+
assert(VF.isVector() && "Reverse operation must be vector type");
1007+
Type *VectorTy = toVectorTy(Ctx.Types.inferScalarType(this), VF);
1008+
return Ctx.TTI.getShuffleCost(
1009+
TargetTransformInfo::SK_Reverse, cast<VectorType>(VectorTy),
1010+
cast<VectorType>(VectorTy), {}, Ctx.CostKind, 0);
1011+
}
10011012
case VPInstruction::ExtractPenultimateElement:
10021013
if (VF == ElementCount::getScalable(1))
10031014
return InstructionCost::getInvalid();
@@ -1093,6 +1104,7 @@ bool VPInstruction::opcodeMayReadOrWriteFromMemory() const {
10931104
case VPInstruction::WidePtrAdd:
10941105
case VPInstruction::StepVector:
10951106
case VPInstruction::ReductionStartVector:
1107+
case VPInstruction::Reverse:
10961108
return false;
10971109
default:
10981110
return true;
@@ -1251,6 +1263,9 @@ void VPInstruction::print(raw_ostream &O, const Twine &Indent,
12511263
case VPInstruction::ReductionStartVector:
12521264
O << "reduction-start-vector";
12531265
break;
1266+
case VPInstruction::Reverse:
1267+
O << "reverse";
1268+
break;
12541269
default:
12551270
O << Instruction::getOpcodeName(getOpcode());
12561271
}
@@ -3115,12 +3130,7 @@ InstructionCost VPWidenMemoryRecipe::computeCost(ElementCount VF,
31153130
Cost += Ctx.TTI.getMemoryOpCost(Opcode, Ty, Alignment, AS, Ctx.CostKind,
31163131
OpInfo, &Ingredient);
31173132
}
3118-
if (!Reverse)
3119-
return Cost;
3120-
3121-
return Cost += Ctx.TTI.getShuffleCost(
3122-
TargetTransformInfo::SK_Reverse, cast<VectorType>(Ty),
3123-
cast<VectorType>(Ty), {}, Ctx.CostKind, 0);
3133+
return Cost;
31243134
}
31253135

31263136
void VPWidenLoadRecipe::execute(VPTransformState &State) {
@@ -3152,8 +3162,6 @@ void VPWidenLoadRecipe::execute(VPTransformState &State) {
31523162
NewLI = Builder.CreateAlignedLoad(DataTy, Addr, Alignment, "wide.load");
31533163
}
31543164
applyMetadata(*cast<Instruction>(NewLI));
3155-
if (Reverse)
3156-
NewLI = Builder.CreateVectorReverse(NewLI, "reverse");
31573165
State.set(this, NewLI);
31583166
}
31593167

@@ -3209,8 +3217,6 @@ void VPWidenLoadEVLRecipe::execute(VPTransformState &State) {
32093217
0, Attribute::getWithAlignment(NewLI->getContext(), Alignment));
32103218
applyMetadata(*NewLI);
32113219
Instruction *Res = NewLI;
3212-
if (isReverse())
3213-
Res = createReverseEVL(Builder, Res, EVL, "vp.reverse");
32143220
State.set(this, Res);
32153221
}
32163222

@@ -3229,12 +3235,8 @@ InstructionCost VPWidenLoadEVLRecipe::computeCost(ElementCount VF,
32293235
unsigned AS = getLoadStoreAddressSpace(&Ingredient);
32303236
InstructionCost Cost = Ctx.TTI.getMaskedMemoryOpCost(
32313237
Instruction::Load, Ty, Alignment, AS, Ctx.CostKind);
3232-
if (!Reverse)
3233-
return Cost;
32343238

3235-
return Cost + Ctx.TTI.getShuffleCost(
3236-
TargetTransformInfo::SK_Reverse, cast<VectorType>(Ty),
3237-
cast<VectorType>(Ty), {}, Ctx.CostKind, 0);
3239+
return Cost;
32383240
}
32393241

32403242
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
@@ -3264,13 +3266,6 @@ void VPWidenStoreRecipe::execute(VPTransformState &State) {
32643266
}
32653267

32663268
Value *StoredVal = State.get(StoredVPValue);
3267-
if (isReverse()) {
3268-
// If we store to reverse consecutive memory locations, then we need
3269-
// to reverse the order of elements in the stored value.
3270-
StoredVal = Builder.CreateVectorReverse(StoredVal, "reverse");
3271-
// We don't want to update the value in the map as it might be used in
3272-
// another expression. So don't call resetVectorValue(StoredVal).
3273-
}
32743269
Value *Addr = State.get(getAddr(), /*IsScalar*/ !CreateScatter);
32753270
Instruction *NewSI = nullptr;
32763271
if (CreateScatter)
@@ -3300,8 +3295,6 @@ void VPWidenStoreEVLRecipe::execute(VPTransformState &State) {
33003295
CallInst *NewSI = nullptr;
33013296
Value *StoredVal = State.get(StoredValue);
33023297
Value *EVL = State.get(getEVL(), VPLane(0));
3303-
if (isReverse())
3304-
StoredVal = createReverseEVL(Builder, StoredVal, EVL, "vp.reverse");
33053298
Value *Mask = nullptr;
33063299
if (VPValue *VPMask = getMask()) {
33073300
Mask = State.get(VPMask);
@@ -3340,12 +3333,8 @@ InstructionCost VPWidenStoreEVLRecipe::computeCost(ElementCount VF,
33403333
unsigned AS = getLoadStoreAddressSpace(&Ingredient);
33413334
InstructionCost Cost = Ctx.TTI.getMaskedMemoryOpCost(
33423335
Instruction::Store, Ty, Alignment, AS, Ctx.CostKind);
3343-
if (!Reverse)
3344-
return Cost;
33453336

3346-
return Cost + Ctx.TTI.getShuffleCost(
3347-
TargetTransformInfo::SK_Reverse, cast<VectorType>(Ty),
3348-
cast<VectorType>(Ty), {}, Ctx.CostKind, 0);
3337+
return Cost;
33493338
}
33503339

33513340
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)

llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp

Lines changed: 53 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2170,6 +2170,27 @@ static VPRecipeBase *optimizeMaskToEVL(VPValue *HeaderMask,
21702170
.Default([&](VPRecipeBase *R) { return nullptr; });
21712171
}
21722172

2173+
/// Rewrite every VPInstruction::Reverse found in the vector loop region into
/// a VPWidenIntrinsicRecipe calling llvm.experimental.vp.reverse.
///
/// \param Plan       The plan whose vector loop region is scanned.
/// \param TypeInfo   Used to infer the scalar result type of each reverse.
/// \param AllOneMask Appended as the mask operand of the new intrinsic recipe.
/// \param EVL        Appended as the last operand of the new intrinsic recipe.
static void convertToEVLReverse(VPlan &Plan, VPTypeAnalysis &TypeInfo,
2174+
VPValue &AllOneMask, VPValue &EVL) {
2175+
// Only blocks reachable by a shallow traversal from the region entry are
// visited (nested regions are not descended into).
for (VPBasicBlock *VPBB : VPBlockUtils::blocksOnly<VPBasicBlock>(
2176+
vp_depth_first_shallow(Plan.getVectorLoopRegion()->getEntry()))) {
2177+
// Recipes are walked back-to-front through an early-increment range;
// presumably this is what makes erasing the current recipe below safe.
for (VPRecipeBase &R : make_early_inc_range(reverse(*VPBB))) {
2178+
auto *VPI = dyn_cast<VPInstruction>(&R);
2179+
// Skip everything that is not a Reverse VPInstruction.
if (!VPI || VPI->getOpcode() != VPInstruction::Reverse)
2180+
continue;
2181+
2182+
// New operand list: the original operands, then the mask, then the EVL.
SmallVector<VPValue *> Ops(VPI->operands());
2183+
Ops.append({&AllOneMask, &EVL});
2184+
auto *NewReverse = new VPWidenIntrinsicRecipe(
2185+
Intrinsic::experimental_vp_reverse, Ops,
2186+
TypeInfo.inferScalarType(VPI), VPI->getDebugLoc());
2187+
// Put the replacement in the same position, move all users over to it,
// then drop the now-unused Reverse instruction.
NewReverse->insertBefore(VPI);
2188+
VPI->replaceAllUsesWith(NewReverse);
2189+
VPI->eraseFromParent();
2190+
}
2191+
}
2192+
}
2193+
21732194
/// Replace recipes with their EVL variants.
21742195
static void transformRecipestoEVLRecipes(VPlan &Plan, VPValue &EVL) {
21752196
Type *CanonicalIVType = Plan.getCanonicalIV()->getScalarType();
@@ -2283,6 +2304,7 @@ static void transformRecipestoEVLRecipes(VPlan &Plan, VPValue &EVL) {
22832304
HeaderMask->replaceAllUsesWith(EVLMask);
22842305
ToErase.push_back(HeaderMask->getDefiningRecipe());
22852306
}
2307+
convertToEVLReverse(Plan, TypeInfo, *AllOneMask, EVL);
22862308

22872309
for (VPRecipeBase *R : reverse(ToErase)) {
22882310
SmallVector<VPValue *> PossiblyDead(R->operands());
@@ -3535,3 +3557,34 @@ void VPlanTransforms::addBranchWeightToMiddleTerminator(
35353557
MDB.createBranchWeights({1, VectorStep - 1}, /*IsExpected=*/false);
35363558
MiddleTerm->addMetadata(LLVMContext::MD_prof, BranchWeights);
35373559
}
3560+
3561+
// Make the element reversal of reverse widened loads/stores explicit: for
// each VPWidenLoadRecipe / VPWidenStoreRecipe whose isReverse() is true, add
// a separate VPInstruction::Reverse on the loaded value (after the load) or
// on the stored value (before the store). Other VPWidenMemoryRecipe kinds
// are left untouched by this function.
void VPlanTransforms::adjustRecipesForReverseAccesses(VPlan &Plan) {
3562+
// Nothing to do for plans that only have a scalar VF.
if (Plan.hasScalarVFOnly())
3563+
return;
3564+
3565+
// Deep traversal: blocks inside nested regions are visited as well.
for (VPBasicBlock *VPBB : VPBlockUtils::blocksOnly<VPBasicBlock>(
3566+
vp_depth_first_deep(Plan.getVectorLoopRegion()))) {
3567+
for (VPRecipeBase &R : *VPBB) {
3568+
auto *MemR = dyn_cast<VPWidenMemoryRecipe>(&R);
3569+
// Only reverse widened memory recipes are of interest. The Reverse
// VPInstructions inserted below fail this cast and are skipped when the
// iteration reaches them.
if (!MemR || !MemR->isReverse())
3570+
continue;
3571+
3572+
// Reverse load: users must consume the reversed value, not the raw load.
if (auto *L = dyn_cast<VPWidenLoadRecipe>(MemR)) {
3573+
auto *Reverse =
3574+
new VPInstruction(VPInstruction::Reverse, {L}, L->getDebugLoc());
3575+
Reverse->insertAfter(L);
3576+
L->replaceAllUsesWith(Reverse);
3577+
// The replaceAllUsesWith above presumably also redirected Reverse's own
// use of L, so operand 0 is explicitly pointed back at the load.
Reverse->setOperand(0, L);
3578+
continue;
3579+
}
3580+
3581+
// Reverse store: reverse the value first and store the reversed copy.
// Other users of StoredVal keep seeing the unreversed value.
if (auto *S = dyn_cast<VPWidenStoreRecipe>(MemR)) {
3582+
VPValue *StoredVal = S->getStoredValue();
3583+
auto *Reverse = new VPInstruction(VPInstruction::Reverse, {StoredVal},
3584+
S->getDebugLoc());
3585+
Reverse->insertBefore(S);
3586+
// NOTE(review): operand 1 is assumed to be the stored-value operand of
// VPWidenStoreRecipe (matching getStoredValue()) -- confirm in VPlan.h.
S->setOperand(1, Reverse);
3587+
}
3588+
}
3589+
}
3590+
}

llvm/lib/Transforms/Vectorize/VPlanTransforms.h

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -284,6 +284,20 @@ struct VPlanTransforms {
284284
static void
285285
addBranchWeightToMiddleTerminator(VPlan &Plan, ElementCount VF,
286286
std::optional<unsigned> VScaleForTuning);
287+
288+
/// Add reverse recipes for reverse memory accesses.
289+
/// For reverse loads, transform
290+
/// WIDEN ir<%L> = load vp<%addr>
291+
/// into
292+
/// WIDEN ir<%L> = load vp<%addr>
293+
/// EMIT vp<%RevL> = reverse ir<%L>
294+
///
295+
/// For reverse stores, transform
296+
/// WIDEN store vp<%addr>, ir<%SVal>
297+
/// into
298+
/// EMIT vp<%RevS> = reverse ir<%SVal>
299+
/// WIDEN store vp<%addr>, vp<%RevS>
300+
static void adjustRecipesForReverseAccesses(VPlan &Plan);
287301
};
288302

289303
} // namespace llvm

llvm/test/Transforms/LoopVectorize/AArch64/sve-vector-reverse-mask4.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -22,8 +22,8 @@ define void @vector_reverse_mask_nxv4i1(ptr %a, ptr %cond, i64 %N) #0 {
2222
; CHECK: %[[WIDEMSKLOAD:.*]] = call <vscale x 4 x double> @llvm.masked.load.nxv4f64.p0(ptr %{{.*}}, i32 8, <vscale x 4 x i1> %[[REVERSE6]], <vscale x 4 x double> poison)
2323
; CHECK: %[[REVERSE7:.*]] = call <vscale x 4 x double> @llvm.vector.reverse.nxv4f64(<vscale x 4 x double> %[[WIDEMSKLOAD]])
2424
; CHECK: %[[FADD:.*]] = fadd <vscale x 4 x double> %[[REVERSE7]]
25-
; CHECK: %[[REVERSE9:.*]] = call <vscale x 4 x i1> @llvm.vector.reverse.nxv4i1(<vscale x 4 x i1> %{{.*}})
2625
; CHECK: %[[REVERSE8:.*]] = call <vscale x 4 x double> @llvm.vector.reverse.nxv4f64(<vscale x 4 x double> %[[FADD]])
26+
; CHECK: %[[REVERSE9:.*]] = call <vscale x 4 x i1> @llvm.vector.reverse.nxv4i1(<vscale x 4 x i1> %{{.*}})
2727
; CHECK: call void @llvm.masked.store.nxv4f64.p0(<vscale x 4 x double> %[[REVERSE8]], ptr %{{.*}}, i32 8, <vscale x 4 x i1> %[[REVERSE9]]
2828

2929
entry:

llvm/test/Transforms/LoopVectorize/AArch64/vector-reverse-mask4.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -37,8 +37,8 @@ define void @vector_reverse_mask_v4i1(ptr noalias %a, ptr noalias %cond, i64 %N)
3737
; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8, ptr [[TMP2]], i64 -24
3838
; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i8, ptr [[TMP2]], i64 -56
3939
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x double>, ptr [[TMP3]], align 8
40-
; CHECK-NEXT: [[REVERSE:%.*]] = shufflevector <4 x double> [[WIDE_LOAD]], <4 x double> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
4140
; CHECK-NEXT: [[WIDE_LOAD1:%.*]] = load <4 x double>, ptr [[TMP4]], align 8
41+
; CHECK-NEXT: [[REVERSE:%.*]] = shufflevector <4 x double> [[WIDE_LOAD]], <4 x double> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
4242
; CHECK-NEXT: [[REVERSE2:%.*]] = shufflevector <4 x double> [[WIDE_LOAD1]], <4 x double> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
4343
; CHECK-NEXT: [[TMP5:%.*]] = fcmp une <4 x double> [[REVERSE]], zeroinitializer
4444
; CHECK-NEXT: [[TMP6:%.*]] = fcmp une <4 x double> [[REVERSE2]], zeroinitializer

llvm/test/Transforms/LoopVectorize/PowerPC/optimal-epilog-vectorization.ll

Lines changed: 14 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -339,20 +339,20 @@ define dso_local signext i32 @f2(ptr noalias %A, ptr noalias %B, i32 signext %n)
339339
; VF-TWO-CHECK-NEXT: [[TMP70:%.*]] = getelementptr inbounds float, ptr [[TMP48]], i32 -28
340340
; VF-TWO-CHECK-NEXT: [[TMP71:%.*]] = getelementptr inbounds float, ptr [[TMP70]], i32 -3
341341
; VF-TWO-CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x float>, ptr [[TMP57]], align 4
342-
; VF-TWO-CHECK-NEXT: [[REVERSE:%.*]] = shufflevector <4 x float> [[WIDE_LOAD]], <4 x float> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
343342
; VF-TWO-CHECK-NEXT: [[WIDE_LOAD2:%.*]] = load <4 x float>, ptr [[TMP59]], align 4
344-
; VF-TWO-CHECK-NEXT: [[REVERSE3:%.*]] = shufflevector <4 x float> [[WIDE_LOAD2]], <4 x float> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
345343
; VF-TWO-CHECK-NEXT: [[WIDE_LOAD4:%.*]] = load <4 x float>, ptr [[TMP61]], align 4
346-
; VF-TWO-CHECK-NEXT: [[REVERSE5:%.*]] = shufflevector <4 x float> [[WIDE_LOAD4]], <4 x float> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
347344
; VF-TWO-CHECK-NEXT: [[WIDE_LOAD6:%.*]] = load <4 x float>, ptr [[TMP63]], align 4
348-
; VF-TWO-CHECK-NEXT: [[REVERSE7:%.*]] = shufflevector <4 x float> [[WIDE_LOAD6]], <4 x float> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
349345
; VF-TWO-CHECK-NEXT: [[WIDE_LOAD8:%.*]] = load <4 x float>, ptr [[TMP65]], align 4
350-
; VF-TWO-CHECK-NEXT: [[REVERSE9:%.*]] = shufflevector <4 x float> [[WIDE_LOAD8]], <4 x float> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
351346
; VF-TWO-CHECK-NEXT: [[WIDE_LOAD10:%.*]] = load <4 x float>, ptr [[TMP67]], align 4
352-
; VF-TWO-CHECK-NEXT: [[REVERSE11:%.*]] = shufflevector <4 x float> [[WIDE_LOAD10]], <4 x float> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
353347
; VF-TWO-CHECK-NEXT: [[WIDE_LOAD12:%.*]] = load <4 x float>, ptr [[TMP69]], align 4
354-
; VF-TWO-CHECK-NEXT: [[REVERSE13:%.*]] = shufflevector <4 x float> [[WIDE_LOAD12]], <4 x float> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
355348
; VF-TWO-CHECK-NEXT: [[WIDE_LOAD14:%.*]] = load <4 x float>, ptr [[TMP71]], align 4
349+
; VF-TWO-CHECK-NEXT: [[REVERSE:%.*]] = shufflevector <4 x float> [[WIDE_LOAD]], <4 x float> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
350+
; VF-TWO-CHECK-NEXT: [[REVERSE3:%.*]] = shufflevector <4 x float> [[WIDE_LOAD2]], <4 x float> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
351+
; VF-TWO-CHECK-NEXT: [[REVERSE5:%.*]] = shufflevector <4 x float> [[WIDE_LOAD4]], <4 x float> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
352+
; VF-TWO-CHECK-NEXT: [[REVERSE7:%.*]] = shufflevector <4 x float> [[WIDE_LOAD6]], <4 x float> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
353+
; VF-TWO-CHECK-NEXT: [[REVERSE9:%.*]] = shufflevector <4 x float> [[WIDE_LOAD8]], <4 x float> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
354+
; VF-TWO-CHECK-NEXT: [[REVERSE11:%.*]] = shufflevector <4 x float> [[WIDE_LOAD10]], <4 x float> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
355+
; VF-TWO-CHECK-NEXT: [[REVERSE13:%.*]] = shufflevector <4 x float> [[WIDE_LOAD12]], <4 x float> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
356356
; VF-TWO-CHECK-NEXT: [[REVERSE15:%.*]] = shufflevector <4 x float> [[WIDE_LOAD14]], <4 x float> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
357357
; VF-TWO-CHECK-NEXT: [[TMP72:%.*]] = fadd fast <4 x float> [[REVERSE]], splat (float 1.000000e+00)
358358
; VF-TWO-CHECK-NEXT: [[TMP73:%.*]] = fadd fast <4 x float> [[REVERSE3]], splat (float 1.000000e+00)
@@ -492,20 +492,20 @@ define dso_local signext i32 @f2(ptr noalias %A, ptr noalias %B, i32 signext %n)
492492
; VF-FOUR-CHECK-NEXT: [[TMP70:%.*]] = getelementptr inbounds float, ptr [[TMP48]], i32 -28
493493
; VF-FOUR-CHECK-NEXT: [[TMP71:%.*]] = getelementptr inbounds float, ptr [[TMP70]], i32 -3
494494
; VF-FOUR-CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x float>, ptr [[TMP57]], align 4
495-
; VF-FOUR-CHECK-NEXT: [[REVERSE:%.*]] = shufflevector <4 x float> [[WIDE_LOAD]], <4 x float> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
496495
; VF-FOUR-CHECK-NEXT: [[WIDE_LOAD2:%.*]] = load <4 x float>, ptr [[TMP59]], align 4
497-
; VF-FOUR-CHECK-NEXT: [[REVERSE3:%.*]] = shufflevector <4 x float> [[WIDE_LOAD2]], <4 x float> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
498496
; VF-FOUR-CHECK-NEXT: [[WIDE_LOAD4:%.*]] = load <4 x float>, ptr [[TMP61]], align 4
499-
; VF-FOUR-CHECK-NEXT: [[REVERSE5:%.*]] = shufflevector <4 x float> [[WIDE_LOAD4]], <4 x float> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
500497
; VF-FOUR-CHECK-NEXT: [[WIDE_LOAD6:%.*]] = load <4 x float>, ptr [[TMP63]], align 4
501-
; VF-FOUR-CHECK-NEXT: [[REVERSE7:%.*]] = shufflevector <4 x float> [[WIDE_LOAD6]], <4 x float> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
502498
; VF-FOUR-CHECK-NEXT: [[WIDE_LOAD8:%.*]] = load <4 x float>, ptr [[TMP65]], align 4
503-
; VF-FOUR-CHECK-NEXT: [[REVERSE9:%.*]] = shufflevector <4 x float> [[WIDE_LOAD8]], <4 x float> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
504499
; VF-FOUR-CHECK-NEXT: [[WIDE_LOAD10:%.*]] = load <4 x float>, ptr [[TMP67]], align 4
505-
; VF-FOUR-CHECK-NEXT: [[REVERSE11:%.*]] = shufflevector <4 x float> [[WIDE_LOAD10]], <4 x float> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
506500
; VF-FOUR-CHECK-NEXT: [[WIDE_LOAD12:%.*]] = load <4 x float>, ptr [[TMP69]], align 4
507-
; VF-FOUR-CHECK-NEXT: [[REVERSE13:%.*]] = shufflevector <4 x float> [[WIDE_LOAD12]], <4 x float> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
508501
; VF-FOUR-CHECK-NEXT: [[WIDE_LOAD14:%.*]] = load <4 x float>, ptr [[TMP71]], align 4
502+
; VF-FOUR-CHECK-NEXT: [[REVERSE:%.*]] = shufflevector <4 x float> [[WIDE_LOAD]], <4 x float> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
503+
; VF-FOUR-CHECK-NEXT: [[REVERSE3:%.*]] = shufflevector <4 x float> [[WIDE_LOAD2]], <4 x float> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
504+
; VF-FOUR-CHECK-NEXT: [[REVERSE5:%.*]] = shufflevector <4 x float> [[WIDE_LOAD4]], <4 x float> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
505+
; VF-FOUR-CHECK-NEXT: [[REVERSE7:%.*]] = shufflevector <4 x float> [[WIDE_LOAD6]], <4 x float> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
506+
; VF-FOUR-CHECK-NEXT: [[REVERSE9:%.*]] = shufflevector <4 x float> [[WIDE_LOAD8]], <4 x float> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
507+
; VF-FOUR-CHECK-NEXT: [[REVERSE11:%.*]] = shufflevector <4 x float> [[WIDE_LOAD10]], <4 x float> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
508+
; VF-FOUR-CHECK-NEXT: [[REVERSE13:%.*]] = shufflevector <4 x float> [[WIDE_LOAD12]], <4 x float> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
509509
; VF-FOUR-CHECK-NEXT: [[REVERSE15:%.*]] = shufflevector <4 x float> [[WIDE_LOAD14]], <4 x float> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
510510
; VF-FOUR-CHECK-NEXT: [[TMP72:%.*]] = fadd fast <4 x float> [[REVERSE]], splat (float 1.000000e+00)
511511
; VF-FOUR-CHECK-NEXT: [[TMP73:%.*]] = fadd fast <4 x float> [[REVERSE3]], splat (float 1.000000e+00)

0 commit comments

Comments
 (0)