Skip to content

Commit 771fefc

Browse files
committed
[VPlan] Introduce CSE pass
1 parent a95d0cd commit 771fefc

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

46 files changed

+491
-385
lines changed

llvm/lib/Transforms/Vectorize/LoopVectorize.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7308,6 +7308,7 @@ DenseMap<const SCEV *, Value *> LoopVectorizationPlanner::executePlan(
73087308
VPlanTransforms::narrowInterleaveGroups(
73097309
BestVPlan, BestVF,
73107310
TTI.getRegisterBitWidth(TargetTransformInfo::RGK_FixedWidthVector));
7311+
VPlanTransforms::cse(BestVPlan, *Legal->getWidestInductionType());
73117312
VPlanTransforms::removeDeadRecipes(BestVPlan);
73127313

73137314
VPlanTransforms::convertToConcreteRecipes(BestVPlan,

llvm/lib/Transforms/Vectorize/VPlan.cpp

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -122,6 +122,38 @@ void VPDef::dump() const {
122122
}
123123
#endif
124124

125+
bool VPValue::isIdenticalTo(const VPValue *Other) const {
126+
if (getVPValueID() != Other->getVPValueID() ||
127+
hasDefiningRecipe() != Other->hasDefiningRecipe() ||
128+
!getUnderlyingValue() != !Other->getUnderlyingValue())
129+
return false;
130+
Instruction *I = dyn_cast_or_null<Instruction>(getUnderlyingValue());
131+
Instruction *OtherI =
132+
dyn_cast_or_null<Instruction>(Other->getUnderlyingValue());
133+
if (I && OtherI)
134+
return I->getOpcode() == OtherI->getOpcode() &&
135+
equal(I->operand_values(), OtherI->operand_values());
136+
if (hasDefiningRecipe()) {
137+
const VPRecipeBase *DefL = getDefiningRecipe();
138+
const VPRecipeBase *DefR = Other->getDefiningRecipe();
139+
return vputils::getOpcode(*DefL) == vputils::getOpcode(*DefR) &&
140+
equal(DefL->operands(), DefR->operands());
141+
}
142+
return getUnderlyingValue() == Other->getUnderlyingValue();
143+
}
144+
145+
/// Hash the data carried by \p V rather than its address, so that values
/// which compare equal with VPValue::isIdenticalTo end up in the same bucket.
///
/// The hash is derived from, in order of preference:
///  1. the opcode and operands of the underlying IR instruction, if any;
///  2. the embedded opcode and VPlan operands of the defining recipe, if any;
///  3. the VPValue ID together with the underlying value pointer.
hash_code llvm::hash_value(const VPValue &V) {
  Value *Underlying = V.getUnderlyingValue();

  if (Instruction *UnderlyingInst = dyn_cast_or_null<Instruction>(Underlying))
    return hash_combine(UnderlyingInst->getOpcode(),
                        hash_combine_range(UnderlyingInst->operand_values()));

  if (!V.hasDefiningRecipe())
    return hash_combine(V.getVPValueID(), Underlying);

  const VPRecipeBase *Recipe = V.getDefiningRecipe();
  return hash_combine(vputils::getOpcode(*Recipe),
                      hash_combine_range(Recipe->operands()));
}
156+
125157
VPRecipeBase *VPValue::getDefiningRecipe() {
126158
return cast_or_null<VPRecipeBase>(Def);
127159
}

llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp

Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1755,6 +1755,50 @@ void VPlanTransforms::clearReductionWrapFlags(VPlan &Plan) {
17551755
}
17561756
}
17571757

1758+
/// Hash the underlying data of a VPSingleDefRecipe pointer, instead of hashing
1759+
/// the pointer itself.
1760+
namespace {
1761+
struct CSEDenseMapInfo : public DenseMapInfo<VPSingleDefRecipe *> {
1762+
static unsigned getHashValue(const VPSingleDefRecipe *R) {
1763+
return hash_value(*R);
1764+
}
1765+
1766+
static bool isEqual(const VPSingleDefRecipe *LHS,
1767+
const VPSingleDefRecipe *RHS) {
1768+
if (LHS == getEmptyKey() || RHS == getEmptyKey() ||
1769+
LHS == getTombstoneKey() || RHS == getTombstoneKey())
1770+
return LHS == RHS;
1771+
return LHS->isIdenticalTo(RHS);
1772+
}
1773+
};
1774+
} // end anonymous namespace
1775+
1776+
/// Perform a common-subexpression-elimination of VPSingleDefRecipes on the \p
1777+
/// Plan.
1778+
void VPlanTransforms::cse(VPlan &Plan, Type &CanonicalIVTy) {
1779+
DenseMap<VPSingleDefRecipe *, VPSingleDefRecipe *, CSEDenseMapInfo> CSEMap;
1780+
VPTypeAnalysis TypeInfo(&CanonicalIVTy);
1781+
// There is existing logic to sink instructions into replicate regions, and
1782+
// we'd be undoing that work if we went through replicate regions. Hence,
1783+
// don't CSE in replicate regions.
1784+
for (VPBasicBlock *VPBB : VPBlockUtils::blocksOnly<VPBasicBlock>(
1785+
vp_depth_first_shallow(Plan.getEntry()))) {
1786+
for (VPRecipeBase &R : make_early_inc_range(*VPBB)) {
1787+
auto *Def = dyn_cast<VPSingleDefRecipe>(&R);
1788+
if (!Def)
1789+
continue;
1790+
if (VPSingleDefRecipe *V = CSEMap.lookup(Def)) {
1791+
if (TypeInfo.inferScalarType(Def) != TypeInfo.inferScalarType(V))
1792+
continue;
1793+
Def->replaceAllUsesWith(V);
1794+
Def->eraseFromParent();
1795+
continue;
1796+
}
1797+
CSEMap[Def] = Def;
1798+
}
1799+
}
1800+
}
1801+
17581802
/// Move loop-invariant recipes out of the vector loop region in \p Plan.
17591803
static void licm(VPlan &Plan) {
17601804
VPBasicBlock *Preheader = Plan.getVectorPreheader();

llvm/lib/Transforms/Vectorize/VPlanTransforms.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -240,6 +240,10 @@ struct VPlanTransforms {
240240
/// removing dead edges to their successors.
241241
static void removeBranchOnConst(VPlan &Plan);
242242

243+
/// Perform common-subexpression-elimination on \p Plan; this is best done
244+
/// late in the pipeline, shortly before the \p Plan is executed.
245+
static void cse(VPlan &Plan, Type &CanonicalIVType);
246+
243247
/// If there's a single exit block, optimize its phi recipes that use exiting
244248
/// IV values by feeding them precomputed end values instead, possibly taken
245249
/// one step backwards.

llvm/lib/Transforms/Vectorize/VPlanUtils.h

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@
1010
#define LLVM_TRANSFORMS_VECTORIZE_VPLANUTILS_H
1111

1212
#include "VPlan.h"
13+
#include "llvm/ADT/TypeSwitch.h"
1314

1415
namespace llvm {
1516
class ScalarEvolution;
@@ -37,6 +38,15 @@ VPValue *getOrCreateVPValueForSCEVExpr(VPlan &Plan, const SCEV *Expr,
3738
/// SCEV expression could be constructed.
3839
const SCEV *getSCEVExprForVPValue(VPValue *V, ScalarEvolution &SE);
3940

41+
/// Get any instruction opcode data embedded in recipe \p R.
42+
inline std::optional<unsigned> getOpcode(const VPRecipeBase &R) {
43+
return TypeSwitch<const VPRecipeBase *, std::optional<unsigned>>(&R)
44+
.Case<VPInstruction, VPWidenRecipe, VPWidenCastRecipe,
45+
VPWidenSelectRecipe, VPHistogramRecipe, VPPartialReductionRecipe,
46+
VPReplicateRecipe>([](auto *I) { return I->getOpcode(); })
47+
.Default([](auto *) { return std::nullopt; });
48+
}
49+
4050
/// Returns true if \p VPV is a single scalar, either because it produces the
4151
/// same value for all lanes or only has its first lane used.
4252
inline bool isSingleScalar(const VPValue *VPV) {

llvm/lib/Transforms/Vectorize/VPlanValue.h

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -185,8 +185,15 @@ class LLVM_ABI_FOR_TEST VPValue {
185185
assert(!UnderlyingVal && "Underlying Value is already set.");
186186
UnderlyingVal = Val;
187187
}
188+
189+
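// Structural equality: returns true when \p Other carries the same data as
// this value (same kind, opcode and operands), even if it is a different
// object. Compares contents, not addresses.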
190+
bool isIdenticalTo(const VPValue *Other) const;
188191
};
189192

193+
// Hash method so VPValue can be de-duplicated in certain
194+
// contexts.
195+
hash_code hash_value(const VPValue &Arg);
196+
190197
typedef DenseMap<Value *, VPValue *> Value2VPValueTy;
191198
typedef DenseMap<VPValue *, Value *> VPValue2ValueTy;
192199

llvm/test/Transforms/LoopVectorize/AArch64/epilog-vectorization-widen-inductions.ll

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -330,11 +330,10 @@ define void @test_widen_induction_step_2(ptr %A, i64 %N, i32 %step) {
330330
; CHECK-NEXT: [[CMP_N11:%.*]] = icmp eq i64 [[N]], [[IND_END]]
331331
; CHECK-NEXT: br i1 [[CMP_N11]], label [[EXIT]], label [[VEC_EPILOG_SCALAR_PH]]
332332
; CHECK: vec.epilog.scalar.ph:
333-
; CHECK-NEXT: [[BC_RESUME_VAL5:%.*]] = phi i64 [ [[IND_END]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[IND_END4]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[ITER_CHECK:%.*]] ]
334-
; CHECK-NEXT: [[BC_RESUME_VAL6:%.*]] = phi i64 [ [[IND_END]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[IND_END4]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[ITER_CHECK]] ]
333+
; CHECK-NEXT: [[BC_RESUME_VAL6:%.*]] = phi i64 [ [[IND_END]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[IND_END4]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[ITER_CHECK:%.*]] ]
335334
; CHECK-NEXT: br label [[LOOP:%.*]]
336335
; CHECK: loop:
337-
; CHECK-NEXT: [[IV_1:%.*]] = phi i64 [ [[BC_RESUME_VAL5]], [[VEC_EPILOG_SCALAR_PH]] ], [ [[IV_1_NEXT:%.*]], [[LOOP]] ]
336+
; CHECK-NEXT: [[IV_1:%.*]] = phi i64 [ [[BC_RESUME_VAL6]], [[VEC_EPILOG_SCALAR_PH]] ], [ [[IV_1_NEXT:%.*]], [[LOOP]] ]
338337
; CHECK-NEXT: [[IV_2:%.*]] = phi i64 [ [[BC_RESUME_VAL6]], [[VEC_EPILOG_SCALAR_PH]] ], [ [[IV_1_NEXT]], [[LOOP]] ]
339338
; CHECK-NEXT: [[GEP_A:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[IV_1]]
340339
; CHECK-NEXT: [[ADD:%.*]] = add i64 [[IV_2]], 10

llvm/test/Transforms/LoopVectorize/AArch64/extractvalue-no-scalarization-required.ll

Lines changed: 71 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals none --version 5
12
; REQUIRES: asserts
23

34
; RUN: opt -passes=loop-vectorize -mtriple=arm64-apple-ios %s -S -debug -disable-output 2>&1 | FileCheck --check-prefix=CM %s
@@ -22,23 +23,42 @@
2223

2324
; Check that the extractvalue operands are actually free in vector code.
2425

25-
; FORCED: [[E1:%.+]] = extractvalue { i64, i64 } %sv, 0
26-
; FORCED-NEXT: %broadcast.splatinsert = insertelement <2 x i64> poison, i64 [[E1]], i64 0
27-
; FORCED-NEXT: %broadcast.splat = shufflevector <2 x i64> %broadcast.splatinsert, <2 x i64> poison, <2 x i32> zeroinitializer
28-
; FORCED-NEXT: [[E2:%.+]] = extractvalue { i64, i64 } %sv, 1
29-
; FORCED-NEXT: %broadcast.splatinsert1 = insertelement <2 x i64> poison, i64 [[E2]], i64 0
30-
; FORCED-NEXT: %broadcast.splat2 = shufflevector <2 x i64> %broadcast.splatinsert1, <2 x i64> poison, <2 x i32> zeroinitializer
31-
; FORCED-NEXT: [[ADD:%.+]] = add <2 x i64> %broadcast.splat, %broadcast.splat2
32-
33-
; FORCED-LABEL: vector.body: ; preds = %vector.body, %vector.ph
34-
; FORCED-NEXT: %index = phi i32 [ 0, %vector.ph ], [ %index.next, %vector.body ]
35-
; FORCED-NEXT: [[GEP:%.+]] = getelementptr i64, ptr %dst, i32 %index
36-
; FORCED-NEXT: store <2 x i64> [[ADD]], ptr [[GEP]], align 4
37-
; FORCED-NEXT: %index.next = add nuw i32 %index, 2
38-
; FORCED-NEXT: [[C:%.+]] = icmp eq i32 %index.next, 1000
39-
; FORCED-NEXT: br i1 [[C]], label %middle.block, label %vector.body
40-
4126
define void @test1(ptr %dst, {i64, i64} %sv) {
27+
; FORCED-LABEL: define void @test1(
28+
; FORCED-SAME: ptr [[DST:%.*]], { i64, i64 } [[SV:%.*]]) {
29+
; FORCED-NEXT: [[ENTRY:.*]]:
30+
; FORCED-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
31+
; FORCED: [[VECTOR_PH]]:
32+
; FORCED-NEXT: [[TMP0:%.*]] = extractvalue { i64, i64 } [[SV]], 0
33+
; FORCED-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i64> poison, i64 [[TMP0]], i64 0
34+
; FORCED-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x i64> [[BROADCAST_SPLATINSERT]], <2 x i64> poison, <2 x i32> zeroinitializer
35+
; FORCED-NEXT: [[TMP1:%.*]] = add <2 x i64> [[BROADCAST_SPLAT]], [[BROADCAST_SPLAT]]
36+
; FORCED-NEXT: br label %[[VECTOR_BODY:.*]]
37+
; FORCED: [[VECTOR_BODY]]:
38+
; FORCED-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
39+
; FORCED-NEXT: [[TMP2:%.*]] = getelementptr i64, ptr [[DST]], i32 [[INDEX]]
40+
; FORCED-NEXT: store <2 x i64> [[TMP1]], ptr [[TMP2]], align 4
41+
; FORCED-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2
42+
; FORCED-NEXT: [[TMP3:%.*]] = icmp eq i32 [[INDEX_NEXT]], 1000
43+
; FORCED-NEXT: br i1 [[TMP3]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
44+
; FORCED: [[MIDDLE_BLOCK]]:
45+
; FORCED-NEXT: br label %[[EXIT:.*]]
46+
; FORCED: [[SCALAR_PH]]:
47+
; FORCED-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ 0, %[[ENTRY]] ]
48+
; FORCED-NEXT: br label %[[LOOP_BODY:.*]]
49+
; FORCED: [[LOOP_BODY]]:
50+
; FORCED-NEXT: [[IV:%.*]] = phi i32 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP_BODY]] ]
51+
; FORCED-NEXT: [[A:%.*]] = extractvalue { i64, i64 } [[SV]], 0
52+
; FORCED-NEXT: [[B:%.*]] = extractvalue { i64, i64 } [[SV]], 1
53+
; FORCED-NEXT: [[ADDR:%.*]] = getelementptr i64, ptr [[DST]], i32 [[IV]]
54+
; FORCED-NEXT: [[ADD:%.*]] = add i64 [[A]], [[B]]
55+
; FORCED-NEXT: store i64 [[ADD]], ptr [[ADDR]], align 4
56+
; FORCED-NEXT: [[IV_NEXT]] = add nsw i32 [[IV]], 1
57+
; FORCED-NEXT: [[COND:%.*]] = icmp ne i32 [[IV_NEXT]], 1000
58+
; FORCED-NEXT: br i1 [[COND]], label %[[LOOP_BODY]], label %[[EXIT]], !llvm.loop [[LOOP3:![0-9]+]]
59+
; FORCED: [[EXIT]]:
60+
; FORCED-NEXT: ret void
61+
;
4262
entry:
4363
br label %loop.body
4464

@@ -70,25 +90,42 @@ declare float @powf(float, float) readnone nounwind
7090

7191
; CM: LV: Scalar loop costs: 14.
7292

73-
; FORCED-LABEL: define void @test_getVectorCallCost
74-
75-
; FORCED: [[E1:%.+]] = extractvalue { float, float } %sv, 0
76-
; FORCED-NEXT: %broadcast.splatinsert = insertelement <2 x float> poison, float [[E1]], i64 0
77-
; FORCED-NEXT: %broadcast.splat = shufflevector <2 x float> %broadcast.splatinsert, <2 x float> poison, <2 x i32> zeroinitializer
78-
; FORCED-NEXT: [[E2:%.+]] = extractvalue { float, float } %sv, 1
79-
; FORCED-NEXT: %broadcast.splatinsert1 = insertelement <2 x float> poison, float [[E2]], i64 0
80-
; FORCED-NEXT: %broadcast.splat2 = shufflevector <2 x float> %broadcast.splatinsert1, <2 x float> poison, <2 x i32> zeroinitializer
81-
82-
; FORCED-LABEL: vector.body: ; preds = %vector.body, %vector.ph
83-
; FORCED-NEXT: %index = phi i32 [ 0, %vector.ph ], [ %index.next, %vector.body ]
84-
; FORCED-NEXT: [[GEP1:%.+]] = getelementptr float, ptr %dst, i32 %index
85-
; FORCED-NEXT: [[POW:%.+]] = call <2 x float> @llvm.pow.v2f32(<2 x float> %broadcast.splat, <2 x float> %broadcast.splat2)
86-
; FORCED-NEXT: store <2 x float> [[POW]], ptr [[GEP1]], align 4
87-
; FORCED-NEXT: %index.next = add nuw i32 %index, 2
88-
; FORCED-NEXT: [[C:%.+]] = icmp eq i32 %index.next, 1000
89-
; FORCED-NEXT: br i1 [[C]], label %middle.block, label %vector.body
90-
9193
define void @test_getVectorCallCost(ptr %dst, {float, float} %sv) {
94+
; FORCED-LABEL: define void @test_getVectorCallCost(
95+
; FORCED-SAME: ptr [[DST:%.*]], { float, float } [[SV:%.*]]) {
96+
; FORCED-NEXT: [[ENTRY:.*]]:
97+
; FORCED-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
98+
; FORCED: [[VECTOR_PH]]:
99+
; FORCED-NEXT: [[TMP0:%.*]] = extractvalue { float, float } [[SV]], 0
100+
; FORCED-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x float> poison, float [[TMP0]], i64 0
101+
; FORCED-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x float> [[BROADCAST_SPLATINSERT]], <2 x float> poison, <2 x i32> zeroinitializer
102+
; FORCED-NEXT: br label %[[VECTOR_BODY:.*]]
103+
; FORCED: [[VECTOR_BODY]]:
104+
; FORCED-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
105+
; FORCED-NEXT: [[TMP1:%.*]] = getelementptr float, ptr [[DST]], i32 [[INDEX]]
106+
; FORCED-NEXT: [[TMP2:%.*]] = call <2 x float> @llvm.pow.v2f32(<2 x float> [[BROADCAST_SPLAT]], <2 x float> [[BROADCAST_SPLAT]])
107+
; FORCED-NEXT: store <2 x float> [[TMP2]], ptr [[TMP1]], align 4
108+
; FORCED-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2
109+
; FORCED-NEXT: [[TMP3:%.*]] = icmp eq i32 [[INDEX_NEXT]], 1000
110+
; FORCED-NEXT: br i1 [[TMP3]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
111+
; FORCED: [[MIDDLE_BLOCK]]:
112+
; FORCED-NEXT: br label %[[EXIT:.*]]
113+
; FORCED: [[SCALAR_PH]]:
114+
; FORCED-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ 0, %[[ENTRY]] ]
115+
; FORCED-NEXT: br label %[[LOOP_BODY:.*]]
116+
; FORCED: [[LOOP_BODY]]:
117+
; FORCED-NEXT: [[IV:%.*]] = phi i32 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP_BODY]] ]
118+
; FORCED-NEXT: [[A:%.*]] = extractvalue { float, float } [[SV]], 0
119+
; FORCED-NEXT: [[B:%.*]] = extractvalue { float, float } [[SV]], 1
120+
; FORCED-NEXT: [[ADDR:%.*]] = getelementptr float, ptr [[DST]], i32 [[IV]]
121+
; FORCED-NEXT: [[P:%.*]] = call float @powf(float [[A]], float [[B]])
122+
; FORCED-NEXT: store float [[P]], ptr [[ADDR]], align 4
123+
; FORCED-NEXT: [[IV_NEXT]] = add nsw i32 [[IV]], 1
124+
; FORCED-NEXT: [[COND:%.*]] = icmp ne i32 [[IV_NEXT]], 1000
125+
; FORCED-NEXT: br i1 [[COND]], label %[[LOOP_BODY]], label %[[EXIT]], !llvm.loop [[LOOP5:![0-9]+]]
126+
; FORCED: [[EXIT]]:
127+
; FORCED-NEXT: ret void
128+
;
92129
entry:
93130
br label %loop.body
94131

llvm/test/Transforms/LoopVectorize/AArch64/force-target-instruction-cost.ll

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -23,12 +23,11 @@ define double @test_reduction_costs() {
2323
; CHECK: [[SCALAR_PH]]:
2424
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, %[[ENTRY]] ]
2525
; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi double [ 0.000000e+00, %[[ENTRY]] ]
26-
; CHECK-NEXT: [[BC_MERGE_RDX2:%.*]] = phi double [ 0.000000e+00, %[[ENTRY]] ]
2726
; CHECK-NEXT: br label %[[LOOP_1:.*]]
2827
; CHECK: [[LOOP_1]]:
2928
; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP_1]] ]
3029
; CHECK-NEXT: [[R_1:%.*]] = phi double [ [[BC_MERGE_RDX]], %[[SCALAR_PH]] ], [ [[R_1_NEXT:%.*]], %[[LOOP_1]] ]
31-
; CHECK-NEXT: [[R_2:%.*]] = phi double [ [[BC_MERGE_RDX2]], %[[SCALAR_PH]] ], [ [[R_2_NEXT:%.*]], %[[LOOP_1]] ]
30+
; CHECK-NEXT: [[R_2:%.*]] = phi double [ [[BC_MERGE_RDX]], %[[SCALAR_PH]] ], [ [[R_2_NEXT:%.*]], %[[LOOP_1]] ]
3231
; CHECK-NEXT: [[R_1_NEXT]] = fadd double [[R_1]], 3.000000e+00
3332
; CHECK-NEXT: [[R_2_NEXT]] = fadd double [[R_2]], 9.000000e+00
3433
; CHECK-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1

llvm/test/Transforms/LoopVectorize/AArch64/induction-costs.ll

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -294,7 +294,6 @@ define i64 @test_ptr_ivs_and_widened_ivs(ptr %src, i32 %N) {
294294
; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP2]], [[N_MOD_VF]]
295295
; CHECK-NEXT: [[TMP3:%.*]] = mul i64 [[N_VEC]], 4
296296
; CHECK-NEXT: [[IND_END:%.*]] = getelementptr i8, ptr [[SRC]], i64 [[TMP3]]
297-
; CHECK-NEXT: [[IND_END1:%.*]] = trunc i64 [[N_VEC]] to i32
298297
; CHECK-NEXT: [[IND_END3:%.*]] = trunc i64 [[N_VEC]] to i32
299298
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
300299
; CHECK: vector.body:
@@ -321,7 +320,7 @@ define i64 @test_ptr_ivs_and_widened_ivs(ptr %src, i32 %N) {
321320
; CHECK: scalar.ph:
322321
; CHECK-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i64 [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
323322
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi ptr [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ [[SRC]], [[ENTRY]] ]
324-
; CHECK-NEXT: [[BC_RESUME_VAL2:%.*]] = phi i32 [ [[IND_END1]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ]
323+
; CHECK-NEXT: [[BC_RESUME_VAL2:%.*]] = phi i32 [ [[IND_END3]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ]
325324
; CHECK-NEXT: [[BC_RESUME_VAL4:%.*]] = phi i32 [ [[IND_END3]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ]
326325
; CHECK-NEXT: br label [[LOOP:%.*]]
327326
; CHECK: loop:

0 commit comments

Comments
 (0)