Skip to content

Commit 428b9c4

Browse files
committed
!fixup address comments, update tests.
1 parent 2d389dc commit 428b9c4

File tree

10 files changed

+170
-67
lines changed

10 files changed

+170
-67
lines changed

llvm/lib/Transforms/Vectorize/VPlan.h

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1837,7 +1837,9 @@ class VPVectorPointerRecipe : public VPRecipeWithIRFlags,
18371837
getGEPNoWrapFlags(), getDebugLoc());
18381838
}
18391839

1840-
bool isPart0() { return getUnrollPart(*this) == 0; }
1840+
/// Return true if this VPVectorPointerRecipe corresponds to part 0. Note that
1841+
/// this is only accurate after the VPlan has been unrolled.
1842+
bool isFirstPart() const { return getUnrollPart(*this) == 0; }
18411843

18421844
/// Return the cost of this VPHeaderPHIRecipe.
18431845
InstructionCost computeCost(ElementCount VF,

llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp

Lines changed: 8 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1015,12 +1015,6 @@ static void simplifyRecipe(VPRecipeBase &R, VPTypeAnalysis &TypeInfo) {
10151015
if (Op->isLiveIn())
10161016
PredPHI->replaceAllUsesWith(Op);
10171017
}
1018-
if (auto *VecPtr = dyn_cast<VPVectorPointerRecipe>(&R)) {
1019-
if (VecPtr->getParent()->getPlan()->isUnrolled() && VecPtr->isPart0()) {
1020-
VecPtr->replaceAllUsesWith(VecPtr->getOperand(0));
1021-
return;
1022-
}
1023-
}
10241018

10251019
VPValue *A;
10261020
if (match(Def, m_Trunc(m_ZExtOrSExt(m_VPValue(A))))) {
@@ -1178,6 +1172,14 @@ static void simplifyRecipe(VPRecipeBase &R, VPTypeAnalysis &TypeInfo) {
11781172
if (!Plan->isUnrolled())
11791173
return;
11801174

1175+
// VPVectorPointer for part 0 can be replaced by their start pointer.
1176+
if (auto *VecPtr = dyn_cast<VPVectorPointerRecipe>(&R)) {
1177+
if (VecPtr->isFirstPart()) {
1178+
VecPtr->replaceAllUsesWith(VecPtr->getOperand(0));
1179+
return;
1180+
}
1181+
}
1182+
11811183
// VPScalarIVSteps for part 0 can be replaced by their start value, if only
11821184
// the first lane is demanded.
11831185
if (auto *Steps = dyn_cast<VPScalarIVStepsRecipe>(Def)) {

llvm/test/Transforms/LoopLoadElim/versioning-scev-invalidation.ll

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -65,8 +65,7 @@ define void @g(ptr %dst.1, ptr %start, i64 %N) {
6565
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
6666
; CHECK-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 8
6767
; CHECK-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[LCSSA_PTR_IV_1]], i64 [[OFFSET_IDX]]
68-
; CHECK-NEXT: [[TMP5:%.*]] = getelementptr double, ptr [[NEXT_GEP]], i32 0
69-
; CHECK-NEXT: store <4 x double> zeroinitializer, ptr [[TMP5]], align 8
68+
; CHECK-NEXT: store <4 x double> zeroinitializer, ptr [[NEXT_GEP]], align 8
7069
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
7170
; CHECK-NEXT: [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
7271
; CHECK-NEXT: br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
@@ -84,7 +83,7 @@ define void @g(ptr %dst.1, ptr %start, i64 %N) {
8483
; CHECK-NEXT: [[PTR_IV_2_NEXT]] = getelementptr inbounds double, ptr [[PTR_IV_2]], i64 1
8584
; CHECK-NEXT: [[IV_2_NEXT]] = add nuw nsw i64 [[IV_2]], 1
8685
; CHECK-NEXT: [[EXITCOND_1_NOT:%.*]] = icmp eq i64 [[IV_2_NEXT]], [[N]]
87-
; CHECK-NEXT: br i1 [[EXITCOND_1_NOT]], label [[EXIT_LOOPEXIT:%.*]], label [[LOOP_2]], !llvm.loop [[LOOP2:![0-9]+]]
86+
; CHECK-NEXT: br i1 [[EXITCOND_1_NOT]], label [[EXIT_LOOPEXIT:%.*]], label [[LOOP_2]], !llvm.loop [[LOOP3:![0-9]+]]
8887
; CHECK: exit.loopexit:
8988
; CHECK-NEXT: br label [[EXIT]]
9089
; CHECK: exit:

llvm/test/Transforms/LoopVectorize/AArch64/interleave-with-gaps.ll

Lines changed: 4 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -32,8 +32,7 @@ define i64 @vector_loop_with_remaining_iterations(ptr %src, ptr noalias %dst, i3
3232
; CHECK-NEXT: [[TMP6:%.*]] = call <16 x i32> @llvm.umin.v16i32(<16 x i32> [[TMP2]], <16 x i32> [[TMP5]])
3333
; CHECK-NEXT: [[TMP7:%.*]] = call <16 x i32> @llvm.umin.v16i32(<16 x i32> [[TMP3]], <16 x i32> [[TMP6]])
3434
; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i8, ptr [[DST]], i64 [[INDEX]]
35-
; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i8, ptr [[TMP8]], i32 0
36-
; CHECK-NEXT: store <16 x i8> zeroinitializer, ptr [[TMP9]], align 1
35+
; CHECK-NEXT: store <16 x i8> zeroinitializer, ptr [[TMP8]], align 1
3736
; CHECK-NEXT: [[TMP10:%.*]] = zext <16 x i32> [[TMP7]] to <16 x i64>
3837
; CHECK-NEXT: [[TMP11]] = or <16 x i64> [[VEC_PHI]], [[TMP10]]
3938
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16
@@ -82,8 +81,7 @@ define i64 @vector_loop_with_remaining_iterations(ptr %src, ptr noalias %dst, i3
8281
; CHECK-NEXT: [[TMP30:%.*]] = call <vscale x 2 x i32> @llvm.umin.nxv2i32(<vscale x 2 x i32> [[TMP23]], <vscale x 2 x i32> [[TMP29]])
8382
; CHECK-NEXT: [[TMP31:%.*]] = call <vscale x 2 x i32> @llvm.umin.nxv2i32(<vscale x 2 x i32> [[TMP24]], <vscale x 2 x i32> [[TMP30]])
8483
; CHECK-NEXT: [[TMP32:%.*]] = getelementptr inbounds i8, ptr [[DST]], i64 [[INDEX7]]
85-
; CHECK-NEXT: [[TMP33:%.*]] = getelementptr inbounds i8, ptr [[TMP32]], i32 0
86-
; CHECK-NEXT: store <vscale x 2 x i8> zeroinitializer, ptr [[TMP33]], align 1
84+
; CHECK-NEXT: store <vscale x 2 x i8> zeroinitializer, ptr [[TMP32]], align 1
8785
; CHECK-NEXT: [[TMP34:%.*]] = zext <vscale x 2 x i32> [[TMP31]] to <vscale x 2 x i64>
8886
; CHECK-NEXT: [[TMP35]] = or <vscale x 2 x i64> [[VEC_PHI8]], [[TMP34]]
8987
; CHECK-NEXT: [[INDEX_NEXT9]] = add nuw i64 [[INDEX7]], [[TMP21]]
@@ -172,8 +170,7 @@ define i64 @main_vector_loop_fixed_with_no_remaining_iterations(ptr %src, ptr no
172170
; CHECK-NEXT: [[TMP6:%.*]] = call <16 x i32> @llvm.umin.v16i32(<16 x i32> [[TMP2]], <16 x i32> [[TMP5]])
173171
; CHECK-NEXT: [[TMP7:%.*]] = call <16 x i32> @llvm.umin.v16i32(<16 x i32> [[TMP3]], <16 x i32> [[TMP6]])
174172
; CHECK-NEXT: [[TMP26:%.*]] = getelementptr inbounds i8, ptr [[DST]], i64 [[INDEX]]
175-
; CHECK-NEXT: [[TMP27:%.*]] = getelementptr inbounds i8, ptr [[TMP26]], i32 0
176-
; CHECK-NEXT: store <16 x i8> zeroinitializer, ptr [[TMP27]], align 1
173+
; CHECK-NEXT: store <16 x i8> zeroinitializer, ptr [[TMP26]], align 1
177174
; CHECK-NEXT: [[TMP10:%.*]] = zext <16 x i32> [[TMP7]] to <16 x i64>
178175
; CHECK-NEXT: [[TMP11]] = or <16 x i64> [[VEC_PHI]], [[TMP10]]
179176
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16
@@ -222,8 +219,7 @@ define i64 @main_vector_loop_fixed_with_no_remaining_iterations(ptr %src, ptr no
222219
; CHECK-NEXT: [[TMP30:%.*]] = call <vscale x 2 x i32> @llvm.umin.nxv2i32(<vscale x 2 x i32> [[TMP23]], <vscale x 2 x i32> [[TMP29]])
223220
; CHECK-NEXT: [[TMP31:%.*]] = call <vscale x 2 x i32> @llvm.umin.nxv2i32(<vscale x 2 x i32> [[TMP24]], <vscale x 2 x i32> [[TMP30]])
224221
; CHECK-NEXT: [[TMP32:%.*]] = getelementptr inbounds i8, ptr [[DST]], i64 [[INDEX7]]
225-
; CHECK-NEXT: [[TMP33:%.*]] = getelementptr inbounds i8, ptr [[TMP32]], i32 0
226-
; CHECK-NEXT: store <vscale x 2 x i8> zeroinitializer, ptr [[TMP33]], align 1
222+
; CHECK-NEXT: store <vscale x 2 x i8> zeroinitializer, ptr [[TMP32]], align 1
227223
; CHECK-NEXT: [[TMP34:%.*]] = zext <vscale x 2 x i32> [[TMP31]] to <vscale x 2 x i64>
228224
; CHECK-NEXT: [[TMP35]] = or <vscale x 2 x i64> [[VEC_PHI8]], [[TMP34]]
229225
; CHECK-NEXT: [[INDEX_NEXT9]] = add nuw i64 [[INDEX7]], [[TMP21]]

llvm/test/Transforms/LoopVectorize/AArch64/partial-reduce-dot-product-epilogue.ll

Lines changed: 2 additions & 41 deletions
Original file line numberDiff line numberDiff line change
@@ -27,47 +27,8 @@ define i32 @dotp(ptr %a, ptr %b) #0 {
2727
; CHECK-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
2828
; CHECK: middle.block:
2929
; CHECK-NEXT: [[TMP11:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[PARTIAL_REDUCE]])
30-
; CHECK-NEXT: br label [[VEC_EPILOG_VECTOR_BODY:%.*]]
31-
; CHECK: vec.epilog.vector.body:
32-
; CHECK-NEXT: [[INDEX2:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], [[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT6:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ]
33-
; CHECK-NEXT: [[VEC_PHI3:%.*]] = phi <vscale x 4 x i32> [ [[TMP18]], [[VEC_EPILOG_PH]] ], [ [[TMP27:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ]
34-
; CHECK-NEXT: [[TMP20:%.*]] = getelementptr i8, ptr [[A]], i64 [[INDEX2]]
35-
; CHECK-NEXT: [[WIDE_LOAD4:%.*]] = load <vscale x 4 x i8>, ptr [[TMP20]], align 1
36-
; CHECK-NEXT: [[TMP22:%.*]] = zext <vscale x 4 x i8> [[WIDE_LOAD4]] to <vscale x 4 x i32>
37-
; CHECK-NEXT: [[TMP23:%.*]] = getelementptr i8, ptr [[B]], i64 [[INDEX2]]
38-
; CHECK-NEXT: [[WIDE_LOAD5:%.*]] = load <vscale x 4 x i8>, ptr [[TMP23]], align 1
39-
; CHECK-NEXT: [[TMP25:%.*]] = zext <vscale x 4 x i8> [[WIDE_LOAD5]] to <vscale x 4 x i32>
40-
; CHECK-NEXT: [[TMP26:%.*]] = mul <vscale x 4 x i32> [[TMP25]], [[TMP22]]
41-
; CHECK-NEXT: [[TMP27]] = add <vscale x 4 x i32> [[TMP26]], [[VEC_PHI3]]
42-
; CHECK-NEXT: [[INDEX_NEXT6]] = add nuw i64 [[INDEX2]], [[TMP17]]
43-
; CHECK-NEXT: [[TMP28:%.*]] = icmp eq i64 [[INDEX_NEXT6]], [[N_VEC]]
44-
; CHECK-NEXT: br i1 [[TMP28]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]]
45-
; CHECK: vec.epilog.middle.block:
46-
; CHECK-NEXT: [[TMP29:%.*]] = call i32 @llvm.vector.reduce.add.nxv4i32(<vscale x 4 x i32> [[TMP27]])
47-
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 1024, [[N_VEC]]
48-
; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_EXIT]], label [[VEC_EPILOG_SCALAR_PH]]
49-
; CHECK: vec.epilog.scalar.ph:
50-
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ 1024, [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[ITER_CHECK:%.*]] ]
51-
; CHECK-NEXT: [[BC_MERGE_RDX7:%.*]] = phi i32 [ [[TMP29]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[TMP11]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[ITER_CHECK]] ]
52-
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
53-
; CHECK: for.body:
54-
; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[VEC_EPILOG_SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ]
55-
; CHECK-NEXT: [[ACCUM:%.*]] = phi i32 [ [[BC_MERGE_RDX7]], [[VEC_EPILOG_SCALAR_PH]] ], [ [[ADD:%.*]], [[FOR_BODY]] ]
56-
; CHECK-NEXT: [[GEP_A:%.*]] = getelementptr i8, ptr [[A]], i64 [[IV]]
57-
; CHECK-NEXT: [[LOAD_A:%.*]] = load i8, ptr [[GEP_A]], align 1
58-
; CHECK-NEXT: [[EXT_A:%.*]] = zext i8 [[LOAD_A]] to i32
59-
; CHECK-NEXT: [[GEP_B:%.*]] = getelementptr i8, ptr [[B]], i64 [[IV]]
60-
; CHECK-NEXT: [[LOAD_B:%.*]] = load i8, ptr [[GEP_B]], align 1
61-
; CHECK-NEXT: [[EXT_B:%.*]] = zext i8 [[LOAD_B]] to i32
62-
; CHECK-NEXT: [[MUL:%.*]] = mul i32 [[EXT_B]], [[EXT_A]]
63-
; CHECK-NEXT: [[ADD]] = add i32 [[MUL]], [[ACCUM]]
64-
; CHECK-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1
65-
; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], 1024
66-
; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_EXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
67-
; CHECK: for.exit:
68-
; CHECK-NEXT: [[ADD_LCSSA:%.*]] = phi i32 [ [[ADD]], [[FOR_BODY]] ], [ [[TMP11]], [[MIDDLE_BLOCK]] ], [ [[TMP29]], [[VEC_EPILOG_MIDDLE_BLOCK]] ]
69-
; CHECK-NEXT: ret i32 [[ADD_LCSSA]]
70-
>>>>>>> 81d97e221e6e ([VPlan] Remove VPVectorPointer for part 0 after unrolling.)
30+
; CHECK-NEXT: br i1 true, label [[FOR_EXIT:%.*]], label [[VEC_EPILOG_PH]]
31+
; CHECK: scalar.ph:
7132
;
7233
entry:
7334
br label %for.body

llvm/test/Transforms/LoopVectorize/RISCV/low-trip-count.ll

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -133,6 +133,12 @@ define void @trip8_i8(ptr noalias nocapture noundef %dst, ptr noalias nocapture
133133
; CHECK-NEXT: [[TMP4:%.*]] = mul nuw i64 [[TMP3]], 4
134134
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
135135
; CHECK: vector.body:
136+
; CHECK-NEXT: [[TMP5:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 8, i32 4, i1 true)
137+
; CHECK-NEXT: [[VP_OP_LOAD:%.*]] = call <vscale x 4 x i8> @llvm.vp.load.nxv4i8.p0(ptr align 1 [[TMP9:%.*]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP5]])
138+
; CHECK-NEXT: [[TMP6:%.*]] = shl <vscale x 4 x i8> [[VP_OP_LOAD]], splat (i8 1)
139+
; CHECK-NEXT: [[VP_OP_LOAD1:%.*]] = call <vscale x 4 x i8> @llvm.vp.load.nxv4i8.p0(ptr align 1 [[TMP12:%.*]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP5]])
140+
; CHECK-NEXT: [[TMP7:%.*]] = add <vscale x 4 x i8> [[TMP6]], [[VP_OP_LOAD1]]
141+
; CHECK-NEXT: call void @llvm.vp.store.nxv4i8.p0(<vscale x 4 x i8> [[TMP7]], ptr align 1 [[TMP12]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP5]])
136142
; CHECK-NEXT: br label [[MIDDLE_BLOCK:%.*]]
137143
; CHECK: middle.block:
138144
; CHECK-NEXT: br label [[FOR_END:%.*]]
@@ -360,8 +366,7 @@ define i8 @mul_non_pow_2_low_trip_count(ptr noalias %a) {
360366
; CHECK-NEXT: [[VEC_IV:%.*]] = add <16 x i64> [[BROADCAST_SPLAT]], <i64 0, i64 1, i64 2, i64 3, i64 4, i64 5, i64 6, i64 7, i64 8, i64 9, i64 10, i64 11, i64 12, i64 13, i64 14, i64 15>
361367
; CHECK-NEXT: [[ACTIVE_LANE_MASK:%.*]] = icmp ule <16 x i64> [[VEC_IV]], splat (i64 9)
362368
; CHECK-NEXT: [[TMP0:%.*]] = getelementptr i8, ptr [[A:%.*]], i64 [[INDEX]]
363-
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr i8, ptr [[TMP0]], i32 0
364-
; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <16 x i8> @llvm.masked.load.v16i8.p0(ptr [[TMP1]], i32 1, <16 x i1> [[ACTIVE_LANE_MASK]], <16 x i8> poison)
369+
; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <16 x i8> @llvm.masked.load.v16i8.p0(ptr [[TMP0]], i32 1, <16 x i1> [[ACTIVE_LANE_MASK]], <16 x i8> poison)
365370
; CHECK-NEXT: [[TMP2]] = mul <16 x i8> [[WIDE_MASKED_LOAD]], [[VEC_PHI]]
366371
; CHECK-NEXT: [[TMP3:%.*]] = select <16 x i1> [[ACTIVE_LANE_MASK]], <16 x i8> [[TMP2]], <16 x i8> [[VEC_PHI]]
367372
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16

llvm/test/Transforms/LoopVectorize/RISCV/scalable-tailfold.ll

Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,15 @@ define void @vector_add(ptr noalias nocapture %a, i64 %v, i64 %n) {
2424
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
2525
; CHECK: vector.body:
2626
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
27+
; CHECK-NEXT: [[EVL_BASED_IV:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], [[VECTOR_BODY]] ]
28+
; CHECK-NEXT: [[AVL:%.*]] = sub i64 1025, [[EVL_BASED_IV]]
29+
; CHECK-NEXT: [[TMP9:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 2, i1 true)
30+
; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[EVL_BASED_IV]]
31+
; CHECK-NEXT: [[VP_OP_LOAD:%.*]] = call <vscale x 2 x i64> @llvm.vp.load.nxv2i64.p0(ptr align 8 [[TMP10]], <vscale x 2 x i1> splat (i1 true), i32 [[TMP9]])
32+
; CHECK-NEXT: [[TMP7:%.*]] = add <vscale x 2 x i64> [[VP_OP_LOAD]], [[BROADCAST_SPLAT]]
33+
; CHECK-NEXT: call void @llvm.vp.store.nxv2i64.p0(<vscale x 2 x i64> [[TMP7]], ptr align 8 [[TMP10]], <vscale x 2 x i1> splat (i1 true), i32 [[TMP9]])
34+
; CHECK-NEXT: [[TMP8:%.*]] = zext i32 [[TMP9]] to i64
35+
; CHECK-NEXT: [[INDEX_EVL_NEXT]] = add nuw i64 [[TMP8]], [[EVL_BASED_IV]]
2736
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP6]]
2837
; CHECK-NEXT: [[TMP11:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
2938
; CHECK-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
@@ -81,6 +90,11 @@ define void @indexed_store(ptr noalias nocapture %a, ptr noalias nocapture %b, i
8190
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
8291
; CHECK: vector.body:
8392
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
93+
; CHECK-NEXT: [[EVL_BASED_IV:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], [[VECTOR_BODY]] ]
94+
; CHECK-NEXT: [[AVL:%.*]] = sub i64 1025, [[EVL_BASED_IV]]
95+
; CHECK-NEXT: [[TMP7:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 2, i1 true)
96+
; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i64, ptr [[B:%.*]], i64 [[EVL_BASED_IV]]
97+
; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <vscale x 2 x i64> @llvm.vp.load.nxv2i64.p0(ptr align 8 [[TMP8]], <vscale x 2 x i1> splat (i1 true), i32 [[TMP7]])
8498
; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], <vscale x 2 x i64> [[WIDE_MASKED_LOAD]]
8599
; CHECK-NEXT: call void @llvm.vp.scatter.nxv2i64.nxv2p0(<vscale x 2 x i64> [[BROADCAST_SPLAT]], <vscale x 2 x ptr> align 8 [[TMP10]], <vscale x 2 x i1> splat (i1 true), i32 [[TMP7]])
86100
; CHECK-NEXT: [[TMP12:%.*]] = zext i32 [[TMP7]] to i64
@@ -140,6 +154,10 @@ define i64 @indexed_load(ptr noalias nocapture %a, ptr noalias nocapture %b, i64
140154
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
141155
; CHECK-NEXT: [[EVL_BASED_IV:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], [[VECTOR_BODY]] ]
142156
; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <vscale x 2 x i64> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP11:%.*]], [[VECTOR_BODY]] ]
157+
; CHECK-NEXT: [[AVL:%.*]] = sub i64 1025, [[EVL_BASED_IV]]
158+
; CHECK-NEXT: [[TMP7:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 2, i1 true)
159+
; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i64, ptr [[B:%.*]], i64 [[EVL_BASED_IV]]
160+
; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <vscale x 2 x i64> @llvm.vp.load.nxv2i64.p0(ptr align 8 [[TMP8]], <vscale x 2 x i1> splat (i1 true), i32 [[TMP7]])
143161
; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], <vscale x 2 x i64> [[WIDE_MASKED_LOAD]]
144162
; CHECK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call <vscale x 2 x i64> @llvm.vp.gather.nxv2i64.nxv2p0(<vscale x 2 x ptr> align 8 [[TMP10]], <vscale x 2 x i1> splat (i1 true), i32 [[TMP7]])
145163
; CHECK-NEXT: [[TMP12:%.*]] = add <vscale x 2 x i64> [[VEC_PHI]], [[WIDE_MASKED_GATHER]]
@@ -208,6 +226,13 @@ define void @splat_int(ptr noalias nocapture %a, i64 %v, i64 %n) {
208226
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
209227
; CHECK: vector.body:
210228
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
229+
; CHECK-NEXT: [[EVL_BASED_IV:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], [[VECTOR_BODY]] ]
230+
; CHECK-NEXT: [[AVL:%.*]] = sub i64 1025, [[EVL_BASED_IV]]
231+
; CHECK-NEXT: [[TMP8:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 2, i1 true)
232+
; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[EVL_BASED_IV]]
233+
; CHECK-NEXT: call void @llvm.vp.store.nxv2i64.p0(<vscale x 2 x i64> [[BROADCAST_SPLAT]], ptr align 8 [[TMP9]], <vscale x 2 x i1> splat (i1 true), i32 [[TMP8]])
234+
; CHECK-NEXT: [[TMP7:%.*]] = zext i32 [[TMP8]] to i64
235+
; CHECK-NEXT: [[INDEX_EVL_NEXT]] = add nuw i64 [[TMP7]], [[EVL_BASED_IV]]
211236
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP6]]
212237
; CHECK-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
213238
; CHECK-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]]
@@ -263,6 +288,10 @@ define void @uniform_store(ptr noalias nocapture %a, ptr noalias nocapture %b, i
263288
; CHECK-NEXT: [[AVL:%.*]] = sub i64 1025, [[EVL_BASED_IV]]
264289
; CHECK-NEXT: [[TMP7:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 2, i1 true)
265290
; CHECK-NEXT: store i64 [[V]], ptr [[B:%.*]], align 8
291+
; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[EVL_BASED_IV]]
292+
; CHECK-NEXT: call void @llvm.vp.store.nxv2i64.p0(<vscale x 2 x i64> [[BROADCAST_SPLAT]], ptr align 8 [[TMP8]], <vscale x 2 x i1> splat (i1 true), i32 [[TMP7]])
293+
; CHECK-NEXT: [[TMP9:%.*]] = zext i32 [[TMP7]] to i64
294+
; CHECK-NEXT: [[INDEX_EVL_NEXT]] = add nuw i64 [[TMP9]], [[EVL_BASED_IV]]
266295
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP6]]
267296
; CHECK-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
268297
; CHECK-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP11:![0-9]+]]
@@ -349,6 +378,15 @@ define void @vector_add_trip1024(ptr noalias nocapture %a, i64 %v, i64 %n) {
349378
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
350379
; CHECK: vector.body:
351380
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
381+
; CHECK-NEXT: [[EVL_BASED_IV:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], [[VECTOR_BODY]] ]
382+
; CHECK-NEXT: [[AVL:%.*]] = sub i64 1024, [[EVL_BASED_IV]]
383+
; CHECK-NEXT: [[TMP9:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 2, i1 true)
384+
; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[EVL_BASED_IV]]
385+
; CHECK-NEXT: [[VP_OP_LOAD:%.*]] = call <vscale x 2 x i64> @llvm.vp.load.nxv2i64.p0(ptr align 8 [[TMP10]], <vscale x 2 x i1> splat (i1 true), i32 [[TMP9]])
386+
; CHECK-NEXT: [[TMP7:%.*]] = add <vscale x 2 x i64> [[VP_OP_LOAD]], [[BROADCAST_SPLAT]]
387+
; CHECK-NEXT: call void @llvm.vp.store.nxv2i64.p0(<vscale x 2 x i64> [[TMP7]], ptr align 8 [[TMP10]], <vscale x 2 x i1> splat (i1 true), i32 [[TMP9]])
388+
; CHECK-NEXT: [[TMP8:%.*]] = zext i32 [[TMP9]] to i64
389+
; CHECK-NEXT: [[INDEX_EVL_NEXT]] = add nuw i64 [[TMP8]], [[EVL_BASED_IV]]
352390
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP6]]
353391
; CHECK-NEXT: [[TMP11:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
354392
; CHECK-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP13:![0-9]+]]

0 commit comments

Comments
 (0)