|
| 1 | +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals none --filter-out-after "scalar.ph\:" --version 5 |
1 | 2 | ; RUN: opt -passes=loop-vectorize -S < %s | FileCheck %s
|
2 | 3 |
|
3 | 4 | target triple = "aarch64-unknown-linux-gnu"
|
4 | 5 |
|
5 | 6 | define void @trip7_i64(ptr noalias nocapture noundef %dst, ptr noalias nocapture noundef readonly %src) #0 {
|
6 |
| -; CHECK-LABEL: @trip7_i64( |
7 |
| -; CHECK: = call i64 @llvm.vscale.i64() |
8 |
| -; CHECK-NEXT: = mul nuw i64 |
9 |
| -; CHECK: [[VSCALE:%.*]] = call i64 @llvm.vscale.i64() |
10 |
| -; CHECK-NEXT: [[VF:%.*]] = mul nuw i64 [[VSCALE]], 2 |
11 |
| -; CHECK: vector.body: |
12 |
| -; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %vector.ph ], [ [[INDEX_NEXT:%.*]], %vector.body ] |
13 |
| -; CHECK: [[ACTIVE_LANE_MASK:%.*]] = phi <vscale x 2 x i1> [ {{%.*}}, %vector.ph ], [ [[ACTIVE_LANE_MASK_NEXT:%.*]], %vector.body ] |
14 |
| -; CHECK: {{%.*}} = call <vscale x 2 x i64> @llvm.masked.load.nxv2i64.p0(ptr {{%.*}}, i32 8, <vscale x 2 x i1> [[ACTIVE_LANE_MASK]], <vscale x 2 x i64> poison) |
15 |
| -; CHECK: {{%.*}} = call <vscale x 2 x i64> @llvm.masked.load.nxv2i64.p0(ptr {{%.*}}, i32 8, <vscale x 2 x i1> [[ACTIVE_LANE_MASK]], <vscale x 2 x i64> poison) |
16 |
| -; CHECK: call void @llvm.masked.store.nxv2i64.p0(<vscale x 2 x i64> {{%.*}}, ptr {{%.*}}, i32 8, <vscale x 2 x i1> [[ACTIVE_LANE_MASK]]) |
17 |
| -; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[VF]] |
| 7 | +; CHECK-LABEL: define void @trip7_i64( |
| 8 | +; CHECK-SAME: ptr noalias noundef captures(none) [[DST:%.*]], ptr noalias noundef readonly captures(none) [[SRC:%.*]]) #[[ATTR0:[0-9]+]] { |
| 9 | +; CHECK-NEXT: [[ENTRY:.*:]] |
| 10 | +; CHECK-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] |
| 11 | +; CHECK: [[VECTOR_PH]]: |
| 12 | +; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() |
| 13 | +; CHECK-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP0]], 2 |
| 14 | +; CHECK-NEXT: [[TMP2:%.*]] = sub i64 [[TMP1]], 1 |
| 15 | +; CHECK-NEXT: [[N_RND_UP:%.*]] = add i64 7, [[TMP2]] |
| 16 | +; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP1]] |
| 17 | +; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]] |
| 18 | +; CHECK-NEXT: [[TMP3:%.*]] = call i64 @llvm.vscale.i64() |
| 19 | +; CHECK-NEXT: [[TMP4:%.*]] = mul nuw i64 [[TMP3]], 2 |
| 20 | +; CHECK-NEXT: [[ACTIVE_LANE_MASK_ENTRY:%.*]] = call <vscale x 2 x i1> @llvm.get.active.lane.mask.nxv2i1.i64(i64 0, i64 7) |
| 21 | +; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] |
| 22 | +; CHECK: [[VECTOR_BODY]]: |
| 23 | +; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] |
| 24 | +; CHECK-NEXT: [[ACTIVE_LANE_MASK:%.*]] = phi <vscale x 2 x i1> [ [[ACTIVE_LANE_MASK_ENTRY]], %[[VECTOR_PH]] ], [ [[ACTIVE_LANE_MASK_NEXT:%.*]], %[[VECTOR_BODY]] ] |
| 25 | +; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[SRC]], i64 [[INDEX]] |
| 26 | +; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <vscale x 2 x i64> @llvm.masked.load.nxv2i64.p0(ptr [[TMP5]], i32 8, <vscale x 2 x i1> [[ACTIVE_LANE_MASK]], <vscale x 2 x i64> poison) |
| 27 | +; CHECK-NEXT: [[TMP6:%.*]] = shl nsw <vscale x 2 x i64> [[WIDE_MASKED_LOAD]], splat (i64 1) |
| 28 | +; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[DST]], i64 [[INDEX]] |
| 29 | +; CHECK-NEXT: [[WIDE_MASKED_LOAD1:%.*]] = call <vscale x 2 x i64> @llvm.masked.load.nxv2i64.p0(ptr [[TMP7]], i32 8, <vscale x 2 x i1> [[ACTIVE_LANE_MASK]], <vscale x 2 x i64> poison) |
| 30 | +; CHECK-NEXT: [[TMP8:%.*]] = add nsw <vscale x 2 x i64> [[WIDE_MASKED_LOAD1]], [[TMP6]] |
| 31 | +; CHECK-NEXT: call void @llvm.masked.store.nxv2i64.p0(<vscale x 2 x i64> [[TMP8]], ptr [[TMP7]], i32 8, <vscale x 2 x i1> [[ACTIVE_LANE_MASK]]) |
| 32 | +; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP4]] |
18 | 33 | ; CHECK-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call <vscale x 2 x i1> @llvm.get.active.lane.mask.nxv2i1.i64(i64 [[INDEX_NEXT]], i64 7)
|
19 |
| -; CHECK-NEXT: [[ACTIVE_LANE_MASK_NOT:%.*]] = xor <vscale x 2 x i1> [[ACTIVE_LANE_MASK_NEXT]], splat (i1 true) |
20 |
| -; CHECK-NEXT: [[COND:%.*]] = extractelement <vscale x 2 x i1> [[ACTIVE_LANE_MASK_NOT]], i32 0 |
21 |
| -; CHECK-NEXT: br i1 [[COND]], label %middle.block, label %vector.body |
| 34 | +; CHECK-NEXT: [[TMP9:%.*]] = xor <vscale x 2 x i1> [[ACTIVE_LANE_MASK_NEXT]], splat (i1 true) |
| 35 | +; CHECK-NEXT: [[TMP10:%.*]] = extractelement <vscale x 2 x i1> [[TMP9]], i32 0 |
| 36 | +; CHECK-NEXT: br i1 [[TMP10]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] |
| 37 | +; CHECK: [[MIDDLE_BLOCK]]: |
| 38 | +; CHECK-NEXT: br [[EXIT:label %.*]] |
| 39 | +; CHECK: [[SCALAR_PH]]: |
22 | 40 | ;
|
23 | 41 | entry:
|
24 |
| - br label %for.body |
| 42 | + br label %loop |
25 | 43 |
|
26 |
| -for.body: ; preds = %entry, %for.body |
27 |
| - %i.06 = phi i64 [ 0, %entry ], [ %inc, %for.body ] |
28 |
| - %arrayidx = getelementptr inbounds i64, ptr %src, i64 %i.06 |
29 |
| - %0 = load i64, ptr %arrayidx, align 8 |
| 44 | +loop: |
| 45 | + %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ] |
| 46 | + %gep.src = getelementptr inbounds i64, ptr %src, i64 %iv |
| 47 | + %0 = load i64, ptr %gep.src, align 8 |
30 | 48 | %mul = shl nsw i64 %0, 1
|
31 |
| - %arrayidx1 = getelementptr inbounds i64, ptr %dst, i64 %i.06 |
32 |
| - %1 = load i64, ptr %arrayidx1, align 8 |
| 49 | + %gep.dst = getelementptr inbounds i64, ptr %dst, i64 %iv |
| 50 | + %1 = load i64, ptr %gep.dst, align 8 |
33 | 51 | %add = add nsw i64 %1, %mul
|
34 |
| - store i64 %add, ptr %arrayidx1, align 8 |
35 |
| - %inc = add nuw nsw i64 %i.06, 1 |
36 |
| - %exitcond.not = icmp eq i64 %inc, 7 |
37 |
| - br i1 %exitcond.not, label %for.end, label %for.body |
| 52 | + store i64 %add, ptr %gep.dst, align 8 |
| 53 | + %iv.next = add nuw nsw i64 %iv, 1 |
| 54 | + %ec = icmp eq i64 %iv.next, 7 |
| 55 | + br i1 %ec, label %exit, label %loop |
38 | 56 |
|
39 |
| -for.end: ; preds = %for.body |
| 57 | +exit: |
40 | 58 | ret void
|
41 | 59 | }
|
42 | 60 |
|
43 | 61 | define void @trip5_i8(ptr noalias nocapture noundef %dst, ptr noalias nocapture noundef readonly %src) #0 {
|
44 |
| -; CHECK-LABEL: @trip5_i8( |
45 |
| -; CHECK-NEXT: entry: |
46 |
| -; CHECK-NEXT: br label [[FOR_BODY:%.*]] |
47 |
| -; CHECK: for.body: |
48 |
| -; CHECK-NEXT: [[I_08:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY]] ] |
49 |
| -; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[SRC:%.*]], i64 [[I_08]] |
50 |
| -; CHECK-NEXT: [[TMP0:%.*]] = load i8, ptr [[ARRAYIDX]], align 1 |
| 62 | +; CHECK-LABEL: define void @trip5_i8( |
| 63 | +; CHECK-SAME: ptr noalias noundef captures(none) [[DST:%.*]], ptr noalias noundef readonly captures(none) [[SRC:%.*]]) #[[ATTR0]] { |
| 64 | +; CHECK-NEXT: [[ENTRY:.*]]: |
| 65 | +; CHECK-NEXT: br label %[[LOOP:.*]] |
| 66 | +; CHECK: [[LOOP]]: |
| 67 | +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] |
| 68 | +; CHECK-NEXT: [[GEP_SRC:%.*]] = getelementptr inbounds i8, ptr [[SRC]], i64 [[IV]] |
| 69 | +; CHECK-NEXT: [[TMP0:%.*]] = load i8, ptr [[GEP_SRC]], align 1 |
51 | 70 | ; CHECK-NEXT: [[MUL:%.*]] = shl i8 [[TMP0]], 1
|
52 |
| -; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i8, ptr [[DST:%.*]], i64 [[I_08]] |
53 |
| -; CHECK-NEXT: [[TMP1:%.*]] = load i8, ptr [[ARRAYIDX1]], align 1 |
| 71 | +; CHECK-NEXT: [[GEP_DST:%.*]] = getelementptr inbounds i8, ptr [[DST]], i64 [[IV]] |
| 72 | +; CHECK-NEXT: [[TMP1:%.*]] = load i8, ptr [[GEP_DST]], align 1 |
54 | 73 | ; CHECK-NEXT: [[ADD:%.*]] = add i8 [[MUL]], [[TMP1]]
|
55 |
| -; CHECK-NEXT: store i8 [[ADD]], ptr [[ARRAYIDX1]], align 1 |
56 |
| -; CHECK-NEXT: [[INC]] = add nuw nsw i64 [[I_08]], 1 |
57 |
| -; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INC]], 5 |
58 |
| -; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END:%.*]], label [[FOR_BODY]] |
59 |
| -; CHECK: for.end: |
| 74 | +; CHECK-NEXT: store i8 [[ADD]], ptr [[GEP_DST]], align 1 |
| 75 | +; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 |
| 76 | +; CHECK-NEXT: [[EC:%.*]] = icmp eq i64 [[IV_NEXT]], 5 |
| 77 | +; CHECK-NEXT: br i1 [[EC]], label %[[EXIT:.*]], label %[[LOOP]] |
| 78 | +; CHECK: [[EXIT]]: |
60 | 79 | ; CHECK-NEXT: ret void
|
61 | 80 | ;
|
62 | 81 | entry:
|
63 |
| - br label %for.body |
| 82 | + br label %loop |
64 | 83 |
|
65 |
| -for.body: ; preds = %entry, %for.body |
66 |
| - %i.08 = phi i64 [ 0, %entry ], [ %inc, %for.body ] |
67 |
| - %arrayidx = getelementptr inbounds i8, ptr %src, i64 %i.08 |
68 |
| - %0 = load i8, ptr %arrayidx, align 1 |
| 84 | +loop: |
| 85 | + %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ] |
| 86 | + %gep.src = getelementptr inbounds i8, ptr %src, i64 %iv |
| 87 | + %0 = load i8, ptr %gep.src, align 1 |
69 | 88 | %mul = shl i8 %0, 1
|
70 |
| - %arrayidx1 = getelementptr inbounds i8, ptr %dst, i64 %i.08 |
71 |
| - %1 = load i8, ptr %arrayidx1, align 1 |
| 89 | + %gep.dst = getelementptr inbounds i8, ptr %dst, i64 %iv |
| 90 | + %1 = load i8, ptr %gep.dst, align 1 |
72 | 91 | %add = add i8 %mul, %1
|
73 |
| - store i8 %add, ptr %arrayidx1, align 1 |
74 |
| - %inc = add nuw nsw i64 %i.08, 1 |
75 |
| - %exitcond.not = icmp eq i64 %inc, 5 |
76 |
| - br i1 %exitcond.not, label %for.end, label %for.body |
| 92 | + store i8 %add, ptr %gep.dst, align 1 |
| 93 | + %iv.next = add nuw nsw i64 %iv, 1 |
| 94 | + %ec = icmp eq i64 %iv.next, 5 |
| 95 | + br i1 %ec, label %exit, label %loop |
77 | 96 |
|
78 |
| -for.end: ; preds = %for.body |
| 97 | +exit: |
79 | 98 | ret void
|
80 | 99 | }
|
81 | 100 |
|
|
0 commit comments