|
| 1 | +; RUN: opt < %s -passes=loop-interchange -cache-line-size=64 -pass-remarks='loop-interchange' -pass-remarks-missed='loop-interchange' -pass-remarks-output=%t -disable-output -S |
| 2 | +; RUN: FileCheck --input-file=%t %s |
| 3 | + |
| 4 | +target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128-Fn32" |
| 5 | + |
| 6 | +; This is a reduced test case for the example in "large-nested-6d.ll". For a |
| 7 | +; full description of the purpose of this test and its complexities, see that file. |
| 8 | +; |
| 9 | +; This reproducer contains the perfectly nested sub part of that bigger loop |
| 10 | +; nest: |
| 11 | +; |
| 12 | +; for i=1 to NX |
| 13 | +;   for j=1 to NY |
| 14 | +;     for IL=1 to NX |
| 15 | +;       load GlobC(i,IL,L) |
| 16 | +;       load GlobG(i,IL,L) |
| 17 | +;       load GlobE(i,IL,L) |
| 18 | +;       load GlobI(i,IL,L) |
| 19 | +;       for JL=1 to NY |
| 20 | +;         load GlobD(j,JL,M) |
| 21 | +;         load GlobH(j,JL,M) |
| 22 | +;         load GlobF(j,JL,M) |
| 23 | +;         load GlobJ(j,JL,M) |
| 24 | +;         store GlobL(NY*i+j,NY*IL+JL) |
| 25 | +;       End |
| 26 | +;     End |
| 27 | +;   End |
| 28 | +; End |
| 29 | +; |
| 30 | +; This reproducer is useful to focus only on the 2nd challenge: the data |
| 31 | +; dependence analysis problem, and not worry about the rest of loop nest |
| 32 | +; structure. |
| 33 | +; |
| 34 | +; TODO: |
| 35 | +; |
| 36 | +; If loop-interchange is able to deal with imperfectly nested loops, this |
| 37 | +; test is redundant and we only need to keep "large-nested-6d.ll". |
| 38 | +; |
| 39 | +; CHECK: --- !Analysis |
| 40 | +; CHECK-NEXT: Pass: loop-interchange |
| 41 | +; CHECK-NEXT: Name: Dependence |
| 42 | +; CHECK-NEXT: Function: test |
| 43 | +; CHECK-NEXT: Args: |
| 44 | +; CHECK-NEXT: - String: Computed dependence info, invoking the transform. |
| 45 | +; CHECK-NEXT: ... |
| 46 | +; CHECK-NEXT: --- !Missed |
| 47 | +; CHECK-NEXT: Pass: loop-interchange |
| 48 | +; CHECK-NEXT: Name: Dependence |
| 49 | +; CHECK-NEXT: Function: test |
| 50 | +; CHECK-NEXT: Args: |
| 51 | +; CHECK-NEXT: - String: All loops have dependencies in all directions. |
| 52 | +; CHECK-NEXT: ... |
| 53 | + |
| 54 | +@GlobC = local_unnamed_addr global [54 x [54 x [54 x double]]] zeroinitializer ; GlobC/GlobG/GlobE/GlobI are only loaded, in IL.header of @test |
| 55 | +@GlobD = local_unnamed_addr global [54 x [54 x [54 x double]]] zeroinitializer ; GlobD/GlobH/GlobF/GlobJ are only loaded, in JL.body of @test |
| 56 | +@GlobE = local_unnamed_addr global [54 x [54 x [54 x double]]] zeroinitializer |
| 57 | +@GlobF = local_unnamed_addr global [54 x [54 x [54 x double]]] zeroinitializer |
| 58 | +@GlobG = local_unnamed_addr global [54 x [54 x [54 x double]]] zeroinitializer |
| 59 | +@GlobH = local_unnamed_addr global [54 x [54 x [54 x double]]] zeroinitializer |
| 60 | +@GlobI = local_unnamed_addr global [54 x [54 x [54 x double]]] zeroinitializer |
| 61 | +@GlobJ = local_unnamed_addr global [54 x [54 x [54 x double]]] zeroinitializer |
| 62 | +@GlobL = local_unnamed_addr global [1000 x [1000 x double]] zeroinitializer ; the only array @test stores to; one inner row is 1000 doubles = 8000 bytes |
| 63 | + |
| 64 | +define void @test(ptr noalias readonly captures(none) %0, ptr noalias readonly captures(none) %1, ptr noalias captures(none) %2, ptr noalias captures(none) %3, ptr noalias readonly captures(none) %4, ptr noalias readonly captures(none) %5, ptr noalias readonly captures(none) %6, ptr noalias readonly captures(none) %7, ptr noalias readonly captures(none) %8, ptr noalias readonly captures(none) %9) { ; 4-deep perfect loop nest (i, j, IL, JL); of the ten pointer arguments only %7 and %8 are read in this body |
| 65 | +entry: |
| 66 | + %17 = load i32, ptr %7, align 4 ; upper bound of the i and IL loops (NX in the pseudocode at the top of the file) |
| 67 | + %18 = sext i32 %17 to i64 |
| 68 | + %20 = load i32, ptr %8, align 4 ; upper bound of the j and JL loops (NY in the pseudocode); also the multiplier in the GlobL index |
| 69 | + %21 = sext i32 %20 to i64 |
| 70 | + %cmp1 = icmp sgt i32 %17, 0 |
| 71 | + %cmp2 = icmp sgt i32 %20, 0 |
| 72 | + %cond = and i1 %cmp1, %cmp2 ; the nest is entered only when both bounds are positive |
| 73 | + br i1 %cond, label %preheader, label %exit |
| 74 | + |
| 75 | +preheader: |
| 76 | + br label %i.header |
| 77 | + |
| 78 | +i.header: ; outermost loop: i = 1..%18 |
| 79 | + %i = phi i64 [ %i.next, %i.latch ], [ 1, %preheader ] |
| 80 | + %92 = add nsw i64 -55, %i ; i - 55: the i-dependent part of the element index used for GlobC/G/E/I |
| 81 | + %93 = add nsw i64 %i, -1 |
| 82 | + %94 = mul nsw i64 %93, %21 |
| 83 | + %invariant.gep = getelementptr double, ptr @GlobL, i64 %94 ; GlobL + (i-1)*%21 doubles |
| 84 | + br label %j.header |
| 85 | + |
| 86 | +j.header: ; second loop: j = 1..%21 |
| 87 | + %j = phi i64 [ %j.next, %j.latch ], [ 1, %i.header ] |
| 88 | + %95 = add nsw i64 -55, %j ; j - 55: the j-dependent part of the element index used for GlobD/H/F/J |
| 89 | + %gep358 = getelementptr double, ptr %invariant.gep, i64 %j ; GlobL + ((i-1)*%21 + j) doubles |
| 90 | + br label %IL.header |
| 91 | + |
| 92 | +IL.header: ; third loop: IL = 1..%18 |
| 93 | + %IL = phi i64 [ %IL.next, %IL.latch ], [ 1, %j.header ] |
| 94 | + %96 = mul nuw nsw i64 %IL, 54 |
| 95 | + %97 = add nsw i64 %92, %96 ; element index i - 55 + 54*IL, shared by the four loads below (independent of j and JL) |
| 96 | + %98 = getelementptr double, ptr @GlobC, i64 %97 |
| 97 | + %99 = load double, ptr %98, align 8 |
| 98 | + %100 = getelementptr double, ptr @GlobG, i64 %97 |
| 99 | + %101 = load double, ptr %100, align 8 |
| 100 | + %102 = getelementptr double, ptr @GlobE, i64 %97 |
| 101 | + %103 = load double, ptr %102, align 8 |
| 102 | + %104 = getelementptr double, ptr @GlobI, i64 %97 |
| 103 | + %105 = load double, ptr %104, align 8 |
| 104 | + %106 = add nsw i64 %IL, -1 |
| 105 | + %107 = mul nsw i64 %106, %21 ; (IL-1)*%21: the IL-dependent part of the GlobL row offset |
| 106 | + br label %JL.body |
| 107 | + |
| 108 | +JL.body: ; innermost loop: JL = 1..%21; header, body and latch are a single block |
| 109 | + %JL = phi i64 [ %JL.next, %JL.body ], [ 1, %IL.header ] |
| 110 | + %109 = mul nuw nsw i64 %JL, 54 |
| 111 | + %110 = add nsw i64 %95, %109 ; element index j - 55 + 54*JL, shared by the four loads below (independent of i and IL) |
| 112 | + %111 = getelementptr double, ptr @GlobD, i64 %110 |
| 113 | + %112 = load double, ptr %111, align 8 |
| 114 | + %113 = fmul fast double %112, %99 |
| 115 | + %114 = getelementptr double, ptr @GlobH, i64 %110 |
| 116 | + %115 = load double, ptr %114, align 8 |
| 117 | + %116 = fmul fast double %115, %101 |
| 118 | + %117 = fadd fast double %116, %113 |
| 119 | + %118 = getelementptr double, ptr @GlobF, i64 %110 |
| 120 | + %119 = load double, ptr %118, align 8 |
| 121 | + %120 = fmul fast double %119, %103 |
| 122 | + %121 = fadd fast double %117, %120 |
| 123 | + %122 = getelementptr double, ptr @GlobJ, i64 %110 |
| 124 | + %123 = load double, ptr %122, align 8 |
| 125 | + %124 = fmul fast double %123, %105 |
| 126 | + %125 = fadd fast double %121, %124 ; stored value: GlobD*GlobC + GlobH*GlobG + GlobF*GlobE + GlobJ*GlobI |
| 127 | + %126 = add nsw i64 %JL, %107 ; JL + (IL-1)*%21 |
| 128 | + %.idx247.us.us.us.us.us.us = mul nsw i64 %126, 8000 ; byte offset; 8000 bytes = 1000 doubles = one inner row of GlobL |
| 129 | + %gep.us.us.us.us.us.us = getelementptr i8, ptr %gep358, i64 %.idx247.us.us.us.us.us.us |
| 130 | + %127 = getelementptr i8, ptr %gep.us.us.us.us.us.us, i64 -8008 ; back one row (8000 bytes) and one element (8 bytes); presumably compensates for the 1-based induction variables |
| 131 | + store double %125, ptr %127, align 8 ; the only store in the nest; its address depends on all four induction variables |
| 132 | + %JL.next = add nuw nsw i64 %JL, 1 |
| 133 | + %exitcond.not = icmp eq i64 %JL, %21 ; compares the pre-increment value, so the last iteration is JL == %21 |
| 134 | + br i1 %exitcond.not, label %IL.latch, label %JL.body |
| 135 | + |
| 136 | +IL.latch: |
| 137 | + %IL.next = add nuw nsw i64 %IL, 1 |
| 138 | + %exitcond320.not = icmp eq i64 %IL, %18 ; last iteration is IL == %18 |
| 139 | + br i1 %exitcond320.not, label %j.latch, label %IL.header |
| 140 | + |
| 141 | +j.latch: |
| 142 | + %j.next = add nuw nsw i64 %j, 1 |
| 143 | + %exitcond324.not = icmp eq i64 %j, %21 ; last iteration is j == %21 |
| 144 | + br i1 %exitcond324.not, label %i.latch, label %j.header |
| 145 | + |
| 146 | +i.latch: |
| 147 | + %i.next = add nuw nsw i64 %i, 1 |
| 148 | + %exitcond328.not = icmp eq i64 %i, %18 ; last iteration is i == %18 |
| 149 | + br i1 %exitcond328.not, label %exit, label %i.header |
| 150 | + |
| 151 | +exit: |
| 152 | + ret void |
| 153 | +} |
| 154 | + |
| 155 | + |
0 commit comments