Skip to content

Commit b492b35

Browse files
authored
[LoopInterchange] Motivating example for interchange. NFC. (#171631)
This is precommitting a full reproducer of one of our motivating examples. Looking at a full reproducer is helpful for further discussion on DependenceAnalysis and Delinearization issues and the runtime predicates discussion. I appreciate that this is a larger than usual test case, but that is by design, because I think it is useful to look at the whole thing with all of its complexities. I have given useful names to all the relevant loop variables, and the relevant blocks in these loops and their functions, but have intentionally not done that for others as there are quite a few more.
1 parent 3e2a8e2 commit b492b35

File tree

2 files changed

+724
-0
lines changed

2 files changed

+724
-0
lines changed
Lines changed: 155 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,155 @@
1+
; RUN: opt < %s -passes=loop-interchange -cache-line-size=64 -pass-remarks='loop-interchange' -pass-remarks-missed='loop-interchange' -pass-remarks-output=%t -disable-output -S
2+
; RUN: FileCheck --input-file=%t %s
3+
4+
target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128-Fn32"
5+
6+
; This is a reduced test case for the example in "large-nested-6d.ll". For a
7+
; full description of the purpose of this test and its complexities, see that file.
8+
;
9+
; This reproducer contains the perfectly nested sub part of that bigger loop
10+
; nest:
11+
;
12+
; for i=1 to NX
13+
; for j=1 to NY
14+
; for IL=1 to NX
15+
; load GlobC(i,IL,L)
16+
; load GlobG(i,IL,L)
17+
; load GlobE(i,IL,L)
18+
; load GlobI(i,IL,L)
19+
; for JL=1 to NY
20+
; load GlobD(j,JL,M)
21+
; load GlobH(j,JL,M)
22+
; load GlobF(j,JL,M)
23+
; load GlobJ(j,JL,M)
24+
; store GlobL(NY*i+j,NY*IL+JL)
25+
; End
26+
; End
27+
; End
28+
; End
29+
;
30+
; This reproducer is useful to focus only on the 2nd challenge: the data
31+
; dependence analysis problem, and not worry about the rest of the loop nest
32+
; structure.
33+
;
34+
; TODO:
35+
;
36+
; If loop-interchange is able to deal with imperfectly nested loops, this
37+
; test is redundant and we only need to keep "large-nested-6d.ll".
38+
;
39+
; CHECK: --- !Analysis
40+
; CHECK-NEXT: Pass: loop-interchange
41+
; CHECK-NEXT: Name: Dependence
42+
; CHECK-NEXT: Function: test
43+
; CHECK-NEXT: Args:
44+
; CHECK-NEXT: - String: Computed dependence info, invoking the transform.
45+
; CHECK-NEXT: ...
46+
; CHECK-NEXT: --- !Missed
47+
; CHECK-NEXT: Pass: loop-interchange
48+
; CHECK-NEXT: Name: Dependence
49+
; CHECK-NEXT: Function: test
50+
; CHECK-NEXT: Args:
51+
; CHECK-NEXT: - String: All loops have dependencies in all directions.
52+
; CHECK-NEXT: ...
53+
54+
; Input arrays, each declared as 54 x 54 x 54 doubles. @test only loads from
; these: GlobC/GlobG/GlobE/GlobI in IL.header, GlobD/GlobH/GlobF/GlobJ in
; JL.body.
@GlobC = local_unnamed_addr global [54 x [54 x [54 x double]]] zeroinitializer
55+
@GlobD = local_unnamed_addr global [54 x [54 x [54 x double]]] zeroinitializer
56+
@GlobE = local_unnamed_addr global [54 x [54 x [54 x double]]] zeroinitializer
57+
@GlobF = local_unnamed_addr global [54 x [54 x [54 x double]]] zeroinitializer
58+
@GlobG = local_unnamed_addr global [54 x [54 x [54 x double]]] zeroinitializer
59+
@GlobH = local_unnamed_addr global [54 x [54 x [54 x double]]] zeroinitializer
60+
@GlobI = local_unnamed_addr global [54 x [54 x [54 x double]]] zeroinitializer
61+
@GlobJ = local_unnamed_addr global [54 x [54 x [54 x double]]] zeroinitializer
62+
; Output array, declared as 1000 x 1000 doubles (8000 bytes per row). It is the
; only global that @test stores to.
@GlobL = local_unnamed_addr global [1000 x [1000 x double]] zeroinitializer
63+
64+
; @test: perfectly nested 4-deep loop nest (i, j, IL, JL) matching the
; pseudo-code at the top of this file.
;
; Of the ten pointer arguments only two are read in the body:
;   %7 - pointer to an i32 that bounds the i and IL loops (NX in the pseudo-code)
;   %8 - pointer to an i32 that bounds the j and JL loops (NY in the pseudo-code)
; The remaining arguments (%0-%6, %9) are not referenced by any instruction
; below.
;
; Every loop starts its induction variable at 1 and is bottom-tested: the exit
; compare checks the *pre-increment* value against the bound with `icmp eq`,
; so each loop runs for IV = 1 .. bound inclusive. The entry guard makes sure
; both bounds are > 0 before the nest is entered.
;
; Each innermost iteration stores
;   GlobD*GlobC + GlobH*GlobG + GlobF*GlobE + GlobJ*GlobI
; into GlobL, where the C/G/E/I operands are loaded once per IL iteration and
; the D/H/F/J operands are loaded once per JL iteration.
define void @test(ptr noalias readonly captures(none) %0, ptr noalias readonly captures(none) %1, ptr noalias captures(none) %2, ptr noalias captures(none) %3, ptr noalias readonly captures(none) %4, ptr noalias readonly captures(none) %5, ptr noalias readonly captures(none) %6, ptr noalias readonly captures(none) %7, ptr noalias readonly captures(none) %8, ptr noalias readonly captures(none) %9) {
65+
entry:
66+
  ; %18 = sign-extended bound for the i and IL loops.
  %17 = load i32, ptr %7, align 4
67+
  %18 = sext i32 %17 to i64
68+
  ; %21 = sign-extended bound for the j and JL loops; also used as the
  ; multiplier in the GlobL index computations.
  %20 = load i32, ptr %8, align 4
69+
  %21 = sext i32 %20 to i64
70+
  ; Skip the whole nest unless both bounds are strictly positive.
  %cmp1 = icmp sgt i32 %17, 0
71+
  %cmp2 = icmp sgt i32 %20, 0
72+
  %cond = and i1 %cmp1, %cmp2
73+
  br i1 %cond, label %preheader, label %exit
74+
75+
preheader:
76+
  br label %i.header
77+
78+
; Outermost loop: i = 1 .. %18.
i.header:
79+
  %i = phi i64 [ %i.next, %i.latch ], [ 1, %preheader ]
80+
  ; %92 = i - 55: the i-dependent part of the flat index used for the
  ; GlobC/G/E/I loads in IL.header.
  %92 = add nsw i64 -55, %i
81+
  ; %94 = (i - 1) * %21: the i-dependent element offset into GlobL.
  %93 = add nsw i64 %i, -1
82+
  %94 = mul nsw i64 %93, %21
83+
  %invariant.gep = getelementptr double, ptr @GlobL, i64 %94
84+
  br label %j.header
85+
86+
; Second loop: j = 1 .. %21.
j.header:
87+
  %j = phi i64 [ %j.next, %j.latch ], [ 1, %i.header ]
88+
  ; %95 = j - 55: the j-dependent part of the flat index used for the
  ; GlobD/H/F/J loads in JL.body.
  %95 = add nsw i64 -55, %j
89+
  ; %gep358 = &GlobL + ((i - 1) * %21 + j) doubles.
  %gep358 = getelementptr double, ptr %invariant.gep, i64 %j
90+
  br label %IL.header
91+
92+
; Third loop: IL = 1 .. %18.
IL.header:
93+
  %IL = phi i64 [ %IL.next, %IL.latch ], [ 1, %j.header ]
94+
  ; %97 = (i - 55) + 54 * IL, which equals (i - 1) + 54 * (IL - 1): a flat
  ; double index with i at unit stride and IL at stride 54. The same index is
  ; used for all four loads below.
  %96 = mul nuw nsw i64 %IL, 54
95+
  %97 = add nsw i64 %92, %96
96+
  %98 = getelementptr double, ptr @GlobC, i64 %97
97+
  %99 = load double, ptr %98, align 8
98+
  %100 = getelementptr double, ptr @GlobG, i64 %97
99+
  %101 = load double, ptr %100, align 8
100+
  %102 = getelementptr double, ptr @GlobE, i64 %97
101+
  %103 = load double, ptr %102, align 8
102+
  %104 = getelementptr double, ptr @GlobI, i64 %97
103+
  %105 = load double, ptr %104, align 8
104+
  ; %107 = (IL - 1) * %21: the IL-dependent part of the GlobL row number.
  %106 = add nsw i64 %IL, -1
105+
  %107 = mul nsw i64 %106, %21
106+
  br label %JL.body
107+
108+
; Innermost loop: JL = 1 .. %21. This block is both header and latch.
JL.body:
109+
  %JL = phi i64 [ %JL.next, %JL.body ], [ 1, %IL.header ]
110+
  ; %110 = (j - 55) + 54 * JL, which equals (j - 1) + 54 * (JL - 1): the flat
  ; double index shared by the GlobD/H/F/J loads.
  %109 = mul nuw nsw i64 %JL, 54
111+
  %110 = add nsw i64 %95, %109
112+
  ; Accumulate D*C + H*G + F*E + J*I.
  %111 = getelementptr double, ptr @GlobD, i64 %110
113+
  %112 = load double, ptr %111, align 8
114+
  %113 = fmul fast double %112, %99
115+
  %114 = getelementptr double, ptr @GlobH, i64 %110
116+
  %115 = load double, ptr %114, align 8
117+
  %116 = fmul fast double %115, %101
118+
  %117 = fadd fast double %116, %113
119+
  %118 = getelementptr double, ptr @GlobF, i64 %110
120+
  %119 = load double, ptr %118, align 8
121+
  %120 = fmul fast double %119, %103
122+
  %121 = fadd fast double %117, %120
123+
  %122 = getelementptr double, ptr @GlobJ, i64 %110
124+
  %123 = load double, ptr %122, align 8
125+
  %124 = fmul fast double %123, %105
126+
  %125 = fadd fast double %121, %124
127+
  ; Store address, in bytes from @GlobL:
  ;   8 * ((i - 1) * %21 + j) + 8000 * ((IL - 1) * %21 + JL) - 8008
  ; 8000 is one row of GlobL (1000 doubles * 8 bytes); -8008 subtracts one
  ; row plus one element, so the zero-based position written is
  ;   row    = (IL - 1) * %21 + JL - 1
  ;   column = (i - 1) * %21 + j - 1
  %126 = add nsw i64 %JL, %107
128+
  %.idx247.us.us.us.us.us.us = mul nsw i64 %126, 8000
129+
  %gep.us.us.us.us.us.us = getelementptr i8, ptr %gep358, i64 %.idx247.us.us.us.us.us.us
130+
  %127 = getelementptr i8, ptr %gep.us.us.us.us.us.us, i64 -8008
131+
  store double %125, ptr %127, align 8
132+
  ; Exit test compares the pre-increment %JL, so JL == %21 is the last
  ; iteration executed.
  %JL.next = add nuw nsw i64 %JL, 1
133+
  %exitcond.not = icmp eq i64 %JL, %21
134+
  br i1 %exitcond.not, label %IL.latch, label %JL.body
135+
136+
; Latch of the IL loop: leave after the iteration with IL == %18.
IL.latch:
137+
  %IL.next = add nuw nsw i64 %IL, 1
138+
  %exitcond320.not = icmp eq i64 %IL, %18
139+
  br i1 %exitcond320.not, label %j.latch, label %IL.header
140+
141+
; Latch of the j loop: leave after the iteration with j == %21.
j.latch:
142+
  %j.next = add nuw nsw i64 %j, 1
143+
  %exitcond324.not = icmp eq i64 %j, %21
144+
  br i1 %exitcond324.not, label %i.latch, label %j.header
145+
146+
; Latch of the i loop: leave after the iteration with i == %18.
i.latch:
147+
  %i.next = add nuw nsw i64 %i, 1
148+
  %exitcond328.not = icmp eq i64 %i, %18
149+
  br i1 %exitcond328.not, label %exit, label %i.header
150+
151+
exit:
152+
  ret void
153+
}
154+
155+

0 commit comments

Comments
 (0)