Skip to content

Commit 62ab0f4

Browse files
jgu222igcbot
authored andcommitted
Fix the algo that generates unique visa names
vISA tries to generate unique names for each variable. The existing algorithm keeps the name map as small as possible by not adding visa-generated names into the map. But the algorithm fails to check if an input name (for example, llvm name) may collide with visa-generated names. This leads duplicate visa names. This change fixes this by checking if an input name may collide with visa-generated names. This checking is a little expensive. Thus, to make this checking faster, the suffix added for visa-generated names is changed to "_#v" from "_#". With this, a quick check of the last char can tell if it might be visa-generated names. Only if it ends with 'v' that the expensive checking would be triggered. As llvm has many names with digits as suffix, this trick avoid many expensive checking. But the downside is that many lit tests (around 100) need updating. The change makes sure any visa-generated new name is unqiue by checking if it is in the name map. This is also missed in the existing algorith.
1 parent 45a4ad5 commit 62ab0f4

File tree

5 files changed

+91
-42
lines changed

5 files changed

+91
-42
lines changed

IGC/Compiler/tests/EmitVISAPass/block2d-read-zeropadded.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@
1111

1212
; REQUIRES: llvm-14-plus, regkeys
1313

14-
; RUN: igc_opt --opaque-pointers -platformpvc -igc-emit-visa %s -regkey DumpVISAASMToConsole -regkey EnableDebugging \
14+
; RUN: igc_opt --opaque-pointers -platformpvc -igc-emit-visa %s -regkey DumpVISAASMToConsole -regkey EnableDebugging -regkey DisableWarnings \
1515
; RUN: -simd-mode 16 | FileCheck %s
1616

1717
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v16:16:16-v24:32:32-v32:32:32-v48:64:64-v64:64:64-v96:128:128-v128:128:128-v192:256:256-v256:256:256-v512:512:512-v1024:1024:1024-n8:16:32"

IGC/Compiler/tests/ResourceLoopUnroll/ResourceloopUnrollNestedLsc.ll

Lines changed: 16 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -62,7 +62,7 @@ define spir_kernel void @test1(i32 %src1, i32 %val, i32 addrspace(1)* %dst) {
6262
; COM: check predicate load and lifetime.start
6363
; CHECK-VISAASM: _main_0:
6464
; CHECK-VISAASM-NEXT: mov (M1, 16) svn(0,0)<1> threadIdInGroupX(0,0)<1;1,0>
65-
; CHECK-VISAASM-NEXT: mov (M1, 16) nonuniform(0,0)<1> svn_0(0,0)<1;1,0>
65+
; CHECK-VISAASM-NEXT: mov (M1, 16) nonuniform(0,0)<1> svn_0v(0,0)<1;1,0>
6666
; CHECK-VISAASM-NEXT: add (M1, 16) offset(0,0)<1> src1(0,0)<0;1,0> nonuniform(0,0)<1;1,0>
6767
; CHECK-VISAASM-NEXT: lifetime.start V0032
6868
;
@@ -73,9 +73,9 @@ define spir_kernel void @test1(i32 %src1, i32 %val, i32 addrspace(1)* %dst) {
7373
; CHECK-VISAASM-NEXT: mov (M1_NM, 1) V0034(0,0)<1> V0036(0,0)<0;1,0>
7474
; CHECK-VISAASM-NEXT: fbl (M1_NM, 1) V0038(0,0)<1> V0034(0,0)<0;1,0>
7575
; CHECK-VISAASM-NEXT: shl (M1_NM, 1) ShuffleTmp(0,0)<1> V0039(0,0)<0;1,0> 0x2:uw
76-
; CHECK-VISAASM-NEXT: addr_add (M1_NM, 1) A0(0)<1> &nonuniform_0 ShuffleTmp(0,0)<0;1,0>
76+
; CHECK-VISAASM-NEXT: addr_add (M1_NM, 1) A0(0)<1> &nonuniform_0v ShuffleTmp(0,0)<0;1,0>
7777
; CHECK-VISAASM-NEXT: mov (M1_NM, 1) firstActiveRes6(0,0)<1> r[A0(0),0]<0;1,0>:ud
78-
; CHECK-VISAASM-NEXT: cmp.eq (M1, 16) P2 nonuniform_0(0,0)<1;1,0> firstActiveRes6(0,0)<0;1,0>
78+
; CHECK-VISAASM-NEXT: cmp.eq (M1, 16) P2 nonuniform_0v(0,0)<1;1,0> firstActiveRes6(0,0)<0;1,0>
7979
; CHECK-VISAASM-NEXT: (P2) lsc_load.ugm.ca.ca (M1, 16) V0032:d32x3 bss(firstActiveRes6)[offset]:a32
8080
; CHECK-VISAASM-NEXT: (P2) goto (M1, 16) _test1_006_unroll_merge
8181
;
@@ -85,10 +85,10 @@ define spir_kernel void @test1(i32 %src1, i32 %val, i32 addrspace(1)* %dst) {
8585
; CHECK-VISAASM-NEXT: mov (M1_NM, 1) V0043(0,0)<1> P3
8686
; CHECK-VISAASM-NEXT: mov (M1_NM, 1) V0041(0,0)<1> V0043(0,0)<0;1,0>
8787
; CHECK-VISAASM-NEXT: fbl (M1_NM, 1) V0045(0,0)<1> V0041(0,0)<0;1,0>
88-
; CHECK-VISAASM-NEXT: shl (M1_NM, 1) ShuffleTmp_0(0,0)<1> V0046(0,0)<0;1,0> 0x2:uw
89-
; CHECK-VISAASM-NEXT: addr_add (M1_NM, 1) A1(0)<1> &nonuniform_0 ShuffleTmp_0(0,0)<0;1,0>
88+
; CHECK-VISAASM-NEXT: shl (M1_NM, 1) ShuffleTmp_0v(0,0)<1> V0046(0,0)<0;1,0> 0x2:uw
89+
; CHECK-VISAASM-NEXT: addr_add (M1_NM, 1) A1(0)<1> &nonuniform_0v ShuffleTmp_0v(0,0)<0;1,0>
9090
; CHECK-VISAASM-NEXT: mov (M1_NM, 1) firstActiveRes4(0,0)<1> r[A1(0),0]<0;1,0>:ud
91-
; CHECK-VISAASM-NEXT: cmp.eq (M1, 16) P4 nonuniform_0(0,0)<1;1,0> firstActiveRes4(0,0)<0;1,0>
91+
; CHECK-VISAASM-NEXT: cmp.eq (M1, 16) P4 nonuniform_0v(0,0)<1;1,0> firstActiveRes4(0,0)<0;1,0>
9292
; CHECK-VISAASM-NEXT: (P4) lsc_load.ugm.ca.ca (M1, 16) V0032:d32x3 bss(firstActiveRes4)[offset]:a32
9393
; CHECK-VISAASM-NEXT: (P4) goto (M1, 16) _test1_006_unroll_merge
9494
;
@@ -98,10 +98,10 @@ define spir_kernel void @test1(i32 %src1, i32 %val, i32 addrspace(1)* %dst) {
9898
; CHECK-VISAASM-NEXT: mov (M1_NM, 1) V0050(0,0)<1> P5
9999
; CHECK-VISAASM-NEXT: mov (M1_NM, 1) V0048(0,0)<1> V0050(0,0)<0;1,0>
100100
; CHECK-VISAASM-NEXT: fbl (M1_NM, 1) V0052(0,0)<1> V0048(0,0)<0;1,0>
101-
; CHECK-VISAASM-NEXT: shl (M1_NM, 1) ShuffleTmp_1(0,0)<1> V0053(0,0)<0;1,0> 0x2:uw
102-
; CHECK-VISAASM-NEXT: addr_add (M1_NM, 1) A2(0)<1> &nonuniform_0 ShuffleTmp_1(0,0)<0;1,0>
101+
; CHECK-VISAASM-NEXT: shl (M1_NM, 1) ShuffleTmp_1v(0,0)<1> V0053(0,0)<0;1,0> 0x2:uw
102+
; CHECK-VISAASM-NEXT: addr_add (M1_NM, 1) A2(0)<1> &nonuniform_0v ShuffleTmp_1v(0,0)<0;1,0>
103103
; CHECK-VISAASM-NEXT: mov (M1_NM, 1) firstActiveRes2(0,0)<1> r[A2(0),0]<0;1,0>:ud
104-
; CHECK-VISAASM-NEXT: cmp.eq (M1, 16) P6 nonuniform_0(0,0)<1;1,0> firstActiveRes2(0,0)<0;1,0>
104+
; CHECK-VISAASM-NEXT: cmp.eq (M1, 16) P6 nonuniform_0v(0,0)<1;1,0> firstActiveRes2(0,0)<0;1,0>
105105
; CHECK-VISAASM-NEXT: (P6) lsc_load.ugm.ca.ca (M1, 16) V0032:d32x3 bss(firstActiveRes2)[offset]:a32
106106
; CHECK-VISAASM-NEXT: (P6) goto (M1, 16) _test1_006_unroll_merge
107107
;
@@ -111,20 +111,20 @@ define spir_kernel void @test1(i32 %src1, i32 %val, i32 addrspace(1)* %dst) {
111111
; CHECK-VISAASM-NEXT: mov (M1_NM, 1) V0057(0,0)<1> P7
112112
; CHECK-VISAASM-NEXT: mov (M1_NM, 1) V0055(0,0)<1> V0057(0,0)<0;1,0>
113113
; CHECK-VISAASM-NEXT: fbl (M1_NM, 1) V0059(0,0)<1> V0055(0,0)<0;1,0>
114-
; CHECK-VISAASM-NEXT: shl (M1_NM, 1) ShuffleTmp_2(0,0)<1> V0060(0,0)<0;1,0> 0x2:uw
115-
; CHECK-VISAASM-NEXT: addr_add (M1_NM, 1) A3(0)<1> &nonuniform_0 ShuffleTmp_2(0,0)<0;1,0>
114+
; CHECK-VISAASM-NEXT: shl (M1_NM, 1) ShuffleTmp_2v(0,0)<1> V0060(0,0)<0;1,0> 0x2:uw
115+
; CHECK-VISAASM-NEXT: addr_add (M1_NM, 1) A3(0)<1> &nonuniform_0v ShuffleTmp_2v(0,0)<0;1,0>
116116
; CHECK-VISAASM-NEXT: mov (M1_NM, 1) firstActiveRes(0,0)<1> r[A3(0),0]<0;1,0>:ud
117-
; CHECK-VISAASM-NEXT: cmp.eq (M1, 16) P8 nonuniform_0(0,0)<1;1,0> firstActiveRes(0,0)<0;1,0>
117+
; CHECK-VISAASM-NEXT: cmp.eq (M1, 16) P8 nonuniform_0v(0,0)<1;1,0> firstActiveRes(0,0)<0;1,0>
118118
; CHECK-VISAASM-NEXT: (P8) lsc_load.ugm.ca.ca (M1, 16) V0032:d32x3 bss(firstActiveRes)[offset]:a32
119119
; CHECK-VISAASM-NEXT: (!P8) goto (M1, 16) _test1_001_partial_check5
120120
;
121121
; CHECK-VISAASM: _test1_006_unroll_merge:
122-
; CHECK-VISAASM-NEXT: mul (M1_NM, 1) V0061(0,0)<1> val_0(0,0)<0;1,0> 0x40:uw
122+
; CHECK-VISAASM-NEXT: mul (M1_NM, 1) V0061(0,0)<1> val_0v(0,0)<0;1,0> 0x40:uw
123123
; CHECK-VISAASM-NEXT: addr_add (M1_NM, 1) A4(0)<1> &V0032 V0061(0,0)<0;1,0>
124124
; CHECK-VISAASM-NEXT: mov (M1, 16) out(0,0)<1> r[A4(0),0]<8;8,1>:d
125-
; CHECK-VISAASM-NEXT: mov (M1_NM, 1) dst_0(0,0)<1> dst(0,0)<0;1,0>
126-
; CHECK-VISAASM-NEXT: mov (M1, 16) dstBroadcast_0(0,0)<2> dst_1(0,0)<0;1,0>
127-
; CHECK-VISAASM-NEXT: mov (M1, 16) dstBroadcast_0(0,1)<2> dst_1(0,1)<0;1,0>
125+
; CHECK-VISAASM-NEXT: mov (M1_NM, 1) dst_0v(0,0)<1> dst(0,0)<0;1,0>
126+
; CHECK-VISAASM-NEXT: mov (M1, 16) dstBroadcast_0v(0,0)<2> dst_1v(0,0)<0;1,0>
127+
; CHECK-VISAASM-NEXT: mov (M1, 16) dstBroadcast_0v(0,1)<2> dst_1v(0,1)<0;1,0>
128128
; CHECK-VISAASM-NEXT: lsc_store.ugm.wb.wb (M1, 16) flat[dstBroadcast]:a64 out:d32
129129
; CHECK-VISAASM-NEXT: ret (M1, 1)
130130

IGC/Compiler/tests/ResourceLoopUnroll/ResourceloopUnrollNestedSampler.ll

Lines changed: 15 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -66,7 +66,7 @@ define spir_kernel void @test1(<64 x i32> %src, float addrspace(1)* %dst) {
6666
; COM: check predicate load and lifetime.start
6767
; CHECK-VISAASM: _main_0:
6868
; CHECK-VISAASM-NEXT: mov (M1, 16) svn0(0,0)<1> threadIdInGroupX(0,0)<1;1,0>
69-
; CHECK-VISAASM-NEXT: mov (M1, 16) sampler(0,0)<1> svn0_0(0,0)<1;1,0>
69+
; CHECK-VISAASM-NEXT: mov (M1, 16) sampler(0,0)<1> svn0_0v(0,0)<1;1,0>
7070
; CHECK-VISAASM-NEXT: add (M1_NM, 1) texture(0,0)<1> src(2,8)<0;1,0> 0x500:w
7171
; CHECK-VISAASM-NEXT: lifetime.start V0032
7272
;
@@ -77,9 +77,9 @@ define spir_kernel void @test1(<64 x i32> %src, float addrspace(1)* %dst) {
7777
; CHECK-VISAASM-NEXT: mov (M1_NM, 1) V0034(0,0)<1> V0036(0,0)<0;1,0>
7878
; CHECK-VISAASM-NEXT: fbl (M1_NM, 1) V0038(0,0)<1> V0034(0,0)<0;1,0>
7979
; CHECK-VISAASM-NEXT: shl (M1_NM, 1) ShuffleTmp(0,0)<1> V0039(0,0)<0;1,0> 0x2:uw
80-
; CHECK-VISAASM-NEXT: addr_add (M1_NM, 1) A0(0)<1> &sampler_0 ShuffleTmp(0,0)<0;1,0>
80+
; CHECK-VISAASM-NEXT: addr_add (M1_NM, 1) A0(0)<1> &sampler_0v ShuffleTmp(0,0)<0;1,0>
8181
; CHECK-VISAASM-NEXT: mov (M1_NM, 1) firstActiveSampler6(0,0)<1> r[A0(0),0]<0;1,0>:ud
82-
; CHECK-VISAASM-NEXT: cmp.eq (M1, 16) P2 sampler_0(0,0)<1;1,0> firstActiveSampler6(0,0)<0;1,0>
82+
; CHECK-VISAASM-NEXT: cmp.eq (M1, 16) P2 sampler_0v(0,0)<1;1,0> firstActiveSampler6(0,0)<0;1,0>
8383
; CHECK-VISAASM-NEXT: mov (M1, 16) V0040(0,0)<1> 0x0:f
8484
; CHECK-VISAASM-NEXT: movs (M1_NM, 1) S31(0) firstActiveSampler6(0,0)<0;1,0>
8585
; CHECK-VISAASM-NEXT: movs (M1_NM, 1) %bss(0) texture(0,0)<0;1,0>
@@ -92,10 +92,10 @@ define spir_kernel void @test1(<64 x i32> %src, float addrspace(1)* %dst) {
9292
; CHECK-VISAASM-NEXT: mov (M1_NM, 1) V0044(0,0)<1> P3
9393
; CHECK-VISAASM-NEXT: mov (M1_NM, 1) V0042(0,0)<1> V0044(0,0)<0;1,0>
9494
; CHECK-VISAASM-NEXT: fbl (M1_NM, 1) V0046(0,0)<1> V0042(0,0)<0;1,0>
95-
; CHECK-VISAASM-NEXT: shl (M1_NM, 1) ShuffleTmp_0(0,0)<1> V0047(0,0)<0;1,0> 0x2:uw
96-
; CHECK-VISAASM-NEXT: addr_add (M1_NM, 1) A1(0)<1> &sampler_0 ShuffleTmp_0(0,0)<0;1,0>
95+
; CHECK-VISAASM-NEXT: shl (M1_NM, 1) ShuffleTmp_0v(0,0)<1> V0047(0,0)<0;1,0> 0x2:uw
96+
; CHECK-VISAASM-NEXT: addr_add (M1_NM, 1) A1(0)<1> &sampler_0v ShuffleTmp_0v(0,0)<0;1,0>
9797
; CHECK-VISAASM-NEXT: mov (M1_NM, 1) firstActiveSampler4(0,0)<1> r[A1(0),0]<0;1,0>:ud
98-
; CHECK-VISAASM-NEXT: cmp.eq (M1, 16) P4 sampler_0(0,0)<1;1,0> firstActiveSampler4(0,0)<0;1,0>
98+
; CHECK-VISAASM-NEXT: cmp.eq (M1, 16) P4 sampler_0v(0,0)<1;1,0> firstActiveSampler4(0,0)<0;1,0>
9999
; CHECK-VISAASM-NEXT: mov (M1, 16) V0048(0,0)<1> 0x0:f
100100
; CHECK-VISAASM-NEXT: movs (M1_NM, 1) S31(0) firstActiveSampler4(0,0)<0;1,0>
101101
; CHECK-VISAASM-NEXT: movs (M1_NM, 1) %bss(0) texture(0,0)<0;1,0>
@@ -108,10 +108,10 @@ define spir_kernel void @test1(<64 x i32> %src, float addrspace(1)* %dst) {
108108
; CHECK-VISAASM-NEXT: mov (M1_NM, 1) V0052(0,0)<1> P5
109109
; CHECK-VISAASM-NEXT: mov (M1_NM, 1) V0050(0,0)<1> V0052(0,0)<0;1,0>
110110
; CHECK-VISAASM-NEXT: fbl (M1_NM, 1) V0054(0,0)<1> V0050(0,0)<0;1,0>
111-
; CHECK-VISAASM-NEXT: shl (M1_NM, 1) ShuffleTmp_1(0,0)<1> V0055(0,0)<0;1,0> 0x2:uw
112-
; CHECK-VISAASM-NEXT: addr_add (M1_NM, 1) A2(0)<1> &sampler_0 ShuffleTmp_1(0,0)<0;1,0>
111+
; CHECK-VISAASM-NEXT: shl (M1_NM, 1) ShuffleTmp_1v(0,0)<1> V0055(0,0)<0;1,0> 0x2:uw
112+
; CHECK-VISAASM-NEXT: addr_add (M1_NM, 1) A2(0)<1> &sampler_0v ShuffleTmp_1v(0,0)<0;1,0>
113113
; CHECK-VISAASM-NEXT: mov (M1_NM, 1) firstActiveSampler2(0,0)<1> r[A2(0),0]<0;1,0>:ud
114-
; CHECK-VISAASM-NEXT: cmp.eq (M1, 16) P6 sampler_0(0,0)<1;1,0> firstActiveSampler2(0,0)<0;1,0>
114+
; CHECK-VISAASM-NEXT: cmp.eq (M1, 16) P6 sampler_0v(0,0)<1;1,0> firstActiveSampler2(0,0)<0;1,0>
115115
; CHECK-VISAASM-NEXT: mov (M1, 16) V0056(0,0)<1> 0x0:f
116116
; CHECK-VISAASM-NEXT: movs (M1_NM, 1) S31(0) firstActiveSampler2(0,0)<0;1,0>
117117
; CHECK-VISAASM-NEXT: movs (M1_NM, 1) %bss(0) texture(0,0)<0;1,0>
@@ -124,10 +124,10 @@ define spir_kernel void @test1(<64 x i32> %src, float addrspace(1)* %dst) {
124124
; CHECK-VISAASM-NEXT: mov (M1_NM, 1) V0060(0,0)<1> P7
125125
; CHECK-VISAASM-NEXT: mov (M1_NM, 1) V0058(0,0)<1> V0060(0,0)<0;1,0>
126126
; CHECK-VISAASM-NEXT: fbl (M1_NM, 1) V0062(0,0)<1> V0058(0,0)<0;1,0>
127-
; CHECK-VISAASM-NEXT: shl (M1_NM, 1) ShuffleTmp_2(0,0)<1> V0063(0,0)<0;1,0> 0x2:uw
128-
; CHECK-VISAASM-NEXT: addr_add (M1_NM, 1) A3(0)<1> &sampler_0 ShuffleTmp_2(0,0)<0;1,0>
127+
; CHECK-VISAASM-NEXT: shl (M1_NM, 1) ShuffleTmp_2v(0,0)<1> V0063(0,0)<0;1,0> 0x2:uw
128+
; CHECK-VISAASM-NEXT: addr_add (M1_NM, 1) A3(0)<1> &sampler_0v ShuffleTmp_2v(0,0)<0;1,0>
129129
; CHECK-VISAASM-NEXT: mov (M1_NM, 1) firstActiveSampler(0,0)<1> r[A3(0),0]<0;1,0>:ud
130-
; CHECK-VISAASM-NEXT: cmp.eq (M1, 16) P8 sampler_0(0,0)<1;1,0> firstActiveSampler(0,0)<0;1,0>
130+
; CHECK-VISAASM-NEXT: cmp.eq (M1, 16) P8 sampler_0v(0,0)<1;1,0> firstActiveSampler(0,0)<0;1,0>
131131
; CHECK-VISAASM-NEXT: mov (M1, 16) V0064(0,0)<1> 0x0:f
132132
; CHECK-VISAASM-NEXT: movs (M1_NM, 1) S31(0) firstActiveSampler(0,0)<0;1,0>
133133
; CHECK-VISAASM-NEXT: movs (M1_NM, 1) %bss(0) texture(0,0)<0;1,0>
@@ -136,9 +136,9 @@ define spir_kernel void @test1(<64 x i32> %src, float addrspace(1)* %dst) {
136136
;
137137
; CHECK-VISAASM: _test1_006_unroll_merge:
138138
; CHECK-VISAASM-NEXT: mov (M1, 16) out(0,0)<1> V0032(0,0)<1;1,0>
139-
; CHECK-VISAASM-NEXT: mov (M1_NM, 1) dst_0(0,0)<1> dst(0,0)<0;1,0>
140-
; CHECK-VISAASM-NEXT: mov (M1, 16) dstBroadcast_0(0,0)<2> dst_1(0,0)<0;1,0>
141-
; CHECK-VISAASM-NEXT: mov (M1, 16) dstBroadcast_0(0,1)<2> dst_1(0,1)<0;1,0>
139+
; CHECK-VISAASM-NEXT: mov (M1_NM, 1) dst_0v(0,0)<1> dst(0,0)<0;1,0>
140+
; CHECK-VISAASM-NEXT: mov (M1, 16) dstBroadcast_0v(0,0)<2> dst_1v(0,0)<0;1,0>
141+
; CHECK-VISAASM-NEXT: mov (M1, 16) dstBroadcast_0v(0,1)<2> dst_1v(0,1)<0;1,0>
142142
; CHECK-VISAASM-NEXT: lsc_store.ugm.wb.wb (M1, 16) flat[dstBroadcast]:a64 out:d32
143143
; CHECK-VISAASM-NEXT: ret (M1, 1)
144144

IGC/ocloc_tests/Builtins/intel_sub_group_shuffle.cl

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,7 @@ kernel void test_intel_sub_group_shuffle_uniform_non_immediate_index_simd32(glob
3030
size_t gid = get_global_id(0);
3131
int x = in[gid];
3232

33-
// CHECK: shl (M1_NM, 1) ShuffleTmp(0,0)<1> which_sub_group_local_id_0(0,0)<0;1,0> 0x2:uw
33+
// CHECK: shl (M1_NM, 1) ShuffleTmp(0,0)<1> which_sub_group_local_id{{.*}}(0,0)<0;1,0> 0x2:uw
3434
// CHECK-NEXT: addr_add (M1_NM, 1) A0(0)<1> &{{V[0-9]+}} ShuffleTmp(0,0)<0;1,0>
3535
// CHECK-NEXT: mov (M1_NM, 1) simdShuffle(0,0)<1> r[A0(0),0]<0;1,0>:d
3636

visa/VISAKernelImpl.cpp

Lines changed: 58 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -866,8 +866,11 @@ void VISAKernelImpl::ensureVariableNameUnique(const char *&varName) {
866866
// 2. "add.i.i" ==> "add_i_i" (LLVM compound name)
867867
// 3. "mul" ==> "mul_" (vISA keyword)
868868
// 4. suppose both variable "x" and "x0" exist
869-
// "x" ==> "x_1" (since "x0" already used)
870-
// "x0" ==> "x0_1" (it's a dumb suffixing strategy)
869+
// "x" ==> "x_1v" (since "x" already used)
870+
// "x0" ==> "x0_1v" (append suffx "_#v")
871+
// Suffix "_#v" is used to avoid treating a llvm name, such
872+
// as n.123, as visa-generated name. It is useful to fast-check
873+
// whether a name is visa-generated.
871874
std::stringstream escdName;
872875

873876
// step 1
@@ -887,15 +890,61 @@ void VISAKernelImpl::ensureVariableNameUnique(const char *&varName) {
887890
while (isReservedName(escdName.str()))
888891
escdName << '_';
889892

890-
// case 4: if "x" already exists, then use "x_#" where # is 0,1,..
893+
// case 4.1: if "x" already exists; or
894+
// case 4.2: if "x" is in "y_#v" that has been auto-generated here
895+
// but not in map.
896+
// In both cases, use "x_#v" where # is 0,1,...
897+
// (Note that suffix '#v' is for fast-checking visa-generated names.
898+
// The 'v' is arbitrary and is to mean visa.)
891899
std::string varNameS = escdName.str();
892-
if (auto it = varNames.find(varNameS); it != varNames.end()) {
893-
size_t instanceNumber = it->second;
894-
varNames[varNameS] = instanceNumber + 1;
900+
const size_t Len = varNameS.length();
901+
// case 4.1
902+
bool existing = (varNames.find(varNameS) != varNames.end());
903+
// case 4.2, fast-check the last "#v" to see if it can collide with
904+
// visa-generated names.
905+
if (!existing && Len > 2 && isdigit(varNameS.at(Len - 2)) &&
906+
varNameS.at(Len - 1) == 'v') {
907+
// case 4.2
908+
size_t No;
909+
size_t pos = varNameS.rfind('_');
910+
if (pos != std::string::npos) {
911+
std::string suffix = varNameS.substr(pos + 1);
912+
No = 0;
913+
bool allDigit = true;
914+
// skip the last 'v'
915+
for (int i = 0, e = suffix.length() - 1; i < e; ++i) {
916+
char c = suffix.at(i);
917+
if (!isdigit(c)) {
918+
allDigit = false;
919+
break;
920+
}
921+
No = No * 10 + (c - '0');
922+
}
895923

896-
std::stringstream ss;
897-
ss << escdName.str() << '_' << instanceNumber;
898-
varNameS = ss.str();
924+
if (allDigit) {
925+
std::string prefix = varNameS.substr(0, pos);
926+
if (auto it = varNames.find(prefix); it != varNames.end()) {
927+
size_t instNo = it->second;
928+
if (instNo > No) {
929+
// Create entry for varNameS
930+
varNames.emplace(varNameS, 0);
931+
existing = true;
932+
}
933+
}
934+
}
935+
}
936+
}
937+
if (existing) {
938+
size_t instanceNumber = varNames[varNameS];
939+
std::string origVarNameS = varNameS;
940+
// Make sure the new name does not exist yet.
941+
do {
942+
std::stringstream ss;
943+
ss << escdName.str() << '_' << instanceNumber << 'v';
944+
varNameS = ss.str();
945+
++instanceNumber;
946+
} while (varNames.find(varNameS) != varNames.end());
947+
varNames[origVarNameS] = instanceNumber;
899948
} else {
900949
varNames.emplace(varNameS, 0);
901950
}

0 commit comments

Comments
 (0)