Skip to content

Commit 356b0dd

Browse files
authored
[AMDGPU][CodeGen] enable D16Writes32BitVgpr for gfx12 (#165587)
The issue in #157795 is also reproducible on gfx12. This workaround should only be used in true16 mode. Although gfx12 does not yet have true16 mode enabled, a downstream branch could test true16 mode manually, so enable this workaround now.
1 parent 8475a66 commit 356b0dd

File tree

2 files changed

+204
-2
lines changed

2 files changed

+204
-2
lines changed

llvm/lib/Target/AMDGPU/AMDGPU.td

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2069,6 +2069,7 @@ def FeatureISAVersion12 : FeatureSet<
20692069
FeatureMemoryAtomicFAddF32DenormalSupport,
20702070
FeatureBVHDualAndBVH8Insts,
20712071
FeatureWaitsBeforeSystemScopeStores,
2072+
FeatureD16Writes32BitVgpr
20722073
]>;
20732074

20742075
def FeatureISAVersion12_50 : FeatureSet<
@@ -2143,6 +2144,7 @@ def FeatureISAVersion12_50 : FeatureSet<
21432144
FeatureSupportsXNACK,
21442145
FeatureXNACK,
21452146
FeatureClusters,
2147+
FeatureD16Writes32BitVgpr,
21462148
]>;
21472149

21482150
def FeatureISAVersion12_51 : FeatureSet<

llvm/test/CodeGen/AMDGPU/spillv16.ll

Lines changed: 202 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,8 @@
11
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
22
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 -mattr=+real-true16 -enable-misched=0 -post-RA-scheduler=0 -stress-regalloc=8 < %s | FileCheck %s -check-prefixes=GCN,GCN-TRUE16
33
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 -mattr=-real-true16 -enable-misched=0 -post-RA-scheduler=0 -stress-regalloc=8 < %s | FileCheck %s -check-prefixes=GCN,GCN-FAKE16
4+
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1200 -mattr=+real-true16,+d16-write-vgpr32 -enable-misched=0 -post-RA-scheduler=0 -stress-regalloc=8 < %s | FileCheck %s -check-prefixes=GFX12-TRUE16,GFX12-TRUE16-D16W32
5+
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1200 -mattr=+real-true16,-d16-write-vgpr32 -enable-misched=0 -post-RA-scheduler=0 -stress-regalloc=8 < %s | FileCheck %s -check-prefixes=GFX12-TRUE16,GFX12-TRUE16-D16W16
46
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1250 -mattr=+real-true16 -enable-misched=0 -post-RA-scheduler=0 -stress-regalloc=8 < %s | FileCheck %s -check-prefixes=GFX1250,GFX1250-TRUE16
57
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1250 -mattr=-real-true16 -enable-misched=0 -post-RA-scheduler=0 -stress-regalloc=8 < %s | FileCheck %s -check-prefixes=GFX1250,GFX1250-FAKE16
68

@@ -35,6 +37,26 @@ define void @spill_i16_alu() {
3537
; GCN-FAKE16-NEXT: s_waitcnt_vscnt null, 0x0
3638
; GCN-FAKE16-NEXT: s_setpc_b64 s[30:31]
3739
;
40+
; GFX12-TRUE16-LABEL: spill_i16_alu:
41+
; GFX12-TRUE16: ; %bb.0: ; %entry
42+
; GFX12-TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0
43+
; GFX12-TRUE16-NEXT: s_wait_expcnt 0x0
44+
; GFX12-TRUE16-NEXT: s_wait_samplecnt 0x0
45+
; GFX12-TRUE16-NEXT: s_wait_bvhcnt 0x0
46+
; GFX12-TRUE16-NEXT: s_wait_kmcnt 0x0
47+
; GFX12-TRUE16-NEXT: scratch_load_d16_b16 v0, off, s32 scope:SCOPE_SYS
48+
; GFX12-TRUE16-NEXT: s_wait_loadcnt 0x0
49+
; GFX12-TRUE16-NEXT: v_add_nc_u16 v0.l, 0x7b, v0.l
50+
; GFX12-TRUE16-NEXT: scratch_store_b16 off, v0, s32 offset:2 ; 2-byte Folded Spill
51+
; GFX12-TRUE16-NEXT: ;;#ASMSTART
52+
; GFX12-TRUE16-NEXT: ;;#ASMEND
53+
; GFX12-TRUE16-NEXT: scratch_load_d16_b16 v0, off, s32 offset:2 th:TH_LOAD_LU ; 2-byte Folded Reload
54+
; GFX12-TRUE16-NEXT: s_wait_loadcnt 0x0
55+
; GFX12-TRUE16-NEXT: s_wait_storecnt 0x0
56+
; GFX12-TRUE16-NEXT: scratch_store_b16 off, v0, s32 scope:SCOPE_SYS
57+
; GFX12-TRUE16-NEXT: s_wait_storecnt 0x0
58+
; GFX12-TRUE16-NEXT: s_setpc_b64 s[30:31]
59+
;
3860
; GFX1250-TRUE16-LABEL: spill_i16_alu:
3961
; GFX1250-TRUE16: ; %bb.0: ; %entry
4062
; GFX1250-TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0
@@ -126,6 +148,56 @@ define void @spill_i16_alu_two_vals() {
126148
; GCN-FAKE16-NEXT: s_waitcnt_vscnt null, 0x0
127149
; GCN-FAKE16-NEXT: s_setpc_b64 s[30:31]
128150
;
151+
; GFX12-TRUE16-D16W32-LABEL: spill_i16_alu_two_vals:
152+
; GFX12-TRUE16-D16W32: ; %bb.0: ; %entry
153+
; GFX12-TRUE16-D16W32-NEXT: s_wait_loadcnt_dscnt 0x0
154+
; GFX12-TRUE16-D16W32-NEXT: s_wait_expcnt 0x0
155+
; GFX12-TRUE16-D16W32-NEXT: s_wait_samplecnt 0x0
156+
; GFX12-TRUE16-D16W32-NEXT: s_wait_bvhcnt 0x0
157+
; GFX12-TRUE16-D16W32-NEXT: s_wait_kmcnt 0x0
158+
; GFX12-TRUE16-D16W32-NEXT: scratch_load_d16_b16 v0, off, s32 scope:SCOPE_SYS
159+
; GFX12-TRUE16-D16W32-NEXT: s_wait_loadcnt 0x0
160+
; GFX12-TRUE16-D16W32-NEXT: v_add_nc_u16 v0.l, 0x7b, v0.l
161+
; GFX12-TRUE16-D16W32-NEXT: scratch_store_b16 off, v0, s32 offset:6 ; 2-byte Folded Spill
162+
; GFX12-TRUE16-D16W32-NEXT: ;;#ASMSTART
163+
; GFX12-TRUE16-D16W32-NEXT: ;;#ASMEND
164+
; GFX12-TRUE16-D16W32-NEXT: scratch_load_d16_b16 v0, off, s32 offset:4 scope:SCOPE_SYS
165+
; GFX12-TRUE16-D16W32-NEXT: s_wait_loadcnt 0x0
166+
; GFX12-TRUE16-D16W32-NEXT: scratch_load_d16_hi_b16 v0, off, s32 offset:6 th:TH_LOAD_LU ; 2-byte Folded Reload
167+
; GFX12-TRUE16-D16W32-NEXT: s_wait_loadcnt 0x0
168+
; GFX12-TRUE16-D16W32-NEXT: v_add_nc_u16 v0.l, 0x7b, v0.l
169+
; GFX12-TRUE16-D16W32-NEXT: s_wait_storecnt 0x0
170+
; GFX12-TRUE16-D16W32-NEXT: scratch_store_d16_hi_b16 off, v0, s32 scope:SCOPE_SYS
171+
; GFX12-TRUE16-D16W32-NEXT: s_wait_storecnt 0x0
172+
; GFX12-TRUE16-D16W32-NEXT: scratch_store_b16 off, v0, s32 offset:4 scope:SCOPE_SYS
173+
; GFX12-TRUE16-D16W32-NEXT: s_wait_storecnt 0x0
174+
; GFX12-TRUE16-D16W32-NEXT: s_setpc_b64 s[30:31]
175+
;
176+
; GFX12-TRUE16-D16W16-LABEL: spill_i16_alu_two_vals:
177+
; GFX12-TRUE16-D16W16: ; %bb.0: ; %entry
178+
; GFX12-TRUE16-D16W16-NEXT: s_wait_loadcnt_dscnt 0x0
179+
; GFX12-TRUE16-D16W16-NEXT: s_wait_expcnt 0x0
180+
; GFX12-TRUE16-D16W16-NEXT: s_wait_samplecnt 0x0
181+
; GFX12-TRUE16-D16W16-NEXT: s_wait_bvhcnt 0x0
182+
; GFX12-TRUE16-D16W16-NEXT: s_wait_kmcnt 0x0
183+
; GFX12-TRUE16-D16W16-NEXT: scratch_load_d16_b16 v0, off, s32 scope:SCOPE_SYS
184+
; GFX12-TRUE16-D16W16-NEXT: s_wait_loadcnt 0x0
185+
; GFX12-TRUE16-D16W16-NEXT: v_add_nc_u16 v0.l, 0x7b, v0.l
186+
; GFX12-TRUE16-D16W16-NEXT: scratch_store_b16 off, v0, s32 offset:6 ; 2-byte Folded Spill
187+
; GFX12-TRUE16-D16W16-NEXT: ;;#ASMSTART
188+
; GFX12-TRUE16-D16W16-NEXT: ;;#ASMEND
189+
; GFX12-TRUE16-D16W16-NEXT: scratch_load_d16_b16 v0, off, s32 offset:4 scope:SCOPE_SYS
190+
; GFX12-TRUE16-D16W16-NEXT: s_wait_loadcnt 0x0
191+
; GFX12-TRUE16-D16W16-NEXT: scratch_load_d16_hi_b16 v0, off, s32 offset:6 th:TH_LOAD_LU ; 2-byte Folded Reload
192+
; GFX12-TRUE16-D16W16-NEXT: v_add_nc_u16 v0.l, 0x7b, v0.l
193+
; GFX12-TRUE16-D16W16-NEXT: s_wait_loadcnt 0x0
194+
; GFX12-TRUE16-D16W16-NEXT: s_wait_storecnt 0x0
195+
; GFX12-TRUE16-D16W16-NEXT: scratch_store_d16_hi_b16 off, v0, s32 scope:SCOPE_SYS
196+
; GFX12-TRUE16-D16W16-NEXT: s_wait_storecnt 0x0
197+
; GFX12-TRUE16-D16W16-NEXT: scratch_store_b16 off, v0, s32 offset:4 scope:SCOPE_SYS
198+
; GFX12-TRUE16-D16W16-NEXT: s_wait_storecnt 0x0
199+
; GFX12-TRUE16-D16W16-NEXT: s_setpc_b64 s[30:31]
200+
;
129201
; GFX1250-TRUE16-LABEL: spill_i16_alu_two_vals:
130202
; GFX1250-TRUE16: ; %bb.0: ; %entry
131203
; GFX1250-TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0
@@ -223,6 +295,25 @@ define void @spill_i16() {
223295
; GCN-FAKE16-NEXT: s_waitcnt_vscnt null, 0x0
224296
; GCN-FAKE16-NEXT: s_setpc_b64 s[30:31]
225297
;
298+
; GFX12-TRUE16-LABEL: spill_i16:
299+
; GFX12-TRUE16: ; %bb.0: ; %entry
300+
; GFX12-TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0
301+
; GFX12-TRUE16-NEXT: s_wait_expcnt 0x0
302+
; GFX12-TRUE16-NEXT: s_wait_samplecnt 0x0
303+
; GFX12-TRUE16-NEXT: s_wait_bvhcnt 0x0
304+
; GFX12-TRUE16-NEXT: s_wait_kmcnt 0x0
305+
; GFX12-TRUE16-NEXT: scratch_load_d16_b16 v0, off, s32 scope:SCOPE_SYS
306+
; GFX12-TRUE16-NEXT: s_wait_loadcnt 0x0
307+
; GFX12-TRUE16-NEXT: scratch_store_b16 off, v0, s32 offset:2 ; 2-byte Folded Spill
308+
; GFX12-TRUE16-NEXT: ;;#ASMSTART
309+
; GFX12-TRUE16-NEXT: ;;#ASMEND
310+
; GFX12-TRUE16-NEXT: scratch_load_d16_b16 v0, off, s32 offset:2 th:TH_LOAD_LU ; 2-byte Folded Reload
311+
; GFX12-TRUE16-NEXT: s_wait_loadcnt 0x0
312+
; GFX12-TRUE16-NEXT: s_wait_storecnt 0x0
313+
; GFX12-TRUE16-NEXT: scratch_store_b16 off, v0, s32 scope:SCOPE_SYS
314+
; GFX12-TRUE16-NEXT: s_wait_storecnt 0x0
315+
; GFX12-TRUE16-NEXT: s_setpc_b64 s[30:31]
316+
;
226317
; GFX1250-LABEL: spill_i16:
227318
; GFX1250: ; %bb.0: ; %entry
228319
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
@@ -282,6 +373,25 @@ define void @spill_half() {
282373
; GCN-FAKE16-NEXT: s_waitcnt_vscnt null, 0x0
283374
; GCN-FAKE16-NEXT: s_setpc_b64 s[30:31]
284375
;
376+
; GFX12-TRUE16-LABEL: spill_half:
377+
; GFX12-TRUE16: ; %bb.0: ; %entry
378+
; GFX12-TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0
379+
; GFX12-TRUE16-NEXT: s_wait_expcnt 0x0
380+
; GFX12-TRUE16-NEXT: s_wait_samplecnt 0x0
381+
; GFX12-TRUE16-NEXT: s_wait_bvhcnt 0x0
382+
; GFX12-TRUE16-NEXT: s_wait_kmcnt 0x0
383+
; GFX12-TRUE16-NEXT: scratch_load_d16_b16 v0, off, s32 scope:SCOPE_SYS
384+
; GFX12-TRUE16-NEXT: s_wait_loadcnt 0x0
385+
; GFX12-TRUE16-NEXT: scratch_store_b16 off, v0, s32 offset:2 ; 2-byte Folded Spill
386+
; GFX12-TRUE16-NEXT: ;;#ASMSTART
387+
; GFX12-TRUE16-NEXT: ;;#ASMEND
388+
; GFX12-TRUE16-NEXT: scratch_load_d16_b16 v0, off, s32 offset:2 th:TH_LOAD_LU ; 2-byte Folded Reload
389+
; GFX12-TRUE16-NEXT: s_wait_loadcnt 0x0
390+
; GFX12-TRUE16-NEXT: s_wait_storecnt 0x0
391+
; GFX12-TRUE16-NEXT: scratch_store_b16 off, v0, s32 scope:SCOPE_SYS
392+
; GFX12-TRUE16-NEXT: s_wait_storecnt 0x0
393+
; GFX12-TRUE16-NEXT: s_setpc_b64 s[30:31]
394+
;
285395
; GFX1250-LABEL: spill_half:
286396
; GFX1250: ; %bb.0: ; %entry
287397
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
@@ -341,6 +451,25 @@ define void @spill_i16_from_v2i16() {
341451
; GCN-FAKE16-NEXT: s_waitcnt_vscnt null, 0x0
342452
; GCN-FAKE16-NEXT: s_setpc_b64 s[30:31]
343453
;
454+
; GFX12-TRUE16-LABEL: spill_i16_from_v2i16:
455+
; GFX12-TRUE16: ; %bb.0: ; %entry
456+
; GFX12-TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0
457+
; GFX12-TRUE16-NEXT: s_wait_expcnt 0x0
458+
; GFX12-TRUE16-NEXT: s_wait_samplecnt 0x0
459+
; GFX12-TRUE16-NEXT: s_wait_bvhcnt 0x0
460+
; GFX12-TRUE16-NEXT: s_wait_kmcnt 0x0
461+
; GFX12-TRUE16-NEXT: scratch_load_d16_b16 v0, off, s32 offset:2 scope:SCOPE_SYS
462+
; GFX12-TRUE16-NEXT: s_wait_loadcnt 0x0
463+
; GFX12-TRUE16-NEXT: scratch_store_b16 off, v0, s32 offset:8 ; 2-byte Folded Spill
464+
; GFX12-TRUE16-NEXT: ;;#ASMSTART
465+
; GFX12-TRUE16-NEXT: ;;#ASMEND
466+
; GFX12-TRUE16-NEXT: scratch_load_d16_b16 v0, off, s32 offset:8 th:TH_LOAD_LU ; 2-byte Folded Reload
467+
; GFX12-TRUE16-NEXT: s_wait_loadcnt 0x0
468+
; GFX12-TRUE16-NEXT: s_wait_storecnt 0x0
469+
; GFX12-TRUE16-NEXT: scratch_store_b16 off, v0, s32 offset:2 scope:SCOPE_SYS
470+
; GFX12-TRUE16-NEXT: s_wait_storecnt 0x0
471+
; GFX12-TRUE16-NEXT: s_setpc_b64 s[30:31]
472+
;
344473
; GFX1250-LABEL: spill_i16_from_v2i16:
345474
; GFX1250: ; %bb.0: ; %entry
346475
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
@@ -414,13 +543,39 @@ define void @spill_2xi16_from_v2i16() {
414543
; GCN-FAKE16-NEXT: s_waitcnt_vscnt null, 0x0
415544
; GCN-FAKE16-NEXT: s_setpc_b64 s[30:31]
416545
;
546+
; GFX12-TRUE16-LABEL: spill_2xi16_from_v2i16:
547+
; GFX12-TRUE16: ; %bb.0: ; %entry
548+
; GFX12-TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0
549+
; GFX12-TRUE16-NEXT: s_wait_expcnt 0x0
550+
; GFX12-TRUE16-NEXT: s_wait_samplecnt 0x0
551+
; GFX12-TRUE16-NEXT: s_wait_bvhcnt 0x0
552+
; GFX12-TRUE16-NEXT: s_wait_kmcnt 0x0
553+
; GFX12-TRUE16-NEXT: scratch_load_d16_b16 v0, off, s32 offset:2 scope:SCOPE_SYS
554+
; GFX12-TRUE16-NEXT: s_wait_loadcnt 0x0
555+
; GFX12-TRUE16-NEXT: scratch_store_b16 off, v0, s32 offset:8 ; 2-byte Folded Spill
556+
; GFX12-TRUE16-NEXT: scratch_load_d16_b16 v0, off, s32 scope:SCOPE_SYS
557+
; GFX12-TRUE16-NEXT: s_wait_loadcnt 0x0
558+
; GFX12-TRUE16-NEXT: scratch_store_b16 off, v0, s32 offset:10 ; 2-byte Folded Spill
559+
; GFX12-TRUE16-NEXT: ;;#ASMSTART
560+
; GFX12-TRUE16-NEXT: ;;#ASMEND
561+
; GFX12-TRUE16-NEXT: scratch_load_d16_b16 v0, off, s32 offset:8 th:TH_LOAD_LU ; 2-byte Folded Reload
562+
; GFX12-TRUE16-NEXT: s_wait_loadcnt 0x0
563+
; GFX12-TRUE16-NEXT: s_wait_storecnt 0x0
564+
; GFX12-TRUE16-NEXT: scratch_store_b16 off, v0, s32 offset:2 scope:SCOPE_SYS
565+
; GFX12-TRUE16-NEXT: s_wait_storecnt 0x0
566+
; GFX12-TRUE16-NEXT: scratch_load_d16_b16 v0, off, s32 offset:10 th:TH_LOAD_LU ; 2-byte Folded Reload
567+
; GFX12-TRUE16-NEXT: s_wait_loadcnt 0x0
568+
; GFX12-TRUE16-NEXT: scratch_store_b16 off, v0, s32 scope:SCOPE_SYS
569+
; GFX12-TRUE16-NEXT: s_wait_storecnt 0x0
570+
; GFX12-TRUE16-NEXT: s_setpc_b64 s[30:31]
571+
;
417572
; GFX1250-TRUE16-LABEL: spill_2xi16_from_v2i16:
418573
; GFX1250-TRUE16: ; %bb.0: ; %entry
419574
; GFX1250-TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0
420575
; GFX1250-TRUE16-NEXT: s_wait_kmcnt 0x0
421576
; GFX1250-TRUE16-NEXT: scratch_load_u16 v0, off, s32 offset:2 scope:SCOPE_SYS
422577
; GFX1250-TRUE16-NEXT: s_wait_loadcnt 0x0
423-
; GFX1250-TRUE16-NEXT: s_clause 0x1
578+
; GFX1250-TRUE16-NEXT: s_clause 0x1 ; 4-byte Folded Spill
424579
; GFX1250-TRUE16-NEXT: scratch_store_b32 off, v0, s32 offset:12
425580
; GFX1250-TRUE16-NEXT: scratch_load_u16 v0, off, s32 scope:SCOPE_SYS
426581
; GFX1250-TRUE16-NEXT: s_wait_loadcnt 0x0
@@ -444,7 +599,7 @@ define void @spill_2xi16_from_v2i16() {
444599
; GFX1250-FAKE16-NEXT: s_wait_kmcnt 0x0
445600
; GFX1250-FAKE16-NEXT: scratch_load_u16 v0, off, s32 offset:2 scope:SCOPE_SYS
446601
; GFX1250-FAKE16-NEXT: s_wait_loadcnt 0x0
447-
; GFX1250-FAKE16-NEXT: s_clause 0x1
602+
; GFX1250-FAKE16-NEXT: s_clause 0x1 ; 4-byte Folded Spill
448603
; GFX1250-FAKE16-NEXT: scratch_store_b32 off, v0, s32 offset:8
449604
; GFX1250-FAKE16-NEXT: scratch_load_u16 v0, off, s32 scope:SCOPE_SYS
450605
; GFX1250-FAKE16-NEXT: s_wait_loadcnt 0x0
@@ -520,6 +675,32 @@ define void @spill_2xi16_from_v2i16_one_free_reg() {
520675
; GCN-FAKE16-NEXT: s_waitcnt_vscnt null, 0x0
521676
; GCN-FAKE16-NEXT: s_setpc_b64 s[30:31]
522677
;
678+
; GFX12-TRUE16-LABEL: spill_2xi16_from_v2i16_one_free_reg:
679+
; GFX12-TRUE16: ; %bb.0: ; %entry
680+
; GFX12-TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0
681+
; GFX12-TRUE16-NEXT: s_wait_expcnt 0x0
682+
; GFX12-TRUE16-NEXT: s_wait_samplecnt 0x0
683+
; GFX12-TRUE16-NEXT: s_wait_bvhcnt 0x0
684+
; GFX12-TRUE16-NEXT: s_wait_kmcnt 0x0
685+
; GFX12-TRUE16-NEXT: scratch_load_d16_b16 v0, off, s32 offset:2 scope:SCOPE_SYS
686+
; GFX12-TRUE16-NEXT: s_wait_loadcnt 0x0
687+
; GFX12-TRUE16-NEXT: scratch_store_b16 off, v0, s32 offset:8 ; 2-byte Folded Spill
688+
; GFX12-TRUE16-NEXT: scratch_load_d16_b16 v0, off, s32 scope:SCOPE_SYS
689+
; GFX12-TRUE16-NEXT: s_wait_loadcnt 0x0
690+
; GFX12-TRUE16-NEXT: scratch_store_b16 off, v0, s32 offset:10 ; 2-byte Folded Spill
691+
; GFX12-TRUE16-NEXT: ;;#ASMSTART
692+
; GFX12-TRUE16-NEXT: ;;#ASMEND
693+
; GFX12-TRUE16-NEXT: scratch_load_d16_b16 v0, off, s32 offset:8 th:TH_LOAD_LU ; 2-byte Folded Reload
694+
; GFX12-TRUE16-NEXT: s_wait_loadcnt 0x0
695+
; GFX12-TRUE16-NEXT: s_wait_storecnt 0x0
696+
; GFX12-TRUE16-NEXT: scratch_store_b16 off, v0, s32 offset:2 scope:SCOPE_SYS
697+
; GFX12-TRUE16-NEXT: s_wait_storecnt 0x0
698+
; GFX12-TRUE16-NEXT: scratch_load_d16_b16 v0, off, s32 offset:10 th:TH_LOAD_LU ; 2-byte Folded Reload
699+
; GFX12-TRUE16-NEXT: s_wait_loadcnt 0x0
700+
; GFX12-TRUE16-NEXT: scratch_store_b16 off, v0, s32 scope:SCOPE_SYS
701+
; GFX12-TRUE16-NEXT: s_wait_storecnt 0x0
702+
; GFX12-TRUE16-NEXT: s_setpc_b64 s[30:31]
703+
;
523704
; GFX1250-TRUE16-LABEL: spill_2xi16_from_v2i16_one_free_reg:
524705
; GFX1250-TRUE16: ; %bb.0: ; %entry
525706
; GFX1250-TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0
@@ -595,6 +776,25 @@ define void @spill_v2i16() {
595776
; GCN-NEXT: s_waitcnt_vscnt null, 0x0
596777
; GCN-NEXT: s_setpc_b64 s[30:31]
597778
;
779+
; GFX12-TRUE16-LABEL: spill_v2i16:
780+
; GFX12-TRUE16: ; %bb.0: ; %entry
781+
; GFX12-TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0
782+
; GFX12-TRUE16-NEXT: s_wait_expcnt 0x0
783+
; GFX12-TRUE16-NEXT: s_wait_samplecnt 0x0
784+
; GFX12-TRUE16-NEXT: s_wait_bvhcnt 0x0
785+
; GFX12-TRUE16-NEXT: s_wait_kmcnt 0x0
786+
; GFX12-TRUE16-NEXT: scratch_load_b32 v0, off, s32 offset:4 scope:SCOPE_SYS
787+
; GFX12-TRUE16-NEXT: s_wait_loadcnt 0x0
788+
; GFX12-TRUE16-NEXT: scratch_store_b32 off, v0, s32 offset:8 ; 4-byte Folded Spill
789+
; GFX12-TRUE16-NEXT: ;;#ASMSTART
790+
; GFX12-TRUE16-NEXT: ;;#ASMEND
791+
; GFX12-TRUE16-NEXT: scratch_load_b32 v0, off, s32 offset:8 th:TH_LOAD_LU ; 4-byte Folded Reload
792+
; GFX12-TRUE16-NEXT: s_wait_loadcnt 0x0
793+
; GFX12-TRUE16-NEXT: s_wait_storecnt 0x0
794+
; GFX12-TRUE16-NEXT: scratch_store_b32 off, v0, s32 offset:4 scope:SCOPE_SYS
795+
; GFX12-TRUE16-NEXT: s_wait_storecnt 0x0
796+
; GFX12-TRUE16-NEXT: s_setpc_b64 s[30:31]
797+
;
598798
; GFX1250-LABEL: spill_v2i16:
599799
; GFX1250: ; %bb.0: ; %entry
600800
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0

0 commit comments

Comments
 (0)