11; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
22; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 -mattr=+real-true16 -enable-misched=0 -post-RA-scheduler=0 -stress-regalloc=8 < %s | FileCheck %s -check-prefixes=GCN,GCN-TRUE16
33; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 -mattr=-real-true16 -enable-misched=0 -post-RA-scheduler=0 -stress-regalloc=8 < %s | FileCheck %s -check-prefixes=GCN,GCN-FAKE16
4+ ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1200 -mattr=+real-true16,+d16-write-vgpr32 -enable-misched=0 -post-RA-scheduler=0 -stress-regalloc=8 < %s | FileCheck %s -check-prefixes=GFX12-TRUE16,GFX12-TRUE16-D16W32
5+ ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1200 -mattr=+real-true16,-d16-write-vgpr32 -enable-misched=0 -post-RA-scheduler=0 -stress-regalloc=8 < %s | FileCheck %s -check-prefixes=GFX12-TRUE16,GFX12-TRUE16-D16W16
46; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1250 -mattr=+real-true16 -enable-misched=0 -post-RA-scheduler=0 -stress-regalloc=8 < %s | FileCheck %s -check-prefixes=GFX1250,GFX1250-TRUE16
57; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1250 -mattr=-real-true16 -enable-misched=0 -post-RA-scheduler=0 -stress-regalloc=8 < %s | FileCheck %s -check-prefixes=GFX1250,GFX1250-FAKE16
68
@@ -35,6 +37,26 @@ define void @spill_i16_alu() {
3537; GCN-FAKE16-NEXT: s_waitcnt_vscnt null, 0x0
3638; GCN-FAKE16-NEXT: s_setpc_b64 s[30:31]
3739;
40+ ; GFX12-TRUE16-LABEL: spill_i16_alu:
41+ ; GFX12-TRUE16: ; %bb.0: ; %entry
42+ ; GFX12-TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0
43+ ; GFX12-TRUE16-NEXT: s_wait_expcnt 0x0
44+ ; GFX12-TRUE16-NEXT: s_wait_samplecnt 0x0
45+ ; GFX12-TRUE16-NEXT: s_wait_bvhcnt 0x0
46+ ; GFX12-TRUE16-NEXT: s_wait_kmcnt 0x0
47+ ; GFX12-TRUE16-NEXT: scratch_load_d16_b16 v0, off, s32 scope:SCOPE_SYS
48+ ; GFX12-TRUE16-NEXT: s_wait_loadcnt 0x0
49+ ; GFX12-TRUE16-NEXT: v_add_nc_u16 v0.l, 0x7b, v0.l
50+ ; GFX12-TRUE16-NEXT: scratch_store_b16 off, v0, s32 offset:2 ; 2-byte Folded Spill
51+ ; GFX12-TRUE16-NEXT: ;;#ASMSTART
52+ ; GFX12-TRUE16-NEXT: ;;#ASMEND
53+ ; GFX12-TRUE16-NEXT: scratch_load_d16_b16 v0, off, s32 offset:2 th:TH_LOAD_LU ; 2-byte Folded Reload
54+ ; GFX12-TRUE16-NEXT: s_wait_loadcnt 0x0
55+ ; GFX12-TRUE16-NEXT: s_wait_storecnt 0x0
56+ ; GFX12-TRUE16-NEXT: scratch_store_b16 off, v0, s32 scope:SCOPE_SYS
57+ ; GFX12-TRUE16-NEXT: s_wait_storecnt 0x0
58+ ; GFX12-TRUE16-NEXT: s_setpc_b64 s[30:31]
59+ ;
3860; GFX1250-TRUE16-LABEL: spill_i16_alu:
3961; GFX1250-TRUE16: ; %bb.0: ; %entry
4062; GFX1250-TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0
@@ -126,6 +148,56 @@ define void @spill_i16_alu_two_vals() {
126148; GCN-FAKE16-NEXT: s_waitcnt_vscnt null, 0x0
127149; GCN-FAKE16-NEXT: s_setpc_b64 s[30:31]
128150;
151+ ; GFX12-TRUE16-D16W32-LABEL: spill_i16_alu_two_vals:
152+ ; GFX12-TRUE16-D16W32: ; %bb.0: ; %entry
153+ ; GFX12-TRUE16-D16W32-NEXT: s_wait_loadcnt_dscnt 0x0
154+ ; GFX12-TRUE16-D16W32-NEXT: s_wait_expcnt 0x0
155+ ; GFX12-TRUE16-D16W32-NEXT: s_wait_samplecnt 0x0
156+ ; GFX12-TRUE16-D16W32-NEXT: s_wait_bvhcnt 0x0
157+ ; GFX12-TRUE16-D16W32-NEXT: s_wait_kmcnt 0x0
158+ ; GFX12-TRUE16-D16W32-NEXT: scratch_load_d16_b16 v0, off, s32 scope:SCOPE_SYS
159+ ; GFX12-TRUE16-D16W32-NEXT: s_wait_loadcnt 0x0
160+ ; GFX12-TRUE16-D16W32-NEXT: v_add_nc_u16 v0.l, 0x7b, v0.l
161+ ; GFX12-TRUE16-D16W32-NEXT: scratch_store_b16 off, v0, s32 offset:6 ; 2-byte Folded Spill
162+ ; GFX12-TRUE16-D16W32-NEXT: ;;#ASMSTART
163+ ; GFX12-TRUE16-D16W32-NEXT: ;;#ASMEND
164+ ; GFX12-TRUE16-D16W32-NEXT: scratch_load_d16_b16 v0, off, s32 offset:4 scope:SCOPE_SYS
165+ ; GFX12-TRUE16-D16W32-NEXT: s_wait_loadcnt 0x0
166+ ; GFX12-TRUE16-D16W32-NEXT: scratch_load_d16_hi_b16 v0, off, s32 offset:6 th:TH_LOAD_LU ; 2-byte Folded Reload
167+ ; GFX12-TRUE16-D16W32-NEXT: s_wait_loadcnt 0x0
168+ ; GFX12-TRUE16-D16W32-NEXT: v_add_nc_u16 v0.l, 0x7b, v0.l
169+ ; GFX12-TRUE16-D16W32-NEXT: s_wait_storecnt 0x0
170+ ; GFX12-TRUE16-D16W32-NEXT: scratch_store_d16_hi_b16 off, v0, s32 scope:SCOPE_SYS
171+ ; GFX12-TRUE16-D16W32-NEXT: s_wait_storecnt 0x0
172+ ; GFX12-TRUE16-D16W32-NEXT: scratch_store_b16 off, v0, s32 offset:4 scope:SCOPE_SYS
173+ ; GFX12-TRUE16-D16W32-NEXT: s_wait_storecnt 0x0
174+ ; GFX12-TRUE16-D16W32-NEXT: s_setpc_b64 s[30:31]
175+ ;
176+ ; GFX12-TRUE16-D16W16-LABEL: spill_i16_alu_two_vals:
177+ ; GFX12-TRUE16-D16W16: ; %bb.0: ; %entry
178+ ; GFX12-TRUE16-D16W16-NEXT: s_wait_loadcnt_dscnt 0x0
179+ ; GFX12-TRUE16-D16W16-NEXT: s_wait_expcnt 0x0
180+ ; GFX12-TRUE16-D16W16-NEXT: s_wait_samplecnt 0x0
181+ ; GFX12-TRUE16-D16W16-NEXT: s_wait_bvhcnt 0x0
182+ ; GFX12-TRUE16-D16W16-NEXT: s_wait_kmcnt 0x0
183+ ; GFX12-TRUE16-D16W16-NEXT: scratch_load_d16_b16 v0, off, s32 scope:SCOPE_SYS
184+ ; GFX12-TRUE16-D16W16-NEXT: s_wait_loadcnt 0x0
185+ ; GFX12-TRUE16-D16W16-NEXT: v_add_nc_u16 v0.l, 0x7b, v0.l
186+ ; GFX12-TRUE16-D16W16-NEXT: scratch_store_b16 off, v0, s32 offset:6 ; 2-byte Folded Spill
187+ ; GFX12-TRUE16-D16W16-NEXT: ;;#ASMSTART
188+ ; GFX12-TRUE16-D16W16-NEXT: ;;#ASMEND
189+ ; GFX12-TRUE16-D16W16-NEXT: scratch_load_d16_b16 v0, off, s32 offset:4 scope:SCOPE_SYS
190+ ; GFX12-TRUE16-D16W16-NEXT: s_wait_loadcnt 0x0
191+ ; GFX12-TRUE16-D16W16-NEXT: scratch_load_d16_hi_b16 v0, off, s32 offset:6 th:TH_LOAD_LU ; 2-byte Folded Reload
192+ ; GFX12-TRUE16-D16W16-NEXT: v_add_nc_u16 v0.l, 0x7b, v0.l
193+ ; GFX12-TRUE16-D16W16-NEXT: s_wait_loadcnt 0x0
194+ ; GFX12-TRUE16-D16W16-NEXT: s_wait_storecnt 0x0
195+ ; GFX12-TRUE16-D16W16-NEXT: scratch_store_d16_hi_b16 off, v0, s32 scope:SCOPE_SYS
196+ ; GFX12-TRUE16-D16W16-NEXT: s_wait_storecnt 0x0
197+ ; GFX12-TRUE16-D16W16-NEXT: scratch_store_b16 off, v0, s32 offset:4 scope:SCOPE_SYS
198+ ; GFX12-TRUE16-D16W16-NEXT: s_wait_storecnt 0x0
199+ ; GFX12-TRUE16-D16W16-NEXT: s_setpc_b64 s[30:31]
200+ ;
129201; GFX1250-TRUE16-LABEL: spill_i16_alu_two_vals:
130202; GFX1250-TRUE16: ; %bb.0: ; %entry
131203; GFX1250-TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0
@@ -223,6 +295,25 @@ define void @spill_i16() {
223295; GCN-FAKE16-NEXT: s_waitcnt_vscnt null, 0x0
224296; GCN-FAKE16-NEXT: s_setpc_b64 s[30:31]
225297;
298+ ; GFX12-TRUE16-LABEL: spill_i16:
299+ ; GFX12-TRUE16: ; %bb.0: ; %entry
300+ ; GFX12-TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0
301+ ; GFX12-TRUE16-NEXT: s_wait_expcnt 0x0
302+ ; GFX12-TRUE16-NEXT: s_wait_samplecnt 0x0
303+ ; GFX12-TRUE16-NEXT: s_wait_bvhcnt 0x0
304+ ; GFX12-TRUE16-NEXT: s_wait_kmcnt 0x0
305+ ; GFX12-TRUE16-NEXT: scratch_load_d16_b16 v0, off, s32 scope:SCOPE_SYS
306+ ; GFX12-TRUE16-NEXT: s_wait_loadcnt 0x0
307+ ; GFX12-TRUE16-NEXT: scratch_store_b16 off, v0, s32 offset:2 ; 2-byte Folded Spill
308+ ; GFX12-TRUE16-NEXT: ;;#ASMSTART
309+ ; GFX12-TRUE16-NEXT: ;;#ASMEND
310+ ; GFX12-TRUE16-NEXT: scratch_load_d16_b16 v0, off, s32 offset:2 th:TH_LOAD_LU ; 2-byte Folded Reload
311+ ; GFX12-TRUE16-NEXT: s_wait_loadcnt 0x0
312+ ; GFX12-TRUE16-NEXT: s_wait_storecnt 0x0
313+ ; GFX12-TRUE16-NEXT: scratch_store_b16 off, v0, s32 scope:SCOPE_SYS
314+ ; GFX12-TRUE16-NEXT: s_wait_storecnt 0x0
315+ ; GFX12-TRUE16-NEXT: s_setpc_b64 s[30:31]
316+ ;
226317; GFX1250-LABEL: spill_i16:
227318; GFX1250: ; %bb.0: ; %entry
228319; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
@@ -282,6 +373,25 @@ define void @spill_half() {
282373; GCN-FAKE16-NEXT: s_waitcnt_vscnt null, 0x0
283374; GCN-FAKE16-NEXT: s_setpc_b64 s[30:31]
284375;
376+ ; GFX12-TRUE16-LABEL: spill_half:
377+ ; GFX12-TRUE16: ; %bb.0: ; %entry
378+ ; GFX12-TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0
379+ ; GFX12-TRUE16-NEXT: s_wait_expcnt 0x0
380+ ; GFX12-TRUE16-NEXT: s_wait_samplecnt 0x0
381+ ; GFX12-TRUE16-NEXT: s_wait_bvhcnt 0x0
382+ ; GFX12-TRUE16-NEXT: s_wait_kmcnt 0x0
383+ ; GFX12-TRUE16-NEXT: scratch_load_d16_b16 v0, off, s32 scope:SCOPE_SYS
384+ ; GFX12-TRUE16-NEXT: s_wait_loadcnt 0x0
385+ ; GFX12-TRUE16-NEXT: scratch_store_b16 off, v0, s32 offset:2 ; 2-byte Folded Spill
386+ ; GFX12-TRUE16-NEXT: ;;#ASMSTART
387+ ; GFX12-TRUE16-NEXT: ;;#ASMEND
388+ ; GFX12-TRUE16-NEXT: scratch_load_d16_b16 v0, off, s32 offset:2 th:TH_LOAD_LU ; 2-byte Folded Reload
389+ ; GFX12-TRUE16-NEXT: s_wait_loadcnt 0x0
390+ ; GFX12-TRUE16-NEXT: s_wait_storecnt 0x0
391+ ; GFX12-TRUE16-NEXT: scratch_store_b16 off, v0, s32 scope:SCOPE_SYS
392+ ; GFX12-TRUE16-NEXT: s_wait_storecnt 0x0
393+ ; GFX12-TRUE16-NEXT: s_setpc_b64 s[30:31]
394+ ;
285395; GFX1250-LABEL: spill_half:
286396; GFX1250: ; %bb.0: ; %entry
287397; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
@@ -341,6 +451,25 @@ define void @spill_i16_from_v2i16() {
341451; GCN-FAKE16-NEXT: s_waitcnt_vscnt null, 0x0
342452; GCN-FAKE16-NEXT: s_setpc_b64 s[30:31]
343453;
454+ ; GFX12-TRUE16-LABEL: spill_i16_from_v2i16:
455+ ; GFX12-TRUE16: ; %bb.0: ; %entry
456+ ; GFX12-TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0
457+ ; GFX12-TRUE16-NEXT: s_wait_expcnt 0x0
458+ ; GFX12-TRUE16-NEXT: s_wait_samplecnt 0x0
459+ ; GFX12-TRUE16-NEXT: s_wait_bvhcnt 0x0
460+ ; GFX12-TRUE16-NEXT: s_wait_kmcnt 0x0
461+ ; GFX12-TRUE16-NEXT: scratch_load_d16_b16 v0, off, s32 offset:2 scope:SCOPE_SYS
462+ ; GFX12-TRUE16-NEXT: s_wait_loadcnt 0x0
463+ ; GFX12-TRUE16-NEXT: scratch_store_b16 off, v0, s32 offset:8 ; 2-byte Folded Spill
464+ ; GFX12-TRUE16-NEXT: ;;#ASMSTART
465+ ; GFX12-TRUE16-NEXT: ;;#ASMEND
466+ ; GFX12-TRUE16-NEXT: scratch_load_d16_b16 v0, off, s32 offset:8 th:TH_LOAD_LU ; 2-byte Folded Reload
467+ ; GFX12-TRUE16-NEXT: s_wait_loadcnt 0x0
468+ ; GFX12-TRUE16-NEXT: s_wait_storecnt 0x0
469+ ; GFX12-TRUE16-NEXT: scratch_store_b16 off, v0, s32 offset:2 scope:SCOPE_SYS
470+ ; GFX12-TRUE16-NEXT: s_wait_storecnt 0x0
471+ ; GFX12-TRUE16-NEXT: s_setpc_b64 s[30:31]
472+ ;
344473; GFX1250-LABEL: spill_i16_from_v2i16:
345474; GFX1250: ; %bb.0: ; %entry
346475; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
@@ -414,13 +543,39 @@ define void @spill_2xi16_from_v2i16() {
414543; GCN-FAKE16-NEXT: s_waitcnt_vscnt null, 0x0
415544; GCN-FAKE16-NEXT: s_setpc_b64 s[30:31]
416545;
546+ ; GFX12-TRUE16-LABEL: spill_2xi16_from_v2i16:
547+ ; GFX12-TRUE16: ; %bb.0: ; %entry
548+ ; GFX12-TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0
549+ ; GFX12-TRUE16-NEXT: s_wait_expcnt 0x0
550+ ; GFX12-TRUE16-NEXT: s_wait_samplecnt 0x0
551+ ; GFX12-TRUE16-NEXT: s_wait_bvhcnt 0x0
552+ ; GFX12-TRUE16-NEXT: s_wait_kmcnt 0x0
553+ ; GFX12-TRUE16-NEXT: scratch_load_d16_b16 v0, off, s32 offset:2 scope:SCOPE_SYS
554+ ; GFX12-TRUE16-NEXT: s_wait_loadcnt 0x0
555+ ; GFX12-TRUE16-NEXT: scratch_store_b16 off, v0, s32 offset:8 ; 2-byte Folded Spill
556+ ; GFX12-TRUE16-NEXT: scratch_load_d16_b16 v0, off, s32 scope:SCOPE_SYS
557+ ; GFX12-TRUE16-NEXT: s_wait_loadcnt 0x0
558+ ; GFX12-TRUE16-NEXT: scratch_store_b16 off, v0, s32 offset:10 ; 2-byte Folded Spill
559+ ; GFX12-TRUE16-NEXT: ;;#ASMSTART
560+ ; GFX12-TRUE16-NEXT: ;;#ASMEND
561+ ; GFX12-TRUE16-NEXT: scratch_load_d16_b16 v0, off, s32 offset:8 th:TH_LOAD_LU ; 2-byte Folded Reload
562+ ; GFX12-TRUE16-NEXT: s_wait_loadcnt 0x0
563+ ; GFX12-TRUE16-NEXT: s_wait_storecnt 0x0
564+ ; GFX12-TRUE16-NEXT: scratch_store_b16 off, v0, s32 offset:2 scope:SCOPE_SYS
565+ ; GFX12-TRUE16-NEXT: s_wait_storecnt 0x0
566+ ; GFX12-TRUE16-NEXT: scratch_load_d16_b16 v0, off, s32 offset:10 th:TH_LOAD_LU ; 2-byte Folded Reload
567+ ; GFX12-TRUE16-NEXT: s_wait_loadcnt 0x0
568+ ; GFX12-TRUE16-NEXT: scratch_store_b16 off, v0, s32 scope:SCOPE_SYS
569+ ; GFX12-TRUE16-NEXT: s_wait_storecnt 0x0
570+ ; GFX12-TRUE16-NEXT: s_setpc_b64 s[30:31]
571+ ;
417572; GFX1250-TRUE16-LABEL: spill_2xi16_from_v2i16:
418573; GFX1250-TRUE16: ; %bb.0: ; %entry
419574; GFX1250-TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0
420575; GFX1250-TRUE16-NEXT: s_wait_kmcnt 0x0
421576; GFX1250-TRUE16-NEXT: scratch_load_u16 v0, off, s32 offset:2 scope:SCOPE_SYS
422577; GFX1250-TRUE16-NEXT: s_wait_loadcnt 0x0
423- ; GFX1250-TRUE16-NEXT: s_clause 0x1
578+ ; GFX1250-TRUE16-NEXT: s_clause 0x1 ; 4-byte Folded Spill
424579; GFX1250-TRUE16-NEXT: scratch_store_b32 off, v0, s32 offset:12
425580; GFX1250-TRUE16-NEXT: scratch_load_u16 v0, off, s32 scope:SCOPE_SYS
426581; GFX1250-TRUE16-NEXT: s_wait_loadcnt 0x0
@@ -444,7 +599,7 @@ define void @spill_2xi16_from_v2i16() {
444599; GFX1250-FAKE16-NEXT: s_wait_kmcnt 0x0
445600; GFX1250-FAKE16-NEXT: scratch_load_u16 v0, off, s32 offset:2 scope:SCOPE_SYS
446601; GFX1250-FAKE16-NEXT: s_wait_loadcnt 0x0
447- ; GFX1250-FAKE16-NEXT: s_clause 0x1
602+ ; GFX1250-FAKE16-NEXT: s_clause 0x1 ; 4-byte Folded Spill
448603; GFX1250-FAKE16-NEXT: scratch_store_b32 off, v0, s32 offset:8
449604; GFX1250-FAKE16-NEXT: scratch_load_u16 v0, off, s32 scope:SCOPE_SYS
450605; GFX1250-FAKE16-NEXT: s_wait_loadcnt 0x0
@@ -520,6 +675,32 @@ define void @spill_2xi16_from_v2i16_one_free_reg() {
520675; GCN-FAKE16-NEXT: s_waitcnt_vscnt null, 0x0
521676; GCN-FAKE16-NEXT: s_setpc_b64 s[30:31]
522677;
678+ ; GFX12-TRUE16-LABEL: spill_2xi16_from_v2i16_one_free_reg:
679+ ; GFX12-TRUE16: ; %bb.0: ; %entry
680+ ; GFX12-TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0
681+ ; GFX12-TRUE16-NEXT: s_wait_expcnt 0x0
682+ ; GFX12-TRUE16-NEXT: s_wait_samplecnt 0x0
683+ ; GFX12-TRUE16-NEXT: s_wait_bvhcnt 0x0
684+ ; GFX12-TRUE16-NEXT: s_wait_kmcnt 0x0
685+ ; GFX12-TRUE16-NEXT: scratch_load_d16_b16 v0, off, s32 offset:2 scope:SCOPE_SYS
686+ ; GFX12-TRUE16-NEXT: s_wait_loadcnt 0x0
687+ ; GFX12-TRUE16-NEXT: scratch_store_b16 off, v0, s32 offset:8 ; 2-byte Folded Spill
688+ ; GFX12-TRUE16-NEXT: scratch_load_d16_b16 v0, off, s32 scope:SCOPE_SYS
689+ ; GFX12-TRUE16-NEXT: s_wait_loadcnt 0x0
690+ ; GFX12-TRUE16-NEXT: scratch_store_b16 off, v0, s32 offset:10 ; 2-byte Folded Spill
691+ ; GFX12-TRUE16-NEXT: ;;#ASMSTART
692+ ; GFX12-TRUE16-NEXT: ;;#ASMEND
693+ ; GFX12-TRUE16-NEXT: scratch_load_d16_b16 v0, off, s32 offset:8 th:TH_LOAD_LU ; 2-byte Folded Reload
694+ ; GFX12-TRUE16-NEXT: s_wait_loadcnt 0x0
695+ ; GFX12-TRUE16-NEXT: s_wait_storecnt 0x0
696+ ; GFX12-TRUE16-NEXT: scratch_store_b16 off, v0, s32 offset:2 scope:SCOPE_SYS
697+ ; GFX12-TRUE16-NEXT: s_wait_storecnt 0x0
698+ ; GFX12-TRUE16-NEXT: scratch_load_d16_b16 v0, off, s32 offset:10 th:TH_LOAD_LU ; 2-byte Folded Reload
699+ ; GFX12-TRUE16-NEXT: s_wait_loadcnt 0x0
700+ ; GFX12-TRUE16-NEXT: scratch_store_b16 off, v0, s32 scope:SCOPE_SYS
701+ ; GFX12-TRUE16-NEXT: s_wait_storecnt 0x0
702+ ; GFX12-TRUE16-NEXT: s_setpc_b64 s[30:31]
703+ ;
523704; GFX1250-TRUE16-LABEL: spill_2xi16_from_v2i16_one_free_reg:
524705; GFX1250-TRUE16: ; %bb.0: ; %entry
525706; GFX1250-TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0
@@ -595,6 +776,25 @@ define void @spill_v2i16() {
595776; GCN-NEXT: s_waitcnt_vscnt null, 0x0
596777; GCN-NEXT: s_setpc_b64 s[30:31]
597778;
779+ ; GFX12-TRUE16-LABEL: spill_v2i16:
780+ ; GFX12-TRUE16: ; %bb.0: ; %entry
781+ ; GFX12-TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0
782+ ; GFX12-TRUE16-NEXT: s_wait_expcnt 0x0
783+ ; GFX12-TRUE16-NEXT: s_wait_samplecnt 0x0
784+ ; GFX12-TRUE16-NEXT: s_wait_bvhcnt 0x0
785+ ; GFX12-TRUE16-NEXT: s_wait_kmcnt 0x0
786+ ; GFX12-TRUE16-NEXT: scratch_load_b32 v0, off, s32 offset:4 scope:SCOPE_SYS
787+ ; GFX12-TRUE16-NEXT: s_wait_loadcnt 0x0
788+ ; GFX12-TRUE16-NEXT: scratch_store_b32 off, v0, s32 offset:8 ; 4-byte Folded Spill
789+ ; GFX12-TRUE16-NEXT: ;;#ASMSTART
790+ ; GFX12-TRUE16-NEXT: ;;#ASMEND
791+ ; GFX12-TRUE16-NEXT: scratch_load_b32 v0, off, s32 offset:8 th:TH_LOAD_LU ; 4-byte Folded Reload
792+ ; GFX12-TRUE16-NEXT: s_wait_loadcnt 0x0
793+ ; GFX12-TRUE16-NEXT: s_wait_storecnt 0x0
794+ ; GFX12-TRUE16-NEXT: scratch_store_b32 off, v0, s32 offset:4 scope:SCOPE_SYS
795+ ; GFX12-TRUE16-NEXT: s_wait_storecnt 0x0
796+ ; GFX12-TRUE16-NEXT: s_setpc_b64 s[30:31]
797+ ;
598798; GFX1250-LABEL: spill_v2i16:
599799; GFX1250: ; %bb.0: ; %entry
600800; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
0 commit comments