Skip to content

Commit cad0982

Browse files
committed
[LIT] Updated the regressing LIT tests to accommodate patch changes.
1 parent 7d6e7fb commit cad0982

File tree

267 files changed

+7273
-7122
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

267 files changed

+7273
-7122
lines changed

llvm/test/CodeGen/AMDGPU/GlobalISel/addo.ll

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -676,8 +676,8 @@ define amdgpu_ps i64 @s_saddo_i64(i64 inreg %a, i64 inreg %b) {
676676
; GFX7-LABEL: s_saddo_i64:
677677
; GFX7: ; %bb.0:
678678
; GFX7-NEXT: s_add_u32 s4, s0, s2
679-
; GFX7-NEXT: v_mov_b32_e32 v0, s0
680679
; GFX7-NEXT: s_addc_u32 s5, s1, s3
680+
; GFX7-NEXT: v_mov_b32_e32 v0, s0
681681
; GFX7-NEXT: v_mov_b32_e32 v1, s1
682682
; GFX7-NEXT: v_cmp_lt_i64_e32 vcc, s[4:5], v[0:1]
683683
; GFX7-NEXT: v_cmp_lt_i64_e64 s[0:1], s[2:3], 0
@@ -693,8 +693,8 @@ define amdgpu_ps i64 @s_saddo_i64(i64 inreg %a, i64 inreg %b) {
693693
; GFX8-LABEL: s_saddo_i64:
694694
; GFX8: ; %bb.0:
695695
; GFX8-NEXT: s_add_u32 s4, s0, s2
696-
; GFX8-NEXT: v_mov_b32_e32 v0, s0
697696
; GFX8-NEXT: s_addc_u32 s5, s1, s3
697+
; GFX8-NEXT: v_mov_b32_e32 v0, s0
698698
; GFX8-NEXT: v_mov_b32_e32 v1, s1
699699
; GFX8-NEXT: v_cmp_lt_i64_e32 vcc, s[4:5], v[0:1]
700700
; GFX8-NEXT: v_cmp_lt_i64_e64 s[0:1], s[2:3], 0
@@ -710,8 +710,8 @@ define amdgpu_ps i64 @s_saddo_i64(i64 inreg %a, i64 inreg %b) {
710710
; GFX9-LABEL: s_saddo_i64:
711711
; GFX9: ; %bb.0:
712712
; GFX9-NEXT: s_add_u32 s4, s0, s2
713-
; GFX9-NEXT: v_mov_b32_e32 v0, s0
714713
; GFX9-NEXT: s_addc_u32 s5, s1, s3
714+
; GFX9-NEXT: v_mov_b32_e32 v0, s0
715715
; GFX9-NEXT: v_mov_b32_e32 v1, s1
716716
; GFX9-NEXT: v_cmp_lt_i64_e32 vcc, s[4:5], v[0:1]
717717
; GFX9-NEXT: v_cmp_lt_i64_e64 s[0:1], s[2:3], 0

llvm/test/CodeGen/AMDGPU/GlobalISel/atomic_optimizations_mul_one.ll

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -96,8 +96,8 @@ define amdgpu_cs void @atomic_add_and_format(<4 x i32> inreg %arg) {
9696
; GCN-NEXT: s_waitcnt vmcnt(0)
9797
; GCN-NEXT: v_readfirstlane_b32 s4, v1
9898
; GCN-NEXT: v_add_i32_e32 v4, vcc, s4, v0
99-
; GCN-NEXT: s_waitcnt expcnt(0)
10099
; GCN-NEXT: v_mov_b32_e32 v0, s0
100+
; GCN-NEXT: s_waitcnt expcnt(0)
101101
; GCN-NEXT: v_mov_b32_e32 v1, s1
102102
; GCN-NEXT: v_mov_b32_e32 v2, s2
103103
; GCN-NEXT: v_mov_b32_e32 v3, s3
@@ -192,8 +192,8 @@ define amdgpu_cs void @atomic_sub_and_format(<4 x i32> inreg %arg) {
192192
; GCN-NEXT: s_waitcnt vmcnt(0)
193193
; GCN-NEXT: v_readfirstlane_b32 s4, v1
194194
; GCN-NEXT: v_sub_i32_e32 v4, vcc, s4, v0
195-
; GCN-NEXT: s_waitcnt expcnt(0)
196195
; GCN-NEXT: v_mov_b32_e32 v0, s0
196+
; GCN-NEXT: s_waitcnt expcnt(0)
197197
; GCN-NEXT: v_mov_b32_e32 v1, s1
198198
; GCN-NEXT: v_mov_b32_e32 v2, s2
199199
; GCN-NEXT: v_mov_b32_e32 v3, s3
@@ -294,8 +294,8 @@ define amdgpu_cs void @atomic_xor_and_format(<4 x i32> inreg %arg) {
294294
; GCN-NEXT: v_readfirstlane_b32 s4, v1
295295
; GCN-NEXT: v_and_b32_e32 v0, 1, v0
296296
; GCN-NEXT: v_xor_b32_e32 v4, s4, v0
297-
; GCN-NEXT: s_waitcnt expcnt(0)
298297
; GCN-NEXT: v_mov_b32_e32 v0, s0
298+
; GCN-NEXT: s_waitcnt expcnt(0)
299299
; GCN-NEXT: v_mov_b32_e32 v1, s1
300300
; GCN-NEXT: v_mov_b32_e32 v2, s2
301301
; GCN-NEXT: v_mov_b32_e32 v3, s3
@@ -392,8 +392,8 @@ define amdgpu_cs void @atomic_ptr_add_and_format(ptr addrspace(8) inreg %arg) {
392392
; GCN-NEXT: s_waitcnt vmcnt(0)
393393
; GCN-NEXT: v_readfirstlane_b32 s4, v1
394394
; GCN-NEXT: v_add_i32_e32 v4, vcc, s4, v0
395-
; GCN-NEXT: s_waitcnt expcnt(0)
396395
; GCN-NEXT: v_mov_b32_e32 v0, s0
396+
; GCN-NEXT: s_waitcnt expcnt(0)
397397
; GCN-NEXT: v_mov_b32_e32 v1, s1
398398
; GCN-NEXT: v_mov_b32_e32 v2, s2
399399
; GCN-NEXT: v_mov_b32_e32 v3, s3
@@ -492,8 +492,8 @@ define amdgpu_cs void @atomic_ptr_sub_and_format(ptr addrspace(8) inreg %arg) {
492492
; GCN-NEXT: s_waitcnt vmcnt(0)
493493
; GCN-NEXT: v_readfirstlane_b32 s4, v1
494494
; GCN-NEXT: v_sub_i32_e32 v4, vcc, s4, v0
495-
; GCN-NEXT: s_waitcnt expcnt(0)
496495
; GCN-NEXT: v_mov_b32_e32 v0, s0
496+
; GCN-NEXT: s_waitcnt expcnt(0)
497497
; GCN-NEXT: v_mov_b32_e32 v1, s1
498498
; GCN-NEXT: v_mov_b32_e32 v2, s2
499499
; GCN-NEXT: v_mov_b32_e32 v3, s3
@@ -598,8 +598,8 @@ define amdgpu_cs void @atomic_ptr_xor_and_format(ptr addrspace(8) inreg %arg) {
598598
; GCN-NEXT: v_readfirstlane_b32 s4, v1
599599
; GCN-NEXT: v_and_b32_e32 v0, 1, v0
600600
; GCN-NEXT: v_xor_b32_e32 v4, s4, v0
601-
; GCN-NEXT: s_waitcnt expcnt(0)
602601
; GCN-NEXT: v_mov_b32_e32 v0, s0
602+
; GCN-NEXT: s_waitcnt expcnt(0)
603603
; GCN-NEXT: v_mov_b32_e32 v1, s1
604604
; GCN-NEXT: v_mov_b32_e32 v2, s2
605605
; GCN-NEXT: v_mov_b32_e32 v3, s3

llvm/test/CodeGen/AMDGPU/GlobalISel/atomicrmw_fmax.ll

Lines changed: 14 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -1822,12 +1822,12 @@ define double @buffer_fat_ptr_agent_atomic_fmax_ret_f64__amdgpu_no_fine_grained_
18221822
; GFX12-NEXT: s_wait_loadcnt 0x0
18231823
; GFX12-NEXT: v_dual_mov_b32 v10, v1 :: v_dual_mov_b32 v9, v0
18241824
; GFX12-NEXT: s_wait_storecnt 0x0
1825-
; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
1825+
; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
1826+
; GFX12-NEXT: v_dual_mov_b32 v3, v10 :: v_dual_mov_b32 v2, v9
18261827
; GFX12-NEXT: v_max_num_f64_e32 v[0:1], v[9:10], v[9:10]
18271828
; GFX12-NEXT: v_max_num_f64_e32 v[7:8], v[0:1], v[4:5]
18281829
; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1)
18291830
; GFX12-NEXT: v_dual_mov_b32 v0, v7 :: v_dual_mov_b32 v1, v8
1830-
; GFX12-NEXT: v_dual_mov_b32 v2, v9 :: v_dual_mov_b32 v3, v10
18311831
; GFX12-NEXT: buffer_atomic_cmpswap_b64 v[0:3], v6, s[0:3], null offen th:TH_ATOMIC_RETURN
18321832
; GFX12-NEXT: s_wait_loadcnt 0x0
18331833
; GFX12-NEXT: global_inv scope:SCOPE_DEV
@@ -1864,12 +1864,12 @@ define double @buffer_fat_ptr_agent_atomic_fmax_ret_f64__amdgpu_no_fine_grained_
18641864
; GFX11-NEXT: s_waitcnt vmcnt(0)
18651865
; GFX11-NEXT: v_dual_mov_b32 v10, v1 :: v_dual_mov_b32 v9, v0
18661866
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
1867-
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
1867+
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
1868+
; GFX11-NEXT: v_dual_mov_b32 v3, v10 :: v_dual_mov_b32 v2, v9
18681869
; GFX11-NEXT: v_max_f64 v[0:1], v[9:10], v[9:10]
18691870
; GFX11-NEXT: v_max_f64 v[7:8], v[0:1], v[4:5]
18701871
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
18711872
; GFX11-NEXT: v_dual_mov_b32 v0, v7 :: v_dual_mov_b32 v1, v8
1872-
; GFX11-NEXT: v_dual_mov_b32 v2, v9 :: v_dual_mov_b32 v3, v10
18731873
; GFX11-NEXT: buffer_atomic_cmpswap_b64 v[0:3], v6, s[0:3], 0 offen glc
18741874
; GFX11-NEXT: s_waitcnt vmcnt(0)
18751875
; GFX11-NEXT: buffer_gl1_inv
@@ -1918,11 +1918,11 @@ define double @buffer_fat_ptr_agent_atomic_fmax_ret_f64__amdgpu_no_fine_grained_
19181918
; GFX908-NEXT: v_mov_b32_e32 v10, v1
19191919
; GFX908-NEXT: v_mov_b32_e32 v9, v0
19201920
; GFX908-NEXT: v_max_f64 v[0:1], v[9:10], v[9:10]
1921+
; GFX908-NEXT: v_mov_b32_e32 v2, v9
1922+
; GFX908-NEXT: v_mov_b32_e32 v3, v10
19211923
; GFX908-NEXT: v_max_f64 v[7:8], v[0:1], v[4:5]
19221924
; GFX908-NEXT: v_mov_b32_e32 v0, v7
19231925
; GFX908-NEXT: v_mov_b32_e32 v1, v8
1924-
; GFX908-NEXT: v_mov_b32_e32 v2, v9
1925-
; GFX908-NEXT: v_mov_b32_e32 v3, v10
19261926
; GFX908-NEXT: buffer_atomic_cmpswap_x2 v[0:3], v6, s[16:19], 0 offen glc
19271927
; GFX908-NEXT: s_waitcnt vmcnt(0)
19281928
; GFX908-NEXT: buffer_wbinvl1
@@ -1949,11 +1949,11 @@ define double @buffer_fat_ptr_agent_atomic_fmax_ret_f64__amdgpu_no_fine_grained_
19491949
; GFX8-NEXT: v_mov_b32_e32 v10, v1
19501950
; GFX8-NEXT: v_mov_b32_e32 v9, v0
19511951
; GFX8-NEXT: v_max_f64 v[0:1], v[9:10], v[9:10]
1952+
; GFX8-NEXT: v_mov_b32_e32 v2, v9
1953+
; GFX8-NEXT: v_mov_b32_e32 v3, v10
19521954
; GFX8-NEXT: v_max_f64 v[7:8], v[0:1], v[4:5]
19531955
; GFX8-NEXT: v_mov_b32_e32 v0, v7
19541956
; GFX8-NEXT: v_mov_b32_e32 v1, v8
1955-
; GFX8-NEXT: v_mov_b32_e32 v2, v9
1956-
; GFX8-NEXT: v_mov_b32_e32 v3, v10
19571957
; GFX8-NEXT: buffer_atomic_cmpswap_x2 v[0:3], v6, s[16:19], 0 offen glc
19581958
; GFX8-NEXT: s_waitcnt vmcnt(0)
19591959
; GFX8-NEXT: buffer_wbinvl1
@@ -1993,10 +1993,10 @@ define void @buffer_fat_ptr_agent_atomic_fmax_noret_f64__amdgpu_no_fine_grained_
19931993
; GFX12-NEXT: ; =>This Inner Loop Header: Depth=1
19941994
; GFX12-NEXT: s_wait_loadcnt 0x0
19951995
; GFX12-NEXT: v_max_num_f64_e32 v[0:1], v[2:3], v[2:3]
1996+
; GFX12-NEXT: v_dual_mov_b32 v10, v3 :: v_dual_mov_b32 v9, v2
19961997
; GFX12-NEXT: s_wait_storecnt 0x0
1997-
; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
1998+
; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
19981999
; GFX12-NEXT: v_max_num_f64_e32 v[0:1], v[0:1], v[4:5]
1999-
; GFX12-NEXT: v_dual_mov_b32 v10, v3 :: v_dual_mov_b32 v9, v2
20002000
; GFX12-NEXT: v_dual_mov_b32 v8, v1 :: v_dual_mov_b32 v7, v0
20012001
; GFX12-NEXT: buffer_atomic_cmpswap_b64 v[7:10], v6, s[0:3], null offen th:TH_ATOMIC_RETURN
20022002
; GFX12-NEXT: s_wait_loadcnt 0x0
@@ -2033,10 +2033,10 @@ define void @buffer_fat_ptr_agent_atomic_fmax_noret_f64__amdgpu_no_fine_grained_
20332033
; GFX11-NEXT: ; =>This Inner Loop Header: Depth=1
20342034
; GFX11-NEXT: s_waitcnt vmcnt(0)
20352035
; GFX11-NEXT: v_max_f64 v[0:1], v[2:3], v[2:3]
2036+
; GFX11-NEXT: v_dual_mov_b32 v10, v3 :: v_dual_mov_b32 v9, v2
20362037
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
2037-
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
2038+
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
20382039
; GFX11-NEXT: v_max_f64 v[0:1], v[0:1], v[4:5]
2039-
; GFX11-NEXT: v_dual_mov_b32 v10, v3 :: v_dual_mov_b32 v9, v2
20402040
; GFX11-NEXT: v_dual_mov_b32 v8, v1 :: v_dual_mov_b32 v7, v0
20412041
; GFX11-NEXT: buffer_atomic_cmpswap_b64 v[7:10], v6, s[0:3], 0 offen glc
20422042
; GFX11-NEXT: s_waitcnt vmcnt(0)
@@ -2083,9 +2083,9 @@ define void @buffer_fat_ptr_agent_atomic_fmax_noret_f64__amdgpu_no_fine_grained_
20832083
; GFX908-NEXT: ; =>This Inner Loop Header: Depth=1
20842084
; GFX908-NEXT: s_waitcnt vmcnt(0)
20852085
; GFX908-NEXT: v_max_f64 v[0:1], v[2:3], v[2:3]
2086-
; GFX908-NEXT: v_max_f64 v[0:1], v[0:1], v[4:5]
20872086
; GFX908-NEXT: v_mov_b32_e32 v10, v3
20882087
; GFX908-NEXT: v_mov_b32_e32 v9, v2
2088+
; GFX908-NEXT: v_max_f64 v[0:1], v[0:1], v[4:5]
20892089
; GFX908-NEXT: v_mov_b32_e32 v8, v1
20902090
; GFX908-NEXT: v_mov_b32_e32 v7, v0
20912091
; GFX908-NEXT: buffer_atomic_cmpswap_x2 v[7:10], v6, s[16:19], 0 offen glc
@@ -2112,9 +2112,9 @@ define void @buffer_fat_ptr_agent_atomic_fmax_noret_f64__amdgpu_no_fine_grained_
21122112
; GFX8-NEXT: ; =>This Inner Loop Header: Depth=1
21132113
; GFX8-NEXT: s_waitcnt vmcnt(0)
21142114
; GFX8-NEXT: v_max_f64 v[0:1], v[2:3], v[2:3]
2115-
; GFX8-NEXT: v_max_f64 v[0:1], v[0:1], v[4:5]
21162115
; GFX8-NEXT: v_mov_b32_e32 v10, v3
21172116
; GFX8-NEXT: v_mov_b32_e32 v9, v2
2117+
; GFX8-NEXT: v_max_f64 v[0:1], v[0:1], v[4:5]
21182118
; GFX8-NEXT: v_mov_b32_e32 v8, v1
21192119
; GFX8-NEXT: v_mov_b32_e32 v7, v0
21202120
; GFX8-NEXT: buffer_atomic_cmpswap_x2 v[7:10], v6, s[16:19], 0 offen glc

llvm/test/CodeGen/AMDGPU/GlobalISel/atomicrmw_fmin.ll

Lines changed: 14 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -1822,12 +1822,12 @@ define double @buffer_fat_ptr_agent_atomic_fmin_ret_f64__amdgpu_no_fine_grained_
18221822
; GFX12-NEXT: s_wait_loadcnt 0x0
18231823
; GFX12-NEXT: v_dual_mov_b32 v10, v1 :: v_dual_mov_b32 v9, v0
18241824
; GFX12-NEXT: s_wait_storecnt 0x0
1825-
; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
1825+
; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
1826+
; GFX12-NEXT: v_dual_mov_b32 v3, v10 :: v_dual_mov_b32 v2, v9
18261827
; GFX12-NEXT: v_max_num_f64_e32 v[0:1], v[9:10], v[9:10]
18271828
; GFX12-NEXT: v_min_num_f64_e32 v[7:8], v[0:1], v[4:5]
18281829
; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1)
18291830
; GFX12-NEXT: v_dual_mov_b32 v0, v7 :: v_dual_mov_b32 v1, v8
1830-
; GFX12-NEXT: v_dual_mov_b32 v2, v9 :: v_dual_mov_b32 v3, v10
18311831
; GFX12-NEXT: buffer_atomic_cmpswap_b64 v[0:3], v6, s[0:3], null offen th:TH_ATOMIC_RETURN
18321832
; GFX12-NEXT: s_wait_loadcnt 0x0
18331833
; GFX12-NEXT: global_inv scope:SCOPE_DEV
@@ -1864,12 +1864,12 @@ define double @buffer_fat_ptr_agent_atomic_fmin_ret_f64__amdgpu_no_fine_grained_
18641864
; GFX11-NEXT: s_waitcnt vmcnt(0)
18651865
; GFX11-NEXT: v_dual_mov_b32 v10, v1 :: v_dual_mov_b32 v9, v0
18661866
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
1867-
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
1867+
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
1868+
; GFX11-NEXT: v_dual_mov_b32 v3, v10 :: v_dual_mov_b32 v2, v9
18681869
; GFX11-NEXT: v_max_f64 v[0:1], v[9:10], v[9:10]
18691870
; GFX11-NEXT: v_min_f64 v[7:8], v[0:1], v[4:5]
18701871
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
18711872
; GFX11-NEXT: v_dual_mov_b32 v0, v7 :: v_dual_mov_b32 v1, v8
1872-
; GFX11-NEXT: v_dual_mov_b32 v2, v9 :: v_dual_mov_b32 v3, v10
18731873
; GFX11-NEXT: buffer_atomic_cmpswap_b64 v[0:3], v6, s[0:3], 0 offen glc
18741874
; GFX11-NEXT: s_waitcnt vmcnt(0)
18751875
; GFX11-NEXT: buffer_gl1_inv
@@ -1918,11 +1918,11 @@ define double @buffer_fat_ptr_agent_atomic_fmin_ret_f64__amdgpu_no_fine_grained_
19181918
; GFX908-NEXT: v_mov_b32_e32 v10, v1
19191919
; GFX908-NEXT: v_mov_b32_e32 v9, v0
19201920
; GFX908-NEXT: v_max_f64 v[0:1], v[9:10], v[9:10]
1921+
; GFX908-NEXT: v_mov_b32_e32 v2, v9
1922+
; GFX908-NEXT: v_mov_b32_e32 v3, v10
19211923
; GFX908-NEXT: v_min_f64 v[7:8], v[0:1], v[4:5]
19221924
; GFX908-NEXT: v_mov_b32_e32 v0, v7
19231925
; GFX908-NEXT: v_mov_b32_e32 v1, v8
1924-
; GFX908-NEXT: v_mov_b32_e32 v2, v9
1925-
; GFX908-NEXT: v_mov_b32_e32 v3, v10
19261926
; GFX908-NEXT: buffer_atomic_cmpswap_x2 v[0:3], v6, s[16:19], 0 offen glc
19271927
; GFX908-NEXT: s_waitcnt vmcnt(0)
19281928
; GFX908-NEXT: buffer_wbinvl1
@@ -1949,11 +1949,11 @@ define double @buffer_fat_ptr_agent_atomic_fmin_ret_f64__amdgpu_no_fine_grained_
19491949
; GFX8-NEXT: v_mov_b32_e32 v10, v1
19501950
; GFX8-NEXT: v_mov_b32_e32 v9, v0
19511951
; GFX8-NEXT: v_max_f64 v[0:1], v[9:10], v[9:10]
1952+
; GFX8-NEXT: v_mov_b32_e32 v2, v9
1953+
; GFX8-NEXT: v_mov_b32_e32 v3, v10
19521954
; GFX8-NEXT: v_min_f64 v[7:8], v[0:1], v[4:5]
19531955
; GFX8-NEXT: v_mov_b32_e32 v0, v7
19541956
; GFX8-NEXT: v_mov_b32_e32 v1, v8
1955-
; GFX8-NEXT: v_mov_b32_e32 v2, v9
1956-
; GFX8-NEXT: v_mov_b32_e32 v3, v10
19571957
; GFX8-NEXT: buffer_atomic_cmpswap_x2 v[0:3], v6, s[16:19], 0 offen glc
19581958
; GFX8-NEXT: s_waitcnt vmcnt(0)
19591959
; GFX8-NEXT: buffer_wbinvl1
@@ -1993,10 +1993,10 @@ define void @buffer_fat_ptr_agent_atomic_fmin_noret_f64__amdgpu_no_fine_grained_
19931993
; GFX12-NEXT: ; =>This Inner Loop Header: Depth=1
19941994
; GFX12-NEXT: s_wait_loadcnt 0x0
19951995
; GFX12-NEXT: v_max_num_f64_e32 v[0:1], v[2:3], v[2:3]
1996+
; GFX12-NEXT: v_dual_mov_b32 v10, v3 :: v_dual_mov_b32 v9, v2
19961997
; GFX12-NEXT: s_wait_storecnt 0x0
1997-
; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
1998+
; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
19981999
; GFX12-NEXT: v_min_num_f64_e32 v[0:1], v[0:1], v[4:5]
1999-
; GFX12-NEXT: v_dual_mov_b32 v10, v3 :: v_dual_mov_b32 v9, v2
20002000
; GFX12-NEXT: v_dual_mov_b32 v8, v1 :: v_dual_mov_b32 v7, v0
20012001
; GFX12-NEXT: buffer_atomic_cmpswap_b64 v[7:10], v6, s[0:3], null offen th:TH_ATOMIC_RETURN
20022002
; GFX12-NEXT: s_wait_loadcnt 0x0
@@ -2033,10 +2033,10 @@ define void @buffer_fat_ptr_agent_atomic_fmin_noret_f64__amdgpu_no_fine_grained_
20332033
; GFX11-NEXT: ; =>This Inner Loop Header: Depth=1
20342034
; GFX11-NEXT: s_waitcnt vmcnt(0)
20352035
; GFX11-NEXT: v_max_f64 v[0:1], v[2:3], v[2:3]
2036+
; GFX11-NEXT: v_dual_mov_b32 v10, v3 :: v_dual_mov_b32 v9, v2
20362037
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
2037-
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
2038+
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
20382039
; GFX11-NEXT: v_min_f64 v[0:1], v[0:1], v[4:5]
2039-
; GFX11-NEXT: v_dual_mov_b32 v10, v3 :: v_dual_mov_b32 v9, v2
20402040
; GFX11-NEXT: v_dual_mov_b32 v8, v1 :: v_dual_mov_b32 v7, v0
20412041
; GFX11-NEXT: buffer_atomic_cmpswap_b64 v[7:10], v6, s[0:3], 0 offen glc
20422042
; GFX11-NEXT: s_waitcnt vmcnt(0)
@@ -2083,9 +2083,9 @@ define void @buffer_fat_ptr_agent_atomic_fmin_noret_f64__amdgpu_no_fine_grained_
20832083
; GFX908-NEXT: ; =>This Inner Loop Header: Depth=1
20842084
; GFX908-NEXT: s_waitcnt vmcnt(0)
20852085
; GFX908-NEXT: v_max_f64 v[0:1], v[2:3], v[2:3]
2086-
; GFX908-NEXT: v_min_f64 v[0:1], v[0:1], v[4:5]
20872086
; GFX908-NEXT: v_mov_b32_e32 v10, v3
20882087
; GFX908-NEXT: v_mov_b32_e32 v9, v2
2088+
; GFX908-NEXT: v_min_f64 v[0:1], v[0:1], v[4:5]
20892089
; GFX908-NEXT: v_mov_b32_e32 v8, v1
20902090
; GFX908-NEXT: v_mov_b32_e32 v7, v0
20912091
; GFX908-NEXT: buffer_atomic_cmpswap_x2 v[7:10], v6, s[16:19], 0 offen glc
@@ -2112,9 +2112,9 @@ define void @buffer_fat_ptr_agent_atomic_fmin_noret_f64__amdgpu_no_fine_grained_
21122112
; GFX8-NEXT: ; =>This Inner Loop Header: Depth=1
21132113
; GFX8-NEXT: s_waitcnt vmcnt(0)
21142114
; GFX8-NEXT: v_max_f64 v[0:1], v[2:3], v[2:3]
2115-
; GFX8-NEXT: v_min_f64 v[0:1], v[0:1], v[4:5]
21162115
; GFX8-NEXT: v_mov_b32_e32 v10, v3
21172116
; GFX8-NEXT: v_mov_b32_e32 v9, v2
2117+
; GFX8-NEXT: v_min_f64 v[0:1], v[0:1], v[4:5]
21182118
; GFX8-NEXT: v_mov_b32_e32 v8, v1
21192119
; GFX8-NEXT: v_mov_b32_e32 v7, v0
21202120
; GFX8-NEXT: buffer_atomic_cmpswap_x2 v[7:10], v6, s[16:19], 0 offen glc

0 commit comments

Comments
 (0)