@@ -1822,12 +1822,12 @@ define double @buffer_fat_ptr_agent_atomic_fmax_ret_f64__amdgpu_no_fine_grained_
18221822; GFX12-NEXT: s_wait_loadcnt 0x0
18231823; GFX12-NEXT: v_dual_mov_b32 v10, v1 :: v_dual_mov_b32 v9, v0
18241824; GFX12-NEXT: s_wait_storecnt 0x0
1825- ; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
1825+ ; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
1826+ ; GFX12-NEXT: v_dual_mov_b32 v3, v10 :: v_dual_mov_b32 v2, v9
18261827; GFX12-NEXT: v_max_num_f64_e32 v[0:1], v[9:10], v[9:10]
18271828; GFX12-NEXT: v_max_num_f64_e32 v[7:8], v[0:1], v[4:5]
18281829; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1)
18291830; GFX12-NEXT: v_dual_mov_b32 v0, v7 :: v_dual_mov_b32 v1, v8
1830- ; GFX12-NEXT: v_dual_mov_b32 v2, v9 :: v_dual_mov_b32 v3, v10
18311831; GFX12-NEXT: buffer_atomic_cmpswap_b64 v[0:3], v6, s[0:3], null offen th:TH_ATOMIC_RETURN
18321832; GFX12-NEXT: s_wait_loadcnt 0x0
18331833; GFX12-NEXT: global_inv scope:SCOPE_DEV
@@ -1864,12 +1864,12 @@ define double @buffer_fat_ptr_agent_atomic_fmax_ret_f64__amdgpu_no_fine_grained_
18641864; GFX11-NEXT: s_waitcnt vmcnt(0)
18651865; GFX11-NEXT: v_dual_mov_b32 v10, v1 :: v_dual_mov_b32 v9, v0
18661866; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
1867- ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
1867+ ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
1868+ ; GFX11-NEXT: v_dual_mov_b32 v3, v10 :: v_dual_mov_b32 v2, v9
18681869; GFX11-NEXT: v_max_f64 v[0:1], v[9:10], v[9:10]
18691870; GFX11-NEXT: v_max_f64 v[7:8], v[0:1], v[4:5]
18701871; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
18711872; GFX11-NEXT: v_dual_mov_b32 v0, v7 :: v_dual_mov_b32 v1, v8
1872- ; GFX11-NEXT: v_dual_mov_b32 v2, v9 :: v_dual_mov_b32 v3, v10
18731873; GFX11-NEXT: buffer_atomic_cmpswap_b64 v[0:3], v6, s[0:3], 0 offen glc
18741874; GFX11-NEXT: s_waitcnt vmcnt(0)
18751875; GFX11-NEXT: buffer_gl1_inv
@@ -1918,11 +1918,11 @@ define double @buffer_fat_ptr_agent_atomic_fmax_ret_f64__amdgpu_no_fine_grained_
19181918; GFX908-NEXT: v_mov_b32_e32 v10, v1
19191919; GFX908-NEXT: v_mov_b32_e32 v9, v0
19201920; GFX908-NEXT: v_max_f64 v[0:1], v[9:10], v[9:10]
1921+ ; GFX908-NEXT: v_mov_b32_e32 v2, v9
1922+ ; GFX908-NEXT: v_mov_b32_e32 v3, v10
19211923; GFX908-NEXT: v_max_f64 v[7:8], v[0:1], v[4:5]
19221924; GFX908-NEXT: v_mov_b32_e32 v0, v7
19231925; GFX908-NEXT: v_mov_b32_e32 v1, v8
1924- ; GFX908-NEXT: v_mov_b32_e32 v2, v9
1925- ; GFX908-NEXT: v_mov_b32_e32 v3, v10
19261926; GFX908-NEXT: buffer_atomic_cmpswap_x2 v[0:3], v6, s[16:19], 0 offen glc
19271927; GFX908-NEXT: s_waitcnt vmcnt(0)
19281928; GFX908-NEXT: buffer_wbinvl1
@@ -1949,11 +1949,11 @@ define double @buffer_fat_ptr_agent_atomic_fmax_ret_f64__amdgpu_no_fine_grained_
19491949; GFX8-NEXT: v_mov_b32_e32 v10, v1
19501950; GFX8-NEXT: v_mov_b32_e32 v9, v0
19511951; GFX8-NEXT: v_max_f64 v[0:1], v[9:10], v[9:10]
1952+ ; GFX8-NEXT: v_mov_b32_e32 v2, v9
1953+ ; GFX8-NEXT: v_mov_b32_e32 v3, v10
19521954; GFX8-NEXT: v_max_f64 v[7:8], v[0:1], v[4:5]
19531955; GFX8-NEXT: v_mov_b32_e32 v0, v7
19541956; GFX8-NEXT: v_mov_b32_e32 v1, v8
1955- ; GFX8-NEXT: v_mov_b32_e32 v2, v9
1956- ; GFX8-NEXT: v_mov_b32_e32 v3, v10
19571957; GFX8-NEXT: buffer_atomic_cmpswap_x2 v[0:3], v6, s[16:19], 0 offen glc
19581958; GFX8-NEXT: s_waitcnt vmcnt(0)
19591959; GFX8-NEXT: buffer_wbinvl1
@@ -1993,10 +1993,10 @@ define void @buffer_fat_ptr_agent_atomic_fmax_noret_f64__amdgpu_no_fine_grained_
19931993; GFX12-NEXT: ; =>This Inner Loop Header: Depth=1
19941994; GFX12-NEXT: s_wait_loadcnt 0x0
19951995; GFX12-NEXT: v_max_num_f64_e32 v[0:1], v[2:3], v[2:3]
1996+ ; GFX12-NEXT: v_dual_mov_b32 v10, v3 :: v_dual_mov_b32 v9, v2
19961997; GFX12-NEXT: s_wait_storecnt 0x0
1997- ; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1 ) | instskip(SKIP_1 ) | instid1(VALU_DEP_2 )
1998+ ; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_2 ) | instskip(NEXT ) | instid1(VALU_DEP_1 )
19981999; GFX12-NEXT: v_max_num_f64_e32 v[0:1], v[0:1], v[4:5]
1999- ; GFX12-NEXT: v_dual_mov_b32 v10, v3 :: v_dual_mov_b32 v9, v2
20002000; GFX12-NEXT: v_dual_mov_b32 v8, v1 :: v_dual_mov_b32 v7, v0
20012001; GFX12-NEXT: buffer_atomic_cmpswap_b64 v[7:10], v6, s[0:3], null offen th:TH_ATOMIC_RETURN
20022002; GFX12-NEXT: s_wait_loadcnt 0x0
@@ -2033,10 +2033,10 @@ define void @buffer_fat_ptr_agent_atomic_fmax_noret_f64__amdgpu_no_fine_grained_
20332033; GFX11-NEXT: ; =>This Inner Loop Header: Depth=1
20342034; GFX11-NEXT: s_waitcnt vmcnt(0)
20352035; GFX11-NEXT: v_max_f64 v[0:1], v[2:3], v[2:3]
2036+ ; GFX11-NEXT: v_dual_mov_b32 v10, v3 :: v_dual_mov_b32 v9, v2
20362037; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
2037- ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1 ) | instskip(SKIP_1 ) | instid1(VALU_DEP_2 )
2038+ ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2 ) | instskip(NEXT ) | instid1(VALU_DEP_1 )
20382039; GFX11-NEXT: v_max_f64 v[0:1], v[0:1], v[4:5]
2039- ; GFX11-NEXT: v_dual_mov_b32 v10, v3 :: v_dual_mov_b32 v9, v2
20402040; GFX11-NEXT: v_dual_mov_b32 v8, v1 :: v_dual_mov_b32 v7, v0
20412041; GFX11-NEXT: buffer_atomic_cmpswap_b64 v[7:10], v6, s[0:3], 0 offen glc
20422042; GFX11-NEXT: s_waitcnt vmcnt(0)
@@ -2083,9 +2083,9 @@ define void @buffer_fat_ptr_agent_atomic_fmax_noret_f64__amdgpu_no_fine_grained_
20832083; GFX908-NEXT: ; =>This Inner Loop Header: Depth=1
20842084; GFX908-NEXT: s_waitcnt vmcnt(0)
20852085; GFX908-NEXT: v_max_f64 v[0:1], v[2:3], v[2:3]
2086- ; GFX908-NEXT: v_max_f64 v[0:1], v[0:1], v[4:5]
20872086; GFX908-NEXT: v_mov_b32_e32 v10, v3
20882087; GFX908-NEXT: v_mov_b32_e32 v9, v2
2088+ ; GFX908-NEXT: v_max_f64 v[0:1], v[0:1], v[4:5]
20892089; GFX908-NEXT: v_mov_b32_e32 v8, v1
20902090; GFX908-NEXT: v_mov_b32_e32 v7, v0
20912091; GFX908-NEXT: buffer_atomic_cmpswap_x2 v[7:10], v6, s[16:19], 0 offen glc
@@ -2112,9 +2112,9 @@ define void @buffer_fat_ptr_agent_atomic_fmax_noret_f64__amdgpu_no_fine_grained_
21122112; GFX8-NEXT: ; =>This Inner Loop Header: Depth=1
21132113; GFX8-NEXT: s_waitcnt vmcnt(0)
21142114; GFX8-NEXT: v_max_f64 v[0:1], v[2:3], v[2:3]
2115- ; GFX8-NEXT: v_max_f64 v[0:1], v[0:1], v[4:5]
21162115; GFX8-NEXT: v_mov_b32_e32 v10, v3
21172116; GFX8-NEXT: v_mov_b32_e32 v9, v2
2117+ ; GFX8-NEXT: v_max_f64 v[0:1], v[0:1], v[4:5]
21182118; GFX8-NEXT: v_mov_b32_e32 v8, v1
21192119; GFX8-NEXT: v_mov_b32_e32 v7, v0
21202120; GFX8-NEXT: buffer_atomic_cmpswap_x2 v[7:10], v6, s[16:19], 0 offen glc
0 commit comments