Skip to content

Commit 7bdbd3a

Browse files
committed
Encoded liveness info as MO_laneMask using the COPY_LANEMASK instruction.
1 parent ef8e4f7 commit 7bdbd3a

17 files changed

+104
-73
lines changed

llvm/include/llvm/CodeGen/MachineInstr.h

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1473,8 +1473,9 @@ class MachineInstr
14731473

14741474
/// Return true if the instruction is an identity copy.
14751475
bool isIdentityCopy() const {
1476-
return isCopy() && getOperand(0).getReg() == getOperand(1).getReg() &&
1477-
getOperand(0).getSubReg() == getOperand(1).getSubReg();
1476+
return (isCopy() || isCopyLaneMask()) &&
1477+
getOperand(0).getReg() == getOperand(1).getReg() &&
1478+
getOperand(0).getSubReg() == getOperand(1).getSubReg();
14781479
}
14791480

14801481
/// Return true if this is a transient instruction that is either very likely

llvm/include/llvm/Target/Target.td

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1346,7 +1346,7 @@ def REG_SEQUENCE : StandardPseudoInstruction {
13461346
}
13471347
def COPY : StandardPseudoInstruction {
13481348
let OutOperandList = (outs unknown:$dst);
1349-
let InOperandList = (ins unknown:$src, variable_ops);
1349+
let InOperandList = (ins unknown:$src);
13501350
let AsmString = "";
13511351
let hasSideEffects = false;
13521352
let isAsCheapAsAMove = true;

llvm/lib/CodeGen/ExpandPostRAPseudos.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -168,6 +168,7 @@ bool ExpandPostRA::run(MachineFunction &MF) {
168168
MadeChange |= LowerSubregToReg(&MI);
169169
break;
170170
case TargetOpcode::COPY:
171+
case TargetOpcode::COPY_LANEMASK:
171172
TII->lowerCopy(&MI, TRI);
172173
MadeChange = true;
173174
break;

llvm/lib/CodeGen/TargetInstrInfo.cpp

Lines changed: 14 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -884,12 +884,21 @@ void TargetInstrInfo::lowerCopy(
884884
if (IdentityCopy || SrcMO.isUndef()) {
885885
// No need to insert an identity copy instruction, but replace with a KILL
886886
// if liveness is changed.
887-
if (SrcMO.isUndef() || MI->getNumOperands() > 2) {
887+
if (MI->getOpcode() == TargetOpcode::COPY &&
888+
(SrcMO.isUndef() || MI->getNumOperands() > 2)) {
888889
// We must make sure the super-register gets killed. Replace the
889890
// instruction with KILL.
890891
MI->setDesc(get(TargetOpcode::KILL));
891892
return;
892893
}
894+
if (MI->getOpcode() == TargetOpcode::COPY_LANEMASK &&
895+
(SrcMO.isUndef() || MI->getNumOperands() > 3)) {
896+
// We must make sure the super-register gets killed. Replace the
897+
// instruction with KILL.
898+
MI->setDesc(get(TargetOpcode::KILL));
899+
return;
900+
}
901+
893902
// Vanilla identity copy.
894903
MI->eraseFromParent();
895904
return;
@@ -900,7 +909,10 @@ void TargetInstrInfo::lowerCopy(
900909
DstMO.getReg().isPhysical() ? DstMO.isRenamable() : false,
901910
SrcMO.getReg().isPhysical() ? SrcMO.isRenamable() : false);
902911

903-
if (MI->getNumOperands() > 2)
912+
if (MI->getOpcode() == TargetOpcode::COPY && MI->getNumOperands() > 2)
913+
transferImplicitOperands(MI, &TRI);
914+
if (MI->getOpcode() == TargetOpcode::COPY_LANEMASK &&
915+
MI->getNumOperands() > 3)
904916
transferImplicitOperands(MI, &TRI);
905917
MI->eraseFromParent();
906918
}

llvm/lib/CodeGen/VirtRegMap.cpp

Lines changed: 33 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -213,8 +213,8 @@ class VirtRegRewriter {
213213
void rewrite();
214214
void addMBBLiveIns();
215215
bool readsUndefSubreg(const MachineOperand &MO) const;
216-
uint64_t calcLiveRegUnitMask(const MachineOperand &MO,
217-
MCRegister PhysReg) const;
216+
LaneBitmask calcLiveRegUnitMask(const MachineOperand &MO,
217+
MCRegister PhysReg) const;
218218
void addLiveInsForSubRanges(const LiveInterval &LI, MCRegister PhysReg) const;
219219
void handleIdentityCopy(MachineInstr &MI);
220220
void expandCopyBundle(MachineInstr &MI) const;
@@ -476,11 +476,11 @@ bool VirtRegRewriter::readsUndefSubreg(const MachineOperand &MO) const {
476476
return true;
477477
}
478478

479-
// Return LaneBitmask value as uint64_t for PhysReg assigned to MO,
479+
// Return LaneBitmask value for PhysReg assigned to MO,
480480
// representing its live register units at its parent MI. In case of undef or
481481
// fully live (or fully dead) MO, return LaneBitmask::getNone().
482-
uint64_t VirtRegRewriter::calcLiveRegUnitMask(const MachineOperand &MO,
483-
MCRegister PhysReg) const {
482+
LaneBitmask VirtRegRewriter::calcLiveRegUnitMask(const MachineOperand &MO,
483+
MCRegister PhysReg) const {
484484
Register Reg = MO.getReg();
485485
const LiveInterval &LI = LIS->getInterval(Reg);
486486
const MachineInstr &MI = *MO.getParent();
@@ -492,20 +492,20 @@ uint64_t VirtRegRewriter::calcLiveRegUnitMask(const MachineOperand &MO,
492492
: LaneBitmask::getNone());
493493

494494
LaneBitmask LiveRegUnitMask;
495-
DenseSet<unsigned> LiveRegUnits;
495+
DenseSet<MCRegUnit> LiveRegUnits;
496496

497497
// dbgs() << "\n********** " << printReg(Reg, TRI) << "[ " <<
498498
// printReg(PhysReg, TRI) << " ]" << " **********\n";
499499

500500
if (MO.isUndef())
501-
return 0u;
501+
return LaneBitmask::getNone();
502502

503503
assert(LI.liveAt(MIIndex) &&
504504
"Reads of completely dead register should be marked undef already");
505505

506506
if (LI.hasSubRanges()) {
507507
for (MCRegUnitMaskIterator Units(PhysReg, TRI); Units.isValid(); ++Units) {
508-
unsigned Unit = (*Units).first;
508+
MCRegUnit Unit = (*Units).first;
509509
LaneBitmask Mask = (*Units).second;
510510
for (const LiveInterval::SubRange &S : LI.subranges()) {
511511
if ((S.LaneMask & UseMask & Mask).any() && S.liveAt(MIIndex)) {
@@ -515,7 +515,7 @@ uint64_t VirtRegRewriter::calcLiveRegUnitMask(const MachineOperand &MO,
515515
}
516516
} else {
517517
for (MCRegUnitMaskIterator Units(PhysReg, TRI); Units.isValid(); ++Units) {
518-
unsigned Unit = (*Units).first;
518+
MCRegUnit Unit = (*Units).first;
519519
const LiveRange &UnitRange = LIS->getRegUnit(Unit);
520520
LaneBitmask Mask = (*Units).second;
521521

@@ -531,7 +531,7 @@ uint64_t VirtRegRewriter::calcLiveRegUnitMask(const MachineOperand &MO,
531531
}
532532

533533
for (MCRegUnitMaskIterator Units(PhysReg, TRI); Units.isValid(); ++Units) {
534-
unsigned Unit = (*Units).first;
534+
MCRegUnit Unit = (*Units).first;
535535
LaneBitmask Mask = (*Units).second;
536536
if (LiveRegUnits.count(Unit)) {
537537
// dbgs() << "LIVE DEF UNIT : " << printRegUnit(Unit, TRI) << '\n';
@@ -541,10 +541,13 @@ uint64_t VirtRegRewriter::calcLiveRegUnitMask(const MachineOperand &MO,
541541

542542
// dbgs() << "UseMask : " << PrintLaneMask(UseMask) << '\n';
543543
// dbgs() << "LiveRegUnitMask : " << PrintLaneMask(LiveRegUnitMask) << '\n';
544-
if (UseMask == LiveRegUnitMask)
545-
return 0u;
544+
// If all lanes are live or dead, no need to create a COPY_LANEMASK
545+
// instruction.
546+
if (LiveRegUnitMask.all() || LiveRegUnitMask.none() ||
547+
LiveRegUnitMask == UseMask)
548+
return LaneBitmask::getNone();
546549

547-
return LiveRegUnitMask.getAsInteger();
550+
return LiveRegUnitMask;
548551
}
549552

550553
void VirtRegRewriter::handleIdentityCopy(MachineInstr &MI) {
@@ -568,11 +571,14 @@ void VirtRegRewriter::handleIdentityCopy(MachineInstr &MI) {
568571
// give us additional liveness information: The target (super-)register
569572
// must not be valid before this point. Replace the COPY with a KILL
570573
// instruction to maintain this information.
571-
572-
// Avoid a COPY with exactly 3 operands whose third operand is the Mask, as
573-
// it is the same as a COPY with no additional liveness information.
574-
if (MI.getOperand(1).isUndef() || MI.getNumOperands() > 3 ||
575-
(MI.getNumOperands() == 3 && !MI.getOperand(2).isImm())) {
574+
if (MI.getOpcode() == TargetOpcode::COPY &&
575+
(MI.getOperand(1).isUndef() || MI.getNumOperands() > 2)) {
576+
MI.setDesc(TII->get(TargetOpcode::KILL));
577+
LLVM_DEBUG(dbgs() << " replace by: " << MI);
578+
return;
579+
}
580+
if (MI.getOpcode() == TargetOpcode::COPY_LANEMASK &&
581+
(MI.getOperand(1).isUndef() || MI.getNumOperands() > 3)) {
576582
MI.setDesc(TII->get(TargetOpcode::KILL));
577583
LLVM_DEBUG(dbgs() << " replace by: " << MI);
578584
return;
@@ -718,14 +724,14 @@ void VirtRegRewriter::rewrite() {
718724
SmallVector<Register, 8> SuperDeads;
719725
SmallVector<Register, 8> SuperDefs;
720726
SmallVector<Register, 8> SuperKills;
721-
uint64_t Mask;
727+
LaneBitmask LaneMask;
722728

723729
for (MachineFunction::iterator MBBI = MF->begin(), MBBE = MF->end();
724730
MBBI != MBBE; ++MBBI) {
725731
LLVM_DEBUG(MBBI->print(dbgs(), Indexes));
726732
for (MachineInstr &MI : llvm::make_early_inc_range(MBBI->instrs())) {
727733
// reset for each MI.
728-
Mask = 0u;
734+
LaneMask = LaneBitmask::getNone();
729735
for (MachineOperand &MO : MI.operands()) {
730736
// Make sure MRI knows about registers clobbered by regmasks.
731737
if (MO.isRegMask())
@@ -744,7 +750,7 @@ void VirtRegRewriter::rewrite() {
744750
assert(!MRI->isReserved(PhysReg) && "Reserved register assignment");
745751

746752
if (MO.isUse() && MI.isCopy())
747-
Mask = calcLiveRegUnitMask(MO, PhysReg);
753+
LaneMask = calcLiveRegUnitMask(MO, PhysReg);
748754

749755
// Preserve semantics of sub-register operands.
750756
unsigned SubReg = MO.getSubReg();
@@ -822,9 +828,12 @@ void VirtRegRewriter::rewrite() {
822828
MO.setIsRenamable(true);
823829
}
824830

825-
// Add LaneBitmask as MO_Imm
826-
if (MI.isCopy() && Mask)
827-
MI.addOperand(*MF, MachineOperand::CreateImm(Mask));
831+
// If the source is only partially live, replace the COPY instruction with
832+
// a COPY_LANEMASK instruction carrying the mask of the live lanes.
833+
if (MI.isCopy() && LaneMask.any()) {
834+
MI.setDesc(TII->get(TargetOpcode::COPY_LANEMASK));
835+
MI.addOperand(*MF, MachineOperand::CreateLaneMask(LaneMask));
836+
}
828837

829838
// Add any missing super-register kills after rewriting the whole
830839
// instruction.

llvm/lib/Target/AMDGPU/SIInstrInfo.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -820,8 +820,8 @@ void SIInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
820820
unsigned SrcSize = RI.getRegSizeInBits(*SrcRC);
821821

822822
uint64_t LiveRegUnitMaskVal = 0;
823-
if (MI->getNumOperands() > 2 && MI->getOperand(2).isImm()) {
824-
LiveRegUnitMaskVal = MI->getOperand(2).getImm();
823+
if (MI->getOpcode() == TargetOpcode::COPY_LANEMASK) {
824+
LiveRegUnitMaskVal = MI->getOperand(2).getLaneMask().getAsInteger();
825825
}
826826

827827
bool isSrcRegFullLive = LiveRegUnitMaskVal == 0;

llvm/lib/Target/RISCV/RISCVInstrInfo.cpp

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -262,7 +262,9 @@ static bool isConvertibleToVMV_V_V(const RISCVSubtarget &STI,
262262
if (PreferWholeRegisterMove)
263263
return false;
264264

265-
assert(MBBI->getOpcode() == TargetOpcode::COPY &&
265+
// TODO : Support COPY_LANEMASK instruction.
266+
assert((MBBI->getOpcode() == TargetOpcode::COPY ||
267+
MBBI->getOpcode() == TargetOpcode::COPY_LANEMASK) &&
266268
"Unexpected COPY instruction.");
267269
Register SrcReg = MBBI->getOperand(1).getReg();
268270
const TargetRegisterInfo *TRI = STI.getRegisterInfo();

llvm/test/CodeGen/AMDGPU/accvgpr-copy.mir

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1375,7 +1375,7 @@ body: |
13751375
; GFX942-NEXT: $agpr6 = V_ACCVGPR_WRITE_B32_e64 $sgpr2, implicit $exec, implicit $exec
13761376
; GFX942-NEXT: S_ENDPGM 0, implicit $agpr4_agpr5_agpr6_agpr7, implicit $sgpr0_sgpr1_sgpr2_sgpr3
13771377
S_NOP 0, implicit-def dead $sgpr0_sgpr1
1378-
renamable $agpr4_agpr5_agpr6_agpr7 = COPY renamable $sgpr0_sgpr1_sgpr2_sgpr3, 240, implicit $exec
1378+
renamable $agpr4_agpr5_agpr6_agpr7 = COPY_LANEMASK renamable $sgpr0_sgpr1_sgpr2_sgpr3, lanemask(240), implicit $exec
13791379
S_ENDPGM 0, implicit $agpr4_agpr5_agpr6_agpr7, implicit $sgpr0_sgpr1_sgpr2_sgpr3
13801380
...
13811381
---
@@ -1411,7 +1411,7 @@ body: |
14111411
; GFX942-NEXT: $agpr6 = V_ACCVGPR_WRITE_B32_e64 killed $sgpr2, implicit $exec, implicit $exec
14121412
; GFX942-NEXT: S_ENDPGM 0, implicit $agpr4_agpr5_agpr6_agpr7
14131413
S_NOP 0, implicit-def dead $sgpr0_sgpr1
1414-
renamable $agpr4_agpr5_agpr6_agpr7 = COPY renamable killed $sgpr0_sgpr1_sgpr2_sgpr3, 240, implicit $exec
1414+
renamable $agpr4_agpr5_agpr6_agpr7 = COPY_LANEMASK renamable killed $sgpr0_sgpr1_sgpr2_sgpr3, lanemask(240), implicit $exec
14151415
S_ENDPGM 0, implicit $agpr4_agpr5_agpr6_agpr7
14161416
...
14171417

@@ -1448,7 +1448,7 @@ body: |
14481448
; GFX942-NEXT: $agpr6 = V_ACCVGPR_MOV_B32 $agpr2, implicit $exec, implicit $exec
14491449
; GFX942-NEXT: S_ENDPGM 0, implicit $agpr4_agpr5_agpr6_agpr7, implicit $agpr0_agpr1_agpr2_agpr3
14501450
S_NOP 0, implicit-def dead $agpr0_agpr1
1451-
renamable $agpr4_agpr5_agpr6_agpr7 = COPY renamable $agpr0_agpr1_agpr2_agpr3, 240, implicit $exec
1451+
renamable $agpr4_agpr5_agpr6_agpr7 = COPY_LANEMASK renamable $agpr0_agpr1_agpr2_agpr3, lanemask(240), implicit $exec
14521452
S_ENDPGM 0, implicit $agpr4_agpr5_agpr6_agpr7, implicit $agpr0_agpr1_agpr2_agpr3
14531453
...
14541454

@@ -1485,7 +1485,7 @@ body: |
14851485
; GFX942-NEXT: $agpr6 = V_ACCVGPR_MOV_B32 killed $agpr2, implicit $exec, implicit $exec
14861486
; GFX942-NEXT: S_ENDPGM 0, implicit $agpr4_agpr5_agpr6_agpr7
14871487
S_NOP 0, implicit-def dead $agpr0_agpr1
1488-
renamable $agpr4_agpr5_agpr6_agpr7 = COPY renamable killed $agpr0_agpr1_agpr2_agpr3, 240, implicit $exec
1488+
renamable $agpr4_agpr5_agpr6_agpr7 = COPY_LANEMASK renamable killed $agpr0_agpr1_agpr2_agpr3, lanemask(240), implicit $exec
14891489
S_ENDPGM 0, implicit $agpr4_agpr5_agpr6_agpr7
14901490
...
14911491

llvm/test/CodeGen/AMDGPU/carryout-selection.ll

Lines changed: 13 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -110,8 +110,9 @@ define amdgpu_kernel void @sadd64rr(ptr addrspace(1) %out, i64 %a, i64 %b) {
110110
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
111111
; GFX11-NEXT: s_add_u32 s2, s2, s4
112112
; GFX11-NEXT: s_addc_u32 s3, s3, s5
113-
; GFX11-NEXT: v_mov_b32_e32 v0, s2
113+
; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
114114
; GFX11-NEXT: v_dual_mov_b32 v2, 0 :: v_dual_mov_b32 v1, s3
115+
; GFX11-NEXT: v_mov_b32_e32 v0, s2
115116
; GFX11-NEXT: global_store_b64 v2, v[0:1], s[0:1]
116117
; GFX11-NEXT: s_endpgm
117118
;
@@ -222,8 +223,9 @@ define amdgpu_kernel void @sadd64ri(ptr addrspace(1) %out, i64 %a) {
222223
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
223224
; GFX11-NEXT: s_add_u32 s2, s2, 0x56789876
224225
; GFX11-NEXT: s_addc_u32 s3, s3, 0x1234
225-
; GFX11-NEXT: v_mov_b32_e32 v0, s2
226+
; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
226227
; GFX11-NEXT: v_dual_mov_b32 v2, 0 :: v_dual_mov_b32 v1, s3
228+
; GFX11-NEXT: v_mov_b32_e32 v0, s2
227229
; GFX11-NEXT: global_store_b64 v2, v[0:1], s[0:1]
228230
; GFX11-NEXT: s_endpgm
229231
;
@@ -1087,8 +1089,9 @@ define amdgpu_kernel void @ssub64rr(ptr addrspace(1) %out, i64 %a, i64 %b) {
10871089
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
10881090
; GFX11-NEXT: s_sub_u32 s2, s2, s4
10891091
; GFX11-NEXT: s_subb_u32 s3, s3, s5
1090-
; GFX11-NEXT: v_mov_b32_e32 v0, s2
1092+
; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
10911093
; GFX11-NEXT: v_dual_mov_b32 v2, 0 :: v_dual_mov_b32 v1, s3
1094+
; GFX11-NEXT: v_mov_b32_e32 v0, s2
10921095
; GFX11-NEXT: global_store_b64 v2, v[0:1], s[0:1]
10931096
; GFX11-NEXT: s_endpgm
10941097
;
@@ -1199,8 +1202,9 @@ define amdgpu_kernel void @ssub64ri(ptr addrspace(1) %out, i64 %a) {
11991202
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
12001203
; GFX11-NEXT: s_sub_u32 s2, 0x56789876, s2
12011204
; GFX11-NEXT: s_subb_u32 s3, 0x1234, s3
1202-
; GFX11-NEXT: v_mov_b32_e32 v0, s2
1205+
; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
12031206
; GFX11-NEXT: v_dual_mov_b32 v2, 0 :: v_dual_mov_b32 v1, s3
1207+
; GFX11-NEXT: v_mov_b32_e32 v0, s2
12041208
; GFX11-NEXT: global_store_b64 v2, v[0:1], s[0:1]
12051209
; GFX11-NEXT: s_endpgm
12061210
;
@@ -2420,8 +2424,8 @@ define amdgpu_kernel void @sudiv64(ptr addrspace(1) %out, i64 %x, i64 %y) {
24202424
; GFX9-NEXT: s_cmp_ge_u32 s2, s6
24212425
; GFX9-NEXT: s_cselect_b32 s8, s4, s3
24222426
; GFX9-NEXT: .LBB16_3:
2423-
; GFX9-NEXT: v_mov_b32_e32 v0, s8
24242427
; GFX9-NEXT: v_mov_b32_e32 v2, 0
2428+
; GFX9-NEXT: v_mov_b32_e32 v0, s8
24252429
; GFX9-NEXT: v_mov_b32_e32 v1, s9
24262430
; GFX9-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1]
24272431
; GFX9-NEXT: s_endpgm
@@ -2573,8 +2577,8 @@ define amdgpu_kernel void @sudiv64(ptr addrspace(1) %out, i64 %x, i64 %y) {
25732577
; GFX1010-NEXT: s_cmp_ge_u32 s2, s6
25742578
; GFX1010-NEXT: s_cselect_b32 s8, s4, s3
25752579
; GFX1010-NEXT: .LBB16_3:
2576-
; GFX1010-NEXT: v_mov_b32_e32 v0, s8
25772580
; GFX1010-NEXT: v_mov_b32_e32 v2, 0
2581+
; GFX1010-NEXT: v_mov_b32_e32 v0, s8
25782582
; GFX1010-NEXT: v_mov_b32_e32 v1, s9
25792583
; GFX1010-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1]
25802584
; GFX1010-NEXT: s_endpgm
@@ -2726,8 +2730,8 @@ define amdgpu_kernel void @sudiv64(ptr addrspace(1) %out, i64 %x, i64 %y) {
27262730
; GFX1030W32-NEXT: s_cmp_ge_u32 s2, s4
27272731
; GFX1030W32-NEXT: s_cselect_b32 s8, s5, s3
27282732
; GFX1030W32-NEXT: .LBB16_3:
2729-
; GFX1030W32-NEXT: v_mov_b32_e32 v0, s8
27302733
; GFX1030W32-NEXT: v_mov_b32_e32 v2, 0
2734+
; GFX1030W32-NEXT: v_mov_b32_e32 v0, s8
27312735
; GFX1030W32-NEXT: v_mov_b32_e32 v1, s9
27322736
; GFX1030W32-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1]
27332737
; GFX1030W32-NEXT: s_endpgm
@@ -2878,8 +2882,8 @@ define amdgpu_kernel void @sudiv64(ptr addrspace(1) %out, i64 %x, i64 %y) {
28782882
; GFX1030W64-NEXT: s_cmp_ge_u32 s2, s4
28792883
; GFX1030W64-NEXT: s_cselect_b32 s6, s5, s3
28802884
; GFX1030W64-NEXT: .LBB16_3:
2881-
; GFX1030W64-NEXT: v_mov_b32_e32 v0, s6
28822885
; GFX1030W64-NEXT: v_mov_b32_e32 v2, 0
2886+
; GFX1030W64-NEXT: v_mov_b32_e32 v0, s6
28832887
; GFX1030W64-NEXT: v_mov_b32_e32 v1, s7
28842888
; GFX1030W64-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1]
28852889
; GFX1030W64-NEXT: s_endpgm
@@ -3046,9 +3050,8 @@ define amdgpu_kernel void @sudiv64(ptr addrspace(1) %out, i64 %x, i64 %y) {
30463050
; GFX11-NEXT: s_cmp_ge_u32 s2, s4
30473051
; GFX11-NEXT: s_cselect_b32 s8, s5, s3
30483052
; GFX11-NEXT: .LBB16_3:
3049-
; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
3050-
; GFX11-NEXT: v_mov_b32_e32 v0, s8
30513053
; GFX11-NEXT: v_dual_mov_b32 v2, 0 :: v_dual_mov_b32 v1, s9
3054+
; GFX11-NEXT: v_mov_b32_e32 v0, s8
30523055
; GFX11-NEXT: global_store_b64 v2, v[0:1], s[0:1]
30533056
; GFX11-NEXT: s_endpgm
30543057
; GFX11-NEXT: .LBB16_4:

llvm/test/CodeGen/AMDGPU/copy-phys-reg-implicit-operand-kills-subregs.mir

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@ body: |
1717
; CHECK-NEXT: $vgpr7 = V_MOV_B32_e32 killed $vgpr10, implicit $exec, implicit $vgpr7_vgpr8_vgpr9_vgpr10
1818
; CHECK-NEXT: S_ENDPGM 0, implicit $vgpr7
1919
renamable $vgpr7_vgpr8_vgpr9_vgpr10 = IMPLICIT_DEF
20-
renamable $vgpr7_vgpr8 = COPY killed renamable $vgpr10_vgpr11, 3, implicit killed $vgpr7_vgpr8_vgpr9_vgpr10
20+
renamable $vgpr7_vgpr8 = COPY_LANEMASK killed renamable $vgpr10_vgpr11, lanemask(3), implicit killed $vgpr7_vgpr8_vgpr9_vgpr10
2121
S_ENDPGM 0, implicit $vgpr7
2222
2323
...

0 commit comments

Comments
 (0)