diff --git a/llvm/lib/Target/RISCV/RISCVLoadStoreOptimizer.cpp b/llvm/lib/Target/RISCV/RISCVLoadStoreOptimizer.cpp index a22ab6bfc04b8..f1827dcf174f3 100644 --- a/llvm/lib/Target/RISCV/RISCVLoadStoreOptimizer.cpp +++ b/llvm/lib/Target/RISCV/RISCVLoadStoreOptimizer.cpp @@ -70,6 +70,12 @@ struct RISCVLoadStoreOpt : public MachineFunctionPass { // Convert load/store pairs to single instructions. bool tryConvertToLdStPair(MachineBasicBlock::iterator First, MachineBasicBlock::iterator Second); + bool tryConvertToXqcilsmLdStPair(MachineFunction *MF, + MachineBasicBlock::iterator First, + MachineBasicBlock::iterator Second); + bool tryConvertToMIPSLdStPair(MachineFunction *MF, + MachineBasicBlock::iterator First, + MachineBasicBlock::iterator Second); // Scan the instructions looking for a load/store that can be combined // with the current instruction into a load/store pair. @@ -114,7 +120,7 @@ bool RISCVLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) { ModifiedRegUnits.init(*TRI); UsedRegUnits.init(*TRI); - if (Subtarget.useMIPSLoadStorePairs()) { + if (Subtarget.useMIPSLoadStorePairs() || Subtarget.hasVendorXqcilsm()) { for (MachineBasicBlock &MBB : Fn) { LLVM_DEBUG(dbgs() << "MBB: " << MBB.getName() << "\n"); @@ -168,14 +174,93 @@ bool RISCVLoadStoreOpt::tryToPairLdStInst(MachineBasicBlock::iterator &MBBI) { return false; } -// Merge two adjacent load/store instructions into a paired instruction -// (LDP/SDP/SWP/LWP) if the effective address is 8-byte aligned in case of -// SWP/LWP 16-byte aligned in case of LDP/SDP. This function selects the -// appropriate paired opcode, verifies that the memory operand is properly -// aligned, and checks that the offset is valid. If all conditions are met, it -// builds and inserts the paired instruction. 
-bool RISCVLoadStoreOpt::tryConvertToLdStPair( - MachineBasicBlock::iterator First, MachineBasicBlock::iterator Second) { +bool RISCVLoadStoreOpt::tryConvertToXqcilsmLdStPair( + MachineFunction *MF, MachineBasicBlock::iterator First, + MachineBasicBlock::iterator Second) { + unsigned Opc = First->getOpcode(); + if ((Opc != RISCV::LW && Opc != RISCV::SW) || Second->getOpcode() != Opc) + return false; + + const auto &FirstOp1 = First->getOperand(1); + const auto &SecondOp1 = Second->getOperand(1); + const auto &FirstOp2 = First->getOperand(2); + const auto &SecondOp2 = Second->getOperand(2); + + // Require simple reg+imm addressing for both. + if (!FirstOp1.isReg() || !SecondOp1.isReg() || !FirstOp2.isImm() || + !SecondOp2.isImm()) + return false; + + Register Base1 = FirstOp1.getReg(); + Register Base2 = SecondOp1.getReg(); + + if (Base1 != Base2) + return false; + + const MachineMemOperand *MMO = *First->memoperands_begin(); + Align MMOAlign = MMO->getAlign(); + + if (MMOAlign < Align(4)) + return false; + + auto &FirstOp0 = First->getOperand(0); + auto &SecondOp0 = Second->getOperand(0); + + int64_t Off1 = FirstOp2.getImm(); + int64_t Off2 = SecondOp2.getImm(); + + if (Off2 < Off1) { + std::swap(FirstOp0, SecondOp0); + std::swap(Off1, Off2); + } + + Register StartReg = FirstOp0.getReg(); + Register NextReg = SecondOp0.getReg(); + + if (StartReg == RISCV::X0 || NextReg == RISCV::X0) + return false; + + // If the base reg gets overwritten by one of the loads then bail out. + if (Opc == RISCV::LW && (StartReg == Base1 || NextReg == Base1)) + return false; + + if (!isShiftedUInt<5, 2>(Off1) || (Off2 - Off1 != 4)) + return false; + + if (NextReg != StartReg + 1) + return false; + + unsigned XqciOpc = (Opc == RISCV::LW) ? RISCV::QC_LWMI : RISCV::QC_SWMI; + + auto StartRegState = (Opc == RISCV::LW) ? RegState::Define + : getKillRegState(FirstOp0.isKill()); + auto NextRegState = + (Opc == RISCV::LW) + ? 
+ RegState::ImplicitDefine + : (RegState::Implicit | getKillRegState(SecondOp0.isKill())); + + DebugLoc DL = + First->getDebugLoc() ? First->getDebugLoc() : Second->getDebugLoc(); + MachineInstrBuilder MIB = BuildMI(*MF, DL, TII->get(XqciOpc)); + MIB.addReg(StartReg, StartRegState) + .addReg(Base1, getKillRegState(FirstOp1.isKill() || SecondOp1.isKill())) + .addImm(2) + .addImm(Off1) + .cloneMergedMemRefs({&*First, &*Second}) + .addReg(NextReg, NextRegState); + + First->getParent()->insert(First, MIB); + First->removeFromParent(); + Second->removeFromParent(); + + return true; +} + +bool RISCVLoadStoreOpt::tryConvertToMIPSLdStPair( + MachineFunction *MF, MachineBasicBlock::iterator First, + MachineBasicBlock::iterator Second) { + // Try converting to SWP/LWP/LDP/SDP. + // SWP/LWP requires 8-byte alignment whereas LDP/SDP needs 16-byte alignment. unsigned PairOpc; Align RequiredAlignment; switch (First->getOpcode()) { @@ -199,7 +284,6 @@ bool RISCVLoadStoreOpt::tryConvertToLdStPair( break; } - MachineFunction *MF = First->getMF(); const MachineMemOperand *MMO = *First->memoperands_begin(); Align MMOAlign = MMO->getAlign(); @@ -227,6 +311,24 @@ bool RISCVLoadStoreOpt::tryConvertToLdStPair( return true; } +// Merge two adjacent load/store instructions into a paired instruction. +// This function calls the vendor-specific implementation that selects the +// appropriate paired opcode, verifies that the memory operand is properly +// aligned, and checks that the offset is valid. If all conditions are met, it +// builds and inserts the paired instruction. +bool RISCVLoadStoreOpt::tryConvertToLdStPair( + MachineBasicBlock::iterator First, MachineBasicBlock::iterator Second) { + MachineFunction *MF = First->getMF(); + const RISCVSubtarget &STI = MF->getSubtarget(); + + // Try converting to QC_LWMI/QC_SWMI if the XQCILSM extension is enabled. 
+ if (!STI.is64Bit() && STI.hasVendorXqcilsm()) + return tryConvertToXqcilsmLdStPair(MF, First, Second); + + // Else try to convert them into MIPS Paired Loads/Stores. + return tryConvertToMIPSLdStPair(MF, First, Second); +} + static bool mayAlias(MachineInstr &MIa, SmallVectorImpl &MemInsns, AliasAnalysis *AA) { diff --git a/llvm/test/CodeGen/RISCV/xqcilsm-lwmi-swmi.mir b/llvm/test/CodeGen/RISCV/xqcilsm-lwmi-swmi.mir new file mode 100644 index 0000000000000..396f67326a7ca --- /dev/null +++ b/llvm/test/CodeGen/RISCV/xqcilsm-lwmi-swmi.mir @@ -0,0 +1,315 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 6 +# RUN: llc -mtriple=riscv32 -mattr=+experimental-xqcilsm -run-pass=riscv-load-store-opt %s -o - | FileCheck %s + +--- | + + define void @pair_two_lw_into_qc_lwmi() nounwind { ret void } + define void @pair_two_lw_into_qc_lwmi_reversed() nounwind { ret void } + define void @pair_two_sw_into_qc_swmi_reversed() nounwind { ret void } + define void @no_pair_if_different_base_regs() nounwind { ret void } + define void @no_pair_if_alignment_lt_4() nounwind { ret void } + define void @pair_two_sw_into_qc_swmi() nounwind { ret void } + define void @no_pair_if_misaligned() nounwind { ret void } + define void @pair_at_upper_boundary_lw() nounwind { ret void } + define void @pair_at_upper_boundary_sw() nounwind { ret void } + define void @no_pair_if_offset_out_of_range_lw() nounwind { ret void } + define void @no_pair_if_offset_out_of_range_sw() nounwind { ret void } + define void @no_pair_if_non_consecutive_regs() nounwind { ret void } + define void @no_pair_if_rd_is_x0() nounwind { ret void } + define void @no_pair_if_lw_rd_equals_base() nounwind { ret void } + define void @pair_if_not_adjacent() nounwind { ret void } + define void @pair_if_not_adjacent_use() nounwind { ret void } + define void @no_pair_if_not_adjacent_use() nounwind { ret void } +--- +name: pair_two_lw_into_qc_lwmi +tracksRegLiveness: false +body: | + 
bb.0: + liveins: $x10 + ; CHECK-LABEL: name: pair_two_lw_into_qc_lwmi + ; CHECK: liveins: $x10 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: $x12 = QC_LWMI $x10, 2, 0, implicit-def $x13 :: (load (s32)) + ; CHECK-NEXT: PseudoRET + $x12 = LW $x10, 0 :: (load (s32), align 4) + $x13 = LW $x10, 4 :: (load (s32), align 4) + PseudoRET + +... +--- +# FIXME: Kill flags are not propagated correctly for the base register +name: pair_two_lw_into_qc_lwmi_reversed +tracksRegLiveness: false +body: | + bb.0: + liveins: $x10 + ; CHECK-LABEL: name: pair_two_lw_into_qc_lwmi_reversed + ; CHECK: liveins: $x10 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: $x12 = QC_LWMI $x10, 2, 0, implicit-def $x13 :: (load (s32)) + ; CHECK-NEXT: PseudoRET + $x13 = LW $x10, 4 :: (load (s32)) + $x12 = LW killed $x10, 0 :: (load (s32)) + PseudoRET + +... +--- +name: pair_two_sw_into_qc_swmi_reversed +tracksRegLiveness: false +body: | + bb.0: + liveins: $x10, $x12, $x13 + ; CHECK-LABEL: name: pair_two_sw_into_qc_swmi_reversed + ; CHECK: liveins: $x10, $x12, $x13 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: QC_SWMI killed $x12, $x10, 2, 0, implicit killed $x13 :: (store (s32)) + ; CHECK-NEXT: PseudoRET + SW killed $x13, $x10, 4 :: (store (s32)) + SW killed $x12, $x10, 0 :: (store (s32)) + PseudoRET + +... +--- +name: no_pair_if_different_base_regs +tracksRegLiveness: false +body: | + bb.0: + liveins: $x10, $x11 + ; CHECK-LABEL: name: no_pair_if_different_base_regs + ; CHECK: liveins: $x10, $x11 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: $x12 = LW $x10, 0 :: (load (s32)) + ; CHECK-NEXT: $x13 = LW $x11, 4 :: (load (s32)) + ; CHECK-NEXT: PseudoRET + $x12 = LW $x10, 0 :: (load (s32)) + $x13 = LW $x11, 4 :: (load (s32)) + PseudoRET + +... 
+--- +name: no_pair_if_alignment_lt_4 +tracksRegLiveness: false +body: | + bb.0: + liveins: $x10 + ; CHECK-LABEL: name: no_pair_if_alignment_lt_4 + ; CHECK: liveins: $x10 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: $x12 = LW $x10, 0 :: (load (s32)) + ; CHECK-NEXT: $x13 = LW $x10, 3 :: (load (s32)) + ; CHECK-NEXT: PseudoRET + $x12 = LW $x10, 0 :: (load (s32)) + $x13 = LW $x10, 3 :: (load (s32)) + PseudoRET + +... +--- +name: pair_two_sw_into_qc_swmi +tracksRegLiveness: false +body: | + bb.0: + liveins: $x10, $x12, $x13 + ; CHECK-LABEL: name: pair_two_sw_into_qc_swmi + ; CHECK: liveins: $x10, $x12, $x13 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: QC_SWMI killed $x12, $x10, 2, 0, implicit killed $x13 :: (store (s32)) + ; CHECK-NEXT: PseudoRET + SW killed $x12, $x10, 0 :: (store (s32), align 4) + SW killed $x13, $x10, 4 :: (store (s32), align 4) + PseudoRET + +... +--- +name: no_pair_if_misaligned +tracksRegLiveness: false +body: | + bb.0: + liveins: $x10 + ; CHECK-LABEL: name: no_pair_if_misaligned + ; CHECK: liveins: $x10 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: $x12 = LW $x10, 2 :: (load (s32)) + ; CHECK-NEXT: $x13 = LW $x10, 6 :: (load (s32)) + ; CHECK-NEXT: PseudoRET + $x12 = LW $x10, 2 :: (load (s32), align 4) + $x13 = LW $x10, 6 :: (load (s32), align 4) + PseudoRET + +... +--- +# FIXME: Kill flags are not propagated correctly for the base register +name: pair_at_upper_boundary_lw +tracksRegLiveness: false +body: | + bb.0: + liveins: $x10 + ; CHECK-LABEL: name: pair_at_upper_boundary_lw + ; CHECK: liveins: $x10 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: $x12 = QC_LWMI $x10, 2, 124, implicit-def $x13 :: (load (s32)) + ; CHECK-NEXT: PseudoRET + $x12 = LW $x10, 124 :: (load (s32), align 4) + $x13 = LW killed $x10, 128 :: (load (s32), align 4) + PseudoRET + +... 
+--- +# FIXME: Kill flags are not propagated correctly for the base register +name: pair_at_upper_boundary_sw +tracksRegLiveness: false +body: | + bb.0: + liveins: $x10, $x12, $x13 + ; CHECK-LABEL: name: pair_at_upper_boundary_sw + ; CHECK: liveins: $x10, $x12, $x13 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: QC_SWMI $x12, $x10, 2, 124, implicit $x13 :: (store (s32)) + ; CHECK-NEXT: PseudoRET + SW $x12, $x10, 124 :: (store (s32), align 4) + SW $x13, killed $x10, 128 :: (store (s32), align 4) + PseudoRET + +... +--- +name: no_pair_if_offset_out_of_range_lw +tracksRegLiveness: false +body: | + bb.0: + liveins: $x10 + ; CHECK-LABEL: name: no_pair_if_offset_out_of_range_lw + ; CHECK: liveins: $x10 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: $x12 = LW $x10, 128 :: (load (s32)) + ; CHECK-NEXT: $x13 = LW $x10, 132 :: (load (s32)) + ; CHECK-NEXT: PseudoRET + $x12 = LW $x10, 128 :: (load (s32), align 4) + $x13 = LW $x10, 132 :: (load (s32), align 4) + PseudoRET + +... +--- +name: no_pair_if_offset_out_of_range_sw +tracksRegLiveness: false +body: | + bb.0: + liveins: $x10, $x12, $x13 + ; CHECK-LABEL: name: no_pair_if_offset_out_of_range_sw + ; CHECK: liveins: $x10, $x12, $x13 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: SW $x12, $x10, 128 :: (store (s32)) + ; CHECK-NEXT: SW $x13, $x10, 132 :: (store (s32)) + ; CHECK-NEXT: PseudoRET + SW $x12, $x10, 128 :: (store (s32), align 4) + SW $x13, $x10, 132 :: (store (s32), align 4) + PseudoRET + +... +--- +name: no_pair_if_non_consecutive_regs +tracksRegLiveness: false +body: | + bb.0: + liveins: $x10 + ; CHECK-LABEL: name: no_pair_if_non_consecutive_regs + ; CHECK: liveins: $x10 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: $x11 = LW $x10, 0 :: (load (s32)) + ; CHECK-NEXT: $x13 = LW $x10, 4 :: (load (s32)) + ; CHECK-NEXT: PseudoRET + $x11 = LW $x10, 0 :: (load (s32), align 4) + $x13 = LW $x10, 4 :: (load (s32), align 4) + PseudoRET + +... 
+--- +name: no_pair_if_rd_is_x0 +tracksRegLiveness: false +body: | + bb.0: + liveins: $x10 + ; CHECK-LABEL: name: no_pair_if_rd_is_x0 + ; CHECK: liveins: $x10 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: $x0 = LW $x10, 0 :: (load (s32)) + ; CHECK-NEXT: $x1 = LW $x10, 4 :: (load (s32)) + ; CHECK-NEXT: PseudoRET + $x0 = LW $x10, 0 :: (load (s32), align 4) + $x1 = LW $x10, 4 :: (load (s32), align 4) + PseudoRET + +... +--- +name: no_pair_if_lw_rd_equals_base +tracksRegLiveness: false +body: | + bb.0: + liveins: $x10 + ; CHECK-LABEL: name: no_pair_if_lw_rd_equals_base + ; CHECK: liveins: $x10 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: $x10 = LW $x10, 20 :: (load (s32)) + ; CHECK-NEXT: $x11 = LW $x10, 24 :: (load (s32)) + ; CHECK-NEXT: PseudoRET + $x10 = LW $x10, 20 :: (load (s32), align 4) + $x11 = LW $x10, 24 :: (load (s32), align 4) + PseudoRET + +... +--- +# FIXME: Kill flags are not propagated correctly for the base register +name: pair_if_not_adjacent +tracksRegLiveness: false +body: | + bb.0: + liveins: $x10 + ; CHECK-LABEL: name: pair_if_not_adjacent + ; CHECK: liveins: $x10 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: $x1 = QC_LWMI $x10, 2, 20, implicit-def $x2 :: (load (s32)) + ; CHECK-NEXT: $x3 = ADDI $x1, 10 + ; CHECK-NEXT: PseudoRET + $x1 = LW $x10, 20 :: (load (s32), align 4) + $x3 = ADDI $x1, 10 + $x2 = LW killed $x10, 24 :: (load (s32), align 4) + PseudoRET + +... +--- +name: pair_if_not_adjacent_use +tracksRegLiveness: false +body: | + bb.0: + liveins: $x10, $x1, $x2 + ; CHECK-LABEL: name: pair_if_not_adjacent_use + ; CHECK: liveins: $x10, $x1, $x2 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: $x2 = ADDI $x2, 10 + ; CHECK-NEXT: QC_SWMI $x1, $x10, 2, 20, implicit $x2 :: (store (s32)) + ; CHECK-NEXT: PseudoRET + SW $x1, $x10, 20 :: (store (s32), align 4) + $x2 = ADDI $x2, 10 + SW $x2, $x10, 24 :: (store (s32), align 4) + PseudoRET + +... 
+--- +name: no_pair_if_not_adjacent_use +tracksRegLiveness: false +body: | + bb.0: + liveins: $x10, $x2 + ; CHECK-LABEL: name: no_pair_if_not_adjacent_use + ; CHECK: liveins: $x10, $x2 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: $x1 = LW $x10, 20 :: (load (s32)) + ; CHECK-NEXT: $x1 = ADDI $x1, 10 + ; CHECK-NEXT: SW $x2, $x10, 40 :: (store (s32)) + ; CHECK-NEXT: $x2 = LW $x10, 24 :: (load (s32)) + ; CHECK-NEXT: PseudoRET + $x1 = LW $x10, 20 :: (load (s32), align 4) + $x1 = ADDI $x1, 10 + SW $x2, $x10, 40 :: (store (s32), align 4) + $x2 = LW $x10, 24 :: (load (s32), align 4) + PseudoRET + +...