-
Notifications
You must be signed in to change notification settings - Fork 15.4k
[RISCV] Generate Xqcilsm LWMI/SWMI load/store multiple instructions #171079
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
|
@llvm/pr-subscribers-backend-risc-v Author: Sudharsan Veeravalli (svs-quic). Changes: This patch adds support for generating the Xqcilsm load/store multiple instructions as a part of the RISCVLoadStoreOptimizer pass. For now we only combine two load/store instructions into a load/store multiple. Support for converting more loads/stores will be added in follow-up patches. These instructions are only applicable for 32-bit loads/stores with an alignment of 4-bytes. Full diff: https://github.com/llvm/llvm-project/pull/171079.diff 2 Files Affected:
diff --git a/llvm/lib/Target/RISCV/RISCVLoadStoreOptimizer.cpp b/llvm/lib/Target/RISCV/RISCVLoadStoreOptimizer.cpp
index a22ab6bfc04b8..11fa0febcff1d 100644
--- a/llvm/lib/Target/RISCV/RISCVLoadStoreOptimizer.cpp
+++ b/llvm/lib/Target/RISCV/RISCVLoadStoreOptimizer.cpp
@@ -114,7 +114,7 @@ bool RISCVLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
ModifiedRegUnits.init(*TRI);
UsedRegUnits.init(*TRI);
- if (Subtarget.useMIPSLoadStorePairs()) {
+ if (Subtarget.useMIPSLoadStorePairs() || Subtarget.hasVendorXqcilsm()) {
for (MachineBasicBlock &MBB : Fn) {
LLVM_DEBUG(dbgs() << "MBB: " << MBB.getName() << "\n");
@@ -168,14 +168,85 @@ bool RISCVLoadStoreOpt::tryToPairLdStInst(MachineBasicBlock::iterator &MBBI) {
return false;
}
-// Merge two adjacent load/store instructions into a paired instruction
-// (LDP/SDP/SWP/LWP) if the effective address is 8-byte aligned in case of
-// SWP/LWP 16-byte aligned in case of LDP/SDP. This function selects the
-// appropriate paired opcode, verifies that the memory operand is properly
-// aligned, and checks that the offset is valid. If all conditions are met, it
-// builds and inserts the paired instruction.
+// Merge two adjacent load/store instructions into a paired instruction.
+// This function selects the appropriate paired opcode, verifies that the
+// memory operand is properly aligned, and checks that the offset is valid. If
+// all conditions are met, it builds and inserts the paired instruction.
bool RISCVLoadStoreOpt::tryConvertToLdStPair(
MachineBasicBlock::iterator First, MachineBasicBlock::iterator Second) {
+ MachineFunction *MF = First->getMF();
+ const RISCVSubtarget &STI = MF->getSubtarget<RISCVSubtarget>();
+ const MachineMemOperand *MMO = *First->memoperands_begin();
+ Align MMOAlign = MMO->getAlign();
+
+ // Try converting to QC_LWMI/QC_SWMI if the XQCILSM extension is enabled.
+ if (!STI.is64Bit() && STI.hasVendorXqcilsm()) {
+ unsigned Opc = First->getOpcode();
+ if ((Opc != RISCV::LW && Opc != RISCV::SW) || Second->getOpcode() != Opc)
+ return false;
+
+ // Require simple reg+imm addressing for both.
+ if (!First->getOperand(1).isReg() || !Second->getOperand(1).isReg() ||
+ !First->getOperand(2).isImm() || !Second->getOperand(2).isImm())
+ return false;
+
+ Register Base1 = First->getOperand(1).getReg();
+ Register Base2 = Second->getOperand(1).getReg();
+
+ if (Base1 != Base2)
+ return false;
+
+ if (MMOAlign < Align(4))
+ return false;
+
+ int64_t Off1 = First->getOperand(2).getImm();
+ int64_t Off2 = Second->getOperand(2).getImm();
+ int64_t BaseOff = std::min(Off1, Off2);
+
+ if (!isShiftedUInt<5, 2>(BaseOff) || std::abs(Off1 - Off2) != 4)
+ return false;
+
+ Register StartReg = First->getOperand(0).getReg();
+ Register NextReg = Second->getOperand(0).getReg();
+
+ if (StartReg == RISCV::X0 || NextReg == RISCV::X0)
+ return false;
+
+ // If the base reg gets overwritten by one of the loads then bail out.
+ if (Opc == RISCV::LW && (StartReg == Base1 || NextReg == Base1))
+ return false;
+
+ if (Off2 < Off1)
+ std::swap(StartReg, NextReg);
+
+ if (NextReg != StartReg + 1)
+ return false;
+
+ unsigned XqciOpc = (Opc == RISCV::LW) ? RISCV::QC_LWMI : RISCV::QC_SWMI;
+
+ auto StartRegState = (Opc == RISCV::LW) ? RegState::Define : 0;
+ auto NextRegState =
+ (Opc == RISCV::LW) ? RegState::ImplicitDefine : RegState::Implicit;
+
+ DebugLoc DL =
+ First->getDebugLoc() ? First->getDebugLoc() : Second->getDebugLoc();
+ MachineInstrBuilder MIB = BuildMI(*MF, DL, TII->get(XqciOpc));
+ MIB.addReg(StartReg, StartRegState)
+ .addReg(Base1)
+ .addImm(2)
+ .addImm(BaseOff)
+ .cloneMergedMemRefs({&*First, &*Second})
+ .addReg(NextReg, NextRegState);
+
+ First->getParent()->insert(First, MIB);
+ First->removeFromParent();
+ Second->removeFromParent();
+
+ return true;
+ }
+
+ // Try converting to SWP/LWP/LDP/SDP.
+ // SWP/LWP requires 8-byte alignment whereas LDP/SDP needs 16-byte alignment.
unsigned PairOpc;
Align RequiredAlignment;
switch (First->getOpcode()) {
@@ -199,10 +270,6 @@ bool RISCVLoadStoreOpt::tryConvertToLdStPair(
break;
}
- MachineFunction *MF = First->getMF();
- const MachineMemOperand *MMO = *First->memoperands_begin();
- Align MMOAlign = MMO->getAlign();
-
if (MMOAlign < RequiredAlignment)
return false;
diff --git a/llvm/test/CodeGen/RISCV/xqcilsm-lwmi-swmi.mir b/llvm/test/CodeGen/RISCV/xqcilsm-lwmi-swmi.mir
new file mode 100644
index 0000000000000..155a1564c392d
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/xqcilsm-lwmi-swmi.mir
@@ -0,0 +1,311 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 6
+# RUN: llc -mtriple=riscv32 -mattr=+experimental-xqcilsm -run-pass=riscv-load-store-opt %s -o - | FileCheck %s
+
+--- |
+
+ define void @pair_two_lw_into_qc_lwmi() nounwind { ret void }
+ define void @pair_two_lw_into_qc_lwmi_reversed() nounwind { ret void }
+ define void @pair_two_sw_into_qc_swmi_reversed() nounwind { ret void }
+ define void @no_pair_if_different_base_regs() nounwind { ret void }
+ define void @no_pair_if_alignment_lt_4() nounwind { ret void }
+ define void @pair_two_sw_into_qc_swmi() nounwind { ret void }
+ define void @no_pair_if_misaligned() nounwind { ret void }
+ define void @pair_at_upper_boundary_lw() nounwind { ret void }
+ define void @pair_at_upper_boundary_sw() nounwind { ret void }
+ define void @no_pair_if_offset_out_of_range_lw() nounwind { ret void }
+ define void @no_pair_if_offset_out_of_range_sw() nounwind { ret void }
+ define void @no_pair_if_non_consecutive_regs() nounwind { ret void }
+ define void @no_pair_if_rd_is_x0() nounwind { ret void }
+ define void @no_pair_if_lw_rd_equals_base() nounwind { ret void }
+ define void @pair_if_not_adjacent() nounwind { ret void }
+ define void @pair_if_not_adjacent_use() nounwind { ret void }
+ define void @no_pair_if_not_adjacent_use() nounwind { ret void }
+---
+name: pair_two_lw_into_qc_lwmi
+tracksRegLiveness: false
+body: |
+ bb.0:
+ liveins: $x10
+ ; CHECK-LABEL: name: pair_two_lw_into_qc_lwmi
+ ; CHECK: liveins: $x10
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: $x12 = QC_LWMI $x10, 2, 0, implicit-def $x13 :: (load (s32))
+ ; CHECK-NEXT: PseudoRET
+ $x12 = LW $x10, 0 :: (load (s32), align 4)
+ $x13 = LW $x10, 4 :: (load (s32), align 4)
+ PseudoRET
+
+...
+---
+name: pair_two_lw_into_qc_lwmi_reversed
+tracksRegLiveness: false
+body: |
+ bb.0:
+ liveins: $x10
+ ; CHECK-LABEL: name: pair_two_lw_into_qc_lwmi_reversed
+ ; CHECK: liveins: $x10
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: $x12 = QC_LWMI $x10, 2, 0, implicit-def $x13 :: (load (s32))
+ ; CHECK-NEXT: PseudoRET
+ $x13 = LW $x10, 4 :: (load (s32))
+ $x12 = LW $x10, 0 :: (load (s32))
+ PseudoRET
+
+...
+---
+name: pair_two_sw_into_qc_swmi_reversed
+tracksRegLiveness: false
+body: |
+ bb.0:
+ liveins: $x10, $x12, $x13
+ ; CHECK-LABEL: name: pair_two_sw_into_qc_swmi_reversed
+ ; CHECK: liveins: $x10, $x12, $x13
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: QC_SWMI $x12, $x10, 2, 0, implicit $x13 :: (store (s32))
+ ; CHECK-NEXT: PseudoRET
+ SW killed $x13, $x10, 4 :: (store (s32))
+ SW killed $x12, $x10, 0 :: (store (s32))
+ PseudoRET
+
+...
+---
+name: no_pair_if_different_base_regs
+tracksRegLiveness: false
+body: |
+ bb.0:
+ liveins: $x10, $x11
+ ; CHECK-LABEL: name: no_pair_if_different_base_regs
+ ; CHECK: liveins: $x10, $x11
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: $x12 = LW $x10, 0 :: (load (s32))
+ ; CHECK-NEXT: $x13 = LW $x11, 4 :: (load (s32))
+ ; CHECK-NEXT: PseudoRET
+ $x12 = LW $x10, 0 :: (load (s32))
+ $x13 = LW $x11, 4 :: (load (s32))
+ PseudoRET
+
+...
+---
+name: no_pair_if_alignment_lt_4
+tracksRegLiveness: false
+body: |
+ bb.0:
+ liveins: $x10
+ ; CHECK-LABEL: name: no_pair_if_alignment_lt_4
+ ; CHECK: liveins: $x10
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: $x12 = LW $x10, 0 :: (load (s32))
+ ; CHECK-NEXT: $x13 = LW $x10, 3 :: (load (s32))
+ ; CHECK-NEXT: PseudoRET
+ $x12 = LW $x10, 0 :: (load (s32))
+ $x13 = LW $x10, 3 :: (load (s32))
+ PseudoRET
+
+...
+---
+name: pair_two_sw_into_qc_swmi
+tracksRegLiveness: false
+body: |
+ bb.0:
+ liveins: $x10, $x12, $x13
+ ; CHECK-LABEL: name: pair_two_sw_into_qc_swmi
+ ; CHECK: liveins: $x10, $x12, $x13
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: QC_SWMI $x12, $x10, 2, 0, implicit $x13 :: (store (s32))
+ ; CHECK-NEXT: PseudoRET
+ SW killed $x12, $x10, 0 :: (store (s32), align 4)
+ SW killed $x13, $x10, 4 :: (store (s32), align 4)
+ PseudoRET
+
+...
+---
+name: no_pair_if_misaligned
+tracksRegLiveness: false
+body: |
+ bb.0:
+ liveins: $x10
+ ; CHECK-LABEL: name: no_pair_if_misaligned
+ ; CHECK: liveins: $x10
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: $x12 = LW $x10, 2 :: (load (s32))
+ ; CHECK-NEXT: $x13 = LW $x10, 6 :: (load (s32))
+ ; CHECK-NEXT: PseudoRET
+ $x12 = LW $x10, 2 :: (load (s32), align 4)
+ $x13 = LW $x10, 6 :: (load (s32), align 4)
+ PseudoRET
+
+...
+---
+name: pair_at_upper_boundary_lw
+tracksRegLiveness: false
+body: |
+ bb.0:
+ liveins: $x10
+ ; CHECK-LABEL: name: pair_at_upper_boundary_lw
+ ; CHECK: liveins: $x10
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: $x12 = QC_LWMI $x10, 2, 124, implicit-def $x13 :: (load (s32))
+ ; CHECK-NEXT: PseudoRET
+ $x12 = LW $x10, 124 :: (load (s32), align 4)
+ $x13 = LW $x10, 128 :: (load (s32), align 4)
+ PseudoRET
+
+...
+---
+name: pair_at_upper_boundary_sw
+tracksRegLiveness: false
+body: |
+ bb.0:
+ liveins: $x10, $x12, $x13
+ ; CHECK-LABEL: name: pair_at_upper_boundary_sw
+ ; CHECK: liveins: $x10, $x12, $x13
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: QC_SWMI $x12, $x10, 2, 124, implicit $x13 :: (store (s32))
+ ; CHECK-NEXT: PseudoRET
+ SW $x12, $x10, 124 :: (store (s32), align 4)
+ SW $x13, $x10, 128 :: (store (s32), align 4)
+ PseudoRET
+
+...
+---
+name: no_pair_if_offset_out_of_range_lw
+tracksRegLiveness: false
+body: |
+ bb.0:
+ liveins: $x10
+ ; CHECK-LABEL: name: no_pair_if_offset_out_of_range_lw
+ ; CHECK: liveins: $x10
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: $x12 = LW $x10, 128 :: (load (s32))
+ ; CHECK-NEXT: $x13 = LW $x10, 132 :: (load (s32))
+ ; CHECK-NEXT: PseudoRET
+ $x12 = LW $x10, 128 :: (load (s32), align 4)
+ $x13 = LW $x10, 132 :: (load (s32), align 4)
+ PseudoRET
+
+...
+---
+name: no_pair_if_offset_out_of_range_sw
+tracksRegLiveness: false
+body: |
+ bb.0:
+ liveins: $x10, $x12, $x13
+ ; CHECK-LABEL: name: no_pair_if_offset_out_of_range_sw
+ ; CHECK: liveins: $x10, $x12, $x13
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: SW $x12, $x10, 128 :: (store (s32))
+ ; CHECK-NEXT: SW $x13, $x10, 132 :: (store (s32))
+ ; CHECK-NEXT: PseudoRET
+ SW $x12, $x10, 128 :: (store (s32), align 4)
+ SW $x13, $x10, 132 :: (store (s32), align 4)
+ PseudoRET
+
+...
+---
+name: no_pair_if_non_consecutive_regs
+tracksRegLiveness: false
+body: |
+ bb.0:
+ liveins: $x10
+ ; CHECK-LABEL: name: no_pair_if_non_consecutive_regs
+ ; CHECK: liveins: $x10
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: $x11 = LW $x10, 0 :: (load (s32))
+ ; CHECK-NEXT: $x13 = LW $x10, 4 :: (load (s32))
+ ; CHECK-NEXT: PseudoRET
+ $x11 = LW $x10, 0 :: (load (s32), align 4)
+ $x13 = LW $x10, 4 :: (load (s32), align 4)
+ PseudoRET
+
+...
+---
+name: no_pair_if_rd_is_x0
+tracksRegLiveness: false
+body: |
+ bb.0:
+ liveins: $x10
+ ; CHECK-LABEL: name: no_pair_if_rd_is_x0
+ ; CHECK: liveins: $x10
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: $x0 = LW $x10, 0 :: (load (s32))
+ ; CHECK-NEXT: $x1 = LW $x10, 4 :: (load (s32))
+ ; CHECK-NEXT: PseudoRET
+ $x0 = LW $x10, 0 :: (load (s32), align 4)
+ $x1 = LW $x10, 4 :: (load (s32), align 4)
+ PseudoRET
+
+...
+---
+name: no_pair_if_lw_rd_equals_base
+tracksRegLiveness: false
+body: |
+ bb.0:
+ liveins: $x10
+ ; CHECK-LABEL: name: no_pair_if_lw_rd_equals_base
+ ; CHECK: liveins: $x10
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: $x10 = LW $x10, 20 :: (load (s32))
+ ; CHECK-NEXT: $x11 = LW $x10, 24 :: (load (s32))
+ ; CHECK-NEXT: PseudoRET
+ $x10 = LW $x10, 20 :: (load (s32), align 4)
+ $x11 = LW $x10, 24 :: (load (s32), align 4)
+ PseudoRET
+
+...
+---
+name: pair_if_not_adjacent
+tracksRegLiveness: false
+body: |
+ bb.0:
+ liveins: $x10
+ ; CHECK-LABEL: name: pair_if_not_adjacent
+ ; CHECK: liveins: $x10
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: $x1 = QC_LWMI $x10, 2, 20, implicit-def $x2 :: (load (s32))
+ ; CHECK-NEXT: $x3 = ADDI $x1, 10
+ ; CHECK-NEXT: PseudoRET
+ $x1 = LW $x10, 20 :: (load (s32), align 4)
+ $x3 = ADDI $x1, 10
+ $x2 = LW $x10, 24 :: (load (s32), align 4)
+ PseudoRET
+
+...
+---
+name: pair_if_not_adjacent_use
+tracksRegLiveness: false
+body: |
+ bb.0:
+ liveins: $x10, $x1, $x2
+ ; CHECK-LABEL: name: pair_if_not_adjacent_use
+ ; CHECK: liveins: $x10, $x1, $x2
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: $x2 = ADDI $x2, 10
+ ; CHECK-NEXT: QC_SWMI $x1, $x10, 2, 20, implicit $x2 :: (store (s32))
+ ; CHECK-NEXT: PseudoRET
+ SW $x1, $x10, 20 :: (store (s32), align 4)
+ $x2 = ADDI $x2, 10
+ SW $x2, $x10, 24 :: (store (s32), align 4)
+ PseudoRET
+
+...
+---
+name: no_pair_if_not_adjacent_use
+tracksRegLiveness: false
+body: |
+ bb.0:
+ liveins: $x10, $x2
+ ; CHECK-LABEL: name: no_pair_if_not_adjacent_use
+ ; CHECK: liveins: $x10, $x2
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: $x1 = LW $x10, 20 :: (load (s32))
+ ; CHECK-NEXT: $x1 = ADDI $x1, 10
+ ; CHECK-NEXT: SW $x2, $x10, 40 :: (store (s32))
+ ; CHECK-NEXT: $x2 = LW $x10, 24 :: (load (s32))
+ ; CHECK-NEXT: PseudoRET
+ $x1 = LW $x10, 20 :: (load (s32), align 4)
+ $x1 = ADDI $x1, 10
+ SW $x2, $x10, 40 :: (store (s32), align 4)
+ $x2 = LW $x10, 24 :: (load (s32), align 4)
+ PseudoRET
+
+...
|
| if (Opc == RISCV::LW && (StartReg == Base1 || NextReg == Base1)) | ||
| return false; | ||
|
|
||
| if (Off2 < Off1) |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Would the std::min and std::abs above be simplified if we did this earlier?
if (Off2 < Off1) {
std::swap(StartReg, NextReg);
std::swap(Off1, Off2);
}
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Thanks, I've made changes to do the swaps together.
| First->getDebugLoc() ? First->getDebugLoc() : Second->getDebugLoc(); | ||
| MachineInstrBuilder MIB = BuildMI(*MF, DL, TII->get(XqciOpc)); | ||
| MIB.addReg(StartReg, StartRegState) | ||
| .addReg(Base1) |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Preserve Kill flags?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I've added code to preserve kill flags, but code in mergePairedInsns is removing the kill flags incorrectly in some cases. This seems to be happening for the base register. For now, I have added FIXMEs in the test cases where the flags get removed.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
https://godbolt.org/z/cKhjMzYPo is an example where even MIPS seems to be dropping the killed flags on the base reg.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This has to do with the code in findMatchingInsn, where they set MergeForward based on the following checks:
// If the Rt of the second instruction was not modified or used between
// the two instructions and none of the instructions between the second
// and first alias with the second, we can combine the second into the
// first.
if (ModifiedRegUnits.available(MI.getOperand(0).getReg()) &&
!(MI.mayLoad() &&
!UsedRegUnits.available(MI.getOperand(0).getReg())) &&
!mayAlias(MI, MemInsns, AA)) {
MergeForward = false;
return MBBI;
}
// Likewise, if the Rt of the first instruction is not modified or used
// between the two instructions and none of the instructions between the
// first and the second alias with the first, we can combine the first
// into the second.
if (!(MayLoad &&
!UsedRegUnits.available(FirstMI.getOperand(0).getReg())) &&
!mayAlias(FirstMI, MemInsns, AA)) {
if (ModifiedRegUnits.available(FirstMI.getOperand(0).getReg())) {
MergeForward = true;
return MBBI;
}
And then clear the kill flags in mergePairedInsns:
if (!MergeForward)
Paired->getOperand(1).setIsKill(false);
| if ((Opc != RISCV::LW && Opc != RISCV::SW) || Second->getOpcode() != Opc) | ||
| return false; | ||
|
|
||
| auto FirstOp1 = First->getOperand(1); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I think this is copying the MachineOperands? Can we use references?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Done
lenary
left a comment
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM
|
@topperc do you have any other comments? |
topperc
left a comment
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM
|
LLVM Buildbot has detected a new failure on a builder. Full details are available at: https://lab.llvm.org/buildbot/#/builders/162/builds/36909. Here is the relevant piece of the build log for reference: |
This patch adds support for generating the Xqcilsm load/store multiple instructions as a part of the RISCVLoadStoreOptimizer pass. For now we only combine two load/store instructions into a load/store multiple. Support for converting more loads/stores will be added in follow-up patches. These instructions are only applicable for 32-bit loads/stores with an alignment of 4-bytes.