diff --git a/.github/workflows/build-and-test.yaml b/.github/workflows/build-and-test.yaml
index 4c2c066994..f78249bcaa 100644
--- a/.github/workflows/build-and-test.yaml
+++ b/.github/workflows/build-and-test.yaml
@@ -366,6 +366,19 @@ jobs:
           arch: "s390x"
           library-arch: s390x-linux-gnu
 
+        # riscv32-ilp32 build
+        - os: "ubuntu-24.04"
+          cc: "riscv32-unknown-linux-gnu-gcc"
+          cxx: "riscv32-unknown-linux-gnu-g++"
+          cflags: "-O2"
+          otp: "28"
+          elixir_version: "1.17"
+          rebar3_version: "3.24.0"
+          cmake_opts_other: "-DAVM_WARNINGS_ARE_ERRORS=ON -DCMAKE_TOOLCHAIN_FILE=${RUNNER_TEMP}/riscv32_ilp32_toolchain.cmake"
+          compiler_pkgs: "qemu-user qemu-user-binfmt binfmt-support"
+          arch: "riscv32"
+          library-arch: riscv32-linux-gnu-ilp32
+
     env:
       ImageOS: ${{ matrix.container == 'ubuntu:20.04' && 'ubuntu20' || matrix.os == 'ubuntu-20.04' && 'ubuntu20' || matrix.os == 'ubuntu-22.04' && 'ubuntu22' || matrix.os == 'ubuntu-24.04' && 'ubuntu24' || 'ubuntu24' }}
       CC: ${{ matrix.cc }}
@@ -386,7 +399,7 @@ jobs:
       run: sudo dpkg --add-architecture i386
 
     - name: "Setup cross compilation architecture"
-      if: matrix.library-arch != ''
+      if: matrix.library-arch != '' && matrix.library-arch != 'riscv32-linux-gnu-ilp32'
       run: |
         sudo dpkg --add-architecture ${{ matrix.arch }}
         cat > ${RUNNER_TEMP}/cross-compile-sources.list <<EOF
@@ -411,6 +424,97 @@ jobs:
         set(MBEDTLS_LIBRARIES_DIR /usr/lib/${{ matrix.library-arch }})
         EOF
 
+    - name: "Setup cross compilation architecture (riscv32)"
+      if: matrix.library-arch == 'riscv32-linux-gnu-ilp32'
+      run: |
+        sudo dpkg --add-architecture ${{ matrix.arch }}
+
+        # Download toolchain and libraries from release
+        gh release download riscv-toolchain-2025.10.18 \
+          -R pguyot/crossbuild-essential-riscv32 \
+          --pattern 'riscv32-gnu-toolchain-ilp32_2025.10.18_amd64.deb' \
+          --pattern 'libc6-ilp32_2.39-0ubuntu1_riscv32.deb' \
+          --pattern 'libc6-dev-ilp32_2.39-0ubuntu1_riscv32.deb' \
+          --pattern 'libc6-dbg-ilp32_2.39-0ubuntu1_riscv32.deb' \
+          --pattern 'zlib1g-ilp32_1.3.1-0ubuntu1_riscv32.deb' \
+          --pattern 'zlib1g-dev-ilp32_1.3.1-0ubuntu1_riscv32.deb' \
+          --pattern 'libmbedcrypto7-ilp32_2.28.8-0ubuntu1_riscv32.deb' \
+          --pattern 'libmbedtls-dev-ilp32_2.28.8-0ubuntu1_riscv32.deb' \
+          --pattern 'libmbedtls14-ilp32_2.28.8-0ubuntu1_riscv32.deb' \
+          --pattern 'libmbedx509-1-ilp32_2.28.8-0ubuntu1_riscv32.deb'
+
+        # Install the toolchain
+        sudo dpkg -i riscv32-gnu-toolchain-ilp32_2025.10.18_amd64.deb
+
+        # Add to PATH for all subsequent steps
+        echo "/opt/riscv32-ilp32/bin" >> $GITHUB_PATH
+
+        # Install the libs
+        sudo dpkg -i libc6-ilp32_2.39-0ubuntu1_riscv32.deb
+        sudo dpkg -i libc6-dev-ilp32_2.39-0ubuntu1_riscv32.deb
+        sudo dpkg -i libc6-dbg-ilp32_2.39-0ubuntu1_riscv32.deb
+
+        sudo dpkg -i zlib1g-ilp32_1.3.1-0ubuntu1_riscv32.deb
+        sudo dpkg -i zlib1g-dev-ilp32_1.3.1-0ubuntu1_riscv32.deb
+
+        # Install mbedtls runtime packages first (in dependency order)
+        sudo dpkg -i libmbedcrypto7-ilp32_2.28.8-0ubuntu1_riscv32.deb
+        sudo dpkg -i libmbedx509-1-ilp32_2.28.8-0ubuntu1_riscv32.deb
+        sudo dpkg -i libmbedtls14-ilp32_2.28.8-0ubuntu1_riscv32.deb
+        # Then install the dev package
+        sudo dpkg -i libmbedtls-dev-ilp32_2.28.8-0ubuntu1_riscv32.deb
+
+        sudo sed -i '/Types: deb/a Architectures: amd64' /etc/apt/sources.list.d/ubuntu.sources
+
+        cat > ${RUNNER_TEMP}/riscv32_ilp32_toolchain.cmake <<'EOF'
+        # Toolchain file for RISC-V32 ILP32 (RV32-IMAC) cross-compilation
+        set(CMAKE_SYSTEM_NAME Linux)
+        set(CMAKE_SYSTEM_PROCESSOR riscv32)
+        set(CMAKE_C_LIBRARY_ARCHITECTURE riscv32-linux-gnu-ilp32)
+
+        # Specify the cross compiler
+        set(CMAKE_C_COMPILER riscv32-unknown-linux-gnu-gcc)
+        set(CMAKE_CXX_COMPILER riscv32-unknown-linux-gnu-g++)
+
+        # Specify the target architecture
+        set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -march=rv32imac -mabi=ilp32" CACHE STRING "" FORCE)
+        set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -march=rv32imac -mabi=ilp32" CACHE STRING "" FORCE)
+
+        # Set up paths for cross-compiled libraries
+        set(ZLIB_LIBRARY /usr/lib/riscv32-linux-gnu-ilp32/libz.so CACHE FILEPATH "")
+        set(ZLIB_INCLUDE_DIR /usr/include/riscv32-linux-gnu CACHE PATH "")
+        set(ZLIB_FOUND TRUE CACHE BOOL "")
+
+        # MbedTLS configuration
+        set(MBEDTLS_ROOT_DIR /usr)
+        set(MBEDTLS_LIBRARIES_DIR /usr/lib/riscv32-linux-gnu-ilp32)
+
+        # Add cross-compilation include path to compiler flags
+        include_directories(SYSTEM /usr/include/riscv32-linux-gnu)
+
+        # Search for programs in the build host directories
+        set(CMAKE_FIND_ROOT_PATH_MODE_PROGRAM NEVER)
+
+        # Search for libraries and headers in the target directories
+        set(CMAKE_FIND_ROOT_PATH_MODE_LIBRARY ONLY)
+        set(CMAKE_FIND_ROOT_PATH_MODE_INCLUDE ONLY)
+        set(CMAKE_FIND_ROOT_PATH_MODE_PACKAGE ONLY)
+        EOF
+
+        # Set up qemu-user binfmt to find libraries
+        sudo ln -s /opt/riscv32-ilp32/sysroot/lib/ld-linux-riscv32-ilp32.so.1 /lib/ld-linux-riscv32-ilp32.so.1
+        sudo mkdir -p /usr/gnemul
+        sudo ln -s /opt/riscv32-ilp32/sysroot /usr/gnemul/qemu-riscv32
+
+        # Copy cross-compiled libraries to sysroot for qemu-user
+        sudo cp /usr/lib/${{ matrix.library-arch }}/libz.so.1* /opt/riscv32-ilp32/sysroot/lib/
+        sudo cp /usr/lib/${{ matrix.library-arch }}/libmbedtls.so.14 /opt/riscv32-ilp32/sysroot/lib/
+        sudo cp /usr/lib/${{ matrix.library-arch }}/libmbedcrypto.so.7 /opt/riscv32-ilp32/sysroot/lib/
+        sudo cp /usr/lib/${{ matrix.library-arch }}/libmbedx509.so.1 /opt/riscv32-ilp32/sysroot/lib/
+
+      env:
+        GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+
     - name: "APT update"
       run: sudo apt update -y
 
@@ -526,6 +630,19 @@ jobs:
         ulimit -c unlimited
         ./tests/test-heap
 
+    - name: "Test: test-jit_stream_flash with valgrind"
+      if: matrix.library-arch == ''
+      working-directory: build
+      run: |
+        ulimit -c unlimited
+        valgrind --error-exitcode=1 ./tests/test-jit_stream_flash
+
+    - name: "Test: test-jit_stream_flash"
+      working-directory: build
+      run: |
+        ulimit -c unlimited
+        ./tests/test-jit_stream_flash
+
     - name: "Test: test-mailbox with valgrind"
       if: matrix.library-arch == ''
       working-directory: build
diff --git a/.github/workflows/pico-build.yaml b/.github/workflows/pico-build.yaml
index 9cf01d045a..c5ce30f371 100644
--- a/.github/workflows/pico-build.yaml
+++ b/.github/workflows/pico-build.yaml
@@ -41,7 +41,17 @@ jobs:
     strategy:
       matrix:
         board: ["pico", "pico_w", "pico2"]
+        platform: [""]
         language: ["cpp"]
+        jit: ["", "-DAVM_DISABLE_JIT=OFF"]
+        include:
+          - board: "pico2"
+            platform: "-DPICO_PLATFORM=rp2350-riscv"
+            jit: ""
+
+          - board: "pico2"
+            platform: "-DPICO_PLATFORM=rp2350-riscv"
+            jit: "-DAVM_DISABLE_JIT=OFF"
 
     steps:
     - name: Checkout repo
@@ -57,6 +67,16 @@ jobs:
             libnewlib-arm-none-eabi libstdc++-arm-none-eabi-newlib \
             erlang-base erlang-dev erlang-dialyzer erlang-eunit rebar3
 
+    - name: Install riscv32 toolchain
+      if: matrix.platform == '-DPICO_PLATFORM=rp2350-riscv'
+      run: |
+        sudo mkdir -p /opt
+        cd /opt
+        sudo wget https://github.com/raspberrypi/pico-sdk-tools/releases/download/v2.2.0-3/riscv-toolchain-15-x86_64-lin.tar.gz
+        sudo tar xzf riscv-toolchain-15-x86_64-lin.tar.gz
+        ls /opt
+        echo "/opt/riscv-toolchain-15-x86_64-lin/bin" >> $GITHUB_PATH
+
     - name: "Git config safe.directory for codeql"
       run: git config --global --add safe.directory /__w/AtomVM/AtomVM
 
@@ -74,7 +94,7 @@ jobs:
         set -euo pipefail
         mkdir build
         cd build
-        cmake .. -G Ninja -DPICO_BOARD=${{ matrix.board }}
+        cmake .. -G Ninja -DPICO_BOARD=${{ matrix.board }} ${{ matrix.platform }} ${{ matrix.jit }}
         ninja
 
     - name: "Perform CodeQL Analysis"
@@ -97,7 +117,7 @@ jobs:
         mkdir build.nosmp
         cd build.nosmp
         # TODO: fix all warnings and enable -DAVM_WARNINGS_ARE_ERRORS=ON
-        cmake .. -G Ninja -DPICO_BOARD=${{ matrix.board }} -DAVM_DISABLE_SMP=1
+        cmake .. -G Ninja -DPICO_BOARD=${{ matrix.board }} ${{ matrix.jit }} -DAVM_DISABLE_SMP=1
         cmake --build . --target=rp2_tests
 
     - name: Run tests with rp2040js
@@ -112,7 +132,7 @@ jobs:
         npx tsx run-tests.ts ../build.nosmp/tests/rp2_tests.uf2 ../build.nosmp/tests/test_erl_sources/rp2_test_modules.uf2
 
     - name: Build atomvmlib.uf2
-      if: startsWith(github.ref, 'refs/tags/') && matrix.board != 'pico_w'
+      if: startsWith(github.ref, 'refs/tags/') && matrix.board != 'pico_w' && matrix.platform == '' && matrix.jit == ''
       shell: bash
       run: |
         set -euo pipefail
@@ -122,7 +142,7 @@ jobs:
         make atomvmlib-${{ matrix.board }}.uf2
 
     - name: Rename AtomVM and write sha256sum
-      if: startsWith(github.ref, 'refs/tags/')
+      if: startsWith(github.ref, 'refs/tags/') && matrix.platform == '' && matrix.jit == ''
       shell: bash
       run: |
         pushd src/platforms/rp2/build
@@ -137,7 +157,7 @@ jobs:
         popd
 
     - name: Rename atomvmlib and write sha256sum
-      if: startsWith(github.ref, 'refs/tags/') && matrix.board != 'pico_w'
+      if: startsWith(github.ref, 'refs/tags/') && matrix.board != 'pico_w' && matrix.platform == '' && matrix.jit == ''
       shell: bash
       run: |
         pushd build/libs
@@ -148,7 +168,7 @@ jobs:
 
     - name: Release (Pico & Pico2)
       uses: softprops/action-gh-release@v1
-      if: startsWith(github.ref, 'refs/tags/') && matrix.board != 'pico_w'
+      if: startsWith(github.ref, 'refs/tags/') && matrix.board != 'pico_w' && matrix.platform == '' && matrix.jit == ''
       with:
         draft: true
         fail_on_unmatched_files: true
@@ -160,7 +180,7 @@ jobs:
 
     - name: Release (PicoW)
       uses: softprops/action-gh-release@v1
-      if: startsWith(github.ref, 'refs/tags/') && matrix.board == 'pico_w'
+      if: startsWith(github.ref, 'refs/tags/') && matrix.board == 'pico_w' && matrix.platform == '' && matrix.jit == ''
       with:
         draft: true
         fail_on_unmatched_files: true
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 32484ee851..307917422f 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -64,7 +64,7 @@ if (NOT AVM_DISABLE_JIT AND NOT DEFINED AVM_JIT_TARGET_ARCH)
     endif()
 endif()
 
-set(AVM_PRECOMPILED_TARGETS "x86_64;aarch64;armv6m;armv6m+float32" CACHE STRING "Targets to precompile code to if AVM_DISABLE_JIT is OFF or AVM_ENABLE_PRECOMPILED is ON")
+set(AVM_PRECOMPILED_TARGETS "x86_64;aarch64;armv6m;armv6m+float32;riscv32" CACHE STRING "Targets to precompile code to if AVM_DISABLE_JIT is OFF or AVM_ENABLE_PRECOMPILED is ON")
 
 if((${CMAKE_SYSTEM_NAME} STREQUAL "Darwin") OR
    (${CMAKE_SYSTEM_NAME} STREQUAL "Linux") OR
diff --git a/doc/src/atomvm-internals.md b/doc/src/atomvm-internals.md
index 60e2919b02..4d30e6767d 100644
--- a/doc/src/atomvm-internals.md
+++ b/doc/src/atomvm-internals.md
@@ -137,7 +137,7 @@ Following BEAM, there are two flavors of the emulator: jit and emu, but eventual
 - Native: the VM only runs native code and all code must be precompiled on the desktop using the JIT compiler (which effectively is a AOT or Ahead-of-Time compiler). In this mode, it is not necessary to bundle the jit compiler on the embedded target.
 - Hybrid: the VM can run native code as well as emulated BEAM code and some code is precompiled on the desktop.
 
-JIT is available on some platforms (currently only x86_64 and aarch64) and compiles Erlang bytecode at runtime. Erlang bytecode is never interpreted. EMU is available on all platforms and Erlang bytecode is interpreted.
+JIT is available on some platforms (currently x86_64, aarch64, armv6m and riscv32) and compiles Erlang bytecode at runtime. Erlang bytecode is never interpreted. EMU is available on all platforms and Erlang bytecode is interpreted.
 
 Modules can include precompiled code in a dedicated beam chunk with name 'avmN'. The chunk can contain native code for several architectures, however it may only contain native code for a given version of the native interface. Current version is 1. This native code is executed by the jit-flavor of the emulator as well as the emu flavor if execution of precompiled is enabled.
 
@@ -154,9 +154,37 @@ The JIT compiler is written in Erlang and is therefore precompiled. When a proce
 
 JIT compiler is composed of two main interfaces : backend and stream.
 
-A backend implementation is required for each architecture. The backend is called by jit module as it translates bytecodes to machine code. The current implementations are `jit_x86_64` and `jit_aarch64` which are suitable for systems with System V X86 64 ABI or AArch64 ABI.
+A backend implementation is required for each architecture. The backend is called by the `jit` module as it translates bytecodes to machine code. The current implementations are:
+- `jit_x86_64` for System V X86 64 ABI
+- `jit_aarch64` for AArch64 ABI
+- `jit_armv6m` for AArch32 ABI
+- `jit_riscv32` for rv32imc ilp32 ABI.
 
-A stream implementation is responsible for streaming the machine code, especially in the context of low memory. Two implementations currently exist: `jit_stream_binary` that streams assembly code to an Erlang binary, suitable for tests and precompilation on the desktop, and `jit_stream_mmap` that streams assembly code in an `mmap(2)` allocated page, suitable for JIT compilation on Unix.
+A stream implementation is responsible for streaming the machine code, especially in the context of low memory. Three implementations currently exist:
+- `jit_stream_binary` that streams assembly code to an Erlang binary, suitable for tests and precompilation on the desktop
+- `jit_stream_mmap` that streams assembly code in an `mmap(2)` allocated page, suitable for JIT compilation on Unix
+- `jit_stream_flash` available on Pico that allows for embedded JIT.
+
+### Embedded JIT and Native
+
+On embedded devices, Native mode means the code is precompiled on the desktop and executed natively on the device. This currently works on all ARMv6M devices (Pico and STM32).
+
+The default partition scheme on all platforms is optimized for the Emulated VM which is larger than the JIT or Native VM, and for the Emulated atomvmlib (with no native code for estdlib and no jit library) which is smaller than the JIT atomvmlib (that includes native code for estdlib and jit library).
+
+JIT mode means the Erlang bytecode is compiled to native code directly on the device. This is possible on Raspberry Pi Pico by using the flash to store the native code. The first time the code is executed, it is compiled and streamed to flash, and on subsequent runs (including after a reboot), the native code is executed directly.
+
+To achieve embedded JIT, the device must be flashed with the JIT compiler for armv6m, which is part of the jit library. This library is quite large, so on Pico boards that come with 2MB of flash, the jit modules for other backends must be removed. The way code is partitioned must also be changed.
+
+For example, it is possible to have the following offsets defined in `src/platforms/rp2/src/main.c`:
+
+```
+#define LIB_AVM ((void *) 0x10060000)
+#define MAIN_AVM ((void *) 0x101B0000)
+```
+
+To fit in the lib partition, all networking modules should also be removed (the Pico doesn't have any networking capability).
+
+After the first run, compiled modules in flash are used unless there is a version mismatch or the application avm or the library avm have been updated on the device. AVM packages end with a section called "end" (0x656E64). When the JIT compiler flashes native code, it changes this name to "END" (0x454E44), by effectively clearing 3 bits in the flash, which is possible without erasing any flash block. Any rewrite of these avm packages will overwrite the section names to "end".
 
 ## The Scheduler
 
diff --git a/libs/estdlib/src/code_server.erl b/libs/estdlib/src/code_server.erl
index 427d5fa529..8d20574cf6 100644
--- a/libs/estdlib/src/code_server.erl
+++ b/libs/estdlib/src/code_server.erl
@@ -39,6 +39,7 @@
     atom_resolver/2,
     literal_resolver/2,
     type_resolver/2,
+    import_resolver/2,
     set_native_code/3
 ]).
 
@@ -135,6 +136,14 @@ literal_resolver(_Module, _Index) ->
 type_resolver(_Module, _Index) ->
     erlang:nif_error(undefined).
 
+%% @doc Get an imported function triplet from its index
+%% @return The imported function as {Module, Function, Arity}
+%% @param Module module to get the imported function from
+%% @param Index imported function index in the module
+-spec import_resolver(Module :: module(), Index :: non_neg_integer()) -> {atom(), atom(), non_neg_integer()}.
+import_resolver(_Module, _Index) ->
+    erlang:nif_error(undefined).
+
 %% @doc Associate a native code stream with a module
 %% @return ok
 %% @param Module module to set the native code of
@@ -152,7 +161,7 @@ set_native_code(_Module, _LabelsCount, _Stream) ->
 load(Module) ->
     case erlang:system_info(emu_flavor) of
         jit ->
-            % atomvm_heap_growth, fibonacci divides compilation time by two
+            % atomvm_heap_growth, fibonacci reduces compilation time
             {Pid, Ref} = spawn_opt(
                 fun() ->
                     try
@@ -164,18 +173,23 @@ load(Module) ->
                             code_server:literal_resolver(Module, Index)
                         end,
                         TypeResolver = fun(Index) -> code_server:type_resolver(Module, Index) end,
-                        Stream0 = jit:stream(jit_mmap_size(byte_size(Code))),
-                        {BackendModule, BackendState0} = jit:backend(Stream0),
+                        ImportResolver = fun(Index) ->
+                            code_server:import_resolver(Module, Index)
+                        end,
+                        {StreamModule, Stream0} = jit:stream(jit_mmap_size(byte_size(Code))),
+                        {BackendModule, BackendState0} = jit:backend(StreamModule, Stream0),
                         {LabelsCount, BackendState1} = jit:compile(
                             Code,
                             AtomResolver,
                             LiteralResolver,
                             TypeResolver,
+                            ImportResolver,
                             BackendModule,
                             BackendState0
                         ),
                         Stream1 = BackendModule:stream(BackendState1),
-                        code_server:set_native_code(Module, LabelsCount, Stream1),
+                        Stream2 = StreamModule:flush(Stream1),
+                        code_server:set_native_code(Module, LabelsCount, Stream2),
                         End = erlang:system_time(millisecond),
                         io:format("~B ms (bytecode: ~B bytes, native code: ~B bytes)\n", [
                             End - Start, byte_size(Code), BackendModule:offset(BackendState1)
diff --git a/libs/jit/include/jit.hrl b/libs/jit/include/jit.hrl
index b006c5f34f..81ff1c42c2 100644
--- a/libs/jit/include/jit.hrl
+++ b/libs/jit/include/jit.hrl
@@ -23,6 +23,7 @@
 -define(JIT_ARCH_X86_64, 1).
 -define(JIT_ARCH_AARCH64, 2).
 -define(JIT_ARCH_ARMV6M, 3).
+-define(JIT_ARCH_RISCV32, 4).
 
 -define(JIT_VARIANT_PIC, 1).
 -define(JIT_VARIANT_FLOAT32, 2).
diff --git a/libs/jit/src/CMakeLists.txt b/libs/jit/src/CMakeLists.txt
index 7aad016575..ae62643c30 100644
--- a/libs/jit/src/CMakeLists.txt
+++ b/libs/jit/src/CMakeLists.txt
@@ -31,6 +31,8 @@ set(ERLANG_MODULES
     jit_aarch64_asm
     jit_armv6m
     jit_armv6m_asm
+    jit_riscv32
+    jit_riscv32_asm
     jit_x86_64
     jit_x86_64_asm
 )
diff --git a/libs/jit/src/jit.erl b/libs/jit/src/jit.erl
index 1992bf5841..0a5d4689b1 100644
--- a/libs/jit/src/jit.erl
+++ b/libs/jit/src/jit.erl
@@ -22,9 +22,10 @@
 
 -export([
     stream/1,
-    backend/1,
+    backend/2,
     beam_chunk_header/3,
-    compile/6
+    compile/7,
+    decode_value64/1
 ]).
 
 % NIFs
@@ -100,7 +101,9 @@
     labels_count :: pos_integer(),
     atom_resolver :: fun((integer()) -> atom()),
     literal_resolver :: fun((integer()) -> any()),
-    type_resolver :: fun((integer()) -> any())
+    type_resolver :: fun((integer()) -> any()),
+    import_resolver :: fun((integer()) -> {atom(), atom(), non_neg_integer()}),
+    tail_cache :: [{tuple(), non_neg_integer()}]
 }).
 
 -type stream() :: any().
@@ -113,6 +116,14 @@
 -define(ASSERT_ALL_NATIVE_FREE(St), ok).
 -define(ASSERT(Expr), ok).
 
+%-define(JIT_INSTRUMENT, true).
+
+-ifdef(JIT_INSTRUMENT).
+-define(INSTRUMENT(Tag, State, MSt), instrument(Tag, State, MSt)).
+-else.
+-define(INSTRUMENT(Tag, State, MSt), ok).
+-endif.
+
 %%-----------------------------------------------------------------------------
 %% @param   LabelsCount number of labels
 %% @param   Arch code for the architecture
@@ -133,30 +144,40 @@ compile(
     AtomResolver,
     LiteralResolver,
     TypeResolver,
+    ImportResolver,
     MMod,
     MSt0
 ) when OpcodeMax =< ?OPCODE_MAX ->
-    MSt1 = MMod:jump_table(MSt0, LabelsCount),
     State0 = #state{
         line_offsets = [],
         labels_count = LabelsCount,
         atom_resolver = AtomResolver,
         literal_resolver = LiteralResolver,
-        type_resolver = TypeResolver
+        type_resolver = TypeResolver,
+        import_resolver = ImportResolver,
+        tail_cache = []
     },
+    ?INSTRUMENT("compile_start", State0, MSt0),
+    MSt1 = MMod:jump_table(MSt0, LabelsCount),
+    ?INSTRUMENT("after_jump_table", State0, MSt1),
     {State1, MSt2} = first_pass(Opcodes, MMod, MSt1, State0),
+    ?INSTRUMENT("after_first_pass", State1, MSt2),
     MSt3 = second_pass(MMod, MSt2, State1),
-    {LabelsCount, MSt3};
+    ?INSTRUMENT("after_second_pass", State1, MSt3),
+    MSt4 = MMod:flush(MSt3),
+    ?INSTRUMENT("after_flush", State1, MSt4),
+    {LabelsCount, MSt4};
 compile(
     <<16:32, 0:32, OpcodeMax:32, _LabelsCount:32, _FunctionsCount:32, _Opcodes/binary>>,
     _AtomResolver,
     _LiteralResolver,
     _TypeResolver,
+    _ImportResolver,
     _MMod,
     _MSt
 ) ->
     error(badarg, [OpcodeMax]);
-compile(CodeChunk, _AtomResolver, _LiteralResolver, _TypeResolver, _MMod, _MSt) ->
+compile(CodeChunk, _AtomResolver, _LiteralResolver, _TypeResolver, _ImportResolver, _MMod, _MSt) ->
     error(badarg, [CodeChunk]).
 
 % 1
@@ -170,18 +191,30 @@ first_pass(
     ?ASSERT_ALL_NATIVE_FREE(MSt1),
     first_pass(Rest1, MMod, MSt1, State0);
 % 2
-first_pass(<<?OP_FUNC_INFO, Rest0/binary>>, MMod, MSt0, State0) ->
+first_pass(<<?OP_FUNC_INFO, Rest0/binary>>, MMod, MSt0, #state{tail_cache = TC} = State0) ->
     ?ASSERT_ALL_NATIVE_FREE(MSt0),
     {_ModuleAtom, Rest1} = decode_atom(Rest0),
     {_FunctionName, Rest2} = decode_atom(Rest1),
     {_Arity, Rest3} = decode_literal(Rest2),
     ?TRACE("OP_FUNC_INFO ~p, ~p, ~p\n", [_ModuleAtom, _FunctionName, _Arity]),
-    % Implement function clause at the previous label. (TODO: optimize it out to save space)
-    MSt1 = MMod:call_primitive_last(MSt0, ?PRIM_RAISE_ERROR, [
-        ctx, jit_state, offset, ?FUNCTION_CLAUSE_ATOM
-    ]),
-    ?ASSERT_ALL_NATIVE_FREE(MSt1),
-    first_pass(Rest3, MMod, MSt1, State0);
+    % Implement function clause at the previous label; the tail after the offset load is shared.
+    Offset = MMod:offset(MSt0),
+    {MSt1, OffsetReg} = MMod:move_to_native_register(MSt0, Offset),
+    TailCacheKey = {call_primitive_last, ?PRIM_RAISE_ERROR, [OffsetReg, ?FUNCTION_CLAUSE_ATOM]},
+    State1 =
+        case lists:keyfind(TailCacheKey, 1, TC) of
+            false ->
+                MSt3 = MMod:call_primitive_last(MSt1, ?PRIM_RAISE_ERROR, [
+                    ctx, jit_state, {free, OffsetReg}, ?FUNCTION_CLAUSE_ATOM
+                ]),
+                State0#state{tail_cache = [{TailCacheKey, MMod:offset(MSt1)} | TC]};
+            {TailCacheKey, CacheOffset} ->
+                MSt2 = MMod:jump_to_offset(MSt1, CacheOffset),
+                MSt3 = MMod:free_native_registers(MSt2, [OffsetReg]),
+                State0
+        end,
+    ?ASSERT_ALL_NATIVE_FREE(MSt3),
+    first_pass(Rest3, MMod, MSt3, State1);
 % 3
 first_pass(
     <<?OP_INT_CALL_END>>, MMod, MSt0, #state{labels_count = LabelsCount} = State
@@ -203,26 +236,56 @@ first_pass(<<?OP_CALL, Rest0/binary>>, MMod, MSt0, State0) ->
     ?ASSERT_ALL_NATIVE_FREE(MSt1),
     first_pass(Rest2, MMod, MSt1, State0);
 % 5
-first_pass(<<?OP_CALL_LAST, Rest0/binary>>, MMod, MSt0, State0) ->
+first_pass(<<?OP_CALL_LAST, Rest0/binary>>, MMod, MSt0, #state{tail_cache = TC} = State0) ->
     ?ASSERT_ALL_NATIVE_FREE(MSt0),
     {_Arity, Rest1} = decode_literal(Rest0),
     {Label, Rest2} = decode_label(Rest1),
     {NWords, Rest3} = decode_literal(Rest2),
     ?TRACE("OP_CALL_LAST ~p, ~p, ~p\n", [_Arity, Label, NWords]),
-    MSt1 = MMod:move_to_cp(MSt0, {y_reg, NWords}),
-    MSt2 = MMod:increment_sp(MSt1, NWords + 1),
-    MSt3 = MMod:call_only_or_schedule_next(MSt2, Label),
+    TailCacheKey0 = {op_call_last, NWords, Label},
+    case lists:keyfind(TailCacheKey0, 1, TC) of
+        false ->
+            Offset0 = MMod:offset(MSt0),
+            MSt1 = MMod:move_to_cp(MSt0, {y_reg, NWords}),
+            MSt2 = MMod:increment_sp(MSt1, NWords + 1),
+            TailCacheKey1 = {op_call_only, Label},
+            case lists:keyfind(TailCacheKey1, 1, TC) of
+                false ->
+                    Offset1 = MMod:offset(MSt2),
+                    MSt3 = MMod:call_only_or_schedule_next(MSt2, Label),
+                    State1 = State0#state{
+                        tail_cache = [{TailCacheKey1, Offset1}, {TailCacheKey0, Offset0} | TC]
+                    };
+                {TailCacheKey1, Offset1} ->
+                    MSt3 = MMod:jump_to_offset(MSt2, Offset1),
+                    State1 = State0#state{
+                        tail_cache = [{TailCacheKey0, Offset0} | TC]
+                    }
+            end;
+        {TailCacheKey0, Offset0} ->
+            MSt3 = MMod:jump_to_offset(MSt0, Offset0),
+            State1 = State0
+    end,
     ?ASSERT_ALL_NATIVE_FREE(MSt3),
-    first_pass(Rest3, MMod, MSt3, State0);
+    first_pass(Rest3, MMod, MSt3, State1);
 % 6
-first_pass(<<?OP_CALL_ONLY, Rest0/binary>>, MMod, MSt0, State0) ->
+first_pass(<<?OP_CALL_ONLY, Rest0/binary>>, MMod, MSt0, #state{tail_cache = TC} = State0) ->
     ?ASSERT_ALL_NATIVE_FREE(MSt0),
     {_Arity, Rest1} = decode_literal(Rest0),
     {Label, Rest2} = decode_label(Rest1),
     ?TRACE("OP_CALL_ONLY ~p, ~p\n", [_Arity, Label]),
-    MSt1 = MMod:call_only_or_schedule_next(MSt0, Label),
+    TailCacheKey = {op_call_only, Label},
+    case lists:keyfind(TailCacheKey, 1, TC) of
+        false ->
+            Offset = MMod:offset(MSt0),
+            MSt1 = MMod:call_only_or_schedule_next(MSt0, Label),
+            State1 = State0#state{tail_cache = [{TailCacheKey, Offset} | TC]};
+        {TailCacheKey, Offset} ->
+            MSt1 = MMod:jump_to_offset(MSt0, Offset),
+            State1 = State0
+    end,
     ?ASSERT_ALL_NATIVE_FREE(MSt1),
-    first_pass(Rest2, MMod, MSt1, State0);
+    first_pass(Rest2, MMod, MSt1, State1);
 % 7
 first_pass(<<?OP_CALL_EXT, Rest0/binary>>, MMod, MSt0, State0) ->
     ?ASSERT_ALL_NATIVE_FREE(MSt0),
@@ -348,7 +411,7 @@ first_pass(<<?OP_DEALLOCATE, Rest0/binary>>, MMod, MSt0, State0) ->
     ?ASSERT_ALL_NATIVE_FREE(MSt2),
     first_pass(Rest1, MMod, MSt2, State0);
 % 19
-first_pass(<<?OP_RETURN, Rest/binary>>, MMod, MSt0, State0) ->
+first_pass(<<?OP_RETURN, Rest/binary>>, MMod, MSt0, #state{tail_cache = TC} = State0) ->
     ?ASSERT_ALL_NATIVE_FREE(MSt0),
     ?TRACE("OP_RETURN\n", []),
     % Optimized return: check if returning within same module
@@ -363,7 +426,7 @@ first_pass(<<?OP_RETURN, Rest/binary>>, MMod, MSt0, State0) ->
         % Same module: fast intra-module return
         fun(BSt0) ->
             % Mask to get lower 24 bits and shift right by 2 for offset
-            BSt1 = MMod:and_(BSt0, CpReg0, 16#FFFFFF),
+            {BSt1, CpReg0} = MMod:and_(BSt0, {free, CpReg0}, 16#FFFFFF),
             {BSt3, CPReg1} = MMod:shift_right(BSt1, {free, CpReg0}, 2),
             % Jump to continuation (this is a tail call)
             MMod:jump_to_continuation(BSt3, {free, CPReg1})
@@ -371,9 +434,18 @@ first_pass(<<?OP_RETURN, Rest/binary>>, MMod, MSt0, State0) ->
     ),
     MSt5 = MMod:free_native_registers(MSt4, [CpReg0]),
     % Different module: use existing slow path
-    MSt6 = MMod:call_primitive_last(MSt5, ?PRIM_RETURN, [ctx, jit_state]),
+    TailCacheKey = {call_primitive_last, ?PRIM_RETURN},
+    case lists:keyfind(TailCacheKey, 1, TC) of
+        false ->
+            Offset = MMod:offset(MSt5),
+            MSt6 = MMod:call_primitive_last(MSt5, ?PRIM_RETURN, [ctx, jit_state]),
+            State1 = State0#state{tail_cache = [{TailCacheKey, Offset} | TC]};
+        {TailCacheKey, Offset} ->
+            MSt6 = MMod:jump_to_offset(MSt5, Offset),
+            State1 = State0
+    end,
     ?ASSERT_ALL_NATIVE_FREE(MSt6),
-    first_pass(Rest, MMod, MSt6, State0);
+    first_pass(Rest, MMod, MSt6, State1);
 % 20
 first_pass(<<?OP_SEND, Rest/binary>>, MMod, MSt0, State0) ->
     ?ASSERT_ALL_NATIVE_FREE(MSt0),
@@ -499,16 +571,10 @@ first_pass(<<?OP_IS_LT, Rest0/binary>>, MMod, MSt0, State0) ->
 first_pass(<<?OP_IS_GE, Rest0/binary>>, MMod, MSt0, State0) ->
     ?ASSERT_ALL_NATIVE_FREE(MSt0),
     {Label, Rest1} = decode_label(Rest0),
-    {MSt1, Arg1, Rest2} = decode_compact_term(Rest1, MMod, MSt0, State0),
-    {MSt2, Arg2, Rest3} = decode_compact_term(Rest2, MMod, MSt1, State0),
+    {MSt1, Arg1, Rest2} = decode_typed_compact_term(Rest1, MMod, MSt0, State0),
+    {MSt2, Arg2, Rest3} = decode_typed_compact_term(Rest2, MMod, MSt1, State0),
     ?TRACE("OP_IS_GE ~p, ~p, ~p\n", [Label, Arg1, Arg2]),
-    {MSt3, ResultReg} = MMod:call_primitive(MSt2, ?PRIM_TERM_COMPARE, [
-        ctx, jit_state, {free, Arg1}, {free, Arg2}, ?TERM_COMPARE_NO_OPTS
-    ]),
-    MSt4 = handle_error_if({'(int)', ResultReg, '==', ?TERM_COMPARE_MEMORY_ALLOC_FAIL}, MMod, MSt3),
-    MSt5 = cond_jump_to_label(
-        {'(int)', {free, ResultReg}, '==', ?TERM_LESS_THAN}, Label, MMod, MSt4
-    ),
+    MSt5 = op_is_ge(MMod, MSt2, Label, Arg1, Arg2),
     ?ASSERT_ALL_NATIVE_FREE(MSt5),
     first_pass(Rest3, MMod, MSt5, State0);
 % 41
@@ -631,7 +697,7 @@ first_pass(<<?OP_IS_NUMBER, Rest0/binary>>, MMod, MSt0, State0) ->
         BSt1 = cond_jump_to_label(
             {Reg, '&', ?TERM_PRIMARY_MASK, '!=', ?TERM_PRIMARY_BOXED}, Label, MMod, BSt0
         ),
-        BSt2 = MMod:and_(BSt1, Reg, ?TERM_PRIMARY_CLEAR_MASK),
+        {BSt2, Reg} = MMod:and_(BSt1, {free, Reg}, ?TERM_PRIMARY_CLEAR_MASK),
         BSt3 = MMod:move_array_element(BSt2, Reg, 0, Reg),
         % Optimization : ((Reg & 0x3F) != 0x8) && ((Reg & 0x3F) != 0x18)
         % is equivalent to (Reg & 0x2F) != 0x8
@@ -684,9 +750,9 @@ first_pass(<<?OP_IS_REFERENCE, Rest0/binary>>, MMod, MSt0, State0) ->
     MSt3 = cond_jump_to_label(
         {Reg, '&', ?TERM_PRIMARY_MASK, '!=', ?TERM_PRIMARY_BOXED}, Label, MMod, MSt2
     ),
-    MSt4 = MMod:and_(MSt3, Reg, ?TERM_PRIMARY_CLEAR_MASK),
+    {MSt4, Reg} = MMod:and_(MSt3, {free, Reg}, ?TERM_PRIMARY_CLEAR_MASK),
     MSt5 = MMod:move_array_element(MSt4, Reg, 0, Reg),
-    MSt6 = MMod:and_(MSt5, Reg, ?TERM_BOXED_TAG_MASK),
+    {MSt6, Reg} = MMod:and_(MSt5, {free, Reg}, ?TERM_BOXED_TAG_MASK),
     MSt7 = cond_jump_to_label(
         {'and', [{Reg, '!=', ?TERM_BOXED_REF}, {Reg, '!=', ?TERM_BOXED_EXTERNAL_REF}]},
         Label,
@@ -775,7 +841,7 @@ first_pass(<<?OP_TEST_ARITY, Rest0/binary>>, MMod, MSt0, State0) ->
     {Arity, Rest3} = decode_literal(Rest2),
     ?TRACE("OP_TEST_ARITY ~p, ~p, ~p\n", [Label, Arg1, Arity]),
     {MSt2, Reg} = MMod:move_to_native_register(MSt1, Arg1),
-    MSt3 = MMod:and_(MSt2, Reg, ?TERM_PRIMARY_CLEAR_MASK),
+    {MSt3, Reg} = MMod:and_(MSt2, {free, Reg}, ?TERM_PRIMARY_CLEAR_MASK),
     MSt4 = MMod:move_array_element(MSt3, Reg, 0, Reg),
     {MSt5, ArityReg} = MMod:shift_right(MSt4, {free, Reg}, 6),
     MSt6 = cond_jump_to_label({{free, ArityReg}, '!=', Arity}, Label, MMod, MSt5),
@@ -836,13 +902,22 @@ first_pass(<<?OP_SELECT_TUPLE_ARITY, Rest0/binary>>, MMod, MSt0, State0) ->
     ?ASSERT_ALL_NATIVE_FREE(MSt5),
     first_pass(Rest4, MMod, MSt5, State0);
 % 61
-first_pass(<<?OP_JUMP, Rest0/binary>>, MMod, MSt0, State0) ->
+first_pass(<<?OP_JUMP, Rest0/binary>>, MMod, MSt0, #state{tail_cache = TC} = State0) ->
     ?ASSERT_ALL_NATIVE_FREE(MSt0),
     {Label, Rest1} = decode_label(Rest0),
     ?TRACE("OP_JUMP ~p\n", [Label]),
-    MSt1 = MMod:call_only_or_schedule_next(MSt0, Label),
-    ?ASSERT_ALL_NATIVE_FREE(MSt1),
-    first_pass(Rest1, MMod, MSt1, State0);
+    TailCacheKey = {op_call_only, Label},
+    case lists:keyfind(TailCacheKey, 1, TC) of
+        false ->
+            Offset = MMod:offset(MSt0),
+            MSt1 = MMod:call_only_or_schedule_next(MSt0, Label),
+            ?ASSERT_ALL_NATIVE_FREE(MSt1),
+            first_pass(Rest1, MMod, MSt1, State0#state{tail_cache = [{TailCacheKey, Offset} | TC]});
+        {TailCacheKey, Offset} ->
+            MSt1 = MMod:jump_to_offset(MSt0, Offset),
+            ?ASSERT_ALL_NATIVE_FREE(MSt1),
+            first_pass(Rest1, MMod, MSt1, State0)
+    end;
 % 62
 % Same implementation as OP_TRY, to confirm.
 first_pass(<<?OP_CATCH, Rest0/binary>>, MMod, MSt0, State0) ->
@@ -882,7 +957,7 @@ first_pass(<<?OP_GET_LIST, Rest0/binary>>, MMod, MSt0, State0) ->
     {MSt3, TailDest, Rest3} = decode_dest(Rest2, MMod, MSt2),
     ?TRACE("OP_GET_LIST ~p, ~p, ~p\n", [List, HeadDest, TailDest]),
     {MSt4, Reg} = MMod:move_to_native_register(MSt3, List),
-    MSt5 = MMod:and_(MSt4, Reg, ?TERM_PRIMARY_CLEAR_MASK),
+    {MSt5, Reg} = MMod:and_(MSt4, {free, Reg}, ?TERM_PRIMARY_CLEAR_MASK),
     MSt6 = MMod:move_array_element(MSt5, Reg, ?LIST_HEAD_INDEX, HeadDest),
     MSt7 = MMod:free_native_registers(MSt6, [HeadDest]),
     MSt8 = MMod:move_array_element(MSt7, Reg, ?LIST_TAIL_INDEX, TailDest),
@@ -898,7 +973,7 @@ first_pass(<<?OP_GET_TUPLE_ELEMENT, Rest0/binary>>, MMod, MSt0, State0) ->
     {MSt2, Dest, Rest3} = decode_dest(Rest2, MMod, MSt1),
     ?TRACE("OP_GET_TUPLE_ELEMENT ~p, ~p, ~p\n", [Source, Element, Dest]),
     {MSt3, Reg} = MMod:move_to_native_register(MSt2, Source),
-    MSt4 = MMod:and_(MSt3, Reg, ?TERM_PRIMARY_CLEAR_MASK),
+    {MSt4, Reg} = MMod:and_(MSt3, {free, Reg}, ?TERM_PRIMARY_CLEAR_MASK),
     MSt5 = MMod:move_array_element(MSt4, Reg, Element + 1, Dest),
     MSt6 = MMod:free_native_registers(MSt5, [Reg, Dest]),
     ?ASSERT_ALL_NATIVE_FREE(MSt6),
@@ -911,7 +986,7 @@ first_pass(<<?OP_SET_TUPLE_ELEMENT, Rest0/binary>>, MMod, MSt0, State0) ->
     {Position, Rest3} = decode_literal(Rest2),
     ?TRACE("OP_SET_TUPLE_ELEMENT ~p, ~p, ~p\n", [NewElement, Tuple, Position]),
     {MSt3, Reg} = MMod:move_to_native_register(MSt2, Tuple),
-    MSt4 = MMod:and_(MSt3, Reg, ?TERM_PRIMARY_CLEAR_MASK),
+    {MSt4, Reg} = MMod:and_(MSt3, {free, Reg}, ?TERM_PRIMARY_CLEAR_MASK),
     MSt5 = MMod:move_to_array_element(MSt4, NewElement, Reg, Position + 1),
     MSt6 = MMod:free_native_registers(MSt5, [NewElement, Reg]),
     ?ASSERT_ALL_NATIVE_FREE(MSt6),
@@ -1011,7 +1086,7 @@ first_pass(<<?OP_FMOVE, Rest0/binary>>, MMod, MSt0, State0) ->
     {MSt2, ResultReg} = MMod:call_primitive(MSt1, ?PRIM_CONTEXT_ENSURE_FPREGS, [ctx]),
     MSt3 = MMod:free_native_registers(MSt2, [ResultReg]),
     {MSt4, Reg} = MMod:move_to_native_register(MSt3, SrcValue),
-    MSt5 = MMod:and_(MSt4, Reg, ?TERM_PRIMARY_CLEAR_MASK),
+    {MSt5, Reg} = MMod:and_(MSt4, {free, Reg}, ?TERM_PRIMARY_CLEAR_MASK),
     MSt6 = MMod:move_to_vm_register(MSt5, {free, {ptr, Reg, 1}}, FPReg),
     ?ASSERT_ALL_NATIVE_FREE(MSt6),
     first_pass(Rest2, MMod, MSt6, State0);
@@ -1294,7 +1369,7 @@ first_pass(<<?OP_BS_GET_BINARY2, Rest0/binary>>, MMod, MSt0, State0) ->
         MMod:call_primitive_last(BlockSt, ?PRIM_RAISE_ERROR, [ctx, jit_state, offset, ?BADARG_ATOM])
     end),
     {MSt8, BSOffsetReg1} = MMod:shift_right(MSt7, {free, BSOffsetReg0}, 3),
-    MSt9 = MMod:and_(MSt8, BSBinaryReg0, ?TERM_PRIMARY_CLEAR_MASK),
+    {MSt9, BSBinaryReg0} = MMod:and_(MSt8, {free, BSBinaryReg0}, ?TERM_PRIMARY_CLEAR_MASK),
     {MSt10, SizeReg} = MMod:get_array_element(MSt9, {free, BSBinaryReg0}, 1),
     {MSt13, SizeValue} =
         if
@@ -1427,31 +1502,21 @@ first_pass(<<?OP_GC_BIF1, Rest0/binary>>, MMod, MSt0, State0) ->
     ?ASSERT_ALL_NATIVE_FREE(MSt7),
     first_pass(Rest5, MMod, MSt7, State0);
 % 125
-first_pass(<<?OP_GC_BIF2, Rest0/binary>>, MMod, MSt0, State0) ->
+first_pass(
+    <<?OP_GC_BIF2, Rest0/binary>>, MMod, MSt0, #state{import_resolver = ImportResolver} = State0
+) ->
     ?ASSERT_ALL_NATIVE_FREE(MSt0),
     {FailLabel, Rest1} = decode_label(Rest0),
     {Live, Rest2} = decode_literal(Rest1),
-    {MSt1, TrimResultReg} = MMod:call_primitive(MSt0, ?PRIM_TRIM_LIVE_REGS, [ctx, Live]),
-    MSt2 = MMod:free_native_registers(MSt1, [TrimResultReg]),
-    CappedLive =
-        if
-            Live > ?MAX_REG -> ?MAX_REG;
-            true -> Live
-        end,
     {Bif, Rest3} = decode_literal(Rest2),
-    {MSt3, FuncPtr} = MMod:call_primitive(MSt2, ?PRIM_GET_IMPORTED_BIF, [
-        jit_state, Bif
-    ]),
-    {MSt4, Arg1, Rest4} = decode_compact_term(Rest3, MMod, MSt3, State0),
-    {MSt5, Arg2, Rest5} = decode_compact_term(Rest4, MMod, MSt4, State0),
-    {MSt6, Dest, Rest6} = decode_dest(Rest5, MMod, MSt5),
+    {MSt1, Arg1, Rest4} = decode_typed_compact_term(Rest3, MMod, MSt0, State0),
+    {MSt2, Arg2, Rest5} = decode_typed_compact_term(Rest4, MMod, MSt1, State0),
+    {MSt3, Dest, Rest6} = decode_dest(Rest5, MMod, MSt2),
+    {BifModule, BifFunName, 2} = ImportResolver(Bif),
     ?TRACE("OP_GC_BIF2 ~p, ~p, ~p, ~p, ~p, ~p\n", [FailLabel, Live, Bif, Arg1, Arg2, Dest]),
-    {MSt7, ResultReg} = MMod:call_func_ptr(MSt6, {free, FuncPtr}, [
-        ctx, FailLabel, CappedLive, {free, Arg1}, {free, Arg2}
-    ]),
-    MSt8 = bif_faillabel_test(FailLabel, MMod, MSt7, {free, ResultReg}, {free, Dest}),
-    ?ASSERT_ALL_NATIVE_FREE(MSt8),
-    first_pass(Rest6, MMod, MSt8, State0);
+    MSt4 = op_gc_bif2(MMod, MSt3, FailLabel, Live, Bif, BifModule, BifFunName, Arg1, Arg2, Dest),
+    ?ASSERT_ALL_NATIVE_FREE(MSt4),
+    first_pass(Rest6, MMod, MSt4, State0);
 % 129
 first_pass(<<?OP_IS_BITSTR, Rest0/binary>>, MMod, MSt0, State0) ->
     ?ASSERT_ALL_NATIVE_FREE(MSt0),
@@ -1462,9 +1527,9 @@ first_pass(<<?OP_IS_BITSTR, Rest0/binary>>, MMod, MSt0, State0) ->
     MSt3 = cond_jump_to_label(
         {Reg, '&', ?TERM_PRIMARY_MASK, '!=', ?TERM_PRIMARY_BOXED}, Label, MMod, MSt2
     ),
-    MSt4 = MMod:and_(MSt3, Reg, ?TERM_PRIMARY_CLEAR_MASK),
+    {MSt4, Reg} = MMod:and_(MSt3, {free, Reg}, ?TERM_PRIMARY_CLEAR_MASK),
     MSt5 = MMod:move_array_element(MSt4, Reg, 0, Reg),
-    MSt6 = MMod:and_(MSt5, Reg, ?TERM_BOXED_TAG_MASK),
+    {MSt6, Reg} = MMod:and_(MSt5, {free, Reg}, ?TERM_BOXED_TAG_MASK),
     MSt7 = cond_jump_to_label(
         {'and', [
             {Reg, '!=', ?TERM_BOXED_REFC_BINARY},
@@ -1510,11 +1575,16 @@ first_pass(<<?OP_BS_INIT_WRITABLE, Rest0/binary>>, MMod, MSt0, State0) ->
     ),
     MSt2 = handle_error_if({'(bool)', {free, MemoryEnsureFreeReg}, '==', false}, MMod, MSt1),
     {MSt3, CreatedBin} = MMod:call_primitive(MSt2, ?PRIM_TERM_CREATE_EMPTY_BINARY, [ctx, 0]),
-    MSt4 = MMod:set_bs(MSt3, CreatedBin),
-    MSt5 = MMod:move_to_vm_register(MSt4, CreatedBin, {x_reg, 0}),
-    MSt6 = MMod:free_native_registers(MSt5, [CreatedBin]),
-    ?ASSERT_ALL_NATIVE_FREE(MSt6),
-    first_pass(Rest0, MMod, MSt6, State0);
+    MSt4 = MMod:if_block(MSt3, {CreatedBin, '==', ?TERM_INVALID_TERM}, fun(BSt0) ->
+        MMod:call_primitive_last(BSt0, ?PRIM_RAISE_ERROR, [
+            ctx, jit_state, offset, ?OUT_OF_MEMORY_ATOM
+        ])
+    end),
+    MSt5 = MMod:set_bs(MSt4, CreatedBin),
+    MSt6 = MMod:move_to_vm_register(MSt5, CreatedBin, {x_reg, 0}),
+    MSt7 = MMod:free_native_registers(MSt6, [CreatedBin]),
+    ?ASSERT_ALL_NATIVE_FREE(MSt7),
+    first_pass(Rest0, MMod, MSt7, State0);
 % 136
 first_pass(<<?OP_TRIM, Rest0/binary>>, MMod, MSt0, State0) ->
     ?ASSERT_ALL_NATIVE_FREE(MSt0),
@@ -1778,7 +1848,7 @@ first_pass(<<?OP_PUT_MAP_EXACT, Rest0/binary>>, MMod, MSt0, State0) ->
         Src, Live, {free, SrcSizeReg}, MMod, MSt7
     ),
     {MSt9, NewMapPtrReg} = MMod:call_primitive(MSt8, ?PRIM_TERM_COPY_MAP, [ctx, NewSrc]),
-    MSt10 = MMod:and_(MSt9, NewMapPtrReg, ?TERM_PRIMARY_CLEAR_MASK),
+    {MSt10, NewMapPtrReg} = MMod:and_(MSt9, {free, NewMapPtrReg}, ?TERM_PRIMARY_CLEAR_MASK),
     {MSt11, Rest6} = lists:foldl(
         fun(_Index, {ASt0, ARest0}) ->
             {ASt1, Key, ARest1} = decode_compact_term(ARest0, MMod, ASt0, State0),
@@ -1880,14 +1950,13 @@ first_pass(<<?OP_GET_MAP_ELEMENTS, Rest0/binary>>, MMod, MSt0, State0) ->
         ])
     end),
     {MSt6, SrcReg} = MMod:move_to_native_register(MSt5, Src),
-    {MSt7, MapReg} = MMod:copy_to_native_register(MSt6, SrcReg),
-    MSt8 = MMod:and_(MSt7, MapReg, ?TERM_PRIMARY_CLEAR_MASK),
-    MSt9 = MMod:add(MSt8, MapReg, MMod:word_size() * 2),
-    {MSt10, Dest1, Rest5} = decode_dest(Rest4, MMod, MSt9),
+    {MSt7, MapReg} = MMod:and_(MSt6, SrcReg, ?TERM_PRIMARY_CLEAR_MASK),
+    MSt8 = MMod:add(MSt7, MapReg, MMod:word_size() * 2),
+    {MSt9, Dest1, Rest5} = decode_dest(Rest4, MMod, MSt8),
     ?TRACE(",~p", [Dest1]),
-    MSt11 = MMod:move_array_element(MSt10, MapReg, {free, PosReg1}, Dest1),
-    MSt12 = MMod:free_native_registers(MSt11, [Dest1]),
-    {MSt13, Rest6} = lists:foldl(
+    MSt10 = MMod:move_array_element(MSt9, MapReg, {free, PosReg1}, Dest1),
+    MSt11 = MMod:free_native_registers(MSt10, [Dest1]),
+    {MSt12, Rest6} = lists:foldl(
         fun(_Index, {AccMSt0, AccRest0}) ->
             {AccMSt1, Key, AccRest1} = decode_compact_term(AccRest0, MMod, AccMSt0, State0),
             ?TRACE(",~p", [Key]),
@@ -1912,13 +1981,13 @@ first_pass(<<?OP_GET_MAP_ELEMENTS, Rest0/binary>>, MMod, MSt0, State0) ->
             AccMSt8 = MMod:free_native_registers(AccMSt7, [Dest]),
             {AccMSt8, AccRest2}
         end,
-        {MSt12, Rest5},
+        {MSt11, Rest5},
         lists:seq(2, ListSize div 2)
     ),
     ?TRACE("]\n", []),
-    MSt14 = MMod:free_native_registers(MSt13, [MapReg, SrcReg]),
-    ?ASSERT_ALL_NATIVE_FREE(MSt14),
-    first_pass(Rest6, MMod, MSt14, State0);
+    MSt13 = MMod:free_native_registers(MSt12, [MapReg, SrcReg]),
+    ?ASSERT_ALL_NATIVE_FREE(MSt13),
+    first_pass(Rest6, MMod, MSt13, State0);
 % 159
 first_pass(
     <<?OP_IS_TAGGED_TUPLE, Rest0/binary>>, MMod, MSt0, #state{atom_resolver = AtomResolver} = State0
@@ -1933,7 +2002,7 @@ first_pass(
     MSt3 = cond_jump_to_label(
         {Reg, '&', ?TERM_PRIMARY_MASK, '!=', ?TERM_PRIMARY_BOXED}, Label, MMod, MSt2
     ),
-    MSt4 = MMod:and_(MSt3, Reg, ?TERM_PRIMARY_CLEAR_MASK),
+    {MSt4, Reg} = MMod:and_(MSt3, {free, Reg}, ?TERM_PRIMARY_CLEAR_MASK),
     {MSt5, TagReg0} = MMod:get_array_element(MSt4, Reg, 0),
     MSt6 = cond_jump_to_label(
         {TagReg0, '&', ?TERM_BOXED_TAG_MASK, '!=', ?TERM_BOXED_TUPLE}, Label, MMod, MSt5
@@ -1987,7 +2056,7 @@ first_pass(<<?OP_GET_HD, Rest0/binary>>, MMod, MSt0, State0) ->
     {MSt2, Dest, Rest3} = decode_dest(Rest1, MMod, MSt1),
     ?TRACE("OP_GET_HD ~p, ~p\n", [SrcValue, Dest]),
     {MSt3, Reg} = MMod:move_to_native_register(MSt2, SrcValue),
-    MSt4 = MMod:and_(MSt3, Reg, ?TERM_PRIMARY_CLEAR_MASK),
+    {MSt4, Reg} = MMod:and_(MSt3, {free, Reg}, ?TERM_PRIMARY_CLEAR_MASK),
     MSt5 = MMod:move_array_element(MSt4, Reg, ?LIST_HEAD_INDEX, Dest),
     MSt6 = MMod:free_native_registers(MSt5, [Dest, Reg]),
     ?ASSERT_ALL_NATIVE_FREE(MSt6),
@@ -1999,7 +2068,7 @@ first_pass(<<?OP_GET_TL, Rest0/binary>>, MMod, MSt0, State0) ->
     {MSt2, Dest, Rest3} = decode_dest(Rest1, MMod, MSt1),
     ?TRACE("OP_GET_TL ~p, ~p\n", [SrcValue, Dest]),
     {MSt3, Reg} = MMod:move_to_native_register(MSt2, SrcValue),
-    MSt4 = MMod:and_(MSt3, Reg, ?TERM_PRIMARY_CLEAR_MASK),
+    {MSt4, Reg} = MMod:and_(MSt3, {free, Reg}, ?TERM_PRIMARY_CLEAR_MASK),
     MSt5 = MMod:move_array_element(MSt4, Reg, ?LIST_TAIL_INDEX, Dest),
     MSt6 = MMod:free_native_registers(MSt5, [Dest, Reg]),
     ?ASSERT_ALL_NATIVE_FREE(MSt6),
@@ -2011,7 +2080,7 @@ first_pass(<<?OP_PUT_TUPLE2, Rest0/binary>>, MMod, MSt0, State0) ->
     {ListSize, Rest2} = decode_extended_list_header(Rest1),
     ?TRACE("OP_PUT_TUPLE2 ~p, [", [Dest]),
     {MSt2, ResultReg} = MMod:call_primitive(MSt1, ?PRIM_TERM_ALLOC_TUPLE, [ctx, ListSize]),
-    MSt3 = MMod:and_(MSt2, ResultReg, ?TERM_PRIMARY_CLEAR_MASK),
+    {MSt3, ResultReg} = MMod:and_(MSt2, {free, ResultReg}, ?TERM_PRIMARY_CLEAR_MASK),
     {MSt4, Rest3} = lists:foldl(
         fun(Index, {AccMSt0, AccRest0}) ->
             {AccMSt1, Element, AccRest1} = decode_compact_term(AccRest0, MMod, AccMSt0, State0),
@@ -2040,13 +2109,13 @@ first_pass(<<?OP_BS_GET_TAIL, Rest0/binary>>, MMod, MSt0, State0) ->
     {MSt4, BSBinaryReg} = MMod:get_array_element(MSt3, MatchStateRegPtr, 1),
     {MSt5, BSOffsetReg} = MMod:get_array_element(MSt4, MatchStateRegPtr, 2),
     MSt6 = MMod:free_native_registers(MSt5, [MatchStateRegPtr]),
-    MSt7 = MMod:and_(MSt6, BSBinaryReg, ?TERM_PRIMARY_CLEAR_MASK),
+    {MSt7, BSBinaryReg} = MMod:and_(MSt6, {free, BSBinaryReg}, ?TERM_PRIMARY_CLEAR_MASK),
     {MSt8, ResultTerm, NewMatchState} = do_get_tail(
         Src, Live, BSOffsetReg, BSBinaryReg, MMod, MSt7
     ),
     MSt9 = MMod:free_native_registers(MSt8, [BSBinaryReg]),
     {MSt10, MatchStateReg1} = MMod:move_to_native_register(MSt9, NewMatchState),
-    MSt11 = MMod:and_(MSt10, MatchStateReg1, ?TERM_PRIMARY_CLEAR_MASK),
+    {MSt11, MatchStateReg1} = MMod:and_(MSt10, {free, MatchStateReg1}, ?TERM_PRIMARY_CLEAR_MASK),
     MSt12 = MMod:move_to_array_element(MSt11, BSOffsetReg, MatchStateReg1, 2),
     MSt13 = MMod:move_to_vm_register(MSt12, ResultTerm, Dest),
     MSt14 = MMod:free_native_registers(MSt13, [MatchStateReg1, BSOffsetReg, ResultTerm, Dest]),
@@ -2073,7 +2142,7 @@ first_pass(<<?OP_BS_GET_POSITION, Rest0/binary>>, MMod, MSt0, State0) ->
     {_Live, Rest3} = decode_literal(Rest2),
     ?TRACE("OP_BS_GET_POSITION ~p, ~p, ~p\n", [Src, Dest, _Live]),
     {MSt3, Reg} = MMod:move_to_native_register(MSt2, Src),
-    MSt4 = MMod:and_(MSt3, Reg, ?TERM_PRIMARY_CLEAR_MASK),
+    {MSt4, Reg} = MMod:and_(MSt3, {free, Reg}, ?TERM_PRIMARY_CLEAR_MASK),
     MSt5 = MMod:move_array_element(MSt4, Reg, 2, Reg),
     MSt6 = MMod:shift_left(MSt5, Reg, 4),
     MSt7 = MMod:or_(MSt6, Reg, ?TERM_INTEGER_TAG),
@@ -2136,7 +2205,7 @@ first_pass(<<?OP_MAKE_FUN3, Rest0/binary>>, MMod, MSt0, State0) ->
     {MSt2, ResultReg} = MMod:call_primitive(MSt1, ?PRIM_TERM_ALLOC_FUN, [
         ctx, jit_state, FunIndex, NumFree
     ]),
-    MSt3 = MMod:and_(MSt2, ResultReg, ?TERM_PRIMARY_CLEAR_MASK),
+    {MSt3, ResultReg} = MMod:and_(MSt2, {free, ResultReg}, ?TERM_PRIMARY_CLEAR_MASK),
     {MSt4, Rest4} = lists:foldl(
         fun(Index, {AccMSt0, AccRest0}) ->
             {AccMSt1, Element, AccRest1} = decode_compact_term(AccRest0, MMod, AccMSt0, State0),
@@ -2221,8 +2290,8 @@ first_pass(
     {ListLen, Rest6} = decode_extended_list_header(Rest5),
     % Compute binary size and verify types in first iteration
     NBSegments = ListLen div 6,
-    {Rest7, MSt2, BinaryLitSize, BinaryRegSize, State1} = lists:foldl(
-        fun(_Index, {AccRest0, AccMSt0, AccLiteralSize0, AccSizeReg0, AccState0}) ->
+    {Rest7, MSt2, BinaryLitSize, BinaryRegSize, State1, ReuseSourceBinary} = lists:foldl(
+        fun(Index, {AccRest0, AccMSt0, AccLiteralSize0, AccSizeReg0, AccState0, AccReuseSrc}) ->
             {AtomTypeIndex, AccRest1} = decode_atom(AccRest0),
             AtomType = AtomResolver(AtomTypeIndex),
             {_Seg, AccRest2} = decode_literal(AccRest1),
@@ -2242,10 +2311,13 @@ first_pass(
                 AccMSt2,
                 AccState0
             ),
+            NewReuseSrc =
+                AccReuseSrc orelse
+                    (Index =:= 1 andalso AtomType =:= private_append andalso Size =:= ?ALL_ATOM),
             AccMSt4 = MMod:free_native_registers(AccMSt3, [Src, Size]),
-            {AccRest6, AccMSt4, AccLiteralSize1, AccSizeReg1, AccState1}
+            {AccRest6, AccMSt4, AccLiteralSize1, AccSizeReg1, AccState1, NewReuseSrc}
         end,
-        {Rest6, MSt1, 0, undefined, State0},
+        {Rest6, MSt1, 0, undefined, State0, false},
         lists:seq(1, NBSegments)
     ),
     {MSt4, BinaryTotalSize} =
@@ -2302,12 +2374,30 @@ first_pass(
         ]
     ),
     MSt14 = handle_error_if({'(bool)', {free, MemoryEnsureFreeReg}, '==', false}, MMod, MSt13),
-    {MSt15, CreatedBin} = MMod:call_primitive(MSt14, ?PRIM_TERM_CREATE_EMPTY_BINARY, [
-        ctx, {free, BinaryTotalSizeInBytes}
-    ]),
+    {MSt17, InitialCreatedBin} =
+        case ReuseSourceBinary of
+            false ->
+                % No reuse - create the binary now
+                {MSt15, CreatedBinResult} = MMod:call_primitive(
+                    MSt14, ?PRIM_TERM_CREATE_EMPTY_BINARY, [
+                        ctx, {free, BinaryTotalSizeInBytes}
+                    ]
+                ),
+                MSt16 = MMod:if_block(MSt15, {CreatedBinResult, '==', ?TERM_INVALID_TERM}, fun(
+                    BSt0
+                ) ->
+                    MMod:call_primitive_last(BSt0, ?PRIM_RAISE_ERROR, [
+                        ctx, jit_state, offset, ?OUT_OF_MEMORY_ATOM
+                    ])
+                end),
+                {MSt16, CreatedBinResult};
+            true ->
+                % Will reuse - defer creation until first segment
+                {MSt14, {private_append, BinaryTotalSizeInBytes}}
+        end,
     % We redo the decoding. Rest7 should still be equal to previous value.
-    {Rest7, MSt16, FinalOffset} = lists:foldl(
-        fun(_Index, {AccRest0, AccMSt0, AccOffset0}) ->
+    {Rest7, MSt18, FinalOffset, CreatedBin} = lists:foldl(
+        fun(_Index, {AccRest0, AccMSt0, AccOffset0, AccCreatedBin}) ->
             {AtomTypeIndex, AccRest1} = decode_atom(AccRest0),
             AtomType = AtomResolver(AtomTypeIndex),
             {_Seg, AccRest2} = decode_literal(AccRest1),
@@ -2316,30 +2406,30 @@ first_pass(
             {AccMSt2, Src, AccRest5} = decode_compact_term(AccRest4, MMod, AccMSt1, State1),
             {AccMSt3, Size, AccRest6} = decode_compact_term(AccRest5, MMod, AccMSt2, State1),
             ?TRACE("{~p,~p,~p,~p,~p,~p},", [AtomType, _Seg, SegmentUnit, Flags, Src, Size]),
-            {AccMSt4, AccOffset1} = first_pass_bs_create_bin_insert_value(
+            {AccMSt4, AccOffset1, AccCreatedBin1} = first_pass_bs_create_bin_insert_value(
                 AtomType,
                 Flags,
                 Src,
                 Size,
                 SegmentUnit,
                 Fail,
-                CreatedBin,
+                AccCreatedBin,
                 AccOffset0,
                 MMod,
                 AccMSt3
             ),
             AccMSt5 = MMod:free_native_registers(AccMSt4, [Flags, Src, Size]),
-            {AccRest6, AccMSt5, AccOffset1}
+            {AccRest6, AccMSt5, AccOffset1, AccCreatedBin1}
         end,
-        {Rest6, MSt15, 0},
+        {Rest6, MSt17, 0, InitialCreatedBin},
         lists:seq(1, NBSegments)
     ),
     ?TRACE("]\n", []),
-    MSt17 = MMod:free_native_registers(MSt16, [FinalOffset]),
-    MSt18 = MMod:move_to_vm_register(MSt17, CreatedBin, Dest),
-    MSt19 = MMod:free_native_registers(MSt18, [CreatedBin, Dest]),
-    ?ASSERT_ALL_NATIVE_FREE(MSt19),
-    first_pass(Rest7, MMod, MSt19, State1);
+    MSt19 = MMod:free_native_registers(MSt18, [FinalOffset]),
+    MSt20 = MMod:move_to_vm_register(MSt19, CreatedBin, Dest),
+    MSt21 = MMod:free_native_registers(MSt20, [CreatedBin, Dest]),
+    ?ASSERT_ALL_NATIVE_FREE(MSt21),
+    first_pass(Rest7, MMod, MSt21, State1);
 % 178
 first_pass(<<?OP_CALL_FUN2, Rest0/binary>>, MMod, MSt0, State0) ->
     ?ASSERT_ALL_NATIVE_FREE(MSt0),
@@ -2376,12 +2466,12 @@ first_pass(
     {Size, Rest2} = decode_literal(Rest1),
     {MSt1, Src, Rest3} = decode_compact_term(Rest2, MMod, MSt0, State0),
     {MSt2, SrcReg} = MMod:move_to_native_register(MSt1, Src),
-    MSt3 = MMod:and_(MSt2, SrcReg, ?TERM_PRIMARY_CLEAR_MASK),
+    {MSt3, SrcReg} = MMod:and_(MSt2, {free, SrcReg}, ?TERM_PRIMARY_CLEAR_MASK),
     {MSt4, Dest, Rest4} = decode_dest(Rest3, MMod, MSt3),
     {ListLen, Rest5} = decode_extended_list_header(Rest4),
     ?TRACE("OP_UPDATE_RECORD ~p, ~p, ~p, ~p, [", [Hint, Size, Src, Dest]),
     {MSt5, DestReg} = MMod:call_primitive(MSt4, ?PRIM_TERM_ALLOC_TUPLE, [ctx, Size]),
-    MSt6 = MMod:and_(MSt5, DestReg, ?TERM_PRIMARY_CLEAR_MASK),
+    {MSt6, DestReg} = MMod:and_(MSt5, {free, DestReg}, ?TERM_PRIMARY_CLEAR_MASK),
     {MSt7, ReuseReg} = MMod:move_to_native_register(
         MSt6,
         if
@@ -2451,20 +2541,19 @@ first_pass(<<?OP_BS_MATCH, Rest0/binary>>, MMod, MSt0, State0) ->
     {MSt1, MatchState, Rest2} = decode_compact_term(Rest1, MMod, MSt0, State0),
     {ListLen, Rest3} = decode_extended_list_header(Rest2),
     ?TRACE("OP_BS_MATCH ~p, ~p, [", [Fail, MatchState]),
-    {MSt2, MatchStateReg0} = MMod:copy_to_native_register(MSt1, MatchState),
-    MSt3 = MMod:and_(MSt2, MatchStateReg0, ?TERM_PRIMARY_CLEAR_MASK),
-    {MSt4, BSBinaryReg} = MMod:get_array_element(MSt3, MatchStateReg0, 1),
-    {MSt5, BSOffsetReg} = MMod:get_array_element(MSt4, MatchStateReg0, 2),
-    MSt6 = MMod:free_native_registers(MSt5, [MatchStateReg0]),
-    MSt7 = MMod:and_(MSt6, BSBinaryReg, ?TERM_PRIMARY_CLEAR_MASK),
-    {MSt8, MatchStateReg1} = MMod:move_to_native_register(MSt7, MatchState),
-    {MSt9, Rest4, NewMatchState, NewBSOffsetReg} = first_pass_bs_match(
-        Fail, MatchStateReg1, BSBinaryReg, BSOffsetReg, ListLen, Rest3, MMod, MSt8, State0
+    {MSt2, MatchStateReg0} = MMod:move_to_native_register(MSt1, MatchState),
+    {MSt3, MatchStateReg1} = MMod:and_(MSt2, MatchStateReg0, ?TERM_PRIMARY_CLEAR_MASK),
+    {MSt4, BSBinaryReg} = MMod:get_array_element(MSt3, MatchStateReg1, 1),
+    {MSt5, BSOffsetReg} = MMod:get_array_element(MSt4, MatchStateReg1, 2),
+    MSt6 = MMod:free_native_registers(MSt5, [MatchStateReg1]),
+    {MSt7, BSBinaryReg} = MMod:and_(MSt6, {free, BSBinaryReg}, ?TERM_PRIMARY_CLEAR_MASK),
+    {MSt8, Rest4, MatchStateReg2, NewBSOffsetReg} = first_pass_bs_match(
+        Fail, MatchStateReg0, BSBinaryReg, BSOffsetReg, ListLen, Rest3, MMod, MSt7, State0
     ),
     ?TRACE("]\n", []),
-    MSt10 = MMod:free_native_registers(MSt9, [BSBinaryReg, NewBSOffsetReg, NewMatchState]),
-    ?ASSERT_ALL_NATIVE_FREE(MSt10),
-    first_pass(Rest4, MMod, MSt10, State0).
+    MSt9 = MMod:free_native_registers(MSt8, [BSBinaryReg, NewBSOffsetReg, MatchStateReg2]),
+    ?ASSERT_ALL_NATIVE_FREE(MSt9),
+    first_pass(Rest4, MMod, MSt9, State0).
 
 first_pass_bs_create_bin_compute_size(
     AtomType, Src, _Size, _SegmentUnit, Fail, AccLiteralSize0, AccSizeReg0, MMod, MSt0, State0
@@ -2494,6 +2583,42 @@ first_pass_bs_create_bin_compute_size(
 ) ->
     MSt1 = verify_is_integer(Src, Fail, MMod, MSt0),
     {MSt1, AccLiteralSize0 + 32, AccSizeReg0, State0};
+first_pass_bs_create_bin_compute_size(
+    float, Src, Size, _SegmentUnit, Fail, AccLiteralSize0, AccSizeReg0, MMod, MSt0, State0
+) ->
+    MSt1 = verify_is_number(Src, Fail, MMod, MSt0),
+    % Verify and get the float size (defaults to 64 if nil)
+    case Size of
+        ?TERM_NIL ->
+            {MSt1, AccLiteralSize0 + 64, AccSizeReg0, State0};
+        _ ->
+            {MSt2, SizeValue} = term_to_int(Size, Fail, MMod, MSt1),
+            if
+                is_integer(SizeValue) ->
+                    % If size is a literal, compiler would only allow 16/32/64.
+                    {MSt2, AccLiteralSize0 + SizeValue, AccSizeReg0, State0};
+                is_atom(SizeValue) ->
+                    % Raise badarg (or jump to Fail) unless size is 16, 32, or 64
+                    MSt3 = cond_raise_badarg_or_jump_to_fail_label(
+                        {'and', [
+                            {SizeValue, '!=', 16},
+                            {SizeValue, '!=', 32},
+                            {SizeValue, '!=', 64}
+                        ]},
+                        Fail,
+                        MMod,
+                        MSt2
+                    ),
+                    case AccSizeReg0 of
+                        undefined ->
+                            {MSt3, AccLiteralSize0, SizeValue, State0};
+                        _ ->
+                            MSt4 = MMod:add(MSt3, AccSizeReg0, SizeValue),
+                            MSt5 = MMod:free_native_registers(MSt4, [SizeValue]),
+                            {MSt5, AccLiteralSize0, AccSizeReg0, State0}
+                    end
+            end
+    end;
 first_pass_bs_create_bin_compute_size(
     integer, Src, Size, SegmentUnit, Fail, AccLiteralSize0, AccSizeReg0, MMod, MSt0, State0
 ) ->
@@ -2540,7 +2665,7 @@ first_pass_bs_create_bin_compute_size(
 ) when AtomType =:= binary orelse AtomType =:= append orelse AtomType =:= private_append ->
     MSt1 = verify_is_binary(Src, Fail, MMod, MSt0),
     {MSt2, Reg} = MMod:copy_to_native_register(MSt1, Src),
-    MSt3 = MMod:and_(MSt2, Reg, ?TERM_PRIMARY_CLEAR_MASK),
+    {MSt3, Reg} = MMod:and_(MSt2, {free, Reg}, ?TERM_PRIMARY_CLEAR_MASK),
     MSt4 = MMod:move_array_element(MSt3, Reg, 1, Reg),
     MSt5 = MMod:shift_left(MSt4, Reg, 3),
     case AccSizeReg0 of
@@ -2566,7 +2691,7 @@ first_pass_bs_create_bin_compute_size(
     MSt1 = verify_is_binary(Src, Fail, MMod, MSt0),
     {MSt2, Reg0} = MMod:copy_to_native_register(MSt1, Size),
     {MSt3, Reg1} = MMod:copy_to_native_register(MSt2, Src),
-    MSt4 = MMod:and_(MSt3, Reg1, ?TERM_PRIMARY_CLEAR_MASK),
+    {MSt4, Reg1} = MMod:and_(MSt3, {free, Reg1}, ?TERM_PRIMARY_CLEAR_MASK),
     MSt5 = MMod:move_array_element(MSt4, Reg1, 1, Reg1),
     MSt6 = MMod:shift_left(MSt5, Reg1, 3),
     MSt7 = MMod:if_block(MSt6, {{free, Reg0}, '!=', ?ALL_ATOM}, fun(BSt0) ->
@@ -2600,7 +2725,7 @@ first_pass_bs_create_bin_insert_value(
     {MSt3, NewOffset} = first_pass_bs_create_bin_insert_value_increment_offset(
         MMod, MSt2, Offset, Size, 8
     ),
-    {MSt3, NewOffset};
+    {MSt3, NewOffset, CreatedBin};
 first_pass_bs_create_bin_insert_value(
     utf16, Flags, Src, _Size, _SegmentUnit, Fail, CreatedBin, Offset, MMod, MSt0
 ) ->
@@ -2612,7 +2737,7 @@ first_pass_bs_create_bin_insert_value(
     {MSt4, NewOffset} = first_pass_bs_create_bin_insert_value_increment_offset(
         MMod, MSt3, Offset, Size, 8
     ),
-    {MSt4, NewOffset};
+    {MSt4, NewOffset, CreatedBin};
 first_pass_bs_create_bin_insert_value(
     utf32, Flags, Src, _Size, _SegmentUnit, Fail, CreatedBin, Offset, MMod, MSt0
 ) ->
@@ -2627,7 +2752,7 @@ first_pass_bs_create_bin_insert_value(
     {MSt5, NewOffset} = first_pass_bs_create_bin_insert_value_increment_offset(
         MMod, MSt4, Offset, 4, 8
     ),
-    {MSt5, NewOffset};
+    {MSt5, NewOffset, CreatedBin};
 first_pass_bs_create_bin_insert_value(
     integer, Flags, Src, Size, SegmentUnit, Fail, CreatedBin, Offset, MMod, MSt0
 ) ->
@@ -2648,7 +2773,32 @@ first_pass_bs_create_bin_insert_value(
     {MSt7, NewOffset} = first_pass_bs_create_bin_insert_value_increment_offset(
         MMod, MSt6, Offset, SizeValue, 1
     ),
-    {MSt7, NewOffset};
+    {MSt7, NewOffset, CreatedBin};
+first_pass_bs_create_bin_insert_value(
+    float, Flags, Src, Size, _SegmentUnit, Fail, CreatedBin, Offset, MMod, MSt0
+) ->
+    % Src is a term (boxed float or integer)
+    {MSt1, SrcReg} = MMod:move_to_native_register(MSt0, Src),
+    {MSt2, FlagsValue} = decode_flags_list(Flags, MMod, MSt1),
+    % Get the float size (defaults to 64 if nil)
+    {MSt3, SizeValue} =
+        case Size of
+            ?TERM_NIL ->
+                {MSt2, 64};
+            _ ->
+                term_to_int(Size, Fail, MMod, MSt2)
+        end,
+    % Call single primitive with size parameter
+    {MSt4, BoolResult} = MMod:call_primitive(MSt3, ?PRIM_BITSTRING_INSERT_FLOAT, [
+        CreatedBin, Offset, {free, SrcReg}, SizeValue, {free, FlagsValue}
+    ]),
+    MSt5 = cond_raise_badarg_or_jump_to_fail_label(
+        {'(bool)', {free, BoolResult}, '==', false}, Fail, MMod, MSt4
+    ),
+    {MSt6, NewOffset} = first_pass_bs_create_bin_insert_value_increment_offset(
+        MMod, MSt5, Offset, SizeValue, 1
+    ),
+    {MSt6, NewOffset, CreatedBin};
 first_pass_bs_create_bin_insert_value(
     string, _Flags, Src, Size, SegmentUnit, Fail, CreatedBin, Offset, MMod, MSt0
 ) ->
@@ -2668,7 +2818,37 @@ first_pass_bs_create_bin_insert_value(
     {MSt6, NewOffset} = first_pass_bs_create_bin_insert_value_increment_offset(
         MMod, MSt5, Offset, BitSize, 1
     ),
-    {MSt6, NewOffset};
+    {MSt6, NewOffset, CreatedBin};
+first_pass_bs_create_bin_insert_value(
+    private_append,
+    _Flags,
+    Src,
+    _Size,
+    _SegmentUnit,
+    _Fail,
+    {private_append, BinaryTotalSizeInBytes},
+    Offset,
+    MMod,
+    MSt0
+) ->
+    % Special case: first private_append segment; CreatedBin is still the deferred placeholder
+    % Get original size before reusing
+    {MSt1, OriginalSize} = term_binary_size(Src, MMod, MSt0),
+    % Reuse the source binary (content is already there, no need to copy)
+    {MSt2, CreatedBin} = MMod:call_primitive(MSt1, ?PRIM_TERM_REUSE_BINARY, [
+        ctx, {free, Src}, {free, BinaryTotalSizeInBytes}
+    ]),
+    MSt3 = MMod:if_block(MSt2, {CreatedBin, '==', ?TERM_INVALID_TERM}, fun(BSt0) ->
+        MMod:call_primitive_last(BSt0, ?PRIM_RAISE_ERROR, [
+            ctx, jit_state, offset, ?OUT_OF_MEMORY_ATOM
+        ])
+    end),
+    % Convert original size to bits and update offset
+    MSt4 = MMod:shift_left(MSt3, OriginalSize, 3),
+    {MSt5, NewOffset} = first_pass_bs_create_bin_insert_value_increment_offset(
+        MMod, MSt4, Offset, OriginalSize, 1
+    ),
+    {MSt5, NewOffset, CreatedBin};
 first_pass_bs_create_bin_insert_value(
     AtomType, _Flags, Src, Size, _SegmentUnit, _Fail, CreatedBin, Offset, MMod, MSt0
 ) when AtomType =:= binary orelse AtomType =:= append orelse AtomType =:= private_append ->
@@ -2683,11 +2863,11 @@ first_pass_bs_create_bin_insert_value(
     {MSt3, NewOffset} = first_pass_bs_create_bin_insert_value_increment_offset(
         MMod, MSt2, Offset, SizeValue, 1
     ),
-    {MSt3, NewOffset};
+    {MSt3, NewOffset, CreatedBin};
 first_pass_bs_create_bin_insert_value(
-    _OtherType, _Flag, _Src, _Size, _SegmentUnit, _Fail, _CreatedBin, Offset, _MMod, MSt0
+    _OtherType, _Flag, _Src, _Size, _SegmentUnit, _Fail, CreatedBin, Offset, _MMod, MSt0
 ) ->
-    {MSt0, Offset}.
+    {MSt0, Offset, CreatedBin}.
 
 first_pass_bs_create_bin_insert_value_increment_offset(_MMod, MSt0, Offset, Size, Unit) when
     is_integer(Offset) andalso is_integer(Size) andalso is_integer(Unit)
@@ -2772,12 +2952,11 @@ first_pass_bs_match(
                 first_pass_bs_match_skip(MatchState, BSOffsetReg, J1, Rest1, MMod, MSt0)
         end,
     % offset needs to be updated in the loop
-    {MSt2, MatchStateReg1} = MMod:copy_to_native_register(MSt1, NewMatchState),
-    MSt3 = MMod:and_(MSt2, MatchStateReg1, ?TERM_PRIMARY_CLEAR_MASK),
-    MSt4 = MMod:move_to_array_element(MSt3, NewBSOffsetReg, MatchStateReg1, 2),
-    MSt5 = MMod:free_native_registers(MSt4, [MatchStateReg1]),
+    {MSt2, MatchStateReg1} = MMod:and_(MSt1, NewMatchState, ?TERM_PRIMARY_CLEAR_MASK),
+    MSt3 = MMod:move_to_array_element(MSt2, NewBSOffsetReg, MatchStateReg1, 2),
+    MSt4 = MMod:free_native_registers(MSt3, [MatchStateReg1]),
     first_pass_bs_match(
-        Fail, NewMatchState, BSBinaryReg, NewBSOffsetReg, J2, Rest2, MMod, MSt5, State0
+        Fail, NewMatchState, BSBinaryReg, NewBSOffsetReg, J2, Rest2, MMod, MSt4, State0
     ).
 
 first_pass_bs_match_ensure_at_least(
@@ -2862,7 +3041,7 @@ first_pass_bs_match_integer(
     MSt13 = MMod:free_native_registers(MSt12, [Result, Dest]),
     case MMod:available_regs(MSt9) of
         [] ->
-            MSt14 = MMod:and_(MSt13, MatchState, ?TERM_PRIMARY_CLEAR_MASK),
+            {MSt14, MatchState} = MMod:and_(MSt13, {free, MatchState}, ?TERM_PRIMARY_CLEAR_MASK),
             {MSt15, NewBSOffsetReg} = MMod:get_array_element(MSt14, MatchState, 2),
             MSt16 = MMod:or_(MSt15, MatchState, ?TERM_PRIMARY_BOXED),
             MSt17 = MMod:add(MSt16, NewBSOffsetReg, NumBits),
@@ -2915,13 +3094,13 @@ first_pass_bs_match_binary(
     ),
     % Restore BSBinaryReg as it may have been gc'd as well
     {MSt9, MatchStateReg0} = MMod:copy_to_native_register(MSt8, NewMatchState),
-    MSt10 = MMod:and_(MSt9, MatchStateReg0, ?TERM_PRIMARY_CLEAR_MASK),
+    {MSt10, MatchStateReg0} = MMod:and_(MSt9, {free, MatchStateReg0}, ?TERM_PRIMARY_CLEAR_MASK),
     MSt11 = MMod:move_array_element(MSt10, MatchStateReg0, 1, BSBinaryReg),
     MSt12 = MMod:free_native_registers(MSt11, [MatchStateReg0]),
     {MSt13, ResultTerm} = MMod:call_primitive(MSt12, ?PRIM_TERM_MAYBE_CREATE_SUB_BINARY, [
         ctx, BSBinaryReg, {free, BSOffseBytesReg}, MatchedBytes
     ]),
-    MSt14 = MMod:and_(MSt13, BSBinaryReg, ?TERM_PRIMARY_CLEAR_MASK),
+    {MSt14, BSBinaryReg} = MMod:and_(MSt13, {free, BSBinaryReg}, ?TERM_PRIMARY_CLEAR_MASK),
     {MSt15, Dest, Rest5} = decode_dest(Rest4, MMod, MSt14),
     ?TRACE("~p},", [Dest]),
     MSt16 = MMod:move_to_vm_register(MSt15, ResultTerm, Dest),
@@ -2958,10 +3137,10 @@ do_get_tail(
     ),
     % Restore BSBinaryReg as it may have been gc'd as well
     {MSt7, MatchStateReg0} = MMod:copy_to_native_register(MSt6, NewMatchState),
-    MSt8 = MMod:and_(MSt7, MatchStateReg0, ?TERM_PRIMARY_CLEAR_MASK),
+    {MSt8, MatchStateReg0} = MMod:and_(MSt7, {free, MatchStateReg0}, ?TERM_PRIMARY_CLEAR_MASK),
     MSt9 = MMod:move_array_element(MSt8, MatchStateReg0, 1, BSBinaryReg),
     MSt10 = MMod:free_native_registers(MSt9, [MatchStateReg0]),
-    MSt11 = MMod:and_(MSt10, BSBinaryReg, ?TERM_PRIMARY_CLEAR_MASK),
+    {MSt11, BSBinaryReg} = MMod:and_(MSt10, {free, BSBinaryReg}, ?TERM_PRIMARY_CLEAR_MASK),
     {MSt12, TailBytesReg1} = MMod:get_array_element(MSt11, BSBinaryReg, 1),
     MSt13 = MMod:sub(MSt12, TailBytesReg0, BSOffseBytesReg),
     MSt14 = MMod:add(MSt13, BSBinaryReg, ?TERM_PRIMARY_BOXED),
@@ -2995,7 +3174,7 @@ first_pass_bs_match_equal_colon_equal(
                         MMod:jump_to_label(BSt0, Fail)
                     end
                 ),
-                MSt4 = MMod:and_(MSt3, Result, ?TERM_PRIMARY_CLEAR_MASK),
+                {MSt4, Result} = MMod:and_(MSt3, {free, Result}, ?TERM_PRIMARY_CLEAR_MASK),
                 {MSt5, IntValue} = MMod:get_array_element(MSt4, {free, Result}, 1),
                 cond_jump_to_label({{free, IntValue}, '!=', PatternValue}, Fail, MMod, MSt5);
             _ ->
@@ -3011,6 +3190,255 @@ first_pass_bs_match_skip(MatchState, BSOffsetReg, J0, Rest0, MMod, MSt0) ->
     ?TRACE("{skip,~p},", [Stride]),
     {J0 - 1, Rest1, MatchState, BSOffsetReg, MSt1}.
 
+% Emit code for a two-argument GC BIF.
+%
+% `erlang:'+'/2` and `erlang:'-'/2` take the range-checked inline helpers
+% when the first operand is a typed integer and the second one is either a
+% typed integer or a tagged small integer literal. Every other combination
+% goes through the generic BIF call, once any `{typed, _, _}` wrapper has
+% been removed from the operands.
+op_gc_bif2(
+    MMod,
+    MSt0,
+    FailLabel,
+    Live,
+    Bif,
+    erlang,
+    '+',
+    {typed, Arg1, {t_integer, Range1}},
+    {typed, Arg2, {t_integer, Range2}},
+    Dest
+) ->
+    op_gc_bif2_add(MMod, MSt0, FailLabel, Live, Bif, Arg1, Arg2, Dest, Range1, Range2);
+op_gc_bif2(
+    MMod, MSt0, FailLabel, Live, Bif, erlang, '+', {typed, Arg1, {t_integer, Range1}}, Arg2, Dest
+) when is_integer(Arg2), Arg2 band ?TERM_IMMED_TAG_MASK =:= ?TERM_INTEGER_TAG ->
+    % Tagged small integer literal: its range is the single untagged value
+    Value = Arg2 bsr 4,
+    op_gc_bif2_add(MMod, MSt0, FailLabel, Live, Bif, Arg1, Arg2, Dest, Range1, {Value, Value});
+op_gc_bif2(
+    MMod,
+    MSt0,
+    FailLabel,
+    Live,
+    Bif,
+    erlang,
+    '-',
+    {typed, Arg1, {t_integer, Range1}},
+    {typed, Arg2, {t_integer, Range2}},
+    Dest
+) ->
+    op_gc_bif2_sub(MMod, MSt0, FailLabel, Live, Bif, Arg1, Arg2, Dest, Range1, Range2);
+op_gc_bif2(
+    MMod, MSt0, FailLabel, Live, Bif, erlang, '-', {typed, Arg1, {t_integer, Range1}}, Arg2, Dest
+) when is_integer(Arg2), Arg2 band ?TERM_IMMED_TAG_MASK =:= ?TERM_INTEGER_TAG ->
+    % Tagged small integer literal: its range is the single untagged value
+    Value = Arg2 bsr 4,
+    op_gc_bif2_sub(MMod, MSt0, FailLabel, Live, Bif, Arg1, Arg2, Dest, Range1, {Value, Value});
+% Default case: whatever the module, function and operand shapes are, call
+% the BIF generically on the unwrapped operands
+op_gc_bif2(MMod, MSt0, FailLabel, Live, Bif, _Module, _Function, Arg1, Arg2, Dest) ->
+    op_gc_bif2_default(
+        MMod, MSt0, FailLabel, Live, Bif, unwrap_typed(Arg1), unwrap_typed(Arg2), Dest
+    ).
+
+% Generic two-argument GC BIF call.
+%
+% Calls the ?PRIM_TRIM_LIVE_REGS primitive with Live, looks up the imported
+% BIF function pointer and calls it with the two operands. The result
+% register is then handed to bif_faillabel_test/5 with FailLabel and Dest.
+op_gc_bif2_default(MMod, MSt0, FailLabel, Live, Bif, Arg1, Arg2, Dest) ->
+    % The value returned by the trim primitive is not used: free it at once
+    {MSt1, TrimResultReg} = MMod:call_primitive(MSt0, ?PRIM_TRIM_LIVE_REGS, [ctx, Live]),
+    MSt2 = MMod:free_native_registers(MSt1, [TrimResultReg]),
+    {MSt3, BifPtr} = MMod:call_primitive(MSt2, ?PRIM_GET_IMPORTED_BIF, [jit_state, Bif]),
+    % The live register count passed to the BIF itself is capped at
+    % ?MAX_REG; both operands and the function pointer are freed by the call
+    BifArgs = [ctx, FailLabel, min(Live, ?MAX_REG), {free, Arg1}, {free, Arg2}],
+    {MSt4, ResultReg} = MMod:call_func_ptr(MSt3, {free, BifPtr}, BifArgs),
+    bif_faillabel_test(FailLabel, MMod, MSt4, {free, ResultReg}, {free, Dest}).
+
+% Decide whether an addition can be emitted inline on tagged small integers.
+% Returns true only if both ranges are finite, both operands are guaranteed
+% to be small integers and their sum is guaranteed to be one as well.
+can_inline_add(Range1, Range2, MMod) ->
+    % Small integer bounds: 28-bit signed on 32-bit targets, 60-bit on 64-bit
+    {MinSafe, MaxSafe} =
+        case MMod:word_size() of
+            % 32-bit
+            4 -> {-(1 bsl 27), (1 bsl 27) - 1};
+            % 64-bit
+            8 -> {-(1 bsl 59), (1 bsl 59) - 1}
+        end,
+
+    case {Range1, Range2} of
+        {{Min1, Max1}, {Min2, Max2}} when
+            is_integer(Min1), is_integer(Max1), is_integer(Min2), is_integer(Max2)
+        ->
+            % The inline code adds tagged immediates, so a boxed operand would
+            % be added as a pointer: each operand must fit in the small integer
+            % range, not only the sum (e.g. a large positive plus a large
+            % negative value whose sum is small).
+            Min1 >= MinSafe andalso Max1 =< MaxSafe andalso
+                Min2 >= MinSafe andalso Max2 =< MaxSafe andalso
+                Min1 + Min2 >= MinSafe andalso Max1 + Max2 =< MaxSafe;
+        _ ->
+            % Unbounded range (has '-inf' or '+inf'), cannot optimize
+            false
+    end.
+
+% Addition specialised on the integer ranges known at compile time.
+%
+% When can_inline_add/3 accepts the two ranges, the sum is computed directly
+% on the tagged terms: the tag bits are cleared from the second operand so
+% that the tag of the first operand is carried over to the result.
+% Otherwise the generic BIF call is emitted. A literal second operand
+% (an integer) has its tag bits cleared here rather than in generated code.
+op_gc_bif2_add(MMod, MSt0, FailLabel, Live, Bif, Arg1, Arg2, Dest, Range1, Range2) ->
+    case can_inline_add(Range1, Range2, MMod) of
+        false ->
+            % Cannot prove safety, use default BIF call
+            op_gc_bif2_default(MMod, MSt0, FailLabel, Live, Bif, Arg1, Arg2, Dest);
+        true when is_integer(Arg2) ->
+            % Literal second operand: add its untagged bits as an immediate
+            Addend = Arg2 band (bnot (?TERM_IMMED_TAG_MASK)),
+            {MSt1, Reg} = MMod:move_to_native_register(MSt0, Arg1),
+            MSt2 = MMod:add(MSt1, Reg, Addend),
+            MSt3 = MMod:move_to_vm_register(MSt2, Reg, Dest),
+            MMod:free_native_registers(MSt3, [Reg]);
+        true ->
+            % Both operands are loaded into native registers
+            {MSt1, Reg1} = MMod:move_to_native_register(MSt0, Arg1),
+            {MSt2, Reg2} = MMod:move_to_native_register(MSt1, Arg2),
+            % Clear the tag bits of the second operand in generated code
+            {MSt3, Untagged} = MMod:and_(MSt2, {free, Reg2}, bnot (?TERM_IMMED_TAG_MASK)),
+            % Reg1 keeps its tag bits through the addition
+            MSt4 = MMod:add(MSt3, Reg1, Untagged),
+            MSt5 = MMod:move_to_vm_register(MSt4, Reg1, Dest),
+            MMod:free_native_registers(MSt5, [Reg1, Untagged])
+    end.
+
+% Decide whether a subtraction can be emitted inline on tagged small integers.
+% Returns true only if both ranges are finite, both operands are guaranteed
+% to be small integers and their difference is guaranteed to be one as well.
+can_inline_sub(Range1, Range2, MMod) ->
+    % Small integer bounds: 28-bit signed on 32-bit targets, 60-bit on 64-bit
+    {MinSafe, MaxSafe} =
+        case MMod:word_size() of
+            % 32-bit
+            4 -> {-(1 bsl 27), (1 bsl 27) - 1};
+            % 64-bit
+            8 -> {-(1 bsl 59), (1 bsl 59) - 1}
+        end,
+
+    case {Range1, Range2} of
+        {{Min1, Max1}, {Min2, Max2}} when
+            is_integer(Min1), is_integer(Max1), is_integer(Min2), is_integer(Max2)
+        ->
+            % The inline code subtracts tagged immediates, so a boxed operand
+            % would be handled as a pointer: each operand must fit in the small
+            % integer range, not only the difference (e.g. two large values
+            % that are close to each other).
+            % Smallest difference: Min1 - Max2 (smallest minus largest value)
+            % Largest difference: Max1 - Min2 (largest minus smallest value)
+            Min1 >= MinSafe andalso Max1 =< MaxSafe andalso
+                Min2 >= MinSafe andalso Max2 =< MaxSafe andalso
+                Min1 - Max2 >= MinSafe andalso Max1 - Min2 =< MaxSafe;
+        _ ->
+            % Unbounded range (has '-inf' or '+inf'), cannot optimize
+            false
+    end.
+
+% Subtraction specialised on the integer ranges known at compile time.
+%
+% When can_inline_sub/3 accepts the two ranges, the difference is computed
+% directly on the tagged terms: the tag bits are cleared from the second
+% operand so that the tag of the first operand is carried over to the result.
+% Otherwise the generic BIF call is emitted. A literal second operand
+% (an integer) has its tag bits cleared here rather than in generated code.
+op_gc_bif2_sub(MMod, MSt0, FailLabel, Live, Bif, Arg1, Arg2, Dest, Range1, Range2) ->
+    case can_inline_sub(Range1, Range2, MMod) of
+        false ->
+            % Cannot prove safety, use default BIF call
+            op_gc_bif2_default(MMod, MSt0, FailLabel, Live, Bif, Arg1, Arg2, Dest);
+        true when is_integer(Arg2) ->
+            % Literal second operand: subtract its untagged bits as an immediate
+            Subtrahend = Arg2 band (bnot (?TERM_IMMED_TAG_MASK)),
+            {MSt1, Reg} = MMod:move_to_native_register(MSt0, Arg1),
+            MSt2 = MMod:sub(MSt1, Reg, Subtrahend),
+            MSt3 = MMod:move_to_vm_register(MSt2, Reg, Dest),
+            MMod:free_native_registers(MSt3, [Reg]);
+        true ->
+            % Both operands are loaded into native registers
+            {MSt1, Reg1} = MMod:move_to_native_register(MSt0, Arg1),
+            {MSt2, Reg2} = MMod:move_to_native_register(MSt1, Arg2),
+            % Clear the tag bits of the second operand in generated code
+            {MSt3, Untagged} = MMod:and_(MSt2, {free, Reg2}, bnot (?TERM_IMMED_TAG_MASK)),
+            % Reg1 keeps its tag bits through the subtraction
+            MSt4 = MMod:sub(MSt3, Reg1, Untagged),
+            MSt5 = MMod:move_to_vm_register(MSt4, Reg1, Dest),
+            MMod:free_native_registers(MSt5, [Reg1, Untagged])
+    end.
+
+% Strip the `{typed, Value, Type}` wrapper from an operand, if there is one
+unwrap_typed({typed, Inner, _}) -> Inner;
+unwrap_typed(Plain) -> Plain.
+
+% Emit the `is_ge` test: jump to Label unless Arg1 >= Arg2.
+% A typed integer against an integer literal is compared inline; the rest uses a primitive.
+op_is_ge(MMod, MSt0, Label, Arg1, {typed, Arg2, {t_integer, _Range}}) when is_integer(Arg1) ->
+    % Arg1 is an integer literal (already tagged by decode_compact_term according to the
+    % original author); Arg2 is a typed integer that may be a small or a boxed integer.
+    {MSt1, Arg2Reg} = MMod:move_to_native_register(MSt0, Arg2),
+    % The block below is emitted for the case where Arg2 lacks the small integer tag
+    MSt2 = MMod:if_block(MSt1, {Arg2Reg, '&', ?TERM_IMMED_TAG_MASK, '!=', ?TERM_INTEGER_TAG}, fun(
+        BSt0
+    ) ->
+        % Arg2 is boxed (bignum): the outcome then depends only on its sign.
+        % Positive bignum -> Arg1 < Arg2, the test fails: jump to Label.
+        % Negative bignum -> Arg1 > Arg2, the test passes: no jump from this block.
+        % The sign is taken from the tag bits of element 0 of the boxed term.
+        {BSt1, BoxedReg} = MMod:and_(BSt0, Arg2Reg, bnot (?TERM_PRIMARY_MASK)),
+        BSt2 = MMod:move_array_element(BSt1, BoxedReg, 0, BoxedReg),
+        {BSt3, TagReg} = MMod:and_(BSt2, {free, BoxedReg}, ?TERM_BOXED_TAG_MASK),
+        % NOTE(review): when the tag is not ?TERM_BOXED_POSITIVE_INTEGER nothing jumps, so
+        % execution presumably continues with the inline comparison below - TODO confirm.
+        cond_jump_to_label({{free, TagReg}, '==', ?TERM_BOXED_POSITIVE_INTEGER}, Label, MMod, BSt3)
+    end),
+    % Inline comparison of the tagged values: jump to Label if Arg1 < Arg2.
+    % NOTE(review): this looks reachable with a boxed Arg2Reg (see the note in the block
+    % above), in which case a term pointer is compared with a small integer - confirm.
+    cond_jump_to_label({Arg1, '<', {free, Arg2Reg}}, Label, MMod, MSt2);
+op_is_ge(MMod, MSt0, Label, {typed, Arg1, {t_integer, _Range}}, Arg2) when is_integer(Arg2) ->
+    % Arg1 is a typed integer that may be a small or a boxed integer; Arg2 is an integer
+    % literal (already tagged by decode_compact_term according to the original author).
+    {MSt1, Arg1Reg} = MMod:move_to_native_register(MSt0, Arg1),
+    % The block below is emitted for the case where Arg1 lacks the small integer tag
+    MSt2 = MMod:if_block(MSt1, {Arg1Reg, '&', ?TERM_IMMED_TAG_MASK, '!=', ?TERM_INTEGER_TAG}, fun(
+        BSt0
+    ) ->
+        % Arg1 is boxed (bignum): the outcome then depends only on its sign.
+        % Positive bignum -> Arg1 > Arg2, the test passes: no jump from this block.
+        % Negative bignum -> Arg1 < Arg2, the test fails: jump to Label.
+        {BSt1, BoxedReg} = MMod:and_(BSt0, Arg1Reg, bnot (?TERM_PRIMARY_MASK)),
+        BSt2 = MMod:move_array_element(BSt1, BoxedReg, 0, BoxedReg),
+        {BSt3, TagReg} = MMod:and_(BSt2, {free, BoxedReg}, ?TERM_BOXED_TAG_MASK),
+        % NOTE(review): when the tag is ?TERM_BOXED_POSITIVE_INTEGER nothing jumps, so
+        % execution presumably continues with the inline comparison below - TODO confirm.
+        cond_jump_to_label({{free, TagReg}, '!=', ?TERM_BOXED_POSITIVE_INTEGER}, Label, MMod, BSt3)
+    end),
+    % Inline comparison of the tagged values: jump to Label if Arg1 < Arg2.
+    % NOTE(review): this looks reachable with a boxed Arg1Reg (see the note in the block
+    % above), in which case a term pointer is compared with a small integer - confirm.
+    cond_jump_to_label({{free, Arg1Reg}, '<', Arg2}, Label, MMod, MSt2);
+% Fallback: compare the unwrapped operands through the ?PRIM_TERM_COMPARE primitive
+op_is_ge(MMod, MSt0, Label, Arg1, Arg2) ->
+    {MSt1, ResultReg} = MMod:call_primitive(MSt0, ?PRIM_TERM_COMPARE, [
+        ctx, jit_state, {free, unwrap_typed(Arg1)}, {free, unwrap_typed(Arg2)}, ?TERM_COMPARE_NO_OPTS
+    ]),
+    MSt2 = handle_error_if({'(int)', ResultReg, '==', ?TERM_COMPARE_MEMORY_ALLOC_FAIL}, MMod, MSt1),
+    cond_jump_to_label({'(int)', {free, ResultReg}, '==', ?TERM_LESS_THAN}, Label, MMod, MSt2).
+
 term_alloc_bin_match_state(Live, Src, Dest, MMod, MSt0) ->
     {MSt1, TrimResultReg} = MMod:call_primitive(MSt0, ?PRIM_TRIM_LIVE_REGS, [ctx, Live]),
     MSt2 = MMod:free_native_registers(MSt1, [TrimResultReg]),
@@ -3037,7 +3465,7 @@ term_is_boxed_with_tag_and_get_ptr(Label, Arg1, BoxedTag, MMod, MSt1) ->
     MSt3 = cond_jump_to_label(
         {Reg, '&', ?TERM_PRIMARY_MASK, '!=', ?TERM_PRIMARY_BOXED}, Label, MMod, MSt2
     ),
-    MSt4 = MMod:and_(MSt3, Reg, ?TERM_PRIMARY_CLEAR_MASK),
+    {MSt4, Reg} = MMod:and_(MSt3, {free, Reg}, ?TERM_PRIMARY_CLEAR_MASK),
     {MSt5, BoxTagReg} = MMod:get_array_element(MSt4, Reg, 0),
     MSt6 = cond_jump_to_label(
         {{free, BoxTagReg}, '&', ?TERM_BOXED_TAG_MASK, '!=', BoxedTag}, Label, MMod, MSt5
@@ -3062,28 +3490,30 @@ verify_is_function({typed, Func, _Other}, MMod, MSt0) ->
     ]),
     {MSt2, Reg};
 verify_is_function(Func, MMod, MSt0) ->
-    {MSt1, Reg} = MMod:copy_to_native_register(MSt0, Func),
+    {MSt1, Reg} = MMod:move_to_native_register(MSt0, Func),
     MSt2 = MMod:if_block(MSt1, {Reg, '&', ?TERM_PRIMARY_MASK, '!=', ?TERM_PRIMARY_BOXED}, fun(BSt0) ->
         MMod:call_primitive_last(BSt0, ?PRIM_RAISE_ERROR_TUPLE, [
             ctx, jit_state, offset, ?BADFUN_ATOM, Reg
         ])
     end),
-    MSt3 = MMod:and_(MSt2, Reg, ?TERM_PRIMARY_CLEAR_MASK),
-    MSt4 = MMod:move_array_element(MSt3, Reg, 0, Reg),
-    MSt5 = MMod:if_block(MSt4, {Reg, '&', ?TERM_BOXED_TAG_MASK, '!=', ?TERM_BOXED_FUN}, fun(BSt0) ->
-        MMod:call_primitive_last(BSt0, ?PRIM_RAISE_ERROR_TUPLE, [
-            ctx, jit_state, offset, ?BADFUN_ATOM, Reg
-        ])
-    end),
-    MSt6 = MMod:free_native_registers(MSt5, [Reg]),
-    MMod:move_to_native_register(MSt6, Func).
+    {MSt3, BoxedPtrReg} = MMod:and_(MSt2, Reg, ?TERM_PRIMARY_CLEAR_MASK),
+    MSt4 = MMod:move_array_element(MSt3, BoxedPtrReg, 0, BoxedPtrReg),
+    MSt5 = MMod:if_block(
+        MSt4, {BoxedPtrReg, '&', ?TERM_BOXED_TAG_MASK, '!=', ?TERM_BOXED_FUN}, fun(BSt0) ->
+            MMod:call_primitive_last(BSt0, ?PRIM_RAISE_ERROR_TUPLE, [
+                ctx, jit_state, offset, ?BADFUN_ATOM, Reg
+            ])
+        end
+    ),
+    MSt6 = MMod:free_native_registers(MSt5, [BoxedPtrReg]),
+    {MSt6, Reg}.
 
 verify_is_binary_or_match_state(Label, Src, MMod, MSt0) ->
     {MSt1, Reg} = MMod:copy_to_native_register(MSt0, Src),
     MSt2 = verify_is_boxed(MMod, MSt1, Reg, Label),
-    MSt3 = MMod:and_(MSt2, Reg, ?TERM_PRIMARY_CLEAR_MASK),
+    {MSt3, Reg} = MMod:and_(MSt2, {free, Reg}, ?TERM_PRIMARY_CLEAR_MASK),
     MSt4 = MMod:move_array_element(MSt3, Reg, 0, Reg),
-    MSt5 = MMod:and_(MSt4, Reg, ?TERM_BOXED_TAG_MASK),
+    {MSt5, Reg} = MMod:and_(MSt4, {free, Reg}, ?TERM_BOXED_TAG_MASK),
     MSt6 = cond_raise_badarg_or_jump_to_fail_label(
         {'and', [
             {Reg, '!=', ?TERM_BOXED_REFC_BINARY},
@@ -3099,7 +3529,7 @@ verify_is_binary_or_match_state(Label, Src, MMod, MSt0) ->
 
 verify_is_boxed_with_tag(Label, {free, Reg}, BoxedTag, MMod, MSt0) when is_atom(Reg) ->
     MSt1 = verify_is_boxed(MMod, MSt0, Reg, Label),
-    MSt2 = MMod:and_(MSt1, Reg, ?TERM_PRIMARY_CLEAR_MASK),
+    {MSt2, Reg} = MMod:and_(MSt1, {free, Reg}, ?TERM_PRIMARY_CLEAR_MASK),
     MSt3 = MMod:move_array_element(MSt2, Reg, 0, Reg),
     cond_raise_badarg_or_jump_to_fail_label(
         {{free, Reg}, '&', ?TERM_BOXED_TAG_MASK, '!=', BoxedTag}, Label, MMod, MSt3
@@ -3107,7 +3537,7 @@ verify_is_boxed_with_tag(Label, {free, Reg}, BoxedTag, MMod, MSt0) when is_atom(
 verify_is_boxed_with_tag(Label, Arg1, BoxedTag, MMod, MSt1) ->
     {MSt2, Reg} = MMod:copy_to_native_register(MSt1, Arg1),
     MSt3 = verify_is_boxed(MMod, MSt2, Reg, Label),
-    MSt4 = MMod:and_(MSt3, Reg, ?TERM_PRIMARY_CLEAR_MASK),
+    {MSt4, Reg} = MMod:and_(MSt3, {free, Reg}, ?TERM_PRIMARY_CLEAR_MASK),
     MSt5 = MMod:move_array_element(MSt4, Reg, 0, Reg),
     cond_raise_badarg_or_jump_to_fail_label(
         {{free, Reg}, '&', ?TERM_BOXED_TAG_MASK, '!=', BoxedTag}, Label, MMod, MSt5
@@ -3138,7 +3568,7 @@ verify_is_match_state_and_get_ptr(MMod, MSt0, Src) ->
     verify_is_match_state_and_get_ptr0(MMod, MSt2, Reg).
 
 verify_is_match_state_and_get_ptr0(MMod, MSt0, Reg) ->
-    MSt1 = MMod:and_(MSt0, Reg, ?TERM_PRIMARY_CLEAR_MASK),
+    {MSt1, Reg} = MMod:and_(MSt0, {free, Reg}, ?TERM_PRIMARY_CLEAR_MASK),
     {MSt2, BoxTag} = MMod:get_array_element(MSt1, Reg, 0),
     MSt3 = cond_raise_badarg(
         {{free, BoxTag}, '&', ?TERM_BOXED_TAG_MASK, '!=', ?TERM_BOXED_BIN_MATCH_STATE}, MMod, MSt2
@@ -3195,6 +3625,13 @@ verify_is_any_integer(Arg1, Fail, MMod, MSt0) ->
         Arg1, ?TERM_INTEGER_TAG, ?TERM_BOXED_POSITIVE_INTEGER, Fail, MMod, MSt0
     ).
 
+% Ensure Arg1 is a number (via ?PRIM_TERM_IS_NUMBER), else raise badarg or jump to Fail
+verify_is_number(Arg1, Fail, MMod, MSt0) ->
+    {St1, ArgReg} = MMod:copy_to_native_register(MSt0, Arg1),
+    {St2, IsNum} = MMod:call_primitive(St1, ?PRIM_TERM_IS_NUMBER, [{free, ArgReg}]),
+    NotANumber = {'(bool)', {free, IsNum}, '==', false},
+    cond_raise_badarg_or_jump_to_fail_label(NotANumber, Fail, MMod, St2).
+
 %%-----------------------------------------------------------------------------
 %% @doc Test if Arg1 is a binary, jump to FailLabel if it isn't or raise
 %% badarg if FailLabel is 0
@@ -3207,9 +3644,9 @@ verify_is_any_integer(Arg1, Fail, MMod, MSt0) ->
 verify_is_binary(Arg1, FailLabel, MMod, MSt0) ->
     {MSt1, Reg} = MMod:copy_to_native_register(MSt0, Arg1),
     MSt2 = verify_is_boxed(MMod, MSt1, Reg, FailLabel),
-    MSt3 = MMod:and_(MSt2, Reg, ?TERM_PRIMARY_CLEAR_MASK),
+    {MSt3, Reg} = MMod:and_(MSt2, {free, Reg}, ?TERM_PRIMARY_CLEAR_MASK),
     MSt4 = MMod:move_array_element(MSt3, Reg, 0, Reg),
-    MSt5 = MMod:and_(MSt4, Reg, ?TERM_BOXED_TAG_MASK),
+    {MSt5, Reg} = MMod:and_(MSt4, {free, Reg}, ?TERM_BOXED_TAG_MASK),
     MSt6 = cond_raise_badarg_or_jump_to_fail_label(
         {'and', [
             {Reg, '!=', ?TERM_BOXED_REFC_BINARY},
@@ -3608,7 +4045,7 @@ term_get_tuple_arity(Tuple, MMod, MSt0) ->
             {free, TupleReg} -> MMod:move_to_native_register(MSt0, TupleReg);
             _ -> MMod:copy_to_native_register(MSt0, Tuple)
         end,
-    MSt2 = MMod:and_(MSt1, Reg, ?TERM_PRIMARY_CLEAR_MASK),
+    {MSt2, Reg} = MMod:and_(MSt1, {free, Reg}, ?TERM_PRIMARY_CLEAR_MASK),
     MSt3 = MMod:move_array_element(MSt2, Reg, 0, Reg),
     {MSt4, ArityReg} = MMod:shift_right(MSt3, {free, Reg}, 6),
     {MSt4, ArityReg}.
@@ -3623,7 +4060,7 @@ term_get_map_keys(Map, MMod, MSt0) ->
             {free, MapReg} -> MMod:move_to_native_register(MSt0, MapReg);
             _ -> MMod:copy_to_native_register(MSt0, Map)
         end,
-    MSt2 = MMod:and_(MSt1, Reg, ?TERM_PRIMARY_CLEAR_MASK),
+    {MSt2, Reg} = MMod:and_(MSt1, {free, Reg}, ?TERM_PRIMARY_CLEAR_MASK),
     MSt3 = MMod:move_array_element(MSt2, Reg, 1, Reg),
     {MSt3, Reg}.
 
@@ -3687,9 +4124,14 @@ term_binary_heap_size({free, Reg}, MMod, MSt0) ->
     {MSt1, Reg}.
 
 term_binary_size({free, BinReg}, MMod, MSt0) ->
-    MSt1 = MMod:and_(MSt0, BinReg, ?TERM_PRIMARY_CLEAR_MASK),
+    {MSt1, BinReg} = MMod:and_(MSt0, {free, BinReg}, ?TERM_PRIMARY_CLEAR_MASK),
     MSt2 = MMod:move_array_element(MSt1, BinReg, 1, BinReg),
-    {MSt2, BinReg}.
+    {MSt2, BinReg};
+term_binary_size(Src, MMod, MSt0) ->
+    {MSt1, SrcReg} = MMod:move_to_native_register(MSt0, Src),
+    {MSt2, SrcReg} = MMod:and_(MSt1, {free, SrcReg}, ?TERM_PRIMARY_CLEAR_MASK),
+    MSt3 = MMod:move_array_element(MSt2, SrcReg, 1, SrcReg),
+    {MSt3, SrcReg}.
 
 term_set_map_assoc(MapPtrReg, {free, PosReg}, {free, Key}, {free, Value}, MMod, MSt0) ->
     {MSt1, MapKeysReg} = MMod:get_array_element(MSt0, MapPtrReg, 1),
@@ -3698,7 +4140,7 @@ term_set_map_assoc(MapPtrReg, {free, PosReg}, {free, Key}, {free, Value}, MMod,
     MMod:free_native_registers(MSt3, [PosReg, Value]).
 
 term_put_tuple_element({free, TupleReg}, PosReg, {free, Value}, MMod, MSt0) ->
-    MSt1 = MMod:and_(MSt0, TupleReg, ?TERM_PRIMARY_CLEAR_MASK),
+    {MSt1, TupleReg} = MMod:and_(MSt0, {free, TupleReg}, ?TERM_PRIMARY_CLEAR_MASK),
     MSt2 = MMod:move_to_array_element(MSt1, Value, TupleReg, PosReg, 1),
     MMod:free_native_registers(MSt2, [TupleReg, Value]).
 
@@ -3730,8 +4172,42 @@ variant() ->
 
 %% @doc Instantiate backend for this platform
 %% @return A tuple with the backend module and the backend state for this platform
-backend({StreamModule, Stream}) ->
+backend(StreamModule, Stream) ->
     BackendModule = ?MODULE:backend_module(),
     Variant = ?MODULE:variant(),
     BackendState = BackendModule:new(Variant, StreamModule, Stream),
     {BackendModule, BackendState}.
+
+-ifdef(JIT_INSTRUMENT).
+% Print a one-line memory report prefixed with Tag: flat sizes (in words) of
+% the backend state and of the compiler state's line offsets and tail cache,
+% their element counts, and the heap sizes of the calling process.
+instrument(Tag, #state{line_offsets = LineOffsets, tail_cache = TailCache}, MSt) ->
+    CompilerWords = erts_debug:flat_size({LineOffsets, TailCache}),
+    BackendWords = erts_debug:flat_size(MSt),
+    LineCount = length(LineOffsets),
+    TailCacheCount = length(TailCache),
+    % The pending branches are read positionally from the backend state record:
+    % {state, stream_module, stream, offset, branches, jump_table_start, ...}
+    BranchCount =
+        case element(1, MSt) of
+            state -> length(element(5, MSt));
+            _ -> unknown
+        end,
+    {heap_size, Heap} = process_info(self(), heap_size),
+    {total_heap_size, TotalHeap} = process_info(self(), total_heap_size),
+    io:format(
+        "~s: mst=~p words, state=~p words (lines=~p, tc=~p, br=~p), "
+        "heap=~p, total_heap=~p~n",
+        [
+            Tag,
+            BackendWords,
+            CompilerWords,
+            LineCount,
+            TailCacheCount,
+            BranchCount,
+            Heap,
+            TotalHeap
+        ]
+    ).
+-endif.
diff --git a/libs/jit/src/jit_aarch64.erl b/libs/jit/src/jit_aarch64.erl
index 1eba4fba80..52c28ad465 100644
--- a/libs/jit/src/jit_aarch64.erl
+++ b/libs/jit/src/jit_aarch64.erl
@@ -25,6 +25,7 @@
     new/3,
     stream/1,
     offset/1,
+    flush/1,
     debugger/1,
     used_regs/1,
     available_regs/1,
@@ -38,6 +39,7 @@
     return_if_not_equal_to_ctx/2,
     jump_to_label/2,
     jump_to_continuation/2,
+    jump_to_offset/2,
     if_block/3,
     if_else_block/4,
     shift_right/3,
@@ -132,6 +134,7 @@
     stream :: stream(),
     offset :: non_neg_integer(),
     branches :: [{non_neg_integer(), non_neg_integer(), non_neg_integer()}],
+    jump_table_start :: non_neg_integer(),
     available_regs :: [aarch64_register()],
     used_regs :: [aarch64_register()],
     labels :: [{integer() | reference(), integer()}],
@@ -151,6 +154,7 @@
 -type condition() ::
     {aarch64_register(), '<', integer()}
     | {maybe_free_aarch64_register(), '<', aarch64_register()}
+    | {integer(), '<', maybe_free_aarch64_register()}
     | {maybe_free_aarch64_register(), '==', integer()}
     | {maybe_free_aarch64_register(), '!=', aarch64_register() | integer()}
     | {'(int)', maybe_free_aarch64_register(), '==', integer()}
@@ -231,6 +235,7 @@ new(Variant, StreamModule, Stream) ->
         stream_module = StreamModule,
         stream = Stream,
         branches = [],
+        jump_table_start = 0,
         offset = StreamModule:offset(Stream),
         available_regs = ?AVAILABLE_REGS,
         used_regs = [],
@@ -258,6 +263,16 @@ stream(#state{stream = Stream}) ->
 offset(#state{stream_module = StreamModule, stream = Stream}) ->
     StreamModule:offset(Stream).
 
+%%-----------------------------------------------------------------------------
+%% @doc Flush the backend state. This backend has nothing to flush, so the
+%% state is returned unchanged.
+%% @end
+%% @param State current backend state
+%% @return The state, unmodified
+%%-----------------------------------------------------------------------------
+-spec flush(state()) -> state().
+flush(State = #state{}) -> State.
+
 %%-----------------------------------------------------------------------------
 %% @doc Emit a debugger of breakpoint instruction. This is used for debugging
 %% and not in production.
@@ -343,22 +358,78 @@ assert_all_native_free(#state{
 %% @return Updated backend state
 %%-----------------------------------------------------------------------------
 -spec jump_table(state(), pos_integer()) -> state().
-jump_table(State, LabelsCount) ->
-    jump_table0(State, 0, LabelsCount).
+jump_table(#state{stream_module = StreamModule, stream = Stream0} = State, LabelsCount) ->
+    JumpTableStart = StreamModule:offset(Stream0),
+    jump_table0(State#state{jump_table_start = JumpTableStart}, 0, LabelsCount).
 
 -spec jump_table0(state(), non_neg_integer(), pos_integer()) -> state().
 jump_table0(State, N, LabelsCount) when N > LabelsCount ->
     State;
 jump_table0(
-    #state{stream_module = StreamModule, stream = Stream0, branches = Branches} = State,
+    #state{stream_module = StreamModule, stream = Stream0} = State,
     N,
     LabelsCount
 ) ->
-    Offset = StreamModule:offset(Stream0),
-    BranchInstr = jit_aarch64_asm:b(0),
-    Reloc = {N, Offset, b},
+    % Placeholder jumps to next entry (1 instruction forward = 4 bytes)
+    BranchInstr = jit_aarch64_asm:b(1),
     Stream1 = StreamModule:append(Stream0, BranchInstr),
-    jump_table0(State#state{stream = Stream1, branches = [Reloc | Branches]}, N + 1, LabelsCount).
+    jump_table0(State#state{stream = Stream1}, N + 1, LabelsCount).
+
+%%-----------------------------------------------------------------------------
+%% @doc Re-encode the branch at `Offset' so that it targets `LabelOffset'
+%% @end
+%% @param StreamModule stream module
+%% @param Stream stream state
+%% @param Offset offset of the branch to patch
+%% @param Type kind of branch: `{bcc, CC}', `{adr, Reg}' or `b'
+%% @param LabelOffset target label offset
+%% @return Updated stream
+%%-----------------------------------------------------------------------------
+-spec patch_branch(module(), stream(), non_neg_integer(), any(), non_neg_integer()) -> stream().
+patch_branch(StreamModule, Stream, Offset, Type, LabelOffset) ->
+    Displacement = LabelOffset - Offset,
+    Patched =
+        case Type of
+            b -> jit_aarch64_asm:b(Displacement);
+            {bcc, Cond} -> jit_aarch64_asm:bcc(Cond, Displacement);
+            {adr, Reg} -> jit_aarch64_asm:adr(Reg, Displacement)
+        end,
+    StreamModule:replace(Stream, Offset, Patched).
+
+%%-----------------------------------------------------------------------------
+%% @doc Patch the branches targeting a label; the others are returned in order
+%% @end
+%% @param StreamModule stream module
+%% @param Stream stream state
+%% @param TargetLabel label whose pending branches are patched
+%% @param LabelOffset offset of the target label
+%% @param Branches list of pending branches
+%% @return {UpdatedStream, RemainingBranches}
+%%-----------------------------------------------------------------------------
+-spec patch_branches_for_label(
+    module(),
+    stream(),
+    integer(),
+    non_neg_integer(),
+    [{integer(), non_neg_integer(), any()}]
+) -> {stream(), [{integer(), non_neg_integer(), any()}]}.
+patch_branches_for_label(StreamModule, Stream, TargetLabel, LabelOffset, Branches) ->
+    patch_branches_for_label(StreamModule, Stream, TargetLabel, LabelOffset, Branches, []).
+
+patch_branches_for_label(_StreamModule, Stream, _TargetLabel, _LabelOffset, [], Kept) ->
+    {Stream, lists:reverse(Kept)};
+patch_branches_for_label(
+    StreamModule, Stream0, TargetLabel, LabelOffset, [Branch | Rest], Kept
+) ->
+    case Branch of
+        {TargetLabel, Offset, Type} ->
+            Stream1 = patch_branch(StreamModule, Stream0, Offset, Type, LabelOffset),
+            patch_branches_for_label(StreamModule, Stream1, TargetLabel, LabelOffset, Rest, Kept);
+        _ ->
+            patch_branches_for_label(
+                StreamModule, Stream0, TargetLabel, LabelOffset, Rest, [Branch | Kept]
+            )
+    end.
 
 %%-----------------------------------------------------------------------------
 %% @doc Rewrite stream to update all branches for labels.
@@ -378,14 +449,7 @@ update_branches(
     } = State
 ) ->
     {Label, LabelOffset} = lists:keyfind(Label, 1, Labels),
-    Rel = LabelOffset - Offset,
-    NewInstr =
-        case Type of
-            {bcc, CC} -> jit_aarch64_asm:bcc(CC, Rel);
-            {adr, Reg} -> jit_aarch64_asm:adr(Reg, Rel);
-            b -> jit_aarch64_asm:b(Rel)
-        end,
-    Stream1 = StreamModule:replace(Stream0, Offset, NewInstr),
+    Stream1 = patch_branch(StreamModule, Stream0, Offset, Type, LabelOffset),
     update_branches(State#state{stream = Stream1, branches = BranchesT}).
 
 %%-----------------------------------------------------------------------------
@@ -531,6 +595,13 @@ jump_to_label(
             State#state{stream = Stream1, branches = [Reloc | AccBranches]}
     end.
 
+% Append an unconditional branch (`b') targeting the stream offset TargetOffset
+-spec jump_to_offset(state(), non_neg_integer()) -> state().
+jump_to_offset(#state{stream_module = StreamModule, stream = Stream0} = State, TargetOffset) ->
+    % The displacement is relative to the position where the branch is appended
+    Branch = jit_aarch64_asm:b(TargetOffset - StreamModule:offset(Stream0)),
+    State#state{stream = StreamModule:append(Stream0, Branch)}.
+
 %%-----------------------------------------------------------------------------
 %% @doc Jump to a continuation address stored in a register.
 %% This is used for optimized intra-module returns.
@@ -687,15 +758,47 @@ if_else_block(
         jit_aarch64_asm:cc() | {tbz | tbnz, atom(), 0..63} | {cbz, atom()},
         non_neg_integer()
     }.
-if_block_cond(#state{stream_module = StreamModule, stream = Stream0} = State0, {Reg, '<', 0}) ->
+if_block_cond(#state{stream_module = StreamModule, stream = Stream0} = State0, {RegOrTuple, '<', 0}) ->
+    Reg =
+        case RegOrTuple of
+            {free, Reg0} -> Reg0;
+            RegOrTuple -> RegOrTuple
+        end,
     I = jit_aarch64_asm:tbz(Reg, 63, 0),
     Stream1 = StreamModule:append(Stream0, I),
-    State1 = State0#state{stream = Stream1},
-    {State1, {tbz, Reg, 63}, 0};
+    State1 = if_block_free_reg(RegOrTuple, State0),
+    State2 = State1#state{stream = Stream1},
+    {State2, {tbz, Reg, 63}, 0};
+% Handle {Val, '<', Reg}: the condition is Val < Reg, so branch when it is false (Reg =< Val)
 if_block_cond(
     #state{stream_module = StreamModule, stream = Stream0} = State0,
-    {Reg, '<', Val}
-) when is_atom(Reg), is_integer(Val) ->
+    {Val, '<', RegOrTuple}
+) when is_integer(Val) ->
+    Reg =
+        case RegOrTuple of
+            {free, Reg0} -> Reg0;
+            RegOrTuple -> RegOrTuple
+        end,
+    I1 = jit_aarch64_asm:cmp(Reg, Val),
+    % le = less than or equal
+    I2 = jit_aarch64_asm:bcc(le, 0),
+    Code = <<
+        I1/binary,
+        I2/binary
+    >>,
+    Stream1 = StreamModule:append(Stream0, Code),
+    State1 = if_block_free_reg(RegOrTuple, State0),
+    State2 = State1#state{stream = Stream1},
+    {State2, le, byte_size(I1)};
+if_block_cond(
+    #state{stream_module = StreamModule, stream = Stream0} = State0,
+    {RegOrTuple, '<', Val}
+) when is_integer(Val), Val =/= 0 ->
+    Reg =
+        case RegOrTuple of
+            {free, Reg0} -> Reg0;
+            RegOrTuple -> RegOrTuple
+        end,
     I1 = jit_aarch64_asm:cmp(Reg, Val),
     % ge = greater than or equal
     I2 = jit_aarch64_asm:bcc(ge, 0),
@@ -704,8 +807,9 @@ if_block_cond(
         I2/binary
     >>,
     Stream1 = StreamModule:append(Stream0, Code),
-    State1 = State0#state{stream = Stream1},
-    {State1, ge, byte_size(I1)};
+    State1 = if_block_free_reg(RegOrTuple, State0),
+    State2 = State1#state{stream = Stream1},
+    {State2, ge, byte_size(I1)};
 if_block_cond(
     #state{stream_module = StreamModule, stream = Stream0} = State0,
     {RegOrTuple, '<', RegB}
@@ -933,7 +1037,7 @@ if_block_cond(
 ) when ?IS_GPR(Reg) ->
     % AND with mask
     OffsetBefore = StreamModule:offset(Stream0),
-    State1 = and_(State0, Reg, Mask),
+    {State1, Reg} = and_(State0, RegTuple, Mask),
     Stream1 = State1#state.stream,
     % Compare with value
     I2 = jit_aarch64_asm:cmp(Reg, Val),
@@ -1835,17 +1939,30 @@ set_continuation_to_label(
         stream_module = StreamModule,
         stream = Stream0,
         available_regs = [Temp | _],
-        branches = Branches
+        branches = Branches,
+        labels = Labels
     } = State,
     Label
 ) ->
     Offset = StreamModule:offset(Stream0),
-    I1 = jit_aarch64_asm:adr(Temp, 0),
-    Reloc = {Label, Offset, {adr, Temp}},
-    I2 = jit_aarch64_asm:str(Temp, ?JITSTATE_CONTINUATION),
-    Code = <<I1/binary, I2/binary>>,
-    Stream1 = StreamModule:append(Stream0, Code),
-    State#state{stream = Stream1, branches = [Reloc | Branches]}.
+    case lists:keyfind(Label, 1, Labels) of
+        {Label, LabelOffset} ->
+            % Label is already known, emit direct adr without relocation
+            Rel = LabelOffset - Offset,
+            I1 = jit_aarch64_asm:adr(Temp, Rel),
+            I2 = jit_aarch64_asm:str(Temp, ?JITSTATE_CONTINUATION),
+            Code = <<I1/binary, I2/binary>>,
+            Stream1 = StreamModule:append(Stream0, Code),
+            State#state{stream = Stream1};
+        false ->
+            % Label not yet known, emit placeholder and add relocation
+            I1 = jit_aarch64_asm:adr(Temp, 0),
+            Reloc = {Label, Offset, {adr, Temp}},
+            I2 = jit_aarch64_asm:str(Temp, ?JITSTATE_CONTINUATION),
+            Code = <<I1/binary, I2/binary>>,
+            Stream1 = StreamModule:append(Stream0, Code),
+            State#state{stream = Stream1, branches = [Reloc | Branches]}
+    end.
 
 %%-----------------------------------------------------------------------------
 %% @doc Set the continuation address to the current offset, creating a
@@ -1945,9 +2062,18 @@ op_imm(#state{stream_module = StreamModule, stream = Stream0} = State, Op, RegA,
 %% @param Val immediate value to AND
 %% @return Updated backend state
 %%-----------------------------------------------------------------------------
--spec and_(state(), aarch64_register(), integer()) -> state().
-and_(State, Reg, Val) ->
-    op_imm(State, and_, Reg, Reg, Val).
+-spec and_(state(), aarch64_register() | {free, aarch64_register()}, integer()) ->
+    {state(), aarch64_register()}.
+and_(State, {free, Reg}, Val) ->
+    % {free, Reg}: the register is both source and destination of the AND.
+    NewState = op_imm(State, and_, Reg, Reg, Val),
+    {NewState, Reg};
+and_(#state{available_regs = [ResultReg | T], used_regs = UR} = State, Reg, Val) ->
+    % Plain register: the result goes to the first available register, which
+    % is moved to used_regs; that register is returned alongside the state.
+    State1 = State#state{available_regs = T, used_regs = [ResultReg | UR]},
+    NewState = op_imm(State1, and_, ResultReg, Reg, Val),
+    {NewState, ResultReg}.
 
 %%-----------------------------------------------------------------------------
 %% @doc Perform bitwise OR of a register with an immediate value.
@@ -1981,8 +2107,10 @@ add(State, Reg, Val) ->
 %% @param Val immediate value to subtract
 %% @return Updated backend state
 %%-----------------------------------------------------------------------------
--spec sub(state(), aarch64_register(), integer()) -> state().
-sub(#state{stream_module = StreamModule, stream = Stream0} = State, Reg, Val) ->
+-spec sub(state(), aarch64_register(), integer() | aarch64_register()) -> state().
+sub(State, Reg, Val) when is_integer(Val) ->
+    op_imm(State, sub, Reg, Reg, Val);
+sub(#state{stream_module = StreamModule, stream = Stream0} = State, Reg, Val) when is_atom(Val) ->
     I1 = jit_aarch64_asm:sub(Reg, Reg, Val),
     Stream1 = StreamModule:append(Stream0, I1),
     State#state{stream = Stream1}.
@@ -2123,6 +2251,7 @@ call_only_or_schedule_next(
         stream_module = StreamModule,
         stream = Stream0,
         branches = Branches,
+        labels = Labels,
         available_regs = [Temp | _]
     } = State0,
     Label
@@ -2135,11 +2264,22 @@ call_only_or_schedule_next(
     I3 = jit_aarch64_asm:str_w(Temp, ?JITSTATE_REDUCTIONCOUNT),
     Stream1 = StreamModule:append(Stream0, <<I1/binary, I2/binary, I3/binary>>),
     BNEOffset = StreamModule:offset(Stream1),
-    % Branch to label if reduction count is not zero
-    I4 = jit_aarch64_asm:bcc(ne, 0),
-    Reloc1 = {Label, BNEOffset, {bcc, ne}},
-    Stream2 = StreamModule:append(Stream1, I4),
-    State1 = State0#state{stream = Stream2, branches = [Reloc1 | Branches]},
+
+    case lists:keyfind(Label, 1, Labels) of
+        {Label, LabelOffset} ->
+            % Label is already known, emit direct branch with calculated offset
+            % Calculate relative offset (must be 4-byte aligned)
+            Rel = LabelOffset - BNEOffset,
+            I4 = jit_aarch64_asm:bcc(ne, Rel),
+            Stream2 = StreamModule:append(Stream1, I4),
+            State1 = State0#state{stream = Stream2};
+        false ->
+            % Label not yet known, emit placeholder and add relocation
+            I4 = jit_aarch64_asm:bcc(ne, 0),
+            Reloc1 = {Label, BNEOffset, {bcc, ne}},
+            Stream2 = StreamModule:append(Stream1, I4),
+            State1 = State0#state{stream = Stream2, branches = [Reloc1 | Branches]}
+    end,
     State2 = set_continuation_to_label(State1, Label),
     call_primitive_last(State2, ?PRIM_SCHEDULE_NEXT_CP, [ctx, jit_state]).
 
@@ -2315,5 +2455,35 @@ add_label(#state{stream_module = StreamModule, stream = Stream} = State, Label)
 %% @return Updated backend state
 %%-----------------------------------------------------------------------------
 -spec add_label(state(), integer() | reference(), integer()) -> state().
+add_label(
+    #state{
+        stream_module = StreamModule,
+        stream = Stream0,
+        jump_table_start = TableStart,
+        branches = Pending,
+        labels = Known
+    } = State,
+    Label,
+    LabelOffset
+) when is_integer(Label) ->
+    % Integer labels have a jump table slot: rewrite it now with a branch to the label.
+    % Slots are one 4-byte branch each, so slot N sits at TableStart + N * 4.
+    SlotOffset = TableStart + Label * 4,
+    SlotRel = LabelOffset - SlotOffset,
+    SlotBranch = jit_aarch64_asm:b(SlotRel),
+    Stream1 = StreamModule:replace(Stream0, SlotOffset, SlotBranch),
+
+    % Resolve every pending branch to this label right away; the branches that
+    % target other labels are handed back and stay pending.
+    {Stream2, StillPending} =
+        patch_branches_for_label(StreamModule, Stream1, Label, LabelOffset, Pending),
+
+    % Record the label itself so that code emitted afterwards can look up its
+    % offset in the labels list.
+    State#state{
+        stream = Stream2,
+        branches = StillPending,
+        labels = [{Label, LabelOffset} | Known]
+    };
 add_label(#state{labels = Labels} = State, Label, Offset) ->
     State#state{labels = [{Label, Offset} | Labels]}.
diff --git a/libs/jit/src/jit_aarch64_asm.erl b/libs/jit/src/jit_aarch64_asm.erl
index 6237294614..277a97d9ea 100644
--- a/libs/jit/src/jit_aarch64_asm.erl
+++ b/libs/jit/src/jit_aarch64_asm.erl
@@ -948,6 +948,8 @@ sub(Rd, Rn, Imm) when is_atom(Rd), is_atom(Rn), is_integer(Imm), Imm >= 0, Imm =
     RdNum = reg_to_num(Rd),
     RnNum = reg_to_num(Rn),
     <<(16#D1000000 bor ((Imm band 16#FFF) bsl 10) bor (RnNum bsl 5) bor RdNum):32/little>>;
+sub(_Rd, _Rn, Imm) when is_integer(Imm) ->
+    error({unencodable_immediate, Imm});
 sub(Rd, Rn, Rm) when is_atom(Rd), is_atom(Rn), is_atom(Rm) ->
     sub(Rd, Rn, Rm, {lsl, 0}).
 
diff --git a/libs/jit/src/jit_armv6m.erl b/libs/jit/src/jit_armv6m.erl
index b051850135..7bfd329e89 100644
--- a/libs/jit/src/jit_armv6m.erl
+++ b/libs/jit/src/jit_armv6m.erl
@@ -25,6 +25,7 @@
     new/3,
     stream/1,
     offset/1,
+    flush/1,
     debugger/1,
     used_regs/1,
     available_regs/1,
@@ -38,6 +39,7 @@
     return_if_not_equal_to_ctx/2,
     jump_to_label/2,
     jump_to_continuation/2,
+    jump_to_offset/2,
     if_block/3,
     if_else_block/4,
     shift_right/3,
@@ -74,8 +76,10 @@
 -include_lib("jit.hrl").
 
 -include("primitives.hrl").
+-include("term.hrl").
 
--define(ASSERT(Expr), true = Expr).
+%% Assertions are compiled out; use -define(ASSERT(Expr), true = Expr). instead to enable them.
+-define(ASSERT(Expr), ok).
 
 %% ARMv6-M AAPCS32 ABI: r0-r3 are used for argument passing and return value.
 %% r0-r1 form a double-word for 64-bit returns, additional args passed on stack.
@@ -130,10 +134,12 @@
     stream :: stream(),
     offset :: non_neg_integer(),
     branches :: [{non_neg_integer(), non_neg_integer(), non_neg_integer()}],
+    jump_table_start :: non_neg_integer(),
     available_regs :: [armv6m_register()],
     used_regs :: [armv6m_register()],
     labels :: [{integer() | reference(), integer()}],
-    variant :: non_neg_integer()
+    variant :: non_neg_integer(),
+    literal_pool :: [{non_neg_integer(), armv6m_register(), non_neg_integer()}]
 }).
 
 -type state() :: #state{}.
@@ -149,6 +155,7 @@
 -type condition() ::
     {armv6m_register(), '<', integer()}
     | {maybe_free_armv6m_register(), '<', armv6m_register()}
+    | {integer(), '<', maybe_free_armv6m_register()}
     | {maybe_free_armv6m_register(), '==', integer()}
     | {maybe_free_armv6m_register(), '!=', armv6m_register() | integer()}
     | {'(int)', maybe_free_armv6m_register(), '==', integer()}
@@ -242,11 +249,13 @@ new(Variant, StreamModule, Stream) ->
         stream_module = StreamModule,
         stream = Stream,
         branches = [],
+        jump_table_start = 0,
         offset = StreamModule:offset(Stream),
         available_regs = ?AVAILABLE_REGS,
         used_regs = [],
         labels = [],
-        variant = Variant
+        variant = Variant,
+        literal_pool = []
     }.
 
 %%-----------------------------------------------------------------------------
@@ -269,6 +278,16 @@ stream(#state{stream = Stream}) ->
 offset(#state{stream_module = StreamModule, stream = Stream}) ->
     StreamModule:offset(Stream).
 
+%%-----------------------------------------------------------------------------
+%% @doc Flush pending backend output by delegating to flush_literal_pool/1.
+%% @end
+%% @param State current backend state
+%% @return The flushed state
+%%-----------------------------------------------------------------------------
+-spec flush(state()) -> state().
+flush(State) when is_record(State, state) ->
+    flush_literal_pool(State).
+
 %%-----------------------------------------------------------------------------
 %% @doc Emit a debugger of breakpoint instruction. This is used for debugging
 %% and not in production.
@@ -364,13 +383,14 @@ assert_all_native_free(#state{
 %% @return Updated backend state
 %%-----------------------------------------------------------------------------
 -spec jump_table(state(), pos_integer()) -> state().
-jump_table(State, LabelsCount) ->
-    jump_table0(State, 0, LabelsCount).
+jump_table(#state{stream_module = StreamModule, stream = Stream0} = State0, LabelsCount) ->
+    State1 = State0#state{jump_table_start = StreamModule:offset(Stream0)},
+    jump_table0(State1, 0, LabelsCount).
 
 jump_table0(State, N, LabelsCount) when N > LabelsCount ->
     State;
 jump_table0(
-    #state{stream_module = StreamModule, stream = Stream0, branches = Branches} = State,
+    #state{stream_module = StreamModule, stream = Stream0} = State,
     N,
     LabelsCount
 ) ->
@@ -380,37 +400,23 @@ jump_table0(
     I3 = jit_armv6m_asm:add(pc, r3),
     I4 = jit_armv6m_asm:nop(),
 
-    JumpEntry = <<I1/binary, I2/binary, I3/binary, I4/binary, 0:32>>,
+    JumpEntry = <<I1/binary, I2/binary, I3/binary, I4/binary, 16#FFFFFFFF:32>>,
     Stream1 = StreamModule:append(Stream0, JumpEntry),
 
-    % Add relocation for the data entry so update_branches/2 can patch the jump target
-    DataOffset = StreamModule:offset(Stream1) - 4,
-    % Calculate the offset of the add instruction (3rd instruction, at offset 4 from entry start)
-    EntryStartOffset = StreamModule:offset(Stream1) - 12,
-    AddInstrOffset = EntryStartOffset + 4,
-    DataReloc = {N, DataOffset, {jump_table_data, AddInstrOffset}},
-    UpdatedState = State#state{stream = Stream1, branches = [DataReloc | Branches]},
-
-    jump_table0(UpdatedState, N + 1, LabelsCount).
+    jump_table0(State#state{stream = Stream1}, N + 1, LabelsCount).
 
 %%-----------------------------------------------------------------------------
-%% @doc Rewrite stream to update all branches for labels.
+%% @doc Patch a single branch in the stream
 %% @end
-%% @param State current backend state
-%% @return Updated backend state
+%% @param StreamModule stream module
+%% @param Stream stream state
+%% @param Offset offset of the branch to patch
+%% @param Type type of the branch
+%% @param LabelOffset target label offset
+%% @return Updated stream
 %%-----------------------------------------------------------------------------
--spec update_branches(state()) -> state().
-update_branches(#state{branches = []} = State) ->
-    State;
-update_branches(
-    #state{
-        stream_module = StreamModule,
-        stream = Stream0,
-        branches = [{Label, Offset, Type} | BranchesT],
-        labels = Labels
-    } = State
-) ->
-    {Label, LabelOffset} = lists:keyfind(Label, 1, Labels),
+-spec patch_branch(module(), stream(), non_neg_integer(), any(), non_neg_integer()) -> stream().
+patch_branch(StreamModule, Stream, Offset, Type, LabelOffset) ->
     Rel = LabelOffset - Offset,
     NewInstr =
         case Type of
@@ -484,15 +490,64 @@ update_branches(
                                 I4 = <<RelativeOffset:32/little>>,
                                 <<I1/binary, I2/binary, I3/binary, I4/binary>>
                         end
-                end;
-            {jump_table_data, AddInstrOffset} ->
-                % Calculate offset from 'add pc, pc, r3' instruction + 4 to target label
-                % PC when add instruction executes
-                AddPC = AddInstrOffset + 4,
-                RelativeOffset = LabelOffset - AddPC,
-                <<RelativeOffset:32/little>>
+                end
         end,
-    Stream1 = StreamModule:replace(Stream0, Offset, NewInstr),
+    StreamModule:replace(Stream, Offset, NewInstr).
+
+%%-----------------------------------------------------------------------------
+%% @doc Resolve the pending branches that target one label. Every matching
+%% entry is patched in the stream through patch_branch/5; all other entries
+%% are returned, in their original order.
+%% @end
+%% @param StreamModule stream module
+%% @param Stream stream state
+%% @param TargetLabel label to patch branches for
+%% @param LabelOffset offset of the target label
+%% @param Branches list of pending branches
+%% @return {UpdatedStream, RemainingBranches}
+%%-----------------------------------------------------------------------------
+-spec patch_branches_for_label(
+    module(),
+    stream(),
+    integer(),
+    non_neg_integer(),
+    [{integer(), non_neg_integer(), any()}]
+) -> {stream(), [{integer(), non_neg_integer(), any()}]}.
+patch_branches_for_label(StreamModule, Stream, TargetLabel, LabelOffset, Branches) ->
+    % Single left-to-right pass. The accumulator pairs the stream patched so far
+    % with the branches that do not match, gathered in reverse.
+    Step = fun
+        ({Label, Offset, Type}, {StreamAcc, Skipped}) when Label =:= TargetLabel ->
+            % Aimed at TargetLabel: patch it and leave it out of the result.
+            {patch_branch(StreamModule, StreamAcc, Offset, Type, LabelOffset), Skipped};
+        (Branch, {StreamAcc, Skipped}) ->
+            % Aimed elsewhere: keep it.
+            {StreamAcc, [Branch | Skipped]}
+    end,
+
+    {Patched, SkippedRev} = lists:foldl(Step, {Stream, []}, Branches),
+    % Undo the reversal so the kept branches come back in their original order.
+    {Patched, lists:reverse(SkippedRev)}.
+
+%%-----------------------------------------------------------------------------
+%% @doc Patch each branch still queued in the state with its label's recorded offset.
+%% @end
+%% @param State current backend state
+%% @return Updated backend state, with an empty list of pending branches
+%%-----------------------------------------------------------------------------
+-spec update_branches(state()) -> state().
+update_branches(#state{branches = []} = State) ->
+    State;
+update_branches(
+    #state{
+        stream_module = StreamModule,
+        stream = Stream0,
+        branches = [{Label, Offset, Type} | BranchesT],
+        labels = Labels
+    } = State
+) ->
+    {Label, LabelOffset} = lists:keyfind(Label, 1, Labels),
+    Stream1 = patch_branch(StreamModule, Stream0, Offset, Type, LabelOffset),
     update_branches(State#state{stream = Stream1, branches = BranchesT}).
 
 %%-----------------------------------------------------------------------------
@@ -631,7 +686,8 @@ call_primitive_last(
                 State2 = set_registers_args(State1, ArgsForTailCall, 0),
                 tail_call_with_jit_state_registers_only(State2, Temp)
         end,
-    State4#state{available_regs = ?AVAILABLE_REGS, used_regs = []}.
+    State5 = State4#state{available_regs = ?AVAILABLE_REGS, used_regs = []},
+    flush_literal_pool(State5).
 
 %%-----------------------------------------------------------------------------
 %% @doc Tail call to address in register, restoring prolog registers including
@@ -724,7 +780,15 @@ jump_to_label(
     Offset = StreamModule:offset(Stream0),
     {State1, CodeBlock} = branch_to_label_code(State0, Offset, Label, LabelLookupResult),
     Stream1 = StreamModule:append(Stream0, CodeBlock),
-    State1#state{stream = Stream1}.
+    State2 = State1#state{stream = Stream1},
+    flush_literal_pool(State2).
+
+%% Emit a branch from the current stream position to TargetOffset, then flush the literal pool.
+jump_to_offset(#state{stream_module = StreamModule, stream = Stream0} = State0, TargetOffset) ->
+    Here = StreamModule:offset(Stream0),
+    Branch = branch_to_offset_code(State0, Here, TargetOffset),
+    State1 = State0#state{stream = StreamModule:append(Stream0, Branch)},
+    flush_literal_pool(State1).
 
 %%-----------------------------------------------------------------------------
 %% @doc Jump to address in continuation pointer register
@@ -786,17 +850,17 @@ jump_to_continuation(
     Code = <<I3/binary, I4/binary, I5/binary, I6/binary, I7/binary>>,
     Stream2 = StreamModule:append(State1#state.stream, Code),
     % Free all registers as this is a terminal instruction
-    State1#state{stream = Stream2, available_regs = ?AVAILABLE_REGS, used_regs = []}.
+    State2 = State1#state{stream = Stream2, available_regs = ?AVAILABLE_REGS, used_regs = []},
+    flush_literal_pool(State2).
 
-branch_to_label_code(State, Offset, Label, {Label, LabelOffset}) when
-    LabelOffset - Offset =< 2050, LabelOffset - Offset >= -2044
+branch_to_offset_code(_State, Offset, TargetOffset) when
+    TargetOffset - Offset =< 2050, TargetOffset - Offset >= -2044
 ->
     % Near branch: use direct B instruction
-    Rel = LabelOffset - Offset,
-    CodeBlock = jit_armv6m_asm:b(Rel),
-    {State, CodeBlock};
-branch_to_label_code(
-    #state{available_regs = [TempReg | _]} = State0, Offset, Label, {Label, LabelOffset}
+    Rel = TargetOffset - Offset,
+    jit_armv6m_asm:b(Rel);
+branch_to_offset_code(
+    #state{available_regs = [TempReg | _]}, Offset, TargetOffset
 ) ->
     % Far branch: use register-based sequence, need temporary register
     if
@@ -807,23 +871,26 @@ branch_to_label_code(
             I3 = jit_armv6m_asm:bx(TempReg),
             % Unaligned : need nop
             I4 = jit_armv6m_asm:nop(),
-            LiteralValue = LabelOffset - Offset - 5,
+            LiteralValue = TargetOffset - Offset - 5,
             I5 = <<LiteralValue:32/little>>,
-            CodeBlock = <<I1/binary, I2/binary, I3/binary, I4/binary, I5/binary>>;
+            <<I1/binary, I2/binary, I3/binary, I4/binary, I5/binary>>;
         true ->
             % Unaligned
             I1 = jit_armv6m_asm:ldr(TempReg, {pc, 4}),
             I2 = jit_armv6m_asm:add(TempReg, pc),
             I3 = jit_armv6m_asm:bx(TempReg),
-            LiteralValue = LabelOffset - Offset - 5,
+            LiteralValue = TargetOffset - Offset - 5,
             I4 = <<LiteralValue:32/little>>,
-            CodeBlock = <<I1/binary, I2/binary, I3/binary, I4/binary>>
-    end,
-    {State0, CodeBlock};
+            <<I1/binary, I2/binary, I3/binary, I4/binary>>
+    end.
+
+branch_to_label_code(State, Offset, Label, {Label, LabelOffset}) ->
+    CodeBlock = branch_to_offset_code(State, Offset, LabelOffset),
+    {State, CodeBlock};
 branch_to_label_code(
     #state{available_regs = [TempReg | _], branches = Branches} = State0, Offset, Label, false
 ) ->
-    {CodeBlock, SequenceSize} =
+    SequenceSize =
         if
             Offset rem 4 =:= 0 ->
                 % Aligned
@@ -835,7 +902,7 @@ branch_to_label_code(
                 % Placeholder offset
                 I5 = <<0:32/little>>,
                 Seq = <<I1/binary, I2/binary, I3/binary, I4/binary, I5/binary>>,
-                {Seq, byte_size(Seq)};
+                byte_size(Seq);
             true ->
                 % Unaligned
                 I1 = jit_armv6m_asm:ldr(TempReg, {pc, 4}),
@@ -844,16 +911,17 @@ branch_to_label_code(
                 % Placeholder offset
                 I4 = <<0:32/little>>,
                 Seq = <<I1/binary, I2/binary, I3/binary, I4/binary>>,
-                {Seq, byte_size(Seq)}
+                byte_size(Seq)
         end,
     % Add relocation entry
+    CodeBlock = binary:copy(<<16#FF>>, SequenceSize),
     Reloc = {Label, Offset, {far_branch, SequenceSize, TempReg}},
     State1 = State0#state{branches = [Reloc | Branches]},
     {State1, CodeBlock};
 branch_to_label_code(
     #state{available_regs = [], branches = Branches} = State0, Offset, Label, false
 ) ->
-    {CodeBlock, SequenceSize} =
+    SequenceSize =
         if
             Offset rem 4 =/= 0 ->
                 % Unaligned
@@ -871,7 +939,7 @@ branch_to_label_code(
                 Seq =
                     <<I1/binary, I2/binary, I3/binary, I4/binary, I5/binary, I6/binary, I7/binary,
                         I8/binary>>,
-                {Seq, byte_size(Seq)};
+                byte_size(Seq);
             true ->
                 % Aligned
                 I1 = jit_armv6m_asm:push([r0]),
@@ -885,9 +953,10 @@ branch_to_label_code(
                 I7 = <<0:32/little>>,
                 Seq =
                     <<I1/binary, I2/binary, I3/binary, I4/binary, I5/binary, I6/binary, I7/binary>>,
-                {Seq, byte_size(Seq)}
+                byte_size(Seq)
         end,
     % Add relocation entry
+    CodeBlock = binary:copy(<<16#FF>>, SequenceSize),
     Reloc = {Label, Offset, {far_branch, SequenceSize, ?IP_REG}},
     State1 = State0#state{branches = [Reloc | Branches]},
     {State1, CodeBlock};
@@ -971,7 +1040,8 @@ if_else_block(
     Stream2 = State2#state.stream,
     %% Emit unconditional branch to skip the else block (will be replaced)
     ElseJumpOffset = StreamModule:offset(Stream2),
-    ElseJumpInstr = jit_armv6m_asm:b(0),
+    ?ASSERT(byte_size(jit_armv6m_asm:b(0)) =:= 2),
+    ElseJumpInstr = <<16#FFFF:16>>,
     Stream3 = StreamModule:append(Stream2, ElseJumpInstr),
     %% Else block starts here.
     OffsetAfter = StreamModule:offset(Stream3),
@@ -1000,44 +1070,95 @@ if_else_block(
         jit_armv6m_asm:cc() | {tbz | tbnz, atom(), 0..63} | {cbz, atom()},
         non_neg_integer()
     }.
-if_block_cond(#state{stream_module = StreamModule, stream = Stream0} = State0, {Reg, '<', 0}) ->
+if_block_cond(
+    #state{stream_module = StreamModule, stream = Stream0} = State0, {RegOrTuple, '<', 0}
+) ->
+    Reg =
+        case RegOrTuple of
+            {free, Reg0} -> Reg0;
+            RegOrTuple -> RegOrTuple
+        end,
     %% Compare register with 0
     I1 = jit_armv6m_asm:cmp(Reg, 0),
     %% Branch if positive (N flag clear)
-    I2 = jit_armv6m_asm:bcc(pl, 0),
-    Stream1 = StreamModule:append(Stream0, <<I1/binary, I2/binary>>),
-    State1 = State0#state{stream = Stream1},
-    {State1, pl, byte_size(I1)};
+    CC = pl,
+    ?ASSERT(byte_size(jit_armv6m_asm:bcc(CC, 0)) =:= 2),
+    Stream1 = StreamModule:append(Stream0, <<I1/binary, 16#FFFF:16>>),
+    State1 = if_block_free_reg(RegOrTuple, State0),
+    State2 = State1#state{stream = Stream1},
+    {State2, CC, byte_size(I1)};
 if_block_cond(
     #state{stream_module = StreamModule, stream = Stream0} = State0,
-    {Reg, '<', Val}
-) when is_atom(Reg), is_integer(Val), Val >= 0, Val =< 255 ->
+    {RegOrTuple, '<', Val}
+) when is_integer(Val), Val >= 0, Val =< 255 ->
+    Reg =
+        case RegOrTuple of
+            {free, Reg0} -> Reg0;
+            RegOrTuple -> RegOrTuple
+        end,
     I1 = jit_armv6m_asm:cmp(Reg, Val),
     % ge = greater than or equal
-    I2 = jit_armv6m_asm:bcc(ge, 0),
-    Code = <<
-        I1/binary,
-        I2/binary
-    >>,
-    Stream1 = StreamModule:append(Stream0, Code),
-    State1 = State0#state{stream = Stream1},
-    {State1, ge, byte_size(I1)};
+    CC = ge,
+    ?ASSERT(byte_size(jit_armv6m_asm:bcc(CC, 0)) =:= 2),
+    Stream1 = StreamModule:append(Stream0, <<I1/binary, 16#FFFF:16>>),
+    State1 = if_block_free_reg(RegOrTuple, State0),
+    State2 = State1#state{stream = Stream1},
+    {State2, CC, byte_size(I1)};
 if_block_cond(
     #state{stream_module = StreamModule, available_regs = [Temp | _]} = State0,
-    {Reg, '<', Val}
-) when is_atom(Reg), is_integer(Val) ->
+    {RegOrTuple, '<', Val}
+) when is_integer(Val) ->
+    Reg =
+        case RegOrTuple of
+            {free, Reg0} -> Reg0;
+            RegOrTuple -> RegOrTuple
+        end,
     State1 = mov_immediate(State0, Temp, Val),
     Stream0 = State1#state.stream,
     I1 = jit_armv6m_asm:cmp(Reg, Temp),
     % ge = greater than or equal
-    I2 = jit_armv6m_asm:bcc(ge, 0),
-    Code = <<
-        I1/binary,
-        I2/binary
-    >>,
-    Stream1 = StreamModule:append(Stream0, Code),
+    CC = ge,
+    ?ASSERT(byte_size(jit_armv6m_asm:bcc(CC, 0)) =:= 2),
+    Stream1 = StreamModule:append(Stream0, <<I1/binary, 16#FFFF:16>>),
+    State2 = if_block_free_reg(RegOrTuple, State1),
+    State3 = State2#state{stream = Stream1},
+    {State3, CC, byte_size(I1)};
+if_block_cond(
+    #state{stream_module = StreamModule, stream = Stream0} = State0,
+    {Val, '<', RegOrTuple}
+) when is_integer(Val), Val >= 0, Val =< 255 ->
+    Reg =
+        case RegOrTuple of
+            {free, Reg0} -> Reg0;
+            RegOrTuple -> RegOrTuple
+        end,
+    I1 = jit_armv6m_asm:cmp(Reg, Val),
+    % le = less than or equal (branch when Val >= Reg, i.e., NOT Val < Reg)
+    CC = le,
+    ?ASSERT(byte_size(jit_armv6m_asm:bcc(CC, 0)) =:= 2),
+    Stream1 = StreamModule:append(Stream0, <<I1/binary, 16#FFFF:16>>),
+    State1 = if_block_free_reg(RegOrTuple, State0),
     State2 = State1#state{stream = Stream1},
-    {State2, ge, byte_size(I1)};
+    {State2, CC, byte_size(I1)};
+if_block_cond(
+    #state{stream_module = StreamModule, available_regs = [Temp | _]} = State0,
+    {Val, '<', RegOrTuple}
+) when is_integer(Val) ->
+    Reg =
+        case RegOrTuple of
+            {free, Reg0} -> Reg0;
+            RegOrTuple -> RegOrTuple
+        end,
+    State1 = mov_immediate(State0, Temp, Val),
+    Stream0 = State1#state.stream,
+    I1 = jit_armv6m_asm:cmp(Reg, Temp),
+    % le = less than or equal (branch when Val >= Reg, i.e., NOT Val < Reg)
+    CC = le,
+    ?ASSERT(byte_size(jit_armv6m_asm:bcc(CC, 0)) =:= 2),
+    Stream1 = StreamModule:append(Stream0, <<I1/binary, 16#FFFF:16>>),
+    State2 = if_block_free_reg(RegOrTuple, State1),
+    State3 = State2#state{stream = Stream1},
+    {State3, CC, byte_size(I1)};
 if_block_cond(
     #state{stream_module = StreamModule, stream = Stream0} = State0,
     {RegOrTuple, '<', RegB}
@@ -1049,15 +1170,12 @@ if_block_cond(
         end,
     I1 = jit_armv6m_asm:cmp(Reg, RegB),
     % ge = greater than or equal
-    I2 = jit_armv6m_asm:bcc(ge, 0),
-    Code = <<
-        I1/binary,
-        I2/binary
-    >>,
-    Stream1 = StreamModule:append(Stream0, Code),
+    CC = ge,
+    ?ASSERT(byte_size(jit_armv6m_asm:bcc(CC, 0)) =:= 2),
+    Stream1 = StreamModule:append(Stream0, <<I1/binary, 16#FFFF:16>>),
     State1 = if_block_free_reg(RegOrTuple, State0),
     State2 = State1#state{stream = Stream1},
-    {State2, ge, byte_size(I1)};
+    {State2, CC, byte_size(I1)};
 if_block_cond(
     #state{stream_module = StreamModule, stream = Stream0} = State0, {RegOrTuple, '==', 0}
 ) ->
@@ -1069,11 +1187,12 @@ if_block_cond(
     %% Compare register with 0
     I1 = jit_armv6m_asm:cmp(Reg, 0),
     %% Branch if not equal
-    I2 = jit_armv6m_asm:bcc(ne, 0),
-    Stream1 = StreamModule:append(Stream0, <<I1/binary, I2/binary>>),
+    CC = ne,
+    ?ASSERT(byte_size(jit_armv6m_asm:bcc(CC, 0)) =:= 2),
+    Stream1 = StreamModule:append(Stream0, <<I1/binary, 16#FFFF:16>>),
     State1 = if_block_free_reg(RegOrTuple, State0),
     State2 = State1#state{stream = Stream1},
-    {State2, ne, byte_size(I1)};
+    {State2, CC, byte_size(I1)};
 %% Delegate (int) forms to regular forms since we only have 32-bit words
 if_block_cond(State, {'(int)', RegOrTuple, '==', 0}) ->
     if_block_cond(State, {RegOrTuple, '==', 0});
@@ -1089,15 +1208,12 @@ if_block_cond(
             RegOrTuple -> RegOrTuple
         end,
     I1 = jit_armv6m_asm:cmp(Reg, Val),
-    I2 = jit_armv6m_asm:bcc(eq, 0),
-    Code = <<
-        I1/binary,
-        I2/binary
-    >>,
-    Stream1 = StreamModule:append(Stream0, Code),
+    CC = eq,
+    ?ASSERT(byte_size(jit_armv6m_asm:bcc(CC, 0)) =:= 2),
+    Stream1 = StreamModule:append(Stream0, <<I1/binary, 16#FFFF:16>>),
     State1 = if_block_free_reg(RegOrTuple, State0),
     State2 = State1#state{stream = Stream1},
-    {State2, eq, byte_size(I1)};
+    {State2, CC, byte_size(I1)};
 if_block_cond(State, {'(int)', RegOrTuple, '!=', Val}) when is_integer(Val) ->
     if_block_cond(State, {RegOrTuple, '!=', Val});
 if_block_cond(
@@ -1110,28 +1226,25 @@ if_block_cond(
             RegOrTuple -> RegOrTuple
         end,
     I1 = jit_armv6m_asm:cmp(Reg, Val),
-    I2 = jit_armv6m_asm:bcc(ne, 0),
-    Code = <<
-        I1/binary,
-        I2/binary
-    >>,
-    Stream1 = StreamModule:append(Stream0, Code),
+    CC = ne,
+    ?ASSERT(byte_size(jit_armv6m_asm:bcc(CC, 0)) =:= 2),
+    Stream1 = StreamModule:append(Stream0, <<I1/binary, 16#FFFF:16>>),
     State1 = if_block_free_reg(RegOrTuple, State0),
     State2 = State1#state{stream = Stream1},
-    {State2, ne, byte_size(I1)};
+    {State2, CC, byte_size(I1)};
 if_block_cond(
     #state{stream_module = StreamModule, stream = Stream0} = State0,
     {{free, RegA}, '==', {free, RegB}}
 ) ->
     % Compare two free registers: cmp RegA, RegB; beq <target>
     I1 = jit_armv6m_asm:cmp(RegA, RegB),
-    Stream1 = StreamModule:append(Stream0, I1),
-    I2 = jit_armv6m_asm:bcc(ne, 0),
-    Stream2 = StreamModule:append(Stream1, I2),
-    State1 = State0#state{stream = Stream2},
+    CC = ne,
+    ?ASSERT(byte_size(jit_armv6m_asm:bcc(CC, 0)) =:= 2),
+    Stream1 = StreamModule:append(Stream0, <<I1/binary, 16#FFFF:16>>),
+    State1 = State0#state{stream = Stream1},
     State2 = if_block_free_reg({free, RegA}, State1),
     State3 = if_block_free_reg({free, RegB}, State2),
-    {State3, ne, byte_size(I1)};
+    {State3, CC, byte_size(I1)};
 if_block_cond(
     #state{stream_module = StreamModule, stream = Stream0, available_regs = [Temp | _]} = State0,
     {RegOrTuple, '==', Val}
@@ -1146,15 +1259,12 @@ if_block_cond(
     Stream1 = State1#state.stream,
     Offset1 = StreamModule:offset(Stream1),
     I1 = jit_armv6m_asm:cmp(Reg, Temp),
-    I2 = jit_armv6m_asm:bcc(ne, 0),
-    Code = <<
-        I1/binary,
-        I2/binary
-    >>,
-    Stream2 = StreamModule:append(Stream1, Code),
+    CC = ne,
+    ?ASSERT(byte_size(jit_armv6m_asm:bcc(CC, 0)) =:= 2),
+    Stream2 = StreamModule:append(Stream1, <<I1/binary, 16#FFFF:16>>),
     State2 = if_block_free_reg(RegOrTuple, State1),
     State3 = State2#state{stream = Stream2},
-    {State3, ne, Offset1 - Offset0 + byte_size(I1)};
+    {State3, CC, Offset1 - Offset0 + byte_size(I1)};
 if_block_cond(
     #state{stream_module = StreamModule, stream = Stream0, available_regs = [Temp | _]} = State0,
     {RegOrTuple, '!=', Val}
@@ -1169,15 +1279,12 @@ if_block_cond(
     Stream1 = State1#state.stream,
     Offset1 = StreamModule:offset(Stream1),
     I1 = jit_armv6m_asm:cmp(Reg, Temp),
-    I2 = jit_armv6m_asm:bcc(eq, 0),
-    Code = <<
-        I1/binary,
-        I2/binary
-    >>,
-    Stream2 = StreamModule:append(Stream1, Code),
+    CC = eq,
+    ?ASSERT(byte_size(jit_armv6m_asm:bcc(CC, 0)) =:= 2),
+    Stream2 = StreamModule:append(Stream1, <<I1/binary, 16#FFFF:16>>),
     State2 = if_block_free_reg(RegOrTuple, State1),
     State3 = State2#state{stream = Stream2},
-    {State3, eq, Offset1 - Offset0 + byte_size(I1)};
+    {State3, CC, Offset1 - Offset0 + byte_size(I1)};
 if_block_cond(
     #state{
         stream_module = StreamModule,
@@ -1194,12 +1301,12 @@ if_block_cond(
     % Test bit 0: shift bit 0 to MSB and branch if positive (bit was 0/false)
     I1 = jit_armv6m_asm:lsls(Temp, Reg, 31),
     % branch if negative (bit was 1/true)
-    I2 = jit_armv6m_asm:bcc(mi, 0),
-    Code = <<I1/binary, I2/binary>>,
-    Stream1 = StreamModule:append(Stream0, Code),
+    CC = mi,
+    ?ASSERT(byte_size(jit_armv6m_asm:bcc(CC, 0)) =:= 2),
+    Stream1 = StreamModule:append(Stream0, <<I1/binary, 16#FFFF:16>>),
     State1 = if_block_free_reg(RegOrTuple, State0),
     State2 = State1#state{stream = Stream1},
-    {State2, mi, byte_size(I1)};
+    {State2, CC, byte_size(I1)};
 if_block_cond(
     #state{
         stream_module = StreamModule,
@@ -1216,12 +1323,12 @@ if_block_cond(
     % Test bit 0: shift bit 0 to MSB and branch if negative (bit was 1/true)
     I1 = jit_armv6m_asm:lsls(Temp, Reg, 31),
     % branch if positive (bit was 0/false)
-    I2 = jit_armv6m_asm:bcc(pl, 0),
-    Code = <<I1/binary, I2/binary>>,
-    Stream1 = StreamModule:append(Stream0, Code),
+    CC = pl,
+    ?ASSERT(byte_size(jit_armv6m_asm:bcc(CC, 0)) =:= 2),
+    Stream1 = StreamModule:append(Stream0, <<I1/binary, 16#FFFF:16>>),
     State1 = if_block_free_reg(RegOrTuple, State0),
     State2 = State1#state{stream = Stream1},
-    {State2, pl, byte_size(I1)};
+    {State2, CC, byte_size(I1)};
 if_block_cond(
     #state{
         stream_module = StreamModule,
@@ -1250,8 +1357,8 @@ if_block_cond(
                 TestCode1 = jit_armv6m_asm:tst(Reg, Temp),
                 {<<TestCode0/binary, TestCode1/binary>>, eq}
         end,
-    I2 = jit_armv6m_asm:bcc(BranchCond, 0),
-    Code = <<TestCode/binary, I2/binary>>,
+    ?ASSERT(byte_size(jit_armv6m_asm:bcc(BranchCond, 0)) =:= 2),
+    Code = <<TestCode/binary, 16#FFFF:16>>,
     Stream1 = StreamModule:append(Stream0, Code),
     State1 = if_block_free_reg(RegOrTuple, State0),
     State2 = State1#state{stream = Stream1},
@@ -1268,10 +1375,11 @@ if_block_cond(
     I1 = jit_armv6m_asm:mvns(Temp, Reg),
     % 32 - 4
     I2 = jit_armv6m_asm:lsls(Temp, Temp, 28),
-    I3 = jit_armv6m_asm:bcc(eq, 0),
-    Stream1 = StreamModule:append(Stream0, <<I1/binary, I2/binary, I3/binary>>),
+    CC = eq,
+    ?ASSERT(byte_size(jit_armv6m_asm:bcc(CC, 0)) =:= 2),
+    Stream1 = StreamModule:append(Stream0, <<I1/binary, I2/binary, 16#FFFF:16>>),
     State1 = State0#state{stream = Stream1},
-    {State1, eq, byte_size(I1) + byte_size(I2)};
+    {State1, CC, byte_size(I1) + byte_size(I2)};
 if_block_cond(
     #state{
         stream_module = StreamModule,
@@ -1283,11 +1391,12 @@ if_block_cond(
     I1 = jit_armv6m_asm:mvns(Reg, Reg),
     % 32 - 4
     I2 = jit_armv6m_asm:lsls(Reg, Reg, 28),
-    I3 = jit_armv6m_asm:bcc(eq, 0),
-    Stream1 = StreamModule:append(Stream0, <<I1/binary, I2/binary, I3/binary>>),
+    CC = eq,
+    ?ASSERT(byte_size(jit_armv6m_asm:bcc(CC, 0)) =:= 2),
+    Stream1 = StreamModule:append(Stream0, <<I1/binary, I2/binary, 16#FFFF:16>>),
     State1 = State0#state{stream = Stream1},
     State2 = if_block_free_reg(RegTuple, State1),
-    {State2, eq, byte_size(I1) + byte_size(I2)};
+    {State2, CC, byte_size(I1) + byte_size(I2)};
 if_block_cond(
     #state{
         stream_module = StreamModule,
@@ -1301,16 +1410,17 @@ if_block_cond(
     I1 = jit_armv6m_asm:mov(Temp, Reg),
     Stream1 = StreamModule:append(Stream0, I1),
     State1 = State0#state{stream = Stream1},
-    State2 = and_(State1#state{available_regs = AT}, Temp, Mask),
+    {State2, Temp} = and_(State1#state{available_regs = AT}, {free, Temp}, Mask),
     Stream2 = State2#state.stream,
     % Compare with value
     I2 = jit_armv6m_asm:cmp(Temp, Val),
     Stream3 = StreamModule:append(Stream2, I2),
     OffsetAfter = StreamModule:offset(Stream3),
-    I3 = jit_armv6m_asm:bcc(eq, 0),
-    Stream4 = StreamModule:append(Stream3, I3),
+    CC = eq,
+    ?ASSERT(byte_size(jit_armv6m_asm:bcc(CC, 0)) =:= 2),
+    Stream4 = StreamModule:append(Stream3, <<16#FFFF:16>>),
     State3 = State2#state{stream = Stream4, available_regs = [Temp | State2#state.available_regs]},
-    {State3, eq, OffsetAfter - OffsetBefore};
+    {State3, CC, OffsetAfter - OffsetBefore};
 if_block_cond(
     #state{
         stream_module = StreamModule,
@@ -1320,17 +1430,18 @@ if_block_cond(
 ) when ?IS_GPR(Reg) ->
     % AND with mask
     OffsetBefore = StreamModule:offset(Stream0),
-    State1 = and_(State0, Reg, Mask),
+    {State1, Reg} = and_(State0, RegTuple, Mask),
     Stream1 = State1#state.stream,
     % Compare with value
     I2 = jit_armv6m_asm:cmp(Reg, Val),
     Stream2 = StreamModule:append(Stream1, I2),
     OffsetAfter = StreamModule:offset(Stream2),
-    I3 = jit_armv6m_asm:bcc(eq, 0),
-    Stream3 = StreamModule:append(Stream2, I3),
+    CC = eq,
+    ?ASSERT(byte_size(jit_armv6m_asm:bcc(CC, 0)) =:= 2),
+    Stream3 = StreamModule:append(Stream2, <<16#FFFF:16>>),
     State3 = State1#state{stream = Stream3},
     State4 = if_block_free_reg(RegTuple, State3),
-    {State4, eq, OffsetAfter - OffsetBefore}.
+    {State4, CC, OffsetAfter - OffsetBefore}.
 
 -spec if_block_free_reg(armv6m_register() | {free, armv6m_register()}, state()) -> state().
 if_block_free_reg({free, Reg}, State0) ->
@@ -1727,7 +1838,7 @@ set_registers_args(
         UsedRegs,
         Args
     ),
-    State0#state{
+    State1#state{
         stream = Stream1,
         available_regs = ?AVAILABLE_REGS -- ParamRegs -- NewUsedRegs,
         used_regs = ParamRegs ++ (NewUsedRegs -- ParamRegs)
@@ -1785,7 +1896,7 @@ set_registers_args0(
 set_registers_args0(
     State, [Arg | ArgsT], [_ArgReg | ArgsRegs], [?CTX_REG | ParamRegs], AvailGP, StackOffset
 ) ->
-    false = lists:member(?CTX_REG, ArgsRegs),
+    ?ASSERT(not lists:member(?CTX_REG, ArgsRegs)),
     State1 = set_registers_args1(State, Arg, ?CTX_REG, StackOffset),
     set_registers_args0(State1, ArgsT, ArgsRegs, ParamRegs, AvailGP, StackOffset);
 set_registers_args0(
@@ -1990,33 +2101,95 @@ move_array_element(
     Reg,
     Index,
     {x_reg, X}
-) when X < ?MAX_REG andalso is_atom(Reg) andalso is_integer(Index) ->
+) when X < ?MAX_REG andalso is_atom(Reg) andalso is_integer(Index) andalso Index * 4 =< 124 ->
     I1 = jit_armv6m_asm:ldr(Temp, {Reg, Index * 4}),
     I2 = jit_armv6m_asm:str(Temp, ?X_REG(X)),
     Stream1 = StreamModule:append(Stream0, <<I1/binary, I2/binary>>),
     State#state{stream = Stream1};
+move_array_element(
+    #state{stream_module = StreamModule, available_regs = [Temp1, Temp2 | _]} =
+        State,
+    Reg,
+    Index,
+    {x_reg, X}
+) when X < ?MAX_REG andalso is_atom(Reg) andalso is_integer(Index) ->
+    % For large offsets, use max offset (124) in ldr + remainder in temp register
+    Offset = Index * 4,
+    LdrOffset = 124,
+    Remainder = Offset - LdrOffset,
+    % Load offset remainder into temp register and add to base
+    State1 = mov_immediate(State, Temp1, Remainder),
+    Stream1 = State1#state.stream,
+    % add Temp1, Reg (Temp1 = Temp1 + Reg)
+    I1 = jit_armv6m_asm:add(Temp1, Reg),
+    % ldr Temp2, [Temp1, #124]
+    I2 = jit_armv6m_asm:ldr(Temp2, {Temp1, LdrOffset}),
+    % str Temp2, [r0, #X*4]
+    I3 = jit_armv6m_asm:str(Temp2, ?X_REG(X)),
+    Stream2 = StreamModule:append(Stream1, <<I1/binary, I2/binary, I3/binary>>),
+    State1#state{stream = Stream2};
 move_array_element(
     #state{stream_module = StreamModule, stream = Stream0, available_regs = [Temp | _]} = State,
     Reg,
     Index,
     {ptr, Dest}
-) when is_atom(Reg) andalso is_integer(Index) ->
+) when is_atom(Reg) andalso is_integer(Index) andalso Index * 4 =< 124 ->
     I1 = jit_armv6m_asm:ldr(Temp, {Reg, Index * 4}),
     I2 = jit_armv6m_asm:str(Temp, {Dest, 0}),
     Stream1 = StreamModule:append(Stream0, <<I1/binary, I2/binary>>),
     State#state{stream = Stream1};
+move_array_element(
+    #state{stream_module = StreamModule, available_regs = [Temp | _]} =
+        State,
+    Reg,
+    Index,
+    {ptr, Dest}
+) when is_atom(Reg) andalso is_integer(Index) ->
+    % For large offsets, use max offset (124) in ldr + remainder in temp register
+    Offset = Index * 4,
+    LdrOffset = 124,
+    Remainder = Offset - LdrOffset,
+    % Load offset remainder into temp register and add to base
+    State1 = mov_immediate(State, Temp, Remainder),
+    Stream1 = State1#state.stream,
+    I1 = jit_armv6m_asm:add(Temp, Reg),
+    I2 = jit_armv6m_asm:ldr(Temp, {Temp, LdrOffset}),
+    I3 = jit_armv6m_asm:str(Temp, {Dest, 0}),
+    Stream2 = StreamModule:append(Stream1, <<I1/binary, I2/binary, I3/binary>>),
+    State1#state{stream = Stream2};
 move_array_element(
     #state{stream_module = StreamModule, stream = Stream0, available_regs = [Temp1, Temp2 | AT]} =
         State,
     Reg,
     Index,
     {y_reg, Y}
-) when is_atom(Reg) andalso is_integer(Index) ->
+) when is_atom(Reg) andalso is_integer(Index) andalso Index * 4 =< 124 ->
     I1 = jit_armv6m_asm:ldr(Temp2, {Reg, Index * 4}),
     YCode = str_y_reg(Temp2, Y, Temp1, AT),
     Code = <<I1/binary, YCode/binary>>,
     Stream1 = StreamModule:append(Stream0, Code),
     State#state{stream = Stream1};
+move_array_element(
+    #state{
+        stream_module = StreamModule, available_regs = [Temp1, Temp2 | AT]
+    } =
+        State,
+    Reg,
+    Index,
+    {y_reg, Y}
+) when is_atom(Reg) andalso is_integer(Index) ->
+    % For large offsets, use max offset (124) in ldr + remainder in temp register
+    Offset = Index * 4,
+    LdrOffset = 124,
+    Remainder = Offset - LdrOffset,
+    State1 = mov_immediate(State, Temp2, Remainder),
+    Stream1 = State1#state.stream,
+    I1 = jit_armv6m_asm:add(Temp2, Reg),
+    I2 = jit_armv6m_asm:ldr(Temp2, {Temp2, LdrOffset}),
+    YCode = str_y_reg(Temp2, Y, Temp1, AT),
+    Code = <<I1/binary, I2/binary, YCode/binary>>,
+    Stream2 = StreamModule:append(Stream1, Code),
+    State1#state{stream = Stream2};
 move_array_element(
     #state{stream_module = StreamModule, stream = Stream0, available_regs = [Temp | AT]} =
         State,
@@ -2116,10 +2289,32 @@ get_array_element(
     } = State,
     {free, Reg},
     Index
-) ->
+) when Index * 4 =< 124 ->
     I1 = jit_armv6m_asm:ldr(Reg, {Reg, Index * 4}),
     Stream1 = StreamModule:append(Stream0, <<I1/binary>>),
     {State#state{stream = Stream1}, Reg};
+get_array_element(
+    #state{
+        stream_module = StreamModule,
+        available_regs = [Temp | _]
+    } = State,
+    {free, Reg},
+    Index
+) ->
+    % For large offsets, split into ldr immediate (max 124) + remainder in temp register
+    Offset = Index * 4,
+    LdrOffset = (Offset div 4) * 4,
+    LdrOffset1 = min(LdrOffset, 124),
+    Remainder = Offset - LdrOffset1,
+    % Load offset remainder into temp register and add to Reg
+    State1 = mov_immediate(State, Temp, Remainder),
+    Stream1 = State1#state.stream,
+    % add Reg, Temp (Reg = Reg + Temp)
+    I1 = jit_armv6m_asm:add(Reg, Temp),
+    % ldr Reg, [Reg, #LdrOffset1]
+    I2 = jit_armv6m_asm:ldr(Reg, {Reg, LdrOffset1}),
+    Stream2 = StreamModule:append(Stream1, <<I1/binary, I2/binary>>),
+    {State1#state{stream = Stream2}, Reg};
 get_array_element(
     #state{
         stream_module = StreamModule,
@@ -2129,7 +2324,7 @@ get_array_element(
     } = State,
     Reg,
     Index
-) ->
+) when Index * 4 =< 124 ->
     I1 = jit_armv6m_asm:ldr(ElemReg, {Reg, Index * 4}),
     Stream1 = StreamModule:append(Stream0, <<I1/binary>>),
     {
@@ -2137,6 +2332,32 @@ get_array_element(
             stream = Stream1, available_regs = AvailableT, used_regs = [ElemReg | UsedRegs0]
         },
         ElemReg
+    };
+get_array_element(
+    #state{
+        stream_module = StreamModule,
+        available_regs = [ElemReg, Temp | AvailableT],
+        used_regs = UsedRegs0
+    } = State,
+    Reg,
+    Index
+) ->
+    % For large offsets, split into ldr immediate (max 124) + remainder in temp register
+    Offset = Index * 4,
+    Remainder = Offset - 124,
+    % Load offset remainder into temp register
+    State1 = mov_immediate(State, Temp, Remainder),
+    Stream1 = State1#state.stream,
+    I1 = jit_armv6m_asm:add(Temp, Reg),
+    I2 = jit_armv6m_asm:ldr(ElemReg, {Temp, 124}),
+    Stream2 = StreamModule:append(Stream1, <<I1/binary, I2/binary>>),
+    {
+        State1#state{
+            stream = Stream2,
+            available_regs = [Temp | AvailableT],
+            used_regs = [ElemReg | UsedRegs0]
+        },
+        ElemReg
     }.
 
 %% @doc move an integer, a vm or native register to reg[x]
@@ -2148,10 +2369,26 @@ move_to_array_element(
     ValueReg,
     Reg,
     Index
-) when ?IS_GPR(ValueReg) andalso ?IS_GPR(Reg) andalso is_integer(Index) ->
+) when ?IS_GPR(ValueReg) andalso ?IS_GPR(Reg) andalso is_integer(Index) andalso Index < 32 ->
     I1 = jit_armv6m_asm:str(ValueReg, {Reg, Index * 4}),
     Stream1 = StreamModule:append(Stream0, I1),
     State0#state{stream = Stream1};
+move_to_array_element(
+    #state{stream_module = StreamModule, available_regs = [Temp | _]} = State0,
+    ValueReg,
+    Reg,
+    Index
+) when ?IS_GPR(ValueReg) andalso ?IS_GPR(Reg) andalso is_integer(Index) ->
+    % For large offsets, split into str immediate (max 124) + remainder in temp register
+    Offset = Index * 4,
+    Remainder = Offset - 124,
+    % Load offset remainder into temp register
+    State1 = mov_immediate(State0, Temp, Remainder),
+    Stream1 = State1#state.stream,
+    I1 = jit_armv6m_asm:add(Temp, Reg),
+    I2 = jit_armv6m_asm:str(ValueReg, {Temp, 124}),
+    Stream2 = StreamModule:append(Stream1, <<I1/binary, I2/binary>>),
+    State1#state{stream = Stream2};
 move_to_array_element(
     #state{stream_module = StreamModule, stream = Stream0, available_regs = [Temp | _]} = State0,
     ValueReg,
@@ -2168,7 +2405,7 @@ move_to_array_element(
     Value,
     Reg,
     Index
-) ->
+) when not ?IS_GPR(Value) andalso ?IS_GPR(Reg) ->
     {State1, Temp} = copy_to_native_register(State0, Value),
     State2 = move_to_array_element(State1, Temp, Reg, Index),
     free_native_register(State2, Temp).
@@ -2450,7 +2687,8 @@ set_continuation_to_offset(
 ) ->
     OffsetRef = make_ref(),
     Offset = StreamModule:offset(Stream0),
-    I1 = jit_armv6m_asm:adr(Temp, 4),
+    ?ASSERT(byte_size(jit_armv6m_asm:adr(Temp, 4)) =:= 2),
+    I1 = <<16#FFFF:16>>,
     Reloc = {OffsetRef, Offset, {adr, Temp}},
     % Set thumb bit (LSB = 1) by adding 1 to the 4-byte aligned address
     I2 = jit_armv6m_asm:adds(Temp, Temp, 1),
@@ -2508,34 +2746,34 @@ get_module_index(
 %% JIT currentl calls this with two values: ?TERM_PRIMARY_CLEAR_MASK (-4) to
 %% clear bits and ?TERM_BOXED_TAG_MASK (0x3F). We can avoid any literal pool
 %% by using BICS for -4.
-and_(#state{stream_module = StreamModule, stream = Stream0} = State0, Reg, 16#FFFFFF) ->
+and_(#state{stream_module = StreamModule, stream = Stream0} = State0, {free, Reg}, 16#FFFFFF) ->
     I1 = jit_armv6m_asm:lsls(Reg, Reg, 8),
     I2 = jit_armv6m_asm:lsrs(Reg, Reg, 8),
     Stream1 = StreamModule:append(Stream0, <<I1/binary, I2/binary>>),
-    State0#state{stream = Stream1};
+    {State0#state{stream = Stream1}, Reg};
 and_(
     #state{stream_module = StreamModule, available_regs = [Temp | AT]} = State0,
-    Reg,
+    {free, Reg},
     Val
 ) when Val < 0 andalso Val >= -256 ->
     State1 = mov_immediate(State0#state{available_regs = AT}, Temp, bnot (Val)),
     Stream1 = State1#state.stream,
     I = jit_armv6m_asm:bics(Reg, Temp),
     Stream2 = StreamModule:append(Stream1, I),
-    State1#state{available_regs = [Temp | AT], stream = Stream2};
+    {State1#state{available_regs = [Temp | AT], stream = Stream2}, Reg};
 and_(
     #state{stream_module = StreamModule, available_regs = [Temp | AT]} = State0,
-    Reg,
+    {free, Reg},
     Val
 ) ->
     State1 = mov_immediate(State0#state{available_regs = AT}, Temp, Val),
     Stream1 = State1#state.stream,
     I = jit_armv6m_asm:ands(Reg, Temp),
     Stream2 = StreamModule:append(Stream1, I),
-    State1#state{available_regs = [Temp | AT], stream = Stream2};
+    {State1#state{available_regs = [Temp | AT], stream = Stream2}, Reg};
 and_(
     #state{stream_module = StreamModule, available_regs = []} = State0,
-    Reg,
+    {free, Reg},
     Val
 ) when Val < 0 andalso Val >= -256 ->
     % No available registers, use r0 as temp and save it to r12
@@ -2552,10 +2790,10 @@ and_(
     % Restore r0 from r12
     Restore = jit_armv6m_asm:mov(r0, ?IP_REG),
     Stream4 = StreamModule:append(Stream3, Restore),
-    State0#state{stream = Stream4};
+    {State0#state{stream = Stream4}, Reg};
 and_(
     #state{stream_module = StreamModule, available_regs = []} = State0,
-    Reg,
+    {free, Reg},
     Val
 ) ->
     % No available registers, use r0 as temp and save it to r12
@@ -2572,7 +2810,17 @@ and_(
     % Restore r0 from r12
     Restore = jit_armv6m_asm:mov(r0, ?IP_REG),
     Stream4 = StreamModule:append(Stream3, Restore),
-    State0#state{stream = Stream4}.
+    {State0#state{stream = Stream4}, Reg};
+and_(
+    #state{stream_module = StreamModule, available_regs = [ResultReg | AT], used_regs = UR} =
+        State0,
+    Reg,
+    ?TERM_PRIMARY_CLEAR_MASK
+) ->
+    I1 = jit_armv6m_asm:lsrs(ResultReg, Reg, 2),
+    I2 = jit_armv6m_asm:lsls(ResultReg, ResultReg, 2),
+    Stream1 = StreamModule:append(State0#state.stream, <<I1/binary, I2/binary>>),
+    {State0#state{stream = Stream1, available_regs = AT, used_regs = [ResultReg | UR]}, ResultReg}.
 
 or_(
     #state{stream_module = StreamModule, available_regs = [Temp | AT]} = State0,
@@ -2611,41 +2859,42 @@ mov_immediate(#state{stream_module = StreamModule, stream = Stream0} = State, Re
     I2 = jit_armv6m_asm:negs(Reg, Reg),
     Stream1 = StreamModule:append(Stream0, <<I1/binary, I2/binary>>),
     State#state{stream = Stream1};
-mov_immediate(#state{stream_module = StreamModule, stream = Stream0} = State, Reg, Val) ->
-    %% Use a literal pool with a branch instruction (branch-over pattern)
-    %% Calculate where literal will be placed (must be word-aligned)
-    %% After LDR (2 bytes) + Branch (2 bytes) = 4 bytes from current position
-    CurrentOffset = StreamModule:offset(Stream0),
-    OffsetAfterInstructions = CurrentOffset + 4,
-    %% Find next word-aligned position for literal
-    LiteralPosition =
-        case OffsetAfterInstructions rem 4 of
-            % Already aligned
-            0 -> OffsetAfterInstructions;
-            % Add 2 bytes padding to align
-            _ -> OffsetAfterInstructions + 2
+mov_immediate(
+    #state{stream_module = StreamModule, stream = Stream0, literal_pool = LP} = State, Reg, Val
+) ->
+    LdrInstructionAddr = StreamModule:offset(Stream0),
+    ?ASSERT(byte_size(jit_armv6m_asm:ldr(Reg, {pc, 0})) =:= 2),
+    Stream1 = StreamModule:append(Stream0, <<16#FFFF:16>>),
+    State#state{stream = Stream1, literal_pool = [{LdrInstructionAddr, Reg, Val} | LP]}.
+
+flush_literal_pool(#state{literal_pool = []} = State) ->
+    State;
+flush_literal_pool(
+    #state{stream_module = StreamModule, stream = Stream0, literal_pool = LP} = State
+) ->
+    % Align
+    Offset = StreamModule:offset(Stream0),
+    Stream1 =
+        if
+            Offset rem 4 =:= 0 -> Stream0;
+            true -> StreamModule:append(Stream0, <<0:16>>)
         end,
-    PaddingNeeded = LiteralPosition - OffsetAfterInstructions,
-
-    %% Calculate LDR PC-relative offset
-    %% PC = (current_instruction_address & ~3) + 4
-    LdrInstructionAddr = CurrentOffset,
-    LdrPC = (LdrInstructionAddr band (bnot 3)) + 4,
-    LiteralOffset = LiteralPosition - LdrPC,
-
-    %% Generate: ldr rTemp, [pc, #LiteralOffset]  ; Load from literal
-    I1 = jit_armv6m_asm:ldr(Reg, {pc, LiteralOffset}),
-    %% Calculate branch offset
-    %% Branch is at CurrentOffset + 2, need to jump past literal
-    BranchPosition = CurrentOffset + 2,
-    % After the 4-byte literal
-    TargetPosition = LiteralPosition + 4,
-    BranchOffset = TargetPosition - BranchPosition,
-    I2 = jit_armv6m_asm:b(BranchOffset),
-    %% Generate padding if needed (just zeros)
-    Padding = <<0:(PaddingNeeded * 8)>>,
-    Stream1 = StreamModule:append(Stream0, <<I1/binary, I2/binary, Padding/binary, Val:32/little>>),
-    State#state{stream = Stream1}.
+    % Lay all values and update ldr instructions
+    Stream2 = lists:foldl(
+        fun({LdrInstructionAddr, Reg, Val}, AccStream) ->
+            LiteralPosition = StreamModule:offset(AccStream),
+            LdrPC = (LdrInstructionAddr band (bnot 3)) + 4,
+            LiteralOffset = LiteralPosition - LdrPC,
+            LdrInstruction = jit_armv6m_asm:ldr(Reg, {pc, LiteralOffset}),
+            AccStream1 = StreamModule:append(AccStream, <<Val:32/little>>),
+            StreamModule:replace(
+                AccStream1, LdrInstructionAddr, LdrInstruction
+            )
+        end,
+        Stream1,
+        lists:reverse(LP)
+    ),
+    State#state{stream = Stream2, literal_pool = []}.
 
 sub(#state{stream_module = StreamModule, stream = Stream0} = State, Reg, Val) when
     (Val >= 0 andalso Val =< 255) orelse is_atom(Val)
@@ -2754,10 +3003,12 @@ decrement_reductions_and_maybe_schedule_next(
     Stream1 = StreamModule:append(Stream0, <<I0/binary, I1/binary, I2/binary, I3/binary>>),
     BNEOffset = StreamModule:offset(Stream1),
     % Branch if reduction count is not zero
-    I4 = jit_armv6m_asm:bcc(ne, 0),
+    ?ASSERT(byte_size(jit_armv6m_asm:bcc(ne, 0)) =:= 2),
+    I4 = <<16#FFFF:16>>,
     % Set continuation to the next instruction
     ADROffset = BNEOffset + byte_size(I4),
-    I5 = jit_armv6m_asm:adr(Temp, 4),
+    ?ASSERT(byte_size(jit_armv6m_asm:adr(Temp, 4)) =:= 2),
+    I5 = <<16#FFFF:16>>,
     I6 = jit_armv6m_asm:adds(Temp, Temp, 1),
     I7 = jit_armv6m_asm:str(Temp, ?JITSTATE_CONTINUATION(TempJitState)),
     % Append the instructions to the stream
@@ -2890,8 +3141,8 @@ set_cp(State0) ->
     Offset = StreamModule:offset(Stream0),
     % build cp with module_index << 24
     I1 = jit_armv6m_asm:lsls(Reg, Reg, 24),
-    % Emit a single nop as placeholder for offset load instruction
-    I2 = jit_armv6m_asm:nop(),
+    % Placeholder for offset load instruction
+    I2 = <<16#FFFF:16>>,
     MOVOffset = Offset + byte_size(I1),
     % OR the module index with the offset (loaded in temp register)
     I3 = jit_armv6m_asm:orrs(Reg, TempReg),
@@ -3124,5 +3375,47 @@ add_label(#state{stream_module = StreamModule, stream = Stream0} = State0, Label
 %% @return Updated backend state
 %%-----------------------------------------------------------------------------
 -spec add_label(state(), integer() | reference(), integer()) -> state().
+add_label(
+    #state{
+        stream_module = StreamModule,
+        stream = Stream0,
+        jump_table_start = JumpTableStart,
+        branches = Branches,
+        labels = Labels
+    } = State,
+    Label,
+    LabelOffset
+) when is_integer(Label) ->
+    % Patch the jump table entry immediately
+    % Each jump table entry is 12 bytes:
+    % - ldr r3, [pc, 4] (2 bytes) at offset 0
+    % - push {...} (2 bytes) at offset 2
+    % - add pc, r3 (2 bytes) at offset 4
+    % - nop (2 bytes) at offset 6
+    % - data (4 bytes) at offset 8
+    JumpTableEntryStart = JumpTableStart + Label * 12,
+    DataOffset = JumpTableEntryStart + 8,
+    AddInstrOffset = JumpTableEntryStart + 4,
+
+    % Calculate offset from 'add pc, pc, r3' instruction + 4 to target label
+    % PC when add instruction executes
+    AddPC = AddInstrOffset + 4,
+    RelativeOffset = LabelOffset - AddPC,
+    DataBytes = <<RelativeOffset:32/little>>,
+
+    Stream1 = StreamModule:replace(Stream0, DataOffset, DataBytes),
+
+    % Eagerly patch any branches targeting this label
+    {Stream2, RemainingBranches} = patch_branches_for_label(
+        StreamModule,
+        Stream1,
+        Label,
+        LabelOffset,
+        Branches
+    ),
+
+    State#state{
+        stream = Stream2, branches = RemainingBranches, labels = [{Label, LabelOffset} | Labels]
+    };
 add_label(#state{labels = Labels} = State, Label, Offset) ->
     State#state{labels = [{Label, Offset} | Labels]}.
diff --git a/libs/jit/src/jit_precompile.erl b/libs/jit/src/jit_precompile.erl
index cd9646790d..5d91690498 100644
--- a/libs/jit/src/jit_precompile.erl
+++ b/libs/jit/src/jit_precompile.erl
@@ -19,7 +19,7 @@
 %
 -module(jit_precompile).
 
--export([start/0, compile/3, atom_resolver/1, type_resolver/1]).
+-export([start/0, compile/3, atom_resolver/1, type_resolver/1, import_resolver/2]).
 
 -include_lib("jit.hrl").
 
@@ -84,6 +84,15 @@ compile(Target, Dir, Path) ->
             end,
         TypeResolver = type_resolver(TypesChunk),
 
+        ImportedFunctionsChunk =
+            case lists:keyfind("ImpT", 1, InitialChunks) of
+                {"ImpT", ImportedFunctionsChunk0} ->
+                    ImportedFunctionsChunk0;
+                false ->
+                    <<>>
+            end,
+        ImportedFunctionResolver = import_resolver(ImportedFunctionsChunk, AtomResolver),
+
         % Parse target to extract arch and variant
         {BaseTarget, RequestedVariant} = parse_target(Target),
         Backend = list_to_atom("jit_" ++ BaseTarget),
@@ -93,6 +102,7 @@ compile(Target, Dir, Path) ->
                 "x86_64" -> ?JIT_ARCH_X86_64;
                 "aarch64" -> ?JIT_ARCH_AARCH64;
                 "armv6m" -> ?JIT_ARCH_ARMV6M;
+                "riscv32" -> ?JIT_ARCH_RISCV32;
                 _ -> error({unsupported_target, Target})
             end,
 
@@ -106,7 +116,7 @@ compile(Target, Dir, Path) ->
 
         Stream2 = Backend:new(RequestedVariant, jit_stream_binary, Stream1),
         {LabelsCount, Stream3} = jit:compile(
-            CodeChunk, AtomResolver, LiteralResolver, TypeResolver, Backend, Stream2
+            CodeChunk, AtomResolver, LiteralResolver, TypeResolver, ImportedFunctionResolver, Backend, Stream2
         ),
         NativeCode = Backend:stream(Stream3),
         UpdatedChunks = FilteredChunks ++ [{"avmN", NativeCode}],
@@ -174,6 +184,26 @@ parse_literals_chunk0(N, <<TermSize:32, TermBin:TermSize/binary, Rest/binary>>,
     Term = binary_to_term(TermBin),
     parse_literals_chunk0(N - 1, Rest, [Term | Acc]).
 
+import_resolver(FunctionChunks, AtomResolver) ->
+    ImportedFunctions = parse_imported_functions_chunk(FunctionChunks, AtomResolver),
+    fun(Index) -> lists:nth(Index + 1, ImportedFunctions) end.
+
+%% @doc Parse imported functions chunk to extract {Module, Function, Arity} triplets
+parse_imported_functions_chunk(<<FunctionsCount:32, Rest/binary>>, AtomResolver) ->
+    parse_imported_functions_chunk0(FunctionsCount, Rest, AtomResolver, []);
+parse_imported_functions_chunk(<<>>, _AtomResolver) ->
+    [].
+
+parse_imported_functions_chunk0(0, <<>>, _AtomResolver, Acc) ->
+    lists:reverse(Acc);
+parse_imported_functions_chunk0(
+    N, <<ModuleIndex:32, FunctionIndex:32, Arity:32, Rest/binary>>, AtomResolver, Acc
+) ->
+    Module = AtomResolver(ModuleIndex),
+    Function = AtomResolver(FunctionIndex),
+    ImportedFunction = {Module, Function, Arity},
+    parse_imported_functions_chunk0(N - 1, Rest, AtomResolver, [ImportedFunction | Acc]).
+
 %% Version (from beam_types.hrl)
 -define(BEAM_TYPES_VERSION, 3).
 
diff --git a/libs/jit/src/jit_riscv32.erl b/libs/jit/src/jit_riscv32.erl
new file mode 100644
index 0000000000..52a04c2a00
--- /dev/null
+++ b/libs/jit/src/jit_riscv32.erl
@@ -0,0 +1,3208 @@
+%
+% This file is part of AtomVM.
+%
+% Copyright 2025 Paul Guyot <pguyot@kallisys.net>
+%
+% Licensed under the Apache License, Version 2.0 (the "License");
+% you may not use this file except in compliance with the License.
+% You may obtain a copy of the License at
+%
+%    http://www.apache.org/licenses/LICENSE-2.0
+%
+% Unless required by applicable law or agreed to in writing, software
+% distributed under the License is distributed on an "AS IS" BASIS,
+% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+% See the License for the specific language governing permissions and
+% limitations under the License.
+%
+% SPDX-License-Identifier: Apache-2.0 OR LGPL-2.1-or-later
+%
+
+-module(jit_riscv32).
+
+-export([
+    word_size/0,
+    new/3,
+    stream/1,
+    offset/1,
+    flush/1,
+    debugger/1,
+    used_regs/1,
+    available_regs/1,
+    free_native_registers/2,
+    assert_all_native_free/1,
+    jump_table/2,
+    update_branches/1,
+    call_primitive/3,
+    call_primitive_last/3,
+    call_primitive_with_cp/3,
+    return_if_not_equal_to_ctx/2,
+    jump_to_label/2,
+    jump_to_continuation/2,
+    jump_to_offset/2,
+    if_block/3,
+    if_else_block/4,
+    shift_right/3,
+    shift_left/3,
+    move_to_vm_register/3,
+    move_to_native_register/2,
+    move_to_native_register/3,
+    move_to_cp/2,
+    move_array_element/4,
+    move_to_array_element/4,
+    move_to_array_element/5,
+    set_bs/2,
+    copy_to_native_register/2,
+    get_array_element/3,
+    increment_sp/2,
+    set_continuation_to_label/2,
+    set_continuation_to_offset/1,
+    continuation_entry_point/1,
+    get_module_index/1,
+    and_/3,
+    or_/3,
+    add/3,
+    sub/3,
+    mul/3,
+    decrement_reductions_and_maybe_schedule_next/1,
+    call_or_schedule_next/2,
+    call_only_or_schedule_next/2,
+    call_func_ptr/3,
+    return_labels_and_lines/2,
+    add_label/2,
+    add_label/3
+]).
+
+-ifdef(JIT_DWARF).
+-export([
+    dwarf_opcode/2,
+    dwarf_label/2,
+    dwarf_function/3,
+    dwarf_line/2
+]).
+-endif.
+
+-compile([warnings_as_errors]).
+
+-include_lib("jit.hrl").
+
+-include("primitives.hrl").
+-include("term.hrl").
+
+-define(ASSERT(Expr), true = Expr).
+
+%% RISC-V32 ILP32 ABI: a0-a7 are used for argument passing (8 registers).
+%% a0-a1 are used for return values (a0 for 32-bit, a0-a1 for 64-bit returns).
+%% s0-s11 are callee-saved registers (must be preserved across calls).
+%% t0-t6 are caller-saved temporary registers.
+%% sp is the stack pointer.
+%% ra is the return address register.
+%% zero (x0) is hardwired to constant 0.
+%% This implementation uses RV32IMC (base + multiply/compressed extensions).
+%%
+%% See: RISC-V Calling Convention
+%% https://riscv.org/wp-content/uploads/2024/12/riscv-calling.pdf
+%%
+%% Registers used by the JIT backend (RISC-V32):
+%%   - Argument/return: a0-a7 (up to 8 args in registers)
+%%   - Callee-saved: s0-s11 (must preserve)
+%%   - Temporaries: t0-t6 (caller-saved)
+%%   - Stack pointer: sp
+%%   - Return address: ra
+%%   - Zero register: zero (always 0)
+%%   - Available for JIT scratch: t0-t6 (7 temp registers)
+%%
+%% Note: base RISC-V32 instructions are 32-bit and can address all 32
+%% registers; with the C extension some are emitted as 16-bit encodings.
+%%
+%% For more details, refer to the RISC-V ILP32 Procedure Call Standard.
+
+-type riscv32_register() ::
+    a0
+    | a1
+    | a2
+    | a3
+    | a4
+    | a5
+    | a6
+    | a7
+    | t0
+    | t1
+    | t2
+    | t3
+    | t4
+    | t5
+    | t6
+    | s0
+    | s1
+    | s2
+    | s3
+    | s4
+    | s5
+    | s6
+    | s7
+    | s8
+    | s9
+    | s10
+    | s11
+    | sp
+    | ra.
+
+-define(IS_GPR(Reg),
+    (Reg =:= a0 orelse Reg =:= a1 orelse Reg =:= a2 orelse Reg =:= a3 orelse Reg =:= a4 orelse
+        Reg =:= a5 orelse Reg =:= a6 orelse Reg =:= a7 orelse Reg =:= t0 orelse Reg =:= t1 orelse
+        Reg =:= t2 orelse Reg =:= t3 orelse Reg =:= t4 orelse Reg =:= t5 orelse Reg =:= t6 orelse
+        Reg =:= s0 orelse Reg =:= s1 orelse Reg =:= s2 orelse Reg =:= s3 orelse Reg =:= s4 orelse
+        Reg =:= s5 orelse Reg =:= s6 orelse Reg =:= s7 orelse Reg =:= s8 orelse Reg =:= s9 orelse
+        Reg =:= s10 orelse Reg =:= s11 orelse Reg =:= sp orelse Reg =:= ra)
+).
+
+-type stream() :: any().
+
+-record(state, {
+    stream_module :: module(),
+    stream :: stream(),
+    offset :: non_neg_integer(),
+    branches :: [{integer() | reference(), non_neg_integer(), term()}],
+    jump_table_start :: non_neg_integer(),
+    available_regs :: [riscv32_register()],
+    used_regs :: [riscv32_register()],
+    labels :: [{integer() | reference(), integer()}],
+    variant :: non_neg_integer()
+}).
+
+-type state() :: #state{}.
+-type immediate() :: non_neg_integer().
+-type vm_register() ::
+    {x_reg, non_neg_integer()} | {y_reg, non_neg_integer()} | {ptr, riscv32_register()}.
+-type value() :: immediate() | vm_register() | riscv32_register() | {ptr, riscv32_register()}.
+-type arg() :: ctx | jit_state | offset | value() | {free, value()} | {avm_int64_t, integer()}.
+
+-type maybe_free_riscv32_register() ::
+    {free, riscv32_register()} | riscv32_register().
+
+-type condition() ::
+    {riscv32_register(), '<', integer()}
+    | {maybe_free_riscv32_register(), '<', riscv32_register()}
+    | {integer(), '<', maybe_free_riscv32_register()}
+    | {maybe_free_riscv32_register(), '==', integer()}
+    | {maybe_free_riscv32_register(), '!=', riscv32_register() | integer()}
+    | {'(int)', maybe_free_riscv32_register(), '==', integer()}
+    | {'(int)', maybe_free_riscv32_register(), '!=', riscv32_register() | integer()}
+    | {'(bool)', maybe_free_riscv32_register(), '==', false}
+    | {'(bool)', maybe_free_riscv32_register(), '!=', false}
+    | {maybe_free_riscv32_register(), '&', non_neg_integer(), '!=', integer()}
+    | {{free, riscv32_register()}, '==', {free, riscv32_register()}}.
+
+% Context offsets (32-bit architecture)
+% ctx->e is 0x14
+% ctx->x is 0x18
+-define(CTX_REG, a0).
+-define(NATIVE_INTERFACE_REG, a2).
+-define(Y_REGS, {?CTX_REG, 16#14}).
+-define(X_REG(N), {?CTX_REG, 16#18 + (N * 4)}).
+-define(CP, {?CTX_REG, 16#5C}).
+-define(FP_REGS, {?CTX_REG, 16#60}).
+-define(BS, {?CTX_REG, 16#64}).
+-define(BS_OFFSET, {?CTX_REG, 16#68}).
+% JITSTATE is in a1 register (no prolog, following aarch64 model)
+-define(JITSTATE_REG, a1).
+% Return address register (like LR in AArch64)
+-define(RA_REG, ra).
+-define(JITSTATE_MODULE_OFFSET, 0).
+-define(JITSTATE_CONTINUATION_OFFSET, 16#4).
+-define(JITSTATE_REDUCTIONCOUNT_OFFSET, 16#8).
+-define(PRIMITIVE(N), {?NATIVE_INTERFACE_REG, N * 4}).
+-define(MODULE_INDEX(ModuleReg), {ModuleReg, 0}).
+
+-define(JUMP_TABLE_ENTRY_SIZE, 8).
+
+%% RISC-V32 register mappings
+
+%% Use t3 as temporary for some operations
+-define(IP_REG, t3).
+
+-define(IS_SINT8_T(X), is_integer(X) andalso X >= -128 andalso X =< 127).
+-define(IS_SINT32_T(X), is_integer(X) andalso X >= -16#80000000 andalso X < 16#80000000).
+-define(IS_UINT8_T(X), is_integer(X) andalso X >= 0 andalso X =< 255).
+-define(IS_UINT32_T(X), is_integer(X) andalso X >= 0 andalso X < 16#100000000).
+-define(IS_SIGNED_OR_UNSIGNED_INT32_T(X),
+    is_integer(X) andalso X >= -16#80000000 andalso X < 16#100000000
+).
+
+%% RISC-V32 ILP32 ABI register allocation:
+%% - a0: context pointer (reserved, passed as first parameter)
+%% - a1: jit state pointer (reserved); a2: native interface pointer (reserved)
+%% - a0-a7: parameter registers for calls to native functions (up to 8 params)
+%% - t0-t6: temporaries, caller-saved, available for JIT use
+%% - s0-s11: callee-saved (would need to be saved/restored)
+-define(AVAILABLE_REGS, [t6, t5, t4, t3, t2, t1, t0]).
+-define(PARAMETER_REGS, [a0, a1, a2, a3, a4, a5, a6, a7]).
+-define(SCRATCH_REGS, [t6, t5, t4, t2, t1, t0]).
+
+%%-----------------------------------------------------------------------------
+%% @doc Return the word size in bytes, i.e. the sizeof(term) i.e.
+%% sizeof(uintptr_t)
+%%
+%% C code equivalent is:
+%% #if UINTPTR_MAX == UINT32_MAX
+%%    #define TERM_BYTES 4
+%% #elif UINTPTR_MAX == UINT64_MAX
+%%    #define TERM_BYTES 8
+%% #else
+%%    #error "Term size must be either 32 bit or 64 bit."
+%% #endif
+%%
+%% @end
+%% @return Word size in bytes
+%%-----------------------------------------------------------------------------
+-spec word_size() -> 4 | 8.
+word_size() -> 4.
+
+%%-----------------------------------------------------------------------------
+%% @doc Create a new backend state for provided variant, module and stream.
+%% @end
+%% @param Variant JIT variant to use (currently ?JIT_VARIANT_PIC)
+%% @param StreamModule module to stream instructions
+%% @param Stream stream state
+%% @return New backend state
+%%-----------------------------------------------------------------------------
+-spec new(any(), module(), stream()) -> state().
+new(Variant, StreamModule, Stream) ->
+    #state{
+        stream_module = StreamModule,
+        stream = Stream,
+        branches = [],
+        jump_table_start = 0,
+        offset = StreamModule:offset(Stream),
+        available_regs = ?AVAILABLE_REGS,
+        used_regs = [],
+        labels = [],
+        variant = Variant
+    }.
+
+%%-----------------------------------------------------------------------------
+%% @doc Access the stream object.
+%% @end
+%% @param State current backend state
+%% @return The stream object
+%%-----------------------------------------------------------------------------
+-spec stream(state()) -> stream().
+stream(#state{stream = Stream}) ->
+    Stream.
+
+%%-----------------------------------------------------------------------------
+%% @doc Get the current offset in the stream
+%% @end
+%% @param State current backend state
+%% @return The current offset
+%%-----------------------------------------------------------------------------
+-spec offset(state()) -> non_neg_integer().
+offset(#state{stream_module = StreamModule, stream = Stream}) ->
+    StreamModule:offset(Stream).
+
+%%-----------------------------------------------------------------------------
+%% @doc Flush the stream.
+%% @end
+%% @param State current backend state
+%% @return Updated backend state holding the flushed stream
+%%-----------------------------------------------------------------------------
+-spec flush(state()) -> state().
+flush(#state{stream_module = StreamModule, stream = Stream0} = State) ->
+    Stream1 = StreamModule:flush(Stream0),
+    State#state{stream = Stream1}.
+
+%%-----------------------------------------------------------------------------
+%% @doc Emit a debugger or breakpoint instruction. This is used for debugging
+%% and not in production.
+%% @end
+%% @param State current backend state
+%% @return The updated backend state
+%%-----------------------------------------------------------------------------
+-spec debugger(state()) -> state().
+debugger(#state{stream_module = StreamModule, stream = Stream0} = State) ->
+    Stream1 = StreamModule:append(Stream0, jit_riscv32_asm:c_ebreak()),
+    State#state{stream = Stream1}.
+
+%%-----------------------------------------------------------------------------
+%% @doc Return the list of currently used native registers. This is used for
+%% debugging and not in production.
+%% @end
+%% @param State current backend state
+%% @return The list of used registers
+%%-----------------------------------------------------------------------------
+-spec used_regs(state()) -> [riscv32_register()].
+used_regs(#state{used_regs = Used}) -> Used.
+
+%%-----------------------------------------------------------------------------
+%% @doc Return the list of currently available native scratch registers. This
+%% is used for debugging and not in production.
+%% @end
+%% @param State current backend state
+%% @return The list of available registers
+%%-----------------------------------------------------------------------------
+-spec available_regs(state()) -> [riscv32_register()].
+available_regs(#state{available_regs = Available}) -> Available.
+
+%%-----------------------------------------------------------------------------
+%% @doc Free native registers. The passed list of registers can contain
+%% registers, pointer to registers or other values that are ignored.
+%% @end
+%% @param State current backend state
+%% @param Regs list of registers or other values
+%% @return The updated backend state
+%%-----------------------------------------------------------------------------
+-spec free_native_registers(state(), [value()]) -> state().
+free_native_registers(State, []) ->
+    State;
+free_native_registers(State, [Reg | Rest]) ->
+    State1 = free_native_register(State, Reg),
+    free_native_registers(State1, Rest).
+
+-spec free_native_register(state(), value()) -> state().
+free_native_register(
+    #state{available_regs = Available0, used_regs = Used0} = State,
+    Reg
+) when
+    is_atom(Reg)
+->
+    {Available1, Used1} = free_reg(Available0, Used0, Reg),
+    State#state{available_regs = Available1, used_regs = Used1};
+free_native_register(State, {ptr, Reg}) ->
+    free_native_register(State, Reg);
+free_native_register(State, _Other) ->
+    State.
+
+%%-----------------------------------------------------------------------------
+%% @doc Assert that all native scratch registers are available. This is used
+%% for debugging and not in production.
+%% @end
+%% @param State current backend state
+%% @return ok
+%%-----------------------------------------------------------------------------
+-spec assert_all_native_free(state()) -> ok.
+assert_all_native_free(#state{
+    available_regs = ?AVAILABLE_REGS, used_regs = []
+}) ->
+    ok.
+
+%%-----------------------------------------------------------------------------
+%% @doc Emit the jump table at the beginning of the module. Branches will be
+%% updated afterwards with update_branches/1. Emit branches for labels from
+%% 0 (special entry for lines and labels information) to LabelsCount included
+%% (special entry for OP_INT_CALL_END).
+%%
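+%% On this platform, each jump table entry is 8 bytes
+%% (?JUMP_TABLE_ENTRY_SIZE): an AUIPC instruction followed by a JALR
+%% instruction. Entries are first emitted as placeholders and patched
+%% once the offset of the corresponding label is known.
+%% ```
+%% 0xFFFFFFFF  ; placeholder, patched to auipc
+%% 0xFFFFFFFF  ; placeholder, patched to jalr
+%% ```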
+%%
+%% @end
+%% @param State current backend state
+%% @param LabelsCount number of labels in the module.
+%% @return Updated backend state
+%%-----------------------------------------------------------------------------
+-spec jump_table(state(), pos_integer()) -> state().
+jump_table(#state{stream_module = StreamModule, stream = Stream0} = State, LabelsCount) ->
+    JumpTableStart = StreamModule:offset(Stream0),
+    jump_table0(State#state{jump_table_start = JumpTableStart}, 0, LabelsCount).
+
+jump_table0(State, N, LabelsCount) when N > LabelsCount ->
+    State;
+jump_table0(
+    #state{stream_module = StreamModule, stream = Stream0} = State,
+    N,
+    LabelsCount
+) ->
+    % Create jump table entry: AUIPC + JALR (8 bytes total)
+    % This will be patched in add_label when the label offset is known
+    JumpEntry = <<16#FFFFFFFF:32, 16#FFFFFFFF:32>>,
+    Stream1 = StreamModule:append(Stream0, JumpEntry),
+    jump_table0(State#state{stream = Stream1}, N + 1, LabelsCount).
+
+%%-----------------------------------------------------------------------------
+%% @doc Patch a single branch in the stream
+%% @end
+%% @param StreamModule stream module
+%% @param Stream stream state
+%% @param Offset offset of the branch to patch
+%% @param Type type of the branch
+%% @param LabelOffset target label offset
+%% @return Updated stream
+%%-----------------------------------------------------------------------------
+-spec patch_branch(module(), stream(), non_neg_integer(), any(), non_neg_integer()) -> stream().
+patch_branch(StreamModule, Stream, Offset, Type, LabelOffset) ->
+    Rel = LabelOffset - Offset,
+    NewInstr =
+        case Type of
+            {adr, Reg} when Rel rem 4 =:= 0 ->
+                % Generate pc_relative_address and pad to 8 bytes with NOP
+                I = pc_relative_address(Reg, Rel),
+                case byte_size(I) of
+                    4 -> <<I/binary, (jit_riscv32_asm:nop())/binary>>;
+                    6 -> <<I/binary, (jit_riscv32_asm:c_nop())/binary>>;
+                    8 -> I
+                end;
+            {adr, Reg} when Rel rem 4 =:= 2; Rel rem 4 =:= -2 ->
+                % Handle 2-byte aligned offsets and pad to 8 bytes
+                % Handle both positive and negative offsets (Erlang rem can be negative)
+                I = pc_relative_address(Reg, Rel),
+                case byte_size(I) of
+                    4 -> <<I/binary, (jit_riscv32_asm:nop())/binary>>;
+                    6 -> <<I/binary, (jit_riscv32_asm:c_nop())/binary>>;
+                    8 -> I
+                end;
+            {far_branch, TempReg} ->
+                % Check if branch can now be optimized to near branch
+                if
+                    Rel >= -1048576 andalso Rel =< 1048574 andalso (Rel rem 2) =:= 0 ->
+                        % RISC-V jal has ±1MB range
+                        % Optimize to near branch: jal + nops to fill original size
+                        DirectBranch = jit_riscv32_asm:jal(zero, Rel),
+                        case byte_size(DirectBranch) of
+                            2 ->
+                                <<DirectBranch/binary, (jit_riscv32_asm:c_nop())/binary,
+                                    (jit_riscv32_asm:nop())/binary>>;
+                            4 ->
+                                <<DirectBranch/binary, (jit_riscv32_asm:nop())/binary>>
+                        end;
+                    true ->
+                        % Keep far branch sequence: auipc + jalr (PC-relative, 8 bytes)
+                        % Split the relative offset into upper 20 bits and lower 12 bits
+                        Hi20 = (Rel + 16#800) bsr 12,
+                        Lo12 = Rel - (Hi20 bsl 12),
+                        I1 = jit_riscv32_asm:auipc(TempReg, Hi20),
+                        I2 = jit_riscv32_asm:jalr(zero, TempReg, Lo12),
+                        Entry = <<I1/binary, I2/binary>>,
+                        case byte_size(Entry) of
+                            6 -> <<Entry/binary, (jit_riscv32_asm:c_nop())/binary>>;
+                            8 -> Entry
+                        end
+                end
+        end,
+    StreamModule:replace(Stream, Offset, NewInstr).
+
+%%-----------------------------------------------------------------------------
+%% @doc Patch all branches targeting a specific label and return remaining branches
+%% @end
+%% @param StreamModule stream module
+%% @param Stream stream state
+%% @param TargetLabel label to patch branches for
+%% @param LabelOffset offset of the target label
+%% @param Branches list of pending branches
+%% @return {UpdatedStream, RemainingBranches}
+%%-----------------------------------------------------------------------------
+-spec patch_branches_for_label(
+    module(),
+    stream(),
+    integer(),
+    non_neg_integer(),
+    [{integer(), non_neg_integer(), any()}]
+) -> {stream(), [{integer(), non_neg_integer(), any()}]}.
+patch_branches_for_label(StreamModule, Stream, TargetLabel, LabelOffset, Branches) ->
+    patch_branches_for_label(StreamModule, Stream, TargetLabel, LabelOffset, Branches, []).
+
+patch_branches_for_label(_StreamModule, Stream, _TargetLabel, _LabelOffset, [], Acc) ->
+    {Stream, lists:reverse(Acc)};
+patch_branches_for_label(
+    StreamModule,
+    Stream0,
+    TargetLabel,
+    LabelOffset,
+    [{Label, Offset, Type} | Rest],
+    Acc
+) when Label =:= TargetLabel ->
+    Stream1 = patch_branch(StreamModule, Stream0, Offset, Type, LabelOffset),
+    patch_branches_for_label(StreamModule, Stream1, TargetLabel, LabelOffset, Rest, Acc);
+patch_branches_for_label(StreamModule, Stream, TargetLabel, LabelOffset, [Branch | Rest], Acc) ->
+    patch_branches_for_label(StreamModule, Stream, TargetLabel, LabelOffset, Rest, [Branch | Acc]).
+
+%%-----------------------------------------------------------------------------
+%% @doc Rewrite stream to update all branches for labels.
+%% @end
+%% @param State current backend state
+%% @return Updated backend state
+%%-----------------------------------------------------------------------------
+-spec update_branches(state()) -> state().
+update_branches(#state{branches = []} = State) ->
+    State;
+update_branches(
+    #state{
+        stream_module = StreamModule,
+        stream = Stream0,
+        branches = [{Label, Offset, Type} | BranchesT],
+        labels = Labels
+    } = State
+) ->
+    {Label, LabelOffset} = lists:keyfind(Label, 1, Labels),
+    Stream1 = patch_branch(StreamModule, Stream0, Offset, Type, LabelOffset),
+    update_branches(State#state{stream = Stream1, branches = BranchesT}).
+
+%%-----------------------------------------------------------------------------
+%% @doc Generate code to load a primitive function pointer into a register
+%% @param Primitive index to the primitive to call
+%% @param TargetReg register to load the function pointer into
+%% @return Binary instruction sequence
+%%-----------------------------------------------------------------------------
+-spec load_primitive_ptr(non_neg_integer(), riscv32_register()) -> binary().
+load_primitive_ptr(Primitive, TargetReg) ->
+    case Primitive of
+        0 ->
+            jit_riscv32_asm:lw(TargetReg, ?NATIVE_INTERFACE_REG, 0);
+        N when N * 4 =< 124 ->
+            jit_riscv32_asm:lw(TargetReg, ?NATIVE_INTERFACE_REG, N * 4);
+        N when N * 4 < 256 ->
+            % Can encode N * 4 directly in li instruction
+            I1 = jit_riscv32_asm:li(TargetReg, N * 4),
+            I2 = jit_riscv32_asm:add(TargetReg, TargetReg, ?NATIVE_INTERFACE_REG),
+            I3 = jit_riscv32_asm:lw(TargetReg, TargetReg, 0),
+            <<I1/binary, I2/binary, I3/binary>>;
+        N ->
+            % For very large primitive numbers, load N and shift left by 2 (multiply by 4)
+            I1 = jit_riscv32_asm:li(TargetReg, N),
+            I2 = jit_riscv32_asm:slli(TargetReg, TargetReg, 2),
+            I3 = jit_riscv32_asm:add(TargetReg, TargetReg, ?NATIVE_INTERFACE_REG),
+            I4 = jit_riscv32_asm:lw(TargetReg, TargetReg, 0),
+            <<I1/binary, I2/binary, I3/binary, I4/binary>>
+    end.
+
+%%-----------------------------------------------------------------------------
+%% @doc Emit a call (call with return) to a primitive with arguments. This
+%% function converts arguments and pass them following the backend ABI
+%% convention. It also saves scratch registers we need to preserve.
+%% @end
+%% @param State current backend state
+%% @param Primitive index to the primitive to call
+%% @param Args arguments to pass to the primitive
+%% @return Updated backend state
+%%-----------------------------------------------------------------------------
+-spec call_primitive(state(), non_neg_integer(), [arg()]) -> {state(), riscv32_register()}.
+call_primitive(
+    #state{
+        stream_module = StreamModule,
+        stream = Stream0,
+        available_regs = [TempReg | RestRegs],
+        used_regs = UsedRegs
+    } = State,
+    Primitive,
+    Args
+) ->
+    % Load the primitive function pointer into the first available scratch register
+    PrepCall = load_primitive_ptr(Primitive, TempReg),
+    Stream1 = StreamModule:append(Stream0, PrepCall),
+    StateCall = State#state{
+        stream = Stream1,
+        available_regs = RestRegs,
+        used_regs = [TempReg | UsedRegs]
+    },
+    call_func_ptr(StateCall, {free, TempReg}, Args);
+call_primitive(
+    #state{available_regs = []} = State,
+    Primitive,
+    Args
+) ->
+    call_func_ptr(State, {primitive, Primitive}, Args).
+
+%%-----------------------------------------------------------------------------
+%% @doc Emit a jump (call without return) to a primitive with arguments. This
+%% function converts arguments and pass them following the backend ABI
+%% convention.
+%% @end
+%% @param State current backend state
+%% @param Primitive index to the primitive to call
+%% @param Args arguments to pass to the primitive
+%% @return Updated backend state
+%%-----------------------------------------------------------------------------
+call_primitive_last(
+    #state{
+        stream_module = StreamModule,
+        stream = Stream0
+    } = State0,
+    Primitive,
+    Args
+) ->
+    % We need a register for the function pointer that should not be used as a parameter
+    % Since we're not returning, we can use all scratch registers except
+    % registers used for parameters
+    ParamRegs = lists:sublist(?PARAMETER_REGS, length(Args)),
+    ArgsRegs = args_regs(Args),
+    ScratchRegs = ?AVAILABLE_REGS -- ArgsRegs -- ParamRegs,
+    [Temp | AvailableRegs1] = ScratchRegs,
+    UsedRegs = ?AVAILABLE_REGS -- AvailableRegs1,
+    PrepCall = load_primitive_ptr(Primitive, Temp),
+    Stream1 = StreamModule:append(Stream0, PrepCall),
+
+    State1 = State0#state{
+        stream = Stream1, available_regs = AvailableRegs1, used_regs = UsedRegs
+    },
+
+    % Preprocess offset special arg
+    Args1 = lists:map(
+        fun(Arg) ->
+            case Arg of
+                offset -> StreamModule:offset(Stream1);
+                _ -> Arg
+            end
+        end,
+        Args
+    ),
+
+    % In RISC-V, all up to 8 arguments fit in registers (a0-a7)
+    % Always use tail call when calling primitives in tail position
+    State4 =
+        case Args1 of
+            [FirstArg, jit_state | ArgsT] ->
+                % Use tail call
+                ArgsForTailCall = [FirstArg, jit_state_tail_call | ArgsT],
+                State2 = set_registers_args(State1, ArgsForTailCall, 0),
+                tail_call_with_jit_state_registers_only(State2, Temp)
+        end,
+    State4#state{available_regs = ?AVAILABLE_REGS, used_regs = []}.
+
+%%-----------------------------------------------------------------------------
+%% @doc Tail call to address in register.
+%% RA is preserved across regular calls (call_func_ptr saves/restores it),
+%% so when the called C primitive returns, it returns to opcodesswitch.h.
+%% @end
+%% @param State current backend state
+%% @param Reg register containing the target address
+%% @return Updated backend state
+%%-----------------------------------------------------------------------------
+tail_call_with_jit_state_registers_only(
+    #state{
+        stream_module = StreamModule,
+        stream = Stream0
+    } = State,
+    Reg
+) ->
+    % Jump to address in register (tail call)
+    I1 = jit_riscv32_asm:jr(Reg),
+    Stream1 = StreamModule:append(Stream0, I1),
+    State#state{stream = Stream1}.
+
+%%-----------------------------------------------------------------------------
+%% @doc Emit a return of a value if it's not equal to ctx.
+%% This logic is used to break out to the scheduler, typically after signal
+%% messages have been processed.
+%% @end
+%% @param State current backend state
+%% @param Reg register to compare to (should be {free, Reg} as it's always freed)
+%% @return Updated backend state
+%%-----------------------------------------------------------------------------
+return_if_not_equal_to_ctx(
+    #state{
+        stream_module = StreamModule,
+        stream = Stream0,
+        available_regs = AvailableRegs0,
+        used_regs = UsedRegs0
+    } = State,
+    {free, Reg}
+) ->
+    % RISC-V doesn't have a separate cmp instruction, use beq directly
+    I2 =
+        case Reg of
+            % Return value is already in a0
+            a0 -> <<>>;
+            % Move to a0 (return register)
+            _ -> jit_riscv32_asm:mv(a0, Reg)
+        end,
+    I3 = jit_riscv32_asm:ret(),
+    % Branch if equal (skip the return)
+    % Offset must account for the beq instruction itself (4 bytes) plus I2 and I3
+    I1 = jit_riscv32_asm:beq(Reg, ?CTX_REG, 4 + byte_size(I2) + byte_size(I3)),
+    Stream1 = StreamModule:append(Stream0, <<I1/binary, I2/binary, I3/binary>>),
+    {AvailableRegs1, UsedRegs1} = free_reg(
+        AvailableRegs0, UsedRegs0, Reg
+    ),
+    State#state{
+        stream = Stream1,
+        available_regs = AvailableRegs1,
+        used_regs = UsedRegs1
+    }.
+
+%%-----------------------------------------------------------------------------
+%% @doc Emit a jump to a label. The offset of the relocation is saved and will
+%% be updated with `update_branches/1`.
+%% @end
+%% @param State current backend state
+%% @param Label to jump to
+%% @return Updated backend state
+%%-----------------------------------------------------------------------------
+jump_to_label(
+    #state{stream_module = StreamModule, stream = Stream0, labels = Labels} = State0, Label
+) ->
+    LabelLookupResult = lists:keyfind(Label, 1, Labels),
+    Offset = StreamModule:offset(Stream0),
+    {State1, CodeBlock} = branch_to_label_code(State0, Offset, Label, LabelLookupResult),
+    Stream1 = StreamModule:append(Stream0, CodeBlock),
+    State1#state{stream = Stream1}.
+
+jump_to_offset(#state{stream_module = StreamModule, stream = Stream0} = State, TargetOffset) ->
+    Offset = StreamModule:offset(Stream0),
+    CodeBlock = branch_to_offset_code(State, Offset, TargetOffset),
+    Stream1 = StreamModule:append(Stream0, CodeBlock),
+    State#state{stream = Stream1}.
+
+%%-----------------------------------------------------------------------------
+%% @doc Jump to address in continuation pointer register
+%% Calculate absolute address and jump to it.
+%% @end
+%% @param State current backend state
+%% @param {free, OffsetReg} register containing the offset value
+%% @return Updated backend state
+%%-----------------------------------------------------------------------------
+jump_to_continuation(
+    #state{
+        stream_module = StreamModule,
+        stream = Stream0,
+        available_regs = [Temp | _],
+        offset = BaseOffset
+    } = State0,
+    {free, OffsetReg}
+) ->
+    % Calculate absolute address: native_code_base + target_offset
+    % where native_code_base = current_pc + (BaseOffset - CurrentStreamOffset)
+    CurrentStreamOffset = StreamModule:offset(Stream0),
+    NetOffset = BaseOffset - CurrentStreamOffset,
+
+    % Get native code base address into temporary register
+    I1 = pc_relative_address(Temp, NetOffset),
+    % Add target offset to get final absolute address
+    I2 = jit_riscv32_asm:add(Temp, Temp, OffsetReg),
+    % Indirect branch to the calculated absolute address
+    I3 = jit_riscv32_asm:jr(Temp),
+
+    Code = <<I1/binary, I2/binary, I3/binary>>,
+    Stream1 = StreamModule:append(Stream0, Code),
+    % Free all registers since this is a tail jump
+    State0#state{stream = Stream1, available_regs = ?AVAILABLE_REGS, used_regs = []}.
+
+%% Build the code for an unconditional branch located at stream offset `Offset'
+%% that jumps to stream offset `TargetOffset'. Returns the instruction bytes as
+%% a binary; the state is only consulted to pick a scratch register for the
+%% long form.
+branch_to_offset_code(_State, Offset, TargetOffset) when
+    TargetOffset - Offset >= -2044 andalso TargetOffset - Offset =< 2050
+->
+    % Near branch: a direct J instruction is enough, no scratch register needed
+    jit_riscv32_asm:j(TargetOffset - Offset);
+branch_to_offset_code(#state{available_regs = [Scratch | _]}, Offset, TargetOffset) ->
+    % Far branch: PC-relative auipc + jalr pair, with the first available
+    % register as scratch:
+    %
+    %   auipc Scratch, Upper        (Scratch = PC + (Upper << 12))
+    %   jalr  zero, Scratch, Lower  (jump to Scratch + sign_extend(Lower))
+    %
+    % so Distance must equal (Upper << 12) + Lower, with Lower in -2048..2047.
+    Distance = TargetOffset - Offset,
+
+    % jalr sign-extends its 12-bit immediate: adding 0x800 before the shift
+    % bumps the upper part by one whenever bit 11 of Distance is set, which
+    % compensates for Lower being negative in that case.
+    Upper = (Distance + 16#800) bsr 12,
+
+    % The remainder is the signed low 12 bits, i.e. Distance band 0xFFF,
+    % minus 4096 when that value is 0x800 or more.
+    Lower = Distance - (Upper bsl 12),
+
+    AuipcInstr = jit_riscv32_asm:auipc(Scratch, Upper),
+    JalrInstr = jit_riscv32_asm:jalr(zero, Scratch, Lower),
+    <<AuipcInstr/binary, JalrInstr/binary>>.
+
+%% Emit a branch at stream offset `Offset' to `Label'. The last argument is the
+%% label lookup result: `{Label, LabelOffset}' when the offset of the label is
+%% already known, `false' when it is not and a relocation is recorded instead.
+%% Returns `{NewState, CodeBlock}'; nothing is appended to the stream here.
+branch_to_label_code(State, Offset, Label, {Label, KnownOffset}) ->
+    % Label already resolved: produce the final branch right away.
+    {State, branch_to_offset_code(State, Offset, KnownOffset)};
+branch_to_label_code(#state{available_regs = [Scratch | _]} = State, Offset, Label, false) ->
+    % Unresolved label: the first available register will serve as scratch
+    % for the far branch.
+    pending_far_branch(State, Offset, Label, Scratch);
+branch_to_label_code(#state{available_regs = []} = State, Offset, Label, false) ->
+    % Unresolved label and no register available.
+    % RISC-V: Use t6 as scratch (caller-saved, safe to clobber)
+    pending_far_branch(State, Offset, Label, t6);
+branch_to_label_code(#state{available_regs = []}, _Offset, _Label, LabelLookup) ->
+    % Any other lookup value with no register left is reported as an error.
+    error({no_available_registers, LabelLookup}).
+
+%% Reserve the 8 bytes of a far branch (auipc + jalr) with an all-ones
+%% placeholder and prepend a `{Label, Offset, {far_branch, Scratch}}' entry to
+%% the `branches' list of the state.
+%% NOTE(review): the placeholder is presumably patched once the label offset is
+%% known -- confirm against the code that consumes `branches'.
+pending_far_branch(#state{branches = Pending} = State, Offset, Label, Scratch) ->
+    % Placeholder: auipc Scratch, 0
+    % Placeholder: jalr zero, Scratch, 0
+    Placeholder = <<16#FFFFFFFF:32, 16#FFFFFFFF:32>>,
+    Relocation = {Label, Offset, {far_branch, Scratch}},
+    {State#state{branches = [Relocation | Pending]}, Placeholder}.
+
+%%-----------------------------------------------------------------------------
+%% @doc Emit an if block, i.e. emit a test of a condition and conditionally
+%% execute a block. The emitted code is the test, ending with a conditional
+%% branch taken when the condition does not hold, followed by the block; the
+%% branch is patched to land right after the block. With
+%% ``{'and', Conditions}'', the block is executed only if every condition of
+%% the list holds.
+%% @end
+%% @param State current backend state
+%% @param Cond condition to test
+%% @param BlockFn function to emit the block that may be executed
+%% @return Updated backend state
+%%-----------------------------------------------------------------------------
+-spec if_block(state(), condition() | {'and', [condition()]}, fun((state()) -> state())) -> state().
+if_block(#state{stream_module = StreamModule} = State0, {'and', CondList}, BlockFn) ->
+    % Emit the tests in order. Each test ends with a placeholder branch; keep
+    % its absolute stream offset together with the description of the branch
+    % instruction that has to be written there.
+    {Pending, StateTests} = lists:mapfoldl(
+        fun(Cond, StateIn) ->
+            TestStart = StreamModule:offset(StateIn#state.stream),
+            {StateOut, Branch, PlaceholderDelta} = if_block_cond(StateIn, Cond),
+            {{TestStart + PlaceholderDelta, Branch}, StateOut}
+        end,
+        State0,
+        CondList
+    ),
+    StateBlock = BlockFn(StateTests),
+    BlockEnd = StreamModule:offset(StateBlock#state.stream),
+    % Now that the end of the block is known, make every placeholder branch
+    % there. foldr patches the last test first and the first test last.
+    Patched = lists:foldr(
+        fun({PlaceholderOffset, {BranchFunc, Reg, Operand}}, StreamIn) ->
+            Instr = jit_riscv32_asm:BranchFunc(Reg, Operand, BlockEnd - PlaceholderOffset),
+            StreamModule:replace(StreamIn, PlaceholderOffset, Instr)
+        end,
+        StateBlock#state.stream,
+        Pending
+    ),
+    % Registers that were in use when the block started must still be marked
+    % as used after it, whatever the block did with them.
+    merge_used_regs(StateBlock#state{stream = Patched}, StateTests#state.used_regs);
+if_block(#state{stream_module = StreamModule, stream = Stream0} = State0, Cond, BlockFn) ->
+    % Single condition: one test, one placeholder branch to patch.
+    TestStart = StreamModule:offset(Stream0),
+    {StateTest, {BranchFunc, Reg, Operand}, PlaceholderDelta} = if_block_cond(State0, Cond),
+    PlaceholderOffset = TestStart + PlaceholderDelta,
+    StateBlock = BlockFn(StateTest),
+    BlockEnd = StreamModule:offset(StateBlock#state.stream),
+    %% Patch the conditional branch instruction to jump to the end of the block
+    Instr = jit_riscv32_asm:BranchFunc(Reg, Operand, BlockEnd - PlaceholderOffset),
+    Patched = StreamModule:replace(StateBlock#state.stream, PlaceholderOffset, Instr),
+    merge_used_regs(StateBlock#state{stream = Patched}, StateTest#state.used_regs).
+
+%%-----------------------------------------------------------------------------
+%% @doc Emit an if else block, i.e. emit a test of a condition and
+%% conditionally execute a block or another block.
+%% @end
+%% @param State current backend state
+%% @param Cond condition to test
+%% @param BlockTrueFn function to emit the block that is executed if condition is true
+%% @param BlockFalseFn function to emit the block that is executed if condition is false
+%% @return Updated backend state
+%%-----------------------------------------------------------------------------
+-spec if_else_block(state(), condition(), fun((state()) -> state()), fun((state()) -> state())) ->
+    state().
+if_else_block(
+    #state{stream_module = StreamModule, stream = Stream0} = State0,
+    Cond,
+    BlockTrueFn,
+    BlockFalseFn
+) ->
+    CondStart = StreamModule:offset(Stream0),
+    {StateCond, {BranchFunc, Reg, Operand}, PlaceholderDelta} = if_block_cond(State0, Cond),
+    CondBranchOffset = CondStart + PlaceholderDelta,
+    StateTrue = BlockTrueFn(StateCond),
+    %% The true block ends with an unconditional jump over the else block. Its
+    %% target is not known yet, so emit `j 0' for now and patch it at the end.
+    %% NOTE(review): this assumes the patched jump encodes to the same size as
+    %% `j 0' -- confirm that jit_riscv32_asm:j/1 guarantees it.
+    SkipJumpOffset = StreamModule:offset(StateTrue#state.stream),
+    StreamWithJump = StreamModule:append(StateTrue#state.stream, jit_riscv32_asm:j(0)),
+    %% Else block starts here.
+    ElseStart = StreamModule:offset(StreamWithJump),
+    %% Patch the conditional branch to jump to the else block
+    CondBranch = jit_riscv32_asm:BranchFunc(Reg, Operand, ElseStart - CondBranchOffset),
+    StreamCondPatched = StreamModule:replace(StreamWithJump, CondBranchOffset, CondBranch),
+    %% Build the else block from the register allocation that was in effect
+    %% right after the condition, not the one left behind by the true block.
+    StateFalse = BlockFalseFn(StateTrue#state{
+        stream = StreamCondPatched,
+        used_regs = StateCond#state.used_regs,
+        available_regs = StateCond#state.available_regs
+    }),
+    %% Patch the unconditional branch to jump to the end
+    EndOffset = StreamModule:offset(StateFalse#state.stream),
+    SkipJump = jit_riscv32_asm:j(EndOffset - SkipJumpOffset),
+    StreamDone = StreamModule:replace(StateFalse#state.stream, SkipJumpOffset, SkipJump),
+    %% The resulting state is the one of the else block, with the registers
+    %% used at the end of the true block marked as used as well.
+    merge_used_regs(StateFalse#state{stream = StreamDone}, StateTrue#state.used_regs).
+
+-spec if_block_cond(state(), condition()) ->  % emit the test for a condition, ending with a placeholder branch
+    {
+        state(),  % state once the test code and the 4-byte branch placeholder are appended
+        {beq | bne | blt | bge, atom(), atom() | integer()},  % branch to write over the placeholder; it is taken when the condition does NOT hold
+        non_neg_integer()  % position of the placeholder, in bytes from the stream offset on entry
+    }.
+if_block_cond(
+    #state{stream_module = StreamModule, stream = Stream0} = State0, {RegOrTuple, '<', 0}
+) ->
+    Reg =
+        case RegOrTuple of
+            {free, Reg0} -> Reg0;
+            RegOrTuple -> RegOrTuple
+        end,
+    %% RISC-V: bge Reg, zero, offset (branch if Reg >= 0, i.e., NOT negative/NOT less than 0)
+    BranchInstr = <<16#FFFFFFFF:32/little>>,
+    Stream1 = StreamModule:append(Stream0, BranchInstr),
+    State1 = if_block_free_reg(RegOrTuple, State0),
+    State2 = State1#state{stream = Stream1},
+    {State2, {bge, Reg, zero}, 0};
+if_block_cond(
+    #state{stream_module = StreamModule, stream = Stream0} = State0,
+    {RegOrTuple, '<', Val}
+) when is_integer(Val), Val >= 0, Val =< 255 ->  % NOTE(review): emits the same code as the next (general integer) clause
+    Reg =
+        case RegOrTuple of
+            {free, Reg0} -> Reg0;
+            RegOrTuple -> RegOrTuple
+        end,
+    % RISC-V: bge Reg, Temp, offset (branch if Reg >= Val, i.e., NOT less than)
+    % Load immediate into a temp register for comparison (Temp stays in available_regs)
+    [Temp | _] = State0#state.available_regs,
+    OffsetBefore = StreamModule:offset(Stream0),
+    State1 = mov_immediate(State0, Temp, Val),
+    Stream1 = State1#state.stream,
+    BranchDelta = StreamModule:offset(Stream1) - OffsetBefore,
+    BranchInstr = <<16#FFFFFFFF:32/little>>,
+    Stream2 = StreamModule:append(Stream1, BranchInstr),
+    State2 = if_block_free_reg(RegOrTuple, State1),
+    State3 = State2#state{stream = Stream2},
+    {State3, {bge, Reg, Temp}, BranchDelta};
+if_block_cond(
+    #state{stream_module = StreamModule, stream = Stream0, available_regs = [Temp | _]} = State0,
+    {RegOrTuple, '<', Val}
+) when is_integer(Val) ->
+    Reg =
+        case RegOrTuple of
+            {free, Reg0} -> Reg0;
+            RegOrTuple -> RegOrTuple
+        end,
+    % RISC-V: bge Reg, Temp, offset (branch if Reg >= Temp, i.e., NOT less than)
+    OffsetBefore = StreamModule:offset(Stream0),
+    State1 = mov_immediate(State0, Temp, Val),
+    Stream1 = State1#state.stream,
+    BranchDelta = StreamModule:offset(Stream1) - OffsetBefore,
+    BranchInstr = <<16#FFFFFFFF:32/little>>,
+    Stream2 = StreamModule:append(Stream1, BranchInstr),
+    State2 = if_block_free_reg(RegOrTuple, State1),
+    State3 = State2#state{stream = Stream2},
+    {State3, {bge, Reg, Temp}, BranchDelta};
+if_block_cond(
+    #state{stream_module = StreamModule, stream = Stream0, available_regs = [Temp | _]} = State0,
+    {Val, '<', RegOrTuple}
+) when is_integer(Val), Val >= 0, Val =< 255 ->  % NOTE(review): same body as the next clause
+    Reg =
+        case RegOrTuple of
+            {free, Reg0} -> Reg0;
+            RegOrTuple -> RegOrTuple
+        end,
+    % RISC-V: bge Temp, Reg, offset (branch if Val >= Reg, i.e., NOT Val < Reg)
+    OffsetBefore = StreamModule:offset(Stream0),
+    State1 = mov_immediate(State0, Temp, Val),
+    Stream1 = State1#state.stream,
+    BranchDelta = StreamModule:offset(Stream1) - OffsetBefore,
+    BranchInstr = <<16#FFFFFFFF:32/little>>,
+    Stream2 = StreamModule:append(Stream1, BranchInstr),
+    State2 = if_block_free_reg(RegOrTuple, State1),
+    State3 = State2#state{stream = Stream2},
+    {State3, {bge, Temp, Reg}, BranchDelta};
+if_block_cond(
+    #state{stream_module = StreamModule, stream = Stream0, available_regs = [Temp | _]} = State0,
+    {Val, '<', RegOrTuple}
+) when is_integer(Val) ->
+    Reg =
+        case RegOrTuple of
+            {free, Reg0} -> Reg0;
+            RegOrTuple -> RegOrTuple
+        end,
+    % RISC-V: bge Temp, Reg, offset (branch if Val >= Reg, i.e., NOT Val < Reg)
+    OffsetBefore = StreamModule:offset(Stream0),
+    State1 = mov_immediate(State0, Temp, Val),
+    Stream1 = State1#state.stream,
+    BranchDelta = StreamModule:offset(Stream1) - OffsetBefore,
+    BranchInstr = <<16#FFFFFFFF:32/little>>,
+    Stream2 = StreamModule:append(Stream1, BranchInstr),
+    State2 = if_block_free_reg(RegOrTuple, State1),
+    State3 = State2#state{stream = Stream2},
+    {State3, {bge, Temp, Reg}, BranchDelta};
+if_block_cond(
+    #state{stream_module = StreamModule, stream = Stream0} = State0,
+    {RegOrTuple, '<', RegB}
+) when is_atom(RegB) ->
+    Reg =
+        case RegOrTuple of
+            {free, Reg0} -> Reg0;
+            RegOrTuple -> RegOrTuple
+        end,
+    % RISC-V: bge Reg, RegB, offset (branch if Reg >= RegB, i.e., NOT less than)
+    BranchInstr = <<16#FFFFFFFF:32/little>>,
+    Stream1 = StreamModule:append(Stream0, BranchInstr),
+    State1 = if_block_free_reg(RegOrTuple, State0),
+    State2 = State1#state{stream = Stream1},
+    {State2, {bge, Reg, RegB}, 0};
+if_block_cond(
+    #state{stream_module = StreamModule, stream = Stream0} = State0, {RegOrTuple, '==', 0}
+) ->
+    Reg =
+        case RegOrTuple of
+            {free, Reg0} -> Reg0;
+            RegOrTuple -> RegOrTuple
+        end,
+    %% RISC-V: bne Reg, zero, offset (branch if Reg != 0, i.e., NOT equal to 0)
+    BranchInstr = <<16#FFFFFFFF:32/little>>,
+    Stream1 = StreamModule:append(Stream0, BranchInstr),
+    State1 = if_block_free_reg(RegOrTuple, State0),
+    State2 = State1#state{stream = Stream1},
+    {State2, {bne, Reg, zero}, 0};
+if_block_cond(
+    #state{stream_module = StreamModule, stream = Stream0} = State0,
+    {RegOrTuple, '==', RegB}
+) when is_atom(RegB) ->
+    Reg =
+        case RegOrTuple of
+            {free, Reg0} -> Reg0;
+            RegOrTuple -> RegOrTuple
+        end,
+    %% RISC-V: bne Reg, RegB, offset (branch if Reg != RegB, i.e., NOT equal)
+    BranchInstr = <<16#FFFFFFFF:32/little>>,
+    Stream1 = StreamModule:append(Stream0, BranchInstr),
+    State1 = if_block_free_reg(RegOrTuple, State0),
+    State2 = State1#state{stream = Stream1},
+    {State2, {bne, Reg, RegB}, 0};
+%% Delegate (int) forms to regular forms since we only have 32-bit words
+if_block_cond(State, {'(int)', RegOrTuple, '==', 0}) ->
+    if_block_cond(State, {RegOrTuple, '==', 0});
+if_block_cond(State, {'(int)', RegOrTuple, '==', Val}) when is_integer(Val) ->
+    if_block_cond(State, {RegOrTuple, '==', Val});
+if_block_cond(
+    #state{stream_module = StreamModule, stream = Stream0, available_regs = [Temp | _]} = State0,
+    {RegOrTuple, '!=', Val}
+) when is_integer(Val) andalso Val >= 0 andalso Val =< 255 ->  % NOTE(review): same body as the general integer '!=' clause further down
+    Reg =
+        case RegOrTuple of
+            {free, Reg0} -> Reg0;
+            RegOrTuple -> RegOrTuple
+        end,
+    %% RISC-V: Load immediate into temp, then beq Reg, Temp, offset
+    OffsetBefore = StreamModule:offset(Stream0),
+    State1 = mov_immediate(State0, Temp, Val),
+    Stream1 = State1#state.stream,
+    BranchDelta = StreamModule:offset(Stream1) - OffsetBefore,
+    BranchInstr = <<16#FFFFFFFF:32/little>>,
+    Stream2 = StreamModule:append(Stream1, BranchInstr),
+    State2 = if_block_free_reg(RegOrTuple, State1),
+    State3 = State2#state{stream = Stream2},
+    {State3, {beq, Reg, Temp}, BranchDelta};
+if_block_cond(
+    #state{stream_module = StreamModule, stream = Stream0} = State0,
+    {RegOrTuple, '!=', Val}
+) when ?IS_GPR(Val) ->
+    Reg =
+        case RegOrTuple of
+            {free, Reg0} -> Reg0;
+            RegOrTuple -> RegOrTuple
+        end,
+    %% RISC-V: beq Reg, Val, offset (branch if Reg == Val, i.e., NOT not-equal)
+    BranchInstr = <<16#FFFFFFFF:32/little>>,
+    Stream1 = StreamModule:append(Stream0, BranchInstr),
+    State1 = if_block_free_reg(RegOrTuple, State0),
+    State2 = State1#state{stream = Stream1},
+    {State2, {beq, Reg, Val}, 0};
+if_block_cond(State, {'(int)', RegOrTuple, '!=', Val}) when is_integer(Val) ->
+    if_block_cond(State, {RegOrTuple, '!=', Val});
+if_block_cond(
+    #state{stream_module = StreamModule, stream = Stream0, available_regs = [Temp | _]} = State0,
+    {RegOrTuple, '==', Val}
+) when is_integer(Val) andalso Val >= 0 andalso Val =< 255 ->  % NOTE(review): same body as the general integer '==' clause further down
+    Reg =
+        case RegOrTuple of
+            {free, Reg0} -> Reg0;
+            RegOrTuple -> RegOrTuple
+        end,
+    %% RISC-V: Load immediate into temp, then bne Reg, Temp, offset
+    OffsetBefore = StreamModule:offset(Stream0),
+    State1 = mov_immediate(State0, Temp, Val),
+    Stream1 = State1#state.stream,
+    BranchDelta = StreamModule:offset(Stream1) - OffsetBefore,
+    BranchInstr = <<16#FFFFFFFF:32/little>>,
+    Stream2 = StreamModule:append(Stream1, BranchInstr),
+    State2 = if_block_free_reg(RegOrTuple, State1),
+    State3 = State2#state{stream = Stream2},
+    {State3, {bne, Reg, Temp}, BranchDelta};
+if_block_cond(
+    #state{stream_module = StreamModule, stream = Stream0} = State0,
+    {{free, RegA}, '==', {free, RegB}}
+) ->
+    %% RISC-V: bne RegA, RegB, offset (branch if RegA != RegB, i.e., NOT equal)
+    BranchInstr = <<16#FFFFFFFF:32/little>>,
+    Stream1 = StreamModule:append(Stream0, BranchInstr),
+    State1 = State0#state{stream = Stream1},
+    State2 = if_block_free_reg({free, RegA}, State1),
+    State3 = if_block_free_reg({free, RegB}, State2),
+    {State3, {bne, RegA, RegB}, 0};
+if_block_cond(
+    #state{stream_module = StreamModule, stream = Stream0, available_regs = [Temp | _]} = State0,
+    {RegOrTuple, '==', Val}
+) when is_integer(Val) ->
+    Reg =
+        case RegOrTuple of
+            {free, Reg0} -> Reg0;
+            RegOrTuple -> RegOrTuple
+        end,
+    OffsetBefore = StreamModule:offset(Stream0),
+    State1 = mov_immediate(State0, Temp, Val),
+    Stream1 = State1#state.stream,
+    BranchDelta = StreamModule:offset(Stream1) - OffsetBefore,
+    %% RISC-V: bne Reg, Temp, offset (branch if Reg != Temp, i.e., NOT equal)
+    BranchInstr = <<16#FFFFFFFF:32/little>>,
+    Stream2 = StreamModule:append(Stream1, BranchInstr),
+    State2 = if_block_free_reg(RegOrTuple, State1),
+    State3 = State2#state{stream = Stream2},
+    {State3, {bne, Reg, Temp}, BranchDelta};
+if_block_cond(
+    #state{stream_module = StreamModule, stream = Stream0, available_regs = [Temp | _]} = State0,
+    {RegOrTuple, '!=', Val}
+) when is_integer(Val) ->
+    Reg =
+        case RegOrTuple of
+            {free, Reg0} -> Reg0;
+            RegOrTuple -> RegOrTuple
+        end,
+    OffsetBefore = StreamModule:offset(Stream0),
+    State1 = mov_immediate(State0, Temp, Val),
+    Stream1 = State1#state.stream,
+    BranchDelta = StreamModule:offset(Stream1) - OffsetBefore,
+    %% RISC-V: beq Reg, Temp, offset (branch if Reg == Temp, i.e., NOT not-equal)
+    BranchInstr = <<16#FFFFFFFF:32/little>>,
+    Stream2 = StreamModule:append(Stream1, BranchInstr),
+    State2 = if_block_free_reg(RegOrTuple, State1),
+    State3 = State2#state{stream = Stream2},
+    {State3, {beq, Reg, Temp}, BranchDelta};
+if_block_cond(
+    #state{
+        stream_module = StreamModule,
+        stream = Stream0,
+        available_regs = [Temp | _]
+    } = State0,
+    {'(bool)', RegOrTuple, '==', false}
+) ->
+    Reg =
+        case RegOrTuple of
+            {free, Reg0} -> Reg0;
+            RegOrTuple -> RegOrTuple
+        end,
+    %% RISC-V: Test bit 0 by shifting to MSB, then branch if negative (bit was 1, NOT false)
+    I1 = jit_riscv32_asm:slli(Temp, Reg, 31),
+    Stream1 = StreamModule:append(Stream0, I1),
+    BranchInstr = <<16#FFFFFFFF:32/little>>,
+    Stream2 = StreamModule:append(Stream1, BranchInstr),
+    State1 = if_block_free_reg(RegOrTuple, State0),
+    State2 = State1#state{stream = Stream2},
+    {State2, {blt, Temp, zero}, byte_size(I1)};
+if_block_cond(
+    #state{
+        stream_module = StreamModule,
+        stream = Stream0,
+        available_regs = [Temp | _]
+    } = State0,
+    {'(bool)', RegOrTuple, '!=', false}
+) ->
+    Reg =
+        case RegOrTuple of
+            {free, Reg0} -> Reg0;
+            RegOrTuple -> RegOrTuple
+        end,
+    %% RISC-V: Test bit 0 by shifting to MSB, then branch if non-negative (bit was 0, NOT true)
+    I1 = jit_riscv32_asm:slli(Temp, Reg, 31),
+    Stream1 = StreamModule:append(Stream0, I1),
+    BranchInstr = <<16#FFFFFFFF:32/little>>,
+    Stream2 = StreamModule:append(Stream1, BranchInstr),
+    State1 = if_block_free_reg(RegOrTuple, State0),
+    State2 = State1#state{stream = Stream2},
+    {State2, {bge, Temp, zero}, byte_size(I1)};
+if_block_cond(
+    #state{
+        stream_module = StreamModule,
+        stream = Stream0,
+        available_regs = [Temp | _]
+    } = State0,
+    {RegOrTuple, '&', Val, '!=', 0}
+) ->
+    Reg =
+        case RegOrTuple of
+            {free, Reg0} -> Reg0;
+            RegOrTuple -> RegOrTuple
+        end,
+    %% RISC-V: Test bits using ANDI or li+and
+    TestCode =
+        if
+            Val >= -2048 andalso Val =< 2047 ->
+                %% Can use ANDI instruction directly
+                jit_riscv32_asm:andi(Temp, Reg, Val);
+            true ->
+                %% Need to load immediate into temp register first
+                TestCode0 = jit_riscv32_asm:li(Temp, Val),
+                TestCode1 = jit_riscv32_asm:and_(Temp, Reg, Temp),
+                <<TestCode0/binary, TestCode1/binary>>
+        end,
+    OffsetBefore = StreamModule:offset(Stream0),
+    Stream1 = StreamModule:append(Stream0, TestCode),
+    BranchDelta = StreamModule:offset(Stream1) - OffsetBefore,
+    %% Branch if result is zero (no bits set, NOT != 0)
+    BranchInstr = <<16#FFFFFFFF:32/little>>,
+    Stream2 = StreamModule:append(Stream1, BranchInstr),
+    State1 = if_block_free_reg(RegOrTuple, State0),
+    State2 = State1#state{stream = Stream2},
+    {State2, {beq, Temp, zero}, BranchDelta};
+if_block_cond(
+    #state{
+        stream_module = StreamModule,
+        stream = Stream0,
+        available_regs = [Temp | _]
+    } = State0,
+    {Reg, '&', 16#F, '!=', 16#F}
+) when ?IS_GPR(Reg) ->
+    %% RISC-V: Special case Reg & ?TERM_IMMED_TAG_MASK != ?TERM_INTEGER_TAG
+    I1 = jit_riscv32_asm:not_(Temp, Reg),  % low 4 bits of Temp are all zero iff Reg band 16#F =:= 16#F
+    I2 = jit_riscv32_asm:slli(Temp, Temp, 28),  % keep only those 4 bits: Temp is zero iff the low nibble of Reg is 16#F
+    Stream1 = StreamModule:append(Stream0, <<I1/binary, I2/binary>>),
+    BranchInstr = <<16#FFFFFFFF:32/little>>,
+    Stream2 = StreamModule:append(Stream1, BranchInstr),
+    State1 = State0#state{stream = Stream2},
+    {State1, {beq, Temp, zero}, byte_size(I1) + byte_size(I2)};
+if_block_cond(
+    #state{
+        stream_module = StreamModule,
+        stream = Stream0
+    } = State0,
+    {{free, Reg} = RegTuple, '&', 16#F, '!=', 16#F}
+) when ?IS_GPR(Reg) ->
+    %% RISC-V: Special case Reg & ?TERM_IMMED_TAG_MASK != ?TERM_INTEGER_TAG
+    I1 = jit_riscv32_asm:not_(Reg, Reg),  % Reg is being freed, so it is overwritten instead of using a temp
+    I2 = jit_riscv32_asm:slli(Reg, Reg, 28),  % Reg is zero iff its original low nibble was 16#F
+    Stream1 = StreamModule:append(Stream0, <<I1/binary, I2/binary>>),
+    BranchInstr = <<16#FFFFFFFF:32/little>>,
+    Stream2 = StreamModule:append(Stream1, BranchInstr),
+    State1 = State0#state{stream = Stream2},
+    State2 = if_block_free_reg(RegTuple, State1),
+    {State2, {beq, Reg, zero}, byte_size(I1) + byte_size(I2)};
+if_block_cond(
+    #state{
+        stream_module = StreamModule,
+        stream = Stream0,
+        available_regs = [Temp | AT]
+    } = State0,
+    {Reg, '&', Mask, '!=', Val}
+) when ?IS_GPR(Reg) ->
+    %% RISC-V: AND with mask, then compare with value
+    OffsetBefore = StreamModule:offset(Stream0),
+    I1 = jit_riscv32_asm:mv(Temp, Reg),
+    Stream1 = StreamModule:append(Stream0, I1),
+    State1 = State0#state{stream = Stream1},
+    {State2, Temp} = and_(State1#state{available_regs = AT}, {free, Temp}, Mask),
+    Stream2 = State2#state.stream,
+    %% Compare Temp with Val and branch if equal (NOT != Val)
+    case Val of
+        0 ->  % NOTE(review): looks unreachable, the earlier {_, '&', _, '!=', 0} clause matches first
+            %% Optimize comparison with zero
+            BranchDelta = StreamModule:offset(Stream2) - OffsetBefore,
+            BranchInstr = <<16#FFFFFFFF:32/little>>,
+            Stream3 = StreamModule:append(Stream2, BranchInstr),
+            State3 = State2#state{
+                stream = Stream3, available_regs = [Temp | State2#state.available_regs]
+            },
+            {State3, {beq, Temp, zero}, BranchDelta};
+        _ when ?IS_GPR(Val) ->
+            %% Val is a register
+            BranchDelta = StreamModule:offset(Stream2) - OffsetBefore,
+            BranchInstr = <<16#FFFFFFFF:32/little>>,
+            Stream3 = StreamModule:append(Stream2, BranchInstr),
+            State3 = State2#state{
+                stream = Stream3, available_regs = [Temp | State2#state.available_regs]
+            },
+            {State3, {beq, Temp, Val}, BranchDelta};
+        _ ->
+            %% Val is an immediate - need second temp register
+            %% Reuse the mask register for the comparison value
+            [MaskReg | AT2] = AT,
+            State3 = mov_immediate(State2#state{available_regs = AT2}, MaskReg, Val),
+            Stream3 = State3#state.stream,
+            BranchDelta = StreamModule:offset(Stream3) - OffsetBefore,
+            BranchInstr = <<16#FFFFFFFF:32/little>>,
+            Stream4 = StreamModule:append(Stream3, BranchInstr),
+            State4 = State3#state{
+                stream = Stream4, available_regs = [Temp, MaskReg | State3#state.available_regs]
+            },
+            {State4, {beq, Temp, MaskReg}, BranchDelta}
+    end;
+if_block_cond(
+    #state{
+        stream_module = StreamModule,
+        stream = Stream0,
+        available_regs = AvailRegs
+    } = State0,
+    {{free, Reg} = RegTuple, '&', Mask, '!=', Val}
+) when ?IS_GPR(Reg) ->
+    %% RISC-V: AND with mask, then compare with value
+    OffsetBefore = StreamModule:offset(Stream0),
+    {State1, Reg} = and_(State0, RegTuple, Mask),
+    Stream1 = State1#state.stream,
+    %% Compare Reg with Val and branch if equal (NOT != Val)
+    case Val of
+        0 ->  % NOTE(review): only reachable when available_regs is empty, else the earlier {_, '&', _, '!=', 0} clause matches first
+            %% Optimize comparison with zero
+            BranchDelta = StreamModule:offset(Stream1) - OffsetBefore,
+            BranchInstr = <<16#FFFFFFFF:32/little>>,
+            Stream2 = StreamModule:append(Stream1, BranchInstr),
+            State2 = State1#state{stream = Stream2},
+            State3 = if_block_free_reg(RegTuple, State2),
+            {State3, {beq, Reg, zero}, BranchDelta};
+        _ when ?IS_GPR(Val) ->
+            %% Val is a register
+            BranchDelta = StreamModule:offset(Stream1) - OffsetBefore,
+            BranchInstr = <<16#FFFFFFFF:32/little>>,
+            Stream2 = StreamModule:append(Stream1, BranchInstr),
+            State2 = State1#state{stream = Stream2},
+            State3 = if_block_free_reg(RegTuple, State2),
+            {State3, {beq, Reg, Val}, BranchDelta};
+        _ ->
+            %% Val is an immediate - need temp register
+            %% Reuse the mask register for the comparison value
+            [MaskReg | AT] = State1#state.available_regs,
+            State2 = mov_immediate(State1#state{available_regs = AT}, MaskReg, Val),
+            Stream2 = State2#state.stream,
+            BranchDelta = StreamModule:offset(Stream2) - OffsetBefore,
+            BranchInstr = <<16#FFFFFFFF:32/little>>,
+            Stream3 = StreamModule:append(Stream2, BranchInstr),
+            State3 = State2#state{stream = Stream3, available_regs = AvailRegs},
+            State4 = if_block_free_reg(RegTuple, State3),
+            {State4, {beq, Reg, MaskReg}, BranchDelta}
+    end.
+
+%% Release the register of a `{free, Reg}' condition operand by passing it to
+%% free_reg/3; a plain register operand leaves the state untouched.
+-spec if_block_free_reg(riscv32_register() | {free, riscv32_register()}, state()) -> state().
+if_block_free_reg({free, Reg}, State) ->
+    % free_reg/3 computes the new available/used register lists.
+    {Available, Used} = free_reg(State#state.available_regs, State#state.used_regs, Reg),
+    State#state{available_regs = Available, used_regs = Used};
+if_block_free_reg(Reg, State) when ?IS_GPR(Reg) ->
+    % Plain register operand: it is not released, so the state is unchanged.
+    State.
+
+%% Ensure each listed register is in `used_regs' (prepending it if missing) and,
+%% when it had to be added, no longer in `available_regs'.
+-spec merge_used_regs(state(), [riscv32_register()]) -> state().
+merge_used_regs(State, Regs) ->
+    lists:foldl(
+        fun(Reg, #state{used_regs = Used, available_regs = Avail} = Acc) ->
+            case lists:member(Reg, Used) of
+                true ->
+                    Acc;
+                false ->
+                    Acc#state{used_regs = [Reg | Used], available_regs = lists:delete(Reg, Avail)}
+            end
+        end,
+        State,
+        Regs
+    ).
+
+%%-----------------------------------------------------------------------------
+%% @doc Emit a logical shift right of a register by a fixed number of bits,
+%% effectively dividing its unsigned value by 2^Shift. A `{free, Reg}' operand
+%% is shifted in place; any other register is left untouched and the result is
+%% written to a register taken from the available registers.
+%% @param State current state
+%% @param Reg register to shift
+%% @param Shift number of bits to shift
+%% @return tuple of the new state and of the register holding the result
+%%-----------------------------------------------------------------------------
+-spec shift_right(#state{}, maybe_free_riscv32_register(), non_neg_integer()) ->
+    {#state{}, riscv32_register()}.
+shift_right(#state{stream_module = StreamModule, stream = Stream0} = State, {free, Reg}, Shift) when
+    ?IS_GPR(Reg) andalso is_integer(Shift)
+->
+    % Free source register: shift it in place, register bookkeeping is unchanged.
+    ShiftInstr = jit_riscv32_asm:srli(Reg, Reg, Shift),
+    {State#state{stream = StreamModule:append(Stream0, ShiftInstr)}, Reg};
+shift_right(#state{available_regs = [Dest | Remaining]} = State, Reg, Shift) when
+    ?IS_GPR(Reg) andalso is_integer(Shift)
+->
+    % The source must be preserved: take the first available register as
+    % destination and mark it as used.
+    #state{stream_module = StreamModule, stream = Stream0, used_regs = Used} = State,
+    ShiftInstr = jit_riscv32_asm:srli(Dest, Reg, Shift),
+    State1 = State#state{
+        stream = StreamModule:append(Stream0, ShiftInstr),
+        available_regs = Remaining,
+        used_regs = [Dest | Used]
+    },
+    {State1, Dest}.
+
+%%-----------------------------------------------------------------------------
+%% @doc Emit a shift register left by a fixed number of bits, effectively
+%% multiplying it by 2^Shift. The register is shifted in place.
+%% @param State current state
+%% @param Reg register to shift
+%% @param Shift number of bits to shift
+%% @return new state
+%%-----------------------------------------------------------------------------
+-spec shift_left(#state{}, riscv32_register(), non_neg_integer()) -> #state{}.
+shift_left(#state{stream_module = StreamModule, stream = Stream0} = State, Reg, Shift) when
+    is_atom(Reg)
+->
+    Stream1 = StreamModule:append(Stream0, jit_riscv32_asm:slli(Reg, Reg, Shift)),
+    State#state{stream = Stream1}.
+
+%%-----------------------------------------------------------------------------
+%% @doc Emit a call to a function pointer with arguments. This function converts
+%% arguments and passes them following the backend ABI convention.
+%% @end
+%% @param State current backend state
+%% @param FuncPtrTuple either {free, Reg} or {primitive, PrimitiveIndex}
+%% @param Args arguments to pass to the function
+%% @return Updated backend state and return register
+%%-----------------------------------------------------------------------------
+-spec call_func_ptr(state(), {free, riscv32_register()} | {primitive, non_neg_integer()}, [arg()]) ->
+    {state(), riscv32_register()}.
+call_func_ptr(
+    #state{
+        stream_module = StreamModule,
+        stream = Stream0,
+        available_regs = AvailableRegs0,
+        used_regs = UsedRegs0
+    } = State0,
+    FuncPtrTuple,
+    Args
+) ->
+    FreeRegs = lists:flatmap(
+        fun
+            ({free, {ptr, Reg}}) -> [Reg];
+            ({free, Reg}) when is_atom(Reg) -> [Reg];
+            (_) -> []
+        end,
+        [FuncPtrTuple | Args]
+    ),
+    UsedRegs1 = UsedRegs0 -- FreeRegs,
+    % Save RA (like AArch64 saves LR) so it's preserved across jalr calls
+    SavedRegs = [?RA_REG, ?CTX_REG, ?JITSTATE_REG, ?NATIVE_INTERFACE_REG | UsedRegs1],
+
+    % Calculate available registers
+    FreeGPRegs = FreeRegs -- (FreeRegs -- ?AVAILABLE_REGS),
+    AvailableRegs1 = FreeGPRegs ++ AvailableRegs0,
+
+    % Calculate stack space: round up to 16-byte boundary for RISC-V ABI
+    NumRegs = length(SavedRegs),
+    StackBytes = NumRegs * 4,
+    AlignedStackBytes = ((StackBytes + 15) div 16) * 16,
+
+    Stream1 = push_registers(SavedRegs, AlignedStackBytes, StreamModule, Stream0),
+
+    % Set up arguments following RISC-V ILP32 calling convention
+    % Arguments are passed in a0-a7 (up to 8 register arguments)
+    Args1 = lists:map(
+        fun(Arg) ->
+            case Arg of
+                offset -> StreamModule:offset(Stream1);
+                _ -> Arg
+            end
+        end,
+        Args
+    ),
+
+    RegArgs0 = Args1,
+    RegArgsRegs = lists:flatmap(fun arg_to_reg_list/1, RegArgs0),
+
+    % We pushed registers to stack, so we can use these registers we saved
+    % and the currently available registers
+    SetArgsRegsOnlyAvailableArgs = (UsedRegs1 -- RegArgsRegs) ++ AvailableRegs0,
+    State1 = State0#state{
+        available_regs = SetArgsRegsOnlyAvailableArgs,
+        used_regs = ?AVAILABLE_REGS -- SetArgsRegsOnlyAvailableArgs,
+        stream = Stream1
+    },
+
+    ParameterRegs = parameter_regs(RegArgs0),
+    {Stream3, SetArgsAvailableRegs, FuncPtrReg, RegArgs} =
+        case FuncPtrTuple of
+            {free, FuncPtrReg0} ->
+                % If FuncPtrReg is in parameter regs, we must swap it with a free reg.
+                case lists:member(FuncPtrReg0, ParameterRegs) of
+                    true ->
+                        case SetArgsRegsOnlyAvailableArgs -- ParameterRegs of
+                            [] ->
+                                % Swap SetArgsRegsOnlyAvailableArgs with a reg used in RegArgs0
+                                % that is not in ParameterRegs
+                                [NewArgReg | _] = SetArgsRegsOnlyAvailableArgs,
+                                [FuncPtrReg1 | _] = RegArgsRegs -- ParameterRegs,
+                                MovInstr1 = jit_riscv32_asm:mv(NewArgReg, FuncPtrReg1),
+                                MovInstr2 = jit_riscv32_asm:mv(FuncPtrReg1, FuncPtrReg0),
+                                SetArgsAvailableArgs1 =
+                                    (SetArgsRegsOnlyAvailableArgs -- [FuncPtrReg1]) ++
+                                        [FuncPtrReg0],
+                                RegArgs1 = replace_reg(RegArgs0, FuncPtrReg1, NewArgReg),
+                                {
+                                    StreamModule:append(
+                                        State1#state.stream, <<MovInstr1/binary, MovInstr2/binary>>
+                                    ),
+                                    SetArgsAvailableArgs1,
+                                    FuncPtrReg1,
+                                    RegArgs1
+                                };
+                            [FuncPtrReg1 | _] ->
+                                MovInstr = jit_riscv32_asm:mv(FuncPtrReg1, FuncPtrReg0),
+                                SetArgsAvailableArgs1 =
+                                    (SetArgsRegsOnlyAvailableArgs -- [FuncPtrReg1]) ++
+                                        [FuncPtrReg0],
+                                {
+                                    StreamModule:append(State1#state.stream, MovInstr),
+                                    SetArgsAvailableArgs1,
+                                    FuncPtrReg1,
+                                    RegArgs0
+                                }
+                        end;
+                    false ->
+                        SetArgsAvailableArgs1 = SetArgsRegsOnlyAvailableArgs -- [FuncPtrReg0],
+                        {State1#state.stream, SetArgsAvailableArgs1, FuncPtrReg0, RegArgs0}
+                end;
+            {primitive, Primitive} ->
+                [FuncPtrReg0 | _] = SetArgsRegsOnlyAvailableArgs -- ParameterRegs,
+                SetArgsAvailableRegs1 = SetArgsRegsOnlyAvailableArgs -- [FuncPtrReg0],
+                PrepCall = load_primitive_ptr(Primitive, FuncPtrReg0),
+                Stream2 = StreamModule:append(State1#state.stream, PrepCall),
+                {Stream2, SetArgsAvailableRegs1, FuncPtrReg0, RegArgs0}
+        end,
+
+    State3 = State1#state{
+        available_regs = SetArgsAvailableRegs,
+        used_regs = ?AVAILABLE_REGS -- SetArgsAvailableRegs,
+        stream = Stream3
+    },
+
+    StackOffset = AlignedStackBytes,
+    State4 = set_registers_args(State3, RegArgs, ParameterRegs, StackOffset),
+    Stream4 = State4#state.stream,
+
+    % Call the function pointer (using JALR for call with return)
+    Call = jit_riscv32_asm:jalr(ra, FuncPtrReg, 0),
+    Stream5 = StreamModule:append(Stream4, Call),
+
+    % For result, we need a free register (including FuncPtrReg).
+    % If none are available (all registers were pushed to the stack),
+    % we write the result to the stack position of FuncPtrReg
+    {Stream6, UsedRegs2} =
+        case length(SavedRegs) of
+            N when N >= 7 andalso element(1, FuncPtrTuple) =:= free ->
+                % We use original FuncPtrReg then as we know it's available.
+                % Calculate stack offset: find register index in SavedRegs * 4 bytes
+                ResultReg = element(2, FuncPtrTuple),
+                RegIndex = index_of(ResultReg, SavedRegs),
+                case RegIndex >= 0 of
+                    true ->
+                        StoreResultStackOffset = RegIndex * 4,
+                        StoreResult = jit_riscv32_asm:sw(sp, a0, StoreResultStackOffset),
+                        {StreamModule:append(Stream5, StoreResult), [ResultReg | UsedRegs1]};
+                    false ->
+                        % FuncPtrReg was not in SavedRegs, use an available register
+                        [ResultReg1 | _] = AvailableRegs1 -- SavedRegs,
+                        MoveResult = jit_riscv32_asm:mv(ResultReg1, a0),
+                        {StreamModule:append(Stream5, MoveResult), [ResultReg1 | UsedRegs1]}
+                end;
+            _ ->
+                % Use any free that is not in SavedRegs
+                [ResultReg | _] = AvailableRegs1 -- SavedRegs,
+                MoveResult = jit_riscv32_asm:mv(ResultReg, a0),
+                {StreamModule:append(Stream5, MoveResult), [ResultReg | UsedRegs1]}
+        end,
+
+    Stream8 = pop_registers(SavedRegs, AlignedStackBytes, StreamModule, Stream6),
+
+    AvailableRegs2 = lists:delete(ResultReg, AvailableRegs1),
+    AvailableRegs3 = ?AVAILABLE_REGS -- (?AVAILABLE_REGS -- AvailableRegs2),
+    {
+        State4#state{
+            stream = Stream8,
+            available_regs = AvailableRegs3,
+            used_regs = UsedRegs2
+        },
+        ResultReg
+    }.
+
+% Return the native register referenced by a call argument as a list of zero
+% or one element: the register of a freed pointer, a freed register or a
+% plain register atom. Any other argument yields [].
+arg_to_reg_list(Arg) ->
+    case Arg of
+        {free, {ptr, PtrReg}} -> [PtrReg];
+        {free, FreedReg} when is_atom(FreedReg) -> [FreedReg];
+        PlainReg when is_atom(PlainReg) -> [PlainReg];
+        _ -> []
+    end.
+
+% Zero-based position of the first element of Haystack that is exactly equal
+% to Needle, or -1 when there is none.
+index_of(Needle, Haystack) -> index_of(Needle, Haystack, 0).
+
+index_of(_Needle, [], _Pos) -> -1;
+index_of(Needle, [Head | _], Pos) when Head =:= Needle -> Pos;
+index_of(Needle, [_ | Tail], Pos) -> index_of(Needle, Tail, Pos + 1).
+
+% Reserve AlignedStackBytes on the stack and spill SavedRegs into that area.
+% Emits `addi sp, sp, -AlignedStackBytes` followed by one `sw` per register,
+% in list order, at offsets 0, 4, 8, ... from sp. With no register to save
+% the stream is returned untouched.
+push_registers([], _AlignedStackBytes, _StreamModule, Stream0) ->
+    Stream0;
+push_registers(SavedRegs, AlignedStackBytes, StreamModule, Stream0) when length(SavedRegs) > 0 ->
+    Reserve = jit_riscv32_asm:addi(sp, sp, -AlignedStackBytes),
+    Slots = lists:zip(SavedRegs, lists:seq(0, 4 * (length(SavedRegs) - 1), 4)),
+    lists:foldl(
+        fun({Reg, Slot}, Acc) ->
+            StreamModule:append(Acc, jit_riscv32_asm:sw(sp, Reg, Slot))
+        end,
+        StreamModule:append(Stream0, Reserve),
+        Slots
+    ).
+
+% Reload SavedRegs from the stack and release the AlignedStackBytes area.
+% Emits one `lw` per register, in list order, from offsets 0, 4, 8, ... off
+% sp, then `addi sp, sp, AlignedStackBytes`. With no register to restore the
+% stream is returned untouched.
+pop_registers([], _AlignedStackBytes, _StreamModule, Stream0) ->
+    Stream0;
+pop_registers(SavedRegs, AlignedStackBytes, StreamModule, Stream0) when length(SavedRegs) > 0 ->
+    Slots = lists:zip(SavedRegs, lists:seq(0, 4 * (length(SavedRegs) - 1), 4)),
+    Reloaded = lists:foldl(
+        fun({Reg, Slot}, Acc) ->
+            StreamModule:append(Acc, jit_riscv32_asm:lw(Reg, sp, Slot))
+        end,
+        Stream0,
+        Slots
+    ),
+    StreamModule:append(Reloaded, jit_riscv32_asm:addi(sp, sp, AlignedStackBytes)).
+
+% Convenience wrapper: derive the parameter registers from Args with
+% parameter_regs/1, then delegate to set_registers_args/4.
+set_registers_args(State0, Args, StackOffset) ->
+    set_registers_args(State0, Args, parameter_regs(Args), StackOffset).
+
+% Emit the code that moves every element of Args into the matching register
+% of ParamRegs and return the state with its register bookkeeping updated.
+%
+% State0      backend state; its used_regs seed the bookkeeping
+% Args        values to pass (registers, {free, _} wrappers, immediates, ...)
+% ParamRegs   target registers, one per 32-bit argument slot
+% StackOffset forwarded unchanged to set_registers_args0/6
+%
+% In the returned state, registers passed as {free, Reg} or
+% {free, {ptr, Reg}} are no longer counted as used, every parameter register
+% is counted as used, and available_regs holds the ?AVAILABLE_REGS entries
+% that are neither parameter registers nor still in use.
+set_registers_args(
+    #state{used_regs = UsedRegs} = State0,
+    Args,
+    ParamRegs,
+    StackOffset
+) ->
+    ArgsRegs = args_regs(Args),
+    % Scratch registers that are not a target, not a source and not in use:
+    % safe to clobber while shuffling arguments.
+    AvailableScratchGP = ((?SCRATCH_REGS -- ParamRegs) -- ArgsRegs) -- UsedRegs,
+    State1 = set_registers_args0(
+        State0, Args, ArgsRegs, ParamRegs, AvailableScratchGP, StackOffset
+    ),
+    NewUsedRegs = lists:foldl(
+        fun
+            ({free, {ptr, Reg}}, AccUsed) -> lists:delete(Reg, AccUsed);
+            ({free, Reg}, AccUsed) -> lists:delete(Reg, AccUsed);
+            (_, AccUsed) -> AccUsed
+        end,
+        UsedRegs,
+        Args
+    ),
+    State1#state{
+        % `--` is right-associative: without the parentheses this would be
+        % ?AVAILABLE_REGS -- (ParamRegs -- NewUsedRegs), which leaves the
+        % registers still in use inside available_regs.
+        available_regs = (?AVAILABLE_REGS -- ParamRegs) -- NewUsedRegs,
+        used_regs = ParamRegs ++ (NewUsedRegs -- ParamRegs)
+    }.
+
+% Return, in order, the parameter registers consumed by Args, starting from
+% ?PARAMETER_REGS. Ordinary arguments take one register; {avm_int64_t, _}
+% arguments take two.
+parameter_regs(Args) ->
+    parameter_regs0(Args, ?PARAMETER_REGS, []).
+
+% ILP32 integer calling convention: a named 64-bit argument is passed in the
+% next two argument registers, low word in the lower-numbered one. No
+% even-register alignment is required (that rule only applies to variadic
+% arguments), so any consecutive pair is accepted.
+% If a single register is left for a 64-bit argument, only that register is
+% returned: passing the high word on the stack is not handled here.
+parameter_regs0([], _, Acc) ->
+    lists:reverse(Acc);
+parameter_regs0([{avm_int64_t, _} | T], [LowReg, HighReg | Rest], Acc) ->
+    parameter_regs0(T, Rest, [HighReg, LowReg | Acc]);
+parameter_regs0([_Other | T], [Reg | Rest], Acc) ->
+    parameter_regs0(T, Rest, [Reg | Acc]).
+
+% Replace the first argument that is Reg1, or {free, Reg1}, with the bare
+% register Reg2, leaving every other argument in place. At least one element
+% of Args must match: there is deliberately no clause for the empty list.
+replace_reg(Args, Reg1, Reg2) ->
+    replace_reg0(Args, Reg1, Reg2, []).
+
+replace_reg0([Head | Tail], Reg, Replacement, Seen) when
+    Head =:= Reg; Head =:= {free, Reg}
+->
+    lists:reverse(Seen, [Replacement | Tail]);
+replace_reg0([Head | Tail], Reg, Replacement, Seen) ->
+    replace_reg0(Tail, Reg, Replacement, [Head | Seen]).
+
+% Emit, argument by argument, the code that places Args into ParamRegs.
+%
+% State       backend state (instructions are appended to its stream)
+% Args        remaining values to pass
+% ArgsRegs    list walked in lockstep with Args; an `imm` entry is inserted
+%             for the extra word of a 64-bit immediate (presumably one entry
+%             per argument, as returned by args_regs/1 -- confirm there)
+% ParamRegs   remaining target registers
+% AvailGP     scratch registers usable to preserve a parameter register that
+%             a later argument still reads
+% StackOffset forwarded unchanged to set_registers_args1/4
+%
+% Recursion ends only when Args, ArgsRegs and ParamRegs are all exhausted
+% together.
+set_registers_args0(State, [], [], [], _AvailGP, _StackOffset) ->
+    State;
+% {free, Value} is passed like Value; the freeing itself is accounted for by
+% set_registers_args/4.
+set_registers_args0(State, [{free, FreeVal} | ArgsT], ArgsRegs, ParamRegs, AvailGP, StackOffset) ->
+    set_registers_args0(State, [FreeVal | ArgsT], ArgsRegs, ParamRegs, AvailGP, StackOffset);
+% ctx is already in ?CTX_REG and is wanted in ?CTX_REG: nothing to emit.
+set_registers_args0(
+    State, [ctx | ArgsT], [?CTX_REG | ArgsRegs], [?CTX_REG | ParamRegs], AvailGP, StackOffset
+) ->
+    set_registers_args0(State, ArgsT, ArgsRegs, ParamRegs, AvailGP, StackOffset);
+% Handle 64-bit arguments that need two registers according to ILP32:
+% the immediate is split into two signed 32-bit words, low word first, which
+% are then processed as two ordinary immediate arguments.
+set_registers_args0(
+    State,
+    [{avm_int64_t, Value} | ArgsT],
+    ArgsRegs,
+    ParamRegs,
+    AvailGP,
+    StackOffset
+) when is_integer(Value) ->
+    LowPartUnsigned = Value band 16#FFFFFFFF,
+    HighPartUnsigned = (Value bsr 32) band 16#FFFFFFFF,
+    % Convert to signed 32-bit values for RISC-V li instruction
+    LowPart =
+        if
+            LowPartUnsigned > 16#7FFFFFFF -> LowPartUnsigned - 16#100000000;
+            true -> LowPartUnsigned
+        end,
+    HighPart =
+        if
+            HighPartUnsigned > 16#7FFFFFFF -> HighPartUnsigned - 16#100000000;
+            true -> HighPartUnsigned
+        end,
+    % One argument became two, so ArgsRegs gets one extra `imm` entry.
+    set_registers_args0(
+        State, [LowPart, HighPart | ArgsT], [imm | ArgsRegs], ParamRegs, AvailGP, StackOffset
+    );
+% ctx is special as we need it to access x_reg/y_reg/fp_reg and we don't
+% want to replace it
+% Here the target is ?CTX_REG but the argument is not ctx: the match below
+% asserts that no later argument still reads ?CTX_REG before overwriting it.
+set_registers_args0(
+    State, [Arg | ArgsT], [_ArgReg | ArgsRegs], [?CTX_REG | ParamRegs], AvailGP, StackOffset
+) ->
+    false = lists:member(?CTX_REG, ArgsRegs),
+    State1 = set_registers_args1(State, Arg, ?CTX_REG, StackOffset),
+    set_registers_args0(State1, ArgsT, ArgsRegs, ParamRegs, AvailGP, StackOffset);
+% General case: set ParamReg from Arg. If a later argument still reads
+% ParamReg, its current value is first copied to a scratch register and the
+% remaining arguments are rewritten to read that copy.
+set_registers_args0(
+    #state{stream_module = StreamModule} = State0,
+    [Arg | ArgsT],
+    [_ArgReg | ArgsRegsT],
+    [ParamReg | ParamRegsT],
+    AvailGP,
+    StackOffset
+) ->
+    case lists:member(ParamReg, ArgsRegsT) of
+        false ->
+            State1 = set_registers_args1(State0, Arg, ParamReg, StackOffset),
+            set_registers_args0(State1, ArgsT, ArgsRegsT, ParamRegsT, AvailGP, StackOffset);
+        true ->
+            % Fails with badmatch if no scratch register is left.
+            [Avail | AvailGPT] = AvailGP,
+            I = jit_riscv32_asm:mv(Avail, ParamReg),
+            Stream1 = StreamModule:append(State0#state.stream, I),
+            State1 = set_registers_args1(
+                State0#state{stream = Stream1}, Arg, ParamReg, StackOffset
+            ),
+            % replace_reg/3 only rewrites the first later occurrence.
+            NewArgsT = replace_reg(ArgsT, ParamReg, Avail),
+            set_registers_args0(
+                State1, NewArgsT, ArgsRegsT, ParamRegsT, AvailGPT, StackOffset
+            )
+    end.
+
+% Emit the instruction(s) that load one argument into the parameter register
+% given as third parameter, and return the updated state. The last parameter
+% (stack offset) is ignored by every clause.
+%
+% Argument already in the requested register: nothing to emit.
+set_registers_args1(State, Reg, Reg, _Offset) ->
+    State;
+set_registers_args1(
+    #state{stream_module = StreamModule, stream = Stream0} = State,
+    jit_state,
+    ParamReg,
+    _StackOffset
+) ->
+    % jit_state is always in a1, so we only need to move it if the param reg is different
+    case ParamReg of
+        a1 ->
+            State;
+        _ ->
+            I = jit_riscv32_asm:mv(ParamReg, a1),
+            Stream1 = StreamModule:append(Stream0, I),
+            State#state{stream = Stream1}
+    end;
+% For tail calls, jit_state is already in a1
+% (only a1 is accepted as target: any other register is a function_clause
+% error for jit_state_tail_call unless a later clause matches it).
+set_registers_args1(State, jit_state_tail_call, a1, _StackOffset) ->
+    State;
+% {x_reg, extra}: load from the slot addressed by ?X_REG(?MAX_REG).
+set_registers_args1(
+    #state{stream_module = StreamModule, stream = Stream0} = State,
+    {x_reg, extra},
+    Reg,
+    _StackOffset
+) ->
+    {BaseReg, Off} = ?X_REG(?MAX_REG),
+    I = jit_riscv32_asm:lw(Reg, BaseReg, Off),
+    Stream1 = StreamModule:append(Stream0, I),
+    State#state{stream = Stream1};
+% {x_reg, X}: load from the base register and offset given by ?X_REG(X).
+set_registers_args1(
+    #state{stream_module = StreamModule, stream = Stream0} = State, {x_reg, X}, Reg, _StackOffset
+) ->
+    {XReg, X_REGOffset} = ?X_REG(X),
+    I = jit_riscv32_asm:lw(Reg, XReg, X_REGOffset),
+    Stream1 = StreamModule:append(Stream0, I),
+    State#state{stream = Stream1};
+% {ptr, Source}: load the word at offset 0 from the address held in Source.
+set_registers_args1(
+    #state{stream_module = StreamModule, stream = Stream0} = State, {ptr, Source}, Reg, _StackOffset
+) ->
+    I = jit_riscv32_asm:lw(Reg, Source, 0),
+    Stream1 = StreamModule:append(Stream0, I),
+    State#state{stream = Stream1};
+% {y_reg, Y}: delegate to ldr_y_reg/3, handing it the currently available
+% registers (presumably as temporaries -- see ldr_y_reg/3).
+set_registers_args1(
+    #state{stream_module = StreamModule, stream = Stream0, available_regs = AvailRegs} = State,
+    {y_reg, X},
+    Reg,
+    _StackOffset
+) ->
+    Code = ldr_y_reg(Reg, X, AvailRegs),
+    Stream1 = StreamModule:append(Stream0, Code),
+    State#state{stream = Stream1};
+% Argument held in another native register: plain register move.
+set_registers_args1(
+    #state{stream_module = StreamModule, stream = Stream0} = State, ArgReg, Reg, _StackOffset
+) when
+    ?IS_GPR(ArgReg)
+->
+    I = jit_riscv32_asm:mv(Reg, ArgReg),
+    Stream1 = StreamModule:append(Stream0, I),
+    State#state{stream = Stream1};
+% Integer that fits a signed or unsigned 32-bit value: load it as immediate.
+set_registers_args1(State, Value, Reg, _StackOffset) when ?IS_SIGNED_OR_UNSIGNED_INT32_T(Value) ->
+    mov_immediate(State, Reg, Value).
+
+%%-----------------------------------------------------------------------------
+%% @doc Emit a move to a vm register (x_reg, y_reg, fpreg or a pointer on x_reg)
+%% from an immediate, a native register or another vm register.
+%% Clauses that need a temporary take it from the head of available_regs and
+%% restore available_regs before returning.
+%% @end
+%% @param State current backend state
+%% @param Src value to move to vm register
+%% @param Dest vm register to move to
+%% @return Updated backend state
+%%-----------------------------------------------------------------------------
+-spec move_to_vm_register(state(), Src :: value() | vm_register(), Dest :: vm_register()) ->
+    state().
+% Native register to VM register
+% {x_reg, extra} is stored in the slot addressed by ?X_REG(?MAX_REG).
+move_to_vm_register(State0, Src, {x_reg, extra}) when is_atom(Src) ->
+    {BaseReg, Off} = ?X_REG(?MAX_REG),
+    I1 = jit_riscv32_asm:sw(BaseReg, Src, Off),
+    Stream1 = (State0#state.stream_module):append(State0#state.stream, I1),
+    State0#state{stream = Stream1};
+move_to_vm_register(State0, Src, {x_reg, X}) when is_atom(Src) ->
+    {BaseReg, Off} = ?X_REG(X),
+    I1 = jit_riscv32_asm:sw(BaseReg, Src, Off),
+    Stream1 = (State0#state.stream_module):append(State0#state.stream, I1),
+    State0#state{stream = Stream1};
+% Store through a pointer register, at offset 0.
+move_to_vm_register(State0, Src, {ptr, Reg}) when is_atom(Src) ->
+    I1 = jit_riscv32_asm:sw(Reg, Src, 0),
+    Stream1 = (State0#state.stream_module):append(State0#state.stream, I1),
+    State0#state{stream = Stream1};
+% Store to a y register: delegated to str_y_reg/4, which is handed the first
+% available register and the remaining ones (presumably as temporaries).
+move_to_vm_register(#state{available_regs = [Temp1 | AT]} = State0, Src, {y_reg, Y}) when
+    is_atom(Src)
+->
+    Code = str_y_reg(Src, Y, Temp1, AT),
+    Stream1 = (State0#state.stream_module):append(State0#state.stream, Code),
+    State0#state{stream = Stream1};
+% Small integer (0..255) to y_reg: load it with li into the second available
+% register, then store that register with str_y_reg/4.
+move_to_vm_register(#state{available_regs = [Temp1, Temp2 | AT]} = State0, N, {y_reg, Y}) when
+    is_integer(N), N >= 0, N =< 255
+->
+    I1 = jit_riscv32_asm:li(Temp2, N),
+    YCode = str_y_reg(Temp2, Y, Temp1, AT),
+    Stream1 = (State0#state.stream_module):append(State0#state.stream, <<I1/binary, YCode/binary>>),
+    State0#state{stream = Stream1};
+% Small integer (0..255) to any other destination: load it with li into a
+% temporary and recurse with that register as source.
+move_to_vm_register(#state{available_regs = [Temp | AT] = AR0} = State0, N, Dest) when
+    is_integer(N), N >= 0, N =< 255
+->
+    I1 = jit_riscv32_asm:li(Temp, N),
+    Stream1 = (State0#state.stream_module):append(State0#state.stream, I1),
+    State1 = move_to_vm_register(State0#state{stream = Stream1, available_regs = AT}, Temp, Dest),
+    State1#state{available_regs = AR0};
+%% Any other integer: materialize it in a temporary with mov_immediate/3,
+%% then recurse with that register as source.
+move_to_vm_register(#state{available_regs = [Temp | AT] = AR0} = State0, N, Dest) when
+    is_integer(N)
+->
+    State1 = mov_immediate(State0#state{available_regs = AT}, Temp, N),
+    State2 = move_to_vm_register(State1, Temp, Dest),
+    State2#state{available_regs = AR0};
+% Source is a VM register
+% Each clause loads the source into a temporary, then recurses with that
+% register as source.
+move_to_vm_register(#state{available_regs = [Temp | AT] = AR0} = State0, {x_reg, extra}, Dest) ->
+    {BaseReg, Off} = ?X_REG(?MAX_REG),
+    I1 = jit_riscv32_asm:lw(Temp, BaseReg, Off),
+    Stream1 = (State0#state.stream_module):append(State0#state.stream, I1),
+    State1 = move_to_vm_register(State0#state{stream = Stream1, available_regs = AT}, Temp, Dest),
+    State1#state{available_regs = AR0};
+move_to_vm_register(#state{available_regs = [Temp | AT] = AR0} = State0, {x_reg, X}, Dest) ->
+    {XReg, X_REGOffset} = ?X_REG(X),
+    I1 = jit_riscv32_asm:lw(Temp, XReg, X_REGOffset),
+    Stream1 = (State0#state.stream_module):append(State0#state.stream, I1),
+    State1 = move_to_vm_register(State0#state{stream = Stream1, available_regs = AT}, Temp, Dest),
+    State1#state{available_regs = AR0};
+move_to_vm_register(#state{available_regs = [Temp | AT] = AR0} = State0, {ptr, Reg}, Dest) ->
+    I1 = jit_riscv32_asm:lw(Temp, Reg, 0),
+    Stream1 = (State0#state.stream_module):append(State0#state.stream, I1),
+    State1 = move_to_vm_register(State0#state{stream = Stream1, available_regs = AT}, Temp, Dest),
+    State1#state{available_regs = AR0};
+move_to_vm_register(#state{available_regs = [Temp | AT] = AR0} = State0, {y_reg, Y}, Dest) ->
+    Code = ldr_y_reg(Temp, Y, AT),
+    Stream1 = (State0#state.stream_module):append(State0#state.stream, Code),
+    State1 = move_to_vm_register(State0#state{stream = Stream1, available_regs = AT}, Temp, Dest),
+    State1#state{available_regs = AR0};
+% term_to_float
+% Copy the payload found after the first word at Reg into fp register F,
+% then free Reg. Temp1 receives the value loaded from ?FP_REGS and is used as
+% base address for the stores; Temp2 carries each 32-bit word.
+move_to_vm_register(
+    #state{
+        stream_module = StreamModule,
+        available_regs = [Temp1, Temp2 | _],
+        stream = Stream0,
+        variant = Variant
+    } =
+        State0,
+    {free, {ptr, Reg, 1}},
+    {fp_reg, F}
+) ->
+    {BaseReg, Off} = ?FP_REGS,
+    I1 = jit_riscv32_asm:lw(Temp1, BaseReg, Off),
+    I2 = jit_riscv32_asm:lw(Temp2, Reg, 4),
+    case Variant band ?JIT_VARIANT_FLOAT32 of
+        0 ->
+            % Double precision: write both 32-bit parts
+            % (8 bytes per fp register, words read at Reg+4 and Reg+8)
+            I3 = jit_riscv32_asm:sw(Temp1, Temp2, F * 8),
+            I4 = jit_riscv32_asm:lw(Temp2, Reg, 8),
+            I5 = jit_riscv32_asm:sw(Temp1, Temp2, F * 8 + 4),
+            Code = <<I1/binary, I2/binary, I3/binary, I4/binary, I5/binary>>;
+        _ ->
+            % Single precision: write only first 32-bit part
+            % (4 bytes per fp register)
+            I3 = jit_riscv32_asm:sw(Temp1, Temp2, F * 4),
+            Code = <<I1/binary, I2/binary, I3/binary>>
+    end,
+    Stream1 = StreamModule:append(Stream0, Code),
+    State1 = free_native_register(State0, Reg),
+    State1#state{stream = Stream1}.
+
+%%-----------------------------------------------------------------------------
+%% @doc Emit a move of an array element (reg[x]) to a vm or a native register.
+%% Elements are 4 bytes wide: a constant index is scaled by 4 and a register
+%% index is shifted left by 2.
+%% @end
+%% @param State current backend state
+%% @param Reg base register of the array
+%% @param Index index in the array, as an integer or a native register
+%% @param Dest vm or native register to move to
+%% @return Updated backend state
+%%-----------------------------------------------------------------------------
+-spec move_array_element(
+    state(),
+    riscv32_register(),
+    non_neg_integer() | riscv32_register(),
+    vm_register() | riscv32_register()
+) -> state().
+% Constant index to x register: load into a temporary, store to the x slot.
+move_array_element(
+    #state{stream_module = StreamModule, stream = Stream0, available_regs = [Temp | _]} = State,
+    Reg,
+    Index,
+    {x_reg, X}
+) when X < ?MAX_REG andalso is_atom(Reg) andalso is_integer(Index) ->
+    I1 = jit_riscv32_asm:lw(Temp, Reg, Index * 4),
+    {BaseReg, Off} = ?X_REG(X),
+    I2 = jit_riscv32_asm:sw(BaseReg, Temp, Off),
+    Stream1 = StreamModule:append(Stream0, <<I1/binary, I2/binary>>),
+    State#state{stream = Stream1};
+% Constant index to the address held in Dest (stored at offset 0).
+move_array_element(
+    #state{stream_module = StreamModule, stream = Stream0, available_regs = [Temp | _]} = State,
+    Reg,
+    Index,
+    {ptr, Dest}
+) when is_atom(Reg) andalso is_integer(Index) ->
+    I1 = jit_riscv32_asm:lw(Temp, Reg, Index * 4),
+    I2 = jit_riscv32_asm:sw(Dest, Temp, 0),
+    Stream1 = StreamModule:append(Stream0, <<I1/binary, I2/binary>>),
+    State#state{stream = Stream1};
+% Constant index to y register: the element is loaded into the second
+% available register, the first one is handed to str_y_reg/4.
+move_array_element(
+    #state{stream_module = StreamModule, stream = Stream0, available_regs = [Temp1, Temp2 | AT]} =
+        State,
+    Reg,
+    Index,
+    {y_reg, Y}
+) when is_atom(Reg) andalso is_integer(Index) ->
+    I1 = jit_riscv32_asm:lw(Temp2, Reg, Index * 4),
+    YCode = str_y_reg(Temp2, Y, Temp1, AT),
+    Code = <<I1/binary, YCode/binary>>,
+    Stream1 = StreamModule:append(Stream0, Code),
+    State#state{stream = Stream1};
+% Same with a base register given as {free, Reg}: Reg itself is overwritten
+% with the element, saving one temporary.
+% NOTE(review): the returned state does not move Reg back to available_regs;
+% confirm that callers release it.
+move_array_element(
+    #state{stream_module = StreamModule, stream = Stream0, available_regs = [Temp | AT]} =
+        State,
+    {free, Reg},
+    Index,
+    {y_reg, Y}
+) when is_integer(Index) ->
+    I1 = jit_riscv32_asm:lw(Reg, Reg, Index * 4),
+    YCode = str_y_reg(Reg, Y, Temp, AT),
+    Code = <<I1/binary, YCode/binary>>,
+    Stream1 = StreamModule:append(Stream0, Code),
+    State#state{stream = Stream1};
+% Constant index to a native register: a single load.
+move_array_element(
+    #state{stream_module = StreamModule, stream = Stream0} = State, Reg, Index, Dest
+) when is_atom(Dest) andalso is_integer(Index) ->
+    I1 = jit_riscv32_asm:lw(Dest, Reg, Index * 4),
+    Stream1 = StreamModule:append(Stream0, I1),
+    State#state{stream = Stream1};
+% Index in a register given as {free, IndexReg}: IndexReg is overwritten
+% with the element address, then with the element itself, and is released
+% with free_reg/3 once the element is stored. Destination: x register.
+move_array_element(
+    #state{
+        stream_module = StreamModule,
+        stream = Stream0,
+        available_regs = AvailableRegs0,
+        used_regs = UsedRegs0
+    } = State,
+    Reg,
+    {free, IndexReg},
+    {x_reg, X}
+) when X < ?MAX_REG andalso is_atom(IndexReg) ->
+    I1 = jit_riscv32_asm:slli(IndexReg, IndexReg, 2),
+    I2 = jit_riscv32_asm:add(IndexReg, Reg, IndexReg),
+    I3 = jit_riscv32_asm:lw(IndexReg, IndexReg, 0),
+    {BaseReg, Off} = ?X_REG(X),
+    I4 = jit_riscv32_asm:sw(BaseReg, IndexReg, Off),
+    {AvailableRegs1, UsedRegs1} = free_reg(AvailableRegs0, UsedRegs0, IndexReg),
+    Stream1 = StreamModule:append(Stream0, <<I1/binary, I2/binary, I3/binary, I4/binary>>),
+    State#state{
+        available_regs = AvailableRegs1,
+        used_regs = UsedRegs1,
+        stream = Stream1
+    };
+% Same with the address held in PtrReg as destination (offset 0).
+move_array_element(
+    #state{
+        stream_module = StreamModule,
+        stream = Stream0,
+        available_regs = AvailableRegs0,
+        used_regs = UsedRegs0
+    } = State,
+    Reg,
+    {free, IndexReg},
+    {ptr, PtrReg}
+) when is_atom(IndexReg) ->
+    I1 = jit_riscv32_asm:slli(IndexReg, IndexReg, 2),
+    I2 = jit_riscv32_asm:add(IndexReg, Reg, IndexReg),
+    I3 = jit_riscv32_asm:lw(IndexReg, IndexReg, 0),
+    I4 = jit_riscv32_asm:sw(PtrReg, IndexReg, 0),
+    {AvailableRegs1, UsedRegs1} = free_reg(
+        AvailableRegs0, UsedRegs0, IndexReg
+    ),
+    Stream1 = StreamModule:append(Stream0, <<I1/binary, I2/binary, I3/binary, I4/binary>>),
+    State#state{
+        available_regs = AvailableRegs1,
+        used_regs = UsedRegs1,
+        stream = Stream1
+    };
+% Same with a y register as destination, stored through str_y_reg/4.
+move_array_element(
+    #state{
+        stream_module = StreamModule,
+        stream = Stream0,
+        available_regs = [Temp | AT] = AvailableRegs0,
+        used_regs = UsedRegs0
+    } = State,
+    Reg,
+    {free, IndexReg},
+    {y_reg, Y}
+) when is_atom(IndexReg) ->
+    I1 = jit_riscv32_asm:slli(IndexReg, IndexReg, 2),
+    I2 = jit_riscv32_asm:add(IndexReg, Reg, IndexReg),
+    I3 = jit_riscv32_asm:lw(IndexReg, IndexReg, 0),
+    Code = str_y_reg(IndexReg, Y, Temp, AT),
+    I4 = Code,
+    {AvailableRegs1, UsedRegs1} = free_reg(
+        AvailableRegs0, UsedRegs0, IndexReg
+    ),
+    Stream1 = StreamModule:append(
+        Stream0, <<I1/binary, I2/binary, I3/binary, I4/binary>>
+    ),
+    State#state{
+        available_regs = AvailableRegs1,
+        used_regs = UsedRegs1,
+        stream = Stream1
+    }.
+
+%% @doc Load reg[Index] (4-byte elements) into a native register and return
+%% that register along with the updated state. With `{free, Reg}' the base
+%% register itself receives the element; otherwise the first available
+%% register is taken and recorded as used.
+-spec get_array_element(
+    state(), riscv32_register() | {free, riscv32_register()}, non_neg_integer()
+) ->
+    {state(), riscv32_register()}.
+get_array_element(
+    #state{stream_module = StreamModule, stream = Stream0} = State, {free, Reg}, Index
+) ->
+    Load = jit_riscv32_asm:lw(Reg, Reg, Index * 4),
+    {State#state{stream = StreamModule:append(Stream0, Load)}, Reg};
+get_array_element(
+    #state{
+        stream_module = StreamModule,
+        stream = Stream0,
+        available_regs = [ElemReg | Remaining],
+        used_regs = UsedRegs0
+    } = State,
+    Reg,
+    Index
+) ->
+    Load = jit_riscv32_asm:lw(ElemReg, Reg, Index * 4),
+    NewState = State#state{
+        stream = StreamModule:append(Stream0, Load),
+        available_regs = Remaining,
+        used_regs = [ElemReg | UsedRegs0]
+    },
+    {NewState, ElemReg}.
+
+%% @doc Store an integer, a vm register or a native register into reg[x],
+%% where x is a constant index or a native register (4-byte elements).
+-spec move_to_array_element(
+    state(), integer() | vm_register() | riscv32_register(), riscv32_register(), non_neg_integer()
+) -> state().
+% Value in a native register, constant index: a single store.
+move_to_array_element(
+    #state{stream_module = StreamModule, stream = Stream0} = State0, ValueReg, Reg, Index
+) when ?IS_GPR(ValueReg) andalso ?IS_GPR(Reg) andalso is_integer(Index) ->
+    Store = jit_riscv32_asm:sw(Reg, ValueReg, Index * 4),
+    State0#state{stream = StreamModule:append(Stream0, Store)};
+% Value in a native register, index in a register: the element address is
+% computed in the first available register, which is not reserved.
+move_to_array_element(
+    #state{stream_module = StreamModule, stream = Stream0, available_regs = [Temp | _]} = State0,
+    ValueReg,
+    Reg,
+    IndexReg
+) when ?IS_GPR(ValueReg) andalso ?IS_GPR(Reg) andalso ?IS_GPR(IndexReg) ->
+    Code = <<
+        (jit_riscv32_asm:mv(Temp, IndexReg))/binary,
+        (jit_riscv32_asm:slli(Temp, Temp, 2))/binary,
+        (jit_riscv32_asm:add(Temp, Reg, Temp))/binary,
+        (jit_riscv32_asm:sw(Temp, ValueReg, 0))/binary
+    >>,
+    State0#state{stream = StreamModule:append(Stream0, Code)};
+% Any other value: copy it to a native register, store it through the
+% clauses above, then release the copy.
+move_to_array_element(State0, Value, Reg, Index) ->
+    {WithCopy, Temp} = copy_to_native_register(State0, Value),
+    Stored = move_to_array_element(WithCopy, Temp, Reg, Index),
+    free_native_register(Stored, Temp).
+
+% Store Value into BaseReg[IndexReg + Offset] (4-byte elements).
+%
+% State    current backend state
+% Value    native register, or any value accepted by copy_to_native_register/2
+% BaseReg  native register holding the array base address
+% IndexReg element index, as an integer or a native register
+% Offset   integer number of elements added to the index
+%
+% Returns the updated backend state.
+%
+% Constant index and constant offset: fold them and use the 4-argument form.
+% Offset is a count of elements, as in the clauses below where it is added to
+% the index before the shift by 2, so the sum is taken as is.
+move_to_array_element(
+    State,
+    Value,
+    BaseReg,
+    IndexReg,
+    Offset
+) when is_integer(IndexReg) andalso is_integer(Offset) ->
+    move_to_array_element(State, Value, BaseReg, IndexReg + Offset);
+% Value and index in native registers: the element address
+% BaseReg + ((IndexReg + Offset) bsl 2) is computed in the first available
+% register, which is not reserved.
+move_to_array_element(
+    #state{stream_module = StreamModule, stream = Stream0, available_regs = [Temp | _]} = State,
+    ValueReg,
+    BaseReg,
+    IndexReg,
+    Offset
+) when ?IS_GPR(ValueReg) andalso ?IS_GPR(IndexReg) andalso is_integer(Offset) ->
+    I1 = jit_riscv32_asm:addi(Temp, IndexReg, Offset),
+    I2 = jit_riscv32_asm:slli(Temp, Temp, 2),
+    I3 = jit_riscv32_asm:add(Temp, BaseReg, Temp),
+    I4 = jit_riscv32_asm:sw(Temp, ValueReg, 0),
+    Stream1 = StreamModule:append(Stream0, <<I1/binary, I2/binary, I3/binary, I4/binary>>),
+    State#state{stream = Stream1};
+% Any other value: copy it to a native register first, emit the same
+% sequence, then release the copy. The address temporary is picked after the
+% copy so that it cannot be the register holding the value.
+move_to_array_element(
+    State0,
+    Value,
+    BaseReg,
+    IndexReg,
+    Offset
+) ->
+    {State1, ValueReg} = copy_to_native_register(State0, Value),
+    [Temp | _] = State1#state.available_regs,
+    I1 = jit_riscv32_asm:addi(Temp, IndexReg, Offset),
+    I2 = jit_riscv32_asm:slli(Temp, Temp, 2),
+    I3 = jit_riscv32_asm:add(Temp, BaseReg, Temp),
+    I4 = jit_riscv32_asm:sw(Temp, ValueReg, 0),
+    Stream1 = (State1#state.stream_module):append(
+        State1#state.stream, <<I1/binary, I2/binary, I3/binary, I4/binary>>
+    ),
+    State2 = State1#state{stream = Stream1},
+    free_native_register(State2, ValueReg).
+
+% Make a value available in a native register and return {State, Register}.
+% Values that need a register take the first available one, which is moved
+% from available_regs to used_regs. A value that is already a native register
+% is returned as is, and {ptr, Reg} is loaded in place into Reg.
+% Note that the {fp_reg, F} clause returns {fp, RegA, RegB} rather than a
+% single register, which the spec below does not reflect.
+-spec move_to_native_register(state(), value() | cp) -> {state(), riscv32_register()}.
+% cp: load it from the location given by ?CP. This clause must stay before
+% the generic atom clause, since cp is an atom too.
+move_to_native_register(
+    #state{
+        stream_module = StreamModule,
+        stream = Stream0,
+        available_regs = [Reg | AvailT],
+        used_regs = Used
+    } = State,
+    cp
+) ->
+    {BaseReg, Off} = ?CP,
+    I1 = jit_riscv32_asm:lw(Reg, BaseReg, Off),
+    Stream1 = StreamModule:append(Stream0, I1),
+    {State#state{stream = Stream1, used_regs = [Reg | Used], available_regs = AvailT}, Reg};
+% Already a native register: nothing to emit.
+move_to_native_register(State, Reg) when is_atom(Reg) ->
+    {State, Reg};
+% Pointer: replace the address in Reg by the word it points to.
+move_to_native_register(
+    #state{stream_module = StreamModule, stream = Stream0} = State, {ptr, Reg}
+) when is_atom(Reg) ->
+    I1 = jit_riscv32_asm:lw(Reg, Reg, 0),
+    Stream1 = StreamModule:append(Stream0, I1),
+    {State#state{stream = Stream1}, Reg};
+% Integer: reserve a register, then load it with move_to_native_register/3.
+move_to_native_register(
+    #state{
+        available_regs = [Reg | AvailT],
+        used_regs = Used
+    } = State0,
+    Imm
+) when
+    is_integer(Imm)
+->
+    State1 = State0#state{used_regs = [Reg | Used], available_regs = AvailT},
+    {move_to_native_register(State1, Imm, Reg), Reg};
+% {x_reg, extra}: load from the slot addressed by ?X_REG(?MAX_REG).
+move_to_native_register(
+    #state{
+        stream_module = StreamModule,
+        stream = Stream0,
+        available_regs = [Reg | AvailT],
+        used_regs = Used
+    } = State,
+    {x_reg, extra}
+) ->
+    {BaseReg, Off} = ?X_REG(?MAX_REG),
+    I1 = jit_riscv32_asm:lw(Reg, BaseReg, Off),
+    Stream1 = StreamModule:append(Stream0, I1),
+    {State#state{stream = Stream1, used_regs = [Reg | Used], available_regs = AvailT}, Reg};
+% {x_reg, X}: load from the base register and offset given by ?X_REG(X).
+move_to_native_register(
+    #state{
+        stream_module = StreamModule,
+        stream = Stream0,
+        available_regs = [Reg | AvailT],
+        used_regs = Used
+    } = State,
+    {x_reg, X}
+) when
+    X < ?MAX_REG
+->
+    {BaseReg, Offset} = ?X_REG(X),
+    I1 = jit_riscv32_asm:lw(Reg, BaseReg, Offset),
+    Stream1 = StreamModule:append(Stream0, I1),
+    {State#state{stream = Stream1, used_regs = [Reg | Used], available_regs = AvailT}, Reg};
+% {y_reg, Y}: delegate to ldr_y_reg/3, handing it the remaining available
+% registers.
+move_to_native_register(
+    #state{
+        stream_module = StreamModule,
+        stream = Stream0,
+        available_regs = [Reg | AvailT],
+        used_regs = Used
+    } = State,
+    {y_reg, Y}
+) ->
+    Code = ldr_y_reg(Reg, Y, AvailT),
+    Stream1 = StreamModule:append(Stream0, Code),
+    {State#state{stream = Stream1, available_regs = AvailT, used_regs = [Reg | Used]}, Reg};
+% {fp_reg, F}: load the two 32-bit words at F * 8 and F * 8 + 4 into a pair
+% of registers. RegB first receives the value loaded from ?FP_REGS and is
+% used as base address, so it must be overwritten last.
+% NOTE(review): unlike move_to_vm_register/3, this clause does not look at
+% ?JIT_VARIANT_FLOAT32; confirm it is only used with 8-byte fp registers.
+move_to_native_register(
+    #state{
+        stream_module = StreamModule,
+        stream = Stream0,
+        available_regs = [RegA, RegB | AvailT],
+        used_regs = Used
+    } = State,
+    {fp_reg, F}
+) ->
+    {BaseReg, Off} = ?FP_REGS,
+    I1 = jit_riscv32_asm:lw(RegB, BaseReg, Off),
+    I2 = jit_riscv32_asm:lw(RegA, RegB, F * 8),
+    I3 = jit_riscv32_asm:lw(RegB, RegB, F * 8 + 4),
+    Code = <<I1/binary, I2/binary, I3/binary>>,
+    Stream1 = StreamModule:append(Stream0, Code),
+    {
+        State#state{stream = Stream1, available_regs = AvailT, used_regs = [RegB, RegA | Used]},
+        {fp, RegA, RegB}
+    }.
+
+% Load a value into the native register chosen by the caller (RegDst) and
+% return the updated state. No clause changes available_regs or used_regs
+% itself; immediates are delegated to mov_immediate/3.
+% The {fp_reg, F} clause expects a {fp, RegA, RegB} pair as destination,
+% which the spec below does not reflect.
+-spec move_to_native_register(state(), value(), riscv32_register()) -> state().
+% Native register source: plain register move.
+move_to_native_register(
+    #state{stream_module = StreamModule, stream = Stream0} = State, RegSrc, RegDst
+) when is_atom(RegSrc) ->
+    I = jit_riscv32_asm:mv(RegDst, RegSrc),
+    Stream1 = StreamModule:append(Stream0, I),
+    State#state{stream = Stream1};
+% Integer source: load it as an immediate.
+move_to_native_register(State, ValSrc, RegDst) when is_integer(ValSrc) ->
+    mov_immediate(State, RegDst, ValSrc);
+% Pointer source: load the word at offset 0 from the address held in Reg.
+move_to_native_register(
+    #state{stream_module = StreamModule, stream = Stream0} = State, {ptr, Reg}, RegDst
+) when ?IS_GPR(Reg) ->
+    I1 = jit_riscv32_asm:lw(RegDst, Reg, 0),
+    Stream1 = StreamModule:append(Stream0, I1),
+    State#state{stream = Stream1};
+% {x_reg, extra}: load from the slot addressed by ?X_REG(?MAX_REG).
+move_to_native_register(
+    #state{stream_module = StreamModule, stream = Stream0} = State, {x_reg, extra}, RegDst
+) ->
+    {BaseReg, Off} = ?X_REG(?MAX_REG),
+    I1 = jit_riscv32_asm:lw(RegDst, BaseReg, Off),
+    Stream1 = StreamModule:append(Stream0, I1),
+    State#state{stream = Stream1};
+% {x_reg, X}: load from the base register and offset given by ?X_REG(X).
+move_to_native_register(
+    #state{stream_module = StreamModule, stream = Stream0} = State, {x_reg, X}, RegDst
+) when
+    X < ?MAX_REG
+->
+    {XReg, X_REGOffset} = ?X_REG(X),
+    I1 = jit_riscv32_asm:lw(RegDst, XReg, X_REGOffset),
+    Stream1 = StreamModule:append(Stream0, I1),
+    State#state{stream = Stream1};
+% {y_reg, Y}: delegate to ldr_y_reg/3, handing it the available registers.
+move_to_native_register(
+    #state{stream_module = StreamModule, stream = Stream0, available_regs = AT} = State,
+    {y_reg, Y},
+    RegDst
+) ->
+    Code = ldr_y_reg(RegDst, Y, AT),
+    Stream1 = StreamModule:append(Stream0, Code),
+    State#state{stream = Stream1};
+% {fp_reg, F} into a register pair: RegB first receives the value loaded
+% from ?FP_REGS and is used as base address, so it is overwritten last.
+move_to_native_register(
+    #state{
+        stream_module = StreamModule,
+        stream = Stream0
+    } = State,
+    {fp_reg, F},
+    {fp, RegA, RegB}
+) ->
+    {BaseReg, Off} = ?FP_REGS,
+    I1 = jit_riscv32_asm:lw(RegB, BaseReg, Off),
+    I2 = jit_riscv32_asm:lw(RegA, RegB, F * 8),
+    I3 = jit_riscv32_asm:lw(RegB, RegB, F * 8 + 4),
+    Code = <<I1/binary, I2/binary, I3/binary>>,
+    Stream1 = StreamModule:append(Stream0, Code),
+    State#state{stream = Stream1}.
+
+-spec copy_to_native_register(state(), value()) -> {state(), riscv32_register()}.
+copy_to_native_register(
+    #state{
+        stream_module = Mod,
+        stream = Out0,
+        available_regs = [CopyReg | Rest],
+        used_regs = InUse
+    } = State,
+    Reg
+) when is_atom(Reg) ->
+    % Register source: duplicate it into the next available register.
+    Out1 = Mod:append(Out0, jit_riscv32_asm:mv(CopyReg, Reg)),
+    {State#state{stream = Out1, available_regs = Rest, used_regs = [CopyReg | InUse]}, CopyReg};
+copy_to_native_register(
+    #state{
+        stream_module = Mod,
+        stream = Out0,
+        available_regs = [CopyReg | Rest],
+        used_regs = InUse
+    } = State,
+    {ptr, Reg}
+) when is_atom(Reg) ->
+    % Pointer source: load the pointed-to word into the next available register.
+    Out1 = Mod:append(Out0, jit_riscv32_asm:lw(CopyReg, Reg, 0)),
+    {State#state{stream = Out1, available_regs = Rest, used_regs = [CopyReg | InUse]}, CopyReg};
+copy_to_native_register(State, Reg) ->
+    move_to_native_register(State, Reg).
+
+move_to_cp(
+    #state{stream_module = Mod, stream = Out0, available_regs = [Reg | Rest]} = State,
+    {y_reg, Y}
+) ->
+    % Load y register Y into a scratch register, then store it at ?CP.
+    LoadY = ldr_y_reg(Reg, Y, Rest),
+    {BaseReg, Off} = ?CP,
+    StoreCP = jit_riscv32_asm:sw(BaseReg, Reg, Off),
+    Out1 = Mod:append(Out0, <<LoadY/binary, StoreCP/binary>>),
+    State#state{stream = Out1}.
+
+increment_sp(
+    #state{stream_module = Mod, stream = Out0, available_regs = [Reg | _]} = State,
+    Offset
+) ->
+    % Read the word at ?Y_REGS, add Offset words (4 bytes each), write it back.
+    {BaseReg, Off} = ?Y_REGS,
+    Load = jit_riscv32_asm:lw(Reg, BaseReg, Off),
+    Bump = jit_riscv32_asm:addi(Reg, Reg, Offset * 4),
+    Store = jit_riscv32_asm:sw(BaseReg, Reg, Off),
+    Code = <<Load/binary, Bump/binary, Store/binary>>,
+    Out1 = Mod:append(Out0, Code),
+    State#state{stream = Out1}.
+
+set_continuation_to_label(
+    #state{
+        stream_module = StreamModule,
+        stream = Stream0,
+        available_regs = [Temp | _],
+        branches = Branches,
+        labels = Labels
+    } = State,
+    Label
+) ->
+    Offset = StreamModule:offset(Stream0),
+    % Either way, Temp ends up stored in the jit_state continuation field
+    Store = jit_riscv32_asm:sw(?JITSTATE_REG, Temp, ?JITSTATE_CONTINUATION_OFFSET),
+    case lists:keyfind(Label, 1, Labels) of
+        {Label, LabelOffset} ->
+            % Known label: emit the pc-relative address directly, no relocation
+            Rel = LabelOffset - Offset,
+            Address = pc_relative_address(Temp, Rel),
+            Code = <<Address/binary, Store/binary>>,
+            Stream1 = StreamModule:append(Stream0, Code),
+            State#state{stream = Stream1};
+        false ->
+            % Unknown label: reserve 8 bytes (2 x 32-bit instructions) of all-1s,
+            % a placeholder suited to flash programming, and record a relocation
+            % so that they get replaced with the correct offset later
+            Placeholder = <<16#FFFFFFFF:32/little, 16#FFFFFFFF:32/little>>,
+            Reloc = {Label, Offset, {adr, Temp}},
+            Code = <<Placeholder/binary, Store/binary>>,
+            Stream1 = StreamModule:append(Stream0, Code),
+            State#state{stream = Stream1, branches = [Reloc | Branches]}
+    end.
+
+%% @doc Set the continuation to an offset that is not known yet.
+%% Returns the new state and a reference; the placeholder emitted here is added
+%% to the branches list under that reference so update_branches can patch it.
+%% This is only used with OP_WAIT_TIMEOUT, whose target follows the current code.
+set_continuation_to_offset(
+    #state{
+        stream_module = StreamModule,
+        stream = Stream0,
+        available_regs = [Temp | _],
+        branches = Branches
+    } = State
+) ->
+    OffsetRef = make_ref(),
+    Offset = StreamModule:offset(Stream0),
+    % Reserve 8 bytes with all-1s placeholder for flash programming
+    Placeholder = <<16#FFFFFFFF:32/little, 16#FFFFFFFF:32/little>>,
+    Reloc = {OffsetRef, Offset, {adr, Temp}},
+    % Store Temp as the continuation in jit_state
+    Store = jit_riscv32_asm:sw(?JITSTATE_REG, Temp, ?JITSTATE_CONTINUATION_OFFSET),
+    Stream1 = StreamModule:append(Stream0, <<Placeholder/binary, Store/binary>>),
+    WithReloc = State#state{stream = Stream1, branches = [Reloc | Branches]},
+    {WithReloc, OffsetRef}.
+
+%% @doc Implement a continuation entry point: no code is emitted, State is returned as is.
+-spec continuation_entry_point(#state{}) -> #state{}.
+continuation_entry_point(State) ->
+    State.
+
+get_module_index(
+    #state{
+        stream_module = Mod,
+        stream = Out0,
+        available_regs = [Reg | Rest],
+        used_regs = InUse
+    } = State
+) ->
+    % Two dependent loads: Reg := module field of jit_state, then Reg := word at Reg
+    LoadModule = jit_riscv32_asm:lw(Reg, ?JITSTATE_REG, ?JITSTATE_MODULE_OFFSET),
+    LoadIndex = jit_riscv32_asm:lw(Reg, Reg, 0),
+    Code = <<LoadModule/binary, LoadIndex/binary>>,
+    Out1 = Mod:append(Out0, Code),
+    {
+        State#state{
+            stream = Out1,
+            available_regs = Rest,
+            used_regs = [Reg | InUse]
+        },
+        Reg
+    }.
+
+%% @doc Perform an AND of a register with an immediate.
+%% Masks handled here include ?TERM_PRIMARY_CLEAR_MASK (-4) and
+%% ?TERM_BOXED_TAG_MASK (0x3F). No BIC-style instruction is used: masks in
+%% -256..-1 are built with li + not + and, and 16#FFFFFF with a shift pair.
+and_(#state{stream_module = StreamModule, stream = Stream0} = State0, {free, Reg}, 16#FFFFFF) ->
+    I1 = jit_riscv32_asm:slli(Reg, Reg, 8),
+    I2 = jit_riscv32_asm:srli(Reg, Reg, 8),
+    Stream1 = StreamModule:append(Stream0, <<I1/binary, I2/binary>>),
+    {State0#state{stream = Stream1}, Reg};
+and_(
+    #state{stream_module = StreamModule, available_regs = [Temp | AT]} = State0,
+    {free, Reg},
+    Val
+) when Val < 0 andalso Val >= -256 ->
+    State1 = mov_immediate(State0#state{available_regs = AT}, Temp, bnot (Val)),
+    Stream1 = State1#state.stream,
+    % Temp holds bnot(Val): invert it back to Val, then AND it into Reg
+    I1 = jit_riscv32_asm:not_(Temp, Temp),
+    I2 = jit_riscv32_asm:and_(Reg, Reg, Temp),
+    Stream2 = StreamModule:append(Stream1, <<I1/binary, I2/binary>>),
+    {State1#state{available_regs = [Temp | AT], stream = Stream2}, Reg};
+and_(
+    #state{stream_module = StreamModule, available_regs = [Temp | AT]} = State0,
+    {free, Reg},
+    Val
+) ->
+    State1 = mov_immediate(State0#state{available_regs = AT}, Temp, Val),
+    Stream1 = State1#state.stream,
+    I = jit_riscv32_asm:and_(Reg, Reg, Temp),
+    Stream2 = StreamModule:append(Stream1, I),
+    {State1#state{available_regs = [Temp | AT], stream = Stream2}, Reg};
+and_(
+    #state{stream_module = StreamModule, available_regs = []} = State0,
+    {free, Reg},
+    Val
+) when Val < 0 andalso Val >= -256 ->
+    % No available registers: borrow a0 as temp, preserving it in ?IP_REG
+    Stream0 = State0#state.stream,
+    % Save a0 in ?IP_REG
+    Save = jit_riscv32_asm:mv(?IP_REG, a0),
+    Stream1 = StreamModule:append(Stream0, Save),
+    % Load bnot(Val) into a0
+    State1 = mov_immediate(State0#state{stream = Stream1}, a0, bnot (Val)),
+    Stream2 = State1#state.stream,
+    % Invert a0 back to Val and AND it into Reg (not + and)
+    I1 = jit_riscv32_asm:not_(a0, a0),
+    I2 = jit_riscv32_asm:and_(Reg, Reg, a0),
+    Stream3 = StreamModule:append(Stream2, <<I1/binary, I2/binary>>),
+    % Restore a0 from ?IP_REG
+    Restore = jit_riscv32_asm:mv(a0, ?IP_REG),
+    Stream4 = StreamModule:append(Stream3, Restore),
+    {State0#state{stream = Stream4}, Reg};
+and_(
+    #state{stream_module = StreamModule, available_regs = []} = State0,
+    {free, Reg},
+    Val
+) ->
+    % No available registers: borrow a0 as temp, preserving it in ?IP_REG
+    Stream0 = State0#state.stream,
+    % Save a0 in ?IP_REG
+    Save = jit_riscv32_asm:mv(?IP_REG, a0),
+    Stream1 = StreamModule:append(Stream0, Save),
+    % Load immediate value into a0
+    State1 = mov_immediate(State0#state{stream = Stream1}, a0, Val),
+    Stream2 = State1#state.stream,
+    % AND the mask held in a0 into Reg
+    I = jit_riscv32_asm:and_(Reg, Reg, a0),
+    Stream3 = StreamModule:append(Stream2, I),
+    % Restore a0 from ?IP_REG
+    Restore = jit_riscv32_asm:mv(a0, ?IP_REG),
+    Stream4 = StreamModule:append(Stream3, Restore),
+    {State0#state{stream = Stream4}, Reg};
+and_(
+    #state{stream_module = StreamModule, available_regs = [ResultReg | AT], used_regs = UR} =
+        State0,
+    Reg,
+    ?TERM_PRIMARY_CLEAR_MASK
+) ->
+    I = jit_riscv32_asm:andi(ResultReg, Reg, -4),
+    Stream1 = StreamModule:append(State0#state.stream, I),
+    {State0#state{stream = Stream1, available_regs = AT, used_regs = [ResultReg | UR]}, ResultReg}.
+
+or_(
+    #state{stream_module = Mod, available_regs = [Temp | Rest]} = State0,
+    Reg,
+    Val
+) ->
+    % Temp is hidden from mov_immediate/3 while it receives Val, then handed back
+    State1 = mov_immediate(State0#state{available_regs = Rest}, Temp, Val),
+    OrInsn = jit_riscv32_asm:or_(Reg, Temp),
+    Out = Mod:append(State1#state.stream, OrInsn),
+    State1#state{available_regs = [Temp | Rest], stream = Out}.
+
+add(#state{stream_module = Mod, stream = Out0} = State0, Reg, Val) when
+    Val >= 0 andalso Val =< 255
+->
+    % Small non-negative constant: a single addi
+    AddImm = jit_riscv32_asm:addi(Reg, Reg, Val),
+    State0#state{stream = Mod:append(Out0, AddImm)};
+add(#state{stream_module = Mod, stream = Out0} = State0, Reg, Val) when
+    is_atom(Val)
+->
+    % Val is a register: register-register addition
+    AddReg = jit_riscv32_asm:add(Reg, Reg, Val),
+    State0#state{stream = Mod:append(Out0, AddReg)};
+add(#state{stream_module = Mod, available_regs = [Temp | Rest]} = State0, Reg, Val) ->
+    % Any other constant: load it into Temp, add, then hand Temp back
+    State1 = mov_immediate(State0#state{available_regs = Rest}, Temp, Val),
+    AddReg = jit_riscv32_asm:add(Reg, Reg, Temp),
+    Out = Mod:append(State1#state.stream, AddReg),
+    State1#state{available_regs = [Temp | Rest], stream = Out}.
+
+mov_immediate(#state{stream_module = StreamModule, stream = Stream0} = State, Reg, Val) ->
+    % Load the constant Val into Reg with the `li' pseudo-instruction.
+    %
+    % The assembler selects the encoding, so one clause covers every value:
+    %   - a value in the signed 12-bit range (-16#800..16#7FF) takes a single
+    %     instruction (addi);
+    %   - a value outside that range takes lui + addi (2 instructions).
+    %
+    % In both cases the constant is encoded inline, which is efficient enough
+    % that no literal pool is needed.
+    LoadImm = jit_riscv32_asm:li(Reg, Val),
+    Stream1 = StreamModule:append(Stream0, LoadImm),
+    State#state{stream = Stream1}.
+
+sub(#state{stream_module = Mod, stream = Out0} = State, Reg, Val) when
+    Val >= 0 andalso Val =< 255
+->
+    % Small non-negative constant: a single addi with the negated value
+    SubImm = jit_riscv32_asm:addi(Reg, Reg, -Val),
+    State#state{stream = Mod:append(Out0, SubImm)};
+sub(#state{stream_module = Mod, stream = Out0} = State, Reg, Val) when
+    is_atom(Val)
+->
+    % Val is a register: register-register subtraction
+    SubReg = jit_riscv32_asm:sub(Reg, Reg, Val),
+    State#state{stream = Mod:append(Out0, SubReg)};
+sub(#state{stream_module = Mod, available_regs = [Temp | Rest]} = State0, Reg, Val) ->
+    % Any other constant: load it into Temp, subtract, then hand Temp back
+    State1 = mov_immediate(State0#state{available_regs = Rest}, Temp, Val),
+    SubReg = jit_riscv32_asm:sub(Reg, Reg, Temp),
+    Out = Mod:append(State1#state.stream, SubReg),
+    State1#state{available_regs = [Temp | Rest], stream = Out}.
+
+mul(State, _Reg, 1) ->
+    State;
+mul(State, Reg, 2) ->
+    shift_left(State, Reg, 1);
+mul(#state{stream_module = Mod, stream = Out0, available_regs = [Temp | _]} = State, Reg, 3) ->
+    % Reg * 3 = (Reg bsl 1) + Reg
+    Shifted = jit_riscv32_asm:slli(Temp, Reg, 1),
+    Combine = jit_riscv32_asm:add(Reg, Temp, Reg),
+    State#state{stream = Mod:append(Out0, <<Shifted/binary, Combine/binary>>)};
+mul(State, Reg, 4) ->
+    shift_left(State, Reg, 2);
+mul(#state{stream_module = Mod, stream = Out0, available_regs = [Temp | _]} = State, Reg, 5) ->
+    % Reg * 5 = (Reg bsl 2) + Reg
+    Shifted = jit_riscv32_asm:slli(Temp, Reg, 2),
+    Combine = jit_riscv32_asm:add(Reg, Temp, Reg),
+    State#state{stream = Mod:append(Out0, <<Shifted/binary, Combine/binary>>)};
+mul(State0, Reg, 6) ->
+    Tripled = mul(State0, Reg, 3),
+    mul(Tripled, Reg, 2);
+mul(#state{stream_module = Mod, stream = Out0, available_regs = [Temp | _]} = State, Reg, 7) ->
+    % Reg * 7 = (Reg bsl 3) - Reg
+    Shifted = jit_riscv32_asm:slli(Temp, Reg, 3),
+    Combine = jit_riscv32_asm:sub(Reg, Temp, Reg),
+    State#state{stream = Mod:append(Out0, <<Shifted/binary, Combine/binary>>)};
+mul(State, Reg, 8) ->
+    shift_left(State, Reg, 3);
+mul(#state{stream_module = Mod, stream = Out0, available_regs = [Temp | _]} = State, Reg, 9) ->
+    % Reg * 9 = (Reg bsl 3) + Reg
+    Shifted = jit_riscv32_asm:slli(Temp, Reg, 3),
+    Combine = jit_riscv32_asm:add(Reg, Temp, Reg),
+    State#state{stream = Mod:append(Out0, <<Shifted/binary, Combine/binary>>)};
+mul(State0, Reg, 10) ->
+    Quintupled = mul(State0, Reg, 5),
+    mul(Quintupled, Reg, 2);
+mul(#state{stream_module = Mod, stream = Out0, available_regs = [Temp | _]} = State, Reg, 15) ->
+    % Reg * 15 = (Reg bsl 4) - Reg
+    Shifted = jit_riscv32_asm:slli(Temp, Reg, 4),
+    Combine = jit_riscv32_asm:sub(Reg, Temp, Reg),
+    State#state{stream = Mod:append(Out0, <<Shifted/binary, Combine/binary>>)};
+mul(State, Reg, 16) ->
+    shift_left(State, Reg, 4);
+mul(State, Reg, 32) ->
+    shift_left(State, Reg, 5);
+mul(State, Reg, 64) ->
+    shift_left(State, Reg, 6);
+mul(
+    #state{stream_module = Mod, available_regs = [Temp | Rest]} = State0,
+    Reg,
+    Val
+) ->
+    % General case: load Val into Temp and emit a mul instruction
+    State1 = mov_immediate(State0#state{available_regs = Rest}, Temp, Val),
+    Product = jit_riscv32_asm:mul(Reg, Reg, Temp),
+    Out = Mod:append(State1#state.stream, Product),
+    % Temp goes back to the head of the available registers
+    State1#state{stream = Out, available_regs = [Temp | State1#state.available_regs]}.
+
+%%
+%% Analysis of AArch64 pattern and RISC-V32 implementation:
+%%
+%% AArch64 layout (from call_ext_only_test):
+%%   0x0-0x8:  Decrement reductions, store back
+%%   0xc:      b.ne 0x20   ; Branch if reductions != 0 to continuation
+%%   0x10-0x1c: adr/str/ldr/br sequence for scheduling next process
+%%   0x20:     [CONTINUATION POINT] - Actual function starts here
+%%
+%% RISC-V32 implementation (no prolog/epilog needed due to 32 registers):
+%%   0x0-0x8:  Decrement reductions, store back
+%%   0xc:      bne continuation ; Branch if reductions != 0 to continuation
+%%   0x10-0x?:  adr/sw/ldr/jalr sequence for scheduling next process
+%%   continuation: [actual function body]
+%%
+%% Key insight: With 32 registers, RISC-V32 doesn't need prolog/epilog like ARM Thumb.
+%% When reductions != 0, we branch directly to continue execution.
+%% When reductions == 0, we schedule the next process, and resume at the continuation point.
+%%
+-spec decrement_reductions_and_maybe_schedule_next(state()) -> state().
+decrement_reductions_and_maybe_schedule_next(
+    #state{stream_module = StreamModule, stream = Stream0, available_regs = [Temp | _]} = State0
+) ->
+    % Load reduction count
+    I1 = jit_riscv32_asm:lw(Temp, ?JITSTATE_REG, ?JITSTATE_REDUCTIONCOUNT_OFFSET),
+    % Decrement reduction count
+    I2 = jit_riscv32_asm:addi(Temp, Temp, -1),
+    % Store back the decremented value
+    I3 = jit_riscv32_asm:sw(?JITSTATE_REG, Temp, ?JITSTATE_REDUCTIONCOUNT_OFFSET),
+    Stream1 = StreamModule:append(Stream0, <<I1/binary, I2/binary, I3/binary>>),
+    BNEOffset = StreamModule:offset(Stream1),
+    % 4-byte placeholder for the bne (count not zero), rewritten below
+    I4 = <<16#FFFFFFFF:32/little>>,
+    % The continuation address sequence starts right after the branch
+    ADROffset = BNEOffset + byte_size(I4),
+    % Use 8-byte placeholder (2 words of 0xFFFFFFFF) for pc_relative_address
+    % This ensures we can always rewrite with either auipc alone (4 bytes) or auipc+addi (8 bytes)
+    I5 = <<16#FFFFFFFF:32/little, 16#FFFFFFFF:32/little>>,
+    I6 = jit_riscv32_asm:sw(?JITSTATE_REG, Temp, ?JITSTATE_CONTINUATION_OFFSET),
+    % Append the instructions to the stream
+    Stream2 = StreamModule:append(Stream1, <<I4/binary, I5/binary, I6/binary>>),
+    State1 = State0#state{stream = Stream2},
+    State2 = call_primitive_last(State1, ?PRIM_SCHEDULE_NEXT_CP, [ctx, jit_state]),
+    % The end offset is now known: rewrite the bne and address placeholders
+    #state{stream = Stream3} = State2,
+    NewOffset = StreamModule:offset(Stream3),
+    NewI4 = jit_riscv32_asm:bne(Temp, zero, NewOffset - BNEOffset),
+    NewI5Offset = NewOffset - ADROffset,
+    % Generate the new pc_relative_address instruction, padding with NOP if needed
+    NewI5 =
+        case pc_relative_address(Temp, NewI5Offset) of
+            I when byte_size(I) =:= 4 ->
+                % Only auipc, pad with NOP (4 bytes)
+                <<I/binary, (jit_riscv32_asm:nop())/binary>>;
+            I when byte_size(I) =:= 6 ->
+                % auipc + c.addi, pad with c.nop (2 bytes)
+                <<I/binary, (jit_riscv32_asm:c_nop())/binary>>;
+            I when byte_size(I) =:= 8 ->
+                % auipc + addi, no padding needed
+                I
+        end,
+    Stream4 = StreamModule:replace(
+        Stream3, BNEOffset, <<NewI4/binary, NewI5/binary>>
+    ),
+    merge_used_regs(State2#state{stream = Stream4}, State1#state.used_regs).
+
+-spec call_or_schedule_next(state(), non_neg_integer()) -> state().
+call_or_schedule_next(State0, Label) ->
+    {WithCP, CPRewriteOffset, CPTempReg} = set_cp(State0),
+    Scheduled = call_only_or_schedule_next(WithCP, Label),
+    rewrite_cp_offset(Scheduled, CPRewriteOffset, CPTempReg).
+
+call_only_or_schedule_next(
+    #state{
+        stream_module = StreamModule,
+        stream = Stream0,
+        available_regs = [Temp | _]
+    } = State0,
+    Label
+) ->
+    % Load reduction count (from jit_state, via ?JITSTATE_REG)
+    I1 = jit_riscv32_asm:lw(Temp, ?JITSTATE_REG, ?JITSTATE_REDUCTIONCOUNT_OFFSET),
+    % Decrement reduction count
+    I2 = jit_riscv32_asm:addi(Temp, Temp, -1),
+    % Store back the decremented value
+    I3 = jit_riscv32_asm:sw(?JITSTATE_REG, Temp, ?JITSTATE_REDUCTIONCOUNT_OFFSET),
+    Stream1 = StreamModule:append(Stream0, <<I1/binary, I2/binary, I3/binary>>),
+    % If the count is not zero, execution continues at Label.
+    % If it is zero, execution falls through to the scheduling code below.
+    % When Label is out of bne range, a beq skips over a far-branch sequence.
+
+    % Look up label once to avoid duplicate lookup in helper
+    LabelLookupResult = lists:keyfind(Label, 1, State0#state.labels),
+
+    BccOffset = StreamModule:offset(Stream1),
+
+    State4 =
+        case LabelLookupResult of
+            {Label, LabelOffset} ->
+                % Label is known, check if we can optimize the conditional branch
+                % Rel is the label offset relative to the branch instruction itself
+                Rel = LabelOffset - BccOffset,
+
+                if
+                    Rel >= -4096 andalso Rel =< 4094 andalso (Rel rem 2) =:= 0 ->
+                        % Near branch: use direct conditional branch (RISC-V has ±4KB range)
+
+                        % Branch if NOT zero (temp != 0)
+                        I4 = jit_riscv32_asm:bne(Temp, zero, Rel),
+                        Stream2 = StreamModule:append(Stream1, I4),
+                        State0#state{stream = Stream2};
+                    true ->
+                        % Far branch: use trampoline with helper
+                        % Get the code block size for the far branch sequence that will follow
+
+                        % RISC-V branch is 4 bytes
+                        FarSeqOffset = BccOffset + 4,
+                        {State1, FarCodeBlock} = branch_to_label_code(
+                            State0, FarSeqOffset, Label, LabelLookupResult
+                        ),
+                        FarSeqSize = byte_size(FarCodeBlock),
+                        % Skip over the far branch sequence if zero (temp == 0)
+                        I4 = jit_riscv32_asm:beq(Temp, zero, FarSeqSize + 4),
+                        Stream2 = StreamModule:append(Stream1, I4),
+                        Stream3 = StreamModule:append(Stream2, FarCodeBlock),
+                        State1#state{stream = Stream3}
+                end;
+            false ->
+                % Label not known, get the far branch size for the skip
+
+                % RISC-V branch is 4 bytes
+                FarSeqOffset = BccOffset + 4,
+                {State1, FarCodeBlock} = branch_to_label_code(State0, FarSeqOffset, Label, false),
+                FarSeqSize = byte_size(FarCodeBlock),
+                I4 = jit_riscv32_asm:beq(Temp, zero, FarSeqSize + 4),
+                Stream2 = StreamModule:append(Stream1, I4),
+                Stream3 = StreamModule:append(Stream2, FarCodeBlock),
+                State1#state{stream = Stream3}
+        end,
+    State5 = set_continuation_to_label(State4, Label),
+    call_primitive_last(State5, ?PRIM_SCHEDULE_NEXT_CP, [ctx, jit_state]).
+
+call_primitive_with_cp(State0, Primitive, Args) ->
+    {WithCP, CPRewriteOffset, CPTempReg} = set_cp(State0),
+    Called = call_primitive_last(WithCP, Primitive, Args),
+    rewrite_cp_offset(Called, CPRewriteOffset, CPTempReg).
+
+-spec set_cp(state()) -> {state(), non_neg_integer(), riscv32_register()}.
+set_cp(#state{available_regs = [TempReg | AvailT], used_regs = UsedRegs} = State0) ->
+    % Reserve a temporary register for the offset BEFORE calling get_module_index
+    % to avoid running out of available registers
+    State0b = State0#state{available_regs = AvailT, used_regs = [TempReg | UsedRegs]},
+    % Emit the loads that fetch the module index into Reg
+    {
+        #state{stream_module = StreamModule, stream = Stream0} = State1,
+        Reg
+    } = get_module_index(
+        State0b
+    ),
+
+    Offset = StreamModule:offset(Stream0),
+    % cp is (module_index bsl 24) bor the value loaded in TempReg: shift first
+    I1 = jit_riscv32_asm:slli(Reg, Reg, 24),
+    % Reserve space for the instruction(s) loading the offset into TempReg.
+    % li can generate 1 instruction (4 bytes) for small immediates (< 2048)
+    % or 2 instructions (8 bytes) for large immediates
+    % Since we don't know the final CP value yet (it depends on code size),
+    % we must always reserve 2 instructions (8 bytes) to be safe
+    % The final CP value is (final_offset << 2), see rewrite_cp_offset/3
+    % Use 0xFFFFFFFF placeholders for flash compatibility (can only flip 1->0)
+    I2 = <<16#FFFFFFFF:32/little>>,
+    I3 = <<16#FFFFFFFF:32/little>>,
+    MOVOffset = Offset + byte_size(I1),
+    % OR the module index with the offset (loaded in temp register)
+    I4 = jit_riscv32_asm:or_(Reg, TempReg),
+    {BaseReg, Off} = ?CP,
+    I5 = jit_riscv32_asm:sw(BaseReg, Reg, Off),
+    Code = <<I1/binary, I2/binary, I3/binary, I4/binary, I5/binary>>,
+    Stream1 = StreamModule:append(Stream0, Code),
+    State2 = State1#state{stream = Stream1},
+    State3 = free_native_register(State2, Reg),
+    State4 = free_native_register(State3, TempReg),
+    {State4, MOVOffset, TempReg}.
+
+-spec rewrite_cp_offset(state(), non_neg_integer(), riscv32_register()) -> state().
+rewrite_cp_offset(
+    #state{stream_module = Mod, stream = Out0, offset = CodeOffset} = State0,
+    RewriteOffset,
+    TempReg
+) ->
+    % The CP value is the current offset (relative to CodeOffset) shifted left by 2
+    ReturnOffset = Mod:offset(Out0) - CodeOffset,
+    LoadCP = jit_riscv32_asm:li(TempReg, ReturnOffset bsl 2),
+    % 8 bytes (2 instructions) were reserved for this load: pad a shorter
+    % encoding with a NOP of matching width so the total stays at 8 bytes
+    Padding =
+        case byte_size(LoadCP) of
+            4 -> jit_riscv32_asm:nop();
+            6 -> jit_riscv32_asm:c_nop();
+            8 -> <<>>
+        end,
+    Out1 = Mod:replace(Out0, RewriteOffset, <<LoadCP/binary, Padding/binary>>),
+    State0#state{stream = Out1}.
+
+set_bs(
+    #state{stream_module = Mod, stream = Out0, available_regs = [Temp | _]} = State0,
+    TermReg
+) ->
+    % Store TermReg at ?BS, then store a zero (built in Temp) at ?BS_OFFSET
+    {BsBase, BsOff} = ?BS,
+    {OffsetBase, OffsetOff} = ?BS_OFFSET,
+    StoreTerm = jit_riscv32_asm:sw(BsBase, TermReg, BsOff),
+    Zero = jit_riscv32_asm:li(Temp, 0),
+    StoreZero = jit_riscv32_asm:sw(OffsetBase, Temp, OffsetOff),
+    State0#state{stream = Mod:append(Out0, <<StoreTerm/binary, Zero/binary, StoreZero/binary>>)}.
+
+%%-----------------------------------------------------------------------------
+%% @param State current state
+%% @param SortedLines line information, sorted by offset
+%% @doc Build labels and line tables and encode a function that returns it.
+%% In this case, the function returns the effective address of what immediately
+%% follows.
+%% @end
+%% @return New state
+%%-----------------------------------------------------------------------------
+return_labels_and_lines(
+    #state{
+        stream_module = Mod,
+        stream = Out0,
+        labels = Labels
+    } = State,
+    SortedLines
+) ->
+    % Only labels with an integer key go into the table (other keys are
+    % dropped); entries are ordered by offset
+    NumericLabels = [Entry || {Label, _} = Entry <- Labels, is_integer(Label)],
+    SortedLabels = lists:keysort(2, NumericLabels),
+
+    Return = jit_riscv32_asm:ret(),
+    % The tables are expected 10 bytes after the start of the prologue
+    % (8-byte address computation + 2-byte c.ret)
+    LoadAddr = pc_relative_address(a0, 10),
+    Prologue = <<LoadAddr/binary, Return/binary>>,
+    Padding =
+        case byte_size(Prologue) of
+            10 -> <<>>;
+            % 8-byte prologue (6 + 2): fill the gap with 2 bytes
+            8 -> <<16#FFFF:16>>
+        end,
+    LabelsTable = <<<<Label:16, Offset:32>> || {Label, Offset} <- SortedLabels>>,
+    LinesTable = <<<<Line:16, Offset:32>> || {Line, Offset} <- SortedLines>>,
+    Out1 = Mod:append(
+        Out0,
+        <<Prologue/binary, Padding/binary, (length(SortedLabels)):16, LabelsTable/binary,
+            (length(SortedLines)):16, LinesTable/binary>>
+    ),
+    State#state{stream = Out1}.
+
+%% @doc Generate a PC-relative address calculation using AUIPC, followed by
+%% ADDI when needed (the RISC-V counterpart of the ARM `adr' pseudo-instruction).
+-spec pc_relative_address(riscv32_register(), integer()) -> binary().
+pc_relative_address(Rd, 0) ->
+    % Zero offset: the current PC itself
+    jit_riscv32_asm:auipc(Rd, 0);
+pc_relative_address(Rd, Offset) ->
+    % The offset is split as Offset = (Upper bsl 12) + LowerSigned, where
+    %   - LowerSigned is the low 12 bits read as a signed value, i.e. the
+    %     immediate given to ADDI (-2048..2047);
+    %   - Upper is the 20-bit immediate that AUIPC shifts left by 12 bits
+    %     and adds to the PC.
+    % Reachable range:
+    %   (-524288 bsl 12) - 2048 to (524287 bsl 12) + 2047.
+    Lower = Offset band 16#FFF,
+    LowerSigned =
+        case Lower >= 16#800 of
+            true -> Lower - 16#1000;
+            false -> Lower
+        end,
+
+    % `bsr' is an arithmetic shift on Erlang integers, so it also works for
+    % negative offsets. When the low part is negative, Upper is incremented
+    % to compensate for what ADDI will subtract.
+    Upper =
+        case LowerSigned < 0 of
+            true -> (Offset bsr 12) + 1;
+            false -> Offset bsr 12
+        end,
+
+    % Reject offsets whose upper part does not fit AUIPC's 20-bit immediate
+    MinUpper = -16#80000,
+    MaxUpper = 16#7FFFF,
+    case Upper >= MinUpper andalso Upper =< MaxUpper of
+        true ->
+            ok;
+        false ->
+            error({offset_out_of_range, Offset, Upper, MinUpper, MaxUpper})
+    end,
+
+    % AUIPC is always emitted (with Upper = 0 for short distances); ADDI
+    % follows only when there is a non-zero low part to add.
+    Auipc = jit_riscv32_asm:auipc(Rd, Upper),
+    case LowerSigned of
+        0 ->
+            % Multiple of 4096: AUIPC alone
+            Auipc;
+        _ ->
+            % AUIPC + ADDI
+            Addi = jit_riscv32_asm:addi(Rd, Rd, LowerSigned),
+            <<Auipc/binary, Addi/binary>>
+    end.
+
+%% Helper: build the code storing SrcReg into y register Y, coping with large offsets
+str_y_reg(SrcReg, Y, TempReg, _AvailableRegs) when Y * 4 =< 124 ->
+    % Small offset: use it directly as the sw immediate
+    {BaseReg, Off} = ?Y_REGS,
+    LoadBase = jit_riscv32_asm:lw(TempReg, BaseReg, Off),
+    Store = jit_riscv32_asm:sw(TempReg, SrcReg, Y * 4),
+    <<LoadBase/binary, Store/binary>>;
+str_y_reg(SrcReg, Y, TempReg1, [TempReg2 | _]) ->
+    % Large offset: compute the slot address in a second available register
+    SlotOffset = Y * 4,
+    {BaseReg, Off} = ?Y_REGS,
+    LoadBase = jit_riscv32_asm:lw(TempReg1, BaseReg, Off),
+    LoadOffset = jit_riscv32_asm:li(TempReg2, SlotOffset),
+    AddBase = jit_riscv32_asm:add(TempReg2, TempReg2, TempReg1),
+    Store = jit_riscv32_asm:sw(TempReg2, SrcReg, 0),
+    <<LoadBase/binary, LoadOffset/binary, AddBase/binary, Store/binary>>;
+str_y_reg(SrcReg, Y, TempReg1, []) ->
+    % Large offset, nothing else available: ?IP_REG keeps the base meanwhile
+    SlotOffset = Y * 4,
+    {BaseReg, Off} = ?Y_REGS,
+    LoadBase = jit_riscv32_asm:lw(TempReg1, BaseReg, Off),
+    SaveBase = jit_riscv32_asm:mv(?IP_REG, TempReg1),
+    LoadOffset = jit_riscv32_asm:li(TempReg1, SlotOffset),
+    AddBase = jit_riscv32_asm:add(TempReg1, TempReg1, ?IP_REG),
+    Store = jit_riscv32_asm:sw(TempReg1, SrcReg, 0),
+    <<LoadBase/binary, SaveBase/binary, LoadOffset/binary, AddBase/binary, Store/binary>>.
+
+%% Helper: build the code loading y register Y into DstReg, coping with large offsets
+ldr_y_reg(DstReg, Y, [TempReg | _]) when Y * 4 =< 124 ->
+    % Small offset: use it directly as the lw immediate
+    {BaseReg, Off} = ?Y_REGS,
+    LoadBase = jit_riscv32_asm:lw(TempReg, BaseReg, Off),
+    LoadSlot = jit_riscv32_asm:lw(DstReg, TempReg, Y * 4),
+    <<LoadBase/binary, LoadSlot/binary>>;
+ldr_y_reg(DstReg, Y, [TempReg | _]) ->
+    % Large offset: DstReg doubles as scratch for the address arithmetic
+    SlotOffset = Y * 4,
+    {BaseReg, Off} = ?Y_REGS,
+    LoadBase = jit_riscv32_asm:lw(TempReg, BaseReg, Off),
+    LoadOffset = jit_riscv32_asm:li(DstReg, SlotOffset),
+    AddBase = jit_riscv32_asm:add(DstReg, DstReg, TempReg),
+    LoadSlot = jit_riscv32_asm:lw(DstReg, DstReg, 0),
+    <<LoadBase/binary, LoadOffset/binary, AddBase/binary, LoadSlot/binary>>;
+ldr_y_reg(DstReg, Y, []) when Y * 4 =< 124 ->
+    % Small offset, nothing available: DstReg holds the base before the load
+    {BaseReg, Off} = ?Y_REGS,
+    LoadBase = jit_riscv32_asm:lw(DstReg, BaseReg, Off),
+    LoadSlot = jit_riscv32_asm:lw(DstReg, DstReg, Y * 4),
+    <<LoadBase/binary, LoadSlot/binary>>;
+ldr_y_reg(DstReg, Y, []) ->
+    % Large offset, nothing available: ?IP_REG keeps the base while DstReg
+    % receives the offset
+    SlotOffset = Y * 4,
+    {BaseReg, Off} = ?Y_REGS,
+    LoadBase = jit_riscv32_asm:lw(DstReg, BaseReg, Off),
+    SaveBase = jit_riscv32_asm:mv(?IP_REG, DstReg),
+    LoadOffset = jit_riscv32_asm:li(DstReg, SlotOffset),
+    AddBase = jit_riscv32_asm:add(DstReg, DstReg, ?IP_REG),
+    LoadSlot = jit_riscv32_asm:lw(DstReg, DstReg, 0),
+    <<LoadBase/binary, SaveBase/binary, LoadOffset/binary, AddBase/binary, LoadSlot/binary>>.
+
+free_reg(AvailableRegs0, UsedRegs0, Reg) when ?IS_GPR(Reg) ->
+    % Re-insert Reg following the ?AVAILABLE_REGS order; crash if it was not in use
+    Available = free_reg0(?AVAILABLE_REGS, AvailableRegs0, Reg, []),
+    true = lists:member(Reg, UsedRegs0),
+    {Available, lists:delete(Reg, UsedRegs0)}.
+
+%% Walk the reference ordering (first argument) alongside the currently
+%% available registers (second argument) and insert Reg at the position it
+%% has in the reference ordering. Registers of the reference ordering that
+%% are not at the head of the available list are skipped.
+free_reg0([Next | SortedT], Available, Reg, Acc) ->
+    case Available of
+        _ when Next =:= Reg ->
+            % Reached the slot of Reg: everything still available follows it
+            lists:reverse(Acc, [Reg | Available]);
+        [Next | AvailableT] ->
+            % Next is available and sorts before Reg: keep it
+            free_reg0(SortedT, AvailableT, Reg, [Next | Acc]);
+        _ ->
+            % Next is not available: skip it
+            free_reg0(SortedT, Available, Reg, Acc)
+    end.
+
+%% Map every call argument descriptor to the register it refers to, or to a
+%% marker atom (imm, jit_state, stack) when it does not name a register.
+args_regs(Args) ->
+    Classify = fun
+        % Named markers, matched before the generic atom clause below
+        (ctx) -> ?CTX_REG;
+        (jit_state) -> jit_state;
+        (jit_state_tail_call) -> jit_state;
+        (stack) -> stack;
+        (offset) -> imm;
+        % Plain registers and pointers through a register
+        (Reg) when is_atom(Reg) -> Reg;
+        ({ptr, Reg}) -> Reg;
+        ({free, Reg}) when is_atom(Reg) -> Reg;
+        ({free, {ptr, Reg}}) -> Reg;
+        % Immediate values
+        (Imm) when is_integer(Imm) -> imm;
+        ({free, Imm}) when is_integer(Imm) -> imm;
+        ({avm_int64_t, _}) -> imm;
+        % x, y and fp registers are all mapped to ?CTX_REG
+        ({x_reg, _}) -> ?CTX_REG;
+        ({y_reg, _}) -> ?CTX_REG;
+        ({fp_reg, _}) -> ?CTX_REG;
+        ({free, {x_reg, _}}) -> ?CTX_REG;
+        ({free, {y_reg, _}}) -> ?CTX_REG;
+        ({free, {fp_reg, _}}) -> ?CTX_REG
+    end,
+    lists:map(Classify, Args).
+
+%%-----------------------------------------------------------------------------
+%% @doc Add a label at the offset currently reported by the stream module.
+%% @end
+%% @param State current backend state
+%% @param Label the label number or reference
+%% @return Updated backend state
+%%-----------------------------------------------------------------------------
+-spec add_label(state(), integer() | reference()) -> state().
+add_label(#state{stream_module = StreamModule, stream = Stream} = State, Label) ->
+    add_label(State, Label, StreamModule:offset(Stream)).
+
+%%-----------------------------------------------------------------------------
+%% @doc Add a label at a specific offset.
+%% For an integer label, the matching jump table entry is rewritten in place
+%% and the pending branches are handed to patch_branches_for_label/5 before
+%% the label is recorded. Any other label is only recorded.
+%% @end
+%% @param State current backend state
+%% @param Label the label number or reference
+%% @param Offset the explicit offset for this label
+%% @return Updated backend state
+%%-----------------------------------------------------------------------------
+-spec add_label(state(), integer() | reference(), integer()) -> state().
+add_label(#state{labels = Known} = State, Label, Offset) when not is_integer(Label) ->
+    State#state{labels = [{Label, Offset} | Known]};
+add_label(
+    #state{
+        stream_module = StreamModule,
+        stream = StreamIn,
+        jump_table_start = TableStart,
+        branches = PendingBranches,
+        labels = Known
+    } = State,
+    Label,
+    LabelOffset
+) ->
+    % Jump table entries are 8 bytes each (AUIPC + JALR), indexed by label
+    EntryOffset = TableStart + Label * 8,
+    % Displacement from the AUIPC of the entry to the label
+    Delta = LabelOffset - EntryOffset,
+    % target = PC + (Hi bsl 12) + sign_ext(Lo): Hi is rounded up whenever the
+    % low 12 bits are read back as a negative number
+    Hi = (Delta + 16#800) bsr 12,
+    Lo =
+        case Delta band 16#FFF of
+            Low when Low >= 16#800 -> Low - 16#1000;
+            Low -> Low
+        end,
+    RawEntry = <<
+        (jit_riscv32_asm:auipc(a3, Hi))/binary,
+        (jit_riscv32_asm:jalr(zero, a3, Lo))/binary
+    >>,
+    % A 6-byte pair is padded with c.nop so that entries stay 8 bytes long
+    Entry =
+        case byte_size(RawEntry) of
+            8 -> RawEntry;
+            6 -> <<RawEntry/binary, (jit_riscv32_asm:c_nop())/binary>>
+        end,
+    StreamPatched = StreamModule:replace(StreamIn, EntryOffset, Entry),
+    % Patch right away the branches that were waiting for this label
+    {StreamOut, StillPending} = patch_branches_for_label(
+        StreamModule,
+        StreamPatched,
+        Label,
+        LabelOffset,
+        PendingBranches
+    ),
+    State#state{
+        stream = StreamOut,
+        branches = StillPending,
+        labels = [{Label, LabelOffset} | Known]
+    }.
diff --git a/libs/jit/src/jit_riscv32_asm.erl b/libs/jit/src/jit_riscv32_asm.erl
new file mode 100644
index 0000000000..25bf1ff574
--- /dev/null
+++ b/libs/jit/src/jit_riscv32_asm.erl
@@ -0,0 +1,1802 @@
+%
+% This file is part of AtomVM.
+%
+% Copyright 2025 Paul Guyot <pguyot@kallisys.net>
+%
+% Licensed under the Apache License, Version 2.0 (the "License");
+% you may not use this file except in compliance with the License.
+% You may obtain a copy of the License at
+%
+%    http://www.apache.org/licenses/LICENSE-2.0
+%
+% Unless required by applicable law or agreed to in writing, software
+% distributed under the License is distributed on an "AS IS" BASIS,
+% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+% See the License for the specific language governing permissions and
+% limitations under the License.
+%
+% SPDX-License-Identifier: Apache-2.0 OR LGPL-2.1-or-later
+%
+
+-module(jit_riscv32_asm).
+
+-export([
+    % R-type arithmetic and logical instructions
+    add/3,
+    sub/3,
+    and_/3,
+    or_/2,
+    or_/3,
+    xor_/3,
+    sll/3,
+    srl/3,
+    sra/3,
+    slt/3,
+    sltu/3,
+    % I-type immediate instructions
+    addi/3,
+    andi/3,
+    ori/3,
+    xori/3,
+    slli/3,
+    srli/3,
+    srai/3,
+    slti/3,
+    sltiu/3,
+    % Load instructions
+    lw/2,
+    lw/3,
+    lh/2,
+    lh/3,
+    lhu/2,
+    lhu/3,
+    lb/2,
+    lb/3,
+    lbu/2,
+    lbu/3,
+    % Store instructions
+    sw/2,
+    sw/3,
+    sh/2,
+    sh/3,
+    sb/2,
+    sb/3,
+    % Branch instructions
+    beq/3,
+    bne/3,
+    blt/3,
+    bge/3,
+    bltu/3,
+    bgeu/3,
+    % Jump instructions
+    jal/2,
+    jalr/3,
+    jalr/2,
+    % Upper immediate instructions
+    lui/2,
+    auipc/2,
+    % Pseudo-instructions
+    nop/0,
+    li/2,
+    mv/2,
+    not_/2,
+    neg/2,
+    j/1,
+    jr/1,
+    ret/0,
+    call/2,
+    % M extension (multiply/divide)
+    mul/3,
+    % C extension (compressed) - arithmetic/logical
+    c_add/2,
+    c_sub/2,
+    c_and/2,
+    c_or/2,
+    c_xor/2,
+    c_mv/2,
+    % C extension - immediate instructions
+    c_addi/2,
+    c_andi/2,
+    c_li/2,
+    c_lui/2,
+    c_addi16sp/1,
+    c_addi4spn/2,
+    % C extension - shift instructions
+    c_slli/2,
+    c_srli/2,
+    c_srai/2,
+    % C extension - load/store
+    c_lw/2,
+    c_sw/2,
+    c_lwsp/2,
+    c_swsp/2,
+    % C extension - branches and jumps
+    c_beqz/2,
+    c_bnez/2,
+    c_j/1,
+    c_jal/1,
+    c_jr/1,
+    c_jalr/1,
+    % C extension - system instructions
+    c_ebreak/0,
+    % C extension - pseudo-instructions
+    c_nop/0
+]).
+
+-export_type([
+    riscv_register/0
+]).
+
+%% RISC-V 32-bit (RV32I) Assembler
+%%
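+%% This module provides an assembler for the RISC-V 32-bit instruction set.
+%% It generates binary machine code for the RV32I base integer instruction
+%% set, plus the multiply instruction of the M extension and instructions of
+%% the C (compressed) extension. Several base mnemonics (e.g. add/3, lw/3)
+%% emit the compressed encoding when their operands allow it.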
+%%
+%% RISC-V Register Set (32 registers):
+%%   x0  (zero) - Hardwired zero (reads as 0, writes ignored)
+%%   x1  (ra)   - Return address
+%%   x2  (sp)   - Stack pointer
+%%   x3  (gp)   - Global pointer
+%%   x4  (tp)   - Thread pointer
+%%   x5  (t0)   - Temporary register 0
+%%   x6  (t1)   - Temporary register 1
+%%   x7  (t2)   - Temporary register 2
+%%   x8  (s0/fp)- Saved register 0 / Frame pointer
+%%   x9  (s1)   - Saved register 1
+%%   x10 (a0)   - Function argument 0 / Return value 0
+%%   x11 (a1)   - Function argument 1 / Return value 1
+%%   x12 (a2)   - Function argument 2
+%%   x13 (a3)   - Function argument 3
+%%   x14 (a4)   - Function argument 4
+%%   x15 (a5)   - Function argument 5
+%%   x16 (a6)   - Function argument 6
+%%   x17 (a7)   - Function argument 7
+%%   x18 (s2)   - Saved register 2
+%%   x19 (s3)   - Saved register 3
+%%   x20 (s4)   - Saved register 4
+%%   x21 (s5)   - Saved register 5
+%%   x22 (s6)   - Saved register 6
+%%   x23 (s7)   - Saved register 7
+%%   x24 (s8)   - Saved register 8
+%%   x25 (s9)   - Saved register 9
+%%   x26 (s10)  - Saved register 10
+%%   x27 (s11)  - Saved register 11
+%%   x28 (t3)   - Temporary register 3
+%%   x29 (t4)   - Temporary register 4
+%%   x30 (t5)   - Temporary register 5
+%%   x31 (t6)   - Temporary register 6
+%%
+%% RISC-V Calling Convention (ILP32):
+%%   - Arguments: a0-a7 (x10-x17)
+%%   - Return values: a0-a1 (x10-x11)
+%%   - Caller-saved: ra, t0-t6, a0-a7
+%%   - Callee-saved: s0-s11, sp
+%%   - Stack grows downward
+%%   - Stack must be 16-byte aligned at function call boundaries
+%%
+%% Instruction Encoding:
+%%   All RV32I base instructions are 32 bits (4 bytes); C extension
+%%   (compressed) instructions are 16 bits (2 bytes).
+%%   Instruction words are emitted in little-endian byte order.
+%%
+%% See: RISC-V Instruction Set Manual, Volume I: User-Level ISA
+%% https://riscv.org/technical/specifications/
+%% https://github.com/riscv/riscv-isa-manual/
+
+%% ABI names of the integer registers accepted by the functions of this
+%% module. s0 and fp are two names for the same register (reg_to_num/1 maps
+%% both to 8).
+-type riscv_register() ::
+    zero
+    | ra
+    | sp
+    | gp
+    | tp
+    | t0
+    | t1
+    | t2
+    | s0
+    | fp
+    | s1
+    | a0
+    | a1
+    | a2
+    | a3
+    | a4
+    | a5
+    | a6
+    | a7
+    | s2
+    | s3
+    | s4
+    | s5
+    | s6
+    | s7
+    | s8
+    | s9
+    | s10
+    | s11
+    | t3
+    | t4
+    | t5
+    | t6.
+
+%%-----------------------------------------------------------------------------
+%% Helper functions
+%%-----------------------------------------------------------------------------
+
+%% Convert register atoms to register numbers (0-31)
+%% The number is the index N of the xN register, as used in the register
+%% fields of the instruction encodings. There is no catch-all clause: any
+%% term that is not one of the ABI names below fails with function_clause.
+-spec reg_to_num(riscv_register()) -> 0..31.
+% ABI names
+reg_to_num(zero) -> 0;
+reg_to_num(ra) -> 1;
+reg_to_num(sp) -> 2;
+reg_to_num(gp) -> 3;
+reg_to_num(tp) -> 4;
+reg_to_num(t0) -> 5;
+reg_to_num(t1) -> 6;
+reg_to_num(t2) -> 7;
+% s0 and fp both name x8
+reg_to_num(s0) -> 8;
+reg_to_num(fp) -> 8;
+reg_to_num(s1) -> 9;
+reg_to_num(a0) -> 10;
+reg_to_num(a1) -> 11;
+reg_to_num(a2) -> 12;
+reg_to_num(a3) -> 13;
+reg_to_num(a4) -> 14;
+reg_to_num(a5) -> 15;
+reg_to_num(a6) -> 16;
+reg_to_num(a7) -> 17;
+reg_to_num(s2) -> 18;
+reg_to_num(s3) -> 19;
+reg_to_num(s4) -> 20;
+reg_to_num(s5) -> 21;
+reg_to_num(s6) -> 22;
+reg_to_num(s7) -> 23;
+reg_to_num(s8) -> 24;
+reg_to_num(s9) -> 25;
+reg_to_num(s10) -> 26;
+reg_to_num(s11) -> 27;
+reg_to_num(t3) -> 28;
+reg_to_num(t4) -> 29;
+reg_to_num(t5) -> 30;
+reg_to_num(t6) -> 31.
+
+%%-----------------------------------------------------------------------------
+%% R-type instruction encoding
+%%-----------------------------------------------------------------------------
+
+%% R-type layout, from most to least significant bit:
+%%   funct7 (31-25) | rs2 (24-20) | rs1 (19-15) | funct3 (14-12) | rd (11-7) | opcode (6-0)
+%%
+%% The fields are OR-ed together without masking and the resulting word is
+%% returned as 4 little-endian bytes.
+
+-spec encode_r_type(
+    Opcode :: 0..127,
+    Rd :: riscv_register(),
+    Funct3 :: 0..7,
+    Rs1 :: riscv_register(),
+    Rs2 :: riscv_register(),
+    Funct7 :: 0..127
+) -> binary().
+encode_r_type(Opcode, Rd, Funct3, Rs1, Rs2, Funct7) ->
+    RdBits = reg_to_num(Rd) bsl 7,
+    Rs1Bits = reg_to_num(Rs1) bsl 15,
+    Rs2Bits = reg_to_num(Rs2) bsl 20,
+    Word =
+        Opcode bor
+            RdBits bor
+            (Funct3 bsl 12) bor
+            Rs1Bits bor
+            Rs2Bits bor
+            (Funct7 bsl 25),
+    <<Word:32/little>>.
+
+%%-----------------------------------------------------------------------------
+%% R-type arithmetic and logical instructions
+%%-----------------------------------------------------------------------------
+
+%% ADD - Add
+%% rd = rs1 + rs2
+%% The compressed c.add form is emitted when rd and rs1 are the same
+%% register and neither rd nor rs2 is zero.
+-spec add(riscv_register(), riscv_register(), riscv_register()) -> binary().
+add(Rd, Rd, Rs2) when Rd =/= zero, Rs2 =/= zero ->
+    c_add(Rd, Rs2);
+add(Rd, Rs1, Rs2) ->
+    % R-type: opcode 0110011, funct3 000, funct7 0000000
+    Opcode = 2#0110011,
+    Funct3 = 2#000,
+    Funct7 = 2#0000000,
+    encode_r_type(Opcode, Rd, Funct3, Rs1, Rs2, Funct7).
+
+%% SUB - Subtract
+%% rd = rs1 - rs2
+%% The compressed c.sub form is emitted when rd and rs1 are the same
+%% register and both rd and rs2 satisfy is_compressed_reg/1.
+-spec sub(riscv_register(), riscv_register(), riscv_register()) -> binary().
+sub(Rd, Rs1, Rs2) ->
+    Compressible =
+        Rd =:= Rs1 andalso is_compressed_reg(Rd) andalso is_compressed_reg(Rs2),
+    case Compressible of
+        true ->
+            c_sub(Rd, Rs2);
+        false ->
+            % R-type: opcode 0110011, funct3 000, funct7 0100000
+            encode_r_type(2#0110011, Rd, 2#000, Rs1, Rs2, 2#0100000)
+    end.
+
+%% AND - Bitwise AND
+%% rd = rs1 & rs2
+%% The compressed c.and form is emitted when rd and rs1 are the same
+%% register and both rd and rs2 satisfy is_compressed_reg/1.
+-spec and_(riscv_register(), riscv_register(), riscv_register()) -> binary().
+and_(Rd, Rs1, Rs2) ->
+    Compressible =
+        Rd =:= Rs1 andalso is_compressed_reg(Rd) andalso is_compressed_reg(Rs2),
+    case Compressible of
+        true ->
+            c_and(Rd, Rs2);
+        false ->
+            % R-type: opcode 0110011, funct3 111, funct7 0000000
+            encode_r_type(2#0110011, Rd, 2#111, Rs1, Rs2, 2#0000000)
+    end.
+
+%% OR - Bitwise OR
+%% rd = rs1 | rs2
+%% The compressed c.or form is emitted when rd and rs1 are the same
+%% register and both rd and rs2 satisfy is_compressed_reg/1.
+-spec or_(riscv_register(), riscv_register(), riscv_register()) -> binary().
+or_(Rd, Rs1, Rs2) ->
+    Compressible =
+        Rd =:= Rs1 andalso is_compressed_reg(Rd) andalso is_compressed_reg(Rs2),
+    case Compressible of
+        true ->
+            c_or(Rd, Rs2);
+        false ->
+            % R-type: opcode 0110011, funct3 110, funct7 0000000
+            encode_r_type(2#0110011, Rd, 2#110, Rs1, Rs2, 2#0000000)
+    end.
+
+%% OR - Bitwise OR, two-operand form
+%% rd = rd | rs, i.e. or_/3 with rd used as first source
+-spec or_(riscv_register(), riscv_register()) -> binary().
+or_(Rd, Rs) ->
+    or_(Rd, Rd, Rs).
+
+%% XOR - Bitwise XOR
+%% rd = rs1 ^ rs2
+%% The compressed c.xor form is emitted when rd and rs1 are the same
+%% register and both rd and rs2 satisfy is_compressed_reg/1.
+-spec xor_(riscv_register(), riscv_register(), riscv_register()) -> binary().
+xor_(Rd, Rs1, Rs2) ->
+    Compressible =
+        Rd =:= Rs1 andalso is_compressed_reg(Rd) andalso is_compressed_reg(Rs2),
+    case Compressible of
+        true ->
+            c_xor(Rd, Rs2);
+        false ->
+            % R-type: opcode 0110011, funct3 100, funct7 0000000
+            encode_r_type(2#0110011, Rd, 2#100, Rs1, Rs2, 2#0000000)
+    end.
+
+%% SLL - Shift Left Logical
+%% rd = rs1 << rs2[4:0]
+-spec sll(riscv_register(), riscv_register(), riscv_register()) -> binary().
+sll(Rd, Rs1, Rs2) ->
+    % R-type: opcode 0110011, funct3 001, funct7 0000000
+    Opcode = 2#0110011,
+    Funct3 = 2#001,
+    Funct7 = 2#0000000,
+    encode_r_type(Opcode, Rd, Funct3, Rs1, Rs2, Funct7).
+
+%% SRL - Shift Right Logical
+%% rd = rs1 >> rs2[4:0], zeroes shifted in
+-spec srl(riscv_register(), riscv_register(), riscv_register()) -> binary().
+srl(Rd, Rs1, Rs2) ->
+    % R-type: opcode 0110011, funct3 101, funct7 0000000
+    Opcode = 2#0110011,
+    Funct3 = 2#101,
+    Funct7 = 2#0000000,
+    encode_r_type(Opcode, Rd, Funct3, Rs1, Rs2, Funct7).
+
+%% SRA - Shift Right Arithmetic
+%% rd = rs1 >> rs2[4:0], sign bit shifted in
+-spec sra(riscv_register(), riscv_register(), riscv_register()) -> binary().
+sra(Rd, Rs1, Rs2) ->
+    % R-type: opcode 0110011, funct3 101, funct7 0100000
+    Opcode = 2#0110011,
+    Funct3 = 2#101,
+    Funct7 = 2#0100000,
+    encode_r_type(Opcode, Rd, Funct3, Rs1, Rs2, Funct7).
+
+%% SLT - Set Less Than
+%% rd = 1 if rs1 < rs2 (signed comparison), 0 otherwise
+-spec slt(riscv_register(), riscv_register(), riscv_register()) -> binary().
+slt(Rd, Rs1, Rs2) ->
+    % R-type: opcode 0110011, funct3 010, funct7 0000000
+    Opcode = 2#0110011,
+    Funct3 = 2#010,
+    Funct7 = 2#0000000,
+    encode_r_type(Opcode, Rd, Funct3, Rs1, Rs2, Funct7).
+
+%% SLTU - Set Less Than Unsigned
+%% rd = 1 if rs1 < rs2 (unsigned comparison), 0 otherwise
+-spec sltu(riscv_register(), riscv_register(), riscv_register()) -> binary().
+sltu(Rd, Rs1, Rs2) ->
+    % R-type: opcode 0110011, funct3 011, funct7 0000000
+    Opcode = 2#0110011,
+    Funct3 = 2#011,
+    Funct7 = 2#0000000,
+    encode_r_type(Opcode, Rd, Funct3, Rs1, Rs2, Funct7).
+
+%%-----------------------------------------------------------------------------
+%% I-type instruction encoding
+%%-----------------------------------------------------------------------------
+
+%% I-type layout, from most to least significant bit:
+%%   imm[11:0] (31-20) | rs1 (19-15) | funct3 (14-12) | rd (11-7) | opcode (6-0)
+%%
+%% Only the low 12 bits of Imm are kept, so a negative immediate ends up in
+%% its 12-bit two's complement form. The resulting word is returned as 4
+%% little-endian bytes.
+
+-spec encode_i_type(
+    Opcode :: 0..127,
+    Rd :: riscv_register(),
+    Funct3 :: 0..7,
+    Rs1 :: riscv_register(),
+    Imm :: integer()
+) -> binary().
+encode_i_type(Opcode, Rd, Funct3, Rs1, Imm) ->
+    RdBits = reg_to_num(Rd) bsl 7,
+    Rs1Bits = reg_to_num(Rs1) bsl 15,
+    ImmBits = (Imm band 16#FFF) bsl 20,
+    Word =
+        Opcode bor
+            RdBits bor
+            (Funct3 bsl 12) bor
+            Rs1Bits bor
+            ImmBits,
+    <<Word:32/little>>.
+
+%%-----------------------------------------------------------------------------
+%% I-type immediate arithmetic and logical instructions
+%%-----------------------------------------------------------------------------
+
+%% ADDI - Add Immediate
+%% rd = rs1 + imm
+%% Imm must be within [-2048, 2047], otherwise the call fails with
+%% {immediate_out_of_range, Imm, -2048, 2047}.
+-spec addi(riscv_register(), riscv_register(), integer()) -> binary().
+addi(Rd, Rs1, Imm) when Rd =:= Rs1, Rd =/= zero, Imm =/= 0, Imm >= -32, Imm =< 31 ->
+    % Use c.addi when rd == rs1, rd != zero, and imm is a non-zero value that
+    % fits in 6 bits (signed). C.ADDI requires a non-zero immediate (the
+    % zero-immediate code points are HINTs), so imm == 0 uses the base encoding.
+    c_addi(Rd, Imm);
+addi(Rd, Rs1, Imm) when Imm >= -2048, Imm =< 2047 ->
+    % Opcode: 0010011 (0x13), Funct3: 000
+    encode_i_type(16#13, Rd, 16#0, Rs1, Imm);
+addi(_Rd, _Rs1, Imm) ->
+    error({immediate_out_of_range, Imm, -2048, 2047}).
+
+%% ANDI - AND Immediate
+%% rd = rs1 & imm
+%% Imm must be within [-2048, 2047]. The compressed c.andi form is emitted
+%% when rd and rs1 are the same register, imm is within [-32, 31] and rd
+%% satisfies is_compressed_reg/1.
+-spec andi(riscv_register(), riscv_register(), integer()) -> binary().
+andi(_Rd, _Rs1, Imm) when Imm < -2048; Imm > 2047 ->
+    error({immediate_out_of_range, Imm, -2048, 2047});
+andi(Rd, Rs1, Imm) ->
+    Compressible =
+        Rd =:= Rs1 andalso Imm >= -32 andalso Imm =< 31 andalso is_compressed_reg(Rd),
+    case Compressible of
+        true ->
+            c_andi(Rd, Imm);
+        false ->
+            % I-type: opcode 0010011, funct3 111
+            encode_i_type(2#0010011, Rd, 2#111, Rs1, Imm)
+    end.
+
+%% ORI - OR Immediate
+%% rd = rs1 | imm, with imm within [-2048, 2047]
+-spec ori(riscv_register(), riscv_register(), integer()) -> binary().
+ori(_Rd, _Rs1, Imm) when Imm < -2048; Imm > 2047 ->
+    error({immediate_out_of_range, Imm, -2048, 2047});
+ori(Rd, Rs1, Imm) ->
+    % I-type: opcode 0010011, funct3 110
+    Opcode = 2#0010011,
+    Funct3 = 2#110,
+    encode_i_type(Opcode, Rd, Funct3, Rs1, Imm).
+
+%% XORI - XOR Immediate
+%% rd = rs1 ^ imm, with imm within [-2048, 2047]
+-spec xori(riscv_register(), riscv_register(), integer()) -> binary().
+xori(_Rd, _Rs1, Imm) when Imm < -2048; Imm > 2047 ->
+    error({immediate_out_of_range, Imm, -2048, 2047});
+xori(Rd, Rs1, Imm) ->
+    % I-type: opcode 0010011, funct3 100
+    Opcode = 2#0010011,
+    Funct3 = 2#100,
+    encode_i_type(Opcode, Rd, Funct3, Rs1, Imm).
+
+%% SLTI - Set Less Than Immediate
+%% rd = 1 if rs1 < imm (signed comparison), 0 otherwise
+%% Imm must be within [-2048, 2047].
+-spec slti(riscv_register(), riscv_register(), integer()) -> binary().
+slti(_Rd, _Rs1, Imm) when Imm < -2048; Imm > 2047 ->
+    error({immediate_out_of_range, Imm, -2048, 2047});
+slti(Rd, Rs1, Imm) ->
+    % I-type: opcode 0010011, funct3 010
+    Opcode = 2#0010011,
+    Funct3 = 2#010,
+    encode_i_type(Opcode, Rd, Funct3, Rs1, Imm).
+
+%% SLTIU - Set Less Than Immediate Unsigned
+%% rd = 1 if rs1 < imm (unsigned comparison), 0 otherwise
+%% Imm must be within [-2048, 2047].
+-spec sltiu(riscv_register(), riscv_register(), integer()) -> binary().
+sltiu(_Rd, _Rs1, Imm) when Imm < -2048; Imm > 2047 ->
+    error({immediate_out_of_range, Imm, -2048, 2047});
+sltiu(Rd, Rs1, Imm) ->
+    % I-type: opcode 0010011, funct3 011
+    Opcode = 2#0010011,
+    Funct3 = 2#011,
+    encode_i_type(Opcode, Rd, Funct3, Rs1, Imm).
+
+%%-----------------------------------------------------------------------------
+%% I-type immediate shift instructions
+%%-----------------------------------------------------------------------------
+
+%% SLLI - Shift Left Logical Immediate
+%% rd = rs1 << shamt, with shamt within [0, 31]
+%% The compressed c.slli form is emitted when rd and rs1 are the same
+%% non-zero register and shamt is not 0.
+-spec slli(riscv_register(), riscv_register(), 0..31) -> binary().
+slli(Rd, Rd, Shamt) when Rd =/= zero, Shamt >= 1, Shamt =< 31 ->
+    c_slli(Rd, Shamt);
+slli(_Rd, _Rs1, Shamt) when Shamt < 0; Shamt > 31 ->
+    error({shift_amount_out_of_range, Shamt, 0, 31});
+slli(Rd, Rs1, Shamt) ->
+    % I-type: opcode 0010011, funct3 001, imm[11:5] = 0000000
+    Opcode = 2#0010011,
+    Funct3 = 2#001,
+    encode_i_type(Opcode, Rd, Funct3, Rs1, Shamt).
+
+%% SRLI - Shift Right Logical Immediate
+%% rd = rs1 >> shamt (zero-extend)
+%% Shamt must be within [0, 31], otherwise the call fails with
+%% {shift_amount_out_of_range, Shamt, 0, 31}.
+-spec srli(riscv_register(), riscv_register(), 0..31) -> binary().
+srli(Rd, Rs1, Shamt) when Rd =:= Rs1, Shamt >= 1, Shamt =< 31 ->
+    % Use c.srli when rd == rs1 is a compressed register and shamt != 0.
+    % As for slli/3, a zero shift amount is not a valid compressed shift, so
+    % shamt == 0 falls through to the base encoding below.
+    case is_compressed_reg(Rd) of
+        true -> c_srli(Rd, Shamt);
+        false -> encode_i_type(16#13, Rd, 16#5, Rs1, Shamt)
+    end;
+srli(Rd, Rs1, Shamt) when Shamt >= 0, Shamt =< 31 ->
+    % Opcode: 0010011 (0x13), Funct3: 101, Imm[11:5] = 0000000
+    encode_i_type(16#13, Rd, 16#5, Rs1, Shamt);
+srli(_Rd, _Rs1, Shamt) ->
+    error({shift_amount_out_of_range, Shamt, 0, 31}).
+
+%% SRAI - Shift Right Arithmetic Immediate
+%% rd = rs1 >> shamt (sign-extend)
+%% Shamt must be within [0, 31], otherwise the call fails with
+%% {shift_amount_out_of_range, Shamt, 0, 31}.
+-spec srai(riscv_register(), riscv_register(), 0..31) -> binary().
+srai(Rd, Rs1, Shamt) when Rd =:= Rs1, Shamt >= 1, Shamt =< 31 ->
+    % Use c.srai when rd == rs1 is a compressed register and shamt != 0.
+    % As for slli/3, a zero shift amount is not a valid compressed shift, so
+    % shamt == 0 falls through to the base encoding below.
+    case is_compressed_reg(Rd) of
+        true ->
+            c_srai(Rd, Shamt);
+        false ->
+            ImmWithBit30 = Shamt bor (1 bsl 10),
+            encode_i_type(16#13, Rd, 16#5, Rs1, ImmWithBit30)
+    end;
+srai(Rd, Rs1, Shamt) when Shamt >= 0, Shamt =< 31 ->
+    % Opcode: 0010011 (0x13), Funct3: 101, Imm[11:5] = 0100000
+    % The encoding uses bit 30 (Imm[10]) to distinguish SRAI from SRLI
+    ImmWithBit30 = Shamt bor (1 bsl 10),
+    encode_i_type(16#13, Rd, 16#5, Rs1, ImmWithBit30);
+srai(_Rd, _Rs1, Shamt) ->
+    error({shift_amount_out_of_range, Shamt, 0, 31}).
+
+%%-----------------------------------------------------------------------------
+%% Load instructions (I-type)
+%%-----------------------------------------------------------------------------
+
+%% LW - Load Word
+%% rd = mem[rs1 + offset] (32-bit)
+%%
+%% lw/2 takes the destination register first and the address second, either
+%% as a {Base, Offset} tuple or as a bare base register (offset 0).
+-spec lw(riscv_register(), {riscv_register(), integer()} | riscv_register()) -> binary().
+lw(Rd, {Rs1, Offset}) ->
+    lw(Rd, Rs1, Offset);
+lw(Rd, Rs1) when is_atom(Rs1) ->
+    lw(Rd, Rs1, 0).
+
+%% lw/3 picks a compressed encoding when the operands allow it and fails with
+%% {offset_out_of_range, Offset, -2048, 2047} when Offset does not fit.
+-spec lw(riscv_register(), riscv_register(), integer()) -> binary().
+lw(Rd, sp, Offset) when Rd =/= zero, Offset >= 0, Offset =< 252, Offset rem 4 =:= 0 ->
+    % Use c.lwsp for loads from sp with aligned offset in range
+    c_lwsp(Rd, Offset);
+lw(Rd, Rs1, Offset) when Offset >= 0, Offset =< 124, Offset rem 4 =:= 0 ->
+    % Use c.lw when both registers are in compressed set and offset is aligned
+    case is_compressed_reg(Rd) andalso is_compressed_reg(Rs1) of
+        true -> c_lw(Rd, {Rs1, Offset});
+        false -> encode_i_type(16#03, Rd, 16#2, Rs1, Offset)
+    end;
+lw(Rd, Rs1, Offset) when Offset >= -2048, Offset =< 2047 ->
+    % Opcode: 0000011 (0x03), Funct3: 010
+    encode_i_type(16#03, Rd, 16#2, Rs1, Offset);
+lw(_Rd, _Rs1, Offset) ->
+    error({offset_out_of_range, Offset, -2048, 2047}).
+
+%% LH - Load Halfword (sign-extended)
+%% rd = sign_extend(mem[rs1 + offset][15:0])
+%%
+%% lh/2 takes the destination register first and the address second, either
+%% as a {Base, Offset} tuple or as a bare base register (offset 0).
+-spec lh(riscv_register(), {riscv_register(), integer()} | riscv_register()) -> binary().
+lh(Rd, {Rs1, Offset}) ->
+    lh(Rd, Rs1, Offset);
+lh(Rd, Rs1) when is_atom(Rs1) ->
+    lh(Rd, Rs1, 0).
+
+%% lh/3 fails with {offset_out_of_range, Offset, -2048, 2047} when Offset
+%% does not fit.
+-spec lh(riscv_register(), riscv_register(), integer()) -> binary().
+lh(Rd, Rs1, Offset) when Offset >= -2048, Offset =< 2047 ->
+    % Opcode: 0000011 (0x03), Funct3: 001
+    encode_i_type(16#03, Rd, 16#1, Rs1, Offset);
+lh(_Rd, _Rs1, Offset) ->
+    error({offset_out_of_range, Offset, -2048, 2047}).
+
+%% LHU - Load Halfword Unsigned (zero-extended)
+%% rd = zero_extend(mem[rs1 + offset][15:0])
+%%
+%% lhu/2 takes the destination register first and the address second, either
+%% as a {Base, Offset} tuple or as a bare base register (offset 0).
+-spec lhu(riscv_register(), {riscv_register(), integer()} | riscv_register()) -> binary().
+lhu(Rd, {Rs1, Offset}) ->
+    lhu(Rd, Rs1, Offset);
+lhu(Rd, Rs1) when is_atom(Rs1) ->
+    lhu(Rd, Rs1, 0).
+
+%% lhu/3 fails with {offset_out_of_range, Offset, -2048, 2047} when Offset
+%% does not fit.
+-spec lhu(riscv_register(), riscv_register(), integer()) -> binary().
+lhu(Rd, Rs1, Offset) when Offset >= -2048, Offset =< 2047 ->
+    % Opcode: 0000011 (0x03), Funct3: 101
+    encode_i_type(16#03, Rd, 16#5, Rs1, Offset);
+lhu(_Rd, _Rs1, Offset) ->
+    error({offset_out_of_range, Offset, -2048, 2047}).
+
+%% LB - Load Byte (sign-extended)
+%% rd = sign_extend(mem[rs1 + offset][7:0])
+%%
+%% lb/2 takes the destination register first and the address second, either
+%% as a {Base, Offset} tuple or as a bare base register (offset 0).
+-spec lb(riscv_register(), {riscv_register(), integer()} | riscv_register()) -> binary().
+lb(Rd, {Rs1, Offset}) ->
+    lb(Rd, Rs1, Offset);
+lb(Rd, Rs1) when is_atom(Rs1) ->
+    lb(Rd, Rs1, 0).
+
+%% lb/3 fails with {offset_out_of_range, Offset, -2048, 2047} when Offset
+%% does not fit.
+-spec lb(riscv_register(), riscv_register(), integer()) -> binary().
+lb(Rd, Rs1, Offset) when Offset >= -2048, Offset =< 2047 ->
+    % Opcode: 0000011 (0x03), Funct3: 000
+    encode_i_type(16#03, Rd, 16#0, Rs1, Offset);
+lb(_Rd, _Rs1, Offset) ->
+    error({offset_out_of_range, Offset, -2048, 2047}).
+
+%% LBU - Load Byte Unsigned (zero-extended)
+%% rd = zero_extend(mem[rs1 + offset][7:0])
+%%
+%% lbu/2 takes the destination register first and the address second, either
+%% as a {Base, Offset} tuple or as a bare base register (offset 0).
+-spec lbu(riscv_register(), {riscv_register(), integer()} | riscv_register()) -> binary().
+lbu(Rd, {Rs1, Offset}) ->
+    lbu(Rd, Rs1, Offset);
+lbu(Rd, Rs1) when is_atom(Rs1) ->
+    lbu(Rd, Rs1, 0).
+
+%% lbu/3 fails with {offset_out_of_range, Offset, -2048, 2047} when Offset
+%% does not fit.
+-spec lbu(riscv_register(), riscv_register(), integer()) -> binary().
+lbu(Rd, Rs1, Offset) when Offset >= -2048, Offset =< 2047 ->
+    % Opcode: 0000011 (0x03), Funct3: 100
+    encode_i_type(16#03, Rd, 16#4, Rs1, Offset);
+lbu(_Rd, _Rs1, Offset) ->
+    error({offset_out_of_range, Offset, -2048, 2047}).
+
+%%-----------------------------------------------------------------------------
+%% S-type instruction encoding (for stores)
+%%-----------------------------------------------------------------------------
+
+%% S-type layout, from most to least significant bit:
+%%   imm[11:5] (31-25) | rs2 (24-20) | rs1 (19-15) | funct3 (14-12) | imm[4:0] (11-7) | opcode (6-0)
+%%
+%% Only the low 12 bits of Imm are kept; they are split between the two
+%% immediate fields. The resulting word is returned as 4 little-endian bytes.
+
+-spec encode_s_type(
+    Opcode :: 0..127,
+    Funct3 :: 0..7,
+    Rs1 :: riscv_register(),
+    Rs2 :: riscv_register(),
+    Imm :: integer()
+) -> binary().
+encode_s_type(Opcode, Funct3, Rs1, Rs2, Imm) ->
+    Rs1Bits = reg_to_num(Rs1) bsl 15,
+    Rs2Bits = reg_to_num(Rs2) bsl 20,
+    Imm12 = Imm band 16#FFF,
+    % Imm12 is below 4096, so shifting it right by 5 leaves exactly 7 bits
+    ImmHigh = Imm12 bsr 5,
+    ImmLow = Imm12 band 16#1F,
+    Word =
+        Opcode bor
+            (ImmLow bsl 7) bor
+            (Funct3 bsl 12) bor
+            Rs1Bits bor
+            Rs2Bits bor
+            (ImmHigh bsl 25),
+    <<Word:32/little>>.
+
+%%-----------------------------------------------------------------------------
+%% Store instructions (S-type)
+%%-----------------------------------------------------------------------------
+
+%% SW - Store Word
+%% mem[rs1 + offset] = rs2[31:0]
+%%
+%% sw/2 takes the register to store first and the address second, either as
+%% a {Base, Offset} tuple or as a bare base register (offset 0).
+-spec sw(riscv_register(), {riscv_register(), integer()} | riscv_register()) -> binary().
+sw(Rs2, {Rs1, Offset}) ->
+    sw(Rs1, Rs2, Offset);
+sw(Rs2, Rs1) when is_atom(Rs1) ->
+    sw(Rs1, Rs2, 0).
+
+%% sw/3 takes the base register first, then the register to store, then the
+%% offset. It picks a compressed encoding when the operands allow it and
+%% fails with {offset_out_of_range, Offset, -2048, 2047} when Offset does
+%% not fit.
+-spec sw(riscv_register(), riscv_register(), integer()) -> binary().
+sw(sp, Rs2, Offset) when Offset >= 0, Offset =< 252, Offset rem 4 =:= 0 ->
+    % Use c.swsp for stores to sp with aligned offset in range
+    c_swsp(Rs2, Offset);
+sw(Rs1, Rs2, Offset) when Offset >= 0, Offset =< 124, Offset rem 4 =:= 0 ->
+    % Use c.sw when both registers are in compressed set and offset is aligned
+    case is_compressed_reg(Rs1) andalso is_compressed_reg(Rs2) of
+        true -> c_sw(Rs2, {Rs1, Offset});
+        false -> encode_s_type(16#23, 16#2, Rs1, Rs2, Offset)
+    end;
+sw(Rs1, Rs2, Offset) when Offset >= -2048, Offset =< 2047 ->
+    % Opcode: 0100011 (0x23), Funct3: 010
+    encode_s_type(16#23, 16#2, Rs1, Rs2, Offset);
+sw(_Rs1, _Rs2, Offset) ->
+    error({offset_out_of_range, Offset, -2048, 2047}).
+
+%% SH - Store Halfword
+%% mem[rs1 + offset][15:0] = rs2[15:0]
+%%
+%% sh/2 takes the register to store first and the address second, either as
+%% a {Base, Offset} tuple or as a bare base register (offset 0).
+-spec sh(riscv_register(), {riscv_register(), integer()} | riscv_register()) -> binary().
+sh(Rs2, {Rs1, Offset}) ->
+    sh(Rs1, Rs2, Offset);
+sh(Rs2, Rs1) when is_atom(Rs1) ->
+    sh(Rs1, Rs2, 0).
+
+%% sh/3 takes the base register first, then the register to store, then the
+%% offset, and fails with {offset_out_of_range, Offset, -2048, 2047} when
+%% Offset does not fit.
+-spec sh(riscv_register(), riscv_register(), integer()) -> binary().
+sh(Rs1, Rs2, Offset) when Offset >= -2048, Offset =< 2047 ->
+    % Opcode: 0100011 (0x23), Funct3: 001
+    encode_s_type(16#23, 16#1, Rs1, Rs2, Offset);
+sh(_Rs1, _Rs2, Offset) ->
+    error({offset_out_of_range, Offset, -2048, 2047}).
+
+%% SB - Store Byte
+%% mem[rs1 + offset][7:0] = rs2[7:0]
+%%
+%% sb/2 takes the register to store first and the address second, either as
+%% a {Base, Offset} tuple or as a bare base register (offset 0).
+-spec sb(riscv_register(), {riscv_register(), integer()} | riscv_register()) -> binary().
+sb(Rs2, {Rs1, Offset}) ->
+    sb(Rs1, Rs2, Offset);
+sb(Rs2, Rs1) when is_atom(Rs1) ->
+    sb(Rs1, Rs2, 0).
+
+%% sb/3 takes the base register first, then the register to store, then the
+%% offset, and fails with {offset_out_of_range, Offset, -2048, 2047} when
+%% Offset does not fit.
+-spec sb(riscv_register(), riscv_register(), integer()) -> binary().
+sb(Rs1, Rs2, Offset) when Offset >= -2048, Offset =< 2047 ->
+    % Opcode: 0100011 (0x23), Funct3: 000
+    encode_s_type(16#23, 16#0, Rs1, Rs2, Offset);
+sb(_Rs1, _Rs2, Offset) ->
+    error({offset_out_of_range, Offset, -2048, 2047}).
+
+%%-----------------------------------------------------------------------------
+%% B-type instruction encoding (for branches)
+%%-----------------------------------------------------------------------------
+
+%% B-type layout, from most to least significant bit:
+%%   imm[12] (31) | imm[10:5] (30-25) | rs2 (24-20) | rs1 (19-15) |
+%%   funct3 (14-12) | imm[4:1] (11-8) | imm[11] (7) | opcode (6-0)
+%%
+%% The immediate is a signed byte offset whose bit 0 is not encoded, so only
+%% even offsets can be represented. The branch functions of this module
+%% accept offsets within [-4096, 4094]. Only the low 13 bits of Offset are
+%% used here. The resulting word is returned as 4 little-endian bytes.
+
+-spec encode_b_type(
+    Opcode :: 0..127,
+    Funct3 :: 0..7,
+    Rs1 :: riscv_register(),
+    Rs2 :: riscv_register(),
+    Offset :: integer()
+) -> binary().
+encode_b_type(Opcode, Funct3, Rs1, Rs2, Offset) ->
+    Rs1Bits = reg_to_num(Rs1) bsl 15,
+    Rs2Bits = reg_to_num(Rs2) bsl 20,
+    Imm13 = Offset band 16#1FFF,
+    % Scatter the offset bits to their positions in the instruction word
+    Bit11 = (Imm13 bsr 11) band 1,
+    Bits4To1 = (Imm13 bsr 1) band 16#F,
+    Bits10To5 = (Imm13 bsr 5) band 16#3F,
+    Bit12 = (Imm13 bsr 12) band 1,
+    Word =
+        Opcode bor
+            (Bit11 bsl 7) bor
+            (Bits4To1 bsl 8) bor
+            (Funct3 bsl 12) bor
+            Rs1Bits bor
+            Rs2Bits bor
+            (Bits10To5 bsl 25) bor
+            (Bit12 bsl 31),
+    <<Word:32/little>>.
+
+%%-----------------------------------------------------------------------------
+%% Branch instructions (B-type)
+%%-----------------------------------------------------------------------------
+
+%% BEQ - Branch if Equal
+%% pc += offset when rs1 == rs2
+%% Offset must be even and within [-4096, 4094]. The compressed c.beqz form
+%% is emitted when rs2 is zero, rs1 satisfies is_compressed_reg/1 and the
+%% offset is within [-256, 254].
+-spec beq(riscv_register(), riscv_register(), integer()) -> binary().
+beq(_Rs1, _Rs2, Offset) when Offset band 1 =/= 0 ->
+    error({offset_not_aligned, Offset, 2});
+beq(Rs1, zero, Offset) when Offset band 1 =:= 0, Offset >= -256, Offset =< 254 ->
+    case is_compressed_reg(Rs1) of
+        true -> c_beqz(Rs1, Offset);
+        false -> encode_b_type(2#1100011, 2#000, Rs1, zero, Offset)
+    end;
+beq(Rs1, Rs2, Offset) when Offset band 1 =:= 0, Offset >= -4096, Offset =< 4094 ->
+    % B-type: opcode 1100011, funct3 000
+    encode_b_type(2#1100011, 2#000, Rs1, Rs2, Offset);
+beq(_Rs1, _Rs2, Offset) ->
+    error({offset_out_of_range, Offset, -4096, 4094}).
+
+%% BNE - Branch if Not Equal
+%% pc += offset when rs1 != rs2
+%% Offset must be even and within [-4096, 4094]. The compressed c.bnez form
+%% is emitted when rs2 is zero, rs1 satisfies is_compressed_reg/1 and the
+%% offset is within [-256, 254].
+-spec bne(riscv_register(), riscv_register(), integer()) -> binary().
+bne(_Rs1, _Rs2, Offset) when Offset band 1 =/= 0 ->
+    error({offset_not_aligned, Offset, 2});
+bne(Rs1, zero, Offset) when Offset band 1 =:= 0, Offset >= -256, Offset =< 254 ->
+    case is_compressed_reg(Rs1) of
+        true -> c_bnez(Rs1, Offset);
+        false -> encode_b_type(2#1100011, 2#001, Rs1, zero, Offset)
+    end;
+bne(Rs1, Rs2, Offset) when Offset band 1 =:= 0, Offset >= -4096, Offset =< 4094 ->
+    % B-type: opcode 1100011, funct3 001
+    encode_b_type(2#1100011, 2#001, Rs1, Rs2, Offset);
+bne(_Rs1, _Rs2, Offset) ->
+    error({offset_out_of_range, Offset, -4096, 4094}).
+
+%% BLT - Branch if Less Than (signed)
+%% pc += offset when rs1 < rs2
+%% Offset must be even and within [-4096, 4094].
+-spec blt(riscv_register(), riscv_register(), integer()) -> binary().
+blt(_Rs1, _Rs2, Offset) when Offset band 1 =/= 0 ->
+    error({offset_not_aligned, Offset, 2});
+blt(Rs1, Rs2, Offset) when Offset band 1 =:= 0, Offset >= -4096, Offset =< 4094 ->
+    % B-type: opcode 1100011, funct3 100
+    encode_b_type(2#1100011, 2#100, Rs1, Rs2, Offset);
+blt(_Rs1, _Rs2, Offset) ->
+    error({offset_out_of_range, Offset, -4096, 4094}).
+
+%% BGE - Branch if Greater or Equal (signed comparison)
+%% Semantics: if (rs1 >= rs2) pc += offset
+%% The offset must be even and within -4096..4094; otherwise
+%% {offset_not_aligned, Offset, 2} or
+%% {offset_out_of_range, Offset, -4096, 4094} is raised.
+-spec bge(riscv_register(), riscv_register(), integer()) -> binary().
+bge(RegA, RegB, Disp) when (Disp rem 2) =:= 0, Disp >= -4096, Disp =< 4094 ->
+    % BRANCH major opcode 1100011 (0x63), funct3 101 selects BGE
+    encode_b_type(2#1100011, 2#101, RegA, RegB, Disp);
+bge(_, _, Disp) when (Disp rem 2) =/= 0 ->
+    error({offset_not_aligned, Disp, 2});
+bge(_, _, Disp) ->
+    error({offset_out_of_range, Disp, -4096, 4094}).
+
+%% BLTU - Branch if Less Than, Unsigned
+%% Semantics: if (rs1 < rs2) pc += offset, operands compared as unsigned
+%% The offset must be even and within -4096..4094; otherwise
+%% {offset_not_aligned, Offset, 2} or
+%% {offset_out_of_range, Offset, -4096, 4094} is raised.
+-spec bltu(riscv_register(), riscv_register(), integer()) -> binary().
+bltu(RegA, RegB, Disp) when (Disp rem 2) =:= 0, Disp >= -4096, Disp =< 4094 ->
+    % BRANCH major opcode 1100011 (0x63), funct3 110 selects BLTU
+    encode_b_type(2#1100011, 2#110, RegA, RegB, Disp);
+bltu(_, _, Disp) when (Disp rem 2) =/= 0 ->
+    error({offset_not_aligned, Disp, 2});
+bltu(_, _, Disp) ->
+    error({offset_out_of_range, Disp, -4096, 4094}).
+
+%% BGEU - Branch if Greater or Equal, Unsigned
+%% Semantics: if (rs1 >= rs2) pc += offset, operands compared as unsigned
+%% The offset must be even and within -4096..4094; otherwise
+%% {offset_not_aligned, Offset, 2} or
+%% {offset_out_of_range, Offset, -4096, 4094} is raised.
+-spec bgeu(riscv_register(), riscv_register(), integer()) -> binary().
+bgeu(RegA, RegB, Disp) when (Disp rem 2) =:= 0, Disp >= -4096, Disp =< 4094 ->
+    % BRANCH major opcode 1100011 (0x63), funct3 111 selects BGEU
+    encode_b_type(2#1100011, 2#111, RegA, RegB, Disp);
+bgeu(_, _, Disp) when (Disp rem 2) =/= 0 ->
+    error({offset_not_aligned, Disp, 2});
+bgeu(_, _, Disp) ->
+    error({offset_out_of_range, Disp, -4096, 4094}).
+
+%%-----------------------------------------------------------------------------
+%% J-type instruction encoding (for JAL)
+%%-----------------------------------------------------------------------------
+
+%% J-type instruction layout (used by JAL):
+%% imm[20|10:1|11|19:12] (20) | rd (5) | opcode (7)
+%% Bits:  31-12                  11-7     6-0
+%%
+%% Only the low 21 bits of Offset are used; bit 0 is not encoded, so the
+%% instruction can only express even byte offsets.
+%% Range: ±1 MiB (±1048576 bytes)
+
+-spec encode_j_type(
+    Opcode :: 0..127, Rd :: riscv_register(), Offset :: integer()
+) -> binary().
+encode_j_type(Opcode, Rd, Offset) ->
+    RdField = reg_to_num(Rd) bsl 7,
+    % Two's-complement view of the offset, truncated to 21 bits
+    Imm = Offset band 16#1FFFFF,
+    % Scatter the immediate into its in-word positions:
+    %   imm[20]    -> bit 31
+    %   imm[10:1]  -> bits 30-21
+    %   imm[11]    -> bit 20
+    %   imm[19:12] -> bits 19-12
+    ImmField =
+        (((Imm bsr 20) band 16#1) bsl 31) bor
+            (((Imm bsr 1) band 16#3FF) bsl 21) bor
+            (((Imm bsr 11) band 16#1) bsl 20) bor
+            (((Imm bsr 12) band 16#FF) bsl 12),
+    Word = ImmField bor RdField bor Opcode,
+    <<Word:32/little>>.
+
+%%-----------------------------------------------------------------------------
+%% U-type instruction encoding (for LUI, AUIPC)
+%%-----------------------------------------------------------------------------
+
+%% U-type instruction layout (LUI, AUIPC):
+%% imm[31:12] (20) | rd (5) | opcode (7)
+%% Bits:  31-12        11-7     6-0
+%%
+%% Imm is the full, already shifted value; its bits 11:0 are discarded.
+
+-spec encode_u_type(
+    Opcode :: 0..127, Rd :: riscv_register(), Imm :: integer()
+) -> binary().
+encode_u_type(Opcode, Rd, Imm) ->
+    RdField = reg_to_num(Rd) bsl 7,
+    % Bits 31:12 of the immediate already sit where the instruction wants them
+    ImmField = Imm band 16#FFFFF000,
+    Word = ImmField bor RdField bor Opcode,
+    <<Word:32/little>>.
+
+%%-----------------------------------------------------------------------------
+%% Jump and link instructions
+%%-----------------------------------------------------------------------------
+
+%% JAL - Jump and Link
+%% Semantics: rd = pc + 4; pc += offset
+%% Short even offsets (-2048..2046) are emitted as 16-bit instructions when
+%% the link register allows it: c.j for rd = zero, c.jal for rd = ra.
+%% Raises {offset_not_aligned, Offset, 2} or
+%% {offset_out_of_range, Offset, -1048576, 1048574} for unusable offsets.
+-spec jal(riscv_register(), integer()) -> binary().
+jal(zero, Disp) when (Disp rem 2) =:= 0, Disp >= -2048, Disp =< 2046 ->
+    % No link requested: compressed jump
+    c_j(Disp);
+jal(ra, Disp) when (Disp rem 2) =:= 0, Disp >= -2048, Disp =< 2046 ->
+    % Link into ra: compressed jump-and-link (RV32C only)
+    c_jal(Disp);
+jal(Link, Disp) when (Disp rem 2) =:= 0, Disp >= -16#100000, Disp =< 16#FFFFE ->
+    % JAL major opcode 1101111 (0x6F)
+    encode_j_type(2#1101111, Link, Disp);
+jal(_, Disp) when (Disp rem 2) =/= 0 ->
+    error({offset_not_aligned, Disp, 2});
+jal(_, Disp) ->
+    error({offset_out_of_range, Disp, -1048576, 1048574}).
+
+%% JALR - Jump and Link Register
+%% Semantics: rd = pc + 4; pc = (rs1 + offset) & ~1
+%% With a zero offset and a non-zero base register, rd = zero is emitted as
+%% c.jr and rd = ra as c.jalr; everything else uses the 32-bit I-type form.
+%% Raises {offset_out_of_range, Offset, -2048, 2047} for unusable offsets.
+-spec jalr(riscv_register(), riscv_register(), integer()) -> binary().
+jalr(zero, Base, 0) when Base =/= zero ->
+    % Plain indirect jump, nothing linked
+    c_jr(Base);
+jalr(ra, Base, 0) when Base =/= zero ->
+    % Indirect call linking into ra
+    c_jalr(Base);
+jalr(Link, Base, Disp) when Disp >= -2048, Disp =< 2047 ->
+    % JALR major opcode 1100111 (0x67), funct3 000
+    encode_i_type(2#1100111, Link, 2#000, Base, Disp);
+jalr(_, _, Disp) ->
+    error({offset_out_of_range, Disp, -2048, 2047}).
+
+%% JALR - Jump and Link Register, two-argument convenience form
+%% Semantics: rd = pc + 4; pc = rs1 & ~1
+%% Same as jalr/3 called with an offset of 0.
+-spec jalr(riscv_register(), riscv_register()) -> binary().
+jalr(Link, Base) ->
+    NoOffset = 0,
+    jalr(Link, Base, NoOffset).
+
+%%-----------------------------------------------------------------------------
+%% Upper immediate instructions
+%%-----------------------------------------------------------------------------
+
+%% LUI - Load Upper Immediate
+%% rd = imm << 12
+%% Imm is the signed 20-bit upper immediate (-16#80000..16#7FFFF), i.e. the
+%% value before shifting. Raises
+%% {immediate_out_of_range, Imm, -16#80000, 16#7FFFF} when it does not fit.
+-spec lui(riscv_register(), integer()) -> binary().
+lui(Rd, Imm) when Rd =/= zero, Rd =/= sp, Imm >= -32, Imm =< 31, Imm =/= 0 ->
+    % Use c.lui when imm fits in 6 bits (signed) and is non-zero.
+    % rd must be neither zero nor sp: c_lui/2 rejects both, so those
+    % registers have to fall through to the 32-bit encoding below instead of
+    % raising invalid_compressed_instruction for a perfectly valid LUI.
+    c_lui(Rd, Imm);
+lui(Rd, Imm) when Imm >= -16#80000, Imm =< 16#7FFFF ->
+    % Opcode: 0110111 (0x37); encode_u_type/3 expects the shifted value
+    encode_u_type(16#37, Rd, Imm bsl 12);
+lui(_Rd, Imm) ->
+    error({immediate_out_of_range, Imm, -16#80000, 16#7FFFF}).
+
+%% AUIPC - Add Upper Immediate to PC
+%% Semantics: rd = pc + (imm << 12)
+%% Imm is the signed 20-bit upper immediate before shifting. Raises
+%% {immediate_out_of_range, Imm, -16#80000, 16#7FFFF} when it does not fit.
+-spec auipc(riscv_register(), integer()) -> binary().
+auipc(Dest, Upper) when Upper >= -16#80000, Upper =< 16#7FFFF ->
+    % AUIPC major opcode 0010111 (0x17); the encoder wants the shifted value
+    Shifted = Upper bsl 12,
+    encode_u_type(2#0010111, Dest, Shifted);
+auipc(_, Upper) ->
+    error({immediate_out_of_range, Upper, -16#80000, 16#7FFFF}).
+
+%%-----------------------------------------------------------------------------
+%% Pseudo-instructions
+%%-----------------------------------------------------------------------------
+%% These are convenience instructions that map to actual RV32I instructions
+
+%% NOP - No Operation
+%% Delegates to addi/3 with x0 as both destination and source and a zero
+%% immediate (addi x0, x0, 0).
+-spec nop() -> binary().
+nop() ->
+    Reg = zero,
+    addi(Reg, Reg, 0).
+
+%% LI - Load Immediate
+%% Loads a 32-bit immediate into a register, picking the shortest sequence:
+%%   -32..31 with rd != zero : c.li rd, imm
+%%   -2048..2047             : addi rd, x0, imm
+%%   any other 32-bit value  : lui rd, hi ; addi rd, rd, lo
+%% Raises {immediate_out_of_range, Imm, -16#80000000, 16#7FFFFFFF} otherwise.
+-spec li(riscv_register(), integer()) -> binary().
+li(Rd, Imm) when Rd =/= zero, Imm >= -32, Imm =< 31 ->
+    c_li(Rd, Imm);
+li(Rd, Imm) when Imm >= -2048, Imm =< 2047 ->
+    addi(Rd, zero, Imm);
+li(Rd, Imm) when Imm >= -16#80000000, Imm =< 16#7FFFFFFF ->
+    % ADDI sign-extends its 12-bit immediate, so read the low 12 bits as a
+    % signed quantity in -2048..2047.
+    Lo =
+        case Imm band 16#FFF of
+            Raw when Raw >= 16#800 -> Raw - 16#1000;
+            Raw -> Raw
+        end,
+    % Whatever is left after removing Lo is a multiple of 4096; a negative Lo
+    % therefore bumps the upper part by one. Keep 20 bits and reinterpret
+    % them as signed because lui/2 takes a signed 20-bit immediate.
+    Hi =
+        case ((Imm - Lo) bsr 12) band 16#FFFFF of
+            Field when Field band 16#80000 =/= 0 -> Field - 16#100000;
+            Field -> Field
+        end,
+    Prefix = lui(Rd, Hi),
+    Suffix = addi(Rd, Rd, Lo),
+    <<Prefix/binary, Suffix/binary>>;
+li(_Rd, Imm) ->
+    error({immediate_out_of_range, Imm, -16#80000000, 16#7FFFFFFF}).
+
+%% MV - Move (copy register)
+%% Emits c.mv rd, rs when neither register is zero, addi rd, rs, 0 otherwise.
+-spec mv(riscv_register(), riscv_register()) -> binary().
+mv(Dest, Src) when Dest =:= zero; Src =:= zero ->
+    % One side is x0: only the addi form is used here
+    addi(Dest, Src, 0);
+mv(Dest, Src) ->
+    c_mv(Dest, Src).
+
+%% NOT - Bitwise NOT
+%% Delegates to xori/3 with an immediate of -1 (xori rd, rs, -1).
+-spec not_(riscv_register(), riscv_register()) -> binary().
+not_(Dest, Src) ->
+    AllOnes = -1,
+    xori(Dest, Src, AllOnes).
+
+%% NEG - Negate (two's complement)
+%% Delegates to sub/3 with x0 as the first operand (sub rd, x0, rs).
+-spec neg(riscv_register(), riscv_register()) -> binary().
+neg(Dest, Src) ->
+    Minuend = zero,
+    sub(Dest, Minuend, Src).
+
+%% J - Unconditional Jump
+%% Equivalent to jal x0, offset: jumps without recording a return address.
+%% Offset validation and encoding selection are delegated to jal/2.
+-spec j(integer()) -> binary().
+j(Disp) ->
+    NoLink = zero,
+    jal(NoLink, Disp).
+
+%% JR - Jump Register
+%% Equivalent to jalr x0, rs, 0: jumps to the address held in rs without
+%% recording a return address.
+-spec jr(riscv_register()) -> binary().
+jr(Target) ->
+    % jalr/2 supplies the zero offset
+    jalr(zero, Target).
+
+%% RET - Return from subroutine
+%% Equivalent to jalr x0, ra, 0, i.e. a register jump through ra.
+-spec ret() -> binary().
+ret() ->
+    jr(ra).
+
+%% CALL - Call function (far call using AUIPC + JALR)
+%% This is a two-instruction sequence for calling functions beyond JAL range.
+%% Expands to: auipc rd, hi20(offset); jalr ra, rd, lo12(offset)
+%% Rd is the scratch register receiving the AUIPC result; the return address
+%% is always linked into ra.
+%% Offset is a signed 32-bit PC-relative displacement. Raises
+%% {offset_out_of_range, Offset, -16#80000000, 16#7FFFFFFF} otherwise.
+-spec call(riscv_register(), integer()) -> binary().
+call(Rd, Offset) when Offset >= -16#80000000, Offset =< 16#7FFFFFFF ->
+    % JALR sign-extends its 12-bit immediate, so read the low 12 bits as a
+    % signed quantity in -2048..2047.
+    Lower = Offset band 16#FFF,
+    LowerSigned =
+        if
+            Lower >= 16#800 -> Lower - 16#1000;
+            true -> Lower
+        end,
+    % The remainder is a multiple of 4096; a negative LowerSigned bumps the
+    % upper part by one to compensate.
+    UpperMasked = ((Offset - LowerSigned) bsr 12) band 16#FFFFF,
+    % auipc/2 only accepts a signed 20-bit immediate, so the masked field has
+    % to be sign-extended. Without this, every backward call further than
+    % 2048 bytes was rejected with immediate_out_of_range.
+    Upper =
+        if
+            UpperMasked band 16#80000 =/= 0 -> UpperMasked - 16#100000;
+            true -> UpperMasked
+        end,
+    AuipcInstr = auipc(Rd, Upper),
+    JalrInstr = jalr(ra, Rd, LowerSigned),
+    <<AuipcInstr/binary, JalrInstr/binary>>;
+call(_Rd, Offset) ->
+    error({offset_out_of_range, Offset, -16#80000000, 16#7FFFFFFF}).
+
+%% MUL - Multiply (RV32M extension)
+%% rd receives the lower 32 bits of rs1 * rs2.
+%% Assembly form: mul rd, rs1, rs2
+%% Encoding: R-type, opcode 0110011 (0x33), funct3 000, funct7 0000001
+-spec mul(riscv_register(), riscv_register(), riscv_register()) -> binary().
+mul(Dest, SrcA, SrcB) ->
+    encode_r_type(2#0110011, Dest, 2#000, SrcA, SrcB, 2#0000001).
+
+%%-----------------------------------------------------------------------------
+%% C Extension (RV32C) - Compressed Instructions
+%%-----------------------------------------------------------------------------
+%% The C extension adds 16-bit compressed instructions to reduce code size.
+%% All compressed instructions are 16 bits (2 bytes) and use a different
+%% encoding format from the base 32-bit instructions.
+%%
+%% Register encoding for compressed instructions:
+%% - Some instructions use the full 5-bit register encoding (x0-x31)
+%% - Others use 3-bit encoding for registers x8-x15 (s0, s1, a0-a5)
+%%   This is called the "compressed register set" or "C register set"
+%%
+%% Instruction formats:
+%% - CR (Register): funct4 | rd/rs1 | rs2 | op
+%% - CI (Immediate): funct3 | imm | rd/rs1 | imm | op
+%% - CSS (Stack Store): funct3 | imm | rs2 | op
+%% - CIW (Wide Immediate): funct3 | imm | rd' | op
+%% - CL (Load): funct3 | imm | rs1' | imm | rd' | op
+%% - CS (Store): funct3 | imm | rs1' | imm | rs2' | op
+%% - CA (Arithmetic): funct6 | rd'/rs1' | funct2 | rs2' | op
+%% - CB (Branch): funct3 | offset | rs1' | offset | op
+%% - CJ (Jump): funct3 | jump target | op
+%%
+%% See: RISC-V Instruction Set Manual, Volume I, Chapter 16
+%%-----------------------------------------------------------------------------
+
+%% Map a register of the compressed set onto its 3-bit field value.
+%% s0/fp -> 0, s1 -> 1, a0..a5 -> 2..7 (i.e. x8-x15 map to 0-7).
+%% Any other register raises register_not_in_compressed_set.
+-spec reg_to_c_num(riscv_register()) -> 0..7.
+reg_to_c_num(Reg) ->
+    case Reg of
+        s0 -> 0;
+        fp -> 0;
+        s1 -> 1;
+        a0 -> 2;
+        a1 -> 3;
+        a2 -> 4;
+        a3 -> 5;
+        a4 -> 6;
+        a5 -> 7;
+        _ -> error({register_not_in_compressed_set, Reg, 's0/fp, s1, a0-a5'})
+    end.
+
+%% Tell whether a register belongs to the compressed register set
+%% (s0/fp, s1, a0-a5), i.e. whether reg_to_c_num/1 can encode it.
+-spec is_compressed_reg(riscv_register()) -> boolean().
+is_compressed_reg(Reg) when
+    Reg =:= s0;
+    Reg =:= fp;
+    Reg =:= s1;
+    Reg =:= a0;
+    Reg =:= a1;
+    Reg =:= a2;
+    Reg =:= a3;
+    Reg =:= a4;
+    Reg =:= a5
+->
+    true;
+is_compressed_reg(_Other) ->
+    false.
+
+%%-----------------------------------------------------------------------------
+%% CR-type instruction encoding (Compressed Register format)
+%%-----------------------------------------------------------------------------
+%% CR layout: funct4 (4) | rd/rs1 (5) | rs2 (5) | op (2)
+%% Bits:      15-12         11-7          6-2       1-0
+%% Both registers use the full 5-bit numbering.
+
+-spec encode_cr_type(
+    Opcode :: 0..3,
+    Rd :: riscv_register(),
+    Rs2 :: riscv_register(),
+    Funct4 :: 0..15
+) -> binary().
+encode_cr_type(Opcode, Rd, Rs2, Funct4) ->
+    DestField = reg_to_num(Rd) bsl 7,
+    SrcField = reg_to_num(Rs2) bsl 2,
+    Halfword = (Funct4 bsl 12) bor DestField bor SrcField bor Opcode,
+    <<Halfword:16/little>>.
+
+%%-----------------------------------------------------------------------------
+%% CI-type instruction encoding (Compressed Immediate format)
+%%-----------------------------------------------------------------------------
+%% CI layout: funct3 (3) | imm[5] (1) | rd/rs1 (5) | imm[4:0] (5) | op (2)
+%% Bits:      15-13        12            11-7          6-2            1-0
+%% Only the low 6 bits of Imm are encoded; callers range-check beforehand.
+
+-spec encode_ci_type(
+    Opcode :: 0..3,
+    Rd :: riscv_register(),
+    Imm :: integer(),
+    Funct3 :: 0..7
+) -> binary().
+encode_ci_type(Opcode, Rd, Imm, Funct3) ->
+    RegField = reg_to_num(Rd) bsl 7,
+    % imm[5] lands in bit 12, imm[4:0] in bits 6-2
+    HighBit = ((Imm bsr 5) band 1) bsl 12,
+    LowBits = (Imm band 16#1F) bsl 2,
+    Halfword = (Funct3 bsl 13) bor HighBit bor RegField bor LowBits bor Opcode,
+    <<Halfword:16/little>>.
+
+%%-----------------------------------------------------------------------------
+%% CSS-type instruction encoding (Compressed Stack Store format)
+%%-----------------------------------------------------------------------------
+%% CSS layout: funct3 (3) | imm[5:0] (6) | rs2 (5) | op (2)
+%% Bits:       15-13        12-7           6-2       1-0
+%% Imm is the already arranged 6-bit field; only its low 6 bits are used.
+
+-spec encode_css_type(
+    Opcode :: 0..3,
+    Rs2 :: riscv_register(),
+    Imm :: integer(),
+    Funct3 :: 0..7
+) -> binary().
+encode_css_type(Opcode, Rs2, Imm, Funct3) ->
+    SrcField = reg_to_num(Rs2) bsl 2,
+    ImmField = (Imm band 16#3F) bsl 7,
+    Halfword = (Funct3 bsl 13) bor ImmField bor SrcField bor Opcode,
+    <<Halfword:16/little>>.
+
+%%-----------------------------------------------------------------------------
+%% CIW-type instruction encoding (Compressed Wide Immediate format)
+%%-----------------------------------------------------------------------------
+%% CIW layout: funct3 (3) | imm[7:0] (8) | rd' (3) | op (2)
+%% Bits:       15-13        12-5           4-2       1-0
+%% rd' uses the 3-bit compressed numbering; Imm is the already arranged
+%% 8-bit field and only its low 8 bits are used.
+
+-spec encode_ciw_type(
+    Opcode :: 0..3,
+    Rd :: riscv_register(),
+    Imm :: integer(),
+    Funct3 :: 0..7
+) -> binary().
+encode_ciw_type(Opcode, Rd, Imm, Funct3) ->
+    DestField = reg_to_c_num(Rd) bsl 2,
+    ImmField = (Imm band 16#FF) bsl 5,
+    Halfword = (Funct3 bsl 13) bor ImmField bor DestField bor Opcode,
+    <<Halfword:16/little>>.
+
+%%-----------------------------------------------------------------------------
+%% CL-type instruction encoding (Compressed Load format)
+%%-----------------------------------------------------------------------------
+%% CL layout: funct3 (3) | imm (3) | rs1' (3) | imm (2) | rd' (3) | op (2)
+%% Bits:      15-13        12-10     9-7        6-5       4-2       1-0
+%% Both registers use the 3-bit compressed numbering. Imm is a byte offset of
+%% which bits 6:2 are encoded (word-access placement).
+
+-spec encode_cl_type(
+    Opcode :: 0..3,
+    Rd :: riscv_register(),
+    Rs1 :: riscv_register(),
+    Imm :: integer(),
+    Funct3 :: 0..7
+) -> binary().
+encode_cl_type(Opcode, Rd, Rs1, Imm, Funct3) ->
+    DestField = reg_to_c_num(Rd) bsl 2,
+    BaseField = reg_to_c_num(Rs1) bsl 7,
+    % imm[5:3] -> bits 12-10, imm[2] -> bit 6, imm[6] -> bit 5
+    ImmField =
+        (((Imm bsr 3) band 7) bsl 10) bor
+            (((Imm bsr 2) band 1) bsl 6) bor
+            (((Imm bsr 6) band 1) bsl 5),
+    Halfword = (Funct3 bsl 13) bor ImmField bor BaseField bor DestField bor Opcode,
+    <<Halfword:16/little>>.
+
+%%-----------------------------------------------------------------------------
+%% CS-type instruction encoding (Compressed Store format)
+%%-----------------------------------------------------------------------------
+%% CS layout: funct3 (3) | imm (3) | rs1' (3) | imm (2) | rs2' (3) | op (2)
+%% Bits:      15-13        12-10     9-7        6-5       4-2        1-0
+%% Both registers use the 3-bit compressed numbering. Imm is a byte offset of
+%% which bits 6:2 are encoded (word-access placement).
+
+-spec encode_cs_type(
+    Opcode :: 0..3,
+    Rs1 :: riscv_register(),
+    Rs2 :: riscv_register(),
+    Imm :: integer(),
+    Funct3 :: 0..7
+) -> binary().
+encode_cs_type(Opcode, Rs1, Rs2, Imm, Funct3) ->
+    BaseField = reg_to_c_num(Rs1) bsl 7,
+    SrcField = reg_to_c_num(Rs2) bsl 2,
+    % imm[5:3] -> bits 12-10, imm[2] -> bit 6, imm[6] -> bit 5
+    ImmField =
+        (((Imm bsr 3) band 7) bsl 10) bor
+            (((Imm bsr 2) band 1) bsl 6) bor
+            (((Imm bsr 6) band 1) bsl 5),
+    Halfword = (Funct3 bsl 13) bor ImmField bor BaseField bor SrcField bor Opcode,
+    <<Halfword:16/little>>.
+
+%%-----------------------------------------------------------------------------
+%% CA-type instruction encoding (Compressed Arithmetic format)
+%%-----------------------------------------------------------------------------
+%% CA layout: funct6 (6) | rd'/rs1' (3) | funct2 (2) | rs2' (3) | op (2)
+%% Bits:      15-10        9-7             6-5          4-2        1-0
+%% Both registers use the 3-bit compressed numbering.
+
+-spec encode_ca_type(
+    Opcode :: 0..3,
+    Rd :: riscv_register(),
+    Rs2 :: riscv_register(),
+    Funct2 :: 0..3,
+    Funct6 :: 0..63
+) -> binary().
+encode_ca_type(Opcode, Rd, Rs2, Funct2, Funct6) ->
+    DestField = reg_to_c_num(Rd) bsl 7,
+    SrcField = reg_to_c_num(Rs2) bsl 2,
+    Halfword =
+        (Funct6 bsl 10) bor DestField bor (Funct2 bsl 5) bor SrcField bor Opcode,
+    <<Halfword:16/little>>.
+
+%%-----------------------------------------------------------------------------
+%% CB-type instruction encoding (Compressed Branch format)
+%%-----------------------------------------------------------------------------
+%% CB format: funct3 (3) | offset (3) | rs1' (3) | offset (5) | op (2)
+%% Bits:      15-13        12-10        9-7        6-2          1-0
+%% Offset encoding: offset[8|4:3|7:6|2:1|5] -> bits [12|11:10|6:5|4:3|2]
+%% rs1' uses the 3-bit compressed numbering. Only bits 8:1 of Offset are
+%% encoded; bit 0 is dropped, so only even byte offsets can be expressed.
+
+-spec encode_cb_type(
+    Opcode :: 0..3,
+    Rs1 :: riscv_register(),
+    Offset :: integer(),
+    Funct3 :: 0..7
+) -> binary().
+encode_cb_type(Opcode, Rs1, Offset, Funct3) ->
+    Rs1Num = reg_to_c_num(Rs1),
+    % Extract offset bits: offset[8|4:3|7:6|2:1|5] -> bits [12|11:10|6:5|4:3|2]
+    % Truncate to 9 bits (two's complement) before picking individual bits
+    OffsetMasked = Offset band 16#1FF,
+    Offset8 = (OffsetMasked bsr 8) band 1,
+    Offset4_3 = (OffsetMasked bsr 3) band 3,
+    Offset7_6 = (OffsetMasked bsr 6) band 3,
+    Offset2_1 = (OffsetMasked bsr 1) band 3,
+    Offset5 = (OffsetMasked bsr 5) band 1,
+    Instr =
+        (Funct3 bsl 13) bor
+            (Offset8 bsl 12) bor
+            (Offset4_3 bsl 10) bor
+            (Rs1Num bsl 7) bor
+            (Offset7_6 bsl 5) bor
+            (Offset2_1 bsl 3) bor
+            (Offset5 bsl 2) bor
+            Opcode,
+    <<Instr:16/little>>.
+
+%%-----------------------------------------------------------------------------
+%% CJ-type instruction encoding (Compressed Jump format)
+%%-----------------------------------------------------------------------------
+%% CJ layout: funct3 (3) | jump target (11) | op (2)
+%% Bits:      15-13        12-2              1-0
+%% Target placement: target[11|4|9:8|10|6|7|3:1|5] -> bits [12|11|10:9|8|7|6|5:3|2]
+%% Only bits 11:1 of Offset are encoded; bit 0 is dropped.
+
+-spec encode_cj_type(Opcode :: 0..3, Offset :: integer(), Funct3 :: 0..7) -> binary().
+encode_cj_type(Opcode, Offset, Funct3) ->
+    % Two's-complement view of the offset, truncated to 12 bits
+    Target = Offset band 16#FFF,
+    % Build the 11-bit field in instruction order (its bit 10 ends up in
+    % instruction bit 12, its bit 0 in instruction bit 2)
+    Scattered =
+        (((Target bsr 11) band 1) bsl 10) bor
+            (((Target bsr 4) band 1) bsl 9) bor
+            (((Target bsr 8) band 3) bsl 7) bor
+            (((Target bsr 10) band 1) bsl 6) bor
+            (((Target bsr 6) band 1) bsl 5) bor
+            (((Target bsr 7) band 1) bsl 4) bor
+            (((Target bsr 1) band 7) bsl 1) bor
+            ((Target bsr 5) band 1),
+    Halfword = (Funct3 bsl 13) bor (Scattered bsl 2) bor Opcode,
+    <<Halfword:16/little>>.
+
+%%-----------------------------------------------------------------------------
+%% C Extension - Arithmetic and Logical Instructions
+%%-----------------------------------------------------------------------------
+
+%% C.ADD - Compressed Add
+%% rd = rd + rs2 (both rd and rs2 are full 5-bit registers)
+%% Format: CR-type
+%% Encoding: funct4=1001 (0x9), op=10 (0x2)
+%% rs2 must not be zero: in the RVC specification the rs2 = x0 code points of
+%% this funct4/op pair are c.jalr and c.ebreak, so encoding them here would
+%% silently emit a different instruction.
+-spec c_add(riscv_register(), riscv_register()) -> binary().
+c_add(_Rd, zero) ->
+    error({invalid_compressed_instruction, c_add, 'rs2 cannot be zero'});
+c_add(Rd, Rs2) ->
+    encode_cr_type(16#2, Rd, Rs2, 16#9).
+
+%% C.MV - Compressed Move (copy register)
+%% rd = rs2 (both are full 5-bit registers)
+%% Format: CR-type
+%% Encoding: funct4=1000 (0x8), op=10 (0x2)
+%% rs2 must not be zero: in the RVC specification the rs2 = x0 code point of
+%% this funct4/op pair is c.jr, so encoding it here would silently emit a
+%% jump instead of a move.
+-spec c_mv(riscv_register(), riscv_register()) -> binary().
+c_mv(_Rd, zero) ->
+    error({invalid_compressed_instruction, c_mv, 'rs2 cannot be zero'});
+c_mv(Rd, Rs2) ->
+    encode_cr_type(16#2, Rd, Rs2, 16#8).
+
+%% C.SUB - Compressed Subtract
+%% rd' = rd' - rs2', both registers in the 3-bit compressed numbering
+%% Format: CA-type
+%% Encoding: funct6=100011 (0x23), funct2=00, op=01 (0x1)
+-spec c_sub(riscv_register(), riscv_register()) -> binary().
+c_sub(Dest, Src) ->
+    encode_ca_type(2#01, Dest, Src, 2#00, 2#100011).
+
+%% C.AND - Compressed Bitwise AND
+%% rd' = rd' & rs2', both registers in the 3-bit compressed numbering
+%% Format: CA-type
+%% Encoding: funct6=100011 (0x23), funct2=11, op=01 (0x1)
+-spec c_and(riscv_register(), riscv_register()) -> binary().
+c_and(Dest, Src) ->
+    encode_ca_type(2#01, Dest, Src, 2#11, 2#100011).
+
+%% C.OR - Compressed Bitwise OR
+%% rd' = rd' | rs2', both registers in the 3-bit compressed numbering
+%% Format: CA-type
+%% Encoding: funct6=100011 (0x23), funct2=10, op=01 (0x1)
+-spec c_or(riscv_register(), riscv_register()) -> binary().
+c_or(Dest, Src) ->
+    encode_ca_type(2#01, Dest, Src, 2#10, 2#100011).
+
+%% C.XOR - Compressed Bitwise XOR
+%% rd' = rd' ^ rs2', both registers in the 3-bit compressed numbering
+%% Format: CA-type
+%% Encoding: funct6=100011 (0x23), funct2=01, op=01 (0x1)
+-spec c_xor(riscv_register(), riscv_register()) -> binary().
+c_xor(Dest, Src) ->
+    encode_ca_type(2#01, Dest, Src, 2#01, 2#100011).
+
+%%-----------------------------------------------------------------------------
+%% C Extension - Immediate Instructions
+%%-----------------------------------------------------------------------------
+
+%% C.ADDI - Compressed Add Immediate
+%% rd = rd + imm, rd being a full 5-bit register other than zero and imm a
+%% 6-bit signed value (-32..31)
+%% Format: CI-type
+%% Encoding: funct3=000, op=01 (0x1)
+-spec c_addi(riscv_register(), integer()) -> binary().
+c_addi(zero, _Imm) ->
+    % The register check wins over the immediate check
+    error({invalid_compressed_instruction, c_addi, 'rd cannot be zero'});
+c_addi(Dest, Imm) when Imm >= -32, Imm =< 31 ->
+    encode_ci_type(2#01, Dest, Imm, 2#000);
+c_addi(_Dest, Imm) ->
+    error({immediate_out_of_range, Imm, -32, 31}).
+
+%% C.ANDI - Compressed AND Immediate
+%% rd' = rd' & imm, rd' in the 3-bit compressed numbering and imm a 6-bit
+%% signed value (-32..31)
+%% Format: CB-type (with special encoding)
+%% Encoding: funct3=100, imm[5]=bit12, funct2=10, imm[4:0]=bits 6:2, op=01
+-spec c_andi(riscv_register(), integer()) -> binary().
+c_andi(Rd, Imm) when Imm >= -32, Imm =< 31 ->
+    RegField = reg_to_c_num(Rd) bsl 7,
+    % Constant part: funct3=100 (bits 15-13), funct2=10 (bits 11-10), op=01
+    Fixed = (2#100 bsl 13) bor (2#10 bsl 10) bor 2#01,
+    HighBit = ((Imm bsr 5) band 1) bsl 12,
+    LowBits = (Imm band 16#1F) bsl 2,
+    Halfword = Fixed bor HighBit bor RegField bor LowBits,
+    <<Halfword:16/little>>;
+c_andi(_Rd, Imm) ->
+    error({immediate_out_of_range, Imm, -32, 31}).
+
+%% C.LI - Compressed Load Immediate
+%% rd = imm, rd being a full 5-bit register and imm a 6-bit signed value
+%% (-32..31)
+%% Format: CI-type
+%% Encoding: funct3=010, op=01 (0x1)
+-spec c_li(riscv_register(), integer()) -> binary().
+c_li(Dest, Value) when Value >= -32, Value =< 31 ->
+    encode_ci_type(2#01, Dest, Value, 2#010);
+c_li(_Dest, Value) ->
+    error({immediate_out_of_range, Value, -32, 31}).
+
+%% C.LUI - Compressed Load Upper Immediate
+%% rd = imm << 12, rd being a full 5-bit register other than zero and sp,
+%% imm a non-zero 6-bit signed value (-32..31)
+%% Format: CI-type
+%% Encoding: funct3=011, op=01 (0x1)
+%% Checks are reported in this order: register, zero immediate, range.
+-spec c_lui(riscv_register(), integer()) -> binary().
+c_lui(Rd, _Imm) when Rd =:= zero; Rd =:= sp ->
+    error({invalid_compressed_instruction, c_lui, 'rd cannot be zero or sp'});
+c_lui(_Rd, 0) ->
+    error({invalid_compressed_instruction, c_lui, 'immediate cannot be zero'});
+c_lui(Rd, Imm) when Imm >= -32, Imm =< 31 ->
+    encode_ci_type(2#01, Rd, Imm, 2#011);
+c_lui(_Rd, Imm) ->
+    error({immediate_out_of_range, Imm, -32, 31}).
+
+%% C.ADDI16SP - Compressed Add Immediate to SP (scaled by 16)
+%% sp = sp + imm, imm being a non-zero multiple of 16 in -512..496
+%% Format: CI-type (special encoding)
+%% Encoding: funct3=011, rd/rs1=sp (x2), op=01
+%% Checks are reported in this order: zero immediate, alignment, range.
+-spec c_addi16sp(integer()) -> binary().
+c_addi16sp(0) ->
+    error({invalid_compressed_instruction, c_addi16sp, 'immediate cannot be zero'});
+c_addi16sp(Imm) when (Imm rem 16) =:= 0, Imm >= -512, Imm =< 496 ->
+    % Arrange nzimm[9|4|6|8:7|5] as the 6-bit CI immediate; encode_ci_type/4
+    % then places its bit 5 in instruction bit 12 and bits 4:0 in bits 6:2.
+    Field =
+        (((Imm bsr 9) band 1) bsl 5) bor
+            (((Imm bsr 4) band 1) bsl 4) bor
+            (((Imm bsr 6) band 1) bsl 3) bor
+            (((Imm bsr 7) band 3) bsl 1) bor
+            ((Imm bsr 5) band 1),
+    encode_ci_type(2#01, sp, Field, 2#011);
+c_addi16sp(Imm) when (Imm rem 16) =/= 0 ->
+    error({immediate_not_aligned, Imm, 16});
+c_addi16sp(Imm) ->
+    error({immediate_out_of_range, Imm, -512, 496}).
+
+%% C.ADDI4SPN - Compressed Add Immediate (scaled by 4) to SP, result in rd'
+%% rd' = sp + imm, rd' in the 3-bit compressed numbering and imm a multiple
+%% of 4 in 4..1020
+%% Format: CIW-type
+%% Encoding: funct3=000, op=00 (0x0)
+%% Checks are reported in this order: zero immediate, alignment, range.
+-spec c_addi4spn(riscv_register(), integer()) -> binary().
+c_addi4spn(Rd, Imm) when (Imm rem 4) =:= 0, Imm >= 4, Imm =< 1020 ->
+    % Arrange nzuimm[5:4|9:6|2|3] as the 8-bit CIW immediate, which
+    % encode_ciw_type/4 places in instruction bits 12-5.
+    Field =
+        (((Imm bsr 4) band 3) bsl 6) bor
+            (((Imm bsr 6) band 15) bsl 2) bor
+            (((Imm bsr 2) band 1) bsl 1) bor
+            ((Imm bsr 3) band 1),
+    encode_ciw_type(2#00, Rd, Field, 2#000);
+c_addi4spn(_Rd, 0) ->
+    error({invalid_compressed_instruction, c_addi4spn, 'immediate cannot be zero'});
+c_addi4spn(_Rd, Imm) when (Imm rem 4) =/= 0 ->
+    error({immediate_not_aligned, Imm, 4});
+c_addi4spn(_Rd, Imm) ->
+    error({immediate_out_of_range, Imm, 4, 1020}).
+
+%%-----------------------------------------------------------------------------
+%% C Extension - Shift Instructions
+%%-----------------------------------------------------------------------------
+
+%% C.SLLI - Compressed Shift Left Logical Immediate
+%% rd = rd << shamt, rd being a full 5-bit register other than zero and
+%% shamt a 6-bit unsigned value (0..63)
+%% Format: CI-type
+%% Encoding: funct3=000, op=10 (0x2)
+-spec c_slli(riscv_register(), 0..63) -> binary().
+c_slli(zero, _Shamt) ->
+    % The register check wins over the shift-amount check
+    error({invalid_compressed_instruction, c_slli, 'rd cannot be zero'});
+c_slli(Dest, Shamt) when Shamt >= 0, Shamt =< 63 ->
+    encode_ci_type(2#10, Dest, Shamt, 2#000);
+c_slli(_Dest, Shamt) ->
+    error({shift_amount_out_of_range, Shamt, 0, 63}).
+
+%% C.SRLI - Compressed Shift Right Logical Immediate
+%% rd' = rd' >> shamt, rd' in the 3-bit compressed numbering and shamt a
+%% 6-bit unsigned value (0..63)
+%% Format: CB-type (with special encoding)
+%% Encoding: funct3=100, shamt[5]=bit12, funct2=00, shamt[4:0]=bits 6:2, op=01
+-spec c_srli(riscv_register(), 0..63) -> binary().
+c_srli(Rd, Shamt) when Shamt >= 0, Shamt =< 63 ->
+    RegField = reg_to_c_num(Rd) bsl 7,
+    % Constant part: funct3=100 (bits 15-13), funct2=00 (bits 11-10), op=01
+    Fixed = (2#100 bsl 13) bor (2#00 bsl 10) bor 2#01,
+    HighBit = ((Shamt bsr 5) band 1) bsl 12,
+    LowBits = (Shamt band 16#1F) bsl 2,
+    Halfword = Fixed bor HighBit bor RegField bor LowBits,
+    <<Halfword:16/little>>;
+c_srli(_Rd, Shamt) ->
+    error({shift_amount_out_of_range, Shamt, 0, 63}).
+
+%% C.SRAI - Compressed Shift Right Arithmetic Immediate
+%% rd' = rd' >> shamt with sign extension, rd' in the 3-bit compressed
+%% numbering and shamt a 6-bit unsigned value (0..63)
+%% Format: CB-type (with special encoding)
+%% Encoding: funct3=100, shamt[5]=bit12, funct2=01, shamt[4:0]=bits 6:2, op=01
+-spec c_srai(riscv_register(), 0..63) -> binary().
+c_srai(Rd, Shamt) when Shamt >= 0, Shamt =< 63 ->
+    RegField = reg_to_c_num(Rd) bsl 7,
+    % Constant part: funct3=100 (bits 15-13), funct2=01 (bits 11-10), op=01
+    Fixed = (2#100 bsl 13) bor (2#01 bsl 10) bor 2#01,
+    HighBit = ((Shamt bsr 5) band 1) bsl 12,
+    LowBits = (Shamt band 16#1F) bsl 2,
+    Halfword = Fixed bor HighBit bor RegField bor LowBits,
+    <<Halfword:16/little>>;
+c_srai(_Rd, Shamt) ->
+    error({shift_amount_out_of_range, Shamt, 0, 63}).
+
+%%-----------------------------------------------------------------------------
+%% C Extension - Load/Store Instructions
+%%-----------------------------------------------------------------------------
+
+%% C.LW - Compressed Load Word
+%% rd' = mem[rs1' + offset], both registers in the 3-bit compressed
+%% numbering, offset a multiple of 4 in 0..124
+%% Format: CL-type
+%% Encoding: funct3=010, op=00 (0x0)
+%% Raises {offset_not_aligned, Offset, 4} or
+%% {offset_out_of_range, Offset, 0, 124} for unusable offsets.
+-spec c_lw(riscv_register(), {riscv_register(), integer()}) -> binary().
+c_lw(Dest, {Base, Disp}) when (Disp rem 4) =:= 0, Disp >= 0, Disp =< 124 ->
+    encode_cl_type(2#00, Dest, Base, Disp, 2#010);
+c_lw(_, {_, Disp}) when (Disp rem 4) =/= 0 ->
+    error({offset_not_aligned, Disp, 4});
+c_lw(_, {_, Disp}) ->
+    error({offset_out_of_range, Disp, 0, 124}).
+
+%% C.SW - Compressed Store Word
+%% mem[rs1' + offset] = rs2'; rs1' and rs2' are 3-bit compressed registers, and the offset is a
+%% 7-bit unsigned multiple of 4 (0..124). Format: CS-type, funct3=110, op=00 (0x0)
+-spec c_sw(riscv_register(), {riscv_register(), integer()}) -> binary().
+c_sw(Rs2, {Rs1, Offset}) ->
+    if
+        Offset >= 0, Offset =< 124, (Offset rem 4) =:= 0 ->
+            encode_cs_type(16#0, Rs1, Rs2, Offset, 16#6);
+        (Offset rem 4) =/= 0 ->
+            error({offset_not_aligned, Offset, 4});
+        true ->
+            error({offset_out_of_range, Offset, 0, 124})
+    end.
+
+%% C.LWSP - Compressed Load Word from Stack Pointer
+%% rd = mem[sp + offset] (rd is a full 5-bit register other than zero; the offset is an
+%% 8-bit unsigned multiple of 4, i.e. 0..252)
+%% Format: CI-type (special encoding), funct3=010, op=10 (0x2)
+-spec c_lwsp(riscv_register(), integer()) -> binary().
+c_lwsp(zero, _Offset) ->
+    error({invalid_compressed_instruction, c_lwsp, 'rd cannot be zero'});
+c_lwsp(Rd, Offset) ->
+    if
+        Offset >= 0, Offset =< 252, (Offset rem 4) =:= 0 ->
+            % The 6-bit CI immediate is laid out as offset[5] | offset[4:2] | offset[7:6]
+            Bit5 = (Offset bsr 5) band 1,
+            Bits4_2 = (Offset bsr 2) band 7,
+            Bits7_6 = (Offset bsr 6) band 3,
+            ImmBits = (Bit5 bsl 5) bor (Bits4_2 bsl 2) bor Bits7_6,
+            encode_ci_type(16#2, Rd, ImmBits, 16#2);
+        (Offset rem 4) =/= 0 ->
+            error({offset_not_aligned, Offset, 4});
+        true ->
+            error({offset_out_of_range, Offset, 0, 252})
+    end.
+
+%% C.SWSP - Compressed Store Word to Stack Pointer
+%% mem[sp + offset] = rs2 (rs2 is a full 5-bit register; the offset is an 8-bit unsigned
+%% multiple of 4, i.e. 0..252)
+%% Format: CSS-type, funct3=110, op=10 (0x2)
+-spec c_swsp(riscv_register(), integer()) -> binary().
+c_swsp(Rs2, Offset) ->
+    if
+        Offset >= 0, Offset =< 252, (Offset rem 4) =:= 0 ->
+            % The 6-bit CSS immediate is laid out as offset[5:2] | offset[7:6]
+            Bits5_2 = (Offset bsr 2) band 15,
+            Bits7_6 = (Offset bsr 6) band 3,
+            ImmBits = (Bits5_2 bsl 2) bor Bits7_6,
+            encode_css_type(16#2, Rs2, ImmBits, 16#6);
+        (Offset rem 4) =/= 0 ->
+            error({offset_not_aligned, Offset, 4});
+        true ->
+            error({offset_out_of_range, Offset, 0, 252})
+    end.
+
+%%-----------------------------------------------------------------------------
+%% C Extension - Branch and Jump Instructions
+%%-----------------------------------------------------------------------------
+
+%% C.BEQZ - Compressed Branch if Equal to Zero
+%% if (rs1' == 0) pc += offset; rs1' is a 3-bit compressed register, and the offset is a
+%% 9-bit signed multiple of 2 (-256..254). Format: CB-type, funct3=110, op=01 (0x1)
+-spec c_beqz(riscv_register(), integer()) -> binary().
+c_beqz(Rs1, Offset) ->
+    if
+        Offset >= -256, Offset =< 254, (Offset rem 2) =:= 0 ->
+            encode_cb_type(16#1, Rs1, Offset, 16#6);
+        (Offset rem 2) =/= 0 ->
+            error({offset_not_aligned, Offset, 2});
+        true ->
+            error({offset_out_of_range, Offset, -256, 254})
+    end.
+
+%% C.BNEZ - Compressed Branch if Not Equal to Zero
+%% if (rs1' != 0) pc += offset; rs1' is a 3-bit compressed register, and the offset is a
+%% 9-bit signed multiple of 2 (-256..254). Format: CB-type, funct3=111, op=01 (0x1)
+-spec c_bnez(riscv_register(), integer()) -> binary().
+c_bnez(Rs1, Offset) ->
+    if
+        Offset >= -256, Offset =< 254, (Offset rem 2) =:= 0 ->
+            encode_cb_type(16#1, Rs1, Offset, 16#7);
+        (Offset rem 2) =/= 0 ->
+            error({offset_not_aligned, Offset, 2});
+        true ->
+            error({offset_out_of_range, Offset, -256, 254})
+    end.
+
+%% C.J - Compressed Unconditional Jump
+%% pc += offset; the offset is a 12-bit signed multiple of 2 (-2048..2046)
+%% Format: CJ-type, funct3=101, op=01 (0x1)
+-spec c_j(integer()) -> binary().
+c_j(Offset) ->
+    if
+        Offset >= -2048, Offset =< 2046, (Offset rem 2) =:= 0 ->
+            encode_cj_type(16#1, Offset, 16#5);
+        (Offset rem 2) =/= 0 ->
+            error({offset_not_aligned, Offset, 2});
+        true ->
+            error({offset_out_of_range, Offset, -2048, 2046})
+    end.
+
+%% C.JAL - Compressed Jump and Link (RV32C only, rd is implicitly ra)
+%% ra = pc + 2; pc += offset; the offset is a 12-bit signed multiple of 2 (-2048..2046)
+%% Format: CJ-type, funct3=001 (0x1), op=01 (0x1)
+-spec c_jal(integer()) -> binary().
+c_jal(Offset) ->
+    if
+        Offset >= -2048, Offset =< 2046, (Offset rem 2) =:= 0 ->
+            encode_cj_type(16#1, Offset, 16#1);
+        (Offset rem 2) =/= 0 ->
+            error({offset_not_aligned, Offset, 2});
+        true ->
+            error({offset_out_of_range, Offset, -2048, 2046})
+    end.
+
+%% C.JR - Compressed Jump Register
+%% pc = rs1 (rs1 is a full 5-bit register; the zero register is rejected)
+%% Format: CR-type
+%% Encoding: funct4=1000 (0x8), rs2=x0, op=10 (0x2)
+-spec c_jr(riscv_register()) -> binary().
+c_jr(zero) ->
+    error({invalid_compressed_instruction, c_jr, 'rs1 cannot be zero'});
+c_jr(Rs1) ->
+    encode_cr_type(16#2, Rs1, zero, 16#8).
+
+%% C.JALR - Compressed Jump and Link Register
+%% ra = pc + 2; pc = rs1 (rs1 is a full 5-bit register; the zero register is rejected)
+%% Format: CR-type
+%% Encoding: funct4=1001 (0x9), rs2=x0, op=10 (0x2)
+-spec c_jalr(riscv_register()) -> binary().
+c_jalr(zero) ->
+    error({invalid_compressed_instruction, c_jalr, 'rs1 cannot be zero'});
+c_jalr(Rs1) ->
+    encode_cr_type(16#2, Rs1, zero, 16#9).
+
+%% C.EBREAK - Compressed Environment Breakpoint
+%% Causes a breakpoint exception to be raised; takes no operands
+%% Format: CR-type (same funct4 as C.JALR, told apart by the all-zero register fields)
+%% Encoding: funct4=1001 (0x9), rs1/rd=x0, rs2=x0, op=10 (0x2)
+-spec c_ebreak() -> binary().
+c_ebreak() ->
+    encode_cr_type(16#2, zero, zero, 16#9).
+
+%%-----------------------------------------------------------------------------
+%% C Extension - Pseudo-instructions
+%%-----------------------------------------------------------------------------
+
+%% C.NOP - Compressed No Operation
+%% Expands to: c.addi x0, 0 (emitted through the generic CI-type encoder)
+%% Format: CI-type
+%% Encoding: funct3=000, rd/rs1=x0, imm=0, op=01 (0x1)
+-spec c_nop() -> binary().
+c_nop() ->
+    encode_ci_type(16#1, zero, 0, 16#0).
diff --git a/libs/jit/src/jit_stream_binary.erl b/libs/jit/src/jit_stream_binary.erl
index db433c12ea..26e32bafa0 100644
--- a/libs/jit/src/jit_stream_binary.erl
+++ b/libs/jit/src/jit_stream_binary.erl
@@ -27,7 +27,8 @@
     offset/1,
     append/2,
     replace/3,
-    map/4
+    map/4,
+    flush/1
 ]).
 
 -export_type([stream/0]).
@@ -93,3 +94,14 @@ map(Stream, Offset, Length, MapFunction) ->
     {Prefix, <<Previous:Length/binary, Suffix/binary>>} = split_binary(Stream, Offset),
     Replacement = MapFunction(Previous),
     <<Prefix/binary, Replacement/binary, Suffix/binary>>.
+
+%%-----------------------------------------------------------------------------
+%% @param Stream        stream to flush
+%% @returns The stream, unchanged
+%% @doc     Flush the stream. This is a no-op for binary streams: the input
+%%          stream is returned as is.
+%% @end
+%%-----------------------------------------------------------------------------
+-spec flush(stream()) -> stream().
+flush(Stream) ->
+    Stream.
diff --git a/libs/jit/src/jit_stream_mmap.erl b/libs/jit/src/jit_stream_mmap.erl
index d8129f9a41..4429146c4e 100644
--- a/libs/jit/src/jit_stream_mmap.erl
+++ b/libs/jit/src/jit_stream_mmap.erl
@@ -27,7 +27,8 @@
     offset/1,
     append/2,
     replace/3,
-    map/4
+    map/4,
+    flush/1
 ]).
 
 %% Additional nif
@@ -109,3 +110,14 @@ map(Stream, Offset, Length, MapFunction) ->
 -spec read(stream(), non_neg_integer(), pos_integer()) -> binary().
 read(_Stream, _Offset, _Length) ->
     erlang:nif_error(undefined).
+
+%%-----------------------------------------------------------------------------
+%% @param Stream        stream to flush
+%% @returns The stream flushed
+%% @doc     Flush the stream. Typically invalidates instruction cache.
+%%          Implemented as a nif: this Erlang body is only the fallback stub.
+%% @end
+%%-----------------------------------------------------------------------------
+-spec flush(stream()) -> stream().
+flush(_Stream) ->
+    erlang:nif_error(undefined).
diff --git a/libs/jit/src/jit_x86_64.erl b/libs/jit/src/jit_x86_64.erl
index df8e7cf1d6..8344b9c023 100644
--- a/libs/jit/src/jit_x86_64.erl
+++ b/libs/jit/src/jit_x86_64.erl
@@ -25,6 +25,7 @@
     new/3,
     stream/1,
     offset/1,
+    flush/1,
     debugger/1,
     used_regs/1,
     available_regs/1,
@@ -38,6 +39,7 @@
     return_if_not_equal_to_ctx/2,
     jump_to_label/2,
     jump_to_continuation/2,
+    jump_to_offset/2,
     if_block/3,
     if_else_block/4,
     shift_right/3,
@@ -113,6 +115,7 @@
     stream :: stream(),
     offset :: non_neg_integer(),
     branches :: [{non_neg_integer(), non_neg_integer(), non_neg_integer()}],
+    jump_table_start :: non_neg_integer(),
     available_regs :: [x86_64_register()],
     used_regs :: [x86_64_register()],
     labels :: [{integer() | reference(), integer()}],
@@ -134,6 +137,7 @@
 -type condition() ::
     {x86_64_register(), '<', integer()}
     | {maybe_free_x86_64_register(), '<', x86_64_register()}
+    | {integer(), '<', maybe_free_x86_64_register()}
     | {maybe_free_x86_64_register(), '==', integer()}
     | {maybe_free_x86_64_register(), '!=', x86_64_register() | integer()}
     | {'(int)', maybe_free_x86_64_register(), '==', integer()}
@@ -216,6 +220,7 @@ new(Variant, StreamModule, Stream) ->
         stream_module = StreamModule,
         stream = Stream,
         branches = [],
+        jump_table_start = 0,
         offset = StreamModule:offset(Stream),
         available_regs = ?AVAILABLE_REGS,
         used_regs = [],
@@ -243,6 +248,16 @@ stream(#state{stream = Stream}) ->
 offset(#state{stream_module = StreamModule, stream = Stream}) ->
     StreamModule:offset(Stream).
 
+%%-----------------------------------------------------------------------------
+%% @doc Flush the current state. No-op on x86-64: the state is returned unchanged
+%% @end
+%% @param State current backend state
+%% @return The flushed state (identical to the input state)
+%%-----------------------------------------------------------------------------
+-spec flush(state()) -> state().
+flush(#state{} = State) ->
+    State.
+
 %%-----------------------------------------------------------------------------
 %% @doc Emit a debugger of breakpoint instruction. This is used for debugging
 %% and not in production.
@@ -328,21 +343,73 @@ assert_all_native_free(State) ->
 %% @return Updated backend state
 %%-----------------------------------------------------------------------------
 -spec jump_table(state(), pos_integer()) -> state().
-jump_table(State, LabelsCount) ->
-    jump_table0(State, 0, LabelsCount).
+jump_table(#state{stream_module = StreamModule, stream = Stream0} = State, LabelsCount) ->
+    JumpTableStart = StreamModule:offset(Stream0),
+    jump_table0(State#state{jump_table_start = JumpTableStart}, 0, LabelsCount).
 
 jump_table0(State, N, LabelsCount) when N > LabelsCount ->
     State;
 jump_table0(
-    #state{stream_module = StreamModule, stream = Stream0, branches = Branches} = State,
+    #state{stream_module = StreamModule, stream = Stream0} = State,
     N,
     LabelsCount
 ) ->
-    Offset = StreamModule:offset(Stream0),
-    {RelocOffset, I1} = jit_x86_64_asm:jmp_rel32(1),
-    Reloc = {N, Offset + RelocOffset, 32},
+    % Placeholder, encodes with 0xffffffff
+    {_RelocOffset, I1} = jit_x86_64_asm:jmp_rel32(4),
     Stream1 = StreamModule:append(Stream0, I1),
-    jump_table0(State#state{stream = Stream1, branches = [Reloc | Branches]}, N + 1, LabelsCount).
+    jump_table0(State#state{stream = Stream1}, N + 1, LabelsCount).
+
+%%-----------------------------------------------------------------------------
+%% @doc Rewrite the displacement stored at Offset once its target label offset is known
+%% @end
+%% @param StreamModule stream module
+%% @param Stream stream state
+%% @param Offset offset of the displacement field to rewrite
+%% @param Size width of the displacement field, in bits
+%% @param LabelOffset target label offset
+%% @return Updated stream
+%%-----------------------------------------------------------------------------
+-spec patch_branch(module(), stream(), non_neg_integer(), non_neg_integer(), non_neg_integer()) ->
+    stream().
+patch_branch(StreamModule, Stream, Offset, Size, LabelOffset) ->
+    Adjust = LabelOffset - Offset,
+    Rewrite = fun(<<Delta:Size/signed-little>>) -> <<(Delta + Adjust):Size/little>> end,
+    StreamModule:map(Stream, Offset, Size div 8, Rewrite).
+
+%%-----------------------------------------------------------------------------
+%% @doc Patch every pending branch that targets a given label.
+%% Branches are visited in list order. Those whose label equals `TargetLabel'
+%% are patched in the stream with `patch_branch/5' and dropped from the list;
+%% all other branches are returned untouched, in their original order.
+%% @end
+%% @param StreamModule stream module
+%% @param Stream stream state
+%% @param TargetLabel label to patch branches for
+%% @param LabelOffset offset of the target label
+%% @param Branches list of pending branches, as `{Label, Offset, Size}' tuples
+%% @return {UpdatedStream, RemainingBranches}
+%%-----------------------------------------------------------------------------
+-spec patch_branches_for_label(
+    module(),
+    stream(),
+    integer(),
+    non_neg_integer(),
+    [{integer(), non_neg_integer(), non_neg_integer()}]
+) -> {stream(), [{integer(), non_neg_integer(), non_neg_integer()}]}.
+patch_branches_for_label(StreamModule, Stream, TargetLabel, LabelOffset, Branches) ->
+    % Single left-to-right pass: the stream is threaded through the matching branches,
+    % the others are collected in reverse and put back in order at the end.
+    {StreamOut, KeptRev} = lists:foldl(
+        fun
+            ({Label, Offset, Size}, {StreamAcc, Kept}) when Label =:= TargetLabel ->
+                {patch_branch(StreamModule, StreamAcc, Offset, Size, LabelOffset), Kept};
+            (Branch, {StreamAcc, Kept}) ->
+                {StreamAcc, [Branch | Kept]}
+        end,
+        {Stream, []},
+        Branches
+    ),
+    {StreamOut, lists:reverse(KeptRev)}.
 
 %%-----------------------------------------------------------------------------
 %% @doc Rewrite stream to update all branches for labels.
@@ -362,9 +429,7 @@ update_branches(
     } = State
 ) ->
     {Label, LabelOffset} = lists:keyfind(Label, 1, Labels),
-    Stream1 = StreamModule:map(Stream0, Offset, Size div 8, fun(<<Delta:Size/signed-little>>) ->
-        <<(Delta + LabelOffset - Offset):Size/little>>
-    end),
+    Stream1 = patch_branch(StreamModule, Stream0, Offset, Size, LabelOffset),
     update_branches(State#state{stream = Stream1, branches = BranchesT}).
 
 %%-----------------------------------------------------------------------------
@@ -524,6 +589,13 @@ jump_to_label(
             State#state{stream = Stream1, branches = [Reloc | AccBranches]}
     end.
 
+%% @doc Emit an unconditional jump from the current stream position to TargetOffset.
+jump_to_offset(#state{stream_module = StreamModule, stream = Stream0} = State, TargetOffset) ->
+    Here = StreamModule:offset(Stream0),
+    % jit_x86_64_asm:jmp/1 expects the distance measured from the start of the jump itself
+    Jump = jit_x86_64_asm:jmp(TargetOffset - Here),
+    State#state{stream = StreamModule:append(Stream0, Jump)}.
+
 %%-----------------------------------------------------------------------------
 %% @doc Jump to a continuation address stored in a register.
 %% This is used for optimized intra-module returns.
@@ -668,11 +740,41 @@ if_block_cond(#state{stream_module = StreamModule} = State0, Cond) ->
     {State2, ReplaceDelta}.
 
 -spec if_block_cond0(state(), condition()) -> {state(), binary(), non_neg_integer()}.
-if_block_cond0(State0, {Reg, '<', 0}) when is_atom(Reg) ->
+if_block_cond0(State0, {RegOrTuple, '<', 0}) ->
+    Reg =
+        case RegOrTuple of
+            {free, Reg0} -> Reg0;
+            RegOrTuple -> RegOrTuple
+        end,
     I1 = jit_x86_64_asm:testq(Reg, Reg),
     {RelocJGEOffset, I2} = jit_x86_64_asm:jge_rel8(1),
-    {State0, <<I1/binary, I2/binary>>, byte_size(I1) + RelocJGEOffset};
-if_block_cond0(State0, {RegOrTuple, '<', Value}) ->
+    State1 = if_block_free_reg(RegOrTuple, State0),
+    {State1, <<I1/binary, I2/binary>>, byte_size(I1) + RelocJGEOffset};
+% {Value, '<', Reg}: the block runs when Value < Reg, so jle skips it when Reg <= Value
+if_block_cond0(State0, {Value, '<', RegOrTuple}) when ?IS_SINT32_T(Value) ->
+    Reg =
+        case RegOrTuple of
+            {free, Reg0} -> Reg0;
+            RegOrTuple -> RegOrTuple
+        end,
+    I1 = jit_x86_64_asm:cmpq(Value, Reg),
+    {RelocJLEOffset, I2} = jit_x86_64_asm:jle_rel8(1),
+    State1 = if_block_free_reg(RegOrTuple, State0),
+    {State1, <<I1/binary, I2/binary>>, byte_size(I1) + RelocJLEOffset};
+% Value outside SINT32_T range: cmpq has no imm64 form, so go through a free scratch register
+if_block_cond0(State0, {Value, '<', RegOrTuple}) when is_integer(Value) ->
+    Reg =
+        case RegOrTuple of
+            {free, Reg0} -> Reg0;
+            RegOrTuple -> RegOrTuple
+        end,
+    #state{available_regs = [Temp | _]} = State0,
+    I1 = jit_x86_64_asm:movabsq(Value, Temp),
+    I2 = jit_x86_64_asm:cmpq(Temp, Reg),
+    {RelocJLEOffset, I3} = jit_x86_64_asm:jle_rel8(1),
+    State1 = if_block_free_reg(RegOrTuple, State0),
+    {State1, <<I1/binary, I2/binary, I3/binary>>, byte_size(I1) + byte_size(I2) + RelocJLEOffset};
+if_block_cond0(State0, {RegOrTuple, '<', Value}) when ?IS_SINT32_T(Value) ->
     Reg =
         case RegOrTuple of
             {free, Reg0} -> Reg0;
@@ -682,6 +784,29 @@ if_block_cond0(State0, {RegOrTuple, '<', Value}) ->
     {RelocJGEOffset, I2} = jit_x86_64_asm:jge_rel8(1),
     State1 = if_block_free_reg(RegOrTuple, State0),
     {State1, <<I1/binary, I2/binary>>, byte_size(I1) + RelocJGEOffset};
+if_block_cond0(State0, {LhsOrTuple, '<', Rhs}) when is_atom(Rhs) ->
+    % Register/register form: compare, then jge skips the block when Lhs >= Rhs
+    Lhs =
+        case LhsOrTuple of
+            {free, FreeReg} -> FreeReg;
+            _ -> LhsOrTuple
+        end,
+    Cmp = jit_x86_64_asm:cmpq(Rhs, Lhs),
+    {JgeReloc, Jge} = jit_x86_64_asm:jge_rel8(1),
+    {if_block_free_reg(LhsOrTuple, State0), <<Cmp/binary, Jge/binary>>, byte_size(Cmp) + JgeReloc};
+% Value outside SINT32_T range: cmpq has no imm64 form, so go through a free scratch register
+if_block_cond0(State0, {RegOrTuple, '<', Value}) when is_integer(Value) ->
+    Reg =
+        case RegOrTuple of
+            {free, Reg0} -> Reg0;
+            RegOrTuple -> RegOrTuple
+        end,
+    #state{available_regs = [Temp | _]} = State0,
+    I1 = jit_x86_64_asm:movabsq(Value, Temp),
+    I2 = jit_x86_64_asm:cmpq(Temp, Reg),
+    {RelocJGEOffset, I3} = jit_x86_64_asm:jge_rel8(1),
+    State1 = if_block_free_reg(RegOrTuple, State0),
+    {State1, <<I1/binary, I2/binary, I3/binary>>, byte_size(I1) + byte_size(I2) + RelocJGEOffset};
 if_block_cond0(State0, {RegOrTuple, '==', 0}) ->
     Reg =
         case RegOrTuple of
@@ -1774,17 +1899,31 @@ set_continuation_to_label(
         stream_module = StreamModule,
         stream = Stream0,
         available_regs = [Temp | _],
-        branches = Branches
+        branches = Branches,
+        labels = Labels
     } = State,
     Label
 ) ->
     Offset = StreamModule:offset(Stream0),
-    {RewriteLEAOffset, I1} = jit_x86_64_asm:leaq_rel32({-4, rip}, Temp),
-    Reloc = {Label, Offset + RewriteLEAOffset, 32},
-    I2 = jit_x86_64_asm:movq(Temp, ?JITSTATE_CONTINUATION),
-    Code = <<I1/binary, I2/binary>>,
-    Stream1 = StreamModule:append(Stream0, Code),
-    State#state{stream = Stream1, branches = [Reloc | Branches]}.
+    case lists:keyfind(Label, 1, Labels) of
+        {Label, LabelOffset} ->
+            % Label is already known, emit direct leaq without relocation
+            % leaq instruction is 7 bytes, RIP points to next instruction
+            RelOffset = LabelOffset - (Offset + 7),
+            I1 = jit_x86_64_asm:leaq({rip, RelOffset}, Temp),
+            I2 = jit_x86_64_asm:movq(Temp, ?JITSTATE_CONTINUATION),
+            Code = <<I1/binary, I2/binary>>,
+            Stream1 = StreamModule:append(Stream0, Code),
+            State#state{stream = Stream1};
+        false ->
+            % Label not yet known, emit placeholder and add relocation
+            {RewriteLEAOffset, I1} = jit_x86_64_asm:leaq_rel32({-4, rip}, Temp),
+            Reloc = {Label, Offset + RewriteLEAOffset, 32},
+            I2 = jit_x86_64_asm:movq(Temp, ?JITSTATE_CONTINUATION),
+            Code = <<I1/binary, I2/binary>>,
+            Stream1 = StreamModule:append(Stream0, Code),
+            State#state{stream = Stream1, branches = [Reloc | Branches]}
+    end.
 
 set_continuation_to_offset(
     #state{
@@ -1826,7 +1965,9 @@ get_module_index(
         Reg
     }.
 
-and_(#state{stream_module = StreamModule, stream = Stream0} = State, Reg, Val) ->
+and_(#state{stream_module = StreamModule, stream = Stream0} = State, {free, Reg}, Val) when
+    ?IS_GPR(Reg)
+->
     % 32 bits instructions on x86-64 zero the high 32 bits
     I1 =
         if
@@ -1834,7 +1975,28 @@ and_(#state{stream_module = StreamModule, stream = Stream0} = State, Reg, Val) -
             true -> jit_x86_64_asm:andq(Val, Reg)
         end,
     Stream1 = StreamModule:append(Stream0, I1),
-    State#state{stream = Stream1}.
+    {State#state{stream = Stream1}, Reg};
+and_(
+    #state{
+        stream_module = StreamModule,
+        available_regs = [ResultReg | T],
+        used_regs = UR,
+        stream = Stream0
+    } = State,
+    Reg,
+    Val
+) when
+    ?IS_GPR(Reg)
+->
+    I1 = jit_x86_64_asm:movq(Reg, ResultReg),
+    I2 =
+        if
+            Val >= 0, Val =< 16#FFFFFFFF -> jit_x86_64_asm:andl(Val, ResultReg);
+            true -> jit_x86_64_asm:andq(Val, ResultReg)
+        end,
+    Stream1 = StreamModule:append(Stream0, I1),
+    Stream2 = StreamModule:append(Stream1, I2),
+    {State#state{stream = Stream2, available_regs = T, used_regs = [ResultReg | UR]}, ResultReg}.
 
 or_(#state{stream_module = StreamModule, stream = Stream0} = State, Reg, Val) ->
     I1 = jit_x86_64_asm:orq(Val, Reg),
@@ -1846,6 +2008,22 @@ add(#state{stream_module = StreamModule, stream = Stream0} = State, Reg, Val) ->
     Stream1 = StreamModule:append(Stream0, I1),
     State#state{stream = Stream1}.
 
+sub(
+    #state{
+        stream_module = StreamModule,
+        stream = Stream0,
+        available_regs = [TempReg | _]
+    } = State,
+    Reg,
+    Val
+) when is_integer(Val), Val < -16#80000000 orelse Val > 16#7FFFFFFF ->
+    % Immediate does not fit a signed 32-bit field: load it into a scratch register first
+    I1 = jit_x86_64_asm:movabsq(Val, TempReg),
+    I2 = jit_x86_64_asm:subq(TempReg, Reg),
+    Stream1 = StreamModule:append(Stream0, I1),
+    Stream2 = StreamModule:append(Stream1, I2),
+    % TempReg is only borrowed: available_regs is left untouched, so there is nothing to free
+    State#state{stream = Stream2};
 sub(#state{stream_module = StreamModule, stream = Stream0} = State, Reg, Val) ->
     I1 = jit_x86_64_asm:subq(Val, Reg),
     Stream1 = StreamModule:append(Stream0, I1),
@@ -1906,19 +2084,38 @@ call_only_or_schedule_next(
     #state{
         stream_module = StreamModule,
         stream = Stream0,
-        branches = Branches
+        branches = Branches,
+        labels = Labels
     } = State0,
     Label
 ) ->
     Offset = StreamModule:offset(Stream0),
     I1 = jit_x86_64_asm:decl(?JITSTATE_REMAINING_REDUCTIONS),
-    {RewriteJMPOffset, I3} = jit_x86_64_asm:jmp_rel32(1),
-    I2 = jit_x86_64_asm:jz(byte_size(I3) + 2),
-    Sz = byte_size(I1) + byte_size(I2),
-    Reloc1 = {Label, Offset + Sz + RewriteJMPOffset, 32},
-    Code = <<I1/binary, I2/binary, I3/binary>>,
-    Stream1 = StreamModule:append(Stream0, Code),
-    State1 = State0#state{stream = Stream1, branches = [Reloc1 | Branches]},
+    I1Size = byte_size(I1),
+
+    case lists:keyfind(Label, 1, Labels) of
+        {Label, LabelOffset} ->
+            % Label is already known, emit direct jmp with calculated offset
+            % jz is 2 bytes, jmp_rel32 is 5 bytes
+            JmpSize = 5,
+            I2 = jit_x86_64_asm:jz(JmpSize + 2),
+            I2Size = byte_size(I2),
+            % Calculate relative offset: target - current
+            RelOffset = LabelOffset - (Offset + I1Size + I2Size),
+            {_RewriteJMPOffset, I3} = jit_x86_64_asm:jmp_rel32(RelOffset),
+            Code = <<I1/binary, I2/binary, I3/binary>>,
+            Stream1 = StreamModule:append(Stream0, Code),
+            State1 = State0#state{stream = Stream1};
+        false ->
+            % Label not yet known, emit placeholder and add relocation
+            {RewriteJMPOffset, I3} = jit_x86_64_asm:jmp_rel32(1),
+            I2 = jit_x86_64_asm:jz(byte_size(I3) + 2),
+            Sz = I1Size + byte_size(I2),
+            Reloc1 = {Label, Offset + Sz + RewriteJMPOffset, 32},
+            Code = <<I1/binary, I2/binary, I3/binary>>,
+            Stream1 = StreamModule:append(Stream0, Code),
+            State1 = State0#state{stream = Stream1, branches = [Reloc1 | Branches]}
+    end,
     State2 = set_continuation_to_label(State1, Label),
     call_primitive_last(State2, ?PRIM_SCHEDULE_NEXT_CP, [ctx, jit_state]).
 
@@ -2044,5 +2241,35 @@ add_label(#state{stream_module = StreamModule, stream = Stream} = State, Label)
     add_label(State, Label, Offset).
 
 -spec add_label(state(), integer() | reference(), integer()) -> state().
+add_label(
+    #state{
+        stream_module = StreamModule,
+        stream = Stream0,
+        jump_table_start = TableStart,
+        branches = Pending,
+        labels = Known
+    } = State,
+    Label,
+    LabelOffset
+) when is_integer(Label) ->
+    % Integer labels index the jump table: slot number Label is the 5-byte jmp_rel32
+    % located at TableStart + Label * 5. Now that the label offset is known, that
+    % slot is overwritten with a jump to the label.
+    SlotOffset = TableStart + Label * 5,
+    Displacement = LabelOffset - SlotOffset,
+    {_RelocOffset, SlotJmp} = jit_x86_64_asm:jmp_rel32(Displacement),
+    PatchedTable = StreamModule:replace(Stream0, SlotOffset, SlotJmp),
+
+    % Branches recorded earlier against this label are resolved right away, so
+    % that only branches to other labels remain in the pending list kept in the
+    % state.
+    {PatchedStream, StillPending} =
+        patch_branches_for_label(StreamModule, PatchedTable, Label, LabelOffset, Pending),
+
+    State#state{
+        stream = PatchedStream,
+        branches = StillPending,
+        labels = [{Label, LabelOffset} | Known]
+    };
 add_label(#state{labels = Labels} = State, Label, Offset) ->
     State#state{labels = [{Label, Offset} | Labels]}.
diff --git a/libs/jit/src/jit_x86_64_asm.erl b/libs/jit/src/jit_x86_64_asm.erl
index c3b54fbe06..f5867bc5c0 100644
--- a/libs/jit/src/jit_x86_64_asm.erl
+++ b/libs/jit/src/jit_x86_64_asm.erl
@@ -35,6 +35,8 @@
     jnz_rel8/1,
     jge/1,
     jge_rel8/1,
+    jle/1,
+    jle_rel8/1,
     jmp/1,
     jmp_rel8/1,
     jmp_rel32/1,
@@ -360,6 +362,14 @@ jge(Offset) when Offset >= -126 andalso Offset =< 129 ->
 jge_rel8(Offset) when Offset >= -126 andalso Offset =< 129 ->
     {1, jge(Offset)}.
 
+jle(Offset) when Offset >= -126, Offset =< 129 ->
+    % Short form: opcode 0x7E followed by rel8, counted from the end of this 2-byte instruction
+    Rel8 = Offset - 2,
+    <<16#7E, Rel8>>.
+
+jle_rel8(Offset) when Offset >= -126, Offset =< 129 ->
+    {1, jle(Offset)}.
+
 jmp(Offset) when Offset >= -126 andalso Offset =< 129 ->
     % Use short jump (matches assembler behavior)
     AdjustedOffset = Offset - 2,
@@ -483,6 +493,17 @@ addq(SrcReg, DestReg) when is_atom(SrcReg), is_atom(DestReg) ->
     {REX_B, MODRM_RM} = x86_64_x_reg(DestReg),
     <<?X86_64_REX(1, REX_R, 0, REX_B), 16#01, 3:2, MODRM_REG:3, MODRM_RM:3>>.
 
+subq(Imm, Reg) when ?IS_SINT8_T(Imm), is_atom(Reg) ->
+    % sub r/m64, imm8 (sign-extended): REX.W 83 /5 ib
+    {REX_B, MODRM_RM} = x86_64_x_reg(Reg),
+    <<?X86_64_REX(1, 0, 0, REX_B), 16#83, 3:2, 5:3, MODRM_RM:3, Imm>>;
+subq(Imm, rax) when ?IS_SINT32_T(Imm) ->
+    % sub rax, imm32: dedicated short form REX.W 2D id (no ModRM byte)
+    <<16#48, 16#2D, Imm:32/little>>;
+subq(Imm, Reg) when ?IS_SINT32_T(Imm), is_atom(Reg) ->
+    % sub r/m64, imm32: REX.W 81 /5 id
+    {REX_B, MODRM_RM} = x86_64_x_reg(Reg),
+    <<?X86_64_REX(1, 0, 0, REX_B), 16#81, 3:2, 5:3, MODRM_RM:3, Imm:32/little>>;
 subq(RegA, RegB) when is_atom(RegA), is_atom(RegB) ->
     {REX_R, MODRM_REG} = x86_64_x_reg(RegA),
     {REX_B, MODRM_RM} = x86_64_x_reg(RegB),
diff --git a/libs/jit/src/primitives.hrl b/libs/jit/src/primitives.hrl
index 67ff60ecc8..5cf22c5163 100644
--- a/libs/jit/src/primitives.hrl
+++ b/libs/jit/src/primitives.hrl
@@ -92,6 +92,8 @@
 -define(PRIM_BITSTRING_GET_UTF32, 69).
 -define(PRIM_TERM_COPY_MAP, 70).
 -define(PRIM_STACKTRACE_BUILD, 71).
+-define(PRIM_TERM_REUSE_BINARY, 72).
+-define(PRIM_BITSTRING_INSERT_FLOAT, 73).
 
 % Parameters to ?PRIM_MEMORY_ENSURE_FREE_WITH_ROOTS
 % -define(MEMORY_NO_SHRINK, 0).
diff --git a/libs/jit/src/term.hrl b/libs/jit/src/term.hrl
index 9270de3244..eca86c623a 100644
--- a/libs/jit/src/term.hrl
+++ b/libs/jit/src/term.hrl
@@ -74,3 +74,5 @@
 -define(REFC_BINARY_MIN_64, 64).
 -define(TERM_BOXED_REFC_BINARY_SIZE, 6).
 -define(BINARY_HEADER_SIZE, 2).
+
+-define(TERM_INVALID_TERM, 0).
diff --git a/src/libAtomVM/avmpack.c b/src/libAtomVM/avmpack.c
index 075dc94966..624fa82493 100644
--- a/src/libAtomVM/avmpack.c
+++ b/src/libAtomVM/avmpack.c
@@ -35,6 +35,7 @@ static inline int pad(int size)
 
 bool avmpack_is_valid(const void *avmpack_binary, uint32_t size)
 {
+    // "#!/usr/bin/env AtomVM"
     const unsigned char pack_header[AVMPACK_SIZE] = {
         0x23, 0x21, 0x2f, 0x75,
         0x73, 0x72, 0x2f, 0x62,
@@ -51,7 +52,7 @@ bool avmpack_is_valid(const void *avmpack_binary, uint32_t size)
     return memcmp(avmpack_binary, pack_header, AVMPACK_SIZE) == 0;
 }
 
-int avmpack_find_section_by_flag(const void *avmpack_binary, uint32_t flags_mask, const void **ptr, uint32_t *size, const char **name)
+int avmpack_find_section_by_flag(const void *avmpack_binary, uint32_t flags_mask, uint32_t flags_val, const void **ptr, uint32_t *size, const char **name)
 {
     int offset = AVMPACK_SIZE;
     const uint32_t *flags;
@@ -60,7 +61,7 @@ int avmpack_find_section_by_flag(const void *avmpack_binary, uint32_t flags_mask
         const uint32_t *sizes = ((const uint32_t *) (avmpack_binary)) + offset / sizeof(uint32_t);
         flags = ((const uint32_t *) (avmpack_binary)) + 1 + offset / sizeof(uint32_t);
 
-        if ((ENDIAN_SWAP_32(*flags) & flags_mask) == flags_mask) {
+        if ((ENDIAN_SWAP_32(*flags) & flags_mask) == flags_val) {
             const char *found_section_name = (const char *) (sizes + 3);
             int section_name_len = pad(strlen(found_section_name) + 1);
 
diff --git a/src/libAtomVM/avmpack.h b/src/libAtomVM/avmpack.h
index 85ce9562d9..a68e6e3378 100644
--- a/src/libAtomVM/avmpack.h
+++ b/src/libAtomVM/avmpack.h
@@ -39,6 +39,7 @@ extern "C" {
 #define END_OF_FILE 0
 #define BEAM_START_FLAG 1
 #define BEAM_CODE_FLAG 2
+#define END_OF_FILE_MASK 255
 
 struct AVMPackData;
 
@@ -104,12 +105,13 @@ typedef void *(*avmpack_fold_fun)(void *accum, const void *section_ptr, uint32_t
  * @details Finds an AVM Pack section that has certain flags set and returns a pointer to it, its size and its name.
  * @param avmpack_binary a pointer to valid AVM Pack file data.
  * @param flags_mask that will be matched against file sections.
+ * @param flags_value expected result of masking a section's flags with flags_mask; a section matches when (flags & flags_mask) == flags_value.
  * @param ptr will point to the found file section.
  * @param size will be set to the file section size that has been found, if the section has not been found it will not be updated.
  * @param name the section name, as defined in the module header.
  * @returns 1 if the file section has been found, 0 otherwise.
  */
-int avmpack_find_section_by_flag(const void *avmpack_binary, uint32_t flags_mask, const void **ptr, uint32_t *size, const char **name);
+int avmpack_find_section_by_flag(const void *avmpack_binary, uint32_t flags_mask, uint32_t flags_value, const void **ptr, uint32_t *size, const char **name);
 
 /**
  * @brief Finds an AVM Pack section that has certain name.
diff --git a/src/libAtomVM/bitstring.c b/src/libAtomVM/bitstring.c
index b74c365316..cb14bfa608 100644
--- a/src/libAtomVM/bitstring.c
+++ b/src/libAtomVM/bitstring.c
@@ -330,6 +330,73 @@ void bitstring_copy_bits_incomplete_bytes(uint8_t *dst, size_t bits_offset, cons
     *dst = dest_byte;
 }
 
+// Reads the byte-aligned IEEE 754 half-precision float at bit `offset` of src_bin into *dst; false on Inf/NaN, unaligned offset or short binary.
+bool bitstring_extract_f16(
+    term src_bin, size_t offset, avm_int_t n, enum BitstringFlags bs_flags, avm_float_t *dst)
+{
+    unsigned long capacity = term_binary_size(src_bin);
+    // Check offset first: the unsigned subtraction below would wrap around if it exceeded the binary
+    if (offset > 8 * capacity || 8 * capacity - offset < (unsigned long) n) {
+        return false;
+    }
+
+    if ((offset & 0x7) == 0) {
+        size_t byte_offset = offset >> 3;
+        const uint8_t *src = (const uint8_t *) term_binary_data(src_bin) + byte_offset;
+
+        // Read 16-bit value
+        uint16_t f16_bits;
+        if (bs_flags & LittleEndianIntegerMask) {
+            f16_bits = READ_16LE_UNALIGNED(src);
+        } else {
+            f16_bits = READ_16_UNALIGNED(src);
+        }
+
+        // Convert IEEE 754 half-precision to single-precision
+        uint32_t sign = (f16_bits >> 15) & 0x1;
+        uint32_t f16_exp = (f16_bits >> 10) & 0x1F;
+        uint32_t f16_mantissa = f16_bits & 0x3FF;
+
+        uint32_t f32_bits;
+        if (f16_exp == 0) {
+            if (f16_mantissa == 0) {
+                // Zero
+                f32_bits = sign << 31;
+            } else {
+                // Subnormal number - shift until the implicit leading bit (0x400) appears
+                int e = -1;
+                uint32_t m = f16_mantissa;
+                do {
+                    e++;
+                    m <<= 1;
+                } while ((m & 0x400) == 0);
+                uint32_t f32_exp = (uint32_t) (127 - 15 - e); // stays positive: e is at most 9
+                f32_bits = (sign << 31) | (f32_exp << 23) | ((m & 0x3FF) << 13);
+            }
+        } else if (f16_exp == 0x1F) {
+            // Inf or NaN - not finite
+            return false;
+        } else {
+            // Normalized number
+            int32_t f32_exp = (int32_t) f16_exp + 127 - 15;
+            f32_bits = (sign << 31) | (f32_exp << 23) | (f16_mantissa << 13);
+        }
+
+        union
+        {
+            uint32_t bits;
+            float fvalue;
+        } f32;
+        f32.bits = f32_bits;
+
+        *dst = f32.fvalue;
+        return true;
+    } else {
+        // TODO: add support to floats not aligned to byte boundary
+        return false;
+    }
+}
+
 bool bitstring_extract_f32(
     term src_bin, size_t offset, avm_int_t n, enum BitstringFlags bs_flags, avm_float_t *dst)
 {
@@ -406,3 +473,158 @@ bool bitstring_extract_f64(
         return false;
     }
 }
+
+// Encodes a finite float as IEEE 754 half precision at the byte-aligned bit `offset` of dst_bin; false if unaligned, non-finite or out of room.
+bool bitstring_insert_f16(
+    term dst_bin, size_t offset, avm_float_t value, enum BitstringFlags bs_flags)
+{
+    unsigned long capacity = term_binary_size(dst_bin);
+    // Check offset first: the unsigned subtraction below would wrap around if it exceeded the binary
+    if (offset > 8 * capacity || 8 * capacity - offset < 16) {
+        return false;
+    }
+
+    if (!isfinite(value)) {
+        return false;
+    }
+
+    if ((offset & 0x7) == 0) {
+        size_t byte_offset = offset >> 3;
+        uint8_t *dst = (uint8_t *) term_binary_data(dst_bin) + byte_offset;
+
+        _Static_assert(sizeof(float) == 4, "Unsupported float size");
+
+        // Convert double to float first
+        union
+        {
+            uint32_t bits;
+            float fvalue;
+        } f32;
+
+        f32.fvalue = (float) value;
+        uint32_t f32_bits = f32.bits;
+
+        // Extract components from float (32-bit)
+        uint32_t sign = (f32_bits >> 31) & 0x1;
+        int32_t exp = ((f32_bits >> 23) & 0xFF) - 127; // Remove float bias
+        uint32_t mantissa = f32_bits & 0x7FFFFF;
+
+        uint16_t f16_bits;
+        // Handle special cases
+        if (exp > 15) {
+            // Overflow to infinity
+            f16_bits = (sign << 15) | 0x7C00;
+        } else if (exp < -14) {
+            // Underflow to zero or denormal
+            if (exp < -24) {
+                // Too small, round to zero
+                f16_bits = sign << 15;
+            } else {
+                // Denormal number (the dropped low bits are truncated, not rounded)
+                uint32_t denorm_mantissa = (mantissa | 0x800000) >> (-14 - exp);
+                f16_bits = (sign << 15) | (denorm_mantissa >> 13);
+            }
+        } else {
+            // Normal number
+            uint32_t f16_exp = exp + 15; // Add half-precision bias
+            // Round to nearest, ties to even: add 0xFFF plus the lowest kept bit (bit 13) before dropping 13 bits
+            uint32_t f16_mantissa = (mantissa + 0xFFF + ((mantissa >> 13) & 0x1)) >> 13;
+            if (f16_mantissa > 0x3FF) { // Rounding carried out of the 10-bit mantissa
+                f16_mantissa = 0;
+                f16_exp++;
+            }
+            if (f16_exp > 30) {
+                // Overflow to infinity
+                f16_bits = (sign << 15) | 0x7C00;
+            } else {
+                f16_bits = (sign << 15) | (f16_exp << 10) | f16_mantissa;
+            }
+        }
+
+        if (bs_flags & LittleEndianIntegerMask) {
+            WRITE_16LE_UNALIGNED(dst, f16_bits);
+        } else {
+            WRITE_16_UNALIGNED(dst, f16_bits);
+        }
+        return true;
+    } else {
+        // TODO: add support to floats not aligned to byte boundary
+        return false;
+    }
+}
+
+// Writes `value` as a 32-bit float at the byte-aligned bit `offset` of dst_bin; false if unaligned, non-finite or fewer than 32 bits remain.
+bool bitstring_insert_f32(
+    term dst_bin, size_t offset, avm_float_t value, enum BitstringFlags bs_flags)
+{
+    unsigned long capacity = term_binary_size(dst_bin);
+    // Check offset first: the unsigned subtraction below would wrap around if it exceeded the binary
+    if (offset > 8 * capacity || 8 * capacity - offset < 32) {
+        return false;
+    }
+    if (!isfinite(value)) {
+        return false;
+    }
+
+    if ((offset & 0x7) == 0) {
+        size_t byte_offset = offset >> 3;
+        uint8_t *dst = (uint8_t *) term_binary_data(dst_bin) + byte_offset;
+        _Static_assert(sizeof(float) == 4, "Unsupported float size");
+
+        union
+        {
+            uint32_t bits;
+            float fvalue;
+        } f32;
+
+        f32.fvalue = (float) value;
+
+        if (bs_flags & LittleEndianIntegerMask) {
+            WRITE_32LE_UNALIGNED(dst, f32.bits);
+        } else {
+            WRITE_32_UNALIGNED(dst, f32.bits);
+        }
+        return true;
+    } else {
+        // TODO: add support to floats not aligned to byte boundary
+        return false;
+    }
+}
+
+// Writes `value` as a 64-bit double at the byte-aligned bit `offset` of dst_bin; false if unaligned, non-finite or fewer than 64 bits remain.
+bool bitstring_insert_f64(
+    term dst_bin, size_t offset, avm_float_t value, enum BitstringFlags bs_flags)
+{
+    unsigned long capacity = term_binary_size(dst_bin);
+    // Check offset first: the unsigned subtraction below would wrap around if it exceeded the binary
+    if (offset > 8 * capacity || 8 * capacity - offset < 64) {
+        return false;
+    }
+    if (!isfinite(value)) {
+        return false;
+    }
+
+    if ((offset & 0x7) == 0) {
+        size_t byte_offset = offset >> 3;
+        uint8_t *dst = (uint8_t *) term_binary_data(dst_bin) + byte_offset;
+        _Static_assert(sizeof(double) == 8, "Unsupported double size");
+
+        union
+        {
+            uint64_t bits;
+            double fvalue;
+        } f64;
+
+        f64.fvalue = value;
+
+        if (bs_flags & LittleEndianIntegerMask) {
+            WRITE_64LE_UNALIGNED(dst, f64.bits);
+        } else {
+            WRITE_64_UNALIGNED(dst, f64.bits);
+        }
+        return true;
+    } else {
+        // TODO: add support to doubles not aligned to byte boundary
+        return false;
+    }
+}
diff --git a/src/libAtomVM/bitstring.h b/src/libAtomVM/bitstring.h
index da20507158..530833faa0 100644
--- a/src/libAtomVM/bitstring.h
+++ b/src/libAtomVM/bitstring.h
@@ -502,11 +502,20 @@ static inline void bitstring_copy_bits(uint8_t *dst, size_t bits_offset, const u
     }
 }
 
+bool bitstring_extract_f16(
+    term src_bin, size_t offset, avm_int_t n, enum BitstringFlags bs_flags, avm_float_t *dst);
 bool bitstring_extract_f32(
     term src_bin, size_t offset, avm_int_t n, enum BitstringFlags bs_flags, avm_float_t *dst);
 bool bitstring_extract_f64(
     term src_bin, size_t offset, avm_int_t n, enum BitstringFlags bs_flags, avm_float_t *dst);
 
+bool bitstring_insert_f16(
+    term dst_bin, size_t offset, avm_float_t value, enum BitstringFlags bs_flags);
+bool bitstring_insert_f32(
+    term dst_bin, size_t offset, avm_float_t value, enum BitstringFlags bs_flags);
+bool bitstring_insert_f64(
+    term dst_bin, size_t offset, avm_float_t value, enum BitstringFlags bs_flags);
+
 #ifdef __cplusplus
 }
 #endif
diff --git a/src/libAtomVM/context.c b/src/libAtomVM/context.c
index f682e8c34c..89f8c94567 100644
--- a/src/libAtomVM/context.c
+++ b/src/libAtomVM/context.c
@@ -1297,7 +1297,7 @@ COLD_FUNC void context_dump(Context *ctx)
         fprintf(stderr, "process_count = %zu\n", process_count);
         fprintf(stderr, "ports_count = %zu\n", ports_count);
         fprintf(stderr, "atoms_count = %zu\n", atom_table_count(glb->atom_table));
-        fprintf(stderr, "refc_binary_total_size = %zu\n", refc_binary_total_size(ctx));
+        refc_binary_dump_info(ctx);
     }
     fprintf(stderr, "\n\n**End Of Crash Report**\n");
 }
diff --git a/src/libAtomVM/defaultatoms.def b/src/libAtomVM/defaultatoms.def
index 35330fdecc..4c02b5c255 100644
--- a/src/libAtomVM/defaultatoms.def
+++ b/src/libAtomVM/defaultatoms.def
@@ -114,6 +114,7 @@ X(STRING_ATOM, "\x6", "string")
 X(UTF8_ATOM, "\x4", "utf8")
 X(UTF16_ATOM, "\x5", "utf16")
 X(UTF32_ATOM, "\x5", "utf32")
+X(FLOAT_ATOM, "\x5", "float")
 
 X(COPY_ATOM, "\x4", "copy")
 X(REUSE_ATOM, "\x5", "reuse")
@@ -209,3 +210,4 @@ X(LOAD_ATOM, "\x4", "load")
 X(JIT_X86_64_ATOM, "\xA", "jit_x86_64")
 X(JIT_AARCH64_ATOM, "\xB", "jit_aarch64")
 X(JIT_ARMV6M_ATOM, "\xA", "jit_armv6m")
+X(JIT_RISCV32_ATOM, "\xB", "jit_riscv32")
diff --git a/src/libAtomVM/jit.c b/src/libAtomVM/jit.c
index 39bfa963a1..f399e127cc 100644
--- a/src/libAtomVM/jit.c
+++ b/src/libAtomVM/jit.c
@@ -86,6 +86,20 @@ _Static_assert(offsetof(JITState, remaining_reductions) == 0x8, "jit_state->rema
 
 _Static_assert(sizeof(size_t) == 4, "size_t is expected to be 32 bits");
 
+#elif JIT_ARCH_TARGET == JIT_ARCH_RISCV32
+_Static_assert(offsetof(Context, e) == 0x14, "ctx->e is 0x14 in jit/src/jit_riscv32.erl");
+_Static_assert(offsetof(Context, x) == 0x18, "ctx->x is 0x18 in jit/src/jit_riscv32.erl");
+_Static_assert(offsetof(Context, cp) == 0x5C, "ctx->cp is 0x5C in jit/src/jit_riscv32.erl");
+_Static_assert(offsetof(Context, fr) == 0x60, "ctx->fr is 0x60 in jit/src/jit_riscv32.erl");
+_Static_assert(offsetof(Context, bs) == 0x64, "ctx->bs is 0x64 in jit/src/jit_riscv32.erl");
+_Static_assert(offsetof(Context, bs_offset) == 0x68, "ctx->bs_offset is 0x68 in jit/src/jit_riscv32.erl");
+
+_Static_assert(offsetof(JITState, module) == 0x0, "jit_state->module is 0x0 in jit/src/jit_riscv32.erl");
+_Static_assert(offsetof(JITState, continuation) == 0x4, "jit_state->continuation is 0x4 in jit/src/jit_riscv32.erl");
+_Static_assert(offsetof(JITState, remaining_reductions) == 0x8, "jit_state->remaining_reductions is 0x8 in jit/src/jit_riscv32.erl");
+
+_Static_assert(sizeof(size_t) == 4, "size_t is expected to be 32 bits");
+
 #else
 #error Unknown jit target
 #endif
@@ -133,7 +147,7 @@ static void destroy_extended_registers(Context *ctx, unsigned int live)
 
 static void jit_trim_live_regs(Context *ctx, uint32_t live)
 {
-    TRACE("jit_trim_live_regs: ctx->process_id = %d, live = %d\n", ctx->process_id, live);
+    TRACE("jit_trim_live_regs: ctx->process_id = %" PRId32 ", live = %" PRIu32 "\n", ctx->process_id, live);
     if (UNLIKELY(!list_is_empty(&ctx->extended_x_regs))) {
         destroy_extended_registers(ctx, live);
     }
@@ -173,8 +187,8 @@ static Context *jit_return(Context *ctx, JITState *jit_state)
 
 static Context *jit_terminate_context(Context *ctx, JITState *jit_state)
 {
-    TRACE("jit_terminate_context: ctx->process_id = %d\n", ctx->process_id);
-    TRACE("-- Code execution finished for %i--\n", ctx->process_id);
+    TRACE("jit_terminate_context: ctx->process_id = %" PRId32 "\n", ctx->process_id);
+    TRACE("-- Code execution finished for %" PRId32 "--\n", ctx->process_id);
     GlobalContext *global = ctx->global;
     if (ctx->leader) {
         scheduler_stop_all(global);
@@ -186,7 +200,7 @@ static Context *jit_terminate_context(Context *ctx, JITState *jit_state)
 
 static Context *jit_handle_error(Context *ctx, JITState *jit_state, int offset)
 {
-    TRACE("jit_terminate_context: ctx->process_id = %d, offset = %d\n", ctx->process_id, offset);
+    TRACE("jit_terminate_context: ctx->process_id = %" PRId32 ", offset = %d\n", ctx->process_id, offset);
     if (offset || term_is_invalid_term(ctx->x[2])) {
         ctx->x[2] = stacktrace_create_raw(ctx, jit_state->module, offset, ctx->x[0]);
     }
@@ -253,14 +267,14 @@ static void set_error(Context *ctx, JITState *jit_state, int offset, term error_
 
 static Context *jit_raise_error(Context *ctx, JITState *jit_state, int offset, term error_type_atom)
 {
-    TRACE("jit_raise_error: ctx->process_id = %d, offset = %d\n", ctx->process_id, offset);
+    TRACE("jit_raise_error: ctx->process_id = %" PRId32 ", offset = %d\n", ctx->process_id, offset);
     set_error(ctx, jit_state, offset, error_type_atom);
     return jit_handle_error(ctx, jit_state, 0);
 }
 
 static Context *jit_raise_error_tuple(Context *ctx, JITState *jit_state, int offset, term error_atom, term arg1)
 {
-    TRACE("jit_raise_error_tuple: ctx->process_id = %d, offset = %d\n", ctx->process_id, offset);
+    TRACE("jit_raise_error_tuple: ctx->process_id = %" PRId32 ", offset = %d\n", ctx->process_id, offset);
     // We can gc as we are raising
     if (UNLIKELY(memory_ensure_free_with_roots(ctx, TUPLE_SIZE(2), 1, &arg1, MEMORY_CAN_SHRINK) != MEMORY_GC_OK)) {
         set_error(ctx, jit_state, offset, OUT_OF_MEMORY_ATOM);
@@ -277,7 +291,7 @@ static Context *jit_raise_error_tuple(Context *ctx, JITState *jit_state, int off
 
 static Context *jit_raise(Context *ctx, JITState *jit_state, int offset, term stacktrace, term exc_value)
 {
-    TRACE("jit_raise: ctx->process_id = %d, offset = %d\n", ctx->process_id, offset);
+    TRACE("jit_raise: ctx->process_id = %" PRId32 ", offset = %d\n", ctx->process_id, offset);
     ctx->x[0] = stacktrace_exception_class(stacktrace);
     ctx->x[1] = exc_value;
     ctx->x[2] = stacktrace_create_raw(ctx, jit_state->module, offset, stacktrace);
@@ -286,7 +300,7 @@ static Context *jit_raise(Context *ctx, JITState *jit_state, int offset, term st
 
 static Context *jit_schedule_next_cp(Context *ctx, JITState *jit_state)
 {
-    TRACE("jit_schedule_next_cp: ctx->process_id = %d\n", ctx->process_id);
+    TRACE("jit_schedule_next_cp: ctx->process_id = %" PRId32 "\n", ctx->process_id);
     ctx->saved_function_ptr = jit_state->continuation;
     ctx->saved_module = jit_state->module;
     jit_state->remaining_reductions = 0;
@@ -295,7 +309,7 @@ static Context *jit_schedule_next_cp(Context *ctx, JITState *jit_state)
 
 static Context *jit_schedule_wait_cp(Context *ctx, JITState *jit_state)
 {
-    TRACE("jit_schedule_wait_cp: ctx->process_id = %d\n", ctx->process_id);
+    TRACE("jit_schedule_wait_cp: ctx->process_id = %" PRId32 "\n", ctx->process_id);
     ctx->saved_function_ptr = jit_state->continuation;
     ctx->saved_module = jit_state->module;
     jit_state->remaining_reductions = 0;
@@ -433,7 +447,7 @@ static Context *jit_call_ext(Context *ctx, JITState *jit_state, int offset, int
                     return_value = bif->bif2_ptr(ctx, 0, ctx->x[0], ctx->x[1]);
                     break;
                 default:
-                    fprintf(stderr, "Invalid arity %" PRIu32 " for bif\n", arity);
+                    fprintf(stderr, "Invalid arity %" PRIu32 " for bif\n", (uint32_t) arity);
                     AVM_ABORT();
             }
             PROCESS_MAYBE_TRAP_RETURN_VALUE_LAST(return_value, offset);
@@ -463,7 +477,7 @@ static Context *jit_call_ext(Context *ctx, JITState *jit_state, int offset, int
                     return_value = gcbif->gcbif2_ptr(ctx, 0, 0, ctx->x[0], ctx->x[1]);
                     break;
                 default:
-                    fprintf(stderr, "Invalid arity %" PRIu32 " for bif\n", arity);
+                    fprintf(stderr, "Invalid arity %" PRIu32 " for bif\n", (uint32_t) arity);
                     AVM_ABORT();
             }
             PROCESS_MAYBE_TRAP_RETURN_VALUE_LAST(return_value, offset);
@@ -486,7 +500,7 @@ static term jit_module_get_atom_term_by_id(JITState *jit_state, int atom_index)
 
 static bool jit_allocate(Context *ctx, JITState *jit_state, uint32_t stack_need, uint32_t heap_need, uint32_t live)
 {
-    TRACE("jit_allocate: stack_need=%u heap_need=%u live=%u\n", stack_need, heap_need, live);
+    TRACE("jit_allocate: ENTRY ctx=%p jit_state=%p stack_need=%" PRIu32 " heap_need=%" PRIu32 " live=%" PRIu32 "\n", (void *) ctx, (void *) jit_state, stack_need, heap_need, live);
     if (ctx->heap.root->next || ((ctx->heap.heap_ptr + heap_need > ctx->e - (stack_need + 1)))) {
         TRIM_LIVE_REGS(live);
         if (UNLIKELY(memory_ensure_free_with_roots(ctx, heap_need + stack_need + 1, live, ctx->x, MEMORY_CAN_SHRINK) != MEMORY_GC_OK)) {
@@ -501,7 +515,7 @@ static bool jit_allocate(Context *ctx, JITState *jit_state, uint32_t stack_need,
 
 static BifImpl0 jit_get_imported_bif(JITState *jit_state, uint32_t bif)
 {
-    TRACE("jit_get_imported_bif: bif=%u\n", bif);
+    TRACE("jit_get_imported_bif: bif=%" PRIu32 "\n", bif);
     const struct ExportedFunction *exported_bif = jit_state->module->imported_funcs[bif];
     const BifImpl0 result = EXPORTED_FUNCTION_TO_BIF(exported_bif)->bif0_ptr;
     return result;
@@ -509,7 +523,7 @@ static BifImpl0 jit_get_imported_bif(JITState *jit_state, uint32_t bif)
 
 static bool jit_deallocate(Context *ctx, JITState *jit_state, uint32_t n_words)
 {
-    TRACE("jit_deallocate: n_words=%u\n", n_words);
+    TRACE("jit_deallocate: n_words=%" PRIu32 "\n", n_words);
     ctx->cp = ctx->e[n_words];
     ctx->e += n_words + 1;
     // Hopefully, we only need x[0]
@@ -534,7 +548,7 @@ static TermCompareResult jit_term_compare(Context *ctx, JITState *jit_state, ter
 
 static bool jit_test_heap(Context *ctx, JITState *jit_state, uint32_t heap_need, uint32_t live_registers)
 {
-    TRACE("jit_test_heap: heap_need=%u live_registers=%u\n", heap_need, live_registers);
+    TRACE("jit_test_heap: heap_need=%" PRIu32 " live_registers=%" PRIu32 "\n", heap_need, live_registers);
     size_t heap_free = context_avail_free_memory(ctx);
     // if we need more heap space than is currently free, then try to GC the needed space
     if (heap_free < heap_need) {
@@ -548,7 +562,7 @@ static bool jit_test_heap(Context *ctx, JITState *jit_state, uint32_t heap_need,
     } else if (heap_free > heap_need * HEAP_NEED_GC_SHRINK_THRESHOLD_COEFF) {
         TRIM_LIVE_REGS(live_registers);
         if (UNLIKELY(memory_ensure_free_with_roots(ctx, heap_need * (HEAP_NEED_GC_SHRINK_THRESHOLD_COEFF / 2), live_registers, ctx->x, MEMORY_CAN_SHRINK) != MEMORY_GC_OK)) {
-            TRACE("Unable to ensure free memory.  heap_need=%i\n", heap_need);
+            TRACE("Unable to ensure free memory.  heap_need=%" PRIu32 "\n", heap_need);
             set_error(ctx, jit_state, 0, OUT_OF_MEMORY_ATOM);
             return false;
         }
@@ -629,13 +643,13 @@ static term maybe_alloc_boxed_integer_fragment(Context *ctx, avm_int64_t value)
 
 static term jit_term_alloc_tuple(Context *ctx, uint32_t size)
 {
-    TRACE("jit_term_alloc_tuple: size=%u\n", size);
+    TRACE("jit_term_alloc_tuple: size=%" PRIu32 "\n", size);
     return term_alloc_tuple(size, &ctx->heap);
 }
 
 static term jit_term_alloc_fun(Context *ctx, JITState *jit_state, uint32_t fun_index, uint32_t numfree)
 {
-    TRACE("jit_term_alloc_fun: fun_index=%u numfree=%u\n", fun_index, numfree);
+    TRACE("jit_term_alloc_fun: fun_index=%" PRIu32 " numfree=%" PRIu32 "\n", fun_index, numfree);
     size_t size = numfree + BOXED_FUN_SIZE;
     term *boxed_func = memory_heap_alloc(&ctx->heap, size);
 
@@ -841,7 +855,7 @@ static Context *jit_process_signal_messages(Context *ctx, JITState *jit_state)
 
 static term jit_mailbox_peek(Context *ctx)
 {
-    TRACE("jit_mailbox_peek: ctx->process_id=%d\n", ctx->process_id);
+    TRACE("jit_mailbox_peek: ctx->process_id=%" PRId32 "\n", ctx->process_id);
     term out = term_invalid_term();
     mailbox_peek(ctx, &out);
     return out;
@@ -849,26 +863,26 @@ static term jit_mailbox_peek(Context *ctx)
 
 static void jit_mailbox_remove_message(Context *ctx)
 {
-    TRACE("jit_mailbox_remove_message: ctx->process_id=%d\n", ctx->process_id);
+    TRACE("jit_mailbox_remove_message: ctx->process_id=%" PRId32 "\n", ctx->process_id);
     mailbox_remove_message(&ctx->mailbox, &ctx->heap);
 }
 
 static void jit_timeout(Context *ctx)
 {
-    TRACE("jit_timeout: ctx->process_id=%d\n", ctx->process_id);
+    TRACE("jit_timeout: ctx->process_id=%" PRId32 "\n", ctx->process_id);
     context_update_flags(ctx, ~WaitingTimeoutExpired, NoFlags);
     mailbox_reset(&ctx->mailbox);
 }
 
 static void jit_mailbox_next(Context *ctx)
 {
-    TRACE("jit_mailbox_next: ctx->process_id=%d\n", ctx->process_id);
+    TRACE("jit_mailbox_next: ctx->process_id=%" PRId32 "\n", ctx->process_id);
     mailbox_next(&ctx->mailbox);
 }
 
 static void jit_cancel_timeout(Context *ctx)
 {
-    TRACE("jit_cancel_timeout: ctx->process_id=%d\n", ctx->process_id);
+    TRACE("jit_cancel_timeout: ctx->process_id=%" PRId32 "\n", ctx->process_id);
     if (context_get_flags(ctx, WaitingTimeout | WaitingTimeoutExpired)) {
         scheduler_cancel_timeout(ctx);
     }
@@ -876,7 +890,7 @@ static void jit_cancel_timeout(Context *ctx)
 
 static void jit_clear_timeout_flag(Context *ctx)
 {
-    TRACE("jit_clear_timeout_flag: ctx->process_id=%d\n", ctx->process_id);
+    TRACE("jit_clear_timeout_flag: ctx->process_id=%" PRId32 "\n", ctx->process_id);
     context_update_flags(ctx, ~WaitingTimeoutExpired, NoFlags);
 }
 
@@ -1244,6 +1258,9 @@ static term jit_bitstring_extract_float(Context *ctx, term *bin_ptr, size_t offs
     avm_float_t value;
     bool status;
     switch (n) {
+        case 16:
+            status = bitstring_extract_f16(((term) bin_ptr) | TERM_PRIMARY_BOXED, offset, n, bs_flags, &value);
+            break;
         case 32:
             status = bitstring_extract_f32(((term) bin_ptr) | TERM_PRIMARY_BOXED, offset, n, bs_flags, &value);
             break;
@@ -1301,6 +1318,12 @@ static term jit_term_create_empty_binary(Context *ctx, size_t len)
     return term_create_empty_binary(len, &ctx->heap, ctx->global);
 }
 
+static term jit_term_reuse_binary(Context *ctx, term src, size_t len)
+{
+    TRACE("jit_term_reuse_binary: src=0x%lx, len=%d\n", (unsigned long) src, (int) len); // explicit casts keep the arguments matching %lx/%d whatever the width of term
+    return term_reuse_binary(src, len, &ctx->heap, ctx->global);
+}
+
 static int jit_decode_flags_list(Context *ctx, JITState *jit_state, term flags)
 {
     int flags_value = 0;
@@ -1354,6 +1377,18 @@ static bool jit_bitstring_insert_integer(term bin, size_t offset, term value, si
     return bitstring_insert_integer(bin, offset, int_value, n, flags);
 }
 
+static bool jit_bitstring_insert_float(term bin, size_t offset, term value, size_t n, enum BitstringFlags flags)
+{
+    avm_float_t float_value = term_conv_to_float(value);
+    if (n == 16) {
+        return bitstring_insert_f16(bin, offset, float_value, flags);
+    } else if (n == 32) {
+        return bitstring_insert_f32(bin, offset, float_value, flags);
+    }
+    // Only 16, 32 and 64-bit encoders exist: fail on any other width instead of silently writing 64 bits
+    return n == 64 && bitstring_insert_f64(bin, offset, float_value, flags);
+}
+
 static void jit_bitstring_copy_module_str(Context *ctx, JITState *jit_state, term bin, size_t offset, int str_id, size_t len)
 {
     TRACE("jit_bitstring_copy_module_str: bin=%p offset=%d str_id=%d len=%d\n", (void *) bin, (int) offset, str_id, (int) len);
@@ -1734,7 +1769,9 @@ const ModuleNativeInterface module_native_interface = {
     jit_bitstring_get_utf16,
     jit_bitstring_get_utf32,
     term_copy_map,
-    jit_stacktrace_build
+    jit_stacktrace_build,
+    jit_term_reuse_binary,
+    jit_bitstring_insert_float
 };
 
 #endif
diff --git a/src/libAtomVM/jit.h b/src/libAtomVM/jit.h
index ee53259886..ae57c6c941 100644
--- a/src/libAtomVM/jit.h
+++ b/src/libAtomVM/jit.h
@@ -158,6 +158,8 @@ struct ModuleNativeInterface
     term (*bitstring_get_utf32)(term src, int flags_value);
     term (*term_copy_map)(Context *ctx, term src);
     term (*stacktrace_build)(Context *ctx);
+    term (*term_reuse_binary)(Context *ctx, term src, size_t len);
+    bool (*bitstring_insert_float)(term bin, size_t offset, term value, size_t n, enum BitstringFlags flags);
 };
 
 extern const ModuleNativeInterface module_native_interface;
@@ -173,6 +175,7 @@ enum TrapAndLoadResult
 #define JIT_ARCH_X86_64 1
 #define JIT_ARCH_AARCH64 2
 #define JIT_ARCH_ARMV6M 3
+#define JIT_ARCH_RISCV32 4
 
 #define JIT_VARIANT_PIC 1
 #define JIT_VARIANT_FLOAT32 2
@@ -194,6 +197,11 @@ enum TrapAndLoadResult
 #define JIT_JUMPTABLE_ENTRY_SIZE 12
 #endif
 
+#if defined(__riscv) && (__riscv_xlen == 32)
+#define JIT_ARCH_TARGET JIT_ARCH_RISCV32
+#define JIT_JUMPTABLE_ENTRY_SIZE 8
+#endif
+
 #ifndef JIT_ARCH_TARGET
 #error Unknown JIT target
 #endif
diff --git a/src/libAtomVM/jit_stream_flash.c b/src/libAtomVM/jit_stream_flash.c
new file mode 100644
index 0000000000..829b7d4f76
--- /dev/null
+++ b/src/libAtomVM/jit_stream_flash.c
@@ -0,0 +1,928 @@
+/*
+ * This file is part of AtomVM.
+ *
+ * Copyright 2025 by Paul Guyot <pguyot@kallisys.net>
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ * SPDX-License-Identifier: Apache-2.0 OR LGPL-2.1-or-later
+ */
+
+#include "jit_stream_flash.h"
+
+#include "avmpack.h"
+#include "context.h"
+#include "defaultatoms.h"
+#include "erl_nif.h"
+#include "erl_nif_priv.h"
+#include "globalcontext.h"
+#include "jit.h"
+#include "module.h"
+#include "nifs.h"
+#include "platform_defaultatoms.h"
+#include "sys.h"
+#include "term.h"
+
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+// #define ENABLE_TRACE
+#include "trace.h"
+
+#define JIT_ENTRY_MAGIC 0x4A74
+
+#ifdef ENABLE_TRACE
+// Simple CRC32 for verification
+static uint32_t crc32(const uint8_t *data, size_t len)
+{
+    uint32_t reg = 0xFFFFFFFF; // running remainder, starts as all ones
+    for (size_t pos = 0; pos < len; pos++) {
+        reg ^= data[pos];
+        for (int bit = 8; bit > 0; bit--) {
+            reg = (reg >> 1) ^ (0xEDB88320 & -(reg & 1)); // the constant is XORed in only when the low bit is set
+        }
+    }
+    return ~reg;
+}
+#endif
+
+/**
+ * @brief JIT entry header stored in flash
+ *
+ * Each compiled module has an entry with this header followed by the native code.
+ */
+struct JITEntry
+{
+    uint16_t magic; ///< Magic number (JIT_ENTRY_MAGIC) or 0xFFFF for free space; entry scans stop at the first entry without the magic
+    uint16_t version; ///< Module version
+    uint32_t code; ///< Pointer to original BEAM code (32-bit for flash storage)
+    uint32_t labels; ///< Number of labels
+    uint32_t size; ///< Size of native code in bytes; jit_entry_next() skips this header plus `size` bytes, rounded up to 4
+} __attribute__((packed));
+
+_Static_assert(sizeof(struct JITEntry) == 16, "sizeof(struct JITEntry) must be 16");
+
+/**
+ * @brief JIT stream flash state
+ *
+ * Maintains the state for writing JIT code to flash with page buffering.
+ */
+struct JITStreamFlash
+{
+    struct JITEntry *jit_entry; ///< Pointer to current JIT entry in flash
+    uintptr_t page_base_addr; ///< Base address of current page; passed to the platform write together with page_buffer
+    uint8_t page_buffer[FLASH_PAGE_SIZE]; ///< Page buffer for writing
+    uint8_t page_offset; ///< Current offset within page. NOTE(review): 8 bits only reach 255 - confirm FLASH_PAGE_SIZE never exceeds 256
+    struct JSFlashPlatformContext *pf_ctx; ///< Platform-specific context
+};
+
+static ErlNifResourceType *jit_stream_flash_resource_type;
+static void jit_stream_flash_dtor(ErlNifEnv *caller_env, void *obj);
+static bool jit_stream_flash_replace_at_addr(struct JSFlashPlatformContext *pf_ctx, uintptr_t addr, const uint8_t *data, size_t len);
+
+const ErlNifResourceTypeInit jit_stream_flash_resource_type_init = {
+    .members = 1,
+    .dtor = jit_stream_flash_dtor
+};
+
+static struct JITEntry *jit_entry_next(struct JITEntry *jit_entry)
+{
+    // Step over this entry's header and native code, then round up to a multiple of 4 bytes
+    uintptr_t end_of_entry = (uintptr_t) jit_entry + sizeof(struct JITEntry) + jit_entry->size;
+    uintptr_t next_entry_addr = (end_of_entry + 3) & ~3;
+
+    TRACE("jit_entry_next: jit_entry = %p, return %p\n", (void *) jit_entry, (void *) next_entry_addr);
+
+    return (struct JITEntry *) next_entry_addr;
+}
+
+/**
+ * @brief Check if a sector needs to be erased
+ *
+ * Scans the entire sector to check if it contains any non-0xFF bytes.
+ * Uses word-by-word comparison for efficiency since sectors are aligned.
+ *
+ * @param sector_addr Address of the sector (must be sector-aligned)
+ * @return true if sector needs erasing, false if already erased
+ */
+static bool jit_stream_flash_sector_needs_erase(uintptr_t sector_addr)
+{
+    const uintptr_t all_ones = ~((uintptr_t) 0); // a word of an erased sector has every bit set
+    const uintptr_t *word = (const uintptr_t *) sector_addr;
+    const uintptr_t *const sector_end = word + FLASH_SECTOR_SIZE / sizeof(uintptr_t);
+
+    // A single word that is not all ones is enough to require an erase
+    while (word < sector_end) {
+        if (*word++ != all_ones) {
+            return true;
+        }
+    }
+
+    return false;
+}
+
+// Returns the sector boundary at or after the highest end-section address of the loaded AVM packs; *is_valid is true only if every pack's end section is named "END".
+static struct JITEntry *globalcontext_find_first_jit_entry(GlobalContext *global, bool *is_valid)
+{
+    const void *max_end_offset = NULL;
+    uint32_t end_size;
+    const void *end_offset;
+    const char *end_name;
+    bool valid_cache = true;
+
+    struct ListHead *item;
+    struct ListHead *pack_list = synclist_rdlock(&global->avmpack_data);
+    LIST_FOR_EACH (item, pack_list) {
+        struct AVMPackData *pack = GET_LIST_ENTRY(item, struct AVMPackData, avmpack_head);
+        // Never read the out-parameters unless a section was found: they may have been left unset
+        bool found = avmpack_find_section_by_flag(pack->data, END_OF_FILE_MASK, END_OF_FILE, &end_offset, &end_size, &end_name) != 0;
+        valid_cache = valid_cache && found && (strcmp(end_name, "END") == 0);
+        if (found && end_offset > max_end_offset) {
+            max_end_offset = end_offset;
+        }
+    }
+    synclist_unlock(&global->avmpack_data);
+
+    // Round (max_end_offset - 1) down to its sector; the next sector is added on return
+    uintptr_t max_end_offset_page = ((((uintptr_t) max_end_offset) - 1) & ~(FLASH_SECTOR_SIZE - 1));
+    *is_valid = valid_cache;
+    TRACE("globalcontext_find_first_jit_entry: return %p\n", (void *) (max_end_offset_page + FLASH_SECTOR_SIZE));
+    return (struct JITEntry *) (max_end_offset_page + FLASH_SECTOR_SIZE);
+}
+
+// Rewrites the end-section name of every loaded AVM pack to "END", which the lookup code treats as the cache-valid marker.
+static void globalcontext_set_cache_valid(GlobalContext *global)
+{
+    TRACE("globalcontext_set_cache_valid\n");
+    uint32_t end_size;
+    const void *end_offset;
+    const char *end_name;
+    bool valid_cache;
+
+    // Create platform context for flash operations
+    struct JSFlashPlatformContext *pf_ctx = jit_stream_flash_platform_init();
+    if (IS_NULL_PTR(pf_ctx)) {
+        fprintf(stderr, "Failed to initialize platform flash context\n");
+        return;
+    }
+
+    do {
+        valid_cache = true;
+        struct ListHead *item;
+        struct ListHead *pack_list = synclist_rdlock(&global->avmpack_data);
+        LIST_FOR_EACH (item, pack_list) {
+            struct AVMPackData *pack = GET_LIST_ENTRY(item, struct AVMPackData, avmpack_head);
+            // Packs without an end section are skipped: end_name is only meaningful when one was found
+            if (avmpack_find_section_by_flag(pack->data, END_OF_FILE_MASK, END_OF_FILE, &end_offset, &end_size, &end_name) && strcmp(end_name, "END") != 0) {
+                valid_cache = false;
+                break;
+            }
+        }
+        synclist_unlock(&global->avmpack_data);
+        if (!valid_cache) {
+            // Replace "end" with "END" - this is a 3-byte string replacement
+            const uint8_t end_str[] = "END";
+            if (!jit_stream_flash_replace_at_addr(pf_ctx, (uintptr_t) end_name, end_str, 3)) {
+                fprintf(stderr, "Failed to update cache validity marker from 'end' to 'END'\n");
+                break;
+            }
+        }
+    } while (!valid_cache);
+
+    jit_stream_flash_platform_destroy(pf_ctx);
+}
+
+/**
+ * @brief Locate the last finalized JIT entry of the flash cache.
+ *
+ * @param global global context used to locate the first cache slot
+ * @return the last entry in the chain whose magic is JIT_ENTRY_MAGIC, or NULL
+ * when the cache is not marked valid or the first slot holds no finalized
+ * entry
+ */
+static struct JITEntry *globalcontext_find_last_jit_entry(GlobalContext *global)
+{
+    bool is_valid;
+    struct JITEntry *jit_entry = globalcontext_find_first_jit_entry(global, &is_valid);
+    if (!is_valid) {
+        TRACE("globalcontext_find_last_jit_entry, cache not valid, returning NULL\n");
+        return NULL;
+    }
+
+    // Find the last valid entry. Start from NULL rather than the first slot:
+    // if the first slot has no magic, reporting it as "last valid" would make
+    // the caller compute jit_entry_next() from an unfinalized header.
+    struct JITEntry *last_valid = NULL;
+    while (jit_entry->magic == JIT_ENTRY_MAGIC) {
+        last_valid = jit_entry;
+        jit_entry = jit_entry_next(jit_entry);
+    }
+    TRACE("globalcontext_find_last_jit_entry, returning last valid entry at %p\n", (void *) last_valid);
+    return last_valid;
+}
+
+/**
+ * @brief Write the stream's page buffer to flash at its current page address.
+ *
+ * @param js stream whose page buffer is written
+ * @return true when the platform write succeeded, false otherwise (an error
+ * is logged to stderr)
+ */
+static bool jit_stream_flash_flush_page(struct JITStreamFlash *js)
+{
+    // No erase here: the sector is already erased by nif_jit_stream_flash_new
+    // (first sector) or by jit_stream_flash_append (subsequent sectors when
+    // crossing boundaries)
+    bool written = jit_stream_flash_platform_write_page(js->pf_ctx, js->page_base_addr, js->page_buffer);
+    if (!written) {
+        fprintf(stderr, "Failed to write page at address 0x%lx\n", (unsigned long) js->page_base_addr);
+    }
+
+    return written;
+}
+
+/**
+ * @brief Store the final magic, version, code and labels fields of a JIT
+ * entry header in flash.
+ *
+ * @details The header is patched in a RAM copy and written back through
+ * jit_stream_flash_replace_at_addr, which walks the affected pages one by
+ * one. This keeps the update correct when the header straddles a page
+ * boundary; patching it inside a single page-sized buffer would write past
+ * that buffer and drop the fields located in the following page.
+ *
+ * @param pf_ctx platform flash context
+ * @param jit_entry address of the entry header in flash
+ * @param magic value for the magic field
+ * @param version value for the version field
+ * @param code value for the code field
+ * @param labels value for the labels field
+ * @return true on success, false if the flash update was rejected or failed
+ */
+static bool jit_stream_flash_finalize_entry(struct JSFlashPlatformContext *pf_ctx, struct JITEntry *jit_entry, uint16_t magic, uint16_t version, uint32_t code, uint32_t labels)
+{
+    uintptr_t entry_addr = (uintptr_t) jit_entry;
+
+    // Start from the current flash contents so that every byte not updated
+    // below (the size field, any padding) is written back unchanged
+    struct JITEntry updated_entry;
+    memcpy(&updated_entry, (const uint8_t *) entry_addr, sizeof(updated_entry));
+
+    // Update fields
+    updated_entry.magic = magic;
+    updated_entry.version = version;
+    updated_entry.code = code;
+    updated_entry.labels = labels;
+
+    // Write back to flash, page by page
+    if (!jit_stream_flash_replace_at_addr(pf_ctx, entry_addr, (const uint8_t *) &updated_entry, sizeof(updated_entry))) {
+        uintptr_t page_base_addr = entry_addr & ~(FLASH_PAGE_SIZE - 1);
+        fprintf(stderr, "Failed to finalize entry at address 0x%lx\n", (unsigned long) page_base_addr);
+        return false;
+    }
+
+    return true;
+}
+
+/**
+ * @brief Replace data in flash at the given absolute address.
+ *
+ * @details Every page overlapping [addr, addr + len) is read, patched in a
+ * page-sized buffer and written back. Before a page is written, the new bytes
+ * are checked against the current flash contents: only bit clears (1→0) are
+ * accepted, since setting a bit requires an erase.
+ *
+ * @param pf_ctx platform flash context
+ * @param addr absolute address of the first byte to replace
+ * @param data replacement bytes
+ * @param len number of bytes to replace
+ * @return true on success, false if validation fails (trying to set bits 0→1)
+ * or if a page write fails
+ */
+static bool jit_stream_flash_replace_at_addr(struct JSFlashPlatformContext *pf_ctx, uintptr_t addr, const uint8_t *data, size_t len)
+{
+    if (len == 0) {
+        // Nothing to replace: do not rewrite a page with unchanged contents
+        return true;
+    }
+
+    uintptr_t replace_start = addr;
+    uintptr_t replace_end = replace_start + len;
+
+    // Iterate over all pages that need to be updated
+    uintptr_t current_page_addr = replace_start & ~(FLASH_PAGE_SIZE - 1);
+    size_t data_offset = 0;
+
+    while (current_page_addr < replace_end) {
+        // Calculate the range within this page that needs to be replaced
+        uintptr_t page_start_offset = 0;
+        uintptr_t page_end_offset = FLASH_PAGE_SIZE;
+
+        if (current_page_addr < replace_start) {
+            page_start_offset = replace_start - current_page_addr;
+        }
+
+        if (current_page_addr + FLASH_PAGE_SIZE > replace_end) {
+            page_end_offset = replace_end - current_page_addr;
+        }
+
+        size_t copy_len = page_end_offset - page_start_offset;
+
+        // Prepare page buffer
+        uint8_t page_buffer[FLASH_PAGE_SIZE];
+        uintptr_t page_base_addr = current_page_addr;
+
+        // Read current page contents
+        memcpy(page_buffer, (const uint8_t *) page_base_addr, FLASH_PAGE_SIZE);
+
+        // Verify that we're only clearing bits (1→0), not setting them (0→1)
+        const uint8_t *flash_ptr = (const uint8_t *) page_base_addr;
+        for (size_t i = 0; i < copy_len; i++) {
+            uint8_t flash_byte = flash_ptr[page_start_offset + i];
+            uint8_t new_byte = data[data_offset + i];
+
+            // Check if we're trying to set any bits (0→1)
+            if ((new_byte & ~flash_byte) != 0) {
+                fprintf(stderr, "\n=== FLASH REPLACE VALIDATION FAILED ===\n");
+                fprintf(stderr, "Attempting to set bits (0→1) without erase!\n");
+                fprintf(stderr, "Page address: 0x%lx\n", (unsigned long) page_base_addr);
+                // The offset is computed in uintptr_t: cast so that the
+                // argument type matches the %zu conversion on every target
+                fprintf(stderr, "Offset in page: %zu, flash byte: 0x%02hhx, new byte: 0x%02hhx\n",
+                    (size_t) (page_start_offset + i), flash_byte, new_byte);
+                fprintf(stderr, "Bits being set (0→1): 0x%02hhx\n", (uint8_t) (new_byte & ~flash_byte));
+                fprintf(stderr, "Replace address: 0x%lx, len=%zu\n", (unsigned long) addr, len);
+                fprintf(stderr, "========================================\n\n");
+                return false;
+            }
+        }
+
+        // Update with new data
+        memcpy(page_buffer + page_start_offset, data + data_offset, copy_len);
+
+        // Write back to flash
+        if (!jit_stream_flash_platform_write_page(pf_ctx, page_base_addr, page_buffer)) {
+            fprintf(stderr, "Failed to replace data at address 0x%lx\n", (unsigned long) page_base_addr);
+            return false;
+        }
+
+        data_offset += copy_len;
+        current_page_addr += FLASH_PAGE_SIZE;
+    }
+
+    return true;
+}
+
+/**
+ * @brief Append bytes to the stream, one byte at a time, through the page
+ * buffer.
+ *
+ * @details Each byte is first checked against the byte currently held in the
+ * page buffer and rejected if it would set a bit that is 0 there. When the
+ * last byte of a page is stored, the page is flushed to flash, the stream
+ * moves to the next page (erasing the sector first if a sector boundary was
+ * crossed and the sector needs it), and the buffer is reloaded from flash.
+ *
+ * @param js stream to append to
+ * @param buffer bytes to append
+ * @param count number of bytes in buffer
+ * @return true on success, false if validation, flushing or erasing failed
+ */
+static bool jit_stream_flash_append(struct JITStreamFlash *js, const uint8_t *buffer, size_t count)
+{
+    while (count > 0) {
+        // Validate flash constraints: can only write to erased (0xFF) bytes
+        uint8_t current_byte = js->page_buffer[js->page_offset];
+        uint8_t new_byte = *buffer;
+        if ((~current_byte & new_byte) != 0) {
+            // Trying to set bits from 0→1 without erase
+            fprintf(stderr, "\n=== JIT STREAM FLASH APPEND ERROR ===\n");
+            fprintf(stderr, "Attempting to write 0x%02x over 0x%02x at page offset %u\n",
+                new_byte, current_byte, js->page_offset);
+            fprintf(stderr, "Page base address: 0x%lx\n", (unsigned long) js->page_base_addr);
+            fprintf(stderr, "Flash address: 0x%lx\n", (unsigned long) (js->page_base_addr + js->page_offset));
+            fprintf(stderr, "Bits being set 0→1: 0x%02x\n", (~current_byte & new_byte));
+            fprintf(stderr, "This indicates the sector was not properly erased!\n");
+            fprintf(stderr, "=====================================\n\n");
+            return false;
+        }
+
+        js->page_buffer[js->page_offset] = *buffer;
+        // The byte just stored was the last one of the page: flush it
+        if (js->page_offset == (FLASH_PAGE_SIZE - 1)) {
+            if (!jit_stream_flash_flush_page(js)) {
+                fprintf(stderr, "jit_stream_flash_flush_page failed\n");
+                return false;
+            }
+            // Move to the next page after flushing
+            uintptr_t previous_sector = js->page_base_addr & ~(FLASH_SECTOR_SIZE - 1);
+            js->page_base_addr += FLASH_PAGE_SIZE;
+            js->page_offset = 0;
+            uintptr_t new_sector = js->page_base_addr & ~(FLASH_SECTOR_SIZE - 1);
+
+            // Check if we've entered a new sector and erase if needed
+            if (new_sector != previous_sector) {
+                if (jit_stream_flash_sector_needs_erase(new_sector)) {
+                    TRACE("jit_stream_flash_append -- erasing new sector at %lx\n", (unsigned long) new_sector);
+                    if (!jit_stream_flash_platform_erase_sector(js->pf_ctx, new_sector)) {
+                        fprintf(stderr, "Failed to erase new sector at address 0x%lx\n", (unsigned long) new_sector);
+                        return false;
+                    }
+                }
+            }
+
+            // Read the new page contents into the buffer
+            // (this must happen after the erase above so that the buffer
+            // reflects the erased state the validation compares against)
+            memcpy(js->page_buffer, (const uint8_t *) js->page_base_addr, FLASH_PAGE_SIZE);
+        } else {
+            js->page_offset++;
+        }
+        buffer++;
+        count--;
+    }
+    return true;
+}
+
+/**
+ * @brief Abandon a partially initialized stream resource.
+ *
+ * @details Destroys the platform context, then clears the pointer before
+ * releasing the resource: jit_stream_flash_dtor destroys any non-NULL pf_ctx,
+ * so leaving the pointer set would destroy the same context twice.
+ *
+ * @param js stream resource being abandoned
+ */
+static void jit_stream_flash_new_abort(struct JITStreamFlash *js)
+{
+    jit_stream_flash_platform_destroy(js->pf_ctx);
+    js->pf_ctx = NULL;
+    enif_release_resource(js);
+}
+
+/**
+ * @brief NIF creating a new flash-backed JIT stream.
+ *
+ * @details The stream starts right after the last finalized cache entry, or
+ * at the first cache slot when there is none. The rest of the start sector is
+ * erased if it holds stale data (preserving the bytes located before the
+ * entry), the following sector is erased if the entry header spills into it,
+ * and an all-0xFF placeholder header is appended.
+ *
+ * @param ctx calling context
+ * @param argc unused
+ * @param argv unused
+ * @return a resource term for the new stream; raises out_of_memory or badarg
+ * on failure
+ */
+static term nif_jit_stream_flash_new(Context *ctx, int argc, term argv[])
+{
+    UNUSED(argc);
+    UNUSED(argv);
+
+    struct JITEntry *last_valid_entry = globalcontext_find_last_jit_entry(ctx->global);
+    struct JITEntry *new_entry;
+
+    if (last_valid_entry == NULL) {
+        // No valid entries, get the first position
+        bool is_valid;
+        new_entry = globalcontext_find_first_jit_entry(ctx->global, &is_valid);
+    } else {
+        // Get position after last valid entry
+        new_entry = jit_entry_next(last_valid_entry);
+    }
+
+    // Return a resource object
+    struct JITStreamFlash *js = enif_alloc_resource(jit_stream_flash_resource_type, sizeof(struct JITStreamFlash));
+    if (IS_NULL_PTR(js)) {
+        RAISE_ERROR(OUT_OF_MEMORY_ATOM);
+    }
+
+    // Initialize platform context
+    js->pf_ctx = jit_stream_flash_platform_init();
+    if (IS_NULL_PTR(js->pf_ctx)) {
+        fprintf(stderr, "Failed to initialize platform flash context\n");
+        // pf_ctx is NULL here, so the destructor has nothing to destroy
+        enif_release_resource(js);
+        RAISE_ERROR(OUT_OF_MEMORY_ATOM);
+    }
+
+    js->jit_entry = new_entry;
+    js->page_base_addr = (uintptr_t) new_entry & ~(FLASH_PAGE_SIZE - 1);
+
+    // Handle sector erasing for the sector where JIT entry starts
+    uintptr_t new_entry_addr = (uintptr_t) new_entry;
+    uintptr_t new_entry_sector = new_entry_addr & ~(FLASH_SECTOR_SIZE - 1);
+    uintptr_t sector_end = new_entry_sector + FLASH_SECTOR_SIZE;
+
+    // Check if there's stale data from entry position to end of sector
+    bool has_stale_data = false;
+    const uintptr_t *check_ptr = (const uintptr_t *) new_entry_addr;
+    const uintptr_t *check_end = (const uintptr_t *) sector_end;
+    const uintptr_t erased_pattern = ~((uintptr_t) 0);
+
+    while (check_ptr < check_end) {
+        if (*check_ptr != erased_pattern) {
+            has_stale_data = true;
+            break;
+        }
+        check_ptr++;
+    }
+
+    if (has_stale_data) {
+        // There's stale data (from failed compilation) - need to erase but preserve data before entry
+        size_t preserve_size = new_entry_addr - new_entry_sector;
+
+        if (preserve_size > 0) {
+            // Allocate buffer for the sector
+            uint8_t *sector_buffer = malloc(FLASH_SECTOR_SIZE);
+            if (IS_NULL_PTR(sector_buffer)) {
+                fprintf(stderr, "Failed to allocate sector buffer\n");
+                jit_stream_flash_new_abort(js);
+                RAISE_ERROR(OUT_OF_MEMORY_ATOM);
+            }
+
+            // Copy data to preserve (before the entry)
+            memcpy(sector_buffer, (const uint8_t *) new_entry_sector, preserve_size);
+
+            // Fill rest with 0xFF (erased state)
+            memset(sector_buffer + preserve_size, 0xFF, FLASH_SECTOR_SIZE - preserve_size);
+
+            // Erase the sector
+            TRACE("nif_jit_stream_flash_new -- erasing sector with stale data at %lx (preserving %zu bytes)\n",
+                (unsigned long) new_entry_sector, preserve_size);
+            if (!jit_stream_flash_platform_erase_sector(js->pf_ctx, new_entry_sector)) {
+                fprintf(stderr, "Failed to erase sector with stale data\n");
+                free(sector_buffer);
+                jit_stream_flash_new_abort(js);
+                RAISE_ERROR(BADARG_ATOM);
+            }
+
+            // Write back the preserved data page by page
+            for (size_t page_offset = 0; page_offset < preserve_size; page_offset += FLASH_PAGE_SIZE) {
+                if (!jit_stream_flash_platform_write_page(js->pf_ctx, new_entry_sector + page_offset,
+                        sector_buffer + page_offset)) {
+                    fprintf(stderr, "Failed to write back preserved data\n");
+                    free(sector_buffer);
+                    jit_stream_flash_new_abort(js);
+                    RAISE_ERROR(BADARG_ATOM);
+                }
+            }
+
+            free(sector_buffer);
+        } else {
+            // Entry is at sector boundary, just erase
+            TRACE("nif_jit_stream_flash_new -- erasing sector with stale data at %lx\n",
+                (unsigned long) new_entry_sector);
+            if (!jit_stream_flash_platform_erase_sector(js->pf_ctx, new_entry_sector)) {
+                fprintf(stderr, "Failed to erase sector for new JIT entry\n");
+                jit_stream_flash_new_abort(js);
+                RAISE_ERROR(BADARG_ATOM);
+            }
+        }
+    } else {
+        TRACE("nif_jit_stream_flash_new -- sector at %lx is clean (no stale data)\n",
+            (unsigned long) new_entry_sector);
+    }
+
+    // Now handle the sector where JIT entry ends (if different from start sector)
+    uintptr_t entry_end = new_entry_addr + sizeof(struct JITEntry);
+    uintptr_t entry_end_sector = entry_end & ~(FLASH_SECTOR_SIZE - 1);
+
+    if (entry_end_sector != new_entry_sector) {
+        // Entry spans two sectors - erase the end sector if needed
+        if (jit_stream_flash_sector_needs_erase(entry_end_sector)) {
+            TRACE("nif_jit_stream_flash_new -- erasing end sector at %lx\n",
+                (unsigned long) entry_end_sector);
+            if (!jit_stream_flash_platform_erase_sector(js->pf_ctx, entry_end_sector)) {
+                fprintf(stderr, "Failed to erase end sector for new JIT entry\n");
+                jit_stream_flash_new_abort(js);
+                RAISE_ERROR(BADARG_ATOM);
+            }
+        }
+    }
+
+    // Load the page buffer only now, after any erase above, so that it
+    // mirrors the actual flash contents
+    memcpy(js->page_buffer, (const uint8_t *) js->page_base_addr, FLASH_PAGE_SIZE);
+    js->page_offset = (uintptr_t) new_entry & (FLASH_PAGE_SIZE - 1);
+
+    TRACE("nif_jit_stream_flash_new entry is %p, page_offset is %lx\n", (void *) new_entry, (unsigned long) js->page_offset);
+
+    // Append the first bytes, which may flush the page.
+    // The header is all ones (erased state); its fields are filled in later.
+    struct JITEntry header;
+    header.magic = 0xFFFF;
+    header.version = 0xFFFF;
+    header.code = 0xFFFFFFFF;
+    header.labels = 0xFFFFFFFF;
+    header.size = 0xFFFFFFFF;
+    if (!jit_stream_flash_append(js, (const uint8_t *) &header, sizeof(header))) {
+        jit_stream_flash_new_abort(js);
+        RAISE_ERROR(BADARG_ATOM);
+    }
+
+    term obj = enif_make_resource(erl_nif_env_from_context(ctx), js);
+    enif_release_resource(js); // decrement refcount after enif_alloc_resource
+    return obj;
+}
+
+/**
+ * @brief NIF returning the current write offset of the stream, counted from
+ * the first byte following the entry header.
+ *
+ * @param ctx calling context
+ * @param argc unused
+ * @param argv argv[0] is the stream resource
+ * @return the offset as an integer term; raises badarg if argv[0] is not a
+ * stream resource
+ */
+static term nif_jit_stream_flash_offset(Context *ctx, int argc, term argv[])
+{
+    UNUSED(argc);
+
+    void *resource;
+    if (UNLIKELY(!enif_get_resource(erl_nif_env_from_context(ctx), argv[0], jit_stream_flash_resource_type, &resource))) {
+        RAISE_ERROR(BADARG_ATOM);
+    }
+    struct JITStreamFlash *stream = (struct JITStreamFlash *) resource;
+
+    // Code starts right after the header; the write position is the current
+    // page plus the offset inside its buffer
+    uintptr_t code_start = ((uintptr_t) stream->jit_entry + sizeof(struct JITEntry));
+    uintptr_t write_pos = stream->page_base_addr + stream->page_offset;
+
+    int offset = write_pos - code_start;
+    return term_from_int(offset);
+}
+
+/**
+ * @brief NIF appending a binary to the stream.
+ *
+ * @param ctx calling context
+ * @param argc unused
+ * @param argv argv[0] is the stream resource, argv[1] the binary to append
+ * @return argv[0]; raises badarg if an argument has the wrong type or the
+ * append fails
+ */
+static term nif_jit_stream_flash_append(Context *ctx, int argc, term argv[])
+{
+    UNUSED(argc);
+
+    VALIDATE_VALUE(argv[1], term_is_binary);
+
+    void *resource;
+    if (UNLIKELY(!enif_get_resource(erl_nif_env_from_context(ctx), argv[0], jit_stream_flash_resource_type, &resource))) {
+        RAISE_ERROR(BADARG_ATOM);
+    }
+    struct JITStreamFlash *stream = (struct JITStreamFlash *) resource;
+
+    const uint8_t *bytes = (const uint8_t *) term_binary_data(argv[1]);
+    size_t byte_count = term_binary_size(argv[1]);
+
+    bool appended = jit_stream_flash_append(stream, bytes, byte_count);
+    if (!appended) {
+        RAISE_ERROR(BADARG_ATOM);
+    }
+
+    return argv[0];
+}
+
+/**
+ * @brief NIF replacing already-emitted bytes of the stream.
+ *
+ * @details The range [offset, offset + size) is relative to the first byte
+ * after the entry header and must lie within what has been appended so far.
+ * Bytes located in the current (unflushed) page are patched in the page
+ * buffer; bytes in earlier pages are patched in flash.
+ *
+ * @param ctx calling context
+ * @param argc unused
+ * @param argv argv[0] is the stream resource, argv[1] the offset, argv[2] the
+ * replacement binary
+ * @return argv[0]; raises badarg on wrong argument types, on a range outside
+ * the emitted stream, or if the flash update fails
+ */
+static term nif_jit_stream_flash_replace(Context *ctx, int argc, term argv[])
+{
+    UNUSED(argc);
+
+    VALIDATE_VALUE(argv[1], term_is_integer);
+    VALIDATE_VALUE(argv[2], term_is_binary);
+    void *js_obj_ptr;
+    if (UNLIKELY(!enif_get_resource(erl_nif_env_from_context(ctx), argv[0], jit_stream_flash_resource_type, &js_obj_ptr))) {
+        RAISE_ERROR(BADARG_ATOM);
+    }
+
+    size_t binary_size = term_binary_size(argv[2]);
+    const uint8_t *binary_data = (const uint8_t *) term_binary_data(argv[2]);
+    avm_int_t offset = term_to_int(argv[1]);
+
+    struct JITStreamFlash *js_obj = (struct JITStreamFlash *) js_obj_ptr;
+
+    uintptr_t base_addr = ((uintptr_t) js_obj->jit_entry + sizeof(struct JITEntry));
+
+    // Reject ranges outside the emitted stream: a negative or too large offset
+    // would otherwise patch the entry header or flash the stream does not own.
+    // The comparison is written so that it cannot overflow.
+    size_t stream_offset = (js_obj->page_base_addr + js_obj->page_offset) - base_addr;
+    if (UNLIKELY(offset < 0 || (size_t) offset > stream_offset || binary_size > stream_offset - (size_t) offset)) {
+        RAISE_ERROR(BADARG_ATOM);
+    }
+
+    uintptr_t replace_start = base_addr + offset;
+    uintptr_t replace_end = replace_start + binary_size;
+
+    // Iterate over all pages that need to be updated
+    uintptr_t current_page_addr = replace_start & ~(FLASH_PAGE_SIZE - 1);
+    size_t binary_offset = 0;
+
+    while (current_page_addr < replace_end) {
+        // Calculate the range within this page that needs to be replaced
+        uintptr_t page_start_offset = 0;
+        uintptr_t page_end_offset = FLASH_PAGE_SIZE;
+
+        if (current_page_addr < replace_start) {
+            page_start_offset = replace_start - current_page_addr;
+        }
+
+        if (current_page_addr + FLASH_PAGE_SIZE > replace_end) {
+            page_end_offset = replace_end - current_page_addr;
+        }
+
+        size_t copy_len = page_end_offset - page_start_offset;
+
+        // Check if this is the current buffer page
+        if (current_page_addr == js_obj->page_base_addr) {
+            // Update current buffer directly
+            memcpy(js_obj->page_buffer + page_start_offset, binary_data + binary_offset, copy_len);
+        } else {
+            // This is an already-flushed page, need to update flash
+            if (!jit_stream_flash_replace_at_addr(js_obj->pf_ctx, current_page_addr + page_start_offset,
+                    binary_data + binary_offset,
+                    copy_len)) {
+                RAISE_ERROR(BADARG_ATOM);
+            }
+        }
+
+        binary_offset += copy_len;
+        current_page_addr += FLASH_PAGE_SIZE;
+    }
+
+    return argv[0];
+}
+
+/**
+ * @brief NIF reading back bytes previously emitted into the stream.
+ *
+ * @details The range [offset, offset + len) is relative to the first byte
+ * after the entry header and must lie within what has been appended so far.
+ * Bytes of the current page are taken from the page buffer, because appended
+ * bytes only reach flash when the page is flushed; bytes of earlier pages are
+ * read from flash.
+ *
+ * @param ctx calling context
+ * @param argc unused
+ * @param argv argv[0] is the stream resource, argv[1] the offset, argv[2] the
+ * length
+ * @return a binary holding the requested bytes; raises badarg on invalid
+ * arguments or out-of-range reads, out_of_memory if memory cannot be
+ * allocated
+ */
+static term nif_jit_stream_flash_read(Context *ctx, int argc, term argv[])
+{
+    UNUSED(argc);
+
+    VALIDATE_VALUE(argv[1], term_is_integer);
+    VALIDATE_VALUE(argv[2], term_is_integer);
+    void *js_obj_ptr;
+    if (UNLIKELY(!enif_get_resource(erl_nif_env_from_context(ctx), argv[0], jit_stream_flash_resource_type, &js_obj_ptr))) {
+        RAISE_ERROR(BADARG_ATOM);
+    }
+    struct JITStreamFlash *js_obj = (struct JITStreamFlash *) js_obj_ptr;
+
+    avm_int_t offset = term_to_int(argv[1]);
+    avm_int_t len = term_to_int(argv[2]);
+
+    // Validate parameters
+    if (UNLIKELY(len <= 0 || offset < 0)) {
+        RAISE_ERROR(BADARG_ATOM);
+    }
+
+    // Calculate current stream position
+    uintptr_t current_addr = js_obj->page_base_addr + js_obj->page_offset;
+    uintptr_t base_addr = ((uintptr_t) js_obj->jit_entry + sizeof(struct JITEntry));
+    size_t stream_offset = current_addr - base_addr;
+
+    // Check if read is within bounds, without computing offset + len in a
+    // signed type (which could overflow)
+    size_t read_offset = (size_t) offset;
+    size_t read_len = (size_t) len;
+    if (UNLIKELY(read_offset > stream_offset || read_len > stream_offset - read_offset)) {
+        RAISE_ERROR(BADARG_ATOM);
+    }
+
+    if (UNLIKELY(memory_ensure_free_opt(ctx, TERM_BINARY_HEAP_SIZE(len), MEMORY_CAN_SHRINK) != MEMORY_GC_OK)) {
+        RAISE_ERROR(OUT_OF_MEMORY_ATOM);
+    }
+
+    uintptr_t read_addr = base_addr + read_offset;
+    uintptr_t read_end = read_addr + read_len;
+    uintptr_t buffered_page = js_obj->page_base_addr;
+
+    if (read_end <= buffered_page) {
+        // Entirely in already-flushed pages: flash holds the data
+        return term_from_literal_binary((const uint8_t *) read_addr, len, &ctx->heap, ctx->global);
+    }
+
+    // NOTE(review): the two branches below hand a RAM buffer to
+    // term_from_literal_binary and so assume it copies the bytes into the
+    // binary it creates -- confirm against term.h
+    if (read_addr >= buffered_page) {
+        // Entirely in the current page: the page buffer is authoritative
+        return term_from_literal_binary(js_obj->page_buffer + (read_addr - buffered_page), len, &ctx->heap, ctx->global);
+    }
+
+    // The range starts in flushed pages and ends in the current page:
+    // assemble both parts in a temporary buffer
+    uint8_t *staging = malloc(read_len);
+    if (IS_NULL_PTR(staging)) {
+        RAISE_ERROR(OUT_OF_MEMORY_ATOM);
+    }
+    size_t flushed_len = buffered_page - read_addr;
+    memcpy(staging, (const uint8_t *) read_addr, flushed_len);
+    memcpy(staging + flushed_len, js_obj->page_buffer, read_len - flushed_len);
+
+    term result = term_from_literal_binary(staging, len, &ctx->heap, ctx->global);
+    free(staging);
+    return result;
+}
+
+/**
+ * @brief NIF storing the code size in the entry header and writing the
+ * current page buffer to flash.
+ *
+ * @param ctx calling context
+ * @param argc unused
+ * @param argv argv[0] is the stream resource
+ * @return argv[0]; raises badarg if argv[0] is not a stream resource or a
+ * flash operation fails
+ */
+static term nif_jit_stream_flash_flush(Context *ctx, int argc, term argv[])
+{
+    UNUSED(ctx);
+    UNUSED(argc);
+
+    void *resource;
+    if (UNLIKELY(!enif_get_resource(erl_nif_env_from_context(ctx), argv[0], jit_stream_flash_resource_type, &resource))) {
+        RAISE_ERROR(BADARG_ATOM);
+    }
+    struct JITStreamFlash *stream = (struct JITStreamFlash *) resource;
+
+    // The size is computed BEFORE flushing, from the current write position
+    uintptr_t code_start = (uintptr_t) stream->jit_entry + sizeof(struct JITEntry);
+    uintptr_t write_pos = stream->page_base_addr + stream->page_offset;
+    uint32_t code_size = write_pos - code_start;
+
+    // The size field lives either in the page still held in the buffer or in
+    // a page that was already written to flash
+    uintptr_t size_field_addr = (uintptr_t) &stream->jit_entry->size;
+    bool size_in_buffer = (size_field_addr & ~(FLASH_PAGE_SIZE - 1)) == stream->page_base_addr;
+
+    if (!size_in_buffer) {
+        // Already-flushed page: patch flash in place
+        if (!jit_stream_flash_replace_at_addr(stream->pf_ctx, size_field_addr,
+                (const uint8_t *) &code_size,
+                sizeof(uint32_t))) {
+            RAISE_ERROR(BADARG_ATOM);
+        }
+    } else {
+        // Current page: patch the buffer, the flush below carries it to flash
+        size_t field_offset = size_field_addr - stream->page_base_addr;
+        memcpy(stream->page_buffer + field_offset, &code_size, sizeof(uint32_t));
+    }
+
+    // Flush the final page
+    if (!jit_stream_flash_flush_page(stream)) {
+        fprintf(stderr, "jit_stream_flash_flush_page failed\n");
+        RAISE_ERROR(BADARG_ATOM);
+    }
+
+    return argv[0];
+}
+
+/**
+ * @brief NIF returning the atom naming the stream module implemented here.
+ *
+ * @param ctx calling context
+ * @param argc unused
+ * @param argv unused
+ * @return the atom jit_stream_flash
+ */
+static term nif_jit_stream_module(Context *ctx, int argc, term argv[])
+{
+    UNUSED(argv);
+    UNUSED(argc);
+
+    // "\x10" is the length prefix (16) of the atom name
+    term stream_module = globalcontext_make_atom(ctx->global, ATOM_STR("\x10", "jit_stream_flash"));
+    return stream_module;
+}
+
+// NIF descriptors, one per native function implemented above. They are
+// returned by name from jit_stream_flash_get_nif.
+static const struct Nif jit_stream_module_nif = {
+    .base.type = NIFFunctionType,
+    .nif_ptr = nif_jit_stream_module
+};
+static const struct Nif jit_stream_flash_new_nif = {
+    .base.type = NIFFunctionType,
+    .nif_ptr = nif_jit_stream_flash_new
+};
+static const struct Nif jit_stream_flash_offset_nif = {
+    .base.type = NIFFunctionType,
+    .nif_ptr = nif_jit_stream_flash_offset
+};
+static const struct Nif jit_stream_flash_append_nif = {
+    .base.type = NIFFunctionType,
+    .nif_ptr = nif_jit_stream_flash_append
+};
+static const struct Nif jit_stream_flash_replace_nif = {
+    .base.type = NIFFunctionType,
+    .nif_ptr = nif_jit_stream_flash_replace
+};
+static const struct Nif jit_stream_flash_read_nif = {
+    .base.type = NIFFunctionType,
+    .nif_ptr = nif_jit_stream_flash_read
+};
+static const struct Nif jit_stream_flash_flush_nif = {
+    .base.type = NIFFunctionType,
+    .nif_ptr = nif_jit_stream_flash_flush
+};
+
+ModuleNativeEntryPoint jit_stream_flash_entry_point(Context *ctx, term jit_stream)
+{
+    void *resource;
+    if (UNLIKELY(!enif_get_resource(erl_nif_env_from_context(ctx), jit_stream, jit_stream_flash_resource_type, &resource))) {
+        // Not a jit_stream_flash resource
+        return NULL;
+    }
+    struct JITStreamFlash *stream = (struct JITStreamFlash *) resource;
+
+    // Native code starts right after the entry header
+    uintptr_t code_addr = ((uintptr_t) stream->jit_entry + sizeof(struct JITEntry));
+
+    // Convert to executable address (handles DBUS→IBUS, Thumb bit, etc.)
+    uintptr_t executable_addr = jit_stream_flash_platform_ptr_to_executable(code_addr);
+
+    return (ModuleNativeEntryPoint) executable_addr;
+}
+
+/**
+ * @brief Resource destructor: destroys the stream's platform flash context,
+ * if it has one.
+ *
+ * @param caller_env unused
+ * @param obj the struct JITStreamFlash being destroyed
+ */
+static void jit_stream_flash_dtor(ErlNifEnv *caller_env, void *obj)
+{
+    UNUSED(caller_env);
+
+    struct JITStreamFlash *stream = (struct JITStreamFlash *) obj;
+    if (!stream->pf_ctx) {
+        // No platform context attached, nothing to tear down
+        return;
+    }
+    jit_stream_flash_platform_destroy(stream->pf_ctx);
+}
+
+const struct Nif *jit_stream_flash_get_nif(const char *nifname)
+{
+    // Functions exported under the "jit_stream_flash:" module prefix
+    static const char module_prefix[] = "jit_stream_flash:";
+    static const struct
+    {
+        const char *function;
+        const struct Nif *nif;
+    } stream_nifs[] = {
+        { "new/1", &jit_stream_flash_new_nif },
+        { "offset/1", &jit_stream_flash_offset_nif },
+        { "append/2", &jit_stream_flash_append_nif },
+        { "replace/3", &jit_stream_flash_replace_nif },
+        { "read/3", &jit_stream_flash_read_nif },
+        { "flush/1", &jit_stream_flash_flush_nif },
+    };
+
+    if (strcmp("jit:stream_module/0", nifname) == 0) {
+        return &jit_stream_module_nif;
+    }
+
+    const size_t prefix_len = sizeof(module_prefix) - 1;
+    if (strncmp(module_prefix, nifname, prefix_len) != 0) {
+        return NULL;
+    }
+
+    // Match the part following the prefix against the known functions
+    const char *rest = nifname + prefix_len;
+    for (size_t i = 0; i < sizeof(stream_nifs) / sizeof(stream_nifs[0]); i++) {
+        if (strcmp(stream_nifs[i].function, rest) == 0) {
+            return stream_nifs[i].nif;
+        }
+    }
+    return NULL;
+}
+
+void jit_stream_flash_init(GlobalContext *global)
+{
+    // A partially initialized environment built from the global context is
+    // what the resource type registration is given here
+    ErlNifEnv partial_env;
+    erl_nif_env_partial_init_from_globalcontext(&partial_env, global);
+
+    jit_stream_flash_resource_type = enif_init_resource_type(
+        &partial_env, "jit_stream_flash", &jit_stream_flash_resource_type_init, ERL_NIF_RT_CREATE, NULL);
+}
+
+// Finalize the cache entry that precedes `entry_point`: store the magic,
+// version, code pointer and label count in its header, erase the following
+// sector when the next entry slot lies there and holds stale data, and mark
+// the cache as valid if it was not already. Failures are logged to stderr
+// and end the operation early.
+void globalcontext_set_cache_native_code(GlobalContext *global, Module *mod, uint16_t version, ModuleNativeEntryPoint entry_point, uint32_t labels)
+{
+    // Only the validity flag is needed here, not the first entry itself
+    bool is_valid;
+    (void) globalcontext_find_first_jit_entry(global, &is_valid);
+
+    struct JSFlashPlatformContext *pf_ctx = jit_stream_flash_platform_init();
+    if (IS_NULL_PTR(pf_ctx)) {
+        fprintf(stderr, "Failed to initialize platform flash context\n");
+        return;
+    }
+
+    // Reverse the executable address transformation to get data address
+    // Platform-specific: Thumb (clear bit 0), RISC-V (IBUS→DBUS conversion)
+    uintptr_t data_addr = jit_stream_flash_platform_executable_to_ptr((uintptr_t) entry_point);
+
+    // The entry header immediately precedes the native code
+    struct JITEntry *jit_entry = (struct JITEntry *) (data_addr - sizeof(struct JITEntry));
+    uintptr_t code = (uintptr_t) mod->code;
+
+    // Finalize the entry
+    if (!jit_stream_flash_finalize_entry(pf_ctx, jit_entry, JIT_ENTRY_MAGIC, version, (uint32_t) code, labels)) {
+        fprintf(stderr, "jit_stream_flash_finalize_entry failed\n");
+        jit_stream_flash_platform_destroy(pf_ctx);
+        return;
+    }
+
+#ifdef ENABLE_TRACE
+    // Compute CRC of entire module for verification
+    uint32_t module_crc = crc32((const uint8_t *) jit_entry, sizeof(struct JITEntry) + jit_entry->size);
+    TRACE("After finalize - jit_entry=%p CRC32=0x%08x (entry+code size=%u)\n",
+        (void *) jit_entry, (unsigned int) module_crc, (unsigned int) (sizeof(struct JITEntry) + jit_entry->size));
+#endif
+
+    // Erase next sector if it's completely after the current module
+    struct JITEntry *next_entry = jit_entry_next(jit_entry);
+    uintptr_t next_entry_addr = (uintptr_t) next_entry;
+    uintptr_t next_sector = next_entry_addr & ~(FLASH_SECTOR_SIZE - 1);
+
+    // Calculate the sector where the current module ENDS (not where it starts)
+    uintptr_t current_module_end = (uintptr_t) jit_entry + sizeof(struct JITEntry) + jit_entry->size;
+    uintptr_t current_end_sector = current_module_end & ~(FLASH_SECTOR_SIZE - 1);
+
+    // Only erase next sector if it's completely after the current module's end
+    // This prevents erasing a sector that contains the tail of the current module
+    if (next_sector > current_end_sector) {
+        // Next entry is in a sector completely after current module, erase it if it has stale data
+        if (next_entry->magic != 0xFFFF) {
+            TRACE("globalcontext_set_cache_native_code -- erasing new sector at %lx\n", (unsigned long) next_sector);
+            if (!jit_stream_flash_platform_erase_sector(pf_ctx, next_sector)) {
+                fprintf(stderr, "jit_stream_flash_platform_erase_sector failed\n");
+                jit_stream_flash_platform_destroy(pf_ctx);
+                return;
+            }
+        } else {
+            TRACE("globalcontext_set_cache_native_code -- NOT erasing new sector at %lx\n", (unsigned long) next_sector);
+        }
+    }
+
+    if (!is_valid) {
+        // Mark that cache entry is valid by replacing end with END in installed AVM
+        globalcontext_set_cache_valid(global);
+    }
+
+    jit_stream_flash_platform_destroy(pf_ctx);
+}
+
+// Implementation of jit_stream_entry_point, sys_get_cache_native_code and
+// sys_set_cache_native_code using this jit_stream
+#ifndef TEST_JIT_STREAM_FLASH
+ModuleNativeEntryPoint jit_stream_entry_point(Context *ctx, term jit_stream)
+{
+    // Delegate to the flash-backed stream implementation
+    ModuleNativeEntryPoint entry_point = jit_stream_flash_entry_point(ctx, jit_stream);
+    return entry_point;
+}
+
+// Look up cached native code for `mod`: walk the finalized cache entries and
+// return the one whose code field equals the (truncated) address of
+// mod->code. On a match, *version, *entry_point and *labels are filled in and
+// true is returned; false means the cache is not valid or has no such entry.
+bool sys_get_cache_native_code(GlobalContext *global, Module *mod, uint16_t *version, ModuleNativeEntryPoint *entry_point, uint32_t *labels)
+{
+    bool is_valid;
+    struct JITEntry *first_entry = globalcontext_find_first_jit_entry(global, &is_valid);
+    if (!is_valid) {
+        return false;
+    }
+
+    const uint32_t wanted_code = (uint32_t) (uintptr_t) mod->code;
+
+    for (struct JITEntry *jit_entry = first_entry; jit_entry->magic == JIT_ENTRY_MAGIC; jit_entry = jit_entry_next(jit_entry)) {
+        if (jit_entry->code != wanted_code) {
+            continue;
+        }
+
+        *version = jit_entry->version;
+        // Native code follows the header; convert to an executable address
+        uintptr_t ep_addr = jit_stream_flash_platform_ptr_to_executable((uintptr_t) jit_entry + sizeof(struct JITEntry));
+        *entry_point = (ModuleNativeEntryPoint) ep_addr;
+        *labels = jit_entry->labels;
+
+#ifdef ENABLE_TRACE
+        // Compute CRC of entire module for verification
+        uint32_t module_crc = crc32((const uint8_t *) jit_entry, sizeof(struct JITEntry) + jit_entry->size);
+        TRACE("Loading from cache - jit_entry=%p CRC32=0x%08x (entry+code size=%u)\n",
+            (void *) jit_entry, (unsigned int) module_crc, (unsigned int) (sizeof(struct JITEntry) + jit_entry->size));
+#endif
+
+        return true;
+    }
+    return false;
+}
+
+void sys_set_cache_native_code(GlobalContext *global, Module *mod, uint16_t version, ModuleNativeEntryPoint entry_point, uint32_t labels)
+{
+    // Thin wrapper: the flash-backed implementation does all the work
+    globalcontext_set_cache_native_code(
+        global, mod, version, entry_point, labels);
+}
+#endif
diff --git a/src/libAtomVM/jit_stream_flash.h b/src/libAtomVM/jit_stream_flash.h
new file mode 100644
index 0000000000..30644189ea
--- /dev/null
+++ b/src/libAtomVM/jit_stream_flash.h
@@ -0,0 +1,129 @@
+/*
+ * This file is part of AtomVM.
+ *
+ * Copyright 2025 by Paul Guyot <pguyot@kallisys.net>
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ * SPDX-License-Identifier: Apache-2.0 OR LGPL-2.1-or-later
+ */
+
+/**
+ * @file jit_stream_flash.h
+ * @brief JIT code caching in flash memory - common implementation
+ */
+
+#ifndef _JIT_STREAM_FLASH_H_
+#define _JIT_STREAM_FLASH_H_
+
+#include "globalcontext.h"
+#include "jit_stream_flash_platform.h"
+#include "module.h"
+
+#include <stdbool.h>
+#include <stdint.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/**
+ * @brief Platform-specific flash context (opaque)
+ */
+struct JSFlashPlatformContext;
+
+/**
+ * @brief Initialize JIT stream flash subsystem
+ *
+ * @param global Global context
+ */
+void jit_stream_flash_init(GlobalContext *global);
+
+/**
+ * @brief Get NIF for jit_stream_flash operations
+ *
+ * @param nifname NIF name
+ * @return NIF pointer or NULL
+ */
+const struct Nif *jit_stream_flash_get_nif(const char *nifname);
+
+/**
+ * @brief Get entry point from jit_stream_flash.
+ * Called by `jit_stream_entry_point`
+ *
+ * @param ctx Context
+ * @param jit_stream JIT stream term
+ * @return Entry point or NULL
+ */
+ModuleNativeEntryPoint jit_stream_flash_entry_point(Context *ctx, term jit_stream);
+
+/**
+ * @brief Finalize flash operation by marking an entry point as valid for
+ * a given module. This is called by `sys_set_cache_native_code`.
+ *
+ * @param global Global context
+ * @param mod Module
+ * @param version Module version
+ * @param entry_point Entry point
+ * @param labels Number of labels
+ */
+void globalcontext_set_cache_native_code(GlobalContext *global, Module *mod, uint16_t version, ModuleNativeEntryPoint entry_point, uint32_t labels);
+
+/**
+ * @brief Initialize platform flash context
+ * @return Platform flash context, or NULL on error
+ */
+struct JSFlashPlatformContext *jit_stream_flash_platform_init(void);
+
+/**
+ * @brief Destroy platform flash context
+ * @param pf_ctx Platform flash context to destroy
+ */
+void jit_stream_flash_platform_destroy(struct JSFlashPlatformContext *pf_ctx);
+
+/**
+ * @brief Erase a flash sector at the given address
+ * @param pf_ctx Platform flash context
+ * @param addr Virtual address of the sector to erase (must be sector-aligned)
+ * @return true on success, false on error
+ */
+bool jit_stream_flash_platform_erase_sector(struct JSFlashPlatformContext *pf_ctx, uintptr_t addr);
+
+/**
+ * @brief Write a page to flash
+ * @param pf_ctx Platform flash context
+ * @param addr Virtual address to write to (must be page-aligned)
+ * @param data Data to write (must be FLASH_PAGE_SIZE bytes)
+ * @return true on success, false on error
+ */
+bool jit_stream_flash_platform_write_page(struct JSFlashPlatformContext *pf_ctx, uintptr_t addr, const uint8_t *data);
+
+/**
+ * @brief Convert data bus address to instruction bus address
+ * @param addr Data bus address
+ * @return Instruction bus address (executable pointer)
+ */
+uintptr_t jit_stream_flash_platform_ptr_to_executable(uintptr_t addr);
+
+/**
+ * @brief Convert instruction bus address to data bus address
+ * @param addr Instruction bus address (executable pointer)
+ * @return Data bus address
+ */
+uintptr_t jit_stream_flash_platform_executable_to_ptr(uintptr_t addr);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif // _JIT_STREAM_FLASH_H_
diff --git a/src/libAtomVM/module.c b/src/libAtomVM/module.c
index 108d5027d8..f0eec5770d 100644
--- a/src/libAtomVM/module.c
+++ b/src/libAtomVM/module.c
@@ -38,6 +38,9 @@
 #include <stdio.h>
 #include <stdlib.h>
 
+// #define ENABLE_TRACE
+#include "trace.h"
+
 #ifdef WITH_ZLIB
 #include <zlib.h>
 #endif
@@ -316,13 +319,11 @@ Module *module_new_from_iff_binary(GlobalContext *global, const void *iff_binary
         return NULL;
     }
 
-#ifdef ENABLE_ADVANCED_TRACE
-    mod->import_table = beam_file + offsets[IMPT];
-#endif
     if (offsets[CODE]) {
         mod->code = (CodeChunk *) (beam_file + offsets[CODE]);
     }
     mod->export_table = beam_file + offsets[EXPT];
+    mod->import_table = beam_file + offsets[IMPT];
     mod->local_table = beam_file + offsets[LOCT];
     mod->atom_table = beam_file + offsets[AT8U];
     mod->fun_table = beam_file + offsets[FUNT];
@@ -353,6 +354,13 @@ Module *module_new_from_iff_binary(GlobalContext *global, const void *iff_binary
                 fprintf(stderr, "Native code chunk found but no compatible architecture or variant found\n");
             }
         }
+    } else {
+        ModuleNativeEntryPoint module_entry_point;
+        uint32_t labels;
+        uint16_t version;
+        if (sys_get_cache_native_code(global, mod, &version, &module_entry_point, &labels) && version == JIT_FORMAT_VERSION) {
+            module_set_native_code(mod, labels, module_entry_point);
+        }
     }
 #endif
 
diff --git a/src/libAtomVM/module.h b/src/libAtomVM/module.h
index 2f7b56321c..1748ed4f6a 100644
--- a/src/libAtomVM/module.h
+++ b/src/libAtomVM/module.h
@@ -109,12 +109,9 @@ struct Module
 {
     int module_index;
 
-#ifdef ENABLE_ADVANCED_TRACE
-    void *import_table;
-#endif
-
     CodeChunk *code;
     void *export_table;
+    void *import_table;
     void *local_table;
     void *atom_table;
     void *fun_table;
diff --git a/src/libAtomVM/nifs.c b/src/libAtomVM/nifs.c
index e3d76eba41..24003cfcc7 100644
--- a/src/libAtomVM/nifs.c
+++ b/src/libAtomVM/nifs.c
@@ -203,6 +203,7 @@ static term nif_code_server_code_chunk(Context *ctx, int argc, term argv[]);
 static term nif_code_server_atom_resolver(Context *ctx, int argc, term argv[]);
 static term nif_code_server_literal_resolver(Context *ctx, int argc, term argv[]);
 static term nif_code_server_type_resolver(Context *ctx, int argc, term argv[]);
+static term nif_code_server_import_resolver(Context *ctx, int argc, term argv[]);
 static term nif_code_server_set_native_code(Context *ctx, int argc, term argv[]);
 #endif
 static term nif_erlang_module_loaded(Context *ctx, int argc, term argv[]);
@@ -774,6 +775,10 @@ static const struct Nif code_server_type_resolver_nif = {
     .base.type = NIFFunctionType,
     .nif_ptr = nif_code_server_type_resolver
 };
+static const struct Nif code_server_import_resolver_nif = {
+    .base.type = NIFFunctionType,
+    .nif_ptr = nif_code_server_import_resolver
+};
 static const struct Nif code_server_set_native_code_nif = {
     .base.type = NIFFunctionType,
     .nif_ptr = nif_code_server_set_native_code
@@ -4806,7 +4811,7 @@ static term nif_atomvm_get_start_beam(Context *ctx, int argc, term argv[])
             uint32_t size;
             const void *beam;
             const char *module_name;
-            if (!avmpack_find_section_by_flag(avmpack_data->data, BEAM_START_FLAG, &beam, &size, &module_name)) {
+            if (!avmpack_find_section_by_flag(avmpack_data->data, BEAM_START_FLAG, BEAM_START_FLAG, &beam, &size, &module_name)) {
                 synclist_unlock(&ctx->global->avmpack_data);
                 if (UNLIKELY(memory_ensure_free(ctx, TUPLE_SIZE(2)) != MEMORY_GC_OK)) {
                     RAISE_ERROR(OUT_OF_MEMORY_ATOM);
@@ -5622,10 +5627,60 @@ static term nif_code_server_type_resolver(Context *ctx, int argc, term argv[])
     if (IS_NULL_PTR(mod)) {
         RAISE_ERROR(BADARG_ATOM);
     }
+
     int type_index = term_to_int(argv[1]);
     return module_get_type_by_index(mod, type_index, ctx);
 }
 
+static term nif_code_server_import_resolver(Context *ctx, int argc, term argv[])
+{
+    UNUSED(argc);
+    VALIDATE_VALUE(argv[0], term_is_atom);
+    VALIDATE_VALUE(argv[1], term_is_integer);
+
+    // Look up the loaded module named by the first argument.
+    Module *mod = globalcontext_get_module(ctx->global, term_to_atom_index(argv[0]));
+    if (IS_NULL_PTR(mod)) {
+        RAISE_ERROR(BADARG_ATOM);
+    }
+
+    int import_index = term_to_int(argv[1]);
+    const uint8_t *import_table = mod->import_table;
+
+    // Reject negative indices and modules lacking imported_funcs or an import table.
+    if (import_index < 0 || IS_NULL_PTR(mod->imported_funcs) || IS_NULL_PTR(import_table)) {
+        RAISE_ERROR(BADARG_ATOM);
+    }
+
+    // Import table layout as read here: the 32-bit entry count sits at offset 8,
+    // followed from offset 12 by 12-byte entries made of three 32-bit fields:
+    // module atom index, function atom index and arity.
+    int functions_count = READ_32_UNALIGNED(import_table + 8);
+    if (import_index >= functions_count) {
+        RAISE_ERROR(BADARG_ATOM);
+    }
+
+    const uint8_t *entry = import_table + 12 + import_index * 12;
+    int local_module_atom_index = READ_32_UNALIGNED(entry);
+    int local_function_atom_index = READ_32_UNALIGNED(entry + 4);
+    uint32_t arity = READ_32_UNALIGNED(entry + 8);
+
+    // Fetch the atom terms for both indices from the module.
+    term module_atom = module_get_atom_term_by_id(mod, local_module_atom_index);
+    term function_atom = module_get_atom_term_by_id(mod, local_function_atom_index);
+
+    if (UNLIKELY(memory_ensure_free(ctx, TUPLE_SIZE(3)) != MEMORY_GC_OK)) {
+        RAISE_ERROR(OUT_OF_MEMORY_ATOM);
+    }
+
+    // Result is the 3-tuple {Module, Function, Arity}.
+    term result = term_alloc_tuple(3, &ctx->heap);
+    term_put_tuple_element(result, 0, module_atom);
+    term_put_tuple_element(result, 1, function_atom);
+    term_put_tuple_element(result, 2, term_from_int(arity));
+
+    return result;
+}
 static term nif_code_server_set_native_code(Context *ctx, int argc, term argv[])
 {
     UNUSED(argc);
@@ -5633,6 +5688,8 @@ static term nif_code_server_set_native_code(Context *ctx, int argc, term argv[])
     VALIDATE_VALUE(argv[0], term_is_atom);
     VALIDATE_VALUE(argv[1], term_is_integer);
 
+    avm_int_t labels_count = term_to_int(argv[1]);
+
     term module_name = argv[0];
     Module *mod = globalcontext_get_module(ctx->global, term_to_atom_index(module_name));
     if (IS_NULL_PTR(mod)) {
@@ -5646,10 +5703,12 @@ static term nif_code_server_set_native_code(Context *ctx, int argc, term argv[])
 
     SMP_MODULE_LOCK(mod);
     if (mod->native_code == NULL) {
-        module_set_native_code(mod, term_to_int(argv[1]), entry_point);
+        module_set_native_code(mod, labels_count, entry_point);
     }
     SMP_MODULE_UNLOCK(mod);
 
+    sys_set_cache_native_code(ctx->global, mod, JIT_FORMAT_VERSION, entry_point, labels_count);
+
     return OK_ATOM;
 }
 #endif
@@ -5688,6 +5747,8 @@ static term nif_jit_backend_module(Context *ctx, int argc, term argv[])
     return JIT_AARCH64_ATOM;
 #elif JIT_ARCH_TARGET == JIT_ARCH_ARMV6M
     return JIT_ARMV6M_ATOM;
+#elif JIT_ARCH_TARGET == JIT_ARCH_RISCV32
+    return JIT_RISCV32_ATOM;
 #else
 #error Unknown JIT target
 #endif
diff --git a/src/libAtomVM/nifs.gperf b/src/libAtomVM/nifs.gperf
index a4a2591fa0..6423381dd4 100644
--- a/src/libAtomVM/nifs.gperf
+++ b/src/libAtomVM/nifs.gperf
@@ -183,6 +183,7 @@ code_server:code_chunk/1, IF_HAVE_JIT(&code_server_code_chunk_nif)
 code_server:atom_resolver/2, IF_HAVE_JIT(&code_server_atom_resolver_nif)
 code_server:literal_resolver/2, IF_HAVE_JIT(&code_server_literal_resolver_nif)
 code_server:type_resolver/2, IF_HAVE_JIT(&code_server_type_resolver_nif)
+code_server:import_resolver/2, IF_HAVE_JIT(&code_server_import_resolver_nif)
 code_server:set_native_code/3, IF_HAVE_JIT(&code_server_set_native_code_nif)
 console:print/1, &console_print_nif
 base64:encode/1, &base64_encode_nif
diff --git a/src/libAtomVM/opcodesswitch.h b/src/libAtomVM/opcodesswitch.h
index d8fc4106b0..ea56be68ae 100644
--- a/src/libAtomVM/opcodesswitch.h
+++ b/src/libAtomVM/opcodesswitch.h
@@ -4074,6 +4074,9 @@ HOT_FUNC int scheduler_entry_point(GlobalContext *glb)
                         RAISE_ERROR(OUT_OF_MEMORY_ATOM);
                     }
                     term t = term_create_empty_binary(size_val, &ctx->heap, ctx->global);
+                    if (UNLIKELY(term_is_invalid_term(t))) {
+                        RAISE_ERROR(OUT_OF_MEMORY_ATOM);
+                    }
 
                     ctx->bs = t;
                     ctx->bs_offset = 0;
@@ -4122,6 +4125,9 @@ HOT_FUNC int scheduler_entry_point(GlobalContext *glb)
                         RAISE_ERROR(OUT_OF_MEMORY_ATOM);
                     }
                     term t = term_create_empty_binary(size_val / 8, &ctx->heap, ctx->global);
+                    if (UNLIKELY(term_is_invalid_term(t))) {
+                        RAISE_ERROR(OUT_OF_MEMORY_ATOM);
+                    }
 
                     ctx->bs = t;
                     ctx->bs_offset = 0;
@@ -4530,6 +4536,9 @@ HOT_FUNC int scheduler_entry_point(GlobalContext *glb)
                         RAISE_ERROR(OUT_OF_MEMORY_ATOM);
                     }
                     term t = term_create_empty_binary(0, &ctx->heap, ctx->global);
+                    if (UNLIKELY(term_is_invalid_term(t))) {
+                        RAISE_ERROR(OUT_OF_MEMORY_ATOM);
+                    }
 
                     ctx->bs = t;
                     ctx->bs_offset = 0;
@@ -4595,6 +4604,9 @@ HOT_FUNC int scheduler_entry_point(GlobalContext *glb)
                     TRACE("bs_append/8, fail=%u size=" AVM_INT_FMT " unit=%u src=0x%" TERM_X_FMT " dreg=%c%i\n", (unsigned) fail, size_val, (unsigned) unit, src, T_DEST_REG(dreg));
                     src = x_regs[live];
                     term t = term_create_empty_binary(src_size + size_val / 8, &ctx->heap, ctx->global);
+                    if (UNLIKELY(term_is_invalid_term(t))) {
+                        RAISE_ERROR(OUT_OF_MEMORY_ATOM);
+                    }
                     memcpy((void *) term_binary_data(t), (void *) term_binary_data(src), src_size);
 
                     ctx->bs = t;
@@ -4641,8 +4653,10 @@ HOT_FUNC int scheduler_entry_point(GlobalContext *glb)
                         RAISE_ERROR(OUT_OF_MEMORY_ATOM);
                     }
                     DECODE_COMPACT_TERM(src, src_pc)
-                    term t = term_create_empty_binary(src_size + size_val / 8, &ctx->heap, ctx->global);
-                    memcpy((void *) term_binary_data(t), (void *) term_binary_data(src), src_size);
+                    term t = term_reuse_binary(src, src_size + size_val / 8, &ctx->heap, ctx->global);
+                    if (UNLIKELY(term_is_invalid_term(t))) {
+                        RAISE_ERROR(OUT_OF_MEMORY_ATOM);
+                    }
 
                     ctx->bs = t;
                     ctx->bs_offset = src_size * 8;
@@ -5283,6 +5297,9 @@ HOT_FUNC int scheduler_entry_point(GlobalContext *glb)
                     avm_int_t bs_offset = term_get_match_state_offset(src);
                     bool status;
                     switch (size_val) {
+                        case 16:
+                            status = bitstring_extract_f16(bs_bin, bs_offset, increment, flags_value, &value);
+                            break;
                         case 32:
                             status = bitstring_extract_f32(bs_bin, bs_offset, increment, flags_value, &value);
                             break;
@@ -6736,6 +6753,7 @@ HOT_FUNC int scheduler_entry_point(GlobalContext *glb)
                 // Verify parameters and compute binary size in first iteration
                 #ifdef IMPL_EXECUTE_LOOP
                     size_t binary_size = 0;
+                    term reuse_binary = term_invalid_term();
                 #endif
                 for (size_t j = 0; j < nb_segments; j++) {
                     term atom_type;
@@ -6803,6 +6821,31 @@ HOT_FUNC int scheduler_entry_point(GlobalContext *glb)
                                 segment_size = signed_size_value;
                                 break;
                             }
+                            case FLOAT_ATOM: {
+                                if (!term_is_number(src)) {
+                                    if (fail == 0) {
+                                        RAISE_ERROR(BADARG_ATOM);
+                                    } else {
+                                        JUMP_TO_LABEL(mod, fail);
+                                    }
+                                }
+                                // size is optional for floats, defaults to 64
+                                avm_int_t signed_size_value = 64;
+                                if (size != term_nil()) {
+                                    VERIFY_IS_INTEGER(size, "bs_create_bin/6", fail);
+                                    signed_size_value = term_to_int(size);
+                                    if (UNLIKELY(signed_size_value != 16 && signed_size_value != 32 && signed_size_value != 64)) {
+                                        if (fail == 0) {
+                                            RAISE_ERROR(BADARG_ATOM);
+                                        } else {
+                                            JUMP_TO_LABEL(mod, fail);
+                                        }
+                                    }
+                                }
+                                segment_size = signed_size_value;
+                                segment_unit = 1;
+                                break;
+                            }
                             case STRING_ATOM: {
                                 VERIFY_IS_INTEGER(size, "bs_create_bin/6", fail);
                                 avm_int_t signed_size_value = term_to_int(size);
@@ -6824,6 +6867,9 @@ HOT_FUNC int scheduler_entry_point(GlobalContext *glb)
                                     // We only support src as a binary of bytes here.
                                     segment_size = term_binary_size(src);
                                     segment_unit = 8;
+                                    if (atom_type == PRIVATE_APPEND_ATOM && j == 0) {
+                                        reuse_binary = src;
+                                    }
                                 } else {
                                     VERIFY_IS_INTEGER(size, "bs_create_bin/6", fail);
                                     avm_int_t signed_size_value = term_to_int(size);
@@ -6864,7 +6910,17 @@ HOT_FUNC int scheduler_entry_point(GlobalContext *glb)
                     if (UNLIKELY(memory_ensure_free_with_roots(ctx, alloc + term_binary_heap_size(binary_size / 8), live, x_regs, MEMORY_CAN_SHRINK) != MEMORY_GC_OK)) {
                         RAISE_ERROR(OUT_OF_MEMORY_ATOM);
                     }
-                    term t = term_create_empty_binary(binary_size / 8, &ctx->heap, ctx->global);
+                    term t;
+                    size_t original_size = 0;
+                    if (term_is_invalid_term(reuse_binary)) {
+                        t = term_create_empty_binary(binary_size / 8, &ctx->heap, ctx->global);
+                    } else {
+                        original_size = term_binary_size(reuse_binary);
+                        t = term_reuse_binary(reuse_binary, binary_size / 8, &ctx->heap, ctx->global);
+                    }
+                    if (UNLIKELY(term_is_invalid_term(t))) {
+                        RAISE_ERROR(OUT_OF_MEMORY_ATOM);
+                    }
                     size_t offset = 0;
 
                     for (size_t j = 0; j < nb_segments; j++) {
@@ -6888,6 +6944,7 @@ HOT_FUNC int scheduler_entry_point(GlobalContext *glb)
                             case UTF16_ATOM:
                             case UTF32_ATOM:
                             case INTEGER_ATOM:
+                            case FLOAT_ATOM:
                                 DECODE_FLAGS_LIST(flags_value, flags, opcode);
                                 break;
                             default:
@@ -6911,6 +6968,13 @@ HOT_FUNC int scheduler_entry_point(GlobalContext *glb)
                             case STRING_ATOM:
                                 size_value = (size_t) term_to_int(size);
                                 break;
+                            case FLOAT_ATOM:
+                                if (size != term_nil()) {
+                                    size_value = (size_t) term_to_int(size);
+                                } else {
+                                    size_value = 64;
+                                }
+                                break;
                             default:
                                 break;
                         }
@@ -6953,6 +7017,38 @@ HOT_FUNC int scheduler_entry_point(GlobalContext *glb)
                                 segment_size = size_value;
                                 break;
                             }
+                            case FLOAT_ATOM: {
+                                avm_float_t float_value;
+                                if (term_is_float(src)) {
+                                    float_value = term_to_float(src);
+                                } else if (term_is_any_integer(src)) {
+                                    float_value = (avm_float_t) term_maybe_unbox_int64(src);
+                                } else {
+                                    if (fail == 0) {
+                                        RAISE_ERROR(BADARG_ATOM);
+                                    } else {
+                                        JUMP_TO_LABEL(mod, fail);
+                                    }
+                                }
+                                bool result;
+                                if (size_value == 16) {
+                                    result = bitstring_insert_f16(t, offset, float_value, flags_value);
+                                } else if (size_value == 32) {
+                                    result = bitstring_insert_f32(t, offset, float_value, flags_value);
+                                } else {
+                                    result = bitstring_insert_f64(t, offset, float_value, flags_value);
+                                }
+                                if (UNLIKELY(!result)) {
+                                    TRACE("bs_create_bin/6: Failed to insert float into binary\n");
+                                    if (fail == 0) {
+                                        RAISE_ERROR(BADARG_ATOM);
+                                    } else {
+                                        JUMP_TO_LABEL(mod, fail);
+                                    }
+                                }
+                                segment_size = size_value;
+                                break;
+                            }
                             case STRING_ATOM: {
                                 uint8_t *dst = (uint8_t *) term_binary_data(t);
                                 size_t remaining = 0;
@@ -6968,6 +7064,10 @@ HOT_FUNC int scheduler_entry_point(GlobalContext *glb)
                                     TRACE("bs_create_bin/6: current offset (%d) is not evenly divisible by 8\n", (int) offset);
                                     RAISE_ERROR(UNSUPPORTED_ATOM);
                                 }
+                                if (reuse_binary == src && j == 0) {
+                                    segment_size = original_size * 8;
+                                    break;
+                                }
                                 uint8_t *dst = (uint8_t *) term_binary_data(t) + (offset / 8);
                                 const uint8_t *bin = (const uint8_t *) term_binary_data(src);
                                 size_t binary_size = term_binary_size(src);
@@ -7444,7 +7544,7 @@ HOT_FUNC int scheduler_entry_point(GlobalContext *glb)
         }
 
 terminate_context:
-        TRACE("-- Code execution finished for %i--\n", ctx->process_id);
+        TRACE("-- Code execution finished for %i--\n", (int) ctx->process_id);
         GlobalContext *global = ctx->global;
         if (ctx->leader) {
             scheduler_stop_all(global);
diff --git a/src/libAtomVM/refc_binary.c b/src/libAtomVM/refc_binary.c
index 61e4b05b8f..159d6f2ba9 100644
--- a/src/libAtomVM/refc_binary.c
+++ b/src/libAtomVM/refc_binary.c
@@ -141,3 +141,86 @@ size_t refc_binary_total_size(Context *ctx)
     synclist_unlock(&ctx->global->refc_binaries);
     return size;
 }
+
+COLD_FUNC void refc_binary_dump_info(Context *ctx)
+{
+    struct ListHead *item;
+    struct ListHead *refc_binaries = synclist_rdlock(&ctx->global->refc_binaries);
+
+    // Note: This only counts non-const refc binaries (ones that allocate memory).
+    // Const binaries (created by term_from_const_binary) point to existing data
+    // and are never added to the global refc_binaries list, so they don't appear here.
+
+    // First pass: count and calculate total size
+    size_t count = 0;
+    size_t total_size = 0;
+    LIST_FOR_EACH (item, refc_binaries) {
+        struct RefcBinary *refc = GET_LIST_ENTRY(item, struct RefcBinary, head);
+        count++;
+        total_size += refc->size;
+    }
+
+    fprintf(stderr, "refc_binary_count = %zu\n", count);
+    fprintf(stderr, "refc_binary_total_size = %zu\n", total_size);
+
+    if (count == 0) {
+        synclist_unlock(&ctx->global->refc_binaries);
+        return;
+    }
+
+// Second pass: keep the TOP_N largest binaries, sorted by decreasing size
+#define TOP_N 5
+    struct RefcBinary *top[TOP_N] = { NULL };
+    size_t top_indices[TOP_N] = { 0 };
+
+    size_t index = 0;
+    LIST_FOR_EACH (item, refc_binaries) {
+        struct RefcBinary *refc = GET_LIST_ENTRY(item, struct RefcBinary, head);
+
+        // Insert at the first slot that is empty or holds a smaller binary
+        for (size_t i = 0; i < TOP_N; i++) {
+            if (top[i] == NULL || refc->size > top[i]->size) {
+                // Shift down
+                for (size_t j = TOP_N - 1; j > i; j--) {
+                    top[j] = top[j - 1];
+                    top_indices[j] = top_indices[j - 1];
+                }
+                top[i] = refc;
+                top_indices[i] = index;
+                break;
+            }
+        }
+        index++;
+    }
+
+    // Display top binaries
+    fprintf(stderr, "\nTop %d largest refc binaries:\n", TOP_N);
+    for (size_t i = 0; i < TOP_N && top[i] != NULL; i++) {
+        struct RefcBinary *refc = top[i];
+        // Guard the percentage: total_size is 0 when every listed binary is empty
+        double percent = total_size > 0 ? (double) refc->size * 100.0 / (double) total_size : 0.0;
+        fprintf(stderr, "  [%zu] size=%zu bytes (%.1f%%), refcount=%d",
+            top_indices[i], (size_t) refc->size, percent, (int) refc->ref_count);
+
+        if (refc->resource_type) {
+            fprintf(stderr, " [resource]");
+        }
+
+        // Print first 32 bytes as hex
+        fprintf(stderr, "\n      data: ");
+        size_t print_size = refc->size < 32 ? refc->size : 32;
+        for (size_t j = 0; j < print_size; j++) {
+            fprintf(stderr, "%02x", refc->data[j]);
+            if (j % 4 == 3 && j < print_size - 1) {
+                fprintf(stderr, " ");
+            }
+        }
+        if (refc->size > 32) {
+            fprintf(stderr, "...");
+        }
+        fprintf(stderr, "\n");
+    }
+#undef TOP_N
+
+    synclist_unlock(&ctx->global->refc_binaries);
+}
diff --git a/src/libAtomVM/refc_binary.h b/src/libAtomVM/refc_binary.h
index 3fc1784bd8..7ff38f545e 100644
--- a/src/libAtomVM/refc_binary.h
+++ b/src/libAtomVM/refc_binary.h
@@ -142,6 +142,16 @@ term refc_binary_create_binary_info(Context *ctx);
  */
 size_t refc_binary_total_size(Context *ctx);
 
+/**
+ * @brief Dump detailed information about reference counted binaries
+ *
+ * @details This function prints diagnostic information including the count,
+ * total size, and details about the top 5 largest binaries including
+ * their first bytes. Used for debugging memory issues.
+ * @param ctx the context
+ */
+COLD_FUNC void refc_binary_dump_info(Context *ctx);
+
 #ifdef __cplusplus
 }
 #endif
diff --git a/src/libAtomVM/sys.h b/src/libAtomVM/sys.h
index 0735d86ed3..8ad701c3cb 100644
--- a/src/libAtomVM/sys.h
+++ b/src/libAtomVM/sys.h
@@ -296,6 +296,33 @@ void sys_free_platform(GlobalContext *global);
  */
 ModuleNativeEntryPoint sys_map_native_code(const uint8_t *native_code, size_t size, size_t offset);
 
+/**
+ * @brief Get the cache (typically on flash) of native code for a given module
+ *
+ * @details If module is found in cache, its entry point, number of labels and
+ * version are returned through the output parameters. Only implemented on
+ * platforms with JIT. Implementations on flash typically check if the jit
+ * cache is valid (for lib or for app) and use the pointer to code as a key.
+ * @param global the global context
+ * @param mod module to return the cache native code for
+ * @param[out] version version of the cache entry (for compatibility with the VM)
+ * @param[out] entry_point entry point to the module, if found
+ * @param[out] labels number of labels, if found
+ * @return \c true if the cache entry was found
+ */
+bool sys_get_cache_native_code(GlobalContext *global, Module *mod, uint16_t *version, ModuleNativeEntryPoint *entry_point, uint32_t *labels);
+
+/**
+ * @brief Add native code to cache for a given module
+ *
+ * @param global the global context
+ * @param mod module to add the native code for
+ * @param version version of the native code
+ * @param entry_point entry point to the module
+ * @param labels number of labels
+ */
+void sys_set_cache_native_code(GlobalContext *global, Module *mod, uint16_t version, ModuleNativeEntryPoint entry_point, uint32_t labels);
+
 #ifdef __cplusplus
 }
 #endif
diff --git a/src/libAtomVM/term.c b/src/libAtomVM/term.c
index 838fd41eee..fc30674124 100644
--- a/src/libAtomVM/term.c
+++ b/src/libAtomVM/term.c
@@ -909,7 +909,7 @@ term term_alloc_refc_binary(size_t size, bool is_const, Heap *heap, GlobalContex
         if (IS_NULL_PTR(refc)) {
             // TODO propagate error to callers of this function, e.g., as an invalid term
             fprintf(stderr, "memory_create_refc_binary: Unable to allocate %zu bytes for refc_binary.\n", size);
-            AVM_ABORT();
+            return term_invalid_term();
         }
         boxed_value[3] = (term) refc;
         refc->ref_count = 1; // added to mso list, increment ref count
@@ -919,6 +919,64 @@ term term_alloc_refc_binary(size_t size, bool is_const, Heap *heap, GlobalContex
     return ret;
 }
 
+// Resize src in place when it is an exclusively referenced refc binary, else copy it.
+term term_reuse_binary(term src, size_t size, Heap *heap, GlobalContext *glb)
+{
+    if (term_is_refc_binary(src) && !term_refc_binary_is_const(src)) {
+        term *boxed_value = term_to_term_ptr(src);
+        struct RefcBinary *old_refc = (struct RefcBinary *) boxed_value[3];
+        size_t old_size = old_refc->size;
+
+        // Only resize in place when this term holds the sole reference.
+        if (old_refc->ref_count == 1) {
+            // Hold the refc binaries list lock for the whole resize.
+            struct ListHead *refc_binaries = synclist_wrlock(&glb->refc_binaries);
+
+            // Unlink first: realloc may move the block that embeds the list node.
+            list_remove(&old_refc->head);
+
+            struct RefcBinary *new_refc = realloc(old_refc, sizeof(struct RefcBinary) + size);
+            if (IS_NULL_PTR(new_refc)) {
+                // realloc failed: the old block is untouched, put it back in the list.
+                list_append(refc_binaries, &old_refc->head);
+                synclist_unlock(&glb->refc_binaries);
+                fprintf(stderr, "term_reuse_binary: Unable to reallocate %zu bytes for refc_binary.\n", size);
+                return term_invalid_term();
+            }
+
+            new_refc->size = size;
+
+            // Zero the new part if size increased
+            if (LIKELY(size > old_size)) {
+                memset((char *) &new_refc->data + old_size, 0, size - old_size);
+            }
+
+            // Publish the new size and pointer in the boxed term before unlocking.
+            boxed_value[1] = (term) size;
+            boxed_value[3] = (term) new_refc;
+
+            list_append(refc_binaries, &new_refc->head);
+            synclist_unlock(&glb->refc_binaries);
+
+            // The boxed term itself did not move, so src is still the result.
+            return src;
+        }
+    }
+    // Any other binary (not refc, const, or ref count != 1): allocate a copy.
+    size_t src_size = term_binary_size(src);
+    term t = term_create_uninitialized_binary(size, heap, glb);
+    if (UNLIKELY(term_is_invalid_term(t))) {
+        // Allocation failed: hand the invalid term back instead of dereferencing it.
+        return t;
+    }
+    uint8_t *dst = (uint8_t *) term_binary_data(t);
+    size_t copy_size = src_size < size ? src_size : size;
+    memcpy(dst, term_binary_data(src), copy_size);
+    // Zero the tail, as the in-place path does, so no byte is left uninitialized.
+    memset(dst + copy_size, 0, size - copy_size);
+    return t;
+}
+
 static term find_binary(term binary_or_state)
 {
     term t = binary_or_state;
diff --git a/src/libAtomVM/term.h b/src/libAtomVM/term.h
index 9a38768bcb..9c925ea5c8 100644
--- a/src/libAtomVM/term.h
+++ b/src/libAtomVM/term.h
@@ -286,7 +286,8 @@ TermCompareResult term_compare(term t, term other, TermCompareOpts opts, GlobalC
  * @param is_const designates whether the data pointed to is "const", such as a term literal
  * @param heap the heap to allocate the binary in
  * @param glb the global context as refc binaries are global
- * @return a term (reference) pointing to the newly allocated binary in the process heap.
+ * @return a term (reference) pointing to the newly allocated binary in the process heap or
+ * `term_invalid_term()` if there isn't enough memory to allocate the refc buffer.
  */
 term term_alloc_refc_binary(size_t size, bool is_const, Heap *heap, GlobalContext *glb);
 
@@ -1262,7 +1263,8 @@ static inline const char *term_binary_data(term t)
 * @param size size of binary data buffer.
 * @param heap the heap to allocate the binary in
 * @param glb the global context as refc binaries are global
-* @return a term pointing to the boxed binary pointer.
+* @return a term pointing to the boxed binary pointer or `term_invalid_term()`
+* if there isn't enough memory to allocate the refc buffer
 */
 static inline term term_create_uninitialized_binary(size_t size, Heap *heap, GlobalContext *glb)
 {
@@ -1350,7 +1352,9 @@ static inline void term_set_refc_binary_data(term t, const void *data)
 static inline term term_from_const_binary(const void *data, size_t size, Heap *heap, GlobalContext *glb)
 {
     term binary = term_alloc_refc_binary(size, true, heap, glb);
-    term_set_refc_binary_data(binary, data);
+    if (LIKELY(!term_is_invalid_term(binary))) {
+        term_set_refc_binary_data(binary, data);
+    }
     return binary;
 }
 
@@ -1366,10 +1370,25 @@ static inline term term_from_const_binary(const void *data, size_t size, Heap *h
 static inline term term_create_empty_binary(size_t size, Heap *heap, GlobalContext *glb)
 {
     term t = term_create_uninitialized_binary(size, heap, glb);
-    memset((char *) term_binary_data(t), 0x00, size);
+    if (LIKELY(!term_is_invalid_term(t))) {
+        memset((char *) term_binary_data(t), 0x00, size);
+    }
     return t;
 }
 
+/**
+* @brief Reuse a binary.  If the binary is a non-const refc binary with a ref
+* count of 1, resize it in place. Otherwise, create a new binary and copy the data.
+*
+* @details When resized in place, the returned term is \c src itself, now of the new size.
+* @param src binary to reuse.
+* @param size size in bytes of the resulting binary data buffer.
+* @param heap the heap to allocate memory in
+* @param glb the global context as refc binaries are global
+* @return a term pointing to the binary, or `term_invalid_term()` if the refc buffer could not be reallocated.
+*/
+term term_reuse_binary(term src, size_t size, Heap *heap, GlobalContext *glb);
+
 static inline bool term_normalize_binary_pos_len(term binary, avm_int_t pos, avm_int_t len, BinaryPosLen *pos_len)
 {
     avm_int_t size = (avm_int_t) term_binary_size(binary);
diff --git a/src/platforms/emscripten/src/main.c b/src/platforms/emscripten/src/main.c
index 27e02c3a67..c8c00dc07a 100644
--- a/src/platforms/emscripten/src/main.c
+++ b/src/platforms/emscripten/src/main.c
@@ -59,7 +59,7 @@ static int load_module(const char *path)
             const void *startup_beam = NULL;
             uint32_t startup_beam_size;
             const char *startup_module_name;
-            avmpack_find_section_by_flag(avmpack_data->data, 1, &startup_beam, &startup_beam_size, &startup_module_name);
+            avmpack_find_section_by_flag(avmpack_data->data, BEAM_START_FLAG, BEAM_START_FLAG, &startup_beam, &startup_beam_size, &startup_module_name);
             if (startup_beam) {
                 avmpack_data->in_use = true;
                 main_module = module_new_from_iff_binary(global, startup_beam, startup_beam_size);
diff --git a/src/platforms/esp32/CMakeLists.txt b/src/platforms/esp32/CMakeLists.txt
index 9dec6ec5f3..4dc81e0aad 100644
--- a/src/platforms/esp32/CMakeLists.txt
+++ b/src/platforms/esp32/CMakeLists.txt
@@ -51,11 +51,27 @@ endif()
 # On Esp32, select is run in a loop in a dedicated task
 set(AVM_SELECT_IN_TASK ON)
 
-# JIT is not available yet on esp32
-set(AVM_DISABLE_JIT ON)
+# Do not disable JIT before project(); the effective value is set below from CONFIG_JIT_ENABLED
+set(AVM_DISABLE_JIT OFF)
 
 project(atomvm-esp32)
 
+# JIT is only supported on RISC-V targets (ESP32-C2, ESP32-C3, ESP32-C6, ESP32-H2, ESP32-P4)
+# Configuration comes from idf.py menuconfig (KConfig), not CMake options
+if(CONFIG_JIT_ENABLED)
+    if ("${IDF_TARGET}" MATCHES "esp32c2|esp32c3|esp32c6|esp32h2|esp32p4")
+        set(AVM_DISABLE_JIT OFF)
+        set(AVM_JIT_TARGET_ARCH riscv32)
+        message(STATUS "JIT compilation enabled for ${IDF_TARGET} (RISC-V32)")
+    else()
+        message(WARNING "JIT is not supported on ${IDF_TARGET} (Xtensa architecture)")
+        set(AVM_DISABLE_JIT ON)
+    endif()
+else()
+    set(AVM_DISABLE_JIT ON)
+    message(STATUS "JIT compilation disabled")
+endif()
+
 # esp-idf does not use compile_feature but instead sets version in
 # c_compile_options
 # Ensure project is compiled with at least C11
diff --git a/src/platforms/esp32/components/avm_sys/CMakeLists.txt b/src/platforms/esp32/components/avm_sys/CMakeLists.txt
index ebcedd3b57..2f942073f7 100644
--- a/src/platforms/esp32/components/avm_sys/CMakeLists.txt
+++ b/src/platforms/esp32/components/avm_sys/CMakeLists.txt
@@ -25,6 +25,8 @@ set(AVM_SYS_COMPONENT_SRCS
     "sys.c"
     "platform_nifs.c"
     "platform_defaultatoms.c"
+    "jit_stream_flash_platform.c"
+    "../../../../libAtomVM/jit_stream_flash.c"
     "../../../../libAtomVM/inet.c"
     "../../../../libAtomVM/otp_crypto.c"
     "../../../../libAtomVM/otp_net.c"
diff --git a/src/platforms/esp32/components/avm_sys/jit_stream_flash_platform.c b/src/platforms/esp32/components/avm_sys/jit_stream_flash_platform.c
new file mode 100644
index 0000000000..bfaed52215
--- /dev/null
+++ b/src/platforms/esp32/components/avm_sys/jit_stream_flash_platform.c
@@ -0,0 +1,141 @@
+/*
+ * This file is part of AtomVM.
+ *
+ * Copyright 2025 by Paul Guyot <pguyot@kallisys.net>
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ * SPDX-License-Identifier: Apache-2.0 OR LGPL-2.1-or-later
+ */
+
+#ifndef AVM_NO_JIT
+
+#include "jit_stream_flash.h"
+
+#include <esp_partition.h>
+#include <stdio.h>
+
+#include "esp32_sys.h"
+
+#if ESP_IDF_VERSION_MAJOR >= 5
+#include <spi_flash_mmap.h>
+#endif
+
+#ifdef CONFIG_IDF_TARGET_ARCH_RISCV
+#include <soc/ext_mem_defs.h>
+#endif
+
+struct JSFlashPlatformContext
+{
+    const esp_partition_t *partition;
+};
+
+struct JSFlashPlatformContext *jit_stream_flash_platform_init(void)
+{
+    const esp_partition_t *partition = esp_partition_find_first(
+        ESP_PARTITION_TYPE_DATA, ESP_PARTITION_SUBTYPE_ANY, JIT_PARTITION_NAME);
+    if (IS_NULL_PTR(partition)) {
+        fprintf(stderr, "Failed to find partition '%s' for JIT cache\n", JIT_PARTITION_NAME);
+        return NULL;
+    }
+
+    struct JSFlashPlatformContext *pf_ctx = malloc(sizeof(struct JSFlashPlatformContext));
+    if (IS_NULL_PTR(pf_ctx)) {
+        return NULL;
+    }
+
+    pf_ctx->partition = partition;
+    return pf_ctx;
+}
+
+void jit_stream_flash_platform_destroy(struct JSFlashPlatformContext *ctx)
+{
+    free(ctx);
+}
+
+bool jit_stream_flash_platform_erase_sector(struct JSFlashPlatformContext *ctx, uintptr_t addr)
+{
+    if (UNLIKELY(!ctx || !ctx->partition)) {
+        return false;
+    }
+
+    size_t flash_offset = spi_flash_cache2phys((const void *) addr);
+    if (UNLIKELY(flash_offset == SPI_FLASH_CACHE2PHYS_FAIL)) {
+        fprintf(stderr, "Failed to convert cache address 0x%lx to physical address\n", (unsigned long) addr);
+        return false;
+    }
+
+    esp_err_t err = esp_partition_erase_range(ctx->partition,
+        flash_offset - ctx->partition->address, FLASH_SECTOR_SIZE);
+    if (UNLIKELY(err != ESP_OK)) {
+        fprintf(stderr, "Failed to erase sector at offset 0x%lx: %d\n", (unsigned long) flash_offset, err);
+        return false;
+    }
+
+    return true;
+}
+
+bool jit_stream_flash_platform_write_page(struct JSFlashPlatformContext *ctx, uintptr_t addr, const uint8_t *data)
+{
+    if (UNLIKELY(!ctx || !ctx->partition)) {
+        return false;
+    }
+
+    size_t flash_offset = spi_flash_cache2phys((const void *) addr);
+    if (UNLIKELY(flash_offset == SPI_FLASH_CACHE2PHYS_FAIL)) {
+        fprintf(stderr, "Failed to convert cache address 0x%lx to physical address\n", (unsigned long) addr);
+        return false;
+    }
+
+    esp_err_t err = esp_partition_write(ctx->partition,
+        flash_offset - ctx->partition->address, data, FLASH_PAGE_SIZE);
+    if (UNLIKELY(err != ESP_OK)) {
+        fprintf(stderr, "Failed to write page at offset 0x%lx: %d\n", (unsigned long) flash_offset, err);
+        return false;
+    }
+
+    return true;
+}
+
+uintptr_t jit_stream_flash_platform_ptr_to_executable(uintptr_t addr)
+{
+    // Convert data cache address to instruction cache address for RISC-V targets
+    // On targets with separate buses (e.g. ESP32-C3), flash is mapped to both DBUS (0x3C...) and IBUS (0x42...)
+    // but only IBUS addresses are executable
+#ifdef CONFIG_IDF_TARGET_ARCH_RISCV
+    if ((addr & ~SOC_MMU_VADDR_MASK) == SOC_MMU_DBUS_VADDR_BASE) {
+        return (addr & SOC_MMU_VADDR_MASK) | SOC_MMU_IBUS_VADDR_BASE;
+    }
+    return addr;
+#else
+    return addr;
+#endif
+}
+
+uintptr_t jit_stream_flash_platform_executable_to_ptr(uintptr_t addr)
+{
+    // Convert instruction cache address to data cache address for RISC-V targets
+    // This is the reverse of ptr_to_executable
+#ifdef CONFIG_IDF_TARGET_ARCH_RISCV
+    if ((addr & ~SOC_MMU_VADDR_MASK) == SOC_MMU_IBUS_VADDR_BASE) {
+        return (addr & SOC_MMU_VADDR_MASK) | SOC_MMU_DBUS_VADDR_BASE;
+    }
+    return addr;
+#else
+    return addr;
+#endif
+}
+
+REGISTER_NIF_COLLECTION(jit_stream_flash, jit_stream_flash_init, NULL, jit_stream_flash_get_nif)
+
+#endif // AVM_NO_JIT
diff --git a/src/platforms/esp32/components/avm_sys/jit_stream_flash_platform.h b/src/platforms/esp32/components/avm_sys/jit_stream_flash_platform.h
new file mode 100644
index 0000000000..6f8d9bffc5
--- /dev/null
+++ b/src/platforms/esp32/components/avm_sys/jit_stream_flash_platform.h
@@ -0,0 +1,43 @@
+/*
+ * This file is part of AtomVM.
+ *
+ * Copyright 2025 by Paul Guyot <pguyot@kallisys.net>
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ * SPDX-License-Identifier: Apache-2.0 OR LGPL-2.1-or-later
+ */
+
+#ifndef _JIT_STREAM_FLASH_PLATFORM_H_
+#define _JIT_STREAM_FLASH_PLATFORM_H_
+
+#include <stdbool.h>
+#include <stddef.h>
+#include <stdint.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+// ESP32 flash constants
+#define FLASH_SECTOR_SIZE 4096
+#define FLASH_PAGE_SIZE 256
+
+// JIT code is stored in main.avm partition
+#define JIT_PARTITION_NAME "main.avm"
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif // _JIT_STREAM_FLASH_PLATFORM_H_
diff --git a/src/platforms/esp32/components/avm_sys/sys.c b/src/platforms/esp32/components/avm_sys/sys.c
index 8318ae759a..ec229d70b2 100644
--- a/src/platforms/esp32/components/avm_sys/sys.c
+++ b/src/platforms/esp32/components/avm_sys/sys.c
@@ -807,3 +807,28 @@ void sys_mbedtls_ctr_drbg_context_unlock(GlobalContext *global)
     UNUSED(global);
 #endif
 }
+
+#ifndef AVM_NO_JIT
+#include <soc/soc.h>
+
+ModuleNativeEntryPoint sys_map_native_code(const uint8_t *native_code, size_t size, size_t offset)
+{
+    UNUSED(size);
+    uintptr_t addr = (uintptr_t) (native_code + offset);
+
+#if defined(CONFIG_IDF_TARGET_ARCH_RISCV)
+    // On RISC-V ESP32 targets, native code in flash needs to be accessed
+    // through the instruction cache (IROM) not data cache (DROM)
+#if defined(CONFIG_IDF_TARGET_ESP32C3) || defined(CONFIG_IDF_TARGET_ESP32C2)
+    // ESP32-C3 and C2 have separate DROM and IROM regions
+    if (addr >= SOC_DROM_LOW && addr < SOC_DROM_HIGH) {
+        // Convert from data cache address to instruction cache address
+        addr = addr - SOC_DROM_LOW + SOC_IROM_LOW;
+    }
+#endif
+    // ESP32-C6, H2, and P4 have unified DROM/IROM, no conversion needed
+#endif
+
+    return (ModuleNativeEntryPoint) addr;
+}
+#endif
diff --git a/src/platforms/esp32/components/libatomvm/CMakeLists.txt b/src/platforms/esp32/components/libatomvm/CMakeLists.txt
index 97580dbfea..00595afeef 100644
--- a/src/platforms/esp32/components/libatomvm/CMakeLists.txt
+++ b/src/platforms/esp32/components/libatomvm/CMakeLists.txt
@@ -31,7 +31,12 @@ if (HAVE_PLATFORM_ATOMIC_H)
     target_include_directories(libAtomVM PUBLIC ../avm_sys/)
 endif()
 
-target_link_libraries(${COMPONENT_LIB}
-    INTERFACE libAtomVM "-u platform_nifs_get_nif" "-u platform_defaultatoms_init")
+set(libatomvm_undefined_symbols "-u platform_nifs_get_nif" "-u platform_defaultatoms_init")
+if (NOT AVM_DISABLE_JIT)
+    # With JIT enabled, also force the JIT entry points to be linked in
+    list(APPEND libatomvm_undefined_symbols "-u jit_stream_entry_point" "-u sys_map_native_code")
+endif()
+target_link_libraries(${COMPONENT_LIB}
+    INTERFACE libAtomVM ${libatomvm_undefined_symbols})
 
 target_compile_features(${COMPONENT_LIB} INTERFACE c_std_11)
diff --git a/src/platforms/esp32/main/Kconfig.projbuild b/src/platforms/esp32/main/Kconfig.projbuild
index 88bf92aa1a..1eba944ed7 100755
--- a/src/platforms/esp32/main/Kconfig.projbuild
+++ b/src/platforms/esp32/main/Kconfig.projbuild
@@ -39,5 +39,11 @@ menu "AtomVM configuration"
          depends on USE_USB_SERIAL
          help
              Enable TinyUSB CDC functionality if USE_USB_SERIAL is enabled.
+
+     config JIT_ENABLED
+         bool "Enable just in time compilation"
+         default n
+         help
+             Enable just-in-time (JIT) compilation, or just the execution of precompiled native code (RISC-V targets only).
              
 endmenu
diff --git a/src/platforms/esp32/main/main.c b/src/platforms/esp32/main/main.c
index bc25c82c64..ca6d02e2f3 100644
--- a/src/platforms/esp32/main/main.c
+++ b/src/platforms/esp32/main/main.c
@@ -98,7 +98,7 @@ void app_main()
         ESP_LOGE(TAG, "Invalid startup avmpack. size=%u", size);
         AVM_ABORT();
     }
-    if (!avmpack_find_section_by_flag(startup_avm, BEAM_START_FLAG, &startup_beam, &startup_beam_size, &startup_module_name)) {
+    if (!avmpack_find_section_by_flag(startup_avm, BEAM_START_FLAG, BEAM_START_FLAG, &startup_beam, &startup_beam_size, &startup_module_name)) {
         ESP_LOGE(TAG, "Error: Failed to locate start module in startup partition. (Did you flash a library by mistake?)");
         AVM_ABORT();
     }
diff --git a/src/platforms/esp32/partitions.csv b/src/platforms/esp32/partitions.csv
index 95c1cf74bc..d313cbdc81 100644
--- a/src/platforms/esp32/partitions.csv
+++ b/src/platforms/esp32/partitions.csv
@@ -7,6 +7,5 @@
 # Note: if you change the phy_init or app partition offset, make sure to change the offset in Kconfig.projbuild
 nvs,      data, nvs,       0x9000,     0x6000,
 phy_init, data, phy,       0xf000,     0x1000,
-factory,  app,  factory,  0x10000,   0x1C0000,
-boot.avm,  data, phy,     0x1D0000,    0x40000,
-main.avm, data, phy,     0x210000,   0x100000
+factory,  app,  factory,  0x10000,   0x160000,
+main.avm,  data, phy,     0x170000,    0x290000,
diff --git a/src/platforms/esp32/test/CMakeLists.txt b/src/platforms/esp32/test/CMakeLists.txt
index 2d97d91345..cee138d34c 100644
--- a/src/platforms/esp32/test/CMakeLists.txt
+++ b/src/platforms/esp32/test/CMakeLists.txt
@@ -57,8 +57,16 @@ endif()
 # On Esp32, select is run in a loop in a dedicated task
 set(AVM_SELECT_IN_TASK ON)
 
-# JIT is not available yet on esp32
-set(AVM_DISABLE_JIT ON)
+# JIT is only supported on RISC-V targets (ESP32-C2, ESP32-C3, ESP32-C6, ESP32-H2, ESP32-P4)
+# This must be set before project() so libAtomVM is configured correctly
+if ("${IDF_TARGET}" MATCHES "esp32c2|esp32c3|esp32c6|esp32h2|esp32p4")
+    set(AVM_DISABLE_JIT OFF)
+    set(AVM_JIT_TARGET_ARCH riscv32)
+    message(STATUS "JIT compilation enabled for ${IDF_TARGET} (RISC-V32)")
+else()
+    message(STATUS "JIT is not supported on ${IDF_TARGET} (Xtensa architecture) - using interpreter")
+    set(AVM_DISABLE_JIT ON)
+endif()
 
 project(atomvm-esp32-test)
 
diff --git a/src/platforms/esp32/test/main/test_erl_sources/CMakeLists.txt b/src/platforms/esp32/test/main/test_erl_sources/CMakeLists.txt
index e2d67269e8..dc4789f374 100644
--- a/src/platforms/esp32/test/main/test_erl_sources/CMakeLists.txt
+++ b/src/platforms/esp32/test/main/test_erl_sources/CMakeLists.txt
@@ -20,11 +20,31 @@
 
 add_library(esp32_test_modules)
 
+include(ExternalProject)
+# Extra target for the HostAtomVM build command; only set when JIT is enabled
+set(host_atomvm_jit_target "")
+if(NOT AVM_DISABLE_JIT)
+    set(host_atomvm_jit_target "--target=jit")
+endif()
 ExternalProject_Add(HostAtomVM
     SOURCE_DIR ../../../../../../../../
     INSTALL_COMMAND cmake -E echo "Skipping install step."
+    BUILD_COMMAND cmake --build . --target=atomvmlib ${host_atomvm_jit_target} --target=PackBEAM
 )
 
+function(jit_precompile module_name) # precompile <module_name>.beam into ${AVM_JIT_TARGET_ARCH}/ (no-op when JIT is disabled)
+    if(NOT AVM_DISABLE_JIT)
+        add_custom_command(
+            OUTPUT ${AVM_JIT_TARGET_ARCH}/${module_name}.beam
+            COMMAND ${CMAKE_COMMAND} -E make_directory ${AVM_JIT_TARGET_ARCH}
+            COMMAND erl -pa HostAtomVM-prefix/src/HostAtomVM-build/libs/jit/src/beams/ -noshell -s jit_precompile -s init stop -- ${AVM_JIT_TARGET_ARCH} ${AVM_JIT_TARGET_ARCH}/ ${module_name}.beam
+            DEPENDS ${module_name}.beam HostAtomVM
+            COMMENT "Compiling ${module_name}.beam to ${AVM_JIT_TARGET_ARCH}"
+            VERBATIM
+        )
+    endif()
+endfunction()
+
 function(compile_erlang module_name)
     add_custom_command(
         OUTPUT "${CMAKE_CURRENT_BINARY_DIR}/${module_name}.beam"
@@ -33,6 +53,7 @@ function(compile_erlang module_name)
         COMMENT "Compiling ${module_name}.erl"
         WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}
     )
+    jit_precompile(${module_name})
 
     set_property(DIRECTORY "${CMAKE_CURRENT_SOURCE_DIR}" APPEND PROPERTY ADDITIONAL_MAKE_CLEAN_FILES "${CMAKE_CURRENT_BINARY_DIR}/${module_name}.beam")
 endfunction()
@@ -55,46 +76,44 @@ compile_erlang(test_time_and_processes)
 compile_erlang(test_twdt)
 compile_erlang(test_tz)
 
+set(erlang_test_beams
+    test_esp_partition.beam
+    test_file.beam
+    test_wifi_example.beam
+    test_list_to_atom.beam
+    test_list_to_binary.beam
+    test_md5.beam
+    test_crypto.beam
+    test_monotonic_time.beam
+    test_mount.beam
+    test_net.beam
+    test_rtc_slow.beam
+    test_select.beam
+    test_socket.beam
+    test_ssl.beam
+    test_time_and_processes.beam
+    test_twdt.beam
+    test_tz.beam
+)
+
+set(erlang_test_beams_to_package ${erlang_test_beams})
+set(erlang_test_beams_depends ${erlang_test_beams})
+if(NOT AVM_DISABLE_JIT)
+    # Package the beams from the ${AVM_JIT_TARGET_ARCH}/ subdirectory instead,
+    # and depend on both the plain beams and those precompiled ones
+    list(TRANSFORM erlang_test_beams PREPEND ${AVM_JIT_TARGET_ARCH}/
+        OUTPUT_VARIABLE erlang_test_beams_to_package)
+    list(APPEND erlang_test_beams_depends ${erlang_test_beams_to_package})
+endif()
+
 add_custom_command(
     OUTPUT "${CMAKE_CURRENT_BINARY_DIR}/esp32_test_modules.avm"
     COMMAND HostAtomVM-prefix/src/HostAtomVM-build/tools/packbeam/PackBEAM -i esp32_test_modules.avm
         HostAtomVM-prefix/src/HostAtomVM-build/libs/atomvmlib.avm
-        test_esp_partition.beam
-        test_file.beam
-        test_wifi_example.beam
-        test_list_to_atom.beam
-        test_list_to_binary.beam
-        test_md5.beam
-        test_crypto.beam
-        test_monotonic_time.beam
-        test_mount.beam
-        test_net.beam
-        test_rtc_slow.beam
-        test_select.beam
-        test_socket.beam
-        test_ssl.beam
-        test_time_and_processes.beam
-        test_twdt.beam
-        test_tz.beam
+        ${erlang_test_beams_to_package}
     DEPENDS
         HostAtomVM
-        "${CMAKE_CURRENT_BINARY_DIR}/test_esp_partition.beam"
-        "${CMAKE_CURRENT_BINARY_DIR}/test_wifi_example.beam"
-        "${CMAKE_CURRENT_BINARY_DIR}/test_file.beam"
-        "${CMAKE_CURRENT_BINARY_DIR}/test_list_to_atom.beam"
-        "${CMAKE_CURRENT_BINARY_DIR}/test_list_to_binary.beam"
-        "${CMAKE_CURRENT_BINARY_DIR}/test_md5.beam"
-        "${CMAKE_CURRENT_BINARY_DIR}/test_crypto.beam"
-        "${CMAKE_CURRENT_BINARY_DIR}/test_monotonic_time.beam"
-        "${CMAKE_CURRENT_BINARY_DIR}/test_mount.beam"
-        "${CMAKE_CURRENT_BINARY_DIR}/test_net.beam"
-        "${CMAKE_CURRENT_BINARY_DIR}/test_rtc_slow.beam"
-        "${CMAKE_CURRENT_BINARY_DIR}/test_select.beam"
-        "${CMAKE_CURRENT_BINARY_DIR}/test_socket.beam"
-        "${CMAKE_CURRENT_BINARY_DIR}/test_ssl.beam"
-        "${CMAKE_CURRENT_BINARY_DIR}/test_time_and_processes.beam"
-        "${CMAKE_CURRENT_BINARY_DIR}/test_twdt.beam"
-        "${CMAKE_CURRENT_BINARY_DIR}/test_tz.beam"
+        ${erlang_test_beams_depends}
     WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}
     VERBATIM
 )
diff --git a/src/platforms/generic_unix/lib/jit_stream_mmap.c b/src/platforms/generic_unix/lib/jit_stream_mmap.c
index f246a9791d..096cfe7faf 100644
--- a/src/platforms/generic_unix/lib/jit_stream_mmap.c
+++ b/src/platforms/generic_unix/lib/jit_stream_mmap.c
@@ -193,6 +193,28 @@ static term nif_jit_stream_mmap_read(Context *ctx, int argc, term argv[])
     return term_from_literal_binary(js_obj->stream_base + offset, len, &ctx->heap, ctx->global);
 }
 
+static term nif_jit_stream_mmap_flush(Context *ctx, int argc, term argv[])
+{
+    UNUSED(argc);
+
+    void *js_obj_ptr;
+    if (UNLIKELY(!enif_get_resource(erl_nif_env_from_context(ctx), argv[0], jit_stream_mmap_resource_type, &js_obj_ptr))) {
+        RAISE_ERROR(BADARG_ATOM);
+    }
+    struct JITStreamMMap *js_obj = (struct JITStreamMMap *) js_obj_ptr;
+    if (IS_NULL_PTR(js_obj->stream_base)) {
+        RAISE_ERROR(BADARG_ATOM);
+    }
+
+#if defined(__APPLE__)
+    sys_icache_invalidate(js_obj->stream_base, js_obj->stream_size);
+#elif defined(__GNUC__)
+    __builtin___clear_cache(js_obj->stream_base, js_obj->stream_base + js_obj->stream_size);
+#endif
+
+    return argv[0];
+}
+
 static term nif_jit_stream_module(Context *ctx, int argc, term argv[])
 {
     UNUSED(argc);
@@ -226,6 +248,10 @@ static const struct Nif jit_stream_mmap_read_nif = {
     .base.type = NIFFunctionType,
     .nif_ptr = nif_jit_stream_mmap_read
 };
+static const struct Nif jit_stream_mmap_flush_nif = {
+    .base.type = NIFFunctionType,
+    .nif_ptr = nif_jit_stream_mmap_flush
+};
 
 ModuleNativeEntryPoint jit_stream_entry_point(Context *ctx, term jit_stream)
 {
@@ -239,11 +265,6 @@ ModuleNativeEntryPoint jit_stream_entry_point(Context *ctx, term jit_stream)
         return NULL;
     }
 
-#if defined(__APPLE__)
-    sys_icache_invalidate(js_obj->stream_base, js_obj->stream_size);
-#elif defined(__GNUC__)
-    __builtin___clear_cache(js_obj->stream_base, js_obj->stream_base + js_obj->stream_size);
-#endif
 #if JIT_ARCH_TARGET == JIT_ARCH_ARMV6M
     // Set thumb bit for armv6m
     ModuleNativeEntryPoint result = (ModuleNativeEntryPoint) js_obj->stream_base + 1;
@@ -251,6 +272,7 @@ ModuleNativeEntryPoint jit_stream_entry_point(Context *ctx, term jit_stream)
     ModuleNativeEntryPoint result = (ModuleNativeEntryPoint) js_obj->stream_base;
 #endif
 
+    // Prevent module from being unmapped by dtor
     js_obj->stream_base = NULL;
     return result;
 }
@@ -291,6 +313,9 @@ const struct Nif *jit_stream_mmap_get_nif(const char *nifname)
         if (strcmp("read/3", rest) == 0) {
             return &jit_stream_mmap_read_nif;
         }
+        if (strcmp("flush/1", rest) == 0) {
+            return &jit_stream_mmap_flush_nif;
+        }
     }
     return NULL;
 }
diff --git a/src/platforms/generic_unix/lib/sys.c b/src/platforms/generic_unix/lib/sys.c
index 099164dd89..eedbe060b4 100644
--- a/src/platforms/generic_unix/lib/sys.c
+++ b/src/platforms/generic_unix/lib/sys.c
@@ -853,4 +853,24 @@ ModuleNativeEntryPoint sys_map_native_code(const uint8_t *native_code, size_t si
     return (ModuleNativeEntryPoint) (native_code + offset);
 #endif
 }
+
+bool sys_get_cache_native_code(GlobalContext *global, Module *mod, uint16_t *version, ModuleNativeEntryPoint *entry_point, uint32_t *labels)
+{
+    UNUSED(global);
+    UNUSED(mod);
+    UNUSED(version);
+    UNUSED(entry_point);
+    UNUSED(labels);
+    return false;
+}
+
+void sys_set_cache_native_code(GlobalContext *global, Module *mod, uint16_t version, ModuleNativeEntryPoint entry_point, uint32_t labels)
+{
+    UNUSED(global);
+    UNUSED(mod);
+    UNUSED(version);
+    UNUSED(entry_point);
+    UNUSED(labels);
+}
+
 #endif
diff --git a/src/platforms/generic_unix/main.c b/src/platforms/generic_unix/main.c
index f45fd7f14f..6908aa50f8 100644
--- a/src/platforms/generic_unix/main.c
+++ b/src/platforms/generic_unix/main.c
@@ -104,7 +104,7 @@ int main(int argc, char **argv)
                 const void *startup_beam = NULL;
                 const char *startup_module_name;
                 uint32_t startup_beam_size;
-                avmpack_find_section_by_flag(avmpack_data->data, 1, &startup_beam, &startup_beam_size, &startup_module_name);
+                avmpack_find_section_by_flag(avmpack_data->data, BEAM_START_FLAG, BEAM_START_FLAG, &startup_beam, &startup_beam_size, &startup_module_name);
 
                 if (startup_beam) {
                     avmpack_data->in_use = true;
diff --git a/src/platforms/rp2/CMakeLists.txt b/src/platforms/rp2/CMakeLists.txt
index 6dbcdf7bb8..86e5e6683a 100644
--- a/src/platforms/rp2/CMakeLists.txt
+++ b/src/platforms/rp2/CMakeLists.txt
@@ -54,6 +54,8 @@ set(HAVE_MKFIFO "" CACHE INTERNAL "Have symbol mkfifo" FORCE)
 set(HAVE_UNLINK "" CACHE INTERNAL "Have symbol unlink" FORCE)
 # Likewise with EXECVE
 set(HAVE_EXECVE "" CACHE INTERNAL "Have symbol execve" FORCE)
+# getcwd is defined in newlib header but not implemented
+set(HAVE_GETCWD "" CACHE INTERNAL "Have symbol getcwd" FORCE)
 
 # Options that make sense for this platform
 option(AVM_DISABLE_SMP "Disable SMP support." OFF)
@@ -69,8 +71,13 @@ if(CMAKE_SYSTEM_PROCESSOR MATCHES "^cortex-m.+$")
     if (NOT AVM_DISABLE_JIT)
         set(AVM_JIT_TARGET_ARCH "armv6m")
     endif()
+elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "^hazard3$")
+    # Pico2 RISC-V processor (Hazard3)
+    if (NOT AVM_DISABLE_JIT)
+        set(AVM_JIT_TARGET_ARCH "riscv32")
+    endif()
 else()
-    # Typically riscv is not supported yet
+    # Other processors not supported yet
     if (NOT AVM_DISABLE_JIT)
         message("JIT is not supported on ${CMAKE_SYSTEM_PROCESSOR}")
         set(AVM_DISABLE_JIT ON CACHE BOOL "Disable just in time compilation." FORCE)
diff --git a/src/platforms/rp2/src/CMakeLists.txt b/src/platforms/rp2/src/CMakeLists.txt
index c79433551e..f4ef67cd8f 100644
--- a/src/platforms/rp2/src/CMakeLists.txt
+++ b/src/platforms/rp2/src/CMakeLists.txt
@@ -55,6 +55,10 @@ else()
     target_compile_definitions(AtomVM PRIVATE PICO_STDIO_USB_CONNECT_WAIT_TIMEOUT_MS=20000)
 endif()
 
+if (AVM_DISABLE_SMP)
+    target_compile_definitions(AtomVM PRIVATE PICO_FLASH_ASSUME_CORE1_SAFE)
+endif()
+
 if (AVM_WAIT_BOOTSEL_ON_EXIT)
     target_compile_definitions(AtomVM PRIVATE WAIT_BOOTSEL_ON_EXIT)
 endif()
diff --git a/src/platforms/rp2/src/lib/CMakeLists.txt b/src/platforms/rp2/src/lib/CMakeLists.txt
index 3cc69b56a3..b9e594c9df 100644
--- a/src/platforms/rp2/src/lib/CMakeLists.txt
+++ b/src/platforms/rp2/src/lib/CMakeLists.txt
@@ -31,7 +31,6 @@ set(HEADER_FILES
 
 set(SOURCE_FILES
     gpiodriver.c
-    jit_stream_flash.c
     networkdriver.c
     otp_crypto_platform.c
     platform_defaultatoms.c
@@ -110,4 +109,16 @@ if (PICO_CYW43_SUPPORTED)
     define_if_function_exists(libAtomVM${PLATFORM_LIB_SUFFIX} gethostname "unistd.h" PRIVATE HAVE_GETHOSTNAME)
 endif()
 
+if (NOT AVM_DISABLE_JIT)
+    target_sources(
+        libAtomVM${PLATFORM_LIB_SUFFIX}
+        PRIVATE
+            jit_stream_flash_platform.c
+            ../../../../libAtomVM/jit_stream_flash.c
+            jit_stream_flash_platform.h
+            ../../../../libAtomVM/jit_stream_flash.h
+    )
+    target_link_options(libAtomVM${PLATFORM_LIB_SUFFIX} PUBLIC "SHELL:-Wl,-u -Wl,jit_stream_flash_get_nif")
+endif()
+
 target_link_options(libAtomVM${PLATFORM_LIB_SUFFIX} PUBLIC "SHELL:-Wl,-u -Wl,gpio_nif -Wl,-u -Wl,otp_crypto_nif")
diff --git a/src/platforms/rp2/src/lib/jit_stream_flash_platform.c b/src/platforms/rp2/src/lib/jit_stream_flash_platform.c
new file mode 100644
index 0000000000..e8a17c3537
--- /dev/null
+++ b/src/platforms/rp2/src/lib/jit_stream_flash_platform.c
@@ -0,0 +1,117 @@
+/*
+ * This file is part of AtomVM.
+ *
+ * Copyright 2025 by Paul Guyot <pguyot@kallisys.net>
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ * SPDX-License-Identifier: Apache-2.0 OR LGPL-2.1-or-later
+ */
+
+#ifndef AVM_NO_JIT
+
+#include "jit_stream_flash.h"
+
+#include <hardware/flash.h>
+#include <pico/flash.h>
+#include <stdio.h>
+#include <stdlib.h>
+
+#include "rp2_sys.h"
+
+// Helper structures for flash_safe_execute
+struct EraseParams
+{
+    uintptr_t addr;
+};
+
+struct WriteParams
+{
+    uintptr_t addr;
+    const uint8_t *data;
+    size_t len;
+};
+
+static void __not_in_flash_func(do_erase_sector)(void *params_ptr)
+{
+    struct EraseParams *params = (struct EraseParams *) params_ptr;
+    flash_range_erase(params->addr - XIP_BASE, FLASH_SECTOR_SIZE);
+}
+
+static void __not_in_flash_func(do_write_page)(void *params_ptr)
+{
+    struct WriteParams *params = (struct WriteParams *) params_ptr;
+    flash_range_program(params->addr - XIP_BASE, params->data, params->len);
+}
+
+struct JSFlashPlatformContext *jit_stream_flash_platform_init(void)
+{
+    return (struct JSFlashPlatformContext *) 1;
+}
+
+void jit_stream_flash_platform_destroy(struct JSFlashPlatformContext *pf_ctx)
+{
+    UNUSED(pf_ctx);
+}
+
+bool jit_stream_flash_platform_erase_sector(struct JSFlashPlatformContext *pf_ctx, uintptr_t addr)
+{
+    UNUSED(pf_ctx);
+
+    struct EraseParams params = {
+        .addr = addr
+    };
+
+    int r = flash_safe_execute(do_erase_sector, &params, UINT32_MAX);
+    if (UNLIKELY(r != PICO_OK)) {
+        fprintf(stderr, "flash_safe_execute (erase) failed with error %d\n", r);
+        return false;
+    }
+
+    return true;
+}
+
+bool jit_stream_flash_platform_write_page(struct JSFlashPlatformContext *pf_ctx, uintptr_t addr, const uint8_t *data)
+{
+    UNUSED(pf_ctx);
+
+    struct WriteParams params = {
+        .addr = addr,
+        .data = data,
+        .len = FLASH_PAGE_SIZE
+    };
+
+    int r = flash_safe_execute(do_write_page, &params, UINT32_MAX);
+    if (UNLIKELY(r != PICO_OK)) {
+        fprintf(stderr, "flash_safe_execute (write) failed with error %d\n", r);
+        return false;
+    }
+
+    return true;
+}
+
+uintptr_t jit_stream_flash_platform_ptr_to_executable(uintptr_t addr)
+{
+    // Set Thumb bit. NOTE(review): applied unconditionally, also on RISC-V (Hazard3) builds - confirm
+    return addr | 0x1;
+}
+
+uintptr_t jit_stream_flash_platform_executable_to_ptr(uintptr_t addr)
+{
+    // Clear Thumb bit (bit 0), undoing jit_stream_flash_platform_ptr_to_executable
+    return addr & ~0x1UL;
+}
+
+REGISTER_NIF_COLLECTION(jit_stream_flash, jit_stream_flash_init, NULL, jit_stream_flash_get_nif)
+
+#endif // AVM_NO_JIT
diff --git a/src/platforms/rp2/src/lib/jit_stream_flash_platform.h b/src/platforms/rp2/src/lib/jit_stream_flash_platform.h
new file mode 100644
index 0000000000..3ee8c660bd
--- /dev/null
+++ b/src/platforms/rp2/src/lib/jit_stream_flash_platform.h
@@ -0,0 +1,40 @@
+/*
+ * This file is part of AtomVM.
+ *
+ * Copyright 2025 by Paul Guyot <pguyot@kallisys.net>
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ * SPDX-License-Identifier: Apache-2.0 OR LGPL-2.1-or-later
+ */
+
+#ifndef _JIT_STREAM_FLASH_PLATFORM_H_
+#define _JIT_STREAM_FLASH_PLATFORM_H_
+
+#include <stdbool.h>
+#include <stddef.h>
+#include <stdint.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+// RP2040 flash constants (W25Q16JV chip)
+#define FLASH_SECTOR_SIZE 4096
+#define FLASH_PAGE_SIZE 256
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif // _JIT_STREAM_FLASH_PLATFORM_H_
diff --git a/src/platforms/rp2/src/lib/smp.c b/src/platforms/rp2/src/lib/smp.c
index 946b066305..44251e8913 100644
--- a/src/platforms/rp2/src/lib/smp.c
+++ b/src/platforms/rp2/src/lib/smp.c
@@ -57,14 +57,17 @@ static void scheduler_core1_entry_point(void)
 {
     _Static_assert(sizeof(uintptr_t) == sizeof(uint32_t), "Expected pointers to be 32 bits");
     uint32_t ctx_int = multicore_fifo_pop_blocking();
+    multicore_lockout_victim_init();
     int result = scheduler_entry_point((GlobalContext *) ctx_int);
     UNUSED(result);
+    multicore_lockout_victim_deinit();
 }
 
 void smp_scheduler_start(GlobalContext *ctx)
 {
     multicore_launch_core1(scheduler_core1_entry_point);
     multicore_fifo_push_blocking((uint32_t) ctx);
+    multicore_lockout_victim_init();
 }
 
 bool smp_is_main_thread(GlobalContext *glb)
diff --git a/src/platforms/rp2/src/main.c b/src/platforms/rp2/src/main.c
index e25e1398d0..0733c85e63 100644
--- a/src/platforms/rp2/src/main.c
+++ b/src/platforms/rp2/src/main.c
@@ -87,9 +87,12 @@ static int app_main()
     if (!avmpack_is_valid(MAIN_AVM, XIP_SRAM_BASE - (uintptr_t) MAIN_AVM)) {
         sleep_ms(5000);
         fprintf(stderr, "Fatal error: invalid main.avm packbeam\n");
+        if (avmpack_is_valid(LIB_AVM, (uintptr_t) MAIN_AVM - (uintptr_t) LIB_AVM)) {
+            fprintf(stderr, "Lib avm packbeam is valid, though\n");
+        }
         AVM_ABORT();
     }
-    if (!avmpack_find_section_by_flag(MAIN_AVM, BEAM_START_FLAG, &startup_beam, &startup_beam_size, &startup_module_name)) {
+    if (!avmpack_find_section_by_flag(MAIN_AVM, BEAM_START_FLAG, BEAM_START_FLAG, &startup_beam, &startup_beam_size, &startup_module_name)) {
         sleep_ms(5000);
         fprintf(stderr, "Fatal error: Failed to locate start module in main.avm packbeam.  (Did you flash a library by mistake?)");
         AVM_ABORT();
diff --git a/src/platforms/stm32/src/main.c b/src/platforms/stm32/src/main.c
index 7febe37717..689aa925f1 100644
--- a/src/platforms/stm32/src/main.c
+++ b/src/platforms/stm32/src/main.c
@@ -248,7 +248,7 @@ int main()
     port_driver_init_all(glb);
     nif_collection_init_all(glb);
 
-    if (!avmpack_is_valid(flashed_avm, size) || !avmpack_find_section_by_flag(flashed_avm, BEAM_START_FLAG, &startup_beam, &startup_beam_size, &startup_module_name)) {
+    if (!avmpack_is_valid(flashed_avm, size) || !avmpack_find_section_by_flag(flashed_avm, BEAM_START_FLAG, BEAM_START_FLAG, &startup_beam, &startup_beam_size, &startup_module_name)) {
         AVM_LOGE(TAG, "Invalid AVM Pack");
         AVM_ABORT();
     }
diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt
index 42ef857dda..c7652f4b65 100644
--- a/tests/CMakeLists.txt
+++ b/tests/CMakeLists.txt
@@ -24,12 +24,14 @@ project (tests)
 add_executable(test-erlang test.c)
 add_executable(test-enif test-enif.c)
 add_executable(test-heap test-heap.c)
+add_executable(test-jit_stream_flash test-jit_stream_flash.c ../src/libAtomVM/jit_stream_flash.c) # jit_stream_flash.c is compiled into the test executable itself
 add_executable(test-mailbox test-mailbox.c)
 add_executable(test-structs test-structs.c)
 
 target_compile_features(test-erlang PUBLIC c_std_11)
 target_compile_features(test-enif PUBLIC c_std_11)
 target_compile_features(test-heap PUBLIC c_std_11)
+target_compile_features(test-jit_stream_flash PUBLIC c_std_11)
 target_compile_features(test-mailbox PUBLIC c_std_11)
 target_compile_features(test-structs PUBLIC c_std_11)
 
@@ -37,6 +39,7 @@ if(CMAKE_COMPILER_IS_GNUCC)
     target_compile_options(test-erlang PUBLIC -Wall -pedantic -Wextra -ggdb)
     target_compile_options(test-enif PUBLIC -Wall -pedantic -Wextra -ggdb)
     target_compile_options(test-heap PUBLIC -Wall -pedantic -Wextra -ggdb)
+    target_compile_options(test-jit_stream_flash PUBLIC -Wall -pedantic -Wextra -ggdb)
     target_compile_options(test-mailbox PUBLIC -Wall -pedantic -Wextra -ggdb)
     target_compile_options(test-structs PUBLIC -Wall -pedantic -Wextra -ggdb)
 endif()
@@ -50,6 +53,7 @@ if(${CMAKE_SYSTEM_NAME} STREQUAL "Linux")
         target_link_libraries(test-erlang PRIVATE ${LIBRT})
         target_link_libraries(test-enif PRIVATE ${LIBRT})
         target_link_libraries(test-heap PRIVATE ${LIBRT})
+        target_link_libraries(test-jit_stream_flash PRIVATE ${LIBRT})
         target_link_libraries(test-mailbox PRIVATE ${LIBRT})
         target_link_libraries(test-structs PRIVATE ${LIBRT})
     else()
@@ -63,6 +67,7 @@ if (MbedTLS_FOUND)
     target_link_libraries(test-erlang PRIVATE MbedTLS::mbedtls)
     target_link_libraries(test-enif PRIVATE MbedTLS::mbedtls)
     target_link_libraries(test-heap PRIVATE MbedTLS::mbedtls)
+    target_link_libraries(test-jit_stream_flash PRIVATE MbedTLS::mbedtls)
     target_link_libraries(test-mailbox PRIVATE MbedTLS::mbedtls)
     target_link_libraries(test-structs PRIVATE MbedTLS::mbedtls)
 endif()
@@ -79,6 +84,7 @@ if((${CMAKE_SYSTEM_NAME} STREQUAL "Darwin") OR
     target_include_directories(test-erlang PRIVATE ../src/platforms/generic_unix/lib)
     target_include_directories(test-enif PRIVATE ../src/platforms/generic_unix/lib)
     target_include_directories(test-heap PRIVATE ../src/platforms/generic_unix/lib)
+    target_include_directories(test-jit_stream_flash PRIVATE ../src/platforms/generic_unix/lib)
     target_include_directories(test-mailbox PRIVATE ../src/platforms/generic_unix/lib)
     target_include_directories(test-structs PRIVATE ../src/platforms/generic_unix/lib)
 else()
@@ -88,11 +94,15 @@ endif()
 target_include_directories(test-erlang PRIVATE ../src/libAtomVM)
 target_include_directories(test-enif PRIVATE ../src/libAtomVM)
 target_include_directories(test-heap PRIVATE ../src/libAtomVM)
+target_include_directories(test-jit_stream_flash PRIVATE ../src/libAtomVM ${CMAKE_CURRENT_SOURCE_DIR}) # the tests directory holds the host jit_stream_flash_platform.h
 target_include_directories(test-mailbox PRIVATE ../src/libAtomVM)
 target_include_directories(test-structs PRIVATE ../src/libAtomVM)
 target_link_libraries(test-erlang PRIVATE libAtomVM libAtomVM${PLATFORM_LIB_SUFFIX})
 target_link_libraries(test-enif PRIVATE libAtomVM libAtomVM${PLATFORM_LIB_SUFFIX})
 target_link_libraries(test-heap PRIVATE libAtomVM libAtomVM${PLATFORM_LIB_SUFFIX})
+# test-jit_stream_flash compiles jit_stream_flash.c as one of its own sources (see its add_executable) and provides its own mock platform implementation
+target_compile_definitions(test-jit_stream_flash PRIVATE TEST_JIT_STREAM_FLASH)
+target_link_libraries(test-jit_stream_flash PRIVATE libAtomVM libAtomVM${PLATFORM_LIB_SUFFIX})
 target_link_libraries(test-mailbox PRIVATE libAtomVM libAtomVM${PLATFORM_LIB_SUFFIX})
 target_link_libraries(test-structs PRIVATE libAtomVM libAtomVM${PLATFORM_LIB_SUFFIX})
 
@@ -120,11 +130,13 @@ if (COVERAGE)
     append_coverage_compiler_flags_to_target(test-erlang)
     append_coverage_compiler_flags_to_target(test-enif)
     append_coverage_compiler_flags_to_target(test-heap)
+    append_coverage_compiler_flags_to_target(test-jit_stream_flash)
     append_coverage_compiler_flags_to_target(test-mailbox)
     append_coverage_compiler_flags_to_target(test-structs)
     append_coverage_linker_flags_to_target(test-erlang)
     append_coverage_linker_flags_to_target(test-enif)
     append_coverage_linker_flags_to_target(test-heap)
+    append_coverage_linker_flags_to_target(test-jit_stream_flash)
     append_coverage_linker_flags_to_target(test-mailbox)
     append_coverage_linker_flags_to_target(test-structs)
     if (CMAKE_COMPILER_IS_GNUCC)
diff --git a/tests/erlang_tests/CMakeLists.txt b/tests/erlang_tests/CMakeLists.txt
index 267a4b3d29..ed77bc537c 100644
--- a/tests/erlang_tests/CMakeLists.txt
+++ b/tests/erlang_tests/CMakeLists.txt
@@ -613,6 +613,8 @@ compile_erlang(test_lists_member)
 compile_erlang(test_lists_keymember)
 compile_erlang(test_lists_keyfind)
 
+compile_erlang(test_inline_arith)
+
 if(Erlang_VERSION VERSION_GREATER_EQUAL "23")
     set(OTP23_OR_GREATER_TESTS
         test_op_bs_start_match_asm.beam
@@ -1135,6 +1137,8 @@ set(erlang_test_beams
     test_lists_keymember.beam
     test_lists_keyfind.beam
 
+    test_inline_arith.beam
+
     test_code_server_nifs.beam
 
     test_op_bs_start_match.beam
diff --git a/tests/erlang_tests/test_bs.erl b/tests/erlang_tests/test_bs.erl
index 460e8774ea..36ab0bb400 100644
--- a/tests/erlang_tests/test_bs.erl
+++ b/tests/erlang_tests/test_bs.erl
@@ -99,6 +99,8 @@ start() ->
 
     ok = test_bs_skip_bits2_little(),
 
+    ok = test_float(),
+
     0.
 
 test_pack_small_ints({A, B, C}, Expect) ->
@@ -532,6 +534,72 @@ test_bs_match_string_select() ->
 test_bs_skip_bits2_little() ->
     ok = check_x86_64_jt(id(<<16#e9, 0:32>>)).
 
+test_float() -> % float segments: construction, matching, integer conversion, 16-bit floats, invalid arguments
+    Pi = id(3.14),
+    <<64,9,30,184,81,235,133,31,3,14>> = <<Pi/float, 3, 14>>, % default float segment: same bytes as the explicit 64-bit big-endian form below
+    <<64,9,30,184,81,235,133,31,3,14>> = <<Pi/float-big, 3, 14>>,
+    <<31,133,235,81,184,30,9,64,3,14>> = <<Pi/float-little, 3, 14>>,
+    <<_:64,3,14>> = <<Pi/float-native, 3, 14>>, % native byte order is not pinned: only the 64-bit width and the trailing bytes are checked
+    <<64,72,245,195,3,14>> = <<Pi:32/float, 3, 14>>,
+    <<195,245,72,64,3,14>> = <<Pi:32/float-little, 3, 14>>,
+
+    <<Pi/float, 3, 14>> = id(<<64,9,30,184,81,235,133,31,3,14>>), % matching direction; Pi is bound, so the decoded value must equal it exactly
+    <<Pi/float-little, 3, 14>> = id(<<31,133,235,81,184,30,9,64,3,14>>),
+    <<PiS:32/float, 3, 14>> = id(<<64,72,245,195,3,14>>),
+    <<PiS:32/float-little, 3, 14>> = id(<<195,245,72,64,3,14>>), % PiS is already bound: both byte orders must decode to the same value
+    true = abs(PiS - Pi) < 0.0001, % the 32-bit value is only compared to Pi within a tolerance
+
+    % Test integer to float conversion
+    Int2 = id(2),
+    IntNeg2 = id(-2),
+    Int32 = id(32),
+    <<64,0,0,0,0,0,0,0>> = <<Int2/float>>,
+    <<192,0,0,0,0,0,0,0>> = <<IntNeg2/float>>,
+    <<66,0,0,0>> = <<Int32:32/float>>,
+
+    % 16-bit floats are supported in OTP 24+ and AtomVM
+    Has16BitFloats =
+        case erlang:system_info(machine) of
+            "BEAM" ->
+                erlang:system_info(otp_release) >= "24"; % string comparison of the release number
+            "ATOM" ->
+                true
+        end,
+    if
+        Has16BitFloats ->
+            % Test that 16-bit floats work
+            Pi16 = id(3.14),
+            <<66,72>> = <<Pi16:16/float>>,
+            <<66,72>> = <<Pi16:16/float-big>>,
+            <<72,66>> = <<Pi16:16/float-little>>,
+            <<Pi16B:16/float, 3, 14>> = id(<<66, 72, 3, 14>>), % through id/1 like the 64/32-bit matches above
+            <<Pi16B:16/float-little, 3, 14>> = id(<<72, 66, 3, 14>>), % Pi16B is already bound: both byte orders must agree
+            true = abs(Pi16B - Pi16) < 0.001, % wider tolerance than the 32-bit check
+            ok;
+        true ->
+            ok
+    end,
+
+    ok = test_create_with_invalid_float_value(),
+    ok = test_create_with_invalid_float_size(),
+    ok.
+
+test_create_with_invalid_float_value() -> % an atom, a list and a binary used as float segment values, each with a size of 64 or 32
+    ok = expect_error(fun() -> create_float_binary(foo, id(64)) end, badarg), % expect_error/2 is defined outside this hunk; badarg is the expected error
+    ok = expect_error(fun() -> create_float_binary([1, 2, 3], id(32)) end, badarg),
+    ok = expect_error(fun() -> create_float_binary(<<"binary">>, id(64)) end, badarg),
+    ok.
+
+test_create_with_invalid_float_size() -> % a valid float value (3.14) combined with sizes that must be rejected
+    % These sizes (8, 128 and the atom foo) are invalid in both BEAM and AtomVM
+    ok = expect_error(fun() -> create_float_binary(3.14, id(8)) end, badarg),
+    ok = expect_error(fun() -> create_float_binary(3.14, id(128)) end, badarg),
+    ok = expect_error(fun() -> create_float_binary(3.14, id(foo)) end, badarg), % non-integer size
+    ok.
+
+create_float_binary(Value, Size) -> % builds a binary holding one float segment; Size is a variable, so the segment size is a run-time value
+    <<Value:Size/float>>.
+
 check_x86_64_jt(<<>>) -> ok;
 check_x86_64_jt(<<16#e9, _Offset:32/little, Tail/binary>>) -> check_x86_64_jt(Tail);
 check_x86_64_jt(Bin) -> {unexpected, Bin}.
diff --git a/src/platforms/rp2/src/lib/jit_stream_flash.c b/tests/jit_stream_flash_platform.h
similarity index 69%
rename from src/platforms/rp2/src/lib/jit_stream_flash.c
rename to tests/jit_stream_flash_platform.h
index 77dfcca908..b9e39dd36b 100644
--- a/src/platforms/rp2/src/lib/jit_stream_flash.c
+++ b/tests/jit_stream_flash_platform.h
@@ -18,17 +18,23 @@
  * SPDX-License-Identifier: Apache-2.0 OR LGPL-2.1-or-later
  */
 
-#ifndef AVM_NO_JIT
+#ifndef _JIT_STREAM_FLASH_PLATFORM_H_
+#define _JIT_STREAM_FLASH_PLATFORM_H_
 
-#include "context.h"
-#include "jit.h"
-#include "term.h"
+#include <stdbool.h>
+#include <stddef.h>
+#include <stdint.h>
 
-ModuleNativeEntryPoint jit_stream_entry_point(Context *ctx, term jit_stream)
-{
-    UNUSED(ctx);
-    UNUSED(jit_stream);
-    return NULL;
-}
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+// Host test flash constants (same values as the RP2040 flash constants)
+#define FLASH_SECTOR_SIZE 4096
+#define FLASH_PAGE_SIZE 256
 
+#ifdef __cplusplus
+}
 #endif
+
+#endif // _JIT_STREAM_FLASH_PLATFORM_H_
diff --git a/tests/libs/jit/CMakeLists.txt b/tests/libs/jit/CMakeLists.txt
index 26ab6b4ecc..45473d9f10 100644
--- a/tests/libs/jit/CMakeLists.txt
+++ b/tests/libs/jit/CMakeLists.txt
@@ -30,6 +30,8 @@ set(ERLANG_MODULES
     jit_aarch64_asm_tests
     jit_armv6m_tests
     jit_armv6m_asm_tests
+    jit_riscv32_tests
+    jit_riscv32_asm_tests
     jit_x86_64_tests
     jit_x86_64_asm_tests
 )
diff --git a/tests/libs/jit/jit_aarch64_tests.erl b/tests/libs/jit/jit_aarch64_tests.erl
index 23291a400c..c696f93d0b 100644
--- a/tests/libs/jit/jit_aarch64_tests.erl
+++ b/tests/libs/jit/jit_aarch64_tests.erl
@@ -88,6 +88,51 @@ call_primitive_2_args_test() ->
         >>,
     ?assertEqual(dump_to_bin(Dump), Stream).
 
+call_primitive_5_args_test() -> % primitive call with five arguments, three of them integer literals
+    State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)),
+    State1 = ?BACKEND:call_primitive_last(State0, ?PRIM_ALLOCATE, [ctx, jit_state, 16, 32, 2]), % NOTE(review): uses call_primitive_last despite the test name, hence the final 'br' below
+    Stream = ?BACKEND:stream(State1),
+    Dump =
+        <<
+            "   0:	f9401447 	ldr	x7, [x2, #40]\n" % the function pointer is fetched through x2 before x2 is overwritten with the literal 16
+            "   4:	d2800202 	mov	x2, #0x10                  	// #16\n"
+            "   8:	d2800403 	mov	x3, #0x20                  	// #32\n"
+            "   c:	d2800044 	mov	x4, #0x2                   	// #2\n"
+            "  10:	d61f00e0 	br	x7"
+        >>,
+    ?assertEqual(dump_to_bin(Dump), Stream).
+
+call_primitive_6_args_test() -> % non-tail primitive call with six arguments, two of them {free, Reg}
+    State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)),
+    % Get bin_ptr from x_reg 0 (similar to get_list_test pattern)
+    {State1, RegA} = ?BACKEND:move_to_native_register(State0, {x_reg, 0}),
+    {State2, RegA} = ?BACKEND:and_(State1, {free, RegA}, ?TERM_PRIMARY_CLEAR_MASK), % RegA is bound, so this match asserts that the result stays in the same register
+    % Get another register for the last parameter to test {free, Reg} handling
+    {State3, OtherReg} = ?BACKEND:move_to_native_register(State2, {x_reg, 1}),
+    % Call PRIM_BITSTRING_EXTRACT_INTEGER with 6 arguments
+    {State4, _ResultReg} = ?BACKEND:call_primitive(State3, ?PRIM_BITSTRING_EXTRACT_INTEGER, [
+        ctx, jit_state, {free, RegA}, 64, 8, {free, OtherReg}
+    ]),
+    Stream = ?BACKEND:stream(State4),
+    Dump =
+        <<
+            "   0:	f9401807 	ldr	x7, [x0, #48]\n"
+            "   4:	927ef4e7 	and	x7, x7, #0xfffffffffffffffc\n"
+            "   8:	f9401c08 	ldr	x8, [x0, #56]\n"
+            "   c:	f940b850 	ldr	x16, [x2, #368]\n"
+            "  10:	a9bf03fe 	stp	x30, x0, [sp, #-16]!\n" % x30, x0, x1 and x2 are pushed before the call and popped after it
+            "  14:	a9bf0be1 	stp	x1, x2, [sp, #-16]!\n"
+            "  18:	aa0703e2 	mov	x2, x7\n"
+            "  1c:	d2800803 	mov	x3, #0x40                  	// #64\n"
+            "  20:	d2800104 	mov	x4, #0x8                   	// #8\n"
+            "  24:	aa0803e5 	mov	x5, x8\n"
+            "  28:	d63f0200 	blr	x16\n"
+            "  2c:	aa0003e7 	mov	x7, x0\n" % the result is copied out of x0 before x0 is restored from the stack
+            "  30:	a8c10be1 	ldp	x1, x2, [sp], #16\n"
+            "  34:	a8c103fe 	ldp	x30, x0, [sp], #16"
+        >>,
+    ?assertEqual(dump_to_bin(Dump), Stream).
+
 call_primitive_extended_regs_test() ->
     State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)),
     {State1, RegA} = ?BACKEND:call_primitive(State0, ?PRIM_EXTENDED_REGISTER_PTR, [ctx, 19]),
@@ -146,6 +191,44 @@ call_primitive_extended_regs_test() ->
         >>,
     ?assertEqual(dump_to_bin(Dump), Stream).
 
+call_primitive_few_free_regs_test() -> % primitive call issued while five native registers are already allocated
+    State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)),
+    {State1, Reg1} = ?BACKEND:move_to_native_register(State0, 1), % the literals 1..5 land in x7..x11 (see the dump)
+    {State2, Reg2} = ?BACKEND:move_to_native_register(State1, 2),
+    {State3, Reg3} = ?BACKEND:move_to_native_register(State2, 3),
+    {State4, Reg4} = ?BACKEND:move_to_native_register(State3, 4),
+    {State5, Reg5} = ?BACKEND:move_to_native_register(State4, 5),
+    {State6, ResultReg} = ?BACKEND:call_primitive(State5, ?PRIM_BITSTRING_INSERT_INTEGER, [
+        Reg2, Reg1, {free, Reg4}, Reg3, {free, Reg5}
+    ]),
+    State7 = ?BACKEND:free_native_registers(State6, [ResultReg, Reg2, Reg1, Reg3]), % Reg4 and Reg5 were passed as {free, _} and are not released again here
+    ?BACKEND:assert_all_native_free(State7),
+    Stream = ?BACKEND:stream(State7),
+    Dump = <<
+        "   0:	d2800027 	mov	x7, #0x1                   	// #1\n"
+        "   4:	d2800048 	mov	x8, #0x2                   	// #2\n"
+        "   8:	d2800069 	mov	x9, #0x3                   	// #3\n"
+        "   c:	d280008a 	mov	x10, #0x4                   	// #4\n"
+        "  10:	d28000ab 	mov	x11, #0x5                   	// #5\n"
+        "  14:	f940e450 	ldr	x16, [x2, #456]\n"
+        "  18:	a9bf03fe 	stp	x30, x0, [sp, #-16]!\n"
+        "  1c:	a9bf0be1 	stp	x1, x2, [sp, #-16]!\n"
+        "  20:	a9bf23e9 	stp	x9, x8, [sp, #-16]!\n" % only the registers not passed as {free, _} (x7, x8, x9) are saved
+        "  24:	f81f0fe7 	str	x7, [sp, #-16]!\n"
+        "  28:	aa0803e0 	mov	x0, x8\n"
+        "  2c:	aa0703e1 	mov	x1, x7\n"
+        "  30:	aa0a03e2 	mov	x2, x10\n"
+        "  34:	aa0903e3 	mov	x3, x9\n"
+        "  38:	aa0b03e4 	mov	x4, x11\n"
+        "  3c:	d63f0200 	blr	x16\n"
+        "  40:	aa0003ea 	mov	x10, x0\n" % the result is placed in x10, the register that held the freed Reg4
+        "  44:	f84107e7 	ldr	x7, [sp], #16\n"
+        "  48:	a8c123e9 	ldp	x9, x8, [sp], #16\n"
+        "  4c:	a8c10be1 	ldp	x1, x2, [sp], #16\n"
+        "  50:	a8c103fe 	ldp	x30, x0, [sp], #16"
+    >>,
+    ?assertEqual(dump_to_bin(Dump), Stream).
+
 call_ext_only_test() ->
     State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)),
     State1 = ?BACKEND:decrement_reductions_and_maybe_schedule_next(State0),
@@ -168,6 +251,23 @@ call_ext_only_test() ->
     >>,
     ?assertEqual(dump_to_bin(Dump), Stream).
 
+call_primitive_last_5_args_test() -> % tail call of a primitive with five arguments, the last one a freed native register
+    State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)),
+    {State1, RegA} = ?BACKEND:move_to_native_register(State0, {x_reg, 0}),
+    State2 = ?BACKEND:call_primitive_last(State1, ?PRIM_RAISE_ERROR_TUPLE, [
+        ctx, jit_state, offset, ?CASE_CLAUSE_ATOM, {free, RegA} % 'offset' shows up as the literal 8 in the dump, presumably the stream offset at the call site
+    ]),
+    Stream = ?BACKEND:stream(State2),
+    Dump = <<
+        "   0:	f9401807 	ldr	x7, [x0, #48]\n"
+        "   4:	f9404c48 	ldr	x8, [x2, #152]\n" % the function pointer is fetched through x2 before x2 receives the third argument
+        "   8:	d2800102 	mov	x2, #0x8                   	// #8\n"
+        "   c:	d2805963 	mov	x3, #0x2cb                 	// #715\n"
+        "  10:	aa0703e4 	mov	x4, x7\n"
+        "  14:	d61f0100 	br	x8"
+    >>,
+    ?assertEqual(dump_to_bin(Dump), Stream).
+
 call_ext_last_test() ->
     State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)),
     State1 = ?BACKEND:decrement_reductions_and_maybe_schedule_next(State0),
@@ -729,6 +829,82 @@ if_block_test_() ->
                     >>,
                     ?assertEqual(dump_to_bin(Dump), Stream),
                     ?assertEqual([RegB], ?BACKEND:used_regs(State1))
+                end),
+                ?_test(begin
+                    State1 = ?BACKEND:if_block(
+                        State0,
+                        {100, '<', RegA},
+                        fun(BSt0) ->
+                            ?BACKEND:add(BSt0, RegB, 2)
+                        end
+                    ),
+                    Stream = ?BACKEND:stream(State1),
+                    Dump = <<
+                        "   0:	f9401807 	ldr	x7, [x0, #48]\n"
+                        "   4:	f9401c08 	ldr	x8, [x0, #56]\n"
+                        "   8:	f10190ff 	cmp	x7, #0x64\n"
+                        "   c:	5400004d 	b.le	0x14\n"
+                        "  10:	91000908 	add	x8, x8, #0x2"
+                    >>,
+                    ?assertEqual(dump_to_bin(Dump), Stream),
+                    ?assertEqual([RegB, RegA], ?BACKEND:used_regs(State1))
+                end),
+                ?_test(begin
+                    State1 = ?BACKEND:if_block(
+                        State0,
+                        {100, '<', {free, RegA}},
+                        fun(BSt0) ->
+                            ?BACKEND:add(BSt0, RegB, 2)
+                        end
+                    ),
+                    Stream = ?BACKEND:stream(State1),
+                    Dump = <<
+                        "   0:	f9401807 	ldr	x7, [x0, #48]\n"
+                        "   4:	f9401c08 	ldr	x8, [x0, #56]\n"
+                        "   8:	f10190ff 	cmp	x7, #0x64\n"
+                        "   c:	5400004d 	b.le	0x14\n"
+                        "  10:	91000908 	add	x8, x8, #0x2"
+                    >>,
+                    ?assertEqual(dump_to_bin(Dump), Stream),
+                    ?assertEqual([RegB], ?BACKEND:used_regs(State1))
+                end),
+                ?_test(begin
+                    State1 = ?BACKEND:if_block(
+                        State0,
+                        {RegA, '<', 100},
+                        fun(BSt0) ->
+                            ?BACKEND:add(BSt0, RegB, 2)
+                        end
+                    ),
+                    Stream = ?BACKEND:stream(State1),
+                    Dump = <<
+                        "   0:	f9401807 	ldr	x7, [x0, #48]\n"
+                        "   4:	f9401c08 	ldr	x8, [x0, #56]\n"
+                        "   8:	f10190ff 	cmp	x7, #0x64\n"
+                        "   c:	5400004a 	b.ge	0x14  // b.tcont\n"
+                        "  10:	91000908 	add	x8, x8, #0x2"
+                    >>,
+                    ?assertEqual(dump_to_bin(Dump), Stream),
+                    ?assertEqual([RegB, RegA], ?BACKEND:used_regs(State1))
+                end),
+                ?_test(begin
+                    State1 = ?BACKEND:if_block(
+                        State0,
+                        {{free, RegA}, '<', 100},
+                        fun(BSt0) ->
+                            ?BACKEND:add(BSt0, RegB, 2)
+                        end
+                    ),
+                    Stream = ?BACKEND:stream(State1),
+                    Dump = <<
+                        "   0:	f9401807 	ldr	x7, [x0, #48]\n"
+                        "   4:	f9401c08 	ldr	x8, [x0, #56]\n"
+                        "   8:	f10190ff 	cmp	x7, #0x64\n"
+                        "   c:	5400004a 	b.ge	0x14  // b.tcont\n"
+                        "  10:	91000908 	add	x8, x8, #0x2"
+                    >>,
+                    ?assertEqual(dump_to_bin(Dump), Stream),
+                    ?assertEqual([RegB], ?BACKEND:used_regs(State1))
                 end)
             ]
         end}.
@@ -833,6 +1009,38 @@ call_only_or_schedule_next_and_label_relocation_test() ->
         >>,
     ?assertEqual(dump_to_bin(Dump), Stream).
 
+call_only_or_schedule_next_known_label_test() -> % call_only_or_schedule_next towards a label whose offset is already registered
+    State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)),
+    State1 = ?BACKEND:jump_table(State0, 2),
+    State2 = ?BACKEND:add_label(State1, 1),
+    State3 = ?BACKEND:add_label(State2, 2, 16#2c), % label 2 is given the explicit offset 0x2c before any code refers to it
+    State4 = ?BACKEND:call_only_or_schedule_next(State3, 2),
+    State5 = ?BACKEND:call_primitive_last(State4, 0, [ctx, jit_state]),
+    % OP_INT_CALL_END
+    State6 = ?BACKEND:add_label(State5, 0),
+    State7 = ?BACKEND:call_primitive_last(State6, 1, [ctx, jit_state]),
+    State8 = ?BACKEND:update_branches(State7),
+    Stream = ?BACKEND:stream(State8),
+    Dump =
+        <<
+            "   0:	1400000d 	b	0x34\n" % three jump table entries, for labels 0, 1 and 2
+            "   4:	14000002 	b	0xc\n"
+            "   8:	14000009 	b	0x2c\n"
+            "   c:	b9401027 	ldr	w7, [x1, #16]\n"
+            "  10:	f10004e7 	subs	x7, x7, #0x1\n"
+            "  14:	b9001027 	str	w7, [x1, #16]\n"
+            "  18:	540000a1 	b.ne	0x2c  // b.any\n" % branches to label 2 at the offset registered above
+            "  1c:	10000087 	adr	x7, 0x2c\n"
+            "  20:	f9000427 	str	x7, [x1, #8]\n"
+            "  24:	f9400847 	ldr	x7, [x2, #16]\n"
+            "  28:	d61f00e0 	br	x7\n"
+            "  2c:	f9400047 	ldr	x7, [x2]\n"
+            "  30:	d61f00e0 	br	x7\n"
+            "  34:	f9400447 	ldr	x7, [x2, #8]\n"
+            "  38:	d61f00e0 	br	x7"
+        >>,
+    ?assertEqual(dump_to_bin(Dump), Stream).
+
 call_bif_with_large_literal_integer_test() ->
     State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)),
     {State1, FuncPtr} = ?BACKEND:call_primitive(State0, 8, [jit_state, 2]),
@@ -892,7 +1100,7 @@ call_bif_with_large_literal_integer_test() ->
 get_list_test() ->
     State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)),
     {State1, Reg} = ?BACKEND:move_to_native_register(State0, {x_reg, 0}),
-    State2 = ?BACKEND:and_(State1, Reg, -4),
+    {State2, Reg} = ?BACKEND:and_(State1, {free, Reg}, -4),
     State3 = ?BACKEND:move_array_element(State2, Reg, 1, {y_reg, 1}),
     State4 = ?BACKEND:move_array_element(State3, Reg, 0, {y_reg, 0}),
     State5 = ?BACKEND:free_native_registers(State4, [Reg]),
@@ -912,17 +1120,18 @@ get_list_test() ->
 
 is_integer_test() ->
     State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)),
+    State1 = ?BACKEND:jump_table(State0, 1),
     Label = 1,
     Arg1 = {x_reg, 0},
-    {State1, Reg} = ?BACKEND:move_to_native_register(State0, Arg1),
-    State2 = ?BACKEND:if_block(
-        State1, {Reg, '&', ?TERM_IMMED_TAG_MASK, '!=', ?TERM_INTEGER_TAG}, fun(MSt0) ->
+    {State2, Reg} = ?BACKEND:move_to_native_register(State1, Arg1),
+    State3 = ?BACKEND:if_block(
+        State2, {Reg, '&', ?TERM_IMMED_TAG_MASK, '!=', ?TERM_INTEGER_TAG}, fun(MSt0) ->
             MSt1 = ?BACKEND:if_block(
                 MSt0, {Reg, '&', ?TERM_PRIMARY_MASK, '!=', ?TERM_PRIMARY_BOXED}, fun(BSt0) ->
                     ?BACKEND:jump_to_label(BSt0, Label)
                 end
             ),
-            MSt2 = ?BACKEND:and_(MSt1, Reg, ?TERM_PRIMARY_CLEAR_MASK),
+            {MSt2, Reg} = ?BACKEND:and_(MSt1, {free, Reg}, ?TERM_PRIMARY_CLEAR_MASK),
             MSt3 = ?BACKEND:move_array_element(MSt2, Reg, 0, Reg),
             ?BACKEND:if_block(
                 MSt3,
@@ -933,27 +1142,29 @@ is_integer_test() ->
             )
         end
     ),
-    State3 = ?BACKEND:free_native_registers(State2, [Reg]),
-    ?BACKEND:assert_all_native_free(State3),
-    Offset = ?BACKEND:offset(State3),
-    State4 = ?BACKEND:add_label(State3, Label, Offset + 16#100),
-    State5 = ?BACKEND:update_branches(State4),
-    Stream = ?BACKEND:stream(State5),
+    State4 = ?BACKEND:free_native_registers(State3, [Reg]),
+    ?BACKEND:assert_all_native_free(State4),
+    Offset = ?BACKEND:offset(State4),
+    State5 = ?BACKEND:add_label(State4, Label, Offset + 16#100),
+    State6 = ?BACKEND:update_branches(State5),
+    Stream = ?BACKEND:stream(State6),
     Dump = <<
-        "   0:	f9401807 	ldr	x7, [x0, #48]\n"
-        "   4:	92400ce8 	and	x8, x7, #0xf\n"
-        "   8:	f1003d1f 	cmp	x8, #0xf\n"
-        "   c:	54000160 	b.eq	0x38  // b.none\n"
-        "  10:	924004e8 	and	x8, x7, #0x3\n"
-        "  14:	f100091f 	cmp	x8, #0x2\n"
-        "  18:	54000040 	b.eq	0x20  // b.none\n"
-        "  1c:	14000047 	b	0x138\n"
-        "  20:	927ef4e7 	and	x7, x7, #0xfffffffffffffffc\n"
-        "  24:	f94000e7 	ldr	x7, [x7]\n"
-        "  28:	924014e7 	and	x7, x7, #0x3f\n"
-        "  2c:	f10020ff 	cmp	x7, #0x8\n"
-        "  30:	54000040 	b.eq	0x38  // b.none\n"
-        "  34:	14000041 	b	0x138"
+        "   0:	14000000 	b	0x0\n"
+        "   4:	1400004f 	b	0x140\n"
+        "   8:	f9401807 	ldr	x7, [x0, #48]\n"
+        "   c:	92400ce8 	and	x8, x7, #0xf\n"
+        "  10:	f1003d1f 	cmp	x8, #0xf\n"
+        "  14:	54000160 	b.eq	0x40\n"
+        "  18:	924004e8 	and	x8, x7, #0x3\n"
+        "  1c:	f100091f 	cmp	x8, #0x2\n"
+        "  20:	54000040 	b.eq	0x28\n"
+        "  24:	14000047 	b	0x140\n"
+        "  28:	927ef4e7 	and	x7, x7, #0xfffffffffffffffc\n"
+        "  2c:	f94000e7 	ldr	x7, [x7]\n"
+        "  30:	924014e7 	and	x7, x7, #0x3f\n"
+        "  34:	f10020ff 	cmp	x7, #0x8\n"
+        "  38:	54000040 	b.eq	0x40\n"
+        "  3c:	14000041 	b	0x140"
     >>,
     ?assertEqual(dump_to_bin(Dump), Stream).
 
@@ -964,15 +1175,16 @@ cond_jump_to_label(Cond, Label, MMod, MSt0) ->
 
 is_number_test() ->
     State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)),
+    State1 = ?BACKEND:jump_table(State0, 1),
     Label = 1,
     Arg1 = {x_reg, 0},
-    {State1, Reg} = ?BACKEND:move_to_native_register(State0, Arg1),
-    State2 = ?BACKEND:if_block(
-        State1, {Reg, '&', ?TERM_IMMED_TAG_MASK, '!=', ?TERM_INTEGER_TAG}, fun(BSt0) ->
+    {State2, Reg} = ?BACKEND:move_to_native_register(State1, Arg1),
+    State3 = ?BACKEND:if_block(
+        State2, {Reg, '&', ?TERM_IMMED_TAG_MASK, '!=', ?TERM_INTEGER_TAG}, fun(BSt0) ->
             BSt1 = cond_jump_to_label(
                 {Reg, '&', ?TERM_PRIMARY_MASK, '!=', ?TERM_PRIMARY_BOXED}, Label, ?BACKEND, BSt0
             ),
-            BSt2 = ?BACKEND:and_(BSt1, Reg, ?TERM_PRIMARY_CLEAR_MASK),
+            {BSt2, Reg} = ?BACKEND:and_(BSt1, {free, Reg}, ?TERM_PRIMARY_CLEAR_MASK),
             BSt3 = ?BACKEND:move_array_element(BSt2, Reg, 0, Reg),
             cond_jump_to_label(
                 {'and', [
@@ -985,56 +1197,262 @@ is_number_test() ->
             )
         end
     ),
-    State3 = ?BACKEND:free_native_registers(State2, [Reg]),
-    ?BACKEND:assert_all_native_free(State3),
-    Offset = ?BACKEND:offset(State3),
-    State4 = ?BACKEND:add_label(State3, Label, Offset + 16#100),
-    State5 = ?BACKEND:update_branches(State4),
-    Stream = ?BACKEND:stream(State5),
+    State4 = ?BACKEND:free_native_registers(State3, [Reg]),
+    ?BACKEND:assert_all_native_free(State4),
+    Offset = ?BACKEND:offset(State4),
+    State5 = ?BACKEND:add_label(State4, Label, Offset + 16#100),
+    State6 = ?BACKEND:update_branches(State5),
+    Stream = ?BACKEND:stream(State6),
     Dump = <<
-        "   0:	f9401807 	ldr	x7, [x0, #48]\n"
-        "   4:	92400ce8 	and	x8, x7, #0xf\n"
-        "   8:	f1003d1f 	cmp	x8, #0xf\n"
-        "   c:	540001c0 	b.eq	0x44  // b.none\n"
-        "  10:	924004e8 	and	x8, x7, #0x3\n"
-        "  14:	f100091f 	cmp	x8, #0x2\n"
-        "  18:	54000040 	b.eq	0x20  // b.none\n"
-        "  1c:	1400004a 	b	0x144\n"
-        "  20:	927ef4e7 	and	x7, x7, #0xfffffffffffffffc\n"
-        "  24:	f94000e7 	ldr	x7, [x7]\n"
-        "  28:	924014e8 	and	x8, x7, #0x3f\n"
-        "  2c:	f100211f 	cmp	x8, #0x8\n"
-        "  30:	540000a0 	b.eq	0x44  // b.none\n"
-        "  34:	924014e7 	and	x7, x7, #0x3f\n"
-        "  38:	f10060ff 	cmp	x7, #0x18\n"
-        "  3c:	54000040 	b.eq	0x44  // b.none\n"
-        "  40:	14000041 	b	0x144"
+        "   0:	14000000 	b	0x0\n"
+        "   4:	14000052 	b	0x14c\n"
+        "   8:	f9401807 	ldr	x7, [x0, #48]\n"
+        "   c:	92400ce8 	and	x8, x7, #0xf\n"
+        "  10:	f1003d1f 	cmp	x8, #0xf\n"
+        "  14:	540001c0 	b.eq	0x4c\n"
+        "  18:	924004e8 	and	x8, x7, #0x3\n"
+        "  1c:	f100091f 	cmp	x8, #0x2\n"
+        "  20:	54000040 	b.eq	0x28\n"
+        "  24:	1400004a 	b	0x14c\n"
+        "  28:	927ef4e7 	and	x7, x7, #0xfffffffffffffffc\n"
+        "  2c:	f94000e7 	ldr	x7, [x7]\n"
+        "  30:	924014e8 	and	x8, x7, #0x3f\n"
+        "  34:	f100211f 	cmp	x8, #0x8\n"
+        "  38:	540000a0 	b.eq	0x4c\n"
+        "  3c:	924014e7 	and	x7, x7, #0x3f\n"
+        "  40:	f10060ff 	cmp	x7, #0x18\n"
+        "  44:	54000040 	b.eq	0x4c\n"
+        "  48:	14000041 	b	0x14c"
     >>,
     ?assertEqual(dump_to_bin(Dump), Stream).
 
 is_boolean_test() ->
     State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)),
+    State1 = ?BACKEND:jump_table(State0, 1),
     Label = 1,
-    {State1, Reg} = ?BACKEND:move_to_native_register(State0, {x_reg, 0}),
-    State2 = ?BACKEND:if_block(State1, {Reg, '!=', ?TRUE_ATOM}, fun(BSt0) ->
+    {State2, Reg} = ?BACKEND:move_to_native_register(State1, {x_reg, 0}),
+    State3 = ?BACKEND:if_block(State2, {Reg, '!=', ?TRUE_ATOM}, fun(BSt0) ->
         ?BACKEND:if_block(BSt0, {Reg, '!=', ?FALSE_ATOM}, fun(BSt1) ->
             ?BACKEND:jump_to_label(BSt1, Label)
         end)
     end),
-    State3 = ?BACKEND:free_native_registers(State2, [Reg]),
-    ?BACKEND:assert_all_native_free(State3),
-    Offset = ?BACKEND:offset(State3),
-    State4 = ?BACKEND:add_label(State3, Label, Offset + 16#100),
+    State4 = ?BACKEND:free_native_registers(State3, [Reg]),
+    ?BACKEND:assert_all_native_free(State4),
+    Offset = ?BACKEND:offset(State4),
+    State5 = ?BACKEND:add_label(State4, Label, Offset + 16#100),
+    State6 = ?BACKEND:update_branches(State5),
+    Stream = ?BACKEND:stream(State6),
+    Dump = <<
+        "   0:	14000000 	b	0x0\n"
+        "   4:	14000047 	b	0x120\n"
+        "   8:	f9401807 	ldr	x7, [x0, #48]\n"
+        "   c:	f1012cff 	cmp	x7, #0x4b\n"
+        "  10:	54000080 	b.eq	0x20\n"
+        "  14:	f1002cff 	cmp	x7, #0xb\n"
+        "  18:	54000040 	b.eq	0x20\n"
+        "  1c:	14000041 	b	0x120"
+    >>,
+    ?assertEqual(dump_to_bin(Dump), Stream).
+
+%% Test OP_WAIT_TIMEOUT pattern
+wait_timeout_test() ->
+    State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)),
+
+    Label = 42,
+    {State1, OffsetRef0} = ?BACKEND:set_continuation_to_offset(State0),
+    {State2, TimeoutReg} = ?BACKEND:move_to_native_register(State1, 5000),
+    State3 = ?BACKEND:call_primitive_last(State2, ?PRIM_WAIT_TIMEOUT, [
+        ctx, jit_state, {free, TimeoutReg}, Label
+    ]),
+    State4 = ?BACKEND:add_label(State3, OffsetRef0),
+    State5 = ?BACKEND:continuation_entry_point(State4),
+    {State6, ResultReg0} = ?BACKEND:call_primitive(State5, ?PRIM_PROCESS_SIGNAL_MESSAGES, [
+        ctx, jit_state
+    ]),
+    State7 = ?BACKEND:return_if_not_equal_to_ctx(State6, {free, ResultReg0}),
+    % The literal 2 passed below is ?WAITING_TIMEOUT_EXPIRED
+    {State8, ResultReg1} = ?BACKEND:call_primitive(State7, ?PRIM_CONTEXT_GET_FLAGS, [ctx, 2]),
+    State9 = ?BACKEND:if_block(State8, {{free, ResultReg1}, '==', 0}, fun(BlockSt) ->
+        ?BACKEND:call_primitive_last(BlockSt, ?PRIM_WAIT_TIMEOUT_TRAP_HANDLER, [
+            ctx, jit_state, Label
+        ])
+    end),
+    State10 = ?BACKEND:update_branches(State9),
+
+    Stream = ?BACKEND:stream(State10),
+    Dump = <<
+        "   0:	100000e7 	adr	x7, 0x1c\n"
+        "   4:	f9000427 	str	x7, [x1, #8]\n"
+        "   8:	d2827107 	mov	x7, #0x1388                	// #5000\n"
+        "   c:	f9407848 	ldr	x8, [x2, #240]\n"
+        "  10:	aa0703e2 	mov	x2, x7\n"
+        "  14:	d2800543 	mov	x3, #0x2a                  	// #42\n"
+        "  18:	d61f0100 	br	x8\n"
+        "  1c:	f9405450 	ldr	x16, [x2, #168]\n"
+        "  20:	a9bf03fe 	stp	x30, x0, [sp, #-16]!\n"
+        "  24:	a9bf0be1 	stp	x1, x2, [sp, #-16]!\n"
+        "  28:	d63f0200 	blr	x16\n"
+        "  2c:	aa0003e7 	mov	x7, x0\n"
+        "  30:	a8c10be1 	ldp	x1, x2, [sp], #16\n"
+        "  34:	a8c103fe 	ldp	x30, x0, [sp], #16\n"
+        "  38:	eb0000ff 	cmp	x7, x0\n"
+        "  3c:	54000060 	b.eq	0x48  // b.none\n"
+        "  40:	aa0703e0 	mov	x0, x7\n"
+        "  44:	d65f03c0 	ret\n"
+        "  48:	f9408450 	ldr	x16, [x2, #264]\n"
+        "  4c:	a9bf03fe 	stp	x30, x0, [sp, #-16]!\n"
+        "  50:	a9bf0be1 	stp	x1, x2, [sp, #-16]!\n"
+        "  54:	d2800041 	mov	x1, #0x2                   	// #2\n"
+        "  58:	d63f0200 	blr	x16\n"
+        "  5c:	aa0003e7 	mov	x7, x0\n"
+        "  60:	a8c10be1 	ldp	x1, x2, [sp], #16\n"
+        "  64:	a8c103fe 	ldp	x30, x0, [sp], #16\n"
+        "  68:	b5000087 	cbnz	x7, 0x78\n"
+        "  6c:	f9407c47 	ldr	x7, [x2, #248]\n"
+        "  70:	d2800542 	mov	x2, #0x2a                  	// #42\n"
+        "  74:	d61f00e0 	br	x7"
+    >>,
+    ?assertEqual(dump_to_bin(Dump), Stream).
+
+%% Test OP_WAIT pattern that uses set_continuation_to_label
+wait_test() ->
+    State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)),
+
+    State1 = ?BACKEND:jump_table(State0, 5),
+    State2 = ?BACKEND:add_label(State1, 1),
+    Label = 2,
+    State3 = ?BACKEND:set_continuation_to_label(State2, Label),
+    State4 = ?BACKEND:call_primitive_last(State3, ?PRIM_SCHEDULE_WAIT_CP, [ctx, jit_state]),
+    State5 = ?BACKEND:add_label(State4, Label, 16#100),
+    State6 = ?BACKEND:update_branches(State5),
+
+    Stream = ?BACKEND:stream(State6),
+    Dump = <<
+        "   0:	14000000 	b	0x0\n"
+        "   4:	14000005 	b	0x18\n"
+        "   8:	1400003e 	b	0x100\n"
+        "   c:	14000000 	b	0xc\n"
+        "  10:	14000000 	b	0x10\n"
+        "  14:	14000000 	b	0x14\n"
+        "  18:	10000747 	adr	x7, 0x100\n"
+        "  1c:	f9000427 	str	x7, [x1, #8]\n"
+        "  20:	f9407447 	ldr	x7, [x2, #232]\n"
+        "  24:	d61f00e0 	br	x7"
+    >>,
+    ?assertEqual(dump_to_bin(Dump), Stream).
+
+%% Test set_continuation_to_label with known label
+wait_known_test() ->
+    State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)),
+
+    State1 = ?BACKEND:jump_table(State0, 5),
+    State2 = ?BACKEND:add_label(State1, 1),
+    Label = 2,
+    State3 = ?BACKEND:add_label(State2, Label, 16#100),
+    State4 = ?BACKEND:set_continuation_to_label(State3, Label),
+    State5 = ?BACKEND:call_primitive_last(State4, ?PRIM_SCHEDULE_WAIT_CP, [ctx, jit_state]),
+    State6 = ?BACKEND:update_branches(State5),
+
+    Stream = ?BACKEND:stream(State6),
+    Dump = <<
+        "   0:	14000000 	b	0x0\n"
+        "   4:	14000005 	b	0x18\n"
+        "   8:	1400003e 	b	0x100\n"
+        "   c:	14000000 	b	0xc\n"
+        "  10:	14000000 	b	0x10\n"
+        "  14:	14000000 	b	0x14\n"
+        "  18:	10000747 	adr	x7, 0x100\n"
+        "  1c:	f9000427 	str	x7, [x1, #8]\n"
+        "  20:	f9407447 	ldr	x7, [x2, #232]\n"
+        "  24:	d61f00e0 	br	x7"
+    >>,
+    ?assertEqual(dump_to_bin(Dump), Stream).
+
+return_labels_and_lines_test() ->
+    State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)),
+    State1 = ?BACKEND:jump_table(State0, 2),
+
+    % Test return_labels_and_lines with some sample labels and lines
+    State2 = ?BACKEND:add_label(State1, 1, 16),
+    % {Line, Offset} pairs
+    SortedLines = [{10, 16}, {20, 32}],
+
+    State3 = ?BACKEND:add_label(State2, 0),
+
+    State4 = ?BACKEND:return_labels_and_lines(State3, SortedLines),
     State5 = ?BACKEND:update_branches(State4),
     Stream = ?BACKEND:stream(State5),
-    Offset = ?BACKEND:offset(State3),
+
+    ?assert(byte_size(Stream) >= 44),
+
     Dump = <<
-        "   0:	f9401807 	ldr	x7, [x0, #48]\n"
-        "   4:	f1012cff 	cmp	x7, #0x4b\n"
-        "   8:	54000080 	b.eq	0x18  // b.none\n"
-        "   c:	f1002cff 	cmp	x7, #0xb\n"
-        "  10:	54000040 	b.eq	0x18  // b.none\n"
-        "  14:	14000041 	b	0x118"
+        "   0:	14000003 	b	0xc\n"
+        "   4:	14000003 	b	0x10\n"
+        "   8:	14000000 	b	0x8\n"
+        "   c:	10000040 	adr	x0, 0x14\n"
+        "  10:	d65f03c0 	ret\n"
+        "  14:	00000200 	.inst	0x00000200\n"
+        "  18:	0c000000 	st4	{v0.8b-v3.8b}, [x0]\n"
+        "  1c:	00000100 	.inst	0x00000100\n"
+        "  20:	02001000 	.inst	0x02001000\n"
+        "  24:	00000a00 	.inst	0x00000a00\n"
+        "  28:	14001000 	b	0x4028\n"
+        "  2c:	20000000 	.inst	0x20000000"
+    >>,
+    ?assertEqual(dump_to_bin(Dump), Stream).
+
+%% Test call_func_ptr with a {free, {x_reg, X}} argument (pointer obtained via call_primitive)
+gc_bif2_test() ->
+    State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)),
+    {State1, FuncPtr} = ?BACKEND:call_primitive(State0, ?PRIM_GET_IMPORTED_BIF, [jit_state, 42]),
+    {State2, _ResultReg} = ?BACKEND:call_func_ptr(State1, {free, FuncPtr}, [
+        ctx, 0, 3, {y_reg, 0}, {free, {x_reg, 0}}
+    ]),
+
+    Stream = ?BACKEND:stream(State2),
+    Dump = <<
+        "   0:	f9402050 	ldr	x16, [x2, #64]\n"
+        "   4:	a9bf03fe 	stp	x30, x0, [sp, #-16]!\n"
+        "   8:	a9bf0be1 	stp	x1, x2, [sp, #-16]!\n"
+        "   c:	aa0103e0 	mov	x0, x1\n"
+        "  10:	d2800541 	mov	x1, #0x2a                  	// #42\n"
+        "  14:	d63f0200 	blr	x16\n"
+        "  18:	aa0003e7 	mov	x7, x0\n"
+        "  1c:	a8c10be1 	ldp	x1, x2, [sp], #16\n"
+        "  20:	a8c103fe 	ldp	x30, x0, [sp], #16\n"
+        "  24:	a9bf03fe 	stp	x30, x0, [sp, #-16]!\n"
+        "  28:	a9bf0be1 	stp	x1, x2, [sp, #-16]!\n"
+        "  2c:	d2800001 	mov	x1, #0x0                   	// #0\n"
+        "  30:	d2800062 	mov	x2, #0x3                   	// #3\n"
+        "  34:	f9401403 	ldr	x3, [x0, #40]\n"
+        "  38:	f9400063 	ldr	x3, [x3]\n"
+        "  3c:	f9401804 	ldr	x4, [x0, #48]\n"
+        "  40:	d63f00e0 	blr	x7\n"
+        "  44:	aa0003e7 	mov	x7, x0\n"
+        "  48:	a8c10be1 	ldp	x1, x2, [sp], #16\n"
+        "  4c:	a8c103fe 	ldp	x30, x0, [sp], #16"
+    >>,
+    ?assertEqual(dump_to_bin(Dump), Stream).
+
+%% Test case where parameter value is in r1
+memory_ensure_free_with_roots_test() ->
+    State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)),
+    {State1, _FuncPtr} = ?BACKEND:call_primitive(State0, ?PRIM_MEMORY_ENSURE_FREE_WITH_ROOTS, [
+        ctx, jit_state, {free, r1}, 4, 1
+    ]),
+
+    Stream = ?BACKEND:stream(State1),
+    Dump = <<
+        "   0:	f940b050 	ldr	x16, [x2, #352]\n"
+        "   4:	a9bf03fe 	stp	x30, x0, [sp, #-16]!\n"
+        "   8:	a9bf0be1 	stp	x1, x2, [sp, #-16]!\n"
+        "   c:	aa0103e2 	mov	x2, x1\n"
+        "  10:	d2800083 	mov	x3, #0x4                   	// #4\n"
+        "  14:	d2800024 	mov	x4, #0x1                   	// #1\n"
+        "  18:	d63f0200 	blr	x16\n"
+        "  1c:	aa0003e7 	mov	x7, x0\n"
+        "  20:	a8c10be1 	ldp	x1, x2, [sp], #16\n"
+        "  24:	a8c103fe 	ldp	x30, x0, [sp], #16"
     >>,
     ?assertEqual(dump_to_bin(Dump), Stream).
 
@@ -1081,7 +1499,7 @@ call_fun_test() ->
             ])
         end
     ),
-    State5 = ?BACKEND:and_(State4, RegCopy, ?TERM_PRIMARY_CLEAR_MASK),
+    {State5, RegCopy} = ?BACKEND:and_(State4, {free, RegCopy}, ?TERM_PRIMARY_CLEAR_MASK),
     State6 = ?BACKEND:move_array_element(State5, RegCopy, 0, RegCopy),
     State7 = ?BACKEND:if_block(
         State6, {RegCopy, '&', ?TERM_BOXED_TAG_MASK, '!=', ?TERM_BOXED_FUN}, fun(BSt0) ->
@@ -1536,8 +1954,8 @@ move_to_array_element_test_() ->
                 end),
                 %% move_to_array_element/5: x_reg to reg[x+offset]
                 ?_test(begin
-                    State1 = setelement(6, State0, ?BACKEND:available_regs(State0) -- [r8, r9]),
-                    State2 = setelement(7, State1, [r8, r9]),
+                    State1 = setelement(7, State0, ?BACKEND:available_regs(State0) -- [r8, r9]),
+                    State2 = setelement(8, State1, [r8, r9]),
                     [r8, r9] = ?BACKEND:used_regs(State2),
                     State3 = ?BACKEND:move_to_array_element(State2, {x_reg, 0}, r8, r9, 1),
                     Stream = ?BACKEND:stream(State3),
@@ -1550,8 +1968,8 @@ move_to_array_element_test_() ->
                 end),
                 %% move_to_array_element/5: imm to reg[x+offset]
                 ?_test(begin
-                    State1 = setelement(6, State0, ?BACKEND:available_regs(State0) -- [r8, r9]),
-                    State2 = setelement(7, State1, [r8, r9]),
+                    State1 = setelement(7, State0, ?BACKEND:available_regs(State0) -- [r8, r9]),
+                    State2 = setelement(8, State1, [r8, r9]),
                     [r8, r9] = ?BACKEND:used_regs(State2),
                     State3 = ?BACKEND:move_to_array_element(State2, 42, r8, r9, 1),
                     Stream = ?BACKEND:stream(State3),
@@ -1662,6 +2080,66 @@ move_to_native_register_test_() ->
             ]
         end}.
 
+add_test0(State0, Reg, Imm, Dump) ->
+    State1 = ?BACKEND:add(State0, Reg, Imm),
+    Stream = ?BACKEND:stream(State1),
+    ?assertEqual(dump_to_bin(Dump), Stream).
+
+add_test_() ->
+    {setup,
+        fun() ->
+            ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0))
+        end,
+        fun(State0) ->
+            [
+                ?_test(begin
+                    add_test0(State0, r2, 2, <<
+                        "   0:	91000842 	add	x2, x2, #0x2"
+                    >>)
+                end),
+                ?_test(begin
+                    add_test0(State0, r2, 256, <<
+                        "   0:	91040042 	add	x2, x2, #0x100"
+                    >>)
+                end),
+                ?_test(begin
+                    add_test0(State0, r2, r3, <<
+                        "   0:	8b030042 	add	x2, x2, x3"
+                    >>)
+                end)
+            ]
+        end}.
+
+sub_test0(State0, Reg, Imm, Dump) ->
+    State1 = ?BACKEND:sub(State0, Reg, Imm),
+    Stream = ?BACKEND:stream(State1),
+    ?assertEqual(dump_to_bin(Dump), Stream).
+
+sub_test_() ->
+    {setup,
+        fun() ->
+            ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0))
+        end,
+        fun(State0) ->
+            [
+                ?_test(begin
+                    sub_test0(State0, r2, 2, <<
+                        "   0:	d1000842 	sub	x2, x2, #0x2"
+                    >>)
+                end),
+                ?_test(begin
+                    sub_test0(State0, r2, 256, <<
+                        "   0:	d1040042 	sub	x2, x2, #0x100"
+                    >>)
+                end),
+                ?_test(begin
+                    sub_test0(State0, r2, r3, <<
+                        "   0:	cb030042 	sub	x2, x2, x3"
+                    >>)
+                end)
+            ]
+        end}.
+
 mul_test0(State0, Reg, Imm, Dump) ->
     State1 = ?BACKEND:mul(State0, Reg, Imm),
     Stream = ?BACKEND:stream(State1),
diff --git a/tests/libs/jit/jit_armv6m_tests.erl b/tests/libs/jit/jit_armv6m_tests.erl
index ceaf926d7d..50098b44cf 100644
--- a/tests/libs/jit/jit_armv6m_tests.erl
+++ b/tests/libs/jit/jit_armv6m_tests.erl
@@ -32,7 +32,7 @@
 -define(BACKEND, jit_armv6m).
 
 % disassembly obtained with:
-% arm-elf-objdump -b binary -D dump.bin -M arm
+% arm-elf-objdump -D -b binary -marm --disassembler-options=force-thumb -z dump.bin
 
 call_primitive_0_test() ->
     State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)),
@@ -107,7 +107,7 @@ call_primitive_6_args_test() ->
     State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)),
     % Get bin_ptr from x_reg 0 (similar to get_list_test pattern)
     {State1, RegA} = ?BACKEND:move_to_native_register(State0, {x_reg, 0}),
-    State2 = ?BACKEND:and_(State1, RegA, ?TERM_PRIMARY_CLEAR_MASK),
+    {State2, RegA} = ?BACKEND:and_(State1, {free, RegA}, ?TERM_PRIMARY_CLEAR_MASK),
     % Get another register for the last parameter to test {free, Reg} handling
     {State3, OtherReg} = ?BACKEND:move_to_native_register(State2, {x_reg, 1}),
     % Call PRIM_BITSTRING_EXTRACT_INTEGER with 6 arguments
@@ -312,13 +312,13 @@ call_primitive_last_5_args_test() ->
         "   6:	9700      	str	r7, [sp, #0]\n"
         "   8:	9902      	ldr	r1, [sp, #8]\n"
         "   a:	2204      	movs	r2, #4\n"
-        "   c:	4b00      	ldr	r3, [pc, #0]	; (0x10)\n"
-        "   e:	e001      	b.n	0x14\n"
-        "  10:	02cb      	lsrs	r3, r1, #16\n"
-        "  12:	0000      	movs	r0, r0\n"
-        "  14:	47b0      	blx	r6\n"
-        "  16:	b002      	add	sp, #8\n"
-        "  18:	bdf2      	pop	{r1, r4, r5, r6, r7, pc}"
+        "   c:	4b01      	ldr	r3, [pc, #4]	; (0x14)\n"
+        "   e:	47b0      	blx	r6\n"
+        "  10:	b002      	add	sp, #8\n"
+        "  12:	bdf2      	pop	{r1, r4, r5, r6, r7, pc}\n"
+        % Literal pool
+        "  14:	02cb      	lsls	r3, r1, #11\n"
+        "  16:	0000      	movs	r0, r0"
     >>,
     ?assertEqual(dump_to_bin(Dump), Stream).
 
@@ -535,17 +535,19 @@ if_block_test_() ->
                             ?BACKEND:add(BSt0, RegB, 2)
                         end
                     ),
-                    Stream = ?BACKEND:stream(State1),
+                    State2 = ?BACKEND:jump_to_offset(State1, 16#100),
+                    Stream = ?BACKEND:stream(State2),
                     Dump = <<
                         "   0:	6987      	ldr	r7, [r0, #24]\n"
                         "   2:	69c6      	ldr	r6, [r0, #28]\n"
-                        "   4:	4d00      	ldr	r5, [pc, #0]	; (0x8)\n"
-                        "   6:	da04      	bge.n	0x12\n"
-                        "   8:	0400      	lsls	r0, r0, #16\n"
-                        "   a:	0000      	movs	r0, r0\n"
-                        "   c:	42af      	cmp	r7, r5\n"
-                        "   e:	dafe      	bge.n	0xe\n"
-                        "  10:	3602      	adds	r6, #2"
+                        "   4:	4d02      	ldr	r5, [pc, #8]	; (0x10)\n"
+                        "   6:	da01      	bge.n	0xc\n"
+                        "   8:	ffff      	; to be rewritten\n"
+                        "   a:	3602      	adds	r6, #2\n"
+                        "   c:	e078      	b.n	0x100\n"
+                        "   e:	0000      	movs	r0, r0\n"
+                        "  10:	0400      	lsls	r0, r0, #16\n"
+                        "  12:	0000      	movs	r0, r0"
                     >>,
                     ?assertEqual(dump_to_bin(Dump), Stream),
                     ?assertEqual([RegB, RegA], ?BACKEND:used_regs(State1))
@@ -713,17 +715,19 @@ if_block_test_() ->
                             ?BACKEND:add(BSt0, RegB, 1)
                         end
                     ),
-                    Stream = ?BACKEND:stream(State1),
+                    State2 = ?BACKEND:jump_to_offset(State1, 16#100),
+                    Stream = ?BACKEND:stream(State2),
                     Dump = <<
                         "   0:	6987      	ldr	r7, [r0, #24]\n"
                         "   2:	69c6      	ldr	r6, [r0, #28]\n"
-                        "   4:	4d00      	ldr	r5, [pc, #0]	; (0x8)\n"
-                        "   6:	e001      	b.n	0xc\n"
-                        "   8:	07cb      	lsls	r3, r1, #31\n"
-                        "   a:	0000      	movs	r0, r0\n"
-                        "   c:	42af      	cmp	r7, r5\n"
-                        "   e:	d000      	beq.n	0x12\n"
-                        "  10:	3601      	adds	r6, #1"
+                        "   4:	4d02      	ldr	r5, [pc, #8]	; (0x10)\n"
+                        "   6:	42af      	cmp	r7, r5\n"
+                        "   8:	d000      	beq.n	0xc\n"
+                        "   a:	3601      	adds	r6, #1\n"
+                        "   c:	e078      	b.n	0x100\n"
+                        "   e:	0000      	movs	r0, r0\n"
+                        "  10:	07cb      	lsls	r3, r1, #31\n"
+                        "  12:	0000      	movs	r0, r0"
                     >>,
                     ?assertEqual(dump_to_bin(Dump), Stream)
                 end),
@@ -1063,6 +1067,94 @@ if_block_test_() ->
                     >>,
                     ?assertEqual(dump_to_bin(Dump), Stream),
                     ?assertEqual([RegB], ?BACKEND:used_regs(State1))
+                end),
+                ?_test(begin
+                    State1 = ?BACKEND:if_block(
+                        State0,
+                        {100, '<', RegA},
+                        fun(BSt0) ->
+                            ?BACKEND:add(BSt0, RegB, 2)
+                        end
+                    ),
+                    Stream = ?BACKEND:stream(State1),
+                    Dump = <<
+                        "   0:	6987      	ldr	r7, [r0, #24]\n"
+                        "   2:	69c6      	ldr	r6, [r0, #28]\n"
+                        "   4:	2f64      	cmp	r7, #100	; 0x64\n"
+                        "   6:	dd00      	ble.n	0xa\n"
+                        "   8:	3602      	adds	r6, #2"
+                    >>,
+                    ?assertEqual(dump_to_bin(Dump), Stream),
+                    ?assertEqual([RegB, RegA], ?BACKEND:used_regs(State1))
+                end),
+                ?_test(begin
+                    State1 = ?BACKEND:if_block(
+                        State0,
+                        {100, '<', {free, RegA}},
+                        fun(BSt0) ->
+                            ?BACKEND:add(BSt0, RegB, 2)
+                        end
+                    ),
+                    Stream = ?BACKEND:stream(State1),
+                    Dump = <<
+                        "   0:	6987      	ldr	r7, [r0, #24]\n"
+                        "   2:	69c6      	ldr	r6, [r0, #28]\n"
+                        "   4:	2f64      	cmp	r7, #100	; 0x64\n"
+                        "   6:	dd00      	ble.n	0xa\n"
+                        "   8:	3602      	adds	r6, #2"
+                    >>,
+                    ?assertEqual(dump_to_bin(Dump), Stream),
+                    ?assertEqual([RegB], ?BACKEND:used_regs(State1))
+                end),
+                ?_test(begin
+                    State1 = ?BACKEND:if_block(
+                        State0,
+                        {1024, '<', RegA},
+                        fun(BSt0) ->
+                            ?BACKEND:add(BSt0, RegB, 2)
+                        end
+                    ),
+                    State2 = ?BACKEND:jump_to_offset(State1, 16#100),
+                    Stream = ?BACKEND:stream(State2),
+                    Dump = <<
+                        "   0:	6987      	ldr	r7, [r0, #24]\n"
+                        "   2:	69c6      	ldr	r6, [r0, #28]\n"
+                        "   4:	4d02      	ldr	r5, [pc, #8]	; (0x10)\n"
+                        "   6:	dd01      	ble.n	0xc\n"
+                        "   8:	ffff      	; to be rewritten\n"
+                        "   a:	3602      	adds	r6, #2\n"
+                        "   c:	e078      	b.n	0x100\n"
+                        "   e:	0000      	movs	r0, r0\n"
+                        "  10:	0400      	lsls	r0, r0, #16\n"
+                        "  12:	0000      	movs	r0, r0"
+                    >>,
+                    ?assertEqual(dump_to_bin(Dump), Stream),
+                    ?assertEqual([RegB, RegA], ?BACKEND:used_regs(State1))
+                end),
+                ?_test(begin
+                    State1 = ?BACKEND:if_block(
+                        State0,
+                        {1024, '<', {free, RegA}},
+                        fun(BSt0) ->
+                            ?BACKEND:add(BSt0, RegB, 2)
+                        end
+                    ),
+                    State2 = ?BACKEND:jump_to_offset(State1, 16#100),
+                    Stream = ?BACKEND:stream(State2),
+                    Dump = <<
+                        "   0:	6987      	ldr	r7, [r0, #24]\n"
+                        "   2:	69c6      	ldr	r6, [r0, #28]\n"
+                        "   4:	4d02      	ldr	r5, [pc, #8]	; (0x10)\n"
+                        "   6:	dd01      	ble.n	0xc\n"
+                        "   8:	ffff      	; to be rewritten\n"
+                        "   a:	3602      	adds	r6, #2\n"
+                        "   c:	e078      	b.n	0x100\n"
+                        "   e:	0000      	movs	r0, r0\n"
+                        "  10:	0400      	lsls	r0, r0, #16\n"
+                        "  12:	0000      	movs	r0, r0"
+                    >>,
+                    ?assertEqual(dump_to_bin(Dump), Stream),
+                    ?assertEqual([RegB], ?BACKEND:used_regs(State1))
                 end)
             ]
         end}.
@@ -1395,35 +1487,33 @@ call_only_or_schedule_next_and_label_relocation_large_gap_test() ->
         " 128:	3f01      	subs	r7, #1\n"
         " 12a:	60b7      	str	r7, [r6, #8]\n"
         " 12c:	d004      	beq.n	0x138\n"
-        " 12e:	e011      	b.n	0x154\n"
+        " 12e:	e00f      	b.n	0x150\n"
         " 130:	46c0      	nop			; (mov r8, r8)\n"
         " 132:	46c0      	nop			; (mov r8, r8)\n"
         " 134:	46c0      	nop			; (mov r8, r8)\n"
         " 136:	46c0      	nop			; (mov r8, r8)\n"
         " 138:	a700      	add	r7, pc, #0	; (adr r7, 0x13c)\n"
-        " 13a:	4e01      	ldr	r6, [pc, #4]	; (0x140)\n"
-        " 13c:	e002      	b.n	0x144\n"
-        " 13e:	0000      	movs	r0, r0\n"
-        " 140:	fedd ffff 	stcl2	15, cr13, [sp, #-1020]	; 0xfffffc04\n"
-        " 144:	19f6      	adds	r6, r6, r7\n"
-        " 146:	9f00      	ldr	r7, [sp, #0]\n"
-        " 148:	607e      	str	r6, [r7, #4]\n"
-        " 14a:	6897      	ldr	r7, [r2, #8]\n"
-        " 14c:	9e05      	ldr	r6, [sp, #20]\n"
-        " 14e:	9705      	str	r7, [sp, #20]\n"
-        " 150:	46b6      	mov	lr, r6\n"
-        " 152:	bdf2      	pop	{r1, r4, r5, r6, r7, pc}\n"
-        " 154:	6817      	ldr	r7, [r2, #0]\n"
-        " 156:	9e05      	ldr	r6, [sp, #20]\n"
-        " 158:	9705      	str	r7, [sp, #20]\n"
-        " 15a:	46b6      	mov	lr, r6\n"
-        " 15c:	bdf2      	pop	{r1, r4, r5, r6, r7, pc}\n"
-        " 15e:	46c0      	nop			; (mov r8, r8)\n"
-        " 160:	6857      	ldr	r7, [r2, #4]\n"
-        " 162:	9e05      	ldr	r6, [sp, #20]\n"
-        " 164:	9705      	str	r7, [sp, #20]\n"
-        " 166:	46b6      	mov	lr, r6\n"
-        " 168:	bdf2      	pop	{r1, r4, r5, r6, r7, pc}"
+        " 13a:	4e04      	ldr	r6, [pc, #16]	; (0x14c)\n"
+        " 13c:	19f6      	adds	r6, r6, r7\n"
+        " 13e:	9f00      	ldr	r7, [sp, #0]\n"
+        " 140:	607e      	str	r6, [r7, #4]\n"
+        " 142:	6897      	ldr	r7, [r2, #8]\n"
+        " 144:	9e05      	ldr	r6, [sp, #20]\n"
+        " 146:	9705      	str	r7, [sp, #20]\n"
+        " 148:	46b6      	mov	lr, r6\n"
+        " 14a:	bdf2      	pop	{r1, r4, r5, r6, r7, pc}\n"
+        " 14c:	fedd ffff 	mrc2	15, 6, pc, cr13, cr15, {7}\n"
+        " 150:	6817      	ldr	r7, [r2, #0]\n"
+        " 152:	9e05      	ldr	r6, [sp, #20]\n"
+        " 154:	9705      	str	r7, [sp, #20]\n"
+        " 156:	46b6      	mov	lr, r6\n"
+        " 158:	bdf2      	pop	{r1, r4, r5, r6, r7, pc}\n"
+        " 15a:	46c0      	nop			; (mov r8, r8)\n"
+        " 15c:	6857      	ldr	r7, [r2, #4]\n"
+        " 15e:	9e05      	ldr	r6, [sp, #20]\n"
+        " 160:	9705      	str	r7, [sp, #20]\n"
+        " 162:	46b6      	mov	lr, r6\n"
+        " 164:	bdf2      	pop	{r1, r4, r5, r6, r7, pc}"
     >>,
     {_, RelevantBinary} = split_binary(Stream, 16#124),
     ?assertEqual(dump_to_bin(Dump), RelevantBinary).
@@ -1459,35 +1549,33 @@ call_only_or_schedule_next_and_label_relocation_large_gap_unaligned_test() ->
         " 128:	3f01      	subs	r7, #1\n"
         " 12a:	60b7      	str	r7, [r6, #8]\n"
         " 12c:	d004      	beq.n	0x138\n"
-        " 12e:	e011      	b.n	0x154\n"
+        " 12e:	e00f      	b.n	0x150\n"
         " 130:	46c0      	nop			; (mov r8, r8)\n"
         " 132:	46c0      	nop			; (mov r8, r8)\n"
         " 134:	46c0      	nop			; (mov r8, r8)\n"
         " 136:	46c0      	nop			; (mov r8, r8)\n"
         " 138:	a700      	add	r7, pc, #0	; (adr r7, 0x13c)\n"
-        " 13a:	4e01      	ldr	r6, [pc, #4]	; (0x140)\n"
-        " 13c:	e002      	b.n	0x144\n"
-        " 13e:	0000      	movs	r0, r0\n"
-        " 140:	fedd ffff 	stcl2	15, cr13, [sp, #-1020]	; 0xfffffc04\n"
-        " 144:	19f6      	adds	r6, r6, r7\n"
-        " 146:	9f00      	ldr	r7, [sp, #0]\n"
-        " 148:	607e      	str	r6, [r7, #4]\n"
-        " 14a:	6897      	ldr	r7, [r2, #8]\n"
-        " 14c:	9e05      	ldr	r6, [sp, #20]\n"
-        " 14e:	9705      	str	r7, [sp, #20]\n"
-        " 150:	46b6      	mov	lr, r6\n"
-        " 152:	bdf2      	pop	{r1, r4, r5, r6, r7, pc}\n"
-        " 154:	6817      	ldr	r7, [r2, #0]\n"
-        " 156:	9e05      	ldr	r6, [sp, #20]\n"
-        " 158:	9705      	str	r7, [sp, #20]\n"
-        " 15a:	46b6      	mov	lr, r6\n"
-        " 15c:	bdf2      	pop	{r1, r4, r5, r6, r7, pc}\n"
-        " 15e:	46c0      	nop			; (mov r8, r8)\n"
-        " 160:	6857      	ldr	r7, [r2, #4]\n"
-        " 162:	9e05      	ldr	r6, [sp, #20]\n"
-        " 164:	9705      	str	r7, [sp, #20]\n"
-        " 166:	46b6      	mov	lr, r6\n"
-        " 168:	bdf2      	pop	{r1, r4, r5, r6, r7, pc}"
+        " 13a:	4e04      	ldr	r6, [pc, #16]	; (0x14c)\n"
+        " 13c:	19f6      	adds	r6, r6, r7\n"
+        " 13e:	9f00      	ldr	r7, [sp, #0]\n"
+        " 140:	607e      	str	r6, [r7, #4]\n"
+        " 142:	6897      	ldr	r7, [r2, #8]\n"
+        " 144:	9e05      	ldr	r6, [sp, #20]\n"
+        " 146:	9705      	str	r7, [sp, #20]\n"
+        " 148:	46b6      	mov	lr, r6\n"
+        " 14a:	bdf2      	pop	{r1, r4, r5, r6, r7, pc}\n"
+        " 14c:	fedd ffff 	mrc2	15, 6, pc, cr13, cr15, {7}\n"
+        " 150:	6817      	ldr	r7, [r2, #0]\n"
+        " 152:	9e05      	ldr	r6, [sp, #20]\n"
+        " 154:	9705      	str	r7, [sp, #20]\n"
+        " 156:	46b6      	mov	lr, r6\n"
+        " 158:	bdf2      	pop	{r1, r4, r5, r6, r7, pc}\n"
+        " 15a:	46c0      	nop			; (mov r8, r8)\n"
+        " 15c:	6857      	ldr	r7, [r2, #4]\n"
+        " 15e:	9e05      	ldr	r6, [sp, #20]\n"
+        " 160:	9705      	str	r7, [sp, #20]\n"
+        " 162:	46b6      	mov	lr, r6\n"
+        " 164:	bdf2      	pop	{r1, r4, r5, r6, r7, pc}"
     >>,
     {_, RelevantBinary} = split_binary(Stream, 16#122),
     ?assertEqual(dump_to_bin(Dump), RelevantBinary).
@@ -1517,39 +1605,38 @@ call_bif_with_large_literal_integer_test() ->
             "   c:	bc05      	pop	{r0, r2}\n"
             "   e:	6bd6      	ldr	r6, [r2, #60]	; 0x3c\n"
             "  10:	b4c5      	push	{r0, r2, r6, r7}\n"
-            "  12:	4901      	ldr	r1, [pc, #4]	; (0x18)\n"
-            "  14:	e002      	b.n	0x1c\n"
-            "  16:	0000      	movs	r0, r0\n"
-            "  18:	e895 3b7f 	ldmia.w	r5, {r0, r1, r2, r3, r4, r5, r6, r8, r9, fp, ip, sp}\n"
-            "  1c:	47b0      	blx	r6\n"
-            "  1e:	4605      	mov	r5, r0\n"
-            "  20:	bcc5      	pop	{r0, r2, r6, r7}\n"
-            "  22:	b405      	push	{r0, r2}\n"
-            "  24:	b082      	sub	sp, #8\n"
-            "  26:	9500      	str	r5, [sp, #0]\n"
-            "  28:	2100      	movs	r1, #0\n"
-            "  2a:	2201      	movs	r2, #1\n"
-            "  2c:	6983      	ldr	r3, [r0, #24]\n"
-            "  2e:	47b8      	blx	r7\n"
-            "  30:	4607      	mov	r7, r0\n"
-            "  32:	b002      	add	sp, #8\n"
-            "  34:	bc05      	pop	{r0, r2}\n"
-            "  36:	2f00      	cmp	r7, #0\n"
-            "  38:	d105      	bne.n	0x46\n"
-            "  3a:	6997      	ldr	r7, [r2, #24]\n"
-            "  3c:	223c      	movs	r2, #60	; 0x3c\n"
-            "  3e:	9e05      	ldr	r6, [sp, #20]\n"
-            "  40:	9705      	str	r7, [sp, #20]\n"
-            "  42:	46b6      	mov	lr, r6\n"
-            "  44:	bdf2      	pop	{r1, r4, r5, r6, r7, pc}\n"
-            "  46:	6187      	str	r7, [r0, #24]"
+            "  12:	490b      	ldr	r1, [pc, #44]	; (0x40)\n"
+            "  14:	47b0      	blx	r6\n"
+            "  16:	4605      	mov	r5, r0\n"
+            "  18:	bcc5      	pop	{r0, r2, r6, r7}\n"
+            "  1a:	b405      	push	{r0, r2}\n"
+            "  1c:	b082      	sub	sp, #8\n"
+            "  1e:	9500      	str	r5, [sp, #0]\n"
+            "  20:	2100      	movs	r1, #0\n"
+            "  22:	2201      	movs	r2, #1\n"
+            "  24:	6983      	ldr	r3, [r0, #24]\n"
+            "  26:	47b8      	blx	r7\n"
+            "  28:	4607      	mov	r7, r0\n"
+            "  2a:	b002      	add	sp, #8\n"
+            "  2c:	bc05      	pop	{r0, r2}\n"
+            "  2e:	2f00      	cmp	r7, #0\n"
+            "  30:	d108      	bne.n	0x44\n"
+            "  32:	6997      	ldr	r7, [r2, #24]\n"
+            "  34:	2234      	movs	r2, #52	; 0x34\n"
+            "  36:	9e05      	ldr	r6, [sp, #20]\n"
+            "  38:	9705      	str	r7, [sp, #20]\n"
+            "  3a:	46b6      	mov	lr, r6\n"
+            "  3c:	bdf2      	pop	{r1, r4, r5, r6, r7, pc}\n"
+            "  3e:	0000      	movs	r0, r0\n"
+            "  40:	e895 3b7f 	ldmia.w	r5, {r0, r1, r2, r3, r4, r5, r6, r8, r9, fp, ip, sp}\n"
+            "  44:	6187      	str	r7, [r0, #24]"
         >>,
     ?assertEqual(dump_to_bin(Dump), Stream).
 
 get_list_test() ->
     State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)),
     {State1, Reg} = ?BACKEND:move_to_native_register(State0, {x_reg, 0}),
-    State2 = ?BACKEND:and_(State1, Reg, ?TERM_PRIMARY_CLEAR_MASK),
+    {State2, Reg} = ?BACKEND:and_(State1, {free, Reg}, ?TERM_PRIMARY_CLEAR_MASK),
     State3 = ?BACKEND:move_array_element(State2, Reg, 1, {y_reg, 1}),
     State4 = ?BACKEND:move_array_element(State3, Reg, 0, {y_reg, 0}),
     State5 = ?BACKEND:free_native_registers(State4, [Reg]),
@@ -1580,7 +1667,7 @@ is_integer_test() ->
                     ?BACKEND:jump_to_label(BSt0, Label)
                 end
             ),
-            MSt2 = ?BACKEND:and_(MSt1, Reg, ?TERM_PRIMARY_CLEAR_MASK),
+            {MSt2, Reg} = ?BACKEND:and_(MSt1, {free, Reg}, ?TERM_PRIMARY_CLEAR_MASK),
             MSt3 = ?BACKEND:move_array_element(MSt2, Reg, 0, Reg),
             ?BACKEND:if_block(
                 MSt3,
@@ -1642,7 +1729,7 @@ is_number_test() ->
             BSt1 = cond_jump_to_label(
                 {Reg, '&', ?TERM_PRIMARY_MASK, '!=', ?TERM_PRIMARY_BOXED}, Label, ?BACKEND, BSt0
             ),
-            BSt2 = ?BACKEND:and_(BSt1, Reg, ?TERM_PRIMARY_CLEAR_MASK),
+            {BSt2, Reg} = ?BACKEND:and_(BSt1, {free, Reg}, ?TERM_PRIMARY_CLEAR_MASK),
             BSt3 = ?BACKEND:move_array_element(BSt2, Reg, 0, Reg),
             cond_jump_to_label(
                 {'and', [
@@ -1698,57 +1785,83 @@ is_number_test() ->
 
 is_boolean_test() ->
     State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)),
+    State1 = ?BACKEND:jump_table(State0, 1),
     Label = 1,
-    {State1, Reg} = ?BACKEND:move_to_native_register(State0, {x_reg, 0}),
-    State2 = ?BACKEND:if_block(State1, {Reg, '!=', ?TRUE_ATOM}, fun(BSt0) ->
+    {State2, Reg} = ?BACKEND:move_to_native_register(State1, {x_reg, 0}),
+    State3 = ?BACKEND:if_block(State2, {Reg, '!=', ?TRUE_ATOM}, fun(BSt0) ->
         ?BACKEND:if_block(BSt0, {Reg, '!=', ?FALSE_ATOM}, fun(BSt1) ->
             ?BACKEND:jump_to_label(BSt1, Label)
         end)
     end),
-    State3 = ?BACKEND:free_native_registers(State2, [Reg]),
-    ?BACKEND:assert_all_native_free(State3),
-    State4 = ?BACKEND:add_label(State3, Label, 16#100),
-    State5 = ?BACKEND:update_branches(State4),
-    Stream = ?BACKEND:stream(State5),
+    State4 = ?BACKEND:free_native_registers(State3, [Reg]),
+    ?BACKEND:assert_all_native_free(State4),
+    State5 = ?BACKEND:add_label(State4, Label, 16#100),
+    State6 = ?BACKEND:update_branches(State5),
+    Stream = ?BACKEND:stream(State6),
     Dump = <<
-        "   0:	6987      	ldr	r7, [r0, #24]\n"
-        "   2:	2f4b      	cmp	r7, #75	; 0x4b\n"
-        "   4:	d006      	beq.n	0x14\n"
-        "   6:	2f0b      	cmp	r7, #11\n"
-        "   8:	d004      	beq.n	0x14\n"
-        "   a:	e079      	b.n	0x100\n"
-        "   c:	46c0      	nop			; (mov r8, r8)\n"
-        "   e:	46c0      	nop			; (mov r8, r8)\n"
-        "  10:	46c0      	nop			; (mov r8, r8)\n"
-        "  12:	46c0      	nop			; (mov r8, r8)"
+        "   0:	4b01      	ldr	r3, [pc, #4]\n"
+        "   2:	b5f2      	push	{r1, r4, r5, r6, r7, lr}\n"
+        "   4:	449f      	add	pc, r3\n"
+        "   6:	46c0      	nop\n"
+        "   8:	ffff      	.short	0xffff\n"
+        "   a:	ffff      	.short	0xffff\n"
+        "   c:	4b01      	ldr	r3, [pc, #4]\n"
+        "   e:	b5f2      	push	{r1, r4, r5, r6, r7, lr}\n"
+        "  10:	449f      	add	pc, r3\n"
+        "  12:	46c0      	nop\n"
+        "  14:	00ec      	lsls	r4, r5, #3\n"
+        "  16:	0000      	movs	r0, r0\n"
+        "  18:	6987      	ldr	r7, [r0, #24]\n"
+        "  1a:	2f4b      	cmp	r7, #75\n"
+        "  1c:	d006      	beq.n	0x2c\n"
+        "  1e:	2f0b      	cmp	r7, #11\n"
+        "  20:	d004      	beq.n	0x2c\n"
+        "  22:	e06d      	b.n	0x100\n"
+        "  24:	46c0      	nop\n"
+        "  26:	46c0      	nop\n"
+        "  28:	46c0      	nop\n"
+        "  2a:	46c0      	nop"
     >>,
     ?assertEqual(dump_to_bin(Dump), Stream).
 
 is_boolean_far_test() ->
     State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)),
+    State1 = ?BACKEND:jump_table(State0, 1),
     Label = 1,
-    {State1, Reg} = ?BACKEND:move_to_native_register(State0, {x_reg, 0}),
-    State2 = ?BACKEND:if_block(State1, {Reg, '!=', ?TRUE_ATOM}, fun(BSt0) ->
+    {State2, Reg} = ?BACKEND:move_to_native_register(State1, {x_reg, 0}),
+    State3 = ?BACKEND:if_block(State2, {Reg, '!=', ?TRUE_ATOM}, fun(BSt0) ->
         ?BACKEND:if_block(BSt0, {Reg, '!=', ?FALSE_ATOM}, fun(BSt1) ->
             ?BACKEND:jump_to_label(BSt1, Label)
         end)
     end),
-    State3 = ?BACKEND:free_native_registers(State2, [Reg]),
-    ?BACKEND:assert_all_native_free(State3),
-    State4 = ?BACKEND:add_label(State3, Label, 16#1000),
-    State5 = ?BACKEND:update_branches(State4),
-    Stream = ?BACKEND:stream(State5),
+    State4 = ?BACKEND:free_native_registers(State3, [Reg]),
+    ?BACKEND:assert_all_native_free(State4),
+    State5 = ?BACKEND:add_label(State4, Label, 16#1000),
+    State6 = ?BACKEND:update_branches(State5),
+    Stream = ?BACKEND:stream(State6),
     Dump = <<
-        "   0:	6987      	ldr	r7, [r0, #24]\n"
-        "   2:	2f4b      	cmp	r7, #75	; 0x4b\n"
-        "   4:	d006      	beq.n	0x14\n"
-        "   6:	2f0b      	cmp	r7, #11\n"
-        "   8:	d004      	beq.n	0x14\n"
-        "   a:	4e01      	ldr	r6, [pc, #4]	; (0x10)\n"
-        "   c:	447e      	add	r6, pc\n"
-        "   e:	4730      	bx	r6\n"
-        "  10:	0ff1      	lsrs	r0, r6, #31\n"
-        "  12:	0000      	movs	r0, r0"
+        "   0:	4b01      	ldr	r3, [pc, #4]\n"
+        "   2:	b5f2      	push	{r1, r4, r5, r6, r7, lr}\n"
+        "   4:	449f      	add	pc, r3\n"
+        "   6:	46c0      	nop\n"
+        "   8:	ffff      	.short	0xffff\n"
+        "   a:	ffff      	.short	0xffff\n"
+        "   c:	4b01      	ldr	r3, [pc, #4]\n"
+        "   e:	b5f2      	push	{r1, r4, r5, r6, r7, lr}\n"
+        "  10:	449f      	add	pc, r3\n"
+        "  12:	46c0      	nop\n"
+        "  14:	0fec      	lsrs	r4, r5, #31\n"
+        "  16:	0000      	movs	r0, r0\n"
+        "  18:	6987      	ldr	r7, [r0, #24]\n"
+        "  1a:	2f4b      	cmp	r7, #75\n"
+        "  1c:	d006      	beq.n	0x2c\n"
+        "  1e:	2f0b      	cmp	r7, #11\n"
+        "  20:	d004      	beq.n	0x2c\n"
+        "  22:	4e01      	ldr	r6, [pc, #4]\n"
+        "  24:	447e      	add	r6, pc\n"
+        "  26:	4730      	bx	r6\n"
+        "  28:	0fd9      	lsrs	r1, r3, #31\n"
+        "  2a:	0000      	movs	r0, r0"
     >>,
     ?assertEqual(dump_to_bin(Dump), Stream).
 
@@ -1790,29 +1903,42 @@ is_boolean_far_unaligned_test() ->
 
 is_boolean_far_known_test() ->
     State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)),
+    State1 = ?BACKEND:jump_table(State0, 1),
     Label = 1,
-    State1 = ?BACKEND:add_label(State0, Label, 16#1000),
-    {State2, Reg} = ?BACKEND:move_to_native_register(State1, {x_reg, 0}),
-    State3 = ?BACKEND:if_block(State2, {Reg, '!=', ?TRUE_ATOM}, fun(BSt0) ->
+    State2 = ?BACKEND:add_label(State1, Label, 16#1000),
+    {State3, Reg} = ?BACKEND:move_to_native_register(State2, {x_reg, 0}),
+    State4 = ?BACKEND:if_block(State3, {Reg, '!=', ?TRUE_ATOM}, fun(BSt0) ->
         ?BACKEND:if_block(BSt0, {Reg, '!=', ?FALSE_ATOM}, fun(BSt1) ->
             ?BACKEND:jump_to_label(BSt1, Label)
         end)
     end),
-    State4 = ?BACKEND:free_native_registers(State3, [Reg]),
-    ?BACKEND:assert_all_native_free(State4),
-    State5 = ?BACKEND:update_branches(State4),
-    Stream = ?BACKEND:stream(State5),
+    State5 = ?BACKEND:free_native_registers(State4, [Reg]),
+    ?BACKEND:assert_all_native_free(State5),
+    State6 = ?BACKEND:update_branches(State5),
+    Stream = ?BACKEND:stream(State6),
     Dump = <<
-        "   0:	6987      	ldr	r7, [r0, #24]\n"
-        "   2:	2f4b      	cmp	r7, #75	; 0x4b\n"
-        "   4:	d006      	beq.n	0x14\n"
-        "   6:	2f0b      	cmp	r7, #11\n"
-        "   8:	d004      	beq.n	0x14\n"
-        "   a:	4e01      	ldr	r6, [pc, #4]	; (0x10)\n"
-        "   c:	447e      	add	r6, pc\n"
-        "   e:	4730      	bx	r6\n"
-        "  10:	0ff1      	lsrs	r1, r6, #31\n"
-        "  12:	0000      	movs	r0, r0"
+        "   0:	4b01      	ldr	r3, [pc, #4]\n"
+        "   2:	b5f2      	push	{r1, r4, r5, r6, r7, lr}\n"
+        "   4:	449f      	add	pc, r3\n"
+        "   6:	46c0      	nop\n"
+        "   8:	ffff      	.short	0xffff\n"
+        "   a:	ffff      	.short	0xffff\n"
+        "   c:	4b01      	ldr	r3, [pc, #4]\n"
+        "   e:	b5f2      	push	{r1, r4, r5, r6, r7, lr}\n"
+        "  10:	449f      	add	pc, r3\n"
+        "  12:	46c0      	nop\n"
+        "  14:	0fec      	lsrs	r4, r5, #31\n"
+        "  16:	0000      	movs	r0, r0\n"
+        "  18:	6987      	ldr	r7, [r0, #24]\n"
+        "  1a:	2f4b      	cmp	r7, #75\n"
+        "  1c:	d006      	beq.n	0x2c\n"
+        "  1e:	2f0b      	cmp	r7, #11\n"
+        "  20:	d004      	beq.n	0x2c\n"
+        "  22:	4e01      	ldr	r6, [pc, #4]\n"
+        "  24:	447e      	add	r6, pc\n"
+        "  26:	4730      	bx	r6\n"
+        "  28:	0fd9      	lsrs	r1, r3, #31\n"
+        "  2a:	0000      	movs	r0, r0"
     >>,
     ?assertEqual(dump_to_bin(Dump), Stream).
 
@@ -1823,32 +1949,45 @@ is_boolean_far_known_unaligned_test() ->
     TempState = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)),
     TempStream = jit_stream_binary:append(?BACKEND:stream(TempState), PaddingInstruction),
     State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, TempStream),
+    State1 = ?BACKEND:jump_table(State0, 1),
 
     Label = 1,
-    State1 = ?BACKEND:add_label(State0, Label, 16#1000),
-    {State2, Reg} = ?BACKEND:move_to_native_register(State1, {x_reg, 0}),
-    State3 = ?BACKEND:if_block(State2, {Reg, '!=', ?TRUE_ATOM}, fun(BSt0) ->
+    State2 = ?BACKEND:add_label(State1, Label, 16#1000),
+    {State3, Reg} = ?BACKEND:move_to_native_register(State2, {x_reg, 0}),
+    State4 = ?BACKEND:if_block(State3, {Reg, '!=', ?TRUE_ATOM}, fun(BSt0) ->
         ?BACKEND:if_block(BSt0, {Reg, '!=', ?FALSE_ATOM}, fun(BSt1) ->
             ?BACKEND:jump_to_label(BSt1, Label)
         end)
     end),
-    State4 = ?BACKEND:free_native_registers(State3, [Reg]),
-    ?BACKEND:assert_all_native_free(State4),
-    State5 = ?BACKEND:update_branches(State4),
-    Stream = ?BACKEND:stream(State5),
+    State5 = ?BACKEND:free_native_registers(State4, [Reg]),
+    ?BACKEND:assert_all_native_free(State5),
+    State6 = ?BACKEND:update_branches(State5),
+    Stream = ?BACKEND:stream(State6),
     Dump = <<
         "   0:	4770      	bx	lr\n"
-        "   2:	6987      	ldr	r7, [r0, #24]\n"
-        "   4:	2f4b      	cmp	r7, #75	; 0x4b\n"
-        "   6:	d007      	beq.n	0x18\n"
-        "   8:	2f0b      	cmp	r7, #11\n"
-        "   a:	d005      	beq.n	0x18\n"
-        "   c:	4e01      	ldr	r6, [pc, #4]	; (0x14)\n"
-        "   e:	447e      	add	r6, pc\n"
-        "  10:	4730      	bx	r6\n"
-        "  12:	46c0      	nop			; (mov r8, r8)\n"
-        "  14:	0fef      	lsrs	r7, r5, #31\n"
-        "  16:	0000      	movs	r0, r0"
+        "   2:	4b01      	ldr	r3, [pc, #4]\n"
+        "   4:	b5f2      	push	{r1, r4, r5, r6, r7, lr}\n"
+        "   6:	449f      	add	pc, r3\n"
+        "   8:	46c0      	nop\n"
+        "   a:	ffff      	.short	0xffff\n"
+        "   c:	ffff      	.short	0xffff\n"
+        "   e:	4b01      	ldr	r3, [pc, #4]\n"
+        "  10:	b5f2      	push	{r1, r4, r5, r6, r7, lr}\n"
+        "  12:	449f      	add	pc, r3\n"
+        "  14:	46c0      	nop\n"
+        "  16:	0fea      	lsrs	r2, r5, #31\n"
+        "  18:	0000      	movs	r0, r0\n"
+        "  1a:	6987      	ldr	r7, [r0, #24]\n"
+        "  1c:	2f4b      	cmp	r7, #75\n"
+        "  1e:	d007      	beq.n	0x30\n"
+        "  20:	2f0b      	cmp	r7, #11\n"
+        "  22:	d005      	beq.n	0x30\n"
+        "  24:	4e01      	ldr	r6, [pc, #4]\n"
+        "  26:	447e      	add	r6, pc\n"
+        "  28:	4730      	bx	r6\n"
+        "  2a:	46c0      	nop\n"
+        "  2c:	0fd7      	lsrs	r7, r2, #31\n"
+        "  2e:	0000      	movs	r0, r0"
     >>,
     ?assertEqual(dump_to_bin(Dump), Stream).
 
@@ -1879,48 +2018,46 @@ wait_timeout_test() ->
 
     Stream = ?BACKEND:stream(State10),
     Dump = <<
-        "   0:	a707      	add	r7, pc, #28	; (adr r7, 0x22)\n"
+        "   0:	a706      	add	r7, pc, #24	; (adr r7, 0x1c)\n"
         "   2:	3701      	adds	r7, #1\n"
         "   4:	9e00      	ldr	r6, [sp, #0]\n"
         "   6:	6077      	str	r7, [r6, #4]\n"
-        "   8:	4f00      	ldr	r7, [pc, #0]	; (0xc)\n"
-        "   a:	e001      	b.n	0x10\n"
-        "   c:	1388      	asrs	r0, r1, #14\n"
-        "   e:	0000      	movs	r0, r0\n"
-        "  10:	6f96      	ldr	r6, [r2, #120]	; 0x78\n"
-        "  14:	463a      	mov	r2, r7\n"
-        "  16:	232a      	movs	r3, #42	; 0x2a\n"
-        "  18:	9f05      	ldr	r7, [sp, #20]\n"
-        "  1a:	9605      	str	r6, [sp, #20]\n"
-        "  1c:	46be      	mov	lr, r7\n"
-        "  1e:	bdf2      	pop	{r1, r4, r5, r6, r7, pc}\n"
-        "  20:	46c0      	nop			; (mov r8, r8)\n"
-        "  22:	b5f2      	push	{r1, r4, r5, r6, r7, lr}\n"
-        "  24:	6d57      	ldr	r7, [r2, #84]	; 0x54\n"
-        "  26:	b405      	push	{r0, r2}\n"
-        "  28:	9902      	ldr	r1, [sp, #8]\n"
-        "  2a:	47b8      	blx	r7\n"
-        "  2c:	4607      	mov	r7, r0\n"
-        "  2e:	bc05      	pop	{r0, r2}\n"
-        "  30:	4287      	cmp	r7, r0\n"
-        "  32:	d001      	beq.n	0x38\n"
-        "  34:	4638      	mov	r0, r7\n"
-        "  36:	bdf2      	pop	{r1, r4, r5, r6, r7, pc}\n"
-        "  38:	2784      	movs	r7, #132	; 0x84\n"
-        "  3a:	59d7      	ldr	r7, [r2, r7]\n"
-        "  3c:	b405      	push	{r0, r2}\n"
-        "  3e:	2102      	movs	r1, #2\n"
-        "  40:	47b8      	blx	r7\n"
-        "  42:	4607      	mov	r7, r0\n"
-        "  44:	bc05      	pop	{r0, r2}\n"
-        "  46:	2f00      	cmp	r7, #0\n"
-        "  48:	d105      	bne.n	0x56\n"
-        "  4a:	6fd7      	ldr	r7, [r2, #124]	; 0x7c\n"
-        "  4c:	222a      	movs	r2, #42	; 0x2a\n"
-        "  4e:	9e05      	ldr	r6, [sp, #20]\n"
-        "  50:	9705      	str	r7, [sp, #20]\n"
-        "  52:	46b6      	mov	lr, r6\n"
-        "  54:	bdf2      	pop	{r1, r4, r5, r6, r7, pc}"
+        "   8:	4f03      	ldr	r7, [pc, #12]	; (0x18)\n"
+        "   a:	6f96      	ldr	r6, [r2, #120]	; 0x78\n"
+        "   c:	463a      	mov	r2, r7\n"
+        "   e:	232a      	movs	r3, #42	; 0x2a\n"
+        "  10:	9f05      	ldr	r7, [sp, #20]\n"
+        "  12:	9605      	str	r6, [sp, #20]\n"
+        "  14:	46be      	mov	lr, r7\n"
+        "  16:	bdf2      	pop	{r1, r4, r5, r6, r7, pc}\n"
+        "  18:	1388      	asrs	r0, r1, #14\n"
+        "  1a:	0000      	movs	r0, r0\n"
+        "  1c:	b5f2      	push	{r1, r4, r5, r6, r7, lr}\n"
+        "  1e:	6d57      	ldr	r7, [r2, #84]	; 0x54\n"
+        "  20:	b405      	push	{r0, r2}\n"
+        "  22:	9902      	ldr	r1, [sp, #8]\n"
+        "  24:	47b8      	blx	r7\n"
+        "  26:	4607      	mov	r7, r0\n"
+        "  28:	bc05      	pop	{r0, r2}\n"
+        "  2a:	4287      	cmp	r7, r0\n"
+        "  2c:	d001      	beq.n	0x32\n"
+        "  2e:	4638      	mov	r0, r7\n"
+        "  30:	bdf2      	pop	{r1, r4, r5, r6, r7, pc}\n"
+        "  32:	2784      	movs	r7, #132	; 0x84\n"
+        "  34:	59d7      	ldr	r7, [r2, r7]\n"
+        "  36:	b405      	push	{r0, r2}\n"
+        "  38:	2102      	movs	r1, #2\n"
+        "  3a:	47b8      	blx	r7\n"
+        "  3c:	4607      	mov	r7, r0\n"
+        "  3e:	bc05      	pop	{r0, r2}\n"
+        "  40:	2f00      	cmp	r7, #0\n"
+        "  42:	d105      	bne.n	0x50\n"
+        "  44:	6fd7      	ldr	r7, [r2, #124]	; 0x7c\n"
+        "  46:	222a      	movs	r2, #42	; 0x2a\n"
+        "  48:	9e05      	ldr	r6, [sp, #20]\n"
+        "  4a:	9705      	str	r7, [sp, #20]\n"
+        "  4c:	46b6      	mov	lr, r6\n"
+        "  4e:	bdf2      	pop	{r1, r4, r5, r6, r7, pc}"
     >>,
     ?assertEqual(dump_to_bin(Dump), Stream).
 
@@ -1928,102 +2065,106 @@ wait_timeout_test() ->
 wait_test() ->
     State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)),
 
-    State1 = ?BACKEND:jump_table(State0, 5),
+    State1 = ?BACKEND:jump_table(State0, 2),
     State2 = ?BACKEND:add_label(State1, 1),
     Label = 2,
     State3 = ?BACKEND:set_continuation_to_label(State2, Label),
     State4 = ?BACKEND:call_primitive_last(State3, ?PRIM_SCHEDULE_WAIT_CP, [ctx, jit_state]),
+    State5 = ?BACKEND:add_label(State4, 2),
+    State6 = ?BACKEND:add_label(State5, 0),
+    State7 = ?BACKEND:update_branches(State6),
 
-    Stream = ?BACKEND:stream(State4),
+    Stream = ?BACKEND:stream(State7),
     Dump = <<
         "   0:	4b01      	ldr	r3, [pc, #4]	; (0x8)\n"
         "   2:	b5f2      	push	{r1, r4, r5, r6, r7, lr}\n"
         "   4:	449f      	add	pc, r3\n"
         "   6:	46c0      	nop			; (mov r8, r8)\n"
-        "   8:	0000      	movs	r0, r0\n"
+        "   8:	0034      	movs	r4, r6\n"
         "   a:	0000      	movs	r0, r0\n"
         "   c:	4b01      	ldr	r3, [pc, #4]	; (0x14)\n"
         "   e:	b5f2      	push	{r1, r4, r5, r6, r7, lr}\n"
         "  10:	449f      	add	pc, r3\n"
         "  12:	46c0      	nop			; (mov r8, r8)\n"
-        "  14:	0000      	movs	r0, r0\n"
+        "  14:	0010      	movs	r0, r2\n"
         "  16:	0000      	movs	r0, r0\n"
         "  18:	4b01      	ldr	r3, [pc, #4]	; (0x20)\n"
         "  1a:	b5f2      	push	{r1, r4, r5, r6, r7, lr}\n"
         "  1c:	449f      	add	pc, r3\n"
         "  1e:	46c0      	nop			; (mov r8, r8)\n"
-        "  20:	0000      	movs	r0, r0\n"
+        "  20:	001c      	movs	r4, r3\n"
         "  22:	0000      	movs	r0, r0\n"
-        "  24:	4b01      	ldr	r3, [pc, #4]	; (0x2c)\n"
-        "  26:	b5f2      	push	{r1, r4, r5, r6, r7, lr}\n"
-        "  28:	449f      	add	pc, r3\n"
-        "  2a:	46c0      	nop			; (mov r8, r8)\n"
-        "  2c:	0000      	movs	r0, r0\n"
-        "  2e:	0000      	movs	r0, r0\n"
-        "  30:	4b01      	ldr	r3, [pc, #4]	; (0x38)\n"
-        "  32:	b5f2      	push	{r1, r4, r5, r6, r7, lr}\n"
-        "  34:	449f      	add	pc, r3\n"
-        "  36:	46c0      	nop			; (mov r8, r8)\n"
-        "  38:	0000      	movs	r0, r0\n"
-        "  3a:	0000      	movs	r0, r0\n"
-        "  3c:	4b01      	ldr	r3, [pc, #4]	; (0x44)\n"
-        "  3e:	b5f2      	push	{r1, r4, r5, r6, r7, lr}\n"
-        "  40:	449f      	add	pc, r3\n"
-        "  42:	46c0      	nop			; (mov r8, r8)\n"
-        "  44:	0000      	movs	r0, r0\n"
-        "  46:	0000      	movs	r0, r0\n"
-        "  48:	a700      	add	r7, pc, #0	; (adr r7, 0x4c)\n"
-        "  4a:	2633      	movs	r6, #51	; 0x33\n"
-        "  4c:	4276      	negs	r6, r6\n"
-        "  4e:	19f6      	adds	r6, r6, r7\n"
-        "  50:	9f00      	ldr	r7, [sp, #0]\n"
-        "  52:	607e      	str	r6, [r7, #4]\n"
-        "  54:	6f57      	ldr	r7, [r2, #116]	; 0x74\n"
-        "  56:	9e05      	ldr	r6, [sp, #20]\n"
-        "  58:	9705      	str	r7, [sp, #20]\n"
-        "  5a:	46b6      	mov	lr, r6\n"
-        "  5c:	bdf2      	pop	{r1, r4, r5, r6, r7, pc}"
+        "  24:	a700      	add	r7, pc, #0	; (adr r7, 0x28)\n"
+        "  26:	260f      	movs	r6, #15\n"
+        "  28:	4276      	negs	r6, r6\n"
+        "  2a:	19f6      	adds	r6, r6, r7\n"
+        "  2c:	9f00      	ldr	r7, [sp, #0]\n"
+        "  2e:	607e      	str	r6, [r7, #4]\n"
+        "  30:	6f57      	ldr	r7, [r2, #116]	; 0x74\n"
+        "  32:	9e05      	ldr	r6, [sp, #20]\n"
+        "  34:	9705      	str	r7, [sp, #20]\n"
+        "  36:	46b6      	mov	lr, r6\n"
+        "  38:	bdf2      	pop	{r1, r4, r5, r6, r7, pc}\n"
+        "  3a:	46c0      	nop			; (mov r8, r8)"
     >>,
     ?assertEqual(dump_to_bin(Dump), Stream).
 
 %% Test return_labels_and_lines/2 function
 return_labels_and_lines_test() ->
     State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)),
+    State1 = ?BACKEND:jump_table(State0, 2),
 
     % Test return_labels_and_lines with some sample labels and lines
-    State1 = ?BACKEND:add_label(State0, 2, 32),
-    State2 = ?BACKEND:add_label(State1, 1, 16),
+    State2 = ?BACKEND:add_label(State1, 2, 32),
+    State3 = ?BACKEND:add_label(State2, 1, 16),
 
     % {Line, Offset} pairs
     SortedLines = [{10, 16}, {20, 32}],
 
-    State3 = ?BACKEND:return_labels_and_lines(State2, SortedLines),
-    Stream = ?BACKEND:stream(State3),
+    State4 = ?BACKEND:return_labels_and_lines(State3, SortedLines),
+    Stream = ?BACKEND:stream(State4),
 
     % Should have generated adr + pop {r1,r4,r5,r6,r7,pc} + labels table + lines table
     % adr = 4 bytes, pop = 2 bytes, labels table = 6*2 = 12 bytes, lines table = 6*2 = 12 bytes
     % Total minimum: 30 bytes
     ?assert(byte_size(Stream) >= 30),
 
-    % Expected: adr r0, <offset> + pop {r1,r4,r5,r6,r7,pc} + labels table + lines table
-    % The data tables start at offset 4, so adr should be adr r0, 4 not adr r0, 8
+    % Expected: jump table (3 entries) + adr r0, <offset> + pop {r1,r4,r5,r6,r7,pc} + labels table + lines table
     Dump = <<
-        "   0:	a000      	add	r0, pc, #0	; (adr r0, 0x4)\n"
-        "   2:	bdf2      	pop	{r1, r4, r5, r6, r7, pc}\n"
-        "   4:	0200      	lsls	r0, r0, #8\n"
-        "   6:	0100      	lsls	r0, r0, #4\n"
-        "   8:	0000      	movs	r0, r0\n"
-        "   a:	1000      	asrs	r0, r0, #32\n"
-        "   c:	0200      	lsls	r0, r0, #8\n"
-        "   e:	0000      	movs	r0, r0\n"
-        "  10:	2000      	movs	r0, #0\n"
-        "  12:	0200      	lsls	r0, r0, #8\n"
-        "  14:	0a00      	lsrs	r0, r0, #8\n"
-        "  16:	0000      	movs	r0, r0\n"
-        "  18:	1000      	asrs	r0, r0, #32\n"
-        "  1a:	1400      	asrs	r0, r0, #16\n"
-        "  1c:	0000      	movs	r0, r0\n"
-        "  1e:	2000      	movs	r0, #0"
+        "   0:	4b01      	ldr	r3, [pc, #4]\n"
+        "   2:	b5f2      	push	{r1, r4, r5, r6, r7, lr}\n"
+        "   4:	449f      	add	pc, r3\n"
+        "   6:	46c0      	nop\n"
+        "   8:	ffff      	.short	0xffff\n"
+        "   a:	ffff      	.short	0xffff\n"
+        "   c:	4b01      	ldr	r3, [pc, #4]\n"
+        "   e:	b5f2      	push	{r1, r4, r5, r6, r7, lr}\n"
+        "  10:	449f      	add	pc, r3\n"
+        "  12:	46c0      	nop\n"
+        "  14:	fffc      	.short	0xfffc\n"
+        "  16:	ffff      	.short	0xffff\n"
+        "  18:	4b01      	ldr	r3, [pc, #4]\n"
+        "  1a:	b5f2      	push	{r1, r4, r5, r6, r7, lr}\n"
+        "  1c:	449f      	add	pc, r3\n"
+        "  1e:	46c0      	nop\n"
+        "  20:	0000      	movs	r0, r0\n"
+        "  22:	0000      	movs	r0, r0\n"
+        "  24:	a000      	add	r0, pc, #0\n"
+        "  26:	bdf2      	pop	{r1, r4, r5, r6, r7, pc}\n"
+        "  28:	0200      	lsls	r0, r0, #8\n"
+        "  2a:	0100      	lsls	r0, r0, #4\n"
+        "  2c:	0000      	movs	r0, r0\n"
+        "  2e:	1000      	asrs	r0, r0, #32\n"
+        "  30:	0200      	lsls	r0, r0, #8\n"
+        "  32:	0000      	movs	r0, r0\n"
+        "  34:	2000      	movs	r0, #0\n"
+        "  36:	0200      	lsls	r0, r0, #8\n"
+        "  38:	0a00      	lsrs	r0, r0, #8\n"
+        "  3a:	0000      	movs	r0, r0\n"
+        "  3c:	1000      	asrs	r0, r0, #32\n"
+        "  3e:	1400      	asrs	r0, r0, #16\n"
+        "  40:	0000      	movs	r0, r0\n"
+        "  42:	2000      	movs	r0, #0"
     >>,
     ?assertEqual(dump_to_bin(Dump), Stream).
 
@@ -2035,36 +2176,55 @@ return_labels_and_lines_unaligned_test() ->
     TempState = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)),
     TempStream = jit_stream_binary:append(?BACKEND:stream(TempState), PaddingInstruction),
     State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, TempStream),
+    State1 = ?BACKEND:jump_table(State0, 2),
 
     % Test return_labels_and_lines with some sample labels and lines
-    State1 = ?BACKEND:add_label(State0, 2, 32),
-    State2 = ?BACKEND:add_label(State1, 1, 16),
+    State2 = ?BACKEND:add_label(State1, 2, 32),
+    State3 = ?BACKEND:add_label(State2, 1, 16),
 
     % {Line, Offset} pairs
     SortedLines = [{10, 16}, {20, 32}],
 
-    State3 = ?BACKEND:return_labels_and_lines(State2, SortedLines),
-    Stream = ?BACKEND:stream(State3),
+    State4 = ?BACKEND:return_labels_and_lines(State3, SortedLines),
+    Stream = ?BACKEND:stream(State4),
 
     Dump = <<
         "   0:	4770      	bx	lr\n"
-        "2:	a001      	add	r0, pc, #4	; (adr r0, 0x8)\n"
-        "4:	bdf2      	pop	{r1, r4, r5, r6, r7, pc}\n"
-        "6:	0000      	movs	r0, r0\n"
-        "8:	0200      	lsls	r0, r0, #8\n"
-        "a:	0100      	lsls	r0, r0, #4\n"
-        "c:	0000      	movs	r0, r0\n"
-        "e:	1000      	asrs	r0, r0, #32\n"
-        "10:	0200      	lsls	r0, r0, #8\n"
-        "12:	0000      	movs	r0, r0\n"
-        "14:	2000      	movs	r0, #0\n"
-        "16:	0200      	lsls	r0, r0, #8\n"
-        "18:	0a00      	lsrs	r0, r0, #8\n"
-        "1a:	0000      	movs	r0, r0\n"
-        "1c:	1000      	asrs	r0, r0, #32\n"
-        "1e:	1400      	asrs	r0, r0, #16\n"
-        "20:	0000      	movs	r0, r0\n"
-        "22:	2000      	movs	r0, #0"
+        "   2:	4b01      	ldr	r3, [pc, #4]\n"
+        "   4:	b5f2      	push	{r1, r4, r5, r6, r7, lr}\n"
+        "   6:	449f      	add	pc, r3\n"
+        "   8:	46c0      	nop\n"
+        "   a:	ffff      	.short	0xffff\n"
+        "   c:	ffff      	.short	0xffff\n"
+        "   e:	4b01      	ldr	r3, [pc, #4]\n"
+        "  10:	b5f2      	push	{r1, r4, r5, r6, r7, lr}\n"
+        "  12:	449f      	add	pc, r3\n"
+        "  14:	46c0      	nop\n"
+        "  16:	fffa      	.short	0xfffa\n"
+        "  18:	ffff      	.short	0xffff\n"
+        "  1a:	4b01      	ldr	r3, [pc, #4]\n"
+        "  1c:	b5f2      	push	{r1, r4, r5, r6, r7, lr}\n"
+        "  1e:	449f      	add	pc, r3\n"
+        "  20:	46c0      	nop\n"
+        "  22:	fffe      	.short	0xfffe\n"
+        "  24:	ffff      	.short	0xffff\n"
+        "  26:	a001      	add	r0, pc, #4\n"
+        "  28:	bdf2      	pop	{r1, r4, r5, r6, r7, pc}\n"
+        "  2a:	0000      	movs	r0, r0\n"
+        "  2c:	0200      	lsls	r0, r0, #8\n"
+        "  2e:	0100      	lsls	r0, r0, #4\n"
+        "  30:	0000      	movs	r0, r0\n"
+        "  32:	1000      	asrs	r0, r0, #32\n"
+        "  34:	0200      	lsls	r0, r0, #8\n"
+        "  36:	0000      	movs	r0, r0\n"
+        "  38:	2000      	movs	r0, #0\n"
+        "  3a:	0200      	lsls	r0, r0, #8\n"
+        "  3c:	0a00      	lsrs	r0, r0, #8\n"
+        "  3e:	0000      	movs	r0, r0\n"
+        "  40:	1000      	asrs	r0, r0, #32\n"
+        "  42:	1400      	asrs	r0, r0, #16\n"
+        "  44:	0000      	movs	r0, r0\n"
+        "  46:	2000      	movs	r0, #0"
     >>,
     ?assertEqual(dump_to_bin(Dump), Stream).
 
@@ -2187,7 +2347,7 @@ call_fun_test() ->
             ])
         end
     ),
-    State5 = ?BACKEND:and_(State4, RegCopy, ?TERM_PRIMARY_CLEAR_MASK),
+    {State5, RegCopy} = ?BACKEND:and_(State4, {free, RegCopy}, ?TERM_PRIMARY_CLEAR_MASK),
     State6 = ?BACKEND:move_array_element(State5, RegCopy, 0, RegCopy),
     State7 = ?BACKEND:if_block(
         State6, {RegCopy, '&', ?TERM_BOXED_TAG_MASK, '!=', ?TERM_BOXED_FUN}, fun(BSt0) ->
@@ -2224,55 +2384,55 @@ call_fun_test() ->
         "  24:	2403      	movs	r4, #3\n"
         "  26:	4025      	ands	r5, r4\n"
         "  28:	2d02      	cmp	r5, #2\n"
-        "  2a:	d00c      	beq.n	0x46\n"
+        "  2a:	d00b      	beq.n	0x44\n"
         "  2c:	6cd7      	ldr	r7, [r2, #76]	; 0x4c\n"
         "  2e:	b082      	sub	sp, #8\n"
         "  30:	9600      	str	r6, [sp, #0]\n"
         "  32:	9902      	ldr	r1, [sp, #8]\n"
         "  34:	222e      	movs	r2, #46	; 0x2e\n"
-        "  36:	4b01      	ldr	r3, [pc, #4]	; (0x3c)\n"
-        "  38:	e002      	b.n	0x40\n"
-        "  3a:	0000      	movs	r0, r0\n"
-        "  3c:	018b      	lsls	r3, r1, #6\n"
+        "  36:	4b02      	ldr	r3, [pc, #8]	; (0x40)\n"
+        "  38:	47b8      	blx	r7\n"
+        "  3a:	b002      	add	sp, #8\n"
+        "  3c:	bdf2      	pop	{r1, r4, r5, r6, r7, pc}\n"
         "  3e:	0000      	movs	r0, r0\n"
-        "  40:	47b8      	blx	r7\n"
-        "  42:	b002      	add	sp, #8\n"
-        "  44:	bdf2      	pop	{r1, r4, r5, r6, r7, pc}\n"
-        "  46:	2503      	movs	r5, #3\n"
-        "  48:	43ae      	bics	r6, r5\n"
-        "  4a:	6836      	ldr	r6, [r6, #0]\n"
-        "  4c:	4635      	mov	r5, r6\n"
-        "  4e:	243f      	movs	r4, #63	; 0x3f\n"
-        "  50:	4025      	ands	r5, r4\n"
-        "  52:	2d14      	cmp	r5, #20\n"
-        "  54:	d00b      	beq.n	0x6e\n"
-        "  56:	6cd7      	ldr	r7, [r2, #76]	; 0x4c\n"
-        "  58:	b082      	sub	sp, #8\n"
-        "  5a:	9600      	str	r6, [sp, #0]\n"
-        "  5c:	9902      	ldr	r1, [sp, #8]\n"
-        "  5e:	2258      	movs	r2, #88	; 0x58\n"
-        "  60:	4b00      	ldr	r3, [pc, #0]	; (0x64)\n"
-        "  62:	e001      	b.n	0x68\n"
-        "  64:	018b      	lsls	r3, r1, #6\n"
+        "  40:	018b      	lsls	r3, r1, #6\n"
+        "  42:	0000      	movs	r0, r0\n"
+        "  44:	2503      	movs	r5, #3\n"
+        "  46:	43ae      	bics	r6, r5\n"
+        "  48:	6836      	ldr	r6, [r6, #0]\n"
+        "  4a:	4635      	mov	r5, r6\n"
+        "  4c:	243f      	movs	r4, #63	; 0x3f\n"
+        "  4e:	4025      	ands	r5, r4\n"
+        "  50:	2d14      	cmp	r5, #20\n"
+        "  52:	d00b      	beq.n	0x6c\n"
+        "  54:	6cd7      	ldr	r7, [r2, #76]	; 0x4c\n"
+        "  56:	b082      	sub	sp, #8\n"
+        "  58:	9600      	str	r6, [sp, #0]\n"
+        "  5a:	9902      	ldr	r1, [sp, #8]\n"
+        "  5c:	2256      	movs	r2, #86	; 0x56\n"
+        "  5e:	4b02      	ldr	r3, [pc, #8]	; (0x68)\n"
+        "  60:	47b8      	blx	r7\n"
+        "  62:	b002      	add	sp, #8\n"
+        "  64:	bdf2      	pop	{r1, r4, r5, r6, r7, pc}\n"
         "  66:	0000      	movs	r0, r0\n"
-        "  68:	47b8      	blx	r7\n"
-        "  6a:	b002      	add	sp, #8\n"
-        "  6c:	bdf2      	pop	{r1, r4, r5, r6, r7, pc}\n"
-        "  6e:	9d00      	ldr	r5, [sp, #0]\n"
-        "  70:	682e      	ldr	r6, [r5, #0]\n"
-        "  72:	6836      	ldr	r6, [r6, #0]\n"
-        "  74:	0636      	lsls	r6, r6, #24\n"
-        "  76:	4d05      	ldr	r5, [pc, #20]	; (0x8c)\n"
-        "  78:	432e      	orrs	r6, r5\n"
-        "  7a:	65c6      	str	r6, [r0, #92]	; 0x5c\n"
-        "  7c:	2680      	movs	r6, #128	; 0x80\n"
-        "  7e:	5996      	ldr	r6, [r2, r6]\n"
-        "  80:	463a      	mov	r2, r7\n"
-        "  82:	2300      	movs	r3, #0\n"
-        "  84:	9f05      	ldr	r7, [sp, #20]\n"
-        "  86:	9605      	str	r6, [sp, #20]\n"
-        "  88:	46be      	mov	lr, r7\n"
-        "  8a:	bdf2      	pop	{r1, r4, r5, r6, r7, pc}\n"
+        "  68:	018b      	lsls	r3, r1, #6\n"
+        "  6a:	0000      	movs	r0, r0\n"
+        "  6c:	9d00      	ldr	r5, [sp, #0]\n"
+        "  6e:	682e      	ldr	r6, [r5, #0]\n"
+        "  70:	6836      	ldr	r6, [r6, #0]\n"
+        "  72:	0636      	lsls	r6, r6, #24\n"
+        "  74:	4d05      	ldr	r5, [pc, #20]	; (0x8c)\n"
+        "  76:	432e      	orrs	r6, r5\n"
+        "  78:	65c6      	str	r6, [r0, #92]	; 0x5c\n"
+        "  7a:	2680      	movs	r6, #128	; 0x80\n"
+        "  7c:	5996      	ldr	r6, [r2, r6]\n"
+        "  7e:	463a      	mov	r2, r7\n"
+        "  80:	2300      	movs	r3, #0\n"
+        "  82:	9f05      	ldr	r7, [sp, #20]\n"
+        "  84:	9605      	str	r6, [sp, #20]\n"
+        "  86:	46be      	mov	lr, r7\n"
+        "  88:	bdf2      	pop	{r1, r4, r5, r6, r7, pc}\n"
+        "  8a:	0000      	movs	r0, r0\n"
         "  8c:	0240      	lsls	r0, r0, #9\n"
         "  8e:	0000      	movs	r0, r0\n"
         "  90:	b5f2      	push	{r1, r4, r5, r6, r7, lr}"
@@ -2281,7 +2441,8 @@ call_fun_test() ->
 
 move_to_vm_register_test0(State, Source, Dest, Dump) ->
     State1 = ?BACKEND:move_to_vm_register(State, Source, Dest),
-    Stream = ?BACKEND:stream(State1),
+    State2 = ?BACKEND:jump_to_offset(State1, 16#100),
+    Stream = ?BACKEND:stream(State2),
     ?assertEqual(dump_to_bin(Dump), Stream).
 
 move_to_vm_register_test_() ->
@@ -2294,88 +2455,101 @@ move_to_vm_register_test_() ->
                 ?_test(begin
                     move_to_vm_register_test0(State0, 0, {x_reg, 0}, <<
                         "   0:	2700      	movs	r7, #0\n"
-                        "   2:	6187      	str	r7, [r0, #24]"
+                        "   2:	6187      	str	r7, [r0, #24]\n"
+                        "   4:	e07c      	b.n	0x100"
                     >>)
                 end),
                 ?_test(begin
                     move_to_vm_register_test0(State0, 0, {x_reg, extra}, <<
                         "   0:	2700      	movs	r7, #0\n"
-                        "   2:	6587      	str	r7, [r0, #88]	; 0x58"
+                        "   2:	6587      	str	r7, [r0, #88]	; 0x58\n"
+                        "   4:	e07c      	b.n	0x100"
                     >>)
                 end),
                 ?_test(begin
                     move_to_vm_register_test0(State0, 0, {ptr, r6}, <<
                         "   0:	2700      	movs	r7, #0\n"
-                        "   2:	6037      	str	r7, [r6, #0]"
+                        "   2:	6037      	str	r7, [r6, #0]\n"
+                        "   4:	e07c      	b.n	0x100"
                     >>)
                 end),
                 ?_test(begin
                     move_to_vm_register_test0(State0, 0, {y_reg, 2}, <<
                         "   0:	2600      	movs	r6, #0\n"
                         "   2:	6947      	ldr	r7, [r0, #20]\n"
-                        "   4:	60be      	str	r6, [r7, #8]"
+                        "   4:	60be      	str	r6, [r7, #8]\n"
+                        "   6:	e07b      	b.n	0x100"
                     >>)
                 end),
                 ?_test(begin
                     move_to_vm_register_test0(State0, 0, {y_reg, 20}, <<
                         "   0:	2600      	movs	r6, #0\n"
                         "   2:	6947      	ldr	r7, [r0, #20]\n"
-                        "   4:	653e      	str	r6, [r7, #80]	; 0x50"
+                        "   4:	653e      	str	r6, [r7, #80]	; 0x50\n"
+                        "   6:	e07b      	b.n	0x100"
                     >>)
                 end),
                 %% Test: Immediate to x_reg
                 ?_test(begin
                     move_to_vm_register_test0(State0, 42, {x_reg, 0}, <<
                         "   0:	272a      	movs	r7, #42	; 0x2a\n"
-                        "   2:	6187      	str	r7, [r0, #24]"
+                        "   2:	6187      	str	r7, [r0, #24]\n"
+                        "   4:	e07c      	b.n	0x100"
                     >>)
                 end),
                 ?_test(begin
                     move_to_vm_register_test0(State0, 42, {x_reg, extra}, <<
                         "   0:	272a      	movs	r7, #42	; 0x2a\n"
-                        "   2:	6587      	str	r7, [r0, #88]	; 0x58"
+                        "   2:	6587      	str	r7, [r0, #88]	; 0x58\n"
+                        "   4:	e07c      	b.n	0x100"
                     >>)
                 end),
                 ?_test(begin
                     move_to_vm_register_test0(State0, 42, {y_reg, 2}, <<
                         "   0:	262a      	movs	r6, #42	; 0x2a\n"
                         "   2:	6947      	ldr	r7, [r0, #20]\n"
-                        "   4:	60be      	str	r6, [r7, #8]"
+                        "   4:	60be      	str	r6, [r7, #8]\n"
+                        "   6:	e07b      	b.n	0x100"
                     >>)
                 end),
                 ?_test(begin
                     move_to_vm_register_test0(State0, 42, {y_reg, 20}, <<
                         "   0:	262a      	movs	r6, #42	; 0x2a\n"
                         "   2:	6947      	ldr	r7, [r0, #20]\n"
-                        "   4:	653e      	str	r6, [r7, #80]	; 0x50"
+                        "   4:	653e      	str	r6, [r7, #80]	; 0x50\n"
+                        "   6:	e07b      	b.n	0x100"
                     >>)
                 end),
                 %% Test: Immediate to ptr
                 ?_test(begin
                     move_to_vm_register_test0(State0, 99, {ptr, r3}, <<
                         "   0:	2763      	movs	r7, #99	; 0x63\n"
-                        "   2:	601f      	str	r7, [r3, #0]"
+                        "   2:	601f      	str	r7, [r3, #0]\n"
+                        "   4:	e07c      	b.n	0x100"
                     >>)
                 end),
                 %% Test: x_reg to x_reg
                 ?_test(begin
                     move_to_vm_register_test0(State0, {x_reg, 1}, {x_reg, 2}, <<
                         "   0:	69c7      	ldr	r7, [r0, #28]\n"
-                        "   2:	6207      	str	r7, [r0, #32]"
+                        "   2:	6207      	str	r7, [r0, #32]\n"
+                        "   4:	e07c      	b.n	0x100"
                     >>)
                 end),
                 %% Test: x_reg to ptr
                 ?_test(begin
                     move_to_vm_register_test0(State0, {x_reg, 1}, {ptr, r1}, <<
                         "   0:	69c7      	ldr	r7, [r0, #28]\n"
-                        "   2:	600f      	str	r7, [r1, #0]"
+                        "   2:	600f      	str	r7, [r1, #0]\n"
+                        "   4:	e07c      	b.n	0x100"
                     >>)
                 end),
                 %% Test: ptr to x_reg
                 ?_test(begin
                     move_to_vm_register_test0(State0, {ptr, r4}, {x_reg, 3}, <<
                         "   0:	6827      	ldr	r7, [r4, #0]\n"
-                        "   2:	6247      	str	r7, [r0, #36]	; 0x24"
+                        "   2:	6247      	str	r7, [r0, #36]	; 0x24\n"
+                        "   4:	e07c      	b.n	0x100"
                     >>)
                 end),
                 %% Test: x_reg to y_reg
@@ -2383,7 +2557,8 @@ move_to_vm_register_test_() ->
                     move_to_vm_register_test0(State0, {x_reg, 0}, {y_reg, 1}, <<
                         "   0:	6987      	ldr	r7, [r0, #24]\n"
                         "   2:	6946      	ldr	r6, [r0, #20]\n"
-                        "   4:	6077      	str	r7, [r6, #4]"
+                        "   4:	6077      	str	r7, [r6, #4]\n"
+                        "   6:	e07b      	b.n	0x100"
                     >>)
                 end),
                 %% Test: y_reg to x_reg
@@ -2391,7 +2566,8 @@ move_to_vm_register_test_() ->
                     move_to_vm_register_test0(State0, {y_reg, 0}, {x_reg, 3}, <<
                         "   0:	6946      	ldr	r6, [r0, #20]\n"
                         "   2:	6837      	ldr	r7, [r6, #0]\n"
-                        "   4:	6247      	str	r7, [r0, #36]	; 0x24"
+                        "   4:	6247      	str	r7, [r0, #36]	; 0x24\n"
+                        "   6:	e07b      	b.n	0x100"
                     >>)
                 end),
                 %% Test: y_reg to y_reg
@@ -2399,41 +2575,47 @@ move_to_vm_register_test_() ->
                     move_to_vm_register_test0(State0, {y_reg, 1}, {x_reg, 3}, <<
                         "   0:	6946      	ldr	r6, [r0, #20]\n"
                         "   2:	6877      	ldr	r7, [r6, #4]\n"
-                        "   4:	6247      	str	r7, [r0, #36]	; 0x24"
+                        "   4:	6247      	str	r7, [r0, #36]	; 0x24\n"
+                        "   6:	e07b      	b.n	0x100"
                     >>)
                 end),
                 %% Test: Native register to x_reg
                 ?_test(begin
                     move_to_vm_register_test0(State0, r5, {x_reg, 0}, <<
-                        "   0:	6185      	str	r5, [r0, #24]"
+                        "   0:	6185      	str	r5, [r0, #24]\n"
+                        "   2:	e07d      	b.n	0x100"
                     >>)
                 end),
                 ?_test(begin
                     move_to_vm_register_test0(State0, r6, {x_reg, extra}, <<
-                        "   0:	6586      	str	r6, [r0, #88]	; 0x58"
+                        "   0:	6586      	str	r6, [r0, #88]	; 0x58\n"
+                        "   2:	e07d      	b.n	0x100"
                     >>)
                 end),
                 %% Test: Native register to ptr
                 ?_test(begin
                     move_to_vm_register_test0(State0, r4, {ptr, r3}, <<
-                        "   0:	601c      	str	r4, [r3, #0]"
+                        "   0:	601c      	str	r4, [r3, #0]\n"
+                        "   2:	e07d      	b.n	0x100"
                     >>)
                 end),
                 %% Test: Native register to y_reg
                 ?_test(begin
                     move_to_vm_register_test0(State0, r1, {y_reg, 0}, <<
                         "   0:	6947      	ldr	r7, [r0, #20]\n"
-                        "   2:	6039      	str	r1, [r7, #0]"
+                        "   2:	6039      	str	r1, [r7, #0]\n"
+                        "   4:	e07c      	b.n	0x100"
                     >>)
                 end),
                 %% Test: Large immediate to x_reg (32-bit literal pool, aligned case)
                 ?_test(begin
                     move_to_vm_register_test0(State0, 16#12345678, {x_reg, 0}, <<
-                        "   0:	4f00      	ldr	r7, [pc, #0]	; (0x4)\n"
-                        "   2:	e001      	b.n	0x8\n"
-                        "   4:	5678      	ldrsb	r0, [r7, r1]\n"
-                        "   6:	1234      	asrs	r4, r6, #8\n"
-                        "   8:	6187      	str	r7, [r0, #24]"
+                        "   0:	4f01      	ldr	r7, [pc, #4]	; (0x8)\n"
+                        "   2:	6187      	str	r7, [r0, #24]\n"
+                        "   4:	e07c      	b.n	0x100\n"
+                        "   6:	0000      	movs	r0, r0\n"
+                        "   8:	5678      	ldrsb	r0, [r7, r1]\n"
+                        "   a:	1234      	asrs	r4, r6, #8"
                     >>)
                 end),
                 %% Test: Large immediate to x_reg (32-bit literal pool, unaligned case)
@@ -2442,55 +2624,57 @@ move_to_vm_register_test_() ->
                     State1 = ?BACKEND:move_to_vm_register(State0, r1, {ptr, r3}),
                     %% Then do large immediate which should handle unaligned case
                     State2 = ?BACKEND:move_to_vm_register(State1, 16#12345678, {x_reg, 0}),
-                    Stream = ?BACKEND:stream(State2),
+                    State3 = ?BACKEND:jump_to_offset(State2, 16#100),
+                    Stream = ?BACKEND:stream(State3),
                     Expected = dump_to_bin(<<
                         "   0:	6019      	str	r1, [r3, #0]\n"
                         "   2:	4f01      	ldr	r7, [pc, #4]	; (0x8)\n"
-                        "   4:	e002      	b.n	0xc\n"
-                        "   6:	0000      	movs	r0, r0\n"
+                        "   4:	6187      	str	r7, [r0, #24]\n"
+                        "   6:	e07b      	b.n	0x100\n"
                         "   8:	5678      	ldrsb	r0, [r7, r1]\n"
-                        "   a:	1234      	asrs	r4, r6, #8\n"
-                        "   c:	6187      	str	r7, [r0, #24]"
+                        "   a:	1234      	asrs	r4, r6, #8"
                     >>),
                     ?assertEqual(Expected, Stream)
                 end),
                 ?_test(begin
                     move_to_vm_register_test0(State0, 16#12345678, {x_reg, extra}, <<
-                        "   0:	4f00      	ldr	r7, [pc, #0]	; (0x4)\n"
-                        "   2:	e001      	b.n	0x8\n"
-                        "   4:	5678      	ldrsb	r0, [r7, r1]\n"
-                        "   6:	1234      	asrs	r4, r6, #8\n"
-                        "   8:	6587      	str	r7, [r0, #88]	; 0x58"
+                        "   0:	4f01      	ldr	r7, [pc, #4]	; (0x8)\n"
+                        "   2:	6587      	str	r7, [r0, #88]	; 0x58\n"
+                        "   4:	e07c      	b.n	0x100\n"
+                        "   6:	0000      	movs	r0, r0\n"
+                        "   8:	5678      	ldrsb	r0, [r7, r1]\n"
+                        "   a:	1234      	asrs	r4, r6, #8"
                     >>)
                 end),
                 ?_test(begin
                     move_to_vm_register_test0(State0, 16#12345678, {y_reg, 2}, <<
-                        "   0:	4f00      	ldr	r7, [pc, #0]	; (0x4)\n"
-                        "   2:	e001      	b.n	0x8\n"
-                        "   4:	5678      	ldrsb	r0, [r7, r1]\n"
-                        "   6:	1234      	asrs	r4, r6, #8\n"
-                        "   8:	6946      	ldr	r6, [r0, #20]\n"
-                        "   a:	60b7      	str	r7, [r6, #8]"
+                        "   0:	4f01      	ldr	r7, [pc, #4]	; (0x8)\n"
+                        "   2:	6946      	ldr	r6, [r0, #20]\n"
+                        "   4:	60b7      	str	r7, [r6, #8]\n"
+                        "   6:	e07b      	b.n	0x100\n"
+                        "   8:	5678      	ldrsb	r0, [r7, r1]\n"
+                        "   a:	1234      	asrs	r4, r6, #8"
                     >>)
                 end),
                 ?_test(begin
                     move_to_vm_register_test0(State0, 16#12345678, {y_reg, 20}, <<
-                        "   0:	4f00      	ldr	r7, [pc, #0]	; (0x4)\n"
-                        "   2:	e001      	b.n	0x8\n"
-                        "   4:	5678      	ldrsb	r0, [r7, r1]\n"
-                        "   6:	1234      	asrs	r4, r6, #8\n"
-                        "   8:	6946      	ldr	r6, [r0, #20]\n"
-                        "   a:	6537      	str	r7, [r6, #80]	; 0x50"
+                        "   0:	4f01      	ldr	r7, [pc, #4]	; (0x8)\n"
+                        "   2:	6946      	ldr	r6, [r0, #20]\n"
+                        "   4:	6537      	str	r7, [r6, #80]	; 0x50\n"
+                        "   6:	e07b      	b.n	0x100\n"
+                        "   8:	5678      	ldrsb	r0, [r7, r1]\n"
+                        "   a:	1234      	asrs	r4, r6, #8"
                     >>)
                 end),
                 %% Test: Large immediate to ptr
                 ?_test(begin
                     move_to_vm_register_test0(State0, 16#12345678, {ptr, r3}, <<
-                        "   0:	4f00      	ldr	r7, [pc, #0]	; (0x4)\n"
-                        "   2:	e001      	b.n	0x8\n"
-                        "   4:	5678      	ldrsb	r0, [r7, r1]\n"
-                        "   6:	1234      	asrs	r4, r6, #8\n"
-                        "   8:	601f      	str	r7, [r3, #0]"
+                        "   0:	4f01      	ldr	r7, [pc, #4]	; (0x8)\n"
+                        "   2:	601f      	str	r7, [r3, #0]\n"
+                        "   4:	e07c      	b.n	0x100\n"
+                        "   6:	0000      	movs	r0, r0\n"
+                        "   8:	5678      	ldrsb	r0, [r7, r1]\n"
+                        "   a:	1234      	asrs	r4, r6, #8"
                     >>)
                 end),
                 %% Test: x_reg to y_reg (high index)
@@ -2498,7 +2682,8 @@ move_to_vm_register_test_() ->
                     move_to_vm_register_test0(State0, {x_reg, 15}, {y_reg, 31}, <<
                         "   0:	6d47      	ldr	r7, [r0, #84]	; 0x54\n"
                         "   2:	6946      	ldr	r6, [r0, #20]\n"
-                        "   4:	67f7      	str	r7, [r6, #124]	; 0x7c"
+                        "   4:	67f7      	str	r7, [r6, #124]	; 0x7c\n"
+                        "   6:	e07b      	b.n	0x100"
                     >>)
                 end),
                 %% Test: y_reg to x_reg (high index)
@@ -2506,7 +2691,8 @@ move_to_vm_register_test_() ->
                     move_to_vm_register_test0(State0, {y_reg, 31}, {x_reg, 15}, <<
                         "   0:	6946      	ldr	r6, [r0, #20]\n"
                         "   2:	6ff7      	ldr	r7, [r6, #124]	; 0x7c\n"
-                        "   4:	6547      	str	r7, [r0, #84]	; 0x54"
+                        "   4:	6547      	str	r7, [r0, #84]	; 0x54\n"
+                        "   6:	e07b      	b.n	0x100"
                     >>)
                 end),
                 %% Test: Large y_reg index (32) that exceeds str immediate offset limit
@@ -2516,7 +2702,8 @@ move_to_vm_register_test_() ->
                         "   2:	6947      	ldr	r7, [r0, #20]\n"
                         "   4:	2580      	movs	r5, #128	; 0x80\n"
                         "   6:	443d      	add	r5, r7\n"
-                        "   8:	602e      	str	r6, [r5, #0]"
+                        "   8:	602e      	str	r6, [r5, #0]\n"
+                        "   a:	e079      	b.n	0x100"
                     >>)
                 end),
                 %% Test: Negative immediate to x_reg
@@ -2524,7 +2711,8 @@ move_to_vm_register_test_() ->
                     move_to_vm_register_test0(State0, -1, {x_reg, 0}, <<
                         "   0:	2701      	movs	r7, #1\n"
                         "   2:	427f      	negs	r7, r7\n"
-                        "   4:	6187      	str	r7, [r0, #24]"
+                        "   4:	6187      	str	r7, [r0, #24]\n"
+                        "   6:	e07b      	b.n	0x100"
                     >>)
                 end)
             ]
@@ -2624,6 +2812,34 @@ move_array_element_test_() ->
                         "   2:	68be      	ldr	r6, [r7, #8]\n"
                         "   4:	62c6      	str	r6, [r0, #44]	; 0x2c"
                     >>)
+                end),
+                %% move_array_element: reg[32] to x_reg (large offset, index 32, offset 128)
+                ?_test(begin
+                    move_array_element_test0(State0, r3, 32, {x_reg, 0}, <<
+                        "   0:	2704      	movs	r7, #4\n"
+                        "   2:	441f      	add	r7, r3\n"
+                        "   4:	6ffe      	ldr	r6, [r7, #124]	; 0x7c\n"
+                        "   6:	6186      	str	r6, [r0, #24]"
+                    >>)
+                end),
+                %% move_array_element: reg[32] to ptr (large offset)
+                ?_test(begin
+                    move_array_element_test0(State0, r3, 32, {ptr, r5}, <<
+                        "   0:	2704      	movs	r7, #4\n"
+                        "   2:	441f      	add	r7, r3\n"
+                        "   4:	6fff      	ldr	r7, [r7, #124]	; 0x7c\n"
+                        "   6:	602f      	str	r7, [r5, #0]"
+                    >>)
+                end),
+                %% move_array_element: reg[32] to y_reg (large offset)
+                ?_test(begin
+                    move_array_element_test0(State0, r3, 32, {y_reg, 2}, <<
+                        "   0:	2604      	movs	r6, #4\n"
+                        "   2:	441e      	add	r6, r3\n"
+                        "   4:	6ff6      	ldr	r6, [r6, #124]	; 0x7c\n"
+                        "   6:	6947      	ldr	r7, [r0, #20]\n"
+                        "   8:	60be      	str	r6, [r7, #8]"
+                    >>)
                 end)
             ]
         end}.
@@ -2644,6 +2860,19 @@ get_array_element_test_() ->
                     >>,
                     ?assertEqual(dump_to_bin(Dump), Stream),
                     ?assertEqual(r7, Reg)
+                end),
+                %% get_array_element: reg[x] with large offset (index 32, offset 128)
+                %% For offset 128, we use ldr with max offset 124 + temp register for remainder (4)
+                ?_test(begin
+                    {State1, Reg} = ?BACKEND:get_array_element(State0, r4, 32),
+                    Stream = ?BACKEND:stream(State1),
+                    Dump = <<
+                        "   0:	2604      	movs	r6, #4\n"
+                        "   2:	4426      	add	r6, r4\n"
+                        "   4:	6ff7      	ldr	r7, [r6, #124]	; 0x7c"
+                    >>,
+                    ?assertEqual(dump_to_bin(Dump), Stream),
+                    ?assertEqual(r7, Reg)
                 end)
             ]
         end}.
@@ -2665,6 +2894,18 @@ move_to_array_element_test_() ->
                     >>,
                     ?assertEqual(dump_to_bin(Dump), Stream)
                 end),
+                %% move_to_array_element/4: x_reg to reg[x], larger immediate offset
+                ?_test(begin
+                    State1 = ?BACKEND:move_to_array_element(State0, {x_reg, 0}, r3, 32),
+                    Stream = ?BACKEND:stream(State1),
+                    Dump = <<
+                        "   0:	6987      	ldr	r7, [r0, #24]\n"
+                        "   2:	2604      	movs	r6, #4\n"
+                        "   4:	441e      	add	r6, r3\n"
+                        "   6:	67f7      	str	r7, [r6, #124]	; 0x7c"
+                    >>,
+                    ?assertEqual(dump_to_bin(Dump), Stream)
+                end),
                 %% move_to_array_element/4: x_reg to reg[reg]
                 ?_test(begin
                     State1 = ?BACKEND:move_to_array_element(State0, {x_reg, 0}, r3, r4),
@@ -2714,8 +2955,8 @@ move_to_array_element_test_() ->
                 end),
                 %% move_to_array_element/5: x_reg to reg[x+offset]
                 ?_test(begin
-                    State1 = setelement(6, State0, ?BACKEND:available_regs(State0) -- [r3, r4]),
-                    State2 = setelement(7, State1, [r3, r4]),
+                    State1 = setelement(7, State0, ?BACKEND:available_regs(State0) -- [r3, r4]),
+                    State2 = setelement(8, State1, [r3, r4]),
                     [r3, r4] = ?BACKEND:used_regs(State2),
                     State3 = ?BACKEND:move_to_array_element(State2, {x_reg, 0}, r3, r4, 1),
                     Stream = ?BACKEND:stream(State3),
@@ -2729,8 +2970,8 @@ move_to_array_element_test_() ->
                 end),
                 %% move_to_array_element/5: imm to reg[x+offset]
                 ?_test(begin
-                    State1 = setelement(6, State0, ?BACKEND:available_regs(State0) -- [r3, r4]),
-                    State2 = setelement(7, State1, [r3, r4]),
+                    State1 = setelement(7, State0, ?BACKEND:available_regs(State0) -- [r3, r4]),
+                    State2 = setelement(8, State1, [r3, r4]),
                     [r3, r4] = ?BACKEND:used_regs(State2),
                     State3 = ?BACKEND:move_to_array_element(State2, 42, r3, r4, 1),
                     Stream = ?BACKEND:stream(State3),
@@ -2787,11 +3028,12 @@ move_to_native_register_test_() ->
                 %% move_to_native_register/2: -256 (boundary case, should use literal pool)
                 ?_test(begin
                     {State1, Reg} = ?BACKEND:move_to_native_register(State0, -256),
-                    Stream = ?BACKEND:stream(State1),
+                    State2 = ?BACKEND:jump_to_offset(State1, 16#100),
+                    Stream = ?BACKEND:stream(State2),
                     ?assertEqual(r7, Reg),
                     Dump = <<
                         "   0:	4f00      	ldr	r7, [pc, #0]	; (0x4)\n"
-                        "   2:	e001      	b.n	0x8\n"
+                        "   2:	e07d      	b.n	0x100\n"
                         "   4:	ff00 ffff 	vmaxnm.f32	<illegal reg q7.5>, q8, <illegal reg q15.5>"
                     >>,
                     ?assertEqual(dump_to_bin(Dump), Stream)
@@ -2895,7 +3137,9 @@ move_to_native_register_test_() ->
 
 add_test0(State0, Reg, Imm, Dump) ->
     State1 = ?BACKEND:add(State0, Reg, Imm),
-    Stream = ?BACKEND:stream(State1),
+    % Force emission of literal pool
+    State2 = ?BACKEND:jump_to_offset(State1, 16#100),
+    Stream = ?BACKEND:stream(State2),
     ?assertEqual(dump_to_bin(Dump), Stream).
 
 add_test_() ->
@@ -2907,21 +3151,24 @@ add_test_() ->
             [
                 ?_test(begin
                     add_test0(State0, r2, 2, <<
-                        "   0:	3202      	adds	r2, #2"
+                        "   0:	3202      	adds	r2, #2\n"
+                        "   2:	e07d      	b.n	0x100"
                     >>)
                 end),
                 ?_test(begin
                     add_test0(State0, r2, 256, <<
-                        "   0:	4f00      	ldr	r7, [pc, #0]	; (0x4)\n"
-                        "   2:	e001      	b.n	0x8\n"
-                        "   4:	0100      	lsls	r0, r0, #4\n"
+                        "   0:	4f01      	ldr	r7, [pc, #4]	; (0x8)\n"
+                        "   2:	19d2      	adds	r2, r2, r7\n"
+                        "   4:	e07c      	b.n	0x100\n"
                         "   6:	0000      	movs	r0, r0\n"
-                        "   8:	19d2      	adds	r2, r2, r7"
+                        "   8:	0100      	lsls	r0, r0, #4\n"
+                        "   a:	0000      	movs	r0, r0"
                     >>)
                 end),
                 ?_test(begin
                     add_test0(State0, r2, r3, <<
-                        "   0:	18d2      	adds	r2, r2, r3"
+                        "   0:	18d2      	adds	r2, r2, r3\n"
+                        "   2:	e07d      	b.n	0x100"
                     >>)
                 end)
             ]
@@ -2929,7 +3176,9 @@ add_test_() ->
 
 sub_test0(State0, Reg, Imm, Dump) ->
     State1 = ?BACKEND:sub(State0, Reg, Imm),
-    Stream = ?BACKEND:stream(State1),
+    % Force emission of literal pool
+    State2 = ?BACKEND:jump_to_offset(State1, 16#100),
+    Stream = ?BACKEND:stream(State2),
     ?assertEqual(dump_to_bin(Dump), Stream).
 
 sub_test_() ->
@@ -2941,21 +3190,24 @@ sub_test_() ->
             [
                 ?_test(begin
                     sub_test0(State0, r2, 2, <<
-                        "   0:	3a02      	subs	r2, #2"
+                        "   0:	3a02      	subs	r2, #2\n"
+                        "   2:	e07d      	b.n	0x100"
                     >>)
                 end),
                 ?_test(begin
                     sub_test0(State0, r2, 256, <<
-                        "   0:	4f00      	ldr	r7, [pc, #0]	; (0x4)\n"
-                        "   2:	e001      	b.n	0x8\n"
-                        "   4:	0100      	lsls	r0, r0, #4\n"
+                        "   0:	4f01      	ldr	r7, [pc, #4]	@ (0x8)\n"
+                        "   2:	1bd2      	subs	r2, r2, r7\n"
+                        "   4:	e07c      	b.n	0x100\n"
                         "   6:	0000      	movs	r0, r0\n"
-                        "   8:	1bd2      	subs	r2, r2, r7"
+                        "   8:	0100      	lsls	r0, r0, #4\n"
+                        "   a:	0000      	movs	r0, r0"
                     >>)
                 end),
                 ?_test(begin
                     sub_test0(State0, r2, r3, <<
-                        "   0:	1ad2      	subs	r2, r2, r3"
+                        "   0:	1ad2      	subs	r2, r2, r3\n"
+                        "   2:	e07d      	b.n	0x100"
                     >>)
                 end)
             ]
@@ -3184,7 +3436,7 @@ and_register_exhaustion_negative_test() ->
     {State5, r3} = ?BACKEND:move_to_native_register(State4, {x_reg, 4}),
     {StateNoRegs, r1} = ?BACKEND:move_to_native_register(State5, {x_reg, 5}),
     % Test negative immediate (-4) which should use BICS with r0 as temp
-    StateResult = ?BACKEND:and_(StateNoRegs, r7, -4),
+    {StateResult, r7} = ?BACKEND:and_(StateNoRegs, {free, r7}, -4),
     Stream = ?BACKEND:stream(StateResult),
     ExpectedDump = <<
         "   0:	6987      	ldr	r7, [r0, #24]\n"
@@ -3210,7 +3462,7 @@ and_register_exhaustion_positive_test() ->
     {State5, r3} = ?BACKEND:move_to_native_register(State4, {x_reg, 4}),
     {StateNoRegs, r1} = ?BACKEND:move_to_native_register(State5, {x_reg, 5}),
     % Test positive immediate (0x3F) which should use ANDS with r0 as temp
-    StateResult = ?BACKEND:and_(StateNoRegs, r7, 16#3F),
+    {StateResult, r7} = ?BACKEND:and_(StateNoRegs, {free, r7}, 16#3F),
     Stream = ?BACKEND:stream(StateResult),
     ExpectedDump = <<
         "   0:	6987      	ldr	r7, [r0, #24]\n"
@@ -3256,23 +3508,36 @@ alloc_boxed_integer_fragment_large_test() ->
     {State1, ResultReg} = ?BACKEND:call_primitive(State0, ?PRIM_ALLOC_BOXED_INTEGER_FRAGMENT, [
         ctx, {avm_int64_t, 16#123456789ABCDEF0}
     ]),
+    % Add a call primitive last to emit literal pool
+    State2 = ?BACKEND:call_primitive_last(State1, ?PRIM_RAISE_ERROR_TUPLE, [
+        ctx, jit_state, offset, ?BADMATCH_ATOM, {free, ResultReg}
+    ]),
     ?assertEqual(r7, ResultReg),
-    Stream = ?BACKEND:stream(State1),
+    Stream = ?BACKEND:stream(State2),
     Dump =
         <<
-            "   0:	6bd7      	ldr	r7, [r2, #60]	; 0x3c\n"
+            "   0:	6bd7      	ldr	r7, [r2, #60]	@ 0x3c\n"
             "   2:	b405      	push	{r0, r2}\n"
-            "   4:	4a00      	ldr	r2, [pc, #0]	; (0x8)\n"
-            "   6:	e001      	b.n	0xc\n"
-            "   8:	def0      	udf	#240	; 0xf0\n"
-            "   a:	9abc      	ldr	r2, [sp, #752]	; 0x2f0\n"
-            "   c:	4b00      	ldr	r3, [pc, #0]	; (0x10)\n"
-            "   e:	e001      	b.n	0x14\n"
-            "  10:	5678      	ldrsb	r0, [r7, r1]\n"
-            "  12:	1234      	asrs	r4, r6, #8\n"
-            "  14:	47b8      	blx	r7\n"
-            "  16:	4607      	mov	r7, r0\n"
-            "  18:	bc05      	pop	{r0, r2}"
+            "   4:	4a06      	ldr	r2, [pc, #24]	@ (0x20)\n"
+            "   6:	4b07      	ldr	r3, [pc, #28]	@ (0x24)\n"
+            "   8:	47b8      	blx	r7\n"
+            "   a:	4607      	mov	r7, r0\n"
+            "   c:	bc05      	pop	{r0, r2}\n"
+            "   e:	6cd6      	ldr	r6, [r2, #76]	@ 0x4c\n"
+            "  10:	b082      	sub	sp, #8\n"
+            "  12:	9700      	str	r7, [sp, #0]\n"
+            "  14:	9902      	ldr	r1, [sp, #8]\n"
+            "  16:	2210      	movs	r2, #16\n"
+            "  18:	4b03      	ldr	r3, [pc, #12]	@ (0x28)\n"
+            "  1a:	47b0      	blx	r6\n"
+            "  1c:	b002      	add	sp, #8\n"
+            "  1e:	bdf2      	pop	{r1, r4, r5, r6, r7, pc}\n"
+            "  20:	def0      	udf	#240	@ 0xf0\n"
+            "  22:	9abc      	ldr	r2, [sp, #752]	@ 0x2f0\n"
+            "  24:	5678      	ldrsb	r0, [r7, r1]\n"
+            "  26:	1234      	asrs	r4, r6, #8\n"
+            "  28:	028b      	lsls	r3, r1, #10\n"
+            "  2a:	0000      	movs	r0, r0"
         >>,
     ?assertEqual(dump_to_bin(Dump), Stream).
 
diff --git a/tests/libs/jit/jit_riscv32_asm_tests.erl b/tests/libs/jit/jit_riscv32_asm_tests.erl
new file mode 100644
index 0000000000..28a0f4fa58
--- /dev/null
+++ b/tests/libs/jit/jit_riscv32_asm_tests.erl
@@ -0,0 +1,900 @@
+%
+% This file is part of AtomVM.
+%
+% Copyright 2025 Paul Guyot <pguyot@kallisys.net>
+%
+% Licensed under the Apache License, Version 2.0 (the "License");
+% you may not use this file except in compliance with the License.
+% You may obtain a copy of the License at
+%
+%    http://www.apache.org/licenses/LICENSE-2.0
+%
+% Unless required by applicable law or agreed to in writing, software
+% distributed under the License is distributed on an "AS IS" BASIS,
+% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+% See the License for the specific language governing permissions and
+% limitations under the License.
+%
+% SPDX-License-Identifier: Apache-2.0 OR LGPL-2.1-or-later
+%
+
+-module(jit_riscv32_asm_tests).
+
+-include_lib("eunit/include/eunit.hrl").
+
+-define(_assertAsmEqual(Bin, Str, Value),
+    ?_assertEqual(jit_tests_common:asm(riscv32, Bin, Str), Value)
+).
+
+%%-----------------------------------------------------------------------------
+%% R-type arithmetic and logical instruction tests
+%%-----------------------------------------------------------------------------
+
+add_test_() ->
+    [
+        ?_assertAsmEqual(
+            <<16#00628533:32/little>>, "add a0, t0, t1", jit_riscv32_asm:add(a0, t0, t1)
+        ),
+        ?_assertAsmEqual(
+            <<16#95aa:16/little>>, "add a1, a1, a0", jit_riscv32_asm:add(a1, a1, a0)
+        ),
+        ?_assertAsmEqual(
+            <<16#97fa:16/little>>, "add a5, a5, t5", jit_riscv32_asm:add(a5, a5, t5)
+        )
+    ].
+
+sub_test_() ->
+    [
+        ?_assertAsmEqual(
+            <<16#40628533:32/little>>, "sub a0, t0, t1", jit_riscv32_asm:sub(a0, t0, t1)
+        ),
+        ?_assertAsmEqual(
+            <<16#8d89:16/little>>, "sub a1, a1, a0", jit_riscv32_asm:sub(a1, a1, a0)
+        ),
+        ?_assertAsmEqual(
+            <<16#41e787b3:32/little>>, "sub a5, a5, t5", jit_riscv32_asm:sub(a5, a5, t5)
+        )
+    ].
+
+and_test_() ->
+    [
+        ?_assertAsmEqual(
+            <<16#0062f533:32/little>>, "and a0, t0, t1", jit_riscv32_asm:and_(a0, t0, t1)
+        ),
+        ?_assertAsmEqual(
+            <<16#8df1:16/little>>, "and a1, a1, a2", jit_riscv32_asm:and_(a1, a1, a2)
+        )
+    ].
+
+or_test_() ->
+    [
+        ?_assertAsmEqual(
+            <<16#0062e533:32/little>>, "or a0, t0, t1", jit_riscv32_asm:or_(a0, t0, t1)
+        ),
+        ?_assertAsmEqual(
+            <<16#8dd1:16/little>>, "or a1, a1, a2", jit_riscv32_asm:or_(a1, a1, a2)
+        )
+    ].
+
+xor_test_() ->
+    [
+        ?_assertAsmEqual(
+            <<16#0062c533:32/little>>, "xor a0, t0, t1", jit_riscv32_asm:xor_(a0, t0, t1)
+        ),
+        ?_assertAsmEqual(
+            <<16#8db1:16/little>>, "xor a1, a1, a2", jit_riscv32_asm:xor_(a1, a1, a2)
+        )
+    ].
+
+sll_test_() ->
+    [
+        ?_assertAsmEqual(
+            <<16#00629533:32/little>>, "sll a0, t0, t1", jit_riscv32_asm:sll(a0, t0, t1)
+        ),
+        ?_assertAsmEqual(
+            <<16#00c59633:32/little>>, "sll a2, a1, a2", jit_riscv32_asm:sll(a2, a1, a2)
+        )
+    ].
+
+srl_test_() ->
+    [
+        ?_assertAsmEqual(
+            <<16#0062d533:32/little>>, "srl a0, t0, t1", jit_riscv32_asm:srl(a0, t0, t1)
+        ),
+        ?_assertAsmEqual(
+            <<16#00c5d633:32/little>>, "srl a2, a1, a2", jit_riscv32_asm:srl(a2, a1, a2)
+        )
+    ].
+
+sra_test_() ->
+    [
+        ?_assertAsmEqual(
+            <<16#4062d533:32/little>>, "sra a0, t0, t1", jit_riscv32_asm:sra(a0, t0, t1)
+        ),
+        ?_assertAsmEqual(
+            <<16#40c5d633:32/little>>, "sra a2, a1, a2", jit_riscv32_asm:sra(a2, a1, a2)
+        )
+    ].
+
+slt_test_() ->
+    [
+        ?_assertAsmEqual(
+            <<16#0062a533:32/little>>, "slt a0, t0, t1", jit_riscv32_asm:slt(a0, t0, t1)
+        ),
+        ?_assertAsmEqual(
+            <<16#00c5a633:32/little>>, "slt a2, a1, a2", jit_riscv32_asm:slt(a2, a1, a2)
+        )
+    ].
+
+sltu_test_() ->
+    [
+        ?_assertAsmEqual(
+            <<16#0062b533:32/little>>, "sltu a0, t0, t1", jit_riscv32_asm:sltu(a0, t0, t1)
+        ),
+        ?_assertAsmEqual(
+            <<16#00c5b633:32/little>>, "sltu a2, a1, a2", jit_riscv32_asm:sltu(a2, a1, a2)
+        )
+    ].
+
+%%-----------------------------------------------------------------------------
+%% I-type immediate instruction tests
+%%-----------------------------------------------------------------------------
+
+addi_test_() ->
+    [
+        ?_assertAsmEqual(
+            <<16#01428513:32/little>>, "addi a0, t0, 20", jit_riscv32_asm:addi(a0, t0, 20)
+        ),
+        ?_assertAsmEqual(
+            <<16#15fd:16/little>>, "addi a1, a1, -1", jit_riscv32_asm:addi(a1, a1, -1)
+        ),
+        ?_assertAsmEqual(
+            <<16#7ff00513:32/little>>, "addi a0, zero, 2047", jit_riscv32_asm:addi(a0, zero, 2047)
+        ),
+        ?_assertAsmEqual(
+            <<16#80000593:32/little>>, "addi a1, zero, -2048", jit_riscv32_asm:addi(a1, zero, -2048)
+        )
+    ].
+
+andi_test_() ->
+    [
+        ?_assertAsmEqual(
+            <<16#0ff2f513:32/little>>, "andi a0, t0, 255", jit_riscv32_asm:andi(a0, t0, 255)
+        ),
+        ?_assertAsmEqual(
+            <<16#89bd:16/little>>, "andi a1, a1, 15", jit_riscv32_asm:andi(a1, a1, 15)
+        )
+    ].
+
+ori_test_() ->
+    [
+        ?_assertAsmEqual(
+            <<16#0ff2e513:32/little>>, "ori a0, t0, 255", jit_riscv32_asm:ori(a0, t0, 255)
+        ),
+        ?_assertAsmEqual(
+            <<16#00f5e593:32/little>>, "ori a1, a1, 15", jit_riscv32_asm:ori(a1, a1, 15)
+        )
+    ].
+
+xori_test_() ->
+    [
+        ?_assertAsmEqual(
+            <<16#0ff2c513:32/little>>, "xori a0, t0, 255", jit_riscv32_asm:xori(a0, t0, 255)
+        ),
+        ?_assertAsmEqual(
+            <<16#fff5c593:32/little>>, "xori a1, a1, -1", jit_riscv32_asm:xori(a1, a1, -1)
+        )
+    ].
+
+slli_test_() ->
+    [
+        ?_assertAsmEqual(
+            <<16#00329513:32/little>>, "slli a0, t0, 3", jit_riscv32_asm:slli(a0, t0, 3)
+        ),
+        ?_assertAsmEqual(
+            <<16#05fe:16/little>>, "slli a1, a1, 31", jit_riscv32_asm:slli(a1, a1, 31)
+        ),
+        ?_assertAsmEqual(
+            <<16#00051513:32/little>>, "slli a0, a0, 0", jit_riscv32_asm:slli(a0, a0, 0)
+        )
+    ].
+
+srli_test_() ->
+    [
+        ?_assertAsmEqual(
+            <<16#0032d513:32/little>>, "srli a0, t0, 3", jit_riscv32_asm:srli(a0, t0, 3)
+        ),
+        ?_assertAsmEqual(
+            <<16#81fd:16/little>>, "srli a1, a1, 31", jit_riscv32_asm:srli(a1, a1, 31)
+        )
+    ].
+
+srai_test_() ->
+    [
+        ?_assertAsmEqual(
+            <<16#4032d513:32/little>>, "srai a0, t0, 3", jit_riscv32_asm:srai(a0, t0, 3)
+        ),
+        ?_assertAsmEqual(
+            <<16#85fd:16/little>>, "srai a1, a1, 31", jit_riscv32_asm:srai(a1, a1, 31)
+        )
+    ].
+
+slti_test_() ->
+    [
+        ?_assertAsmEqual(
+            <<16#0142a513:32/little>>, "slti a0, t0, 20", jit_riscv32_asm:slti(a0, t0, 20)
+        ),
+        ?_assertAsmEqual(
+            <<16#fff5a593:32/little>>, "slti a1, a1, -1", jit_riscv32_asm:slti(a1, a1, -1)
+        )
+    ].
+
+sltiu_test_() ->
+    [
+        ?_assertAsmEqual(
+            <<16#0142b513:32/little>>, "sltiu a0, t0, 20", jit_riscv32_asm:sltiu(a0, t0, 20)
+        ),
+        ?_assertAsmEqual(
+            <<16#00153513:32/little>>, "sltiu a0, a0, 1", jit_riscv32_asm:sltiu(a0, a0, 1)
+        )
+    ].
+
+%%-----------------------------------------------------------------------------
+%% Load instruction tests
+%%-----------------------------------------------------------------------------
+
+lw_test_() ->
+    [
+        ?_assertAsmEqual(<<16#4108:16/little>>, "lw a0, 0(a0)", jit_riscv32_asm:lw(a0, a0, 0)),
+        ?_assertAsmEqual(<<16#4108:16/little>>, "lw a0, 0(a0)", jit_riscv32_asm:lw(a0, a0)),
+        ?_assertAsmEqual(<<16#414c:16/little>>, "lw a1, 4(a0)", jit_riscv32_asm:lw(a1, a0, 4)),
+        ?_assertAsmEqual(
+            <<16#ffc52503:32/little>>, "lw a0, -4(a0)", jit_riscv32_asm:lw(a0, a0, -4)
+        ),
+        ?_assertAsmEqual(
+            <<16#7ff52503:32/little>>, "lw a0, 2047(a0)", jit_riscv32_asm:lw(a0, a0, 2047)
+        )
+    ].
+
+lh_test_() ->
+    [
+        ?_assertAsmEqual(<<16#00051503:32/little>>, "lh a0, 0(a0)", jit_riscv32_asm:lh(a0, a0, 0)),
+        ?_assertAsmEqual(<<16#00051503:32/little>>, "lh a0, 0(a0)", jit_riscv32_asm:lh(a0, a0)),
+        ?_assertAsmEqual(<<16#00251583:32/little>>, "lh a1, 2(a0)", jit_riscv32_asm:lh(a1, a0, 2))
+    ].
+
+lhu_test_() ->
+    [
+        ?_assertAsmEqual(
+            <<16#00055503:32/little>>, "lhu a0, 0(a0)", jit_riscv32_asm:lhu(a0, a0, 0)
+        ),
+        ?_assertAsmEqual(<<16#00055503:32/little>>, "lhu a0, 0(a0)", jit_riscv32_asm:lhu(a0, a0)),
+        ?_assertAsmEqual(<<16#00255583:32/little>>, "lhu a1, 2(a0)", jit_riscv32_asm:lhu(a1, a0, 2))
+    ].
+
+lb_test_() ->
+    [
+        ?_assertAsmEqual(<<16#00050503:32/little>>, "lb a0, 0(a0)", jit_riscv32_asm:lb(a0, a0, 0)),
+        ?_assertAsmEqual(<<16#00050503:32/little>>, "lb a0, 0(a0)", jit_riscv32_asm:lb(a0, a0)),
+        ?_assertAsmEqual(<<16#00150583:32/little>>, "lb a1, 1(a0)", jit_riscv32_asm:lb(a1, a0, 1))
+    ].
+
+lbu_test_() ->
+    [
+        ?_assertAsmEqual(
+            <<16#00054503:32/little>>, "lbu a0, 0(a0)", jit_riscv32_asm:lbu(a0, a0, 0)
+        ),
+        ?_assertAsmEqual(<<16#00054503:32/little>>, "lbu a0, 0(a0)", jit_riscv32_asm:lbu(a0, a0)),
+        ?_assertAsmEqual(<<16#00154583:32/little>>, "lbu a1, 1(a0)", jit_riscv32_asm:lbu(a1, a0, 1))
+    ].
+
+%%-----------------------------------------------------------------------------
+%% Store instruction tests
+%%-----------------------------------------------------------------------------
+
+sw_test_() ->
+    [
+        ?_assertAsmEqual(<<16#c10c:16/little>>, "sw a1, 0(a0)", jit_riscv32_asm:sw(a0, a1, 0)),
+        ?_assertAsmEqual(<<16#c10c:16/little>>, "sw a1, 0(a0)", jit_riscv32_asm:sw(a1, a0)),
+        ?_assertAsmEqual(<<16#c14c:16/little>>, "sw a1, 4(a0)", jit_riscv32_asm:sw(a0, a1, 4)),
+        ?_assertAsmEqual(<<16#feb52e23:32/little>>, "sw a1, -4(a0)", jit_riscv32_asm:sw(a0, a1, -4))
+    ].
+
+sh_test_() ->
+    [
+        ?_assertAsmEqual(<<16#00b51023:32/little>>, "sh a1, 0(a0)", jit_riscv32_asm:sh(a0, a1, 0)),
+        ?_assertAsmEqual(<<16#00b51023:32/little>>, "sh a1, 0(a0)", jit_riscv32_asm:sh(a1, a0)),
+        ?_assertAsmEqual(<<16#00b51123:32/little>>, "sh a1, 2(a0)", jit_riscv32_asm:sh(a0, a1, 2))
+    ].
+
+sb_test_() ->
+    [
+        ?_assertAsmEqual(<<16#00b50023:32/little>>, "sb a1, 0(a0)", jit_riscv32_asm:sb(a0, a1, 0)),
+        ?_assertAsmEqual(<<16#00b50023:32/little>>, "sb a1, 0(a0)", jit_riscv32_asm:sb(a1, a0)),
+        ?_assertAsmEqual(<<16#00b500a3:32/little>>, "sb a1, 1(a0)", jit_riscv32_asm:sb(a0, a1, 1))
+    ].
+
+%%-----------------------------------------------------------------------------
+%% Branch instruction tests
+%%-----------------------------------------------------------------------------
+
+beq_test_() ->
+    [
+        ?_assertAsmEqual(
+            <<16#00628463:32/little>>, "beq t0, t1, .+8", jit_riscv32_asm:beq(t0, t1, 8)
+        ),
+        ?_assertAsmEqual(
+            <<16#feb50ee3:32/little>>, "beq a0, a1, .-4", jit_riscv32_asm:beq(a0, a1, -4)
+        ),
+        ?_assertAsmEqual(
+            <<16#c101:16/little>>, "beq a0, zero, .", jit_riscv32_asm:beq(a0, zero, 0)
+        )
+    ].
+
+bne_test_() ->
+    [
+        ?_assertAsmEqual(
+            <<16#00629463:32/little>>, "bne t0, t1, .+8", jit_riscv32_asm:bne(t0, t1, 8)
+        ),
+        ?_assertAsmEqual(
+            <<16#feb51ee3:32/little>>, "bne a0, a1, .-4", jit_riscv32_asm:bne(a0, a1, -4)
+        )
+    ].
+
+blt_test_() ->
+    [
+        ?_assertAsmEqual(
+            <<16#0062c463:32/little>>, "blt t0, t1, .+8", jit_riscv32_asm:blt(t0, t1, 8)
+        ),
+        ?_assertAsmEqual(
+            <<16#feb54ee3:32/little>>, "blt a0, a1, .-4", jit_riscv32_asm:blt(a0, a1, -4)
+        )
+    ].
+
+bge_test_() ->
+    [
+        ?_assertAsmEqual(
+            <<16#0062d463:32/little>>, "bge t0, t1, .+8", jit_riscv32_asm:bge(t0, t1, 8)
+        ),
+        ?_assertAsmEqual(
+            <<16#feb55ee3:32/little>>, "bge a0, a1, .-4", jit_riscv32_asm:bge(a0, a1, -4)
+        )
+    ].
+
+bltu_test_() ->
+    [
+        ?_assertAsmEqual(
+            <<16#0062e463:32/little>>, "bltu t0, t1, .+8", jit_riscv32_asm:bltu(t0, t1, 8)
+        ),
+        ?_assertAsmEqual(
+            <<16#feb56ee3:32/little>>, "bltu a0, a1, .-4", jit_riscv32_asm:bltu(a0, a1, -4)
+        )
+    ].
+
+bgeu_test_() ->
+    [
+        ?_assertAsmEqual(
+            <<16#0062f463:32/little>>, "bgeu t0, t1, .+8", jit_riscv32_asm:bgeu(t0, t1, 8)
+        ),
+        ?_assertAsmEqual(
+            <<16#feb57ee3:32/little>>, "bgeu a0, a1, .-4", jit_riscv32_asm:bgeu(a0, a1, -4)
+        )
+    ].
+
+%%-----------------------------------------------------------------------------
+%% Jump instruction tests
+%%-----------------------------------------------------------------------------
+
+jal_test_() ->
+    [
+        ?_assertAsmEqual(
+            <<16#2021:16/little>>, "jal .+8", jit_riscv32_asm:jal(ra, 8)
+        ),
+        ?_assertAsmEqual(
+            <<16#3ff5:16/little>>, "jal .-4", jit_riscv32_asm:jal(ra, -4)
+        ),
+        ?_assertAsmEqual(
+            <<16#00000517:32/little, 16#9502:16/little>>,
+            "auipc a0, 0\njalr a0",
+            jit_riscv32_asm:call(a0, 0)
+        ),
+        ?_assertAsmEqual(
+            <<16#00002517:32/little, 16#800500e7:32/little>>,
+            "auipc a0, 0x2\njalr -2048(a0)",
+            jit_riscv32_asm:call(a0, 16#1800)
+        )
+    ].
+
+jalr_test_() ->
+    [
+        ?_assertAsmEqual(<<16#9502:16/little>>, "jalr a0", jit_riscv32_asm:jalr(ra, a0, 0)),
+        ?_assertAsmEqual(<<16#9502:16/little>>, "jalr a0", jit_riscv32_asm:jalr(ra, a0)),
+        ?_assertAsmEqual(<<16#004500e7:32/little>>, "jalr 4(a0)", jit_riscv32_asm:jalr(ra, a0, 4))
+    ].
+
+%%-----------------------------------------------------------------------------
+%% Upper immediate instruction tests
+%%-----------------------------------------------------------------------------
+
+lui_test_() ->
+    [
+        ?_assertAsmEqual(<<16#65c9:16/little>>, "lui a1, 18", jit_riscv32_asm:lui(a1, 18)),
+        ?_assertAsmEqual(<<16#6505:16/little>>, "lui a0, 1", jit_riscv32_asm:lui(a0, 1)),
+        ?_assertAsmEqual(<<16#75fd:16/little>>, "lui a1, 0xfffff", jit_riscv32_asm:lui(a1, -1))
+    ].
+
+auipc_test_() ->
+    [
+        ?_assertAsmEqual(<<16#00012597:32/little>>, "auipc a1, 18", jit_riscv32_asm:auipc(a1, 18)),
+        ?_assertAsmEqual(<<16#00001517:32/little>>, "auipc a0, 1", jit_riscv32_asm:auipc(a0, 1))
+    ].
+
+%%-----------------------------------------------------------------------------
+%% Pseudo-instruction tests
+%%-----------------------------------------------------------------------------
+
+nop_test_() ->
+    [
+        % We want a 4-byte NOP for padding, so use .option norvc to force non-compressed
+        ?_assertAsmEqual(<<16#00000013:32/little>>, ".option norvc\nnop", jit_riscv32_asm:nop())
+    ].
+
+li_small_test_() ->
+    [
+        ?_assertAsmEqual(<<16#4529:16/little>>, "li a0, 10", jit_riscv32_asm:li(a0, 10)),
+        ?_assertAsmEqual(<<16#557d:16/little>>, "li a0, -1", jit_riscv32_asm:li(a0, -1)),
+        ?_assertAsmEqual(<<16#7ff00513:32/little>>, "li a0, 2047", jit_riscv32_asm:li(a0, 2047))
+    ].
+
+li_large_test_() ->
+    [
+        % 0x12345 = 74565 - requires lui + addi
+        ?_assertAsmEqual(
+            <<16#6549:16/little, 16#34550513:32/little>>,
+            "lui a0, 0x12\naddi a0, a0, 0x345",
+            jit_riscv32_asm:li(a0, 16#12345)
+        ),
+        % 0x80000000 = -2147483648 (minimum 32-bit signed)
+        ?_assertAsmEqual(
+            <<16#800005b7:32/little, 16#0581:16/little>>,
+            "lui a1, 0x80000\nc.addi a1, 0",
+            jit_riscv32_asm:li(a1, -16#80000000)
+        ),
+        % 0x7FFFFFFF = 2147483647 (maximum 32-bit signed)
+        ?_assertAsmEqual(
+            <<16#80000537:32/little, 16#157d:16/little>>,
+            "lui a0, 0x80000\naddi a0, a0, -1",
+            jit_riscv32_asm:li(a0, 16#7FFFFFFF)
+        )
+    ].
+
+mv_test_() ->
+    [
+        ?_assertAsmEqual(<<16#852a:16/little>>, "mv a0, a0", jit_riscv32_asm:mv(a0, a0)),
+        ?_assertAsmEqual(<<16#85ae:16/little>>, "mv a1, a1", jit_riscv32_asm:mv(a1, a1))
+    ].
+
+not_test_() ->
+    [
+        ?_assertAsmEqual(<<16#fff54513:32/little>>, "not a0, a0", jit_riscv32_asm:not_(a0, a0)),
+        ?_assertAsmEqual(<<16#fff5c593:32/little>>, "not a1, a1", jit_riscv32_asm:not_(a1, a1))
+    ].
+
+neg_test_() ->
+    [
+        ?_assertAsmEqual(<<16#40a00533:32/little>>, "neg a0, a0", jit_riscv32_asm:neg(a0, a0)),
+        ?_assertAsmEqual(<<16#40b005b3:32/little>>, "neg a1, a1", jit_riscv32_asm:neg(a1, a1))
+    ].
+
+j_test_() ->
+    [
+        ?_assertAsmEqual(
+            <<16#a021:16/little>>, "j .+8", jit_riscv32_asm:j(8)
+        ),
+        ?_assertAsmEqual(
+            <<16#bff5:16/little>>, "j .-4", jit_riscv32_asm:j(-4)
+        )
+    ].
+
+jr_test_() ->
+    [
+        ?_assertAsmEqual(<<16#8502:16/little>>, "jr a0", jit_riscv32_asm:jr(a0)),
+        ?_assertAsmEqual(<<16#8282:16/little>>, "jr t0", jit_riscv32_asm:jr(t0))
+    ].
+
+ret_test_() ->
+    [
+        ?_assertAsmEqual(<<16#8082:16/little>>, "ret", jit_riscv32_asm:ret())
+    ].
+
+%%-----------------------------------------------------------------------------
+%% M Extension (Multiply/Divide) instruction tests
+%%-----------------------------------------------------------------------------
+
+mul_test_() ->
+    [
+        ?_assertAsmEqual(
+            <<16#02f50533:32/little>>, "mul a0, a0, a5", jit_riscv32_asm:mul(a0, a0, a5)
+        ),
+        ?_assertAsmEqual(
+            <<16#03f60633:32/little>>, "mul a2, a2, t6", jit_riscv32_asm:mul(a2, a2, t6)
+        ),
+        ?_assertAsmEqual(
+            <<16#026585b3:32/little>>, "mul a1, a1, t1", jit_riscv32_asm:mul(a1, a1, t1)
+        ),
+        ?_assertAsmEqual(
+            <<16#02d282b3:32/little>>, "mul t0, t0, a3", jit_riscv32_asm:mul(t0, t0, a3)
+        )
+    ].
+
+%%-----------------------------------------------------------------------------
+%% System instruction tests
+%%-----------------------------------------------------------------------------
+
+c_ebreak_test_() ->
+    [
+        ?_assertAsmEqual(
+            <<16#9002:16/little>>, "c.ebreak", jit_riscv32_asm:c_ebreak()
+        )
+    ].
+
+%%-----------------------------------------------------------------------------
+%% C Extension - Arithmetic and Logical instruction tests
+%%-----------------------------------------------------------------------------
+
+c_add_test_() ->
+    [
+        ?_assertAsmEqual(
+            <<16#9532:16/little>>, "c.add a0, a2", jit_riscv32_asm:c_add(a0, a2)
+        ),
+        ?_assertAsmEqual(
+            <<16#95be:16/little>>, "c.add a1, a5", jit_riscv32_asm:c_add(a1, a5)
+        ),
+        ?_assertAsmEqual(
+            <<16#9522:16/little>>, "c.add a0, s0", jit_riscv32_asm:c_add(a0, s0)
+        )
+    ].
+
+c_mv_test_() ->
+    [
+        ?_assertAsmEqual(
+            <<16#8532:16/little>>, "c.mv a0, a2", jit_riscv32_asm:c_mv(a0, a2)
+        ),
+        ?_assertAsmEqual(
+            <<16#85be:16/little>>, "c.mv a1, a5", jit_riscv32_asm:c_mv(a1, a5)
+        ),
+        ?_assertAsmEqual(
+            <<16#842a:16/little>>, "c.mv s0, a0", jit_riscv32_asm:c_mv(s0, a0)
+        )
+    ].
+
+c_sub_test_() ->
+    [
+        ?_assertAsmEqual(
+            <<16#8d09:16/little>>, "c.sub a0, a0", jit_riscv32_asm:c_sub(a0, a0)
+        ),
+        ?_assertAsmEqual(
+            <<16#8d8d:16/little>>, "c.sub a1, a1", jit_riscv32_asm:c_sub(a1, a1)
+        ),
+        ?_assertAsmEqual(
+            <<16#8c0d:16/little>>, "c.sub s0, a1", jit_riscv32_asm:c_sub(s0, a1)
+        )
+    ].
+
+c_and_test_() ->
+    [
+        ?_assertAsmEqual(
+            <<16#8d6d:16/little>>, "c.and a0, a1", jit_riscv32_asm:c_and(a0, a1)
+        ),
+        ?_assertAsmEqual(
+            <<16#8fed:16/little>>, "c.and a5, a1", jit_riscv32_asm:c_and(a5, a1)
+        ),
+        ?_assertAsmEqual(
+            <<16#8c6d:16/little>>, "c.and s0, a1", jit_riscv32_asm:c_and(s0, a1)
+        )
+    ].
+
+c_or_test_() ->
+    [
+        ?_assertAsmEqual(
+            <<16#8d4d:16/little>>, "c.or a0, a1", jit_riscv32_asm:c_or(a0, a1)
+        ),
+        ?_assertAsmEqual(
+            <<16#8fcd:16/little>>, "c.or a5, a1", jit_riscv32_asm:c_or(a5, a1)
+        ),
+        ?_assertAsmEqual(
+            <<16#8c4d:16/little>>, "c.or s0, a1", jit_riscv32_asm:c_or(s0, a1)
+        )
+    ].
+
+c_xor_test_() ->
+    [
+        ?_assertAsmEqual(
+            <<16#8d2d:16/little>>, "c.xor a0, a1", jit_riscv32_asm:c_xor(a0, a1)
+        ),
+        ?_assertAsmEqual(
+            <<16#8fad:16/little>>, "c.xor a5, a1", jit_riscv32_asm:c_xor(a5, a1)
+        ),
+        ?_assertAsmEqual(
+            <<16#8c2d:16/little>>, "c.xor s0, a1", jit_riscv32_asm:c_xor(s0, a1)
+        )
+    ].
+
+%%-----------------------------------------------------------------------------
+%% C Extension - Immediate instruction tests
+%%-----------------------------------------------------------------------------
+
+c_addi_test_() ->
+    [
+        ?_assertAsmEqual(
+            <<16#0511:16/little>>, "c.addi a0, 4", jit_riscv32_asm:c_addi(a0, 4)
+        ),
+        ?_assertAsmEqual(
+            <<16#15fd:16/little>>, "c.addi a1, -1", jit_riscv32_asm:c_addi(a1, -1)
+        ),
+        ?_assertAsmEqual(
+            <<16#0541:16/little>>, "c.addi a0, 16", jit_riscv32_asm:c_addi(a0, 16)
+        ),
+        ?_assertAsmEqual(
+            <<16#1561:16/little>>, "c.addi a0, -8", jit_riscv32_asm:c_addi(a0, -8)
+        )
+    ].
+
+c_andi_test_() ->
+    [
+        ?_assertAsmEqual(
+            <<16#8929:16/little>>, "c.andi a0, 10", jit_riscv32_asm:c_andi(a0, 10)
+        ),
+        ?_assertAsmEqual(
+            <<16#99fd:16/little>>, "c.andi a1, -1", jit_riscv32_asm:c_andi(a1, -1)
+        ),
+        ?_assertAsmEqual(
+            <<16#8941:16/little>>, "c.andi a0, 16", jit_riscv32_asm:c_andi(a0, 16)
+        )
+    ].
+
+c_li_test_() ->
+    [
+        ?_assertAsmEqual(
+            <<16#4529:16/little>>, "c.li a0, 10", jit_riscv32_asm:c_li(a0, 10)
+        ),
+        ?_assertAsmEqual(
+            <<16#55fd:16/little>>, "c.li a1, -1", jit_riscv32_asm:c_li(a1, -1)
+        ),
+        ?_assertAsmEqual(
+            <<16#4505:16/little>>, "c.li a0, 1", jit_riscv32_asm:c_li(a0, 1)
+        ),
+        ?_assertAsmEqual(
+            <<16#5501:16/little>>, "c.li a0, -32", jit_riscv32_asm:c_li(a0, -32)
+        )
+    ].
+
+c_lui_test_() ->
+    [
+        ?_assertAsmEqual(
+            <<16#6529:16/little>>, "c.lui a0, 10", jit_riscv32_asm:c_lui(a0, 10)
+        ),
+        ?_assertAsmEqual(
+            <<16#75fd:16/little>>, "c.lui a1, 0xfffff", jit_riscv32_asm:c_lui(a1, -1)
+        ),
+        ?_assertAsmEqual(
+            <<16#6505:16/little>>, "c.lui a0, 1", jit_riscv32_asm:c_lui(a0, 1)
+        )
+    ].
+
+c_addi16sp_test_() ->
+    [
+        ?_assertAsmEqual(
+            <<16#6141:16/little>>, "c.addi16sp sp, 16", jit_riscv32_asm:c_addi16sp(16)
+        ),
+        ?_assertAsmEqual(
+            <<16#7101:16/little>>, "c.addi16sp sp, -512", jit_riscv32_asm:c_addi16sp(-512)
+        ),
+        ?_assertAsmEqual(
+            <<16#6161:16/little>>, "c.addi16sp sp, 80", jit_riscv32_asm:c_addi16sp(80)
+        )
+    ].
+
+c_addi4spn_test_() ->
+    [
+        ?_assertAsmEqual(
+            <<16#0048:16/little>>, "c.addi4spn a0, sp, 4", jit_riscv32_asm:c_addi4spn(a0, 4)
+        ),
+        ?_assertAsmEqual(
+            <<16#1010:16/little>>, "c.addi4spn a2, sp, 32", jit_riscv32_asm:c_addi4spn(a2, 32)
+        ),
+        ?_assertAsmEqual(
+            <<16#1ffc:16/little>>,
+            "c.addi4spn a5, sp, 1020",
+            jit_riscv32_asm:c_addi4spn(a5, 1020)
+        )
+    ].
+
+%%-----------------------------------------------------------------------------
+%% C Extension - Shift instruction tests
+%%-----------------------------------------------------------------------------
+
+c_slli_test_() ->
+    [
+        ?_assertAsmEqual(
+            <<16#050e:16/little>>, "c.slli a0, 3", jit_riscv32_asm:c_slli(a0, 3)
+        ),
+        ?_assertAsmEqual(
+            <<16#05fe:16/little>>, "c.slli a1, 31", jit_riscv32_asm:c_slli(a1, 31)
+        ),
+        ?_assertAsmEqual(
+            <<16#0542:16/little>>, "c.slli a0, 16", jit_riscv32_asm:c_slli(a0, 16)
+        )
+    ].
+
+c_srli_test_() ->
+    [
+        ?_assertAsmEqual(
+            <<16#810d:16/little>>, "c.srli a0, 3", jit_riscv32_asm:c_srli(a0, 3)
+        ),
+        ?_assertAsmEqual(
+            <<16#81fd:16/little>>, "c.srli a1, 31", jit_riscv32_asm:c_srli(a1, 31)
+        ),
+        ?_assertAsmEqual(
+            <<16#8141:16/little>>, "c.srli a0, 16", jit_riscv32_asm:c_srli(a0, 16)
+        )
+    ].
+
+c_srai_test_() ->
+    [
+        ?_assertAsmEqual(
+            <<16#850d:16/little>>, "c.srai a0, 3", jit_riscv32_asm:c_srai(a0, 3)
+        ),
+        ?_assertAsmEqual(
+            <<16#85fd:16/little>>, "c.srai a1, 31", jit_riscv32_asm:c_srai(a1, 31)
+        ),
+        ?_assertAsmEqual(
+            <<16#8541:16/little>>, "c.srai a0, 16", jit_riscv32_asm:c_srai(a0, 16)
+        )
+    ].
+
+%%-----------------------------------------------------------------------------
+%% C Extension - Load/Store instruction tests
+%%-----------------------------------------------------------------------------
+
+c_lw_test_() ->
+    [
+        ?_assertAsmEqual(
+            <<16#4188:16/little>>, "c.lw a0, 0(a1)", jit_riscv32_asm:c_lw(a0, {a1, 0})
+        ),
+        ?_assertAsmEqual(
+            <<16#41d8:16/little>>, "c.lw a4, 4(a1)", jit_riscv32_asm:c_lw(a4, {a1, 4})
+        ),
+        ?_assertAsmEqual(
+            <<16#5ffc:16/little>>, "c.lw a5, 124(a5)", jit_riscv32_asm:c_lw(a5, {a5, 124})
+        )
+    ].
+
+c_sw_test_() ->
+    [
+        ?_assertAsmEqual(
+            <<16#c188:16/little>>, "c.sw a0, 0(a1)", jit_riscv32_asm:c_sw(a0, {a1, 0})
+        ),
+        ?_assertAsmEqual(
+            <<16#c1d8:16/little>>, "c.sw a4, 4(a1)", jit_riscv32_asm:c_sw(a4, {a1, 4})
+        ),
+        ?_assertAsmEqual(
+            <<16#dffc:16/little>>, "c.sw a5, 124(a5)", jit_riscv32_asm:c_sw(a5, {a5, 124})
+        )
+    ].
+
+c_lwsp_test_() ->
+    [
+        ?_assertAsmEqual(
+            <<16#4502:16/little>>, "c.lwsp a0, 0(sp)", jit_riscv32_asm:c_lwsp(a0, 0)
+        ),
+        ?_assertAsmEqual(
+            <<16#4512:16/little>>, "c.lwsp a0, 4(sp)", jit_riscv32_asm:c_lwsp(a0, 4)
+        ),
+        ?_assertAsmEqual(
+            <<16#50fe:16/little>>, "c.lwsp ra, 252(sp)", jit_riscv32_asm:c_lwsp(ra, 252)
+        )
+    ].
+
+c_swsp_test_() ->
+    [
+        ?_assertAsmEqual(
+            <<16#c02a:16/little>>, "c.swsp a0, 0(sp)", jit_riscv32_asm:c_swsp(a0, 0)
+        ),
+        ?_assertAsmEqual(
+            <<16#c22a:16/little>>, "c.swsp a0, 4(sp)", jit_riscv32_asm:c_swsp(a0, 4)
+        ),
+        ?_assertAsmEqual(
+            <<16#dfe6:16/little>>, "c.swsp s9, 252(sp)", jit_riscv32_asm:c_swsp(s9, 252)
+        )
+    ].
+
+%%-----------------------------------------------------------------------------
+%% C Extension - Branch and Jump instruction tests
+%%-----------------------------------------------------------------------------
+
+c_beqz_test_() ->
+    [
+        ?_assertAsmEqual(
+            <<16#c111:16/little>>, "c.beqz a0, .+4", jit_riscv32_asm:c_beqz(a0, 4)
+        ),
+        ?_assertAsmEqual(
+            <<16#dced:16/little>>, "c.beqz s1, .-6", jit_riscv32_asm:c_beqz(s1, -6)
+        ),
+        ?_assertAsmEqual(
+            <<16#c101:16/little>>, "c.beqz a0, .", jit_riscv32_asm:c_beqz(a0, 0)
+        )
+    ].
+
+c_bnez_test_() ->
+    [
+        ?_assertAsmEqual(
+            <<16#e111:16/little>>, "c.bnez a0, .+4", jit_riscv32_asm:c_bnez(a0, 4)
+        ),
+        ?_assertAsmEqual(
+            <<16#fced:16/little>>, "c.bnez s1, .-6", jit_riscv32_asm:c_bnez(s1, -6)
+        ),
+        ?_assertAsmEqual(
+            <<16#e101:16/little>>, "c.bnez a0, .", jit_riscv32_asm:c_bnez(a0, 0)
+        )
+    ].
+
+c_j_test_() ->
+    [
+        ?_assertAsmEqual(
+            <<16#a011:16/little>>, "c.j .+4", jit_riscv32_asm:c_j(4)
+        ),
+        ?_assertAsmEqual(
+            <<16#bfed:16/little>>, "c.j .-6", jit_riscv32_asm:c_j(-6)
+        ),
+        ?_assertAsmEqual(
+            <<16#a001:16/little>>, "c.j .", jit_riscv32_asm:c_j(0)
+        )
+    ].
+
+c_jal_test_() ->
+    [
+        ?_assertAsmEqual(
+            <<16#2021:16/little>>, "c.jal .+8", jit_riscv32_asm:c_jal(8)
+        ),
+        ?_assertAsmEqual(
+            <<16#3ff5:16/little>>, "c.jal .-4", jit_riscv32_asm:c_jal(-4)
+        ),
+        ?_assertAsmEqual(
+            <<16#2001:16/little>>, "c.jal .", jit_riscv32_asm:c_jal(0)
+        )
+    ].
+
+c_jr_test_() ->
+    [
+        ?_assertAsmEqual(
+            <<16#8502:16/little>>, "c.jr a0", jit_riscv32_asm:c_jr(a0)
+        ),
+        ?_assertAsmEqual(
+            <<16#8402:16/little>>, "c.jr s0", jit_riscv32_asm:c_jr(s0)
+        ),
+        ?_assertAsmEqual(
+            <<16#8082:16/little>>, "c.jr ra", jit_riscv32_asm:c_jr(ra)
+        )
+    ].
+
+c_jalr_test_() ->
+    [
+        ?_assertAsmEqual(
+            <<16#9502:16/little>>, "c.jalr a0", jit_riscv32_asm:c_jalr(a0)
+        ),
+        ?_assertAsmEqual(
+            <<16#9402:16/little>>, "c.jalr s0", jit_riscv32_asm:c_jalr(s0)
+        )
+    ].
+
+%%-----------------------------------------------------------------------------
+%% C Extension - Pseudo-instruction tests
+%%-----------------------------------------------------------------------------
+
+c_nop_test_() ->
+    [
+        ?_assertAsmEqual(
+            <<16#0001:16/little>>, "c.nop", jit_riscv32_asm:c_nop()
+        )
+    ].
diff --git a/tests/libs/jit/jit_riscv32_tests.erl b/tests/libs/jit/jit_riscv32_tests.erl
new file mode 100644
index 0000000000..ab13f91c28
--- /dev/null
+++ b/tests/libs/jit/jit_riscv32_tests.erl
@@ -0,0 +1,3640 @@
+%
+% This file is part of AtomVM.
+%
+% Copyright 2025 Paul Guyot <pguyot@kallisys.net>
+%
+% Licensed under the Apache License, Version 2.0 (the "License");
+% you may not use this file except in compliance with the License.
+% You may obtain a copy of the License at
+%
+%    http://www.apache.org/licenses/LICENSE-2.0
+%
+% Unless required by applicable law or agreed to in writing, software
+% distributed under the License is distributed on an "AS IS" BASIS,
+% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+% See the License for the specific language governing permissions and
+% limitations under the License.
+%
+% SPDX-License-Identifier: Apache-2.0 OR LGPL-2.1-or-later
+%
+
+-module(jit_riscv32_tests).
+
+-ifdef(TEST).
+-include_lib("eunit/include/eunit.hrl").
+-endif.
+
+-include("jit/include/jit.hrl").
+-include("jit/src/term.hrl").
+-include("jit/src/default_atoms.hrl").
+-include("jit/src/primitives.hrl").
+
+-define(BACKEND, jit_riscv32).
+
+% disassembly obtained with:
+% riscv32-unknown-elf-objdump -b binary -m riscv:rv32 -D dump.bin
+
+% Primitive 0 with [ctx, jit_state]: ra and a0-a2 are saved around the call, result lands in t6.
+call_primitive_0_test() ->
+    St0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)),
+    {St1, ResultReg} = ?BACKEND:call_primitive(St0, 0, [ctx, jit_state]),
+    ?assertEqual(t6, ResultReg),
+    Code = ?BACKEND:stream(St1),
+    Expected = <<
+        "   0:  00062f83            lw  t6,0(a2)\n"
+        "   4:  1141                addi    sp,sp,-16\n"
+        "   6:  c006                sw  ra,0(sp)\n"
+        "   8:  c22a                sw  a0,4(sp)\n"
+        "   a:  c42e                sw  a1,8(sp)\n"
+        "   c:  c632                sw  a2,12(sp)\n"
+        "   e:  9f82                jalr    t6\n"
+        "  10:  8faa                mv  t6,a0\n"
+        "  12:  4082                lw  ra,0(sp)\n"
+        "  14:  4512                lw  a0,4(sp)\n"
+        "  16:  45a2                lw  a1,8(sp)\n"
+        "  18:  4632                lw  a2,12(sp)\n"
+        "  1a:  0141                addi    sp,sp,16"
+    >>,
+    ?assertEqual(dump_to_bin(Expected), Code).
+
+% Primitive 1 with [ctx, jit_state]: the function pointer is loaded from 4(a2), result in t6.
+call_primitive_1_test() ->
+    St0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)),
+    {St1, ResultReg} = ?BACKEND:call_primitive(St0, 1, [ctx, jit_state]),
+    ?assertEqual(t6, ResultReg),
+    Code = ?BACKEND:stream(St1),
+    Expected = <<
+        "   0:  00462f83            lw  t6,4(a2)\n"
+        "   4:  1141                addi    sp,sp,-16\n"
+        "   6:  c006                sw  ra,0(sp)\n"
+        "   8:  c22a                sw  a0,4(sp)\n"
+        "   a:  c42e                sw  a1,8(sp)\n"
+        "   c:  c632                sw  a2,12(sp)\n"
+        "   e:  9f82                jalr    t6\n"
+        "  10:  8faa                mv  t6,a0\n"
+        "  12:  4082                lw  ra,0(sp)\n"
+        "  14:  4512                lw  a0,4(sp)\n"
+        "  16:  45a2                lw  a1,8(sp)\n"
+        "  18:  4632                lw  a2,12(sp)\n"
+        "  1a:  0141                addi    sp,sp,16"
+    >>,
+    ?assertEqual(dump_to_bin(Expected), Code).
+
+% Primitive 2 with three immediates: they are loaded into a1-a3 once the registers are saved.
+call_primitive_2_args_test() ->
+    St0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)),
+    {St1, ResultReg} = ?BACKEND:call_primitive(St0, 2, [ctx, 42, 43, 44]),
+    ?assertEqual(t6, ResultReg),
+    Code = ?BACKEND:stream(St1),
+    Expected = <<
+        "   0:  00862f83            lw  t6,8(a2)\n"
+        "   4:  1141                addi    sp,sp,-16\n"
+        "   6:  c006                sw  ra,0(sp)\n"
+        "   8:  c22a                sw  a0,4(sp)\n"
+        "   a:  c42e                sw  a1,8(sp)\n"
+        "   c:  c632                sw  a2,12(sp)\n"
+        "   e:  02a00593            li  a1,42\n"
+        "  12:  02b00613            li  a2,43\n"
+        "  16:  02c00693            li  a3,44\n"
+        "  1a:  9f82                jalr    t6\n"
+        "  1c:  8faa                mv  t6,a0\n"
+        "  1e:  4082                lw  ra,0(sp)\n"
+        "  20:  4512                lw  a0,4(sp)\n"
+        "  22:  45a2                lw  a1,8(sp)\n"
+        "  24:  4632                lw  a2,12(sp)\n"
+        "  26:  0141                addi    sp,sp,16"
+    >>,
+    ?assertEqual(dump_to_bin(Expected), Code).
+
+% Tail call to ?PRIM_ALLOCATE: the three immediates go to a2-a4, then control jumps via t6.
+call_primitive_5_args_test() ->
+    St0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)),
+    St1 = ?BACKEND:call_primitive_last(St0, ?PRIM_ALLOCATE, [ctx, jit_state, 16, 32, 2]),
+    Code = ?BACKEND:stream(St1),
+    Expected = <<
+        "   0:  01462f83            lw  t6,20(a2)\n"
+        "   4:  4641                li  a2,16\n"
+        "   6:  02000693            li  a3,32\n"
+        "   a:  4709                li  a4,2\n"
+        "   c:  8f82                jr  t6"
+    >>,
+    ?assertEqual(dump_to_bin(Expected), Code).
+
+% Six-argument primitive call mixing ctx, jit_state, freed registers and immediates.
+call_primitive_6_args_test() ->
+    St0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)),
+    % Load x_reg 0 and mask it with ?TERM_PRIMARY_CLEAR_MASK in place (same register is reused)
+    {St1, BinReg} = ?BACKEND:move_to_native_register(St0, {x_reg, 0}),
+    {St2, BinReg} = ?BACKEND:and_(St1, {free, BinReg}, ?TERM_PRIMARY_CLEAR_MASK),
+    % A second native register, passed as last parameter, exercises {free, Reg} handling
+    {St3, LastReg} = ?BACKEND:move_to_native_register(St2, {x_reg, 1}),
+    % ?PRIM_BITSTRING_EXTRACT_INTEGER is called with all six arguments
+    {St4, _ResultReg} = ?BACKEND:call_primitive(St3, ?PRIM_BITSTRING_EXTRACT_INTEGER, [
+        ctx, jit_state, {free, BinReg}, 64, 8, {free, LastReg}
+    ]),
+    Code = ?BACKEND:stream(St4),
+    Expected = <<
+        "   0:  01852f83            lw  t6,24(a0)\n"
+        "   4:  4f0d                li  t5,3\n"
+        "   6:  ffff4f13            not t5,t5\n"
+        "   a:  01efffb3            and t6,t6,t5\n"
+        "   e:  01c52f03            lw  t5,28(a0)\n"
+        "  12:  0b800e93            li  t4,184\n"
+        "  16:  9eb2                add t4,t4,a2\n"
+        "  18:  000eae83            lw  t4,0(t4)\n"
+        "  1c:  1141                addi    sp,sp,-16\n"
+        "  1e:  c006                sw  ra,0(sp)\n"
+        "  20:  c22a                sw  a0,4(sp)\n"
+        "  22:  c42e                sw  a1,8(sp)\n"
+        "  24:  c632                sw  a2,12(sp)\n"
+        "  26:  867e                mv  a2,t6\n"
+        "  28:  04000693            li  a3,64\n"
+        "  2c:  4721                li  a4,8\n"
+        "  2e:  87fa                mv  a5,t5\n"
+        "  30:  9e82                jalr    t4\n"
+        "  32:  8eaa                mv  t4,a0\n"
+        "  34:  4082                lw  ra,0(sp)\n"
+        "  36:  4512                lw  a0,4(sp)\n"
+        "  38:  45a2                lw  a1,8(sp)\n"
+        "  3a:  4632                lw  a2,12(sp)\n"
+        "  3c:  0141                addi    sp,sp,16"
+    >>,
+    ?assertEqual(dump_to_bin(Expected), Code).
+
+% Three extended-register pointers stay live across calls; two are consumed by ?PRIM_PUT_LIST.
+call_primitive_extended_regs_test() ->
+    St0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)),
+    {St1, PtrA} = ?BACKEND:call_primitive(St0, ?PRIM_EXTENDED_REGISTER_PTR, [ctx, 19]),
+    {St2, PtrB} = ?BACKEND:call_primitive(St1, ?PRIM_EXTENDED_REGISTER_PTR, [ctx, 20]),
+    {St3, PtrC} = ?BACKEND:call_primitive(St2, ?PRIM_EXTENDED_REGISTER_PTR, [ctx, 19]),
+    PutListArgs = [ctx, {free, {ptr, PtrA}}, {free, {ptr, PtrB}}],
+    {St4, ResultReg} = ?BACKEND:call_primitive(St3, ?PRIM_PUT_LIST, PutListArgs),
+    St5 = ?BACKEND:move_to_vm_register(St4, ResultReg, {ptr, PtrC}),
+    St6 = ?BACKEND:free_native_registers(St5, [ResultReg, {ptr, PtrC}]),
+    ?BACKEND:assert_all_native_free(St6),
+    Code = ?BACKEND:stream(St6),
+    Expected = <<
+        "   0:  04862f83            lw  t6,72(a2)\n"
+        "   4:  1141                addi    sp,sp,-16\n"
+        "   6:  c006                sw  ra,0(sp)\n"
+        "   8:  c22a                sw  a0,4(sp)\n"
+        "   a:  c42e                sw  a1,8(sp)\n"
+        "   c:  c632                sw  a2,12(sp)\n"
+        "   e:  45cd                li  a1,19\n"
+        "  10:  9f82                jalr    t6\n"
+        "  12:  8faa                mv  t6,a0\n"
+        "  14:  4082                lw  ra,0(sp)\n"
+        "  16:  4512                lw  a0,4(sp)\n"
+        "  18:  45a2                lw  a1,8(sp)\n"
+        "  1a:  4632                lw  a2,12(sp)\n"
+        "  1c:  0141                addi    sp,sp,16\n"
+        "  1e:  04862f03            lw  t5,72(a2)\n"
+        "  22:  1101                addi    sp,sp,-32\n"
+        "  24:  c006                sw  ra,0(sp)\n"
+        "  26:  c22a                sw  a0,4(sp)\n"
+        "  28:  c42e                sw  a1,8(sp)\n"
+        "  2a:  c632                sw  a2,12(sp)\n"
+        "  2c:  c87e                sw  t6,16(sp)\n"
+        "  2e:  45d1                li  a1,20\n"
+        "  30:  9f02                jalr    t5\n"
+        "  32:  8f2a                mv  t5,a0\n"
+        "  34:  4082                lw  ra,0(sp)\n"
+        "  36:  4512                lw  a0,4(sp)\n"
+        "  38:  45a2                lw  a1,8(sp)\n"
+        "  3a:  4632                lw  a2,12(sp)\n"
+        "  3c:  4fc2                lw  t6,16(sp)\n"
+        "  3e:  02010113            addi    sp,sp,32\n"
+        "  42:  04862e83            lw  t4,72(a2)\n"
+        "  46:  1101                addi    sp,sp,-32\n"
+        "  48:  c006                sw  ra,0(sp)\n"
+        "  4a:  c22a                sw  a0,4(sp)\n"
+        "  4c:  c42e                sw  a1,8(sp)\n"
+        "  4e:  c632                sw  a2,12(sp)\n"
+        "  50:  c87a                sw  t5,16(sp)\n"
+        "  52:  ca7e                sw  t6,20(sp)\n"
+        "  54:  45cd                li  a1,19\n"
+        "  56:  9e82                jalr    t4\n"
+        "  58:  8eaa                mv  t4,a0\n"
+        "  5a:  4082                lw  ra,0(sp)\n"
+        "  5c:  4512                lw  a0,4(sp)\n"
+        "  5e:  45a2                lw  a1,8(sp)\n"
+        "  60:  4632                lw  a2,12(sp)\n"
+        "  62:  4f42                lw  t5,16(sp)\n"
+        "  64:  4fd2                lw  t6,20(sp)\n"
+        "  66:  02010113            addi    sp,sp,32\n"
+        "  6a:  03462e03            lw  t3,52(a2)\n"
+        "  6e:  1101                addi    sp,sp,-32\n"
+        "  70:  c006                sw  ra,0(sp)\n"
+        "  72:  c22a                sw  a0,4(sp)\n"
+        "  74:  c42e                sw  a1,8(sp)\n"
+        "  76:  c632                sw  a2,12(sp)\n"
+        "  78:  c876                sw  t4,16(sp)\n"
+        "  7a:  000fa583            lw  a1,0(t6)\n"
+        "  7e:  000f2603            lw  a2,0(t5)\n"
+        "  82:  9e02                jalr    t3\n"
+        "  84:  8e2a                mv  t3,a0\n"
+        "  86:  4082                lw  ra,0(sp)\n"
+        "  88:  4512                lw  a0,4(sp)\n"
+        "  8a:  45a2                lw  a1,8(sp)\n"
+        "  8c:  4632                lw  a2,12(sp)\n"
+        "  8e:  4ec2                lw  t4,16(sp)\n"
+        "  90:  02010113            addi    sp,sp,32\n"
+        "  94:  01cea023            sw  t3,0(t4)"
+    >>,
+    ?assertEqual(dump_to_bin(Expected), Code).
+
+% Five native registers are busy before the call; only those not passed as {free, _} are saved.
+call_primitive_few_free_regs_test() ->
+    St0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)),
+    {St1, t6} = ?BACKEND:move_to_native_register(St0, 1),
+    {St2, t5} = ?BACKEND:move_to_native_register(St1, 2),
+    {St3, t4} = ?BACKEND:move_to_native_register(St2, 3),
+    {St4, t3} = ?BACKEND:move_to_native_register(St3, 4),
+    {St5, t2} = ?BACKEND:move_to_native_register(St4, 5),
+    Args = [t5, t6, {free, t3}, t4, {free, t2}],
+    {St6, ResultReg} = ?BACKEND:call_primitive(St5, ?PRIM_BITSTRING_INSERT_INTEGER, Args),
+    St7 = ?BACKEND:free_native_registers(St6, [ResultReg, t5, t6, t4]),
+    ?BACKEND:assert_all_native_free(St7),
+    Code = ?BACKEND:stream(St7),
+    Expected = <<
+        "   0:  4f85                li  t6,1\n"
+        "   2:  4f09                li  t5,2\n"
+        "   4:  4e8d                li  t4,3\n"
+        "   6:  4e11                li  t3,4\n"
+        "   8:  4395                li  t2,5\n"
+        "   a:  0e400313            li  t1,228\n"
+        "   e:  9332                add t1,t1,a2\n"
+        "  10:  00032303            lw  t1,0(t1)\n"
+        "  14:  1101                addi    sp,sp,-32\n"
+        "  16:  c006                sw  ra,0(sp)\n"
+        "  18:  c22a                sw  a0,4(sp)\n"
+        "  1a:  c42e                sw  a1,8(sp)\n"
+        "  1c:  c632                sw  a2,12(sp)\n"
+        "  1e:  c876                sw  t4,16(sp)\n"
+        "  20:  ca7a                sw  t5,20(sp)\n"
+        "  22:  cc7e                sw  t6,24(sp)\n"
+        "  24:  857a                mv  a0,t5\n"
+        "  26:  85fe                mv  a1,t6\n"
+        "  28:  8672                mv  a2,t3\n"
+        "  2a:  86f6                mv  a3,t4\n"
+        "  2c:  871e                mv  a4,t2\n"
+        "  2e:  9302                jalr    t1\n"
+        "  30:  832a                mv  t1,a0\n"
+        "  32:  4082                lw  ra,0(sp)\n"
+        "  34:  4512                lw  a0,4(sp)\n"
+        "  36:  45a2                lw  a1,8(sp)\n"
+        "  38:  4632                lw  a2,12(sp)\n"
+        "  3a:  4ec2                lw  t4,16(sp)\n"
+        "  3c:  4f52                lw  t5,20(sp)\n"
+        "  3e:  4fe2                lw  t6,24(sp)\n"
+        "  40:  02010113            addi    sp,sp,32"
+    >>,
+    ?assertEqual(dump_to_bin(Expected), Code).
+
+% Reduction check followed by a tail call to ?PRIM_CALL_EXT with -1 as last argument.
+call_ext_only_test() ->
+    St0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)),
+    St1 = ?BACKEND:decrement_reductions_and_maybe_schedule_next(St0),
+    St2 = ?BACKEND:call_primitive_last(St1, ?PRIM_CALL_EXT, [ctx, jit_state, offset, 2, 2, -1]),
+    Expected = <<
+        "   0:  0085af83            lw  t6,8(a1)\n"
+        "   4:  1ffd                addi    t6,t6,-1\n"
+        "   6:  01f5a423            sw  t6,8(a1)\n"
+        "   a:  000f9b63            bnez    t6,0x20\n"
+        "   e:  00000f97            auipc   t6,0x0\n"
+        "  12:  0fc9                addi    t6,t6,18 # 0x20\n"
+        "  14:  0001                nop\n"
+        "  16:  01f5a223            sw  t6,4(a1)\n"
+        "  1a:  00862f83            lw  t6,8(a2)\n"
+        "  1e:  8f82                jr  t6\n"
+        "  20:  01062f83            lw  t6,16(a2)\n"
+        "  24:  02400613            li  a2,36\n"
+        "  28:  4689                li  a3,2\n"
+        "  2a:  4709                li  a4,2\n"
+        "  2c:  57fd                li  a5,-1\n"
+        "  2e:  8f82                jr  t6"
+    >>,
+    ?assertEqual(dump_to_bin(Expected), ?BACKEND:stream(St2)).
+
+% Tail call to ?PRIM_RAISE_ERROR_TUPLE taking its last argument from a freed native register.
+call_primitive_last_5_args_test() ->
+    St0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)),
+    {St1, ValueReg} = ?BACKEND:move_to_native_register(St0, {x_reg, 0}),
+    St2 = ?BACKEND:call_primitive_last(St1, ?PRIM_RAISE_ERROR_TUPLE, [
+        ctx, jit_state, offset, ?CASE_CLAUSE_ATOM, {free, ValueReg}
+    ]),
+    Expected = <<
+        "   0: 01852f83            lw  t6,24(a0)\n"
+        "   4: 04c62f03            lw  t5,76(a2)\n"
+        "   8: 4621                li  a2,8\n"
+        "   a: 2cb00693            li  a3,715\n"
+        "   e: 877e                mv  a4,t6\n"
+        "  10: 8f02                jr  t5"
+    >>,
+    ?assertEqual(dump_to_bin(Expected), ?BACKEND:stream(St2)).
+
+% Reduction check followed by a tail call to ?PRIM_CALL_EXT with 10 as last argument.
+call_ext_last_test() ->
+    St0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)),
+    St1 = ?BACKEND:decrement_reductions_and_maybe_schedule_next(St0),
+    St2 = ?BACKEND:call_primitive_last(St1, ?PRIM_CALL_EXT, [ctx, jit_state, offset, 2, 2, 10]),
+    Expected = <<
+        "   0: 0085af83            lw  t6,8(a1)\n"
+        "   4: 1ffd                    addi    t6,t6,-1\n"
+        "   6: 01f5a423            sw  t6,8(a1)\n"
+        "   a: 000f9b63            bnez    t6,0x20\n"
+        "   e: 00000f97            auipc   t6,0x0\n"
+        "   12:    0fc9                    addi    t6,t6,18 # 0x20\n"
+        "   14:    0001                    nop\n"
+        "   16:    01f5a223            sw  t6,4(a1)\n"
+        "   1a:    00862f83            lw  t6,8(a2)\n"
+        "   1e:    8f82                    jr  t6\n"
+        "   20:    01062f83            lw  t6,16(a2)\n"
+        "   24:    02400613            li  a2,36\n"
+        "   28:    4689                    li  a3,2\n"
+        "   2a:    4709                    li  a4,2\n"
+        "   2c:    47a9                    li  a5,10\n"
+        "   2e:    8f82                    jr  t6"
+    >>,
+    ?assertEqual(dump_to_bin(Expected), ?BACKEND:stream(St2)).
+
+% Tail call to primitive 0 with a single immediate argument, loaded into a2.
+call_primitive_last_test() ->
+    St0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)),
+    St1 = ?BACKEND:call_primitive_last(St0, 0, [ctx, jit_state, 42]),
+    Code = ?BACKEND:stream(St1),
+    Expected = <<
+        "   0: 00062f83            lw  t6,0(a2)\n"
+        "   4: 02a00613            li  a2,42\n"
+        "   8: 8f82                    jr  t6"
+    >>,
+    ?assertEqual(dump_to_bin(Expected), Code).
+
+% Emits code that returns early when a primitive's result differs from a0.
+% Both cases start from the same fresh state created by the setup function.
+return_if_not_equal_to_ctx_test_() ->
+    {setup,
+        fun() ->
+            ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0))
+        end,
+        fun(St0) ->
+            [
+                % The result register itself is compared with a0 and freed: a match
+                % branches over the `mv a0,t6; ret` sequence.
+                ?_test(begin
+                    {St1, ResultReg} = ?BACKEND:call_primitive(
+                        St0, ?PRIM_PROCESS_SIGNAL_MESSAGES, [ctx, jit_state]
+                    ),
+                    ?assertEqual(t6, ResultReg),
+                    St2 = ?BACKEND:return_if_not_equal_to_ctx(St1, {free, ResultReg}),
+                    Code = ?BACKEND:stream(St2),
+                    Expected = <<
+                        "   0:  05462f83            lw  t6,84(a2)\n"
+                        "   4:  1141                addi    sp,sp,-16\n"
+                        "   6:  c006                sw  ra,0(sp)\n"
+                        "   8:  c22a                sw  a0,4(sp)\n"
+                        "   a:  c42e                sw  a1,8(sp)\n"
+                        "   c:  c632                sw  a2,12(sp)\n"
+                        "   e:  9f82                jalr    t6\n"
+                        "  10:  8faa                mv  t6,a0\n"
+                        "  12:  4082                lw  ra,0(sp)\n"
+                        "  14:  4512                lw  a0,4(sp)\n"
+                        "  16:  45a2                lw  a1,8(sp)\n"
+                        "  18:  4632                lw  a2,12(sp)\n"
+                        "  1a:  0141                addi    sp,sp,16\n"
+                        "  1c:  00af8463            beq t6,a0,0x24\n"
+                        "  20:  857e                mv  a0,t6\n"
+                        "  22:  8082                ret"
+                    >>,
+                    ?assertEqual(dump_to_bin(Expected), Code)
+                end),
+                % Same check on a copy of the result held in t5: the copy is what
+                % gets compared with a0 and returned.
+                ?_test(begin
+                    {St1, ResultReg} = ?BACKEND:call_primitive(
+                        St0, ?PRIM_PROCESS_SIGNAL_MESSAGES, [ctx, jit_state]
+                    ),
+                    ?assertEqual(t6, ResultReg),
+                    {St2, CopyReg} = ?BACKEND:copy_to_native_register(St1, ResultReg),
+                    ?assertEqual(t5, CopyReg),
+                    St3 = ?BACKEND:return_if_not_equal_to_ctx(St2, {free, CopyReg}),
+                    Code = ?BACKEND:stream(St3),
+                    Expected = <<
+                        "   0:  05462f83            lw  t6,84(a2)\n"
+                        "   4:  1141                addi    sp,sp,-16\n"
+                        "   6:  c006                sw  ra,0(sp)\n"
+                        "   8:  c22a                sw  a0,4(sp)\n"
+                        "   a:  c42e                sw  a1,8(sp)\n"
+                        "   c:  c632                sw  a2,12(sp)\n"
+                        "   e:  9f82                jalr    t6\n"
+                        "  10:  8faa                mv  t6,a0\n"
+                        "  12:  4082                lw  ra,0(sp)\n"
+                        "  14:  4512                lw  a0,4(sp)\n"
+                        "  16:  45a2                lw  a1,8(sp)\n"
+                        "  18:  4632                lw  a2,12(sp)\n"
+                        "  1a:  0141                addi    sp,sp,16\n"
+                        "  1c:  8f7e                mv  t5,t6\n"
+                        "  1e:  00af0463            beq t5,a0,0x26\n"
+                        "  22:  857a                mv  a0,t5\n"
+                        "  24:  8082                ret"
+                    >>,
+                    ?assertEqual(dump_to_bin(Expected), Code)
+                end)
+            ]
+        end}.
+
+% Moving {y_reg, 0} to cp loads the word pointed to by 20(a0) and stores it at 92(a0).
+move_to_cp_test() ->
+    St0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)),
+    St1 = ?BACKEND:move_to_cp(St0, {y_reg, 0}),
+    Code = ?BACKEND:stream(St1),
+    Expected = <<
+        "   0:  01452f03            lw  t5,20(a0)\n"
+        "   4:  000f2f83            lw  t6,0(t5)\n"
+        "   8:  05f52e23            sw  t6,92(a0)"
+    >>,
+    ?assertEqual(dump_to_bin(Expected), Code).
+
+% Incrementing sp by 7 adds 28 to the word stored at 20(a0).
+increment_sp_test() ->
+    St0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)),
+    St1 = ?BACKEND:increment_sp(St0, 7),
+    Code = ?BACKEND:stream(St1),
+    Expected = <<
+        "   0: 01452f83            lw  t6,20(a0)\n"
+        "   4: 0ff1                addi    t6,t6,28\n"
+        "   6: 01f52a23            sw  t6,20(a0)"
+    >>,
+    ?assertEqual(dump_to_bin(Expected), Code).
+
+if_block_test_() ->
+    {setup,
+        fun() ->
+            State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)),
+            {State1, RegA} = ?BACKEND:move_to_native_register(State0, {x_reg, 0}),
+            {State2, RegB} = ?BACKEND:move_to_native_register(State1, {x_reg, 1}),
+            {State2, RegA, RegB}
+        end,
+        fun({State0, RegA, RegB}) ->
+            [
+                ?_test(begin
+                    State1 = ?BACKEND:if_block(
+                        State0,
+                        {RegA, '<', 0},
+                        fun(BSt0) ->
+                            ?BACKEND:add(BSt0, RegB, 2)
+                        end
+                    ),
+                    Stream = ?BACKEND:stream(State1),
+                    Dump = <<
+                        "   0:  01852f83            lw  t6,24(a0)\n"
+                        "   4:  01c52f03            lw  t5,28(a0)\n"
+                        "   8:  000fd363            bgez    t6,0xe\n"
+                        "   c:  0f09                addi    t5,t5,2"
+                    >>,
+                    ?assertEqual(dump_to_bin(Dump), Stream),
+                    ?assertEqual([RegB, RegA], ?BACKEND:used_regs(State1))
+                end),
+                ?_test(begin
+                    State1 = ?BACKEND:if_block(
+                        State0,
+                        {RegA, '<', RegB},
+                        fun(BSt0) ->
+                            ?BACKEND:add(BSt0, RegB, 2)
+                        end
+                    ),
+                    Stream = ?BACKEND:stream(State1),
+                    Dump = <<
+                        "   0:  01852f83            lw  t6,24(a0)\n"
+                        "   4:  01c52f03            lw  t5,28(a0)\n"
+                        "   8:  01efd363            bge t6,t5,0xe\n"
+                        "   c:  0f09                addi    t5,t5,2"
+                    >>,
+                    ?assertEqual(dump_to_bin(Dump), Stream),
+                    ?assertEqual([RegB, RegA], ?BACKEND:used_regs(State1))
+                end),
+                ?_test(begin
+                    State1 = ?BACKEND:if_block(
+                        State0,
+                        {RegA, '<', 42},
+                        fun(BSt0) ->
+                            ?BACKEND:add(BSt0, RegB, 2)
+                        end
+                    ),
+                    Stream = ?BACKEND:stream(State1),
+                    Dump = <<
+                        "   0:  01852f83            lw  t6,24(a0)\n"
+                        "   4:  01c52f03            lw  t5,28(a0)\n"
+                        "   8:  02a00e93            li  t4,42\n"
+                        "   c:  01dfd363            bge t6,t4,0x12\n"
+                        "  10:  0f09                addi    t5,t5,2"
+                    >>,
+                    ?assertEqual(dump_to_bin(Dump), Stream),
+                    ?assertEqual([RegB, RegA], ?BACKEND:used_regs(State1))
+                end),
+                ?_test(begin
+                    State1 = ?BACKEND:if_block(
+                        State0,
+                        {RegA, '<', 1024},
+                        fun(BSt0) ->
+                            ?BACKEND:add(BSt0, RegB, 2)
+                        end
+                    ),
+                    State2 = ?BACKEND:jump_to_offset(State1, 16#100),
+                    Stream = ?BACKEND:stream(State2),
+                    Dump = <<
+                        "   0:  01852f83            lw  t6,24(a0)\n"
+                        "   4:  01c52f03            lw  t5,28(a0)\n"
+                        "   8:  40000e93            li  t4,1024\n"
+                        "   c:  01dfd363            bge t6,t4,0x12\n"
+                        "  10:  0f09                addi    t5,t5,2\n"
+                        "  12:  a0fd                j   0x100"
+                    >>,
+                    ?assertEqual(dump_to_bin(Dump), Stream),
+                    ?assertEqual([RegB, RegA], ?BACKEND:used_regs(State1))
+                end),
+                ?_test(begin
+                    State1 = ?BACKEND:if_block(
+                        State0,
+                        {RegA, '==', 0},
+                        fun(BSt0) ->
+                            ?BACKEND:add(BSt0, RegB, 2)
+                        end
+                    ),
+                    Stream = ?BACKEND:stream(State1),
+                    Dump = <<
+                        "   0:  01852f83            lw  t6,24(a0)\n"
+                        "   4:  01c52f03            lw  t5,28(a0)\n"
+                        "   8:  000f9363            bnez    t6,0xe\n"
+                        "   c:  0f09                addi    t5,t5,2"
+                    >>,
+                    ?assertEqual(dump_to_bin(Dump), Stream),
+                    ?assertEqual([RegB, RegA], ?BACKEND:used_regs(State1))
+                end),
+                ?_test(begin
+                    State1 = ?BACKEND:if_block(
+                        State0,
+                        {{free, RegA}, '==', 0},
+                        fun(BSt0) ->
+                            ?BACKEND:add(BSt0, RegB, 2)
+                        end
+                    ),
+                    Stream = ?BACKEND:stream(State1),
+                    Dump = <<
+                        "   0:  01852f83            lw  t6,24(a0)\n"
+                        "   4:  01c52f03            lw  t5,28(a0)\n"
+                        "   8:  000f9363            bnez    t6,0xe\n"
+                        "   c:  0f09                addi    t5,t5,2"
+                    >>,
+                    ?assertEqual(dump_to_bin(Dump), Stream),
+                    ?assertEqual([RegB], ?BACKEND:used_regs(State1))
+                end),
+                ?_test(begin
+                    State1 = ?BACKEND:if_block(
+                        State0,
+                        {RegA, '==', -1},
+                        fun(BSt0) ->
+                            ?BACKEND:add(BSt0, RegB, 2)
+                        end
+                    ),
+                    Stream = ?BACKEND:stream(State1),
+                    Dump = <<
+                        "   0:  01852f83            lw  t6,24(a0)\n"
+                        "   4:  01c52f03            lw  t5,28(a0)\n"
+                        "   8:  5efd                li  t4,-1\n"
+                        "   a:  01df9363            bne t6,t4,0x10\n"
+                        "   e:  0f09                addi    t5,t5,2"
+                    >>,
+                    ?assertEqual(dump_to_bin(Dump), Stream),
+                    ?assertEqual([RegB, RegA], ?BACKEND:used_regs(State1))
+                end),
+                ?_test(begin
+                    State1 = ?BACKEND:if_block(
+                        State0,
+                        {'(int)', RegA, '==', 0},
+                        fun(BSt0) ->
+                            ?BACKEND:add(BSt0, RegB, 2)
+                        end
+                    ),
+                    Stream = ?BACKEND:stream(State1),
+                    Dump = <<
+                        "   0:  01852f83            lw  t6,24(a0)\n"
+                        "   4:  01c52f03            lw  t5,28(a0)\n"
+                        "   8:  000f9363            bnez    t6,0xe\n"
+                        "   c:  0f09                addi    t5,t5,2"
+                    >>,
+                    ?assertEqual(dump_to_bin(Dump), Stream),
+                    ?assertEqual([RegB, RegA], ?BACKEND:used_regs(State1))
+                end),
+                ?_test(begin
+                    State1 = ?BACKEND:if_block(
+                        State0,
+                        {'(int)', {free, RegA}, '==', 0},
+                        fun(BSt0) ->
+                            ?BACKEND:add(BSt0, RegB, 2)
+                        end
+                    ),
+                    Stream = ?BACKEND:stream(State1),
+                    Dump = <<
+                        "   0:  01852f83            lw  t6,24(a0)\n"
+                        "   4:  01c52f03            lw  t5,28(a0)\n"
+                        "   8:  000f9363            bnez    t6,0xe\n"
+                        "   c:  0f09                addi    t5,t5,2"
+                    >>,
+                    ?assertEqual(dump_to_bin(Dump), Stream),
+                    ?assertEqual([RegB], ?BACKEND:used_regs(State1))
+                end),
+                ?_test(begin
+                    State1 = ?BACKEND:if_block(
+                        State0,
+                        {RegA, '!=', ?TERM_NIL},
+                        fun(BSt0) ->
+                            ?BACKEND:add(BSt0, RegB, 2)
+                        end
+                    ),
+                    Stream = ?BACKEND:stream(State1),
+                    Dump = <<
+                        "   0:  01852f83            lw  t6,24(a0)\n"
+                        "   4:  01c52f03            lw  t5,28(a0)\n"
+                        "   8:  03b00e93            li  t4,59\n"
+                        "   c:  01df8363            beq t6,t4,0x12\n"
+                        "  10:  0f09                addi    t5,t5,2"
+                    >>,
+                    ?assertEqual(dump_to_bin(Dump), Stream),
+                    ?assertEqual([RegB, RegA], ?BACKEND:used_regs(State1))
+                end),
+                ?_test(begin
+                    State1 = ?BACKEND:if_block(
+                        State0,
+                        {{free, RegA}, '!=', ?TERM_NIL},
+                        fun(BSt0) ->
+                            ?BACKEND:add(BSt0, RegB, 2)
+                        end
+                    ),
+                    Stream = ?BACKEND:stream(State1),
+                    Dump = <<
+                        "      0:   01852f83            lw  t6,24(a0)\n"
+                        "      4:   01c52f03            lw  t5,28(a0)\n"
+                        "      8:   03b00e93            li  t4,59\n"
+                        "      c:   01df8363            beq t6,t4,0x12\n"
+                        "     10:   0f09                    addi    t5,t5,2"
+                    >>,
+                    ?assertEqual(dump_to_bin(Dump), Stream),
+                    ?assertEqual([RegB], ?BACKEND:used_regs(State1))
+                end),
+                ?_test(begin
+                    State1 = ?BACKEND:if_block(
+                        State0,
+                        {'(int)', RegA, '!=', 42},
+                        fun(BSt0) ->
+                            ?BACKEND:add(BSt0, RegB, 2)
+                        end
+                    ),
+                    Stream = ?BACKEND:stream(State1),
+                    Dump = <<
+                        "      0:   01852f83            lw  t6,24(a0)\n"
+                        "      4:   01c52f03            lw  t5,28(a0)\n"
+                        "      8:   02a00e93            li  t4,42\n"
+                        "      c:   01df8363            beq t6,t4,0x12\n"
+                        "     10:   0f09                    addi    t5,t5,2"
+                    >>,
+                    ?assertEqual(dump_to_bin(Dump), Stream),
+                    ?assertEqual([RegB, RegA], ?BACKEND:used_regs(State1))
+                end),
+                ?_test(begin
+                    % Test large immediate (1995) that requires temporary register
+                    State1 = ?BACKEND:if_block(
+                        State0,
+                        {RegA, '!=', 1995},
+                        fun(BSt0) ->
+                            ?BACKEND:add(BSt0, RegB, 1)
+                        end
+                    ),
+                    State2 = ?BACKEND:jump_to_offset(State1, 16#100),
+                    Stream = ?BACKEND:stream(State2),
+                    Dump = <<
+                        "      0:   01852f83            lw  t6,24(a0)\n"
+                        "      4:   01c52f03            lw  t5,28(a0)\n"
+                        "      8:   7cb00e93            li  t4,1995\n"
+                        "      c:   01df8363            beq t6,t4,0x12\n"
+                        "     10:   0f05                    addi    t5,t5,1\n"
+                        "     12:   a0fd                    j   0x100"
+                    >>,
+                    ?assertEqual(dump_to_bin(Dump), Stream)
+                end),
+                ?_test(begin
+                    State1 = ?BACKEND:if_block(
+                        State0,
+                        {'(int)', {free, RegA}, '!=', 42},
+                        fun(BSt0) ->
+                            ?BACKEND:add(BSt0, RegB, 2)
+                        end
+                    ),
+                    Stream = ?BACKEND:stream(State1),
+                    Dump = <<
+                        "      0:   01852f83            lw  t6,24(a0)\n"
+                        "      4:   01c52f03            lw  t5,28(a0)\n"
+                        "      8:   02a00e93            li  t4,42\n"
+                        "      c:   01df8363            beq t6,t4,0x12\n"
+                        "     10:   0f09                    addi    t5,t5,2"
+                    >>,
+                    ?assertEqual(dump_to_bin(Dump), Stream),
+                    ?assertEqual([RegB], ?BACKEND:used_regs(State1))
+                end),
+                ?_test(begin
+                    State1 = ?BACKEND:if_block(
+                        State0,
+                        {RegA, '==', ?TERM_NIL},
+                        fun(BSt0) ->
+                            ?BACKEND:add(BSt0, RegB, 2)
+                        end
+                    ),
+                    Stream = ?BACKEND:stream(State1),
+                    Dump = <<
+                        "      0:   01852f83            lw  t6,24(a0)\n"
+                        "      4:   01c52f03            lw  t5,28(a0)\n"
+                        "      8:   03b00e93            li  t4,59\n"
+                        "      c:   01df9363            bne t6,t4,0x12\n"
+                        "     10:   0f09                    addi    t5,t5,2"
+                    >>,
+                    ?assertEqual(dump_to_bin(Dump), Stream),
+                    ?assertEqual([RegB, RegA], ?BACKEND:used_regs(State1))
+                end),
+                ?_test(begin
+                    State1 = ?BACKEND:if_block(
+                        State0,
+                        {{free, RegA}, '==', ?TERM_NIL},
+                        fun(BSt0) ->
+                            ?BACKEND:add(BSt0, RegB, 2)
+                        end
+                    ),
+                    Stream = ?BACKEND:stream(State1),
+                    Dump = <<
+                        "      0:   01852f83            lw  t6,24(a0)\n"
+                        "      4:   01c52f03            lw  t5,28(a0)\n"
+                        "      8:   03b00e93            li  t4,59\n"
+                        "      c:   01df9363            bne t6,t4,0x12\n"
+                        "     10:   0f09                    addi    t5,t5,2"
+                    >>,
+                    ?assertEqual(dump_to_bin(Dump), Stream),
+                    ?assertEqual([RegB], ?BACKEND:used_regs(State1))
+                end),
+                ?_test(begin
+                    State1 = ?BACKEND:if_block(
+                        State0,
+                        {'(int)', RegA, '==', 42},
+                        fun(BSt0) ->
+                            ?BACKEND:add(BSt0, RegB, 2)
+                        end
+                    ),
+                    Stream = ?BACKEND:stream(State1),
+                    Dump = <<
+                        "      0:   01852f83            lw  t6,24(a0)\n"
+                        "      4:   01c52f03            lw  t5,28(a0)\n"
+                        "      8:   02a00e93            li  t4,42\n"
+                        "      c:   01df9363            bne t6,t4,0x12\n"
+                        "     10:   0f09                    addi    t5,t5,2"
+                    >>,
+                    ?assertEqual(dump_to_bin(Dump), Stream),
+                    ?assertEqual([RegB, RegA], ?BACKEND:used_regs(State1))
+                end),
+                ?_test(begin
+                    State1 = ?BACKEND:if_block(
+                        State0,
+                        {'(int)', {free, RegA}, '==', 42},
+                        fun(BSt0) ->
+                            ?BACKEND:add(BSt0, RegB, 2)
+                        end
+                    ),
+                    Stream = ?BACKEND:stream(State1),
+                    Dump = <<
+                        "      0:   01852f83            lw  t6,24(a0)\n"
+                        "      4:   01c52f03            lw  t5,28(a0)\n"
+                        "      8:   02a00e93            li  t4,42\n"
+                        "      c:   01df9363            bne t6,t4,0x12\n"
+                        "     10:   0f09                    addi    t5,t5,2"
+                    >>,
+                    ?assertEqual(dump_to_bin(Dump), Stream),
+                    ?assertEqual([RegB], ?BACKEND:used_regs(State1))
+                end),
+                ?_test(begin
+                    State1 = ?BACKEND:if_block(
+                        State0,
+                        {'(bool)', RegA, '==', false},
+                        fun(BSt0) ->
+                            ?BACKEND:add(BSt0, RegB, 2)
+                        end
+                    ),
+                    Stream = ?BACKEND:stream(State1),
+                    Dump = <<
+                        "      0:   01852f83            lw  t6,24(a0)\n"
+                        "      4:   01c52f03            lw  t5,28(a0)\n"
+                        "      8:   01ff9e93            slli    t4,t6,0x1f\n"
+                        "      c:   000ec363            bltz    t4,0x12\n"
+                        "     10:   0f09                    addi    t5,t5,2"
+                    >>,
+                    ?assertEqual(dump_to_bin(Dump), Stream),
+                    ?assertEqual([RegB, RegA], ?BACKEND:used_regs(State1))
+                end),
+                ?_test(begin
+                    State1 = ?BACKEND:if_block(
+                        State0,
+                        {'(bool)', {free, RegA}, '==', false},
+                        fun(BSt0) ->
+                            ?BACKEND:add(BSt0, RegB, 2)
+                        end
+                    ),
+                    Stream = ?BACKEND:stream(State1),
+                    Dump = <<
+                        "      0:   01852f83            lw  t6,24(a0)\n"
+                        "      4:   01c52f03            lw  t5,28(a0)\n"
+                        "      8:   01ff9e93            slli    t4,t6,0x1f\n"
+                        "      c:   000ec363            bltz    t4,0x12\n"
+                        "     10:   0f09                    addi    t5,t5,2"
+                    >>,
+                    ?assertEqual(dump_to_bin(Dump), Stream),
+                    ?assertEqual([RegB], ?BACKEND:used_regs(State1))
+                end),
+                ?_test(begin
+                    State1 = ?BACKEND:if_block(
+                        State0,
+                        {'(bool)', RegA, '!=', false},
+                        fun(BSt0) ->
+                            ?BACKEND:add(BSt0, RegB, 2)
+                        end
+                    ),
+                    Stream = ?BACKEND:stream(State1),
+                    Dump = <<
+                        "      0:   01852f83            lw  t6,24(a0)\n"
+                        "      4:   01c52f03            lw  t5,28(a0)\n"
+                        "      8:   01ff9e93            slli    t4,t6,0x1f\n"
+                        "      c:   000ed363            bgez    t4,0x12\n"
+                        "     10:   0f09                    addi    t5,t5,2"
+                    >>,
+                    ?assertEqual(dump_to_bin(Dump), Stream),
+                    ?assertEqual([RegB, RegA], ?BACKEND:used_regs(State1))
+                end),
+                ?_test(begin
+                    State1 = ?BACKEND:if_block(
+                        State0,
+                        {'(bool)', {free, RegA}, '!=', false},
+                        fun(BSt0) ->
+                            ?BACKEND:add(BSt0, RegB, 2)
+                        end
+                    ),
+                    Stream = ?BACKEND:stream(State1),
+                    Dump = <<
+                        "      0:   01852f83            lw  t6,24(a0)\n"
+                        "      4:   01c52f03            lw  t5,28(a0)\n"
+                        "      8:   01ff9e93            slli    t4,t6,0x1f\n"
+                        "      c:   000ed363            bgez    t4,0x12\n"
+                        "     10:   0f09                    addi    t5,t5,2"
+                    >>,
+                    ?assertEqual(dump_to_bin(Dump), Stream),
+                    ?assertEqual([RegB], ?BACKEND:used_regs(State1))
+                end),
+                ?_test(begin
+                    State1 = ?BACKEND:if_block(
+                        State0,
+                        {RegA, '&', 16#7, '!=', 0},
+                        fun(BSt0) ->
+                            ?BACKEND:add(BSt0, RegB, 2)
+                        end
+                    ),
+                    Stream = ?BACKEND:stream(State1),
+                    Dump = <<
+                        "      0:   01852f83            lw  t6,24(a0)\n"
+                        "      4:   01c52f03            lw  t5,28(a0)\n"
+                        "      8:   007ffe93            andi    t4,t6,7\n"
+                        "      c:   000e8363            beqz    t4,0x12\n"
+                        "     10:   0f09                    addi    t5,t5,2"
+                    >>,
+                    ?assertEqual(dump_to_bin(Dump), Stream),
+                    ?assertEqual([RegB, RegA], ?BACKEND:used_regs(State1))
+                end),
+                ?_test(begin
+                    State1 = ?BACKEND:if_block(
+                        State0,
+                        {RegA, '&', 16#5, '!=', 0},
+                        fun(BSt0) ->
+                            ?BACKEND:add(BSt0, RegB, 2)
+                        end
+                    ),
+                    Stream = ?BACKEND:stream(State1),
+                    Dump = <<
+                        "      0:   01852f83            lw  t6,24(a0)\n"
+                        "      4:   01c52f03            lw  t5,28(a0)\n"
+                        "      8:   005ffe93            andi    t4,t6,5\n"
+                        "      c:   000e8363            beqz    t4,0x12\n"
+                        "     10:   0f09                    addi    t5,t5,2"
+                    >>,
+                    ?assertEqual(dump_to_bin(Dump), Stream),
+                    ?assertEqual([RegB, RegA], ?BACKEND:used_regs(State1))
+                end),
+                ?_test(begin
+                    State1 = ?BACKEND:if_block(
+                        State0,
+                        {{free, RegA}, '&', 16#7, '!=', 0},
+                        fun(BSt0) ->
+                            ?BACKEND:add(BSt0, RegB, 2)
+                        end
+                    ),
+                    Stream = ?BACKEND:stream(State1),
+                    Dump = <<
+                        "   0:  01852f83            lw  t6,24(a0)\n"
+                        "   4:  01c52f03            lw  t5,28(a0)\n"
+                        "   8:  007ffe93            andi    t4,t6,7\n"
+                        "   c:  000e8363            beqz    t4,0x12\n"
+                        "  10:  0f09                    addi    t5,t5,2"
+                    >>,
+                    ?assertEqual(dump_to_bin(Dump), Stream),
+                    ?assertEqual([RegB], ?BACKEND:used_regs(State1))
+                end),
+                ?_test(begin
+                    State1 = ?BACKEND:if_block(
+                        State0,
+                        {RegA, '&', ?TERM_IMMED_TAG_MASK, '!=', ?TERM_INTEGER_TAG},
+                        fun(BSt0) ->
+                            ?BACKEND:add(BSt0, RegB, 2)
+                        end
+                    ),
+                    Stream = ?BACKEND:stream(State1),
+                    Dump = <<
+                        "      0:   01852f83            lw  t6,24(a0)\n"
+                        "      4:   01c52f03            lw  t5,28(a0)\n"
+                        "      8:   ffffce93            not t4,t6\n"
+                        "      c:   0ef2                    slli    t4,t4,0x1c\n"
+                        "      e:   000e8363            beqz    t4,0x14\n"
+                        "      12:  0f09                    addi    t5,t5,2"
+                    >>,
+                    ?assertEqual(dump_to_bin(Dump), Stream),
+                    ?assertEqual([RegB, RegA], ?BACKEND:used_regs(State1))
+                end),
+                ?_test(begin
+                    State1 = ?BACKEND:if_block(
+                        State0,
+                        {{free, RegA}, '&', ?TERM_IMMED_TAG_MASK, '!=', ?TERM_INTEGER_TAG},
+                        fun(BSt0) ->
+                            ?BACKEND:add(BSt0, RegB, 2)
+                        end
+                    ),
+                    Stream = ?BACKEND:stream(State1),
+                    Dump = <<
+                        "   0:  01852f83            lw  t6,24(a0)\n"
+                        "   4:  01c52f03            lw  t5,28(a0)\n"
+                        "   8:  ffffcf93            not t6,t6\n"
+                        "   c:  0ff2                    slli    t6,t6,0x1c\n"
+                        "   e:  000f8363            beqz    t6,0x14\n"
+                        "  12:  0f09                    addi    t5,t5,2"
+                    >>,
+                    ?assertEqual(dump_to_bin(Dump), Stream),
+                    ?assertEqual([RegB], ?BACKEND:used_regs(State1))
+                end),
+                ?_test(begin
+                    State1 = ?BACKEND:if_block(
+                        State0,
+                        {RegA, '&', ?TERM_BOXED_TAG_MASK, '!=', ?TERM_BOXED_POSITIVE_INTEGER},
+                        fun(BSt0) ->
+                            ?BACKEND:add(BSt0, RegB, 2)
+                        end
+                    ),
+                    Stream = ?BACKEND:stream(State1),
+                    Dump = <<
+                        "   0:  01852f83            lw  t6,24(a0)\n"
+                        "   4:  01c52f03            lw  t5,28(a0)\n"
+                        "   8:  8efe                    mv  t4,t6\n"
+                        "   a:  03f00e13            li  t3,63\n"
+                        "   e:  01cefeb3            and t4,t4,t3\n"
+                        "  12:  4e21                    li  t3,8\n"
+                        "  14:  01ce8363            beq t4,t3,0x1a\n"
+                        "  18:  0f09                    addi    t5,t5,2"
+                    >>,
+                    ?assertEqual(dump_to_bin(Dump), Stream),
+                    ?assertEqual([RegB, RegA], ?BACKEND:used_regs(State1))
+                end),
+                ?_test(begin
+                    State1 = ?BACKEND:if_block(
+                        State0,
+                        {{free, RegA}, '<', RegB},
+                        fun(BSt0) ->
+                            ?BACKEND:add(BSt0, RegB, 2)
+                        end
+                    ),
+                    Stream = ?BACKEND:stream(State1),
+                    Dump = <<
+                        "   0:  01852f83            lw  t6,24(a0)\n"
+                        "   4:  01c52f03            lw  t5,28(a0)\n"
+                        "   8:  01efd363            bge t6,t5,0xe\n"
+                        "   c:  0f09                    addi    t5,t5,2"
+                    >>,
+                    ?assertEqual(dump_to_bin(Dump), Stream),
+                    ?assertEqual([RegB], ?BACKEND:used_regs(State1))
+                end),
+                ?_test(begin
+                    State1 = ?BACKEND:if_block(
+                        State0,
+                        {
+                            {free, RegA},
+                            '&',
+                            ?TERM_BOXED_TAG_MASK,
+                            '!=',
+                            ?TERM_BOXED_POSITIVE_INTEGER
+                        },
+                        fun(BSt0) ->
+                            ?BACKEND:add(BSt0, RegB, 2)
+                        end
+                    ),
+                    Stream = ?BACKEND:stream(State1),
+                    Dump = <<
+                        "      0:   01852f83            lw  t6,24(a0)\n"
+                        "      4:   01c52f03            lw  t5,28(a0)\n"
+                        "      8:   03f00e93            li  t4,63\n"
+                        "      c:   01dfffb3            and t6,t6,t4\n"
+                        "      10:  4ea1                    li  t4,8\n"
+                        "      12:  01df8363            beq t6,t4,0x18\n"
+                        "      16:  0f09                    addi    t5,t5,2"
+                    >>,
+                    ?assertEqual(dump_to_bin(Dump), Stream),
+                    ?assertEqual([RegB], ?BACKEND:used_regs(State1))
+                end),
+                %% Test {RegA, '&', 16#3, '!=', 0} using ANDI instruction
+                ?_test(begin
+                    State1 = ?BACKEND:if_block(
+                        State0,
+                        {RegA, '&', 16#3, '!=', 0},
+                        fun(BSt0) ->
+                            ?BACKEND:add(BSt0, RegB, 2)
+                        end
+                    ),
+                    Stream = ?BACKEND:stream(State1),
+                    Dump = <<
+                        "      0:   01852f83            lw  t6,24(a0)\n"
+                        "      4:   01c52f03            lw  t5,28(a0)\n"
+                        "      8:   003ffe93            andi    t4,t6,3\n"
+                        "      c:   000e8363            beqz    t4,0x12\n"
+                        "      10:  0f09                    addi    t5,t5,2"
+                    >>,
+                    ?assertEqual(dump_to_bin(Dump), Stream),
+                    ?assertEqual([RegB, RegA], ?BACKEND:used_regs(State1))
+                end),
+                ?_test(begin
+                    State1 = ?BACKEND:if_block(
+                        State0,
+                        {100, '<', RegA},
+                        fun(BSt0) ->
+                            ?BACKEND:add(BSt0, RegB, 2)
+                        end
+                    ),
+                    Stream = ?BACKEND:stream(State1),
+                    Dump = <<
+                        "   0:	01852f83          	lw	t6,24(a0)\n"
+                        "   4:	01c52f03          	lw	t5,28(a0)\n"
+                        "   8:	06400e93          	li	t4,100\n"
+                        "   c:	01fed363          	bge	t4,t6,0x12\n"
+                        "  10:	0f09                	addi	t5,t5,2"
+                    >>,
+                    ?assertEqual(dump_to_bin(Dump), Stream),
+                    ?assertEqual([RegB, RegA], ?BACKEND:used_regs(State1))
+                end),
+                ?_test(begin
+                    State1 = ?BACKEND:if_block(
+                        State0,
+                        {100, '<', {free, RegA}},
+                        fun(BSt0) ->
+                            ?BACKEND:add(BSt0, RegB, 2)
+                        end
+                    ),
+                    Stream = ?BACKEND:stream(State1),
+                    Dump = <<
+                        "   0:	01852f83          	lw	t6,24(a0)\n"
+                        "   4:	01c52f03          	lw	t5,28(a0)\n"
+                        "   8:	06400e93          	li	t4,100\n"
+                        "   c:	01fed363          	bge	t4,t6,0x12\n"
+                        "  10:	0f09                	addi	t5,t5,2"
+                    >>,
+                    ?assertEqual(dump_to_bin(Dump), Stream),
+                    ?assertEqual([RegB], ?BACKEND:used_regs(State1))
+                end),
+                ?_test(begin
+                    State1 = ?BACKEND:if_block(
+                        State0,
+                        {1024, '<', RegA},
+                        fun(BSt0) ->
+                            ?BACKEND:add(BSt0, RegB, 2)
+                        end
+                    ),
+                    Stream = ?BACKEND:stream(State1),
+                    Dump = <<
+                        "   0:	01852f83          	lw	t6,24(a0)\n"
+                        "   4:	01c52f03          	lw	t5,28(a0)\n"
+                        "   8:	40000e93          	li	t4,1024\n"
+                        "   c:	01fed363          	bge	t4,t6,0x12\n"
+                        "  10:	0f09                	addi	t5,t5,2"
+                    >>,
+                    ?assertEqual(dump_to_bin(Dump), Stream),
+                    ?assertEqual([RegB, RegA], ?BACKEND:used_regs(State1))
+                end),
+                ?_test(begin
+                    State1 = ?BACKEND:if_block(
+                        State0,
+                        {1024, '<', {free, RegA}},
+                        fun(BSt0) ->
+                            ?BACKEND:add(BSt0, RegB, 2)
+                        end
+                    ),
+                    Stream = ?BACKEND:stream(State1),
+                    Dump = <<
+                        "   0:	01852f83          	lw	t6,24(a0)\n"
+                        "   4:	01c52f03          	lw	t5,28(a0)\n"
+                        "   8:	40000e93          	li	t4,1024\n"
+                        "   c:	01fed363          	bge	t4,t6,0x12\n"
+                        "  10:	0f09                	addi	t5,t5,2"
+                    >>,
+                    ?assertEqual(dump_to_bin(Dump), Stream),
+                    ?assertEqual([RegB], ?BACKEND:used_regs(State1))
+                end)
+            ]
+        end}.
+
+%% if_else_block/4 with a `Reg == ?TERM_NIL` condition: x_reg 0 is the tested
+%% value and x_reg 1 the register updated by whichever branch runs.
+if_else_block_test() ->
+    Init = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)),
+    {Loaded0, CondReg} = ?BACKEND:move_to_native_register(Init, {x_reg, 0}),
+    {Loaded1, AccReg} = ?BACKEND:move_to_native_register(Loaded0, {x_reg, 1}),
+    % First fun is the "condition holds" branch (+2), second the "else" branch (+4).
+    OnNil = fun(BSt) -> ?BACKEND:add(BSt, AccReg, 2) end,
+    OnOther = fun(BSt) -> ?BACKEND:add(BSt, AccReg, 4) end,
+    Final = ?BACKEND:if_else_block(
+        Loaded1,
+        {CondReg, '==', ?TERM_NIL},
+        OnNil,
+        OnOther
+    ),
+    % Listing: bne skips to the +4 branch at 0x14; the +2 branch exits via j 0x16.
+    Expected = dump_to_bin(<<
+        "0: 01852f83            lw  t6,24(a0)\n"
+        "4: 01c52f03            lw  t5,28(a0)\n"
+        "8: 03b00e93            li  t4,59\n"
+        "c: 01df9463            bne t6,t4,0x14\n"
+        "10:    0f09                    addi    t5,t5,2\n"
+        "12:    a011                    j   0x16\n"
+        "14:    0f11                    addi    t5,t5,4"
+    >>),
+    ?assertEqual(Expected, ?BACKEND:stream(Final)).
+
+%% shift_right/3 in both flavours: a {free, Reg} operand is shifted in place,
+%% while a bare Reg is left untouched and the result lands in another register.
+shift_right_test_() ->
+    [
+        ?_test(begin
+            Init = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)),
+            {Loaded, Src} = ?BACKEND:move_to_native_register(Init, {x_reg, 0}),
+            % Src is already bound, so this match only succeeds if it is reused.
+            {Shifted, Src} = ?BACKEND:shift_right(Loaded, {free, Src}, 3),
+            Expected = dump_to_bin(<<
+                "   0:  01852f83            lw  t6,24(a0)\n"
+                "   4:  003fdf93            srli    t6,t6,0x3"
+            >>),
+            ?assertEqual(Expected, ?BACKEND:stream(Shifted))
+        end),
+        ?_test(begin
+            Init = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)),
+            {Loaded, Src} = ?BACKEND:move_to_native_register(Init, {x_reg, 0}),
+            % Src stays live here, so the result must come back in a different register.
+            {Shifted, Dst} = ?BACKEND:shift_right(Loaded, Src, 3),
+            ?assertNotEqual(Dst, Src),
+            Expected = dump_to_bin(<<
+                "   0:  01852f83            lw  t6,24(a0)\n"
+                "   4:  003fdf13            srli    t5,t6,0x3"
+            >>),
+            ?assertEqual(Expected, ?BACKEND:stream(Shifted))
+        end)
+    ].
+
+%% shift_left/3 returns only the updated state; per the expected listing the
+%% register loaded from x_reg 0 is shifted in place.
+shift_left_test() ->
+    Init = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)),
+    {Loaded, Reg} = ?BACKEND:move_to_native_register(Init, {x_reg, 0}),
+    Shifted = ?BACKEND:shift_left(Loaded, Reg, 3),
+    Expected = dump_to_bin(<<
+        "0: 01852f83            lw  t6,24(a0)\n"
+        "4: 0f8e                    slli    t6,t6,0x3"
+    >>),
+    ?assertEqual(Expected, ?BACKEND:stream(Shifted)).
+
+%% Label 2 is referenced by call_only_or_schedule_next/2 before it is added; the
+%% listing is taken after update_branches/1 and targets it at 0x40 throughout.
+call_only_or_schedule_next_and_label_relocation_test() ->
+    Init = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)),
+    Table = ?BACKEND:jump_table(Init, 2),
+    Label1 = ?BACKEND:add_label(Table, 1),
+    Sched = ?BACKEND:call_only_or_schedule_next(Label1, 2),
+    Label2 = ?BACKEND:add_label(Sched, 2),
+    Prim0 = ?BACKEND:call_primitive_last(Label2, 0, [ctx, jit_state]),
+    % Label 0 stands for OP_INT_CALL_END
+    Label0 = ?BACKEND:add_label(Prim0, 0),
+    Prim1 = ?BACKEND:call_primitive_last(Label0, 1, [ctx, jit_state]),
+    Linked = ?BACKEND:update_branches(Prim1),
+    Expected = dump_to_bin(<<
+        "   0:  00000697            auipc   a3,0x0\n"
+        "   4:  04668067            jr  70(a3) # 0x46\n"
+        "   8:  00000697            auipc   a3,0x0\n"
+        "   c:  01068067            jr  16(a3) # 0x18\n"
+        "  10:  00000697            auipc   a3,0x0\n"
+        "  14:  03068067            jr  48(a3) # 0x40\n"
+        "  18:  0085af83            lw  t6,8(a1)\n"
+        "  1c:  1ffd                    addi    t6,t6,-1\n"
+        "  1e:  01f5a423            sw  t6,8(a1)\n"
+        "  22:  000f8663            beqz    t6,0x2e\n"
+        "  26:  a829                    j   0x40\n"
+        "  28:  0001                    nop\n"
+        "  2a:  00000013            nop\n"
+        "  2e:  00000f97            auipc   t6,0x0\n"
+        "  32:  0fc9                    addi    t6,t6,18 # 0x40\n"
+        "  34:  0001                    nop\n"
+        "  36:  01f5a223            sw  t6,4(a1)\n"
+        "  3a:  00862f83            lw  t6,8(a2)\n"
+        "  3e:  8f82                    jr  t6\n"
+        "  40:  00062f83            lw  t6,0(a2)\n"
+        "  44:  8f82                    jr  t6\n"
+        "  46:  00462f83            lw  t6,4(a2)\n"
+        "  4a:  8f82                    jr  t6"
+    >>),
+    ?assertEqual(Expected, ?BACKEND:stream(Linked)).
+
+%% Label 2 is registered with a known offset (16#36) before it is referenced;
+%% the expected listing branches straight to 0x36 with no nop padding.
+call_only_or_schedule_next_known_label_test() ->
+    Init = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)),
+    Table = ?BACKEND:jump_table(Init, 2),
+    Label1 = ?BACKEND:add_label(Table, 1),
+    Label2 = ?BACKEND:add_label(Label1, 2, 16#36),
+    Sched = ?BACKEND:call_only_or_schedule_next(Label2, 2),
+    Prim0 = ?BACKEND:call_primitive_last(Sched, 0, [ctx, jit_state]),
+    % Label 0 stands for OP_INT_CALL_END
+    Label0 = ?BACKEND:add_label(Prim0, 0),
+    Prim1 = ?BACKEND:call_primitive_last(Label0, 1, [ctx, jit_state]),
+    Linked = ?BACKEND:update_branches(Prim1),
+    Expected = dump_to_bin(<<
+        "   0:  00000697            auipc   a3,0x0\n"
+        "   4:  03c68067            jr  60(a3) # 0x3c\n"
+        "   8:  00000697            auipc   a3,0x0\n"
+        "   c:  01068067            jr  16(a3) # 0x18\n"
+        "  10:  00000697            auipc   a3,0x0\n"
+        "  14:  02668067            jr  38(a3) # 0x36\n"
+        "  18:  0085af83            lw  t6,8(a1)\n"
+        "  1c:  1ffd                    addi    t6,t6,-1\n"
+        "  1e:  01f5a423            sw  t6,8(a1)\n"
+        "  22:  000f9a63            bnez    t6,0x36\n"
+        "  26:  00000f97            auipc   t6,0x0\n"
+        "  2a:  0fc1                    addi    t6,t6,16 # 0x36\n"
+        "  2c:  01f5a223            sw  t6,4(a1)\n"
+        "  30:  00862f83            lw  t6,8(a2)\n"
+        "  34:  8f82                    jr  t6\n"
+        "  36:  00062f83            lw  t6,0(a2)\n"
+        "  3a:  8f82                    jr  t6\n"
+        "  3c:  00462f83            lw  t6,4(a2)\n"
+        "  40:  8f82                    jr  t6"
+    >>),
+    ?assertEqual(Expected, ?BACKEND:stream(Linked)).
+
+%% Test with large gap (256+ bytes) to force mov_immediate path; only the stream tail is checked
+call_only_or_schedule_next_and_label_relocation_large_gap_test() ->
+    State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)),
+    State1 = ?BACKEND:jump_table(State0, 2),
+    % Add large padding by emitting many move_to_native_register operations.
+    % This creates a large gap between the jump table and the code emitted for the labels.
+    % Each operation emits ~2 bytes, so 128 operations = ~256 bytes
+    StatePadded = lists:foldl(
+        fun(_, S) ->
+            ?BACKEND:move_to_native_register(S, {x_reg, 2}, a3)
+        end,
+        State1,
+        lists:seq(1, 128)
+    ),
+    State2 = ?BACKEND:add_label(StatePadded, 1),
+    State3 = ?BACKEND:call_only_or_schedule_next(State2, 2), % label 2 is only added next
+    State4 = ?BACKEND:add_label(State3, 2),
+    State5 = ?BACKEND:call_primitive_last(State4, 0, [ctx, jit_state]),
+    % OP_INT_CALL_END
+    State6 = ?BACKEND:add_label(State5, 0),
+    State7 = ?BACKEND:call_primitive_last(State6, 1, [ctx, jit_state]),
+    State8 = ?BACKEND:update_branches(State7),
+    Stream = ?BACKEND:stream(State8),
+    % Compare only the final section starting at 16#118 = 24 (jump table) + 256 (128 loads) bytes
+    % RISC-V: Jump table is 3×8=24 bytes, loads are 2 bytes each (compressed)
+    Dump = <<
+        "   0:  0085af83            lw  t6,8(a1)\n"
+        "   4:  1ffd                    addi    t6,t6,-1\n"
+        "   6:  01f5a423            sw  t6,8(a1)\n"
+        "   a:  000f8663            beqz    t6,0x16\n"
+        "   e:  a829                    j   0x28\n"
+        "  10:  0001                    nop\n"
+        "  12:  00000013            nop\n"
+        "  16:  00000f97            auipc   t6,0x0\n"
+        "  1a:  0fc9                    addi    t6,t6,18 # 0x28\n"
+        "  1c:  0001                    nop\n"
+        "  1e:  01f5a223            sw  t6,4(a1)\n"
+        "  22:  00862f83            lw  t6,8(a2)\n"
+        "  26:  8f82                    jr  t6\n"
+        "  28:  00062f83            lw  t6,0(a2)\n"
+        "  2c:  8f82                    jr  t6\n"
+        "  2e:  00462f83            lw  t6,4(a2)\n"
+        "  32:  8f82                    jr  t6"
+    >>,
+    {_, RelevantBinary} = split_binary(Stream, 16#118), % drop the first 280 bytes
+    ?assertEqual(dump_to_bin(Dump), RelevantBinary).
+
+call_bif_with_large_literal_integer_test() -> % 998238357 = 16#3B7FE895, see lui/addi in dump
+    State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)),
+    {State1, FuncPtr} = ?BACKEND:call_primitive(State0, 8, [jit_state, 2]),
+    {State2, ArgReg} = ?BACKEND:call_primitive(State1, 15, [ctx, 998238357]),
+    {State3, ResultReg} = ?BACKEND:call_func_ptr(State2, {free, FuncPtr}, [
+        ctx, 0, 1, {free, {x_reg, 0}}, {free, ArgReg}
+    ]),
+    State4 = ?BACKEND:if_block(State3, {ResultReg, '==', 0}, fun(BSt0) ->
+        ?BACKEND:call_primitive_last(BSt0, ?PRIM_HANDLE_ERROR, [ctx, jit_state, offset])
+    end), % error path taken when the function pointer call returned 0
+    State5 = ?BACKEND:move_to_vm_register(State4, ResultReg, {x_reg, 0}),
+    State6 = ?BACKEND:free_native_registers(State5, [ResultReg]),
+    ?BACKEND:assert_all_native_free(State6),
+    Stream = ?BACKEND:stream(State6),
+    Dump =
+        <<
+            "      0:   02062f83            lw  t6,32(a2)\n"
+            "      4:   1141                    addi    sp,sp,-16\n"
+            "      6:   c006                    sw  ra,0(sp)\n"
+            "      8:   c22a                    sw  a0,4(sp)\n"
+            "      a:   c42e                    sw  a1,8(sp)\n"
+            "      c:   c632                    sw  a2,12(sp)\n"
+            "      e:   852e                    mv  a0,a1\n"
+            "     10:   4589                    li  a1,2\n"
+            "     12:   9f82                    jalr    t6\n"
+            "     14:   8faa                    mv  t6,a0\n"
+            "     16:   4082                    lw  ra,0(sp)\n"
+            "     18:   4512                    lw  a0,4(sp)\n"
+            "     1a:   45a2                    lw  a1,8(sp)\n"
+            "     1c:   4632                    lw  a2,12(sp)\n"
+            "     1e:   0141                    addi    sp,sp,16\n"
+            "     20:   03c62f03            lw  t5,60(a2)\n"
+            "     24:   1101                    addi    sp,sp,-32\n"
+            "     26:   c006                    sw  ra,0(sp)\n"
+            "     28:   c22a                    sw  a0,4(sp)\n"
+            "     2a:   c42e                    sw  a1,8(sp)\n"
+            "     2c:   c632                    sw  a2,12(sp)\n"
+            "     2e:   c87e                    sw  t6,16(sp)\n"
+            "     30:   3b7ff5b7            lui a1,0x3b7ff\n"
+            "     34:   89558593            addi    a1,a1,-1899 # 0x3b7fe895\n"
+            "     38:   9f02                    jalr    t5\n"
+            "     3a:   8f2a                    mv  t5,a0\n"
+            "     3c:   4082                    lw  ra,0(sp)\n"
+            "     3e:   4512                    lw  a0,4(sp)\n"
+            "     40:   45a2                    lw  a1,8(sp)\n"
+            "     42:   4632                    lw  a2,12(sp)\n"
+            "     44:   4fc2                    lw  t6,16(sp)\n"
+            "     46:   02010113            addi    sp,sp,32\n"
+            "     4a:   1141                    addi    sp,sp,-16\n"
+            "     4c:   c006                    sw  ra,0(sp)\n"
+            "     4e:   c22a                    sw  a0,4(sp)\n"
+            "     50:   c42e                    sw  a1,8(sp)\n"
+            "     52:   c632                    sw  a2,12(sp)\n"
+            "     54:   4581                    li  a1,0\n"
+            "     56:   4605                    li  a2,1\n"
+            "     58:   4d14                    lw  a3,24(a0)\n"
+            "     5a:   877a                    mv  a4,t5\n"
+            "     5c:   9f82                    jalr    t6\n"
+            "     5e:   8faa                    mv  t6,a0\n"
+            "     60:   4082                    lw  ra,0(sp)\n"
+            "     62:   4512                    lw  a0,4(sp)\n"
+            "     64:   45a2                    lw  a1,8(sp)\n"
+            "     66:   4632                    lw  a2,12(sp)\n"
+            "     68:   0141                    addi    sp,sp,16\n"
+            "     6a:   000f9763            bnez    t6,0x78\n"
+            "     6e:   01862f83            lw  t6,24(a2)\n"
+            "     72:   07200613            li  a2,114\n"
+            "     76:   8f82                    jr  t6\n"
+            "     78:   01f52c23            sw  t6,24(a0)"
+        >>,
+    ?assertEqual(dump_to_bin(Dump), Stream).
+
+get_list_test() -> % elements 1 and 0 at the untagged x_reg 0 pointer -> y_reg 1, y_reg 0
+    State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)),
+    {State1, Reg} = ?BACKEND:move_to_native_register(State0, {x_reg, 0}),
+    {State2, Reg} = ?BACKEND:and_(State1, {free, Reg}, ?TERM_PRIMARY_CLEAR_MASK),
+    State3 = ?BACKEND:move_array_element(State2, Reg, 1, {y_reg, 1}),
+    State4 = ?BACKEND:move_array_element(State3, Reg, 0, {y_reg, 0}),
+    State5 = ?BACKEND:free_native_registers(State4, [Reg]),
+    ?BACKEND:assert_all_native_free(State5),
+    Stream = ?BACKEND:stream(State5),
+    Dump =
+        <<
+            "0: 01852f83            lw  t6,24(a0)\n"
+            "4: 4f0d                    li  t5,3\n"
+            "6: ffff4f13            not t5,t5\n"
+            "a: 01efffb3            and t6,t6,t5\n"
+            "e: 004fae83            lw  t4,4(t6)\n"
+            "12:    01452f03            lw  t5,20(a0)\n"
+            "16:    01df2223            sw  t4,4(t5)\n"
+            "1a:    000fae83            lw  t4,0(t6)\n"
+            "1e:    01452f03            lw  t5,20(a0)\n"
+            "22:    01df2023            sw  t4,0(t5)"
+        >>,
+    ?assertEqual(dump_to_bin(Dump), Stream).
+
+is_integer_test() -> % jumps to Label unless x_reg 0 passes the integer tag checks below
+    State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)),
+    State1 = ?BACKEND:jump_table(State0, 1), % label 0 is never added: its entry stays 16#ff
+    Label = 1,
+    Arg1 = {x_reg, 0},
+    {State2, Reg} = ?BACKEND:move_to_native_register(State1, Arg1),
+    State3 = ?BACKEND:if_block(
+        State2, {Reg, '&', ?TERM_IMMED_TAG_MASK, '!=', ?TERM_INTEGER_TAG}, fun(MSt0) ->
+            MSt1 = ?BACKEND:if_block(
+                MSt0, {Reg, '&', ?TERM_PRIMARY_MASK, '!=', ?TERM_PRIMARY_BOXED}, fun(BSt0) ->
+                    ?BACKEND:jump_to_label(BSt0, Label)
+                end
+            ),
+            {MSt2, Reg} = ?BACKEND:and_(MSt1, {free, Reg}, ?TERM_PRIMARY_CLEAR_MASK),
+            MSt3 = ?BACKEND:move_array_element(MSt2, Reg, 0, Reg), % Reg := element 0 at Reg
+            ?BACKEND:if_block(
+                MSt3,
+                {{free, Reg}, '&', ?TERM_BOXED_TAG_MASK, '!=', ?TERM_BOXED_POSITIVE_INTEGER},
+                fun(BSt0) ->
+                    ?BACKEND:jump_to_label(BSt0, Label)
+                end
+            )
+        end
+    ),
+    State4 = ?BACKEND:free_native_registers(State3, [Reg]),
+    ?BACKEND:assert_all_native_free(State4),
+    State5 = ?BACKEND:add_label(State4, Label, 16#100), % label at explicit offset 16#100
+    State6 = ?BACKEND:update_branches(State5),
+    Stream = ?BACKEND:stream(State6),
+    Dump =
+        <<
+            "   0:  ffff                .insn   2, 0xffff\n"
+            "   2:  ffff                .insn   2, 0xffff\n"
+            "   4:  ffff                .insn   2, 0xffff\n"
+            "   6:  ffff                .insn   2, 0xffff\n"
+            "   8:  00000697            auipc   a3,0x0\n"
+            "   c:  0f868067            jr  248(a3) # 0x100\n"
+            "  10:  01852f83            lw  t6,24(a0)\n"
+            "  14:  ffffcf13            not t5,t6\n"
+            "  18:  0f72                slli    t5,t5,0x1c\n"
+            "  1a:  020f0f63            beqz    t5,0x58\n"
+            "  1e:  8f7e                mv  t5,t6\n"
+            "  20:  4e8d                li  t4,3\n"
+            "  22:  01df7f33            and t5,t5,t4\n"
+            "  26:  4e89                li  t4,2\n"
+            "  28:  01df0663            beq t5,t4,0x34\n"
+            "  2c:  a8d1                j   0x100\n"
+            "  2e:  0001                nop\n"
+            "  30:  00000013            nop\n"
+            "  34:  4f0d                li  t5,3\n"
+            "  36:  ffff4f13            not t5,t5\n"
+            "  3a:  01efffb3            and t6,t6,t5\n"
+            "  3e:  000faf83            lw  t6,0(t6)\n"
+            "  42:  03f00f13            li  t5,63\n"
+            "  46:  01efffb3            and t6,t6,t5\n"
+            "  4a:  4f21                li  t5,8\n"
+            "  4c:  01ef8663            beq t6,t5,0x58\n"
+            "  50:  a845                j   0x100\n"
+            "  52:  0001                nop\n"
+            "  54:  00000013            nop"
+        >>,
+    ?assertEqual(dump_to_bin(Dump), Stream).
+
+cond_jump_to_label(Cond, Label, MMod, MSt0) ->
+    % Body of the conditional block: a single jump to Label.
+    JumpToLabel = fun(BlockSt) -> MMod:jump_to_label(BlockSt, Label) end,
+    MMod:if_block(MSt0, Cond, JumpToLabel).
+
+%% Keep the unoptimized version to test the 'and' case of if_block conditions.
+is_number_test() ->
+    State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)),
+    State1 = ?BACKEND:jump_table(State0, 1),
+    Label = 1,
+    Arg1 = {x_reg, 0},
+    {State2, Reg} = ?BACKEND:move_to_native_register(State1, Arg1),
+    State3 = ?BACKEND:if_block(
+        State2, {Reg, '&', ?TERM_IMMED_TAG_MASK, '!=', ?TERM_INTEGER_TAG}, fun(BSt0) ->
+            BSt1 = cond_jump_to_label(
+                {Reg, '&', ?TERM_PRIMARY_MASK, '!=', ?TERM_PRIMARY_BOXED}, Label, ?BACKEND, BSt0
+            ),
+            {BSt2, Reg} = ?BACKEND:and_(BSt1, {free, Reg}, ?TERM_PRIMARY_CLEAR_MASK),
+            BSt3 = ?BACKEND:move_array_element(BSt2, Reg, 0, Reg), % Reg := element 0 at Reg
+            cond_jump_to_label(
+                {'and', [
+                    {Reg, '&', ?TERM_BOXED_TAG_MASK, '!=', ?TERM_BOXED_POSITIVE_INTEGER},
+                    {{free, Reg}, '&', ?TERM_BOXED_TAG_MASK, '!=', ?TERM_BOXED_FLOAT}
+                ]}, % Reg is only marked {free, ...} in the last sub-condition
+                Label,
+                ?BACKEND,
+                BSt3
+            )
+        end
+    ),
+    State4 = ?BACKEND:free_native_registers(State3, [Reg]),
+    ?BACKEND:assert_all_native_free(State4),
+    State5 = ?BACKEND:add_label(State4, Label, 16#100), % label at explicit offset 16#100
+    State6 = ?BACKEND:update_branches(State5),
+    Stream = ?BACKEND:stream(State6),
+    Dump =
+        <<
+            "   0:  ffff                .insn   2, 0xffff\n"
+            "   2:  ffff                .insn   2, 0xffff\n"
+            "   4:  ffff                .insn   2, 0xffff\n"
+            "   6:  ffff                .insn   2, 0xffff\n"
+            "   8:  00000697            auipc   a3,0x0\n"
+            "   c:  0f868067            jr  248(a3) # 0x100\n"
+            "  10:  01852f83            lw  t6,24(a0)\n"
+            "  14:  ffffcf13            not t5,t6\n"
+            "  18:  0f72                slli    t5,t5,0x1c\n"
+            "  1a:  040f0763            beqz    t5,0x68\n"
+            "  1e:  8f7e                mv  t5,t6\n"
+            "  20:  4e8d                li  t4,3\n"
+            "  22:  01df7f33            and t5,t5,t4\n"
+            "  26:  4e89                li  t4,2\n"
+            "  28:  01df0663            beq t5,t4,0x34\n"
+            "  2c:  a8d1                j   0x100\n"
+            "  2e:  0001                nop\n"
+            "  30:  00000013            nop\n"
+            "  34:  4f0d                li  t5,3\n"
+            "  36:  ffff4f13            not t5,t5\n"
+            "  3a:  01efffb3            and t6,t6,t5\n"
+            "  3e:  000faf83            lw  t6,0(t6)\n"
+            "  42:  8f7e                mv  t5,t6\n"
+            "  44:  03f00e93            li  t4,63\n"
+            "  48:  01df7f33            and t5,t5,t4\n"
+            "  4c:  4ea1                li  t4,8\n"
+            "  4e:  01df0d63            beq t5,t4,0x68\n"
+            "  52:  03f00f13            li  t5,63\n"
+            "  56:  01efffb3            and t6,t6,t5\n"
+            "  5a:  4f61                li  t5,24\n"
+            "  5c:  01ef8663            beq t6,t5,0x68\n"
+            "  60:  a045                j   0x100\n"
+            "  62:  0001                nop\n"
+            "  64:  00000013            nop"
+        >>,
+    ?assertEqual(dump_to_bin(Dump), Stream).
+
+is_boolean_test() -> % jump to Label unless x_reg 0 is ?TRUE_ATOM or ?FALSE_ATOM
+    State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)),
+    State1 = ?BACKEND:jump_table(State0, 1),
+    Label = 1,
+    {State2, Reg} = ?BACKEND:move_to_native_register(State1, {x_reg, 0}),
+    State3 = ?BACKEND:if_block(State2, {Reg, '!=', ?TRUE_ATOM}, fun(BSt0) ->
+        ?BACKEND:if_block(BSt0, {Reg, '!=', ?FALSE_ATOM}, fun(BSt1) ->
+            ?BACKEND:jump_to_label(BSt1, Label)
+        end)
+    end),
+    State4 = ?BACKEND:free_native_registers(State3, [Reg]),
+    ?BACKEND:assert_all_native_free(State4),
+    State5 = ?BACKEND:add_label(State4, Label, 16#100), % near label: 2-byte j, then nops
+    State6 = ?BACKEND:update_branches(State5),
+    Stream = ?BACKEND:stream(State6),
+    Dump = <<
+        "   0:  ffff                .insn   2, 0xffff\n"
+        "   2:  ffff                .insn   2, 0xffff\n"
+        "   4:  ffff                .insn   2, 0xffff\n"
+        "   6:  ffff                .insn   2, 0xffff\n"
+        "   8:  00000697            auipc   a3,0x0\n"
+        "   c:  0f868067            jr  248(a3) # 0x100\n"
+        "  10:  01852f83            lw  t6,24(a0)\n"
+        "  14:  04b00f13            li  t5,75\n"
+        "  18:  01ef8963            beq t6,t5,0x2a\n"
+        "  1c:  4f2d                li  t5,11\n"
+        "  1e:  01ef8663            beq t6,t5,0x2a\n"
+        "  22:  a8f9                j   0x100\n"
+        "  24:  0001                nop\n"
+        "  26:  00000013            nop"
+    >>,
+    ?assertEqual(dump_to_bin(Dump), Stream).
+
+is_boolean_far_test() -> % same checks as is_boolean_test, with the label at 16#1000
+    State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)),
+    Label = 1,
+    State1 = ?BACKEND:jump_table(State0, 1),
+    {State2, Reg} = ?BACKEND:move_to_native_register(State1, {x_reg, 0}),
+    State3 = ?BACKEND:if_block(State2, {Reg, '!=', ?TRUE_ATOM}, fun(BSt0) ->
+        ?BACKEND:if_block(BSt0, {Reg, '!=', ?FALSE_ATOM}, fun(BSt1) ->
+            ?BACKEND:jump_to_label(BSt1, Label)
+        end)
+    end),
+    State4 = ?BACKEND:free_native_registers(State3, [Reg]),
+    ?BACKEND:assert_all_native_free(State4),
+    State5 = ?BACKEND:add_label(State4, Label, 16#1000), % added after the jump: 4-byte j + nop
+    State6 = ?BACKEND:update_branches(State5),
+    Stream = ?BACKEND:stream(State6),
+    Dump =
+        <<
+            "   0:  ffff                .insn   2, 0xffff\n"
+            "   2:  ffff                .insn   2, 0xffff\n"
+            "   4:  ffff                .insn   2, 0xffff\n"
+            "   6:  ffff                .insn   2, 0xffff\n"
+            "   8:  00001697            auipc   a3,0x1\n"
+            "   c:  ff868067            jr  -8(a3) # 0x1000\n"
+            "  10:  01852f83            lw  t6,24(a0)\n"
+            "  14:  04b00f13            li  t5,75\n"
+            "  18:  01ef8963            beq t6,t5,0x2a\n"
+            "  1c:  4f2d                li  t5,11\n"
+            "  1e:  01ef8663            beq t6,t5,0x2a\n"
+            "  22:  7df0006f            j   0x1000\n"
+            "  26:  00000013            nop"
+        >>,
+    ?assertEqual(dump_to_bin(Dump), Stream).
+
+is_boolean_far_known_test() -> % like is_boolean_far_test, but the label is added first
+    State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)),
+    State1 = ?BACKEND:jump_table(State0, 1),
+    Label = 1,
+    State2 = ?BACKEND:add_label(State1, Label, 16#1000), % known before the jump is emitted
+    {State3, Reg} = ?BACKEND:move_to_native_register(State2, {x_reg, 0}),
+    State4 = ?BACKEND:if_block(State3, {Reg, '!=', ?TRUE_ATOM}, fun(BSt0) ->
+        ?BACKEND:if_block(BSt0, {Reg, '!=', ?FALSE_ATOM}, fun(BSt1) ->
+            ?BACKEND:jump_to_label(BSt1, Label)
+        end)
+    end),
+    State5 = ?BACKEND:free_native_registers(State4, [Reg]),
+    ?BACKEND:assert_all_native_free(State5),
+    State6 = ?BACKEND:update_branches(State5),
+    Stream = ?BACKEND:stream(State6),
+    Dump =
+        <<
+            "   0:  ffff                .insn   2, 0xffff\n"
+            "   2:  ffff                .insn   2, 0xffff\n"
+            "   4:  ffff                .insn   2, 0xffff\n"
+            "   6:  ffff                .insn   2, 0xffff\n"
+            "   8:  00001697            auipc   a3,0x1\n"
+            "   c:  ff868067            jr  -8(a3) # 0x1000\n"
+            "  10:  01852f83            lw  t6,24(a0)\n"
+            "  14:  04b00f13            li  t5,75\n"
+            "  18:  01ef8963            beq t6,t5,0x2a\n"
+            "  1c:  4f2d                li  t5,11\n"
+            "  1e:  01ef8663            beq t6,t5,0x2a\n"
+            "  22:  00001f17            auipc   t5,0x1\n"
+            "  26:  fdef0067            jr  -34(t5) # 0x1000"
+        >>,
+    ?assertEqual(dump_to_bin(Dump), Stream).
+
+%% Test OP_WAIT_TIMEOUT pattern that uses set_continuation_to_offset and continuation_entry_point
+wait_timeout_test() ->
+    State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)),
+
+    Label = 42, % passed as a plain argument: "li a3,42" and "li a2,42" in the dump
+    {State1, OffsetRef0} = ?BACKEND:set_continuation_to_offset(State0),
+    {State2, TimeoutReg} = ?BACKEND:move_to_native_register(State1, 5000), % 16#1388
+    State3 = ?BACKEND:call_primitive_last(State2, ?PRIM_WAIT_TIMEOUT, [
+        ctx, jit_state, {free, TimeoutReg}, Label
+    ]),
+    State4 = ?BACKEND:add_label(State3, OffsetRef0), % continuation target (0x1e in the dump)
+    State5 = ?BACKEND:continuation_entry_point(State4),
+    {State6, ResultReg0} = ?BACKEND:call_primitive(State5, ?PRIM_PROCESS_SIGNAL_MESSAGES, [
+        ctx, jit_state
+    ]),
+    State7 = ?BACKEND:return_if_not_equal_to_ctx(State6, {free, ResultReg0}),
+    % ?WAITING_TIMEOUT_EXPIRED
+    {State8, ResultReg1} = ?BACKEND:call_primitive(State7, ?PRIM_CONTEXT_GET_FLAGS, [ctx, 2]),
+    State9 = ?BACKEND:if_block(State8, {{free, ResultReg1}, '==', 0}, fun(BlockSt) ->
+        ?BACKEND:call_primitive_last(BlockSt, ?PRIM_WAIT_TIMEOUT_TRAP_HANDLER, [
+            ctx, jit_state, Label
+        ])
+    end),
+    State10 = ?BACKEND:update_branches(State9),
+
+    Stream = ?BACKEND:stream(State10),
+    Dump =
+        <<
+            "   0:  00000f97            auipc   t6,0x0\n"
+            "   4:  0ff9                    addi    t6,t6,30 # 0x1e\n"
+            "   6:  0001                    nop\n"
+            "   8:  01f5a223            sw  t6,4(a1)\n"
+            "   c:  6f85                    lui t6,0x1\n"
+            "   e:  388f8f93            addi    t6,t6,904 # 0x1388\n"
+            "  12:  07862f03            lw  t5,120(a2)\n"
+            "  16:  867e                    mv  a2,t6\n"
+            "  18:  02a00693            li  a3,42\n"
+            "  1c:  8f02                    jr  t5\n"
+            "  1e:  05462f83            lw  t6,84(a2)\n"
+            "  22:  1141                    addi    sp,sp,-16\n"
+            "  24:  c006                    sw  ra,0(sp)\n"
+            "  26:  c22a                    sw  a0,4(sp)\n"
+            "  28:  c42e                    sw  a1,8(sp)\n"
+            "  2a:  c632                    sw  a2,12(sp)\n"
+            "  2c:  9f82                    jalr    t6\n"
+            "  2e:  8faa                    mv  t6,a0\n"
+            "  30:  4082                    lw  ra,0(sp)\n"
+            "  32:  4512                    lw  a0,4(sp)\n"
+            "  34:  45a2                    lw  a1,8(sp)\n"
+            "  36:  4632                    lw  a2,12(sp)\n"
+            "  38:  0141                    addi    sp,sp,16\n"
+            "  3a:  00af8463            beq t6,a0,0x42\n"
+            "  3e:  857e                    mv  a0,t6\n"
+            "  40:  8082                    ret\n"
+            "  42:  08400f93            li  t6,132\n"
+            "  46:  9fb2                    add t6,t6,a2\n"
+            "  48:  000faf83            lw  t6,0(t6)\n"
+            "  4c:  1141                    addi    sp,sp,-16\n"
+            "  4e:  c006                    sw  ra,0(sp)\n"
+            "  50:  c22a                    sw  a0,4(sp)\n"
+            "  52:  c42e                    sw  a1,8(sp)\n"
+            "  54:  c632                    sw  a2,12(sp)\n"
+            "  56:  4589                    li  a1,2\n"
+            "  58:  9f82                    jalr    t6\n"
+            "  5a:  8faa                    mv  t6,a0\n"
+            "  5c:  4082                    lw  ra,0(sp)\n"
+            "  5e:  4512                    lw  a0,4(sp)\n"
+            "  60:  45a2                    lw  a1,8(sp)\n"
+            "  62:  4632                    lw  a2,12(sp)\n"
+            "  64:  0141                    addi    sp,sp,16\n"
+            "  66:  000f9763            bnez    t6,0x74\n"
+            "  6a:  07c62f83            lw  t6,124(a2)\n"
+            "  6e:  02a00613            li  a2,42\n"
+            "  72:  8f82                    jr  t6"
+        >>,
+    ?assertEqual(dump_to_bin(Dump), Stream).
+
+%% Test OP_WAIT pattern that uses set_continuation_to_label (label added after its use)
+wait_test() ->
+    State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)),
+
+    State1 = ?BACKEND:jump_table(State0, 5), % 6 entries; only labels 1 and 2 get added
+    State2 = ?BACKEND:add_label(State1, 1),
+    Label = 2,
+    State3 = ?BACKEND:set_continuation_to_label(State2, Label),
+    State4 = ?BACKEND:call_primitive_last(State3, ?PRIM_SCHEDULE_WAIT_CP, [ctx, jit_state]),
+    State5 = ?BACKEND:add_label(State4, Label, 16#100), % explicit offset 16#100
+    State6 = ?BACKEND:update_branches(State5),
+
+    Stream = ?BACKEND:stream(State6),
+    Dump =
+        <<
+            "   0:  ffff                .insn   2, 0xffff\n"
+            "   2:  ffff                .insn   2, 0xffff\n"
+            "   4:  ffff                .insn   2, 0xffff\n"
+            "   6:  ffff                .insn   2, 0xffff\n"
+            "   8:  00000697            auipc   a3,0x0\n"
+            "   c:  02868067            jr  40(a3) # 0x30\n"
+            "  10:  00000697            auipc   a3,0x0\n"
+            "  14:  0f068067            jr  240(a3) # 0x100\n"
+            "  18:  ffff                .insn   2, 0xffff\n"
+            "  1a:  ffff                .insn   2, 0xffff\n"
+            "  1c:  ffff                .insn   2, 0xffff\n"
+            "  1e:  ffff                .insn   2, 0xffff\n"
+            "  20:  ffff                .insn   2, 0xffff\n"
+            "  22:  ffff                .insn   2, 0xffff\n"
+            "  24:  ffff                .insn   2, 0xffff\n"
+            "  26:  ffff                .insn   2, 0xffff\n"
+            "  28:  ffff                .insn   2, 0xffff\n"
+            "  2a:  ffff                .insn   2, 0xffff\n"
+            "  2c:  ffff                .insn   2, 0xffff\n"
+            "  2e:  ffff                .insn   2, 0xffff\n"
+            "  30:  00000f97            auipc   t6,0x0\n"
+            "  34:  0d0f8f93            addi    t6,t6,208 # 0x100\n"
+            "  38:  01f5a223            sw  t6,4(a1)\n"
+            "  3c:  07462f83            lw  t6,116(a2)\n"
+            "  40:  8f82                jr  t6"
+        >>,
+    ?assertEqual(dump_to_bin(Dump), Stream).
+
+%% Test set_continuation_to_label with known label (label added before its use)
+wait_known_test() ->
+    State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)),
+
+    State1 = ?BACKEND:jump_table(State0, 5), % 6 entries; only labels 1 and 2 get added
+    State2 = ?BACKEND:add_label(State1, 1),
+    Label = 2,
+    State3 = ?BACKEND:add_label(State2, Label, 16#100), % explicit offset 16#100
+    State4 = ?BACKEND:set_continuation_to_label(State3, Label),
+    State5 = ?BACKEND:call_primitive_last(State4, ?PRIM_SCHEDULE_WAIT_CP, [ctx, jit_state]),
+    State6 = ?BACKEND:update_branches(State5),
+
+    Stream = ?BACKEND:stream(State6),
+    Dump =
+        <<
+            "   0:  ffff                .insn   2, 0xffff\n"
+            "   2:  ffff                .insn   2, 0xffff\n"
+            "   4:  ffff                .insn   2, 0xffff\n"
+            "   6:  ffff                .insn   2, 0xffff\n"
+            "   8:  00000697            auipc   a3,0x0\n"
+            "   c:  02868067            jr  40(a3) # 0x30\n"
+            "  10:  00000697            auipc   a3,0x0\n"
+            "  14:  0f068067            jr  240(a3) # 0x100\n"
+            "  18:  ffff                .insn   2, 0xffff\n"
+            "  1a:  ffff                .insn   2, 0xffff\n"
+            "  1c:  ffff                .insn   2, 0xffff\n"
+            "  1e:  ffff                .insn   2, 0xffff\n"
+            "  20:  ffff                .insn   2, 0xffff\n"
+            "  22:  ffff                .insn   2, 0xffff\n"
+            "  24:  ffff                .insn   2, 0xffff\n"
+            "  26:  ffff                .insn   2, 0xffff\n"
+            "  28:  ffff                .insn   2, 0xffff\n"
+            "  2a:  ffff                .insn   2, 0xffff\n"
+            "  2c:  ffff                .insn   2, 0xffff\n"
+            "  2e:  ffff                .insn   2, 0xffff\n"
+            "  30:  00000f97            auipc   t6,0x0\n"
+            "  34:  0d0f8f93            addi    t6,t6,208 # 0x100\n"
+            "  38:  01f5a223            sw  t6,4(a1)\n"
+            "  3c:  07462f83            lw  t6,116(a2)\n"
+            "  40:  8f82                jr  t6"
+        >>,
+    ?assertEqual(dump_to_bin(Dump), Stream).
+
+%% Test return_labels_and_lines/2 function
+return_labels_and_lines_test() ->
+    State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)),
+    State1 = ?BACKEND:jump_table(State0, 2),
+
+    % Register sample labels out of order: label 2 at offset 32, then label 1 at offset 16
+    State2 = ?BACKEND:add_label(State1, 2, 32),
+    State3 = ?BACKEND:add_label(State2, 1, 16),
+
+    % {Line, Offset} pairs
+    SortedLines = [{10, 16}, {20, 32}],
+
+    State4 = ?BACKEND:return_labels_and_lines(State3, SortedLines),
+    Stream = ?BACKEND:stream(State4),
+
+    % Sanity check: jump table + generated code with label/line tables is at least 32 bytes
+    ?assert(byte_size(Stream) >= 32),
+
+    % Expected: jump table (3 entries, 24 bytes) + auipc + addi + ret + padding + labels table + lines table
+    Dump = % from offset 0x22 on, the listed "instructions" are the table bytes as disassembled
+        <<
+            "   0:  ffff                .insn   2, 0xffff\n"
+            "   2:  ffff                .insn   2, 0xffff\n"
+            "   4:  ffff                .insn   2, 0xffff\n"
+            "   6:  ffff                .insn   2, 0xffff\n"
+            "   8:  00000697            auipc   a3,0x0\n"
+            "   c:  00868067            jr  8(a3) # 0x10\n"
+            "  10:  00000697            auipc   a3,0x0\n"
+            "  14:  01068067            jr  16(a3) # 0x20\n"
+            "  18:  00000517            auipc   a0,0x0\n"
+            "  1c:  0529                addi    a0,a0,10 # 0x22\n"
+            "  1e:  8082                ret\n"
+            "  20:  ffff                .insn   2, 0xffff\n"
+            "  22:  0200                addi    s0,sp,256\n"
+            "  24:  0100                addi    s0,sp,128\n"
+            "  26:  0000                unimp\n"
+            "  28:  1000                addi    s0,sp,32\n"
+            "  2a:  0200                addi    s0,sp,256\n"
+            "  2c:  0000                unimp\n"
+            "  2e:  2000                fld fs0,0(s0)\n"
+            "  30:  0200                addi    s0,sp,256\n"
+            "  32:  0a00                addi    s0,sp,272\n"
+            "  34:  0000                unimp\n"
+            "  36:  1000                addi    s0,sp,32\n"
+            "  38:  1400                addi    s0,sp,544\n"
+            "  3a:  0000                unimp\n"
+            "  3c:  2000                fld fs0,0(s0)"
+        >>,
+    ?assertEqual(dump_to_bin(Dump), Stream).
+
+%% Test call_func_ptr with a {free, {x_reg, X}} argument (FuncPtr comes from call_primitive)
+gc_bif2_test() ->
+    State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)),
+    {State1, FuncPtr} = ?BACKEND:call_primitive(State0, ?PRIM_GET_IMPORTED_BIF, [jit_state, 42]),
+    {State2, _ResultReg} = ?BACKEND:call_func_ptr(State1, {free, FuncPtr}, [
+        ctx, 0, 3, {y_reg, 0}, {free, {x_reg, 0}}
+    ]), % the result register is deliberately left unused and is not freed here
+
+    Stream = ?BACKEND:stream(State2),
+    Dump =
+        <<
+            "   0:  02062f83            lw  t6,32(a2)\n"
+            "   4:  1141                    addi    sp,sp,-16\n"
+            "   6:  c006                    sw  ra,0(sp)\n"
+            "   8:  c22a                    sw  a0,4(sp)\n"
+            "   a:  c42e                    sw  a1,8(sp)\n"
+            "   c:  c632                    sw  a2,12(sp)\n"
+            "   e:  852e                    mv  a0,a1\n"
+            "  10:  02a00593            li  a1,42\n"
+            "  14:  9f82                    jalr    t6\n"
+            "  16:  8faa                    mv  t6,a0\n"
+            "  18:  4082                    lw  ra,0(sp)\n"
+            "  1a:  4512                    lw  a0,4(sp)\n"
+            "  1c:  45a2                    lw  a1,8(sp)\n"
+            "  1e:  4632                    lw  a2,12(sp)\n"
+            "  20:  0141                    addi    sp,sp,16\n"
+            "  22:  1141                    addi    sp,sp,-16\n"
+            "  24:  c006                    sw  ra,0(sp)\n"
+            "  26:  c22a                    sw  a0,4(sp)\n"
+            "  28:  c42e                    sw  a1,8(sp)\n"
+            "  2a:  c632                    sw  a2,12(sp)\n"
+            "  2c:  4581                    li  a1,0\n"
+            "  2e:  460d                    li  a2,3\n"
+            "  30:  01452f03            lw  t5,20(a0)\n"
+            "  34:  000f2683            lw  a3,0(t5)\n"
+            "  38:  4d18                    lw  a4,24(a0)\n"
+            "  3a:  9f82                    jalr    t6\n"
+            "  3c:  8faa                    mv  t6,a0\n"
+            "  3e:  4082                    lw  ra,0(sp)\n"
+            "  40:  4512                    lw  a0,4(sp)\n"
+            "  42:  45a2                    lw  a1,8(sp)\n"
+            "  44:  4632                    lw  a2,12(sp)\n"
+            "  46:  0141                    addi    sp,sp,16"
+        >>,
+    ?assertEqual(dump_to_bin(Dump), Stream).
+
+%% Test case where a parameter value ({free, a1}) is in a1, itself an argument register: it is routed through t5 into a2
+memory_ensure_free_with_roots_test() ->
+    State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)),
+    {State1, _FuncPtr} = ?BACKEND:call_primitive(State0, ?PRIM_MEMORY_ENSURE_FREE_WITH_ROOTS, [
+        ctx, jit_state, {free, a1}, 4, 1
+    ]),
+
+    Stream = ?BACKEND:stream(State1),
+    Dump =
+        <<
+            "   0:  0b000f93            li  t6,176\n"
+            "   4:  9fb2                    add t6,t6,a2\n"
+            "   6:  000faf83            lw  t6,0(t6)\n"
+            "   a:  1141                    addi    sp,sp,-16\n"
+            "   c:  c006                    sw  ra,0(sp)\n"
+            "   e:  c22a                    sw  a0,4(sp)\n"
+            "  10:  c42e                    sw  a1,8(sp)\n"
+            "  12:  c632                    sw  a2,12(sp)\n"
+            "  14:  8f2e                    mv  t5,a1\n"
+            "  16:  867a                    mv  a2,t5\n"
+            "  18:  4691                    li  a3,4\n"
+            "  1a:  4705                    li  a4,1\n"
+            "  1c:  9f82                    jalr    t6\n"
+            "  1e:  8faa                    mv  t6,a0\n"
+            "  20:  4082                    lw  ra,0(sp)\n"
+            "  22:  4512                    lw  a0,4(sp)\n"
+            "  24:  45a2                    lw  a1,8(sp)\n"
+            "  26:  4632                    lw  a2,12(sp)\n"
+            "  28:  0141                    addi    sp,sp,16"
+        >>,
+    ?assertEqual(dump_to_bin(Dump), Stream).
+
+call_ext_test() ->
+    Fresh = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)),
+    Scheduled = ?BACKEND:decrement_reductions_and_maybe_schedule_next(Fresh),
+    Called = ?BACKEND:call_primitive_with_cp(Scheduled, 4, [ctx, jit_state, 2, 5, -1]),
+    ?BACKEND:assert_all_native_free(Called),
+    Emitted = ?BACKEND:stream(Called),
+    Expected =
+        <<
+            "   0:  0085af83            lw  t6,8(a1)\n"
+            "   4:  1ffd                    addi    t6,t6,-1\n"
+            "   6:  01f5a423            sw  t6,8(a1)\n"
+            "   a:  000f9b63            bnez    t6,0x20\n"
+            "   e:  00000f97            auipc   t6,0x0\n"
+            "  12:  0fc9                    addi    t6,t6,18 # 0x20\n"
+            "  14:  0001                    nop\n"
+            "  16:  01f5a223            sw  t6,4(a1)\n"
+            "  1a:  00862f83            lw  t6,8(a2)\n"
+            "  1e:  8f82                    jr  t6\n"
+            "  20:  0005af03            lw  t5,0(a1)\n"
+            "  24:  000f2f03            lw  t5,0(t5)\n"
+            "  28:  0f62                    slli    t5,t5,0x18\n"
+            "  2a:  11800f93            li  t6,280\n"
+            "  2e:  00000013            nop\n"
+            "  32:  01ff6f33            or  t5,t5,t6\n"
+            "  36:  05e52e23            sw  t5,92(a0)\n"
+            "  3a:  01062f83            lw  t6,16(a2)\n"
+            "  3e:  4609                    li  a2,2\n"
+            "  40:  4695                    li  a3,5\n"
+            "  42:  577d                    li  a4,-1\n"
+            "  44:  8f82                    jr  t6"
+        >>,
+    ?assertEqual(dump_to_bin(Expected), Emitted).
+
+call_fun_test() ->
+    S0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)),
+    S1 = ?BACKEND:decrement_reductions_and_maybe_schedule_next(S0),
+    Arity = 0,
+    Callee = {x_reg, 0},
+    {S2, FunReg} = ?BACKEND:move_to_native_register(S1, Callee),
+    {S3, Scratch} = ?BACKEND:copy_to_native_register(S2, FunReg),
+    S4 = ?BACKEND:if_block(
+        S3, {Scratch, '&', ?TERM_PRIMARY_MASK, '!=', ?TERM_PRIMARY_BOXED}, fun(Blk) ->
+            ?BACKEND:call_primitive_last(Blk, ?PRIM_RAISE_ERROR_TUPLE, [
+                ctx, jit_state, offset, ?BADFUN_ATOM, Scratch
+            ])
+        end
+    ),
+    {S5, Scratch} = ?BACKEND:and_(S4, {free, Scratch}, ?TERM_PRIMARY_CLEAR_MASK),
+    S6 = ?BACKEND:move_array_element(S5, Scratch, 0, Scratch),
+    S7 = ?BACKEND:if_block(
+        S6, {Scratch, '&', ?TERM_BOXED_TAG_MASK, '!=', ?TERM_BOXED_FUN}, fun(Blk) ->
+            ?BACKEND:call_primitive_last(Blk, ?PRIM_RAISE_ERROR_TUPLE, [
+                ctx, jit_state, offset, ?BADFUN_ATOM, Scratch
+            ])
+        end
+    ),
+    S8 = ?BACKEND:free_native_registers(S7, [Scratch]),
+    S9 = ?BACKEND:call_primitive_with_cp(S8, ?PRIM_CALL_FUN, [
+        ctx, jit_state, FunReg, Arity
+    ]),
+    ?BACKEND:assert_all_native_free(S9),
+    Emitted = ?BACKEND:stream(S9),
+    Expected =
+        <<
+            "   0:  0085af83            lw  t6,8(a1)\n"
+            "   4:  1ffd                    addi    t6,t6,-1\n"
+            "   6:  01f5a423            sw  t6,8(a1)\n"
+            "   a:  000f9b63            bnez    t6,0x20\n"
+            "   e:  00000f97            auipc   t6,0x0\n"
+            "  12:  0fc9                    addi    t6,t6,18 # 0x20\n"
+            "  14:  0001                    nop\n"
+            "  16:  01f5a223            sw  t6,4(a1)\n"
+            "  1a:  00862f83            lw  t6,8(a2)\n"
+            "  1e:  8f82                    jr  t6\n"
+            "  20:  01852f83            lw  t6,24(a0)\n"
+            "  24:  8f7e                    mv  t5,t6\n"
+            "  26:  8efa                    mv  t4,t5\n"
+            "  28:  4e0d                    li  t3,3\n"
+            "  2a:  01cefeb3            and t4,t4,t3\n"
+            "  2e:  4e09                    li  t3,2\n"
+            "  30:  01ce8a63            beq t4,t3,0x44\n"
+            "  34:  04c62f83            lw  t6,76(a2)\n"
+            "  38:  03800613            li  a2,56\n"
+            "  3c:  18b00693            li  a3,395\n"
+            "  40:  877a                    mv  a4,t5\n"
+            "  42:  8f82                    jr  t6\n"
+            "  44:  4e8d                    li  t4,3\n"
+            "  46:  fffece93            not t4,t4\n"
+            "  4a:  01df7f33            and t5,t5,t4\n"
+            "  4e:  000f2f03            lw  t5,0(t5)\n"
+            "  52:  8efa                    mv  t4,t5\n"
+            "  54:  03f00e13            li  t3,63\n"
+            "  58:  01cefeb3            and t4,t4,t3\n"
+            "  5c:  4e51                    li  t3,20\n"
+            "  5e:  01ce8a63            beq t4,t3,0x72\n"
+            "  62:  04c62f83            lw  t6,76(a2)\n"
+            "  66:  06600613            li  a2,102\n"
+            "  6a:  18b00693            li  a3,395\n"
+            "  6e:  877a                    mv  a4,t5\n"
+            "  70:  8f82                    jr  t6\n"
+            "  72:  0005ae83            lw  t4,0(a1)\n"
+            "  76:  000eae83            lw  t4,0(t4)\n"
+            "  7a:  0ee2                    slli    t4,t4,0x18\n"
+            "  7c:  27000f13            li  t5,624\n"
+            "  80:  00000013            nop\n"
+            "  84:  01eeeeb3            or  t4,t4,t5\n"
+            "  88:  05d52e23            sw  t4,92(a0)\n"
+            "  8c:  08000f13            li  t5,128\n"
+            "  90:  9f32                    add t5,t5,a2\n"
+            "  92:  000f2f03            lw  t5,0(t5)\n"
+            "  96:  867e                    mv  a2,t6\n"
+            "  98:  4681                    li  a3,0\n"
+            "  9a:  8f02                    jr  t5"
+        >>,
+    ?assertEqual(dump_to_bin(Expected), Emitted).
+
+move_to_vm_register_test0(State, Source, Dest, Dump) ->
+    Moved = ?BACKEND:move_to_vm_register(State, Source, Dest),
+    WithJump = ?BACKEND:jump_to_offset(Moved, 16#100),
+    Emitted = ?BACKEND:stream(WithJump),
+    ?assertEqual(dump_to_bin(Dump), Emitted).
+
+move_to_vm_register_test_() ->
+    {setup,
+        fun() ->
+            ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0))
+        end,
+        fun(State0) ->
+            [
+                ?_test(begin
+                    move_to_vm_register_test0(State0, 0, {x_reg, 0}, <<
+                        "      0:   4f81                    li  t6,0\n"
+                        "      2:   01f52c23            sw  t6,24(a0)\n"
+                        "      6:   a8ed                    j   0x100"
+                    >>)
+                end),
+                ?_test(begin
+                    move_to_vm_register_test0(State0, 0, {x_reg, extra}, <<
+                        "      0:   4f81                    li  t6,0\n"
+                        "      2:   05f52c23            sw  t6,88(a0)\n"
+                        "      6:   a8ed                    j   0x100"
+                    >>)
+                end),
+                ?_test(begin
+                    move_to_vm_register_test0(State0, 0, {ptr, t5}, <<
+                        "      0:   4f81                    li  t6,0\n"
+                        "      2:   01ff2023            sw  t6,0(t5)\n"
+                        "      6:   a8ed                    j   0x100"
+                    >>)
+                end),
+                ?_test(begin
+                    move_to_vm_register_test0(State0, 0, {y_reg, 2}, <<
+                        "      0:   4f01                    li  t5,0\n"
+                        "      2:   01452f83            lw  t6,20(a0)\n"
+                        "      6:   01efa423            sw  t5,8(t6)\n"
+                        "      a:   a8dd                    j   0x100"
+                    >>)
+                end),
+                ?_test(begin
+                    move_to_vm_register_test0(State0, 0, {y_reg, 20}, <<
+                        "      0:   4f01                    li  t5,0\n"
+                        "      2:   01452f83            lw  t6,20(a0)\n"
+                        "      6:   05efa823            sw  t5,80(t6)\n"
+                        "      a:   a8dd                    j   0x100"
+                    >>)
+                end),
+                %% Test: Immediate to x_reg
+                ?_test(begin
+                    move_to_vm_register_test0(State0, 42, {x_reg, 0}, <<
+                        "      0:   02a00f93            li  t6,42\n"
+                        "      4:   01f52c23            sw  t6,24(a0)\n"
+                        "      8:   a8e5                    j   0x100"
+                    >>)
+                end),
+                ?_test(begin
+                    move_to_vm_register_test0(State0, 42, {x_reg, extra}, <<
+                        "      0:   02a00f93            li  t6,42\n"
+                        "      4:   05f52c23            sw  t6,88(a0)\n"
+                        "      8:   a8e5                    j   0x100"
+                    >>)
+                end),
+                ?_test(begin
+                    move_to_vm_register_test0(State0, 42, {y_reg, 2}, <<
+                        "      0:   02a00f13            li  t5,42\n"
+                        "      4:   01452f83            lw  t6,20(a0)\n"
+                        "      8:   01efa423            sw  t5,8(t6)\n"
+                        "      c:   a8d5                    j   0x100"
+                    >>)
+                end),
+                ?_test(begin
+                    move_to_vm_register_test0(State0, 42, {y_reg, 20}, <<
+                        "      0:   02a00f13            li  t5,42\n"
+                        "      4:   01452f83            lw  t6,20(a0)\n"
+                        "      8:   05efa823            sw  t5,80(t6)\n"
+                        "      c:   a8d5                    j   0x100"
+                    >>)
+                end),
+                %% Test: Immediate to ptr
+                ?_test(begin
+                    move_to_vm_register_test0(State0, 99, {ptr, a3}, <<
+                        "      0:   06300f93            li  t6,99\n"
+                        "      4:   01f6a023            sw  t6,0(a3)\n"
+                        "      8:   a8e5                    j   0x100"
+                    >>)
+                end),
+                %% Test: x_reg to x_reg
+                ?_test(begin
+                    move_to_vm_register_test0(State0, {x_reg, 1}, {x_reg, 2}, <<
+                        "      0:   01c52f83            lw  t6,28(a0)\n"
+                        "      4:   03f52023            sw  t6,32(a0)\n"
+                        "      8:   a8e5                    j   0x100"
+                    >>)
+                end),
+                %% Test: x_reg to ptr
+                ?_test(begin
+                    move_to_vm_register_test0(State0, {x_reg, 1}, {ptr, a1}, <<
+                        "      0:   01c52f83            lw  t6,28(a0)\n"
+                        "      4:   01f5a023            sw  t6,0(a1)\n"
+                        "      8:   a8e5                    j   0x100"
+                    >>)
+                end),
+                %% Test: ptr to x_reg
+                ?_test(begin
+                    move_to_vm_register_test0(State0, {ptr, t3}, {x_reg, 3}, <<
+                        "      0:   000e2f83            lw  t6,0(t3)\n"
+                        "      4:   03f52223            sw  t6,36(a0)\n"
+                        "      8:   a8e5                    j   0x100"
+                    >>)
+                end),
+                %% Test: x_reg to y_reg
+                ?_test(begin
+                    move_to_vm_register_test0(State0, {x_reg, 0}, {y_reg, 1}, <<
+                        "      0:   01852f83            lw  t6,24(a0)\n"
+                        "      4:   01452f03            lw  t5,20(a0)\n"
+                        "      8:   01ff2223            sw  t6,4(t5)\n"
+                        "      c:   a8d5                    j   0x100"
+                    >>)
+                end),
+                %% Test: y_reg to x_reg
+                ?_test(begin
+                    move_to_vm_register_test0(State0, {y_reg, 0}, {x_reg, 3}, <<
+                        "      0:   01452f03            lw  t5,20(a0)\n"
+                        "      4:   000f2f83            lw  t6,0(t5)\n"
+                        "      8:   03f52223            sw  t6,36(a0)\n"
+                        "      c:   a8d5                    j   0x100"
+                    >>)
+                end),
+                %% Test: y_reg to x_reg (non-zero y_reg index)
+                ?_test(begin
+                    move_to_vm_register_test0(State0, {y_reg, 1}, {x_reg, 3}, <<
+                        "      0:   01452f03            lw  t5,20(a0)\n"
+                        "      4:   004f2f83            lw  t6,4(t5)\n"
+                        "      8:   03f52223            sw  t6,36(a0)\n"
+                        "      c:   a8d5                    j   0x100"
+                    >>)
+                end),
+                %% Test: Native register to x_reg
+                ?_test(begin
+                    move_to_vm_register_test0(State0, t4, {x_reg, 0}, <<
+                        "      0:   01d52c23            sw  t4,24(a0)\n"
+                        "      4:   a8f5                    j   0x100"
+                    >>)
+                end),
+                ?_test(begin
+                    move_to_vm_register_test0(State0, t5, {x_reg, extra}, <<
+                        "      0:   05e52c23            sw  t5,88(a0)\n"
+                        "      4:   a8f5                    j   0x100"
+                    >>)
+                end),
+                %% Test: Native register to ptr
+                ?_test(begin
+                    move_to_vm_register_test0(State0, t3, {ptr, a3}, <<
+                        "      0:   01c6a023            sw  t3,0(a3)\n"
+                        "      4:   a8f5                    j   0x100"
+                    >>)
+                end),
+                %% Test: Native register to y_reg
+                ?_test(begin
+                    move_to_vm_register_test0(State0, a1, {y_reg, 0}, <<
+                        "      0:   01452f83            lw  t6,20(a0)\n"
+                        "      4:   00bfa023            sw  a1,0(t6)\n"
+                        "      8:   a8e5                    j   0x100"
+                    >>)
+                end),
+                %% Test: Large immediate to x_reg (uses lui + addi in RISC-V)
+                ?_test(begin
+                    move_to_vm_register_test0(State0, 16#12345678, {x_reg, 0}, <<
+                        "      0:   12345fb7            lui t6,0x12345\n"
+                        "      4:   678f8f93            addi    t6,t6,1656 # 0x12345678\n"
+                        "      8:   01f52c23            sw  t6,24(a0)\n"
+                        "      c:   a8d5                    j   0x100"
+                    >>)
+                end),
+                ?_test(begin
+                    move_to_vm_register_test0(State0, 16#12345678, {x_reg, extra}, <<
+                        "      0:   12345fb7            lui t6,0x12345\n"
+                        "      4:   678f8f93            addi    t6,t6,1656 # 0x12345678\n"
+                        "      8:   05f52c23            sw  t6,88(a0)\n"
+                        "      c:   a8d5                    j   0x100"
+                    >>)
+                end),
+                ?_test(begin
+                    move_to_vm_register_test0(State0, 16#12345678, {y_reg, 2}, <<
+                        "      0:   12345fb7            lui t6,0x12345\n"
+                        "      4:   678f8f93            addi    t6,t6,1656 # 0x12345678\n"
+                        "      8:   01452f03            lw  t5,20(a0)\n"
+                        "      c:   01ff2423            sw  t6,8(t5)\n"
+                        "      10:  a8c5                    j   0x100"
+                    >>)
+                end),
+                ?_test(begin
+                    move_to_vm_register_test0(State0, 16#12345678, {y_reg, 20}, <<
+                        "      0:   12345fb7            lui t6,0x12345\n"
+                        "      4:   678f8f93            addi    t6,t6,1656 # 0x12345678\n"
+                        "      8:   01452f03            lw  t5,20(a0)\n"
+                        "      c:   05ff2823            sw  t6,80(t5)\n"
+                        "      10:  a8c5                    j   0x100"
+                    >>)
+                end),
+                %% Test: Large immediate to ptr
+                ?_test(begin
+                    move_to_vm_register_test0(State0, 16#12345678, {ptr, a3}, <<
+                        "      0:   12345fb7            lui t6,0x12345\n"
+                        "      4:   678f8f93            addi    t6,t6,1656 # 0x12345678\n"
+                        "      8:   01f6a023            sw  t6,0(a3)\n"
+                        "      c:   a8d5                    j   0x100"
+                    >>)
+                end),
+                %% Test: x_reg to y_reg (high index)
+                ?_test(begin
+                    move_to_vm_register_test0(State0, {x_reg, 15}, {y_reg, 31}, <<
+                        "      0:   05452f83            lw  t6,84(a0)\n"
+                        "      4:   01452f03            lw  t5,20(a0)\n"
+                        "      8:   07ff2e23            sw  t6,124(t5)\n"
+                        "      c:   a8d5                    j   0x100"
+                    >>)
+                end),
+                %% Test: y_reg to x_reg (high index)
+                ?_test(begin
+                    move_to_vm_register_test0(State0, {y_reg, 31}, {x_reg, 15}, <<
+                        "      0:   01452f03            lw  t5,20(a0)\n"
+                        "      4:   07cf2f83            lw  t6,124(t5)\n"
+                        "      8:   05f52a23            sw  t6,84(a0)\n"
+                        "      c:   a8d5                    j   0x100"
+                    >>)
+                end),
+                %% Test: Large y_reg index (32): byte offset 128 is built in a register (li + add) and stored with sw 0(reg)
+                ?_test(begin
+                    move_to_vm_register_test0(State0, 42, {y_reg, 32}, <<
+                        "   0:  02a00f13            li  t5,42\n"
+                        "   4:  01452f83            lw  t6,20(a0)\n"
+                        "   8:  08000e93            li  t4,128\n"
+                        "   c:  9efe                    add t4,t4,t6\n"
+                        "   e:  01eea023            sw  t5,0(t4)\n"
+                        "  12:  a0fd                    j   0x100"
+                    >>)
+                end),
+                %% Test: Negative immediate to x_reg
+                ?_test(begin
+                    move_to_vm_register_test0(State0, -1, {x_reg, 0}, <<
+                        "      0:   5ffd                    li  t6,-1\n"
+                        "      2:   01f52c23            sw  t6,24(a0)\n"
+                        "      6:   a8ed                    j   0x100"
+                    >>)
+                end),
+                ?_test(begin
+                    move_to_vm_register_test0(State0, -100, {x_reg, 0}, <<
+                        "      0:   f9c00f93            li  t6,-100\n"
+                        "      4:   01f52c23            sw  t6,24(a0)\n"
+                        "      8:   a8e5                    j   0x100"
+                    >>)
+                end),
+                ?_test(begin
+                    move_to_vm_register_test0(State0, -1000, {x_reg, 0}, <<
+                        "      0:   c1800f93            li  t6,-1000\n"
+                        "      4:   01f52c23            sw  t6,24(a0)\n"
+                        "      8:   a8e5                    j   0x100"
+                    >>)
+                end)
+            ]
+        end}.
+
+move_array_element_test0(State, Reg, Index, Dest, Dump) ->
+    Moved = ?BACKEND:move_array_element(State, Reg, Index, Dest),
+    Emitted = ?BACKEND:stream(Moved),
+    ?assertEqual(dump_to_bin(Dump), Emitted).
+
+move_array_element_test_() ->
+    {setup,
+        fun() ->
+            ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0))
+        end,
+        fun(State0) ->
+            [
+                %% move_array_element: reg[x] to x_reg
+                ?_test(begin
+                    move_array_element_test0(State0, a3, 2, {x_reg, 0}, <<
+                        "   0:  0086af83            lw  t6,8(a3)\n"
+                        "   4:  01f52c23            sw  t6,24(a0)"
+                    >>)
+                end),
+                %% move_array_element: reg[x] to ptr
+                ?_test(begin
+                    move_array_element_test0(State0, a3, 3, {ptr, t4}, <<
+                        "   0:  00c6af83            lw  t6,12(a3)\n"
+                        "   4:  01fea023            sw  t6,0(t4)"
+                    >>)
+                end),
+                %% move_array_element: reg[x] to y_reg
+                ?_test(begin
+                    move_array_element_test0(State0, a3, 1, {y_reg, 2}, <<
+                        "   0:  0046af03            lw  t5,4(a3)\n"
+                        "   4:  01452f83            lw  t6,20(a0)\n"
+                        "   8:  01efa423            sw  t5,8(t6)"
+                    >>)
+                end),
+                %% move_array_element: reg[x] to native reg (t4)
+                ?_test(begin
+                    move_array_element_test0(State0, a3, 1, t4, <<
+                        "   0:  0046ae83            lw  t4,4(a3)"
+                    >>)
+                end),
+                %% move_array_element: reg[x] to y_reg (high index)
+                ?_test(begin
+                    move_array_element_test0(State0, a3, 7, {y_reg, 31}, <<
+                        "   0:  01c6af03            lw  t5,28(a3)\n"
+                        "   4:  01452f83            lw  t6,20(a0)\n"
+                        "   8:  07efae23            sw  t5,124(t6)"
+                    >>)
+                end),
+                %% move_array_element: reg[x] to x_reg (high index)
+                ?_test(begin
+                    move_array_element_test0(State0, a3, 7, {x_reg, 15}, <<
+                        "   0:  01c6af83            lw  t6,28(a3)\n"
+                        "   4:  05f52a23            sw  t6,84(a0)"
+                    >>)
+                end),
+                %% move_array_element: reg_x[reg_y] to x_reg
+                ?_test(begin
+                    {State1, Reg} = ?BACKEND:get_array_element(State0, a3, 4),
+                    move_array_element_test0(State1, a3, {free, Reg}, {x_reg, 2}, <<
+                        "   0:  0106af83            lw  t6,16(a3)\n"
+                        "   4:  0f8a                    slli    t6,t6,0x2\n"
+                        "   6:  01f68fb3            add t6,a3,t6\n"
+                        "   a:  000faf83            lw  t6,0(t6)\n"
+                        "   e:  03f52023            sw  t6,32(a0)"
+                    >>)
+                end),
+                %% move_array_element: reg_x[reg_y] to ptr
+                ?_test(begin
+                    {State1, Reg} = ?BACKEND:get_array_element(State0, a3, 4),
+                    move_array_element_test0(State1, a3, {free, Reg}, {ptr, t4}, <<
+                        "   0:  0106af83            lw  t6,16(a3)\n"
+                        "   4:  0f8a                    slli    t6,t6,0x2\n"
+                        "   6:  01f68fb3            add t6,a3,t6\n"
+                        "   a:  000faf83            lw  t6,0(t6)\n"
+                        "   e:  01fea023            sw  t6,0(t4)"
+                    >>)
+                end),
+                %% move_array_element: reg_x[reg_y] to y_reg
+                ?_test(begin
+                    {State1, Reg} = ?BACKEND:get_array_element(State0, a3, 4),
+                    move_array_element_test0(State1, a3, {free, Reg}, {y_reg, 31}, <<
+                        "   0:  0106af83            lw  t6,16(a3)\n"
+                        "   4:  0f8a                    slli    t6,t6,0x2\n"
+                        "   6:  01f68fb3            add t6,a3,t6\n"
+                        "   a:  000faf83            lw  t6,0(t6)\n"
+                        "   e:  01452f03            lw  t5,20(a0)\n"
+                        "  12:  07ff2e23            sw  t6,124(t5)"
+                    >>)
+                end),
+                %% move_array_element with integer index and x_reg destination
+                ?_test(begin
+                    {State1, BaseReg} = ?BACKEND:move_to_native_register(State0, {x_reg, 0}),
+                    move_array_element_test0(State1, BaseReg, 2, {x_reg, 5}, <<
+                        "   0:  01852f83            lw  t6,24(a0)\n"
+                        "   4:  008faf03            lw  t5,8(t6)\n"
+                        "   8:  03e52623            sw  t5,44(a0)"
+                    >>)
+                end)
+            ]
+        end}.
+
+get_array_element_test_() ->
+    {setup,
+        fun() ->
+            ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0))
+        end,
+        fun(State0) ->
+            [
+                %% get_array_element: t3[4] (lw at byte offset 16) into a newly allocated native reg, expected to be t6
+                ?_test(begin
+                    {State1, Reg} = ?BACKEND:get_array_element(State0, t3, 4),
+                    Stream = ?BACKEND:stream(State1),
+                    Dump = <<
+                        "   0:  010e2f83            lw  t6,16(t3)"
+                    >>,
+                    ?assertEqual(dump_to_bin(Dump), Stream),
+                    ?assertEqual(t6, Reg)
+                end)
+            ]
+        end}.
+
+move_to_array_element_test_() ->
+    {setup,
+        fun() ->
+            ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0))
+        end,
+        fun(State0) ->
+            [
+                %% move_to_array_element/4: x_reg to reg[x]
+                ?_test(begin
+                    State1 = ?BACKEND:move_to_array_element(State0, {x_reg, 0}, a3, 2),
+                    Stream = ?BACKEND:stream(State1),
+                    Dump = <<
+                        "   0:  01852f83            lw  t6,24(a0)\n"
+                        "   4:  01f6a423            sw  t6,8(a3)"
+                    >>,
+                    ?assertEqual(dump_to_bin(Dump), Stream)
+                end),
+                %% move_to_array_element/4: x_reg to reg[reg]
+                ?_test(begin
+                    State1 = ?BACKEND:move_to_array_element(State0, {x_reg, 0}, a3, t3),
+                    Stream = ?BACKEND:stream(State1),
+                    Dump = <<
+                        "   0:  01852f83            lw  t6,24(a0)\n"
+                        "   4:  8f72                    mv  t5,t3\n"
+                        "   6:  0f0a                    slli    t5,t5,0x2\n"
+                        "   8:  01e68f33            add t5,a3,t5\n"
+                        "   c:  01ff2023            sw  t6,0(t5)"
+                    >>,
+                    ?assertEqual(dump_to_bin(Dump), Stream)
+                end),
+                %% move_to_array_element/4: ptr to reg[reg]
+                ?_test(begin
+                    State1 = ?BACKEND:move_to_array_element(State0, {ptr, t6}, a3, t3),
+                    Stream = ?BACKEND:stream(State1),
+                    Dump = <<
+                        "   0:  000faf83            lw  t6,0(t6)\n"
+                        "   4:  8f72                    mv  t5,t3\n"
+                        "   6:  0f0a                    slli    t5,t5,0x2\n"
+                        "   8:  01e68f33            add t5,a3,t5\n"
+                        "   c:  01ff2023            sw  t6,0(t5)"
+                    >>,
+                    ?assertEqual(dump_to_bin(Dump), Stream)
+                end),
+                %% move_to_array_element/4: y_reg to reg[reg]
+                ?_test(begin
+                    State1 = ?BACKEND:move_to_array_element(State0, {y_reg, 2}, a3, t3),
+                    Stream = ?BACKEND:stream(State1),
+                    Dump = <<
+                        "   0:  01452f03            lw  t5,20(a0)\n"
+                        "   4:  008f2f83            lw  t6,8(t5)\n"
+                        "   8:  8f72                    mv  t5,t3\n"
+                        "   a:  0f0a                    slli    t5,t5,0x2\n"
+                        "   c:  01e68f33            add t5,a3,t5\n"
+                        "  10:  01ff2023            sw  t6,0(t5)"
+                    >>,
+                    ?assertEqual(dump_to_bin(Dump), Stream)
+                end),
+                %% move_to_array_element/5: x_reg to reg[x+offset], integer index. NOTE(review): expects sw at 8(a3) for index 2, offset 1; confirm offset semantics
+                ?_test(begin
+                    State1 = ?BACKEND:move_to_array_element(State0, {x_reg, 0}, a3, 2, 1),
+                    Stream = ?BACKEND:stream(State1),
+                    Dump = <<
+                        "   0:  01852f83            lw  t6,24(a0)\n"
+                        "   4:  01f6a423            sw  t6,8(a3)"
+                    >>,
+                    ?assertEqual(dump_to_bin(Dump), Stream)
+                end),
+                %% move_to_array_element/5: x_reg to reg[reg+offset] (a3 and t3 marked as used)
+                ?_test(begin
+                    State1 = setelement(7, State0, ?BACKEND:available_regs(State0) -- [a3, t3]),
+                    State2 = setelement(8, State1, [a3, t3]),
+                    [a3, t3] = ?BACKEND:used_regs(State2),
+                    State3 = ?BACKEND:move_to_array_element(State2, {x_reg, 0}, a3, t3, 1),
+                    Stream = ?BACKEND:stream(State3),
+                    Dump = <<
+                        "   0:  01852f83            lw  t6,24(a0)\n"
+                        "   4:  001e0f13            addi    t5,t3,1\n"
+                        "   8:  0f0a                    slli    t5,t5,0x2\n"
+                        "   a:  01e68f33            add t5,a3,t5\n"
+                        "   e:  01ff2023            sw  t6,0(t5)"
+                    >>,
+                    ?assertEqual(dump_to_bin(Dump), Stream)
+                end),
+                %% move_to_array_element/5: imm to reg[reg+offset] (a3 and t3 marked as used)
+                ?_test(begin
+                    State1 = setelement(7, State0, ?BACKEND:available_regs(State0) -- [a3, t3]),
+                    State2 = setelement(8, State1, [a3, t3]),
+                    [a3, t3] = ?BACKEND:used_regs(State2),
+                    State3 = ?BACKEND:move_to_array_element(State2, 42, a3, t3, 1),
+                    Stream = ?BACKEND:stream(State3),
+                    Dump = <<
+                        "      0:   02a00f93            li  t6,42\n"
+                        "      4:   001e0f13            addi    t5,t3,1\n"
+                        "      8:   0f0a                    slli    t5,t5,0x2\n"
+                        "      a:   01e68f33            add t5,a3,t5\n"
+                        "      e:   01ff2023            sw  t6,0(t5)"
+                    >>,
+                    ?assertEqual(dump_to_bin(Dump), Stream)
+                end)
+            ]
+        end}.
+
+%% Generator for ?BACKEND:move_to_native_register/2,3. Every case starts from the same
+%% fresh backend state and compares the emitted bytes with an objdump-style listing.
+move_to_native_register_test_() ->
+    {setup,
+        fun() -> ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)) end,
+        fun(St0) ->
+            [
+                %% /2, immediate 42: a single li into t6
+                ?_test(begin
+                    {St1, Dst} = ?BACKEND:move_to_native_register(St0, 42),
+                    Code = ?BACKEND:stream(St1),
+                    ?assertEqual(t6, Dst),
+                    Expected = dump_to_bin(<<
+                        "   0:  02a00f93            li  t6,42"
+                    >>),
+                    ?assertEqual(Expected, Code)
+                end),
+                %% /2, immediate -42: a negative value still takes one li
+                ?_test(begin
+                    {St1, Dst} = ?BACKEND:move_to_native_register(St0, -42),
+                    Code = ?BACKEND:stream(St1),
+                    ?assertEqual(t6, Dst),
+                    Expected = dump_to_bin(<<
+                        "   0:  fd600f93            li  t6,-42"
+                    >>),
+                    ?assertEqual(Expected, Code)
+                end),
+                %% /2, immediate -255 (boundary case): still one li
+                ?_test(begin
+                    {St1, Dst} = ?BACKEND:move_to_native_register(St0, -255),
+                    Code = ?BACKEND:stream(St1),
+                    ?assertEqual(t6, Dst),
+                    Expected = dump_to_bin(<<
+                        "   0:  f0100f93            li  t6,-255"
+                    >>),
+                    ?assertEqual(Expected, Code)
+                end),
+                %% /2, immediate -256 (boundary case): still one li; the jump added below follows it
+                ?_test(begin
+                    {St1, Dst} = ?BACKEND:move_to_native_register(St0, -256),
+                    St2 = ?BACKEND:jump_to_offset(St1, 16#100),
+                    Code = ?BACKEND:stream(St2),
+                    ?assertEqual(t6, Dst),
+                    Expected = dump_to_bin(<<
+                        "   0:  f0000f93            li  t6,-256\n"
+                        "   4:  a8f5                    j   0x100"
+                    >>),
+                    ?assertEqual(Expected, Code)
+                end),
+                %% /2, {ptr, Reg}: the load overwrites the pointer register, which is also returned
+                ?_test(begin
+                    {St1, Dst} = ?BACKEND:move_to_native_register(St0, {ptr, t5}),
+                    Code = ?BACKEND:stream(St1),
+                    ?assertEqual(t5, Dst),
+                    Expected = dump_to_bin(<<
+                        "   0:  000f2f03            lw  t5,0(t5)"
+                    >>),
+                    ?assertEqual(Expected, Code)
+                end),
+                %% /2, {x_reg, N}: one load relative to a0
+                ?_test(begin
+                    {St1, Dst} = ?BACKEND:move_to_native_register(St0, {x_reg, 5}),
+                    Code = ?BACKEND:stream(St1),
+                    ?assertEqual(t6, Dst),
+                    Expected = dump_to_bin(<<
+                        "   0:  02c52f83            lw  t6,44(a0)"
+                    >>),
+                    ?assertEqual(Expected, Code)
+                end),
+                %% /2, {y_reg, N}: two loads, t5 carries the intermediate pointer
+                ?_test(begin
+                    {St1, Dst} = ?BACKEND:move_to_native_register(St0, {y_reg, 3}),
+                    Code = ?BACKEND:stream(St1),
+                    ?assertEqual(t6, Dst),
+                    Expected = dump_to_bin(<<
+                        "   0:  01452f03            lw  t5,20(a0)\n"
+                        "   4:  00cf2f83            lw  t6,12(t5)"
+                    >>),
+                    ?assertEqual(Expected, Code)
+                end),
+                %% /3, immediate into the requested register
+                ?_test(begin
+                    St1 = ?BACKEND:move_to_native_register(St0, 42, t5),
+                    Code = ?BACKEND:stream(St1),
+                    Expected = dump_to_bin(<<
+                        "   0:  02a00f13            li  t5,42"
+                    >>),
+                    ?assertEqual(Expected, Code)
+                end),
+                %% /3, register to register: a plain mv
+                ?_test(begin
+                    St1 = ?BACKEND:move_to_native_register(St0, t6, t4),
+                    Code = ?BACKEND:stream(St1),
+                    Expected = dump_to_bin(<<
+                        "   0:  8efe                    mv  t4,t6"
+                    >>),
+                    ?assertEqual(Expected, Code)
+                end),
+                %% /3, {ptr, Reg} into another register
+                ?_test(begin
+                    St1 = ?BACKEND:move_to_native_register(St0, {ptr, t6}, t3),
+                    Code = ?BACKEND:stream(St1),
+                    Expected = dump_to_bin(<<
+                        "   0:  000fae03            lw  t3,0(t6)"
+                    >>),
+                    ?assertEqual(Expected, Code)
+                end),
+                %% /3, {x_reg, N} into the requested register (a3)
+                ?_test(begin
+                    St1 = ?BACKEND:move_to_native_register(St0, {x_reg, 2}, a3),
+                    Code = ?BACKEND:stream(St1),
+                    Expected = dump_to_bin(<<
+                        "   0:  5114                    lw  a3,32(a0)"
+                    >>),
+                    ?assertEqual(Expected, Code)
+                end),
+                %% /3, {y_reg, N} into the requested register (a1), going through t6
+                ?_test(begin
+                    St1 = ?BACKEND:move_to_native_register(St0, {y_reg, 2}, a1),
+                    Code = ?BACKEND:stream(St1),
+                    Expected = dump_to_bin(<<
+                        "   0:  01452f83            lw  t6,20(a0)\n"
+                        "   4:  008fa583            lw  a1,8(t6)"
+                    >>),
+                    ?assertEqual(Expected, Code)
+                end),
+                %% move_to_vm_register/3: {ptr, Reg, 1} into {fp_reg, 3} (term_to_float case)
+                ?_test(begin
+                    {St1, SrcReg} = ?BACKEND:move_to_native_register(St0, {x_reg, 0}),
+                    St2 = ?BACKEND:move_to_vm_register(
+                        St1, {free, {ptr, SrcReg, 1}}, {fp_reg, 3}
+                    ),
+                    Code = ?BACKEND:stream(St2),
+                    Expected = dump_to_bin(<<
+                        "   0:  01852f83            lw  t6,24(a0)\n"
+                        "   4:  06052f03            lw  t5,96(a0)\n"
+                        "   8:  004fae83            lw  t4,4(t6)\n"
+                        "   c:  01df2c23            sw  t4,24(t5)\n"
+                        "  10:  008fae83            lw  t4,8(t6)\n"
+                        "  14:  01df2e23            sw  t4,28(t5)"
+                    >>),
+                    ?assertEqual(Expected, Code)
+                end)
+            ]
+        end}.
+
+add_test0(State0, Reg, Imm, Dump) ->
+    % Imm is an immediate or a register; a jump to 0x100 is appended after the add
+    AfterAdd = ?BACKEND:add(State0, Reg, Imm),
+    AfterJump = ?BACKEND:jump_to_offset(AfterAdd, 16#100),
+    Emitted = ?BACKEND:stream(AfterJump),
+    ?assertEqual(dump_to_bin(Dump), Emitted).
+
+%% Generator for ?BACKEND:add/3: a small immediate, an immediate that is first
+%% loaded into t6, and a register operand. Each listing ends with the helper's jump.
+add_test_() ->
+    {setup,
+        fun() -> ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)) end,
+        fun(Fresh) ->
+            [
+                ?_test(
+                    add_test0(Fresh, a2, 2, <<
+                        "   0:  0609                    addi    a2,a2,2\n"
+                        "   2:  a8fd                    j   0x100"
+                    >>)
+                ),
+                ?_test(
+                    add_test0(Fresh, a2, 256, <<
+                        "   0:  10000f93            li  t6,256\n"
+                        "   4:  967e                    add a2,a2,t6\n"
+                        "   6:  a8ed                    j   0x100"
+                    >>)
+                ),
+                ?_test(
+                    add_test0(Fresh, a2, a3, <<
+                        "   0:  9636                    add a2,a2,a3\n"
+                        "   2:  a8fd                    j   0x100"
+                    >>)
+                )
+            ]
+        end}.
+
+sub_test0(State0, Reg, Imm, Dump) ->
+    % Imm is an immediate or a register; a jump to 0x100 is appended after the sub
+    AfterSub = ?BACKEND:sub(State0, Reg, Imm),
+    AfterJump = ?BACKEND:jump_to_offset(AfterSub, 16#100),
+    Emitted = ?BACKEND:stream(AfterJump),
+    ?assertEqual(dump_to_bin(Dump), Emitted).
+
+%% Generator for ?BACKEND:sub/3: a small immediate (emitted as a negative addi), an
+%% immediate that is first loaded into t6, and a register operand.
+sub_test_() ->
+    {setup,
+        fun() -> ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)) end,
+        fun(Fresh) ->
+            [
+                ?_test(
+                    sub_test0(Fresh, a2, 2, <<
+                        "   0:  1679                    addi    a2,a2,-2\n"
+                        "   2:  a8fd                    j   0x100"
+                    >>)
+                ),
+                ?_test(
+                    sub_test0(Fresh, a2, 256, <<
+                        "      0:   10000f93            li  t6,256\n"
+                        "      4:   41f60633            sub a2,a2,t6\n"
+                        "      8:   a8e5                    j   0x100"
+                    >>)
+                ),
+                ?_test(
+                    sub_test0(Fresh, a2, a3, <<
+                        "      0:   8e15                    sub a2,a2,a3\n"
+                        "      2:   a8fd                    j   0x100"
+                    >>)
+                )
+            ]
+        end}.
+
+mul_test0(State0, Reg, Imm, Dump) ->
+    % Unlike add_test0/sub_test0 no jump is appended: Dump holds only the multiply sequence
+    Emitted = ?BACKEND:stream(?BACKEND:mul(State0, Reg, Imm)),
+    ?assertEqual(dump_to_bin(Dump), Emitted).
+
+%% Generator for ?BACKEND:mul/3 by 2..11. Expected code: one shift for 2/4/8, shift+add for
+%% 3/5/9, shift+sub for 7, shift+add+shift for 6/10, and a real `mul` through t6 for 11.
+mul_test_() ->
+    {setup,
+        fun() -> ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)) end,
+        fun(Fresh) ->
+            [
+                ?_test(
+                    mul_test0(Fresh, a2, 2, <<
+                        "      0:   0606                    slli    a2,a2,0x1"
+                    >>)
+                ),
+                ?_test(
+                    mul_test0(Fresh, a2, 3, <<
+                        "      0:   00161f93            slli    t6,a2,0x1\n"
+                        "      4:   00cf8633            add a2,t6,a2"
+                    >>)
+                ),
+                ?_test(
+                    mul_test0(Fresh, a2, 4, <<
+                        "      0:   060a                    slli    a2,a2,0x2"
+                    >>)
+                ),
+                ?_test(
+                    mul_test0(Fresh, a2, 5, <<
+                        "      0:   00261f93            slli    t6,a2,0x2\n"
+                        "      4:   00cf8633            add a2,t6,a2"
+                    >>)
+                ),
+                ?_test(
+                    mul_test0(Fresh, a2, 6, <<
+                        "      0:   00161f93            slli    t6,a2,0x1\n"
+                        "      4:   00cf8633            add a2,t6,a2\n"
+                        "      8:   0606                    slli    a2,a2,0x1"
+                    >>)
+                ),
+                ?_test(
+                    mul_test0(Fresh, a2, 7, <<
+                        "      0:   00361f93            slli    t6,a2,0x3\n"
+                        "      4:   40cf8633            sub a2,t6,a2"
+                    >>)
+                ),
+                ?_test(
+                    mul_test0(Fresh, a2, 8, <<
+                        "      0:   060e                    slli    a2,a2,0x3"
+                    >>)
+                ),
+                ?_test(
+                    mul_test0(Fresh, a2, 9, <<
+                        "      0:   00361f93            slli    t6,a2,0x3\n"
+                        "      4:   00cf8633            add a2,t6,a2"
+                    >>)
+                ),
+                ?_test(
+                    mul_test0(Fresh, a2, 10, <<
+                        "      0:   00261f93            slli    t6,a2,0x2\n"
+                        "      4:   00cf8633            add a2,t6,a2\n"
+                        "      8:   0606                    slli    a2,a2,0x1"
+                    >>)
+                ),
+                ?_test(
+                    mul_test0(Fresh, a2, 11, <<
+                        "      0:   4fad                    li  t6,11\n"
+                        "      2:   03f60633            mul a2,a2,t6"
+                    >>)
+                )
+            ]
+        end}.
+
+%% call_primitive/3 with a {y_reg, N} argument (the set_args1 y_reg pattern)
+set_args1_y_reg_test() ->
+    Fresh = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)),
+
+    % Same shape as the real call site
+    %   {MSt2, Value} = MMod:call_primitive(MSt1, ?PRIM_BITSTRING_GET_UTF8, [{free, Src}])
+    % except that the argument is {y_reg, 5} rather than {free, Src}.
+    {Called, _ResultReg} = ?BACKEND:call_primitive(Fresh, ?PRIM_BITSTRING_GET_UTF8, [
+        {y_reg, 5}
+    ]),
+
+    Code = ?BACKEND:stream(Called),
+    % The argument is fetched into a0 with the two loads at 0x16/0x1a, right before the jalr
+    Expected = dump_to_bin(<<
+        "   0:  04300f93            li  t6,67\n"
+        "   4:  0f8a                    slli    t6,t6,0x2\n"
+        "   6:  9fb2                    add t6,t6,a2\n"
+        "   8:  000faf83            lw  t6,0(t6)\n"
+        "   c:  1141                    addi    sp,sp,-16\n"
+        "   e:  c006                    sw  ra,0(sp)\n"
+        "  10:  c22a                    sw  a0,4(sp)\n"
+        "  12:  c42e                    sw  a1,8(sp)\n"
+        "  14:  c632                    sw  a2,12(sp)\n"
+        "  16:  01452f03            lw  t5,20(a0)\n"
+        "  1a:  014f2503            lw  a0,20(t5)\n"
+        "  1e:  9f82                    jalr    t6\n"
+        "  20:  8faa                    mv  t6,a0\n"
+        "  22:  4082                    lw  ra,0(sp)\n"
+        "  24:  4512                    lw  a0,4(sp)\n"
+        "  26:  45a2                    lw  a1,8(sp)\n"
+        "  28:  4632                    lw  a2,12(sp)\n"
+        "  2a:  0141                    addi    sp,sp,16"
+    >>),
+    ?assertEqual(Expected, Code).
+
+%% Read of a high-numbered Y register (Y=123, byte offset 123 * 4 = 492)
+large_y_reg_read_test() ->
+    Fresh = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)),
+    % NOTE(review): 492 is beyond the backend's direct-offset threshold; exact limit to be confirmed
+    {Loaded, Dst} = ?BACKEND:move_to_native_register(Fresh, {y_reg, 123}),
+    Code = ?BACKEND:stream(Loaded),
+    % The offset is materialized with li, added to the Y base, then loaded with offset 0
+    Expected = dump_to_bin(<<
+        "   0:  01452f03            lw  t5,20(a0)\n"
+        "   4:  1ec00f93            li  t6,492\n"
+        "   8:  9ffa                    add t6,t6,t5\n"
+        "   a:  000faf83            lw  t6,0(t6)"
+    >>),
+    ?assertEqual(Expected, Code),
+    ?assertEqual(t6, Dst).
+
+%% Store of an immediate into a high-numbered Y register (Y=123, byte offset 492)
+large_y_reg_write_test() ->
+    Fresh = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)),
+    % The value 42 goes to {y_reg, 123}, i.e. 123 * 4 = 492 bytes past the Y base
+    Stored = ?BACKEND:move_to_vm_register(Fresh, 42, {y_reg, 123}),
+    Code = ?BACKEND:stream(Stored),
+    % Three temporaries are used: t5 (value), t6 (Y base) and t4 (computed address)
+    Expected = dump_to_bin(<<
+        "   0:  02a00f13            li  t5,42\n"
+        "   4:  01452f83            lw  t6,20(a0)\n"
+        "   8:  1ec00e93            li  t4,492\n"
+        "   c:  9efe                    add t4,t4,t6\n"
+        "   e:  01eea023            sw  t5,0(t4)"
+    >>),
+    ?assertEqual(Expected, Code).
+
+%% Test large Y register read under register pressure (falls back to the t0/t1 pair)
+large_y_reg_read_register_exhaustion_test() ->
+    State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)),
+    % Occupy t6..t2; the patterns pin the allocation order that the expected dump relies on
+    {State1, t6} = ?BACKEND:move_to_native_register(State0, {x_reg, 0}),
+    {State2, t5} = ?BACKEND:move_to_native_register(State1, {x_reg, 1}),
+    {State3, t4} = ?BACKEND:move_to_native_register(State2, {x_reg, 2}),
+    {State4, t3} = ?BACKEND:move_to_native_register(State3, {x_reg, 3}),
+    {State5, t2} = ?BACKEND:move_to_native_register(State4, {x_reg, 4}),
+    % Y=35 (offset 140) takes the li/add path; presumably only t1 is still allocatable here
+    {StateFinal, ResultReg} = ?BACKEND:move_to_native_register(State5, {y_reg, 35}),
+    Stream = ?BACKEND:stream(StateFinal),
+    % Expected: the Y base is loaded into t0, the address and the result go through t1
+    Dump = <<
+        "   0:  01852f83            lw  t6,24(a0)\n"
+        "   4:  01c52f03            lw  t5,28(a0)\n"
+        "   8:  02052e83            lw  t4,32(a0)\n"
+        "   c:  02452e03            lw  t3,36(a0)\n"
+        "  10:  02852383            lw  t2,40(a0)\n"
+        "  14:  01452283            lw  t0,20(a0)\n"
+        "  18:  08c00313            li  t1,140\n"
+        "  1c:  9316                    add t1,t1,t0\n"
+        "  1e:  00032303            lw  t1,0(t1)"
+    >>,
+    ?assertEqual(dump_to_bin(Dump), Stream),
+    ?assertEqual(t1, ResultReg).
+
+%% Store into a high-numbered Y register while t6..t2 are all taken (t1/t0 fallback)
+large_y_reg_write_register_exhaustion_test() ->
+    St0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)),
+    % The value to store lives in the first register handed out
+    {St1, SrcReg} = ?BACKEND:move_to_native_register(St0, {x_reg, 0}),
+    % Take four more registers; the patterns pin the expected allocation order
+    {St2, t5} = ?BACKEND:move_to_native_register(St1, {x_reg, 1}),
+    {St3, t4} = ?BACKEND:move_to_native_register(St2, {x_reg, 2}),
+    {St4, t3} = ?BACKEND:move_to_native_register(St3, {x_reg, 3}),
+    {St5, t2} = ?BACKEND:move_to_native_register(St4, {x_reg, 4}),
+    % Y=50 is 200 bytes past the Y base
+    Stored = ?BACKEND:move_to_vm_register(St5, SrcReg, {y_reg, 50}),
+    Code = ?BACKEND:stream(Stored),
+    % The store goes through t1 (Y base) and t0 (computed address)
+    Expected = dump_to_bin(<<
+        "      0:   01852f83            lw  t6,24(a0)\n"
+        "      4:   01c52f03            lw  t5,28(a0)\n"
+        "      8:   02052e83            lw  t4,32(a0)\n"
+        "      c:   02452e03            lw  t3,36(a0)\n"
+        "     10:   02852383            lw  t2,40(a0)\n"
+        "     14:   01452303            lw  t1,20(a0)\n"
+        "     18:   0c800293            li  t0,200\n"
+        "     1c:   929a                    add t0,t0,t1\n"
+        "     1e:   01f2a023            sw  t6,0(t0)"
+    >>),
+    ?assertEqual(Expected, Code).
+
+%% Boundary case: Y=31 (offset 31 * 4 = 124) must still be read with a direct load offset
+y_reg_boundary_direct_test() ->
+    Fresh = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)),
+    {Loaded, Dst} = ?BACKEND:move_to_native_register(Fresh, {y_reg, 31}),
+    Code = ?BACKEND:stream(Loaded),
+    % NOTE(review): lw accepts offsets up to 2047, yet Y=35 is tested indirect; confirm threshold
+    Expected = dump_to_bin(<<
+        "   0:  01452f03            lw  t5,20(a0)\n"
+        "   4:  07cf2f83            lw  t6,124(t5)"
+    >>),
+    ?assertEqual(Expected, Code),
+    ?assertEqual(t6, Dst).
+
+%% debugger/1 must emit exactly one breakpoint instruction
+debugger_test() ->
+    Fresh = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)),
+    Code = ?BACKEND:stream(?BACKEND:debugger(Fresh)),
+    % 0x9002 is the 16-bit (compressed) encoding of ebreak
+    Expected = dump_to_bin(<<
+        "      0:   9002                    ebreak"
+    >>),
+    ?assertEqual(Expected, Code).
+
+and_register_exhaustion_negative_test() ->
+    St0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)),
+    % Hand out t6 down to t1 so that no allocatable register is left
+    {St1, t6} = ?BACKEND:move_to_native_register(St0, {x_reg, 0}),
+    {St2, t5} = ?BACKEND:move_to_native_register(St1, {x_reg, 1}),
+    {St3, t4} = ?BACKEND:move_to_native_register(St2, {x_reg, 2}),
+    {St4, t3} = ?BACKEND:move_to_native_register(St3, {x_reg, 3}),
+    {St5, t2} = ?BACKEND:move_to_native_register(St4, {x_reg, 4}),
+    {Exhausted, t1} = ?BACKEND:move_to_native_register(St5, {x_reg, 5}),
+    % Mask -4 is expected to be built in t0 as ~3 (li 3; not) and then ANDed into t6
+    {Masked, t6} = ?BACKEND:and_(Exhausted, {free, t6}, -4),
+    Code = ?BACKEND:stream(Masked),
+    Expected = dump_to_bin(<<
+        "      0:   01852f83            lw  t6,24(a0)\n"
+        "      4:   01c52f03            lw  t5,28(a0)\n"
+        "      8:   02052e83            lw  t4,32(a0)\n"
+        "      c:   02452e03            lw  t3,36(a0)\n"
+        "     10:   02852383            lw  t2,40(a0)\n"
+        "     14:   02c52303            lw  t1,44(a0)\n"
+        "     18:   428d                    li  t0,3\n"
+        "     1a:   fff2c293            not t0,t0\n"
+        "     1e:   005fffb3            and t6,t6,t0"
+    >>),
+    ?assertEqual(Expected, Code).
+
+and_register_exhaustion_positive_test() ->
+    St0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)),
+    % Hand out t6 down to t1 so that no allocatable register is left
+    {St1, t6} = ?BACKEND:move_to_native_register(St0, {x_reg, 0}),
+    {St2, t5} = ?BACKEND:move_to_native_register(St1, {x_reg, 1}),
+    {St3, t4} = ?BACKEND:move_to_native_register(St2, {x_reg, 2}),
+    {St4, t3} = ?BACKEND:move_to_native_register(St3, {x_reg, 3}),
+    {St5, t2} = ?BACKEND:move_to_native_register(St4, {x_reg, 4}),
+    {Exhausted, t1} = ?BACKEND:move_to_native_register(St5, {x_reg, 5}),
+    % Mask 0x3F is expected to be loaded into t0 and ANDed into t6 directly
+    {Masked, t6} = ?BACKEND:and_(Exhausted, {free, t6}, 16#3F),
+    Code = ?BACKEND:stream(Masked),
+    Expected = dump_to_bin(<<
+        "   0:  01852f83            lw  t6,24(a0)\n"
+        "   4:  01c52f03            lw  t5,28(a0)\n"
+        "   8:  02052e83            lw  t4,32(a0)\n"
+        "   c:  02452e03            lw  t3,36(a0)\n"
+        "  10:  02852383            lw  t2,40(a0)\n"
+        "  14:  02c52303            lw  t1,44(a0)\n"
+        "  18:  03f00293            li  t0,63\n"
+        "  1c:  005fffb3            and t6,t6,t0"
+    >>),
+    ?assertEqual(Expected, Code).
+
+jump_table_large_labels_test() ->
+    Fresh = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)),
+    WithTable = ?BACKEND:jump_table(Fresh, 512),
+    % 512 labels plus one extra entry, 8 bytes each (RISC-V: AUIPC + JALR per entry)
+    ExpectedSize = (512 + 1) * 8,
+    ?assertEqual(ExpectedSize, byte_size(?BACKEND:stream(WithTable))).
+
+alloc_boxed_integer_fragment_small_test() ->
+    Fresh = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)),
+    {Called, ResultReg} = ?BACKEND:call_primitive(Fresh, ?PRIM_ALLOC_BOXED_INTEGER_FRAGMENT, [
+        ctx, {avm_int64_t, 42}
+    ]),
+    ?assertEqual(t6, ResultReg),
+    Code = ?BACKEND:stream(Called),
+    % The 64-bit argument is split over two registers: a1 = 42 (low word), a2 = 0 (high word)
+    Expected = dump_to_bin(<<
+        "      0:   03c62f83            lw  t6,60(a2)\n"
+        "      4:   1141                    addi    sp,sp,-16\n"
+        "      6:   c006                    sw  ra,0(sp)\n"
+        "      8:   c22a                    sw  a0,4(sp)\n"
+        "      a:   c42e                    sw  a1,8(sp)\n"
+        "      c:   c632                    sw  a2,12(sp)\n"
+        "      e:   02a00593            li  a1,42\n"
+        "     12:   4601                    li  a2,0\n"
+        "     14:   9f82                    jalr    t6\n"
+        "     16:   8faa                    mv  t6,a0\n"
+        "     18:   4082                    lw  ra,0(sp)\n"
+        "     1a:   4512                    lw  a0,4(sp)\n"
+        "     1c:   45a2                    lw  a1,8(sp)\n"
+        "     1e:   4632                    lw  a2,12(sp)\n"
+        "     20:   0141                    addi    sp,sp,16"
+    >>),
+    ?assertEqual(Expected, Code).
+
+alloc_boxed_integer_fragment_large_test() ->
+    Fresh = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)),
+    {Called, ResultReg} = ?BACKEND:call_primitive(Fresh, ?PRIM_ALLOC_BOXED_INTEGER_FRAGMENT, [
+        ctx, {avm_int64_t, 16#123456789ABCDEF0}
+    ]),
+    % Follow up with a tail call that consumes (and frees) the result register
+    Raised = ?BACKEND:call_primitive_last(Called, ?PRIM_RAISE_ERROR_TUPLE, [
+        ctx, jit_state, offset, ?BADMATCH_ATOM, {free, ResultReg}
+    ]),
+    ?assertEqual(t6, ResultReg),
+    Code = ?BACKEND:stream(Raised),
+    % Each 32-bit half of the argument is built inline with lui+addi (a1 = low, a2 = high)
+    Expected = dump_to_bin(<<
+        "      0:   03c62f83            lw  t6,60(a2)\n"
+        "      4:   1141                    addi    sp,sp,-16\n"
+        "      6:   c006                    sw  ra,0(sp)\n"
+        "      8:   c22a                    sw  a0,4(sp)\n"
+        "      a:   c42e                    sw  a1,8(sp)\n"
+        "      c:   c632                    sw  a2,12(sp)\n"
+        "      e:   9abce5b7            lui a1,0x9abce\n"
+        "     12:   ef058593            addi    a1,a1,-272 # 0x9abcdef0\n"
+        "     16:   12345637            lui a2,0x12345\n"
+        "     1a:   67860613            addi    a2,a2,1656 # 0x12345678\n"
+        "     1e:   9f82                    jalr    t6\n"
+        "     20:   8faa                    mv  t6,a0\n"
+        "     22:   4082                    lw  ra,0(sp)\n"
+        "     24:   4512                    lw  a0,4(sp)\n"
+        "     26:   45a2                    lw  a1,8(sp)\n"
+        "     28:   4632                    lw  a2,12(sp)\n"
+        "     2a:   0141                    addi    sp,sp,16\n"
+        "     2c:   04c62f03            lw  t5,76(a2)\n"
+        "     30:   03000613            li  a2,48\n"
+        "     34:   28b00693            li  a3,651\n"
+        "     38:   877e                    mv  a4,t6\n"
+        "     3a:   8f02                    jr  t5"
+    >>),
+    ?assertEqual(Expected, Code).
+
+%% Stack alignment in call_func_ptr: the RISC-V calling convention keeps sp 16-byte
+%% aligned, so the frame size must be a multiple of 16 even for an odd number of saves
+call_func_ptr_stack_alignment_test() ->
+    St0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)),
+    {St1, t6} = ?BACKEND:move_to_native_register(St0, {x_reg, 0}),
+    {St2, t5} = ?BACKEND:move_to_native_register(St1, {x_reg, 1}),
+    {St3, t4} = ?BACKEND:move_to_native_register(St2, {x_reg, 2}),
+    {St4, t3} = ?BACKEND:move_to_native_register(St3, {x_reg, 3}),
+    {Called, _ResultReg} = ?BACKEND:call_func_ptr(St4, {free, t3}, [42]),
+    Code = ?BACKEND:stream(Called),
+    % Seven words are saved (ra, a0-a2, t4-t6 = 28 bytes) in a 32-byte frame; t3 is not saved
+    Expected = dump_to_bin(<<
+        "      0:   01852f83            lw  t6,24(a0)\n"
+        "      4:   01c52f03            lw  t5,28(a0)\n"
+        "      8:   02052e83            lw  t4,32(a0)\n"
+        "      c:   02452e03            lw  t3,36(a0)\n"
+        "     10:   1101                    addi    sp,sp,-32\n"
+        "     12:   c006                    sw  ra,0(sp)\n"
+        "     14:   c22a                    sw  a0,4(sp)\n"
+        "     16:   c42e                    sw  a1,8(sp)\n"
+        "     18:   c632                    sw  a2,12(sp)\n"
+        "     1a:   c876                    sw  t4,16(sp)\n"
+        "     1c:   ca7a                    sw  t5,20(sp)\n"
+        "     1e:   cc7e                    sw  t6,24(sp)\n"
+        "     20:   02a00513            li  a0,42\n"
+        "     24:   9e02                    jalr    t3\n"
+        "     26:   8e2a                    mv  t3,a0\n"
+        "     28:   4082                    lw  ra,0(sp)\n"
+        "     2a:   4512                    lw  a0,4(sp)\n"
+        "     2c:   45a2                    lw  a1,8(sp)\n"
+        "     2e:   4632                    lw  a2,12(sp)\n"
+        "     30:   4ec2                    lw  t4,16(sp)\n"
+        "     32:   4f52                    lw  t5,20(sp)\n"
+        "     34:   4fe2                    lw  t6,24(sp)\n"
+        "     36:   02010113            addi    sp,sp,32"
+    >>),
+    ?assertEqual(Expected, Code).
+
+%% Test call_func_ptr when all available registers are already allocated.
+%% The setup pins t6..t1; the cases then pass up to five arguments, so
+%% set_args has to place them without a spare temporary register.
+call_func_ptr_register_exhaustion_test_() ->
+    {setup,
+        fun() ->
+            State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)),
+
+            % Allocate all available registers to simulate register pressure
+            {State1, t6} = ?BACKEND:move_to_native_register(State0, {x_reg, 0}),
+            {State2, t5} = ?BACKEND:move_to_native_register(State1, {x_reg, 1}),
+            {State3, t4} = ?BACKEND:move_to_native_register(State2, {x_reg, 2}),
+            {State4, t3} = ?BACKEND:move_to_native_register(State3, {x_reg, 3}),
+            {State5, t2} = ?BACKEND:move_to_native_register(State4, {x_reg, 4}),
+            {State6, t1} = ?BACKEND:move_to_native_register(State5, {x_reg, 5}),
+            State6
+        end,
+        fun(State6) ->
+            [
+                ?_test(begin
+                    {State7, _ResultReg} = ?BACKEND:call_func_ptr(
+                        State6,
+                        {free, t5},
+                        [ctx, jit_state, {free, t2}, 3, 1]
+                    ),
+                    Stream = ?BACKEND:stream(State7),
+                    Dump =
+                        <<
+                            "      0:   01852f83            lw  t6,24(a0)\n"
+                            "      4:   01c52f03            lw  t5,28(a0)\n"
+                            "      8:   02052e83            lw  t4,32(a0)\n"
+                            "      c:   02452e03            lw  t3,36(a0)\n"
+                            "     10:   02852383            lw  t2,40(a0)\n"
+                            "     14:   02c52303            lw  t1,44(a0)\n"
+                            "     18:   1101                    addi    sp,sp,-32\n"
+                            "     1a:   c006                    sw  ra,0(sp)\n"
+                            "     1c:   c22a                    sw  a0,4(sp)\n"
+                            "     1e:   c42e                    sw  a1,8(sp)\n"
+                            "     20:   c632                    sw  a2,12(sp)\n"
+                            "     22:   c81a                    sw  t1,16(sp)\n"
+                            "     24:   ca72                    sw  t3,20(sp)\n"
+                            "     26:   cc76                    sw  t4,24(sp)\n"
+                            "     28:   ce7e                    sw  t6,28(sp)\n"
+                            "     2a:   861e                    mv  a2,t2\n"
+                            "     2c:   468d                    li  a3,3\n"
+                            "     2e:   4705                    li  a4,1\n"
+                            "     30:   9f02                    jalr    t5\n"
+                            "     32:   8f2a                    mv  t5,a0\n"
+                            "     34:   4082                    lw  ra,0(sp)\n"
+                            "     36:   4512                    lw  a0,4(sp)\n"
+                            "     38:   45a2                    lw  a1,8(sp)\n"
+                            "     3a:   4632                    lw  a2,12(sp)\n"
+                            "     3c:   4342                    lw  t1,16(sp)\n"
+                            "     3e:   4e52                    lw  t3,20(sp)\n"
+                            "     40:   4ee2                    lw  t4,24(sp)\n"
+                            "     42:   4ff2                    lw  t6,28(sp)\n"
+                            "     44:   02010113            addi    sp,sp,32"
+                        >>,
+                    ?assertEqual(dump_to_bin(Dump), Stream)
+                end),
+                ?_test(begin
+                    {State7, _ResultReg} = ?BACKEND:call_func_ptr(
+                        State6,
+                        {free, t5},
+                        [ctx, jit_state, {free, t2}, 1, t1]
+                    ),
+                    Stream = ?BACKEND:stream(State7),
+                    Dump =
+                        <<
+                            "      0:   01852f83            lw  t6,24(a0)\n"
+                            "      4:   01c52f03            lw  t5,28(a0)\n"
+                            "      8:   02052e83            lw  t4,32(a0)\n"
+                            "      c:   02452e03            lw  t3,36(a0)\n"
+                            "     10:   02852383            lw  t2,40(a0)\n"
+                            "     14:   02c52303            lw  t1,44(a0)\n"
+                            "     18:   1101                    addi    sp,sp,-32\n"
+                            "     1a:   c006                    sw  ra,0(sp)\n"
+                            "     1c:   c22a                    sw  a0,4(sp)\n"
+                            "     1e:   c42e                    sw  a1,8(sp)\n"
+                            "     20:   c632                    sw  a2,12(sp)\n"
+                            "     22:   c81a                    sw  t1,16(sp)\n"
+                            "     24:   ca72                    sw  t3,20(sp)\n"
+                            "     26:   cc76                    sw  t4,24(sp)\n"
+                            "     28:   ce7e                    sw  t6,28(sp)\n"
+                            "     2a:   861e                    mv  a2,t2\n"
+                            "     2c:   4685                    li  a3,1\n"
+                            "     2e:   871a                    mv  a4,t1\n"
+                            "     30:   9f02                    jalr    t5\n"
+                            "     32:   8f2a                    mv  t5,a0\n"
+                            "     34:   4082                    lw  ra,0(sp)\n"
+                            "     36:   4512                    lw  a0,4(sp)\n"
+                            "     38:   45a2                    lw  a1,8(sp)\n"
+                            "     3a:   4632                    lw  a2,12(sp)\n"
+                            "     3c:   4342                    lw  t1,16(sp)\n"
+                            "     3e:   4e52                    lw  t3,20(sp)\n"
+                            "     40:   4ee2                    lw  t4,24(sp)\n"
+                            "     42:   4ff2                    lw  t6,28(sp)\n"
+                            "     44:   02010113            addi    sp,sp,32"
+                        >>,
+                    ?assertEqual(dump_to_bin(Dump), Stream)
+                end),
+                ?_test(begin
+                    {State7, ResultReg} = ?BACKEND:call_func_ptr(
+                        State6,
+                        {free, t5},
+                        [ctx, jit_state, {free, t2}, t1, 1]
+                    ),
+                    Stream = ?BACKEND:stream(State7),
+                    Dump =
+                        <<
+                            "      0:   01852f83            lw  t6,24(a0)\n"
+                            "      4:   01c52f03            lw  t5,28(a0)\n"
+                            "      8:   02052e83            lw  t4,32(a0)\n"
+                            "      c:   02452e03            lw  t3,36(a0)\n"
+                            "     10:   02852383            lw  t2,40(a0)\n"
+                            "     14:   02c52303            lw  t1,44(a0)\n"
+                            "     18:   1101                    addi    sp,sp,-32\n"
+                            "     1a:   c006                    sw  ra,0(sp)\n"
+                            "     1c:   c22a                    sw  a0,4(sp)\n"
+                            "     1e:   c42e                    sw  a1,8(sp)\n"
+                            "     20:   c632                    sw  a2,12(sp)\n"
+                            "     22:   c81a                    sw  t1,16(sp)\n"
+                            "     24:   ca72                    sw  t3,20(sp)\n"
+                            "     26:   cc76                    sw  t4,24(sp)\n"
+                            "     28:   ce7e                    sw  t6,28(sp)\n"
+                            "     2a:   861e                    mv  a2,t2\n"
+                            "     2c:   869a                    mv  a3,t1\n"
+                            "     2e:   4705                    li  a4,1\n"
+                            "     30:   9f02                    jalr    t5\n"
+                            "     32:   8f2a                    mv  t5,a0\n"
+                            "     34:   4082                    lw  ra,0(sp)\n"
+                            "     36:   4512                    lw  a0,4(sp)\n"
+                            "     38:   45a2                    lw  a1,8(sp)\n"
+                            "     3a:   4632                    lw  a2,12(sp)\n"
+                            "     3c:   4342                    lw  t1,16(sp)\n"
+                            "     3e:   4e52                    lw  t3,20(sp)\n"
+                            "     40:   4ee2                    lw  t4,24(sp)\n"
+                            "     42:   4ff2                    lw  t6,28(sp)\n"
+                            "     44:   02010113            addi    sp,sp,32"
+                        >>,
+                    ?assertEqual(dump_to_bin(Dump), Stream),
+                    ?assertEqual(t5, ResultReg)
+                end),
+                ?_test(begin
+                    {State7, _ResultReg} = ?BACKEND:call_func_ptr(
+                        State6,
+                        {free, a1},
+                        [t5, a3]
+                    ),
+                    Stream = ?BACKEND:stream(State7),
+                    Dump =
+                        <<
+                            "   0:  01852f83            lw  t6,24(a0)\n"
+                            "   4:  01c52f03            lw  t5,28(a0)\n"
+                            "   8:  02052e83            lw  t4,32(a0)\n"
+                            "   c:  02452e03            lw  t3,36(a0)\n"
+                            "  10:  02852383            lw  t2,40(a0)\n"
+                            "  14:  02c52303            lw  t1,44(a0)\n"
+                            "  18:  fd010113            addi    sp,sp,-48\n"
+                            "  1c:  c006                sw  ra,0(sp)\n"
+                            "  1e:  c22a                sw  a0,4(sp)\n"
+                            "  20:  c42e                sw  a1,8(sp)\n"
+                            "  22:  c632                sw  a2,12(sp)\n"
+                            "  24:  c81a                sw  t1,16(sp)\n"
+                            "  26:  ca1e                sw  t2,20(sp)\n"
+                            "  28:  cc72                sw  t3,24(sp)\n"
+                            "  2a:  ce76                sw  t4,28(sp)\n"
+                            "  2c:  d07a                sw  t5,32(sp)\n"
+                            "  2e:  d27e                sw  t6,36(sp)\n"
+                            "  30:  832e                mv  t1,a1\n"
+                            "  32:  857a                mv  a0,t5\n"
+                            "  34:  85b6                mv  a1,a3\n"
+                            "  36:  9302                jalr    t1\n"
+                            "  38:  c42a                sw  a0,8(sp)\n"
+                            "  3a:  4082                lw  ra,0(sp)\n"
+                            "  3c:  4512                lw  a0,4(sp)\n"
+                            "  3e:  45a2                lw  a1,8(sp)\n"
+                            "  40:  4632                lw  a2,12(sp)\n"
+                            "  42:  4342                lw  t1,16(sp)\n"
+                            "  44:  43d2                lw  t2,20(sp)\n"
+                            "  46:  4e62                lw  t3,24(sp)\n"
+                            "  48:  4ef2                lw  t4,28(sp)\n"
+                            "  4a:  5f02                lw  t5,32(sp)\n"
+                            "  4c:  5f92                lw  t6,36(sp)\n"
+                            "  4e:  03010113            addi    sp,sp,48"
+                        >>,
+                    ?assertEqual(dump_to_bin(Dump), Stream)
+                end),
+                ?_test(begin
+                    {State7, ResultReg} = ?BACKEND:call_func_ptr(
+                        State6,
+                        {primitive, 2},
+                        [{free, t5}, a3]
+                    ),
+                    ?assertEqual(t5, ResultReg),
+                    Stream = ?BACKEND:stream(State7),
+                    Dump =
+                        <<
+                            "   0:  01852f83            lw  t6,24(a0)\n"
+                            "   4:  01c52f03            lw  t5,28(a0)\n"
+                            "   8:  02052e83            lw  t4,32(a0)\n"
+                            "   c:  02452e03            lw  t3,36(a0)\n"
+                            "  10:  02852383            lw  t2,40(a0)\n"
+                            "  14:  02c52303            lw  t1,44(a0)\n"
+                            "  18:  fd010113            addi    sp,sp,-48\n"
+                            "  1c:  c006                sw  ra,0(sp)\n"
+                            "  1e:  c22a                sw  a0,4(sp)\n"
+                            "  20:  c42e                sw  a1,8(sp)\n"
+                            "  22:  c632                sw  a2,12(sp)\n"
+                            "  24:  c81a                sw  t1,16(sp)\n"
+                            "  26:  ca1e                sw  t2,20(sp)\n"
+                            "  28:  cc72                sw  t3,24(sp)\n"
+                            "  2a:  ce76                sw  t4,28(sp)\n"
+                            "  2c:  d07e                sw  t6,32(sp)\n"
+                            "  2e:  00862303            lw  t1,8(a2)\n"
+                            "  32:  857a                mv  a0,t5\n"
+                            "  34:  85b6                mv  a1,a3\n"
+                            "  36:  9302                jalr    t1\n"
+                            "  38:  8f2a                mv  t5,a0\n"
+                            "  3a:  4082                lw  ra,0(sp)\n"
+                            "  3c:  4512                lw  a0,4(sp)\n"
+                            "  3e:  45a2                lw  a1,8(sp)\n"
+                            "  40:  4632                lw  a2,12(sp)\n"
+                            "  42:  4342                lw  t1,16(sp)\n"
+                            "  44:  43d2                lw  t2,20(sp)\n"
+                            "  46:  4e62                lw  t3,24(sp)\n"
+                            "  48:  4ef2                lw  t4,28(sp)\n"
+                            "  4a:  5f82                lw  t6,32(sp)\n"
+                            "  4c:  03010113            addi    sp,sp,48"
+                        >>,
+                    ?assertEqual(dump_to_bin(Dump), Stream)
+                end)
+            ]
+        end}.
+
+%% jump_to_continuation: PIC jump used for intra-module returns
+jump_to_continuation_test_() ->
+    [
+        ?_test(begin
+            % Case 1: emitted at stream offset 0
+            Fresh = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)),
+            Jumped = ?BACKEND:jump_to_continuation(Fresh, {free, a0}),
+            Actual = ?BACKEND:stream(Jumped),
+            % PIC sequence: take the pc with auipc, add the offset held in a0, jump
+            Listing =
+                <<
+                    "   0:  00000f97            auipc   t6,0x0\n"
+                    "   4:  9faa                add t6,t6,a0\n"
+                    "   6:  8f82                jr  t6"
+                >>,
+            ?assertEqual(dump_to_bin(Listing), Actual)
+        end),
+        ?_test(begin
+            % Case 2: emitted after a jump table, so the pc has to be rebased
+            Fresh = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)),
+            % Jump table for 3 labels: 4 entries of 8 bytes each, 32 bytes in total
+            Tabled = ?BACKEND:jump_table(Fresh, 3),
+            Jumped = ?BACKEND:jump_to_continuation(Tabled, {free, a0}),
+            Actual = ?BACKEND:stream(Jumped),
+            % 32 bytes of table placeholder, then the jump sequence;
+            % the pc is rebased by 0 - 32 = -32 (0xFFFFFFE0) before a0 is added
+            Listing =
+                <<
+                    "   0:  ffffffff            .insn   4, 0xffffffff\n"
+                    "   4:  ffffffff            .insn   4, 0xffffffff\n"
+                    "   8:  ffffffff            .insn   4, 0xffffffff\n"
+                    "   c:  ffffffff            .insn   4, 0xffffffff\n"
+                    "  10:  ffffffff            .insn   4, 0xffffffff\n"
+                    "  14:  ffffffff            .insn   4, 0xffffffff\n"
+                    "  18:  ffffffff            .insn   4, 0xffffffff\n"
+                    "  1c:  ffffffff            .insn   4, 0xffffffff\n"
+                    "  20:  00000f97            auipc   t6,0x0\n"
+                    "  24:  1f81                addi    t6,t6,-32 # 0x0\n"
+                    "  26:  9faa                add t6,t6,a0\n"
+                    "  28:  8f82                jr  t6"
+                >>,
+            ?assertEqual(dump_to_bin(Listing), Actual)
+        end)
+    ].
+
+%% Mimic part of add.beam: jump table, three labels and the final label 0
+add_beam_test() ->
+    State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)),
+    State1 = ?BACKEND:jump_table(State0, 3),
+    State2 = ?BACKEND:add_label(State1, 1),
+    State3 = ?BACKEND:move_to_vm_register(State2, 16#9f, {x_reg, 1}),
+    State4 = ?BACKEND:move_to_vm_register(State3, 16#8f, {x_reg, 0}),
+    State5 = ?BACKEND:call_only_or_schedule_next(State4, 2),
+    State6 = ?BACKEND:add_label(State5, 2),
+    {State7, ResultReg} = ?BACKEND:call_primitive(State6, ?PRIM_ALLOCATE, [
+        ctx, jit_state, 1, 0, 1
+    ]),
+    State8 = ?BACKEND:if_block(State7, {'(bool)', {free, ResultReg}, '==', false}, fun(BSt0) ->
+        ?BACKEND:call_primitive_last(BSt0, ?PRIM_HANDLE_ERROR, [ctx, jit_state, offset])
+    end),
+    State9 = ?BACKEND:move_to_vm_register(State8, ?TERM_NIL, {y_reg, 0}),
+    State10 = ?BACKEND:call_or_schedule_next(State9, 3),
+    State11 = ?BACKEND:add_label(State10, 3),
+    State12 = ?BACKEND:call_primitive_last(State11, ?PRIM_RETURN, [
+        ctx, jit_state
+    ]),
+    % OP_INT_CALL_END (label 0): tail-calls the same primitive as label 3
+    State13 = ?BACKEND:add_label(State12, 0),
+    State14 = ?BACKEND:call_primitive_last(State13, ?PRIM_RETURN, [ctx, jit_state]),
+    State15 = ?BACKEND:update_branches(State14),
+    Stream = ?BACKEND:stream(State15),
+    Dump =
+        <<
+            % jump table (new 8-byte format)
+            "   0:  00000697            auipc   a3,0x0\n"
+            "   4:  0e068067            jr  224(a3) # 0xe0\n"
+            "   8:  00000697            auipc   a3,0x0\n"
+            "   c:  01868067            jr  24(a3) # 0x20\n"
+            "  10:  00000697            auipc   a3,0x0\n"
+            "  14:  04868067            jr  72(a3) # 0x58\n"
+            "  18:  00000697            auipc   a3,0x0\n"
+            "  1c:  0c268067            jr  194(a3) # 0xda\n"
+            % label 1
+            % {move,{integer,9},{x,1}}.
+            "  20:  09f00f93            li  t6,159\n"
+            "  24:  01f52e23            sw  t6,28(a0)\n"
+            % {move,{integer,8},{x,0}}
+            "  28:  08f00f93            li  t6,143\n"
+            "  2c:  01f52c23            sw  t6,24(a0)\n"
+            % {call_only,2,{f,2}}.
+            "  30:  0085af83            lw  t6,8(a1)\n"
+            "  34:  1ffd                addi    t6,t6,-1\n"
+            "  36:  01f5a423            sw  t6,8(a1)\n"
+            "  3a:  000f8663            beqz    t6,0x46\n"
+            "  3e:  a829                j   0x58\n"
+            "  40:  0001                nop\n"
+            "  42:  00000013            nop\n"
+            "  46:  00000f97            auipc   t6,0x0\n"
+            "  4a:  0fc9                addi    t6,t6,18 # 0x58\n"
+            "  4c:  0001                nop\n"
+            "  4e:  01f5a223            sw  t6,4(a1)\n"
+            "  52:  00862f83            lw  t6,8(a2)\n"
+            "  56:  8f82                jr  t6\n"
+            % label 2
+            % {allocate,1,1}.
+            "  58:  01462f83            lw  t6,20(a2)\n"
+            "  5c:  1141                addi    sp,sp,-16\n"
+            "  5e:  c006                sw  ra,0(sp)\n"
+            "  60:  c22a                sw  a0,4(sp)\n"
+            "  62:  c42e                sw  a1,8(sp)\n"
+            "  64:  c632                sw  a2,12(sp)\n"
+            "  66:  4605                li  a2,1\n"
+            "  68:  4681                li  a3,0\n"
+            "  6a:  4705                li  a4,1\n"
+            "  6c:  9f82                jalr    t6\n"
+            "  6e:  8faa                mv  t6,a0\n"
+            "  70:  4082                lw  ra,0(sp)\n"
+            "  72:  4512                lw  a0,4(sp)\n"
+            "  74:  45a2                lw  a1,8(sp)\n"
+            "  76:  4632                lw  a2,12(sp)\n"
+            "  78:  0141                addi    sp,sp,16\n"
+            "  7a:  01ff9f13            slli    t5,t6,0x1f\n"
+            "  7e:  000f4763            bltz    t5,0x8c\n"
+            "  82:  01862f83            lw  t6,24(a2)\n"
+            "  86:  08600613            li  a2,134\n"
+            "  8a:  8f82                jr  t6\n"
+            % {init_yregs,{list,[{y,0}]}}.
+            %% move_to_vm_register(State8, ?TERM_NIL, {y_reg, 0}),
+            "  8c:  03b00f13            li  t5,59\n"
+            "  90:  01452f83            lw  t6,20(a0)\n"
+            "  94:  01efa023            sw  t5,0(t6)\n"
+            % {call,1,{f,3}}
+            %% call_or_schedule_next(State9, 3),
+            "  98:  0005af03            lw  t5,0(a1)\n"
+            "  9c:  000f2f03            lw  t5,0(t5)\n"
+            "  a0:  0f62                slli    t5,t5,0x18\n"
+            "  a2:  36800f93            li  t6,872\n"
+            "  a6:  00000013            nop\n"
+            "  aa:  01ff6f33            or  t5,t5,t6\n"
+            "  ae:  05e52e23            sw  t5,92(a0)\n"
+            "  b2:  0085af83            lw  t6,8(a1)\n"
+            "  b6:  1ffd                addi    t6,t6,-1\n"
+            "  b8:  01f5a423            sw  t6,8(a1)\n"
+            "  bc:  000f8663            beqz    t6,0xc8\n"
+            "  c0:  a829                j   0xda\n"
+            "  c2:  0001                nop\n"
+            "  c4:  00000013            nop\n"
+            "  c8:  00000f97            auipc   t6,0x0\n"
+            "  cc:  0fc9                addi    t6,t6,18 # 0xda\n"
+            "  ce:  0001                nop\n"
+            "  d0:  01f5a223            sw  t6,4(a1)\n"
+            "  d4:  00862f83            lw  t6,8(a2)\n"
+            "  d8:  8f82                jr  t6\n"
+            %% (continuation)
+            % label 3
+            "  da:  00462f83            lw  t6,4(a2)\n"
+            "  de:  8f82                jr  t6\n"
+            % label 0
+            "  e0:  00462f83            lw  t6,4(a2)\n"
+            "  e4:  8f82                jr  t6"
+        >>,
+    ?assertEqual(dump_to_bin(Dump), Stream).
+
+dump_to_bin(Dump) ->
+    dump_to_bin0(Dump, addr, []).
+
+-define(IS_HEX_DIGIT(C),
+    ((C >= $0 andalso C =< $9) orelse (C >= $a andalso C =< $f) orelse (C >= $A andalso C =< $F))
+).
+
+dump_to_bin0(<<N, $:, Tail/binary>>, addr, Acc) when ?IS_HEX_DIGIT(N) ->
+    dump_to_bin0(Tail, hex, Acc);
+dump_to_bin0(<<N, Tail/binary>>, addr, Acc) when ?IS_HEX_DIGIT(N) ->
+    dump_to_bin0(Tail, addr, Acc);
+dump_to_bin0(<<$\n, Tail/binary>>, addr, Acc) ->
+    dump_to_bin0(Tail, addr, Acc);
+dump_to_bin0(<<$\s, Tail/binary>>, addr, Acc) ->
+    dump_to_bin0(Tail, addr, Acc);
+dump_to_bin0(<<$\t, Tail/binary>>, addr, Acc) ->
+    dump_to_bin0(Tail, addr, Acc);
+dump_to_bin0(<<$\s, Tail/binary>>, hex, Acc) ->
+    dump_to_bin0(Tail, hex, Acc);
+dump_to_bin0(<<$\t, Tail/binary>>, hex, Acc) ->
+    dump_to_bin0(Tail, hex, Acc);
+%% 32-bit RISC-V instruction: 8 consecutive hex digits followed by a space or tab
+dump_to_bin0(<<H1, H2, H3, H4, H5, H6, H7, H8, Sp, Rest/binary>>, hex, Acc) when
+    (Sp =:= $\t orelse Sp =:= $\s) andalso
+        ?IS_HEX_DIGIT(H1) andalso
+        ?IS_HEX_DIGIT(H2) andalso
+        ?IS_HEX_DIGIT(H3) andalso
+        ?IS_HEX_DIGIT(H4) andalso
+        ?IS_HEX_DIGIT(H5) andalso
+        ?IS_HEX_DIGIT(H6) andalso
+        ?IS_HEX_DIGIT(H7) andalso
+        ?IS_HEX_DIGIT(H8)
+->
+    %% The dump prints the word as one number; emit it in little-endian byte order
+    Instr = list_to_integer([H1, H2, H3, H4, H5, H6, H7, H8], 16),
+    dump_to_bin0(Rest, instr, [<<Instr:32/little>> | Acc]);
+%% 32-bit word as two 16-bit halves ("1234 5678"); ARM-style, likely unused here (TODO confirm)
+dump_to_bin0(<<H1, H2, H3, H4, $\s, H5, H6, H7, H8, Sp, Rest/binary>>, hex, Acc) when
+    (Sp =:= $\t orelse Sp =:= $\s) andalso
+        ?IS_HEX_DIGIT(H1) andalso
+        ?IS_HEX_DIGIT(H2) andalso
+        ?IS_HEX_DIGIT(H3) andalso
+        ?IS_HEX_DIGIT(H4) andalso
+        ?IS_HEX_DIGIT(H5) andalso
+        ?IS_HEX_DIGIT(H6) andalso
+        ?IS_HEX_DIGIT(H7) andalso
+        ?IS_HEX_DIGIT(H8)
+->
+    InstrA = list_to_integer([H1, H2, H3, H4], 16),
+    InstrB = list_to_integer([H5, H6, H7, H8], 16),
+    dump_to_bin0(Rest, instr, [<<InstrB:16/little>>, <<InstrA:16/little>> | Acc]);
+%% 16-bit compressed (RVC) RISC-V instruction: 4 hex digits followed by a space or tab
+dump_to_bin0(<<H1, H2, H3, H4, Sp, Rest/binary>>, hex, Acc) when
+    (Sp =:= $\t orelse Sp =:= $\s) andalso
+        ?IS_HEX_DIGIT(H1) andalso
+        ?IS_HEX_DIGIT(H2) andalso
+        ?IS_HEX_DIGIT(H3) andalso
+        ?IS_HEX_DIGIT(H4)
+->
+    %% Parse the 4 hex digits as one 16-bit value, emitted little-endian
+    Instr = list_to_integer([H1, H2, H3, H4], 16),
+    dump_to_bin0(Rest, instr, [<<Instr:16/little>> | Acc]);
+dump_to_bin0(<<$\n, Tail/binary>>, hex, Acc) ->
+    dump_to_bin0(Tail, addr, Acc);
+dump_to_bin0(<<$\n, Tail/binary>>, instr, Acc) ->
+    dump_to_bin0(Tail, addr, Acc);
+dump_to_bin0(<<_Other, Tail/binary>>, instr, Acc) ->
+    dump_to_bin0(Tail, instr, Acc);
+dump_to_bin0(<<>>, _, Acc) ->
+    list_to_binary(lists:reverse(Acc)).
diff --git a/tests/libs/jit/jit_tests.erl b/tests/libs/jit/jit_tests.erl
index 72a356ae3c..5b1dfdae3a 100644
--- a/tests/libs/jit/jit_tests.erl
+++ b/tests/libs/jit/jit_tests.erl
@@ -74,6 +74,7 @@ compile_minimal_x86_64_test() ->
         fun(_) -> undefined end,
         fun(_) -> undefined end,
         fun(_) -> any end,
+        fun(_) -> undefined end,
         jit_x86_64,
         Stream2
     ),
@@ -117,10 +118,11 @@ term_to_int_verify_is_match_state_typed_optimization_x86_64_test() ->
     AtomResolver = jit_precompile:atom_resolver(?ATU8_CHUNK_1),
     LiteralResolver = fun(_) -> test_literal end,
     TypeResolver = jit_precompile:type_resolver(?TYPE_CHUNK_1),
+    ImportResolver = fun(_) -> test_function end,
 
     % Compile with typed register support
     {_LabelsCount, Stream3} = jit:compile(
-        ?CODE_CHUNK_1, AtomResolver, LiteralResolver, TypeResolver, jit_x86_64, Stream2
+        ?CODE_CHUNK_1, AtomResolver, LiteralResolver, TypeResolver, ImportResolver, jit_x86_64, Stream2
     ),
     CompiledCode = jit_x86_64:stream(Stream3),
 
@@ -194,10 +196,11 @@ verify_is_function_typed_optimization_x86_64_test() ->
     AtomResolver = jit_precompile:atom_resolver(?ATU8_CHUNK_2),
     LiteralResolver = fun(_) -> test_literal end,
     TypeResolver = jit_precompile:type_resolver(?TYPE_CHUNK_2),
+    ImportResolver = fun(_) -> test_function end,
 
     % Compile with typed register support
     {_LabelsCount, Stream3} = jit:compile(
-        ?CODE_CHUNK_2, AtomResolver, LiteralResolver, TypeResolver, jit_x86_64, Stream2
+        ?CODE_CHUNK_2, AtomResolver, LiteralResolver, TypeResolver, ImportResolver, jit_x86_64, Stream2
     ),
     CompiledCode = jit_x86_64:stream(Stream3),
 
diff --git a/tests/libs/jit/jit_tests_common.erl b/tests/libs/jit/jit_tests_common.erl
index cfabfcf15f..cf989e746d 100644
--- a/tests/libs/jit/jit_tests_common.erl
+++ b/tests/libs/jit/jit_tests_common.erl
@@ -77,6 +77,8 @@ asm(Arch, Bin, Str) ->
 find_binutils(Arch) ->
     ArchStr = atom_to_list(Arch),
     BinutilsList = [
+        {ArchStr ++ "-esp-elf-as", ArchStr ++ "-esp-elf-objdump"},
+        {ArchStr ++ "-unknown-elf-as", ArchStr ++ "-unknown-elf-objdump"},
         {ArchStr ++ "-elf-as", ArchStr ++ "-elf-objdump"},
         {ArchStr ++ "-none-eabi-as", ArchStr ++ "-none-eabi-objdump"},
         {ArchStr ++ "-linux-gnu-as", ArchStr ++ "-linux-gnu-objdump"}
@@ -104,6 +106,8 @@ get_asm_header(arm) ->
 get_asm_header(aarch64) ->
     ".text\n";
 get_asm_header(x86_64) ->
+    ".text\n";
+get_asm_header(riscv32) ->
     ".text\n".
 
 %% Get architecture-specific assembler flags
@@ -113,7 +117,9 @@ get_as_flags(arm) ->
 get_as_flags(aarch64) ->
     "";
 get_as_flags(x86_64) ->
-    "--64".
+    "--64";
+get_as_flags(riscv32) ->
+    "-march=rv32imac".
 
 %% Parse objdump output lines and extract binary data
 -spec asm_lines([binary()], binary(), atom()) -> binary().
diff --git a/tests/libs/jit/jit_x86_64_asm_tests.erl b/tests/libs/jit/jit_x86_64_asm_tests.erl
index 797ed9077c..a1c9bb949f 100644
--- a/tests/libs/jit/jit_x86_64_asm_tests.erl
+++ b/tests/libs/jit/jit_x86_64_asm_tests.erl
@@ -866,6 +866,19 @@ jge_rel8_test_() ->
         )
     ].
 
+jle_test_() ->
+    [
+        ?_assertAsmEqual(<<16#7e, 16#f4>>, "jle .-10", jit_x86_64_asm:jle(-10))
+    ].
+
+jle_rel8_test_() ->
+    [
+        ?_assertEqual(
+            {1, jit_tests_common:asm(x86_64, <<16#7e, 16#05>>, "jle .+7")},
+            jit_x86_64_asm:jle_rel8(7)
+        )
+    ].
+
 jmp_rel8_test_() ->
     [
         ?_assertEqual(
@@ -914,9 +927,50 @@ andb_test_() ->
 
 subq_test_() ->
     [
+        % Register-register forms
         ?_assertAsmEqual(<<16#48, 16#29, 16#c1>>, "subq %rax, %rcx", jit_x86_64_asm:subq(rax, rcx)),
         ?_assertAsmEqual(<<16#49, 16#29, 16#c2>>, "subq %rax, %r10", jit_x86_64_asm:subq(rax, r10)),
-        ?_assertAsmEqual(<<16#4c, 16#29, 16#c1>>, "subq %r8, %rcx", jit_x86_64_asm:subq(r8, rcx))
+        ?_assertAsmEqual(<<16#4c, 16#29, 16#c1>>, "subq %r8, %rcx", jit_x86_64_asm:subq(r8, rcx)),
+        % 8-bit immediate forms
+        ?_assertAsmEqual(
+            <<16#48, 16#83, 16#e8, 16#0a>>, "subq $10, %rax", jit_x86_64_asm:subq(10, rax)
+        ),
+        ?_assertAsmEqual(
+            <<16#48, 16#83, 16#e9, 16#05>>, "subq $5, %rcx", jit_x86_64_asm:subq(5, rcx)
+        ),
+        ?_assertAsmEqual(
+            <<16#49, 16#83, 16#ea, 16#08>>, "subq $8, %r10", jit_x86_64_asm:subq(8, r10)
+        ),
+        ?_assertAsmEqual(
+            <<16#49, 16#83, 16#eb, 16#7f>>, "subq $127, %r11", jit_x86_64_asm:subq(127, r11)
+        ),
+        % 32-bit immediate, special short form for %rax
+        ?_assertAsmEqual(
+            <<16#48, 16#2d, 16#00, 16#01, 16#00, 16#00>>,
+            "subq $256, %rax",
+            jit_x86_64_asm:subq(256, rax)
+        ),
+        ?_assertAsmEqual(
+            <<16#48, 16#2d, 16#00, 16#04, 16#00, 16#00>>,
+            "subq $1024, %rax",
+            jit_x86_64_asm:subq(1024, rax)
+        ),
+        % 32-bit immediate forms for other registers
+        ?_assertAsmEqual(
+            <<16#48, 16#81, 16#e9, 16#00, 16#01, 16#00, 16#00>>,
+            "subq $256, %rcx",
+            jit_x86_64_asm:subq(256, rcx)
+        ),
+        ?_assertAsmEqual(
+            <<16#49, 16#81, 16#ea, 16#00, 16#04, 16#00, 16#00>>,
+            "subq $1024, %r10",
+            jit_x86_64_asm:subq(1024, r10)
+        ),
+        ?_assertAsmEqual(
+            <<16#49, 16#81, 16#eb, 16#00, 16#10, 16#00, 16#00>>,
+            "subq $4096, %r11",
+            jit_x86_64_asm:subq(4096, r11)
+        )
     ].
 
 decl_test_() ->
diff --git a/tests/libs/jit/jit_x86_64_tests.erl b/tests/libs/jit/jit_x86_64_tests.erl
index 9aa86b6427..87ca4cefae 100644
--- a/tests/libs/jit/jit_x86_64_tests.erl
+++ b/tests/libs/jit/jit_x86_64_tests.erl
@@ -789,6 +789,166 @@ if_block_test_() ->
                     >>,
                     ?assertEqual(dump_to_bin(Dump), Stream),
                     ?assertEqual([RegB], ?BACKEND:used_regs(State1))
+                end),
+                ?_test(begin
+                    State1 = ?BACKEND:if_block(
+                        State0,
+                        {100, '<', RegA},
+                        fun(BSt0) ->
+                            ?BACKEND:add(BSt0, RegB, 2)
+                        end
+                    ),
+                    Stream = ?BACKEND:stream(State1),
+                    Dump = <<
+                        "   0:	48 8b 47 30          	mov    0x30(%rdi),%rax\n"
+                        "   4:	4c 8b 5f 38          	mov    0x38(%rdi),%r11\n"
+                        "   8:	48 83 f8 64          	cmp    $0x64,%rax\n"
+                        "   c:	7e 04                	jle    0x12\n"
+                        "   e:	49 83 c3 02          	add    $0x2,%r11"
+                    >>,
+                    ?assertEqual(dump_to_bin(Dump), Stream),
+                    ?assertEqual([RegB, RegA], ?BACKEND:used_regs(State1))
+                end),
+                ?_test(begin
+                    State1 = ?BACKEND:if_block(
+                        State0,
+                        {100, '<', {free, RegA}},
+                        fun(BSt0) ->
+                            ?BACKEND:add(BSt0, RegB, 2)
+                        end
+                    ),
+                    Stream = ?BACKEND:stream(State1),
+                    Dump = <<
+                        "   0:	48 8b 47 30          	mov    0x30(%rdi),%rax\n"
+                        "   4:	4c 8b 5f 38          	mov    0x38(%rdi),%r11\n"
+                        "   8:	48 83 f8 64          	cmp    $0x64,%rax\n"
+                        "   c:	7e 04                	jle    0x12\n"
+                        "   e:	49 83 c3 02          	add    $0x2,%r11"
+                    >>,
+                    ?assertEqual(dump_to_bin(Dump), Stream),
+                    ?assertEqual([RegB], ?BACKEND:used_regs(State1))
+                end),
+                ?_test(begin
+                    State1 = ?BACKEND:if_block(
+                        State0,
+                        {RegA, '<', 100},
+                        fun(BSt0) ->
+                            ?BACKEND:add(BSt0, RegB, 2)
+                        end
+                    ),
+                    Stream = ?BACKEND:stream(State1),
+                    Dump = <<
+                        "   0:	48 8b 47 30          	mov    0x30(%rdi),%rax\n"
+                        "   4:	4c 8b 5f 38          	mov    0x38(%rdi),%r11\n"
+                        "   8:	48 83 f8 64          	cmp    $0x64,%rax\n"
+                        "   c:	7d 04                	jge    0x12\n"
+                        "   e:	49 83 c3 02          	add    $0x2,%r11"
+                    >>,
+                    ?assertEqual(dump_to_bin(Dump), Stream),
+                    ?assertEqual([RegB, RegA], ?BACKEND:used_regs(State1))
+                end),
+                ?_test(begin
+                    State1 = ?BACKEND:if_block(
+                        State0,
+                        {{free, RegA}, '<', 100},
+                        fun(BSt0) ->
+                            ?BACKEND:add(BSt0, RegB, 2)
+                        end
+                    ),
+                    Stream = ?BACKEND:stream(State1),
+                    Dump = <<
+                        "   0:	48 8b 47 30          	mov    0x30(%rdi),%rax\n"
+                        "   4:	4c 8b 5f 38          	mov    0x38(%rdi),%r11\n"
+                        "   8:	48 83 f8 64          	cmp    $0x64,%rax\n"
+                        "   c:	7d 04                	jge    0x12\n"
+                        "   e:	49 83 c3 02          	add    $0x2,%r11"
+                    >>,
+                    ?assertEqual(dump_to_bin(Dump), Stream),
+                    ?assertEqual([RegB], ?BACKEND:used_regs(State1))
+                end),
+                ?_test(begin
+                    State1 = ?BACKEND:if_block(
+                        State0,
+                        {RegA, '<', 16#100000000},
+                        fun(BSt0) ->
+                            ?BACKEND:add(BSt0, RegB, 2)
+                        end
+                    ),
+                    Stream = ?BACKEND:stream(State1),
+                    Dump = <<
+                        "   0:	48 8b 47 30          	mov    0x30(%rdi),%rax\n"
+                        "   4:	4c 8b 5f 38          	mov    0x38(%rdi),%r11\n"
+                        "   8:	49 bb 00 00 00 00 01 	movabs $0x100000000,%r11\n"
+                        "   f:	00 00 00 \n"
+                        "  12:	4c 39 d8             	cmp    %r11,%rax\n"
+                        "  15:	7d 04                	jge    0x1b\n"
+                        "  17:	49 83 c3 02          	add    $0x2,%r11"
+                    >>,
+                    ?assertEqual(dump_to_bin(Dump), Stream),
+                    ?assertEqual([RegB, RegA], ?BACKEND:used_regs(State1))
+                end),
+                ?_test(begin
+                    State1 = ?BACKEND:if_block(
+                        State0,
+                        {{free, RegA}, '<', 16#100000000},
+                        fun(BSt0) ->
+                            ?BACKEND:add(BSt0, RegB, 2)
+                        end
+                    ),
+                    Stream = ?BACKEND:stream(State1),
+                    Dump = <<
+                        "   0:	48 8b 47 30          	mov    0x30(%rdi),%rax\n"
+                        "   4:	4c 8b 5f 38          	mov    0x38(%rdi),%r11\n"
+                        "   8:	49 bb 00 00 00 00 01 	movabs $0x100000000,%r11\n"
+                        "   f:	00 00 00 \n"
+                        "  12:	4c 39 d8             	cmp    %r11,%rax\n"
+                        "  15:	7d 04                	jge    0x1b\n"
+                        "  17:	49 83 c3 02          	add    $0x2,%r11"
+                    >>,
+                    ?assertEqual(dump_to_bin(Dump), Stream),
+                    ?assertEqual([RegB], ?BACKEND:used_regs(State1))
+                end),
+                ?_test(begin
+                    State1 = ?BACKEND:if_block(
+                        State0,
+                        {16#100000000, '<', RegA},
+                        fun(BSt0) ->
+                            ?BACKEND:add(BSt0, RegB, 2)
+                        end
+                    ),
+                    Stream = ?BACKEND:stream(State1),
+                    Dump = <<
+                        "   0:	48 8b 47 30          	mov    0x30(%rdi),%rax\n"
+                        "   4:	4c 8b 5f 38          	mov    0x38(%rdi),%r11\n"
+                        "   8:	49 bb 00 00 00 00 01 	movabs $0x100000000,%r11\n"
+                        "   f:	00 00 00 \n"
+                        "  12:	4c 39 d8             	cmp    %r11,%rax\n"
+                        "  15:	7e 04                	jle    0x1b\n"
+                        "  17:	49 83 c3 02          	add    $0x2,%r11"
+                    >>,
+                    ?assertEqual(dump_to_bin(Dump), Stream),
+                    ?assertEqual([RegB, RegA], ?BACKEND:used_regs(State1))
+                end),
+                ?_test(begin
+                    State1 = ?BACKEND:if_block(
+                        State0,
+                        {16#100000000, '<', {free, RegA}},
+                        fun(BSt0) ->
+                            ?BACKEND:add(BSt0, RegB, 2)
+                        end
+                    ),
+                    Stream = ?BACKEND:stream(State1),
+                    Dump = <<
+                        "   0:	48 8b 47 30          	mov    0x30(%rdi),%rax\n"
+                        "   4:	4c 8b 5f 38          	mov    0x38(%rdi),%r11\n"
+                        "   8:	49 bb 00 00 00 00 01 	movabs $0x100000000,%r11\n"
+                        "   f:	00 00 00 \n"
+                        "  12:	4c 39 d8             	cmp    %r11,%rax\n"
+                        "  15:	7e 04                	jle    0x1b\n"
+                        "  17:	49 83 c3 02          	add    $0x2,%r11"
+                    >>,
+                    ?assertEqual(dump_to_bin(Dump), Stream),
+                    ?assertEqual([RegB], ?BACKEND:used_regs(State1))
                 end)
             ]
         end}.
@@ -893,6 +1053,37 @@ call_only_or_schedule_next_and_label_relocation_test() ->
         >>,
     ?assertEqual(dump_to_bin(Dump), Stream).
 
+call_only_or_schedule_next_known_label_test() ->
+    State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)),
+    State1 = ?BACKEND:jump_table(State0, 2),
+    State2 = ?BACKEND:add_label(State1, 1),
+    State3 = ?BACKEND:add_label(State2, 2, 16#2a),
+    State4 = ?BACKEND:call_only_or_schedule_next(State3, 2),
+    State5 = ?BACKEND:call_primitive_last(State4, 0, [ctx, jit_state]),
+    % OP_INT_CALL_END (label 0, added once label 2's offset is already known to the call above)
+    State6 = ?BACKEND:add_label(State5, 0),
+    State7 = ?BACKEND:call_primitive_last(State6, 1, [ctx, jit_state]),
+    State8 = ?BACKEND:update_branches(State7),
+    Stream = ?BACKEND:stream(State8),
+    Dump =
+        <<
+            "   0:	e9 2a 00 00 00       	jmpq   0x2f\n"
+            "   5:	e9 05 00 00 00       	jmpq   0xf\n"
+            "   a:	e9 1b 00 00 00       	jmpq   0x2a\n"
+            "   f:	ff 4e 10             	decl   0x10(%rsi)\n"
+            "  12:	74 05                	je     0x19\n"
+            "  14:	e9 11 00 00 00       	jmpq   0x2a\n"
+            "  19:	48 8d 05 0a 00 00 00 	lea    0xa(%rip),%rax        # 0x2a\n"
+            "  20:	48 89 46 08          	mov    %rax,0x8(%rsi)\n"
+            "  24:	48 8b 42 10          	mov    0x10(%rdx),%rax\n"
+            "  28:	ff e0                	jmpq   *%rax\n"
+            "  2a:	48 8b 02             	mov    (%rdx),%rax\n"
+            "  2d:	ff e0                	jmpq   *%rax\n"
+            "  2f:	48 8b 42 08          	mov    0x8(%rdx),%rax\n"
+            "  33:	ff e0                	jmpq   *%rax\n"
+        >>,
+    ?assertEqual(dump_to_bin(Dump), Stream).
+
 call_bif_with_large_literal_integer_test() ->
     State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)),
     {State1, FuncPtr} = ?BACKEND:call_primitive(State0, 8, [jit_state, 2]),
@@ -957,7 +1148,7 @@ call_bif_with_large_literal_integer_test() ->
 get_list_test() ->
     State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)),
     {State1, Reg} = ?BACKEND:move_to_native_register(State0, {x_reg, 0}),
-    State2 = ?BACKEND:and_(State1, Reg, -4),
+    {State2, Reg} = ?BACKEND:and_(State1, {free, Reg}, -4),
     State3 = ?BACKEND:move_array_element(State2, Reg, 1, {y_reg, 1}),
     State4 = ?BACKEND:move_array_element(State3, Reg, 0, {y_reg, 0}),
     State5 = ?BACKEND:free_native_registers(State4, [Reg]),
@@ -977,17 +1168,18 @@ get_list_test() ->
 
 is_integer_test() ->
     State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)),
+    State1 = ?BACKEND:jump_table(State0, 1),
     Label = 1,
     Arg1 = {x_reg, 0},
-    {State1, Reg} = ?BACKEND:move_to_native_register(State0, Arg1),
-    State2 = ?BACKEND:if_block(
-        State1, {Reg, '&', ?TERM_IMMED_TAG_MASK, '!=', ?TERM_INTEGER_TAG}, fun(MSt0) ->
+    {State2, Reg} = ?BACKEND:move_to_native_register(State1, Arg1),
+    State3 = ?BACKEND:if_block(
+        State2, {Reg, '&', ?TERM_IMMED_TAG_MASK, '!=', ?TERM_INTEGER_TAG}, fun(MSt0) ->
             MSt1 = ?BACKEND:if_block(
                 MSt0, {Reg, '&', ?TERM_PRIMARY_MASK, '!=', ?TERM_PRIMARY_BOXED}, fun(BSt0) ->
                     ?BACKEND:jump_to_label(BSt0, Label)
                 end
             ),
-            MSt2 = ?BACKEND:and_(MSt1, Reg, ?TERM_PRIMARY_CLEAR_MASK),
+            {MSt2, Reg} = ?BACKEND:and_(MSt1, {free, Reg}, ?TERM_PRIMARY_CLEAR_MASK),
             MSt3 = ?BACKEND:move_array_element(MSt2, Reg, 0, Reg),
             ?BACKEND:if_block(
                 MSt3,
@@ -998,29 +1190,31 @@ is_integer_test() ->
             )
         end
     ),
-    State3 = ?BACKEND:free_native_registers(State2, [Reg]),
-    ?BACKEND:assert_all_native_free(State3),
-    Offset = ?BACKEND:offset(State3),
-    State4 = ?BACKEND:add_label(State3, Label, Offset + 16#100),
-    State5 = ?BACKEND:update_branches(State4),
-    Stream = ?BACKEND:stream(State5),
+    State4 = ?BACKEND:free_native_registers(State3, [Reg]),
+    ?BACKEND:assert_all_native_free(State4),
+    Offset = ?BACKEND:offset(State4),
+    State5 = ?BACKEND:add_label(State4, Label, Offset + 16#100),
+    State6 = ?BACKEND:update_branches(State5),
+    Stream = ?BACKEND:stream(State6),
     Dump = <<
-        "   0:	48 8b 47 30          	mov    0x30(%rdi),%rax\n"
-        "   4:	49 89 c3             	mov    %rax,%r11\n"
-        "   7:	41 80 e3 0f          	and    $0xf,%r11b\n"
-        "   b:	41 80 fb 0f          	cmp    $0xf,%r11b\n"
-        "   f:	74 25                	je     0x36\n"
-        "  11:	49 89 c3             	mov    %rax,%r11\n"
-        "  14:	41 80 e3 03          	and    $0x3,%r11b\n"
-        "  18:	41 80 fb 02          	cmp    $0x2,%r11b\n"
-        "  1c:	74 05                	je     0x23\n"
-        "  1e:	e9 13 01 00 00       	jmpq   0x136\n"
-        "  23:	48 83 e0 fc          	and    $0xfffffffffffffffc,%rax\n"
-        "  27:	48 8b 00             	mov    (%rax),%rax\n"
-        "  2a:	24 3f                	and    $0x3f,%al\n"
-        "  2c:	80 f8 08             	cmp    $0x8,%al\n"
-        "  2f:	74 05                	je     0x36\n"
-        "  31:	e9 00 01 00 00       	jmpq   0x136"
+        "   0:	e9 ff ff ff ff       	jmpq   0x4\n"
+        "   5:	e9 36 01 00 00       	jmpq   0x140\n"
+        "   a:	48 8b 47 30          	mov    0x30(%rdi),%rax\n"
+        "   e:	49 89 c3             	mov    %rax,%r11\n"
+        "  11:	41 80 e3 0f          	and    $0xf,%r11b\n"
+        "  15:	41 80 fb 0f          	cmp    $0xf,%r11b\n"
+        "  19:	74 25                	je     0x40\n"
+        "  1b:	49 89 c3             	mov    %rax,%r11\n"
+        "  1e:	41 80 e3 03          	and    $0x3,%r11b\n"
+        "  22:	41 80 fb 02          	cmp    $0x2,%r11b\n"
+        "  26:	74 05                	je     0x2d\n"
+        "  28:	e9 13 01 00 00       	jmpq   0x140\n"
+        "  2d:	48 83 e0 fc          	and    $0xfffffffffffffffc,%rax\n"
+        "  31:	48 8b 00             	mov    (%rax),%rax\n"
+        "  34:	24 3f                	and    $0x3f,%al\n"
+        "  36:	80 f8 08             	cmp    $0x8,%al\n"
+        "  39:	74 05                	je     0x40\n"
+        "  3b:	e9 00 01 00 00       	jmpq   0x140"
     >>,
     ?assertEqual(dump_to_bin(Dump), Stream).
 
@@ -1031,15 +1225,16 @@ cond_jump_to_label(Cond, Label, MMod, MSt0) ->
 
 is_number_test() ->
     State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)),
+    State1 = ?BACKEND:jump_table(State0, 1),
     Label = 1,
     Arg1 = {x_reg, 0},
-    {State1, Reg} = ?BACKEND:move_to_native_register(State0, Arg1),
-    State2 = ?BACKEND:if_block(
-        State1, {Reg, '&', ?TERM_IMMED_TAG_MASK, '!=', ?TERM_INTEGER_TAG}, fun(BSt0) ->
+    {State2, Reg} = ?BACKEND:move_to_native_register(State1, Arg1),
+    State3 = ?BACKEND:if_block(
+        State2, {Reg, '&', ?TERM_IMMED_TAG_MASK, '!=', ?TERM_INTEGER_TAG}, fun(BSt0) ->
             BSt1 = cond_jump_to_label(
                 {Reg, '&', ?TERM_PRIMARY_MASK, '!=', ?TERM_PRIMARY_BOXED}, Label, ?BACKEND, BSt0
             ),
-            BSt2 = ?BACKEND:and_(BSt1, Reg, ?TERM_PRIMARY_CLEAR_MASK),
+            {BSt2, Reg} = ?BACKEND:and_(BSt1, {free, Reg}, ?TERM_PRIMARY_CLEAR_MASK),
             BSt3 = ?BACKEND:move_array_element(BSt2, Reg, 0, Reg),
             cond_jump_to_label(
                 {'and', [
@@ -1052,58 +1247,63 @@ is_number_test() ->
             )
         end
     ),
-    State3 = ?BACKEND:free_native_registers(State2, [Reg]),
-    ?BACKEND:assert_all_native_free(State3),
-    Offset = ?BACKEND:offset(State3),
-    State4 = ?BACKEND:add_label(State3, Label, Offset + 16#100),
-    State5 = ?BACKEND:update_branches(State4),
-    Stream = ?BACKEND:stream(State5),
+    State4 = ?BACKEND:free_native_registers(State3, [Reg]),
+    ?BACKEND:assert_all_native_free(State4),
+    Offset = ?BACKEND:offset(State4),
+    State5 = ?BACKEND:add_label(State4, Label, Offset + 16#100),
+    State6 = ?BACKEND:update_branches(State5),
+    Stream = ?BACKEND:stream(State6),
     Dump = <<
-        "   0:	48 8b 47 30          	mov    0x30(%rdi),%rax\n"
-        "   4:	49 89 c3             	mov    %rax,%r11\n"
-        "   7:	41 80 e3 0f          	and    $0xf,%r11b\n"
-        "   b:	41 80 fb 0f          	cmp    $0xf,%r11b\n"
-        "   f:	74 32                	je     0x43\n"
-        "  11:	49 89 c3             	mov    %rax,%r11\n"
-        "  14:	41 80 e3 03          	and    $0x3,%r11b\n"
-        "  18:	41 80 fb 02          	cmp    $0x2,%r11b\n"
-        "  1c:	74 05                	je     0x23\n"
-        "  1e:	e9 20 01 00 00       	jmpq   0x143\n"
-        "  23:	48 83 e0 fc          	and    $0xfffffffffffffffc,%rax\n"
-        "  27:	48 8b 00             	mov    (%rax),%rax\n"
-        "  2a:	49 89 c3             	mov    %rax,%r11\n"
-        "  2d:	41 80 e3 3f          	and    $0x3f,%r11b\n"
-        "  31:	41 80 fb 08          	cmp    $0x8,%r11b\n"
-        "  35:	74 0c                	je     0x43\n"
-        "  37:	24 3f                	and    $0x3f,%al\n"
-        "  39:	80 f8 18             	cmp    $0x18,%al\n"
-        "  3c:	74 05                	je     0x43\n"
-        "  3e:	e9 00 01 00 00       	jmpq   0x143"
+        "   0:	e9 ff ff ff ff       	jmpq   0x4\n"
+        "   5:	e9 43 01 00 00       	jmpq   0x14d\n"
+        "   a:	48 8b 47 30          	mov    0x30(%rdi),%rax\n"
+        "   e:	49 89 c3             	mov    %rax,%r11\n"
+        "  11:	41 80 e3 0f          	and    $0xf,%r11b\n"
+        "  15:	41 80 fb 0f          	cmp    $0xf,%r11b\n"
+        "  19:	74 32                	je     0x4d\n"
+        "  1b:	49 89 c3             	mov    %rax,%r11\n"
+        "  1e:	41 80 e3 03          	and    $0x3,%r11b\n"
+        "  22:	41 80 fb 02          	cmp    $0x2,%r11b\n"
+        "  26:	74 05                	je     0x2d\n"
+        "  28:	e9 20 01 00 00       	jmpq   0x14d\n"
+        "  2d:	48 83 e0 fc          	and    $0xfffffffffffffffc,%rax\n"
+        "  31:	48 8b 00             	mov    (%rax),%rax\n"
+        "  34:	49 89 c3             	mov    %rax,%r11\n"
+        "  37:	41 80 e3 3f          	and    $0x3f,%r11b\n"
+        "  3b:	41 80 fb 08          	cmp    $0x8,%r11b\n"
+        "  3f:	74 0c                	je     0x4d\n"
+        "  41:	24 3f                	and    $0x3f,%al\n"
+        "  43:	80 f8 18             	cmp    $0x18,%al\n"
+        "  46:	74 05                	je     0x4d\n"
+        "  48:	e9 00 01 00 00       	jmpq   0x14d"
     >>,
     ?assertEqual(dump_to_bin(Dump), Stream).
 
 is_boolean_test() ->
     State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)),
+    State1 = ?BACKEND:jump_table(State0, 1),
     Label = 1,
-    {State1, Reg} = ?BACKEND:move_to_native_register(State0, {x_reg, 0}),
-    State2 = ?BACKEND:if_block(State1, {Reg, '!=', ?TRUE_ATOM}, fun(BSt0) ->
+    {State2, Reg} = ?BACKEND:move_to_native_register(State1, {x_reg, 0}),
+    State3 = ?BACKEND:if_block(State2, {Reg, '!=', ?TRUE_ATOM}, fun(BSt0) ->
         ?BACKEND:if_block(BSt0, {Reg, '!=', ?FALSE_ATOM}, fun(BSt1) ->
             ?BACKEND:jump_to_label(BSt1, Label)
         end)
     end),
-    State3 = ?BACKEND:free_native_registers(State2, [Reg]),
-    ?BACKEND:assert_all_native_free(State3),
-    Offset = ?BACKEND:offset(State3),
-    State4 = ?BACKEND:add_label(State3, Label, Offset + 16#100),
-    State5 = ?BACKEND:update_branches(State4),
-    Stream = ?BACKEND:stream(State5),
+    State4 = ?BACKEND:free_native_registers(State3, [Reg]),
+    ?BACKEND:assert_all_native_free(State4),
+    Offset = ?BACKEND:offset(State4),
+    State5 = ?BACKEND:add_label(State4, Label, Offset + 16#100),
+    State6 = ?BACKEND:update_branches(State5),
+    Stream = ?BACKEND:stream(State6),
     Dump = <<
-        "   0:	48 8b 47 30          	mov    0x30(%rdi),%rax\n"
-        "   4:	48 83 f8 4b          	cmp    $0x4b,%rax\n"
-        "   8:	74 0b                	je     0x15\n"
-        "   a:	48 83 f8 0b          	cmp    $0xb,%rax\n"
-        "   e:	74 05                	je     0x15\n"
-        "  10:	e9 00 01 00 00       	jmpq   0x115\n"
+        "   0:	e9 ff ff ff ff       	jmpq   0x4\n"
+        "   5:	e9 15 01 00 00       	jmpq   0x11f\n"
+        "   a:	48 8b 47 30          	mov    0x30(%rdi),%rax\n"
+        "   e:	48 83 f8 4b          	cmp    $0x4b,%rax\n"
+        "  12:	74 0b                	je     0x1f\n"
+        "  14:	48 83 f8 0b          	cmp    $0xb,%rax\n"
+        "  18:	74 05                	je     0x1f\n"
+        "  1a:	e9 00 01 00 00       	jmpq   0x11f\n"
     >>,
     ?assertEqual(dump_to_bin(Dump), Stream).
 
@@ -1148,7 +1348,7 @@ call_fun_test() ->
             ])
         end
     ),
-    State5 = ?BACKEND:and_(State4, RegCopy, ?TERM_PRIMARY_CLEAR_MASK),
+    {State5, RegCopy} = ?BACKEND:and_(State4, {free, RegCopy}, ?TERM_PRIMARY_CLEAR_MASK),
     State6 = ?BACKEND:move_array_element(State5, RegCopy, 0, RegCopy),
     State7 = ?BACKEND:if_block(
         State6, {RegCopy, '&', ?TERM_BOXED_TAG_MASK, '!=', ?TERM_BOXED_FUN}, fun(BSt0) ->
@@ -1592,6 +1792,62 @@ jump_to_continuation_test() ->
         >>,
     ?assertEqual(dump_to_bin(Dump), Stream).
 
+%% Test set_continuation_to_label with a label that is still unknown when the continuation is set (its offset, 16#100, is only added afterwards)
+wait_test() ->
+    State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)),
+
+    State1 = ?BACKEND:jump_table(State0, 5),
+    State2 = ?BACKEND:add_label(State1, 1),
+    Label = 2,
+    State3 = ?BACKEND:set_continuation_to_label(State2, Label),
+    State4 = ?BACKEND:call_primitive_last(State3, ?PRIM_SCHEDULE_WAIT_CP, [ctx, jit_state]),
+    State5 = ?BACKEND:add_label(State4, Label, 16#100),
+    State6 = ?BACKEND:update_branches(State5),
+
+    Stream = ?BACKEND:stream(State6),
+    Dump =
+        <<
+            "   0:	e9 ff ff ff ff       	jmpq   0x4\n"
+            "   5:	e9 14 00 00 00       	jmpq   0x1e\n"
+            "   a:	e9 f1 00 00 00       	jmpq   0x100\n"
+            "   f:	e9 ff ff ff ff       	jmpq   0x13\n"
+            "  14:	e9 ff ff ff ff       	jmpq   0x18\n"
+            "  19:	e9 ff ff ff ff       	jmpq   0x1d\n"
+            "  1e:	48 8d 05 db 00 00 00 	lea    0xdb(%rip),%rax\n"
+            "  25:	48 89 46 08          	mov    %rax,0x8(%rsi)\n"
+            "  29:	48 8b 82 e8 00 00 00 	mov    0xe8(%rdx),%rax\n"
+            "  30:	ff e0                	jmpq   *%rax"
+        >>,
+    ?assertEqual(dump_to_bin(Dump), Stream).
+
+%% Test set_continuation_to_label with a label whose offset (16#100) is already added before the continuation is set; the expected code is the same as in wait_test
+wait_known_test() ->
+    State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)),
+
+    State1 = ?BACKEND:jump_table(State0, 5),
+    State2 = ?BACKEND:add_label(State1, 1),
+    Label = 2,
+    State3 = ?BACKEND:add_label(State2, Label, 16#100),
+    State4 = ?BACKEND:set_continuation_to_label(State3, Label),
+    State5 = ?BACKEND:call_primitive_last(State4, ?PRIM_SCHEDULE_WAIT_CP, [ctx, jit_state]),
+    State6 = ?BACKEND:update_branches(State5),
+
+    Stream = ?BACKEND:stream(State6),
+    Dump =
+        <<
+            "   0:	e9 ff ff ff ff       	jmpq   0x4\n"
+            "   5:	e9 14 00 00 00       	jmpq   0x1e\n"
+            "   a:	e9 f1 00 00 00       	jmpq   0x100\n"
+            "   f:	e9 ff ff ff ff       	jmpq   0x13\n"
+            "  14:	e9 ff ff ff ff       	jmpq   0x18\n"
+            "  19:	e9 ff ff ff ff       	jmpq   0x1d\n"
+            "  1e:	48 8d 05 db 00 00 00 	lea    0xdb(%rip),%rax\n"
+            "  25:	48 89 46 08          	mov    %rax,0x8(%rsi)\n"
+            "  29:	48 8b 82 e8 00 00 00 	mov    0xe8(%rdx),%rax\n"
+            "  30:	ff e0                	jmpq   *%rax"
+        >>,
+    ?assertEqual(dump_to_bin(Dump), Stream).
+
 dump_to_bin(Dump) ->
     dump_to_bin0(Dump, addr, []).
 
diff --git a/tests/libs/jit/tests.erl b/tests/libs/jit/tests.erl
index ff272f6eac..2d130cad03 100644
--- a/tests/libs/jit/tests.erl
+++ b/tests/libs/jit/tests.erl
@@ -31,6 +31,8 @@ start() ->
         jit_aarch64_asm_tests,
         jit_armv6m_tests,
         jit_armv6m_asm_tests,
+        jit_riscv32_tests,
+        jit_riscv32_asm_tests,
         jit_x86_64_tests,
         jit_x86_64_asm_tests
     ]).
diff --git a/tests/test-jit_stream_flash.c b/tests/test-jit_stream_flash.c
new file mode 100644
index 0000000000..d35b565584
--- /dev/null
+++ b/tests/test-jit_stream_flash.c
@@ -0,0 +1,858 @@
+/*
+ * This file is part of AtomVM.
+ *
+ * Copyright 2025 by Paul Guyot <pguyot@kallisys.net>
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ * SPDX-License-Identifier: Apache-2.0 OR LGPL-2.1-or-later
+ */
+
+#include <assert.h>
+#include <stdbool.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "avmpack.h"
+#include "context.h"
+#include "globalcontext.h"
+#include "jit_stream_flash.h"
+#include "jit_stream_flash_platform.h"
+#include "scheduler.h"
+#include "synclist.h"
+#include "term.h"
+#include "utils.h"
+
+// Mock flash memory - simulate 64KB of flash
+#define MOCK_FLASH_SIZE (64 * 1024)
+// Align to sector boundary for proper flash simulation
+static uint8_t mock_flash[MOCK_FLASH_SIZE] __attribute__((aligned(FLASH_SECTOR_SIZE)));
+
+// JIT entry header (copied from jit_stream_flash.c for testing; keep the two definitions in sync). NOTE(review): field meanings are defined by jit_stream_flash.c — confirm there
+struct JITEntry
+{
+    uint16_t magic;
+    uint16_t version;
+    uint32_t code;
+    uint32_t labels;
+    uint32_t size;
+};
+
+// Bitwise CRC32 for verification (reflected polynomial 0xEDB88320, initial value 0xFFFFFFFF, result inverted); copied from jit_stream_flash.c
+static uint32_t crc32(const uint8_t *data, size_t len)
+{
+    uint32_t crc = 0xFFFFFFFF;
+    for (size_t i = 0; i < len; i++) {
+        crc ^= data[i];
+        for (int j = 0; j < 8; j++) {
+            crc = (crc >> 1) ^ (0xEDB88320 & -(crc & 1)); // -(crc & 1) is all-ones when the low bit is set, so the polynomial is applied without a branch
+        }
+    }
+    return ~crc;
+}
+
+// Platform context (opaque); the mock only needs the address at which the simulated flash starts
+struct JSFlashPlatformContext
+{
+    uintptr_t base_addr; // address of mock_flash, set by jit_stream_flash_platform_init()
+};
+
+// Forward declarations of mock platform functions
+struct JSFlashPlatformContext *jit_stream_flash_platform_init(void);
+void jit_stream_flash_platform_destroy(struct JSFlashPlatformContext *ctx);
+bool jit_stream_flash_platform_erase_sector(struct JSFlashPlatformContext *ctx, uintptr_t addr);
+bool jit_stream_flash_platform_write_page(struct JSFlashPlatformContext *ctx, uintptr_t addr, const uint8_t *data);
+uintptr_t jit_stream_flash_platform_ptr_to_executable(uintptr_t addr);
+
+// Mock platform implementation. init: allocates a context whose base_addr is the mock_flash array; returns NULL if malloc fails
+struct JSFlashPlatformContext *jit_stream_flash_platform_init(void)
+{
+    struct JSFlashPlatformContext *ctx = malloc(sizeof(struct JSFlashPlatformContext));
+    if (!ctx) {
+        return NULL;
+    }
+
+    // DO NOT erase flash here - it should persist across multiple stream creations
+    // Flash initialization happens once at test startup
+
+    ctx->base_addr = (uintptr_t) mock_flash;
+    return ctx;
+}
+
+void jit_stream_flash_platform_destroy(struct JSFlashPlatformContext *ctx)
+{
+    free(ctx); // releases only the context allocated by jit_stream_flash_platform_init(); mock_flash contents are left untouched
+}
+
+// Mock sector erase: fills the FLASH_SECTOR_SIZE bytes at addr with 0xFF. Returns false (after logging to stderr) if addr is not sector-aligned relative to base_addr or the sector does not fit inside mock_flash
+bool jit_stream_flash_platform_erase_sector(struct JSFlashPlatformContext *ctx, uintptr_t addr)
+{
+    assert(ctx);
+    // Check alignment
+    if ((addr - ctx->base_addr) % FLASH_SECTOR_SIZE != 0) {
+        fprintf(stderr, "Erase address 0x%lx not sector-aligned\n", (unsigned long) addr);
+        return false;
+    }
+
+    // The whole sector must lie inside mock_flash; the check subtracts instead of adding so it cannot wrap around
+    size_t offset = addr - ctx->base_addr;
+    if (offset >= MOCK_FLASH_SIZE || MOCK_FLASH_SIZE - offset < FLASH_SECTOR_SIZE) {
+        fprintf(stderr, "Erase address 0x%lx out of bounds\n", (unsigned long) addr);
+        return false;
+    }
+
+    // Erase the sector
+    memset(&mock_flash[offset], 0xFF, FLASH_SECTOR_SIZE);
+    return true;
+}
+
+// Mock page write: copies FLASH_PAGE_SIZE bytes from data to addr, enforcing flash semantics (bits may only go 1→0). Returns false (after logging to stderr) on misalignment, out-of-bounds or an attempted 0→1 transition
+bool jit_stream_flash_platform_write_page(struct JSFlashPlatformContext *ctx, uintptr_t addr, const uint8_t *data)
+{
+    assert(ctx);
+    // Check alignment
+    if ((addr - ctx->base_addr) % FLASH_PAGE_SIZE != 0) {
+        fprintf(stderr, "Write address 0x%lx not page-aligned (base_addr=0x%lx, offset=0x%lx)\n",
+            (unsigned long) addr, (unsigned long) ctx->base_addr,
+            (unsigned long) (addr - ctx->base_addr));
+        return false;
+    }
+
+    // The whole page must lie inside mock_flash; offset + FLASH_PAGE_SIZE could wrap for addr < base_addr, so subtract instead
+    size_t offset = addr - ctx->base_addr;
+    if (offset >= MOCK_FLASH_SIZE || MOCK_FLASH_SIZE - offset < FLASH_PAGE_SIZE) {
+        fprintf(stderr, "Write at offset 0x%zx would exceed flash bounds\n", offset);
+        return false;
+    }
+
+    // Validate write - flash can only transition bits from 1→0 without erase
+    for (size_t i = 0; i < FLASH_PAGE_SIZE; i++) {
+        uint8_t current = mock_flash[offset + i];
+        uint8_t new_val = data[i];
+
+        // Check if we're trying to set any bits from 0→1
+        if ((~current & new_val) != 0) {
+            fprintf(stderr, "FLASH VALIDATION ERROR at offset 0x%zx:\n", offset + i);
+            fprintf(stderr, "  Attempting to set bits 0→1 without erase\n");
+            fprintf(stderr, "  Current: 0x%02x, New: 0x%02x, Invalid bits: 0x%02x\n",
+                current, new_val, ~current & new_val);
+            return false;
+        }
+    }
+
+    // Write the page
+    memcpy(&mock_flash[offset], data, FLASH_PAGE_SIZE);
+    return true;
+}
+
+uintptr_t jit_stream_flash_platform_ptr_to_executable(uintptr_t addr)
+{
+    // For host testing, no conversion needed: the address is returned unchanged
+    return addr;
+}
+
+uintptr_t jit_stream_flash_platform_executable_to_ptr(uintptr_t addr)
+{
+    // For host testing, no conversion needed: the address is returned unchanged
+    return addr;
+}
+
+// Create a minimal AVM pack (24-byte header + a single "end" section) at mock_flash + 0x100; always returns 0
+static uint8_t create_minimal_avmpack(void)
+{
+    uint8_t *pack = mock_flash + 0x100; // Place pack at offset 0x100
+
+    // AVM Pack header: "#!/usr/bin/env AtomVM\n" (22 characters + NUL = 23 bytes)
+    // followed by one zero byte of padding, 24 bytes in total
+    const char header_str[] = "#!/usr/bin/env AtomVM\n";
+    memcpy(pack, header_str, 23);
+    pack[23] = 0; // Padding to align to 4 bytes
+
+    // Section header for "end" section
+    // Section format: size (4) + flags (4) + reserved (4) + name (null-terminated)
+    uint8_t *section = pack + 24;
+
+    // Total section size including header: size + flags + reserved + "end\0"
+    uint32_t section_size = 4 + 4 + 4 + 4;
+    uint32_t flags = END_OF_FILE;
+
+    // Store size, flags and the reserved word (0) as big-endian 32-bit values.
+    // Bytes are written one by one so the layout is identical on little- and
+    // big-endian hosts (an unconditional byte swap is only right on little-endian).
+    const uint32_t words[3] = { section_size, flags, 0 };
+    for (size_t i = 0; i < sizeof(words); i++) {
+        section[i] = (uint8_t) (words[i / 4] >> (24 - 8 * (i % 4)));
+    }
+
+    // Write null-terminated name starting at offset 12
+    memcpy(section + 12, "end", 4); // includes null terminator
+
+    return 0;
+}
+
+// Register a minimal AVM pack (built at mock_flash + 0x100 by create_minimal_avmpack) with the global context
+static void register_test_avmpack(GlobalContext *glb)
+{
+    create_minimal_avmpack();
+
+    // Create AVMPackData describing the pack; it is appended to glb's avmpack list below
+    struct ConstAVMPack *pack = malloc(sizeof(struct ConstAVMPack));
+    assert(pack != NULL); // fail loudly instead of dereferencing NULL if the allocation fails
+    avmpack_data_init(&pack->base, &const_avm_pack_info);
+    pack->base.data = mock_flash + 0x100;
+    pack->base.in_use = true;
+    // Add to global context's avmpack list
+    synclist_append(&glb->avmpack_data, &pack->base.avmpack_head);
+}
+
+// Test helper: create a len-byte binary term after reserving heap space with memory_ensure_free_with_roots (passing roots/num_roots to it); returns term_invalid_term() if the reservation fails
+static term make_binary_rooted(Context *ctx, const uint8_t *data, size_t len, term *roots, int num_roots)
+{
+    if (UNLIKELY(memory_ensure_free_with_roots(ctx, term_binary_heap_size(len), num_roots, roots, MEMORY_CAN_SHRINK) != MEMORY_GC_OK)) {
+        return term_invalid_term();
+    }
+    return term_from_literal_binary(data, len, &ctx->heap, ctx->global);
+}
+
+// Test helper: look up a jit_stream_flash NIF by name and return its function pointer
+typedef term (*nif_function)(Context *ctx, int argc, term argv[]);
+
+static nif_function get_nif(const char *name)
+{
+    const struct Nif *nif = jit_stream_flash_get_nif(name);
+    if (!nif || nif->base.type != NIFFunctionType) {
+        return NULL; // lookup returned nothing, or the entry is not of type NIFFunctionType
+    }
+    return nif->nif_ptr;
+}
+
+// Test 1: Create a stream, append 100 bytes, flush; append and flush must return the stream
+void test_basic_append_flush(void)
+{
+    fprintf(stderr, "\n=== Test: Basic Append and Flush ===\n");
+
+    // Reset flash: everything 0x00 except the first sector, which is filled with 0xFF
+    memset(mock_flash, 0x00, MOCK_FLASH_SIZE);
+    memset(&mock_flash[0], 0xFF, FLASH_SECTOR_SIZE); // first page with AVM
+
+    GlobalContext *glb = globalcontext_new();
+    Context *ctx = context_new(glb);
+
+    register_test_avmpack(glb);
+    jit_stream_flash_init(glb);
+
+    nif_function new_nif = get_nif("jit_stream_flash:new/1");
+    nif_function append_nif = get_nif("jit_stream_flash:append/2");
+    nif_function flush_nif = get_nif("jit_stream_flash:flush/1");
+
+    assert(new_nif != NULL);
+    assert(append_nif != NULL);
+    assert(flush_nif != NULL);
+
+    // Create stream
+    term argv[3];
+    argv[0] = term_from_int(10); // label count
+    term stream = new_nif(ctx, 1, argv);
+    assert(term_is_binary(stream)); // Resource is a binary
+
+    // Append some data - root the stream during binary allocation
+    uint8_t data[100];
+    memset(data, 0xAA, sizeof(data));
+    argv[0] = stream;
+    argv[1] = make_binary_rooted(ctx, data, sizeof(data), &argv[0], 1); // Root argv[0] (stream)
+    stream = append_nif(ctx, 2, argv); // Update stream in case GC moved it
+    assert(stream == argv[0]); // Should return the stream
+
+    // Flush
+    argv[0] = stream;
+    stream = flush_nif(ctx, 1, argv); // Update stream
+    assert(stream == argv[0]);
+
+    scheduler_terminate(ctx);
+    globalcontext_destroy(glb);
+
+    fprintf(stderr, "PASS: Basic append and flush\n");
+}
+
+// Test 2: Ten consecutive 100-byte appends (intended to cross page boundaries), then flush
+void test_multiple_appends(void)
+{
+    fprintf(stderr, "\n=== Test: Multiple Appends Crossing Pages ===\n");
+
+    // Reset flash: everything 0x00 except the first sector, which is filled with 0xFF
+    memset(mock_flash, 0x00, MOCK_FLASH_SIZE);
+    memset(&mock_flash[0], 0xFF, FLASH_SECTOR_SIZE); // first page with AVM
+
+    GlobalContext *glb = globalcontext_new();
+    Context *ctx = context_new(glb);
+
+    register_test_avmpack(glb);
+    jit_stream_flash_init(glb);
+
+    nif_function new_nif = get_nif("jit_stream_flash:new/1");
+    nif_function append_nif = get_nif("jit_stream_flash:append/2");
+    nif_function flush_nif = get_nif("jit_stream_flash:flush/1");
+
+    // Create stream
+    term argv[3];
+    argv[0] = term_from_int(10);
+    term stream = new_nif(ctx, 1, argv);
+
+    // Append 10 chunks of 100 bytes, each filled with a different byte (0xA0 + i)
+    for (int i = 0; i < 10; i++) {
+        uint8_t data[100];
+        memset(data, 0xA0 + i, sizeof(data));
+        argv[0] = stream;
+        argv[1] = make_binary_rooted(ctx, data, sizeof(data), &argv[0], 1);
+        stream = append_nif(ctx, 2, argv);
+        argv[0] = stream; // Update for next iteration
+    }
+
+    // Flush
+    argv[0] = stream;
+    flush_nif(ctx, 1, argv);
+
+    scheduler_terminate(ctx);
+    globalcontext_destroy(glb);
+
+    fprintf(stderr, "PASS: Multiple appends crossing pages\n");
+}
+
+// Test 3: Replace operation - append 200 bytes, replace 4 bytes at offset 50, then flush
+void test_replace(void)
+{
+    fprintf(stderr, "\n=== Test: Replace Operation ===\n");
+
+    // Reset flash: everything 0x00 except the first sector, which is filled with 0xFF
+    memset(mock_flash, 0x00, MOCK_FLASH_SIZE);
+    memset(&mock_flash[0], 0xFF, FLASH_SECTOR_SIZE); // first page with AVM
+
+    GlobalContext *glb = globalcontext_new();
+    Context *ctx = context_new(glb);
+
+    register_test_avmpack(glb);
+    jit_stream_flash_init(glb);
+
+    nif_function new_nif = get_nif("jit_stream_flash:new/1");
+    nif_function append_nif = get_nif("jit_stream_flash:append/2");
+    nif_function replace_nif = get_nif("jit_stream_flash:replace/3");
+    nif_function flush_nif = get_nif("jit_stream_flash:flush/1");
+
+    // Create stream
+    term argv[3];
+    argv[0] = term_from_int(10);
+    term stream = new_nif(ctx, 1, argv);
+
+    // Append initial data
+    uint8_t data[200];
+    memset(data, 0xAA, sizeof(data));
+    argv[0] = stream;
+    argv[1] = make_binary_rooted(ctx, data, sizeof(data), &argv[0], 1);
+    stream = append_nif(ctx, 2, argv); // Update stream
+
+    // Replace some bytes in the middle (only argv[0] is passed as a root for the allocation)
+    uint8_t replace_data[] = { 0x11, 0x22, 0x33, 0x44 };
+    argv[0] = stream;
+    argv[1] = term_from_int(50); // offset
+    argv[2] = make_binary_rooted(ctx, replace_data, sizeof(replace_data), &argv[0], 1);
+    stream = replace_nif(ctx, 3, argv); // Update stream
+
+    // Flush
+    argv[0] = stream;
+    stream = flush_nif(ctx, 1, argv); // Update stream
+
+    scheduler_terminate(ctx);
+    globalcontext_destroy(glb);
+
+    fprintf(stderr, "PASS: Replace operation\n");
+}
+
+// Test 4: Second module bug scenario - compile, flush and cache two modules back to back
+void test_second_module_bug(void)
+{
+    fprintf(stderr, "\n=== Test: Second Module Bug Scenario (THE ACTUAL BUG) ===\n");
+
+    // Reset flash: everything 0x00 except the first sector, which is filled with 0xFF
+    memset(mock_flash, 0x00, MOCK_FLASH_SIZE);
+    memset(&mock_flash[0], 0xFF, FLASH_SECTOR_SIZE); // first page with AVM
+
+    GlobalContext *glb = globalcontext_new();
+    Context *ctx = context_new(glb);
+
+    register_test_avmpack(glb);
+    jit_stream_flash_init(glb);
+
+    nif_function new_nif = get_nif("jit_stream_flash:new/1");
+    nif_function append_nif = get_nif("jit_stream_flash:append/2");
+    nif_function flush_nif = get_nif("jit_stream_flash:flush/1");
+
+    // Simulate first module compilation - fill most of first sector
+    fprintf(stderr, "Simulating first module compilation...\n");
+    term argv[3];
+    argv[0] = term_from_int(100);
+    term stream1 = new_nif(ctx, 1, argv);
+
+    // Write 35 x 100 = 3500 bytes of code (most of one sector, assuming 4 KiB sectors)
+    for (int i = 0; i < 35; i++) {
+        uint8_t data[100];
+        memset(data, 0xA0 + (i % 16), sizeof(data));
+        argv[0] = stream1;
+        argv[1] = make_binary_rooted(ctx, data, sizeof(data), &argv[0], 1);
+        stream1 = append_nif(ctx, 2, argv); // Update stream1
+    }
+
+    argv[0] = stream1;
+
+    stream1 = flush_nif(ctx, 1, argv); // Update stream1
+
+    fprintf(stderr, "First module compiled and flushed\n");
+
+    // Finalize the first module to mark it as valid and prepare for the second
+    ModuleNativeEntryPoint entry1 = jit_stream_flash_entry_point(ctx, stream1);
+    Module fake_mod1;
+    fake_mod1.code = (CodeChunk *) 0x12345678; // Fake code pointer for testing
+
+    globalcontext_set_cache_native_code(glb, &fake_mod1, 1, entry1, 100);
+
+    // Now simulate second module - this should trigger the bug
+    // The bug was: when creating a new stream, if we're in a new sector
+    // that hasn't been erased, we need to erase it before writing
+    fprintf(stderr, "\nSimulating second module compilation...\n");
+    argv[0] = term_from_int(50);
+    term stream2 = new_nif(ctx, 1, argv);
+
+    // Append 20 x 100 = 2000 bytes - this is meant to cross into the next sector
+    for (int i = 0; i < 20; i++) {
+        uint8_t data[100];
+        memset(data, 0xB0 + (i % 16), sizeof(data));
+        argv[0] = stream2;
+        argv[1] = make_binary_rooted(ctx, data, sizeof(data), &argv[0], 1);
+        stream2 = append_nif(ctx, 2, argv); // Update stream2
+    }
+
+    argv[0] = stream2;
+    stream2 = flush_nif(ctx, 1, argv); // Update stream2
+
+    fprintf(stderr, "Second module compiled and flushed successfully!\n");
+
+    // Finalize the second module
+    ModuleNativeEntryPoint entry2 = jit_stream_flash_entry_point(ctx, stream2);
+    Module fake_mod2;
+    fake_mod2.code = (CodeChunk *) 0x87654321; // Fake code pointer for testing
+    globalcontext_set_cache_native_code(glb, &fake_mod2, 1, entry2, 50);
+
+    scheduler_terminate(ctx);
+    globalcontext_destroy(glb);
+
+    fprintf(stderr, "PASS: Second module bug scenario - bug is FIXED!\n");
+}
+
+void test_magic_0xffff_but_garbage_bug(void)
+{
+    fprintf(stderr, "\n=== Test: Magic is 0xFFFF but Sector Has Garbage ===\n");
+
+    // Simulate ESP32 scenario where first JIT entry is at start of sector
+    // and magic happens to be 0xFFFF but rest has garbage
+    memset(mock_flash, 0x00, MOCK_FLASH_SIZE);
+    memset(&mock_flash[0], 0xFF, FLASH_SECTOR_SIZE); // first sector with AVM
+
+    // Sector 1 starts FLASH_SECTOR_SIZE bytes into mock_flash; derive its address from
+    // the constant instead of a hard-coded 0x1000 so the two cannot get out of sync
+    uint8_t *sector1 = mock_flash + FLASH_SECTOR_SIZE;
+    // Set magic to 0xFFFF at start of sector 1, but rest is garbage (0x97)
+    sector1[0] = 0xFF;
+    sector1[1] = 0xFF;
+    memset(sector1 + 2, 0x97, FLASH_SECTOR_SIZE - 2);
+
+    fprintf(stderr, "Sector 1: magic=0xFFFF at offset 0, but rest has garbage (0x97)\n");
+
+    GlobalContext *glb = globalcontext_new();
+    Context *ctx = context_new(glb);
+
+    register_test_avmpack(glb);
+    jit_stream_flash_init(glb);
+
+    nif_function new_nif = get_nif("jit_stream_flash:new/1");
+    nif_function append_nif = get_nif("jit_stream_flash:append/2");
+    nif_function flush_nif = get_nif("jit_stream_flash:flush/1");
+
+    // Compile a small module - should detect garbage and erase
+    fprintf(stderr, "Compiling module (should detect garbage despite magic=0xFFFF)...\n");
+    term argv[3];
+    argv[0] = term_from_int(100);
+    term stream1 = new_nif(ctx, 1, argv);
+
+    // Append 100 bytes of data, passing the stream as a root during binary allocation
+    uint8_t data[100];
+    memset(data, 0xAA, sizeof(data));
+    argv[0] = stream1;
+    argv[1] = make_binary_rooted(ctx, data, sizeof(data), &argv[0], 1);
+    stream1 = append_nif(ctx, 2, argv);
+
+    argv[0] = stream1;
+    stream1 = flush_nif(ctx, 1, argv);
+    fprintf(stderr, "Module compiled successfully!\n");
+
+    scheduler_terminate(ctx);
+    globalcontext_destroy(glb);
+
+    fprintf(stderr, "PASS: Magic 0xFFFF but garbage test\n");
+}
+
+void test_garbage_flash_bug(void)
+{
+    fprintf(stderr, "\n=== Test: Garbage Flash Bug - JIT Sectors Not Erased After AVM Flash ===\n");
+
+    // Reset flash: everything 0x00 except the first sector, which is filled with 0xFF
+    memset(mock_flash, 0x00, MOCK_FLASH_SIZE);
+    memset(&mock_flash[0], 0xFF, FLASH_SECTOR_SIZE); // first page with AVM
+
+    fprintf(stderr, "Flash state: Sector 0 erased (0xFF), sectors 1+ have garbage (0x00)\n");
+
+    GlobalContext *glb = globalcontext_new();
+    Context *ctx = context_new(glb);
+
+    register_test_avmpack(glb);
+    jit_stream_flash_init(glb);
+
+    nif_function new_nif = get_nif("jit_stream_flash:new/1");
+    nif_function append_nif = get_nif("jit_stream_flash:append/2");
+    nif_function flush_nif = get_nif("jit_stream_flash:flush/1");
+
+    // Compile first module spanning sectors (the loop below appends 8200 bytes; benchmark: 8254)
+    fprintf(stderr, "Compiling first module spanning sectors 1-2 (8254 bytes)...\n");
+    term argv[3];
+    argv[0] = term_from_int(100);
+    term stream1 = new_nif(ctx, 1, argv);
+
+    // Write 82 blocks of 100 bytes = 8200 bytes + 16 byte header = 8216 bytes
+    for (int i = 0; i < 82; i++) {
+        uint8_t data[100];
+        memset(data, 0xAA, sizeof(data));
+        argv[0] = stream1;
+        argv[1] = make_binary_rooted(ctx, data, sizeof(data), &argv[0], 1);
+        stream1 = append_nif(ctx, 2, argv);
+    }
+
+    argv[0] = stream1;
+    stream1 = flush_nif(ctx, 1, argv);
+    fprintf(stderr, "First module compiled and flushed\n");
+
+    ModuleNativeEntryPoint entry1 = jit_stream_flash_entry_point(ctx, stream1);
+    Module fake_mod1;
+    fake_mod1.code = (CodeChunk *) 0x12345678; // fake code pointer, only the code field is set
+    globalcontext_set_cache_native_code(glb, &fake_mod1, 1, entry1, 100);
+
+    scheduler_terminate(ctx);
+    globalcontext_destroy(glb);
+
+    fprintf(stderr, "PASS: Garbage flash bug test\n");
+}
+
+void test_esp32_crash_bug(void)
+{
+    fprintf(stderr, "\n=== Test: ESP32 Crash Bug - Module Spanning Multiple Sectors ===\n");
+
+    // Reset flash: everything 0x00 except the first sector, which is filled with 0xFF
+    memset(mock_flash, 0x00, MOCK_FLASH_SIZE);
+    memset(&mock_flash[0], 0xFF, FLASH_SECTOR_SIZE); // first page with AVM
+
+    GlobalContext *glb = globalcontext_new();
+    Context *ctx = context_new(glb);
+
+    register_test_avmpack(glb);
+    jit_stream_flash_init(glb);
+
+    nif_function new_nif = get_nif("jit_stream_flash:new/1");
+    nif_function append_nif = get_nif("jit_stream_flash:append/2");
+    nif_function flush_nif = get_nif("jit_stream_flash:flush/1");
+
+    // Simulate first module like ESP32 benchmark: ~8254 bytes
+    // This will span sectors 0, 1, and part of sector 2
+    fprintf(stderr, "First module: writing ~8254 bytes (spans 3 sectors)...\n");
+    term argv[3];
+    argv[0] = term_from_int(100);
+    term stream1 = new_nif(ctx, 1, argv);
+
+    // Write 82 blocks of 100 bytes = 8200 bytes + 16 byte header = 8216 bytes
+    for (int i = 0; i < 82; i++) {
+        uint8_t data[100];
+        memset(data, 0xAA, sizeof(data));
+        argv[0] = stream1;
+        argv[1] = make_binary_rooted(ctx, data, sizeof(data), &argv[0], 1);
+        stream1 = append_nif(ctx, 2, argv);
+    }
+
+    argv[0] = stream1;
+    stream1 = flush_nif(ctx, 1, argv);
+    fprintf(stderr, "First module flushed\n");
+
+    ModuleNativeEntryPoint entry1 = jit_stream_flash_entry_point(ctx, stream1);
+    Module fake_mod1;
+    fake_mod1.code = (CodeChunk *) 0x12345678; // fake code pointer, only the code field is set
+    globalcontext_set_cache_native_code(glb, &fake_mod1, 1, entry1, 100);
+
+    // Second module like ESP32 pingpong: ~6690 bytes
+    // This will start in sector 2 (which already has tail of first module!)
+    fprintf(stderr, "Second module: writing ~6690 bytes...\n");
+    argv[0] = term_from_int(50);
+    term stream2 = new_nif(ctx, 1, argv);
+
+    // Write 67 blocks of 100 bytes = 6700 bytes
+    for (int i = 0; i < 67; i++) {
+        uint8_t data[100];
+        memset(data, 0xBB, sizeof(data));
+        argv[0] = stream2;
+        argv[1] = make_binary_rooted(ctx, data, sizeof(data), &argv[0], 1);
+        stream2 = append_nif(ctx, 2, argv);
+    }
+
+    argv[0] = stream2;
+    stream2 = flush_nif(ctx, 1, argv);
+    fprintf(stderr, "Second module flushed\n");
+
+    ModuleNativeEntryPoint entry2 = jit_stream_flash_entry_point(ctx, stream2);
+    Module fake_mod2;
+    fake_mod2.code = (CodeChunk *) 0x87654321; // fake code pointer, only the code field is set
+    globalcontext_set_cache_native_code(glb, &fake_mod2, 1, entry2, 50);
+
+    scheduler_terminate(ctx);
+    globalcontext_destroy(glb);
+
+    fprintf(stderr, "PASS: ESP32 crash bug test\n");
+}
+
+// Test for the tail corruption bug: when first module extends into next sector,
+// creating the second module should NOT erase the sector containing the first module's tail
+static void test_tail_corruption_bug(void)
+{
+    fprintf(stderr, "\n=== Test: Tail Corruption Bug - Module Tail in Next Sector ===\n");
+
+    // Initialize flash: sector 0 erased (AVM), rest is garbage
+    memset(mock_flash, 0x00, MOCK_FLASH_SIZE);
+    memset(&mock_flash[0], 0xFF, FLASH_SECTOR_SIZE);
+
+    create_minimal_avmpack();
+
+    GlobalContext *glb = globalcontext_new();
+    Context *ctx = context_new(glb);
+
+    register_test_avmpack(glb);
+    jit_stream_flash_init(glb);
+
+    nif_function new_nif = get_nif("jit_stream_flash:new/1");
+    nif_function append_nif = get_nif("jit_stream_flash:append/2");
+    nif_function flush_nif = get_nif("jit_stream_flash:flush/1");
+
+    // Create first module that will extend past the sector it starts in
+    // Module size: 8270 bytes (like benchmark on ESP32)
+    // Entry header: 16 bytes at 0x0 in sector 0x1000
+    // Native code: 8254 bytes, extends from sector 0x1000 through sector 0x2000
+    // Module ends at: 0x1000 + 16 + 8254 = 0x304E (in sector 0x3000 with 4 KiB sectors)
+    // Next entry would be at: 0x3050 (also in sector 0x3000)
+
+    term argv[3];
+    argv[0] = term_from_int(10);
+    term stream1 = new_nif(ctx, 1, argv);
+
+    // Append 8254 bytes of native code
+    uint8_t code1[8254];
+    memset(code1, 0xAB, sizeof(code1));
+    argv[0] = stream1;
+    argv[1] = make_binary_rooted(ctx, code1, sizeof(code1), &argv[0], 1);
+    stream1 = append_nif(ctx, 2, argv);
+
+    argv[0] = stream1;
+    term stream1_flushed = flush_nif(ctx, 1, argv);
+    ModuleNativeEntryPoint entry1 = jit_stream_flash_entry_point(ctx, stream1_flushed);
+
+    Module fake_mod1;
+    fake_mod1.code = (CodeChunk *) 0x12345678;
+    globalcontext_set_cache_native_code(glb, &fake_mod1, 1, entry1, 30);
+
+    // Compute CRC of first module (JITEntry header located just before the entry point, plus its data)
+    uintptr_t data_addr1 = jit_stream_flash_platform_executable_to_ptr((uintptr_t) entry1);
+    struct JITEntry *jit_entry1 = (struct JITEntry *) (data_addr1 - sizeof(struct JITEntry));
+    uint32_t crc1_after_finalize = crc32((const uint8_t *) jit_entry1, sizeof(struct JITEntry) + jit_entry1->size);
+    fprintf(stderr, "First module: entry=%p size=%u CRC=0x%08x\n",
+        (void *) jit_entry1, (unsigned int) jit_entry1->size, (unsigned int) crc1_after_finalize);
+
+    // Verify first module does not start and end in the same sector
+    uintptr_t entry1_addr = (uintptr_t) jit_entry1;
+    uintptr_t entry1_end = entry1_addr + sizeof(struct JITEntry) + jit_entry1->size;
+    uintptr_t entry1_sector = entry1_addr & ~(FLASH_SECTOR_SIZE - 1);
+    uintptr_t entry1_end_sector = entry1_end & ~(FLASH_SECTOR_SIZE - 1);
+    fprintf(stderr, "First module: starts in sector 0x%lx, ends at 0x%lx (sector 0x%lx)\n",
+        (unsigned long) entry1_sector, (unsigned long) entry1_end,
+        (unsigned long) entry1_end_sector);
+
+    if (entry1_sector == entry1_end_sector) {
+        fprintf(stderr, "FAIL: Test setup error - first module should span sectors\n");
+        exit(1);
+    }
+
+    // Create second module - THIS SHOULD NOT CORRUPT THE FIRST MODULE
+    argv[0] = term_from_int(10);
+    term stream2 = new_nif(ctx, 1, argv);
+
+    uint8_t code2[100];
+    memset(code2, 0xCD, sizeof(code2));
+    argv[0] = stream2;
+    argv[1] = make_binary_rooted(ctx, code2, sizeof(code2), &argv[0], 1);
+    stream2 = append_nif(ctx, 2, argv);
+
+    argv[0] = stream2;
+    term stream2_flushed = flush_nif(ctx, 1, argv);
+    ModuleNativeEntryPoint entry2 = jit_stream_flash_entry_point(ctx, stream2_flushed);
+
+    Module fake_mod2;
+    fake_mod2.code = (CodeChunk *) 0x87654321;
+    globalcontext_set_cache_native_code(glb, &fake_mod2, 1, entry2, 20);
+
+    // Verify first module's CRC is still intact
+    uint32_t crc1_after_second = crc32((const uint8_t *) jit_entry1, sizeof(struct JITEntry) + jit_entry1->size);
+    fprintf(stderr, "First module after second: CRC=0x%08x (expected 0x%08x)\n",
+        (unsigned int) crc1_after_second, (unsigned int) crc1_after_finalize);
+
+    if (crc1_after_second != crc1_after_finalize) {
+        fprintf(stderr, "FAIL: First module corrupted after creating second module!\n");
+        fprintf(stderr, "Expected CRC: 0x%08x, Got: 0x%08x\n",
+            (unsigned int) crc1_after_finalize, (unsigned int) crc1_after_second);
+        exit(1);
+    }
+
+    scheduler_terminate(ctx);
+    globalcontext_destroy(glb);
+
+    fprintf(stderr, "PASS: Tail corruption bug test\n");
+}
+
+// Test 9: A stream abandoned without flush must not corrupt the already finalized first module
+static void test_stale_data_cleanup(void)
+{
+    fprintf(stderr, "\n=== Test: Stale Data Cleanup After Failed Compilation ===\n");
+
+    // Initialize flash: sector 0 erased (AVM), rest is garbage
+    memset(mock_flash, 0x00, MOCK_FLASH_SIZE);
+    memset(&mock_flash[0], 0xFF, FLASH_SECTOR_SIZE);
+
+    create_minimal_avmpack();
+
+    GlobalContext *glb = globalcontext_new();
+    Context *ctx = context_new(glb);
+
+    register_test_avmpack(glb);
+    jit_stream_flash_init(glb);
+
+    nif_function new_nif = get_nif("jit_stream_flash:new/1");
+    nif_function append_nif = get_nif("jit_stream_flash:append/2");
+    nif_function flush_nif = get_nif("jit_stream_flash:flush/1");
+
+    // Create first module and finalize it (small, stays in first sector after AVM)
+    term argv[3];
+    argv[0] = term_from_int(10);
+    term stream1 = new_nif(ctx, 1, argv);
+
+    uint8_t code1[500];
+    memset(code1, 0xAA, sizeof(code1));
+    argv[0] = stream1;
+    argv[1] = make_binary_rooted(ctx, code1, sizeof(code1), &argv[0], 1);
+    stream1 = append_nif(ctx, 2, argv);
+
+    argv[0] = stream1;
+    term stream1_flushed = flush_nif(ctx, 1, argv);
+    ModuleNativeEntryPoint entry1 = jit_stream_flash_entry_point(ctx, stream1_flushed);
+
+    Module fake_mod1;
+    fake_mod1.code = (CodeChunk *) 0x12345678;
+    globalcontext_set_cache_native_code(glb, &fake_mod1, 1, entry1, 30);
+
+    // Compute CRC of first module (JITEntry header located just before the entry point, plus its data)
+    uintptr_t data_addr1 = jit_stream_flash_platform_executable_to_ptr((uintptr_t) entry1);
+    struct JITEntry *jit_entry1 = (struct JITEntry *) (data_addr1 - sizeof(struct JITEntry));
+    uint32_t crc1_original = crc32((const uint8_t *) jit_entry1, sizeof(struct JITEntry) + jit_entry1->size);
+    fprintf(stderr, "First module: CRC=0x%08x, size=%u bytes\n",
+        (unsigned int) crc1_original, (unsigned int) jit_entry1->size);
+
+    // Start creating a second module but DON'T finalize (simulate crash/OOM)
+    argv[0] = term_from_int(10);
+    term stream2_attempt1 = new_nif(ctx, 1, argv);
+
+    uint8_t code2[200];
+    memset(code2, 0xBB, sizeof(code2));
+    argv[0] = stream2_attempt1;
+    argv[1] = make_binary_rooted(ctx, code2, sizeof(code2), &argv[0], 1);
+    stream2_attempt1 = append_nif(ctx, 2, argv);
+
+    // DON'T flush or finalize - this simulates a failed compilation
+    // Now there's stale data in flash after the first module
+
+    fprintf(stderr, "Simulated failed compilation - stale data left in flash\n");
+
+    // Try to create the second module again - should detect and clean up stale data
+    argv[0] = term_from_int(10);
+    term stream2_attempt2 = new_nif(ctx, 1, argv);
+
+    memset(code2, 0xCC, sizeof(code2));
+    argv[0] = stream2_attempt2;
+    argv[1] = make_binary_rooted(ctx, code2, sizeof(code2), &argv[0], 1);
+    stream2_attempt2 = append_nif(ctx, 2, argv);
+
+    argv[0] = stream2_attempt2;
+    term stream2_flushed = flush_nif(ctx, 1, argv);
+    ModuleNativeEntryPoint entry2 = jit_stream_flash_entry_point(ctx, stream2_flushed);
+
+    Module fake_mod2;
+    fake_mod2.code = (CodeChunk *) 0x87654321;
+    globalcontext_set_cache_native_code(glb, &fake_mod2, 1, entry2, 20);
+
+    fprintf(stderr, "Second module successfully created after cleanup\n");
+
+    // Verify first module's CRC is still intact
+    uint32_t crc1_after_cleanup = crc32((const uint8_t *) jit_entry1, sizeof(struct JITEntry) + jit_entry1->size);
+    fprintf(stderr, "First module after cleanup: CRC=0x%08x (expected 0x%08x)\n",
+        (unsigned int) crc1_after_cleanup, (unsigned int) crc1_original);
+
+    if (crc1_after_cleanup != crc1_original) {
+        fprintf(stderr, "FAIL: First module corrupted during stale data cleanup!\n");
+        exit(1);
+    }
+
+    scheduler_terminate(ctx);
+    globalcontext_destroy(glb);
+
+    fprintf(stderr, "PASS: Stale data cleanup test\n");
+}
+
+int main(int argc, char **argv)
+{
+    UNUSED(argc); // command-line arguments are not used
+    UNUSED(argv);
+
+    fprintf(stderr, "Starting jit_stream_flash tests...\n");
+
+    test_basic_append_flush(); // each test resets mock_flash and creates its own GlobalContext
+    test_multiple_appends();
+    test_replace();
+    test_second_module_bug();
+    test_magic_0xffff_but_garbage_bug();
+    test_garbage_flash_bug();
+    test_esp32_crash_bug();
+    test_tail_corruption_bug();
+    test_stale_data_cleanup();
+
+    fprintf(stderr, "\nAll tests passed!\n"); // reached only if no assert() or exit(1) fired above
+    return EXIT_SUCCESS;
+}
diff --git a/tests/test.c b/tests/test.c
index 577572c967..accbe9534a 100644
--- a/tests/test.c
+++ b/tests/test.c
@@ -607,6 +607,8 @@ struct Test tests[] = {
     TEST_CASE(test_lists_keymember),
     TEST_CASE(test_lists_keyfind),
 
+    TEST_CASE(test_inline_arith),
+
     // TEST CRASHES HERE: TEST_CASE(memlimit),
 
     { NULL, 0, false, false }
@@ -713,6 +715,11 @@ int test_modules_execution(bool beam, bool skip, int count, char **item)
             perror("Error: cannot find armv6m directory");
             return EXIT_FAILURE;
         }
+#elif JIT_ARCH_TARGET == JIT_ARCH_RISCV32
+        if (chdir("riscv32") != 0) {
+            perror("Error: cannot find riscv32 directory");
+            return EXIT_FAILURE;
+        }
 #else
 #error Unknown JIT target
 #endif