diff --git a/.github/workflows/build-and-test.yaml b/.github/workflows/build-and-test.yaml index 4c2c066994..f78249bcaa 100644 --- a/.github/workflows/build-and-test.yaml +++ b/.github/workflows/build-and-test.yaml @@ -366,6 +366,19 @@ jobs: arch: "s390x" library-arch: s390x-linux-gnu + # riscv32-ilp32 build + - os: "ubuntu-24.04" + cc: "riscv32-unknown-linux-gnu-gcc" + cxx: "riscv32-unknown-linux-gnu-g++" + cflags: "-O2" + otp: "28" + elixir_version: "1.17" + rebar3_version: "3.24.0" + cmake_opts_other: "-DAVM_WARNINGS_ARE_ERRORS=ON -DCMAKE_TOOLCHAIN_FILE=${RUNNER_TEMP}/riscv32_ilp32_toolchain.cmake" + compiler_pkgs: "qemu-user qemu-user-binfmt binfmt-support" + arch: "riscv32" + library-arch: riscv32-linux-gnu-ilp32 + env: ImageOS: ${{ matrix.container == 'ubuntu:20.04' && 'ubuntu20' || matrix.os == 'ubuntu-20.04' && 'ubuntu20' || matrix.os == 'ubuntu-22.04' && 'ubuntu22' || matrix.os == 'ubuntu-24.04' && 'ubuntu24' || 'ubuntu24' }} CC: ${{ matrix.cc }} @@ -386,7 +399,7 @@ jobs: run: sudo dpkg --add-architecture i386 - name: "Setup cross compilation architecture" - if: matrix.library-arch != '' + if: matrix.library-arch != '' && matrix.library-arch != 'riscv32-linux-gnu-ilp32' run: | sudo dpkg --add-architecture ${{ matrix.arch }} cat > ${RUNNER_TEMP}/cross-compile-sources.list <> $GITHUB_PATH + + # Install the libs + sudo dpkg -i libc6-ilp32_2.39-0ubuntu1_riscv32.deb + sudo dpkg -i libc6-dev-ilp32_2.39-0ubuntu1_riscv32.deb + sudo dpkg -i libc6-dbg-ilp32_2.39-0ubuntu1_riscv32.deb + + sudo dpkg -i zlib1g-ilp32_1.3.1-0ubuntu1_riscv32.deb + sudo dpkg -i zlib1g-dev-ilp32_1.3.1-0ubuntu1_riscv32.deb + + # Install mbedtls runtime packages first (in dependency order) + sudo dpkg -i libmbedcrypto7-ilp32_2.28.8-0ubuntu1_riscv32.deb + sudo dpkg -i libmbedx509-1-ilp32_2.28.8-0ubuntu1_riscv32.deb + sudo dpkg -i libmbedtls14-ilp32_2.28.8-0ubuntu1_riscv32.deb + # Then install the dev package + sudo dpkg -i libmbedtls-dev-ilp32_2.28.8-0ubuntu1_riscv32.deb + + sudo sed -i 
'/Types: deb/a Architectures: amd64' /etc/apt/sources.list.d/ubuntu.sources + + cat > ${RUNNER_TEMP}/riscv32_ilp32_toolchain.cmake <<'EOF' + # Toolchain file for RISC-V32 ILP32 (RV32-IMAC) cross-compilation + set(CMAKE_SYSTEM_NAME Linux) + set(CMAKE_SYSTEM_PROCESSOR riscv32) + set(CMAKE_C_LIBRARY_ARCHITECTURE riscv32-linux-gnu-ilp32) + + # Specify the cross compiler + set(CMAKE_C_COMPILER riscv32-unknown-linux-gnu-gcc) + set(CMAKE_CXX_COMPILER riscv32-unknown-linux-gnu-g++) + + # Specify the target architecture + set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -march=rv32imac -mabi=ilp32" CACHE STRING "" FORCE) + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -march=rv32imac -mabi=ilp32" CACHE STRING "" FORCE) + + # Set up paths for cross-compiled libraries + set(ZLIB_LIBRARY /usr/lib/riscv32-linux-gnu-ilp32/libz.so CACHE FILEPATH "") + set(ZLIB_INCLUDE_DIR /usr/include/riscv32-linux-gnu CACHE PATH "") + set(ZLIB_FOUND TRUE CACHE BOOL "") + + # MbedTLS configuration + set(MBEDTLS_ROOT_DIR /usr) + set(MBEDTLS_LIBRARIES_DIR /usr/lib/riscv32-linux-gnu-ilp32) + + # Add cross-compilation include path to compiler flags + include_directories(SYSTEM /usr/include/riscv32-linux-gnu) + + # Search for programs in the build host directories + set(CMAKE_FIND_ROOT_PATH_MODE_PROGRAM NEVER) + + # Search for libraries and headers in the target directories + set(CMAKE_FIND_ROOT_PATH_MODE_LIBRARY ONLY) + set(CMAKE_FIND_ROOT_PATH_MODE_INCLUDE ONLY) + set(CMAKE_FIND_ROOT_PATH_MODE_PACKAGE ONLY) + EOF + + # Set up qemu-user binfmt to find libraries + sudo ln -s /opt/riscv32-ilp32/sysroot/lib/ld-linux-riscv32-ilp32.so.1 /lib/ld-linux-riscv32-ilp32.so.1 + sudo mkdir -p /usr/gnemul + sudo ln -s /opt/riscv32-ilp32/sysroot /usr/gnemul/qemu-riscv32 + + # Copy cross-compiled libraries to sysroot for qemu-user + sudo cp /usr/lib/${{ matrix.library-arch }}/libz.so.1* /opt/riscv32-ilp32/sysroot/lib/ + sudo cp /usr/lib/${{ matrix.library-arch }}/libmbedtls.so.14 /opt/riscv32-ilp32/sysroot/lib/ + sudo cp /usr/lib/${{ 
matrix.library-arch }}/libmbedcrypto.so.7 /opt/riscv32-ilp32/sysroot/lib/ + sudo cp /usr/lib/${{ matrix.library-arch }}/libmbedx509.so.1 /opt/riscv32-ilp32/sysroot/lib/ + + env: + GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} + - name: "APT update" run: sudo apt update -y @@ -526,6 +630,19 @@ jobs: ulimit -c unlimited ./tests/test-heap + - name: "Test: test-jit_stream_flash with valgrind" + if: matrix.library-arch == '' + working-directory: build + run: | + ulimit -c unlimited + valgrind --error-exitcode=1 ./tests/test-jit_stream_flash + + - name: "Test: test-jit_stream_flash" + working-directory: build + run: | + ulimit -c unlimited + ./tests/test-jit_stream_flash + - name: "Test: test-mailbox with valgrind" if: matrix.library-arch == '' working-directory: build diff --git a/.github/workflows/pico-build.yaml b/.github/workflows/pico-build.yaml index 9cf01d045a..c5ce30f371 100644 --- a/.github/workflows/pico-build.yaml +++ b/.github/workflows/pico-build.yaml @@ -41,7 +41,17 @@ jobs: strategy: matrix: board: ["pico", "pico_w", "pico2"] + platform: [""] language: ["cpp"] + jit: ["", "-DAVM_DISABLE_JIT=OFF"] + include: + - board: "pico2" + platform: "-DPICO_PLATFORM=rp2350-riscv" + jit: "" + + - board: "pico2" + platform: "-DPICO_PLATFORM=rp2350-riscv" + jit: "-DAVM_DISABLE_JIT=OFF" steps: - name: Checkout repo @@ -57,6 +67,16 @@ jobs: libnewlib-arm-none-eabi libstdc++-arm-none-eabi-newlib \ erlang-base erlang-dev erlang-dialyzer erlang-eunit rebar3 + - name: Install riscv32 toolchain + if: matrix.platform == '-DPICO_PLATFORM=rp2350-riscv' + run: | + sudo mkdir -p /opt + cd /opt + sudo wget https://github.com/raspberrypi/pico-sdk-tools/releases/download/v2.2.0-3/riscv-toolchain-15-x86_64-lin.tar.gz + sudo tar xzf riscv-toolchain-15-x86_64-lin.tar.gz + ls /opt + echo "/opt/riscv-toolchain-15-x86_64-lin/bin" >> $GITHUB_PATH + - name: "Git config safe.directory for codeql" run: git config --global --add safe.directory /__w/AtomVM/AtomVM @@ -74,7 +94,7 @@ jobs: set -euo pipefail 
mkdir build cd build - cmake .. -G Ninja -DPICO_BOARD=${{ matrix.board }} + cmake .. -G Ninja -DPICO_BOARD=${{ matrix.board }} ${{ matrix.platform }} ${{ matrix.jit }} ninja - name: "Perform CodeQL Analysis" @@ -97,7 +117,7 @@ jobs: mkdir build.nosmp cd build.nosmp # TODO: fix all warnings and enable -DAVM_WARNINGS_ARE_ERRORS=ON - cmake .. -G Ninja -DPICO_BOARD=${{ matrix.board }} -DAVM_DISABLE_SMP=1 + cmake .. -G Ninja -DPICO_BOARD=${{ matrix.board }} ${{ matrix.jit }} -DAVM_DISABLE_SMP=1 cmake --build . --target=rp2_tests - name: Run tests with rp2040js @@ -112,7 +132,7 @@ jobs: npx tsx run-tests.ts ../build.nosmp/tests/rp2_tests.uf2 ../build.nosmp/tests/test_erl_sources/rp2_test_modules.uf2 - name: Build atomvmlib.uf2 - if: startsWith(github.ref, 'refs/tags/') && matrix.board != 'pico_w' + if: startsWith(github.ref, 'refs/tags/') && matrix.board != 'pico_w' && matrix.platform == '' && matrix.jit == '' shell: bash run: | set -euo pipefail @@ -122,7 +142,7 @@ jobs: make atomvmlib-${{ matrix.board }}.uf2 - name: Rename AtomVM and write sha256sum - if: startsWith(github.ref, 'refs/tags/') + if: startsWith(github.ref, 'refs/tags/') && matrix.platform == '' && matrix.jit == '' shell: bash run: | pushd src/platforms/rp2/build @@ -137,7 +157,7 @@ jobs: popd - name: Rename atomvmlib and write sha256sum - if: startsWith(github.ref, 'refs/tags/') && matrix.board != 'pico_w' + if: startsWith(github.ref, 'refs/tags/') && matrix.board != 'pico_w' && matrix.platform == '' && matrix.jit == '' shell: bash run: | pushd build/libs @@ -148,7 +168,7 @@ jobs: - name: Release (Pico & Pico2) uses: softprops/action-gh-release@v1 - if: startsWith(github.ref, 'refs/tags/') && matrix.board != 'pico_w' + if: startsWith(github.ref, 'refs/tags/') && matrix.board != 'pico_w' && matrix.platform == '' && matrix.jit == '' with: draft: true fail_on_unmatched_files: true @@ -160,7 +180,7 @@ jobs: - name: Release (PicoW) uses: softprops/action-gh-release@v1 - if: startsWith(github.ref, 'refs/tags/') 
&& matrix.board == 'pico_w' + if: startsWith(github.ref, 'refs/tags/') && matrix.board == 'pico_w' && matrix.platform == '' && matrix.jit == '' with: draft: true fail_on_unmatched_files: true diff --git a/CMakeLists.txt b/CMakeLists.txt index 32484ee851..307917422f 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -64,7 +64,7 @@ if (NOT AVM_DISABLE_JIT AND NOT DEFINED AVM_JIT_TARGET_ARCH) endif() endif() -set(AVM_PRECOMPILED_TARGETS "x86_64;aarch64;armv6m;armv6m+float32" CACHE STRING "Targets to precompile code to if AVM_DISABLE_JIT is OFF or AVM_ENABLE_PRECOMPILED is ON") +set(AVM_PRECOMPILED_TARGETS "x86_64;aarch64;armv6m;armv6m+float32;riscv32" CACHE STRING "Targets to precompile code to if AVM_DISABLE_JIT is OFF or AVM_ENABLE_PRECOMPILED is ON") if((${CMAKE_SYSTEM_NAME} STREQUAL "Darwin") OR (${CMAKE_SYSTEM_NAME} STREQUAL "Linux") OR diff --git a/doc/src/atomvm-internals.md b/doc/src/atomvm-internals.md index 60e2919b02..4d30e6767d 100644 --- a/doc/src/atomvm-internals.md +++ b/doc/src/atomvm-internals.md @@ -137,7 +137,7 @@ Following BEAM, there are two flavors of the emulator: jit and emu, but eventual - Native: the VM only runs native code and all code must be precompiled on the desktop using the JIT compiler (which effectively is a AOT or Ahead-of-Time compiler). In this mode, it is not necessary to bundle the jit compiler on the embedded target. - Hybrid: the VM can run native code as well as emulated BEAM code and some code is precompiled on the desktop. -JIT is available on some platforms (currently only x86_64 and aarch64) and compiles Erlang bytecode at runtime. Erlang bytecode is never interpreted. EMU is available on all platforms and Erlang bytecode is interpreted. +JIT is available on some platforms (currently x86_64, aarch64, armv6m and riscv32) and compiles Erlang bytecode at runtime. Erlang bytecode is never interpreted. EMU is available on all platforms and Erlang bytecode is interpreted. 
Modules can include precompiled code in a dedicated beam chunk with name 'avmN'. The chunk can contain native code for several architectures, however it may only contain native code for a given version of the native interface. Current version is 1. This native code is executed by the jit-flavor of the emulator as well as the emu flavor if execution of precompiled is enabled. @@ -154,9 +154,37 @@ The JIT compiler is written in Erlang and is therefore precompiled. When a proce JIT compiler is composed of two main interfaces : backend and stream. -A backend implementation is required for each architecture. The backend is called by jit module as it translates bytecodes to machine code. The current implementations are `jit_x86_64` and `jit_aarch64` which are suitable for systems with System V X86 64 ABI or AArch64 ABI. +A backend implementation is required for each architecture. The backend is called by jit module as it translates bytecodes to machine code. The current implementations are : +- `jit_x86_64` for System V X86 64 ABI +- `jit_aarch64` for AArch64 ABI +- `jit_armv6m` for AArch32 ABI +- `jit_riscv32` for rv32imc ilp32 ABI. -A stream implementation is responsible for streaming the machine code, especially in the context of low memory. Two implementations currently exist: `jit_stream_binary` that streams assembly code to an Erlang binary, suitable for tests and precompilation on the desktop, and `jit_stream_mmap` that streams assembly code in an `mmap(2)` allocated page, suitable for JIT compilation on Unix. +A stream implementation is responsible for streaming the machine code, especially in the context of low memory. Three implementations currently exist: +- `jit_stream_binary` that streams assembly code to an Erlang binary, suitable for tests and precompilation on the desktop +- `jit_stream_mmap` that streams assembly code in an `mmap(2)` allocated page, suitable for JIT compilation on Unix +- `jit_stream_flash` available on Pico that allows for embedded JIT. 
+ +### Embedded JIT and Native + +On embedded devices, Native mode means the code is precompiled on the desktop and executed natively on the device. This currently works on all ARMv6M devices (Pico and STM32). + +The default partition scheme on all platforms is optimized for the Emulated VM which is larger than the JIT or Native VM, and for the Emulated atomvmlib (with no native code for estdlib and no jit library) which is smaller than the JIT atomvmlib (that includes native code for estdlib and jit library). + +JIT mode means the Erlang bytecode is compiled to native code directly on the device. This actually is possible on Raspberry Pi Pico by using the flash to store the native code. The first time the code is executed, it is compiled and streamed to flash, and for next runs (including at a future boot), the native code is directly executed. + +To achieve embedded JIT, it is required to flash the device with the JIT compiler for armv6m which is part of the jit library. This library is quite large, so for Pico boards that come with 2MB of flash, it is required to remove jit modules for other backends. It is also required to change the way code is partitioned. + +For example, it is possible to have the following offsets defined in `src/platforms/rp2/src/main.c`: + +``` +#define LIB_AVM ((void *) 0x10060000) +#define MAIN_AVM ((void *) 0x101B0000) +``` + +To fit in the lib partition, all networking modules should also be removed (the Pico doesn't have any networking capacity). + +After the first run, compiled modules in flash are used unless there is a version mismatch or the application avm or the library avm have been updated on the device. AVM packages end with a section called "end" (0x656E64). When the JIT compiler flashes native code, it changes this name to "END" (0x454E44), by effectively clearing 3 bits in the flash, which is possible without erasing any flash block. Any rewrite of these avm packages will overwrite the section names to "end".
## The Scheduler diff --git a/libs/estdlib/src/code_server.erl b/libs/estdlib/src/code_server.erl index 427d5fa529..8d20574cf6 100644 --- a/libs/estdlib/src/code_server.erl +++ b/libs/estdlib/src/code_server.erl @@ -39,6 +39,7 @@ atom_resolver/2, literal_resolver/2, type_resolver/2, + import_resolver/2, set_native_code/3 ]). @@ -135,6 +136,14 @@ literal_resolver(_Module, _Index) -> type_resolver(_Module, _Index) -> erlang:nif_error(undefined). +%% @doc Get an imported function triplet from its index +%% @return The imported function as {Module, Function, Arity} +%% @param Module module to get the imported function from +%% @param Index imported function index in the module +-spec import_resolver(Module :: module(), Index :: non_neg_integer()) -> {atom(), atom(), non_neg_integer()}. +import_resolver(_Module, _Index) -> + erlang:nif_error(undefined). + %% @doc Associate a native code stream with a module %% @return ok %% @param Module module to set the native code of @@ -152,7 +161,7 @@ set_native_code(_Module, _LabelsCount, _Stream) -> load(Module) -> case erlang:system_info(emu_flavor) of jit -> - % atomvm_heap_growth, fibonacci divides compilation time by two + % atomvm_heap_growth, fibonacci reduces compilation time {Pid, Ref} = spawn_opt( fun() -> try @@ -164,18 +173,23 @@ load(Module) -> code_server:literal_resolver(Module, Index) end, TypeResolver = fun(Index) -> code_server:type_resolver(Module, Index) end, - Stream0 = jit:stream(jit_mmap_size(byte_size(Code))), - {BackendModule, BackendState0} = jit:backend(Stream0), + ImportResolver = fun(Index) -> + code_server:import_resolver(Module, Index) + end, + {StreamModule, Stream0} = jit:stream(jit_mmap_size(byte_size(Code))), + {BackendModule, BackendState0} = jit:backend(StreamModule, Stream0), {LabelsCount, BackendState1} = jit:compile( Code, AtomResolver, LiteralResolver, TypeResolver, + ImportResolver, BackendModule, BackendState0 ), Stream1 = BackendModule:stream(BackendState1), - 
code_server:set_native_code(Module, LabelsCount, Stream1), + Stream2 = StreamModule:flush(Stream1), + code_server:set_native_code(Module, LabelsCount, Stream2), End = erlang:system_time(millisecond), io:format("~B ms (bytecode: ~B bytes, native code: ~B bytes)\n", [ End - Start, byte_size(Code), BackendModule:offset(BackendState1) diff --git a/libs/jit/include/jit.hrl b/libs/jit/include/jit.hrl index b006c5f34f..81ff1c42c2 100644 --- a/libs/jit/include/jit.hrl +++ b/libs/jit/include/jit.hrl @@ -23,6 +23,7 @@ -define(JIT_ARCH_X86_64, 1). -define(JIT_ARCH_AARCH64, 2). -define(JIT_ARCH_ARMV6M, 3). +-define(JIT_ARCH_RISCV32, 4). -define(JIT_VARIANT_PIC, 1). -define(JIT_VARIANT_FLOAT32, 2). diff --git a/libs/jit/src/CMakeLists.txt b/libs/jit/src/CMakeLists.txt index 7aad016575..ae62643c30 100644 --- a/libs/jit/src/CMakeLists.txt +++ b/libs/jit/src/CMakeLists.txt @@ -31,6 +31,8 @@ set(ERLANG_MODULES jit_aarch64_asm jit_armv6m jit_armv6m_asm + jit_riscv32 + jit_riscv32_asm jit_x86_64 jit_x86_64_asm ) diff --git a/libs/jit/src/jit.erl b/libs/jit/src/jit.erl index 1992bf5841..0a5d4689b1 100644 --- a/libs/jit/src/jit.erl +++ b/libs/jit/src/jit.erl @@ -22,9 +22,10 @@ -export([ stream/1, - backend/1, + backend/2, beam_chunk_header/3, - compile/6 + compile/7, + decode_value64/1 ]). % NIFs @@ -100,7 +101,9 @@ labels_count :: pos_integer(), atom_resolver :: fun((integer()) -> atom()), literal_resolver :: fun((integer()) -> any()), - type_resolver :: fun((integer()) -> any()) + type_resolver :: fun((integer()) -> any()), + import_resolver :: fun((integer()) -> {atom(), atom(), non_neg_integer()}), + tail_cache :: [{tuple(), non_neg_integer()}] }). -type stream() :: any(). @@ -113,6 +116,14 @@ -define(ASSERT_ALL_NATIVE_FREE(St), ok). -define(ASSERT(Expr), ok). +%-define(JIT_INSTRUMENT, true). + +-ifdef(JIT_INSTRUMENT). +-define(INSTRUMENT(Tag, State, MSt), instrument(Tag, State, MSt)). +-else. +-define(INSTRUMENT(Tag, State, MSt), ok). +-endif. 
+ %%----------------------------------------------------------------------------- %% @param LabelsCount number of labels %% @param Arch code for the architecture @@ -133,30 +144,40 @@ compile( AtomResolver, LiteralResolver, TypeResolver, + ImportResolver, MMod, MSt0 ) when OpcodeMax =< ?OPCODE_MAX -> - MSt1 = MMod:jump_table(MSt0, LabelsCount), State0 = #state{ line_offsets = [], labels_count = LabelsCount, atom_resolver = AtomResolver, literal_resolver = LiteralResolver, - type_resolver = TypeResolver + type_resolver = TypeResolver, + import_resolver = ImportResolver, + tail_cache = [] }, + ?INSTRUMENT("compile_start", State0, MSt0), + MSt1 = MMod:jump_table(MSt0, LabelsCount), + ?INSTRUMENT("after_jump_table", State0, MSt1), {State1, MSt2} = first_pass(Opcodes, MMod, MSt1, State0), + ?INSTRUMENT("after_first_pass", State1, MSt2), MSt3 = second_pass(MMod, MSt2, State1), - {LabelsCount, MSt3}; + ?INSTRUMENT("after_second_pass", State1, MSt3), + MSt4 = MMod:flush(MSt3), + ?INSTRUMENT("after_flush", State1, MSt4), + {LabelsCount, MSt4}; compile( <<16:32, 0:32, OpcodeMax:32, _LabelsCount:32, _FunctionsCount:32, _Opcodes/binary>>, _AtomResolver, _LiteralResolver, _TypeResolver, + _ImportResolver, _MMod, _MSt ) -> error(badarg, [OpcodeMax]); -compile(CodeChunk, _AtomResolver, _LiteralResolver, _TypeResolver, _MMod, _MSt) -> +compile(CodeChunk, _AtomResolver, _LiteralResolver, _TypeResolver, _ImportResolver, _MMod, _MSt) -> error(badarg, [CodeChunk]). % 1 @@ -170,18 +191,30 @@ first_pass( ?ASSERT_ALL_NATIVE_FREE(MSt1), first_pass(Rest1, MMod, MSt1, State0); % 2 -first_pass(<>, MMod, MSt0, State0) -> +first_pass(<>, MMod, MSt0, #state{tail_cache = TC} = State0) -> ?ASSERT_ALL_NATIVE_FREE(MSt0), {_ModuleAtom, Rest1} = decode_atom(Rest0), {_FunctionName, Rest2} = decode_atom(Rest1), {_Arity, Rest3} = decode_literal(Rest2), ?TRACE("OP_FUNC_INFO ~p, ~p, ~p\n", [_ModuleAtom, _FunctionName, _Arity]), - % Implement function clause at the previous label. 
(TODO: optimize it out to save space) - MSt1 = MMod:call_primitive_last(MSt0, ?PRIM_RAISE_ERROR, [ - ctx, jit_state, offset, ?FUNCTION_CLAUSE_ATOM - ]), - ?ASSERT_ALL_NATIVE_FREE(MSt1), - first_pass(Rest3, MMod, MSt1, State0); + % Implement function clause at the previous label. + Offset = MMod:offset(MSt0), + {MSt1, OffsetReg} = MMod:move_to_native_register(MSt0, Offset), + TailCacheKey = {call_primitive_last, ?PRIM_RAISE_ERROR, [OffsetReg, ?FUNCTION_CLAUSE_ATOM]}, + State1 = + case lists:keyfind(TailCacheKey, 1, TC) of + false -> + MSt3 = MMod:call_primitive_last(MSt1, ?PRIM_RAISE_ERROR, [ + ctx, jit_state, {free, OffsetReg}, ?FUNCTION_CLAUSE_ATOM + ]), + State0#state{tail_cache = [{TailCacheKey, Offset} | TC]}; + {TailCacheKey, CacheOffset} -> + MSt2 = MMod:jump_to_offset(MSt1, CacheOffset), + MSt3 = MMod:free_native_registers(MSt2, [OffsetReg]), + State0 + end, + ?ASSERT_ALL_NATIVE_FREE(MSt3), + first_pass(Rest3, MMod, MSt3, State1); % 3 first_pass( <>, MMod, MSt0, #state{labels_count = LabelsCount} = State @@ -203,26 +236,56 @@ first_pass(<>, MMod, MSt0, State0) -> ?ASSERT_ALL_NATIVE_FREE(MSt1), first_pass(Rest2, MMod, MSt1, State0); % 5 -first_pass(<>, MMod, MSt0, State0) -> +first_pass(<>, MMod, MSt0, #state{tail_cache = TC} = State0) -> ?ASSERT_ALL_NATIVE_FREE(MSt0), {_Arity, Rest1} = decode_literal(Rest0), {Label, Rest2} = decode_label(Rest1), {NWords, Rest3} = decode_literal(Rest2), ?TRACE("OP_CALL_LAST ~p, ~p, ~p\n", [_Arity, Label, NWords]), - MSt1 = MMod:move_to_cp(MSt0, {y_reg, NWords}), - MSt2 = MMod:increment_sp(MSt1, NWords + 1), - MSt3 = MMod:call_only_or_schedule_next(MSt2, Label), + TailCacheKey0 = {op_call_last, NWords, Label}, + case lists:keyfind(TailCacheKey0, 1, TC) of + false -> + Offset0 = MMod:offset(MSt0), + MSt1 = MMod:move_to_cp(MSt0, {y_reg, NWords}), + MSt2 = MMod:increment_sp(MSt1, NWords + 1), + TailCacheKey1 = {op_call_only, Label}, + case lists:keyfind(TailCacheKey1, 1, TC) of + false -> + Offset1 = MMod:offset(MSt2), + MSt3 = 
MMod:call_only_or_schedule_next(MSt2, Label), + State1 = State0#state{ + tail_cache = [{TailCacheKey1, Offset1}, {TailCacheKey0, Offset0} | TC] + }; + {TailCacheKey1, Offset1} -> + MSt3 = MMod:jump_to_offset(MSt2, Offset1), + State1 = State0#state{ + tail_cache = [{TailCacheKey0, Offset0} | TC] + } + end; + {TailCacheKey0, Offset0} -> + MSt3 = MMod:jump_to_offset(MSt0, Offset0), + State1 = State0 + end, ?ASSERT_ALL_NATIVE_FREE(MSt3), - first_pass(Rest3, MMod, MSt3, State0); + first_pass(Rest3, MMod, MSt3, State1); % 6 -first_pass(<>, MMod, MSt0, State0) -> +first_pass(<>, MMod, MSt0, #state{tail_cache = TC} = State0) -> ?ASSERT_ALL_NATIVE_FREE(MSt0), {_Arity, Rest1} = decode_literal(Rest0), {Label, Rest2} = decode_label(Rest1), ?TRACE("OP_CALL_ONLY ~p, ~p\n", [_Arity, Label]), - MSt1 = MMod:call_only_or_schedule_next(MSt0, Label), + TailCacheKey = {op_call_only, Label}, + case lists:keyfind(TailCacheKey, 1, TC) of + false -> + Offset = MMod:offset(MSt0), + MSt1 = MMod:call_only_or_schedule_next(MSt0, Label), + State1 = State0#state{tail_cache = [{TailCacheKey, Offset} | TC]}; + {TailCacheKey, Offset} -> + MSt1 = MMod:jump_to_offset(MSt0, Offset), + State1 = State0 + end, ?ASSERT_ALL_NATIVE_FREE(MSt1), - first_pass(Rest2, MMod, MSt1, State0); + first_pass(Rest2, MMod, MSt1, State1); % 7 first_pass(<>, MMod, MSt0, State0) -> ?ASSERT_ALL_NATIVE_FREE(MSt0), @@ -348,7 +411,7 @@ first_pass(<>, MMod, MSt0, State0) -> ?ASSERT_ALL_NATIVE_FREE(MSt2), first_pass(Rest1, MMod, MSt2, State0); % 19 -first_pass(<>, MMod, MSt0, State0) -> +first_pass(<>, MMod, MSt0, #state{tail_cache = TC} = State0) -> ?ASSERT_ALL_NATIVE_FREE(MSt0), ?TRACE("OP_RETURN\n", []), % Optimized return: check if returning within same module @@ -363,7 +426,7 @@ first_pass(<>, MMod, MSt0, State0) -> % Same module: fast intra-module return fun(BSt0) -> % Mask to get lower 24 bits and shift right by 2 for offset - BSt1 = MMod:and_(BSt0, CpReg0, 16#FFFFFF), + {BSt1, CpReg0} = MMod:and_(BSt0, {free, CpReg0}, 
16#FFFFFF), {BSt3, CPReg1} = MMod:shift_right(BSt1, {free, CpReg0}, 2), % Jump to continuation (this is a tail call) MMod:jump_to_continuation(BSt3, {free, CPReg1}) @@ -371,9 +434,18 @@ first_pass(<>, MMod, MSt0, State0) -> ), MSt5 = MMod:free_native_registers(MSt4, [CpReg0]), % Different module: use existing slow path - MSt6 = MMod:call_primitive_last(MSt5, ?PRIM_RETURN, [ctx, jit_state]), + TailCacheKey = {call_primitive_last, ?PRIM_RETURN}, + case lists:keyfind(TailCacheKey, 1, TC) of + false -> + Offset = MMod:offset(MSt5), + MSt6 = MMod:call_primitive_last(MSt5, ?PRIM_RETURN, [ctx, jit_state]), + State1 = State0#state{tail_cache = [{TailCacheKey, Offset} | TC]}; + {TailCacheKey, Offset} -> + MSt6 = MMod:jump_to_offset(MSt5, Offset), + State1 = State0 + end, ?ASSERT_ALL_NATIVE_FREE(MSt6), - first_pass(Rest, MMod, MSt6, State0); + first_pass(Rest, MMod, MSt6, State1); % 20 first_pass(<>, MMod, MSt0, State0) -> ?ASSERT_ALL_NATIVE_FREE(MSt0), @@ -499,16 +571,10 @@ first_pass(<>, MMod, MSt0, State0) -> first_pass(<>, MMod, MSt0, State0) -> ?ASSERT_ALL_NATIVE_FREE(MSt0), {Label, Rest1} = decode_label(Rest0), - {MSt1, Arg1, Rest2} = decode_compact_term(Rest1, MMod, MSt0, State0), - {MSt2, Arg2, Rest3} = decode_compact_term(Rest2, MMod, MSt1, State0), + {MSt1, Arg1, Rest2} = decode_typed_compact_term(Rest1, MMod, MSt0, State0), + {MSt2, Arg2, Rest3} = decode_typed_compact_term(Rest2, MMod, MSt1, State0), ?TRACE("OP_IS_GE ~p, ~p, ~p\n", [Label, Arg1, Arg2]), - {MSt3, ResultReg} = MMod:call_primitive(MSt2, ?PRIM_TERM_COMPARE, [ - ctx, jit_state, {free, Arg1}, {free, Arg2}, ?TERM_COMPARE_NO_OPTS - ]), - MSt4 = handle_error_if({'(int)', ResultReg, '==', ?TERM_COMPARE_MEMORY_ALLOC_FAIL}, MMod, MSt3), - MSt5 = cond_jump_to_label( - {'(int)', {free, ResultReg}, '==', ?TERM_LESS_THAN}, Label, MMod, MSt4 - ), + MSt5 = op_is_ge(MMod, MSt2, Label, Arg1, Arg2), ?ASSERT_ALL_NATIVE_FREE(MSt5), first_pass(Rest3, MMod, MSt5, State0); % 41 @@ -631,7 +697,7 @@ first_pass(<>, MMod, 
MSt0, State0) -> BSt1 = cond_jump_to_label( {Reg, '&', ?TERM_PRIMARY_MASK, '!=', ?TERM_PRIMARY_BOXED}, Label, MMod, BSt0 ), - BSt2 = MMod:and_(BSt1, Reg, ?TERM_PRIMARY_CLEAR_MASK), + {BSt2, Reg} = MMod:and_(BSt1, {free, Reg}, ?TERM_PRIMARY_CLEAR_MASK), BSt3 = MMod:move_array_element(BSt2, Reg, 0, Reg), % Optimization : ((Reg & 0x3F) != 0x8) && ((Reg & 0x3F) != 0x18) % is equivalent to (Reg & 0x2F) != 0x8 @@ -684,9 +750,9 @@ first_pass(<>, MMod, MSt0, State0) -> MSt3 = cond_jump_to_label( {Reg, '&', ?TERM_PRIMARY_MASK, '!=', ?TERM_PRIMARY_BOXED}, Label, MMod, MSt2 ), - MSt4 = MMod:and_(MSt3, Reg, ?TERM_PRIMARY_CLEAR_MASK), + {MSt4, Reg} = MMod:and_(MSt3, {free, Reg}, ?TERM_PRIMARY_CLEAR_MASK), MSt5 = MMod:move_array_element(MSt4, Reg, 0, Reg), - MSt6 = MMod:and_(MSt5, Reg, ?TERM_BOXED_TAG_MASK), + {MSt6, Reg} = MMod:and_(MSt5, {free, Reg}, ?TERM_BOXED_TAG_MASK), MSt7 = cond_jump_to_label( {'and', [{Reg, '!=', ?TERM_BOXED_REF}, {Reg, '!=', ?TERM_BOXED_EXTERNAL_REF}]}, Label, @@ -775,7 +841,7 @@ first_pass(<>, MMod, MSt0, State0) -> {Arity, Rest3} = decode_literal(Rest2), ?TRACE("OP_TEST_ARITY ~p, ~p, ~p\n", [Label, Arg1, Arity]), {MSt2, Reg} = MMod:move_to_native_register(MSt1, Arg1), - MSt3 = MMod:and_(MSt2, Reg, ?TERM_PRIMARY_CLEAR_MASK), + {MSt3, Reg} = MMod:and_(MSt2, {free, Reg}, ?TERM_PRIMARY_CLEAR_MASK), MSt4 = MMod:move_array_element(MSt3, Reg, 0, Reg), {MSt5, ArityReg} = MMod:shift_right(MSt4, {free, Reg}, 6), MSt6 = cond_jump_to_label({{free, ArityReg}, '!=', Arity}, Label, MMod, MSt5), @@ -836,13 +902,22 @@ first_pass(<>, MMod, MSt0, State0) -> ?ASSERT_ALL_NATIVE_FREE(MSt5), first_pass(Rest4, MMod, MSt5, State0); % 61 -first_pass(<>, MMod, MSt0, State0) -> +first_pass(<>, MMod, MSt0, #state{tail_cache = TC} = State0) -> ?ASSERT_ALL_NATIVE_FREE(MSt0), {Label, Rest1} = decode_label(Rest0), ?TRACE("OP_JUMP ~p\n", [Label]), - MSt1 = MMod:call_only_or_schedule_next(MSt0, Label), - ?ASSERT_ALL_NATIVE_FREE(MSt1), - first_pass(Rest1, MMod, MSt1, State0); + 
TailCacheKey = {op_call_only, Label}, + case lists:keyfind(TailCacheKey, 1, TC) of + false -> + Offset = MMod:offset(MSt0), + MSt1 = MMod:call_only_or_schedule_next(MSt0, Label), + ?ASSERT_ALL_NATIVE_FREE(MSt1), + first_pass(Rest1, MMod, MSt1, State0#state{tail_cache = [{TailCacheKey, Offset} | TC]}); + {TailCacheKey, Offset} -> + MSt1 = MMod:jump_to_offset(MSt0, Offset), + ?ASSERT_ALL_NATIVE_FREE(MSt1), + first_pass(Rest1, MMod, MSt1, State0) + end; % 62 % Same implementation as OP_TRY, to confirm. first_pass(<>, MMod, MSt0, State0) -> @@ -882,7 +957,7 @@ first_pass(<>, MMod, MSt0, State0) -> {MSt3, TailDest, Rest3} = decode_dest(Rest2, MMod, MSt2), ?TRACE("OP_GET_LIST ~p, ~p, ~p\n", [List, HeadDest, TailDest]), {MSt4, Reg} = MMod:move_to_native_register(MSt3, List), - MSt5 = MMod:and_(MSt4, Reg, ?TERM_PRIMARY_CLEAR_MASK), + {MSt5, Reg} = MMod:and_(MSt4, {free, Reg}, ?TERM_PRIMARY_CLEAR_MASK), MSt6 = MMod:move_array_element(MSt5, Reg, ?LIST_HEAD_INDEX, HeadDest), MSt7 = MMod:free_native_registers(MSt6, [HeadDest]), MSt8 = MMod:move_array_element(MSt7, Reg, ?LIST_TAIL_INDEX, TailDest), @@ -898,7 +973,7 @@ first_pass(<>, MMod, MSt0, State0) -> {MSt2, Dest, Rest3} = decode_dest(Rest2, MMod, MSt1), ?TRACE("OP_GET_TUPLE_ELEMENT ~p, ~p, ~p\n", [Source, Element, Dest]), {MSt3, Reg} = MMod:move_to_native_register(MSt2, Source), - MSt4 = MMod:and_(MSt3, Reg, ?TERM_PRIMARY_CLEAR_MASK), + {MSt4, Reg} = MMod:and_(MSt3, {free, Reg}, ?TERM_PRIMARY_CLEAR_MASK), MSt5 = MMod:move_array_element(MSt4, Reg, Element + 1, Dest), MSt6 = MMod:free_native_registers(MSt5, [Reg, Dest]), ?ASSERT_ALL_NATIVE_FREE(MSt6), @@ -911,7 +986,7 @@ first_pass(<>, MMod, MSt0, State0) -> {Position, Rest3} = decode_literal(Rest2), ?TRACE("OP_SET_TUPLE_ELEMENT ~p, ~p, ~p\n", [NewElement, Tuple, Position]), {MSt3, Reg} = MMod:move_to_native_register(MSt2, Tuple), - MSt4 = MMod:and_(MSt3, Reg, ?TERM_PRIMARY_CLEAR_MASK), + {MSt4, Reg} = MMod:and_(MSt3, {free, Reg}, ?TERM_PRIMARY_CLEAR_MASK), MSt5 = 
MMod:move_to_array_element(MSt4, NewElement, Reg, Position + 1), MSt6 = MMod:free_native_registers(MSt5, [NewElement, Reg]), ?ASSERT_ALL_NATIVE_FREE(MSt6), @@ -1011,7 +1086,7 @@ first_pass(<>, MMod, MSt0, State0) -> {MSt2, ResultReg} = MMod:call_primitive(MSt1, ?PRIM_CONTEXT_ENSURE_FPREGS, [ctx]), MSt3 = MMod:free_native_registers(MSt2, [ResultReg]), {MSt4, Reg} = MMod:move_to_native_register(MSt3, SrcValue), - MSt5 = MMod:and_(MSt4, Reg, ?TERM_PRIMARY_CLEAR_MASK), + {MSt5, Reg} = MMod:and_(MSt4, {free, Reg}, ?TERM_PRIMARY_CLEAR_MASK), MSt6 = MMod:move_to_vm_register(MSt5, {free, {ptr, Reg, 1}}, FPReg), ?ASSERT_ALL_NATIVE_FREE(MSt6), first_pass(Rest2, MMod, MSt6, State0); @@ -1294,7 +1369,7 @@ first_pass(<>, MMod, MSt0, State0) -> MMod:call_primitive_last(BlockSt, ?PRIM_RAISE_ERROR, [ctx, jit_state, offset, ?BADARG_ATOM]) end), {MSt8, BSOffsetReg1} = MMod:shift_right(MSt7, {free, BSOffsetReg0}, 3), - MSt9 = MMod:and_(MSt8, BSBinaryReg0, ?TERM_PRIMARY_CLEAR_MASK), + {MSt9, BSBinaryReg0} = MMod:and_(MSt8, {free, BSBinaryReg0}, ?TERM_PRIMARY_CLEAR_MASK), {MSt10, SizeReg} = MMod:get_array_element(MSt9, {free, BSBinaryReg0}, 1), {MSt13, SizeValue} = if @@ -1427,31 +1502,21 @@ first_pass(<>, MMod, MSt0, State0) -> ?ASSERT_ALL_NATIVE_FREE(MSt7), first_pass(Rest5, MMod, MSt7, State0); % 125 -first_pass(<>, MMod, MSt0, State0) -> +first_pass( + <>, MMod, MSt0, #state{import_resolver = ImportResolver} = State0 +) -> ?ASSERT_ALL_NATIVE_FREE(MSt0), {FailLabel, Rest1} = decode_label(Rest0), {Live, Rest2} = decode_literal(Rest1), - {MSt1, TrimResultReg} = MMod:call_primitive(MSt0, ?PRIM_TRIM_LIVE_REGS, [ctx, Live]), - MSt2 = MMod:free_native_registers(MSt1, [TrimResultReg]), - CappedLive = - if - Live > ?MAX_REG -> ?MAX_REG; - true -> Live - end, {Bif, Rest3} = decode_literal(Rest2), - {MSt3, FuncPtr} = MMod:call_primitive(MSt2, ?PRIM_GET_IMPORTED_BIF, [ - jit_state, Bif - ]), - {MSt4, Arg1, Rest4} = decode_compact_term(Rest3, MMod, MSt3, State0), - {MSt5, Arg2, Rest5} = 
decode_compact_term(Rest4, MMod, MSt4, State0), - {MSt6, Dest, Rest6} = decode_dest(Rest5, MMod, MSt5), + {MSt1, Arg1, Rest4} = decode_typed_compact_term(Rest3, MMod, MSt0, State0), + {MSt2, Arg2, Rest5} = decode_typed_compact_term(Rest4, MMod, MSt1, State0), + {MSt3, Dest, Rest6} = decode_dest(Rest5, MMod, MSt2), + {BifModule, BifFunName, 2} = ImportResolver(Bif), ?TRACE("OP_GC_BIF2 ~p, ~p, ~p, ~p, ~p, ~p\n", [FailLabel, Live, Bif, Arg1, Arg2, Dest]), - {MSt7, ResultReg} = MMod:call_func_ptr(MSt6, {free, FuncPtr}, [ - ctx, FailLabel, CappedLive, {free, Arg1}, {free, Arg2} - ]), - MSt8 = bif_faillabel_test(FailLabel, MMod, MSt7, {free, ResultReg}, {free, Dest}), - ?ASSERT_ALL_NATIVE_FREE(MSt8), - first_pass(Rest6, MMod, MSt8, State0); + MSt4 = op_gc_bif2(MMod, MSt3, FailLabel, Live, Bif, BifModule, BifFunName, Arg1, Arg2, Dest), + ?ASSERT_ALL_NATIVE_FREE(MSt4), + first_pass(Rest6, MMod, MSt4, State0); % 129 first_pass(<>, MMod, MSt0, State0) -> ?ASSERT_ALL_NATIVE_FREE(MSt0), @@ -1462,9 +1527,9 @@ first_pass(<>, MMod, MSt0, State0) -> MSt3 = cond_jump_to_label( {Reg, '&', ?TERM_PRIMARY_MASK, '!=', ?TERM_PRIMARY_BOXED}, Label, MMod, MSt2 ), - MSt4 = MMod:and_(MSt3, Reg, ?TERM_PRIMARY_CLEAR_MASK), + {MSt4, Reg} = MMod:and_(MSt3, {free, Reg}, ?TERM_PRIMARY_CLEAR_MASK), MSt5 = MMod:move_array_element(MSt4, Reg, 0, Reg), - MSt6 = MMod:and_(MSt5, Reg, ?TERM_BOXED_TAG_MASK), + {MSt6, Reg} = MMod:and_(MSt5, {free, Reg}, ?TERM_BOXED_TAG_MASK), MSt7 = cond_jump_to_label( {'and', [ {Reg, '!=', ?TERM_BOXED_REFC_BINARY}, @@ -1510,11 +1575,16 @@ first_pass(<>, MMod, MSt0, State0) -> ), MSt2 = handle_error_if({'(bool)', {free, MemoryEnsureFreeReg}, '==', false}, MMod, MSt1), {MSt3, CreatedBin} = MMod:call_primitive(MSt2, ?PRIM_TERM_CREATE_EMPTY_BINARY, [ctx, 0]), - MSt4 = MMod:set_bs(MSt3, CreatedBin), - MSt5 = MMod:move_to_vm_register(MSt4, CreatedBin, {x_reg, 0}), - MSt6 = MMod:free_native_registers(MSt5, [CreatedBin]), - ?ASSERT_ALL_NATIVE_FREE(MSt6), - first_pass(Rest0, MMod, 
MSt6, State0); + MSt4 = MMod:if_block(MSt3, {CreatedBin, '==', ?TERM_INVALID_TERM}, fun(BSt0) -> + MMod:call_primitive_last(BSt0, ?PRIM_RAISE_ERROR, [ + ctx, jit_state, offset, ?OUT_OF_MEMORY_ATOM + ]) + end), + MSt5 = MMod:set_bs(MSt4, CreatedBin), + MSt6 = MMod:move_to_vm_register(MSt5, CreatedBin, {x_reg, 0}), + MSt7 = MMod:free_native_registers(MSt6, [CreatedBin]), + ?ASSERT_ALL_NATIVE_FREE(MSt7), + first_pass(Rest0, MMod, MSt7, State0); % 136 first_pass(<>, MMod, MSt0, State0) -> ?ASSERT_ALL_NATIVE_FREE(MSt0), @@ -1778,7 +1848,7 @@ first_pass(<>, MMod, MSt0, State0) -> Src, Live, {free, SrcSizeReg}, MMod, MSt7 ), {MSt9, NewMapPtrReg} = MMod:call_primitive(MSt8, ?PRIM_TERM_COPY_MAP, [ctx, NewSrc]), - MSt10 = MMod:and_(MSt9, NewMapPtrReg, ?TERM_PRIMARY_CLEAR_MASK), + {MSt10, NewMapPtrReg} = MMod:and_(MSt9, {free, NewMapPtrReg}, ?TERM_PRIMARY_CLEAR_MASK), {MSt11, Rest6} = lists:foldl( fun(_Index, {ASt0, ARest0}) -> {ASt1, Key, ARest1} = decode_compact_term(ARest0, MMod, ASt0, State0), @@ -1880,14 +1950,13 @@ first_pass(<>, MMod, MSt0, State0) -> ]) end), {MSt6, SrcReg} = MMod:move_to_native_register(MSt5, Src), - {MSt7, MapReg} = MMod:copy_to_native_register(MSt6, SrcReg), - MSt8 = MMod:and_(MSt7, MapReg, ?TERM_PRIMARY_CLEAR_MASK), - MSt9 = MMod:add(MSt8, MapReg, MMod:word_size() * 2), - {MSt10, Dest1, Rest5} = decode_dest(Rest4, MMod, MSt9), + {MSt7, MapReg} = MMod:and_(MSt6, SrcReg, ?TERM_PRIMARY_CLEAR_MASK), + MSt8 = MMod:add(MSt7, MapReg, MMod:word_size() * 2), + {MSt9, Dest1, Rest5} = decode_dest(Rest4, MMod, MSt8), ?TRACE(",~p", [Dest1]), - MSt11 = MMod:move_array_element(MSt10, MapReg, {free, PosReg1}, Dest1), - MSt12 = MMod:free_native_registers(MSt11, [Dest1]), - {MSt13, Rest6} = lists:foldl( + MSt10 = MMod:move_array_element(MSt9, MapReg, {free, PosReg1}, Dest1), + MSt11 = MMod:free_native_registers(MSt10, [Dest1]), + {MSt12, Rest6} = lists:foldl( fun(_Index, {AccMSt0, AccRest0}) -> {AccMSt1, Key, AccRest1} = decode_compact_term(AccRest0, MMod, AccMSt0, 
State0), ?TRACE(",~p", [Key]), @@ -1912,13 +1981,13 @@ first_pass(<>, MMod, MSt0, State0) -> AccMSt8 = MMod:free_native_registers(AccMSt7, [Dest]), {AccMSt8, AccRest2} end, - {MSt12, Rest5}, + {MSt11, Rest5}, lists:seq(2, ListSize div 2) ), ?TRACE("]\n", []), - MSt14 = MMod:free_native_registers(MSt13, [MapReg, SrcReg]), - ?ASSERT_ALL_NATIVE_FREE(MSt14), - first_pass(Rest6, MMod, MSt14, State0); + MSt13 = MMod:free_native_registers(MSt12, [MapReg, SrcReg]), + ?ASSERT_ALL_NATIVE_FREE(MSt13), + first_pass(Rest6, MMod, MSt13, State0); % 159 first_pass( <>, MMod, MSt0, #state{atom_resolver = AtomResolver} = State0 @@ -1933,7 +2002,7 @@ first_pass( MSt3 = cond_jump_to_label( {Reg, '&', ?TERM_PRIMARY_MASK, '!=', ?TERM_PRIMARY_BOXED}, Label, MMod, MSt2 ), - MSt4 = MMod:and_(MSt3, Reg, ?TERM_PRIMARY_CLEAR_MASK), + {MSt4, Reg} = MMod:and_(MSt3, {free, Reg}, ?TERM_PRIMARY_CLEAR_MASK), {MSt5, TagReg0} = MMod:get_array_element(MSt4, Reg, 0), MSt6 = cond_jump_to_label( {TagReg0, '&', ?TERM_BOXED_TAG_MASK, '!=', ?TERM_BOXED_TUPLE}, Label, MMod, MSt5 @@ -1987,7 +2056,7 @@ first_pass(<>, MMod, MSt0, State0) -> {MSt2, Dest, Rest3} = decode_dest(Rest1, MMod, MSt1), ?TRACE("OP_GET_HD ~p, ~p\n", [SrcValue, Dest]), {MSt3, Reg} = MMod:move_to_native_register(MSt2, SrcValue), - MSt4 = MMod:and_(MSt3, Reg, ?TERM_PRIMARY_CLEAR_MASK), + {MSt4, Reg} = MMod:and_(MSt3, {free, Reg}, ?TERM_PRIMARY_CLEAR_MASK), MSt5 = MMod:move_array_element(MSt4, Reg, ?LIST_HEAD_INDEX, Dest), MSt6 = MMod:free_native_registers(MSt5, [Dest, Reg]), ?ASSERT_ALL_NATIVE_FREE(MSt6), @@ -1999,7 +2068,7 @@ first_pass(<>, MMod, MSt0, State0) -> {MSt2, Dest, Rest3} = decode_dest(Rest1, MMod, MSt1), ?TRACE("OP_GET_TL ~p, ~p\n", [SrcValue, Dest]), {MSt3, Reg} = MMod:move_to_native_register(MSt2, SrcValue), - MSt4 = MMod:and_(MSt3, Reg, ?TERM_PRIMARY_CLEAR_MASK), + {MSt4, Reg} = MMod:and_(MSt3, {free, Reg}, ?TERM_PRIMARY_CLEAR_MASK), MSt5 = MMod:move_array_element(MSt4, Reg, ?LIST_TAIL_INDEX, Dest), MSt6 = 
MMod:free_native_registers(MSt5, [Dest, Reg]), ?ASSERT_ALL_NATIVE_FREE(MSt6), @@ -2011,7 +2080,7 @@ first_pass(<>, MMod, MSt0, State0) -> {ListSize, Rest2} = decode_extended_list_header(Rest1), ?TRACE("OP_PUT_TUPLE2 ~p, [", [Dest]), {MSt2, ResultReg} = MMod:call_primitive(MSt1, ?PRIM_TERM_ALLOC_TUPLE, [ctx, ListSize]), - MSt3 = MMod:and_(MSt2, ResultReg, ?TERM_PRIMARY_CLEAR_MASK), + {MSt3, ResultReg} = MMod:and_(MSt2, {free, ResultReg}, ?TERM_PRIMARY_CLEAR_MASK), {MSt4, Rest3} = lists:foldl( fun(Index, {AccMSt0, AccRest0}) -> {AccMSt1, Element, AccRest1} = decode_compact_term(AccRest0, MMod, AccMSt0, State0), @@ -2040,13 +2109,13 @@ first_pass(<>, MMod, MSt0, State0) -> {MSt4, BSBinaryReg} = MMod:get_array_element(MSt3, MatchStateRegPtr, 1), {MSt5, BSOffsetReg} = MMod:get_array_element(MSt4, MatchStateRegPtr, 2), MSt6 = MMod:free_native_registers(MSt5, [MatchStateRegPtr]), - MSt7 = MMod:and_(MSt6, BSBinaryReg, ?TERM_PRIMARY_CLEAR_MASK), + {MSt7, BSBinaryReg} = MMod:and_(MSt6, {free, BSBinaryReg}, ?TERM_PRIMARY_CLEAR_MASK), {MSt8, ResultTerm, NewMatchState} = do_get_tail( Src, Live, BSOffsetReg, BSBinaryReg, MMod, MSt7 ), MSt9 = MMod:free_native_registers(MSt8, [BSBinaryReg]), {MSt10, MatchStateReg1} = MMod:move_to_native_register(MSt9, NewMatchState), - MSt11 = MMod:and_(MSt10, MatchStateReg1, ?TERM_PRIMARY_CLEAR_MASK), + {MSt11, MatchStateReg1} = MMod:and_(MSt10, {free, MatchStateReg1}, ?TERM_PRIMARY_CLEAR_MASK), MSt12 = MMod:move_to_array_element(MSt11, BSOffsetReg, MatchStateReg1, 2), MSt13 = MMod:move_to_vm_register(MSt12, ResultTerm, Dest), MSt14 = MMod:free_native_registers(MSt13, [MatchStateReg1, BSOffsetReg, ResultTerm, Dest]), @@ -2073,7 +2142,7 @@ first_pass(<>, MMod, MSt0, State0) -> {_Live, Rest3} = decode_literal(Rest2), ?TRACE("OP_BS_GET_POSITION ~p, ~p, ~p\n", [Src, Dest, _Live]), {MSt3, Reg} = MMod:move_to_native_register(MSt2, Src), - MSt4 = MMod:and_(MSt3, Reg, ?TERM_PRIMARY_CLEAR_MASK), + {MSt4, Reg} = MMod:and_(MSt3, {free, Reg}, 
?TERM_PRIMARY_CLEAR_MASK), MSt5 = MMod:move_array_element(MSt4, Reg, 2, Reg), MSt6 = MMod:shift_left(MSt5, Reg, 4), MSt7 = MMod:or_(MSt6, Reg, ?TERM_INTEGER_TAG), @@ -2136,7 +2205,7 @@ first_pass(<>, MMod, MSt0, State0) -> {MSt2, ResultReg} = MMod:call_primitive(MSt1, ?PRIM_TERM_ALLOC_FUN, [ ctx, jit_state, FunIndex, NumFree ]), - MSt3 = MMod:and_(MSt2, ResultReg, ?TERM_PRIMARY_CLEAR_MASK), + {MSt3, ResultReg} = MMod:and_(MSt2, {free, ResultReg}, ?TERM_PRIMARY_CLEAR_MASK), {MSt4, Rest4} = lists:foldl( fun(Index, {AccMSt0, AccRest0}) -> {AccMSt1, Element, AccRest1} = decode_compact_term(AccRest0, MMod, AccMSt0, State0), @@ -2221,8 +2290,8 @@ first_pass( {ListLen, Rest6} = decode_extended_list_header(Rest5), % Compute binary size and verify types in first iteration NBSegments = ListLen div 6, - {Rest7, MSt2, BinaryLitSize, BinaryRegSize, State1} = lists:foldl( - fun(_Index, {AccRest0, AccMSt0, AccLiteralSize0, AccSizeReg0, AccState0}) -> + {Rest7, MSt2, BinaryLitSize, BinaryRegSize, State1, ReuseSourceBinary} = lists:foldl( + fun(Index, {AccRest0, AccMSt0, AccLiteralSize0, AccSizeReg0, AccState0, AccReuseSrc}) -> {AtomTypeIndex, AccRest1} = decode_atom(AccRest0), AtomType = AtomResolver(AtomTypeIndex), {_Seg, AccRest2} = decode_literal(AccRest1), @@ -2242,10 +2311,13 @@ first_pass( AccMSt2, AccState0 ), + NewReuseSrc = + AccReuseSrc orelse + (Index =:= 1 andalso AtomType =:= private_append andalso Size =:= ?ALL_ATOM), AccMSt4 = MMod:free_native_registers(AccMSt3, [Src, Size]), - {AccRest6, AccMSt4, AccLiteralSize1, AccSizeReg1, AccState1} + {AccRest6, AccMSt4, AccLiteralSize1, AccSizeReg1, AccState1, NewReuseSrc} end, - {Rest6, MSt1, 0, undefined, State0}, + {Rest6, MSt1, 0, undefined, State0, false}, lists:seq(1, NBSegments) ), {MSt4, BinaryTotalSize} = @@ -2302,12 +2374,30 @@ first_pass( ] ), MSt14 = handle_error_if({'(bool)', {free, MemoryEnsureFreeReg}, '==', false}, MMod, MSt13), - {MSt15, CreatedBin} = MMod:call_primitive(MSt14, ?PRIM_TERM_CREATE_EMPTY_BINARY, 
[ - ctx, {free, BinaryTotalSizeInBytes} - ]), + {MSt17, InitialCreatedBin} = + case ReuseSourceBinary of + false -> + % No reuse - create the binary now + {MSt15, CreatedBinResult} = MMod:call_primitive( + MSt14, ?PRIM_TERM_CREATE_EMPTY_BINARY, [ + ctx, {free, BinaryTotalSizeInBytes} + ] + ), + MSt16 = MMod:if_block(MSt15, {CreatedBinResult, '==', ?TERM_INVALID_TERM}, fun( + BSt0 + ) -> + MMod:call_primitive_last(BSt0, ?PRIM_RAISE_ERROR, [ + ctx, jit_state, offset, ?OUT_OF_MEMORY_ATOM + ]) + end), + {MSt16, CreatedBinResult}; + true -> + % Will reuse - defer creation until first segment + {MSt14, {private_append, BinaryTotalSizeInBytes}} + end, % We redo the decoding. Rest7 should still be equal to previous value. - {Rest7, MSt16, FinalOffset} = lists:foldl( - fun(_Index, {AccRest0, AccMSt0, AccOffset0}) -> + {Rest7, MSt18, FinalOffset, CreatedBin} = lists:foldl( + fun(_Index, {AccRest0, AccMSt0, AccOffset0, AccCreatedBin}) -> {AtomTypeIndex, AccRest1} = decode_atom(AccRest0), AtomType = AtomResolver(AtomTypeIndex), {_Seg, AccRest2} = decode_literal(AccRest1), @@ -2316,30 +2406,30 @@ first_pass( {AccMSt2, Src, AccRest5} = decode_compact_term(AccRest4, MMod, AccMSt1, State1), {AccMSt3, Size, AccRest6} = decode_compact_term(AccRest5, MMod, AccMSt2, State1), ?TRACE("{~p,~p,~p,~p,~p,~p},", [AtomType, _Seg, SegmentUnit, Flags, Src, Size]), - {AccMSt4, AccOffset1} = first_pass_bs_create_bin_insert_value( + {AccMSt4, AccOffset1, AccCreatedBin1} = first_pass_bs_create_bin_insert_value( AtomType, Flags, Src, Size, SegmentUnit, Fail, - CreatedBin, + AccCreatedBin, AccOffset0, MMod, AccMSt3 ), AccMSt5 = MMod:free_native_registers(AccMSt4, [Flags, Src, Size]), - {AccRest6, AccMSt5, AccOffset1} + {AccRest6, AccMSt5, AccOffset1, AccCreatedBin1} end, - {Rest6, MSt15, 0}, + {Rest6, MSt17, 0, InitialCreatedBin}, lists:seq(1, NBSegments) ), ?TRACE("]\n", []), - MSt17 = MMod:free_native_registers(MSt16, [FinalOffset]), - MSt18 = MMod:move_to_vm_register(MSt17, CreatedBin, Dest), - 
MSt19 = MMod:free_native_registers(MSt18, [CreatedBin, Dest]), - ?ASSERT_ALL_NATIVE_FREE(MSt19), - first_pass(Rest7, MMod, MSt19, State1); + MSt19 = MMod:free_native_registers(MSt18, [FinalOffset]), + MSt20 = MMod:move_to_vm_register(MSt19, CreatedBin, Dest), + MSt21 = MMod:free_native_registers(MSt20, [CreatedBin, Dest]), + ?ASSERT_ALL_NATIVE_FREE(MSt21), + first_pass(Rest7, MMod, MSt21, State1); % 178 first_pass(<>, MMod, MSt0, State0) -> ?ASSERT_ALL_NATIVE_FREE(MSt0), @@ -2376,12 +2466,12 @@ first_pass( {Size, Rest2} = decode_literal(Rest1), {MSt1, Src, Rest3} = decode_compact_term(Rest2, MMod, MSt0, State0), {MSt2, SrcReg} = MMod:move_to_native_register(MSt1, Src), - MSt3 = MMod:and_(MSt2, SrcReg, ?TERM_PRIMARY_CLEAR_MASK), + {MSt3, SrcReg} = MMod:and_(MSt2, {free, SrcReg}, ?TERM_PRIMARY_CLEAR_MASK), {MSt4, Dest, Rest4} = decode_dest(Rest3, MMod, MSt3), {ListLen, Rest5} = decode_extended_list_header(Rest4), ?TRACE("OP_UPDATE_RECORD ~p, ~p, ~p, ~p, [", [Hint, Size, Src, Dest]), {MSt5, DestReg} = MMod:call_primitive(MSt4, ?PRIM_TERM_ALLOC_TUPLE, [ctx, Size]), - MSt6 = MMod:and_(MSt5, DestReg, ?TERM_PRIMARY_CLEAR_MASK), + {MSt6, DestReg} = MMod:and_(MSt5, {free, DestReg}, ?TERM_PRIMARY_CLEAR_MASK), {MSt7, ReuseReg} = MMod:move_to_native_register( MSt6, if @@ -2451,20 +2541,19 @@ first_pass(<>, MMod, MSt0, State0) -> {MSt1, MatchState, Rest2} = decode_compact_term(Rest1, MMod, MSt0, State0), {ListLen, Rest3} = decode_extended_list_header(Rest2), ?TRACE("OP_BS_MATCH ~p, ~p, [", [Fail, MatchState]), - {MSt2, MatchStateReg0} = MMod:copy_to_native_register(MSt1, MatchState), - MSt3 = MMod:and_(MSt2, MatchStateReg0, ?TERM_PRIMARY_CLEAR_MASK), - {MSt4, BSBinaryReg} = MMod:get_array_element(MSt3, MatchStateReg0, 1), - {MSt5, BSOffsetReg} = MMod:get_array_element(MSt4, MatchStateReg0, 2), - MSt6 = MMod:free_native_registers(MSt5, [MatchStateReg0]), - MSt7 = MMod:and_(MSt6, BSBinaryReg, ?TERM_PRIMARY_CLEAR_MASK), - {MSt8, MatchStateReg1} = MMod:move_to_native_register(MSt7, 
MatchState), - {MSt9, Rest4, NewMatchState, NewBSOffsetReg} = first_pass_bs_match( - Fail, MatchStateReg1, BSBinaryReg, BSOffsetReg, ListLen, Rest3, MMod, MSt8, State0 + {MSt2, MatchStateReg0} = MMod:move_to_native_register(MSt1, MatchState), + {MSt3, MatchStateReg1} = MMod:and_(MSt2, MatchStateReg0, ?TERM_PRIMARY_CLEAR_MASK), + {MSt4, BSBinaryReg} = MMod:get_array_element(MSt3, MatchStateReg1, 1), + {MSt5, BSOffsetReg} = MMod:get_array_element(MSt4, MatchStateReg1, 2), + MSt6 = MMod:free_native_registers(MSt5, [MatchStateReg1]), + {MSt7, BSBinaryReg} = MMod:and_(MSt6, {free, BSBinaryReg}, ?TERM_PRIMARY_CLEAR_MASK), + {MSt8, Rest4, MatchStateReg2, NewBSOffsetReg} = first_pass_bs_match( + Fail, MatchStateReg0, BSBinaryReg, BSOffsetReg, ListLen, Rest3, MMod, MSt7, State0 ), ?TRACE("]\n", []), - MSt10 = MMod:free_native_registers(MSt9, [BSBinaryReg, NewBSOffsetReg, NewMatchState]), - ?ASSERT_ALL_NATIVE_FREE(MSt10), - first_pass(Rest4, MMod, MSt10, State0). + MSt9 = MMod:free_native_registers(MSt8, [BSBinaryReg, NewBSOffsetReg, MatchStateReg2]), + ?ASSERT_ALL_NATIVE_FREE(MSt9), + first_pass(Rest4, MMod, MSt9, State0). first_pass_bs_create_bin_compute_size( AtomType, Src, _Size, _SegmentUnit, Fail, AccLiteralSize0, AccSizeReg0, MMod, MSt0, State0 @@ -2494,6 +2583,42 @@ first_pass_bs_create_bin_compute_size( ) -> MSt1 = verify_is_integer(Src, Fail, MMod, MSt0), {MSt1, AccLiteralSize0 + 32, AccSizeReg0, State0}; +first_pass_bs_create_bin_compute_size( + float, Src, Size, _SegmentUnit, Fail, AccLiteralSize0, AccSizeReg0, MMod, MSt0, State0 +) -> + MSt1 = verify_is_number(Src, Fail, MMod, MSt0), + % Verify and get the float size (defaults to 64 if nil) + case Size of + ?TERM_NIL -> + {MSt1, AccLiteralSize0 + 64, AccSizeReg0, State0}; + _ -> + {MSt2, SizeValue} = term_to_int(Size, Fail, MMod, MSt1), + if + is_integer(SizeValue) -> + % If size is a literal, compiler would only allow 16/32/64. 
+ {MSt2, AccLiteralSize0 + SizeValue, AccSizeReg0, State0}; + is_atom(SizeValue) -> + % Check if size is 16, 32, or 64 using 'and' of '!=' checks + MSt3 = cond_raise_badarg_or_jump_to_fail_label( + {'and', [ + {SizeValue, '!=', 16}, + {SizeValue, '!=', 32}, + {SizeValue, '!=', 64} + ]}, + Fail, + MMod, + MSt2 + ), + case AccSizeReg0 of + undefined -> + {MSt3, AccLiteralSize0, SizeValue, State0}; + _ -> + MSt4 = MMod:add(MSt3, AccSizeReg0, SizeValue), + MSt5 = MMod:free_native_registers(MSt4, [SizeValue]), + {MSt5, AccLiteralSize0, AccSizeReg0, State0} + end + end + end; first_pass_bs_create_bin_compute_size( integer, Src, Size, SegmentUnit, Fail, AccLiteralSize0, AccSizeReg0, MMod, MSt0, State0 ) -> @@ -2540,7 +2665,7 @@ first_pass_bs_create_bin_compute_size( ) when AtomType =:= binary orelse AtomType =:= append orelse AtomType =:= private_append -> MSt1 = verify_is_binary(Src, Fail, MMod, MSt0), {MSt2, Reg} = MMod:copy_to_native_register(MSt1, Src), - MSt3 = MMod:and_(MSt2, Reg, ?TERM_PRIMARY_CLEAR_MASK), + {MSt3, Reg} = MMod:and_(MSt2, {free, Reg}, ?TERM_PRIMARY_CLEAR_MASK), MSt4 = MMod:move_array_element(MSt3, Reg, 1, Reg), MSt5 = MMod:shift_left(MSt4, Reg, 3), case AccSizeReg0 of @@ -2566,7 +2691,7 @@ first_pass_bs_create_bin_compute_size( MSt1 = verify_is_binary(Src, Fail, MMod, MSt0), {MSt2, Reg0} = MMod:copy_to_native_register(MSt1, Size), {MSt3, Reg1} = MMod:copy_to_native_register(MSt2, Src), - MSt4 = MMod:and_(MSt3, Reg1, ?TERM_PRIMARY_CLEAR_MASK), + {MSt4, Reg1} = MMod:and_(MSt3, {free, Reg1}, ?TERM_PRIMARY_CLEAR_MASK), MSt5 = MMod:move_array_element(MSt4, Reg1, 1, Reg1), MSt6 = MMod:shift_left(MSt5, Reg1, 3), MSt7 = MMod:if_block(MSt6, {{free, Reg0}, '!=', ?ALL_ATOM}, fun(BSt0) -> @@ -2600,7 +2725,7 @@ first_pass_bs_create_bin_insert_value( {MSt3, NewOffset} = first_pass_bs_create_bin_insert_value_increment_offset( MMod, MSt2, Offset, Size, 8 ), - {MSt3, NewOffset}; + {MSt3, NewOffset, CreatedBin}; first_pass_bs_create_bin_insert_value( utf16, Flags, 
Src, _Size, _SegmentUnit, Fail, CreatedBin, Offset, MMod, MSt0 ) -> @@ -2612,7 +2737,7 @@ first_pass_bs_create_bin_insert_value( {MSt4, NewOffset} = first_pass_bs_create_bin_insert_value_increment_offset( MMod, MSt3, Offset, Size, 8 ), - {MSt4, NewOffset}; + {MSt4, NewOffset, CreatedBin}; first_pass_bs_create_bin_insert_value( utf32, Flags, Src, _Size, _SegmentUnit, Fail, CreatedBin, Offset, MMod, MSt0 ) -> @@ -2627,7 +2752,7 @@ first_pass_bs_create_bin_insert_value( {MSt5, NewOffset} = first_pass_bs_create_bin_insert_value_increment_offset( MMod, MSt4, Offset, 4, 8 ), - {MSt5, NewOffset}; + {MSt5, NewOffset, CreatedBin}; first_pass_bs_create_bin_insert_value( integer, Flags, Src, Size, SegmentUnit, Fail, CreatedBin, Offset, MMod, MSt0 ) -> @@ -2648,7 +2773,32 @@ first_pass_bs_create_bin_insert_value( {MSt7, NewOffset} = first_pass_bs_create_bin_insert_value_increment_offset( MMod, MSt6, Offset, SizeValue, 1 ), - {MSt7, NewOffset}; + {MSt7, NewOffset, CreatedBin}; +first_pass_bs_create_bin_insert_value( + float, Flags, Src, Size, _SegmentUnit, Fail, CreatedBin, Offset, MMod, MSt0 +) -> + % Src is a term (boxed float or integer) + {MSt1, SrcReg} = MMod:move_to_native_register(MSt0, Src), + {MSt2, FlagsValue} = decode_flags_list(Flags, MMod, MSt1), + % Get the float size (defaults to 64 if nil) + {MSt3, SizeValue} = + case Size of + ?TERM_NIL -> + {MSt2, 64}; + _ -> + term_to_int(Size, Fail, MMod, MSt2) + end, + % Call single primitive with size parameter + {MSt4, BoolResult} = MMod:call_primitive(MSt3, ?PRIM_BITSTRING_INSERT_FLOAT, [ + CreatedBin, Offset, {free, SrcReg}, SizeValue, {free, FlagsValue} + ]), + MSt5 = cond_raise_badarg_or_jump_to_fail_label( + {'(bool)', {free, BoolResult}, '==', false}, Fail, MMod, MSt4 + ), + {MSt6, NewOffset} = first_pass_bs_create_bin_insert_value_increment_offset( + MMod, MSt5, Offset, SizeValue, 1 + ), + {MSt6, NewOffset, CreatedBin}; first_pass_bs_create_bin_insert_value( string, _Flags, Src, Size, SegmentUnit, Fail, CreatedBin, 
Offset, MMod, MSt0 ) -> @@ -2668,7 +2818,37 @@ first_pass_bs_create_bin_insert_value( {MSt6, NewOffset} = first_pass_bs_create_bin_insert_value_increment_offset( MMod, MSt5, Offset, BitSize, 1 ), - {MSt6, NewOffset}; + {MSt6, NewOffset, CreatedBin}; +first_pass_bs_create_bin_insert_value( + private_append, + _Flags, + Src, + _Size, + _SegmentUnit, + _Fail, + {private_append, BinaryTotalSizeInBytes}, + Offset, + MMod, + MSt0 +) -> + % Special case: first segment is private_append with undefined CreatedBin + % Get original size before reusing + {MSt1, OriginalSize} = term_binary_size(Src, MMod, MSt0), + % Reuse the source binary (content is already there, no need to copy) + {MSt2, CreatedBin} = MMod:call_primitive(MSt1, ?PRIM_TERM_REUSE_BINARY, [ + ctx, {free, Src}, {free, BinaryTotalSizeInBytes} + ]), + MSt3 = MMod:if_block(MSt2, {CreatedBin, '==', ?TERM_INVALID_TERM}, fun(BSt0) -> + MMod:call_primitive_last(BSt0, ?PRIM_RAISE_ERROR, [ + ctx, jit_state, offset, ?OUT_OF_MEMORY_ATOM + ]) + end), + % Convert original size to bits and update offset + MSt4 = MMod:shift_left(MSt3, OriginalSize, 3), + {MSt5, NewOffset} = first_pass_bs_create_bin_insert_value_increment_offset( + MMod, MSt4, Offset, OriginalSize, 1 + ), + {MSt5, NewOffset, CreatedBin}; first_pass_bs_create_bin_insert_value( AtomType, _Flags, Src, Size, _SegmentUnit, _Fail, CreatedBin, Offset, MMod, MSt0 ) when AtomType =:= binary orelse AtomType =:= append orelse AtomType =:= private_append -> @@ -2683,11 +2863,11 @@ first_pass_bs_create_bin_insert_value( {MSt3, NewOffset} = first_pass_bs_create_bin_insert_value_increment_offset( MMod, MSt2, Offset, SizeValue, 1 ), - {MSt3, NewOffset}; + {MSt3, NewOffset, CreatedBin}; first_pass_bs_create_bin_insert_value( - _OtherType, _Flag, _Src, _Size, _SegmentUnit, _Fail, _CreatedBin, Offset, _MMod, MSt0 + _OtherType, _Flag, _Src, _Size, _SegmentUnit, _Fail, CreatedBin, Offset, _MMod, MSt0 ) -> - {MSt0, Offset}. + {MSt0, Offset, CreatedBin}. 
first_pass_bs_create_bin_insert_value_increment_offset(_MMod, MSt0, Offset, Size, Unit) when is_integer(Offset) andalso is_integer(Size) andalso is_integer(Unit) @@ -2772,12 +2952,11 @@ first_pass_bs_match( first_pass_bs_match_skip(MatchState, BSOffsetReg, J1, Rest1, MMod, MSt0) end, % offset needs to be updated in the loop - {MSt2, MatchStateReg1} = MMod:copy_to_native_register(MSt1, NewMatchState), - MSt3 = MMod:and_(MSt2, MatchStateReg1, ?TERM_PRIMARY_CLEAR_MASK), - MSt4 = MMod:move_to_array_element(MSt3, NewBSOffsetReg, MatchStateReg1, 2), - MSt5 = MMod:free_native_registers(MSt4, [MatchStateReg1]), + {MSt2, MatchStateReg1} = MMod:and_(MSt1, NewMatchState, ?TERM_PRIMARY_CLEAR_MASK), + MSt3 = MMod:move_to_array_element(MSt2, NewBSOffsetReg, MatchStateReg1, 2), + MSt4 = MMod:free_native_registers(MSt3, [MatchStateReg1]), first_pass_bs_match( - Fail, NewMatchState, BSBinaryReg, NewBSOffsetReg, J2, Rest2, MMod, MSt5, State0 + Fail, NewMatchState, BSBinaryReg, NewBSOffsetReg, J2, Rest2, MMod, MSt4, State0 ). 
first_pass_bs_match_ensure_at_least( @@ -2862,7 +3041,7 @@ first_pass_bs_match_integer( MSt13 = MMod:free_native_registers(MSt12, [Result, Dest]), case MMod:available_regs(MSt9) of [] -> - MSt14 = MMod:and_(MSt13, MatchState, ?TERM_PRIMARY_CLEAR_MASK), + {MSt14, MatchState} = MMod:and_(MSt13, {free, MatchState}, ?TERM_PRIMARY_CLEAR_MASK), {MSt15, NewBSOffsetReg} = MMod:get_array_element(MSt14, MatchState, 2), MSt16 = MMod:or_(MSt15, MatchState, ?TERM_PRIMARY_BOXED), MSt17 = MMod:add(MSt16, NewBSOffsetReg, NumBits), @@ -2915,13 +3094,13 @@ first_pass_bs_match_binary( ), % Restore BSBinaryReg as it may have been gc'd as well {MSt9, MatchStateReg0} = MMod:copy_to_native_register(MSt8, NewMatchState), - MSt10 = MMod:and_(MSt9, MatchStateReg0, ?TERM_PRIMARY_CLEAR_MASK), + {MSt10, MatchStateReg0} = MMod:and_(MSt9, {free, MatchStateReg0}, ?TERM_PRIMARY_CLEAR_MASK), MSt11 = MMod:move_array_element(MSt10, MatchStateReg0, 1, BSBinaryReg), MSt12 = MMod:free_native_registers(MSt11, [MatchStateReg0]), {MSt13, ResultTerm} = MMod:call_primitive(MSt12, ?PRIM_TERM_MAYBE_CREATE_SUB_BINARY, [ ctx, BSBinaryReg, {free, BSOffseBytesReg}, MatchedBytes ]), - MSt14 = MMod:and_(MSt13, BSBinaryReg, ?TERM_PRIMARY_CLEAR_MASK), + {MSt14, BSBinaryReg} = MMod:and_(MSt13, {free, BSBinaryReg}, ?TERM_PRIMARY_CLEAR_MASK), {MSt15, Dest, Rest5} = decode_dest(Rest4, MMod, MSt14), ?TRACE("~p},", [Dest]), MSt16 = MMod:move_to_vm_register(MSt15, ResultTerm, Dest), @@ -2958,10 +3137,10 @@ do_get_tail( ), % Restore BSBinaryReg as it may have been gc'd as well {MSt7, MatchStateReg0} = MMod:copy_to_native_register(MSt6, NewMatchState), - MSt8 = MMod:and_(MSt7, MatchStateReg0, ?TERM_PRIMARY_CLEAR_MASK), + {MSt8, MatchStateReg0} = MMod:and_(MSt7, {free, MatchStateReg0}, ?TERM_PRIMARY_CLEAR_MASK), MSt9 = MMod:move_array_element(MSt8, MatchStateReg0, 1, BSBinaryReg), MSt10 = MMod:free_native_registers(MSt9, [MatchStateReg0]), - MSt11 = MMod:and_(MSt10, BSBinaryReg, ?TERM_PRIMARY_CLEAR_MASK), + {MSt11, BSBinaryReg} 
= MMod:and_(MSt10, {free, BSBinaryReg}, ?TERM_PRIMARY_CLEAR_MASK), {MSt12, TailBytesReg1} = MMod:get_array_element(MSt11, BSBinaryReg, 1), MSt13 = MMod:sub(MSt12, TailBytesReg0, BSOffseBytesReg), MSt14 = MMod:add(MSt13, BSBinaryReg, ?TERM_PRIMARY_BOXED), @@ -2995,7 +3174,7 @@ first_pass_bs_match_equal_colon_equal( MMod:jump_to_label(BSt0, Fail) end ), - MSt4 = MMod:and_(MSt3, Result, ?TERM_PRIMARY_CLEAR_MASK), + {MSt4, Result} = MMod:and_(MSt3, {free, Result}, ?TERM_PRIMARY_CLEAR_MASK), {MSt5, IntValue} = MMod:get_array_element(MSt4, {free, Result}, 1), cond_jump_to_label({{free, IntValue}, '!=', PatternValue}, Fail, MMod, MSt5); _ -> @@ -3011,6 +3190,255 @@ first_pass_bs_match_skip(MatchState, BSOffsetReg, J0, Rest0, MMod, MSt0) -> ?TRACE("{skip,~p},", [Stride]), {J0 - 1, Rest1, MatchState, BSOffsetReg, MSt1}. +op_gc_bif2( + MMod, + MSt0, + FailLabel, + Live, + Bif, + erlang, + '+', + {typed, Arg1, {t_integer, Range1}}, + {typed, Arg2, {t_integer, Range2}}, + Dest +) -> + op_gc_bif2_add(MMod, MSt0, FailLabel, Live, Bif, Arg1, Arg2, Dest, Range1, Range2); +op_gc_bif2( + MMod, MSt0, FailLabel, Live, Bif, erlang, '+', {typed, Arg1, {t_integer, Range1}}, Arg2, Dest +) when is_integer(Arg2), Arg2 band ?TERM_IMMED_TAG_MASK =:= ?TERM_INTEGER_TAG -> + % Arg2 is a small integer literal, extract its value and create a range + Arg2Value = Arg2 bsr 4, + Range2 = {Arg2Value, Arg2Value}, + op_gc_bif2_add(MMod, MSt0, FailLabel, Live, Bif, Arg1, Arg2, Dest, Range1, Range2); +op_gc_bif2( + MMod, + MSt0, + FailLabel, + Live, + Bif, + erlang, + '-', + {typed, Arg1, {t_integer, Range1}}, + {typed, Arg2, {t_integer, Range2}}, + Dest +) -> + op_gc_bif2_sub(MMod, MSt0, FailLabel, Live, Bif, Arg1, Arg2, Dest, Range1, Range2); +op_gc_bif2( + MMod, MSt0, FailLabel, Live, Bif, erlang, '-', {typed, Arg1, {t_integer, Range1}}, Arg2, Dest +) when is_integer(Arg2), Arg2 band ?TERM_IMMED_TAG_MASK =:= ?TERM_INTEGER_TAG -> + % Arg2 is a small integer literal, extract its value and create a range + 
Arg2Value = Arg2 bsr 4, + Range2 = {Arg2Value, Arg2Value}, + op_gc_bif2_sub(MMod, MSt0, FailLabel, Live, Bif, Arg1, Arg2, Dest, Range1, Range2); +% Default case +op_gc_bif2( + MMod, MSt0, FailLabel, Live, Bif, _Module, _Function, {typed, Arg1, _}, {typed, Arg2, _}, Dest +) -> + op_gc_bif2_default(MMod, MSt0, FailLabel, Live, Bif, Arg1, Arg2, Dest); +op_gc_bif2(MMod, MSt0, FailLabel, Live, Bif, _Module, _Function, {typed, Arg1, _}, Arg2, Dest) -> + op_gc_bif2_default(MMod, MSt0, FailLabel, Live, Bif, Arg1, Arg2, Dest); +op_gc_bif2(MMod, MSt0, FailLabel, Live, Bif, _Module, _Function, Arg1, {typed, Arg2, _}, Dest) -> + op_gc_bif2_default(MMod, MSt0, FailLabel, Live, Bif, Arg1, Arg2, Dest); +op_gc_bif2(MMod, MSt0, FailLabel, Live, Bif, _Module, _Function, Arg1, Arg2, Dest) -> + op_gc_bif2_default(MMod, MSt0, FailLabel, Live, Bif, Arg1, Arg2, Dest). + +op_gc_bif2_default(MMod, MSt0, FailLabel, Live, Bif, Arg1, Arg2, Dest) -> + {MSt1, TrimResultReg} = MMod:call_primitive(MSt0, ?PRIM_TRIM_LIVE_REGS, [ctx, Live]), + MSt2 = MMod:free_native_registers(MSt1, [TrimResultReg]), + CappedLive = + if + Live > ?MAX_REG -> ?MAX_REG; + true -> Live + end, + {MSt3, FuncPtr} = MMod:call_primitive(MSt2, ?PRIM_GET_IMPORTED_BIF, [ + jit_state, Bif + ]), + {MSt4, ResultReg} = MMod:call_func_ptr(MSt3, {free, FuncPtr}, [ + ctx, FailLabel, CappedLive, {free, Arg1}, {free, Arg2} + ]), + bif_faillabel_test(FailLabel, MMod, MSt4, {free, ResultReg}, {free, Dest}). 
+
+% Decide at compile time whether `Arg1 + Arg2` may be emitted inline.
+% Both ranges must be `{Min, Max}` tuples of integers. The result is `true`
+% only when the smallest and the largest possible sums both lie inside the
+% bounds selected from `MMod:word_size()`: -(1 bsl 27)..(1 bsl 27) - 1 for a
+% word size of 4, -(1 bsl 59)..(1 bsl 59) - 1 for a word size of 8.
+% Any other range shape (e.g. one carrying '-inf' or '+inf') yields `false`.
+can_inline_add(Range1, Range2, MMod) ->
+    % The bounds are selected first, before the ranges are inspected
+    HalfSpan =
+        case MMod:word_size() of
+            % 32-bit
+            4 -> 1 bsl 27;
+            % 64-bit
+            8 -> 1 bsl 59
+        end,
+    Lowest = -HalfSpan,
+    Highest = HalfSpan - 1,
+    case {Range1, Range2} of
+        {{Lo1, Hi1}, {Lo2, Hi2}} when
+            is_integer(Lo1) andalso is_integer(Hi1) andalso
+                is_integer(Lo2) andalso is_integer(Hi2)
+        ->
+            % Extremes of the sum: lowest + lowest and highest + highest
+            Lo1 + Lo2 >= Lowest andalso Hi1 + Hi2 =< Highest;
+        _ ->
+            % Unbounded range, cannot optimize
+            false
+    end.
+
+% Addition with a compile-time range check.
+% When can_inline_add/3 accepts the two ranges, the sum is emitted inline;
+% otherwise the work is delegated to op_gc_bif2_default/8.
+% Returns the updated backend state.
+op_gc_bif2_add(MMod, MSt0, FailLabel, Live, Bif, Arg1, Arg2, Dest, Range1, Range2) ->
+    case can_inline_add(Range1, Range2, MMod) of
+        false ->
+            % Cannot prove safety, use default BIF call
+            op_gc_bif2_default(MMod, MSt0, FailLabel, Live, Bif, Arg1, Arg2, Dest);
+        true when is_integer(Arg2) ->
+            % Arg2 is an integer literal: clear its tag bits here and add the
+            % remainder to the register holding Arg1 (which keeps its tag)
+            UntaggedArg2 = Arg2 band (bnot (?TERM_IMMED_TAG_MASK)),
+            {St1, SumReg} = MMod:move_to_native_register(MSt0, Arg1),
+            St2 = MMod:add(St1, SumReg, UntaggedArg2),
+            St3 = MMod:move_to_vm_register(St2, SumReg, Dest),
+            MMod:free_native_registers(St3, [SumReg]);
+        true ->
+            % Neither operand is a literal: load both, clear the tag bits of
+            % the right operand with AND, then add it to the left operand
+            % (which keeps its tag)
+            {St1, LeftReg} = MMod:move_to_native_register(MSt0, Arg1),
+            {St2, RightReg} = MMod:move_to_native_register(St1, Arg2),
+            {St3, RightUntagged} = MMod:and_(
+                St2, {free, RightReg}, bnot (?TERM_IMMED_TAG_MASK)
+            ),
+            St4 = MMod:add(St3, LeftReg, RightUntagged),
+            St5 = MMod:move_to_vm_register(St4, LeftReg, Dest),
+            MMod:free_native_registers(St5, [LeftReg, RightUntagged])
+    end.
+
+% Decide at compile time whether `Arg1 - Arg2` may be emitted inline.
+% Same contract as can_inline_add/3, but the extremes of the difference are
+% Min1 - Max2 (smallest minus largest) and Max1 - Min2 (largest minus smallest).
+can_inline_sub(Range1, Range2, MMod) ->
+    % The bounds are selected first, before the ranges are inspected
+    HalfSpan =
+        case MMod:word_size() of
+            % 32-bit
+            4 -> 1 bsl 27;
+            % 64-bit
+            8 -> 1 bsl 59
+        end,
+    Lowest = -HalfSpan,
+    Highest = HalfSpan - 1,
+    case {Range1, Range2} of
+        {{Lo1, Hi1}, {Lo2, Hi2}} when
+            is_integer(Lo1) andalso is_integer(Hi1) andalso
+                is_integer(Lo2) andalso is_integer(Hi2)
+        ->
+            Lo1 - Hi2 >= Lowest andalso Hi1 - Lo2 =< Highest;
+        _ ->
+            % Unbounded range, cannot optimize
+            false
+    end.
+
+% Subtraction with a compile-time range check.
+% When can_inline_sub/3 accepts the two ranges, the difference is emitted
+% inline; otherwise the work is delegated to op_gc_bif2_default/8.
+% Returns the updated backend state.
+op_gc_bif2_sub(MMod, MSt0, FailLabel, Live, Bif, Arg1, Arg2, Dest, Range1, Range2) ->
+    case can_inline_sub(Range1, Range2, MMod) of
+        false ->
+            % Cannot prove safety, use default BIF call
+            op_gc_bif2_default(MMod, MSt0, FailLabel, Live, Bif, Arg1, Arg2, Dest);
+        true when is_integer(Arg2) ->
+            % Arg2 is an integer literal: clear its tag bits here and subtract
+            % the remainder from the register holding Arg1 (which keeps its tag)
+            UntaggedArg2 = Arg2 band (bnot (?TERM_IMMED_TAG_MASK)),
+            {St1, DiffReg} = MMod:move_to_native_register(MSt0, Arg1),
+            St2 = MMod:sub(St1, DiffReg, UntaggedArg2),
+            St3 = MMod:move_to_vm_register(St2, DiffReg, Dest),
+            MMod:free_native_registers(St3, [DiffReg]);
+        true ->
+            % Neither operand is a literal: load both, clear the tag bits of
+            % the right operand with AND, then subtract it from the left
+            % operand (which keeps its tag)
+            {St1, LeftReg} = MMod:move_to_native_register(MSt0, Arg1),
+            {St2, RightReg} = MMod:move_to_native_register(St1, Arg2),
+            {St3, RightUntagged} = MMod:and_(
+                St2, {free, RightReg}, bnot (?TERM_IMMED_TAG_MASK)
+            ),
+            St4 = MMod:sub(St3, LeftReg, RightUntagged),
+            St5 = MMod:move_to_vm_register(St4, LeftReg, Dest),
+            MMod:free_native_registers(St5, [LeftReg, RightUntagged])
+    end.
+
+% Return the wrapped argument of a `{typed, Arg, Type}` tuple; any other
+% term is returned unchanged.
+unwrap_typed(MaybeTyped) ->
+    case MaybeTyped of
+        {typed, Inner, _Type} -> Inner;
+        _ -> MaybeTyped
+    end.
+
+% Optimized >= comparison for typed integers
+% Test if Arg1 >= Arg2, jump to Label if false (i.e., if Arg1 < Arg2)
+%
+% Three clauses, each returning the updated backend state:
+%   1. Arg1 is an integer literal and Arg2 is `{typed, _, {t_integer, _}}`
+%   2. Arg1 is `{typed, _, {t_integer, _}}` and Arg2 is an integer literal
+%   3. anything else: generic comparison through ?PRIM_TERM_COMPARE
+% The range carried by the type is ignored (`_Range`) in clauses 1 and 2.
+op_is_ge(MMod, MSt0, Label, Arg1, {typed, Arg2, {t_integer, _Range}}) when is_integer(Arg1) ->
+    % Arg1 is integer literal (already tagged by decode_compact_term), Arg2 is typed integer
+    % If Arg2 is boxed (bignum), the comparison result depends on the sign
+    {MSt1, Arg2Reg} = MMod:move_to_native_register(MSt0, Arg2),
+    % Check if Arg2 is a small integer (tagged with 0xF)
+    MSt2 = MMod:if_block(MSt1, {Arg2Reg, '&', ?TERM_IMMED_TAG_MASK, '!=', ?TERM_INTEGER_TAG}, fun(
+        BSt0
+    ) ->
+        % Arg2 is boxed (bignum) - need to determine comparison result
+        % For small Arg1, if Arg2 is positive bignum -> Arg1 < Arg2 (fail)
+        % For small Arg1, if Arg2 is negative bignum -> Arg1 > Arg2 (pass)
+        % We need to check the sign of the boxed integer
+        % Arg2Reg is passed without `free`, and the result is bound to BoxedReg
+        {BSt1, BoxedReg} = MMod:and_(BSt0, Arg2Reg, bnot (?TERM_PRIMARY_MASK)),
+        % Load element 0 of the boxed term, then keep only its tag bits
+        BSt2 = MMod:move_array_element(BSt1, BoxedReg, 0, BoxedReg),
+        {BSt3, TagReg} = MMod:and_(BSt2, {free, BoxedReg}, ?TERM_BOXED_TAG_MASK),
+        % Jump to label if it's a positive bignum (tag = 0x8)
+        % For negative bignum (tag = 0x28), Arg1 >= Arg2 is true, so don't jump
+        cond_jump_to_label({{free, TagReg}, '==', ?TERM_BOXED_POSITIVE_INTEGER}, Label, MMod, BSt3)
+    end),
+    % If we're here, Arg2 is a small integer - do inline comparison
+    % is_ge tests Arg1 >= Arg2, jump to Label if Arg1 < Arg2
+    % Arg1 is already tagged, use it directly
+    % NOTE(review): when the block above does not jump (boxed value whose tag
+    % is not ?TERM_BOXED_POSITIVE_INTEGER), nothing visible here skips this
+    % comparison. If if_block simply falls through to the code that follows it,
+    % Arg2Reg still holds the boxed term at this point and the tagged literal
+    % is compared against it - confirm against the backend's if_block.
+    cond_jump_to_label({Arg1, '<', {free, Arg2Reg}}, Label, MMod, MSt2);
+op_is_ge(MMod, MSt0, Label, {typed, Arg1, {t_integer, _Range}}, Arg2) when is_integer(Arg2) ->
+    % Arg1 is typed integer, Arg2 is integer literal (already tagged by decode_compact_term)
+    % If Arg1 is boxed (bignum), the comparison result depends on the sign
+    {MSt1, Arg1Reg} = MMod:move_to_native_register(MSt0, Arg1),
+    % Check if Arg1 is a small integer (tagged with 0xF)
+    MSt2 = MMod:if_block(MSt1, {Arg1Reg, '&', ?TERM_IMMED_TAG_MASK, '!=', ?TERM_INTEGER_TAG}, fun(
+        BSt0
+    ) ->
+        % Arg1 is boxed (bignum) - need to determine comparison result
+        % For small Arg2, if Arg1 is positive bignum -> Arg1 > Arg2 (pass), don't jump
+        % For small Arg2, if Arg1 is negative bignum -> Arg1 < Arg2 (fail), jump
+        % Arg1Reg is passed without `free`, and the result is bound to BoxedReg
+        {BSt1, BoxedReg} = MMod:and_(BSt0, Arg1Reg, bnot (?TERM_PRIMARY_MASK)),
+        % Load element 0 of the boxed term, then keep only its tag bits
+        BSt2 = MMod:move_array_element(BSt1, BoxedReg, 0, BoxedReg),
+        {BSt3, TagReg} = MMod:and_(BSt2, {free, BoxedReg}, ?TERM_BOXED_TAG_MASK),
+        % Jump to label if it's a negative bignum (tag = 0x28)
+        % For positive bignum (tag = 0x8), Arg1 >= Arg2 is true, so don't jump
+        cond_jump_to_label({{free, TagReg}, '!=', ?TERM_BOXED_POSITIVE_INTEGER}, Label, MMod, BSt3)
+    end),
+    % If we're here, Arg1 is a small integer - do inline comparison
+    % is_ge tests Arg1 >= Arg2, jump to Label if Arg1 < Arg2
+    % Arg2 is already tagged, use it directly
+    % NOTE(review): same concern as in the first clause - when the block above
+    % does not jump (tag equal to ?TERM_BOXED_POSITIVE_INTEGER), this comparison
+    % presumably still runs with Arg1Reg holding the boxed term - confirm.
+    cond_jump_to_label({{free, Arg1Reg}, '<', Arg2}, Label, MMod, MSt2);
+% Fallback: use term_compare
+op_is_ge(MMod, MSt0, Label, Arg1, Arg2) ->
+    % Type wrappers are removed with unwrap_typed/1 before the call
+    {MSt1, ResultReg} = MMod:call_primitive(MSt0, ?PRIM_TERM_COMPARE, [
+        ctx, jit_state, {free, unwrap_typed(Arg1)}, {free, unwrap_typed(Arg2)}, ?TERM_COMPARE_NO_OPTS
+    ]),
+    % A result equal to ?TERM_COMPARE_MEMORY_ALLOC_FAIL goes to handle_error_if
+    MSt2 = handle_error_if({'(int)', ResultReg, '==', ?TERM_COMPARE_MEMORY_ALLOC_FAIL}, MMod, MSt1),
+    % Arg1 < Arg2 means the >= test failed: jump to Label
+    cond_jump_to_label({'(int)', {free, ResultReg}, '==', ?TERM_LESS_THAN}, Label, MMod, MSt2).
+ term_alloc_bin_match_state(Live, Src, Dest, MMod, MSt0) -> {MSt1, TrimResultReg} = MMod:call_primitive(MSt0, ?PRIM_TRIM_LIVE_REGS, [ctx, Live]), MSt2 = MMod:free_native_registers(MSt1, [TrimResultReg]), @@ -3037,7 +3465,7 @@ term_is_boxed_with_tag_and_get_ptr(Label, Arg1, BoxedTag, MMod, MSt1) -> MSt3 = cond_jump_to_label( {Reg, '&', ?TERM_PRIMARY_MASK, '!=', ?TERM_PRIMARY_BOXED}, Label, MMod, MSt2 ), - MSt4 = MMod:and_(MSt3, Reg, ?TERM_PRIMARY_CLEAR_MASK), + {MSt4, Reg} = MMod:and_(MSt3, {free, Reg}, ?TERM_PRIMARY_CLEAR_MASK), {MSt5, BoxTagReg} = MMod:get_array_element(MSt4, Reg, 0), MSt6 = cond_jump_to_label( {{free, BoxTagReg}, '&', ?TERM_BOXED_TAG_MASK, '!=', BoxedTag}, Label, MMod, MSt5 @@ -3062,28 +3490,30 @@ verify_is_function({typed, Func, _Other}, MMod, MSt0) -> ]), {MSt2, Reg}; verify_is_function(Func, MMod, MSt0) -> - {MSt1, Reg} = MMod:copy_to_native_register(MSt0, Func), + {MSt1, Reg} = MMod:move_to_native_register(MSt0, Func), MSt2 = MMod:if_block(MSt1, {Reg, '&', ?TERM_PRIMARY_MASK, '!=', ?TERM_PRIMARY_BOXED}, fun(BSt0) -> MMod:call_primitive_last(BSt0, ?PRIM_RAISE_ERROR_TUPLE, [ ctx, jit_state, offset, ?BADFUN_ATOM, Reg ]) end), - MSt3 = MMod:and_(MSt2, Reg, ?TERM_PRIMARY_CLEAR_MASK), - MSt4 = MMod:move_array_element(MSt3, Reg, 0, Reg), - MSt5 = MMod:if_block(MSt4, {Reg, '&', ?TERM_BOXED_TAG_MASK, '!=', ?TERM_BOXED_FUN}, fun(BSt0) -> - MMod:call_primitive_last(BSt0, ?PRIM_RAISE_ERROR_TUPLE, [ - ctx, jit_state, offset, ?BADFUN_ATOM, Reg - ]) - end), - MSt6 = MMod:free_native_registers(MSt5, [Reg]), - MMod:move_to_native_register(MSt6, Func). 
+ {MSt3, BoxedPtrReg} = MMod:and_(MSt2, Reg, ?TERM_PRIMARY_CLEAR_MASK), + MSt4 = MMod:move_array_element(MSt3, BoxedPtrReg, 0, BoxedPtrReg), + MSt5 = MMod:if_block( + MSt4, {BoxedPtrReg, '&', ?TERM_BOXED_TAG_MASK, '!=', ?TERM_BOXED_FUN}, fun(BSt0) -> + MMod:call_primitive_last(BSt0, ?PRIM_RAISE_ERROR_TUPLE, [ + ctx, jit_state, offset, ?BADFUN_ATOM, Reg + ]) + end + ), + MSt6 = MMod:free_native_registers(MSt5, [BoxedPtrReg]), + {MSt6, Reg}. verify_is_binary_or_match_state(Label, Src, MMod, MSt0) -> {MSt1, Reg} = MMod:copy_to_native_register(MSt0, Src), MSt2 = verify_is_boxed(MMod, MSt1, Reg, Label), - MSt3 = MMod:and_(MSt2, Reg, ?TERM_PRIMARY_CLEAR_MASK), + {MSt3, Reg} = MMod:and_(MSt2, {free, Reg}, ?TERM_PRIMARY_CLEAR_MASK), MSt4 = MMod:move_array_element(MSt3, Reg, 0, Reg), - MSt5 = MMod:and_(MSt4, Reg, ?TERM_BOXED_TAG_MASK), + {MSt5, Reg} = MMod:and_(MSt4, {free, Reg}, ?TERM_BOXED_TAG_MASK), MSt6 = cond_raise_badarg_or_jump_to_fail_label( {'and', [ {Reg, '!=', ?TERM_BOXED_REFC_BINARY}, @@ -3099,7 +3529,7 @@ verify_is_binary_or_match_state(Label, Src, MMod, MSt0) -> verify_is_boxed_with_tag(Label, {free, Reg}, BoxedTag, MMod, MSt0) when is_atom(Reg) -> MSt1 = verify_is_boxed(MMod, MSt0, Reg, Label), - MSt2 = MMod:and_(MSt1, Reg, ?TERM_PRIMARY_CLEAR_MASK), + {MSt2, Reg} = MMod:and_(MSt1, {free, Reg}, ?TERM_PRIMARY_CLEAR_MASK), MSt3 = MMod:move_array_element(MSt2, Reg, 0, Reg), cond_raise_badarg_or_jump_to_fail_label( {{free, Reg}, '&', ?TERM_BOXED_TAG_MASK, '!=', BoxedTag}, Label, MMod, MSt3 @@ -3107,7 +3537,7 @@ verify_is_boxed_with_tag(Label, {free, Reg}, BoxedTag, MMod, MSt0) when is_atom( verify_is_boxed_with_tag(Label, Arg1, BoxedTag, MMod, MSt1) -> {MSt2, Reg} = MMod:copy_to_native_register(MSt1, Arg1), MSt3 = verify_is_boxed(MMod, MSt2, Reg, Label), - MSt4 = MMod:and_(MSt3, Reg, ?TERM_PRIMARY_CLEAR_MASK), + {MSt4, Reg} = MMod:and_(MSt3, {free, Reg}, ?TERM_PRIMARY_CLEAR_MASK), MSt5 = MMod:move_array_element(MSt4, Reg, 0, Reg), 
cond_raise_badarg_or_jump_to_fail_label( {{free, Reg}, '&', ?TERM_BOXED_TAG_MASK, '!=', BoxedTag}, Label, MMod, MSt5 @@ -3138,7 +3568,7 @@ verify_is_match_state_and_get_ptr(MMod, MSt0, Src) -> verify_is_match_state_and_get_ptr0(MMod, MSt2, Reg). verify_is_match_state_and_get_ptr0(MMod, MSt0, Reg) -> - MSt1 = MMod:and_(MSt0, Reg, ?TERM_PRIMARY_CLEAR_MASK), + {MSt1, Reg} = MMod:and_(MSt0, {free, Reg}, ?TERM_PRIMARY_CLEAR_MASK), {MSt2, BoxTag} = MMod:get_array_element(MSt1, Reg, 0), MSt3 = cond_raise_badarg( {{free, BoxTag}, '&', ?TERM_BOXED_TAG_MASK, '!=', ?TERM_BOXED_BIN_MATCH_STATE}, MMod, MSt2 @@ -3195,6 +3625,13 @@ verify_is_any_integer(Arg1, Fail, MMod, MSt0) -> Arg1, ?TERM_INTEGER_TAG, ?TERM_BOXED_POSITIVE_INTEGER, Fail, MMod, MSt0 ). +verify_is_number(Arg1, Fail, MMod, MSt0) -> + {MSt1, Reg} = MMod:copy_to_native_register(MSt0, Arg1), + {MSt2, IsNumber} = MMod:call_primitive(MSt1, ?PRIM_TERM_IS_NUMBER, [{free, Reg}]), + cond_raise_badarg_or_jump_to_fail_label( + {'(bool)', {free, IsNumber}, '==', false}, Fail, MMod, MSt2 + ). 
+ %%----------------------------------------------------------------------------- %% @doc Test if Arg1 is a binary, jump to FailLabel if it isn't or raise %% badarg if FailLabel is 0 @@ -3207,9 +3644,9 @@ verify_is_any_integer(Arg1, Fail, MMod, MSt0) -> verify_is_binary(Arg1, FailLabel, MMod, MSt0) -> {MSt1, Reg} = MMod:copy_to_native_register(MSt0, Arg1), MSt2 = verify_is_boxed(MMod, MSt1, Reg, FailLabel), - MSt3 = MMod:and_(MSt2, Reg, ?TERM_PRIMARY_CLEAR_MASK), + {MSt3, Reg} = MMod:and_(MSt2, {free, Reg}, ?TERM_PRIMARY_CLEAR_MASK), MSt4 = MMod:move_array_element(MSt3, Reg, 0, Reg), - MSt5 = MMod:and_(MSt4, Reg, ?TERM_BOXED_TAG_MASK), + {MSt5, Reg} = MMod:and_(MSt4, {free, Reg}, ?TERM_BOXED_TAG_MASK), MSt6 = cond_raise_badarg_or_jump_to_fail_label( {'and', [ {Reg, '!=', ?TERM_BOXED_REFC_BINARY}, @@ -3608,7 +4045,7 @@ term_get_tuple_arity(Tuple, MMod, MSt0) -> {free, TupleReg} -> MMod:move_to_native_register(MSt0, TupleReg); _ -> MMod:copy_to_native_register(MSt0, Tuple) end, - MSt2 = MMod:and_(MSt1, Reg, ?TERM_PRIMARY_CLEAR_MASK), + {MSt2, Reg} = MMod:and_(MSt1, {free, Reg}, ?TERM_PRIMARY_CLEAR_MASK), MSt3 = MMod:move_array_element(MSt2, Reg, 0, Reg), {MSt4, ArityReg} = MMod:shift_right(MSt3, {free, Reg}, 6), {MSt4, ArityReg}. @@ -3623,7 +4060,7 @@ term_get_map_keys(Map, MMod, MSt0) -> {free, MapReg} -> MMod:move_to_native_register(MSt0, MapReg); _ -> MMod:copy_to_native_register(MSt0, Map) end, - MSt2 = MMod:and_(MSt1, Reg, ?TERM_PRIMARY_CLEAR_MASK), + {MSt2, Reg} = MMod:and_(MSt1, {free, Reg}, ?TERM_PRIMARY_CLEAR_MASK), MSt3 = MMod:move_array_element(MSt2, Reg, 1, Reg), {MSt3, Reg}. @@ -3687,9 +4124,14 @@ term_binary_heap_size({free, Reg}, MMod, MSt0) -> {MSt1, Reg}. term_binary_size({free, BinReg}, MMod, MSt0) -> - MSt1 = MMod:and_(MSt0, BinReg, ?TERM_PRIMARY_CLEAR_MASK), + {MSt1, BinReg} = MMod:and_(MSt0, {free, BinReg}, ?TERM_PRIMARY_CLEAR_MASK), MSt2 = MMod:move_array_element(MSt1, BinReg, 1, BinReg), - {MSt2, BinReg}. 
+ {MSt2, BinReg}; +term_binary_size(Src, MMod, MSt0) -> + {MSt1, SrcReg} = MMod:move_to_native_register(MSt0, Src), + {MSt2, SrcReg} = MMod:and_(MSt1, {free, SrcReg}, ?TERM_PRIMARY_CLEAR_MASK), + MSt3 = MMod:move_array_element(MSt2, SrcReg, 1, SrcReg), + {MSt3, SrcReg}. term_set_map_assoc(MapPtrReg, {free, PosReg}, {free, Key}, {free, Value}, MMod, MSt0) -> {MSt1, MapKeysReg} = MMod:get_array_element(MSt0, MapPtrReg, 1), @@ -3698,7 +4140,7 @@ term_set_map_assoc(MapPtrReg, {free, PosReg}, {free, Key}, {free, Value}, MMod, MMod:free_native_registers(MSt3, [PosReg, Value]). term_put_tuple_element({free, TupleReg}, PosReg, {free, Value}, MMod, MSt0) -> - MSt1 = MMod:and_(MSt0, TupleReg, ?TERM_PRIMARY_CLEAR_MASK), + {MSt1, TupleReg} = MMod:and_(MSt0, {free, TupleReg}, ?TERM_PRIMARY_CLEAR_MASK), MSt2 = MMod:move_to_array_element(MSt1, Value, TupleReg, PosReg, 1), MMod:free_native_registers(MSt2, [TupleReg, Value]). @@ -3730,8 +4172,42 @@ variant() -> %% @doc Instantiate backend for this platform %% @return A tuple with the backend module and the backend state for this platform -backend({StreamModule, Stream}) -> +backend(StreamModule, Stream) -> BackendModule = ?MODULE:backend_module(), Variant = ?MODULE:variant(), BackendState = BackendModule:new(Variant, StreamModule, Stream), {BackendModule, BackendState}. + +-ifdef(JIT_INSTRUMENT). 
+instrument(Tag, #state{line_offsets = Lines, tail_cache = TC}, MSt) -> + StateSize = erts_debug:flat_size({Lines, TC}), + MStSize = erts_debug:flat_size(MSt), + LinesCount = length(Lines), + TCCount = length(TC), + + % Extract branches count from backend state + % state record: {state, stream_module, stream, offset, branches, jump_table_start, ...} + BranchesCount = + case element(1, MSt) of + state -> length(element(5, MSt)); + _ -> unknown + end, + + {heap_size, HeapSize} = process_info(self(), heap_size), + {total_heap_size, TotalHeapSize} = process_info(self(), total_heap_size), + + io:format( + "~s: mst=~p words, state=~p words (lines=~p, tc=~p, br=~p), " + "heap=~p, total_heap=~p~n", + [ + Tag, + MStSize, + StateSize, + LinesCount, + TCCount, + BranchesCount, + HeapSize, + TotalHeapSize + ] + ). +-endif. diff --git a/libs/jit/src/jit_aarch64.erl b/libs/jit/src/jit_aarch64.erl index 1eba4fba80..52c28ad465 100644 --- a/libs/jit/src/jit_aarch64.erl +++ b/libs/jit/src/jit_aarch64.erl @@ -25,6 +25,7 @@ new/3, stream/1, offset/1, + flush/1, debugger/1, used_regs/1, available_regs/1, @@ -38,6 +39,7 @@ return_if_not_equal_to_ctx/2, jump_to_label/2, jump_to_continuation/2, + jump_to_offset/2, if_block/3, if_else_block/4, shift_right/3, @@ -132,6 +134,7 @@ stream :: stream(), offset :: non_neg_integer(), branches :: [{non_neg_integer(), non_neg_integer(), non_neg_integer()}], + jump_table_start :: non_neg_integer(), available_regs :: [aarch64_register()], used_regs :: [aarch64_register()], labels :: [{integer() | reference(), integer()}], @@ -151,6 +154,7 @@ -type condition() :: {aarch64_register(), '<', integer()} | {maybe_free_aarch64_register(), '<', aarch64_register()} + | {integer(), '<', maybe_free_aarch64_register()} | {maybe_free_aarch64_register(), '==', integer()} | {maybe_free_aarch64_register(), '!=', aarch64_register() | integer()} | {'(int)', maybe_free_aarch64_register(), '==', integer()} @@ -231,6 +235,7 @@ new(Variant, StreamModule, Stream) -> 
stream_module = StreamModule, stream = Stream, branches = [], + jump_table_start = 0, offset = StreamModule:offset(Stream), available_regs = ?AVAILABLE_REGS, used_regs = [], @@ -258,6 +263,16 @@ stream(#state{stream = Stream}) -> offset(#state{stream_module = StreamModule, stream = Stream}) -> StreamModule:offset(Stream). +%%----------------------------------------------------------------------------- +%% @doc Flush the current state (unused on aarch64) +%% @end +%% @param State current backend state +%% @return The flushed state +%%----------------------------------------------------------------------------- +-spec flush(state()) -> state(). +flush(#state{} = State) -> + State. + %%----------------------------------------------------------------------------- %% @doc Emit a debugger of breakpoint instruction. This is used for debugging %% and not in production. @@ -343,22 +358,78 @@ assert_all_native_free(#state{ %% @return Updated backend state %%----------------------------------------------------------------------------- -spec jump_table(state(), pos_integer()) -> state(). -jump_table(State, LabelsCount) -> - jump_table0(State, 0, LabelsCount). +jump_table(#state{stream_module = StreamModule, stream = Stream0} = State, LabelsCount) -> + JumpTableStart = StreamModule:offset(Stream0), + jump_table0(State#state{jump_table_start = JumpTableStart}, 0, LabelsCount). -spec jump_table0(state(), non_neg_integer(), pos_integer()) -> state(). 
jump_table0(State, N, LabelsCount) when N > LabelsCount -> State; jump_table0( - #state{stream_module = StreamModule, stream = Stream0, branches = Branches} = State, + #state{stream_module = StreamModule, stream = Stream0} = State, N, LabelsCount ) -> - Offset = StreamModule:offset(Stream0), - BranchInstr = jit_aarch64_asm:b(0), - Reloc = {N, Offset, b}, + % Placeholder jumps to next entry (1 instruction forward = 4 bytes) + BranchInstr = jit_aarch64_asm:b(1), Stream1 = StreamModule:append(Stream0, BranchInstr), - jump_table0(State#state{stream = Stream1, branches = [Reloc | Branches]}, N + 1, LabelsCount). + jump_table0(State#state{stream = Stream1}, N + 1, LabelsCount). + +%%----------------------------------------------------------------------------- +%% @doc Patch a single branch in the stream +%% @end +%% @param StreamModule stream module +%% @param Stream stream state +%% @param Offset offset of the branch to patch +%% @param Type type of the branch +%% @param LabelOffset target label offset +%% @return Updated stream +%%----------------------------------------------------------------------------- +-spec patch_branch(module(), stream(), non_neg_integer(), any(), non_neg_integer()) -> stream(). +patch_branch(StreamModule, Stream, Offset, Type, LabelOffset) -> + Rel = LabelOffset - Offset, + NewInstr = + case Type of + {bcc, CC} -> jit_aarch64_asm:bcc(CC, Rel); + {adr, Reg} -> jit_aarch64_asm:adr(Reg, Rel); + b -> jit_aarch64_asm:b(Rel) + end, + StreamModule:replace(Stream, Offset, NewInstr). 
+ +%%----------------------------------------------------------------------------- +%% @doc Patch all branches targeting a specific label and return remaining branches +%% @end +%% @param StreamModule stream module +%% @param Stream stream state +%% @param TargetLabel label to patch branches for +%% @param LabelOffset offset of the target label +%% @param Branches list of pending branches +%% @return {UpdatedStream, RemainingBranches} +%%----------------------------------------------------------------------------- +-spec patch_branches_for_label( + module(), + stream(), + integer(), + non_neg_integer(), + [{integer(), non_neg_integer(), any()}] +) -> {stream(), [{integer(), non_neg_integer(), any()}]}. +patch_branches_for_label(StreamModule, Stream, TargetLabel, LabelOffset, Branches) -> + patch_branches_for_label(StreamModule, Stream, TargetLabel, LabelOffset, Branches, []). + +patch_branches_for_label(_StreamModule, Stream, _TargetLabel, _LabelOffset, [], Acc) -> + {Stream, lists:reverse(Acc)}; +patch_branches_for_label( + StreamModule, + Stream0, + TargetLabel, + LabelOffset, + [{Label, Offset, Type} | Rest], + Acc +) when Label =:= TargetLabel -> + Stream1 = patch_branch(StreamModule, Stream0, Offset, Type, LabelOffset), + patch_branches_for_label(StreamModule, Stream1, TargetLabel, LabelOffset, Rest, Acc); +patch_branches_for_label(StreamModule, Stream, TargetLabel, LabelOffset, [Branch | Rest], Acc) -> + patch_branches_for_label(StreamModule, Stream, TargetLabel, LabelOffset, Rest, [Branch | Acc]). %%----------------------------------------------------------------------------- %% @doc Rewrite stream to update all branches for labels. 
@@ -378,14 +449,7 @@ update_branches( } = State ) -> {Label, LabelOffset} = lists:keyfind(Label, 1, Labels), - Rel = LabelOffset - Offset, - NewInstr = - case Type of - {bcc, CC} -> jit_aarch64_asm:bcc(CC, Rel); - {adr, Reg} -> jit_aarch64_asm:adr(Reg, Rel); - b -> jit_aarch64_asm:b(Rel) - end, - Stream1 = StreamModule:replace(Stream0, Offset, NewInstr), + Stream1 = patch_branch(StreamModule, Stream0, Offset, Type, LabelOffset), update_branches(State#state{stream = Stream1, branches = BranchesT}). %%----------------------------------------------------------------------------- @@ -531,6 +595,13 @@ jump_to_label( State#state{stream = Stream1, branches = [Reloc | AccBranches]} end. +jump_to_offset(#state{stream_module = StreamModule, stream = Stream0} = State, TargetOffset) -> + Offset = StreamModule:offset(Stream0), + Rel = TargetOffset - Offset, + I1 = jit_aarch64_asm:b(Rel), + Stream1 = StreamModule:append(Stream0, I1), + State#state{stream = Stream1}. + %%----------------------------------------------------------------------------- %% @doc Jump to a continuation address stored in a register. %% This is used for optimized intra-module returns. @@ -687,15 +758,47 @@ if_else_block( jit_aarch64_asm:cc() | {tbz | tbnz, atom(), 0..63} | {cbz, atom()}, non_neg_integer() }. 
-if_block_cond(#state{stream_module = StreamModule, stream = Stream0} = State0, {Reg, '<', 0}) -> +if_block_cond(#state{stream_module = StreamModule, stream = Stream0} = State0, {RegOrTuple, '<', 0}) -> + Reg = + case RegOrTuple of + {free, Reg0} -> Reg0; + RegOrTuple -> RegOrTuple + end, I = jit_aarch64_asm:tbz(Reg, 63, 0), Stream1 = StreamModule:append(Stream0, I), - State1 = State0#state{stream = Stream1}, - {State1, {tbz, Reg, 63}, 0}; + State1 = if_block_free_reg(RegOrTuple, State0), + State2 = State1#state{stream = Stream1}, + {State2, {tbz, Reg, 63}, 0}; +% Handle {Val, '<', Reg} - means Val < Reg, jump if false (i.e., if Val >= Reg or Reg <= Val) if_block_cond( #state{stream_module = StreamModule, stream = Stream0} = State0, - {Reg, '<', Val} -) when is_atom(Reg), is_integer(Val) -> + {Val, '<', RegOrTuple} +) when is_integer(Val) -> + Reg = + case RegOrTuple of + {free, Reg0} -> Reg0; + RegOrTuple -> RegOrTuple + end, + I1 = jit_aarch64_asm:cmp(Reg, Val), + % le = less than or equal + I2 = jit_aarch64_asm:bcc(le, 0), + Code = << + I1/binary, + I2/binary + >>, + Stream1 = StreamModule:append(Stream0, Code), + State1 = if_block_free_reg(RegOrTuple, State0), + State2 = State1#state{stream = Stream1}, + {State2, le, byte_size(I1)}; +if_block_cond( + #state{stream_module = StreamModule, stream = Stream0} = State0, + {RegOrTuple, '<', Val} +) when is_integer(Val), Val =/= 0 -> + Reg = + case RegOrTuple of + {free, Reg0} -> Reg0; + RegOrTuple -> RegOrTuple + end, I1 = jit_aarch64_asm:cmp(Reg, Val), % ge = greater than or equal I2 = jit_aarch64_asm:bcc(ge, 0), @@ -704,8 +807,9 @@ if_block_cond( I2/binary >>, Stream1 = StreamModule:append(Stream0, Code), - State1 = State0#state{stream = Stream1}, - {State1, ge, byte_size(I1)}; + State1 = if_block_free_reg(RegOrTuple, State0), + State2 = State1#state{stream = Stream1}, + {State2, ge, byte_size(I1)}; if_block_cond( #state{stream_module = StreamModule, stream = Stream0} = State0, {RegOrTuple, '<', RegB} @@ -933,7 
+1037,7 @@ if_block_cond( ) when ?IS_GPR(Reg) -> % AND with mask OffsetBefore = StreamModule:offset(Stream0), - State1 = and_(State0, Reg, Mask), + {State1, Reg} = and_(State0, RegTuple, Mask), Stream1 = State1#state.stream, % Compare with value I2 = jit_aarch64_asm:cmp(Reg, Val), @@ -1835,17 +1939,30 @@ set_continuation_to_label( stream_module = StreamModule, stream = Stream0, available_regs = [Temp | _], - branches = Branches + branches = Branches, + labels = Labels } = State, Label ) -> Offset = StreamModule:offset(Stream0), - I1 = jit_aarch64_asm:adr(Temp, 0), - Reloc = {Label, Offset, {adr, Temp}}, - I2 = jit_aarch64_asm:str(Temp, ?JITSTATE_CONTINUATION), - Code = <>, - Stream1 = StreamModule:append(Stream0, Code), - State#state{stream = Stream1, branches = [Reloc | Branches]}. + case lists:keyfind(Label, 1, Labels) of + {Label, LabelOffset} -> + % Label is already known, emit direct adr without relocation + Rel = LabelOffset - Offset, + I1 = jit_aarch64_asm:adr(Temp, Rel), + I2 = jit_aarch64_asm:str(Temp, ?JITSTATE_CONTINUATION), + Code = <>, + Stream1 = StreamModule:append(Stream0, Code), + State#state{stream = Stream1}; + false -> + % Label not yet known, emit placeholder and add relocation + I1 = jit_aarch64_asm:adr(Temp, 0), + Reloc = {Label, Offset, {adr, Temp}}, + I2 = jit_aarch64_asm:str(Temp, ?JITSTATE_CONTINUATION), + Code = <>, + Stream1 = StreamModule:append(Stream0, Code), + State#state{stream = Stream1, branches = [Reloc | Branches]} + end. %%----------------------------------------------------------------------------- %% @doc Set the continuation address to the current offset, creating a @@ -1945,9 +2062,18 @@ op_imm(#state{stream_module = StreamModule, stream = Stream0} = State, Op, RegA, %% @param Val immediate value to AND %% @return Updated backend state %%----------------------------------------------------------------------------- --spec and_(state(), aarch64_register(), integer()) -> state(). 
-and_(State, Reg, Val) -> - op_imm(State, and_, Reg, Reg, Val). +and_(State, {free, Reg}, Val) -> + NewState = op_imm(State, and_, Reg, Reg, Val), + {NewState, Reg}; +and_( + #state{available_regs = [ResultReg | T], used_regs = UR} = State, + Reg, + Val +) -> + NewState = op_imm( + State#state{available_regs = T, used_regs = [ResultReg | UR]}, and_, ResultReg, Reg, Val + ), + {NewState, ResultReg}. %%----------------------------------------------------------------------------- %% @doc Perform bitwise OR of a register with an immediate value. @@ -1981,8 +2107,10 @@ add(State, Reg, Val) -> %% @param Val immediate value to subtract %% @return Updated backend state %%----------------------------------------------------------------------------- --spec sub(state(), aarch64_register(), integer()) -> state(). -sub(#state{stream_module = StreamModule, stream = Stream0} = State, Reg, Val) -> +-spec sub(state(), aarch64_register(), integer() | aarch64_register()) -> state(). +sub(State, Reg, Val) when is_integer(Val) -> + op_imm(State, sub, Reg, Reg, Val); +sub(#state{stream_module = StreamModule, stream = Stream0} = State, Reg, Val) when is_atom(Val) -> I1 = jit_aarch64_asm:sub(Reg, Reg, Val), Stream1 = StreamModule:append(Stream0, I1), State#state{stream = Stream1}. 
@@ -2123,6 +2251,7 @@ call_only_or_schedule_next( stream_module = StreamModule, stream = Stream0, branches = Branches, + labels = Labels, available_regs = [Temp | _] } = State0, Label @@ -2135,11 +2264,22 @@ call_only_or_schedule_next( I3 = jit_aarch64_asm:str_w(Temp, ?JITSTATE_REDUCTIONCOUNT), Stream1 = StreamModule:append(Stream0, <>), BNEOffset = StreamModule:offset(Stream1), - % Branch to label if reduction count is not zero - I4 = jit_aarch64_asm:bcc(ne, 0), - Reloc1 = {Label, BNEOffset, {bcc, ne}}, - Stream2 = StreamModule:append(Stream1, I4), - State1 = State0#state{stream = Stream2, branches = [Reloc1 | Branches]}, + + case lists:keyfind(Label, 1, Labels) of + {Label, LabelOffset} -> + % Label is already known, emit direct branch with calculated offset + % Calculate relative offset (must be 4-byte aligned) + Rel = LabelOffset - BNEOffset, + I4 = jit_aarch64_asm:bcc(ne, Rel), + Stream2 = StreamModule:append(Stream1, I4), + State1 = State0#state{stream = Stream2}; + false -> + % Label not yet known, emit placeholder and add relocation + I4 = jit_aarch64_asm:bcc(ne, 0), + Reloc1 = {Label, BNEOffset, {bcc, ne}}, + Stream2 = StreamModule:append(Stream1, I4), + State1 = State0#state{stream = Stream2, branches = [Reloc1 | Branches]} + end, State2 = set_continuation_to_label(State1, Label), call_primitive_last(State2, ?PRIM_SCHEDULE_NEXT_CP, [ctx, jit_state]). @@ -2315,5 +2455,35 @@ add_label(#state{stream_module = StreamModule, stream = Stream} = State, Label) %% @return Updated backend state %%----------------------------------------------------------------------------- -spec add_label(state(), integer() | reference(), integer()) -> state(). 
+add_label( + #state{ + stream_module = StreamModule, + stream = Stream0, + jump_table_start = JumpTableStart, + branches = Branches, + labels = Labels + } = State, + Label, + LabelOffset +) when is_integer(Label) -> + % Patch the jump table entry immediately + % Each b instruction is 4 bytes + JumpTableEntryOffset = JumpTableStart + Label * 4, + RelativeOffset = LabelOffset - JumpTableEntryOffset, + BranchInstr = jit_aarch64_asm:b(RelativeOffset), + Stream1 = StreamModule:replace(Stream0, JumpTableEntryOffset, BranchInstr), + + % Eagerly patch any branches targeting this label + {Stream2, RemainingBranches} = patch_branches_for_label( + StreamModule, + Stream1, + Label, + LabelOffset, + Branches + ), + + State#state{ + stream = Stream2, branches = RemainingBranches, labels = [{Label, LabelOffset} | Labels] + }; add_label(#state{labels = Labels} = State, Label, Offset) -> State#state{labels = [{Label, Offset} | Labels]}. diff --git a/libs/jit/src/jit_aarch64_asm.erl b/libs/jit/src/jit_aarch64_asm.erl index 6237294614..277a97d9ea 100644 --- a/libs/jit/src/jit_aarch64_asm.erl +++ b/libs/jit/src/jit_aarch64_asm.erl @@ -948,6 +948,8 @@ sub(Rd, Rn, Imm) when is_atom(Rd), is_atom(Rn), is_integer(Imm), Imm >= 0, Imm = RdNum = reg_to_num(Rd), RnNum = reg_to_num(Rn), <<(16#D1000000 bor ((Imm band 16#FFF) bsl 10) bor (RnNum bsl 5) bor RdNum):32/little>>; +sub(_Rd, _Rn, Imm) when is_integer(Imm) -> + error({unencodable_immediate, Imm}); sub(Rd, Rn, Rm) when is_atom(Rd), is_atom(Rn), is_atom(Rm) -> sub(Rd, Rn, Rm, {lsl, 0}). 
diff --git a/libs/jit/src/jit_armv6m.erl b/libs/jit/src/jit_armv6m.erl index b051850135..7bfd329e89 100644 --- a/libs/jit/src/jit_armv6m.erl +++ b/libs/jit/src/jit_armv6m.erl @@ -25,6 +25,7 @@ new/3, stream/1, offset/1, + flush/1, debugger/1, used_regs/1, available_regs/1, @@ -38,6 +39,7 @@ return_if_not_equal_to_ctx/2, jump_to_label/2, jump_to_continuation/2, + jump_to_offset/2, if_block/3, if_else_block/4, shift_right/3, @@ -74,8 +76,10 @@ -include_lib("jit.hrl"). -include("primitives.hrl"). +-include("term.hrl"). --define(ASSERT(Expr), true = Expr). +%-define(ASSERT(Expr), true = Expr). +-define(ASSERT(Expr), ok). %% ARMv6-M AAPCS32 ABI: r0-r3 are used for argument passing and return value. %% r0-r1 form a double-word for 64-bit returns, additional args passed on stack. @@ -130,10 +134,12 @@ stream :: stream(), offset :: non_neg_integer(), branches :: [{non_neg_integer(), non_neg_integer(), non_neg_integer()}], + jump_table_start :: non_neg_integer(), available_regs :: [armv6m_register()], used_regs :: [armv6m_register()], labels :: [{integer() | reference(), integer()}], - variant :: non_neg_integer() + variant :: non_neg_integer(), + literal_pool :: [{non_neg_integer(), armv6m_register(), non_neg_integer()}] }). -type state() :: #state{}. @@ -149,6 +155,7 @@ -type condition() :: {armv6m_register(), '<', integer()} | {maybe_free_armv6m_register(), '<', armv6m_register()} + | {integer(), '<', maybe_free_armv6m_register()} | {maybe_free_armv6m_register(), '==', integer()} | {maybe_free_armv6m_register(), '!=', armv6m_register() | integer()} | {'(int)', maybe_free_armv6m_register(), '==', integer()} @@ -242,11 +249,13 @@ new(Variant, StreamModule, Stream) -> stream_module = StreamModule, stream = Stream, branches = [], + jump_table_start = 0, offset = StreamModule:offset(Stream), available_regs = ?AVAILABLE_REGS, used_regs = [], labels = [], - variant = Variant + variant = Variant, + literal_pool = [] }. 
%%----------------------------------------------------------------------------- @@ -269,6 +278,16 @@ stream(#state{stream = Stream}) -> offset(#state{stream_module = StreamModule, stream = Stream}) -> StreamModule:offset(Stream). +%%----------------------------------------------------------------------------- +%% @doc Flush the current state, e.g. literal pools +%% @end +%% @param State current backend state +%% @return The flushed state +%%----------------------------------------------------------------------------- +-spec flush(state()) -> state(). +flush(#state{} = State) -> + flush_literal_pool(State). + %%----------------------------------------------------------------------------- %% @doc Emit a debugger of breakpoint instruction. This is used for debugging %% and not in production. @@ -364,13 +383,14 @@ assert_all_native_free(#state{ %% @return Updated backend state %%----------------------------------------------------------------------------- -spec jump_table(state(), pos_integer()) -> state(). -jump_table(State, LabelsCount) -> - jump_table0(State, 0, LabelsCount). +jump_table(#state{stream_module = StreamModule, stream = Stream0} = State, LabelsCount) -> + JumpTableStart = StreamModule:offset(Stream0), + jump_table0(State#state{jump_table_start = JumpTableStart}, 0, LabelsCount). 
jump_table0(State, N, LabelsCount) when N > LabelsCount -> State; jump_table0( - #state{stream_module = StreamModule, stream = Stream0, branches = Branches} = State, + #state{stream_module = StreamModule, stream = Stream0} = State, N, LabelsCount ) -> @@ -380,37 +400,23 @@ jump_table0( I3 = jit_armv6m_asm:add(pc, r3), I4 = jit_armv6m_asm:nop(), - JumpEntry = <>, + JumpEntry = <>, Stream1 = StreamModule:append(Stream0, JumpEntry), - % Add relocation for the data entry so update_branches/2 can patch the jump target - DataOffset = StreamModule:offset(Stream1) - 4, - % Calculate the offset of the add instruction (3rd instruction, at offset 4 from entry start) - EntryStartOffset = StreamModule:offset(Stream1) - 12, - AddInstrOffset = EntryStartOffset + 4, - DataReloc = {N, DataOffset, {jump_table_data, AddInstrOffset}}, - UpdatedState = State#state{stream = Stream1, branches = [DataReloc | Branches]}, - - jump_table0(UpdatedState, N + 1, LabelsCount). + jump_table0(State#state{stream = Stream1}, N + 1, LabelsCount). %%----------------------------------------------------------------------------- -%% @doc Rewrite stream to update all branches for labels. +%% @doc Patch a single branch in the stream %% @end -%% @param State current backend state -%% @return Updated backend state +%% @param StreamModule stream module +%% @param Stream stream state +%% @param Offset offset of the branch to patch +%% @param Type type of the branch +%% @param LabelOffset target label offset +%% @return Updated stream %%----------------------------------------------------------------------------- --spec update_branches(state()) -> state(). 
-update_branches(#state{branches = []} = State) -> - State; -update_branches( - #state{ - stream_module = StreamModule, - stream = Stream0, - branches = [{Label, Offset, Type} | BranchesT], - labels = Labels - } = State -) -> - {Label, LabelOffset} = lists:keyfind(Label, 1, Labels), +-spec patch_branch(module(), stream(), non_neg_integer(), any(), non_neg_integer()) -> stream(). +patch_branch(StreamModule, Stream, Offset, Type, LabelOffset) -> Rel = LabelOffset - Offset, NewInstr = case Type of @@ -484,15 +490,64 @@ update_branches( I4 = <>, <> end - end; - {jump_table_data, AddInstrOffset} -> - % Calculate offset from 'add pc, pc, r3' instruction + 4 to target label - % PC when add instruction executes - AddPC = AddInstrOffset + 4, - RelativeOffset = LabelOffset - AddPC, - <> + end end, - Stream1 = StreamModule:replace(Stream0, Offset, NewInstr), + StreamModule:replace(Stream, Offset, NewInstr). + +%%----------------------------------------------------------------------------- +%% @doc Patch all branches targeting a specific label and return remaining branches +%% @end +%% @param StreamModule stream module +%% @param Stream stream state +%% @param TargetLabel label to patch branches for +%% @param LabelOffset offset of the target label +%% @param Branches list of pending branches +%% @return {UpdatedStream, RemainingBranches} +%%----------------------------------------------------------------------------- +-spec patch_branches_for_label( + module(), + stream(), + integer(), + non_neg_integer(), + [{integer(), non_neg_integer(), any()}] +) -> {stream(), [{integer(), non_neg_integer(), any()}]}. +patch_branches_for_label(StreamModule, Stream, TargetLabel, LabelOffset, Branches) -> + patch_branches_for_label(StreamModule, Stream, TargetLabel, LabelOffset, Branches, []). 
+ +patch_branches_for_label(_StreamModule, Stream, _TargetLabel, _LabelOffset, [], Acc) -> + {Stream, lists:reverse(Acc)}; +patch_branches_for_label( + StreamModule, + Stream0, + TargetLabel, + LabelOffset, + [{Label, Offset, Type} | Rest], + Acc +) when Label =:= TargetLabel -> + Stream1 = patch_branch(StreamModule, Stream0, Offset, Type, LabelOffset), + patch_branches_for_label(StreamModule, Stream1, TargetLabel, LabelOffset, Rest, Acc); +patch_branches_for_label(StreamModule, Stream, TargetLabel, LabelOffset, [Branch | Rest], Acc) -> + patch_branches_for_label(StreamModule, Stream, TargetLabel, LabelOffset, Rest, [Branch | Acc]). + +%%----------------------------------------------------------------------------- +%% @doc Rewrite stream to update all branches for labels. +%% @end +%% @param State current backend state +%% @return Updated backend state +%%----------------------------------------------------------------------------- +-spec update_branches(state()) -> state(). +update_branches(#state{branches = []} = State) -> + State; +update_branches( + #state{ + stream_module = StreamModule, + stream = Stream0, + branches = [{Label, Offset, Type} | BranchesT], + labels = Labels + } = State +) -> + {Label, LabelOffset} = lists:keyfind(Label, 1, Labels), + Stream1 = patch_branch(StreamModule, Stream0, Offset, Type, LabelOffset), update_branches(State#state{stream = Stream1, branches = BranchesT}). %%----------------------------------------------------------------------------- @@ -631,7 +686,8 @@ call_primitive_last( State2 = set_registers_args(State1, ArgsForTailCall, 0), tail_call_with_jit_state_registers_only(State2, Temp) end, - State4#state{available_regs = ?AVAILABLE_REGS, used_regs = []}. + State5 = State4#state{available_regs = ?AVAILABLE_REGS, used_regs = []}, + flush_literal_pool(State5). 
%%----------------------------------------------------------------------------- %% @doc Tail call to address in register, restoring prolog registers including @@ -724,7 +780,15 @@ jump_to_label( Offset = StreamModule:offset(Stream0), {State1, CodeBlock} = branch_to_label_code(State0, Offset, Label, LabelLookupResult), Stream1 = StreamModule:append(Stream0, CodeBlock), - State1#state{stream = Stream1}. + State2 = State1#state{stream = Stream1}, + flush_literal_pool(State2). + +jump_to_offset(#state{stream_module = StreamModule, stream = Stream0} = State, TargetOffset) -> + Offset = StreamModule:offset(Stream0), + CodeBlock = branch_to_offset_code(State, Offset, TargetOffset), + Stream1 = StreamModule:append(Stream0, CodeBlock), + State2 = State#state{stream = Stream1}, + flush_literal_pool(State2). %%----------------------------------------------------------------------------- %% @doc Jump to address in continuation pointer register @@ -786,17 +850,17 @@ jump_to_continuation( Code = <>, Stream2 = StreamModule:append(State1#state.stream, Code), % Free all registers as this is a terminal instruction - State1#state{stream = Stream2, available_regs = ?AVAILABLE_REGS, used_regs = []}. + State2 = State1#state{stream = Stream2, available_regs = ?AVAILABLE_REGS, used_regs = []}, + flush_literal_pool(State2). 
-branch_to_label_code(State, Offset, Label, {Label, LabelOffset}) when - LabelOffset - Offset =< 2050, LabelOffset - Offset >= -2044 +branch_to_offset_code(_State, Offset, TargetOffset) when + TargetOffset - Offset =< 2050, TargetOffset - Offset >= -2044 -> % Near branch: use direct B instruction - Rel = LabelOffset - Offset, - CodeBlock = jit_armv6m_asm:b(Rel), - {State, CodeBlock}; -branch_to_label_code( - #state{available_regs = [TempReg | _]} = State0, Offset, Label, {Label, LabelOffset} + Rel = TargetOffset - Offset, + jit_armv6m_asm:b(Rel); +branch_to_offset_code( + #state{available_regs = [TempReg | _]}, Offset, TargetOffset ) -> % Far branch: use register-based sequence, need temporary register if @@ -807,23 +871,26 @@ branch_to_label_code( I3 = jit_armv6m_asm:bx(TempReg), % Unaligned : need nop I4 = jit_armv6m_asm:nop(), - LiteralValue = LabelOffset - Offset - 5, + LiteralValue = TargetOffset - Offset - 5, I5 = <>, - CodeBlock = <>; + <>; true -> % Unaligned I1 = jit_armv6m_asm:ldr(TempReg, {pc, 4}), I2 = jit_armv6m_asm:add(TempReg, pc), I3 = jit_armv6m_asm:bx(TempReg), - LiteralValue = LabelOffset - Offset - 5, + LiteralValue = TargetOffset - Offset - 5, I4 = <>, - CodeBlock = <> - end, - {State0, CodeBlock}; + <> + end. 
+ +branch_to_label_code(State, Offset, Label, {Label, LabelOffset}) -> + CodeBlock = branch_to_offset_code(State, Offset, LabelOffset), + {State, CodeBlock}; branch_to_label_code( #state{available_regs = [TempReg | _], branches = Branches} = State0, Offset, Label, false ) -> - {CodeBlock, SequenceSize} = + SequenceSize = if Offset rem 4 =:= 0 -> % Aligned @@ -835,7 +902,7 @@ branch_to_label_code( % Placeholder offset I5 = <<0:32/little>>, Seq = <>, - {Seq, byte_size(Seq)}; + byte_size(Seq); true -> % Unaligned I1 = jit_armv6m_asm:ldr(TempReg, {pc, 4}), @@ -844,16 +911,17 @@ branch_to_label_code( % Placeholder offset I4 = <<0:32/little>>, Seq = <>, - {Seq, byte_size(Seq)} + byte_size(Seq) end, % Add relocation entry + CodeBlock = binary:copy(<<16#FF>>, SequenceSize), Reloc = {Label, Offset, {far_branch, SequenceSize, TempReg}}, State1 = State0#state{branches = [Reloc | Branches]}, {State1, CodeBlock}; branch_to_label_code( #state{available_regs = [], branches = Branches} = State0, Offset, Label, false ) -> - {CodeBlock, SequenceSize} = + SequenceSize = if Offset rem 4 =/= 0 -> % Unaligned @@ -871,7 +939,7 @@ branch_to_label_code( Seq = <>, - {Seq, byte_size(Seq)}; + byte_size(Seq); true -> % Aligned I1 = jit_armv6m_asm:push([r0]), @@ -885,9 +953,10 @@ branch_to_label_code( I7 = <<0:32/little>>, Seq = <>, - {Seq, byte_size(Seq)} + byte_size(Seq) end, % Add relocation entry + CodeBlock = binary:copy(<<16#FF>>, SequenceSize), Reloc = {Label, Offset, {far_branch, SequenceSize, ?IP_REG}}, State1 = State0#state{branches = [Reloc | Branches]}, {State1, CodeBlock}; @@ -971,7 +1040,8 @@ if_else_block( Stream2 = State2#state.stream, %% Emit unconditional branch to skip the else block (will be replaced) ElseJumpOffset = StreamModule:offset(Stream2), - ElseJumpInstr = jit_armv6m_asm:b(0), + ?ASSERT(byte_size(jit_armv6m_asm:b(0)) =:= 2), + ElseJumpInstr = <<16#FFFF:16>>, Stream3 = StreamModule:append(Stream2, ElseJumpInstr), %% Else block starts here. 
OffsetAfter = StreamModule:offset(Stream3), @@ -1000,44 +1070,95 @@ if_else_block( jit_armv6m_asm:cc() | {tbz | tbnz, atom(), 0..63} | {cbz, atom()}, non_neg_integer() }. -if_block_cond(#state{stream_module = StreamModule, stream = Stream0} = State0, {Reg, '<', 0}) -> +if_block_cond( + #state{stream_module = StreamModule, stream = Stream0} = State0, {RegOrTuple, '<', 0} +) -> + Reg = + case RegOrTuple of + {free, Reg0} -> Reg0; + RegOrTuple -> RegOrTuple + end, %% Compare register with 0 I1 = jit_armv6m_asm:cmp(Reg, 0), %% Branch if positive (N flag clear) - I2 = jit_armv6m_asm:bcc(pl, 0), - Stream1 = StreamModule:append(Stream0, <>), - State1 = State0#state{stream = Stream1}, - {State1, pl, byte_size(I1)}; + CC = pl, + ?ASSERT(byte_size(jit_armv6m_asm:bcc(pl, 0)) =:= 2), + Stream1 = StreamModule:append(Stream0, <>), + State1 = if_block_free_reg(RegOrTuple, State0), + State2 = State1#state{stream = Stream1}, + {State2, CC, byte_size(I1)}; if_block_cond( #state{stream_module = StreamModule, stream = Stream0} = State0, - {Reg, '<', Val} -) when is_atom(Reg), is_integer(Val), Val >= 0, Val =< 255 -> + {RegOrTuple, '<', Val} +) when is_integer(Val), Val >= 0, Val =< 255 -> + Reg = + case RegOrTuple of + {free, Reg0} -> Reg0; + RegOrTuple -> RegOrTuple + end, I1 = jit_armv6m_asm:cmp(Reg, Val), % ge = greater than or equal - I2 = jit_armv6m_asm:bcc(ge, 0), - Code = << - I1/binary, - I2/binary - >>, - Stream1 = StreamModule:append(Stream0, Code), - State1 = State0#state{stream = Stream1}, - {State1, ge, byte_size(I1)}; + CC = ge, + ?ASSERT(byte_size(jit_armv6m_asm:bcc(CC, 0)) =:= 2), + Stream1 = StreamModule:append(Stream0, <>), + State1 = if_block_free_reg(RegOrTuple, State0), + State2 = State1#state{stream = Stream1}, + {State2, CC, byte_size(I1)}; if_block_cond( #state{stream_module = StreamModule, available_regs = [Temp | _]} = State0, - {Reg, '<', Val} -) when is_atom(Reg), is_integer(Val) -> + {RegOrTuple, '<', Val} +) when is_integer(Val) -> + Reg = + case 
RegOrTuple of + {free, Reg0} -> Reg0; + RegOrTuple -> RegOrTuple + end, State1 = mov_immediate(State0, Temp, Val), Stream0 = State1#state.stream, I1 = jit_armv6m_asm:cmp(Reg, Temp), % ge = greater than or equal - I2 = jit_armv6m_asm:bcc(ge, 0), - Code = << - I1/binary, - I2/binary - >>, - Stream1 = StreamModule:append(Stream0, Code), + CC = ge, + ?ASSERT(byte_size(jit_armv6m_asm:bcc(CC, 0)) =:= 2), + Stream1 = StreamModule:append(Stream0, <>), + State2 = if_block_free_reg(RegOrTuple, State1), + State3 = State2#state{stream = Stream1}, + {State3, CC, byte_size(I1)}; +if_block_cond( + #state{stream_module = StreamModule, stream = Stream0} = State0, + {Val, '<', RegOrTuple} +) when is_integer(Val), Val >= 0, Val =< 255 -> + Reg = + case RegOrTuple of + {free, Reg0} -> Reg0; + RegOrTuple -> RegOrTuple + end, + I1 = jit_armv6m_asm:cmp(Reg, Val), + % le = less than or equal (branch when Val >= Reg, i.e., NOT Val < Reg) + CC = le, + ?ASSERT(byte_size(jit_armv6m_asm:bcc(CC, 0)) =:= 2), + Stream1 = StreamModule:append(Stream0, <>), + State1 = if_block_free_reg(RegOrTuple, State0), State2 = State1#state{stream = Stream1}, - {State2, ge, byte_size(I1)}; + {State2, CC, byte_size(I1)}; +if_block_cond( + #state{stream_module = StreamModule, available_regs = [Temp | _]} = State0, + {Val, '<', RegOrTuple} +) when is_integer(Val) -> + Reg = + case RegOrTuple of + {free, Reg0} -> Reg0; + RegOrTuple -> RegOrTuple + end, + State1 = mov_immediate(State0, Temp, Val), + Stream0 = State1#state.stream, + I1 = jit_armv6m_asm:cmp(Reg, Temp), + % le = less than or equal (branch when Val >= Reg, i.e., NOT Val < Reg) + CC = le, + ?ASSERT(byte_size(jit_armv6m_asm:bcc(CC, 0)) =:= 2), + Stream1 = StreamModule:append(Stream0, <>), + State2 = if_block_free_reg(RegOrTuple, State1), + State3 = State2#state{stream = Stream1}, + {State3, CC, byte_size(I1)}; if_block_cond( #state{stream_module = StreamModule, stream = Stream0} = State0, {RegOrTuple, '<', RegB} @@ -1049,15 +1170,12 @@ if_block_cond( end, 
I1 = jit_armv6m_asm:cmp(Reg, RegB), % ge = greater than or equal - I2 = jit_armv6m_asm:bcc(ge, 0), - Code = << - I1/binary, - I2/binary - >>, - Stream1 = StreamModule:append(Stream0, Code), + CC = ge, + ?ASSERT(byte_size(jit_armv6m_asm:bcc(CC, 0)) =:= 2), + Stream1 = StreamModule:append(Stream0, <>), State1 = if_block_free_reg(RegOrTuple, State0), State2 = State1#state{stream = Stream1}, - {State2, ge, byte_size(I1)}; + {State2, CC, byte_size(I1)}; if_block_cond( #state{stream_module = StreamModule, stream = Stream0} = State0, {RegOrTuple, '==', 0} ) -> @@ -1069,11 +1187,12 @@ if_block_cond( %% Compare register with 0 I1 = jit_armv6m_asm:cmp(Reg, 0), %% Branch if not equal - I2 = jit_armv6m_asm:bcc(ne, 0), - Stream1 = StreamModule:append(Stream0, <>), + CC = ne, + ?ASSERT(byte_size(jit_armv6m_asm:bcc(CC, 0)) =:= 2), + Stream1 = StreamModule:append(Stream0, <>), State1 = if_block_free_reg(RegOrTuple, State0), State2 = State1#state{stream = Stream1}, - {State2, ne, byte_size(I1)}; + {State2, CC, byte_size(I1)}; %% Delegate (int) forms to regular forms since we only have 32-bit words if_block_cond(State, {'(int)', RegOrTuple, '==', 0}) -> if_block_cond(State, {RegOrTuple, '==', 0}); @@ -1089,15 +1208,12 @@ if_block_cond( RegOrTuple -> RegOrTuple end, I1 = jit_armv6m_asm:cmp(Reg, Val), - I2 = jit_armv6m_asm:bcc(eq, 0), - Code = << - I1/binary, - I2/binary - >>, - Stream1 = StreamModule:append(Stream0, Code), + CC = eq, + ?ASSERT(byte_size(jit_armv6m_asm:bcc(CC, 0)) =:= 2), + Stream1 = StreamModule:append(Stream0, <>), State1 = if_block_free_reg(RegOrTuple, State0), State2 = State1#state{stream = Stream1}, - {State2, eq, byte_size(I1)}; + {State2, CC, byte_size(I1)}; if_block_cond(State, {'(int)', RegOrTuple, '!=', Val}) when is_integer(Val) -> if_block_cond(State, {RegOrTuple, '!=', Val}); if_block_cond( @@ -1110,28 +1226,25 @@ if_block_cond( RegOrTuple -> RegOrTuple end, I1 = jit_armv6m_asm:cmp(Reg, Val), - I2 = jit_armv6m_asm:bcc(ne, 0), - Code = << - I1/binary, - 
I2/binary - >>, - Stream1 = StreamModule:append(Stream0, Code), + CC = ne, + ?ASSERT(byte_size(jit_armv6m_asm:bcc(CC, 0)) =:= 2), + Stream1 = StreamModule:append(Stream0, <>), State1 = if_block_free_reg(RegOrTuple, State0), State2 = State1#state{stream = Stream1}, - {State2, ne, byte_size(I1)}; + {State2, CC, byte_size(I1)}; if_block_cond( #state{stream_module = StreamModule, stream = Stream0} = State0, {{free, RegA}, '==', {free, RegB}} ) -> % Compare two free registers: cmp RegA, RegB; beq I1 = jit_armv6m_asm:cmp(RegA, RegB), - Stream1 = StreamModule:append(Stream0, I1), - I2 = jit_armv6m_asm:bcc(ne, 0), - Stream2 = StreamModule:append(Stream1, I2), - State1 = State0#state{stream = Stream2}, + CC = ne, + ?ASSERT(byte_size(jit_armv6m_asm:bcc(CC, 0)) =:= 2), + Stream1 = StreamModule:append(Stream0, <>), + State1 = State0#state{stream = Stream1}, State2 = if_block_free_reg({free, RegA}, State1), State3 = if_block_free_reg({free, RegB}, State2), - {State3, ne, byte_size(I1)}; + {State3, CC, byte_size(I1)}; if_block_cond( #state{stream_module = StreamModule, stream = Stream0, available_regs = [Temp | _]} = State0, {RegOrTuple, '==', Val} @@ -1146,15 +1259,12 @@ if_block_cond( Stream1 = State1#state.stream, Offset1 = StreamModule:offset(Stream1), I1 = jit_armv6m_asm:cmp(Reg, Temp), - I2 = jit_armv6m_asm:bcc(ne, 0), - Code = << - I1/binary, - I2/binary - >>, - Stream2 = StreamModule:append(Stream1, Code), + CC = ne, + ?ASSERT(byte_size(jit_armv6m_asm:bcc(CC, 0)) =:= 2), + Stream2 = StreamModule:append(Stream1, <>), State2 = if_block_free_reg(RegOrTuple, State1), State3 = State2#state{stream = Stream2}, - {State3, ne, Offset1 - Offset0 + byte_size(I1)}; + {State3, CC, Offset1 - Offset0 + byte_size(I1)}; if_block_cond( #state{stream_module = StreamModule, stream = Stream0, available_regs = [Temp | _]} = State0, {RegOrTuple, '!=', Val} @@ -1169,15 +1279,12 @@ if_block_cond( Stream1 = State1#state.stream, Offset1 = StreamModule:offset(Stream1), I1 = jit_armv6m_asm:cmp(Reg, 
Temp), - I2 = jit_armv6m_asm:bcc(eq, 0), - Code = << - I1/binary, - I2/binary - >>, - Stream2 = StreamModule:append(Stream1, Code), + CC = eq, + ?ASSERT(byte_size(jit_armv6m_asm:bcc(CC, 0)) =:= 2), + Stream2 = StreamModule:append(Stream1, <>), State2 = if_block_free_reg(RegOrTuple, State1), State3 = State2#state{stream = Stream2}, - {State3, eq, Offset1 - Offset0 + byte_size(I1)}; + {State3, CC, Offset1 - Offset0 + byte_size(I1)}; if_block_cond( #state{ stream_module = StreamModule, @@ -1194,12 +1301,12 @@ if_block_cond( % Test bit 0: shift bit 0 to MSB and branch if positive (bit was 0/false) I1 = jit_armv6m_asm:lsls(Temp, Reg, 31), % branch if negative (bit was 1/true) - I2 = jit_armv6m_asm:bcc(mi, 0), - Code = <>, - Stream1 = StreamModule:append(Stream0, Code), + CC = mi, + ?ASSERT(byte_size(jit_armv6m_asm:bcc(CC, 0)) =:= 2), + Stream1 = StreamModule:append(Stream0, <>), State1 = if_block_free_reg(RegOrTuple, State0), State2 = State1#state{stream = Stream1}, - {State2, mi, byte_size(I1)}; + {State2, CC, byte_size(I1)}; if_block_cond( #state{ stream_module = StreamModule, @@ -1216,12 +1323,12 @@ if_block_cond( % Test bit 0: shift bit 0 to MSB and branch if negative (bit was 1/true) I1 = jit_armv6m_asm:lsls(Temp, Reg, 31), % branch if positive (bit was 0/false) - I2 = jit_armv6m_asm:bcc(pl, 0), - Code = <>, - Stream1 = StreamModule:append(Stream0, Code), + CC = pl, + ?ASSERT(byte_size(jit_armv6m_asm:bcc(CC, 0)) =:= 2), + Stream1 = StreamModule:append(Stream0, <>), State1 = if_block_free_reg(RegOrTuple, State0), State2 = State1#state{stream = Stream1}, - {State2, pl, byte_size(I1)}; + {State2, CC, byte_size(I1)}; if_block_cond( #state{ stream_module = StreamModule, @@ -1250,8 +1357,8 @@ if_block_cond( TestCode1 = jit_armv6m_asm:tst(Reg, Temp), {<>, eq} end, - I2 = jit_armv6m_asm:bcc(BranchCond, 0), - Code = <>, + ?ASSERT(byte_size(jit_armv6m_asm:bcc(BranchCond, 0)) =:= 2), + Code = <>, Stream1 = StreamModule:append(Stream0, Code), State1 = 
if_block_free_reg(RegOrTuple, State0), State2 = State1#state{stream = Stream1}, @@ -1268,10 +1375,11 @@ if_block_cond( I1 = jit_armv6m_asm:mvns(Temp, Reg), % 32 - 4 I2 = jit_armv6m_asm:lsls(Temp, Temp, 28), - I3 = jit_armv6m_asm:bcc(eq, 0), - Stream1 = StreamModule:append(Stream0, <>), + CC = eq, + ?ASSERT(byte_size(jit_armv6m_asm:bcc(CC, 0)) =:= 2), + Stream1 = StreamModule:append(Stream0, <>), State1 = State0#state{stream = Stream1}, - {State1, eq, byte_size(I1) + byte_size(I2)}; + {State1, CC, byte_size(I1) + byte_size(I2)}; if_block_cond( #state{ stream_module = StreamModule, @@ -1283,11 +1391,12 @@ if_block_cond( I1 = jit_armv6m_asm:mvns(Reg, Reg), % 32 - 4 I2 = jit_armv6m_asm:lsls(Reg, Reg, 28), - I3 = jit_armv6m_asm:bcc(eq, 0), - Stream1 = StreamModule:append(Stream0, <>), + CC = eq, + ?ASSERT(byte_size(jit_armv6m_asm:bcc(CC, 0)) =:= 2), + Stream1 = StreamModule:append(Stream0, <>), State1 = State0#state{stream = Stream1}, State2 = if_block_free_reg(RegTuple, State1), - {State2, eq, byte_size(I1) + byte_size(I2)}; + {State2, CC, byte_size(I1) + byte_size(I2)}; if_block_cond( #state{ stream_module = StreamModule, @@ -1301,16 +1410,17 @@ if_block_cond( I1 = jit_armv6m_asm:mov(Temp, Reg), Stream1 = StreamModule:append(Stream0, I1), State1 = State0#state{stream = Stream1}, - State2 = and_(State1#state{available_regs = AT}, Temp, Mask), + {State2, Temp} = and_(State1#state{available_regs = AT}, {free, Temp}, Mask), Stream2 = State2#state.stream, % Compare with value I2 = jit_armv6m_asm:cmp(Temp, Val), Stream3 = StreamModule:append(Stream2, I2), OffsetAfter = StreamModule:offset(Stream3), - I3 = jit_armv6m_asm:bcc(eq, 0), - Stream4 = StreamModule:append(Stream3, I3), + CC = eq, + ?ASSERT(byte_size(jit_armv6m_asm:bcc(CC, 0)) =:= 2), + Stream4 = StreamModule:append(Stream3, <<16#FFFF:16>>), State3 = State2#state{stream = Stream4, available_regs = [Temp | State2#state.available_regs]}, - {State3, eq, OffsetAfter - OffsetBefore}; + {State3, CC, OffsetAfter - 
OffsetBefore}; if_block_cond( #state{ stream_module = StreamModule, @@ -1320,17 +1430,18 @@ if_block_cond( ) when ?IS_GPR(Reg) -> % AND with mask OffsetBefore = StreamModule:offset(Stream0), - State1 = and_(State0, Reg, Mask), + {State1, Reg} = and_(State0, RegTuple, Mask), Stream1 = State1#state.stream, % Compare with value I2 = jit_armv6m_asm:cmp(Reg, Val), Stream2 = StreamModule:append(Stream1, I2), OffsetAfter = StreamModule:offset(Stream2), - I3 = jit_armv6m_asm:bcc(eq, 0), - Stream3 = StreamModule:append(Stream2, I3), + CC = eq, + ?ASSERT(byte_size(jit_armv6m_asm:bcc(CC, 0)) =:= 2), + Stream3 = StreamModule:append(Stream2, <<16#FFFF:16>>), State3 = State1#state{stream = Stream3}, State4 = if_block_free_reg(RegTuple, State3), - {State4, eq, OffsetAfter - OffsetBefore}. + {State4, CC, OffsetAfter - OffsetBefore}. -spec if_block_free_reg(armv6m_register() | {free, armv6m_register()}, state()) -> state(). if_block_free_reg({free, Reg}, State0) -> @@ -1727,7 +1838,7 @@ set_registers_args( UsedRegs, Args ), - State0#state{ + State1#state{ stream = Stream1, available_regs = ?AVAILABLE_REGS -- ParamRegs -- NewUsedRegs, used_regs = ParamRegs ++ (NewUsedRegs -- ParamRegs) @@ -1785,7 +1896,7 @@ set_registers_args0( set_registers_args0( State, [Arg | ArgsT], [_ArgReg | ArgsRegs], [?CTX_REG | ParamRegs], AvailGP, StackOffset ) -> - false = lists:member(?CTX_REG, ArgsRegs), + ?ASSERT(not lists:member(?CTX_REG, ArgsRegs)), State1 = set_registers_args1(State, Arg, ?CTX_REG, StackOffset), set_registers_args0(State1, ArgsT, ArgsRegs, ParamRegs, AvailGP, StackOffset); set_registers_args0( @@ -1990,33 +2101,95 @@ move_array_element( Reg, Index, {x_reg, X} -) when X < ?MAX_REG andalso is_atom(Reg) andalso is_integer(Index) -> +) when X < ?MAX_REG andalso is_atom(Reg) andalso is_integer(Index) andalso Index * 4 =< 124 -> I1 = jit_armv6m_asm:ldr(Temp, {Reg, Index * 4}), I2 = jit_armv6m_asm:str(Temp, ?X_REG(X)), Stream1 = StreamModule:append(Stream0, <>), State#state{stream = 
Stream1}; +move_array_element( + #state{stream_module = StreamModule, available_regs = [Temp1, Temp2 | _]} = + State, + Reg, + Index, + {x_reg, X} +) when X < ?MAX_REG andalso is_atom(Reg) andalso is_integer(Index) -> + % For large offsets, use max offset (124) in ldr + remainder in temp register + Offset = Index * 4, + LdrOffset = 124, + Remainder = Offset - LdrOffset, + % Load offset remainder into temp register and add to base + State1 = mov_immediate(State, Temp1, Remainder), + Stream1 = State1#state.stream, + % add Temp1, Reg (Temp1 = Temp1 + Reg) + I1 = jit_armv6m_asm:add(Temp1, Reg), + % ldr Temp2, [Temp1, #124] + I2 = jit_armv6m_asm:ldr(Temp2, {Temp1, LdrOffset}), + % str Temp2, [r0, #X*4] + I3 = jit_armv6m_asm:str(Temp2, ?X_REG(X)), + Stream2 = StreamModule:append(Stream1, <>), + State1#state{stream = Stream2}; move_array_element( #state{stream_module = StreamModule, stream = Stream0, available_regs = [Temp | _]} = State, Reg, Index, {ptr, Dest} -) when is_atom(Reg) andalso is_integer(Index) -> +) when is_atom(Reg) andalso is_integer(Index) andalso Index * 4 =< 124 -> I1 = jit_armv6m_asm:ldr(Temp, {Reg, Index * 4}), I2 = jit_armv6m_asm:str(Temp, {Dest, 0}), Stream1 = StreamModule:append(Stream0, <>), State#state{stream = Stream1}; +move_array_element( + #state{stream_module = StreamModule, available_regs = [Temp | _]} = + State, + Reg, + Index, + {ptr, Dest} +) when is_atom(Reg) andalso is_integer(Index) -> + % For large offsets, use max offset (124) in ldr + remainder in temp register + Offset = Index * 4, + LdrOffset = 124, + Remainder = Offset - LdrOffset, + % Load offset remainder into temp register and add to base + State1 = mov_immediate(State, Temp, Remainder), + Stream1 = State1#state.stream, + I1 = jit_armv6m_asm:add(Temp, Reg), + I2 = jit_armv6m_asm:ldr(Temp, {Temp, LdrOffset}), + I3 = jit_armv6m_asm:str(Temp, {Dest, 0}), + Stream2 = StreamModule:append(Stream1, <>), + State1#state{stream = Stream2}; move_array_element( #state{stream_module = 
StreamModule, stream = Stream0, available_regs = [Temp1, Temp2 | AT]} = State, Reg, Index, {y_reg, Y} -) when is_atom(Reg) andalso is_integer(Index) -> +) when is_atom(Reg) andalso is_integer(Index) andalso Index * 4 =< 124 -> I1 = jit_armv6m_asm:ldr(Temp2, {Reg, Index * 4}), YCode = str_y_reg(Temp2, Y, Temp1, AT), Code = <>, Stream1 = StreamModule:append(Stream0, Code), State#state{stream = Stream1}; +move_array_element( + #state{ + stream_module = StreamModule, available_regs = [Temp1, Temp2 | AT] + } = + State, + Reg, + Index, + {y_reg, Y} +) when is_atom(Reg) andalso is_integer(Index) -> + % For large offsets, use max offset (124) in ldr + remainder in temp register + Offset = Index * 4, + LdrOffset = 124, + Remainder = Offset - LdrOffset, + State1 = mov_immediate(State, Temp2, Remainder), + Stream1 = State1#state.stream, + I1 = jit_armv6m_asm:add(Temp2, Reg), + I2 = jit_armv6m_asm:ldr(Temp2, {Temp2, LdrOffset}), + YCode = str_y_reg(Temp2, Y, Temp1, AT), + Code = <>, + Stream2 = StreamModule:append(Stream1, Code), + State1#state{stream = Stream2}; move_array_element( #state{stream_module = StreamModule, stream = Stream0, available_regs = [Temp | AT]} = State, @@ -2116,10 +2289,32 @@ get_array_element( } = State, {free, Reg}, Index -) -> +) when Index * 4 =< 124 -> I1 = jit_armv6m_asm:ldr(Reg, {Reg, Index * 4}), Stream1 = StreamModule:append(Stream0, <>), {State#state{stream = Stream1}, Reg}; +get_array_element( + #state{ + stream_module = StreamModule, + available_regs = [Temp | _] + } = State, + {free, Reg}, + Index +) -> + % For large offsets, split into ldr immediate (max 124) + remainder in temp register + Offset = Index * 4, + LdrOffset = (Offset div 4) * 4, + LdrOffset1 = min(LdrOffset, 124), + Remainder = Offset - LdrOffset1, + % Load offset remainder into temp register and add to Reg + State1 = mov_immediate(State, Temp, Remainder), + Stream1 = State1#state.stream, + % add Reg, Temp (Reg = Reg + Temp) + I1 = jit_armv6m_asm:add(Reg, Temp), + % ldr Reg, 
[Reg, #LdrOffset1] + I2 = jit_armv6m_asm:ldr(Reg, {Reg, LdrOffset1}), + Stream2 = StreamModule:append(Stream1, <>), + {State1#state{stream = Stream2}, Reg}; get_array_element( #state{ stream_module = StreamModule, @@ -2129,7 +2324,7 @@ get_array_element( } = State, Reg, Index -) -> +) when Index * 4 =< 124 -> I1 = jit_armv6m_asm:ldr(ElemReg, {Reg, Index * 4}), Stream1 = StreamModule:append(Stream0, <>), { @@ -2137,6 +2332,32 @@ get_array_element( stream = Stream1, available_regs = AvailableT, used_regs = [ElemReg | UsedRegs0] }, ElemReg + }; +get_array_element( + #state{ + stream_module = StreamModule, + available_regs = [ElemReg, Temp | AvailableT], + used_regs = UsedRegs0 + } = State, + Reg, + Index +) -> + % For large offsets, split into ldr immediate (max 124) + remainder in temp register + Offset = Index * 4, + Remainder = Offset - 124, + % Load offset remainder into temp register + State1 = mov_immediate(State, Temp, Remainder), + Stream1 = State1#state.stream, + I1 = jit_armv6m_asm:add(Temp, Reg), + I2 = jit_armv6m_asm:ldr(ElemReg, {Temp, 124}), + Stream2 = StreamModule:append(Stream1, <>), + { + State1#state{ + stream = Stream2, + available_regs = [Temp | AvailableT], + used_regs = [ElemReg | UsedRegs0] + }, + ElemReg }. 
%% @doc move an integer, a vm or native register to reg[x] @@ -2148,10 +2369,26 @@ move_to_array_element( ValueReg, Reg, Index -) when ?IS_GPR(ValueReg) andalso ?IS_GPR(Reg) andalso is_integer(Index) -> +) when ?IS_GPR(ValueReg) andalso ?IS_GPR(Reg) andalso is_integer(Index) andalso Index < 32 -> I1 = jit_armv6m_asm:str(ValueReg, {Reg, Index * 4}), Stream1 = StreamModule:append(Stream0, I1), State0#state{stream = Stream1}; +move_to_array_element( + #state{stream_module = StreamModule, available_regs = [Temp | _]} = State0, + ValueReg, + Reg, + Index +) when ?IS_GPR(ValueReg) andalso ?IS_GPR(Reg) andalso is_integer(Index) -> + % For large offsets, split into str immediate (max 124) + remainder in temp register + Offset = Index * 4, + Remainder = Offset - 124, + % Load offset remainder into temp register + State1 = mov_immediate(State0, Temp, Remainder), + Stream1 = State1#state.stream, + I1 = jit_armv6m_asm:add(Temp, Reg), + I2 = jit_armv6m_asm:str(ValueReg, {Temp, 124}), + Stream2 = StreamModule:append(Stream1, <>), + State1#state{stream = Stream2}; move_to_array_element( #state{stream_module = StreamModule, stream = Stream0, available_regs = [Temp | _]} = State0, ValueReg, @@ -2168,7 +2405,7 @@ move_to_array_element( Value, Reg, Index -) -> +) when not ?IS_GPR(Value) andalso ?IS_GPR(Reg) -> {State1, Temp} = copy_to_native_register(State0, Value), State2 = move_to_array_element(State1, Temp, Reg, Index), free_native_register(State2, Temp). 
@@ -2450,7 +2687,8 @@ set_continuation_to_offset( ) -> OffsetRef = make_ref(), Offset = StreamModule:offset(Stream0), - I1 = jit_armv6m_asm:adr(Temp, 4), + ?ASSERT(byte_size(jit_armv6m_asm:adr(Temp, 4)) =:= 2), + I1 = <<16#FFFF:16>>, Reloc = {OffsetRef, Offset, {adr, Temp}}, % Set thumb bit (LSB = 1) by adding 1 to the 4-byte aligned address I2 = jit_armv6m_asm:adds(Temp, Temp, 1), @@ -2508,34 +2746,34 @@ get_module_index( %% JIT currentl calls this with two values: ?TERM_PRIMARY_CLEAR_MASK (-4) to %% clear bits and ?TERM_BOXED_TAG_MASK (0x3F). We can avoid any literal pool %% by using BICS for -4. -and_(#state{stream_module = StreamModule, stream = Stream0} = State0, Reg, 16#FFFFFF) -> +and_(#state{stream_module = StreamModule, stream = Stream0} = State0, {free, Reg}, 16#FFFFFF) -> I1 = jit_armv6m_asm:lsls(Reg, Reg, 8), I2 = jit_armv6m_asm:lsrs(Reg, Reg, 8), Stream1 = StreamModule:append(Stream0, <>), - State0#state{stream = Stream1}; + {State0#state{stream = Stream1}, Reg}; and_( #state{stream_module = StreamModule, available_regs = [Temp | AT]} = State0, - Reg, + {free, Reg}, Val ) when Val < 0 andalso Val >= -256 -> State1 = mov_immediate(State0#state{available_regs = AT}, Temp, bnot (Val)), Stream1 = State1#state.stream, I = jit_armv6m_asm:bics(Reg, Temp), Stream2 = StreamModule:append(Stream1, I), - State1#state{available_regs = [Temp | AT], stream = Stream2}; + {State1#state{available_regs = [Temp | AT], stream = Stream2}, Reg}; and_( #state{stream_module = StreamModule, available_regs = [Temp | AT]} = State0, - Reg, + {free, Reg}, Val ) -> State1 = mov_immediate(State0#state{available_regs = AT}, Temp, Val), Stream1 = State1#state.stream, I = jit_armv6m_asm:ands(Reg, Temp), Stream2 = StreamModule:append(Stream1, I), - State1#state{available_regs = [Temp | AT], stream = Stream2}; + {State1#state{available_regs = [Temp | AT], stream = Stream2}, Reg}; and_( #state{stream_module = StreamModule, available_regs = []} = State0, - Reg, + {free, Reg}, Val ) when Val 
< 0 andalso Val >= -256 -> % No available registers, use r0 as temp and save it to r12 @@ -2552,10 +2790,10 @@ and_( % Restore r0 from r12 Restore = jit_armv6m_asm:mov(r0, ?IP_REG), Stream4 = StreamModule:append(Stream3, Restore), - State0#state{stream = Stream4}; + {State0#state{stream = Stream4}, Reg}; and_( #state{stream_module = StreamModule, available_regs = []} = State0, - Reg, + {free, Reg}, Val ) -> % No available registers, use r0 as temp and save it to r12 @@ -2572,7 +2810,17 @@ and_( % Restore r0 from r12 Restore = jit_armv6m_asm:mov(r0, ?IP_REG), Stream4 = StreamModule:append(Stream3, Restore), - State0#state{stream = Stream4}. + {State0#state{stream = Stream4}, Reg}; +and_( + #state{stream_module = StreamModule, available_regs = [ResultReg | AT], used_regs = UR} = + State0, + Reg, + ?TERM_PRIMARY_CLEAR_MASK +) -> + I1 = jit_armv6m_asm:lsrs(ResultReg, Reg, 2), + I2 = jit_armv6m_asm:lsls(ResultReg, ResultReg, 2), + Stream1 = StreamModule:append(State0#state.stream, <>), + {State0#state{stream = Stream1, available_regs = AT, used_regs = [ResultReg | UR]}, ResultReg}. 
or_( #state{stream_module = StreamModule, available_regs = [Temp | AT]} = State0, @@ -2611,41 +2859,42 @@ mov_immediate(#state{stream_module = StreamModule, stream = Stream0} = State, Re I2 = jit_armv6m_asm:negs(Reg, Reg), Stream1 = StreamModule:append(Stream0, <>), State#state{stream = Stream1}; -mov_immediate(#state{stream_module = StreamModule, stream = Stream0} = State, Reg, Val) -> - %% Use a literal pool with a branch instruction (branch-over pattern) - %% Calculate where literal will be placed (must be word-aligned) - %% After LDR (2 bytes) + Branch (2 bytes) = 4 bytes from current position - CurrentOffset = StreamModule:offset(Stream0), - OffsetAfterInstructions = CurrentOffset + 4, - %% Find next word-aligned position for literal - LiteralPosition = - case OffsetAfterInstructions rem 4 of - % Already aligned - 0 -> OffsetAfterInstructions; - % Add 2 bytes padding to align - _ -> OffsetAfterInstructions + 2 +mov_immediate( + #state{stream_module = StreamModule, stream = Stream0, literal_pool = LP} = State, Reg, Val +) -> + LdrInstructionAddr = StreamModule:offset(Stream0), + ?ASSERT(byte_size(jit_armv6m_asm:ldr(Reg, {pc, 0})) =:= 2), + Stream1 = StreamModule:append(Stream0, <<16#FFFF:16>>), + State#state{stream = Stream1, literal_pool = [{LdrInstructionAddr, Reg, Val} | LP]}. 
+ +flush_literal_pool(#state{literal_pool = []} = State) -> + State; +flush_literal_pool( + #state{stream_module = StreamModule, stream = Stream0, literal_pool = LP} = State +) -> + % Align + Offset = StreamModule:offset(Stream0), + Stream1 = + if + Offset rem 4 =:= 0 -> Stream0; + true -> StreamModule:append(Stream0, <<0:16>>) end, - PaddingNeeded = LiteralPosition - OffsetAfterInstructions, - - %% Calculate LDR PC-relative offset - %% PC = (current_instruction_address & ~3) + 4 - LdrInstructionAddr = CurrentOffset, - LdrPC = (LdrInstructionAddr band (bnot 3)) + 4, - LiteralOffset = LiteralPosition - LdrPC, - - %% Generate: ldr rTemp, [pc, #LiteralOffset] ; Load from literal - I1 = jit_armv6m_asm:ldr(Reg, {pc, LiteralOffset}), - %% Calculate branch offset - %% Branch is at CurrentOffset + 2, need to jump past literal - BranchPosition = CurrentOffset + 2, - % After the 4-byte literal - TargetPosition = LiteralPosition + 4, - BranchOffset = TargetPosition - BranchPosition, - I2 = jit_armv6m_asm:b(BranchOffset), - %% Generate padding if needed (just zeros) - Padding = <<0:(PaddingNeeded * 8)>>, - Stream1 = StreamModule:append(Stream0, <>), - State#state{stream = Stream1}. + % Lay all values and update ldr instructions + Stream2 = lists:foldl( + fun({LdrInstructionAddr, Reg, Val}, AccStream) -> + LiteralPosition = StreamModule:offset(AccStream), + LdrPC = (LdrInstructionAddr band (bnot 3)) + 4, + LiteralOffset = LiteralPosition - LdrPC, + LdrInstruction = jit_armv6m_asm:ldr(Reg, {pc, LiteralOffset}), + AccStream1 = StreamModule:append(AccStream, <>), + StreamModule:replace( + AccStream1, LdrInstructionAddr, LdrInstruction + ) + end, + Stream1, + lists:reverse(LP) + ), + State#state{stream = Stream2, literal_pool = []}. 
sub(#state{stream_module = StreamModule, stream = Stream0} = State, Reg, Val) when (Val >= 0 andalso Val =< 255) orelse is_atom(Val) @@ -2754,10 +3003,12 @@ decrement_reductions_and_maybe_schedule_next( Stream1 = StreamModule:append(Stream0, <>), BNEOffset = StreamModule:offset(Stream1), % Branch if reduction count is not zero - I4 = jit_armv6m_asm:bcc(ne, 0), + ?ASSERT(byte_size(jit_armv6m_asm:bcc(ne, 0)) =:= 2), + I4 = <<16#FFFF:16>>, % Set continuation to the next instruction ADROffset = BNEOffset + byte_size(I4), - I5 = jit_armv6m_asm:adr(Temp, 4), + % The 2-byte placeholder below must match the size of the real adr instruction + ?ASSERT(byte_size(jit_armv6m_asm:adr(Temp, 4)) =:= 2), + I5 = <<16#FFFF:16>>, I6 = jit_armv6m_asm:adds(Temp, Temp, 1), I7 = jit_armv6m_asm:str(Temp, ?JITSTATE_CONTINUATION(TempJitState)), % Append the instructions to the stream @@ -2890,8 +3141,8 @@ set_cp(State0) -> Offset = StreamModule:offset(Stream0), % build cp with module_index << 24 I1 = jit_armv6m_asm:lsls(Reg, Reg, 24), - % Emit a single nop as placeholder for offset load instruction - I2 = jit_armv6m_asm:nop(), + % Placeholder for offset load instruction + I2 = <<16#FFFF:16>>, MOVOffset = Offset + byte_size(I1), % OR the module index with the offset (loaded in temp register) I3 = jit_armv6m_asm:orrs(Reg, TempReg), @@ -3124,5 +3375,47 @@ add_label(#state{stream_module = StreamModule, stream = Stream0} = State0, Label %% @return Updated backend state %%----------------------------------------------------------------------------- -spec add_label(state(), integer() | reference(), integer()) -> state(). 
+add_label( + #state{ + stream_module = StreamModule, + stream = Stream0, + jump_table_start = JumpTableStart, + branches = Branches, + labels = Labels + } = State, + Label, + LabelOffset +) when is_integer(Label) -> + % Patch the jump table entry immediately + % Each jump table entry is 12 bytes: + % - ldr r3, [pc, 4] (2 bytes) at offset 0 + % - push {...} (2 bytes) at offset 2 + % - add pc, r3 (2 bytes) at offset 4 + % - nop (2 bytes) at offset 6 + % - data (4 bytes) at offset 8 + JumpTableEntryStart = JumpTableStart + Label * 12, + DataOffset = JumpTableEntryStart + 8, + AddInstrOffset = JumpTableEntryStart + 4, + + % Calculate offset from 'add pc, pc, r3' instruction + 4 to target label + % PC when add instruction executes + AddPC = AddInstrOffset + 4, + RelativeOffset = LabelOffset - AddPC, + DataBytes = <>, + + Stream1 = StreamModule:replace(Stream0, DataOffset, DataBytes), + + % Eagerly patch any branches targeting this label + {Stream2, RemainingBranches} = patch_branches_for_label( + StreamModule, + Stream1, + Label, + LabelOffset, + Branches + ), + + State#state{ + stream = Stream2, branches = RemainingBranches, labels = [{Label, LabelOffset} | Labels] + }; add_label(#state{labels = Labels} = State, Label, Offset) -> State#state{labels = [{Label, Offset} | Labels]}. diff --git a/libs/jit/src/jit_precompile.erl b/libs/jit/src/jit_precompile.erl index cd9646790d..5d91690498 100644 --- a/libs/jit/src/jit_precompile.erl +++ b/libs/jit/src/jit_precompile.erl @@ -19,7 +19,7 @@ % -module(jit_precompile). --export([start/0, compile/3, atom_resolver/1, type_resolver/1]). +-export([start/0, compile/3, atom_resolver/1, type_resolver/1, import_resolver/2]). -include_lib("jit.hrl"). 
@@ -84,6 +84,15 @@ compile(Target, Dir, Path) -> end, TypeResolver = type_resolver(TypesChunk), + ImportedFunctionsChunk = + case lists:keyfind("ImpT", 1, InitialChunks) of + {"ImpT", ImportedFunctionsChunk0} -> + ImportedFunctionsChunk0; + false -> + <<>> + end, + ImportedFunctionResolver = import_resolver(ImportedFunctionsChunk, AtomResolver), + % Parse target to extract arch and variant {BaseTarget, RequestedVariant} = parse_target(Target), Backend = list_to_atom("jit_" ++ BaseTarget), @@ -93,6 +102,7 @@ compile(Target, Dir, Path) -> "x86_64" -> ?JIT_ARCH_X86_64; "aarch64" -> ?JIT_ARCH_AARCH64; "armv6m" -> ?JIT_ARCH_ARMV6M; + "riscv32" -> ?JIT_ARCH_RISCV32; _ -> error({unsupported_target, Target}) end, @@ -106,7 +116,7 @@ compile(Target, Dir, Path) -> Stream2 = Backend:new(RequestedVariant, jit_stream_binary, Stream1), {LabelsCount, Stream3} = jit:compile( - CodeChunk, AtomResolver, LiteralResolver, TypeResolver, Backend, Stream2 + CodeChunk, AtomResolver, LiteralResolver, TypeResolver, ImportedFunctionResolver, Backend, Stream2 ), NativeCode = Backend:stream(Stream3), UpdatedChunks = FilteredChunks ++ [{"avmN", NativeCode}], @@ -174,6 +184,26 @@ parse_literals_chunk0(N, <>, Term = binary_to_term(TermBin), parse_literals_chunk0(N - 1, Rest, [Term | Acc]). +import_resolver(FunctionChunks, AtomResolver) -> + ImportedFunctions = parse_imported_functions_chunk(FunctionChunks, AtomResolver), + fun(Index) -> lists:nth(Index + 1, ImportedFunctions) end. + +%% @doc Parse imported functions chunk to extract {Module, Function, Arity} triplets +parse_imported_functions_chunk(<>, AtomResolver) -> + parse_imported_functions_chunk0(FunctionsCount, Rest, AtomResolver, []); +parse_imported_functions_chunk(<<>>, _AtomResolver) -> + []. 
+ +parse_imported_functions_chunk0(0, <<>>, _AtomResolver, Acc) -> + lists:reverse(Acc); +parse_imported_functions_chunk0( + N, <>, AtomResolver, Acc +) -> + Module = AtomResolver(ModuleIndex), + Function = AtomResolver(FunctionIndex), + ImportedFunction = {Module, Function, Arity}, + parse_imported_functions_chunk0(N - 1, Rest, AtomResolver, [ImportedFunction | Acc]). + %% Version (from beam_types.hrl) -define(BEAM_TYPES_VERSION, 3). diff --git a/libs/jit/src/jit_riscv32.erl b/libs/jit/src/jit_riscv32.erl new file mode 100644 index 0000000000..52a04c2a00 --- /dev/null +++ b/libs/jit/src/jit_riscv32.erl @@ -0,0 +1,3208 @@ +% +% This file is part of AtomVM. +% +% Copyright 2025 Paul Guyot +% +% Licensed under the Apache License, Version 2.0 (the "License"); +% you may not use this file except in compliance with the License. +% You may obtain a copy of the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, +% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +% See the License for the specific language governing permissions and +% limitations under the License. +% +% SPDX-License-Identifier: Apache-2.0 OR LGPL-2.1-or-later +% + +-module(jit_riscv32). 
+ +-export([ + word_size/0, + new/3, + stream/1, + offset/1, + flush/1, + debugger/1, + used_regs/1, + available_regs/1, + free_native_registers/2, + assert_all_native_free/1, + jump_table/2, + update_branches/1, + call_primitive/3, + call_primitive_last/3, + call_primitive_with_cp/3, + return_if_not_equal_to_ctx/2, + jump_to_label/2, + jump_to_continuation/2, + jump_to_offset/2, + if_block/3, + if_else_block/4, + shift_right/3, + shift_left/3, + move_to_vm_register/3, + move_to_native_register/2, + move_to_native_register/3, + move_to_cp/2, + move_array_element/4, + move_to_array_element/4, + move_to_array_element/5, + set_bs/2, + copy_to_native_register/2, + get_array_element/3, + increment_sp/2, + set_continuation_to_label/2, + set_continuation_to_offset/1, + continuation_entry_point/1, + get_module_index/1, + and_/3, + or_/3, + add/3, + sub/3, + mul/3, + decrement_reductions_and_maybe_schedule_next/1, + call_or_schedule_next/2, + call_only_or_schedule_next/2, + call_func_ptr/3, + return_labels_and_lines/2, + add_label/2, + add_label/3 +]). + +-ifdef(JIT_DWARF). +-export([ + dwarf_opcode/2, + dwarf_label/2, + dwarf_function/3, + dwarf_line/2 +]). +-endif. + +-compile([warnings_as_errors]). + +-include_lib("jit.hrl"). + +-include("primitives.hrl"). +-include("term.hrl"). + +-define(ASSERT(Expr), true = Expr). + +%% RISC-V32 ILP32 ABI: a0-a7 are used for argument passing (8 registers). +%% a0-a1 are used for return values (a0 for 32-bit, a0-a1 for 64-bit returns). +%% s0-s11 are callee-saved registers (must be preserved across calls). +%% t0-t6 are caller-saved temporary registers. +%% sp is the stack pointer. +%% ra is the return address register. +%% zero (x0) is hardwired to constant 0. +%% This implementation uses RV32IMC (base + multiply/compressed extensions). 
+%% +%% See: RISC-V Calling Convention +%% https://riscv.org/wp-content/uploads/2024/12/riscv-calling.pdf +%% +%% Registers used by the JIT backend (RISC-V32): +%% - Argument/return: a0-a7 (up to 8 args in registers) +%% - Callee-saved: s0-s11 (must preserve) +%% - Temporaries: t0-t6 (caller-saved) +%% - Stack pointer: sp +%% - Return address: ra +%% - Zero register: zero (always 0) +%% - Available for JIT scratch: t0-t6 (7 temp registers) +%% +%% Note: this backend mixes 32-bit base instructions with 16-bit compressed +%% (C extension) instructions, so emitted sequences vary in size and +%% placeholders are padded to a fixed size when they are patched. +%% +%% For more details, refer to the RISC-V ILP32 Procedure Call Standard. + +-type riscv32_register() :: + a0 + | a1 + | a2 + | a3 + | a4 + | a5 + | a6 + | a7 + | t0 + | t1 + | t2 + | t3 + | t4 + | t5 + | t6 + | s0 + | s1 + | s2 + | s3 + | s4 + | s5 + | s6 + | s7 + | s8 + | s9 + | s10 + | s11 + | sp + | ra. + +-define(IS_GPR(Reg), + (Reg =:= a0 orelse Reg =:= a1 orelse Reg =:= a2 orelse Reg =:= a3 orelse Reg =:= a4 orelse + Reg =:= a5 orelse Reg =:= a6 orelse Reg =:= a7 orelse Reg =:= t0 orelse Reg =:= t1 orelse + Reg =:= t2 orelse Reg =:= t3 orelse Reg =:= t4 orelse Reg =:= t5 orelse Reg =:= t6 orelse + Reg =:= s0 orelse Reg =:= s1 orelse Reg =:= s2 orelse Reg =:= s3 orelse Reg =:= s4 orelse + Reg =:= s5 orelse Reg =:= s6 orelse Reg =:= s7 orelse Reg =:= s8 orelse Reg =:= s9 orelse + Reg =:= s10 orelse Reg =:= s11 orelse Reg =:= sp orelse Reg =:= ra) +). + +-type stream() :: any(). + +% Backend state. 
Each pending branch is {Label, Offset, Type}: the target +% label, the stream offset of the placeholder to patch, and the kind of +% placeholder (e.g. {adr, Reg} or {far_branch, Reg}) as consumed by +% patch_branch/5. +-record(state, { + stream_module :: module(), + stream :: stream(), + offset :: non_neg_integer(), + branches :: [{integer() | reference(), non_neg_integer(), any()}], + jump_table_start :: non_neg_integer(), + available_regs :: [riscv32_register()], + used_regs :: [riscv32_register()], + labels :: [{integer() | reference(), integer()}], + variant :: non_neg_integer() +}). + +-type state() :: #state{}. +-type immediate() :: non_neg_integer(). 
+-type vm_register() :: + {x_reg, non_neg_integer()} | {y_reg, non_neg_integer()} | {ptr, riscv32_register()}. +-type value() :: immediate() | vm_register() | riscv32_register() | {ptr, riscv32_register()}. +-type arg() :: ctx | jit_state | offset | value() | {free, value()} | {avm_int64_t, integer()}. + +-type maybe_free_riscv32_register() :: + {free, riscv32_register()} | riscv32_register(). + +-type condition() :: + {riscv32_register(), '<', integer()} + | {maybe_free_riscv32_register(), '<', riscv32_register()} + | {integer(), '<', maybe_free_riscv32_register()} + | {maybe_free_riscv32_register(), '==', integer()} + | {maybe_free_riscv32_register(), '!=', riscv32_register() | integer()} + | {'(int)', maybe_free_riscv32_register(), '==', integer()} + | {'(int)', maybe_free_riscv32_register(), '!=', riscv32_register() | integer()} + | {'(bool)', maybe_free_riscv32_register(), '==', false} + | {'(bool)', maybe_free_riscv32_register(), '!=', false} + | {maybe_free_riscv32_register(), '&', non_neg_integer(), '!=', integer()} + | {{free, riscv32_register()}, '==', {free, riscv32_register()}}. + +% Context offsets (32-bit architecture) +% ctx->e is 0x14 +% ctx->x is 0x18 +-define(CTX_REG, a0). +-define(NATIVE_INTERFACE_REG, a2). +-define(Y_REGS, {?CTX_REG, 16#14}). +-define(X_REG(N), {?CTX_REG, 16#18 + (N * 4)}). +-define(CP, {?CTX_REG, 16#5C}). +-define(FP_REGS, {?CTX_REG, 16#60}). +-define(BS, {?CTX_REG, 16#64}). +-define(BS_OFFSET, {?CTX_REG, 16#68}). +% JITSTATE is in a1 register (no prolog, following aarch64 model) +-define(JITSTATE_REG, a1). +% Return address register (like LR in AArch64) +-define(RA_REG, ra). +-define(JITSTATE_MODULE_OFFSET, 0). +-define(JITSTATE_CONTINUATION_OFFSET, 16#4). +-define(JITSTATE_REDUCTIONCOUNT_OFFSET, 16#8). +-define(PRIMITIVE(N), {?NATIVE_INTERFACE_REG, N * 4}). +-define(MODULE_INDEX(ModuleReg), {ModuleReg, 0}). + +-define(JUMP_TABLE_ENTRY_SIZE, 8). 
+ +%% RISC-V32 register mappings + +%% Use t3 as temporary for some operations +-define(IP_REG, t3). + +-define(IS_SINT8_T(X), is_integer(X) andalso X >= -128 andalso X =< 127). +-define(IS_SINT32_T(X), is_integer(X) andalso X >= -16#80000000 andalso X < 16#80000000). +-define(IS_UINT8_T(X), is_integer(X) andalso X >= 0 andalso X =< 255). +-define(IS_UINT32_T(X), is_integer(X) andalso X >= 0 andalso X < 16#100000000). +-define(IS_SIGNED_OR_UNSIGNED_INT32_T(X), + is_integer(X) andalso X >= -16#80000000 andalso X < 16#100000000 +). + +%% RISC-V32 ILP32 ABI register allocation: +%% - a0: context pointer (reserved, passed as first parameter) +%% - a1-a5: available for parameters to native functions (up to 6 params) +%% - a2: native interface pointer (reserved) +%% - t0-t6: temporaries, caller-saved, available for JIT use +%% - s0-s11: callee-saved (would need to be saved/restored) +-define(AVAILABLE_REGS, [t6, t5, t4, t3, t2, t1, t0]). +-define(PARAMETER_REGS, [a0, a1, a2, a3, a4, a5, a6, a7]). +-define(SCRATCH_REGS, [t6, t5, t4, t2, t1, t0]). + +%%----------------------------------------------------------------------------- +%% @doc Return the word size in bytes, i.e. the sizeof(term) i.e. +%% sizeof(uintptr_t) +%% +%% C code equivalent is: +%% #if UINTPTR_MAX == UINT32_MAX +%% #define TERM_BYTES 4 +%% #elif UINTPTR_MAX == UINT64_MAX +%% #define TERM_BYTES 8 +%% #else +%% #error "Term size must be either 32 bit or 64 bit." +%% #endif +%% +%% @end +%% @return Word size in bytes +%%----------------------------------------------------------------------------- +-spec word_size() -> 4 | 8. +word_size() -> 4. + +%%----------------------------------------------------------------------------- +%% @doc Create a new backend state for provided variant, module and stream. 
+%% @end +%% @param Variant JIT variant to use (currently ?JIT_VARIANT_PIC) +%% @param StreamModule module to stream instructions +%% @param Stream stream state +%% @return New backend state +%%----------------------------------------------------------------------------- +-spec new(any(), module(), stream()) -> state(). +new(Variant, StreamModule, Stream) -> + #state{ + stream_module = StreamModule, + stream = Stream, + branches = [], + jump_table_start = 0, + offset = StreamModule:offset(Stream), + available_regs = ?AVAILABLE_REGS, + used_regs = [], + labels = [], + variant = Variant + }. + +%%----------------------------------------------------------------------------- +%% @doc Access the stream object. +%% @end +%% @param State current backend state +%% @return The stream object +%%----------------------------------------------------------------------------- +-spec stream(state()) -> stream(). +stream(#state{stream = Stream}) -> + Stream. + +%%----------------------------------------------------------------------------- +%% @doc Get the current offset in the stream +%% @end +%% @param State current backend state +%% @return The current offset +%%----------------------------------------------------------------------------- +-spec offset(state()) -> non_neg_integer(). +offset(#state{stream_module = StreamModule, stream = Stream}) -> + StreamModule:offset(Stream). + +%%----------------------------------------------------------------------------- +%% @doc Flush the stream. The underlying stream is flushed through the stream +%% module and stored back into the backend state. +%% @end +%% @param State current backend state +%% @return The new state +%%----------------------------------------------------------------------------- +-spec flush(state()) -> state(). +flush(#state{stream_module = StreamModule, stream = Stream0} = State) -> + Stream1 = StreamModule:flush(Stream0), + State#state{stream = Stream1}. 
+ +%%----------------------------------------------------------------------------- +%% @doc Emit a debugger or breakpoint instruction.
This is used for debugging +%% and not in production. +%% @end +%% @param State current backend state +%% @return The updated backend state +%%----------------------------------------------------------------------------- +-spec debugger(state()) -> state(). +debugger(#state{stream_module = StreamModule, stream = Stream0} = State) -> + Stream1 = StreamModule:append(Stream0, jit_riscv32_asm:c_ebreak()), + State#state{stream = Stream1}. + +%%----------------------------------------------------------------------------- +%% @doc Return the list of currently used native registers. This is used for +%% debugging and not in production. +%% @end +%% @param State current backend state +%% @return The list of used registers +%%----------------------------------------------------------------------------- +-spec used_regs(state()) -> [riscv32_register()]. +used_regs(#state{used_regs = Used}) -> Used. + +%%----------------------------------------------------------------------------- +%% @doc Return the list of currently available native scratch registers. This +%% is used for debugging and not in production. +%% @end +%% @param State current backend state +%% @return The list of available registers +%%----------------------------------------------------------------------------- +-spec available_regs(state()) -> [riscv32_register()]. +available_regs(#state{available_regs = Available}) -> Available. + +%%----------------------------------------------------------------------------- +%% @doc Free native registers. The passed list of registers can contain +%% registers, pointer to registers or other values that are ignored. +%% @end +%% @param State current backend state +%% @param Regs list of registers or other values +%% @return The updated backend state +%%----------------------------------------------------------------------------- +-spec free_native_registers(state(), [value()]) -> state(). 
+free_native_registers(State, []) -> + State; +free_native_registers(State, [Reg | Rest]) -> + State1 = free_native_register(State, Reg), + free_native_registers(State1, Rest). + +-spec free_native_register(state(), value()) -> state(). +free_native_register( + #state{available_regs = Available0, used_regs = Used0} = State, + Reg +) when + is_atom(Reg) +-> + {Available1, Used1} = free_reg(Available0, Used0, Reg), + State#state{available_regs = Available1, used_regs = Used1}; +free_native_register(State, {ptr, Reg}) -> + free_native_register(State, Reg); +free_native_register(State, _Other) -> + State. + +%%----------------------------------------------------------------------------- +%% @doc Assert that all native scratch registers are available. This is used +%% for debugging and not in production. +%% @end +%% @param State current backend state +%% @return ok +%%----------------------------------------------------------------------------- +-spec assert_all_native_free(state()) -> ok. +assert_all_native_free(#state{ + available_regs = ?AVAILABLE_REGS, used_regs = [] +}) -> + ok. + +%%----------------------------------------------------------------------------- +%% @doc Emit the jump table at the beginning of the module. Branches will be +%% updated afterwards with update_branches/1. Emit branches for labels from +%% 0 (special entry for lines and labels information) to LabelsCount included +%% (special entry for OP_INT_CALL_END). +%% +%% On this platform, each jump table entry is 8 bytes +%% (?JUMP_TABLE_ENTRY_SIZE), reserved for an auipc + jalr pair. Entries are +%% emitted as placeholders and patched in add_label once the label offset +%% is known. +%% +%% @end +%% @param State current backend state +%% @param LabelsCount number of labels in the module. +%% @return Updated backend state +%%----------------------------------------------------------------------------- +-spec jump_table(state(), pos_integer()) -> state(). 
+jump_table(#state{stream_module = StreamModule, stream = Stream0} = State, LabelsCount) -> + JumpTableStart = StreamModule:offset(Stream0), + jump_table0(State#state{jump_table_start = JumpTableStart}, 0, LabelsCount). + +jump_table0(State, N, LabelsCount) when N > LabelsCount -> + State; +jump_table0( + #state{stream_module = StreamModule, stream = Stream0} = State, + N, + LabelsCount +) -> + % Create jump table entry: AUIPC + JALR (8 bytes total) + % This will be patched in add_label when the label offset is known + JumpEntry = <<16#FFFFFFFF:32, 16#FFFFFFFF:32>>, + Stream1 = StreamModule:append(Stream0, JumpEntry), + jump_table0(State#state{stream = Stream1}, N + 1, LabelsCount). + +%%----------------------------------------------------------------------------- +%% @doc Patch a single branch in the stream +%% @end +%% @param StreamModule stream module +%% @param Stream stream state +%% @param Offset offset of the branch to patch +%% @param Type type of the branch +%% @param LabelOffset target label offset +%% @return Updated stream +%%----------------------------------------------------------------------------- +-spec patch_branch(module(), stream(), non_neg_integer(), any(), non_neg_integer()) -> stream(). 
+patch_branch(StreamModule, Stream, Offset, Type, LabelOffset) -> + Rel = LabelOffset - Offset, + NewInstr = + case Type of + {adr, Reg} when Rel rem 4 =:= 0 -> + % Generate pc_relative_address and pad to 8 bytes with NOP + I = pc_relative_address(Reg, Rel), + case byte_size(I) of + 4 -> <>; + 6 -> <>; + 8 -> I + end; + {adr, Reg} when Rel rem 4 =:= 2; Rel rem 4 =:= -2 -> + % Handle 2-byte aligned offsets and pad to 8 bytes + % Handle both positive and negative offsets (Erlang rem can be negative) + I = pc_relative_address(Reg, Rel), + case byte_size(I) of + 4 -> <>; + 6 -> <>; + 8 -> I + end; + {far_branch, TempReg} -> + % Check if branch can now be optimized to near branch + if + Rel >= -1048576 andalso Rel =< 1048574 andalso (Rel rem 2) =:= 0 -> + % RISC-V jal has ±1MB range + % Optimize to near branch: jal + nops to fill original size + DirectBranch = jit_riscv32_asm:jal(zero, Rel), + case byte_size(DirectBranch) of + 2 -> + <>; + 4 -> + <> + end; + true -> + % Keep far branch sequence: auipc + jalr (PC-relative, 8 bytes) + % Split the relative offset into upper 20 bits and lower 12 bits + Hi20 = (Rel + 16#800) bsr 12, + Lo12 = Rel - (Hi20 bsl 12), + I1 = jit_riscv32_asm:auipc(TempReg, Hi20), + I2 = jit_riscv32_asm:jalr(zero, TempReg, Lo12), + Entry = <>, + case byte_size(Entry) of + 6 -> <>; + 8 -> Entry + end + end + end, + StreamModule:replace(Stream, Offset, NewInstr). 
+ +%%----------------------------------------------------------------------------- +%% @doc Patch all branches targeting a specific label and return remaining branches +%% @end +%% @param StreamModule stream module +%% @param Stream stream state +%% @param TargetLabel label to patch branches for +%% @param LabelOffset offset of the target label +%% @param Branches list of pending branches +%% @return {UpdatedStream, RemainingBranches} +%%----------------------------------------------------------------------------- +-spec patch_branches_for_label( + module(), + stream(), + integer(), + non_neg_integer(), + [{integer(), non_neg_integer(), any()}] +) -> {stream(), [{integer(), non_neg_integer(), any()}]}. +patch_branches_for_label(StreamModule, Stream, TargetLabel, LabelOffset, Branches) -> + patch_branches_for_label(StreamModule, Stream, TargetLabel, LabelOffset, Branches, []). + +patch_branches_for_label(_StreamModule, Stream, _TargetLabel, _LabelOffset, [], Acc) -> + {Stream, lists:reverse(Acc)}; +patch_branches_for_label( + StreamModule, + Stream0, + TargetLabel, + LabelOffset, + [{Label, Offset, Type} | Rest], + Acc +) when Label =:= TargetLabel -> + Stream1 = patch_branch(StreamModule, Stream0, Offset, Type, LabelOffset), + patch_branches_for_label(StreamModule, Stream1, TargetLabel, LabelOffset, Rest, Acc); +patch_branches_for_label(StreamModule, Stream, TargetLabel, LabelOffset, [Branch | Rest], Acc) -> + patch_branches_for_label(StreamModule, Stream, TargetLabel, LabelOffset, Rest, [Branch | Acc]). + +%%----------------------------------------------------------------------------- +%% @doc Rewrite stream to update all branches for labels. +%% @end +%% @param State current backend state +%% @return Updated backend state +%%----------------------------------------------------------------------------- +-spec update_branches(state()) -> state(). 
+update_branches(#state{branches = []} = State) -> + State; +update_branches( + #state{ + stream_module = StreamModule, + stream = Stream0, + branches = [{Label, Offset, Type} | BranchesT], + labels = Labels + } = State +) -> + {Label, LabelOffset} = lists:keyfind(Label, 1, Labels), + Stream1 = patch_branch(StreamModule, Stream0, Offset, Type, LabelOffset), + update_branches(State#state{stream = Stream1, branches = BranchesT}). + +%%----------------------------------------------------------------------------- +%% @doc Generate code to load a primitive function pointer into a register +%% @param Primitive index to the primitive to call +%% @param TargetReg register to load the function pointer into +%% @return Binary instruction sequence +%%----------------------------------------------------------------------------- +-spec load_primitive_ptr(non_neg_integer(), riscv32_register()) -> binary(). +load_primitive_ptr(Primitive, TargetReg) -> + case Primitive of + 0 -> + jit_riscv32_asm:lw(TargetReg, ?NATIVE_INTERFACE_REG, 0); + N when N * 4 =< 124 -> + jit_riscv32_asm:lw(TargetReg, ?NATIVE_INTERFACE_REG, N * 4); + N when N * 4 < 256 -> + % Can encode N * 4 directly in li instruction + I1 = jit_riscv32_asm:li(TargetReg, N * 4), + I2 = jit_riscv32_asm:add(TargetReg, TargetReg, ?NATIVE_INTERFACE_REG), + I3 = jit_riscv32_asm:lw(TargetReg, TargetReg, 0), + <>; + N -> + % For very large primitive numbers, load N and shift left by 2 (multiply by 4) + I1 = jit_riscv32_asm:li(TargetReg, N), + I2 = jit_riscv32_asm:slli(TargetReg, TargetReg, 2), + I3 = jit_riscv32_asm:add(TargetReg, TargetReg, ?NATIVE_INTERFACE_REG), + I4 = jit_riscv32_asm:lw(TargetReg, TargetReg, 0), + <> + end. + +%%----------------------------------------------------------------------------- +%% @doc Emit a call (call with return) to a primitive with arguments. This +%% function converts arguments and pass them following the backend ABI +%% convention. It also saves scratch registers we need to preserve. 
+%% @end +%% @param State current backend state +%% @param Primitive index to the primitive to call +%% @param Args arguments to pass to the primitive +%% @return Tuple of the updated backend state and a native register, as +%% returned by call_func_ptr/3 +%%----------------------------------------------------------------------------- +-spec call_primitive(state(), non_neg_integer(), [arg()]) -> {state(), riscv32_register()}. +call_primitive( + #state{ + stream_module = StreamModule, + stream = Stream0, + available_regs = [TempReg | RestRegs], + used_regs = UsedRegs + } = State, + Primitive, + Args +) -> + % Load the primitive's function pointer into the first available scratch + % register; it is marked as used and handed to call_func_ptr/3 as + % {free, TempReg} + PrepCall = load_primitive_ptr(Primitive, TempReg), + Stream1 = StreamModule:append(Stream0, PrepCall), + StateCall = State#state{ + stream = Stream1, + available_regs = RestRegs, + used_regs = [TempReg | UsedRegs] + }, + call_func_ptr(StateCall, {free, TempReg}, Args); +call_primitive( + #state{available_regs = []} = State, + Primitive, + Args +) -> + % No scratch register is available: pass the primitive index itself to + % call_func_ptr/3 + call_func_ptr(State, {primitive, Primitive}, Args). + +%%----------------------------------------------------------------------------- +%% @doc Emit a jump (call without return) to a primitive with arguments. This +%% function converts arguments and passes them following the backend ABI +%% convention. 
+%% @end +%% @param State current backend state +%% @param Primitive index to the primitive to call +%% @param Args arguments to pass to the primitive +%% @return Updated backend state +%%----------------------------------------------------------------------------- +call_primitive_last( + #state{ + stream_module = StreamModule, + stream = Stream0 + } = State0, + Primitive, + Args +) -> + % We need a register for the function pointer that should not be used as a parameter + % Since we're not returning, we can use all scratch registers except + % registers used for parameters + ParamRegs = lists:sublist(?PARAMETER_REGS, length(Args)), + ArgsRegs = args_regs(Args), + ScratchRegs = ?AVAILABLE_REGS -- ArgsRegs -- ParamRegs, + [Temp | AvailableRegs1] = ScratchRegs, + UsedRegs = ?AVAILABLE_REGS -- AvailableRegs1, + PrepCall = load_primitive_ptr(Primitive, Temp), + Stream1 = StreamModule:append(Stream0, PrepCall), + + State1 = State0#state{ + stream = Stream1, available_regs = AvailableRegs1, used_regs = UsedRegs + }, + + % Preprocess offset special arg + Args1 = lists:map( + fun(Arg) -> + case Arg of + offset -> StreamModule:offset(Stream1); + _ -> Arg + end + end, + Args + ), + + % In RISC-V, all up to 8 arguments fit in registers (a0-a7) + % Always use tail call when calling primitives in tail position + State4 = + case Args1 of + [FirstArg, jit_state | ArgsT] -> + % Use tail call + ArgsForTailCall = [FirstArg, jit_state_tail_call | ArgsT], + State2 = set_registers_args(State1, ArgsForTailCall, 0), + tail_call_with_jit_state_registers_only(State2, Temp) + end, + State4#state{available_regs = ?AVAILABLE_REGS, used_regs = []}. + +%%----------------------------------------------------------------------------- +%% @doc Tail call to address in register. +%% RA is preserved across regular calls (call_func_ptr saves/restores it), +%% so when the called C primitive returns, it returns to opcodesswitch.h. 
%% @end
%% @param State current backend state
%% @param Reg register containing the target address
%% @return Updated backend state
%%-----------------------------------------------------------------------------
tail_call_with_jit_state_registers_only(
    #state{
        stream_module = StreamModule,
        stream = Stream0
    } = State,
    Reg
) ->
    % Jump to address in register (tail call): no return address is recorded
    I1 = jit_riscv32_asm:jr(Reg),
    Stream1 = StreamModule:append(Stream0, I1),
    State#state{stream = Stream1}.

%%-----------------------------------------------------------------------------
%% @doc Emit a return of a value if it's not equal to ctx.
%% This logic is used to break out to the scheduler, typically after signal
%% messages have been processed.
%%
%% Emitted sequence:
%% <pre>
%%   beq Reg, ctx, skip   ; Reg == ctx: fall through to the following code
%%   mv  a0, Reg          ; omitted when Reg is already a0
%%   ret
%% skip:
%% </pre>
%% @end
%% @param State current backend state
%% @param Reg register to compare to (should be {free, Reg} as it's always freed)
%% @return Updated backend state
%%-----------------------------------------------------------------------------
return_if_not_equal_to_ctx(
    #state{
        stream_module = StreamModule,
        stream = Stream0,
        available_regs = AvailableRegs0,
        used_regs = UsedRegs0
    } = State,
    {free, Reg}
) ->
    % RISC-V doesn't have a separate cmp instruction, use beq directly
    I2 =
        case Reg of
            % Return value is already in a0
            a0 -> <<>>;
            % Move to a0 (return register)
            _ -> jit_riscv32_asm:mv(a0, Reg)
        end,
    I3 = jit_riscv32_asm:ret(),
    % Branch if equal (skip the return)
    % Offset must account for the beq instruction itself (4 bytes) plus I2 and I3,
    % which is why I2 and I3 are encoded before I1.
    I1 = jit_riscv32_asm:beq(Reg, ?CTX_REG, 4 + byte_size(I2) + byte_size(I3)),
    % The instructions are emitted in execution order: branch, move, return
    Stream1 = StreamModule:append(Stream0, <<I1/binary, I2/binary, I3/binary>>),
    {AvailableRegs1, UsedRegs1} = free_reg(
        AvailableRegs0, UsedRegs0, Reg
    ),
    State#state{
        stream = Stream1,
        available_regs = AvailableRegs1,
        used_regs = UsedRegs1
    }.

%%-----------------------------------------------------------------------------
%% @doc Emit a jump to a label. The offset of the relocation is saved and will
%% be updated with `update_branches/2`.
%% @end
%% @param State current backend state
%% @param Label to jump to
%% @return Updated backend state
%%-----------------------------------------------------------------------------
jump_to_label(
    #state{stream_module = StreamModule, stream = Stream0, labels = Labels} = State0, Label
) ->
    % `false' when the label has not been emitted yet (forward reference)
    LabelLookupResult = lists:keyfind(Label, 1, Labels),
    Offset = StreamModule:offset(Stream0),
    {State1, CodeBlock} = branch_to_label_code(State0, Offset, Label, LabelLookupResult),
    Stream1 = StreamModule:append(Stream0, CodeBlock),
    State1#state{stream = Stream1}.

%%-----------------------------------------------------------------------------
%% @doc Emit a jump to a known offset in the stream. The branch is encoded
%% relative to the current stream offset by `branch_to_offset_code/3'.
%% @end
%% @param State current backend state
%% @param TargetOffset stream offset to jump to
%% @return Updated backend state
%%-----------------------------------------------------------------------------
jump_to_offset(#state{stream_module = StreamModule, stream = Stream0} = State, TargetOffset) ->
    Offset = StreamModule:offset(Stream0),
    CodeBlock = branch_to_offset_code(State, Offset, TargetOffset),
    Stream1 = StreamModule:append(Stream0, CodeBlock),
    State#state{stream = Stream1}.

%%-----------------------------------------------------------------------------
%% @doc Jump to address in continuation pointer register
%% Calculate absolute address and jump to it.
%% @end
%% @param State current backend state
%% @param {free, OffsetReg} register containing the offset value
%% @return Updated backend state
%%-----------------------------------------------------------------------------
jump_to_continuation(
    #state{
        stream_module = StreamModule,
        stream = Stream0,
        available_regs = [Temp | _],
        offset = BaseOffset
    } = State0,
    {free, OffsetReg}
) ->
    % Calculate absolute address: native_code_base + target_offset
    % where native_code_base = current_pc + (BaseOffset - CurrentStreamOffset)
    CurrentStreamOffset = StreamModule:offset(Stream0),
    NetOffset = BaseOffset - CurrentStreamOffset,

    % Get native code base address into temporary register
    I1 = pc_relative_address(Temp, NetOffset),
    % Add target offset to get final absolute address
    I2 = jit_riscv32_asm:add(Temp, Temp, OffsetReg),
    % Indirect branch to the calculated absolute address
    I3 = jit_riscv32_asm:jr(Temp),

    Code = <<I1/binary, I2/binary, I3/binary>>,
    Stream1 = StreamModule:append(Stream0, Code),
    % Free all registers since this is a tail jump
    State0#state{stream = Stream1, available_regs = ?AVAILABLE_REGS, used_regs = []}.

%% Encode an unconditional branch from stream offset `Offset' to stream offset
%% `TargetOffset' and return the machine code as a binary.
%%
%% Short displacements use a single `j'; anything else uses an `auipc' + `jalr'
%% pair that clobbers the first available register.
%%
%% NOTE(review): the near window (-2044..2050) is asymmetric and is not the
%% c.j range (-2048..2046); it looks inherited from another backend. This is
%% only safe if jit_riscv32_asm:j/1 accepts displacements outside the c.j
%% range - confirm against the assembler.
%% NOTE(review): with no available register the far case fails with
%% function_clause, whereas branch_to_label_code/4 falls back to t6 for
%% forward references - confirm whether the same fallback is wanted here.
branch_to_offset_code(_State, Offset, TargetOffset) when
    TargetOffset - Offset =< 2050, TargetOffset - Offset >= -2044
->
    % Near branch: use direct J instruction
    Rel = TargetOffset - Offset,
    jit_riscv32_asm:j(Rel);
branch_to_offset_code(
    #state{available_regs = [TempReg | _]}, Offset, TargetOffset
) ->
    % Far branch: use auipc + jalr sequence for PC-relative addressing
    % This computes: PC + Immediate and jumps to it

    Rel = TargetOffset - Offset,
    % Split the relative offset into upper 20 bits and lower 12 bits
    % RISC-V PC-relative addressing: target = PC + (imm20 << 12) + sign_extend(imm12)
    % Since jalr's imm12 is sign-extended, if bit 11 of Rel is set,
    % we need to add 0x800 before splitting to compensate
    Hi20 = (Rel + 16#800) bsr 12,
    Lo12Unsigned = Rel band 16#FFF,
    % Convert to signed 12-bit value: if bit 11 is set, subtract 4096
    Lo12 =
        if
            Lo12Unsigned >= 16#800 -> Lo12Unsigned - 16#1000;
            true -> Lo12Unsigned
        end,

    % TempReg = PC + (Hi20 << 12)
    I1 = jit_riscv32_asm:auipc(TempReg, Hi20),
    % Jump to TempReg + sign_extend(Lo12)
    I2 = jit_riscv32_asm:jalr(zero, TempReg, Lo12),
    <<I1/binary, I2/binary>>.

%% Produce the code for a branch to `Label' emitted at stream offset `Offset'.
%% The last argument is the result of looking the label up in the known labels:
%% `{Label, LabelOffset}' when it is already placed, `false' otherwise.
%% Returns `{State, Code}'; for an unresolved label, `Code' is an 8-byte
%% placeholder and a `far_branch' relocation is pushed onto `branches'.
branch_to_label_code(State, Offset, Label, {Label, LabelOffset}) ->
    % Label already known: encode the branch right away
    {State, branch_to_offset_code(State, Offset, LabelOffset)};
branch_to_label_code(
    #state{available_regs = [Scratch | _], branches = Pending} = State, Offset, Label, false
) ->
    % RISC-V: Far branch sequence using PC-relative auipc + jalr (8 bytes)
    % Both words are placeholders (auipc Scratch, 0 / jalr zero, Scratch, 0)
    Placeholder = <<16#FFFFFFFF:32, 16#FFFFFFFF:32>>,
    % Record where the pair lives and which register it may use
    Relocation = {Label, Offset, {far_branch, Scratch}},
    {State#state{branches = [Relocation | Pending]}, Placeholder};
branch_to_label_code(
    #state{available_regs = [], branches = Pending} = State, Offset, Label, false
) ->
    % No free register: use t6 as scratch (caller-saved, safe to clobber)
    % Far branch sequence using PC-relative auipc + jalr (8 bytes)
    Placeholder = <<16#FFFFFFFF:32, 16#FFFFFFFF:32>>,
    Relocation = {Label, Offset, {far_branch, t6}},
    {State#state{branches = [Relocation | Pending]}, Placeholder};
branch_to_label_code(#state{available_regs = []}, _Offset, _Label, LabelLookup) ->
    error({no_available_registers, LabelLookup}).

%%-----------------------------------------------------------------------------
%% @doc Emit an if block, i.e. emit a test of a condition and conditionally
%% execute a block.
%% @end
%% @param State current backend state
%% @param Cond condition to test
%% @param BlockFn function to emit the block that may be executed
%% @return Updated backend state
%%-----------------------------------------------------------------------------
-spec if_block(state(), condition() | {'and', [condition()]}, fun((state()) -> state())) -> state().
if_block(
    #state{stream_module = StreamModule} = State0,
    {'and', CondList},
    BlockFn
) ->
    %% Emit every test in turn; each leaves a placeholder branch whose absolute
    %% offset is remembered so it can be patched once the block end is known.
    {Pending, StateCond} = lists:foldl(
        fun(Cond, {PendingAcc, StateAcc}) ->
            TestStart = StreamModule:offset(StateAcc#state.stream),
            {StateNext, BranchInfo, Delta} = if_block_cond(StateAcc, Cond),
            {[{TestStart + Delta, BranchInfo} | PendingAcc], StateNext}
        end,
        {[], State0},
        CondList
    ),
    StateBody = BlockFn(StateCond),
    StreamBody = StateBody#state.stream,
    BlockEnd = StreamModule:offset(StreamBody),
    %% Point every placeholder at the first instruction after the block
    StreamPatched = lists:foldl(
        fun({BranchAt, {BranchFunc, Reg, Operand}}, StreamAcc) ->
            Branch = jit_riscv32_asm:BranchFunc(Reg, Operand, BlockEnd - BranchAt),
            StreamModule:replace(StreamAcc, BranchAt, Branch)
        end,
        StreamBody,
        Pending
    ),
    merge_used_regs(StateBody#state{stream = StreamPatched}, StateCond#state.used_regs);
if_block(
    #state{stream_module = StreamModule, stream = Stream0} = State0,
    Cond,
    BlockFn
) ->
    TestStart = StreamModule:offset(Stream0),
    {StateCond, {BranchFunc, Reg, Operand}, BranchInstrDelta} = if_block_cond(State0, Cond),
    StateBody = BlockFn(StateCond),
    StreamBody = StateBody#state.stream,
    BlockEnd = StreamModule:offset(StreamBody),
    %% Patch the conditional branch instruction to jump to the end of the block
    BranchAt = TestStart + BranchInstrDelta,
    Branch = jit_riscv32_asm:BranchFunc(Reg, Operand, BlockEnd - BranchAt),
    StreamPatched = StreamModule:replace(StreamBody, BranchAt, Branch),
    merge_used_regs(StateBody#state{stream = StreamPatched}, StateCond#state.used_regs).

%%-----------------------------------------------------------------------------
%% @doc Emit an if else block, i.e. emit a test of a condition and
%% conditionally execute a block or another block.
%% @end
%% @param State current backend state
%% @param Cond condition to test
%% @param BlockTrueFn function to emit the block that is executed if condition is true
%% @param BlockFalseFn function to emit the block that is executed if condition is false
%% @return Updated backend state
%%-----------------------------------------------------------------------------
-spec if_else_block(state(), condition(), fun((state()) -> state()), fun((state()) -> state())) ->
    state().
if_else_block(
    #state{stream_module = StreamModule, stream = Stream0} = State0,
    Cond,
    BlockTrueFn,
    BlockFalseFn
) ->
    TestStart = StreamModule:offset(Stream0),
    {StateCond, {BranchFunc, Reg, Operand}, BranchInstrDelta} = if_block_cond(State0, Cond),
    CondBranchAt = TestStart + BranchInstrDelta,
    StateTrue = BlockTrueFn(StateCond),
    StreamTrue = StateTrue#state.stream,
    %% Emit unconditional branch to skip the else block (will be replaced)
    %% NOTE(review): this assumes j(0) and the final j(Offset) encode to the
    %% same number of bytes - confirm against jit_riscv32_asm:j/1.
    SkipJumpAt = StreamModule:offset(StreamTrue),
    StreamSkip = StreamModule:append(StreamTrue, jit_riscv32_asm:j(0)),
    %% Else block starts here.
    ElseStart = StreamModule:offset(StreamSkip),
    %% Patch the conditional branch to jump to the else block
    CondBranch = jit_riscv32_asm:BranchFunc(Reg, Operand, ElseStart - CondBranchAt),
    StreamCondPatched = StreamModule:replace(StreamSkip, CondBranchAt, CondBranch),
    %% Build the else block from the register allocation as it was right after
    %% the test, not as the true block left it
    StateFalse = BlockFalseFn(StateTrue#state{
        stream = StreamCondPatched,
        used_regs = StateCond#state.used_regs,
        available_regs = StateCond#state.available_regs
    }),
    StreamFalse = StateFalse#state.stream,
    EndOffset = StreamModule:offset(StreamFalse),
    %% Patch the unconditional branch to jump to the end
    SkipJump = jit_riscv32_asm:j(EndOffset - SkipJumpAt),
    StreamFinal = StreamModule:replace(StreamFalse, SkipJumpAt, SkipJump),
    merge_used_regs(StateFalse#state{stream = StreamFinal}, StateTrue#state.used_regs).

-spec if_block_cond(state(), condition()) ->
    {
        state(),
        {beq | bne | blt | bge, atom(), atom() | integer()},
        non_neg_integer()
    }.
%% Emit the test for one condition of an if block.
%%
%% Every clause emits whatever set-up the test needs followed by a 4-byte
%% placeholder (16#FFFFFFFF) standing in for the conditional branch. The branch
%% is the INVERSE of the condition, because it has to skip the block when the
%% condition does not hold. The result is
%% `{State, {BranchFunc, RegA, RegB}, Delta}' where `Delta' is the distance in
%% bytes from the stream offset at entry to the placeholder; the caller patches
%% the placeholder with `jit_riscv32_asm:BranchFunc(RegA, RegB, Offset)'.
%% Operands written as `{free, Reg}' are released in the returned state.
if_block_cond(#state{} = State0, {RegOrTuple, '<', 0}) ->
    %% RISC-V: bge Reg, zero, offset (branch if Reg >= 0, i.e., NOT negative/NOT less than 0)
    State1 = if_block_cond_placeholder(State0, RegOrTuple),
    {State1, {bge, if_block_cond_reg(RegOrTuple), zero}, 0};
if_block_cond(#state{} = State0, {RegOrTuple, '<', Val}) when
    is_integer(Val), Val >= 0, Val =< 255
->
    % RISC-V: bge Reg, Temp, offset (branch if Reg >= Val, i.e., NOT less than)
    % Load immediate into a temp register for comparison
    [Temp | _] = State0#state.available_regs,
    {State1, BranchDelta} = if_block_cond_load_immediate(State0, RegOrTuple, Temp, Val),
    {State1, {bge, if_block_cond_reg(RegOrTuple), Temp}, BranchDelta};
if_block_cond(#state{available_regs = [Temp | _]} = State0, {RegOrTuple, '<', Val}) when
    is_integer(Val)
->
    % RISC-V: bge Reg, Temp, offset (branch if Reg >= Temp, i.e., NOT less than)
    {State1, BranchDelta} = if_block_cond_load_immediate(State0, RegOrTuple, Temp, Val),
    {State1, {bge, if_block_cond_reg(RegOrTuple), Temp}, BranchDelta};
if_block_cond(#state{available_regs = [Temp | _]} = State0, {Val, '<', RegOrTuple}) when
    is_integer(Val), Val >= 0, Val =< 255
->
    % RISC-V: bge Temp, Reg, offset (branch if Val >= Reg, i.e., NOT Val < Reg)
    {State1, BranchDelta} = if_block_cond_load_immediate(State0, RegOrTuple, Temp, Val),
    {State1, {bge, Temp, if_block_cond_reg(RegOrTuple)}, BranchDelta};
if_block_cond(#state{available_regs = [Temp | _]} = State0, {Val, '<', RegOrTuple}) when
    is_integer(Val)
->
    % RISC-V: bge Temp, Reg, offset (branch if Val >= Reg, i.e., NOT Val < Reg)
    {State1, BranchDelta} = if_block_cond_load_immediate(State0, RegOrTuple, Temp, Val),
    {State1, {bge, Temp, if_block_cond_reg(RegOrTuple)}, BranchDelta};
if_block_cond(#state{} = State0, {RegOrTuple, '<', RegB}) when is_atom(RegB) ->
    % RISC-V: bge Reg, RegB, offset (branch if Reg >= RegB, i.e., NOT less than)
    State1 = if_block_cond_placeholder(State0, RegOrTuple),
    {State1, {bge, if_block_cond_reg(RegOrTuple), RegB}, 0};
if_block_cond(#state{} = State0, {RegOrTuple, '==', 0}) ->
    %% RISC-V: bne Reg, zero, offset (branch if Reg != 0, i.e., NOT equal to 0)
    State1 = if_block_cond_placeholder(State0, RegOrTuple),
    {State1, {bne, if_block_cond_reg(RegOrTuple), zero}, 0};
if_block_cond(#state{} = State0, {RegOrTuple, '==', RegB}) when is_atom(RegB) ->
    %% RISC-V: bne Reg, RegB, offset (branch if Reg != RegB, i.e., NOT equal)
    State1 = if_block_cond_placeholder(State0, RegOrTuple),
    {State1, {bne, if_block_cond_reg(RegOrTuple), RegB}, 0};
%% Delegate (int) forms to regular forms since we only have 32-bit words
if_block_cond(State, {'(int)', RegOrTuple, '==', 0}) ->
    if_block_cond(State, {RegOrTuple, '==', 0});
if_block_cond(State, {'(int)', RegOrTuple, '==', Val}) when is_integer(Val) ->
    if_block_cond(State, {RegOrTuple, '==', Val});
if_block_cond(#state{available_regs = [Temp | _]} = State0, {RegOrTuple, '!=', Val}) when
    is_integer(Val) andalso Val >= 0 andalso Val =< 255
->
    %% RISC-V: Load immediate into temp, then beq Reg, Temp, offset
    {State1, BranchDelta} = if_block_cond_load_immediate(State0, RegOrTuple, Temp, Val),
    {State1, {beq, if_block_cond_reg(RegOrTuple), Temp}, BranchDelta};
if_block_cond(#state{} = State0, {RegOrTuple, '!=', Val}) when ?IS_GPR(Val) ->
    %% RISC-V: beq Reg, Val, offset (branch if Reg == Val, i.e., NOT not-equal)
    State1 = if_block_cond_placeholder(State0, RegOrTuple),
    {State1, {beq, if_block_cond_reg(RegOrTuple), Val}, 0};
if_block_cond(State, {'(int)', RegOrTuple, '!=', Val}) when is_integer(Val) ->
    if_block_cond(State, {RegOrTuple, '!=', Val});
if_block_cond(#state{available_regs = [Temp | _]} = State0, {RegOrTuple, '==', Val}) when
    is_integer(Val) andalso Val >= 0 andalso Val =< 255
->
    %% RISC-V: Load immediate into temp, then bne Reg, Temp, offset
    {State1, BranchDelta} = if_block_cond_load_immediate(State0, RegOrTuple, Temp, Val),
    {State1, {bne, if_block_cond_reg(RegOrTuple), Temp}, BranchDelta};
if_block_cond(#state{} = State0, {{free, RegA}, '==', {free, RegB}}) ->
    %% RISC-V: bne RegA, RegB, offset (branch if RegA != RegB, i.e., NOT equal)
    %% Both operands are released.
    State1 = if_block_cond_placeholder(State0, {free, RegA}),
    State2 = if_block_free_reg({free, RegB}, State1),
    {State2, {bne, RegA, RegB}, 0};
if_block_cond(#state{available_regs = [Temp | _]} = State0, {RegOrTuple, '==', Val}) when
    is_integer(Val)
->
    %% RISC-V: bne Reg, Temp, offset (branch if Reg != Temp, i.e., NOT equal)
    {State1, BranchDelta} = if_block_cond_load_immediate(State0, RegOrTuple, Temp, Val),
    {State1, {bne, if_block_cond_reg(RegOrTuple), Temp}, BranchDelta};
if_block_cond(#state{available_regs = [Temp | _]} = State0, {RegOrTuple, '!=', Val}) when
    is_integer(Val)
->
    %% RISC-V: beq Reg, Temp, offset (branch if Reg == Temp, i.e., NOT not-equal)
    {State1, BranchDelta} = if_block_cond_load_immediate(State0, RegOrTuple, Temp, Val),
    {State1, {beq, if_block_cond_reg(RegOrTuple), Temp}, BranchDelta};
if_block_cond(
    #state{
        stream_module = StreamModule,
        stream = Stream0,
        available_regs = [Temp | _]
    } = State0,
    {'(bool)', RegOrTuple, '==', false}
) ->
    %% RISC-V: Test bit 0 by shifting to MSB, then branch if negative (bit was 1, NOT false)
    I1 = jit_riscv32_asm:slli(Temp, if_block_cond_reg(RegOrTuple), 31),
    Stream1 = StreamModule:append(Stream0, I1),
    State1 = if_block_cond_placeholder(State0#state{stream = Stream1}, RegOrTuple),
    {State1, {blt, Temp, zero}, byte_size(I1)};
if_block_cond(
    #state{
        stream_module = StreamModule,
        stream = Stream0,
        available_regs = [Temp | _]
    } = State0,
    {'(bool)', RegOrTuple, '!=', false}
) ->
    %% RISC-V: Test bit 0 by shifting to MSB, then branch if non-negative (bit was 0, NOT true)
    I1 = jit_riscv32_asm:slli(Temp, if_block_cond_reg(RegOrTuple), 31),
    Stream1 = StreamModule:append(Stream0, I1),
    State1 = if_block_cond_placeholder(State0#state{stream = Stream1}, RegOrTuple),
    {State1, {bge, Temp, zero}, byte_size(I1)};
if_block_cond(
    #state{
        stream_module = StreamModule,
        stream = Stream0,
        available_regs = [Temp | _]
    } = State0,
    {RegOrTuple, '&', Val, '!=', 0}
) ->
    Reg = if_block_cond_reg(RegOrTuple),
    %% RISC-V: Test bits using ANDI or li+and
    TestCode =
        if
            Val >= -2048 andalso Val =< 2047 ->
                %% Can use ANDI instruction directly
                jit_riscv32_asm:andi(Temp, Reg, Val);
            true ->
                %% Need to load immediate into temp register first
                TestCode0 = jit_riscv32_asm:li(Temp, Val),
                TestCode1 = jit_riscv32_asm:and_(Temp, Reg, Temp),
                <<TestCode0/binary, TestCode1/binary>>
        end,
    OffsetBefore = StreamModule:offset(Stream0),
    Stream1 = StreamModule:append(Stream0, TestCode),
    BranchDelta = StreamModule:offset(Stream1) - OffsetBefore,
    %% Branch if result is zero (no bits set, NOT != 0)
    State1 = if_block_cond_placeholder(State0#state{stream = Stream1}, RegOrTuple),
    {State1, {beq, Temp, zero}, BranchDelta};
if_block_cond(
    #state{
        stream_module = StreamModule,
        stream = Stream0,
        available_regs = [Temp | _]
    } = State0,
    {Reg, '&', 16#F, '!=', 16#F}
) when ?IS_GPR(Reg) ->
    %% RISC-V: Special case Reg & ?TERM_IMMED_TAG_MASK != ?TERM_INTEGER_TAG
    %% ~Reg << 28 is zero exactly when the four low bits of Reg are all set.
    I1 = jit_riscv32_asm:not_(Temp, Reg),
    I2 = jit_riscv32_asm:slli(Temp, Temp, 28),
    Stream1 = StreamModule:append(Stream0, <<I1/binary, I2/binary>>),
    %% Reg is a plain register here, so nothing is released
    State1 = if_block_cond_placeholder(State0#state{stream = Stream1}, Reg),
    {State1, {beq, Temp, zero}, byte_size(I1) + byte_size(I2)};
if_block_cond(
    #state{
        stream_module = StreamModule,
        stream = Stream0
    } = State0,
    {{free, Reg} = RegTuple, '&', 16#F, '!=', 16#F}
) when ?IS_GPR(Reg) ->
    %% RISC-V: Special case Reg & ?TERM_IMMED_TAG_MASK != ?TERM_INTEGER_TAG
    %% Reg is being released, so it is overwritten in place instead of using a temp.
    I1 = jit_riscv32_asm:not_(Reg, Reg),
    I2 = jit_riscv32_asm:slli(Reg, Reg, 28),
    Stream1 = StreamModule:append(Stream0, <<I1/binary, I2/binary>>),
    State1 = if_block_cond_placeholder(State0#state{stream = Stream1}, RegTuple),
    {State1, {beq, Reg, zero}, byte_size(I1) + byte_size(I2)};
if_block_cond(
    #state{
        stream_module = StreamModule,
        stream = Stream0,
        available_regs = [Temp | AT]
    } = State0,
    {Reg, '&', Mask, '!=', Val}
) when ?IS_GPR(Reg) ->
    %% RISC-V: AND with mask, then compare with value
    %% Reg must be preserved, so the masking is done on a copy in Temp.
    OffsetBefore = StreamModule:offset(Stream0),
    I1 = jit_riscv32_asm:mv(Temp, Reg),
    Stream1 = StreamModule:append(Stream0, I1),
    State1 = State0#state{stream = Stream1},
    {State2, Temp} = and_(State1#state{available_regs = AT}, {free, Temp}, Mask),
    Stream2 = State2#state.stream,
    %% Compare Temp with Val and branch if equal (NOT != Val)
    case Val of
        0 ->
            %% Optimize comparison with zero
            %% (Val =:= 0 is already matched by the unguarded
            %% {RegOrTuple, '&', Val, '!=', 0} clause above, so this branch
            %% is not reached with the current clause order.)
            BranchDelta = StreamModule:offset(Stream2) - OffsetBefore,
            BranchInstr = <<16#FFFFFFFF:32/little>>,
            Stream3 = StreamModule:append(Stream2, BranchInstr),
            State3 = State2#state{
                stream = Stream3, available_regs = [Temp | State2#state.available_regs]
            },
            {State3, {beq, Temp, zero}, BranchDelta};
        _ when ?IS_GPR(Val) ->
            %% Val is a register
            BranchDelta = StreamModule:offset(Stream2) - OffsetBefore,
            BranchInstr = <<16#FFFFFFFF:32/little>>,
            Stream3 = StreamModule:append(Stream2, BranchInstr),
            State3 = State2#state{
                stream = Stream3, available_regs = [Temp | State2#state.available_regs]
            },
            {State3, {beq, Temp, Val}, BranchDelta};
        _ ->
            %% Val is an immediate - need second temp register
            %% Reuse the mask register for the comparison value
            [MaskReg | AT2] = AT,
            State3 = mov_immediate(State2#state{available_regs = AT2}, MaskReg, Val),
            Stream3 = State3#state.stream,
            BranchDelta = StreamModule:offset(Stream3) - OffsetBefore,
            BranchInstr = <<16#FFFFFFFF:32/little>>,
            Stream4 = StreamModule:append(Stream3, BranchInstr),
            State4 = State3#state{
                stream = Stream4, available_regs = [Temp, MaskReg | State3#state.available_regs]
            },
            {State4, {beq, Temp, MaskReg}, BranchDelta}
    end;
if_block_cond(
    #state{
        stream_module = StreamModule,
        stream = Stream0,
        available_regs = AvailRegs
    } = State0,
    {{free, Reg} = RegTuple, '&', Mask, '!=', Val}
) when ?IS_GPR(Reg) ->
    %% RISC-V: AND with mask, then compare with value
    %% Reg is being released, so it is masked in place.
    OffsetBefore = StreamModule:offset(Stream0),
    {State1, Reg} = and_(State0, RegTuple, Mask),
    Stream1 = State1#state.stream,
    %% Compare Reg with Val and branch if equal (NOT != Val)
    case Val of
        0 ->
            %% Optimize comparison with zero
            %% (shadowed by the unguarded '!=', 0 clause above, see previous clause)
            BranchDelta = StreamModule:offset(Stream1) - OffsetBefore,
            BranchInstr = <<16#FFFFFFFF:32/little>>,
            Stream2 = StreamModule:append(Stream1, BranchInstr),
            State2 = State1#state{stream = Stream2},
            State3 = if_block_free_reg(RegTuple, State2),
            {State3, {beq, Reg, zero}, BranchDelta};
        _ when ?IS_GPR(Val) ->
            %% Val is a register
            BranchDelta = StreamModule:offset(Stream1) - OffsetBefore,
            BranchInstr = <<16#FFFFFFFF:32/little>>,
            Stream2 = StreamModule:append(Stream1, BranchInstr),
            State2 = State1#state{stream = Stream2},
            State3 = if_block_free_reg(RegTuple, State2),
            {State3, {beq, Reg, Val}, BranchDelta};
        _ ->
            %% Val is an immediate - need temp register
            %% Reuse the mask register for the comparison value
            [MaskReg | AT] = State1#state.available_regs,
            State2 = mov_immediate(State1#state{available_regs = AT}, MaskReg, Val),
            Stream2 = State2#state.stream,
            BranchDelta = StreamModule:offset(Stream2) - OffsetBefore,
            BranchInstr = <<16#FFFFFFFF:32/little>>,
            Stream3 = StreamModule:append(Stream2, BranchInstr),
            %% Restore the register pool seen at entry before releasing Reg
            State3 = State2#state{stream = Stream3, available_regs = AvailRegs},
            State4 = if_block_free_reg(RegTuple, State3),
            {State4, {beq, Reg, MaskReg}, BranchDelta}
    end.

%% Strip the optional `{free, _}' wrapper from a condition operand.
if_block_cond_reg({free, Reg}) -> Reg;
if_block_cond_reg(Reg) -> Reg.

%% Append the 4-byte branch placeholder to the state's stream and release
%% `RegOrTuple' if it is a `{free, Reg}' operand.
if_block_cond_placeholder(
    #state{stream_module = StreamModule, stream = Stream0} = State0, RegOrTuple
) ->
    Stream1 = StreamModule:append(Stream0, <<16#FFFFFFFF:32/little>>),
    State1 = if_block_free_reg(RegOrTuple, State0),
    State1#state{stream = Stream1}.

%% Load `Val' into `Temp', then append the branch placeholder and release
%% `RegOrTuple'. Returns the new state and the distance in bytes from the
%% stream offset at entry to the placeholder.
if_block_cond_load_immediate(
    #state{stream_module = StreamModule, stream = Stream0} = State0, RegOrTuple, Temp, Val
) ->
    OffsetBefore = StreamModule:offset(Stream0),
    State1 = mov_immediate(State0, Temp, Val),
    BranchDelta = StreamModule:offset(State1#state.stream) - OffsetBefore,
    {if_block_cond_placeholder(State1, RegOrTuple), BranchDelta}.

%% Release the register of a `{free, Reg}' operand; a plain register operand
%% leaves the state untouched.
-spec if_block_free_reg(riscv32_register() | {free, riscv32_register()}, state()) -> state().
if_block_free_reg({free, Reg}, State0) ->
    #state{available_regs = AvR0, used_regs = UR0} = State0,
    {AvR1, UR1} = free_reg(AvR0, UR0, Reg),
    State0#state{
        available_regs = AvR1,
        used_regs = UR1
    };
if_block_free_reg(Reg, State0) when ?IS_GPR(Reg) ->
    State0.

%% Mark every register of the given list as used in State: registers that are
%% not yet in `used_regs' are prepended to it and removed from `available_regs'.
%% Used after a conditional block so that registers in use on the path that
%% skips the block stay reserved.
-spec merge_used_regs(state(), [riscv32_register()]) -> state().
merge_used_regs(#state{used_regs = UR0, available_regs = AvR0} = State, [
    Reg | T
]) ->
    case lists:member(Reg, UR0) of
        true ->
            merge_used_regs(State, T);
        false ->
            AvR1 = lists:delete(Reg, AvR0),
            UR1 = [Reg | UR0],
            merge_used_regs(
                State#state{used_regs = UR1, available_regs = AvR1}, T
            )
    end;
merge_used_regs(State, []) ->
    State.

%%-----------------------------------------------------------------------------
%% @doc Emit a shift register right by a fixed number of bits, effectively
%% dividing it by 2^Shift (logical shift, `srli').
%% A `{free, Reg}' operand is shifted in place; a plain register is left
%% untouched and the result goes to a newly allocated register.
%% @end
%% @param State current state
%% @param Reg register to shift
%% @param Shift number of bits to shift
%% @return tuple of the new state and the register holding the result
%%-----------------------------------------------------------------------------
-spec shift_right(#state{}, maybe_free_riscv32_register(), non_neg_integer()) ->
    {#state{}, riscv32_register()}.
shift_right(#state{stream_module = StreamModule, stream = Stream0} = State, {free, Reg}, Shift) when
    ?IS_GPR(Reg) andalso is_integer(Shift)
->
    I = jit_riscv32_asm:srli(Reg, Reg, Shift),
    Stream1 = StreamModule:append(Stream0, I),
    {State#state{stream = Stream1}, Reg};
shift_right(
    #state{
        stream_module = StreamModule,
        stream = Stream0,
        available_regs = [ResultReg | T],
        used_regs = UR
    } = State,
    Reg,
    Shift
) when
    ?IS_GPR(Reg) andalso is_integer(Shift)
->
    I = jit_riscv32_asm:srli(ResultReg, Reg, Shift),
    Stream1 = StreamModule:append(Stream0, I),
    {State#state{stream = Stream1, available_regs = T, used_regs = [ResultReg | UR]}, ResultReg}.

%%-----------------------------------------------------------------------------
%% @doc Emit a shift register left by a fixed number of bits, effectively
%% multiplying it by 2^Shift. The register is shifted in place.
%% @end
%% @param State current state
%% @param Reg register to shift
%% @param Shift number of bits to shift
%% @return new state
%%-----------------------------------------------------------------------------
shift_left(#state{stream_module = StreamModule, stream = Stream0} = State, Reg, Shift) when
    is_atom(Reg)
->
    I = jit_riscv32_asm:slli(Reg, Reg, Shift),
    Stream1 = StreamModule:append(Stream0, I),
    State#state{stream = Stream1}.

%%-----------------------------------------------------------------------------
%% @doc Emit a call to a function pointer with arguments. This function converts
%% arguments and passes them following the backend ABI convention.
%%
%% Emitted code, in order: save ra, the three fixed context registers and every
%% used register that is not being freed; move the function pointer out of the
%% way if it sits in a parameter register; set up the arguments; `jalr' to the
%% pointer; move the result from a0 to a register that survives the restore;
%% restore the saved registers.
%% @end
%% @param State current backend state
%% @param FuncPtrTuple either {free, Reg} or {primitive, PrimitiveIndex}
%% @param Args arguments to pass to the function
%% @return Updated backend state and return register
%%-----------------------------------------------------------------------------
-spec call_func_ptr(state(), {free, riscv32_register()} | {primitive, non_neg_integer()}, [arg()]) ->
    {state(), riscv32_register()}.
call_func_ptr(
    #state{
        stream_module = StreamModule,
        stream = Stream0,
        available_regs = AvailableRegs0,
        used_regs = UsedRegs0
    } = State0,
    FuncPtrTuple,
    Args
) ->
    % Registers the caller hands over with {free, _}: they need not be saved
    FreeRegs = lists:flatmap(
        fun
            ({free, {ptr, Reg}}) -> [Reg];
            ({free, Reg}) when is_atom(Reg) -> [Reg];
            (_) -> []
        end,
        [FuncPtrTuple | Args]
    ),
    UsedRegs1 = UsedRegs0 -- FreeRegs,
    % Save RA (like AArch64 saves LR) so it's preserved across jalr calls
    SavedRegs = [?RA_REG, ?CTX_REG, ?JITSTATE_REG, ?NATIVE_INTERFACE_REG | UsedRegs1],

    % Calculate available registers
    % (FreeRegs restricted to ?AVAILABLE_REGS, keeping FreeRegs order)
    FreeGPRegs = FreeRegs -- (FreeRegs -- ?AVAILABLE_REGS),
    AvailableRegs1 = FreeGPRegs ++ AvailableRegs0,

    % Calculate stack space: round up to 16-byte boundary for RISC-V ABI
    NumRegs = length(SavedRegs),
    StackBytes = NumRegs * 4,
    AlignedStackBytes = ((StackBytes + 15) div 16) * 16,

    Stream1 = push_registers(SavedRegs, AlignedStackBytes, StreamModule, Stream0),

    % Set up arguments following RISC-V ILP32 calling convention
    % Arguments are passed in a0-a7 (up to 8 register arguments)
    % The `offset' pseudo-argument becomes the stream offset after the pushes
    Args1 = lists:map(
        fun(Arg) ->
            case Arg of
                offset -> StreamModule:offset(Stream1);
                _ -> Arg
            end
        end,
        Args
    ),

    RegArgs0 = Args1,
    RegArgsRegs = lists:flatmap(fun arg_to_reg_list/1, RegArgs0),

    % We pushed registers to stack, so we can use these registers we saved
    % and the currently available registers
    SetArgsRegsOnlyAvailableArgs = (UsedRegs1 -- RegArgsRegs) ++ AvailableRegs0,
    State1 = State0#state{
        available_regs = SetArgsRegsOnlyAvailableArgs,
        used_regs = ?AVAILABLE_REGS -- SetArgsRegsOnlyAvailableArgs,
        stream = Stream1
    },

    ParameterRegs = parameter_regs(RegArgs0),
    {Stream3, SetArgsAvailableRegs, FuncPtrReg, RegArgs} =
        case FuncPtrTuple of
            {free, FuncPtrReg0} ->
                % If FuncPtrReg is in parameter regs, we must swap it with a free reg.
                case lists:member(FuncPtrReg0, ParameterRegs) of
                    true ->
                        case SetArgsRegsOnlyAvailableArgs -- ParameterRegs of
                            [] ->
                                % Swap SetArgsRegsOnlyAvailableArgs with a reg used in RegArgs0
                                % that is not in ParameterRegs
                                [NewArgReg | _] = SetArgsRegsOnlyAvailableArgs,
                                [FuncPtrReg1 | _] = RegArgsRegs -- ParameterRegs,
                                MovInstr1 = jit_riscv32_asm:mv(NewArgReg, FuncPtrReg1),
                                MovInstr2 = jit_riscv32_asm:mv(FuncPtrReg1, FuncPtrReg0),
                                SetArgsAvailableArgs1 =
                                    (SetArgsRegsOnlyAvailableArgs -- [FuncPtrReg1]) ++
                                        [FuncPtrReg0],
                                RegArgs1 = replace_reg(RegArgs0, FuncPtrReg1, NewArgReg),
                                {
                                    % Order matters: the argument is moved away
                                    % before its register receives the pointer
                                    StreamModule:append(
                                        State1#state.stream,
                                        <<MovInstr1/binary, MovInstr2/binary>>
                                    ),
                                    SetArgsAvailableArgs1,
                                    FuncPtrReg1,
                                    RegArgs1
                                };
                            [FuncPtrReg1 | _] ->
                                MovInstr = jit_riscv32_asm:mv(FuncPtrReg1, FuncPtrReg0),
                                SetArgsAvailableArgs1 =
                                    (SetArgsRegsOnlyAvailableArgs -- [FuncPtrReg1]) ++
                                        [FuncPtrReg0],
                                {
                                    StreamModule:append(State1#state.stream, MovInstr),
                                    SetArgsAvailableArgs1,
                                    FuncPtrReg1,
                                    RegArgs0
                                }
                        end;
                    false ->
                        SetArgsAvailableArgs1 = SetArgsRegsOnlyAvailableArgs -- [FuncPtrReg0],
                        {State1#state.stream, SetArgsAvailableArgs1, FuncPtrReg0, RegArgs0}
                end;
            {primitive, Primitive} ->
                [FuncPtrReg0 | _] = SetArgsRegsOnlyAvailableArgs -- ParameterRegs,
                SetArgsAvailableRegs1 = SetArgsRegsOnlyAvailableArgs -- [FuncPtrReg0],
                PrepCall = load_primitive_ptr(Primitive, FuncPtrReg0),
                Stream2 = StreamModule:append(State1#state.stream, PrepCall),
                {Stream2, SetArgsAvailableRegs1, FuncPtrReg0, RegArgs0}
        end,

    State3 = State1#state{
        available_regs = SetArgsAvailableRegs,
        used_regs = ?AVAILABLE_REGS -- SetArgsAvailableRegs,
        stream = Stream3
    },

    StackOffset = AlignedStackBytes,
    State4 = set_registers_args(State3, RegArgs, ParameterRegs, StackOffset),
    Stream4 = State4#state.stream,

    % Call the function pointer (using JALR for call with return)
    Call = jit_riscv32_asm:jalr(ra, FuncPtrReg, 0),
    Stream5 = StreamModule:append(Stream4, Call),

    % Move a0 into the first register that is free and will not be overwritten
    % by pop_registers
    MoveResultToFreeReg = fun() ->
        [FreeReg | _] = AvailableRegs1 -- SavedRegs,
        MoveResult = jit_riscv32_asm:mv(FreeReg, a0),
        {StreamModule:append(Stream5, MoveResult), FreeReg}
    end,

    % For result, we need a free register (including FuncPtrReg).
    % If none are available (all registers were pushed to the stack),
    % we write the result to the stack position of FuncPtrReg
    % NOTE(review): a {free, Reg} function pointer is removed from UsedRegs1
    % above, so it is only found in SavedRegs if it is one of the four fixed
    % registers or appears twice in used_regs.
    {Stream6, ResultReg} =
        case length(SavedRegs) of
            N when N >= 7 andalso element(1, FuncPtrTuple) =:= free ->
                % We use original FuncPtrReg then as we know it's available.
                % Calculate stack offset: find register index in SavedRegs * 4 bytes
                OrigFuncPtrReg = element(2, FuncPtrTuple),
                case index_of(OrigFuncPtrReg, SavedRegs) of
                    RegIndex when RegIndex >= 0 ->
                        % pop_registers will reload the result into this register
                        StoreResult = jit_riscv32_asm:sw(sp, a0, RegIndex * 4),
                        {StreamModule:append(Stream5, StoreResult), OrigFuncPtrReg};
                    _ ->
                        % FuncPtrReg was not in SavedRegs, use an available register.
                        % The register actually written is the one reported to the
                        % caller (previously the original FuncPtrReg was returned
                        % even when another register received the value).
                        MoveResultToFreeReg()
                end;
            _ ->
                % Use any free that is not in SavedRegs
                MoveResultToFreeReg()
        end,
    UsedRegs2 = [ResultReg | UsedRegs1],

    Stream8 = pop_registers(SavedRegs, AlignedStackBytes, StreamModule, Stream6),

    AvailableRegs2 = lists:delete(ResultReg, AvailableRegs1),
    % Keep only allocatable registers, in ?AVAILABLE_REGS order
    AvailableRegs3 = ?AVAILABLE_REGS -- (?AVAILABLE_REGS -- AvailableRegs2),
    {
        State4#state{
            stream = Stream8,
            available_regs = AvailableRegs3,
            used_regs = UsedRegs2
        },
        ResultReg
    }.

%% Registers named by a call argument: a bare atom, `{free, Atom}' and
%% `{free, {ptr, Reg}}' each yield a one-element list; anything else yields [].
arg_to_reg_list(Arg) when is_atom(Arg) -> [Arg];
arg_to_reg_list({free, Reg}) when is_atom(Reg) -> [Reg];
arg_to_reg_list({free, {ptr, Reg}}) -> [Reg];
arg_to_reg_list(_) -> [].

%% Zero-based position of the first occurrence of Item in List, or -1 when
%% Item does not occur.
index_of(Item, List) -> index_of(Item, List, 0).

index_of(Item, [Item | _], Pos) -> Pos;
index_of(Item, [_ | Tail], Pos) -> index_of(Item, Tail, Pos + 1);
index_of(_, [], _) -> -1.

%% Emit `addi sp, sp, -AlignedStackBytes' followed by one `sw reg, offset(sp)'
%% per saved register, using consecutive 4-byte slots starting at offset 0.
%% Nothing is emitted when there is no register to save.
push_registers([], _AlignedStackBytes, _StreamModule, Stream) ->
    Stream;
push_registers(SavedRegs, AlignedStackBytes, StreamModule, Stream) when length(SavedRegs) > 0 ->
    Reserve = jit_riscv32_asm:addi(sp, sp, -AlignedStackBytes),
    StoreOne = fun(Reg, {Acc, Slot}) ->
        Store = jit_riscv32_asm:sw(sp, Reg, Slot),
        {StreamModule:append(Acc, Store), Slot + 4}
    end,
    {Stored, _NextSlot} = lists:foldl(
        StoreOne, {StreamModule:append(Stream, Reserve), 0}, SavedRegs
    ),
    Stored.

%% Mirror of push_registers/4: emit one `lw reg, offset(sp)' per saved register
%% (same slot order), then `addi sp, sp, AlignedStackBytes'.
%% Nothing is emitted when there is no register to restore.
pop_registers([], _AlignedStackBytes, _StreamModule, Stream) ->
    Stream;
pop_registers(SavedRegs, AlignedStackBytes, StreamModule, Stream) when length(SavedRegs) > 0 ->
    LoadOne = fun(Reg, {Acc, Slot}) ->
        Load = jit_riscv32_asm:lw(Reg, sp, Slot),
        {StreamModule:append(Acc, Load), Slot + 4}
    end,
    {Loaded, _NextSlot} = lists:foldl(LoadOne, {Stream, 0}, SavedRegs),
    Release = jit_riscv32_asm:addi(sp, sp, AlignedStackBytes),
    StreamModule:append(Loaded, Release).

%% Convenience wrapper: derive the parameter registers from Args and delegate
%% to set_registers_args/4.
set_registers_args(State0, Args, StackOffset) ->
    set_registers_args(State0, Args, parameter_regs(Args), StackOffset).

%% Emit the code moving every argument of Args into the matching register of
%% ParamRegs, then update register bookkeeping: registers passed as
%% `{free, Reg}' or `{free, {ptr, Reg}}' are dropped from the used set, and the
%% parameter registers are marked as used.
set_registers_args(
    #state{used_regs = UsedRegs} = State0,
    Args,
    ParamRegs,
    StackOffset
) ->
    ArgsRegs = args_regs(Args),
    % Scratch registers that are neither a destination, nor a source, nor
    % otherwise in use: these may be clobbered to break move cycles.
    AvailableScratchGP = ((?SCRATCH_REGS -- ParamRegs) -- ArgsRegs) -- UsedRegs,
    State1 = set_registers_args0(
        State0, Args, ArgsRegs, ParamRegs, AvailableScratchGP, StackOffset
    ),
    NewUsedRegs = lists:foldl(
        fun
            ({free, {ptr, Reg}}, AccUsed) -> lists:delete(Reg, AccUsed);
            ({free, Reg}, AccUsed) -> lists:delete(Reg, AccUsed);
            (_, AccUsed) -> AccUsed
        end,
        UsedRegs,
        Args
    ),
    State1#state{
        % `--' is right-associative: without the parentheses this would be
        % ?AVAILABLE_REGS -- (ParamRegs -- NewUsedRegs) and registers that are
        % still in use would be reported as available.
        available_regs = (?AVAILABLE_REGS -- ParamRegs) -- NewUsedRegs,
        used_regs = ParamRegs ++ (NewUsedRegs -- ParamRegs)
    }.

%% Parameter registers consumed by Args, in argument order, taken from
%% ?PARAMETER_REGS.
parameter_regs(Args) ->
    parameter_regs0(Args, ?PARAMETER_REGS, []).

% ILP32: a 64-bit argument takes the next two consecutive parameter registers
% (low word in the lower-numbered one). The pair does not have to start on an
% even register: a1/a2 is a valid pair.
% NOTE(review): when fewer than two registers remain, the 64-bit argument falls
% through to the single-register clause, as it did before; confirm no caller
% can reach that case.
parameter_regs0([], _, Acc) ->
    lists:reverse(Acc);
parameter_regs0([{avm_int64_t, _} | T], [LowReg, HighReg | Rest], Acc) ->
    parameter_regs0(T, Rest, [HighReg, LowReg | Acc]);
parameter_regs0([_Other | T], [Reg | Rest], Acc) ->
    parameter_regs0(T, Rest, [Reg | Acc]).

%% Replace the first argument that is Reg1 or `{free, Reg1}' with Reg2. The
%% replacement is the bare register, so a `free' marker is dropped.
%% There is no clause for an exhausted list: Reg1 must occur in Args.
replace_reg(Args, Reg1, Reg2) ->
    replace_reg0(Args, Reg1, Reg2, []).

replace_reg0([Reg | T], Reg, Replacement, Acc) ->
    lists:reverse(Acc, [Replacement | T]);
replace_reg0([{free, Reg} | T], Reg, Replacement, Acc) ->
    lists:reverse(Acc, [Replacement | T]);
replace_reg0([Other | T], Reg, Replacement, Acc) ->
    replace_reg0(T, Reg, Replacement, [Other | Acc]).

%% Walk Args, ArgsRegs and ParamRegs in lock-step and emit, for each argument,
%% the code placing it in its parameter register. Scratch holds registers that
%% may be used to park a parameter register whose current value is still needed
%% by a later argument.
set_registers_args0(State, [], [], [], _Scratch, _StackOffset) ->
    State;
% A `free' marker does not change how the value is loaded: unwrap it.
set_registers_args0(State, [{free, Inner} | MoreArgs], ArgsRegs, ParamRegs, Scratch, StackOffset) ->
    set_registers_args0(State, [Inner | MoreArgs], ArgsRegs, ParamRegs, Scratch, StackOffset);
% ctx is already in its parameter register: nothing to emit.
set_registers_args0(
    State, [ctx | MoreArgs], [?CTX_REG | ArgsRegs], [?CTX_REG | ParamRegs], Scratch, StackOffset
) ->
    set_registers_args0(State, MoreArgs, ArgsRegs, ParamRegs, Scratch, StackOffset);
% A 64-bit immediate occupies two registers on ILP32: split it into two signed
% 32-bit immediates (low word first) and handle each as a regular argument.
set_registers_args0(
    State,
    [{avm_int64_t, Value} | MoreArgs],
    ArgsRegs,
    ParamRegs,
    Scratch,
    StackOffset
) when is_integer(Value) ->
    % Reinterpret an unsigned 32-bit word as a signed 32-bit integer
    ToSigned32 = fun
        (Word) when Word > 16#7FFFFFFF -> Word - 16#100000000;
        (Word) -> Word
    end,
    Low = ToSigned32(Value band 16#FFFFFFFF),
    High = ToSigned32((Value bsr 32) band 16#FFFFFFFF),
    set_registers_args0(
        State, [Low, High | MoreArgs], [imm | ArgsRegs], ParamRegs, Scratch, StackOffset
    );
% The destination is the ctx register. ctx is needed to reach
% x_reg/y_reg/fp_reg, so it is overwritten only once no later argument lives
% in it (asserted below).
set_registers_args0(
    State, [Arg | MoreArgs], [_ArgReg | ArgsRegs], [?CTX_REG | ParamRegs], Scratch, StackOffset
) ->
    false = lists:member(?CTX_REG, ArgsRegs),
    Loaded = set_registers_args1(State, Arg, ?CTX_REG, StackOffset),
    set_registers_args0(Loaded, MoreArgs, ArgsRegs, ParamRegs, Scratch, StackOffset);
set_registers_args0(
    #state{stream_module = StreamModule} = State,
    [Arg | MoreArgs],
    [_ArgReg | ArgsRegs],
    [Target | ParamRegs],
    Scratch,
    StackOffset
) ->
    case lists:member(Target, ArgsRegs) of
        true ->
            % A later argument currently lives in Target: park that value in a
            % scratch register first and make the later argument read from there.
            [Spare | ScratchRest] = Scratch,
            Park = jit_riscv32_asm:mv(Spare, Target),
            Parked = State#state{stream = StreamModule:append(State#state.stream, Park)},
            Loaded = set_registers_args1(Parked, Arg, Target, StackOffset),
            Redirected = replace_reg(MoreArgs, Target, Spare),
            set_registers_args0(
                Loaded, Redirected, ArgsRegs, ParamRegs, ScratchRest, StackOffset
            );
        false ->
            Loaded = set_registers_args1(State, Arg, Target, StackOffset),
            set_registers_args0(Loaded, MoreArgs, ArgsRegs, ParamRegs, Scratch, StackOffset)
    end.

%% Emit the code loading a single argument into the register Reg.
% Argument is already the target register: nothing to do.
set_registers_args1(State, Reg, Reg, _Offset) ->
    State;
% jit_state lives in a1: it only has to be copied when the target differs.
set_registers_args1(#state{} = State, jit_state, a1, _StackOffset) ->
    State;
set_registers_args1(
    #state{stream_module = StreamModule, stream = Stream} = State,
    jit_state,
    Target,
    _StackOffset
) ->
    Copy = jit_riscv32_asm:mv(Target, a1),
    State#state{stream = StreamModule:append(Stream, Copy)};
% For tail calls, jit_state is already in a1
set_registers_args1(State, jit_state_tail_call, a1, _StackOffset) ->
    State;
set_registers_args1(
    #state{stream_module = StreamModule, stream = Stream} = State,
    {x_reg, extra},
    Target,
    _StackOffset
) ->
    {Base, Disp} = ?X_REG(?MAX_REG),
    Load = jit_riscv32_asm:lw(Target, Base, Disp),
    State#state{stream = StreamModule:append(Stream, Load)};
set_registers_args1(
    #state{stream_module = StreamModule, stream = Stream} = State, {x_reg, X}, Target, _StackOffset
) ->
    {Base, Disp} = ?X_REG(X),
    Load = jit_riscv32_asm:lw(Target, Base, Disp),
    State#state{stream = StreamModule:append(Stream, Load)};
% Dereference the pointer held in Source
set_registers_args1(
    #state{stream_module = StreamModule, stream = Stream} = State,
    {ptr, Source},
    Target,
    _StackOffset
) ->
    Load = jit_riscv32_asm:lw(Target, Source, 0),
    State#state{stream = StreamModule:append(Stream, Load)};
set_registers_args1(
    #state{stream_module = StreamModule, stream = Stream, available_regs = Free} = State,
    {y_reg, Y},
    Target,
    _StackOffset
) ->
    Load = ldr_y_reg(Target, Y, Free),
    State#state{stream = StreamModule:append(Stream, Load)};
% Register to register move
set_registers_args1(
    #state{stream_module = StreamModule, stream = Stream} = State, SrcReg, Target, _StackOffset
) when
    ?IS_GPR(SrcReg)
->
    Copy = jit_riscv32_asm:mv(Target, SrcReg),
    State#state{stream = StreamModule:append(Stream, Copy)};
% 32-bit immediate
set_registers_args1(State, Value, Target, _StackOffset) when
    ?IS_SIGNED_OR_UNSIGNED_INT32_T(Value)
->
    mov_immediate(State, Target, Value).

%%-----------------------------------------------------------------------------
%% @doc Emit a move to a vm register (x_reg, y_reg, fpreg or a pointer on x_reg)
%% from an immediate, a native register or another vm register.
%% @end
%% @param State current backend state
%% @param Src value to move to vm register
%% @param Dest vm register to move to
%% @return Updated backend state
%%-----------------------------------------------------------------------------
-spec move_to_vm_register(state(), Src :: value() | vm_register(), Dest :: vm_register()) ->
    state().
% Native register to VM register
move_to_vm_register(State0, Src, {x_reg, extra}) when is_atom(Src) ->
    {BaseReg, Off} = ?X_REG(?MAX_REG),
    I1 = jit_riscv32_asm:sw(BaseReg, Src, Off),
    Stream1 = (State0#state.stream_module):append(State0#state.stream, I1),
    State0#state{stream = Stream1};
move_to_vm_register(State0, Src, {x_reg, X}) when is_atom(Src) ->
    {BaseReg, Off} = ?X_REG(X),
    I1 = jit_riscv32_asm:sw(BaseReg, Src, Off),
    Stream1 = (State0#state.stream_module):append(State0#state.stream, I1),
    State0#state{stream = Stream1};
move_to_vm_register(State0, Src, {ptr, Reg}) when is_atom(Src) ->
    I1 = jit_riscv32_asm:sw(Reg, Src, 0),
    Stream1 = (State0#state.stream_module):append(State0#state.stream, I1),
    State0#state{stream = Stream1};
move_to_vm_register(#state{available_regs = [Temp1 | AT]} = State0, Src, {y_reg, Y}) when
    is_atom(Src)
->
    Code = str_y_reg(Src, Y, Temp1, AT),
    Stream1 = (State0#state.stream_module):append(State0#state.stream, Code),
    State0#state{stream = Stream1};
% Small integer (0..255) to y_reg: load the immediate into a second temporary,
% then store it with str_y_reg, which uses the first temporary.
move_to_vm_register(#state{available_regs = [Temp1, Temp2 | AT]} = State0, N, {y_reg, Y}) when
    is_integer(N), N >= 0, N =< 255
->
    I1 = jit_riscv32_asm:li(Temp2, N),
    YCode = str_y_reg(Temp2, Y, Temp1, AT),
    Stream1 = (State0#state.stream_module):append(
        State0#state.stream, <<I1/binary, YCode/binary>>
    ),
    State0#state{stream = Stream1};
% Small integer (0..255) to any other destination: load it into a temporary and
% store the temporary. The temporary is hidden from the nested call and handed
% back afterwards.
move_to_vm_register(#state{available_regs = [Temp | AT] = AR0} = State0, N, Dest) when
    is_integer(N), N >= 0, N =< 255
->
    I1 = jit_riscv32_asm:li(Temp, N),
    Stream1 = (State0#state.stream_module):append(State0#state.stream, I1),
    State1 = move_to_vm_register(State0#state{stream = Stream1, available_regs = AT}, Temp, Dest),
    State1#state{available_regs = AR0};
% Any other integer: materialize it with mov_immediate, then store the temporary
move_to_vm_register(#state{available_regs = [Temp | AT] = AR0} = State0, N, Dest) when
    is_integer(N)
->
    State1 = mov_immediate(State0#state{available_regs = AT}, Temp, N),
    State2 = move_to_vm_register(State1, Temp, Dest),
    State2#state{available_regs = AR0};
% Source is a VM register: load it into a temporary, then store the temporary
move_to_vm_register(#state{available_regs = [Temp | AT] = AR0} = State0, {x_reg, extra}, Dest) ->
    {BaseReg, Off} = ?X_REG(?MAX_REG),
    I1 = jit_riscv32_asm:lw(Temp, BaseReg, Off),
    Stream1 = (State0#state.stream_module):append(State0#state.stream, I1),
    State1 = move_to_vm_register(State0#state{stream = Stream1, available_regs = AT}, Temp, Dest),
    State1#state{available_regs = AR0};
move_to_vm_register(#state{available_regs = [Temp | AT] = AR0} = State0, {x_reg, X}, Dest) ->
    {XReg, X_REGOffset} = ?X_REG(X),
    I1 = jit_riscv32_asm:lw(Temp, XReg, X_REGOffset),
    Stream1 = (State0#state.stream_module):append(State0#state.stream, I1),
    State1 = move_to_vm_register(State0#state{stream = Stream1, available_regs = AT}, Temp, Dest),
    State1#state{available_regs = AR0};
move_to_vm_register(#state{available_regs = [Temp | AT] = AR0} = State0, {ptr, Reg}, Dest) ->
    I1 = jit_riscv32_asm:lw(Temp, Reg, 0),
    Stream1 = (State0#state.stream_module):append(State0#state.stream, I1),
    State1 = move_to_vm_register(State0#state{stream = Stream1, available_regs = AT}, Temp, Dest),
    State1#state{available_regs = AR0};
move_to_vm_register(#state{available_regs = [Temp | AT] = AR0} = State0, {y_reg, Y}, Dest) ->
    Code = ldr_y_reg(Temp, Y, AT),
    Stream1 = (State0#state.stream_module):append(State0#state.stream, Code),
    State1 = move_to_vm_register(State0#state{stream = Stream1, available_regs = AT}, Temp, Dest),
    State1#state{available_regs = AR0};
% term_to_float: copy the float payload found after the first word pointed to
% by Reg into fp register F, then release Reg.
move_to_vm_register(
    #state{
        stream_module = StreamModule,
        available_regs = [Temp1, Temp2 | _],
        stream = Stream0,
        variant = Variant
    } =
        State0,
    {free, {ptr, Reg, 1}},
    {fp_reg, F}
) ->
    {BaseReg, Off} = ?FP_REGS,
    % Temp1 = address of the fp register array
    I1 = jit_riscv32_asm:lw(Temp1, BaseReg, Off),
    % Temp2 = first 32-bit word of the payload
    I2 = jit_riscv32_asm:lw(Temp2, Reg, 4),
    case Variant band ?JIT_VARIANT_FLOAT32 of
        0 ->
            % Double precision: write both 32-bit parts
            I3 = jit_riscv32_asm:sw(Temp1, Temp2, F * 8),
            I4 = jit_riscv32_asm:lw(Temp2, Reg, 8),
            I5 = jit_riscv32_asm:sw(Temp1, Temp2, F * 8 + 4),
            Code = <<I1/binary, I2/binary, I3/binary, I4/binary, I5/binary>>;
        _ ->
            % Single precision: write only first 32-bit part
            I3 = jit_riscv32_asm:sw(Temp1, Temp2, F * 4),
            Code = <<I1/binary, I2/binary, I3/binary>>
    end,
    Stream1 = StreamModule:append(Stream0, Code),
    State1 = free_native_register(State0, Reg),
    State1#state{stream = Stream1}.

%%-----------------------------------------------------------------------------
%% @doc Emit a move of an array element (reg[x]) to a vm or a native register.
%% @end
%% @param State current backend state
%% @param Reg base register of the array
%% @param Index index in the array, as an integer or a native register
%% @param Dest vm or native register to move to
%% @return Updated backend state
%%-----------------------------------------------------------------------------
-spec move_array_element(
    state(),
    riscv32_register(),
    non_neg_integer() | riscv32_register(),
    vm_register() | riscv32_register()
) -> state().
% Integer index to x register: load the element into a temporary and store it
% into the x register slot.
move_array_element(
    #state{stream_module = StreamModule, stream = Stream0, available_regs = [Temp | _]} = State,
    Reg,
    Index,
    {x_reg, X}
) when X < ?MAX_REG andalso is_atom(Reg) andalso is_integer(Index) ->
    I1 = jit_riscv32_asm:lw(Temp, Reg, Index * 4),
    {BaseReg, Off} = ?X_REG(X),
    I2 = jit_riscv32_asm:sw(BaseReg, Temp, Off),
    Stream1 = StreamModule:append(Stream0, <<I1/binary, I2/binary>>),
    State#state{stream = Stream1};
% Integer index to the word pointed to by Dest
move_array_element(
    #state{stream_module = StreamModule, stream = Stream0, available_regs = [Temp | _]} = State,
    Reg,
    Index,
    {ptr, Dest}
) when is_atom(Reg) andalso is_integer(Index) ->
    I1 = jit_riscv32_asm:lw(Temp, Reg, Index * 4),
    I2 = jit_riscv32_asm:sw(Dest, Temp, 0),
    Stream1 = StreamModule:append(Stream0, <<I1/binary, I2/binary>>),
    State#state{stream = Stream1};
% Integer index to y register: the element goes through Temp2 while str_y_reg
% uses Temp1.
move_array_element(
    #state{stream_module = StreamModule, stream = Stream0, available_regs = [Temp1, Temp2 | AT]} =
        State,
    Reg,
    Index,
    {y_reg, Y}
) when is_atom(Reg) andalso is_integer(Index) ->
    I1 = jit_riscv32_asm:lw(Temp2, Reg, Index * 4),
    YCode = str_y_reg(Temp2, Y, Temp1, AT),
    Code = <<I1/binary, YCode/binary>>,
    Stream1 = StreamModule:append(Stream0, Code),
    State#state{stream = Stream1};
% Integer index to y register with a base register the caller no longer needs:
% the element is loaded over the base register itself.
% NOTE(review): Reg is not moved back to available_regs here; confirm that the
% caller releases it.
move_array_element(
    #state{stream_module = StreamModule, stream = Stream0, available_regs = [Temp | AT]} =
        State,
    {free, Reg},
    Index,
    {y_reg, Y}
) when is_integer(Index) ->
    I1 = jit_riscv32_asm:lw(Reg, Reg, Index * 4),
    YCode = str_y_reg(Reg, Y, Temp, AT),
    Code = <<I1/binary, YCode/binary>>,
    Stream1 = StreamModule:append(Stream0, Code),
    State#state{stream = Stream1};
% Integer index to native register: a single load
move_array_element(
    #state{stream_module = StreamModule, stream = Stream0} = State, Reg, Index, Dest
) when is_atom(Dest) andalso is_integer(Index) ->
    I1 = jit_riscv32_asm:lw(Dest, Reg, Index * 4),
    Stream1 = StreamModule:append(Stream0, I1),
    State#state{stream = Stream1};
% Index held in a register the caller no longer needs: the index register is
% turned into the element address (Reg + IndexReg * 4), then into the element
% itself, and is finally released.
move_array_element(
    #state{
        stream_module = StreamModule,
        stream = Stream0,
        available_regs = AvailableRegs0,
        used_regs = UsedRegs0
    } = State,
    Reg,
    {free, IndexReg},
    {x_reg, X}
) when X < ?MAX_REG andalso is_atom(IndexReg) ->
    I1 = jit_riscv32_asm:slli(IndexReg, IndexReg, 2),
    I2 = jit_riscv32_asm:add(IndexReg, Reg, IndexReg),
    I3 = jit_riscv32_asm:lw(IndexReg, IndexReg, 0),
    {BaseReg, Off} = ?X_REG(X),
    I4 = jit_riscv32_asm:sw(BaseReg, IndexReg, Off),
    {AvailableRegs1, UsedRegs1} = free_reg(AvailableRegs0, UsedRegs0, IndexReg),
    Stream1 = StreamModule:append(Stream0, <<I1/binary, I2/binary, I3/binary, I4/binary>>),
    State#state{
        available_regs = AvailableRegs1,
        used_regs = UsedRegs1,
        stream = Stream1
    };
move_array_element(
    #state{
        stream_module = StreamModule,
        stream = Stream0,
        available_regs = AvailableRegs0,
        used_regs = UsedRegs0
    } = State,
    Reg,
    {free, IndexReg},
    {ptr, PtrReg}
) when is_atom(IndexReg) ->
    I1 = jit_riscv32_asm:slli(IndexReg, IndexReg, 2),
    I2 = jit_riscv32_asm:add(IndexReg, Reg, IndexReg),
    I3 = jit_riscv32_asm:lw(IndexReg, IndexReg, 0),
    I4 = jit_riscv32_asm:sw(PtrReg, IndexReg, 0),
    {AvailableRegs1, UsedRegs1} = free_reg(
        AvailableRegs0, UsedRegs0, IndexReg
    ),
    Stream1 = StreamModule:append(Stream0, <<I1/binary, I2/binary, I3/binary, I4/binary>>),
    State#state{
        available_regs = AvailableRegs1,
        used_regs = UsedRegs1,
        stream = Stream1
    };
move_array_element(
    #state{
        stream_module = StreamModule,
        stream = Stream0,
        available_regs = [Temp | AT] = AvailableRegs0,
        used_regs = UsedRegs0
    } = State,
    Reg,
    {free, IndexReg},
    {y_reg, Y}
) when is_atom(IndexReg) ->
    I1 = jit_riscv32_asm:slli(IndexReg, IndexReg, 2),
    I2 = jit_riscv32_asm:add(IndexReg, Reg, IndexReg),
    I3 = jit_riscv32_asm:lw(IndexReg, IndexReg, 0),
    YCode = str_y_reg(IndexReg, Y, Temp, AT),
    {AvailableRegs1, UsedRegs1} = free_reg(
        AvailableRegs0, UsedRegs0, IndexReg
    ),
    Stream1 = StreamModule:append(
        Stream0, <<I1/binary, I2/binary, I3/binary, YCode/binary>>
    ),
    State#state{
        available_regs = AvailableRegs1,
        used_regs = UsedRegs1,
        stream = Stream1
    }.

%% @doc Load reg[x] into a native register and return that register.
%% With `{free, Reg}' the element is loaded over the base register itself;
%% otherwise the first available register is allocated for the element.
-spec get_array_element(
    state(), riscv32_register() | {free, riscv32_register()}, non_neg_integer()
) ->
    {state(), riscv32_register()}.
get_array_element(
    #state{
        stream_module = StreamModule,
        stream = Stream0
    } = State,
    {free, Reg},
    Index
) ->
    I1 = jit_riscv32_asm:lw(Reg, Reg, Index * 4),
    Stream1 = StreamModule:append(Stream0, <<I1/binary>>),
    {State#state{stream = Stream1}, Reg};
get_array_element(
    #state{
        stream_module = StreamModule,
        stream = Stream0,
        available_regs = [ElemReg | AvailableT],
        used_regs = UsedRegs0
    } = State,
    Reg,
    Index
) ->
    I1 = jit_riscv32_asm:lw(ElemReg, Reg, Index * 4),
    Stream1 = StreamModule:append(Stream0, <<I1/binary>>),
    {
        State#state{
            stream = Stream1, available_regs = AvailableT, used_regs = [ElemReg | UsedRegs0]
        },
        ElemReg
    }.

%% @doc move an integer, a vm or native register to reg[x]
-spec move_to_array_element(
    state(), integer() | vm_register() | riscv32_register(), riscv32_register(), non_neg_integer()
) -> state().
% Value in a native register, integer index: a single store at Index * 4
move_to_array_element(
    #state{stream_module = StreamModule, stream = Stream0} = State0,
    ValueReg,
    Reg,
    Index
) when ?IS_GPR(ValueReg) andalso ?IS_GPR(Reg) andalso is_integer(Index) ->
    I1 = jit_riscv32_asm:sw(Reg, ValueReg, Index * 4),
    Stream1 = StreamModule:append(Stream0, I1),
    State0#state{stream = Stream1};
% Value in a native register, index in a native register: compute the element
% address (Reg + IndexReg * 4) in a temporary so that IndexReg is preserved.
move_to_array_element(
    #state{stream_module = StreamModule, stream = Stream0, available_regs = [Temp | _]} = State0,
    ValueReg,
    Reg,
    IndexReg
) when ?IS_GPR(ValueReg) andalso ?IS_GPR(Reg) andalso ?IS_GPR(IndexReg) ->
    I1 = jit_riscv32_asm:mv(Temp, IndexReg),
    I2 = jit_riscv32_asm:slli(Temp, Temp, 2),
    I3 = jit_riscv32_asm:add(Temp, Reg, Temp),
    I4 = jit_riscv32_asm:sw(Temp, ValueReg, 0),
    Stream1 = StreamModule:append(Stream0, <<I1/binary, I2/binary, I3/binary, I4/binary>>),
    State0#state{stream = Stream1};
% Any other value: copy it to a native register, store that register, then
% release it.
move_to_array_element(
    State0,
    Value,
    Reg,
    Index
) ->
    {State1, Temp} = copy_to_native_register(State0, Value),
    State2 = move_to_array_element(State1, Temp, Reg, Index),
    free_native_register(State2, Temp).

%% Store Value into the array at BaseReg, at element IndexReg + Offset.
% Integer index and integer offset: fold them and use move_to_array_element/4.
% NOTE(review): the guard `Offset div 8 =:= 0' only accepts offsets for which
% `Offset div 8' is 0, so the folded index is always IndexReg and Offset itself
% is dropped, whereas the clauses below add Offset to the index as an element
% count. This looks inherited from a backend with byte offsets; confirm the
% intended unit and whether `rem' was meant.
move_to_array_element(
    State,
    Value,
    BaseReg,
    IndexReg,
    Offset
) when is_integer(IndexReg) andalso is_integer(Offset) andalso Offset div 8 =:= 0 ->
    move_to_array_element(State, Value, BaseReg, IndexReg + (Offset div 8));
% Value and index in native registers: the element address
% BaseReg + (IndexReg + Offset) * 4 is computed in a temporary.
move_to_array_element(
    #state{stream_module = StreamModule, stream = Stream0, available_regs = [Temp | _]} = State,
    ValueReg,
    BaseReg,
    IndexReg,
    Offset
) when ?IS_GPR(ValueReg) andalso ?IS_GPR(IndexReg) andalso is_integer(Offset) ->
    I1 = jit_riscv32_asm:addi(Temp, IndexReg, Offset),
    I2 = jit_riscv32_asm:slli(Temp, Temp, 2),
    I3 = jit_riscv32_asm:add(Temp, BaseReg, Temp),
    I4 = jit_riscv32_asm:sw(Temp, ValueReg, 0),
    Stream1 = StreamModule:append(Stream0, <<I1/binary, I2/binary, I3/binary, I4/binary>>),
    State#state{stream = Stream1};
% Any other value: copy it to a native register first. The temporary for the
% address is taken after that copy so that it cannot be the value register.
move_to_array_element(
    State0,
    Value,
    BaseReg,
    IndexReg,
    Offset
) ->
    {State1, ValueReg} = copy_to_native_register(State0, Value),
    [Temp | _] = State1#state.available_regs,
    I1 = jit_riscv32_asm:addi(Temp, IndexReg, Offset),
    I2 = jit_riscv32_asm:slli(Temp, Temp, 2),
    I3 = jit_riscv32_asm:add(Temp, BaseReg, Temp),
    I4 = jit_riscv32_asm:sw(Temp, ValueReg, 0),
    Stream1 = (State1#state.stream_module):append(
        State1#state.stream, <<I1/binary, I2/binary, I3/binary, I4/binary>>
    ),
    State2 = State1#state{stream = Stream1},
    free_native_register(State2, ValueReg).

-spec move_to_native_register(state(), value() | cp) -> {state(), riscv32_register()}.
% Load the value into a native register and return that register. Unless the
% value already is a register (or a pointer held in one), the first available
% register is allocated and moved to used_regs.
move_to_native_register(
    #state{
        stream_module = StreamModule,
        stream = Stream0,
        available_regs = [Reg | AvailT],
        used_regs = Used
    } = State,
    cp
) ->
    {BaseReg, Off} = ?CP,
    I1 = jit_riscv32_asm:lw(Reg, BaseReg, Off),
    Stream1 = StreamModule:append(Stream0, I1),
    {State#state{stream = Stream1, used_regs = [Reg | Used], available_regs = AvailT}, Reg};
% Already a native register: nothing to emit
move_to_native_register(State, Reg) when is_atom(Reg) ->
    {State, Reg};
% Pointer held in a register: dereference in place, overwriting the pointer
move_to_native_register(
    #state{stream_module = StreamModule, stream = Stream0} = State, {ptr, Reg}
) when is_atom(Reg) ->
    I1 = jit_riscv32_asm:lw(Reg, Reg, 0),
    Stream1 = StreamModule:append(Stream0, I1),
    {State#state{stream = Stream1}, Reg};
move_to_native_register(
    #state{
        available_regs = [Reg | AvailT],
        used_regs = Used
    } = State0,
    Imm
) when
    is_integer(Imm)
->
    State1 = State0#state{used_regs = [Reg | Used], available_regs = AvailT},
    {move_to_native_register(State1, Imm, Reg), Reg};
move_to_native_register(
    #state{
        stream_module = StreamModule,
        stream = Stream0,
        available_regs = [Reg | AvailT],
        used_regs = Used
    } = State,
    {x_reg, extra}
) ->
    {BaseReg, Off} = ?X_REG(?MAX_REG),
    I1 = jit_riscv32_asm:lw(Reg, BaseReg, Off),
    Stream1 = StreamModule:append(Stream0, I1),
    {State#state{stream = Stream1, used_regs = [Reg | Used], available_regs = AvailT}, Reg};
move_to_native_register(
    #state{
        stream_module = StreamModule,
        stream = Stream0,
        available_regs = [Reg | AvailT],
        used_regs = Used
    } = State,
    {x_reg, X}
) when
    X < ?MAX_REG
->
    {BaseReg, Offset} = ?X_REG(X),
    I1 = jit_riscv32_asm:lw(Reg, BaseReg, Offset),
    Stream1 = StreamModule:append(Stream0, I1),
    {State#state{stream = Stream1, used_regs = [Reg | Used], available_regs = AvailT}, Reg};
move_to_native_register(
    #state{
        stream_module = StreamModule,
        stream = Stream0,
        available_regs = [Reg | AvailT],
        used_regs = Used
    } = State,
    {y_reg, Y}
) ->
    Code = ldr_y_reg(Reg, Y, AvailT),
    Stream1 = StreamModule:append(Stream0, Code),
    {State#state{stream = Stream1, available_regs = AvailT, used_regs = [Reg | Used]}, Reg};
% fp register: the two 32-bit words at F * 8 and F * 8 + 4 are loaded into a
% register pair, returned as {fp, RegA, RegB}. RegB first holds the address of
% the fp register array and is overwritten last.
move_to_native_register(
    #state{
        stream_module = StreamModule,
        stream = Stream0,
        available_regs = [RegA, RegB | AvailT],
        used_regs = Used
    } = State,
    {fp_reg, F}
) ->
    {BaseReg, Off} = ?FP_REGS,
    I1 = jit_riscv32_asm:lw(RegB, BaseReg, Off),
    I2 = jit_riscv32_asm:lw(RegA, RegB, F * 8),
    I3 = jit_riscv32_asm:lw(RegB, RegB, F * 8 + 4),
    Code = <<I1/binary, I2/binary, I3/binary>>,
    Stream1 = StreamModule:append(Stream0, Code),
    {
        State#state{stream = Stream1, available_regs = AvailT, used_regs = [RegB, RegA | Used]},
        {fp, RegA, RegB}
    }.

% Load the value into the given native register. Register bookkeeping is left
% untouched: the destination is chosen by the caller.
-spec move_to_native_register(state(), value(), riscv32_register()) -> state().
move_to_native_register(
    #state{stream_module = StreamModule, stream = Stream0} = State, RegSrc, RegDst
) when is_atom(RegSrc) ->
    I = jit_riscv32_asm:mv(RegDst, RegSrc),
    Stream1 = StreamModule:append(Stream0, I),
    State#state{stream = Stream1};
move_to_native_register(State, ValSrc, RegDst) when is_integer(ValSrc) ->
    mov_immediate(State, RegDst, ValSrc);
move_to_native_register(
    #state{stream_module = StreamModule, stream = Stream0} = State, {ptr, Reg}, RegDst
) when ?IS_GPR(Reg) ->
    I1 = jit_riscv32_asm:lw(RegDst, Reg, 0),
    Stream1 = StreamModule:append(Stream0, I1),
    State#state{stream = Stream1};
move_to_native_register(
    #state{stream_module = StreamModule, stream = Stream0} = State, {x_reg, extra}, RegDst
) ->
    {BaseReg, Off} = ?X_REG(?MAX_REG),
    I1 = jit_riscv32_asm:lw(RegDst, BaseReg, Off),
    Stream1 = StreamModule:append(Stream0, I1),
    State#state{stream = Stream1};
move_to_native_register(
    #state{stream_module = StreamModule, stream = Stream0} = State, {x_reg, X}, RegDst
) when
    X < ?MAX_REG
->
    {XReg, X_REGOffset} = ?X_REG(X),
    I1 = jit_riscv32_asm:lw(RegDst, XReg, X_REGOffset),
    Stream1 = StreamModule:append(Stream0, I1),
    State#state{stream = Stream1};
move_to_native_register(
    #state{stream_module = StreamModule, stream = Stream0, available_regs = AT} = State,
    {y_reg, Y},
    RegDst
) ->
    Code = ldr_y_reg(RegDst, Y, AT),
    Stream1 = StreamModule:append(Stream0, Code),
    State#state{stream = Stream1};
move_to_native_register(
    #state{
        stream_module = StreamModule,
        stream = Stream0
    } = State,
    {fp_reg, F},
    {fp, RegA, RegB}
) ->
    {BaseReg, Off} = ?FP_REGS,
    I1 = jit_riscv32_asm:lw(RegB, BaseReg, Off),
    I2 = jit_riscv32_asm:lw(RegA, RegB, F * 8),
    I3 = jit_riscv32_asm:lw(RegB, RegB, F * 8 + 4),
    Code = <<I1/binary, I2/binary, I3/binary>>,
    Stream1 = StreamModule:append(Stream0, Code),
    State#state{stream = Stream1}.

% Like move_to_native_register/2, except that a source register (or the
% register holding a pointer) is never reused: the value always ends up in a
% newly allocated register, leaving the source intact.
-spec copy_to_native_register(state(), value()) -> {state(), riscv32_register()}.
copy_to_native_register(
    #state{
        stream_module = StreamModule,
        stream = Stream0,
        available_regs = [SaveReg | AvailT],
        used_regs = Used
    } = State,
    Reg
) when is_atom(Reg) ->
    I1 = jit_riscv32_asm:mv(SaveReg, Reg),
    Stream1 = StreamModule:append(Stream0, I1),
    {State#state{stream = Stream1, available_regs = AvailT, used_regs = [SaveReg | Used]}, SaveReg};
copy_to_native_register(
    #state{
        stream_module = StreamModule,
        stream = Stream0,
        available_regs = [SaveReg | AvailT],
        used_regs = Used
    } = State,
    {ptr, Reg}
) when is_atom(Reg) ->
    I1 = jit_riscv32_asm:lw(SaveReg, Reg, 0),
    Stream1 = StreamModule:append(Stream0, I1),
    {State#state{stream = Stream1, available_regs = AvailT, used_regs = [SaveReg | Used]}, SaveReg};
copy_to_native_register(State, Reg) ->
    move_to_native_register(State, Reg).

% Copy a y register into the location described by ?CP, through a temporary
% that is not reserved afterwards.
move_to_cp(
    #state{stream_module = StreamModule, stream = Stream0, available_regs = [Reg | AvailT]} = State,
    {y_reg, Y}
) ->
    I1 = ldr_y_reg(Reg, Y, AvailT),
    {BaseReg, Off} = ?CP,
    I2 = jit_riscv32_asm:sw(BaseReg, Reg, Off),
    Code = <<I1/binary, I2/binary>>,
    Stream1 = StreamModule:append(Stream0, Code),
    State#state{stream = Stream1}.

%% Add Offset words (Offset * 4 bytes) to the word stored at ?Y_REGS, using a
%% temporary register that is not reserved afterwards.
increment_sp(
    #state{stream_module = StreamModule, stream = Stream0, available_regs = [Reg | _]} = State,
    Offset
) ->
    {BaseReg, Off} = ?Y_REGS,
    I1 = jit_riscv32_asm:lw(Reg, BaseReg, Off),
    I2 = jit_riscv32_asm:addi(Reg, Reg, Offset * 4),
    I3 = jit_riscv32_asm:sw(BaseReg, Reg, Off),
    Code = <<I1/binary, I2/binary, I3/binary>>,
    Stream1 = StreamModule:append(Stream0, Code),
    State#state{stream = Stream1}.

%% Store the address of Label into the continuation field of jit_state.
%% When the label offset is not known yet, an 8-byte placeholder is emitted and
%% an `{adr, Temp}' relocation is recorded in `branches'.
set_continuation_to_label(
    #state{
        stream_module = StreamModule,
        stream = Stream0,
        available_regs = [Temp | _],
        branches = Branches,
        labels = Labels
    } = State,
    Label
) ->
    Offset = StreamModule:offset(Stream0),
    case lists:keyfind(Label, 1, Labels) of
        {Label, LabelOffset} ->
            % Label is already known, emit direct pc-relative address without relocation
            Rel = LabelOffset - Offset,
            I1 = pc_relative_address(Temp, Rel),
            I2 = jit_riscv32_asm:sw(?JITSTATE_REG, Temp, ?JITSTATE_CONTINUATION_OFFSET),
            Code = <<I1/binary, I2/binary>>,
            Stream1 = StreamModule:append(Stream0, Code),
            State#state{stream = Stream1};
        false ->
            % Label not yet known, emit placeholder and add relocation
            % Reserve 8 bytes (2 x 32-bit instructions) with all-1s placeholder for flash programming
            % The relocation will replace these with the correct offset
            I1 = <<16#FFFFFFFF:32/little, 16#FFFFFFFF:32/little>>,
            Reloc = {Label, Offset, {adr, Temp}},
            I2 = jit_riscv32_asm:sw(?JITSTATE_REG, Temp, ?JITSTATE_CONTINUATION_OFFSET),
            Code = <<I1/binary, I2/binary>>,
            Stream1 = StreamModule:append(Stream0, Code),
            State#state{stream = Stream1, branches = [Reloc | Branches]}
    end.

%% @doc Set the continuation to a given offset
%% Return a reference so the offset will be updated with update_branches
%% This is only used with OP_WAIT_TIMEOUT and the offset is after the current
%% code and not too far, so the address is emitted as an `{adr, Reg}' relocation
%% over an 8-byte placeholder.
set_continuation_to_offset(
    #state{
        stream_module = StreamModule,
        stream = Stream0,
        available_regs = [Temp | _],
        branches = Branches
    } = State
) ->
    OffsetRef = make_ref(),
    Offset = StreamModule:offset(Stream0),
    % Reserve 8 bytes with all-1s placeholder for flash programming
    I1 = <<16#FFFFFFFF:32/little, 16#FFFFFFFF:32/little>>,
    % The placeholder is resolved later through the relocation keyed by OffsetRef
    Reloc = {OffsetRef, Offset, {adr, Temp}},
    % Store continuation (jit_state is in a1)
    I2 = jit_riscv32_asm:sw(?JITSTATE_REG, Temp, ?JITSTATE_CONTINUATION_OFFSET),
    Code = <<I1/binary, I2/binary>>,
    Stream1 = StreamModule:append(Stream0, Code),
    {State#state{stream = Stream1, branches = [Reloc | Branches]}, OffsetRef}.

%% @doc Implement a continuation entry point.
%% Nothing is emitted by this backend: the state is returned unchanged.
-spec continuation_entry_point(#state{}) -> #state{}.
continuation_entry_point(State) ->
    State.

%% Load the first word of the module structure referenced by jit_state into a
%% newly allocated register and return that register.
get_module_index(
    #state{
        stream_module = StreamModule,
        stream = Stream0,
        available_regs = [Reg | AvailableT],
        used_regs = UsedRegs0
    } = State
) ->
    % Load module from jit_state (which is in a1)
    I1 = jit_riscv32_asm:lw(Reg, ?JITSTATE_REG, ?JITSTATE_MODULE_OFFSET),
    I2 = jit_riscv32_asm:lw(Reg, Reg, 0),
    Code = <<I1/binary, I2/binary>>,
    Stream1 = StreamModule:append(Stream0, Code),
    {
        State#state{
            stream = Stream1,
            available_regs = AvailableT,
            used_regs = [Reg | UsedRegs0]
        },
        Reg
    }.

%% @doc Perform an AND of a register with an immediate.
%% JIT currently calls this with two values: ?TERM_PRIMARY_CLEAR_MASK (-4) to
%% clear bits and ?TERM_BOXED_TAG_MASK (0x3F). Masks between -256 and -1 are
%% built by loading their complement and inverting it.
% 24-bit mask: shift the top 8 bits out and back in, no temporary needed
and_(#state{stream_module = StreamModule, stream = Stream0} = State0, {free, Reg}, 16#FFFFFF) ->
    I1 = jit_riscv32_asm:slli(Reg, Reg, 8),
    I2 = jit_riscv32_asm:srli(Reg, Reg, 8),
    Stream1 = StreamModule:append(Stream0, <<I1/binary, I2/binary>>),
    {State0#state{stream = Stream1}, Reg};
% Mask between -256 and -1, a temporary is available
and_(
    #state{stream_module = StreamModule, available_regs = [Temp | AT]} = State0,
    {free, Reg},
    Val
) when Val < 0 andalso Val >= -256 ->
    State1 = mov_immediate(State0#state{available_regs = AT}, Temp, bnot (Val)),
    Stream1 = State1#state.stream,
    % RISC-V doesn't have bics, use not + and
    I1 = jit_riscv32_asm:not_(Temp, Temp),
    I2 = jit_riscv32_asm:and_(Reg, Reg, Temp),
    Stream2 = StreamModule:append(Stream1, <<I1/binary, I2/binary>>),
    {State1#state{available_regs = [Temp | AT], stream = Stream2}, Reg};
% Any other mask, a temporary is available
and_(
    #state{stream_module = StreamModule, available_regs = [Temp | AT]} = State0,
    {free, Reg},
    Val
) ->
    State1 = mov_immediate(State0#state{available_regs = AT}, Temp, Val),
    Stream1 = State1#state.stream,
    I = jit_riscv32_asm:and_(Reg, Reg, Temp),
    Stream2 = StreamModule:append(Stream1, I),
    {State1#state{available_regs = [Temp | AT], stream = Stream2}, Reg};
% Mask between -256 and -1, no register available: a0 is borrowed as the
% temporary and preserved in ?IP_REG around the operation.
% NOTE(review): this assumes Reg is never a0; confirm against callers.
and_(
    #state{stream_module = StreamModule, available_regs = []} = State0,
    {free, Reg},
    Val
) when Val < 0 andalso Val >= -256 ->
    Stream0 = State0#state.stream,
    % Save a0 to ?IP_REG
    Save = jit_riscv32_asm:mv(?IP_REG, a0),
    Stream1 = StreamModule:append(Stream0, Save),
    % Load the complement of the mask into a0
    State1 = mov_immediate(State0#state{stream = Stream1}, a0, bnot (Val)),
    Stream2 = State1#state.stream,
    % Invert it back and apply the mask (not + and)
    I1 = jit_riscv32_asm:not_(a0, a0),
    I2 = jit_riscv32_asm:and_(Reg, Reg, a0),
    Stream3 = StreamModule:append(Stream2, <<I1/binary, I2/binary>>),
    % Restore a0 from ?IP_REG
    Restore = jit_riscv32_asm:mv(a0, ?IP_REG),
    Stream4 = StreamModule:append(Stream3, Restore),
    {State0#state{stream = Stream4}, Reg};
% Any other mask, no register available: same a0 borrowing as above
and_(
    #state{stream_module = StreamModule, available_regs = []} = State0,
    {free, Reg},
    Val
) ->
    Stream0 = State0#state.stream,
    % Save a0 to ?IP_REG
    Save = jit_riscv32_asm:mv(?IP_REG, a0),
    Stream1 = StreamModule:append(Stream0, Save),
    % Load immediate value into a0
    State1 = mov_immediate(State0#state{stream = Stream1}, a0, Val),
    Stream2 = State1#state.stream,
    % Apply the mask
    I = jit_riscv32_asm:and_(Reg, Reg, a0),
    Stream3 = StreamModule:append(Stream2, I),
    % Restore a0 from ?IP_REG
    Restore = jit_riscv32_asm:mv(a0, ?IP_REG),
    Stream4 = StreamModule:append(Stream3, Restore),
    {State0#state{stream = Stream4}, Reg};
% Source register must be preserved: the masked value goes to a newly allocated
% register. Only ?TERM_PRIMARY_CLEAR_MASK is supported in this form.
and_(
    #state{stream_module = StreamModule, available_regs = [ResultReg | AT], used_regs = UR} =
        State0,
    Reg,
    ?TERM_PRIMARY_CLEAR_MASK
) ->
    I = jit_riscv32_asm:andi(ResultReg, Reg, -4),
    Stream1 = StreamModule:append(State0#state.stream, I),
    {State0#state{stream = Stream1, available_regs = AT, used_regs = [ResultReg | UR]}, ResultReg}.

%% Perform an OR of a register with an immediate, in place. The immediate is
%% loaded into a temporary that is handed back afterwards.
or_(
    #state{stream_module = StreamModule, available_regs = [Temp | AT]} = State0,
    Reg,
    Val
) ->
    State1 = mov_immediate(State0#state{available_regs = AT}, Temp, Val),
    Stream1 = State1#state.stream,
    % NOTE(review): two-operand form, whereas and_/add above use the
    % three-operand form (Rd, Rs1, Rs2); confirm jit_riscv32_asm:or_/2 exists.
    I = jit_riscv32_asm:or_(Reg, Temp),
    Stream2 = StreamModule:append(Stream1, I),
    State1#state{available_regs = [Temp | AT], stream = Stream2}.

%% Add an immediate or a register to Reg, in place. Returns the updated state.
add(#state{stream_module = StreamModule, stream = Stream0} = State0, Reg, Val) when
    is_integer(Val), Val >= -2048, Val =< 2047
->
    % addi encodes any 12-bit signed immediate; no scratch register required
    I = jit_riscv32_asm:addi(Reg, Reg, Val),
    Stream1 = StreamModule:append(Stream0, I),
    State0#state{stream = Stream1};
add(#state{stream_module = StreamModule, stream = Stream0} = State0, Reg, Val) when
    is_atom(Val)
->
    % Val names a register
    I = jit_riscv32_asm:add(Reg, Reg, Val),
    Stream1 = StreamModule:append(Stream0, I),
    State0#state{stream = Stream1};
add(#state{stream_module = StreamModule, available_regs = [Temp | AT]} = State0, Reg, Val) ->
    % Wide immediate: load it into a scratch register first
    State1 = mov_immediate(State0#state{available_regs = AT}, Temp, Val),
    Stream1 = State1#state.stream,
    I = jit_riscv32_asm:add(Reg, Reg, Temp),
    Stream2 = StreamModule:append(Stream1, I),
    State1#state{available_regs = [Temp | AT], stream = Stream2}.

%% Load an immediate into Reg. The encoding is delegated to
%% jit_riscv32_asm:li/2 for every value, so no literal pool is used.
mov_immediate(#state{stream_module = StreamModule, stream = Stream0} = State, Reg, Val) ->
    I = jit_riscv32_asm:li(Reg, Val),
    Stream1 = StreamModule:append(Stream0, I),
    State#state{stream = Stream1}.

%% Subtract an immediate or a register from Reg, in place.
sub(#state{stream_module = StreamModule, stream = Stream0} = State, Reg, Val) when
    is_integer(Val), Val >= -2047, Val =< 2048
->
    % Encoded as addi with the negated value, which must fit -2048..2047
    I1 = jit_riscv32_asm:addi(Reg, Reg, -Val),
    Stream1 = StreamModule:append(Stream0, I1),
    State#state{stream = Stream1};
sub(#state{stream_module = StreamModule, stream = Stream0} = State, Reg, Val) when
    is_atom(Val)
->
    % Val names a register
    I = jit_riscv32_asm:sub(Reg, Reg, Val),
    Stream1 = StreamModule:append(Stream0, I),
    State#state{stream = Stream1};
sub(#state{stream_module = StreamModule, available_regs = [Temp | AT]} = State0, Reg, Val) ->
    State1 = mov_immediate(State0#state{available_regs = AT}, Temp, Val),
    Stream1 = State1#state.stream,
    I = jit_riscv32_asm:sub(Reg, Reg, Temp),
    Stream2 = StreamModule:append(Stream1, I),
    State1#state{available_regs = [Temp | AT], stream = Stream2}.

%% Multiply Reg by a constant, in place. Small constants are strength-reduced
%% to shifts and adds/subs; anything else uses the M-extension mul.
mul(State, _Reg, 1) ->
    State;
mul(State, Reg, 2) ->
    shift_left(State, Reg, 1);
mul(#state{available_regs = [Temp | _]} = State, Reg, 3) ->
    % 3x = 2x + x
    I1 = jit_riscv32_asm:slli(Temp, Reg, 1),
    I2 = jit_riscv32_asm:add(Reg, Temp, Reg),
    Stream1 = (State#state.stream_module):append(State#state.stream, <<I1/binary, I2/binary>>),
    State#state{stream = Stream1};
mul(State, Reg, 4) ->
    shift_left(State, Reg, 2);
mul(#state{available_regs = [Temp | _]} = State, Reg, 5) ->
    % 5x = 4x + x
    I1 = jit_riscv32_asm:slli(Temp, Reg, 2),
    I2 = jit_riscv32_asm:add(Reg, Temp, Reg),
    Stream1 = (State#state.stream_module):append(State#state.stream, <<I1/binary, I2/binary>>),
    State#state{stream = Stream1};
mul(State0, Reg, 6) ->
    State1 = mul(State0, Reg, 3),
    mul(State1, Reg, 2);
mul(#state{available_regs = [Temp | _]} = State, Reg, 7) ->
    % 7x = 8x - x
    I1 = jit_riscv32_asm:slli(Temp, Reg, 3),
    I2 = jit_riscv32_asm:sub(Reg, Temp, Reg),
    Stream1 = (State#state.stream_module):append(State#state.stream, <<I1/binary, I2/binary>>),
    State#state{stream = Stream1};
mul(State, Reg, 8) ->
    shift_left(State, Reg, 3);
mul(#state{available_regs = [Temp | _]} = State, Reg, 9) ->
    % 9x = 8x + x
    I1 = jit_riscv32_asm:slli(Temp, Reg, 3),
    I2 = jit_riscv32_asm:add(Reg, Temp, Reg),
    Stream1 = (State#state.stream_module):append(State#state.stream, <<I1/binary, I2/binary>>),
    State#state{stream = Stream1};
mul(State0, Reg, 10) ->
    State1 = mul(State0, Reg, 5),
    mul(State1, Reg, 2);
mul(#state{available_regs = [Temp | _]} = State, Reg, 15) ->
    % 15x = 16x - x
    I1 = jit_riscv32_asm:slli(Temp, Reg, 4),
    I2 = jit_riscv32_asm:sub(Reg, Temp, Reg),
    Stream1 = (State#state.stream_module):append(State#state.stream, <<I1/binary, I2/binary>>),
    State#state{stream = Stream1};
mul(State, Reg, 16) ->
    shift_left(State, Reg, 4);
mul(State, Reg, 32) ->
    shift_left(State, Reg, 5);
mul(State, Reg, 64) ->
    shift_left(State, Reg, 6);
mul(
    #state{stream_module = StreamModule, available_regs = [Temp | AT]} = State0,
    Reg,
    Val
) ->
    % General case: load the constant into a scratch register and multiply
    State1 = mov_immediate(State0#state{available_regs = AT}, Temp, Val),
    Stream1 = State1#state.stream,
    I = jit_riscv32_asm:mul(Reg, Reg, Temp),
    Stream2 = StreamModule:append(Stream1, I),
    State1#state{stream = Stream2, available_regs = [Temp | State1#state.available_regs]}.

%%
%% Analysis of AArch64 pattern and RISC-V32 implementation:
%%
%% AArch64 layout (from call_ext_only_test):
%%   0x0-0x8:  Decrement reductions, store back
%%   0xc:      b.ne 0x20  ; Branch if reductions != 0 to continuation
%%   0x10-0x1c: adr/str/ldr/br sequence for scheduling next process
%%   0x20:     [CONTINUATION POINT] - Actual function starts here
%%
%% RISC-V32 implementation (no prolog/epilog needed due to 32 registers):
%%   0x0-0x8:  Decrement reductions, store back
%%   0xc:      bne continuation  ; Branch if reductions != 0 to continuation
%%   0x10-0x?: adr/sw/ldr/jalr sequence for scheduling next process
%%   continuation: [actual function body]
%%
%% Key insight: With 32 registers, RISC-V32 doesn't need prolog/epilog like ARM Thumb.
%% When reductions != 0, we branch directly to continue execution.
%% When reductions == 0, we schedule the next process, and resume at the continuation point.
+%% +-spec decrement_reductions_and_maybe_schedule_next(state()) -> state(). +decrement_reductions_and_maybe_schedule_next( + #state{stream_module = StreamModule, stream = Stream0, available_regs = [Temp | _]} = State0 +) -> + % Load reduction count + I1 = jit_riscv32_asm:lw(Temp, ?JITSTATE_REG, ?JITSTATE_REDUCTIONCOUNT_OFFSET), + % Decrement reduction count + I2 = jit_riscv32_asm:addi(Temp, Temp, -1), + % Store back the decremented value + I3 = jit_riscv32_asm:sw(?JITSTATE_REG, Temp, ?JITSTATE_REDUCTIONCOUNT_OFFSET), + Stream1 = StreamModule:append(Stream0, <>), + BNEOffset = StreamModule:offset(Stream1), + % Branch if reduction count is not zero + I4 = <<16#FFFFFFFF:32/little>>, + % Set continuation to the next instruction + ADROffset = BNEOffset + byte_size(I4), + % Use 8-byte placeholder (2 words of 0xFFFFFFFF) for pc_relative_address + % This ensures we can always rewrite with either auipc alone (4 bytes) or auipc+addi (8 bytes) + I5 = <<16#FFFFFFFF:32/little, 16#FFFFFFFF:32/little>>, + I6 = jit_riscv32_asm:sw(?JITSTATE_REG, Temp, ?JITSTATE_CONTINUATION_OFFSET), + % Append the instructions to the stream + Stream2 = StreamModule:append(Stream1, <>), + State1 = State0#state{stream = Stream2}, + State2 = call_primitive_last(State1, ?PRIM_SCHEDULE_NEXT_CP, [ctx, jit_state]), + % Rewrite the branch and adr instructions + #state{stream = Stream3} = State2, + NewOffset = StreamModule:offset(Stream3), + NewI4 = jit_riscv32_asm:bne(Temp, zero, NewOffset - BNEOffset), + NewI5Offset = NewOffset - ADROffset, + % Generate the new pc_relative_address instruction, padding with NOP if needed + NewI5 = + case pc_relative_address(Temp, NewI5Offset) of + I when byte_size(I) =:= 4 -> + % Only auipc, pad with NOP (4 bytes) + <>; + I when byte_size(I) =:= 6 -> + % auipc + c.addi, pad with c.nop (2 bytes) + <>; + I when byte_size(I) =:= 8 -> + % auipc + addi, no padding needed + I + end, + Stream4 = StreamModule:replace( + Stream3, BNEOffset, <> + ), + 
merge_used_regs(State2#state{stream = Stream4}, State1#state.used_regs). + +-spec call_or_schedule_next(state(), non_neg_integer()) -> state(). +call_or_schedule_next(State0, Label) -> + {State1, RewriteOffset, TempReg} = set_cp(State0), + State2 = call_only_or_schedule_next(State1, Label), + rewrite_cp_offset(State2, RewriteOffset, TempReg). + +call_only_or_schedule_next( + #state{ + stream_module = StreamModule, + stream = Stream0, + available_regs = [Temp | _] + } = State0, + Label +) -> + % Load reduction count (jit_state is in a1) + I1 = jit_riscv32_asm:lw(Temp, ?JITSTATE_REG, ?JITSTATE_REDUCTIONCOUNT_OFFSET), + % Decrement reduction count + I2 = jit_riscv32_asm:addi(Temp, Temp, -1), + % Store back the decremented value + I3 = jit_riscv32_asm:sw(?JITSTATE_REG, Temp, ?JITSTATE_REDUCTIONCOUNT_OFFSET), + Stream1 = StreamModule:append(Stream0, <>), + % Use trampoline technique: branch if zero (eq) to skip over the long branch + % If not zero, we want to continue execution at Label + % If zero, we want to fall through to scheduling code + + % Look up label once to avoid duplicate lookup in helper + LabelLookupResult = lists:keyfind(Label, 1, State0#state.labels), + + BccOffset = StreamModule:offset(Stream1), + + State4 = + case LabelLookupResult of + {Label, LabelOffset} -> + % Label is known, check if we can optimize the conditional branch + % After branch instruction + Rel = LabelOffset - BccOffset, + + if + Rel >= -4096 andalso Rel =< 4094 andalso (Rel rem 2) =:= 0 -> + % Near branch: use direct conditional branch (RISC-V has ±4KB range) + + % Branch if NOT zero (temp != 0) + I4 = jit_riscv32_asm:bne(Temp, zero, Rel), + Stream2 = StreamModule:append(Stream1, I4), + State0#state{stream = Stream2}; + true -> + % Far branch: use trampoline with helper + % Get the code block size for the far branch sequence that will follow + + % RISC-V branch is 4 bytes + FarSeqOffset = BccOffset + 4, + {State1, FarCodeBlock} = branch_to_label_code( + State0, FarSeqOffset, Label, 
LabelLookupResult + ), + FarSeqSize = byte_size(FarCodeBlock), + % Skip over the far branch sequence if zero (temp == 0) + I4 = jit_riscv32_asm:beq(Temp, zero, FarSeqSize + 4), + Stream2 = StreamModule:append(Stream1, I4), + Stream3 = StreamModule:append(Stream2, FarCodeBlock), + State1#state{stream = Stream3} + end; + false -> + % Label not known, get the far branch size for the skip + + % RISC-V branch is 4 bytes + FarSeqOffset = BccOffset + 4, + {State1, FarCodeBlock} = branch_to_label_code(State0, FarSeqOffset, Label, false), + FarSeqSize = byte_size(FarCodeBlock), + I4 = jit_riscv32_asm:beq(Temp, zero, FarSeqSize + 4), + Stream2 = StreamModule:append(Stream1, I4), + Stream3 = StreamModule:append(Stream2, FarCodeBlock), + State1#state{stream = Stream3} + end, + State5 = set_continuation_to_label(State4, Label), + call_primitive_last(State5, ?PRIM_SCHEDULE_NEXT_CP, [ctx, jit_state]). + +call_primitive_with_cp(State0, Primitive, Args) -> + {State1, RewriteOffset, TempReg} = set_cp(State0), + State2 = call_primitive_last(State1, Primitive, Args), + rewrite_cp_offset(State2, RewriteOffset, TempReg). + +-spec set_cp(state()) -> {state(), non_neg_integer(), riscv32_register()}. 
+set_cp(#state{available_regs = [TempReg | AvailT], used_regs = UsedRegs} = State0) -> + % Reserve a temporary register for the offset BEFORE calling get_module_index + % to avoid running out of available registers + State0b = State0#state{available_regs = AvailT, used_regs = [TempReg | UsedRegs]}, + % get module index (dynamically) + { + #state{stream_module = StreamModule, stream = Stream0} = State1, + Reg + } = get_module_index( + State0b + ), + + Offset = StreamModule:offset(Stream0), + % build cp with module_index << 24 + I1 = jit_riscv32_asm:slli(Reg, Reg, 24), + % Reserve space for offset load instruction + % li can generate 1 instruction (4 bytes) for small immediates (< 2048) + % or 2 instructions (8 bytes) for large immediates + % Since we don't know the final CP value yet (it depends on code size), + % we must always reserve 2 instructions (8 bytes) to be safe + % The final CP value is (final_offset << 2), and final_offset is unknown + % Use 0xFFFFFFFF placeholders for flash compatibility (can only flip 1->0) + I2 = <<16#FFFFFFFF:32/little>>, + I3 = <<16#FFFFFFFF:32/little>>, + MOVOffset = Offset + byte_size(I1), + % OR the module index with the offset (loaded in temp register) + I4 = jit_riscv32_asm:or_(Reg, TempReg), + {BaseReg, Off} = ?CP, + I5 = jit_riscv32_asm:sw(BaseReg, Reg, Off), + Code = <>, + Stream1 = StreamModule:append(Stream0, Code), + State2 = State1#state{stream = Stream1}, + State3 = free_native_register(State2, Reg), + State4 = free_native_register(State3, TempReg), + {State4, MOVOffset, TempReg}. + +-spec rewrite_cp_offset(state(), non_neg_integer(), riscv32_register()) -> state(). 
+rewrite_cp_offset( + #state{stream_module = StreamModule, stream = Stream0, offset = CodeOffset} = State0, + RewriteOffset, + TempReg +) -> + NewOffset = StreamModule:offset(Stream0) - CodeOffset, + CPValue = NewOffset bsl 2, + NewMoveInstr = jit_riscv32_asm:li(TempReg, CPValue), + % We reserved 8 bytes (2 instructions) for the CP value + % Pad with NOP if needed to maintain alignment + PaddedInstr = + case byte_size(NewMoveInstr) of + 4 -> <>; + 6 -> <>; + 8 -> NewMoveInstr + end, + Stream1 = StreamModule:replace(Stream0, RewriteOffset, PaddedInstr), + State0#state{stream = Stream1}. + +set_bs( + #state{stream_module = StreamModule, stream = Stream0, available_regs = [Temp | _]} = State0, + TermReg +) -> + {BaseReg1, Off1} = ?BS, + I1 = jit_riscv32_asm:sw(BaseReg1, TermReg, Off1), + I2 = jit_riscv32_asm:li(Temp, 0), + {BaseReg2, Off2} = ?BS_OFFSET, + I3 = jit_riscv32_asm:sw(BaseReg2, Temp, Off2), + Stream1 = StreamModule:append(Stream0, <>), + State0#state{stream = Stream1}. + +%%----------------------------------------------------------------------------- +%% @param State current state +%% @param SortedLines line information, sorted by offset +%% @doc Build labels and line tables and encode a function that returns it. +%% In this case, the function returns the effective address of what immediately +%% follows. 
+%% @end +%% @return New state +%%----------------------------------------------------------------------------- +return_labels_and_lines( + #state{ + stream_module = StreamModule, + stream = Stream0, + labels = Labels + } = State, + SortedLines +) -> + SortedLabels = lists:keysort(2, [ + {Label, LabelOffset} + || {Label, LabelOffset} <- Labels, is_integer(Label) + ]), + + I2 = jit_riscv32_asm:ret(), + % Assume total size is 10 bytes (8-byte I1 + 2-byte c.ret) + % If actual is 8 bytes (6-byte I1 + 2-byte c.ret), we'll pad with 2 bytes + I1 = pc_relative_address(a0, 10), + Prologue = <>, + ProloguePadded = + case byte_size(Prologue) of + 10 -> Prologue; + % 2-byte padding + 8 -> <> + end, + LabelsTable = <<<> || {Label, Offset} <- SortedLabels>>, + LinesTable = <<<> || {Line, Offset} <- SortedLines>>, + Stream1 = StreamModule:append( + Stream0, + <> + ), + State#state{stream = Stream1}. + +%% @doc Generate PC-relative address calculation using AUIPC + ADDI +%% This replaces the ARM-style 'adr' pseudo-instruction with native RISC-V instructions +-spec pc_relative_address(riscv32_register(), integer()) -> binary(). 
pc_relative_address(Rd, 0) ->
    % Simple case: just get current PC
    jit_riscv32_asm:auipc(Rd, 0);
pc_relative_address(Rd, Offset) ->
    % Split Offset so that Offset =:= (Upper bsl 12) + Lower with Lower in
    % -2048..2047. Adding 16#800 before the arithmetic shift rounds Upper up
    % exactly when the low 12 bits would sign-extend to a negative value.
    Upper = (Offset + 16#800) bsr 12,
    Lower = Offset - (Upper bsl 12),
    % AUIPC carries a 20-bit signed immediate
    if
        Upper < -16#80000; Upper > 16#7FFFF ->
            error({offset_out_of_range, Offset, Upper, -16#80000, 16#7FFFF});
        true ->
            ok
    end,
    AuipcInstr = jit_riscv32_asm:auipc(Rd, Upper),
    case Lower of
        0 ->
            % Offset is a multiple of 4096: auipc alone is enough
            AuipcInstr;
        _ ->
            % auipc followed by addi (which may be emitted in compressed form)
            AddiInstr = jit_riscv32_asm:addi(Rd, Rd, Lower),
            <<AuipcInstr/binary, AddiInstr/binary>>
    end.

%% Helper function to generate the store of SrcReg into y register Y, handling
%% large offsets. The y registers base pointer is first loaded from ?Y_REGS
%% into TempReg; the last argument is the list of further available registers.
%% Returns the encoded instructions as a binary.
str_y_reg(SrcReg, Y, TempReg, _AvailableRegs) when Y * 4 =< 124 ->
    % Small offset - use immediate addressing
    {BaseReg, Off} = ?Y_REGS,
    I1 = jit_riscv32_asm:lw(TempReg, BaseReg, Off),
    I2 = jit_riscv32_asm:sw(TempReg, SrcReg, Y * 4),
    <<I1/binary, I2/binary>>;
str_y_reg(SrcReg, Y, TempReg1, [TempReg2 | _]) ->
    % Large offset - use register arithmetic with second available register
    Offset = Y * 4,
    {BaseReg, Off} = ?Y_REGS,
    I1 = jit_riscv32_asm:lw(TempReg1, BaseReg, Off),
    I2 = jit_riscv32_asm:li(TempReg2, Offset),
    I3 = jit_riscv32_asm:add(TempReg2, TempReg2, TempReg1),
    I4 = jit_riscv32_asm:sw(TempReg2, SrcReg, 0),
    <<I1/binary, I2/binary, I3/binary, I4/binary>>;
str_y_reg(SrcReg, Y, TempReg1, []) ->
    % Large offset - no additional registers available, use IP_REG as second temp
    Offset = Y * 4,
    {BaseReg, Off} = ?Y_REGS,
    I1 = jit_riscv32_asm:lw(TempReg1, BaseReg, Off),
    % Park the base pointer in IP_REG so TempReg1 can hold the offset
    I2 = jit_riscv32_asm:mv(?IP_REG, TempReg1),
    I3 = jit_riscv32_asm:li(TempReg1, Offset),
    I4 = jit_riscv32_asm:add(TempReg1, TempReg1, ?IP_REG),
    I5 = jit_riscv32_asm:sw(TempReg1, SrcReg, 0),
    <<I1/binary, I2/binary, I3/binary, I4/binary, I5/binary>>.

%% Helper function to generate the load of y register Y into DstReg, handling
%% large offsets. The last argument is the list of available registers.
%% Returns the encoded instructions as a binary.
ldr_y_reg(DstReg, Y, [TempReg | _]) when Y * 4 =< 124 ->
    % Small offset - use immediate addressing
    {BaseReg, Off} = ?Y_REGS,
    I1 = jit_riscv32_asm:lw(TempReg, BaseReg, Off),
    I2 = jit_riscv32_asm:lw(DstReg, TempReg, Y * 4),
    <<I1/binary, I2/binary>>;
ldr_y_reg(DstReg, Y, [TempReg | _]) ->
    % Large offset - use DstReg as second temp register for arithmetic
    Offset = Y * 4,
    {BaseReg, Off} = ?Y_REGS,
    I1 = jit_riscv32_asm:lw(TempReg, BaseReg, Off),
    I2 = jit_riscv32_asm:li(DstReg, Offset),
    I3 = jit_riscv32_asm:add(DstReg, DstReg, TempReg),
    I4 = jit_riscv32_asm:lw(DstReg, DstReg, 0),
    <<I1/binary, I2/binary, I3/binary, I4/binary>>;
ldr_y_reg(DstReg, Y, []) when Y * 4 =< 124 ->
    % Small offset, no registers available - use DstReg as temp
    {BaseReg, Off} = ?Y_REGS,
    I1 = jit_riscv32_asm:lw(DstReg, BaseReg, Off),
    I2 = jit_riscv32_asm:lw(DstReg, DstReg, Y * 4),
    <<I1/binary, I2/binary>>;
ldr_y_reg(DstReg, Y, []) ->
    % Large offset, no registers available - park the base pointer in IP_REG
    % while DstReg holds the offset
    Offset = Y * 4,
    {BaseReg, Off} = ?Y_REGS,
    I1 = jit_riscv32_asm:lw(DstReg, BaseReg, Off),
    I2 = jit_riscv32_asm:mv(?IP_REG, DstReg),
    I3 = jit_riscv32_asm:li(DstReg, Offset),
    I4 = jit_riscv32_asm:add(DstReg, DstReg, ?IP_REG),
    I5 = jit_riscv32_asm:lw(DstReg, DstReg, 0),
    <<I1/binary, I2/binary, I3/binary, I4/binary, I5/binary>>.

%% Move Reg from the used list back to the available list.
%% Fails with badmatch if Reg is not currently in UsedRegs0.
free_reg(AvailableRegs0, UsedRegs0, Reg) when ?IS_GPR(Reg) ->
    AvailableRegs1 = free_reg0(?AVAILABLE_REGS, AvailableRegs0, Reg, []),
    true = lists:member(Reg, UsedRegs0),
    UsedRegs1 = lists:delete(Reg, UsedRegs0),
    {AvailableRegs1, UsedRegs1}.

%% Insert Reg into the available list at the position it has in the reference
%% ordering given as first argument, walking both lists in step.
free_reg0([Reg | _SortedT], PrevRegs0, Reg, Acc) ->
    % Reached Reg's slot: it goes before everything not yet consumed
    lists:reverse(Acc, [Reg | PrevRegs0]);
free_reg0([PrevReg | SortedT], [PrevReg | PrevT], Reg, Acc) ->
    % Register already available and ordered before Reg: keep it
    free_reg0(SortedT, PrevT, Reg, [PrevReg | Acc]);
free_reg0([_Other | SortedT], PrevRegs, Reg, Acc) ->
    % Reference register that is currently in use: skip it
    free_reg0(SortedT, PrevRegs, Reg, Acc).

%% Map every call argument to the register it occupies, or to a marker atom
%% (imm, jit_state, stack) when it does not live in an allocatable register.
args_regs(Args) ->
    Classify = fun
        ({free, {ptr, PtrReg}}) -> PtrReg;
        ({free, FreeReg}) when is_atom(FreeReg) -> FreeReg;
        ({free, Int}) when is_integer(Int) -> imm;
        (offset) -> imm;
        (ctx) -> ?CTX_REG;
        (jit_state) -> jit_state;
        (jit_state_tail_call) -> jit_state;
        (stack) -> stack;
        (Name) when is_atom(Name) -> Name;
        (Int) when is_integer(Int) -> imm;
        ({ptr, PtrReg}) -> PtrReg;
        % VM registers are addressed through the context pointer
        ({x_reg, _}) -> ?CTX_REG;
        ({y_reg, _}) -> ?CTX_REG;
        ({fp_reg, _}) -> ?CTX_REG;
        ({free, {x_reg, _}}) -> ?CTX_REG;
        ({free, {y_reg, _}}) -> ?CTX_REG;
        ({free, {fp_reg, _}}) -> ?CTX_REG;
        ({avm_int64_t, _}) -> imm
    end,
    lists:map(Classify, Args).

%%-----------------------------------------------------------------------------
%% @doc Add a label at the current offset of the stream.
%% @end
%% @param State current backend state
%% @param Label the label number or reference
%% @return Updated backend state
%%-----------------------------------------------------------------------------
-spec add_label(state(), integer() | reference()) -> state().
add_label(#state{stream_module = StreamModule, stream = Stream} = State, Label) ->
    add_label(State, Label, StreamModule:offset(Stream)).

%%-----------------------------------------------------------------------------
%% @doc Add a label at a specific offset
%% @end
%% @param State current backend state
%% @param Label the label number or reference
%% @param Offset the explicit offset for this label
%% @return Updated backend state
%%-----------------------------------------------------------------------------
-spec add_label(state(), integer() | reference(), integer()) -> state().
+add_label( + #state{ + stream_module = StreamModule, + stream = Stream0, + jump_table_start = JumpTableStart, + branches = Branches, + labels = Labels + } = State, + Label, + LabelOffset +) when is_integer(Label) -> + % Patch the jump table entry immediately + % Each jump table entry is AUIPC + JALR (8 bytes) + JumpTableEntryOffset = JumpTableStart + Label * 8, + + % Calculate PC-relative offset from AUIPC instruction to target + PCRelOffset = LabelOffset - JumpTableEntryOffset, + + % Split into upper 20 bits and lower 12 bits + % RISC-V encodes: target = PC + (upper20 << 12) + sign_ext(lower12) + % If lower12 >= 0x800, it's negative when sign-extended, so add 1 to upper + Upper20 = (PCRelOffset + 16#800) bsr 12, + Lower12 = PCRelOffset band 16#FFF, + % Sign-extend lower 12 bits for JALR immediate + Lower12Signed = + if + Lower12 >= 16#800 -> Lower12 - 16#1000; + true -> Lower12 + end, + + % Encode AUIPC and JALR with computed offsets + I1 = jit_riscv32_asm:auipc(a3, Upper20), + I2 = jit_riscv32_asm:jalr(zero, a3, Lower12Signed), + % Create 8-byte jump table entry + JumpTableEntry = <>, + PaddedEntry = + case byte_size(JumpTableEntry) of + 6 -> <>; + 8 -> JumpTableEntry + end, + + Stream1 = StreamModule:replace(Stream0, JumpTableEntryOffset, PaddedEntry), + + % Eagerly patch any branches targeting this label + {Stream2, RemainingBranches} = patch_branches_for_label( + StreamModule, + Stream1, + Label, + LabelOffset, + Branches + ), + + State#state{ + stream = Stream2, branches = RemainingBranches, labels = [{Label, LabelOffset} | Labels] + }; +add_label(#state{labels = Labels} = State, Label, Offset) -> + State#state{labels = [{Label, Offset} | Labels]}. diff --git a/libs/jit/src/jit_riscv32_asm.erl b/libs/jit/src/jit_riscv32_asm.erl new file mode 100644 index 0000000000..25bf1ff574 --- /dev/null +++ b/libs/jit/src/jit_riscv32_asm.erl @@ -0,0 +1,1802 @@ +% +% This file is part of AtomVM. 
+% +% Copyright 2025 Paul Guyot +% +% Licensed under the Apache License, Version 2.0 (the "License"); +% you may not use this file except in compliance with the License. +% You may obtain a copy of the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, +% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +% See the License for the specific language governing permissions and +% limitations under the License. +% +% SPDX-License-Identifier: Apache-2.0 OR LGPL-2.1-or-later +% + +-module(jit_riscv32_asm). + +-export([ + % R-type arithmetic and logical instructions + add/3, + sub/3, + and_/3, + or_/2, + or_/3, + xor_/3, + sll/3, + srl/3, + sra/3, + slt/3, + sltu/3, + % I-type immediate instructions + addi/3, + andi/3, + ori/3, + xori/3, + slli/3, + srli/3, + srai/3, + slti/3, + sltiu/3, + % Load instructions + lw/2, + lw/3, + lh/2, + lh/3, + lhu/2, + lhu/3, + lb/2, + lb/3, + lbu/2, + lbu/3, + % Store instructions + sw/2, + sw/3, + sh/2, + sh/3, + sb/2, + sb/3, + % Branch instructions + beq/3, + bne/3, + blt/3, + bge/3, + bltu/3, + bgeu/3, + % Jump instructions + jal/2, + jalr/3, + jalr/2, + % Upper immediate instructions + lui/2, + auipc/2, + % Pseudo-instructions + nop/0, + li/2, + mv/2, + not_/2, + neg/2, + j/1, + jr/1, + ret/0, + call/2, + % M extension (multiply/divide) + mul/3, + % C extension (compressed) - arithmetic/logical + c_add/2, + c_sub/2, + c_and/2, + c_or/2, + c_xor/2, + c_mv/2, + % C extension - immediate instructions + c_addi/2, + c_andi/2, + c_li/2, + c_lui/2, + c_addi16sp/1, + c_addi4spn/2, + % C extension - shift instructions + c_slli/2, + c_srli/2, + c_srai/2, + % C extension - load/store + c_lw/2, + c_sw/2, + c_lwsp/2, + c_swsp/2, + % C extension - branches and jumps + c_beqz/2, + c_bnez/2, + c_j/1, + c_jal/1, + c_jr/1, + c_jalr/1, + % C extension - system instructions + 
c_ebreak/0, + % C extension - pseudo-instructions + c_nop/0 +]). + +-export_type([ + riscv_register/0 +]). + +%% RISC-V 32-bit (RV32I) Assembler +%% +%% This module provides an assembler for the RISC-V 32-bit instruction set. +%% It generates binary machine code for RISC-V instructions following the +%% RV32I base integer instruction set architecture. +%% +%% RISC-V Register Set (32 registers): +%% x0 (zero) - Hardwired zero (reads as 0, writes ignored) +%% x1 (ra) - Return address +%% x2 (sp) - Stack pointer +%% x3 (gp) - Global pointer +%% x4 (tp) - Thread pointer +%% x5 (t0) - Temporary register 0 +%% x6 (t1) - Temporary register 1 +%% x7 (t2) - Temporary register 2 +%% x8 (s0/fp)- Saved register 0 / Frame pointer +%% x9 (s1) - Saved register 1 +%% x10 (a0) - Function argument 0 / Return value 0 +%% x11 (a1) - Function argument 1 / Return value 1 +%% x12 (a2) - Function argument 2 +%% x13 (a3) - Function argument 3 +%% x14 (a4) - Function argument 4 +%% x15 (a5) - Function argument 5 +%% x16 (a6) - Function argument 6 +%% x17 (a7) - Function argument 7 +%% x18 (s2) - Saved register 2 +%% x19 (s3) - Saved register 3 +%% x20 (s4) - Saved register 4 +%% x21 (s5) - Saved register 5 +%% x22 (s6) - Saved register 6 +%% x23 (s7) - Saved register 7 +%% x24 (s8) - Saved register 8 +%% x25 (s9) - Saved register 9 +%% x26 (s10) - Saved register 10 +%% x27 (s11) - Saved register 11 +%% x28 (t3) - Temporary register 3 +%% x29 (t4) - Temporary register 4 +%% x30 (t5) - Temporary register 5 +%% x31 (t6) - Temporary register 6 +%% +%% RISC-V Calling Convention (ILP32): +%% - Arguments: a0-a7 (x10-x17) +%% - Return values: a0-a1 (x10-x11) +%% - Caller-saved: t0-t6, a0-a7 +%% - Callee-saved: s0-s11, sp, ra +%% - Stack grows downward +%% - Stack must be 16-byte aligned at function call boundaries +%% +%% Instruction Encoding: +%% All RV32I instructions are 32 bits (4 bytes). +%% Bit ordering is little-endian within each 32-bit word. 
+%% +%% See: RISC-V Instruction Set Manual, Volume I: User-Level ISA +%% https://riscv.org/technical/specifications/ +%% https://github.com/riscv/riscv-isa-manual/ + +-type riscv_register() :: + zero + | ra + | sp + | gp + | tp + | t0 + | t1 + | t2 + | s0 + | fp + | s1 + | a0 + | a1 + | a2 + | a3 + | a4 + | a5 + | a6 + | a7 + | s2 + | s3 + | s4 + | s5 + | s6 + | s7 + | s8 + | s9 + | s10 + | s11 + | t3 + | t4 + | t5 + | t6. + +%%----------------------------------------------------------------------------- +%% Helper functions +%%----------------------------------------------------------------------------- + +%% Convert register atoms to register numbers (0-31) +-spec reg_to_num(riscv_register()) -> 0..31. +% ABI names +reg_to_num(zero) -> 0; +reg_to_num(ra) -> 1; +reg_to_num(sp) -> 2; +reg_to_num(gp) -> 3; +reg_to_num(tp) -> 4; +reg_to_num(t0) -> 5; +reg_to_num(t1) -> 6; +reg_to_num(t2) -> 7; +reg_to_num(s0) -> 8; +reg_to_num(fp) -> 8; +reg_to_num(s1) -> 9; +reg_to_num(a0) -> 10; +reg_to_num(a1) -> 11; +reg_to_num(a2) -> 12; +reg_to_num(a3) -> 13; +reg_to_num(a4) -> 14; +reg_to_num(a5) -> 15; +reg_to_num(a6) -> 16; +reg_to_num(a7) -> 17; +reg_to_num(s2) -> 18; +reg_to_num(s3) -> 19; +reg_to_num(s4) -> 20; +reg_to_num(s5) -> 21; +reg_to_num(s6) -> 22; +reg_to_num(s7) -> 23; +reg_to_num(s8) -> 24; +reg_to_num(s9) -> 25; +reg_to_num(s10) -> 26; +reg_to_num(s11) -> 27; +reg_to_num(t3) -> 28; +reg_to_num(t4) -> 29; +reg_to_num(t5) -> 30; +reg_to_num(t6) -> 31. + +%%----------------------------------------------------------------------------- +%% R-type instruction encoding +%%----------------------------------------------------------------------------- + +%% R-type instruction format: +%% funct7 (7) | rs2 (5) | rs1 (5) | funct3 (3) | rd (5) | opcode (7) +%% Bits: 31-25 24-20 19-15 14-12 11-7 6-0 + +-spec encode_r_type( + Opcode :: 0..127, + Rd :: riscv_register(), + Funct3 :: 0..7, + Rs1 :: riscv_register(), + Rs2 :: riscv_register(), + Funct7 :: 0..127 +) -> binary(). 
encode_r_type(Opcode, Rd, Funct3, Rs1, Rs2, Funct7) ->
    RdNum = reg_to_num(Rd),
    Rs1Num = reg_to_num(Rs1),
    Rs2Num = reg_to_num(Rs2),
    % Assemble the fields at the bit positions of the R-type format
    Instr =
        (Funct7 bsl 25) bor
            (Rs2Num bsl 20) bor
            (Rs1Num bsl 15) bor
            (Funct3 bsl 12) bor
            (RdNum bsl 7) bor
            Opcode,
    % Instructions are stored as little-endian 32-bit words
    <<Instr:32/little>>.

%%-----------------------------------------------------------------------------
%% R-type arithmetic and logical instructions
%%-----------------------------------------------------------------------------

%% ADD - Add
%% rd = rs1 + rs2
-spec add(riscv_register(), riscv_register(), riscv_register()) -> binary().
add(Rd, Rs1, Rs2) when Rd =:= Rs1, Rd =/= zero, Rs2 =/= zero ->
    % Use c.add when rd == rs1 and neither register is zero
    c_add(Rd, Rs2);
add(Rd, Rs1, Rs2) ->
    % Opcode: 0110011 (0x33), Funct3: 000, Funct7: 0000000
    encode_r_type(16#33, Rd, 16#0, Rs1, Rs2, 16#00).

%% SUB - Subtract
%% rd = rs1 - rs2
-spec sub(riscv_register(), riscv_register(), riscv_register()) -> binary().
sub(Rd, Rs1, Rs2) ->
    % c.sub is only used for rd == rs1 with both operands accepted by
    % is_compressed_reg/1; otherwise Opcode: 0110011 (0x33), Funct3: 000,
    % Funct7: 0100000
    case Rd =:= Rs1 andalso is_compressed_reg(Rd) andalso is_compressed_reg(Rs2) of
        true -> c_sub(Rd, Rs2);
        false -> encode_r_type(16#33, Rd, 16#0, Rs1, Rs2, 16#20)
    end.

%% AND - Bitwise AND
%% rd = rs1 & rs2
-spec and_(riscv_register(), riscv_register(), riscv_register()) -> binary().
and_(Rd, Rs1, Rs2) ->
    % c.and under the same conditions as c.sub; otherwise
    % Opcode: 0110011 (0x33), Funct3: 111, Funct7: 0000000
    case Rd =:= Rs1 andalso is_compressed_reg(Rd) andalso is_compressed_reg(Rs2) of
        true -> c_and(Rd, Rs2);
        false -> encode_r_type(16#33, Rd, 16#7, Rs1, Rs2, 16#00)
    end.

%% OR - Bitwise OR
%% rd = rs1 | rs2
-spec or_(riscv_register(), riscv_register(), riscv_register()) -> binary().
or_(Rd, Rs1, Rs2) ->
    % The 16-bit c.or form is chosen only when it operates in place on two
    % registers accepted by is_compressed_reg/1; otherwise
    % Opcode: 0110011 (0x33), Funct3: 110, Funct7: 0000000
    UseCompressed = Rd =:= Rs1 andalso is_compressed_reg(Rd) andalso is_compressed_reg(Rs2),
    case UseCompressed of
        true -> c_or(Rd, Rs2);
        false -> encode_r_type(16#33, Rd, 16#6, Rs1, Rs2, 16#00)
    end.

%% OR - Bitwise OR (in-place)
%% rd = rd | rs
-spec or_(riscv_register(), riscv_register()) -> binary().
or_(Rd, Rs) ->
    or_(Rd, Rd, Rs).

%% XOR - Bitwise XOR
%% rd = rs1 ^ rs2
-spec xor_(riscv_register(), riscv_register(), riscv_register()) -> binary().
xor_(Rd, Rs1, Rs2) ->
    % Same selection rule as or_/3; full encoding is
    % Opcode: 0110011 (0x33), Funct3: 100, Funct7: 0000000
    UseCompressed = Rd =:= Rs1 andalso is_compressed_reg(Rd) andalso is_compressed_reg(Rs2),
    case UseCompressed of
        true -> c_xor(Rd, Rs2);
        false -> encode_r_type(16#33, Rd, 16#4, Rs1, Rs2, 16#00)
    end.

%% SLL - Shift Left Logical
%% rd = rs1 << rs2[4:0]
-spec sll(riscv_register(), riscv_register(), riscv_register()) -> binary().
sll(Rd, Rs1, Rs2) ->
    % Opcode: 0110011 (0x33)
    Funct3 = 16#1,
    Funct7 = 16#00,
    encode_r_type(16#33, Rd, Funct3, Rs1, Rs2, Funct7).

%% SRL - Shift Right Logical
%% rd = rs1 >> rs2[4:0] (zero-extend)
-spec srl(riscv_register(), riscv_register(), riscv_register()) -> binary().
srl(Rd, Rs1, Rs2) ->
    % Opcode: 0110011 (0x33)
    Funct3 = 16#5,
    Funct7 = 16#00,
    encode_r_type(16#33, Rd, Funct3, Rs1, Rs2, Funct7).

%% SRA - Shift Right Arithmetic
%% rd = rs1 >> rs2[4:0] (sign-extend)
-spec sra(riscv_register(), riscv_register(), riscv_register()) -> binary().
sra(Rd, Rs1, Rs2) ->
    % Opcode: 0110011 (0x33); Funct7 0100000 distinguishes it from SRL
    Funct3 = 16#5,
    Funct7 = 16#20,
    encode_r_type(16#33, Rd, Funct3, Rs1, Rs2, Funct7).

%% SLT - Set Less Than
%% rd = (rs1 < rs2) ? 1 : 0 (signed)
-spec slt(riscv_register(), riscv_register(), riscv_register()) -> binary().
slt(Rd, Rs1, Rs2) ->
    % Opcode: 0110011 (0x33), Funct3: 010, Funct7: 0000000
    encode_r_type(16#33, Rd, 16#2, Rs1, Rs2, 16#00).

%% SLTU - Set Less Than Unsigned
%% rd = (rs1 < rs2) ? 1 : 0 (unsigned)
-spec sltu(riscv_register(), riscv_register(), riscv_register()) -> binary().
sltu(Rd, Rs1, Rs2) ->
    % Opcode: 0110011 (0x33), Funct3: 011, Funct7: 0000000
    encode_r_type(16#33, Rd, 16#3, Rs1, Rs2, 16#00).

%%-----------------------------------------------------------------------------
%% I-type instruction encoding
%%-----------------------------------------------------------------------------

%% I-type instruction format:
%% imm[11:0] (12) | rs1 (5) | funct3 (3) | rd (5) | opcode (7)
%% Bits:  31-20      19-15     14-12        11-7     6-0
%%
%% The immediate is truncated to its low 12 bits; range checking is the
%% responsibility of the calling instruction function.

-spec encode_i_type(
    Opcode :: 0..127,
    Rd :: riscv_register(),
    Funct3 :: 0..7,
    Rs1 :: riscv_register(),
    Imm :: integer()
) -> binary().
encode_i_type(Opcode, Rd, Funct3, Rs1, Imm) ->
    RdNum = reg_to_num(Rd),
    Rs1Num = reg_to_num(Rs1),
    % Keep the low 12 bits: for a negative immediate this yields its
    % 12-bit two's-complement representation
    ImmMasked = Imm band 16#FFF,
    Instr =
        (ImmMasked bsl 20) bor
            (Rs1Num bsl 15) bor
            (Funct3 bsl 12) bor
            (RdNum bsl 7) bor
            Opcode,
    % Emit the 32-bit instruction word in little-endian byte order
    <<Instr:32/little>>.

%%-----------------------------------------------------------------------------
%% I-type immediate arithmetic and logical instructions
%%-----------------------------------------------------------------------------

%% ADDI - Add Immediate
%% rd = rs1 + imm
%% Emits c.addi when rd == rs1, rd is not zero and imm is within -32..31;
%% otherwise the 32-bit encoding. Raises {immediate_out_of_range, ...} when
%% imm is outside -2048..2047.
-spec addi(riscv_register(), riscv_register(), integer()) -> binary().
addi(Rd, Rs1, Imm) when Imm >= -2048, Imm =< 2047 ->
    FitsCompressed =
        Rd =:= Rs1 andalso Rd =/= zero andalso Imm >= -32 andalso Imm =< 31,
    case FitsCompressed of
        true ->
            c_addi(Rd, Imm);
        false ->
            % Opcode: 0010011 (0x13), Funct3: 000
            encode_i_type(16#13, Rd, 16#0, Rs1, Imm)
    end;
addi(_Rd, _Rs1, Imm) ->
    error({immediate_out_of_range, Imm, -2048, 2047}).
%% ANDI - AND Immediate
%% rd = rs1 & imm
%% The short c.andi form is chosen when rd == rs1, imm lies in -32..31 and rd
%% is in the compressed register set. Raises {immediate_out_of_range, ...}
%% when imm is outside -2048..2047.
-spec andi(riscv_register(), riscv_register(), integer()) -> binary().
andi(Rd, Rs1, Imm) when -2048 =< Imm, Imm =< 2047 ->
    Short =
        Rd =:= Rs1 andalso Imm >= -32 andalso Imm =< 31 andalso is_compressed_reg(Rd),
    case Short of
        true ->
            c_andi(Rd, Imm);
        false ->
            % Opcode: 0010011 (0x13), Funct3: 111
            encode_i_type(16#13, Rd, 16#7, Rs1, Imm)
    end;
andi(_Rd, _Rs1, Imm) ->
    error({immediate_out_of_range, Imm, -2048, 2047}).

%% ORI - OR Immediate
%% rd = rs1 | imm
-spec ori(riscv_register(), riscv_register(), integer()) -> binary().
ori(Rd, Rs1, Imm) when -2048 =< Imm, Imm =< 2047 ->
    % Opcode: 0010011, Funct3: 110
    Opcode = 16#13,
    Funct3 = 16#6,
    encode_i_type(Opcode, Rd, Funct3, Rs1, Imm);
ori(_Rd, _Rs1, Imm) ->
    error({immediate_out_of_range, Imm, -2048, 2047}).

%% XORI - XOR Immediate
%% rd = rs1 ^ imm
-spec xori(riscv_register(), riscv_register(), integer()) -> binary().
xori(Rd, Rs1, Imm) when -2048 =< Imm, Imm =< 2047 ->
    % Opcode: 0010011, Funct3: 100
    Opcode = 16#13,
    Funct3 = 16#4,
    encode_i_type(Opcode, Rd, Funct3, Rs1, Imm);
xori(_Rd, _Rs1, Imm) ->
    error({immediate_out_of_range, Imm, -2048, 2047}).

%% SLTI - Set Less Than Immediate
%% rd = (rs1 < imm) ? 1 : 0 (signed)
-spec slti(riscv_register(), riscv_register(), integer()) -> binary().
slti(Rd, Rs1, Imm) when -2048 =< Imm, Imm =< 2047 ->
    % Opcode: 0010011, Funct3: 010
    Opcode = 16#13,
    Funct3 = 16#2,
    encode_i_type(Opcode, Rd, Funct3, Rs1, Imm);
slti(_Rd, _Rs1, Imm) ->
    error({immediate_out_of_range, Imm, -2048, 2047}).

%% SLTIU - Set Less Than Immediate Unsigned
%% rd = (rs1 < imm) ? 1 : 0 (unsigned)
-spec sltiu(riscv_register(), riscv_register(), integer()) -> binary().
sltiu(Rd, Rs1, Imm) when -2048 =< Imm, Imm =< 2047 ->
    % Opcode: 0010011, Funct3: 011
    Opcode = 16#13,
    Funct3 = 16#3,
    encode_i_type(Opcode, Rd, Funct3, Rs1, Imm);
sltiu(_Rd, _Rs1, Imm) ->
    error({immediate_out_of_range, Imm, -2048, 2047}).
%%-----------------------------------------------------------------------------
%% I-type immediate shift instructions
%%-----------------------------------------------------------------------------

%% SLLI - Shift Left Logical Immediate
%% rd = rs1 << shamt
%% Raises {shift_amount_out_of_range, ...} when shamt is outside 0..31.
-spec slli(riscv_register(), riscv_register(), 0..31) -> binary().
slli(Rd, Rs1, Shamt) when 0 =< Shamt, Shamt =< 31 ->
    % c.slli is only picked for rd == rs1, rd != zero and a non-zero shift
    % amount (c.slli with shamt=0 is reserved)
    Short = Rd =:= Rs1 andalso Rd =/= zero andalso Shamt >= 1,
    case Short of
        true ->
            c_slli(Rd, Shamt);
        false ->
            % Opcode: 0010011 (0x13), Funct3: 001, Imm[11:5] = 0000000
            encode_i_type(16#13, Rd, 16#1, Rs1, Shamt)
    end;
slli(_Rd, _Rs1, Shamt) ->
    error({shift_amount_out_of_range, Shamt, 0, 31}).

%% SRLI - Shift Right Logical Immediate
%% rd = rs1 >> shamt (zero-extend)
%% Raises {shift_amount_out_of_range, ...} when shamt is outside 0..31.
-spec srli(riscv_register(), riscv_register(), 0..31) -> binary().
srli(Rd, Rs1, Shamt) when 0 =< Shamt, Shamt =< 31 ->
    % c.srli is picked when rd == rs1 and rd is in the compressed register set
    Short = Rd =:= Rs1 andalso is_compressed_reg(Rd),
    case Short of
        true ->
            c_srli(Rd, Shamt);
        false ->
            % Opcode: 0010011 (0x13), Funct3: 101, Imm[11:5] = 0000000
            encode_i_type(16#13, Rd, 16#5, Rs1, Shamt)
    end;
srli(_Rd, _Rs1, Shamt) ->
    error({shift_amount_out_of_range, Shamt, 0, 31}).

%% SRAI - Shift Right Arithmetic Immediate
%% rd = rs1 >> shamt (sign-extend)
-spec srai(riscv_register(), riscv_register(), 0..31) -> binary().
srai(Rd, Rs1, Shamt) when Shamt >= 0, Shamt =< 31 ->
    % c.srai is picked when rd == rs1 and rd is in the compressed register set
    case Rd =:= Rs1 andalso is_compressed_reg(Rd) of
        true ->
            c_srai(Rd, Shamt);
        false ->
            % Opcode: 0010011 (0x13), Funct3: 101, Imm[11:5] = 0100000
            % Bit 30 of the instruction (Imm[10]) distinguishes SRAI from SRLI
            ImmWithBit30 = Shamt bor (1 bsl 10),
            encode_i_type(16#13, Rd, 16#5, Rs1, ImmWithBit30)
    end;
srai(_Rd, _Rs1, Shamt) ->
    error({shift_amount_out_of_range, Shamt, 0, 31}).

%%-----------------------------------------------------------------------------
%% Load instructions (I-type)
%%-----------------------------------------------------------------------------

%% LW - Load Word
%% rd = mem[rs1 + offset] (32-bit)
%% The second argument is either a {BaseRegister, Offset} tuple or a bare
%% base register (offset 0).
-spec lw(riscv_register(), {riscv_register(), integer()} | riscv_register()) ->
    binary().
lw(Rd, {Rs1, Offset}) ->
    lw(Rd, Rs1, Offset);
lw(Rd, Rs1) when is_atom(Rs1) ->
    lw(Rd, Rs1, 0).

%% LW with explicit base register and offset.
%% Compressed forms are chosen when available:
%%   - c.lwsp for sp-relative loads, rd != zero, offset 0..252 and a multiple of 4
%%   - c.lw when rd and rs1 are both in the compressed register set and the
%%     offset is 0..124 and a multiple of 4
%% Raises {offset_out_of_range, ...} when the offset is outside -2048..2047.
-spec lw(riscv_register(), riscv_register(), integer()) -> binary().
lw(Rd, sp, Offset) when Rd =/= zero, Offset >= 0, Offset =< 252, Offset rem 4 =:= 0 ->
    c_lwsp(Rd, Offset);
lw(Rd, Rs1, Offset) when Offset >= 0, Offset =< 124, Offset rem 4 =:= 0 ->
    BothCompressed = is_compressed_reg(Rd) andalso is_compressed_reg(Rs1),
    if
        BothCompressed -> c_lw(Rd, {Rs1, Offset});
        true -> encode_i_type(16#03, Rd, 16#2, Rs1, Offset)
    end;
lw(Rd, Rs1, Offset) when Offset >= -2048, Offset =< 2047 ->
    % Opcode: 0000011 (0x03), Funct3: 010
    encode_i_type(16#03, Rd, 16#2, Rs1, Offset);
lw(_Rd, _Rs1, Offset) ->
    error({offset_out_of_range, Offset, -2048, 2047}).
%% LH - Load Halfword (sign-extended)
%% rd = sign_extend(mem[rs1 + offset][15:0])
%% The second argument is either a {BaseRegister, Offset} tuple or a bare
%% base register (offset 0).
-spec lh(riscv_register(), {riscv_register(), integer()} | riscv_register()) ->
    binary().
lh(Rd, {Rs1, Offset}) ->
    lh(Rd, Rs1, Offset);
lh(Rd, Rs1) when is_atom(Rs1) ->
    lh(Rd, Rs1, 0).

%% Raises {offset_out_of_range, ...} when the offset is outside -2048..2047.
-spec lh(riscv_register(), riscv_register(), integer()) -> binary().
lh(Rd, Rs1, Offset) when Offset >= -2048, Offset =< 2047 ->
    % Opcode: 0000011 (0x03), Funct3: 001
    encode_i_type(16#03, Rd, 16#1, Rs1, Offset);
lh(_Rd, _Rs1, Offset) ->
    error({offset_out_of_range, Offset, -2048, 2047}).

%% LHU - Load Halfword Unsigned (zero-extended)
%% rd = zero_extend(mem[rs1 + offset][15:0])
%% The second argument is either a {BaseRegister, Offset} tuple or a bare
%% base register (offset 0).
-spec lhu(riscv_register(), {riscv_register(), integer()} | riscv_register()) ->
    binary().
lhu(Rd, {Rs1, Offset}) ->
    lhu(Rd, Rs1, Offset);
lhu(Rd, Rs1) when is_atom(Rs1) ->
    lhu(Rd, Rs1, 0).

%% Raises {offset_out_of_range, ...} when the offset is outside -2048..2047.
-spec lhu(riscv_register(), riscv_register(), integer()) -> binary().
lhu(Rd, Rs1, Offset) when Offset >= -2048, Offset =< 2047 ->
    % Opcode: 0000011 (0x03), Funct3: 101
    encode_i_type(16#03, Rd, 16#5, Rs1, Offset);
lhu(_Rd, _Rs1, Offset) ->
    error({offset_out_of_range, Offset, -2048, 2047}).

%% LB - Load Byte (sign-extended)
%% rd = sign_extend(mem[rs1 + offset][7:0])
%% The second argument is either a {BaseRegister, Offset} tuple or a bare
%% base register (offset 0).
-spec lb(riscv_register(), {riscv_register(), integer()} | riscv_register()) ->
    binary().
lb(Rd, {Rs1, Offset}) ->
    lb(Rd, Rs1, Offset);
lb(Rd, Rs1) when is_atom(Rs1) ->
    lb(Rd, Rs1, 0).

%% Raises {offset_out_of_range, ...} when the offset is outside -2048..2047.
-spec lb(riscv_register(), riscv_register(), integer()) -> binary().
lb(Rd, Rs1, Offset) when Offset >= -2048, Offset =< 2047 ->
    % Opcode: 0000011 (0x03), Funct3: 000
    encode_i_type(16#03, Rd, 16#0, Rs1, Offset);
lb(_Rd, _Rs1, Offset) ->
    error({offset_out_of_range, Offset, -2048, 2047}).

%% LBU - Load Byte Unsigned (zero-extended)
%% rd = zero_extend(mem[rs1 + offset][7:0])
%% The second argument is either a {BaseRegister, Offset} tuple or a bare
%% base register (offset 0).
-spec lbu(riscv_register(), {riscv_register(), integer()} | riscv_register()) ->
    binary().
lbu(Rd, {Rs1, Offset}) ->
    lbu(Rd, Rs1, Offset);
lbu(Rd, Rs1) when is_atom(Rs1) ->
    lbu(Rd, Rs1, 0).

%% Raises {offset_out_of_range, ...} when the offset is outside -2048..2047.
-spec lbu(riscv_register(), riscv_register(), integer()) -> binary().
lbu(Rd, Rs1, Offset) when Offset >= -2048, Offset =< 2047 ->
    % Opcode: 0000011 (0x03), Funct3: 100
    encode_i_type(16#03, Rd, 16#4, Rs1, Offset);
lbu(_Rd, _Rs1, Offset) ->
    error({offset_out_of_range, Offset, -2048, 2047}).

%%-----------------------------------------------------------------------------
%% S-type instruction encoding (for stores)
%%-----------------------------------------------------------------------------

%% S-type instruction format:
%% imm[11:5] (7) | rs2 (5) | rs1 (5) | funct3 (3) | imm[4:0] (5) | opcode (7)
%% Bits:  31-25     24-20     19-15     14-12        11-7           6-0
%%
%% The immediate is truncated to its low 12 bits; range checking is the
%% responsibility of the calling instruction function.

-spec encode_s_type(
    Opcode :: 0..127,
    Funct3 :: 0..7,
    Rs1 :: riscv_register(),
    Rs2 :: riscv_register(),
    Imm :: integer()
) -> binary().
encode_s_type(Opcode, Funct3, Rs1, Rs2, Imm) ->
    Rs1Num = reg_to_num(Rs1),
    Rs2Num = reg_to_num(Rs2),
    % Split immediate: imm[11:5] goes to bits 31-25, imm[4:0] goes to bits 11-7
    ImmMasked = Imm band 16#FFF,
    Imm11_5 = (ImmMasked bsr 5) band 16#7F,
    Imm4_0 = ImmMasked band 16#1F,
    Instr =
        (Imm11_5 bsl 25) bor
            (Rs2Num bsl 20) bor
            (Rs1Num bsl 15) bor
            (Funct3 bsl 12) bor
            (Imm4_0 bsl 7) bor
            Opcode,
    % Emit the 32-bit instruction word in little-endian byte order
    <<Instr:32/little>>.

%%-----------------------------------------------------------------------------
%% Store instructions (S-type)
%%-----------------------------------------------------------------------------

%% SW - Store Word
%% mem[rs1 + offset] = rs2[31:0]
%% First argument is the source register (rs2); the second is either a
%% {BaseRegister, Offset} tuple or a bare base register (offset 0).
-spec sw(riscv_register(), {riscv_register(), integer()} | riscv_register()) ->
    binary().
sw(Rs2, {Rs1, Offset}) ->
    sw(Rs1, Rs2, Offset);
sw(Rs2, Rs1) when is_atom(Rs1) ->
    sw(Rs1, Rs2, 0).

-spec sw(riscv_register(), riscv_register(), integer()) -> binary().
sw(sp, Rs2, Offset) when Offset >= 0, Offset =< 252, Offset rem 4 =:= 0 ->
    % Use c.swsp for stores to sp with aligned offset in range
    c_swsp(Rs2, Offset);
sw(Rs1, Rs2, Offset) when Offset >= 0, Offset =< 124, Offset rem 4 =:= 0 ->
    % Use c.sw when both registers are in the compressed set and the offset
    % is word-aligned; otherwise fall back to the 32-bit encoding
    BothCompressed = is_compressed_reg(Rs1) andalso is_compressed_reg(Rs2),
    if
        BothCompressed -> c_sw(Rs2, {Rs1, Offset});
        true -> encode_s_type(16#23, 16#2, Rs1, Rs2, Offset)
    end;
sw(Rs1, Rs2, Offset) when Offset >= -2048, Offset =< 2047 ->
    % Opcode: 0100011 (0x23), Funct3: 010
    encode_s_type(16#23, 16#2, Rs1, Rs2, Offset);
sw(_Rs1, _Rs2, Offset) ->
    error({offset_out_of_range, Offset, -2048, 2047}).

%% SH - Store Halfword
%% mem[rs1 + offset][15:0] = rs2[15:0]
%% First argument is the source register (rs2); the second is either a
%% {BaseRegister, Offset} tuple or a bare base register (offset 0).
-spec sh(riscv_register(), {riscv_register(), integer()} | riscv_register()) ->
    binary().
sh(Rs2, {Rs1, Offset}) ->
    sh(Rs1, Rs2, Offset);
sh(Rs2, Rs1) when is_atom(Rs1) ->
    sh(Rs1, Rs2, 0).

%% Three-argument form takes the base register first, then the source.
%% Raises {offset_out_of_range, ...} when the offset is outside -2048..2047.
-spec sh(riscv_register(), riscv_register(), integer()) -> binary().
sh(Rs1, Rs2, Offset) when Offset >= -2048, Offset =< 2047 ->
    % Opcode: 0100011 (0x23), Funct3: 001
    encode_s_type(16#23, 16#1, Rs1, Rs2, Offset);
sh(_Rs1, _Rs2, Offset) ->
    error({offset_out_of_range, Offset, -2048, 2047}).

%% SB - Store Byte
%% mem[rs1 + offset][7:0] = rs2[7:0]
%% First argument is the source register (rs2); the second is either a
%% {BaseRegister, Offset} tuple or a bare base register (offset 0).
-spec sb(riscv_register(), {riscv_register(), integer()} | riscv_register()) ->
    binary().
sb(Rs2, {Rs1, Offset}) ->
    sb(Rs1, Rs2, Offset);
sb(Rs2, Rs1) when is_atom(Rs1) ->
    sb(Rs1, Rs2, 0).

%% Three-argument form takes the base register first, then the source.
%% Raises {offset_out_of_range, ...} when the offset is outside -2048..2047.
-spec sb(riscv_register(), riscv_register(), integer()) -> binary().
sb(Rs1, Rs2, Offset) when Offset >= -2048, Offset =< 2047 ->
    % Opcode: 0100011 (0x23), Funct3: 000
    encode_s_type(16#23, 16#0, Rs1, Rs2, Offset);
sb(_Rs1, _Rs2, Offset) ->
    error({offset_out_of_range, Offset, -2048, 2047}).
%%-----------------------------------------------------------------------------
%% B-type instruction encoding (for branches)
%%-----------------------------------------------------------------------------

%% B-type instruction format:
%% imm[12|10:5] (7) | rs2 (5) | rs1 (5) | funct3 (3) | imm[4:1|11] (5) | opcode (7)
%% Bits:  31-25        24-20     19-15     14-12        11-7              6-0
%%
%% The immediate is split across the instruction and represents a signed
%% byte offset that must be 2-byte aligned (bit 0 is not encoded).
%% Encodable range: -4096 to +4094 bytes. This function only masks the offset
%% to 13 bits; callers are expected to validate range and alignment.

-spec encode_b_type(
    Opcode :: 0..127,
    Funct3 :: 0..7,
    Rs1 :: riscv_register(),
    Rs2 :: riscv_register(),
    Offset :: integer()
) -> binary().
encode_b_type(Opcode, Funct3, Rs1, Rs2, Offset) ->
    Rs1Num = reg_to_num(Rs1),
    Rs2Num = reg_to_num(Rs2),
    % Keep the low 13 bits (two's complement for negative offsets)
    OffsetMasked = Offset band 16#1FFF,
    % imm[12] -> bit 31
    Imm12 = (OffsetMasked bsr 12) band 1,
    % imm[10:5] -> bits 30-25
    Imm10_5 = (OffsetMasked bsr 5) band 16#3F,
    % imm[4:1] -> bits 11-8
    Imm4_1 = (OffsetMasked bsr 1) band 16#F,
    % imm[11] -> bit 7
    Imm11 = (OffsetMasked bsr 11) band 1,
    Instr =
        (Imm12 bsl 31) bor
            (Imm10_5 bsl 25) bor
            (Rs2Num bsl 20) bor
            (Rs1Num bsl 15) bor
            (Funct3 bsl 12) bor
            (Imm4_1 bsl 8) bor
            (Imm11 bsl 7) bor
            Opcode,
    % Emit the 32-bit instruction word in little-endian byte order
    <<Instr:32/little>>.

%%-----------------------------------------------------------------------------
%% Branch instructions (B-type)
%%-----------------------------------------------------------------------------

%% BEQ - Branch if Equal
%% if (rs1 == rs2) pc += offset
-spec beq(riscv_register(), riscv_register(), integer()) -> binary().
beq(Rs1, Rs2, Offset) when
    Offset >= -4096, Offset =< 4094, (Offset rem 2) =:= 0
->
    % c.beqz is chosen when comparing against zero, the offset fits in
    % -256..254 and rs1 is in the compressed register set
    Short =
        Rs2 =:= zero andalso Offset >= -256 andalso Offset =< 254 andalso
            is_compressed_reg(Rs1),
    case Short of
        true ->
            c_beqz(Rs1, Offset);
        false ->
            % Opcode: 1100011 (0x63), Funct3: 000
            encode_b_type(16#63, 16#0, Rs1, Rs2, Offset)
    end;
beq(_Rs1, _Rs2, Offset) when (Offset rem 2) =/= 0 ->
    error({offset_not_aligned, Offset, 2});
beq(_Rs1, _Rs2, Offset) ->
    error({offset_out_of_range, Offset, -4096, 4094}).

%% BNE - Branch if Not Equal
%% if (rs1 != rs2) pc += offset
-spec bne(riscv_register(), riscv_register(), integer()) -> binary().
bne(Rs1, Rs2, Offset) when
    Offset >= -4096, Offset =< 4094, (Offset rem 2) =:= 0
->
    % c.bnez is chosen when comparing against zero, the offset fits in
    % -256..254 and rs1 is in the compressed register set
    Short =
        Rs2 =:= zero andalso Offset >= -256 andalso Offset =< 254 andalso
            is_compressed_reg(Rs1),
    case Short of
        true ->
            c_bnez(Rs1, Offset);
        false ->
            % Opcode: 1100011 (0x63), Funct3: 001
            encode_b_type(16#63, 16#1, Rs1, Rs2, Offset)
    end;
bne(_Rs1, _Rs2, Offset) when (Offset rem 2) =/= 0 ->
    error({offset_not_aligned, Offset, 2});
bne(_Rs1, _Rs2, Offset) ->
    error({offset_out_of_range, Offset, -4096, 4094}).

%% BLT - Branch if Less Than (signed)
%% if (rs1 < rs2) pc += offset
%% Raises {offset_not_aligned, ...} for odd offsets and
%% {offset_out_of_range, ...} for offsets outside -4096..4094.
-spec blt(riscv_register(), riscv_register(), integer()) -> binary().
blt(Rs1, Rs2, Offset) when
    -4096 =< Offset, Offset =< 4094, (Offset rem 2) =:= 0
->
    % Opcode: 1100011, Funct3: 100
    Opcode = 16#63,
    Funct3 = 16#4,
    encode_b_type(Opcode, Funct3, Rs1, Rs2, Offset);
blt(_Rs1, _Rs2, Offset) when (Offset rem 2) =/= 0 ->
    error({offset_not_aligned, Offset, 2});
blt(_Rs1, _Rs2, Offset) ->
    error({offset_out_of_range, Offset, -4096, 4094}).
%% BGE - Branch if Greater or Equal (signed)
%% if (rs1 >= rs2) pc += offset
%% Raises {offset_not_aligned, ...} for odd offsets and
%% {offset_out_of_range, ...} for offsets outside -4096..4094.
-spec bge(riscv_register(), riscv_register(), integer()) -> binary().
bge(Rs1, Rs2, Offset) when
    -4096 =< Offset, Offset =< 4094, (Offset rem 2) =:= 0
->
    % Opcode: 1100011, Funct3: 101
    Opcode = 16#63,
    Funct3 = 16#5,
    encode_b_type(Opcode, Funct3, Rs1, Rs2, Offset);
bge(_Rs1, _Rs2, Offset) when (Offset rem 2) =/= 0 ->
    error({offset_not_aligned, Offset, 2});
bge(_Rs1, _Rs2, Offset) ->
    error({offset_out_of_range, Offset, -4096, 4094}).

%% BLTU - Branch if Less Than Unsigned
%% if (rs1 < rs2) pc += offset (unsigned)
%% Same error behaviour as bge/3.
-spec bltu(riscv_register(), riscv_register(), integer()) -> binary().
bltu(Rs1, Rs2, Offset) when
    -4096 =< Offset, Offset =< 4094, (Offset rem 2) =:= 0
->
    % Opcode: 1100011, Funct3: 110
    Opcode = 16#63,
    Funct3 = 16#6,
    encode_b_type(Opcode, Funct3, Rs1, Rs2, Offset);
bltu(_Rs1, _Rs2, Offset) when (Offset rem 2) =/= 0 ->
    error({offset_not_aligned, Offset, 2});
bltu(_Rs1, _Rs2, Offset) ->
    error({offset_out_of_range, Offset, -4096, 4094}).

%% BGEU - Branch if Greater or Equal Unsigned
%% if (rs1 >= rs2) pc += offset (unsigned)
%% Same error behaviour as bge/3.
-spec bgeu(riscv_register(), riscv_register(), integer()) -> binary().
bgeu(Rs1, Rs2, Offset) when
    -4096 =< Offset, Offset =< 4094, (Offset rem 2) =:= 0
->
    % Opcode: 1100011, Funct3: 111
    Opcode = 16#63,
    Funct3 = 16#7,
    encode_b_type(Opcode, Funct3, Rs1, Rs2, Offset);
bgeu(_Rs1, _Rs2, Offset) when (Offset rem 2) =/= 0 ->
    error({offset_not_aligned, Offset, 2});
bgeu(_Rs1, _Rs2, Offset) ->
    error({offset_out_of_range, Offset, -4096, 4094}).

%%-----------------------------------------------------------------------------
%% J-type instruction encoding (for JAL)
%%-----------------------------------------------------------------------------

%% J-type instruction format (JAL):
%% imm[20|10:1|11|19:12] (20) | rd (5) | opcode (7)
%% Bits:  31-12                  11-7     6-0
%%
%% The immediate represents a signed offset in multiples of 2 bytes.
%% Range: ±1 MiB (±1048576 bytes)

-spec encode_j_type(
    Opcode :: 0..127, Rd :: riscv_register(), Offset :: integer()
) -> binary().
encode_j_type(Opcode, Rd, Offset) ->
    RdNum = reg_to_num(Rd),
    % Keep the low 21 bits (two's complement for negative offsets); bit 0 is
    % not encoded
    OffsetMasked = Offset band 16#1FFFFF,
    % imm[20] -> bit 31
    Imm20 = (OffsetMasked bsr 20) band 1,
    % imm[10:1] -> bits 30-21
    Imm10_1 = (OffsetMasked bsr 1) band 16#3FF,
    % imm[11] -> bit 20
    Imm11 = (OffsetMasked bsr 11) band 1,
    % imm[19:12] -> bits 19-12
    Imm19_12 = (OffsetMasked bsr 12) band 16#FF,
    Instr =
        (Imm20 bsl 31) bor
            (Imm10_1 bsl 21) bor
            (Imm11 bsl 20) bor
            (Imm19_12 bsl 12) bor
            (RdNum bsl 7) bor
            Opcode,
    % Emit the 32-bit instruction word in little-endian byte order
    <<Instr:32/little>>.

%%-----------------------------------------------------------------------------
%% U-type instruction encoding (for LUI, AUIPC)
%%-----------------------------------------------------------------------------

%% U-type instruction format:
%% imm[31:12] (20) | rd (5) | opcode (7)
%% Bits:  31-12       11-7     6-0
%%
%% Imm is the full 32-bit value; only bits 31:12 are encoded, the low 12
%% bits are discarded.

-spec encode_u_type(
    Opcode :: 0..127, Rd :: riscv_register(), Imm :: integer()
) -> binary().
encode_u_type(Opcode, Rd, Imm) ->
    RdNum = reg_to_num(Rd),
    % Upper 20 bits of immediate
    ImmUpper = (Imm bsr 12) band 16#FFFFF,
    Instr = (ImmUpper bsl 12) bor (RdNum bsl 7) bor Opcode,
    % Emit the 32-bit instruction word in little-endian byte order
    <<Instr:32/little>>.

%%-----------------------------------------------------------------------------
%% Jump and link instructions
%%-----------------------------------------------------------------------------

%% JAL - Jump and Link
%% rd = pc + 4; pc += offset
-spec jal(riscv_register(), integer()) -> binary().
jal(Rd, Offset) when
    Offset >= -1048576, Offset =< 1048574, (Offset rem 2) =:= 0
->
    % The compressed jumps only reach -2048..2046
    FitsCompressed = Offset >= -2048 andalso Offset =< 2046,
    case Rd of
        zero when FitsCompressed ->
            % rd is zero (no link): c.j
            c_j(Offset);
        ra when FitsCompressed ->
            % rd is ra: c.jal (RV32C only)
            c_jal(Offset);
        _ ->
            % Opcode: 1101111 (0x6F)
            encode_j_type(16#6F, Rd, Offset)
    end;
jal(_Rd, Offset) when (Offset rem 2) =/= 0 ->
    error({offset_not_aligned, Offset, 2});
jal(_Rd, Offset) ->
    error({offset_out_of_range, Offset, -1048576, 1048574}).

%% JALR - Jump and Link Register
%% rd = pc + 4; pc = (rs1 + offset) & ~1
%% Raises {offset_out_of_range, ...} when the offset is outside -2048..2047.
-spec jalr(riscv_register(), riscv_register(), integer()) -> binary().
jalr(Rd, Rs1, Offset) when Offset >= -2048, Offset =< 2047 ->
    case {Rd, Offset} of
        {zero, 0} when Rs1 =/= zero ->
            % Jump to register without link (rd=zero, offset=0): c.jr
            c_jr(Rs1);
        {ra, 0} when Rs1 =/= zero ->
            % Jump to register with link (rd=ra, offset=0): c.jalr
            c_jalr(Rs1);
        _ ->
            % Opcode: 1100111 (0x67), Funct3: 000
            encode_i_type(16#67, Rd, 16#0, Rs1, Offset)
    end;
jalr(_Rd, _Rs1, Offset) ->
    error({offset_out_of_range, Offset, -2048, 2047}).

%% JALR - Jump and Link Register (no offset)
%% rd = pc + 4; pc = rs1 & ~1
-spec jalr(riscv_register(), riscv_register()) -> binary().
jalr(Rd, Rs1) ->
    % Delegates to jalr/3 with a zero offset
    jalr(Rd, Rs1, 0).

%%-----------------------------------------------------------------------------
%% Upper immediate instructions
%%-----------------------------------------------------------------------------

%% LUI - Load Upper Immediate
%% rd = imm << 12
-spec lui(riscv_register(), integer()) -> binary().
lui(Rd, Imm) when Rd =/= zero, Rd =/= sp, Imm >= -32, Imm =< 31, Imm =/= 0 ->
    % Use c.lui when imm fits in 6 bits (signed) and is non-zero.
    % rd must be neither zero nor sp: with rd = sp (x2) the same 16-bit
    % encoding is c.addi16sp, so sp takes the 32-bit form below.
    c_lui(Rd, Imm);
lui(Rd, Imm) when Imm >= -16#80000, Imm =< 16#7FFFF ->
    % Opcode: 0110111 (0x37)
    % Imm is the 20-bit upper immediate; shift it into bits 31:12
    encode_u_type(16#37, Rd, Imm bsl 12);
lui(_Rd, Imm) ->
    error({immediate_out_of_range, Imm, -16#80000, 16#7FFFF}).

%% AUIPC - Add Upper Immediate to PC
%% rd = pc + (imm << 12)
%% Raises {immediate_out_of_range, ...} when imm is outside the signed 20-bit
%% range -16#80000..16#7FFFF.
-spec auipc(riscv_register(), integer()) -> binary().
auipc(Rd, Imm) when Imm >= -16#80000, Imm =< 16#7FFFF ->
    % Opcode: 0010111 (0x17)
    encode_u_type(16#17, Rd, Imm bsl 12);
auipc(_Rd, Imm) ->
    error({immediate_out_of_range, Imm, -16#80000, 16#7FFFF}).

%%-----------------------------------------------------------------------------
%% Pseudo-instructions
%%-----------------------------------------------------------------------------
%% These are convenience instructions that map to actual RV32I instructions

%% NOP - No Operation
%% Expands to: addi x0, x0, 0
-spec nop() -> binary().
nop() ->
    addi(zero, zero, 0).

%% LI - Load Immediate
%% Load a 32-bit immediate value into a register
%% For small immediates (-2048 to 2047): addi rd, x0, imm
%% For larger immediates: lui + addi sequence
-spec li(riscv_register(), integer()) -> binary().
li(Rd, Imm) when Rd =/= zero, Imm >= -32, Imm =< 31 ->
    % Use c.li when rd != zero and imm fits in 6 bits (signed)
    c_li(Rd, Imm);
li(Rd, Imm) when Imm >= -2048, Imm =< 2047 ->
    % Small immediate: addi rd, x0, imm
    addi(Rd, zero, Imm);
li(Rd, Imm) when Imm >= -16#80000000, Imm =< 16#7FFFFFFF ->
    % Large immediate: lui + addi
    % Split into upper 20 bits and lower 12 bits. ADDI sign-extends its
    % 12-bit immediate, so when bit 11 of the lower part is set the upper
    % part is incremented by one to compensate.
    Lower = Imm band 16#FFF,
    UpperRaw =
        if
            Lower >= 16#800 ->
                (Imm bsr 12) + 1;
            true ->
                Imm bsr 12
        end,
    % Mask to 20 bits first, then sign extend if needed
    UpperMasked = UpperRaw band 16#FFFFF,
    Upper =
        if
            UpperMasked band 16#80000 =/= 0 ->
                % Bit 19 is set, so this is negative in 20-bit representation
                UpperMasked - 16#100000;
            true ->
                UpperMasked
        end,
    % Sign extend lower 12 bits
    LowerSigned =
        if
            Lower >= 16#800 -> Lower - 16#1000;
            true -> Lower
        end,
    LuiInstr = lui(Rd, Upper),
    AddiInstr = addi(Rd, Rd, LowerSigned),
    % Concatenate the two encoded instructions in execution order
    <<LuiInstr/binary, AddiInstr/binary>>;
li(_Rd, Imm) ->
    error({immediate_out_of_range, Imm, -16#80000000, 16#7FFFFFFF}).

%% MV - Move (copy register)
%% Expands to: addi rd, rs, 0 or c.mv rd, rs
-spec mv(riscv_register(), riscv_register()) -> binary().
mv(Rd, Rs) when Rd =/= zero, Rs =/= zero ->
    % Use c.mv when both rd and rs are not zero
    c_mv(Rd, Rs);
mv(Rd, Rs) ->
    addi(Rd, Rs, 0).

%% NOT - Bitwise NOT
%% Expands to: xori rd, rs, -1
-spec not_(riscv_register(), riscv_register()) -> binary().
not_(Rd, Rs) ->
    xori(Rd, Rs, -1).

%% NEG - Negate (two's complement)
%% Expands to: sub rd, x0, rs
-spec neg(riscv_register(), riscv_register()) -> binary().
neg(Rd, Rs) ->
    sub(Rd, zero, Rs).

%% J - Unconditional Jump
%% Expands to: jal x0, offset
-spec j(integer()) -> binary().
j(Offset) ->
    jal(zero, Offset).
%% JR - Jump Register
%% Expands to: jalr x0, rs, 0
-spec jr(riscv_register()) -> binary().
jr(Rs) ->
    jalr(zero, Rs, 0).

%% RET - Return from subroutine
%% Expands to: jalr x0, ra, 0
-spec ret() -> binary().
ret() ->
    jalr(zero, ra, 0).

%% CALL - Call function (far call using AUIPC + JALR)
%% This is a two-instruction sequence for calling functions beyond JAL range.
%% Rd is the register that receives the AUIPC result and serves as the JALR
%% base; the return address is always written to ra.
%% Expands to: auipc rd, offset[31:12]; jalr ra, rd, offset[11:0]
%% Raises {offset_out_of_range, ...} when the offset does not fit in a signed
%% 32-bit value.
-spec call(riscv_register(), integer()) -> binary().
call(Rd, Offset) when Offset >= -16#80000000, Offset =< 16#7FFFFFFF ->
    % Split offset into upper 20 bits and lower 12 bits
    Lower = Offset band 16#FFF,
    % JALR sign-extends its 12-bit immediate, so when bit 11 of the lower
    % part is set the upper part is incremented by one to compensate.
    {Carry, LowerSigned} =
        if
            Lower >= 16#800 -> {1, Lower - 16#1000};
            true -> {0, Lower}
        end,
    UpperMasked = ((Offset bsr 12) + Carry) band 16#FFFFF,
    % auipc/2 takes a signed 20-bit immediate (-16#80000..16#7FFFF), so the
    % masked value must be sign-extended; without this every offset whose
    % upper part is negative was rejected as out of range.
    Upper =
        if
            UpperMasked >= 16#80000 -> UpperMasked - 16#100000;
            true -> UpperMasked
        end,
    AuipcInstr = auipc(Rd, Upper),
    JalrInstr = jalr(ra, Rd, LowerSigned),
    % Concatenate the two encoded instructions in execution order
    <<AuipcInstr/binary, JalrInstr/binary>>;
call(_Rd, Offset) ->
    error({offset_out_of_range, Offset, -16#80000000, 16#7FFFFFFF}).

%% MUL - Multiply (RV32M extension)
%% Multiplies rs1 by rs2 and places the lower 32 bits in rd
%% Format: mul rd, rs1, rs2
%% Encoding: R-type with opcode=0x33, funct3=0x0, funct7=0x01
-spec mul(riscv_register(), riscv_register(), riscv_register()) -> binary().
mul(Rd, Rs1, Rs2) ->
    % Opcode: 0110011 (0x33), Funct3: 000, Funct7: 0000001
    encode_r_type(16#33, Rd, 16#0, Rs1, Rs2, 16#01).

%%-----------------------------------------------------------------------------
%% C Extension (RV32C) - Compressed Instructions
%%-----------------------------------------------------------------------------
%% The C extension adds 16-bit compressed instructions to reduce code size.
%% All compressed instructions are 16 bits (2 bytes) and use a different
%% encoding format from the base 32-bit instructions.
%%
%% Register encoding for compressed instructions:
%% - Some instructions use the full 5-bit register encoding (x0-x31)
%% - Others use 3-bit encoding for registers x8-x15 (s0, s1, a0-a5)
%%   This is called the "compressed register set" or "C register set"
%%
%% Instruction formats:
%% - CR (Register): funct4 | rd/rs1 | rs2 | op
%% - CI (Immediate): funct3 | imm | rd/rs1 | imm | op
%% - CSS (Stack Store): funct3 | imm | rs2 | op
%% - CIW (Wide Immediate): funct3 | imm | rd' | op
%% - CL (Load): funct3 | imm | rs1' | imm | rd' | op
%% - CS (Store): funct3 | imm | rs1' | imm | rs2' | op
%% - CA (Arithmetic): funct6 | rd'/rs1' | funct2 | rs2' | op
%% - CB (Branch): funct3 | offset | rs1' | offset | op
%% - CJ (Jump): funct3 | jump target | op
%%
%% See: RISC-V Instruction Set Manual, Volume I, Chapter 16
%%-----------------------------------------------------------------------------

%% Map a register of the compressed set to its 3-bit field value.
%% s0 (alias fp) maps to 0, s1 to 1 and a0..a5 to 2..7. Any other register
%% raises {register_not_in_compressed_set, Reg, ...}.
-spec reg_to_c_num(riscv_register()) -> 0..7.
reg_to_c_num(Reg) when Reg =:= s0; Reg =:= fp -> 0;
reg_to_c_num(s1) -> 1;
reg_to_c_num(a0) -> 2;
reg_to_c_num(a1) -> 3;
reg_to_c_num(a2) -> 4;
reg_to_c_num(a3) -> 5;
reg_to_c_num(a4) -> 6;
reg_to_c_num(a5) -> 7;
reg_to_c_num(Other) -> error({register_not_in_compressed_set, Other, 's0/fp, s1, a0-a5'}).

%% Tell whether a register belongs to the compressed register set
%% (s0/fp, s1, a0-a5).
-spec is_compressed_reg(riscv_register()) -> boolean().
is_compressed_reg(Reg) when
    Reg =:= s0;
    Reg =:= fp;
    Reg =:= s1;
    Reg =:= a0;
    Reg =:= a1;
    Reg =:= a2;
    Reg =:= a3;
    Reg =:= a4;
    Reg =:= a5
->
    true;
is_compressed_reg(_Other) ->
    false.
%%-----------------------------------------------------------------------------
%% CR-type instruction encoding (Compressed Register format)
%%-----------------------------------------------------------------------------
%% CR format: funct4 (4) | rd/rs1 (5) | rs2 (5) | op (2)
%% Bits:      15-12        11-7         6-2       1-0

-spec encode_cr_type(
    Opcode :: 0..3,
    Rd :: riscv_register(),
    Rs2 :: riscv_register(),
    Funct4 :: 0..15
) -> binary().
encode_cr_type(Opcode, Rd, Rs2, Funct4) ->
    % Both registers use the full 5-bit register numbering
    RdNum = reg_to_num(Rd),
    Rs2Num = reg_to_num(Rs2),
    Instr =
        (Funct4 bsl 12) bor
            (RdNum bsl 7) bor
            (Rs2Num bsl 2) bor
            Opcode,
    % Emit the 16-bit compressed instruction in little-endian byte order
    <<Instr:16/little>>.

%%-----------------------------------------------------------------------------
%% CI-type instruction encoding (Compressed Immediate format)
%%-----------------------------------------------------------------------------
%% CI format: funct3 (3) | imm[5] (1) | rd/rs1 (5) | imm[4:0] (5) | op (2)
%% Bits:      15-13        12           11-7         6-2            1-0

-spec encode_ci_type(
    Opcode :: 0..3,
    Rd :: riscv_register(),
    Imm :: integer(),
    Funct3 :: 0..7
) -> binary().
encode_ci_type(Opcode, Rd, Imm, Funct3) ->
    RdNum = reg_to_num(Rd),
    % Keep the low 6 bits of the immediate, then split off bit 5
    ImmMasked = Imm band 16#3F,
    Imm5 = (ImmMasked bsr 5) band 1,
    Imm4_0 = ImmMasked band 16#1F,
    Instr =
        (Funct3 bsl 13) bor
            (Imm5 bsl 12) bor
            (RdNum bsl 7) bor
            (Imm4_0 bsl 2) bor
            Opcode,
    % Emit the 16-bit compressed instruction in little-endian byte order
    <<Instr:16/little>>.

%%-----------------------------------------------------------------------------
%% CSS-type instruction encoding (Compressed Stack Store format)
%%-----------------------------------------------------------------------------
%% CSS format: funct3 (3) | imm[5:0] (6) | rs2 (5) | op (2)
%% Bits:       15-13        12-7           6-2       1-0

-spec encode_css_type(
    Opcode :: 0..3,
    Rs2 :: riscv_register(),
    Imm :: integer(),
    Funct3 :: 0..7
) -> binary().
encode_css_type(Opcode, Rs2, Imm, Funct3) ->
    Rs2Num = reg_to_num(Rs2),
    % The 6-bit immediate field is placed as-is in bits 12-7; any scaling or
    % bit permutation (e.g. for word access) is left to the caller
    ImmMasked = Imm band 16#3F,
    Instr =
        (Funct3 bsl 13) bor
            (ImmMasked bsl 7) bor
            (Rs2Num bsl 2) bor
            Opcode,
    % Emit the 16-bit compressed instruction in little-endian byte order
    <<Instr:16/little>>.

%%-----------------------------------------------------------------------------
%% CIW-type instruction encoding (Compressed Wide Immediate format)
%%-----------------------------------------------------------------------------
%% CIW format: funct3 (3) | imm[7:0] (8) | rd' (3) | op (2)
%% Bits:       15-13        12-5           4-2       1-0

-spec encode_ciw_type(
    Opcode :: 0..3,
    Rd :: riscv_register(),
    Imm :: integer(),
    Funct3 :: 0..7
) -> binary().
encode_ciw_type(Opcode, Rd, Imm, Funct3) ->
    % rd' uses the 3-bit compressed register numbering
    RdNum = reg_to_c_num(Rd),
    ImmMasked = Imm band 16#FF,
    Instr =
        (Funct3 bsl 13) bor
            (ImmMasked bsl 5) bor
            (RdNum bsl 2) bor
            Opcode,
    % Emit the 16-bit compressed instruction in little-endian byte order
    <<Instr:16/little>>.

%%-----------------------------------------------------------------------------
%% CL-type instruction encoding (Compressed Load format)
%%-----------------------------------------------------------------------------
%% CL format: funct3 (3) | imm (3) | rs1' (3) | imm (2) | rd' (3) | op (2)
%% Bits:      15-13        12-10     9-7        6-5       4-2       1-0

-spec encode_cl_type(
    Opcode :: 0..3,
    Rd :: riscv_register(),
    Rs1 :: riscv_register(),
    Imm :: integer(),
    Funct3 :: 0..7
) -> binary().
encode_cl_type(Opcode, Rd, Rs1, Imm, Funct3) ->
    % Both registers use the 3-bit compressed register numbering
    RdNum = reg_to_c_num(Rd),
    Rs1Num = reg_to_c_num(Rs1),
    % For LW: imm[5:3] goes to bits 12-10, imm[2] goes to bit 6, imm[6] goes to bit 5
    ImmMasked = Imm band 16#7F,
    Imm5_3 = (ImmMasked bsr 3) band 7,
    Imm2 = (ImmMasked bsr 2) band 1,
    Imm6 = (ImmMasked bsr 6) band 1,
    Instr =
        (Funct3 bsl 13) bor
            (Imm5_3 bsl 10) bor
            (Rs1Num bsl 7) bor
            (Imm2 bsl 6) bor
            (Imm6 bsl 5) bor
            (RdNum bsl 2) bor
            Opcode,
    % Emit the 16-bit compressed instruction in little-endian byte order
    <<Instr:16/little>>.
+
+%%-----------------------------------------------------------------------------
+%% CS-type instruction encoding (Compressed Store format)
+%%-----------------------------------------------------------------------------
+%% CS format: funct3 (3) | imm (3) | rs1' (3) | imm (2) | rs2' (3) | op (2)
+%% Bits:      15-13        12-10     9-7        6-5       4-2        1-0
+%%
+%% Both registers are converted with reg_to_c_num/1 (3-bit compressed fields).
+%% Imm is the byte offset itself; its bits are scattered using the word-access
+%% (SW) layout. The result is a 2-byte little-endian binary.
+
+-spec encode_cs_type(
+    Opcode :: 0..3,
+    Rs1 :: riscv_register(),
+    Rs2 :: riscv_register(),
+    Imm :: integer(),
+    Funct3 :: 0..7
+) -> binary().
+encode_cs_type(Opcode, Rs1, Rs2, Imm, Funct3) ->
+    Rs1Num = reg_to_c_num(Rs1),
+    Rs2Num = reg_to_c_num(Rs2),
+    % For SW: imm[5:3] goes to bits 12-10, imm[2] goes to bit 6, imm[6] goes to bit 5
+    ImmMasked = Imm band 16#7F,
+    Imm5_3 = (ImmMasked bsr 3) band 7,
+    Imm2 = (ImmMasked bsr 2) band 1,
+    Imm6 = (ImmMasked bsr 6) band 1,
+    Instr =
+        (Funct3 bsl 13) bor
+            (Imm5_3 bsl 10) bor
+            (Rs1Num bsl 7) bor
+            (Imm2 bsl 6) bor
+            (Imm6 bsl 5) bor
+            (Rs2Num bsl 2) bor
+            Opcode,
+    <<Instr:16/little>>.
+
+%%-----------------------------------------------------------------------------
+%% CA-type instruction encoding (Compressed Arithmetic format)
+%%-----------------------------------------------------------------------------
+%% CA format: funct6 (6) | rd'/rs1' (3) | funct2 (2) | rs2' (3) | op (2)
+%% Bits:      15-10        9-7            6-5          4-2        1-0
+%%
+%% Both registers are converted with reg_to_c_num/1 (3-bit compressed fields).
+%% The result is a 2-byte little-endian binary.
+
+-spec encode_ca_type(
+    Opcode :: 0..3,
+    Rd :: riscv_register(),
+    Rs2 :: riscv_register(),
+    Funct2 :: 0..3,
+    Funct6 :: 0..63
+) -> binary().
+encode_ca_type(Opcode, Rd, Rs2, Funct2, Funct6) ->
+    RdNum = reg_to_c_num(Rd),
+    Rs2Num = reg_to_c_num(Rs2),
+    Instr =
+        (Funct6 bsl 10) bor
+            (RdNum bsl 7) bor
+            (Funct2 bsl 5) bor
+            (Rs2Num bsl 2) bor
+            Opcode,
+    <<Instr:16/little>>.
+
+%%-----------------------------------------------------------------------------
+%% CB-type instruction encoding (Compressed Branch format)
+%%-----------------------------------------------------------------------------
+%% CB format: funct3 (3) | offset (3) | rs1' (3) | offset (5) | op (2)
+%% Bits:      15-13        12-10        9-7        6-2          1-0
+%% Offset encoding: offset[8|4:3] -> bits [12|11:10],
+%%                  offset[7:6|2:1|5] -> bits [6:5|4:3|2]
+%%
+%% Offset is the byte offset itself; bit 0 is not encoded. The result is a
+%% 2-byte little-endian binary.
+
+-spec encode_cb_type(
+    Opcode :: 0..3,
+    Rs1 :: riscv_register(),
+    Offset :: integer(),
+    Funct3 :: 0..7
+) -> binary().
+encode_cb_type(Opcode, Rs1, Offset, Funct3) ->
+    Rs1Num = reg_to_c_num(Rs1),
+    % Keep the low 9 bits; for a negative Offset this is its two's-complement
+    % field value.
+    OffsetMasked = Offset band 16#1FF,
+    Offset8 = (OffsetMasked bsr 8) band 1,
+    Offset4_3 = (OffsetMasked bsr 3) band 3,
+    Offset7_6 = (OffsetMasked bsr 6) band 3,
+    Offset2_1 = (OffsetMasked bsr 1) band 3,
+    Offset5 = (OffsetMasked bsr 5) band 1,
+    Instr =
+        (Funct3 bsl 13) bor
+            (Offset8 bsl 12) bor
+            (Offset4_3 bsl 10) bor
+            (Rs1Num bsl 7) bor
+            (Offset7_6 bsl 5) bor
+            (Offset2_1 bsl 3) bor
+            (Offset5 bsl 2) bor
+            Opcode,
+    <<Instr:16/little>>.
+
+%%-----------------------------------------------------------------------------
+%% CJ-type instruction encoding (Compressed Jump format)
+%%-----------------------------------------------------------------------------
+%% CJ format: funct3 (3) | jump target (11) | op (2)
+%% Bits:      15-13        12-2               1-0
+%% Target encoding: target[11|4|9:8|10|6|7|3:1|5] -> bits [12|11|10:9|8|7|6|5:3|2]
+%%
+%% Offset is the byte offset itself; bit 0 is not encoded. The result is a
+%% 2-byte little-endian binary.
+
+-spec encode_cj_type(Opcode :: 0..3, Offset :: integer(), Funct3 :: 0..7) -> binary().
+encode_cj_type(Opcode, Offset, Funct3) ->
+    % Extract offset bits: offset[11|4|9:8|10|6|7|3:1|5]
+    OffsetMasked = Offset band 16#FFF,
+    Offset11 = (OffsetMasked bsr 11) band 1,
+    Offset4 = (OffsetMasked bsr 4) band 1,
+    Offset9_8 = (OffsetMasked bsr 8) band 3,
+    Offset10 = (OffsetMasked bsr 10) band 1,
+    Offset6 = (OffsetMasked bsr 6) band 1,
+    Offset7 = (OffsetMasked bsr 7) band 1,
+    Offset3_1 = (OffsetMasked bsr 1) band 7,
+    Offset5 = (OffsetMasked bsr 5) band 1,
+    % 11-bit field as it appears in instruction bits 12-2 (bit 10 here = bit 12)
+    OffsetBits =
+        (Offset11 bsl 10) bor
+            (Offset4 bsl 9) bor
+            (Offset9_8 bsl 7) bor
+            (Offset10 bsl 6) bor
+            (Offset6 bsl 5) bor
+            (Offset7 bsl 4) bor
+            (Offset3_1 bsl 1) bor
+            Offset5,
+    Instr =
+        (Funct3 bsl 13) bor
+            (OffsetBits bsl 2) bor
+            Opcode,
+    <<Instr:16/little>>.
+
+%%-----------------------------------------------------------------------------
+%% C Extension - Arithmetic and Logical Instructions
+%%-----------------------------------------------------------------------------
+
+%% C.ADD - Compressed Add
+%% rd = rd + rs2 (both rd and rs2 are full 5-bit registers)
+%% Format: CR-type
+%% Encoding: funct4=1001 (0x9), op=10 (0x2)
+%% rs2 must not be zero: with rs2=x0 the same bit pattern is what c_jalr/1
+%% emits (and what c_ebreak/0 emits when rd is also x0).
+-spec c_add(riscv_register(), riscv_register()) -> binary().
+c_add(Rd, Rs2) when Rs2 =/= zero ->
+    encode_cr_type(16#2, Rd, Rs2, 16#9);
+c_add(_Rd, zero) ->
+    error({invalid_compressed_instruction, c_add, 'rs2 cannot be zero'}).
+
+%% C.MV - Compressed Move (copy register)
+%% rd = rs2 (both are full 5-bit registers)
+%% Format: CR-type
+%% Encoding: funct4=1000 (0x8), op=10 (0x2)
+%% rs2 must not be zero: with rs2=x0 the same bit pattern is what c_jr/1 emits.
+-spec c_mv(riscv_register(), riscv_register()) -> binary().
+c_mv(Rd, Rs2) when Rs2 =/= zero ->
+    encode_cr_type(16#2, Rd, Rs2, 16#8);
+c_mv(_Rd, zero) ->
+    error({invalid_compressed_instruction, c_mv, 'rs2 cannot be zero'}).
+
+%% C.SUB - Compressed Subtract
+%% rd' = rd' - rs2' (both use 3-bit compressed register encoding)
+%% Format: CA-type
+%% Encoding: funct6=100011 (0x23), funct2=00, op=01 (0x1)
+-spec c_sub(riscv_register(), riscv_register()) -> binary().
+c_sub(Rd, Rs2) ->
+    encode_ca_type(16#1, Rd, Rs2, 16#0, 16#23).
+
+%% C.AND - Compressed Bitwise AND
+%% rd' = rd' & rs2'
+%% Format: CA-type
+%% Encoding: funct6=100011 (0x23), funct2=11, op=01 (0x1)
+-spec c_and(riscv_register(), riscv_register()) -> binary().
+c_and(Rd, Rs2) ->
+    encode_ca_type(16#1, Rd, Rs2, 16#3, 16#23).
+
+%% C.OR - Compressed Bitwise OR
+%% rd' = rd' | rs2'
+%% Format: CA-type
+%% Encoding: funct6=100011 (0x23), funct2=10, op=01 (0x1)
+-spec c_or(riscv_register(), riscv_register()) -> binary().
+c_or(Rd, Rs2) ->
+    encode_ca_type(16#1, Rd, Rs2, 16#2, 16#23).
+
+%% C.XOR - Compressed Bitwise XOR
+%% rd' = rd' ^ rs2'
+%% Format: CA-type
+%% Encoding: funct6=100011 (0x23), funct2=01, op=01 (0x1)
+-spec c_xor(riscv_register(), riscv_register()) -> binary().
+c_xor(Rd, Rs2) ->
+    encode_ca_type(16#1, Rd, Rs2, 16#1, 16#23).
+
+%%-----------------------------------------------------------------------------
+%% C Extension - Immediate Instructions
+%%-----------------------------------------------------------------------------
+
+%% C.ADDI - Compressed Add Immediate
+%% rd = rd + imm (rd is full 5-bit register, imm is 6-bit signed)
+%% Format: CI-type
+%% Encoding: funct3=000, op=01 (0x1)
+%% Raises invalid_compressed_instruction for rd=zero and
+%% immediate_out_of_range for imm outside -32..31.
+-spec c_addi(riscv_register(), integer()) -> binary().
+c_addi(Rd, Imm) when Imm >= -32, Imm =< 31, Rd =/= zero ->
+    encode_ci_type(16#1, Rd, Imm, 16#0);
+c_addi(zero, _Imm) ->
+    error({invalid_compressed_instruction, c_addi, 'rd cannot be zero'});
+c_addi(_Rd, Imm) ->
+    error({immediate_out_of_range, Imm, -32, 31}).
+
+%% C.ANDI - Compressed AND Immediate
+%% rd' = rd' & imm (rd' uses 3-bit encoding, imm is 6-bit signed)
+%% Format: CB-type (with special encoding)
+%% Encoding: funct3=100, imm[5]=bit12, funct2=10, imm[4:0]=bits 6:2, op=01
+%% Returns a 2-byte little-endian binary; raises immediate_out_of_range for
+%% imm outside -32..31.
+-spec c_andi(riscv_register(), integer()) -> binary().
+c_andi(Rd, Imm) when Imm >= -32, Imm =< 31 ->
+    RdNum = reg_to_c_num(Rd),
+    % Low 6 bits are the two's-complement field value of Imm
+    ImmMasked = Imm band 16#3F,
+    Imm5 = (ImmMasked bsr 5) band 1,
+    Imm4_0 = ImmMasked band 16#1F,
+    Instr =
+        (16#4 bsl 13) bor
+            (Imm5 bsl 12) bor
+            (16#2 bsl 10) bor
+            (RdNum bsl 7) bor
+            (Imm4_0 bsl 2) bor
+            16#1,
+    <<Instr:16/little>>;
+c_andi(_Rd, Imm) ->
+    error({immediate_out_of_range, Imm, -32, 31}).
+
+%% C.LI - Compressed Load Immediate
+%% rd = imm (rd is full 5-bit register, imm is 6-bit signed)
+%% Format: CI-type
+%% Encoding: funct3=010, op=01 (0x1)
+-spec c_li(riscv_register(), integer()) -> binary().
+c_li(Rd, Imm) when Imm >= -32, Imm =< 31 ->
+    encode_ci_type(16#1, Rd, Imm, 16#2);
+c_li(_Rd, Imm) ->
+    error({immediate_out_of_range, Imm, -32, 31}).
+
+%% C.LUI - Compressed Load Upper Immediate
+%% rd = imm << 12 (rd is full 5-bit register, imm is 6-bit signed non-zero)
+%% Format: CI-type
+%% Encoding: funct3=011, op=01 (0x1)
+%% rd=sp is rejected because funct3=011 with rd=sp is the encoding used by
+%% c_addi16sp/1. The register check takes precedence over the immediate checks.
+-spec c_lui(riscv_register(), integer()) -> binary().
+c_lui(Rd, Imm) when Imm >= -32, Imm =< 31, Imm =/= 0, Rd =/= zero, Rd =/= sp ->
+    encode_ci_type(16#1, Rd, Imm, 16#3);
+c_lui(Rd, _Imm) when Rd =:= zero; Rd =:= sp ->
+    error({invalid_compressed_instruction, c_lui, 'rd cannot be zero or sp'});
+c_lui(_Rd, 0) ->
+    error({invalid_compressed_instruction, c_lui, 'immediate cannot be zero'});
+c_lui(_Rd, Imm) ->
+    error({immediate_out_of_range, Imm, -32, 31}).
+
+%% C.ADDI16SP - Compressed Add Immediate to SP (scaled by 16)
+%% sp = sp + imm (imm is 10-bit signed, must be multiple of 16, non-zero)
+%% Format: CI-type (special encoding)
+%% Encoding: funct3=011, rd/rs1=sp (x2), op=01
+%% Errors are checked in this order: zero, misaligned, out of range.
+-spec c_addi16sp(integer()) -> binary().
+c_addi16sp(Imm) when
+    Imm >= -512, Imm =< 496, (Imm rem 16) =:= 0, Imm =/= 0
+->
+    % Immediate encoding: nzimm[9|4|6|8:7|5] -> bits [12|6|5|4:3|2]
+    ImmMasked = Imm band 16#3FF,
+    Imm9 = (ImmMasked bsr 9) band 1,
+    Imm4 = (ImmMasked bsr 4) band 1,
+    Imm6 = (ImmMasked bsr 6) band 1,
+    Imm8_7 = (ImmMasked bsr 7) band 3,
+    Imm5 = (ImmMasked bsr 5) band 1,
+    % Pre-arranged 6-bit field: encode_ci_type/4 puts bit 5 at instruction
+    % bit 12 and bits 4:0 at instruction bits 6:2.
+    ImmBits = (Imm9 bsl 5) bor (Imm4 bsl 4) bor (Imm6 bsl 3) bor (Imm8_7 bsl 1) bor Imm5,
+    encode_ci_type(16#1, sp, ImmBits, 16#3);
+c_addi16sp(0) ->
+    error({invalid_compressed_instruction, c_addi16sp, 'immediate cannot be zero'});
+c_addi16sp(Imm) when (Imm rem 16) =/= 0 ->
+    error({immediate_not_aligned, Imm, 16});
+c_addi16sp(Imm) ->
+    error({immediate_out_of_range, Imm, -512, 496}).
+
+%% C.ADDI4SPN - Compressed Add Immediate (scaled by 4) to SP, store in rd'
+%% rd' = sp + imm (imm is 10-bit unsigned, must be multiple of 4, non-zero)
+%% Format: CIW-type
+%% Encoding: funct3=000, op=00 (0x0)
+%% Errors are checked in this order: zero, misaligned, out of range.
+-spec c_addi4spn(riscv_register(), integer()) -> binary().
+c_addi4spn(Rd, Imm) when
+    Imm >= 4, Imm =< 1020, (Imm rem 4) =:= 0
+->
+    % Immediate encoding: nzuimm[5:4|9:6|2|3] -> bits [12:11|10:7|6|5]
+    ImmMasked = Imm band 16#3FF,
+    Imm5_4 = (ImmMasked bsr 4) band 3,
+    Imm9_6 = (ImmMasked bsr 6) band 15,
+    Imm2 = (ImmMasked bsr 2) band 1,
+    Imm3 = (ImmMasked bsr 3) band 1,
+    % Pre-arranged 8-bit field; encode_ciw_type/4 places it at bits 12-5
+    ImmBits = (Imm5_4 bsl 6) bor (Imm9_6 bsl 2) bor (Imm2 bsl 1) bor Imm3,
+    encode_ciw_type(16#0, Rd, ImmBits, 16#0);
+c_addi4spn(_Rd, Imm) when Imm =:= 0 ->
+    error({invalid_compressed_instruction, c_addi4spn, 'immediate cannot be zero'});
+c_addi4spn(_Rd, Imm) when (Imm rem 4) =/= 0 ->
+    error({immediate_not_aligned, Imm, 4});
+c_addi4spn(_Rd, Imm) ->
+    error({immediate_out_of_range, Imm, 4, 1020}).
+
+%%-----------------------------------------------------------------------------
+%% C Extension - Shift Instructions
+%%-----------------------------------------------------------------------------
+%% NOTE(review): the three shift encoders below accept shamt 0..63. RV32C
+%% reserves encodings with shamt[5]=1; if this module only targets RV32, the
+%% upper bound should presumably be 31 - confirm against callers and tests.
+
+%% C.SLLI - Compressed Shift Left Logical Immediate
+%% rd = rd << shamt (rd is full 5-bit register, shamt is 6-bit unsigned)
+%% Format: CI-type
+%% Encoding: funct3=000, op=10 (0x2)
+-spec c_slli(riscv_register(), 0..63) -> binary().
+c_slli(Rd, Shamt) when Shamt >= 0, Shamt =< 63, Rd =/= zero ->
+    encode_ci_type(16#2, Rd, Shamt, 16#0);
+c_slli(zero, _Shamt) ->
+    error({invalid_compressed_instruction, c_slli, 'rd cannot be zero'});
+c_slli(_Rd, Shamt) ->
+    error({shift_amount_out_of_range, Shamt, 0, 63}).
+
+%% C.SRLI - Compressed Shift Right Logical Immediate
+%% rd' = rd' >> shamt (rd' uses 3-bit encoding, shamt is 6-bit unsigned)
+%% Format: CB-type (with special encoding)
+%% Encoding: funct3=100, shamt[5]=bit12, funct2=00, shamt[4:0]=bits 6:2, op=01
+%% Returns a 2-byte little-endian binary.
+-spec c_srli(riscv_register(), 0..63) -> binary().
+c_srli(Rd, Shamt) when Shamt >= 0, Shamt =< 63 ->
+    RdNum = reg_to_c_num(Rd),
+    Shamt5 = (Shamt bsr 5) band 1,
+    Shamt4_0 = Shamt band 16#1F,
+    Instr =
+        (16#4 bsl 13) bor
+            (Shamt5 bsl 12) bor
+            (16#0 bsl 10) bor
+            (RdNum bsl 7) bor
+            (Shamt4_0 bsl 2) bor
+            16#1,
+    <<Instr:16/little>>;
+c_srli(_Rd, Shamt) ->
+    error({shift_amount_out_of_range, Shamt, 0, 63}).
+
+%% C.SRAI - Compressed Shift Right Arithmetic Immediate
+%% rd' = rd' >> shamt (sign-extend, rd' uses 3-bit encoding, shamt is 6-bit unsigned)
+%% Format: CB-type (with special encoding)
+%% Encoding: funct3=100, shamt[5]=bit12, funct2=01, shamt[4:0]=bits 6:2, op=01
+%% Returns a 2-byte little-endian binary.
+-spec c_srai(riscv_register(), 0..63) -> binary().
+c_srai(Rd, Shamt) when Shamt >= 0, Shamt =< 63 ->
+    RdNum = reg_to_c_num(Rd),
+    Shamt5 = (Shamt bsr 5) band 1,
+    Shamt4_0 = Shamt band 16#1F,
+    Instr =
+        (16#4 bsl 13) bor
+            (Shamt5 bsl 12) bor
+            (16#1 bsl 10) bor
+            (RdNum bsl 7) bor
+            (Shamt4_0 bsl 2) bor
+            16#1,
+    <<Instr:16/little>>;
+c_srai(_Rd, Shamt) ->
+    error({shift_amount_out_of_range, Shamt, 0, 63}).
+
+%%-----------------------------------------------------------------------------
+%% C Extension - Load/Store Instructions
+%%-----------------------------------------------------------------------------
+
+%% C.LW - Compressed Load Word
+%% rd' = mem[rs1' + offset] (both use 3-bit encoding, offset is 7-bit unsigned, multiple of 4)
+%% Format: CL-type
+%% Encoding: funct3=010, op=00 (0x0)
+%% Misalignment is reported before range.
+-spec c_lw(riscv_register(), {riscv_register(), integer()}) -> binary().
+c_lw(Rd, {Rs1, Offset}) when
+    Offset >= 0, Offset =< 124, (Offset rem 4) =:= 0
+->
+    encode_cl_type(16#0, Rd, Rs1, Offset, 16#2);
+c_lw(_Rd, {_Rs1, Offset}) when (Offset rem 4) =/= 0 ->
+    error({offset_not_aligned, Offset, 4});
+c_lw(_Rd, {_Rs1, Offset}) ->
+    error({offset_out_of_range, Offset, 0, 124}).
+
+%% C.SW - Compressed Store Word
+%% mem[rs1' + offset] = rs2' (both use 3-bit encoding, offset is 7-bit unsigned, multiple of 4)
+%% Format: CS-type
+%% Encoding: funct3=110, op=00 (0x0)
+%% Misalignment is reported before range.
+-spec c_sw(riscv_register(), {riscv_register(), integer()}) -> binary().
+c_sw(Rs2, {Rs1, Offset}) when
+    Offset >= 0, Offset =< 124, (Offset rem 4) =:= 0
+->
+    encode_cs_type(16#0, Rs1, Rs2, Offset, 16#6);
+c_sw(_Rs2, {_Rs1, Offset}) when (Offset rem 4) =/= 0 ->
+    error({offset_not_aligned, Offset, 4});
+c_sw(_Rs2, {_Rs1, Offset}) ->
+    error({offset_out_of_range, Offset, 0, 124}).
+
+%% C.LWSP - Compressed Load Word from Stack Pointer
+%% rd = mem[sp + offset] (rd is full 5-bit register, offset is 8-bit unsigned, multiple of 4)
+%% Format: CI-type (special encoding)
+%% Encoding: funct3=010, op=10 (0x2)
+%% Errors are checked in this order: rd=zero, misaligned, out of range.
+-spec c_lwsp(riscv_register(), integer()) -> binary().
+c_lwsp(Rd, Offset) when
+    Offset >= 0, Offset =< 252, (Offset rem 4) =:= 0, Rd =/= zero
+->
+    % Offset encoding: offset[5|4:2|7:6] -> bits [12|6:4|3:2]
+    OffsetMasked = Offset band 16#FF,
+    Offset5 = (OffsetMasked bsr 5) band 1,
+    Offset4_2 = (OffsetMasked bsr 2) band 7,
+    Offset7_6 = (OffsetMasked bsr 6) band 3,
+    % Pre-arranged 6-bit field for encode_ci_type/4
+    ImmBits = (Offset5 bsl 5) bor (Offset4_2 bsl 2) bor Offset7_6,
+    encode_ci_type(16#2, Rd, ImmBits, 16#2);
+c_lwsp(zero, _Offset) ->
+    error({invalid_compressed_instruction, c_lwsp, 'rd cannot be zero'});
+c_lwsp(_Rd, Offset) when (Offset rem 4) =/= 0 ->
+    error({offset_not_aligned, Offset, 4});
+c_lwsp(_Rd, Offset) ->
+    error({offset_out_of_range, Offset, 0, 252}).
+
+%% C.SWSP - Compressed Store Word to Stack Pointer
+%% mem[sp + offset] = rs2 (rs2 is full 5-bit register, offset is 8-bit unsigned, multiple of 4)
+%% Format: CSS-type
+%% Encoding: funct3=110, op=10 (0x2)
+%% Misalignment is reported before range.
+-spec c_swsp(riscv_register(), integer()) -> binary().
+c_swsp(Rs2, Offset) when
+    Offset >= 0, Offset =< 252, (Offset rem 4) =:= 0
+->
+    % Offset encoding: offset[5:2|7:6] -> bits [12:9|8:7]
+    OffsetMasked = Offset band 16#FF,
+    Offset5_2 = (OffsetMasked bsr 2) band 15,
+    Offset7_6 = (OffsetMasked bsr 6) band 3,
+    % Pre-arranged 6-bit field; encode_css_type/4 places it at bits 12-7
+    ImmBits = (Offset5_2 bsl 2) bor Offset7_6,
+    encode_css_type(16#2, Rs2, ImmBits, 16#6);
+c_swsp(_Rs2, Offset) when (Offset rem 4) =/= 0 ->
+    error({offset_not_aligned, Offset, 4});
+c_swsp(_Rs2, Offset) ->
+    error({offset_out_of_range, Offset, 0, 252}).
+
+%%-----------------------------------------------------------------------------
+%% C Extension - Branch and Jump Instructions
+%%-----------------------------------------------------------------------------
+
+%% C.BEQZ - Compressed Branch if Equal to Zero
+%% if (rs1' == 0) pc += offset (rs1' uses 3-bit encoding, offset is 9-bit signed, multiple of 2)
+%% Format: CB-type
+%% Encoding: funct3=110, op=01 (0x1)
+%% Misalignment is reported before range.
+-spec c_beqz(riscv_register(), integer()) -> binary().
+c_beqz(Rs1, Offset) when
+    Offset >= -256, Offset =< 254, (Offset rem 2) =:= 0
+->
+    encode_cb_type(16#1, Rs1, Offset, 16#6);
+c_beqz(_Rs1, Offset) when (Offset rem 2) =/= 0 ->
+    error({offset_not_aligned, Offset, 2});
+c_beqz(_Rs1, Offset) ->
+    error({offset_out_of_range, Offset, -256, 254}).
+
+%% C.BNEZ - Compressed Branch if Not Equal to Zero
+%% if (rs1' != 0) pc += offset (rs1' uses 3-bit encoding, offset is 9-bit signed, multiple of 2)
+%% Format: CB-type
+%% Encoding: funct3=111, op=01 (0x1)
+%% Misalignment is reported before range.
+-spec c_bnez(riscv_register(), integer()) -> binary().
+c_bnez(Rs1, Offset) when
+    Offset >= -256, Offset =< 254, (Offset rem 2) =:= 0
+->
+    encode_cb_type(16#1, Rs1, Offset, 16#7);
+c_bnez(_Rs1, Offset) when (Offset rem 2) =/= 0 ->
+    error({offset_not_aligned, Offset, 2});
+c_bnez(_Rs1, Offset) ->
+    error({offset_out_of_range, Offset, -256, 254}).
+
+%% C.J - Compressed Unconditional Jump
+%% pc += offset (offset is 12-bit signed, multiple of 2)
+%% Format: CJ-type
+%% Encoding: funct3=101, op=01 (0x1)
+%% Misalignment is reported before range.
+-spec c_j(integer()) -> binary().
+c_j(Offset) when
+    Offset >= -2048, Offset =< 2046, (Offset rem 2) =:= 0
+->
+    encode_cj_type(16#1, Offset, 16#5);
+c_j(Offset) when (Offset rem 2) =/= 0 ->
+    error({offset_not_aligned, Offset, 2});
+c_j(Offset) ->
+    error({offset_out_of_range, Offset, -2048, 2046}).
+
+%% C.JAL - Compressed Jump and Link (RV32C only, rd is implicitly ra)
+%% ra = pc + 2; pc += offset (offset is 12-bit signed, multiple of 2)
+%% Format: CJ-type
+%% Encoding: funct3=001 (0x1), op=01 (0x1)
+%% Misalignment is reported before range.
+-spec c_jal(integer()) -> binary().
+c_jal(Offset) when
+    Offset >= -2048, Offset =< 2046, (Offset rem 2) =:= 0
+->
+    encode_cj_type(16#1, Offset, 16#1);
+c_jal(Offset) when (Offset rem 2) =/= 0 ->
+    error({offset_not_aligned, Offset, 2});
+c_jal(Offset) ->
+    error({offset_out_of_range, Offset, -2048, 2046}).
+ +%% C.JR - Compressed Jump Register +%% pc = rs1 (rs1 is full 5-bit register, must not be zero) +%% Format: CR-type +%% Encoding: funct4=1000 (0x8), rs2=x0, op=10 (0x2) +-spec c_jr(riscv_register()) -> binary(). +c_jr(Rs1) when Rs1 =/= zero -> + encode_cr_type(16#2, Rs1, zero, 16#8); +c_jr(zero) -> + error({invalid_compressed_instruction, c_jr, 'rs1 cannot be zero'}). + +%% C.JALR - Compressed Jump and Link Register +%% ra = pc + 2; pc = rs1 (rs1 is full 5-bit register, must not be zero) +%% Format: CR-type +%% Encoding: funct4=1001 (0x9), rs2=x0, op=10 (0x2) +-spec c_jalr(riscv_register()) -> binary(). +c_jalr(Rs1) when Rs1 =/= zero -> + encode_cr_type(16#2, Rs1, zero, 16#9); +c_jalr(zero) -> + error({invalid_compressed_instruction, c_jalr, 'rs1 cannot be zero'}). + +%% C.EBREAK - Compressed Environment Breakpoint +%% Causes a breakpoint exception to be raised +%% Format: CR-type +%% Encoding: funct4=1001 (0x9), rs1/rd=x0, rs2=x0, op=10 (0x2) +-spec c_ebreak() -> binary(). +c_ebreak() -> + encode_cr_type(16#2, zero, zero, 16#9). + +%%----------------------------------------------------------------------------- +%% C Extension - Pseudo-instructions +%%----------------------------------------------------------------------------- + +%% C.NOP - Compressed No Operation +%% Expands to: c.addi x0, 0 +%% Format: CI-type +%% Encoding: funct3=000, rd/rs1=x0, imm=0, op=01 (0x1) +-spec c_nop() -> binary(). +c_nop() -> + encode_ci_type(16#1, zero, 0, 16#0). diff --git a/libs/jit/src/jit_stream_binary.erl b/libs/jit/src/jit_stream_binary.erl index db433c12ea..26e32bafa0 100644 --- a/libs/jit/src/jit_stream_binary.erl +++ b/libs/jit/src/jit_stream_binary.erl @@ -27,7 +27,8 @@ offset/1, append/2, replace/3, - map/4 + map/4, + flush/1 ]). -export_type([stream/0]). @@ -93,3 +94,14 @@ map(Stream, Offset, Length, MapFunction) -> {Prefix, <>} = split_binary(Stream, Offset), Replacement = MapFunction(Previous), <>. 
+ +%%----------------------------------------------------------------------------- +%% @param Stream stream to flush +%% @returns The stream flushed +%% @doc Flush the stream. NOP with binaries. +%% +%% @end +%%----------------------------------------------------------------------------- +-spec flush(stream()) -> stream(). +flush(Stream) -> + Stream. diff --git a/libs/jit/src/jit_stream_mmap.erl b/libs/jit/src/jit_stream_mmap.erl index d8129f9a41..4429146c4e 100644 --- a/libs/jit/src/jit_stream_mmap.erl +++ b/libs/jit/src/jit_stream_mmap.erl @@ -27,7 +27,8 @@ offset/1, append/2, replace/3, - map/4 + map/4, + flush/1 ]). %% Additional nif @@ -109,3 +110,14 @@ map(Stream, Offset, Length, MapFunction) -> -spec read(stream(), non_neg_integer(), pos_integer()) -> binary(). read(_Stream, _Offset, _Length) -> erlang:nif_error(undefined). + +%%----------------------------------------------------------------------------- +%% @param Stream stream to flush +%% @returns The stream flushed +%% @doc Flush the stream. Typically invalidates instruction cache. +%% +%% @end +%%----------------------------------------------------------------------------- +-spec flush(stream()) -> stream(). +flush(_Stream) -> + erlang:nif_error(undefined). 
diff --git a/libs/jit/src/jit_x86_64.erl b/libs/jit/src/jit_x86_64.erl index df8e7cf1d6..8344b9c023 100644 --- a/libs/jit/src/jit_x86_64.erl +++ b/libs/jit/src/jit_x86_64.erl @@ -25,6 +25,7 @@ new/3, stream/1, offset/1, + flush/1, debugger/1, used_regs/1, available_regs/1, @@ -38,6 +39,7 @@ return_if_not_equal_to_ctx/2, jump_to_label/2, jump_to_continuation/2, + jump_to_offset/2, if_block/3, if_else_block/4, shift_right/3, @@ -113,6 +115,7 @@ stream :: stream(), offset :: non_neg_integer(), branches :: [{non_neg_integer(), non_neg_integer(), non_neg_integer()}], + jump_table_start :: non_neg_integer(), available_regs :: [x86_64_register()], used_regs :: [x86_64_register()], labels :: [{integer() | reference(), integer()}], @@ -134,6 +137,7 @@ -type condition() :: {x86_64_register(), '<', integer()} | {maybe_free_x86_64_register(), '<', x86_64_register()} + | {integer(), '<', maybe_free_x86_64_register()} | {maybe_free_x86_64_register(), '==', integer()} | {maybe_free_x86_64_register(), '!=', x86_64_register() | integer()} | {'(int)', maybe_free_x86_64_register(), '==', integer()} @@ -216,6 +220,7 @@ new(Variant, StreamModule, Stream) -> stream_module = StreamModule, stream = Stream, branches = [], + jump_table_start = 0, offset = StreamModule:offset(Stream), available_regs = ?AVAILABLE_REGS, used_regs = [], @@ -243,6 +248,16 @@ stream(#state{stream = Stream}) -> offset(#state{stream_module = StreamModule, stream = Stream}) -> StreamModule:offset(Stream). +%%----------------------------------------------------------------------------- +%% @doc Flush the current state (unused on x86-64) +%% @end +%% @param State current backend state +%% @return The flushed state +%%----------------------------------------------------------------------------- +-spec flush(state()) -> state(). +flush(#state{} = State) -> + State. + %%----------------------------------------------------------------------------- %% @doc Emit a debugger of breakpoint instruction. 
This is used for debugging %% and not in production. @@ -328,21 +343,73 @@ assert_all_native_free(State) -> %% @return Updated backend state %%----------------------------------------------------------------------------- -spec jump_table(state(), pos_integer()) -> state(). -jump_table(State, LabelsCount) -> - jump_table0(State, 0, LabelsCount). +jump_table(#state{stream_module = StreamModule, stream = Stream0} = State, LabelsCount) -> + JumpTableStart = StreamModule:offset(Stream0), + jump_table0(State#state{jump_table_start = JumpTableStart}, 0, LabelsCount). jump_table0(State, N, LabelsCount) when N > LabelsCount -> State; jump_table0( - #state{stream_module = StreamModule, stream = Stream0, branches = Branches} = State, + #state{stream_module = StreamModule, stream = Stream0} = State, N, LabelsCount ) -> - Offset = StreamModule:offset(Stream0), - {RelocOffset, I1} = jit_x86_64_asm:jmp_rel32(1), - Reloc = {N, Offset + RelocOffset, 32}, + % Placeholder, encodes with 0xffffffff + {_RelocOffset, I1} = jit_x86_64_asm:jmp_rel32(4), Stream1 = StreamModule:append(Stream0, I1), - jump_table0(State#state{stream = Stream1, branches = [Reloc | Branches]}, N + 1, LabelsCount). + jump_table0(State#state{stream = Stream1}, N + 1, LabelsCount). + +%%----------------------------------------------------------------------------- +%% @doc Patch a single branch in the stream +%% @end +%% @param StreamModule stream module +%% @param Stream stream state +%% @param Offset offset of the branch to patch +%% @param Size size of the branch in bits +%% @param LabelOffset target label offset +%% @return Updated stream +%%----------------------------------------------------------------------------- +-spec patch_branch(module(), stream(), non_neg_integer(), non_neg_integer(), non_neg_integer()) -> + stream(). +patch_branch(StreamModule, Stream, Offset, Size, LabelOffset) -> + StreamModule:map(Stream, Offset, Size div 8, fun(<>) -> + <<(Delta + LabelOffset - Offset):Size/little>> + end). 
+ +%%----------------------------------------------------------------------------- +%% @doc Patch all branches targeting a specific label and return remaining branches +%% @end +%% @param StreamModule stream module +%% @param Stream stream state +%% @param TargetLabel label to patch branches for +%% @param LabelOffset offset of the target label +%% @param Branches list of pending branches +%% @return {UpdatedStream, RemainingBranches} +%%----------------------------------------------------------------------------- +-spec patch_branches_for_label( + module(), + stream(), + integer(), + non_neg_integer(), + [{integer(), non_neg_integer(), non_neg_integer()}] +) -> {stream(), [{integer(), non_neg_integer(), non_neg_integer()}]}. +patch_branches_for_label(StreamModule, Stream, TargetLabel, LabelOffset, Branches) -> + patch_branches_for_label(StreamModule, Stream, TargetLabel, LabelOffset, Branches, []). + +patch_branches_for_label(_StreamModule, Stream, _TargetLabel, _LabelOffset, [], Acc) -> + {Stream, lists:reverse(Acc)}; +patch_branches_for_label( + StreamModule, + Stream0, + TargetLabel, + LabelOffset, + [{Label, Offset, Size} | Rest], + Acc +) when Label =:= TargetLabel -> + Stream1 = patch_branch(StreamModule, Stream0, Offset, Size, LabelOffset), + patch_branches_for_label(StreamModule, Stream1, TargetLabel, LabelOffset, Rest, Acc); +patch_branches_for_label(StreamModule, Stream, TargetLabel, LabelOffset, [Branch | Rest], Acc) -> + patch_branches_for_label(StreamModule, Stream, TargetLabel, LabelOffset, Rest, [Branch | Acc]). %%----------------------------------------------------------------------------- %% @doc Rewrite stream to update all branches for labels. 
@@ -362,9 +429,7 @@ update_branches( } = State ) -> {Label, LabelOffset} = lists:keyfind(Label, 1, Labels), - Stream1 = StreamModule:map(Stream0, Offset, Size div 8, fun(<>) -> - <<(Delta + LabelOffset - Offset):Size/little>> - end), + Stream1 = patch_branch(StreamModule, Stream0, Offset, Size, LabelOffset), update_branches(State#state{stream = Stream1, branches = BranchesT}). %%----------------------------------------------------------------------------- @@ -524,6 +589,13 @@ jump_to_label( State#state{stream = Stream1, branches = [Reloc | AccBranches]} end. +jump_to_offset(#state{stream_module = StreamModule, stream = Stream0} = State, TargetOffset) -> + Offset = StreamModule:offset(Stream0), + RelOffset = TargetOffset - Offset, + I1 = jit_x86_64_asm:jmp(RelOffset), + Stream1 = StreamModule:append(Stream0, I1), + State#state{stream = Stream1}. + %%----------------------------------------------------------------------------- %% @doc Jump to a continuation address stored in a register. %% This is used for optimized intra-module returns. @@ -668,11 +740,41 @@ if_block_cond(#state{stream_module = StreamModule} = State0, Cond) -> {State2, ReplaceDelta}. -spec if_block_cond0(state(), condition()) -> {state(), binary(), non_neg_integer()}. 
-if_block_cond0(State0, {Reg, '<', 0}) when is_atom(Reg) -> +if_block_cond0(State0, {RegOrTuple, '<', 0}) -> + Reg = + case RegOrTuple of + {free, Reg0} -> Reg0; + RegOrTuple -> RegOrTuple + end, I1 = jit_x86_64_asm:testq(Reg, Reg), {RelocJGEOffset, I2} = jit_x86_64_asm:jge_rel8(1), - {State0, <>, byte_size(I1) + RelocJGEOffset}; -if_block_cond0(State0, {RegOrTuple, '<', Value}) -> + State1 = if_block_free_reg(RegOrTuple, State0), + {State1, <>, byte_size(I1) + RelocJGEOffset}; +% Handle {Value, '<', Reg} - means Value < Reg, jump if false (i.e., if Value >= Reg or Reg <= Value) +if_block_cond0(State0, {Value, '<', RegOrTuple}) when ?IS_SINT32_T(Value) -> + Reg = + case RegOrTuple of + {free, Reg0} -> Reg0; + RegOrTuple -> RegOrTuple + end, + I1 = jit_x86_64_asm:cmpq(Value, Reg), + {RelocJLEOffset, I2} = jit_x86_64_asm:jle_rel8(1), + State1 = if_block_free_reg(RegOrTuple, State0), + {State1, <>, byte_size(I1) + RelocJLEOffset}; +% Catch-all for large values outside SINT32_T range +if_block_cond0(State0, {Value, '<', RegOrTuple}) when is_integer(Value) -> + Reg = + case RegOrTuple of + {free, Reg0} -> Reg0; + RegOrTuple -> RegOrTuple + end, + % Load large value into a temporary register + I1 = jit_x86_64_asm:movabsq(Value, r11), + I2 = jit_x86_64_asm:cmpq(r11, Reg), + {RelocJLEOffset, I3} = jit_x86_64_asm:jle_rel8(1), + State1 = if_block_free_reg(RegOrTuple, State0), + {State1, <>, byte_size(I1) + byte_size(I2) + RelocJLEOffset}; +if_block_cond0(State0, {RegOrTuple, '<', Value}) when ?IS_SINT32_T(Value) -> Reg = case RegOrTuple of {free, Reg0} -> Reg0; @@ -682,6 +784,29 @@ if_block_cond0(State0, {RegOrTuple, '<', Value}) -> {RelocJGEOffset, I2} = jit_x86_64_asm:jge_rel8(1), State1 = if_block_free_reg(RegOrTuple, State0), {State1, <>, byte_size(I1) + RelocJGEOffset}; +if_block_cond0(State0, {RegOrTuple, '<', RegB}) when is_atom(RegB) -> + Reg = + case RegOrTuple of + {free, Reg0} -> Reg0; + RegOrTuple -> RegOrTuple + end, + I1 = jit_x86_64_asm:cmpq(RegB, Reg), + 
{RelocJGEOffset, I2} = jit_x86_64_asm:jge_rel8(1), + State1 = if_block_free_reg(RegOrTuple, State0), + {State1, <>, byte_size(I1) + RelocJGEOffset}; +% Catch-all for large values outside SINT32_T range +if_block_cond0(State0, {RegOrTuple, '<', Value}) when is_integer(Value) -> + Reg = + case RegOrTuple of + {free, Reg0} -> Reg0; + RegOrTuple -> RegOrTuple + end, + % Load large value into a temporary register + I1 = jit_x86_64_asm:movabsq(Value, r11), + I2 = jit_x86_64_asm:cmpq(r11, Reg), + {RelocJGEOffset, I3} = jit_x86_64_asm:jge_rel8(1), + State1 = if_block_free_reg(RegOrTuple, State0), + {State1, <>, byte_size(I1) + byte_size(I2) + RelocJGEOffset}; if_block_cond0(State0, {RegOrTuple, '==', 0}) -> Reg = case RegOrTuple of @@ -1774,17 +1899,31 @@ set_continuation_to_label( stream_module = StreamModule, stream = Stream0, available_regs = [Temp | _], - branches = Branches + branches = Branches, + labels = Labels } = State, Label ) -> Offset = StreamModule:offset(Stream0), - {RewriteLEAOffset, I1} = jit_x86_64_asm:leaq_rel32({-4, rip}, Temp), - Reloc = {Label, Offset + RewriteLEAOffset, 32}, - I2 = jit_x86_64_asm:movq(Temp, ?JITSTATE_CONTINUATION), - Code = <>, - Stream1 = StreamModule:append(Stream0, Code), - State#state{stream = Stream1, branches = [Reloc | Branches]}. 
+ case lists:keyfind(Label, 1, Labels) of + {Label, LabelOffset} -> + % Label is already known, emit direct leaq without relocation + % leaq instruction is 7 bytes, RIP points to next instruction + RelOffset = LabelOffset - (Offset + 7), + I1 = jit_x86_64_asm:leaq({rip, RelOffset}, Temp), + I2 = jit_x86_64_asm:movq(Temp, ?JITSTATE_CONTINUATION), + Code = <>, + Stream1 = StreamModule:append(Stream0, Code), + State#state{stream = Stream1}; + false -> + % Label not yet known, emit placeholder and add relocation + {RewriteLEAOffset, I1} = jit_x86_64_asm:leaq_rel32({-4, rip}, Temp), + Reloc = {Label, Offset + RewriteLEAOffset, 32}, + I2 = jit_x86_64_asm:movq(Temp, ?JITSTATE_CONTINUATION), + Code = <>, + Stream1 = StreamModule:append(Stream0, Code), + State#state{stream = Stream1, branches = [Reloc | Branches]} + end. set_continuation_to_offset( #state{ @@ -1826,7 +1965,9 @@ get_module_index( Reg }. -and_(#state{stream_module = StreamModule, stream = Stream0} = State, Reg, Val) -> +and_(#state{stream_module = StreamModule, stream = Stream0} = State, {free, Reg}, Val) when + ?IS_GPR(Reg) +-> % 32 bits instructions on x86-64 zero the high 32 bits I1 = if @@ -1834,7 +1975,28 @@ and_(#state{stream_module = StreamModule, stream = Stream0} = State, Reg, Val) - true -> jit_x86_64_asm:andq(Val, Reg) end, Stream1 = StreamModule:append(Stream0, I1), - State#state{stream = Stream1}. + {State#state{stream = Stream1}, Reg}; +and_( + #state{ + stream_module = StreamModule, + available_regs = [ResultReg | T], + used_regs = UR, + stream = Stream0 + } = State, + Reg, + Val +) when + ?IS_GPR(Reg) +-> + I1 = jit_x86_64_asm:movq(Reg, ResultReg), + I2 = + if + Val >= 0, Val =< 16#FFFFFFFF -> jit_x86_64_asm:andl(Val, ResultReg); + true -> jit_x86_64_asm:andq(Val, ResultReg) + end, + Stream1 = StreamModule:append(Stream0, I1), + Stream2 = StreamModule:append(Stream1, I2), + {State#state{stream = Stream2, available_regs = T, used_regs = [ResultReg | UR]}, ResultReg}. 
or_(#state{stream_module = StreamModule, stream = Stream0} = State, Reg, Val) -> I1 = jit_x86_64_asm:orq(Val, Reg), @@ -1846,6 +2008,22 @@ add(#state{stream_module = StreamModule, stream = Stream0} = State, Reg, Val) -> Stream1 = StreamModule:append(Stream0, I1), State#state{stream = Stream1}. +sub( + #state{ + stream_module = StreamModule, + stream = Stream0, + available_regs = [TempReg | _] + } = State, + Reg, + Val +) when is_integer(Val), Val < -16#80000000 orelse Val > 16#7FFFFFFF -> + % Immediate too large for 32-bit, load into temporary register + I1 = jit_x86_64_asm:movabsq(Val, TempReg), + I2 = jit_x86_64_asm:subq(TempReg, Reg), + Stream1 = StreamModule:append(Stream0, I1), + Stream2 = StreamModule:append(Stream1, I2), + % Free temporary register immediately + State#state{stream = Stream2}; sub(#state{stream_module = StreamModule, stream = Stream0} = State, Reg, Val) -> I1 = jit_x86_64_asm:subq(Val, Reg), Stream1 = StreamModule:append(Stream0, I1), @@ -1906,19 +2084,38 @@ call_only_or_schedule_next( #state{ stream_module = StreamModule, stream = Stream0, - branches = Branches + branches = Branches, + labels = Labels } = State0, Label ) -> Offset = StreamModule:offset(Stream0), I1 = jit_x86_64_asm:decl(?JITSTATE_REMAINING_REDUCTIONS), - {RewriteJMPOffset, I3} = jit_x86_64_asm:jmp_rel32(1), - I2 = jit_x86_64_asm:jz(byte_size(I3) + 2), - Sz = byte_size(I1) + byte_size(I2), - Reloc1 = {Label, Offset + Sz + RewriteJMPOffset, 32}, - Code = <>, - Stream1 = StreamModule:append(Stream0, Code), - State1 = State0#state{stream = Stream1, branches = [Reloc1 | Branches]}, + I1Size = byte_size(I1), + + case lists:keyfind(Label, 1, Labels) of + {Label, LabelOffset} -> + % Label is already known, emit direct jmp with calculated offset + % jz is 2 bytes, jmp_rel32 is 5 bytes + JmpSize = 5, + I2 = jit_x86_64_asm:jz(JmpSize + 2), + I2Size = byte_size(I2), + % Calculate relative offset: target - current + RelOffset = LabelOffset - (Offset + I1Size + I2Size), + 
{_RewriteJMPOffset, I3} = jit_x86_64_asm:jmp_rel32(RelOffset), + Code = <>, + Stream1 = StreamModule:append(Stream0, Code), + State1 = State0#state{stream = Stream1}; + false -> + % Label not yet known, emit placeholder and add relocation + {RewriteJMPOffset, I3} = jit_x86_64_asm:jmp_rel32(1), + I2 = jit_x86_64_asm:jz(byte_size(I3) + 2), + Sz = I1Size + byte_size(I2), + Reloc1 = {Label, Offset + Sz + RewriteJMPOffset, 32}, + Code = <>, + Stream1 = StreamModule:append(Stream0, Code), + State1 = State0#state{stream = Stream1, branches = [Reloc1 | Branches]} + end, State2 = set_continuation_to_label(State1, Label), call_primitive_last(State2, ?PRIM_SCHEDULE_NEXT_CP, [ctx, jit_state]). @@ -2044,5 +2241,35 @@ add_label(#state{stream_module = StreamModule, stream = Stream} = State, Label) add_label(State, Label, Offset). -spec add_label(state(), integer() | reference(), integer()) -> state(). +add_label( + #state{ + stream_module = StreamModule, + stream = Stream0, + jump_table_start = JumpTableStart, + branches = Branches, + labels = Labels + } = State, + Label, + LabelOffset +) when is_integer(Label) -> + % Patch the jump table entry immediately + % Each jmp_rel32 instruction is 5 bytes + JumpTableEntryOffset = JumpTableStart + Label * 5, + RelativeOffset = LabelOffset - JumpTableEntryOffset, + {_RelocOffset, JmpInstruction} = jit_x86_64_asm:jmp_rel32(RelativeOffset), + Stream1 = StreamModule:replace(Stream0, JumpTableEntryOffset, JmpInstruction), + + % Eagerly patch any branches targeting this label + {Stream2, RemainingBranches} = patch_branches_for_label( + StreamModule, + Stream1, + Label, + LabelOffset, + Branches + ), + + State#state{ + stream = Stream2, branches = RemainingBranches, labels = [{Label, LabelOffset} | Labels] + }; add_label(#state{labels = Labels} = State, Label, Offset) -> State#state{labels = [{Label, Offset} | Labels]}. 
diff --git a/libs/jit/src/jit_x86_64_asm.erl b/libs/jit/src/jit_x86_64_asm.erl index c3b54fbe06..f5867bc5c0 100644 --- a/libs/jit/src/jit_x86_64_asm.erl +++ b/libs/jit/src/jit_x86_64_asm.erl @@ -35,6 +35,8 @@ jnz_rel8/1, jge/1, jge_rel8/1, + jle/1, + jle_rel8/1, jmp/1, jmp_rel8/1, jmp_rel32/1, @@ -360,6 +362,14 @@ jge(Offset) when Offset >= -126 andalso Offset =< 129 -> jge_rel8(Offset) when Offset >= -126 andalso Offset =< 129 -> {1, jge(Offset)}. +jle(Offset) when Offset >= -126 andalso Offset =< 129 -> + % Use short jump (matches assembler behavior) + AdjustedOffset = Offset - 2, + <<16#7E, AdjustedOffset>>. + +jle_rel8(Offset) when Offset >= -126 andalso Offset =< 129 -> + {1, jle(Offset)}. + jmp(Offset) when Offset >= -126 andalso Offset =< 129 -> % Use short jump (matches assembler behavior) AdjustedOffset = Offset - 2, @@ -483,6 +493,17 @@ addq(SrcReg, DestReg) when is_atom(SrcReg), is_atom(DestReg) -> {REX_B, MODRM_RM} = x86_64_x_reg(DestReg), <>. +subq(Imm, Reg) when ?IS_SINT8_T(Imm), is_atom(Reg) -> + case x86_64_x_reg(Reg) of + {0, Index} -> <<16#48, 16#83, (16#E8 + Index), Imm>>; + {1, Index} -> <<16#49, 16#83, (16#E8 + Index), Imm>> + end; +subq(Imm, rax) when ?IS_SINT32_T(Imm) -> + % Special short encoding for sub imm32, %rax + <<16#48, 16#2D, Imm:32/little>>; +subq(Imm, Reg) when ?IS_SINT32_T(Imm), is_atom(Reg) -> + {REX_B, MODRM_RM} = x86_64_x_reg(Reg), + <>; subq(RegA, RegB) when is_atom(RegA), is_atom(RegB) -> {REX_R, MODRM_REG} = x86_64_x_reg(RegA), {REX_B, MODRM_RM} = x86_64_x_reg(RegB), diff --git a/libs/jit/src/primitives.hrl b/libs/jit/src/primitives.hrl index 67ff60ecc8..5cf22c5163 100644 --- a/libs/jit/src/primitives.hrl +++ b/libs/jit/src/primitives.hrl @@ -92,6 +92,8 @@ -define(PRIM_BITSTRING_GET_UTF32, 69). -define(PRIM_TERM_COPY_MAP, 70). -define(PRIM_STACKTRACE_BUILD, 71). +-define(PRIM_TERM_REUSE_BINARY, 72). +-define(PRIM_BITSTRING_INSERT_FLOAT, 73). % Parameters to ?PRIM_MEMORY_ENSURE_FREE_WITH_ROOTS % -define(MEMORY_NO_SHRINK, 0). 
diff --git a/libs/jit/src/term.hrl b/libs/jit/src/term.hrl index 9270de3244..eca86c623a 100644 --- a/libs/jit/src/term.hrl +++ b/libs/jit/src/term.hrl @@ -74,3 +74,5 @@ -define(REFC_BINARY_MIN_64, 64). -define(TERM_BOXED_REFC_BINARY_SIZE, 6). -define(BINARY_HEADER_SIZE, 2). + +-define(TERM_INVALID_TERM, 0). diff --git a/src/libAtomVM/avmpack.c b/src/libAtomVM/avmpack.c index 075dc94966..624fa82493 100644 --- a/src/libAtomVM/avmpack.c +++ b/src/libAtomVM/avmpack.c @@ -35,6 +35,7 @@ static inline int pad(int size) bool avmpack_is_valid(const void *avmpack_binary, uint32_t size) { + // "#!/usr/bin/env AtomVM" const unsigned char pack_header[AVMPACK_SIZE] = { 0x23, 0x21, 0x2f, 0x75, 0x73, 0x72, 0x2f, 0x62, @@ -51,7 +52,7 @@ bool avmpack_is_valid(const void *avmpack_binary, uint32_t size) return memcmp(avmpack_binary, pack_header, AVMPACK_SIZE) == 0; } -int avmpack_find_section_by_flag(const void *avmpack_binary, uint32_t flags_mask, const void **ptr, uint32_t *size, const char **name) +int avmpack_find_section_by_flag(const void *avmpack_binary, uint32_t flags_mask, uint32_t flags_val, const void **ptr, uint32_t *size, const char **name) { int offset = AVMPACK_SIZE; const uint32_t *flags; @@ -60,7 +61,7 @@ int avmpack_find_section_by_flag(const void *avmpack_binary, uint32_t flags_mask const uint32_t *sizes = ((const uint32_t *) (avmpack_binary)) + offset / sizeof(uint32_t); flags = ((const uint32_t *) (avmpack_binary)) + 1 + offset / sizeof(uint32_t); - if ((ENDIAN_SWAP_32(*flags) & flags_mask) == flags_mask) { + if ((ENDIAN_SWAP_32(*flags) & flags_mask) == flags_val) { const char *found_section_name = (const char *) (sizes + 3); int section_name_len = pad(strlen(found_section_name) + 1); diff --git a/src/libAtomVM/avmpack.h b/src/libAtomVM/avmpack.h index 85ce9562d9..a68e6e3378 100644 --- a/src/libAtomVM/avmpack.h +++ b/src/libAtomVM/avmpack.h @@ -39,6 +39,7 @@ extern "C" { #define END_OF_FILE 0 #define BEAM_START_FLAG 1 #define BEAM_CODE_FLAG 2 +#define 
END_OF_FILE_MASK 255 struct AVMPackData; @@ -104,12 +105,13 @@ typedef void *(*avmpack_fold_fun)(void *accum, const void *section_ptr, uint32_t * @details Finds an AVM Pack section that has certain flags set and returns a pointer to it, its size and its name. * @param avmpack_binary a pointer to valid AVM Pack file data. * @param flags_mask that will be matched against file sections. + * @param flags_value that will be matched against file sections. * @param ptr will point to the found file section. * @param size will be set to the file section size that has been found, if the section has not been found it will not be updated. * @param name the section name, as defined in the module header. * @returns 1 if the file section has been found, 0 otherwise. */ -int avmpack_find_section_by_flag(const void *avmpack_binary, uint32_t flags_mask, const void **ptr, uint32_t *size, const char **name); +int avmpack_find_section_by_flag(const void *avmpack_binary, uint32_t flags_mask, uint32_t flags_value, const void **ptr, uint32_t *size, const char **name); /** * @brief Finds an AVM Pack section that has certain name. 
diff --git a/src/libAtomVM/bitstring.c b/src/libAtomVM/bitstring.c index b74c365316..cb14bfa608 100644 --- a/src/libAtomVM/bitstring.c +++ b/src/libAtomVM/bitstring.c @@ -330,6 +330,73 @@ void bitstring_copy_bits_incomplete_bytes(uint8_t *dst, size_t bits_offset, cons *dst = dest_byte; } +bool bitstring_extract_f16( + term src_bin, size_t offset, avm_int_t n, enum BitstringFlags bs_flags, avm_float_t *dst) +{ + unsigned long capacity = term_binary_size(src_bin); + if (8 * capacity - offset < (unsigned long) n) { + return false; + } + + if ((offset & 0x7) == 0) { + int byte_offset = offset >> 3; + const uint8_t *src = (const uint8_t *) term_binary_data(src_bin) + byte_offset; + + // Read 16-bit value + uint16_t f16_bits; + if (bs_flags & LittleEndianIntegerMask) { + f16_bits = READ_16LE_UNALIGNED(src); + } else { + f16_bits = READ_16_UNALIGNED(src); + } + + // Convert IEEE 754 half-precision to single-precision + uint32_t sign = (f16_bits >> 15) & 0x1; + uint32_t f16_exp = (f16_bits >> 10) & 0x1F; + uint32_t f16_mantissa = f16_bits & 0x3FF; + + uint32_t f32_bits; + if (f16_exp == 0) { + if (f16_mantissa == 0) { + // Zero + f32_bits = sign << 31; + } else { + // Subnormal number - normalize it + int e = -1; + uint32_t m = f16_mantissa; + do { + e++; + m <<= 1; + } while ((m & 0x400) == 0); + f16_mantissa = m & 0x3FF; + f16_exp = -e; + int32_t f32_exp = (int32_t) f16_exp + 127 - 15; + f32_bits = (sign << 31) | (f32_exp << 23) | (f16_mantissa << 13); + } + } else if (f16_exp == 0x1F) { + // Inf or NaN - not finite + return false; + } else { + // Normalized number + int32_t f32_exp = (int32_t) f16_exp + 127 - 15; + f32_bits = (sign << 31) | (f32_exp << 23) | (f16_mantissa << 13); + } + + union + { + uint32_t bits; + float fvalue; + } f32; + f32.bits = f32_bits; + + *dst = f32.fvalue; + return true; + } else { + // TODO: add support to floats not aligned to byte boundary + return false; + } +} + bool bitstring_extract_f32( term src_bin, size_t offset, avm_int_t n, enum 
BitstringFlags bs_flags, avm_float_t *dst) { @@ -406,3 +473,158 @@ bool bitstring_extract_f64( return false; } } + +bool bitstring_insert_f16( + term dst_bin, size_t offset, avm_float_t value, enum BitstringFlags bs_flags) +{ + unsigned long capacity = term_binary_size(dst_bin); + if (8 * capacity - offset < 16) { + return false; + } + + if (!isfinite(value)) { + return false; + } + + if ((offset & 0x7) == 0) { + int byte_offset = offset >> 3; + uint8_t *dst = (uint8_t *) term_binary_data(dst_bin) + byte_offset; + + _Static_assert(sizeof(float) == 4, "Unsupported float size"); + + // Convert double to float first + union + { + uint32_t bits; + float fvalue; + } f32; + + f32.fvalue = (float) value; + uint32_t f32_bits = f32.bits; + + // Extract components from float (32-bit) + uint32_t sign = (f32_bits >> 31) & 0x1; + int32_t exp = ((f32_bits >> 23) & 0xFF) - 127; // Remove float bias + uint32_t mantissa = f32_bits & 0x7FFFFF; + + uint16_t f16_bits; + + // Handle special cases + if (exp > 15) { + // Overflow to infinity + f16_bits = (sign << 15) | 0x7C00; + } else if (exp < -14) { + // Underflow to zero or denormal + if (exp < -24) { + // Too small, round to zero + f16_bits = sign << 15; + } else { + // Denormal number + uint32_t denorm_mantissa = (mantissa | 0x800000) >> (-14 - exp); + f16_bits = (sign << 15) | (denorm_mantissa >> 13); + } + } else { + // Normal number + uint32_t f16_exp = exp + 15; // Add half-precision bias + // Round to nearest even (bit 12 is the rounding bit) + uint32_t f16_mantissa = (mantissa + 0x1000) >> 13; // Round and keep top 10 bits + // Handle mantissa overflow + if (f16_mantissa > 0x3FF) { + f16_mantissa = 0; + f16_exp++; + } + if (f16_exp > 30) { + // Overflow to infinity + f16_bits = (sign << 15) | 0x7C00; + } else { + f16_bits = (sign << 15) | (f16_exp << 10) | f16_mantissa; + } + } + + if (bs_flags & LittleEndianIntegerMask) { + WRITE_16LE_UNALIGNED(dst, f16_bits); + } else { + WRITE_16_UNALIGNED(dst, f16_bits); + } + return 
true; + } else { + // TODO: add support to floats not aligned to byte boundary + return false; + } +} + +bool bitstring_insert_f32( + term dst_bin, size_t offset, avm_float_t value, enum BitstringFlags bs_flags) +{ + unsigned long capacity = term_binary_size(dst_bin); + if (8 * capacity - offset < 32) { + return false; + } + + if (!isfinite(value)) { + return false; + } + + if ((offset & 0x7) == 0) { + int byte_offset = offset >> 3; + uint8_t *dst = (uint8_t *) term_binary_data(dst_bin) + byte_offset; + + _Static_assert(sizeof(float) == 4, "Unsupported float size"); + + union + { + uint32_t bits; + float fvalue; + } f32; + + f32.fvalue = (float) value; + + if (bs_flags & LittleEndianIntegerMask) { + WRITE_32LE_UNALIGNED(dst, f32.bits); + } else { + WRITE_32_UNALIGNED(dst, f32.bits); + } + return true; + } else { + // TODO: add support to floats not aligned to byte boundary + return false; + } +} + +bool bitstring_insert_f64( + term dst_bin, size_t offset, avm_float_t value, enum BitstringFlags bs_flags) +{ + unsigned long capacity = term_binary_size(dst_bin); + if (8 * capacity - offset < 64) { + return false; + } + + if (!isfinite(value)) { + return false; + } + + if ((offset & 0x7) == 0) { + int byte_offset = offset >> 3; + uint8_t *dst = (uint8_t *) term_binary_data(dst_bin) + byte_offset; + + _Static_assert(sizeof(double) == 8, "Unsupported double size"); + + union + { + uint64_t bits; + double fvalue; + } f64; + + f64.fvalue = value; + + if (bs_flags & LittleEndianIntegerMask) { + WRITE_64LE_UNALIGNED(dst, f64.bits); + } else { + WRITE_64_UNALIGNED(dst, f64.bits); + } + return true; + } else { + // TODO: add support to doubles not aligned to byte boundary + return false; + } +} diff --git a/src/libAtomVM/bitstring.h b/src/libAtomVM/bitstring.h index da20507158..530833faa0 100644 --- a/src/libAtomVM/bitstring.h +++ b/src/libAtomVM/bitstring.h @@ -502,11 +502,20 @@ static inline void bitstring_copy_bits(uint8_t *dst, size_t bits_offset, const u } } +bool 
bitstring_extract_f16( + term src_bin, size_t offset, avm_int_t n, enum BitstringFlags bs_flags, avm_float_t *dst); bool bitstring_extract_f32( term src_bin, size_t offset, avm_int_t n, enum BitstringFlags bs_flags, avm_float_t *dst); bool bitstring_extract_f64( term src_bin, size_t offset, avm_int_t n, enum BitstringFlags bs_flags, avm_float_t *dst); +bool bitstring_insert_f16( + term dst_bin, size_t offset, avm_float_t value, enum BitstringFlags bs_flags); +bool bitstring_insert_f32( + term dst_bin, size_t offset, avm_float_t value, enum BitstringFlags bs_flags); +bool bitstring_insert_f64( + term dst_bin, size_t offset, avm_float_t value, enum BitstringFlags bs_flags); + #ifdef __cplusplus } #endif diff --git a/src/libAtomVM/context.c b/src/libAtomVM/context.c index f682e8c34c..89f8c94567 100644 --- a/src/libAtomVM/context.c +++ b/src/libAtomVM/context.c @@ -1297,7 +1297,7 @@ COLD_FUNC void context_dump(Context *ctx) fprintf(stderr, "process_count = %zu\n", process_count); fprintf(stderr, "ports_count = %zu\n", ports_count); fprintf(stderr, "atoms_count = %zu\n", atom_table_count(glb->atom_table)); - fprintf(stderr, "refc_binary_total_size = %zu\n", refc_binary_total_size(ctx)); + refc_binary_dump_info(ctx); } fprintf(stderr, "\n\n**End Of Crash Report**\n"); } diff --git a/src/libAtomVM/defaultatoms.def b/src/libAtomVM/defaultatoms.def index 35330fdecc..4c02b5c255 100644 --- a/src/libAtomVM/defaultatoms.def +++ b/src/libAtomVM/defaultatoms.def @@ -114,6 +114,7 @@ X(STRING_ATOM, "\x6", "string") X(UTF8_ATOM, "\x4", "utf8") X(UTF16_ATOM, "\x5", "utf16") X(UTF32_ATOM, "\x5", "utf32") +X(FLOAT_ATOM, "\x5", "float") X(COPY_ATOM, "\x4", "copy") X(REUSE_ATOM, "\x5", "reuse") @@ -209,3 +210,4 @@ X(LOAD_ATOM, "\x4", "load") X(JIT_X86_64_ATOM, "\xA", "jit_x86_64") X(JIT_AARCH64_ATOM, "\xB", "jit_aarch64") X(JIT_ARMV6M_ATOM, "\xA", "jit_armv6m") +X(JIT_RISCV32_ATOM, "\xB", "jit_riscv32") diff --git a/src/libAtomVM/jit.c b/src/libAtomVM/jit.c index 39bfa963a1..f399e127cc 
100644 --- a/src/libAtomVM/jit.c +++ b/src/libAtomVM/jit.c @@ -86,6 +86,20 @@ _Static_assert(offsetof(JITState, remaining_reductions) == 0x8, "jit_state->rema _Static_assert(sizeof(size_t) == 4, "size_t is expected to be 32 bits"); +#elif JIT_ARCH_TARGET == JIT_ARCH_RISCV32 +_Static_assert(offsetof(Context, e) == 0x14, "ctx->e is 0x14 in jit/src/jit_riscv32.erl"); +_Static_assert(offsetof(Context, x) == 0x18, "ctx->x is 0x18 in jit/src/jit_riscv32.erl"); +_Static_assert(offsetof(Context, cp) == 0x5C, "ctx->cp is 0x5C in jit/src/jit_riscv32.erl"); +_Static_assert(offsetof(Context, fr) == 0x60, "ctx->fr is 0x60 in jit/src/jit_riscv32.erl"); +_Static_assert(offsetof(Context, bs) == 0x64, "ctx->bs is 0x64 in jit/src/jit_riscv32.erl"); +_Static_assert(offsetof(Context, bs_offset) == 0x68, "ctx->bs_offset is 0x68 in jit/src/jit_riscv32.erl"); + +_Static_assert(offsetof(JITState, module) == 0x0, "jit_state->module is 0x0 in jit/src/jit_riscv32.erl"); +_Static_assert(offsetof(JITState, continuation) == 0x4, "jit_state->continuation is 0x4 in jit/src/jit_riscv32.erl"); +_Static_assert(offsetof(JITState, remaining_reductions) == 0x8, "jit_state->remaining_reductions is 0x8 in jit/src/jit_riscv32.erl"); + +_Static_assert(sizeof(size_t) == 4, "size_t is expected to be 32 bits"); + #else #error Unknown jit target #endif @@ -133,7 +147,7 @@ static void destroy_extended_registers(Context *ctx, unsigned int live) static void jit_trim_live_regs(Context *ctx, uint32_t live) { - TRACE("jit_trim_live_regs: ctx->process_id = %d, live = %d\n", ctx->process_id, live); + TRACE("jit_trim_live_regs: ctx->process_id = %" PRId32 ", live = %" PRIu32 "\n", ctx->process_id, live); if (UNLIKELY(!list_is_empty(&ctx->extended_x_regs))) { destroy_extended_registers(ctx, live); } @@ -173,8 +187,8 @@ static Context *jit_return(Context *ctx, JITState *jit_state) static Context *jit_terminate_context(Context *ctx, JITState *jit_state) { - TRACE("jit_terminate_context: ctx->process_id = %d\n", 
ctx->process_id); - TRACE("-- Code execution finished for %i--\n", ctx->process_id); + TRACE("jit_terminate_context: ctx->process_id = %" PRId32 "\n", ctx->process_id); + TRACE("-- Code execution finished for %" PRId32 "--\n", ctx->process_id); GlobalContext *global = ctx->global; if (ctx->leader) { scheduler_stop_all(global); @@ -186,7 +200,7 @@ static Context *jit_terminate_context(Context *ctx, JITState *jit_state) static Context *jit_handle_error(Context *ctx, JITState *jit_state, int offset) { - TRACE("jit_terminate_context: ctx->process_id = %d, offset = %d\n", ctx->process_id, offset); + TRACE("jit_terminate_context: ctx->process_id = %" PRId32 ", offset = %d\n", ctx->process_id, offset); if (offset || term_is_invalid_term(ctx->x[2])) { ctx->x[2] = stacktrace_create_raw(ctx, jit_state->module, offset, ctx->x[0]); } @@ -253,14 +267,14 @@ static void set_error(Context *ctx, JITState *jit_state, int offset, term error_ static Context *jit_raise_error(Context *ctx, JITState *jit_state, int offset, term error_type_atom) { - TRACE("jit_raise_error: ctx->process_id = %d, offset = %d\n", ctx->process_id, offset); + TRACE("jit_raise_error: ctx->process_id = %" PRId32 ", offset = %d\n", ctx->process_id, offset); set_error(ctx, jit_state, offset, error_type_atom); return jit_handle_error(ctx, jit_state, 0); } static Context *jit_raise_error_tuple(Context *ctx, JITState *jit_state, int offset, term error_atom, term arg1) { - TRACE("jit_raise_error_tuple: ctx->process_id = %d, offset = %d\n", ctx->process_id, offset); + TRACE("jit_raise_error_tuple: ctx->process_id = %" PRId32 ", offset = %d\n", ctx->process_id, offset); // We can gc as we are raising if (UNLIKELY(memory_ensure_free_with_roots(ctx, TUPLE_SIZE(2), 1, &arg1, MEMORY_CAN_SHRINK) != MEMORY_GC_OK)) { set_error(ctx, jit_state, offset, OUT_OF_MEMORY_ATOM); @@ -277,7 +291,7 @@ static Context *jit_raise_error_tuple(Context *ctx, JITState *jit_state, int off static Context *jit_raise(Context *ctx, JITState 
*jit_state, int offset, term stacktrace, term exc_value) { - TRACE("jit_raise: ctx->process_id = %d, offset = %d\n", ctx->process_id, offset); + TRACE("jit_raise: ctx->process_id = %" PRId32 ", offset = %d\n", ctx->process_id, offset); ctx->x[0] = stacktrace_exception_class(stacktrace); ctx->x[1] = exc_value; ctx->x[2] = stacktrace_create_raw(ctx, jit_state->module, offset, stacktrace); @@ -286,7 +300,7 @@ static Context *jit_raise(Context *ctx, JITState *jit_state, int offset, term st static Context *jit_schedule_next_cp(Context *ctx, JITState *jit_state) { - TRACE("jit_schedule_next_cp: ctx->process_id = %d\n", ctx->process_id); + TRACE("jit_schedule_next_cp: ctx->process_id = %" PRId32 "\n", ctx->process_id); ctx->saved_function_ptr = jit_state->continuation; ctx->saved_module = jit_state->module; jit_state->remaining_reductions = 0; @@ -295,7 +309,7 @@ static Context *jit_schedule_next_cp(Context *ctx, JITState *jit_state) static Context *jit_schedule_wait_cp(Context *ctx, JITState *jit_state) { - TRACE("jit_schedule_wait_cp: ctx->process_id = %d\n", ctx->process_id); + TRACE("jit_schedule_wait_cp: ctx->process_id = %" PRId32 "\n", ctx->process_id); ctx->saved_function_ptr = jit_state->continuation; ctx->saved_module = jit_state->module; jit_state->remaining_reductions = 0; @@ -433,7 +447,7 @@ static Context *jit_call_ext(Context *ctx, JITState *jit_state, int offset, int return_value = bif->bif2_ptr(ctx, 0, ctx->x[0], ctx->x[1]); break; default: - fprintf(stderr, "Invalid arity %" PRIu32 " for bif\n", arity); + fprintf(stderr, "Invalid arity %" PRIu32 " for bif\n", (uint32_t) arity); AVM_ABORT(); } PROCESS_MAYBE_TRAP_RETURN_VALUE_LAST(return_value, offset); @@ -463,7 +477,7 @@ static Context *jit_call_ext(Context *ctx, JITState *jit_state, int offset, int return_value = gcbif->gcbif2_ptr(ctx, 0, 0, ctx->x[0], ctx->x[1]); break; default: - fprintf(stderr, "Invalid arity %" PRIu32 " for bif\n", arity); + fprintf(stderr, "Invalid arity %" PRIu32 " for bif\n", 
(uint32_t) arity); AVM_ABORT(); } PROCESS_MAYBE_TRAP_RETURN_VALUE_LAST(return_value, offset); @@ -486,7 +500,7 @@ static term jit_module_get_atom_term_by_id(JITState *jit_state, int atom_index) static bool jit_allocate(Context *ctx, JITState *jit_state, uint32_t stack_need, uint32_t heap_need, uint32_t live) { - TRACE("jit_allocate: stack_need=%u heap_need=%u live=%u\n", stack_need, heap_need, live); + TRACE("jit_allocate: ENTRY ctx=%p jit_state=%p stack_need=%" PRIu32 " heap_need=%" PRIu32 " live=%" PRIu32 "\n", (void *) ctx, (void *) jit_state, stack_need, heap_need, live); if (ctx->heap.root->next || ((ctx->heap.heap_ptr + heap_need > ctx->e - (stack_need + 1)))) { TRIM_LIVE_REGS(live); if (UNLIKELY(memory_ensure_free_with_roots(ctx, heap_need + stack_need + 1, live, ctx->x, MEMORY_CAN_SHRINK) != MEMORY_GC_OK)) { @@ -501,7 +515,7 @@ static bool jit_allocate(Context *ctx, JITState *jit_state, uint32_t stack_need, static BifImpl0 jit_get_imported_bif(JITState *jit_state, uint32_t bif) { - TRACE("jit_get_imported_bif: bif=%u\n", bif); + TRACE("jit_get_imported_bif: bif=%" PRIu32 "\n", bif); const struct ExportedFunction *exported_bif = jit_state->module->imported_funcs[bif]; const BifImpl0 result = EXPORTED_FUNCTION_TO_BIF(exported_bif)->bif0_ptr; return result; @@ -509,7 +523,7 @@ static BifImpl0 jit_get_imported_bif(JITState *jit_state, uint32_t bif) static bool jit_deallocate(Context *ctx, JITState *jit_state, uint32_t n_words) { - TRACE("jit_deallocate: n_words=%u\n", n_words); + TRACE("jit_deallocate: n_words=%" PRIu32 "\n", n_words); ctx->cp = ctx->e[n_words]; ctx->e += n_words + 1; // Hopefully, we only need x[0] @@ -534,7 +548,7 @@ static TermCompareResult jit_term_compare(Context *ctx, JITState *jit_state, ter static bool jit_test_heap(Context *ctx, JITState *jit_state, uint32_t heap_need, uint32_t live_registers) { - TRACE("jit_test_heap: heap_need=%u live_registers=%u\n", heap_need, live_registers); + TRACE("jit_test_heap: heap_need=%" PRIu32 " 
live_registers=%" PRIu32 "\n", heap_need, live_registers); size_t heap_free = context_avail_free_memory(ctx); // if we need more heap space than is currently free, then try to GC the needed space if (heap_free < heap_need) { @@ -548,7 +562,7 @@ static bool jit_test_heap(Context *ctx, JITState *jit_state, uint32_t heap_need, } else if (heap_free > heap_need * HEAP_NEED_GC_SHRINK_THRESHOLD_COEFF) { TRIM_LIVE_REGS(live_registers); if (UNLIKELY(memory_ensure_free_with_roots(ctx, heap_need * (HEAP_NEED_GC_SHRINK_THRESHOLD_COEFF / 2), live_registers, ctx->x, MEMORY_CAN_SHRINK) != MEMORY_GC_OK)) { - TRACE("Unable to ensure free memory. heap_need=%i\n", heap_need); + TRACE("Unable to ensure free memory. heap_need=%" PRIu32 "\n", heap_need); set_error(ctx, jit_state, 0, OUT_OF_MEMORY_ATOM); return false; } @@ -629,13 +643,13 @@ static term maybe_alloc_boxed_integer_fragment(Context *ctx, avm_int64_t value) static term jit_term_alloc_tuple(Context *ctx, uint32_t size) { - TRACE("jit_term_alloc_tuple: size=%u\n", size); + TRACE("jit_term_alloc_tuple: size=%" PRIu32 "\n", size); return term_alloc_tuple(size, &ctx->heap); } static term jit_term_alloc_fun(Context *ctx, JITState *jit_state, uint32_t fun_index, uint32_t numfree) { - TRACE("jit_term_alloc_fun: fun_index=%u numfree=%u\n", fun_index, numfree); + TRACE("jit_term_alloc_fun: fun_index=%" PRIu32 " numfree=%" PRIu32 "\n", fun_index, numfree); size_t size = numfree + BOXED_FUN_SIZE; term *boxed_func = memory_heap_alloc(&ctx->heap, size); @@ -841,7 +855,7 @@ static Context *jit_process_signal_messages(Context *ctx, JITState *jit_state) static term jit_mailbox_peek(Context *ctx) { - TRACE("jit_mailbox_peek: ctx->process_id=%d\n", ctx->process_id); + TRACE("jit_mailbox_peek: ctx->process_id=%" PRId32 "\n", ctx->process_id); term out = term_invalid_term(); mailbox_peek(ctx, &out); return out; @@ -849,26 +863,26 @@ static term jit_mailbox_peek(Context *ctx) static void jit_mailbox_remove_message(Context *ctx) { - 
TRACE("jit_mailbox_remove_message: ctx->process_id=%d\n", ctx->process_id); + TRACE("jit_mailbox_remove_message: ctx->process_id=%" PRId32 "\n", ctx->process_id); mailbox_remove_message(&ctx->mailbox, &ctx->heap); } static void jit_timeout(Context *ctx) { - TRACE("jit_timeout: ctx->process_id=%d\n", ctx->process_id); + TRACE("jit_timeout: ctx->process_id=%" PRId32 "\n", ctx->process_id); context_update_flags(ctx, ~WaitingTimeoutExpired, NoFlags); mailbox_reset(&ctx->mailbox); } static void jit_mailbox_next(Context *ctx) { - TRACE("jit_mailbox_next: ctx->process_id=%d\n", ctx->process_id); + TRACE("jit_mailbox_next: ctx->process_id=%" PRId32 "\n", ctx->process_id); mailbox_next(&ctx->mailbox); } static void jit_cancel_timeout(Context *ctx) { - TRACE("jit_cancel_timeout: ctx->process_id=%d\n", ctx->process_id); + TRACE("jit_cancel_timeout: ctx->process_id=%" PRId32 "\n", ctx->process_id); if (context_get_flags(ctx, WaitingTimeout | WaitingTimeoutExpired)) { scheduler_cancel_timeout(ctx); } @@ -876,7 +890,7 @@ static void jit_cancel_timeout(Context *ctx) static void jit_clear_timeout_flag(Context *ctx) { - TRACE("jit_clear_timeout_flag: ctx->process_id=%d\n", ctx->process_id); + TRACE("jit_clear_timeout_flag: ctx->process_id=%" PRId32 "\n", ctx->process_id); context_update_flags(ctx, ~WaitingTimeoutExpired, NoFlags); } @@ -1244,6 +1258,9 @@ static term jit_bitstring_extract_float(Context *ctx, term *bin_ptr, size_t offs avm_float_t value; bool status; switch (n) { + case 16: + status = bitstring_extract_f16(((term) bin_ptr) | TERM_PRIMARY_BOXED, offset, n, bs_flags, &value); + break; case 32: status = bitstring_extract_f32(((term) bin_ptr) | TERM_PRIMARY_BOXED, offset, n, bs_flags, &value); break; @@ -1301,6 +1318,12 @@ static term jit_term_create_empty_binary(Context *ctx, size_t len) return term_create_empty_binary(len, &ctx->heap, ctx->global); } +static term jit_term_reuse_binary(Context *ctx, term src, size_t len) +{ + TRACE("jit_term_reuse_binary: src=0x%lx, 
len=%d\n", src, (int) len); + return term_reuse_binary(src, len, &ctx->heap, ctx->global); +} + static int jit_decode_flags_list(Context *ctx, JITState *jit_state, term flags) { int flags_value = 0; @@ -1354,6 +1377,18 @@ static bool jit_bitstring_insert_integer(term bin, size_t offset, term value, si return bitstring_insert_integer(bin, offset, int_value, n, flags); } +static bool jit_bitstring_insert_float(term bin, size_t offset, term value, size_t n, enum BitstringFlags flags) +{ + avm_float_t float_value = term_conv_to_float(value); + if (n == 16) { + return bitstring_insert_f16(bin, offset, float_value, flags); + } else if (n == 32) { + return bitstring_insert_f32(bin, offset, float_value, flags); + } else { + return bitstring_insert_f64(bin, offset, float_value, flags); + } +} + static void jit_bitstring_copy_module_str(Context *ctx, JITState *jit_state, term bin, size_t offset, int str_id, size_t len) { TRACE("jit_bitstring_copy_module_str: bin=%p offset=%d str_id=%d len=%d\n", (void *) bin, (int) offset, str_id, (int) len); @@ -1734,7 +1769,9 @@ const ModuleNativeInterface module_native_interface = { jit_bitstring_get_utf16, jit_bitstring_get_utf32, term_copy_map, - jit_stacktrace_build + jit_stacktrace_build, + jit_term_reuse_binary, + jit_bitstring_insert_float }; #endif diff --git a/src/libAtomVM/jit.h b/src/libAtomVM/jit.h index ee53259886..ae57c6c941 100644 --- a/src/libAtomVM/jit.h +++ b/src/libAtomVM/jit.h @@ -158,6 +158,8 @@ struct ModuleNativeInterface term (*bitstring_get_utf32)(term src, int flags_value); term (*term_copy_map)(Context *ctx, term src); term (*stacktrace_build)(Context *ctx); + term (*term_reuse_binary)(Context *ctx, term src, size_t len); + bool (*bitstring_insert_float)(term bin, size_t offset, term value, size_t n, enum BitstringFlags flags); }; extern const ModuleNativeInterface module_native_interface; @@ -173,6 +175,7 @@ enum TrapAndLoadResult #define JIT_ARCH_X86_64 1 #define JIT_ARCH_AARCH64 2 #define JIT_ARCH_ARMV6M 3 
+#define JIT_ARCH_RISCV32 4 #define JIT_VARIANT_PIC 1 #define JIT_VARIANT_FLOAT32 2 @@ -194,6 +197,11 @@ enum TrapAndLoadResult #define JIT_JUMPTABLE_ENTRY_SIZE 12 #endif +#if defined(__riscv) && (__riscv_xlen == 32) +#define JIT_ARCH_TARGET JIT_ARCH_RISCV32 +#define JIT_JUMPTABLE_ENTRY_SIZE 8 +#endif + #ifndef JIT_ARCH_TARGET #error Unknown JIT target #endif diff --git a/src/libAtomVM/jit_stream_flash.c b/src/libAtomVM/jit_stream_flash.c new file mode 100644 index 0000000000..829b7d4f76 --- /dev/null +++ b/src/libAtomVM/jit_stream_flash.c @@ -0,0 +1,928 @@ +/* + * This file is part of AtomVM. + * + * Copyright 2025 by Paul Guyot + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ * + * SPDX-License-Identifier: Apache-2.0 OR LGPL-2.1-or-later + */ + +#include "jit_stream_flash.h" + +#include "avmpack.h" +#include "context.h" +#include "defaultatoms.h" +#include "erl_nif.h" +#include "erl_nif_priv.h" +#include "globalcontext.h" +#include "jit.h" +#include "module.h" +#include "nifs.h" +#include "platform_defaultatoms.h" +#include "sys.h" +#include "term.h" + +#include +#include +#include +#include + +// #define ENABLE_TRACE +#include "trace.h" + +#define JIT_ENTRY_MAGIC 0x4A74 + +#ifdef ENABLE_TRACE +// Simple CRC32 for verification +static uint32_t crc32(const uint8_t *data, size_t len) +{ + uint32_t crc = 0xFFFFFFFF; + for (size_t i = 0; i < len; i++) { + crc ^= data[i]; + for (int j = 0; j < 8; j++) { + crc = (crc >> 1) ^ (0xEDB88320 & -(crc & 1)); + } + } + return ~crc; +} +#endif + +/** + * @brief JIT entry header stored in flash + * + * Each compiled module has an entry with this header followed by the native code. + */ +struct JITEntry +{ + uint16_t magic; ///< Magic number (JIT_ENTRY_MAGIC) or 0xFFFF for free space + uint16_t version; ///< Module version + uint32_t code; ///< Pointer to original BEAM code (32-bit for flash storage) + uint32_t labels; ///< Number of labels + uint32_t size; ///< Size of native code in bytes +} __attribute__((packed)); + +_Static_assert(sizeof(struct JITEntry) == 16, "sizeof(struct JITEntry) must be 16"); + +/** + * @brief JIT stream flash state + * + * Maintains the state for writing JIT code to flash with page buffering. 
+ */ +struct JITStreamFlash +{ + struct JITEntry *jit_entry; ///< Pointer to current JIT entry in flash + uintptr_t page_base_addr; ///< Base address of current page + uint8_t page_buffer[FLASH_PAGE_SIZE]; ///< Page buffer for writing + uint8_t page_offset; ///< Current offset within page + struct JSFlashPlatformContext *pf_ctx; ///< Platform-specific context +}; + +static ErlNifResourceType *jit_stream_flash_resource_type; +static void jit_stream_flash_dtor(ErlNifEnv *caller_env, void *obj); +static bool jit_stream_flash_replace_at_addr(struct JSFlashPlatformContext *pf_ctx, uintptr_t addr, const uint8_t *data, size_t len); + +const ErlNifResourceTypeInit jit_stream_flash_resource_type_init = { + .members = 1, + .dtor = jit_stream_flash_dtor +}; + +static struct JITEntry *jit_entry_next(struct JITEntry *jit_entry) +{ + uintptr_t next_entry_addr = ((uintptr_t) jit_entry) + sizeof(struct JITEntry) + jit_entry->size; + // Align to 4 bytes boundaries + next_entry_addr = (next_entry_addr + 3) & ~3; + + TRACE("jit_entry_next: jit_entry = %p, return %p\n", (void *) jit_entry, (void *) next_entry_addr); + + return (struct JITEntry *) next_entry_addr; +} + +/** + * @brief Check if a sector needs to be erased + * + * Scans the entire sector to check if it contains any non-0xFF bytes. + * Uses word-by-word comparison for efficiency since sectors are aligned. 
+ * + * @param sector_addr Address of the sector (must be sector-aligned) + * @return true if sector needs erasing, false if already erased + */ +static bool jit_stream_flash_sector_needs_erase(uintptr_t sector_addr) +{ + const uintptr_t *sector_ptr = (const uintptr_t *) sector_addr; + const uintptr_t erased_pattern = ~((uintptr_t) 0); // All bits set to 1 (0xFF...FF) + size_t num_words = FLASH_SECTOR_SIZE / sizeof(uintptr_t); + + // Check if entire sector is all 0xFF by comparing word-by-word + for (size_t i = 0; i < num_words; i++) { + if (sector_ptr[i] != erased_pattern) { + return true; + } + } + + return false; +} + +static struct JITEntry *globalcontext_find_first_jit_entry(GlobalContext *global, bool *is_valid) +{ + const void *max_end_offset = NULL; + uint32_t end_size; + const void *end_offset; + const char *end_name; + bool valid_cache = true; + + struct ListHead *item; + struct ListHead *avmpack_data = synclist_rdlock(&global->avmpack_data); + LIST_FOR_EACH (item, avmpack_data) { + struct AVMPackData *avmpack_data = GET_LIST_ENTRY(item, struct AVMPackData, avmpack_head); + avmpack_find_section_by_flag(avmpack_data->data, END_OF_FILE_MASK, END_OF_FILE, &end_offset, &end_size, &end_name); + valid_cache = valid_cache && (strcmp(end_name, "END") == 0); + + if (end_offset > max_end_offset) { + max_end_offset = end_offset; + } + } + synclist_unlock(&global->avmpack_data); + + uintptr_t max_end_offset_page = ((((uintptr_t) max_end_offset) - 1) & ~(FLASH_SECTOR_SIZE - 1)); + *is_valid = valid_cache; + + TRACE("globalcontext_find_first_jit_entry: return %p\n", (void *) (max_end_offset_page + FLASH_SECTOR_SIZE)); + + return (struct JITEntry *) (max_end_offset_page + FLASH_SECTOR_SIZE); +} + +static void globalcontext_set_cache_valid(GlobalContext *global) +{ + TRACE("globalcontext_set_cache_valid\n"); + + uint32_t end_size; + const void *end_offset; + const char *end_name; + bool valid_cache; + + // Create platform context for flash operations + struct 
JSFlashPlatformContext *pf_ctx = jit_stream_flash_platform_init(); + if (IS_NULL_PTR(pf_ctx)) { + fprintf(stderr, "Failed to initialize platform flash context\n"); + return; + } + + do { + valid_cache = true; + struct ListHead *item; + struct ListHead *avmpack_data = synclist_rdlock(&global->avmpack_data); + LIST_FOR_EACH (item, avmpack_data) { + struct AVMPackData *avmpack_data = GET_LIST_ENTRY(item, struct AVMPackData, avmpack_head); + avmpack_find_section_by_flag(avmpack_data->data, END_OF_FILE_MASK, END_OF_FILE, &end_offset, &end_size, &end_name); + if (strcmp(end_name, "END")) { + valid_cache = false; + break; + } + } + synclist_unlock(&global->avmpack_data); + if (!valid_cache) { + // Replace "end" with "END" - this is a 3-byte string replacement + const uint8_t end_str[] = "END"; + if (!jit_stream_flash_replace_at_addr(pf_ctx, (uintptr_t) end_name, end_str, 3)) { + fprintf(stderr, "Failed to update cache validity marker from 'end' to 'END'\n"); + break; + } + } + } while (!valid_cache); + + jit_stream_flash_platform_destroy(pf_ctx); +} + +static struct JITEntry *globalcontext_find_last_jit_entry(GlobalContext *global) +{ + bool is_valid; + struct JITEntry *jit_entry = globalcontext_find_first_jit_entry(global, &is_valid); + if (!is_valid) { + TRACE("globalcontext_find_last_jit_entry, cache not valid, returning NULL\n"); + return NULL; + } + + // Find the last valid entry + struct JITEntry *last_valid = jit_entry; + while (jit_entry->magic == JIT_ENTRY_MAGIC) { + last_valid = jit_entry; + jit_entry = jit_entry_next(jit_entry); + } + TRACE("globalcontext_find_last_jit_entry, returning last valid entry at %p\n", (void *) last_valid); + return last_valid; +} + +static bool jit_stream_flash_flush_page(struct JITStreamFlash *js) +{ + // Write the page + // Note: sector is already erased by nif_jit_stream_flash_new (first sector) + // or jit_stream_flash_append (subsequent sectors when crossing boundaries) + if (!jit_stream_flash_platform_write_page(js->pf_ctx, 
js->page_base_addr, js->page_buffer)) { + fprintf(stderr, "Failed to write page at address 0x%lx\n", (unsigned long) js->page_base_addr); + return false; + } + + return true; +} + +static bool jit_stream_flash_finalize_entry(struct JSFlashPlatformContext *pf_ctx, struct JITEntry *jit_entry, uint16_t magic, uint16_t version, uint32_t code, uint32_t labels) +{ + uintptr_t entry_addr = (uintptr_t) jit_entry; + uint8_t page_buffer[FLASH_PAGE_SIZE]; + uintptr_t page_base_addr = entry_addr & ~(FLASH_PAGE_SIZE - 1); + + // Read current page contents + memcpy(page_buffer, (const uint8_t *) page_base_addr, FLASH_PAGE_SIZE); + + // Calculate offset within page + size_t entry_offset = entry_addr - page_base_addr; + struct JITEntry *updated_entry = (struct JITEntry *) (page_buffer + entry_offset); + + // Update fields + updated_entry->magic = magic; + updated_entry->version = version; + updated_entry->code = code; + updated_entry->labels = labels; + + // Write back to flash + if (!jit_stream_flash_platform_write_page(pf_ctx, page_base_addr, page_buffer)) { + fprintf(stderr, "Failed to finalize entry at address 0x%lx\n", (unsigned long) page_base_addr); + return false; + } + + return true; +} + +// Replace data in flash at the given absolute address +// Returns true on success, false if validation fails (trying to set bits 0→1) +static bool jit_stream_flash_replace_at_addr(struct JSFlashPlatformContext *pf_ctx, uintptr_t addr, const uint8_t *data, size_t len) +{ + uintptr_t replace_start = addr; + uintptr_t replace_end = replace_start + len; + + // Iterate over all pages that need to be updated + uintptr_t current_page_addr = replace_start & ~(FLASH_PAGE_SIZE - 1); + size_t data_offset = 0; + + while (current_page_addr < replace_end) { + // Calculate the range within this page that needs to be replaced + uintptr_t page_start_offset = 0; + uintptr_t page_end_offset = FLASH_PAGE_SIZE; + + if (current_page_addr < replace_start) { + page_start_offset = replace_start - 
current_page_addr; + } + + if (current_page_addr + FLASH_PAGE_SIZE > replace_end) { + page_end_offset = replace_end - current_page_addr; + } + + size_t copy_len = page_end_offset - page_start_offset; + + // Prepare page buffer + uint8_t page_buffer[FLASH_PAGE_SIZE]; + uintptr_t page_base_addr = current_page_addr; + + // Read current page contents + memcpy(page_buffer, (const uint8_t *) page_base_addr, FLASH_PAGE_SIZE); + + // Verify that we're only clearing bits (1→0), not setting them (0→1) + const uint8_t *flash_ptr = (const uint8_t *) page_base_addr; + for (size_t i = 0; i < copy_len; i++) { + uint8_t flash_byte = flash_ptr[page_start_offset + i]; + uint8_t new_byte = data[data_offset + i]; + + // Check if we're trying to set any bits (0→1) + if ((new_byte & ~flash_byte) != 0) { + fprintf(stderr, "\n=== FLASH REPLACE VALIDATION FAILED ===\n"); + fprintf(stderr, "Attempting to set bits (0→1) without erase!\n"); + fprintf(stderr, "Page address: 0x%lx\n", (unsigned long) page_base_addr); + fprintf(stderr, "Offset in page: %zu, flash byte: 0x%02hhx, new byte: 0x%02hhx\n", + page_start_offset + i, flash_byte, new_byte); + fprintf(stderr, "Bits being set (0→1): 0x%02hhx\n", (new_byte & ~flash_byte)); + fprintf(stderr, "Replace address: 0x%lx, len=%zu\n", (unsigned long) addr, len); + fprintf(stderr, "========================================\n\n"); + return false; + } + } + + // Update with new data + memcpy(page_buffer + page_start_offset, data + data_offset, copy_len); + + // Write back to flash + if (!jit_stream_flash_platform_write_page(pf_ctx, page_base_addr, page_buffer)) { + fprintf(stderr, "Failed to replace data at address 0x%lx\n", (unsigned long) page_base_addr); + return false; + } + + data_offset += copy_len; + current_page_addr += FLASH_PAGE_SIZE; + } + + return true; +} + +static bool jit_stream_flash_append(struct JITStreamFlash *js, const uint8_t *buffer, size_t count) +{ + while (count > 0) { + // Validate flash constraints: can only write to erased 
(0xFF) bytes + uint8_t current_byte = js->page_buffer[js->page_offset]; + uint8_t new_byte = *buffer; + if ((~current_byte & new_byte) != 0) { + // Trying to set bits from 0→1 without erase + fprintf(stderr, "\n=== JIT STREAM FLASH APPEND ERROR ===\n"); + fprintf(stderr, "Attempting to write 0x%02x over 0x%02x at page offset %u\n", + new_byte, current_byte, js->page_offset); + fprintf(stderr, "Page base address: 0x%lx\n", (unsigned long) js->page_base_addr); + fprintf(stderr, "Flash address: 0x%lx\n", (unsigned long) (js->page_base_addr + js->page_offset)); + fprintf(stderr, "Bits being set 0→1: 0x%02x\n", (~current_byte & new_byte)); + fprintf(stderr, "This indicates the sector was not properly erased!\n"); + fprintf(stderr, "=====================================\n\n"); + return false; + } + + js->page_buffer[js->page_offset] = *buffer; + if (js->page_offset == (FLASH_PAGE_SIZE - 1)) { + if (!jit_stream_flash_flush_page(js)) { + fprintf(stderr, "jit_stream_flash_flush_page failed\n"); + return false; + } + // Move to the next page after flushing + uintptr_t previous_sector = js->page_base_addr & ~(FLASH_SECTOR_SIZE - 1); + js->page_base_addr += FLASH_PAGE_SIZE; + js->page_offset = 0; + uintptr_t new_sector = js->page_base_addr & ~(FLASH_SECTOR_SIZE - 1); + + // Check if we've entered a new sector and erase if needed + if (new_sector != previous_sector) { + if (jit_stream_flash_sector_needs_erase(new_sector)) { + TRACE("jit_stream_flash_append -- erasing new sector at %lx\n", (unsigned long) new_sector); + if (!jit_stream_flash_platform_erase_sector(js->pf_ctx, new_sector)) { + fprintf(stderr, "Failed to erase new sector at address 0x%lx\n", (unsigned long) new_sector); + return false; + } + } + } + + // Read the new page contents into the buffer + memcpy(js->page_buffer, (const uint8_t *) js->page_base_addr, FLASH_PAGE_SIZE); + } else { + js->page_offset++; + } + buffer++; + count--; + } + return true; +} + +static term nif_jit_stream_flash_new(Context *ctx, int 
argc, term argv[]) +{ + UNUSED(argc); + UNUSED(argv); + + struct JITEntry *last_valid_entry = globalcontext_find_last_jit_entry(ctx->global); + struct JITEntry *new_entry; + + if (last_valid_entry == NULL) { + // No valid entries, get the first position + bool is_valid; + new_entry = globalcontext_find_first_jit_entry(ctx->global, &is_valid); + } else { + // Get position after last valid entry + new_entry = jit_entry_next(last_valid_entry); + } + + // Return a resource object + struct JITStreamFlash *js = enif_alloc_resource(jit_stream_flash_resource_type, sizeof(struct JITStreamFlash)); + if (IS_NULL_PTR(js)) { + RAISE_ERROR(OUT_OF_MEMORY_ATOM); + } + + // Initialize platform context + js->pf_ctx = jit_stream_flash_platform_init(); + if (IS_NULL_PTR(js->pf_ctx)) { + fprintf(stderr, "Failed to initialize platform flash context\n"); + enif_release_resource(js); + RAISE_ERROR(OUT_OF_MEMORY_ATOM); + } + + js->jit_entry = new_entry; + js->page_base_addr = (uintptr_t) new_entry & ~(FLASH_PAGE_SIZE - 1); + + // Handle sector erasing for the sector where JIT entry starts + uintptr_t new_entry_addr = (uintptr_t) new_entry; + uintptr_t new_entry_sector = new_entry_addr & ~(FLASH_SECTOR_SIZE - 1); + uintptr_t sector_end = new_entry_sector + FLASH_SECTOR_SIZE; + + // Check if there's stale data from entry position to end of sector + bool has_stale_data = false; + const uintptr_t *check_ptr = (const uintptr_t *) new_entry_addr; + const uintptr_t *check_end = (const uintptr_t *) sector_end; + const uintptr_t erased_pattern = ~((uintptr_t) 0); + + while (check_ptr < check_end) { + if (*check_ptr != erased_pattern) { + has_stale_data = true; + break; + } + check_ptr++; + } + + if (has_stale_data) { + // There's stale data (from failed compilation) - need to erase but preserve data before entry + size_t preserve_size = new_entry_addr - new_entry_sector; + + if (preserve_size > 0) { + // Allocate buffer for the sector + uint8_t *sector_buffer = malloc(FLASH_SECTOR_SIZE); + if 
(IS_NULL_PTR(sector_buffer)) { + fprintf(stderr, "Failed to allocate sector buffer\n"); + jit_stream_flash_platform_destroy(js->pf_ctx); + enif_release_resource(js); + RAISE_ERROR(OUT_OF_MEMORY_ATOM); + } + + // Copy data to preserve (before the entry) + memcpy(sector_buffer, (const uint8_t *) new_entry_sector, preserve_size); + + // Fill rest with 0xFF (erased state) + memset(sector_buffer + preserve_size, 0xFF, FLASH_SECTOR_SIZE - preserve_size); + + // Erase the sector + TRACE("nif_jit_stream_flash_new -- erasing sector with stale data at %lx (preserving %zu bytes)\n", + (unsigned long) new_entry_sector, preserve_size); + if (!jit_stream_flash_platform_erase_sector(js->pf_ctx, new_entry_sector)) { + fprintf(stderr, "Failed to erase sector with stale data\n"); + free(sector_buffer); + jit_stream_flash_platform_destroy(js->pf_ctx); + enif_release_resource(js); + RAISE_ERROR(BADARG_ATOM); + } + + // Write back the preserved data page by page + for (size_t page_offset = 0; page_offset < preserve_size; page_offset += FLASH_PAGE_SIZE) { + if (!jit_stream_flash_platform_write_page(js->pf_ctx, new_entry_sector + page_offset, + sector_buffer + page_offset)) { + fprintf(stderr, "Failed to write back preserved data\n"); + free(sector_buffer); + jit_stream_flash_platform_destroy(js->pf_ctx); + enif_release_resource(js); + RAISE_ERROR(BADARG_ATOM); + } + } + + free(sector_buffer); + } else { + // Entry is at sector boundary, just erase + TRACE("nif_jit_stream_flash_new -- erasing sector with stale data at %lx\n", + (unsigned long) new_entry_sector); + if (!jit_stream_flash_platform_erase_sector(js->pf_ctx, new_entry_sector)) { + fprintf(stderr, "Failed to erase sector for new JIT entry\n"); + jit_stream_flash_platform_destroy(js->pf_ctx); + enif_release_resource(js); + RAISE_ERROR(BADARG_ATOM); + } + } + } else { + TRACE("nif_jit_stream_flash_new -- sector at %lx is clean (no stale data)\n", + (unsigned long) new_entry_sector); + } + + // Now handle the sector where JIT 
entry ends (if different from start sector) + uintptr_t entry_end = new_entry_addr + sizeof(struct JITEntry); + uintptr_t entry_end_sector = entry_end & ~(FLASH_SECTOR_SIZE - 1); + + if (entry_end_sector != new_entry_sector) { + // Entry spans two sectors - erase the end sector if needed + if (jit_stream_flash_sector_needs_erase(entry_end_sector)) { + TRACE("nif_jit_stream_flash_new -- erasing end sector at %lx\n", + (unsigned long) entry_end_sector); + if (!jit_stream_flash_platform_erase_sector(js->pf_ctx, entry_end_sector)) { + fprintf(stderr, "Failed to erase end sector for new JIT entry\n"); + jit_stream_flash_platform_destroy(js->pf_ctx); + enif_release_resource(js); + RAISE_ERROR(BADARG_ATOM); + } + } + } + + memcpy(js->page_buffer, (const uint8_t *) js->page_base_addr, FLASH_PAGE_SIZE); + js->page_offset = (uintptr_t) new_entry & (FLASH_PAGE_SIZE - 1); + + TRACE("nif_jit_stream_flash_new entry is %p, page_offset is %lx\n", (void *) new_entry, (unsigned long) js->page_offset); + + // Append the first bytes, which may flush the page + struct JITEntry header; + header.magic = 0xFFFF; + header.version = 0xFFFF; + header.code = 0xFFFFFFFF; + header.labels = 0xFFFFFFFF; + header.size = 0xFFFFFFFF; + if (!jit_stream_flash_append(js, (const uint8_t *) &header, sizeof(header))) { + jit_stream_flash_platform_destroy(js->pf_ctx); + enif_release_resource(js); + RAISE_ERROR(BADARG_ATOM); + } + + term obj = enif_make_resource(erl_nif_env_from_context(ctx), js); + enif_release_resource(js); // decrement refcount after enif_alloc_resource + return obj; +} + +static term nif_jit_stream_flash_offset(Context *ctx, int argc, term argv[]) +{ + UNUSED(argc); + + void *js_obj_ptr; + if (UNLIKELY(!enif_get_resource(erl_nif_env_from_context(ctx), argv[0], jit_stream_flash_resource_type, &js_obj_ptr))) { + RAISE_ERROR(BADARG_ATOM); + } + struct JITStreamFlash *js_obj = (struct JITStreamFlash *) js_obj_ptr; + + uintptr_t current_addr = js_obj->page_base_addr + js_obj->page_offset; + 
uintptr_t base_addr = ((uintptr_t) js_obj->jit_entry + sizeof(struct JITEntry)); + + int offset = current_addr - base_addr; + + return term_from_int(offset); +} + +static term nif_jit_stream_flash_append(Context *ctx, int argc, term argv[]) +{ + UNUSED(argc); + + VALIDATE_VALUE(argv[1], term_is_binary); + void *js_obj_ptr; + if (UNLIKELY(!enif_get_resource(erl_nif_env_from_context(ctx), argv[0], jit_stream_flash_resource_type, &js_obj_ptr))) { + RAISE_ERROR(BADARG_ATOM); + } + struct JITStreamFlash *js_obj = (struct JITStreamFlash *) js_obj_ptr; + + size_t binary_size = term_binary_size(argv[1]); + const uint8_t *binary_data = (const uint8_t *) term_binary_data(argv[1]); + + if (!jit_stream_flash_append(js_obj, binary_data, binary_size)) { + RAISE_ERROR(BADARG_ATOM); + } + + return argv[0]; +} + +static term nif_jit_stream_flash_replace(Context *ctx, int argc, term argv[]) +{ + UNUSED(argc); + + VALIDATE_VALUE(argv[1], term_is_integer); + VALIDATE_VALUE(argv[2], term_is_binary); + void *js_obj_ptr; + if (UNLIKELY(!enif_get_resource(erl_nif_env_from_context(ctx), argv[0], jit_stream_flash_resource_type, &js_obj_ptr))) { + RAISE_ERROR(BADARG_ATOM); + } + + size_t binary_size = term_binary_size(argv[2]); + const uint8_t *binary_data = (const uint8_t *) term_binary_data(argv[2]); + avm_int_t offset = term_to_int(argv[1]); + + struct JITStreamFlash *js_obj = (struct JITStreamFlash *) js_obj_ptr; + + uintptr_t base_addr = ((uintptr_t) js_obj->jit_entry + sizeof(struct JITEntry)); + uintptr_t replace_start = base_addr + offset; + uintptr_t replace_end = replace_start + binary_size; + + // Iterate over all pages that need to be updated + uintptr_t current_page_addr = replace_start & ~(FLASH_PAGE_SIZE - 1); + size_t binary_offset = 0; + + while (current_page_addr < replace_end) { + // Calculate the range within this page that needs to be replaced + uintptr_t page_start_offset = 0; + uintptr_t page_end_offset = FLASH_PAGE_SIZE; + + if (current_page_addr < replace_start) { + 
page_start_offset = replace_start - current_page_addr; + } + + if (current_page_addr + FLASH_PAGE_SIZE > replace_end) { + page_end_offset = replace_end - current_page_addr; + } + + size_t copy_len = page_end_offset - page_start_offset; + + // Check if this is the current buffer page + if (current_page_addr == js_obj->page_base_addr) { + // Update current buffer directly + memcpy(js_obj->page_buffer + page_start_offset, binary_data + binary_offset, copy_len); + } else { + // This is an already-flushed page, need to update flash + if (!jit_stream_flash_replace_at_addr(js_obj->pf_ctx, current_page_addr + page_start_offset, + binary_data + binary_offset, + copy_len)) { + RAISE_ERROR(BADARG_ATOM); + } + } + + binary_offset += copy_len; + current_page_addr += FLASH_PAGE_SIZE; + } + + return argv[0]; +} + +static term nif_jit_stream_flash_read(Context *ctx, int argc, term argv[]) +{ + UNUSED(argc); + + VALIDATE_VALUE(argv[1], term_is_integer); + VALIDATE_VALUE(argv[2], term_is_integer); + void *js_obj_ptr; + if (UNLIKELY(!enif_get_resource(erl_nif_env_from_context(ctx), argv[0], jit_stream_flash_resource_type, &js_obj_ptr))) { + RAISE_ERROR(BADARG_ATOM); + } + struct JITStreamFlash *js_obj = (struct JITStreamFlash *) js_obj_ptr; + + avm_int_t offset = term_to_int(argv[1]); + avm_int_t len = term_to_int(argv[2]); + + // Validate parameters + if (UNLIKELY(len <= 0 || offset < 0)) { + RAISE_ERROR(BADARG_ATOM); + } + + // Calculate current stream position + uintptr_t current_addr = js_obj->page_base_addr + js_obj->page_offset; + uintptr_t base_addr = ((uintptr_t) js_obj->jit_entry + sizeof(struct JITEntry)); + size_t stream_offset = current_addr - base_addr; + + // Check if read is within bounds + if (UNLIKELY((size_t) (offset + len) > stream_offset)) { + RAISE_ERROR(BADARG_ATOM); + } + + if (UNLIKELY(memory_ensure_free_opt(ctx, TERM_BINARY_HEAP_SIZE(len), MEMORY_CAN_SHRINK) != MEMORY_GC_OK)) { + RAISE_ERROR(OUT_OF_MEMORY_ATOM); + } + + uintptr_t read_addr = base_addr + 
offset; + return term_from_literal_binary((const uint8_t *) read_addr, len, &ctx->heap, ctx->global); +} + +static term nif_jit_stream_flash_flush(Context *ctx, int argc, term argv[]) +{ + UNUSED(ctx); + UNUSED(argc); + + void *js_obj_ptr; + if (UNLIKELY(!enif_get_resource(erl_nif_env_from_context(ctx), argv[0], jit_stream_flash_resource_type, &js_obj_ptr))) { + RAISE_ERROR(BADARG_ATOM); + } + struct JITStreamFlash *js_obj = (struct JITStreamFlash *) js_obj_ptr; + + // Calculate the size BEFORE flushing + uintptr_t current_addr = js_obj->page_base_addr + js_obj->page_offset; + uintptr_t code_start = (uintptr_t) js_obj->jit_entry + sizeof(struct JITEntry); + uint32_t code_size = current_addr - code_start; + + // Check if the size field is in the current unflushed page buffer or in an already-flushed page + uintptr_t size_field_addr = (uintptr_t) &js_obj->jit_entry->size; + uintptr_t size_field_page = size_field_addr & ~(FLASH_PAGE_SIZE - 1); + + if (size_field_page == js_obj->page_base_addr) { + // Size field is in the current buffer, update it directly before flushing + size_t offset_in_page = size_field_addr - js_obj->page_base_addr; + memcpy(js_obj->page_buffer + offset_in_page, &code_size, sizeof(uint32_t)); + } else { + // Size field is in an already-flushed page, use replace + if (!jit_stream_flash_replace_at_addr(js_obj->pf_ctx, size_field_addr, + (const uint8_t *) &code_size, + sizeof(uint32_t))) { + RAISE_ERROR(BADARG_ATOM); + } + } + + // Flush the final page + if (!jit_stream_flash_flush_page(js_obj)) { + fprintf(stderr, "jit_stream_flash_flush_page failed\n"); + RAISE_ERROR(BADARG_ATOM); + } + + return argv[0]; +} + +static term nif_jit_stream_module(Context *ctx, int argc, term argv[]) +{ + UNUSED(argc); + UNUSED(argv); + + return globalcontext_make_atom(ctx->global, ATOM_STR("\x10", "jit_stream_flash")); +} + +static const struct Nif jit_stream_module_nif = { + .base.type = NIFFunctionType, + .nif_ptr = nif_jit_stream_module +}; +static const struct 
Nif jit_stream_flash_new_nif = { + .base.type = NIFFunctionType, + .nif_ptr = nif_jit_stream_flash_new +}; +static const struct Nif jit_stream_flash_offset_nif = { + .base.type = NIFFunctionType, + .nif_ptr = nif_jit_stream_flash_offset +}; +static const struct Nif jit_stream_flash_append_nif = { + .base.type = NIFFunctionType, + .nif_ptr = nif_jit_stream_flash_append +}; +static const struct Nif jit_stream_flash_replace_nif = { + .base.type = NIFFunctionType, + .nif_ptr = nif_jit_stream_flash_replace +}; +static const struct Nif jit_stream_flash_read_nif = { + .base.type = NIFFunctionType, + .nif_ptr = nif_jit_stream_flash_read +}; +static const struct Nif jit_stream_flash_flush_nif = { + .base.type = NIFFunctionType, + .nif_ptr = nif_jit_stream_flash_flush +}; + +ModuleNativeEntryPoint jit_stream_flash_entry_point(Context *ctx, term jit_stream) +{ + void *js_obj_ptr; + if (UNLIKELY(!enif_get_resource(erl_nif_env_from_context(ctx), jit_stream, jit_stream_flash_resource_type, &js_obj_ptr))) { + return NULL; + } + struct JITStreamFlash *js_obj = (struct JITStreamFlash *) js_obj_ptr; + + uintptr_t base_addr = ((uintptr_t) js_obj->jit_entry + sizeof(struct JITEntry)); + + // Convert to executable address (handles DBUS→IBUS, Thumb bit, etc.) 
+ base_addr = jit_stream_flash_platform_ptr_to_executable(base_addr); + + return (ModuleNativeEntryPoint) base_addr; +} + +static void jit_stream_flash_dtor(ErlNifEnv *caller_env, void *obj) +{ + UNUSED(caller_env); + struct JITStreamFlash *js_obj = (struct JITStreamFlash *) obj; + if (js_obj->pf_ctx) { + jit_stream_flash_platform_destroy(js_obj->pf_ctx); + } +} + +const struct Nif *jit_stream_flash_get_nif(const char *nifname) +{ + if (strcmp("jit:stream_module/0", nifname) == 0) { + return &jit_stream_module_nif; + } + if (strncmp("jit_stream_flash:", nifname, 17) == 0) { + const char *rest = nifname + 17; + if (strcmp("new/1", rest) == 0) { + return &jit_stream_flash_new_nif; + } + if (strcmp("offset/1", rest) == 0) { + return &jit_stream_flash_offset_nif; + } + if (strcmp("append/2", rest) == 0) { + return &jit_stream_flash_append_nif; + } + if (strcmp("replace/3", rest) == 0) { + return &jit_stream_flash_replace_nif; + } + if (strcmp("read/3", rest) == 0) { + return &jit_stream_flash_read_nif; + } + if (strcmp("flush/1", rest) == 0) { + return &jit_stream_flash_flush_nif; + } + } + return NULL; +} + +void jit_stream_flash_init(GlobalContext *global) +{ + ErlNifEnv env; + erl_nif_env_partial_init_from_globalcontext(&env, global); + jit_stream_flash_resource_type = enif_init_resource_type(&env, "jit_stream_flash", &jit_stream_flash_resource_type_init, ERL_NIF_RT_CREATE, NULL); +} + +void globalcontext_set_cache_native_code(GlobalContext *global, Module *mod, uint16_t version, ModuleNativeEntryPoint entry_point, uint32_t labels) +{ + bool is_valid; + (void) globalcontext_find_first_jit_entry(global, &is_valid); + + struct JSFlashPlatformContext *pf_ctx = jit_stream_flash_platform_init(); + if (IS_NULL_PTR(pf_ctx)) { + fprintf(stderr, "Failed to initialize platform flash context\n"); + return; + } + + // Reverse the executable address transformation to get data address + // Platform-specific: Thumb (clear bit 0), RISC-V (IBUS→DBUS conversion) + uintptr_t data_addr 
= jit_stream_flash_platform_executable_to_ptr((uintptr_t) entry_point); + + struct JITEntry *jit_entry = (struct JITEntry *) (data_addr - sizeof(struct JITEntry)); + uintptr_t code = (uintptr_t) mod->code; + + // Finalize the entry + if (!jit_stream_flash_finalize_entry(pf_ctx, jit_entry, JIT_ENTRY_MAGIC, version, (uint32_t) code, labels)) { + fprintf(stderr, "jit_stream_flash_finalize_entry failed\n"); + jit_stream_flash_platform_destroy(pf_ctx); + return; + } + +#ifdef ENABLE_TRACE + // Compute CRC of entire module for verification + uint32_t module_crc = crc32((const uint8_t *) jit_entry, sizeof(struct JITEntry) + jit_entry->size); + TRACE("After finalize - jit_entry=%p CRC32=0x%08x (entry+code size=%u)\n", + (void *) jit_entry, (unsigned int) module_crc, (unsigned int) (sizeof(struct JITEntry) + jit_entry->size)); +#endif + + // Erase next sector if it's completely after the current module + struct JITEntry *current_entry = (struct JITEntry *) (data_addr - sizeof(struct JITEntry)); + struct JITEntry *next_entry = jit_entry_next(current_entry); + uintptr_t next_entry_addr = (uintptr_t) next_entry; + uintptr_t next_sector = next_entry_addr & ~(FLASH_SECTOR_SIZE - 1); + + // Calculate the sector where the current module ENDS (not where it starts) + uintptr_t current_module_end = (uintptr_t) current_entry + sizeof(struct JITEntry) + current_entry->size; + uintptr_t current_end_sector = current_module_end & ~(FLASH_SECTOR_SIZE - 1); + + // Only erase next sector if it's completely after the current module's end + // This prevents erasing a sector that contains the tail of the current module + if (next_sector > current_end_sector) { + // Next entry is in a sector completely after current module, erase it if it has stale data + if (next_entry->magic != 0xFFFF) { + TRACE("globalcontext_set_cache_native_code -- NOT erasing new sector at %lx\n", (unsigned long) next_sector); + if (!jit_stream_flash_platform_erase_sector(pf_ctx, next_sector)) { + fprintf(stderr, 
"jit_stream_flash_platform_erase_sector failed\n"); + jit_stream_flash_platform_destroy(pf_ctx); + return; + } + } else { + TRACE("globalcontext_set_cache_native_code -- NOT erasing new sector at %lx\n", (unsigned long) next_sector); + } + } + + if (!is_valid) { + // Mark that cache entry is valid by replacing end with END in installed AVM + globalcontext_set_cache_valid(global); + } + + jit_stream_flash_platform_destroy(pf_ctx); +} + +// Implementation of jit_stream_entry_point, sys_get_cache_native_code and +// sys_set_cache_native_code using this jit_stream +#ifndef TEST_JIT_STREAM_FLASH +ModuleNativeEntryPoint jit_stream_entry_point(Context *ctx, term jit_stream) +{ + return jit_stream_flash_entry_point(ctx, jit_stream); +} + +bool sys_get_cache_native_code(GlobalContext *global, Module *mod, uint16_t *version, ModuleNativeEntryPoint *entry_point, uint32_t *labels) +{ + bool is_valid; + struct JITEntry *jit_entry = globalcontext_find_first_jit_entry(global, &is_valid); + if (!is_valid) { + return false; + } + uintptr_t code = (uintptr_t) mod->code; + while (jit_entry->magic == JIT_ENTRY_MAGIC) { + if (jit_entry->code == (uint32_t) code) { + *version = jit_entry->version; + uintptr_t ep_addr = (uintptr_t) jit_entry + sizeof(struct JITEntry); + ep_addr = jit_stream_flash_platform_ptr_to_executable(ep_addr); + *entry_point = (ModuleNativeEntryPoint) ep_addr; + *labels = jit_entry->labels; + +#ifdef ENABLE_TRACE + // Compute CRC of entire module for verification + uint32_t module_crc = crc32((const uint8_t *) jit_entry, sizeof(struct JITEntry) + jit_entry->size); + TRACE("Loading from cache - jit_entry=%p CRC32=0x%08x (entry+code size=%u)\n", + (void *) jit_entry, (unsigned int) module_crc, (unsigned int) (sizeof(struct JITEntry) + jit_entry->size)); +#endif + + return true; + } + jit_entry = jit_entry_next(jit_entry); + } + return false; +} + +void sys_set_cache_native_code(GlobalContext *global, Module *mod, uint16_t version, ModuleNativeEntryPoint entry_point, 
uint32_t labels) +{ + globalcontext_set_cache_native_code(global, mod, version, entry_point, labels); +} +#endif diff --git a/src/libAtomVM/jit_stream_flash.h b/src/libAtomVM/jit_stream_flash.h new file mode 100644 index 0000000000..30644189ea --- /dev/null +++ b/src/libAtomVM/jit_stream_flash.h @@ -0,0 +1,129 @@ +/* + * This file is part of AtomVM. + * + * Copyright 2025 by Paul Guyot + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + * SPDX-License-Identifier: Apache-2.0 OR LGPL-2.1-or-later + */ + +/** + * @file jit_stream_flash.h + * @brief JIT code caching in flash memory - common implementation + */ + +#ifndef _JIT_STREAM_FLASH_H_ +#define _JIT_STREAM_FLASH_H_ + +#include "globalcontext.h" +#include "jit_stream_flash_platform.h" +#include "module.h" + +#include +#include + +#ifdef __cplusplus +extern "C" { +#endif + +/** + * @brief Platform-specific flash context (opaque) + */ +struct JSFlashPlatformContext; + +/** + * @brief Initialize JIT stream flash subsystem + * + * @param global Global context + */ +void jit_stream_flash_init(GlobalContext *global); + +/** + * @brief Get NIF for jit_stream_flash operations + * + * @param nifname NIF name + * @return NIF pointer or NULL + */ +const struct Nif *jit_stream_flash_get_nif(const char *nifname); + +/** + * @brief Get entry point from jit_stream_flash. 
+ * Called by `jit_stream_entry_point` + * + * @param ctx Context + * @param jit_stream JIT stream term + * @return Entry point or NULL + */ +ModuleNativeEntryPoint jit_stream_flash_entry_point(Context *ctx, term jit_stream); + +/** + * @brief Finalize flash operation by marking an entry point as valid for + * a given module. This is called by `sys_set_cache_native_code`. + * + * @param global Global context + * @param mod Module + * @param version Module version + * @param entry_point Entry point + * @param labels Number of labels + */ +void globalcontext_set_cache_native_code(GlobalContext *global, Module *mod, uint16_t version, ModuleNativeEntryPoint entry_point, uint32_t labels); + +/** + * @brief Initialize platform flash context + * @return Platform flash context, or NULL on error + */ +struct JSFlashPlatformContext *jit_stream_flash_platform_init(void); + +/** + * @brief Destroy platform flash context + * @param pf_ctx Platform flash context to destroy + */ +void jit_stream_flash_platform_destroy(struct JSFlashPlatformContext *pf_ctx); + +/** + * @brief Erase a flash sector at the given address + * @param pf_ctx Platform flash context + * @param addr Virtual address of the sector to erase (must be sector-aligned) + * @return true on success, false on error + */ +bool jit_stream_flash_platform_erase_sector(struct JSFlashPlatformContext *pf_ctx, uintptr_t addr); + +/** + * @brief Write a page to flash + * @param pf_ctx Platform flash context + * @param addr Virtual address to write to (must be page-aligned) + * @param data Data to write (must be FLASH_PAGE_SIZE bytes) + * @return true on success, false on error + */ +bool jit_stream_flash_platform_write_page(struct JSFlashPlatformContext *pf_ctx, uintptr_t addr, const uint8_t *data); + +/** + * @brief Convert data bus address to instruction bus address + * @param addr Data bus address + * @return Instruction bus address (executable pointer) + */ +uintptr_t jit_stream_flash_platform_ptr_to_executable(uintptr_t 
addr); + +/** + * @brief Convert instruction bus address to data bus address + * @param addr Instruction bus address (executable pointer) + * @return Data bus address + */ +uintptr_t jit_stream_flash_platform_executable_to_ptr(uintptr_t addr); + +#ifdef __cplusplus +} +#endif + +#endif // _JIT_STREAM_FLASH_H_ diff --git a/src/libAtomVM/module.c b/src/libAtomVM/module.c index 108d5027d8..f0eec5770d 100644 --- a/src/libAtomVM/module.c +++ b/src/libAtomVM/module.c @@ -38,6 +38,9 @@ #include #include +// #define ENABLE_TRACE +#include "trace.h" + #ifdef WITH_ZLIB #include #endif @@ -316,13 +319,11 @@ Module *module_new_from_iff_binary(GlobalContext *global, const void *iff_binary return NULL; } -#ifdef ENABLE_ADVANCED_TRACE - mod->import_table = beam_file + offsets[IMPT]; -#endif if (offsets[CODE]) { mod->code = (CodeChunk *) (beam_file + offsets[CODE]); } mod->export_table = beam_file + offsets[EXPT]; + mod->import_table = beam_file + offsets[IMPT]; mod->local_table = beam_file + offsets[LOCT]; mod->atom_table = beam_file + offsets[AT8U]; mod->fun_table = beam_file + offsets[FUNT]; @@ -353,6 +354,13 @@ Module *module_new_from_iff_binary(GlobalContext *global, const void *iff_binary fprintf(stderr, "Native code chunk found but no compatible architecture or variant found\n"); } } + } else { + ModuleNativeEntryPoint module_entry_point; + uint32_t labels; + uint16_t version; + if (sys_get_cache_native_code(global, mod, &version, &module_entry_point, &labels) && version == JIT_FORMAT_VERSION) { + module_set_native_code(mod, labels, module_entry_point); + } } #endif diff --git a/src/libAtomVM/module.h b/src/libAtomVM/module.h index 2f7b56321c..1748ed4f6a 100644 --- a/src/libAtomVM/module.h +++ b/src/libAtomVM/module.h @@ -109,12 +109,9 @@ struct Module { int module_index; -#ifdef ENABLE_ADVANCED_TRACE - void *import_table; -#endif - CodeChunk *code; void *export_table; + void *import_table; void *local_table; void *atom_table; void *fun_table; diff --git 
a/src/libAtomVM/nifs.c b/src/libAtomVM/nifs.c index e3d76eba41..24003cfcc7 100644 --- a/src/libAtomVM/nifs.c +++ b/src/libAtomVM/nifs.c @@ -203,6 +203,7 @@ static term nif_code_server_code_chunk(Context *ctx, int argc, term argv[]); static term nif_code_server_atom_resolver(Context *ctx, int argc, term argv[]); static term nif_code_server_literal_resolver(Context *ctx, int argc, term argv[]); static term nif_code_server_type_resolver(Context *ctx, int argc, term argv[]); +static term nif_code_server_import_resolver(Context *ctx, int argc, term argv[]); static term nif_code_server_set_native_code(Context *ctx, int argc, term argv[]); #endif static term nif_erlang_module_loaded(Context *ctx, int argc, term argv[]); @@ -774,6 +775,10 @@ static const struct Nif code_server_type_resolver_nif = { .base.type = NIFFunctionType, .nif_ptr = nif_code_server_type_resolver }; +static const struct Nif code_server_import_resolver_nif = { + .base.type = NIFFunctionType, + .nif_ptr = nif_code_server_import_resolver +}; static const struct Nif code_server_set_native_code_nif = { .base.type = NIFFunctionType, .nif_ptr = nif_code_server_set_native_code @@ -4806,7 +4811,7 @@ static term nif_atomvm_get_start_beam(Context *ctx, int argc, term argv[]) uint32_t size; const void *beam; const char *module_name; - if (!avmpack_find_section_by_flag(avmpack_data->data, BEAM_START_FLAG, &beam, &size, &module_name)) { + if (!avmpack_find_section_by_flag(avmpack_data->data, BEAM_START_FLAG, BEAM_START_FLAG, &beam, &size, &module_name)) { synclist_unlock(&ctx->global->avmpack_data); if (UNLIKELY(memory_ensure_free(ctx, TUPLE_SIZE(2)) != MEMORY_GC_OK)) { RAISE_ERROR(OUT_OF_MEMORY_ATOM); @@ -5622,10 +5627,60 @@ static term nif_code_server_type_resolver(Context *ctx, int argc, term argv[]) if (IS_NULL_PTR(mod)) { RAISE_ERROR(BADARG_ATOM); } + int type_index = term_to_int(argv[1]); return module_get_type_by_index(mod, type_index, ctx); } +static term nif_code_server_import_resolver(Context *ctx, int 
argc, term argv[]) +{ + UNUSED(argc); + VALIDATE_VALUE(argv[0], term_is_atom); + VALIDATE_VALUE(argv[1], term_is_integer); + + term module_name = argv[0]; + Module *mod = globalcontext_get_module(ctx->global, term_to_atom_index(module_name)); + if (IS_NULL_PTR(mod)) { + RAISE_ERROR(BADARG_ATOM); + } + int import_index = term_to_int(argv[1]); + + // Get the imported function entry at the given index + if (IS_NULL_PTR(mod->imported_funcs) || import_index < 0) { + RAISE_ERROR(BADARG_ATOM); + } + + // Parse the import table to get the module, function, and arity + // Import table format: each entry is 12 bytes (module_atom_index, function_atom_index, arity) + const uint8_t *import_table = mod->import_table; + if (IS_NULL_PTR(import_table)) { + RAISE_ERROR(BADARG_ATOM); + } + + int functions_count = READ_32_UNALIGNED(import_table + 8); + if (import_index >= functions_count) { + RAISE_ERROR(BADARG_ATOM); + } + + int local_module_atom_index = READ_32_UNALIGNED(import_table + import_index * 12 + 12); + int local_function_atom_index = READ_32_UNALIGNED(import_table + import_index * 12 + 4 + 12); + uint32_t arity = READ_32_UNALIGNED(import_table + import_index * 12 + 8 + 12); + + term module_atom = module_get_atom_term_by_id(mod, local_module_atom_index); + term function_atom = module_get_atom_term_by_id(mod, local_function_atom_index); + term arity_term = term_from_int(arity); + + if (UNLIKELY(memory_ensure_free(ctx, TUPLE_SIZE(3)) != MEMORY_GC_OK)) { + RAISE_ERROR(OUT_OF_MEMORY_ATOM); + } + + term result = term_alloc_tuple(3, &ctx->heap); + term_put_tuple_element(result, 0, module_atom); + term_put_tuple_element(result, 1, function_atom); + term_put_tuple_element(result, 2, arity_term); + + return result; +} static term nif_code_server_set_native_code(Context *ctx, int argc, term argv[]) { UNUSED(argc); @@ -5633,6 +5688,8 @@ static term nif_code_server_set_native_code(Context *ctx, int argc, term argv[]) VALIDATE_VALUE(argv[0], term_is_atom); VALIDATE_VALUE(argv[1], 
term_is_integer); + avm_int_t labels_count = term_to_int(argv[1]); + term module_name = argv[0]; Module *mod = globalcontext_get_module(ctx->global, term_to_atom_index(module_name)); if (IS_NULL_PTR(mod)) { @@ -5646,10 +5703,12 @@ static term nif_code_server_set_native_code(Context *ctx, int argc, term argv[]) SMP_MODULE_LOCK(mod); if (mod->native_code == NULL) { - module_set_native_code(mod, term_to_int(argv[1]), entry_point); + module_set_native_code(mod, labels_count, entry_point); } SMP_MODULE_UNLOCK(mod); + sys_set_cache_native_code(ctx->global, mod, JIT_FORMAT_VERSION, entry_point, labels_count); + return OK_ATOM; } #endif @@ -5688,6 +5747,8 @@ static term nif_jit_backend_module(Context *ctx, int argc, term argv[]) return JIT_AARCH64_ATOM; #elif JIT_ARCH_TARGET == JIT_ARCH_ARMV6M return JIT_ARMV6M_ATOM; +#elif JIT_ARCH_TARGET == JIT_ARCH_RISCV32 + return JIT_RISCV32_ATOM; #else #error Unknown JIT target #endif diff --git a/src/libAtomVM/nifs.gperf b/src/libAtomVM/nifs.gperf index a4a2591fa0..6423381dd4 100644 --- a/src/libAtomVM/nifs.gperf +++ b/src/libAtomVM/nifs.gperf @@ -183,6 +183,7 @@ code_server:code_chunk/1, IF_HAVE_JIT(&code_server_code_chunk_nif) code_server:atom_resolver/2, IF_HAVE_JIT(&code_server_atom_resolver_nif) code_server:literal_resolver/2, IF_HAVE_JIT(&code_server_literal_resolver_nif) code_server:type_resolver/2, IF_HAVE_JIT(&code_server_type_resolver_nif) +code_server:import_resolver/2, IF_HAVE_JIT(&code_server_import_resolver_nif) code_server:set_native_code/3, IF_HAVE_JIT(&code_server_set_native_code_nif) console:print/1, &console_print_nif base64:encode/1, &base64_encode_nif diff --git a/src/libAtomVM/opcodesswitch.h b/src/libAtomVM/opcodesswitch.h index d8fc4106b0..ea56be68ae 100644 --- a/src/libAtomVM/opcodesswitch.h +++ b/src/libAtomVM/opcodesswitch.h @@ -4074,6 +4074,9 @@ HOT_FUNC int scheduler_entry_point(GlobalContext *glb) RAISE_ERROR(OUT_OF_MEMORY_ATOM); } term t = term_create_empty_binary(size_val, &ctx->heap, ctx->global); + 
if (UNLIKELY(term_is_invalid_term(t))) { + RAISE_ERROR(OUT_OF_MEMORY_ATOM); + } ctx->bs = t; ctx->bs_offset = 0; @@ -4122,6 +4125,9 @@ HOT_FUNC int scheduler_entry_point(GlobalContext *glb) RAISE_ERROR(OUT_OF_MEMORY_ATOM); } term t = term_create_empty_binary(size_val / 8, &ctx->heap, ctx->global); + if (UNLIKELY(term_is_invalid_term(t))) { + RAISE_ERROR(OUT_OF_MEMORY_ATOM); + } ctx->bs = t; ctx->bs_offset = 0; @@ -4530,6 +4536,9 @@ HOT_FUNC int scheduler_entry_point(GlobalContext *glb) RAISE_ERROR(OUT_OF_MEMORY_ATOM); } term t = term_create_empty_binary(0, &ctx->heap, ctx->global); + if (UNLIKELY(term_is_invalid_term(t))) { + RAISE_ERROR(OUT_OF_MEMORY_ATOM); + } ctx->bs = t; ctx->bs_offset = 0; @@ -4595,6 +4604,9 @@ HOT_FUNC int scheduler_entry_point(GlobalContext *glb) TRACE("bs_append/8, fail=%u size=" AVM_INT_FMT " unit=%u src=0x%" TERM_X_FMT " dreg=%c%i\n", (unsigned) fail, size_val, (unsigned) unit, src, T_DEST_REG(dreg)); src = x_regs[live]; term t = term_create_empty_binary(src_size + size_val / 8, &ctx->heap, ctx->global); + if (UNLIKELY(term_is_invalid_term(t))) { + RAISE_ERROR(OUT_OF_MEMORY_ATOM); + } memcpy((void *) term_binary_data(t), (void *) term_binary_data(src), src_size); ctx->bs = t; @@ -4641,8 +4653,10 @@ HOT_FUNC int scheduler_entry_point(GlobalContext *glb) RAISE_ERROR(OUT_OF_MEMORY_ATOM); } DECODE_COMPACT_TERM(src, src_pc) - term t = term_create_empty_binary(src_size + size_val / 8, &ctx->heap, ctx->global); - memcpy((void *) term_binary_data(t), (void *) term_binary_data(src), src_size); + term t = term_reuse_binary(src, src_size + size_val / 8, &ctx->heap, ctx->global); + if (UNLIKELY(term_is_invalid_term(t))) { + RAISE_ERROR(OUT_OF_MEMORY_ATOM); + } ctx->bs = t; ctx->bs_offset = src_size * 8; @@ -5283,6 +5297,9 @@ HOT_FUNC int scheduler_entry_point(GlobalContext *glb) avm_int_t bs_offset = term_get_match_state_offset(src); bool status; switch (size_val) { + case 16: + status = bitstring_extract_f16(bs_bin, bs_offset, increment, 
flags_value, &value); + break; case 32: status = bitstring_extract_f32(bs_bin, bs_offset, increment, flags_value, &value); break; @@ -6736,6 +6753,7 @@ HOT_FUNC int scheduler_entry_point(GlobalContext *glb) // Verify parameters and compute binary size in first iteration #ifdef IMPL_EXECUTE_LOOP size_t binary_size = 0; + term reuse_binary = term_invalid_term(); #endif for (size_t j = 0; j < nb_segments; j++) { term atom_type; @@ -6803,6 +6821,31 @@ HOT_FUNC int scheduler_entry_point(GlobalContext *glb) segment_size = signed_size_value; break; } + case FLOAT_ATOM: { + if (!term_is_number(src)) { + if (fail == 0) { + RAISE_ERROR(BADARG_ATOM); + } else { + JUMP_TO_LABEL(mod, fail); + } + } + // size is optional for floats, defaults to 64 + avm_int_t signed_size_value = 64; + if (size != term_nil()) { + VERIFY_IS_INTEGER(size, "bs_create_bin/6", fail); + signed_size_value = term_to_int(size); + if (UNLIKELY(signed_size_value != 16 && signed_size_value != 32 && signed_size_value != 64)) { + if (fail == 0) { + RAISE_ERROR(BADARG_ATOM); + } else { + JUMP_TO_LABEL(mod, fail); + } + } + } + segment_size = signed_size_value; + segment_unit = 1; + break; + } case STRING_ATOM: { VERIFY_IS_INTEGER(size, "bs_create_bin/6", fail); avm_int_t signed_size_value = term_to_int(size); @@ -6824,6 +6867,9 @@ HOT_FUNC int scheduler_entry_point(GlobalContext *glb) // We only support src as a binary of bytes here. 
segment_size = term_binary_size(src); segment_unit = 8; + if (atom_type == PRIVATE_APPEND_ATOM && j == 0) { + reuse_binary = src; + } } else { VERIFY_IS_INTEGER(size, "bs_create_bin/6", fail); avm_int_t signed_size_value = term_to_int(size); @@ -6864,7 +6910,17 @@ HOT_FUNC int scheduler_entry_point(GlobalContext *glb) if (UNLIKELY(memory_ensure_free_with_roots(ctx, alloc + term_binary_heap_size(binary_size / 8), live, x_regs, MEMORY_CAN_SHRINK) != MEMORY_GC_OK)) { RAISE_ERROR(OUT_OF_MEMORY_ATOM); } - term t = term_create_empty_binary(binary_size / 8, &ctx->heap, ctx->global); + term t; + size_t original_size = 0; + if (term_is_invalid_term(reuse_binary)) { + t = term_create_empty_binary(binary_size / 8, &ctx->heap, ctx->global); + } else { + original_size = term_binary_size(reuse_binary); + t = term_reuse_binary(reuse_binary, binary_size / 8, &ctx->heap, ctx->global); + } + if (UNLIKELY(term_is_invalid_term(t))) { + RAISE_ERROR(OUT_OF_MEMORY_ATOM); + } size_t offset = 0; for (size_t j = 0; j < nb_segments; j++) { @@ -6888,6 +6944,7 @@ HOT_FUNC int scheduler_entry_point(GlobalContext *glb) case UTF16_ATOM: case UTF32_ATOM: case INTEGER_ATOM: + case FLOAT_ATOM: DECODE_FLAGS_LIST(flags_value, flags, opcode); break; default: @@ -6911,6 +6968,13 @@ HOT_FUNC int scheduler_entry_point(GlobalContext *glb) case STRING_ATOM: size_value = (size_t) term_to_int(size); break; + case FLOAT_ATOM: + if (size != term_nil()) { + size_value = (size_t) term_to_int(size); + } else { + size_value = 64; + } + break; default: break; } @@ -6953,6 +7017,38 @@ HOT_FUNC int scheduler_entry_point(GlobalContext *glb) segment_size = size_value; break; } + case FLOAT_ATOM: { + avm_float_t float_value; + if (term_is_float(src)) { + float_value = term_to_float(src); + } else if (term_is_any_integer(src)) { + float_value = (avm_float_t) term_maybe_unbox_int64(src); + } else { + if (fail == 0) { + RAISE_ERROR(BADARG_ATOM); + } else { + JUMP_TO_LABEL(mod, fail); + } + } + bool result; + if (size_value 
== 16) { + result = bitstring_insert_f16(t, offset, float_value, flags_value); + } else if (size_value == 32) { + result = bitstring_insert_f32(t, offset, float_value, flags_value); + } else { + result = bitstring_insert_f64(t, offset, float_value, flags_value); + } + if (UNLIKELY(!result)) { + TRACE("bs_create_bin/6: Failed to insert float into binary\n"); + if (fail == 0) { + RAISE_ERROR(BADARG_ATOM); + } else { + JUMP_TO_LABEL(mod, fail); + } + } + segment_size = size_value; + break; + } case STRING_ATOM: { uint8_t *dst = (uint8_t *) term_binary_data(t); size_t remaining = 0; @@ -6968,6 +7064,10 @@ HOT_FUNC int scheduler_entry_point(GlobalContext *glb) TRACE("bs_create_bin/6: current offset (%d) is not evenly divisible by 8\n", (int) offset); RAISE_ERROR(UNSUPPORTED_ATOM); } + if (reuse_binary == src && j == 0) { + segment_size = original_size * 8; + break; + } uint8_t *dst = (uint8_t *) term_binary_data(t) + (offset / 8); const uint8_t *bin = (const uint8_t *) term_binary_data(src); size_t binary_size = term_binary_size(src); @@ -7444,7 +7544,7 @@ HOT_FUNC int scheduler_entry_point(GlobalContext *glb) } terminate_context: - TRACE("-- Code execution finished for %i--\n", ctx->process_id); + TRACE("-- Code execution finished for %i--\n", (int) ctx->process_id); GlobalContext *global = ctx->global; if (ctx->leader) { scheduler_stop_all(global); diff --git a/src/libAtomVM/refc_binary.c b/src/libAtomVM/refc_binary.c index 61e4b05b8f..159d6f2ba9 100644 --- a/src/libAtomVM/refc_binary.c +++ b/src/libAtomVM/refc_binary.c @@ -141,3 +141,86 @@ size_t refc_binary_total_size(Context *ctx) synclist_unlock(&ctx->global->refc_binaries); return size; } + +COLD_FUNC void refc_binary_dump_info(Context *ctx) +{ + struct ListHead *item; + struct ListHead *refc_binaries = synclist_rdlock(&ctx->global->refc_binaries); + + // Note: This only counts non-const refc binaries (ones that allocate memory). 
+ // Const binaries (created by term_from_const_binary) point to existing data + // and are never added to the global refc_binaries list, so they don't appear here. + + // First pass: count and calculate total size + size_t count = 0; + size_t total_size = 0; + LIST_FOR_EACH (item, refc_binaries) { + struct RefcBinary *refc = GET_LIST_ENTRY(item, struct RefcBinary, head); + count++; + total_size += refc->size; + } + + fprintf(stderr, "refc_binary_count = %d\n", (int) count); + fprintf(stderr, "refc_binary_total_size = %d\n", (int) total_size); + + if (count == 0) { + synclist_unlock(&ctx->global->refc_binaries); + return; + } + +// Find top 5 largest binaries +#define TOP_N 5 + struct RefcBinary *top[TOP_N] = { NULL }; + size_t top_indices[TOP_N] = { 0 }; + + size_t index = 0; + LIST_FOR_EACH (item, refc_binaries) { + struct RefcBinary *refc = GET_LIST_ENTRY(item, struct RefcBinary, head); + + // Try to insert into top 5 + for (size_t i = 0; i < TOP_N; i++) { + if (top[i] == NULL || refc->size > top[i]->size) { + // Shift down + for (size_t j = TOP_N - 1; j > i; j--) { + top[j] = top[j - 1]; + top_indices[j] = top_indices[j - 1]; + } + top[i] = refc; + top_indices[i] = index; + break; + } + } + index++; + } + + // Display top binaries + fprintf(stderr, "\nTop %d largest refc binaries:\n", TOP_N); + for (size_t i = 0; i < TOP_N && top[i] != NULL; i++) { + struct RefcBinary *refc = top[i]; + fprintf(stderr, " [%zu] size=%d bytes (%.1f%%), refcount=%d", + top_indices[i], + (int) refc->size, + (double) refc->size * 100.0 / (double) total_size, + (int) refc->ref_count); + + if (refc->resource_type) { + fprintf(stderr, " [resource]"); + } + + // Print first 32 bytes as hex + fprintf(stderr, "\n data: "); + size_t print_size = refc->size < 32 ? 
refc->size : 32; + for (size_t j = 0; j < print_size; j++) { + fprintf(stderr, "%02x", refc->data[j]); + if (j % 4 == 3 && j < print_size - 1) { + fprintf(stderr, " "); + } + } + if (refc->size > 32) { + fprintf(stderr, "..."); + } + fprintf(stderr, "\n"); + } + + synclist_unlock(&ctx->global->refc_binaries); +} diff --git a/src/libAtomVM/refc_binary.h b/src/libAtomVM/refc_binary.h index 3fc1784bd8..7ff38f545e 100644 --- a/src/libAtomVM/refc_binary.h +++ b/src/libAtomVM/refc_binary.h @@ -142,6 +142,16 @@ term refc_binary_create_binary_info(Context *ctx); */ size_t refc_binary_total_size(Context *ctx); +/** + * @brief Dump detailed information about reference counted binaries + * + * @details This function prints diagnostic information including the count, + * total size, and details about the top 5 largest binaries including + * their first bytes. Used for debugging memory issues. + * @param ctx the context + */ +COLD_FUNC void refc_binary_dump_info(Context *ctx); + #ifdef __cplusplus } #endif diff --git a/src/libAtomVM/sys.h b/src/libAtomVM/sys.h index 0735d86ed3..8ad701c3cb 100644 --- a/src/libAtomVM/sys.h +++ b/src/libAtomVM/sys.h @@ -296,6 +296,33 @@ void sys_free_platform(GlobalContext *global); */ ModuleNativeEntryPoint sys_map_native_code(const uint8_t *native_code, size_t size, size_t offset); +/** + * @brief Get the cache (typically on flash) of native code for a given module + * + * @details If module is found in cache, return a pointer to the entry point. + * Only implemented on platforms with JIT. Implementations on flash typically + * check if the jit cache is valid (for lib or for app) and use the pointer to + * code as a key. 
+ * @param global the global context + * @param mod module to return the cache native code for + * @param version version of the cache entry (for compatibility with the VM) + * @param entry_point entry point to the module, if found + * @param labels number of labels + * @return \c true if the cache entry was found + */ +bool sys_get_cache_native_code(GlobalContext *global, Module *mod, uint16_t *version, ModuleNativeEntryPoint *entry_point, uint32_t *labels); + +/** + * @brief Add native code to cache for a given module + * + * @param global the global context + * @param mod module to add the native code for + * @param version version of the native code + * @param entry_point entry point to the module + * @param labels number of labels + */ +void sys_set_cache_native_code(GlobalContext *global, Module *mod, uint16_t version, ModuleNativeEntryPoint entry_point, uint32_t labels); + #ifdef __cplusplus } #endif diff --git a/src/libAtomVM/term.c b/src/libAtomVM/term.c index 838fd41eee..fc30674124 100644 --- a/src/libAtomVM/term.c +++ b/src/libAtomVM/term.c @@ -909,7 +909,7 @@ term term_alloc_refc_binary(size_t size, bool is_const, Heap *heap, GlobalContex if (IS_NULL_PTR(refc)) { // TODO propagate error to callers of this function, e.g., as an invalid term fprintf(stderr, "memory_create_refc_binary: Unable to allocate %zu bytes for refc_binary.\n", size); - AVM_ABORT(); + return term_invalid_term(); } boxed_value[3] = (term) refc; refc->ref_count = 1; // added to mso list, increment ref count @@ -919,6 +919,64 @@ term term_alloc_refc_binary(size_t size, bool is_const, Heap *heap, GlobalContex return ret; } +term term_reuse_binary(term src, size_t size, Heap *heap, GlobalContext *glb) +{ + if (term_is_refc_binary(src) && !term_refc_binary_is_const(src)) { + term *boxed_value = term_to_term_ptr(src); + struct RefcBinary *old_refc = (struct RefcBinary *) boxed_value[3]; + size_t old_size = old_refc->size; + + // Only reuse if refcount is 1 (only this term references it) + 
if (old_refc->ref_count == 1) { + // Lock the list of refc binaries while we're trying to realloc. + struct ListHead *refc_binaries = synclist_wrlock(&glb->refc_binaries); + + // Remove from list before realloc because realloc might move the memory + list_remove(&old_refc->head); + + // Realloc to new size. + size_t n = sizeof(struct RefcBinary) + size; + struct RefcBinary *new_refc = realloc(old_refc, n); + if (IS_NULL_PTR(new_refc)) { + // Re-add to list before unlocking + list_append(refc_binaries, &old_refc->head); + synclist_unlock(&glb->refc_binaries); + fprintf(stderr, "term_reuse_binary: Unable to reallocate %zu bytes for refc_binary.\n", size); + return term_invalid_term(); + } + + // Update size + new_refc->size = size; + + // Zero the new part if size increased + if (LIKELY(size > old_size)) { + memset((char *) &new_refc->data + old_size, 0, size - old_size); + } + + // Update the boxed value to point to the new refc BEFORE unlocking + // so other threads see a consistent state + boxed_value[1] = (term) size; + boxed_value[3] = (term) new_refc; + + // Re-add to list after realloc (whether pointer changed or not) + list_append(refc_binaries, &new_refc->head); + + // Unlock the list of refc binaries + synclist_unlock(&glb->refc_binaries); + + // Return the same term (boxed_value pointer hasn't changed) + return src; + } + } + // Not a refc binary or it's a const refc binary - create a new one + size_t src_size = term_binary_size(src); + term t = term_create_uninitialized_binary(size, heap, glb); + // Copy the source data (up to the smaller of src_size and size) + size_t copy_size = src_size < size ? 
src_size : size; + // term_create_uninitialized_binary may return an invalid term when the refc buffer + // cannot be allocated: propagate it to the caller instead of dereferencing it. + if (UNLIKELY(term_is_invalid_term(t))) { + return t; + } + memcpy((void *) term_binary_data(t), (void *) term_binary_data(src), copy_size); + // Zero the bytes past the copied data, as the in-place realloc path does for the grown part + if (size > copy_size) { + memset((char *) term_binary_data(t) + copy_size, 0, size - copy_size); + } + return t; +} + static term find_binary(term binary_or_state) { term t = binary_or_state; diff --git a/src/libAtomVM/term.h b/src/libAtomVM/term.h index 9a38768bcb..9c925ea5c8 100644 --- a/src/libAtomVM/term.h +++ b/src/libAtomVM/term.h @@ -286,7 +286,8 @@ TermCompareResult term_compare(term t, term other, TermCompareOpts opts, GlobalC * @param is_const designates whether the data pointed to is "const", such as a term literal * @param heap the heap to allocate the binary in * @param glb the global context as refc binaries are global - * @return a term (reference) pointing to the newly allocated binary in the process heap. + * @return a term (reference) pointing to the newly allocated binary in the process heap or + * `term_invalid_term()` if there isn't enough memory to allocate the refc buffer. */ term term_alloc_refc_binary(size_t size, bool is_const, Heap *heap, GlobalContext *glb); @@ -1262,7 +1263,8 @@ static inline const char *term_binary_data(term t) * @param size size of binary data buffer. * @param heap the heap to allocate the binary in * @param glb the global context as refc binaries are global -* @return a term pointing to the boxed binary pointer. 
+* @return a term pointing to the boxed binary pointer or `term_invalid_term()` +* if there isn't enough memory to allocate the refc buffer */ static inline term term_create_uninitialized_binary(size_t size, Heap *heap, GlobalContext *glb) { @@ -1350,7 +1352,9 @@ static inline void term_set_refc_binary_data(term t, const void *data) static inline term term_from_const_binary(const void *data, size_t size, Heap *heap, GlobalContext *glb) { term binary = term_alloc_refc_binary(size, true, heap, glb); - term_set_refc_binary_data(binary, data); + if (LIKELY(!term_is_invalid_term(binary))) { + term_set_refc_binary_data(binary, data); + } return binary; } @@ -1366,10 +1370,25 @@ static inline term term_from_const_binary(const void *data, size_t size, Heap *h static inline term term_create_empty_binary(size_t size, Heap *heap, GlobalContext *glb) { term t = term_create_uninitialized_binary(size, heap, glb); - memset((char *) term_binary_data(t), 0x00, size); + if (LIKELY(!term_is_invalid_term(t))) { + memset((char *) term_binary_data(t), 0x00, size); + } return t; } +/** +* @brief Reuse a binary. If the binary is a refc binary with a ref count of +* 1, try to reuse it. Otherwise, create a new binary and copy the data. +* +* @details Try to reuse a binary and return a term pointing to it. +* @param src binary to reuse. +* @param size size of binary data buffer. +* @param heap the heap to allocate memory in +* @param glb the global context as refc binaries are global +* @return a term pointing to the boxed binary pointer, or `term_invalid_term()` if the refc buffer could not be reallocated. 
+*/ +term term_reuse_binary(term src, size_t size, Heap *heap, GlobalContext *glb); + static inline bool term_normalize_binary_pos_len(term binary, avm_int_t pos, avm_int_t len, BinaryPosLen *pos_len) { avm_int_t size = (avm_int_t) term_binary_size(binary); diff --git a/src/platforms/emscripten/src/main.c b/src/platforms/emscripten/src/main.c index 27e02c3a67..c8c00dc07a 100644 --- a/src/platforms/emscripten/src/main.c +++ b/src/platforms/emscripten/src/main.c @@ -59,7 +59,7 @@ static int load_module(const char *path) const void *startup_beam = NULL; uint32_t startup_beam_size; const char *startup_module_name; - avmpack_find_section_by_flag(avmpack_data->data, 1, &startup_beam, &startup_beam_size, &startup_module_name); + avmpack_find_section_by_flag(avmpack_data->data, BEAM_START_FLAG, BEAM_START_FLAG, &startup_beam, &startup_beam_size, &startup_module_name); if (startup_beam) { avmpack_data->in_use = true; main_module = module_new_from_iff_binary(global, startup_beam, startup_beam_size); diff --git a/src/platforms/esp32/CMakeLists.txt b/src/platforms/esp32/CMakeLists.txt index 9dec6ec5f3..4dc81e0aad 100644 --- a/src/platforms/esp32/CMakeLists.txt +++ b/src/platforms/esp32/CMakeLists.txt @@ -51,11 +51,27 @@ endif() # On Esp32, select is run in a loop in a dedicated task set(AVM_SELECT_IN_TASK ON) -# JIT is not available yet on esp32 -set(AVM_DISABLE_JIT ON) +# By default, JIT is disabled; the value is set again from CONFIG_JIT_ENABLED after project() +set(AVM_DISABLE_JIT ON) project(atomvm-esp32) +# JIT is only supported on RISC-V targets (ESP32-C2, ESP32-C3, ESP32-C6, ESP32-H2, ESP32-P4) +# Configuration comes from idf.py menuconfig (KConfig), not CMake options +if(CONFIG_JIT_ENABLED) + if ("${IDF_TARGET}" MATCHES "esp32c2|esp32c3|esp32c6|esp32h2|esp32p4") + set(AVM_DISABLE_JIT OFF) + set(AVM_JIT_TARGET_ARCH riscv32) + message(STATUS "JIT compilation enabled for ${IDF_TARGET} (RISC-V32)") + else() + message(WARNING "JIT is not supported on ${IDF_TARGET} (Xtensa architecture)") + set(AVM_DISABLE_JIT ON) + endif() +else() + 
set(AVM_DISABLE_JIT ON) + message(STATUS "JIT compilation disabled") +endif() + # esp-idf does not use compile_feature but instead sets version in # c_compile_options # Ensure project is compiled with at least C11 diff --git a/src/platforms/esp32/components/avm_sys/CMakeLists.txt b/src/platforms/esp32/components/avm_sys/CMakeLists.txt index ebcedd3b57..2f942073f7 100644 --- a/src/platforms/esp32/components/avm_sys/CMakeLists.txt +++ b/src/platforms/esp32/components/avm_sys/CMakeLists.txt @@ -25,6 +25,8 @@ set(AVM_SYS_COMPONENT_SRCS "sys.c" "platform_nifs.c" "platform_defaultatoms.c" + "jit_stream_flash_platform.c" + "../../../../libAtomVM/jit_stream_flash.c" "../../../../libAtomVM/inet.c" "../../../../libAtomVM/otp_crypto.c" "../../../../libAtomVM/otp_net.c" diff --git a/src/platforms/esp32/components/avm_sys/jit_stream_flash_platform.c b/src/platforms/esp32/components/avm_sys/jit_stream_flash_platform.c new file mode 100644 index 0000000000..bfaed52215 --- /dev/null +++ b/src/platforms/esp32/components/avm_sys/jit_stream_flash_platform.c @@ -0,0 +1,141 @@ +/* + * This file is part of AtomVM. + * + * Copyright 2025 by Paul Guyot + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ * + * SPDX-License-Identifier: Apache-2.0 OR LGPL-2.1-or-later + */ + +#ifndef AVM_NO_JIT + +#include "jit_stream_flash.h" + +#include +#include + +#include "esp32_sys.h" + +#if ESP_IDF_VERSION_MAJOR >= 5 +#include +#endif + +#ifdef CONFIG_IDF_TARGET_ARCH_RISCV +#include +#endif + +struct JSFlashPlatformContext +{ + const esp_partition_t *partition; +}; + +struct JSFlashPlatformContext *jit_stream_flash_platform_init(void) +{ + const esp_partition_t *partition = esp_partition_find_first( + ESP_PARTITION_TYPE_DATA, ESP_PARTITION_SUBTYPE_ANY, JIT_PARTITION_NAME); + if (IS_NULL_PTR(partition)) { + fprintf(stderr, "Failed to find partition '%s' for JIT cache\n", JIT_PARTITION_NAME); + return NULL; + } + + struct JSFlashPlatformContext *pf_ctx = malloc(sizeof(struct JSFlashPlatformContext)); + if (IS_NULL_PTR(pf_ctx)) { + return NULL; + } + + pf_ctx->partition = partition; + return pf_ctx; +} + +void jit_stream_flash_platform_destroy(struct JSFlashPlatformContext *ctx) +{ + free(ctx); +} + +bool jit_stream_flash_platform_erase_sector(struct JSFlashPlatformContext *ctx, uintptr_t addr) +{ + if (UNLIKELY(!ctx || !ctx->partition)) { + return false; + } + + size_t flash_offset = spi_flash_cache2phys((const void *) addr); + if (UNLIKELY(flash_offset == SPI_FLASH_CACHE2PHYS_FAIL)) { + fprintf(stderr, "Failed to convert cache address 0x%lx to physical address\n", (unsigned long) addr); + return false; + } + + esp_err_t err = esp_partition_erase_range(ctx->partition, + flash_offset - ctx->partition->address, FLASH_SECTOR_SIZE); + if (UNLIKELY(err != ESP_OK)) { + fprintf(stderr, "Failed to erase sector at offset 0x%lx: %d\n", (unsigned long) flash_offset, err); + return false; + } + + return true; +} + +bool jit_stream_flash_platform_write_page(struct JSFlashPlatformContext *ctx, uintptr_t addr, const uint8_t *data) +{ + if (UNLIKELY(!ctx || !ctx->partition)) { + return false; + } + + size_t flash_offset = spi_flash_cache2phys((const void *) addr); + if (UNLIKELY(flash_offset 
== SPI_FLASH_CACHE2PHYS_FAIL)) { + fprintf(stderr, "Failed to convert cache address 0x%lx to physical address\n", (unsigned long) addr); + return false; + } + + esp_err_t err = esp_partition_write(ctx->partition, + flash_offset - ctx->partition->address, data, FLASH_PAGE_SIZE); + if (UNLIKELY(err != ESP_OK)) { + fprintf(stderr, "Failed to write page at offset 0x%lx: %d\n", (unsigned long) flash_offset, err); + return false; + } + + return true; +} + +uintptr_t jit_stream_flash_platform_ptr_to_executable(uintptr_t addr) +{ + // Convert data cache address to instruction cache address for RISC-V targets + // On ESP32-C3/C6/H2, flash is mapped to both DBUS (0x3C...) and IBUS (0x42...) + // but only IBUS addresses are executable +#ifdef CONFIG_IDF_TARGET_ARCH_RISCV + if ((addr & ~SOC_MMU_VADDR_MASK) == SOC_MMU_DBUS_VADDR_BASE) { + return (addr & SOC_MMU_VADDR_MASK) | SOC_MMU_IBUS_VADDR_BASE; + } + return addr; +#else + return addr; +#endif +} + +uintptr_t jit_stream_flash_platform_executable_to_ptr(uintptr_t addr) +{ + // Convert instruction cache address to data cache address for RISC-V targets + // This is the reverse of ptr_to_executable +#ifdef CONFIG_IDF_TARGET_ARCH_RISCV + if ((addr & ~SOC_MMU_VADDR_MASK) == SOC_MMU_IBUS_VADDR_BASE) { + return (addr & SOC_MMU_VADDR_MASK) | SOC_MMU_DBUS_VADDR_BASE; + } + return addr; +#else + return addr; +#endif +} + +REGISTER_NIF_COLLECTION(jit_stream_flash, jit_stream_flash_init, NULL, jit_stream_flash_get_nif) + +#endif // AVM_NO_JIT diff --git a/src/platforms/esp32/components/avm_sys/jit_stream_flash_platform.h b/src/platforms/esp32/components/avm_sys/jit_stream_flash_platform.h new file mode 100644 index 0000000000..6f8d9bffc5 --- /dev/null +++ b/src/platforms/esp32/components/avm_sys/jit_stream_flash_platform.h @@ -0,0 +1,43 @@ +/* + * This file is part of AtomVM. 
+ * + * Copyright 2025 by Paul Guyot + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + * SPDX-License-Identifier: Apache-2.0 OR LGPL-2.1-or-later + */ + +#ifndef _JIT_STREAM_FLASH_PLATFORM_H_ +#define _JIT_STREAM_FLASH_PLATFORM_H_ + +#include +#include +#include + +#ifdef __cplusplus +extern "C" { +#endif + +// ESP32 flash constants +#define FLASH_SECTOR_SIZE 4096 +#define FLASH_PAGE_SIZE 256 + +// JIT code is stored in main.avm partition +#define JIT_PARTITION_NAME "main.avm" + +#ifdef __cplusplus +} +#endif + +#endif // _JIT_STREAM_FLASH_PLATFORM_H_ diff --git a/src/platforms/esp32/components/avm_sys/sys.c b/src/platforms/esp32/components/avm_sys/sys.c index 8318ae759a..ec229d70b2 100644 --- a/src/platforms/esp32/components/avm_sys/sys.c +++ b/src/platforms/esp32/components/avm_sys/sys.c @@ -807,3 +807,28 @@ void sys_mbedtls_ctr_drbg_context_unlock(GlobalContext *global) UNUSED(global); #endif } + +#ifndef AVM_NO_JIT +#include + +ModuleNativeEntryPoint sys_map_native_code(const uint8_t *native_code, size_t size, size_t offset) +{ + UNUSED(size); + uintptr_t addr = (uintptr_t) (native_code + offset); + +#if defined(CONFIG_IDF_TARGET_ARCH_RISCV) + // On RISC-V ESP32 targets, native code in flash needs to be accessed + // through the instruction cache (IROM) not data cache (DROM) +#if defined(CONFIG_IDF_TARGET_ESP32C3) || defined(CONFIG_IDF_TARGET_ESP32C2) + // ESP32-C3 and C2 have separate DROM and IROM regions + if (addr >= SOC_DROM_LOW && addr < 
SOC_DROM_HIGH) { + // Convert from data cache address to instruction cache address + addr = addr - SOC_DROM_LOW + SOC_IROM_LOW; + } +#endif + // ESP32-C6, H2, and P4 have unified DROM/IROM, no conversion needed +#endif + + return (ModuleNativeEntryPoint) addr; +} +#endif diff --git a/src/platforms/esp32/components/libatomvm/CMakeLists.txt b/src/platforms/esp32/components/libatomvm/CMakeLists.txt index 97580dbfea..00595afeef 100644 --- a/src/platforms/esp32/components/libatomvm/CMakeLists.txt +++ b/src/platforms/esp32/components/libatomvm/CMakeLists.txt @@ -31,7 +31,12 @@ if (HAVE_PLATFORM_ATOMIC_H) target_include_directories(libAtomVM PUBLIC ../avm_sys/) endif() -target_link_libraries(${COMPONENT_LIB} - INTERFACE libAtomVM "-u platform_nifs_get_nif" "-u platform_defaultatoms_init") +if (AVM_DISABLE_JIT) + target_link_libraries(${COMPONENT_LIB} + INTERFACE libAtomVM "-u platform_nifs_get_nif" "-u platform_defaultatoms_init") +else() + target_link_libraries(${COMPONENT_LIB} + INTERFACE libAtomVM "-u platform_nifs_get_nif" "-u platform_defaultatoms_init" "-u jit_stream_entry_point" "-u sys_map_native_code") +endif() target_compile_features(${COMPONENT_LIB} INTERFACE c_std_11) diff --git a/src/platforms/esp32/main/Kconfig.projbuild b/src/platforms/esp32/main/Kconfig.projbuild index 88bf92aa1a..1eba944ed7 100755 --- a/src/platforms/esp32/main/Kconfig.projbuild +++ b/src/platforms/esp32/main/Kconfig.projbuild @@ -39,5 +39,11 @@ menu "AtomVM configuration" depends on USE_USB_SERIAL help Enable TinyUSB CDC functionality if USE_USB_SERIAL is enabled. 
+ + config JIT_ENABLED + bool "Enable just in time compilation" + default n + help + Enable just-in-time (JIT) compilation, or merely the execution of precompiled native code. endmenu diff --git a/src/platforms/esp32/main/main.c b/src/platforms/esp32/main/main.c index bc25c82c64..ca6d02e2f3 100644 --- a/src/platforms/esp32/main/main.c +++ b/src/platforms/esp32/main/main.c @@ -98,7 +98,7 @@ void app_main() ESP_LOGE(TAG, "Invalid startup avmpack. size=%u", size); AVM_ABORT(); } - if (!avmpack_find_section_by_flag(startup_avm, BEAM_START_FLAG, &startup_beam, &startup_beam_size, &startup_module_name)) { + if (!avmpack_find_section_by_flag(startup_avm, BEAM_START_FLAG, BEAM_START_FLAG, &startup_beam, &startup_beam_size, &startup_module_name)) { ESP_LOGE(TAG, "Error: Failed to locate start module in startup partition. (Did you flash a library by mistake?)"); AVM_ABORT(); } diff --git a/src/platforms/esp32/partitions.csv b/src/platforms/esp32/partitions.csv index 95c1cf74bc..d313cbdc81 100644 --- a/src/platforms/esp32/partitions.csv +++ b/src/platforms/esp32/partitions.csv @@ -7,6 +7,5 @@ # Note: if you change the phy_init or app partition offset, make sure to change the offset in Kconfig.projbuild nvs, data, nvs, 0x9000, 0x6000, phy_init, data, phy, 0xf000, 0x1000, -factory, app, factory, 0x10000, 0x1C0000, -boot.avm, data, phy, 0x1D0000, 0x40000, -main.avm, data, phy, 0x210000, 0x100000 +factory, app, factory, 0x10000, 0x160000, +main.avm, data, phy, 0x170000, 0x290000, diff --git a/src/platforms/esp32/test/CMakeLists.txt b/src/platforms/esp32/test/CMakeLists.txt index 2d97d91345..cee138d34c 100644 --- a/src/platforms/esp32/test/CMakeLists.txt +++ b/src/platforms/esp32/test/CMakeLists.txt @@ -57,8 +57,16 @@ endif() # On Esp32, select is run in a loop in a dedicated task set(AVM_SELECT_IN_TASK ON) -# JIT is not available yet on esp32 -set(AVM_DISABLE_JIT ON) +# JIT is only supported on RISC-V targets (ESP32-C2, ESP32-C3, ESP32-C6, ESP32-H2, ESP32-P4) +# This must be set before 
project() so libAtomVM is configured correctly +if (${IDF_TARGET} MATCHES "esp32c2|esp32c3|esp32c6|esp32h2|esp32p4") + set(AVM_DISABLE_JIT OFF) + set(AVM_JIT_TARGET_ARCH riscv32) + message(STATUS "JIT compilation enabled for ${IDF_TARGET} (RISC-V32)") +else() + message(STATUS "JIT is not supported on ${IDF_TARGET} (Xtensa architecture) - using interpreter") + set(AVM_DISABLE_JIT ON) +endif() project(atomvm-esp32-test) diff --git a/src/platforms/esp32/test/main/test_erl_sources/CMakeLists.txt b/src/platforms/esp32/test/main/test_erl_sources/CMakeLists.txt index e2d67269e8..dc4789f374 100644 --- a/src/platforms/esp32/test/main/test_erl_sources/CMakeLists.txt +++ b/src/platforms/esp32/test/main/test_erl_sources/CMakeLists.txt @@ -20,11 +20,31 @@ add_library(esp32_test_modules) +include(ExternalProject) +if(NOT AVM_DISABLE_JIT) +set(host_atomvm_jit_target "--target=jit") +else() +set(host_atomvm_jit_target "") +endif() ExternalProject_Add(HostAtomVM SOURCE_DIR ../../../../../../../../ INSTALL_COMMAND cmake -E echo "Skipping install step." + BUILD_COMMAND cmake --build . 
--target=atomvmlib ${host_atomvm_jit_target} --target=PackBEAM ) +macro(jit_precompile module_name) + if(NOT AVM_DISABLE_JIT) + add_custom_command( + OUTPUT ${AVM_JIT_TARGET_ARCH}/${module_name}.beam + COMMAND mkdir -p ${AVM_JIT_TARGET_ARCH} + && erl -pa HostAtomVM-prefix/src/HostAtomVM-build/libs/jit/src/beams/ -noshell -s jit_precompile -s init stop -- ${AVM_JIT_TARGET_ARCH} ${AVM_JIT_TARGET_ARCH}/ ${module_name}.beam + DEPENDS ${module_name}.beam HostAtomVM + COMMENT "Compiling ${module_name}.beam to ${AVM_JIT_TARGET_ARCH}" + VERBATIM + ) + endif() +endmacro() + function(compile_erlang module_name) add_custom_command( OUTPUT "${CMAKE_CURRENT_BINARY_DIR}/${module_name}.beam" @@ -33,6 +53,7 @@ function(compile_erlang module_name) COMMENT "Compiling ${module_name}.erl" WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR} ) + jit_precompile(${module_name}) set_property(DIRECTORY "${CMAKE_CURRENT_SOURCE_DIR}" APPEND PROPERTY ADDITIONAL_MAKE_CLEAN_FILES "${CMAKE_CURRENT_BINARY_DIR}/${module_name}.beam") endfunction() @@ -55,46 +76,44 @@ compile_erlang(test_time_and_processes) compile_erlang(test_twdt) compile_erlang(test_tz) +set(erlang_test_beams + test_esp_partition.beam + test_file.beam + test_wifi_example.beam + test_list_to_atom.beam + test_list_to_binary.beam + test_md5.beam + test_crypto.beam + test_monotonic_time.beam + test_mount.beam + test_net.beam + test_rtc_slow.beam + test_select.beam + test_socket.beam + test_ssl.beam + test_time_and_processes.beam + test_twdt.beam + test_tz.beam +) + +if(NOT AVM_DISABLE_JIT) + set(erlang_test_beams_${AVM_JIT_TARGET_ARCH} ${erlang_test_beams}) + list(TRANSFORM erlang_test_beams_${AVM_JIT_TARGET_ARCH} PREPEND ${AVM_JIT_TARGET_ARCH}/) + set(erlang_test_beams_to_package ${erlang_test_beams_${AVM_JIT_TARGET_ARCH}}) + set(erlang_test_beams_depends ${erlang_test_beams} ${erlang_test_beams_${AVM_JIT_TARGET_ARCH}}) +else() + set(erlang_test_beams_to_package ${erlang_test_beams}) + set(erlang_test_beams_depends ${erlang_test_beams}) 
+endif() + add_custom_command( OUTPUT "${CMAKE_CURRENT_BINARY_DIR}/esp32_test_modules.avm" COMMAND HostAtomVM-prefix/src/HostAtomVM-build/tools/packbeam/PackBEAM -i esp32_test_modules.avm HostAtomVM-prefix/src/HostAtomVM-build/libs/atomvmlib.avm - test_esp_partition.beam - test_file.beam - test_wifi_example.beam - test_list_to_atom.beam - test_list_to_binary.beam - test_md5.beam - test_crypto.beam - test_monotonic_time.beam - test_mount.beam - test_net.beam - test_rtc_slow.beam - test_select.beam - test_socket.beam - test_ssl.beam - test_time_and_processes.beam - test_twdt.beam - test_tz.beam + ${erlang_test_beams_to_package} DEPENDS HostAtomVM - "${CMAKE_CURRENT_BINARY_DIR}/test_esp_partition.beam" - "${CMAKE_CURRENT_BINARY_DIR}/test_wifi_example.beam" - "${CMAKE_CURRENT_BINARY_DIR}/test_file.beam" - "${CMAKE_CURRENT_BINARY_DIR}/test_list_to_atom.beam" - "${CMAKE_CURRENT_BINARY_DIR}/test_list_to_binary.beam" - "${CMAKE_CURRENT_BINARY_DIR}/test_md5.beam" - "${CMAKE_CURRENT_BINARY_DIR}/test_crypto.beam" - "${CMAKE_CURRENT_BINARY_DIR}/test_monotonic_time.beam" - "${CMAKE_CURRENT_BINARY_DIR}/test_mount.beam" - "${CMAKE_CURRENT_BINARY_DIR}/test_net.beam" - "${CMAKE_CURRENT_BINARY_DIR}/test_rtc_slow.beam" - "${CMAKE_CURRENT_BINARY_DIR}/test_select.beam" - "${CMAKE_CURRENT_BINARY_DIR}/test_socket.beam" - "${CMAKE_CURRENT_BINARY_DIR}/test_ssl.beam" - "${CMAKE_CURRENT_BINARY_DIR}/test_time_and_processes.beam" - "${CMAKE_CURRENT_BINARY_DIR}/test_twdt.beam" - "${CMAKE_CURRENT_BINARY_DIR}/test_tz.beam" + ${erlang_test_beams_depends} WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR} VERBATIM ) diff --git a/src/platforms/generic_unix/lib/jit_stream_mmap.c b/src/platforms/generic_unix/lib/jit_stream_mmap.c index f246a9791d..096cfe7faf 100644 --- a/src/platforms/generic_unix/lib/jit_stream_mmap.c +++ b/src/platforms/generic_unix/lib/jit_stream_mmap.c @@ -193,6 +193,28 @@ static term nif_jit_stream_mmap_read(Context *ctx, int argc, term argv[]) return 
term_from_literal_binary(js_obj->stream_base + offset, len, &ctx->heap, ctx->global); } +static term nif_jit_stream_mmap_flush(Context *ctx, int argc, term argv[]) +{ + UNUSED(argc); + + void *js_obj_ptr; + if (UNLIKELY(!enif_get_resource(erl_nif_env_from_context(ctx), argv[0], jit_stream_mmap_resource_type, &js_obj_ptr))) { + RAISE_ERROR(BADARG_ATOM); + } + struct JITStreamMMap *js_obj = (struct JITStreamMMap *) js_obj_ptr; + if (IS_NULL_PTR(js_obj->stream_base)) { + RAISE_ERROR(BADARG_ATOM); + } + +#if defined(__APPLE__) + sys_icache_invalidate(js_obj->stream_base, js_obj->stream_size); +#elif defined(__GNUC__) + __builtin___clear_cache(js_obj->stream_base, js_obj->stream_base + js_obj->stream_size); +#endif + + return argv[0]; +} + static term nif_jit_stream_module(Context *ctx, int argc, term argv[]) { UNUSED(argc); @@ -226,6 +248,10 @@ static const struct Nif jit_stream_mmap_read_nif = { .base.type = NIFFunctionType, .nif_ptr = nif_jit_stream_mmap_read }; +static const struct Nif jit_stream_mmap_flush_nif = { + .base.type = NIFFunctionType, + .nif_ptr = nif_jit_stream_mmap_flush +}; ModuleNativeEntryPoint jit_stream_entry_point(Context *ctx, term jit_stream) { @@ -239,11 +265,6 @@ ModuleNativeEntryPoint jit_stream_entry_point(Context *ctx, term jit_stream) return NULL; } -#if defined(__APPLE__) - sys_icache_invalidate(js_obj->stream_base, js_obj->stream_size); -#elif defined(__GNUC__) - __builtin___clear_cache(js_obj->stream_base, js_obj->stream_base + js_obj->stream_size); -#endif #if JIT_ARCH_TARGET == JIT_ARCH_ARMV6M // Set thumb bit for armv6m ModuleNativeEntryPoint result = (ModuleNativeEntryPoint) js_obj->stream_base + 1; @@ -251,6 +272,7 @@ ModuleNativeEntryPoint jit_stream_entry_point(Context *ctx, term jit_stream) ModuleNativeEntryPoint result = (ModuleNativeEntryPoint) js_obj->stream_base; #endif + // Prevent module from being unmapped by dtor js_obj->stream_base = NULL; return result; } @@ -291,6 +313,9 @@ const struct Nif 
*jit_stream_mmap_get_nif(const char *nifname) if (strcmp("read/3", rest) == 0) { return &jit_stream_mmap_read_nif; } + if (strcmp("flush/1", rest) == 0) { + return &jit_stream_mmap_flush_nif; + } } return NULL; } diff --git a/src/platforms/generic_unix/lib/sys.c b/src/platforms/generic_unix/lib/sys.c index 099164dd89..eedbe060b4 100644 --- a/src/platforms/generic_unix/lib/sys.c +++ b/src/platforms/generic_unix/lib/sys.c @@ -853,4 +853,24 @@ ModuleNativeEntryPoint sys_map_native_code(const uint8_t *native_code, size_t si return (ModuleNativeEntryPoint) (native_code + offset); #endif } + +bool sys_get_cache_native_code(GlobalContext *global, Module *mod, uint16_t *version, ModuleNativeEntryPoint *entry_point, uint32_t *labels) +{ + UNUSED(global); + UNUSED(mod); + UNUSED(version); + UNUSED(entry_point); + UNUSED(labels); + return false; +} + +void sys_set_cache_native_code(GlobalContext *global, Module *mod, uint16_t version, ModuleNativeEntryPoint entry_point, uint32_t labels) +{ + UNUSED(global); + UNUSED(mod); + UNUSED(version); + UNUSED(entry_point); + UNUSED(labels); +} + #endif diff --git a/src/platforms/generic_unix/main.c b/src/platforms/generic_unix/main.c index f45fd7f14f..6908aa50f8 100644 --- a/src/platforms/generic_unix/main.c +++ b/src/platforms/generic_unix/main.c @@ -104,7 +104,7 @@ int main(int argc, char **argv) const void *startup_beam = NULL; const char *startup_module_name; uint32_t startup_beam_size; - avmpack_find_section_by_flag(avmpack_data->data, 1, &startup_beam, &startup_beam_size, &startup_module_name); + avmpack_find_section_by_flag(avmpack_data->data, BEAM_START_FLAG, BEAM_START_FLAG, &startup_beam, &startup_beam_size, &startup_module_name); if (startup_beam) { avmpack_data->in_use = true; diff --git a/src/platforms/rp2/CMakeLists.txt b/src/platforms/rp2/CMakeLists.txt index 6dbcdf7bb8..86e5e6683a 100644 --- a/src/platforms/rp2/CMakeLists.txt +++ b/src/platforms/rp2/CMakeLists.txt @@ -54,6 +54,8 @@ set(HAVE_MKFIFO "" CACHE INTERNAL 
"Have symbol mkfifo" FORCE) set(HAVE_UNLINK "" CACHE INTERNAL "Have symbol unlink" FORCE) # Likewise with EXECVE set(HAVE_EXECVE "" CACHE INTERNAL "Have symbol execve" FORCE) +# getcwd is defined in newlib header but not implemented +set(HAVE_GETCWD "" CACHE INTERNAL "Have symbol getcwd" FORCE) # Options that make sense for this platform option(AVM_DISABLE_SMP "Disable SMP support." OFF) @@ -69,8 +71,13 @@ if(CMAKE_SYSTEM_PROCESSOR MATCHES "^cortex-m.+$") if (NOT AVM_DISABLE_JIT) set(AVM_JIT_TARGET_ARCH "armv6m") endif() +elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "^hazard3$") + # Pico2 RISC-V processor (Hazard3) + if (NOT AVM_DISABLE_JIT) + set(AVM_JIT_TARGET_ARCH "riscv32") + endif() else() - # Typically riscv is not supported yet + # Other processors not supported yet if (NOT AVM_DISABLE_JIT) message("JIT is not supported on ${CMAKE_SYSTEM_PROCESSOR}") set(AVM_DISABLE_JIT ON CACHE BOOL "Disable just in time compilation." FORCE) diff --git a/src/platforms/rp2/src/CMakeLists.txt b/src/platforms/rp2/src/CMakeLists.txt index c79433551e..f4ef67cd8f 100644 --- a/src/platforms/rp2/src/CMakeLists.txt +++ b/src/platforms/rp2/src/CMakeLists.txt @@ -55,6 +55,10 @@ else() target_compile_definitions(AtomVM PRIVATE PICO_STDIO_USB_CONNECT_WAIT_TIMEOUT_MS=20000) endif() +if (AVM_DISABLE_SMP) + target_compile_definitions(AtomVM PRIVATE PICO_FLASH_ASSUME_CORE1_SAFE) +endif() + if (AVM_WAIT_BOOTSEL_ON_EXIT) target_compile_definitions(AtomVM PRIVATE WAIT_BOOTSEL_ON_EXIT) endif() diff --git a/src/platforms/rp2/src/lib/CMakeLists.txt b/src/platforms/rp2/src/lib/CMakeLists.txt index 3cc69b56a3..b9e594c9df 100644 --- a/src/platforms/rp2/src/lib/CMakeLists.txt +++ b/src/platforms/rp2/src/lib/CMakeLists.txt @@ -31,7 +31,6 @@ set(HEADER_FILES set(SOURCE_FILES gpiodriver.c - jit_stream_flash.c networkdriver.c otp_crypto_platform.c platform_defaultatoms.c @@ -110,4 +109,16 @@ if (PICO_CYW43_SUPPORTED) define_if_function_exists(libAtomVM${PLATFORM_LIB_SUFFIX} gethostname "unistd.h" PRIVATE 
HAVE_GETHOSTNAME) endif() +if (NOT AVM_DISABLE_JIT) + target_sources( + libAtomVM${PLATFORM_LIB_SUFFIX} + PRIVATE + jit_stream_flash_platform.c + ../../../../libAtomVM/jit_stream_flash.c + jit_stream_flash_platform.h + ../../../../libAtomVM/jit_stream_flash.h + ) + target_link_options(libAtomVM${PLATFORM_LIB_SUFFIX} PUBLIC "SHELL:-Wl,-u -Wl,jit_stream_flash_get_nif") +endif() + target_link_options(libAtomVM${PLATFORM_LIB_SUFFIX} PUBLIC "SHELL:-Wl,-u -Wl,gpio_nif -Wl,-u -Wl,otp_crypto_nif") diff --git a/src/platforms/rp2/src/lib/jit_stream_flash_platform.c b/src/platforms/rp2/src/lib/jit_stream_flash_platform.c new file mode 100644 index 0000000000..e8a17c3537 --- /dev/null +++ b/src/platforms/rp2/src/lib/jit_stream_flash_platform.c @@ -0,0 +1,117 @@ +/* + * This file is part of AtomVM. + * + * Copyright 2025 by Paul Guyot + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ * + * SPDX-License-Identifier: Apache-2.0 OR LGPL-2.1-or-later + */ + +#ifndef AVM_NO_JIT + +#include "jit_stream_flash.h" + +#include +#include +#include +#include + +#include "rp2_sys.h" + +// Helper structures for flash_safe_execute +struct EraseParams +{ + uintptr_t addr; +}; + +struct WriteParams +{ + uintptr_t addr; + const uint8_t *data; + size_t len; +}; + +static void __not_in_flash_func(do_erase_sector)(void *params_ptr) +{ + struct EraseParams *params = (struct EraseParams *) params_ptr; + flash_range_erase(params->addr - XIP_BASE, FLASH_SECTOR_SIZE); +} + +static void __not_in_flash_func(do_write_page)(void *params_ptr) +{ + struct WriteParams *params = (struct WriteParams *) params_ptr; + flash_range_program(params->addr - XIP_BASE, params->data, params->len); +} + +struct JSFlashPlatformContext *jit_stream_flash_platform_init(void) +{ + return (struct JSFlashPlatformContext *) 1; +} + +void jit_stream_flash_platform_destroy(struct JSFlashPlatformContext *pf_ctx) +{ + UNUSED(pf_ctx); +} + +bool jit_stream_flash_platform_erase_sector(struct JSFlashPlatformContext *pf_ctx, uintptr_t addr) +{ + UNUSED(pf_ctx); + + struct EraseParams params = { + .addr = addr + }; + + int r = flash_safe_execute(do_erase_sector, ¶ms, UINT32_MAX); + if (UNLIKELY(r != PICO_OK)) { + fprintf(stderr, "flash_safe_execute (erase) failed with error %d\n", r); + return false; + } + + return true; +} + +bool jit_stream_flash_platform_write_page(struct JSFlashPlatformContext *pf_ctx, uintptr_t addr, const uint8_t *data) +{ + UNUSED(pf_ctx); + + struct WriteParams params = { + .addr = addr, + .data = data, + .len = FLASH_PAGE_SIZE + }; + + int r = flash_safe_execute(do_write_page, ¶ms, UINT32_MAX); + if (UNLIKELY(r != PICO_OK)) { + fprintf(stderr, "flash_safe_execute (write) failed with error %d\n", r); + return false; + } + + return true; +} + +uintptr_t jit_stream_flash_platform_ptr_to_executable(uintptr_t addr) +{ + // Set Thumb bit + return addr | 0x1; +} + +uintptr_t 
jit_stream_flash_platform_executable_to_ptr(uintptr_t addr) +{ + // Clear Thumb bit + return addr & ~0x1UL; +} + +REGISTER_NIF_COLLECTION(jit_stream_flash, jit_stream_flash_init, NULL, jit_stream_flash_get_nif) + +#endif // AVM_NO_JIT diff --git a/src/platforms/rp2/src/lib/jit_stream_flash_platform.h b/src/platforms/rp2/src/lib/jit_stream_flash_platform.h new file mode 100644 index 0000000000..3ee8c660bd --- /dev/null +++ b/src/platforms/rp2/src/lib/jit_stream_flash_platform.h @@ -0,0 +1,40 @@ +/* + * This file is part of AtomVM. + * + * Copyright 2025 by Paul Guyot + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ * + * SPDX-License-Identifier: Apache-2.0 OR LGPL-2.1-or-later + */ + +#ifndef _JIT_STREAM_FLASH_PLATFORM_H_ +#define _JIT_STREAM_FLASH_PLATFORM_H_ + +#include +#include +#include + +#ifdef __cplusplus +extern "C" { +#endif + +// RP2040 flash constants (W25Q16JV chip) +#define FLASH_SECTOR_SIZE 4096 +#define FLASH_PAGE_SIZE 256 + +#ifdef __cplusplus +} +#endif + +#endif // _JIT_STREAM_FLASH_PLATFORM_H_ diff --git a/src/platforms/rp2/src/lib/smp.c b/src/platforms/rp2/src/lib/smp.c index 946b066305..44251e8913 100644 --- a/src/platforms/rp2/src/lib/smp.c +++ b/src/platforms/rp2/src/lib/smp.c @@ -57,14 +57,17 @@ static void scheduler_core1_entry_point(void) { _Static_assert(sizeof(uintptr_t) == sizeof(uint32_t), "Expected pointers to be 32 bits"); uint32_t ctx_int = multicore_fifo_pop_blocking(); + multicore_lockout_victim_init(); int result = scheduler_entry_point((GlobalContext *) ctx_int); UNUSED(result); + multicore_lockout_victim_deinit(); } void smp_scheduler_start(GlobalContext *ctx) { multicore_launch_core1(scheduler_core1_entry_point); multicore_fifo_push_blocking((uint32_t) ctx); + multicore_lockout_victim_init(); } bool smp_is_main_thread(GlobalContext *glb) diff --git a/src/platforms/rp2/src/main.c b/src/platforms/rp2/src/main.c index e25e1398d0..0733c85e63 100644 --- a/src/platforms/rp2/src/main.c +++ b/src/platforms/rp2/src/main.c @@ -87,9 +87,12 @@ static int app_main() if (!avmpack_is_valid(MAIN_AVM, XIP_SRAM_BASE - (uintptr_t) MAIN_AVM)) { sleep_ms(5000); fprintf(stderr, "Fatal error: invalid main.avm packbeam\n"); + if (avmpack_is_valid(LIB_AVM, (uintptr_t) MAIN_AVM - (uintptr_t) LIB_AVM)) { + fprintf(stderr, "Lib avm packbeam is valid, though\n"); + } AVM_ABORT(); } - if (!avmpack_find_section_by_flag(MAIN_AVM, BEAM_START_FLAG, &startup_beam, &startup_beam_size, &startup_module_name)) { + if (!avmpack_find_section_by_flag(MAIN_AVM, BEAM_START_FLAG, BEAM_START_FLAG, &startup_beam, &startup_beam_size, &startup_module_name)) { sleep_ms(5000); 
fprintf(stderr, "Fatal error: Failed to locate start module in main.avm packbeam. (Did you flash a library by mistake?)"); AVM_ABORT(); diff --git a/src/platforms/stm32/src/main.c b/src/platforms/stm32/src/main.c index 7febe37717..689aa925f1 100644 --- a/src/platforms/stm32/src/main.c +++ b/src/platforms/stm32/src/main.c @@ -248,7 +248,7 @@ int main() port_driver_init_all(glb); nif_collection_init_all(glb); - if (!avmpack_is_valid(flashed_avm, size) || !avmpack_find_section_by_flag(flashed_avm, BEAM_START_FLAG, &startup_beam, &startup_beam_size, &startup_module_name)) { + if (!avmpack_is_valid(flashed_avm, size) || !avmpack_find_section_by_flag(flashed_avm, BEAM_START_FLAG, BEAM_START_FLAG, &startup_beam, &startup_beam_size, &startup_module_name)) { AVM_LOGE(TAG, "Invalid AVM Pack"); AVM_ABORT(); } diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index 42ef857dda..c7652f4b65 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -24,12 +24,14 @@ project (tests) add_executable(test-erlang test.c) add_executable(test-enif test-enif.c) add_executable(test-heap test-heap.c) +add_executable(test-jit_stream_flash test-jit_stream_flash.c ../src/libAtomVM/jit_stream_flash.c) add_executable(test-mailbox test-mailbox.c) add_executable(test-structs test-structs.c) target_compile_features(test-erlang PUBLIC c_std_11) target_compile_features(test-enif PUBLIC c_std_11) target_compile_features(test-heap PUBLIC c_std_11) +target_compile_features(test-jit_stream_flash PUBLIC c_std_11) target_compile_features(test-mailbox PUBLIC c_std_11) target_compile_features(test-structs PUBLIC c_std_11) @@ -37,6 +39,7 @@ if(CMAKE_COMPILER_IS_GNUCC) target_compile_options(test-erlang PUBLIC -Wall -pedantic -Wextra -ggdb) target_compile_options(test-enif PUBLIC -Wall -pedantic -Wextra -ggdb) target_compile_options(test-heap PUBLIC -Wall -pedantic -Wextra -ggdb) + target_compile_options(test-jit_stream_flash PUBLIC -Wall -pedantic -Wextra -ggdb) target_compile_options(test-mailbox 
PUBLIC -Wall -pedantic -Wextra -ggdb) target_compile_options(test-structs PUBLIC -Wall -pedantic -Wextra -ggdb) endif() @@ -50,6 +53,7 @@ if(${CMAKE_SYSTEM_NAME} STREQUAL "Linux") target_link_libraries(test-erlang PRIVATE ${LIBRT}) target_link_libraries(test-enif PRIVATE ${LIBRT}) target_link_libraries(test-heap PRIVATE ${LIBRT}) + target_link_libraries(test-jit_stream_flash PRIVATE ${LIBRT}) target_link_libraries(test-mailbox PRIVATE ${LIBRT}) target_link_libraries(test-structs PRIVATE ${LIBRT}) else() @@ -63,6 +67,7 @@ if (MbedTLS_FOUND) target_link_libraries(test-erlang PRIVATE MbedTLS::mbedtls) target_link_libraries(test-enif PRIVATE MbedTLS::mbedtls) target_link_libraries(test-heap PRIVATE MbedTLS::mbedtls) + target_link_libraries(test-jit_stream_flash PRIVATE MbedTLS::mbedtls) target_link_libraries(test-mailbox PRIVATE MbedTLS::mbedtls) target_link_libraries(test-structs PRIVATE MbedTLS::mbedtls) endif() @@ -79,6 +84,7 @@ if((${CMAKE_SYSTEM_NAME} STREQUAL "Darwin") OR target_include_directories(test-erlang PRIVATE ../src/platforms/generic_unix/lib) target_include_directories(test-enif PRIVATE ../src/platforms/generic_unix/lib) target_include_directories(test-heap PRIVATE ../src/platforms/generic_unix/lib) + target_include_directories(test-jit_stream_flash PRIVATE ../src/platforms/generic_unix/lib) target_include_directories(test-mailbox PRIVATE ../src/platforms/generic_unix/lib) target_include_directories(test-structs PRIVATE ../src/platforms/generic_unix/lib) else() @@ -88,11 +94,15 @@ endif() target_include_directories(test-erlang PRIVATE ../src/libAtomVM) target_include_directories(test-enif PRIVATE ../src/libAtomVM) target_include_directories(test-heap PRIVATE ../src/libAtomVM) +target_include_directories(test-jit_stream_flash PRIVATE ../src/libAtomVM ${CMAKE_CURRENT_SOURCE_DIR}) target_include_directories(test-mailbox PRIVATE ../src/libAtomVM) target_include_directories(test-structs PRIVATE ../src/libAtomVM) target_link_libraries(test-erlang PRIVATE 
libAtomVM libAtomVM${PLATFORM_LIB_SUFFIX}) target_link_libraries(test-enif PRIVATE libAtomVM libAtomVM${PLATFORM_LIB_SUFFIX}) target_link_libraries(test-heap PRIVATE libAtomVM libAtomVM${PLATFORM_LIB_SUFFIX}) +# test-jit_stream_flash includes jit_stream_flash.c and provides its own mock platform implementation +target_compile_definitions(test-jit_stream_flash PRIVATE TEST_JIT_STREAM_FLASH) +target_link_libraries(test-jit_stream_flash PRIVATE libAtomVM libAtomVM${PLATFORM_LIB_SUFFIX}) target_link_libraries(test-mailbox PRIVATE libAtomVM libAtomVM${PLATFORM_LIB_SUFFIX}) target_link_libraries(test-structs PRIVATE libAtomVM libAtomVM${PLATFORM_LIB_SUFFIX}) @@ -120,11 +130,13 @@ if (COVERAGE) append_coverage_compiler_flags_to_target(test-erlang) append_coverage_compiler_flags_to_target(test-enif) append_coverage_compiler_flags_to_target(test-heap) + append_coverage_compiler_flags_to_target(test-jit_stream_flash) append_coverage_compiler_flags_to_target(test-mailbox) append_coverage_compiler_flags_to_target(test-structs) append_coverage_linker_flags_to_target(test-erlang) append_coverage_linker_flags_to_target(test-enif) append_coverage_linker_flags_to_target(test-heap) + append_coverage_linker_flags_to_target(test-jit_stream_flash) append_coverage_linker_flags_to_target(test-mailbox) append_coverage_linker_flags_to_target(test-structs) if (CMAKE_COMPILER_IS_GNUCC) diff --git a/tests/erlang_tests/CMakeLists.txt b/tests/erlang_tests/CMakeLists.txt index 267a4b3d29..ed77bc537c 100644 --- a/tests/erlang_tests/CMakeLists.txt +++ b/tests/erlang_tests/CMakeLists.txt @@ -613,6 +613,8 @@ compile_erlang(test_lists_member) compile_erlang(test_lists_keymember) compile_erlang(test_lists_keyfind) +compile_erlang(test_inline_arith) + if(Erlang_VERSION VERSION_GREATER_EQUAL "23") set(OTP23_OR_GREATER_TESTS test_op_bs_start_match_asm.beam @@ -1135,6 +1137,8 @@ set(erlang_test_beams test_lists_keymember.beam test_lists_keyfind.beam + test_inline_arith.beam + test_code_server_nifs.beam 
test_op_bs_start_match.beam diff --git a/tests/erlang_tests/test_bs.erl b/tests/erlang_tests/test_bs.erl index 460e8774ea..36ab0bb400 100644 --- a/tests/erlang_tests/test_bs.erl +++ b/tests/erlang_tests/test_bs.erl @@ -99,6 +99,8 @@ start() -> ok = test_bs_skip_bits2_little(), + ok = test_float(), + 0. test_pack_small_ints({A, B, C}, Expect) -> @@ -532,6 +534,72 @@ test_bs_match_string_select() -> test_bs_skip_bits2_little() -> ok = check_x86_64_jt(id(<<16#e9, 0:32>>)). +test_float() -> + Pi = id(3.14), + <<64,9,30,184,81,235,133,31,3,14>> = <>, + <<64,9,30,184,81,235,133,31,3,14>> = <>, + <<31,133,235,81,184,30,9,64,3,14>> = <>, + <<_:64,3,14>> = <>, + <<64,72,245,195,3,14>> = <>, + <<195,245,72,64,3,14>> = <>, + + <> = id(<<64,9,30,184,81,235,133,31,3,14>>), + <> = id(<<31,133,235,81,184,30,9,64,3,14>>), + <> = id(<<64,72,245,195,3,14>>), + <> = id(<<195,245,72,64,3,14>>), + true = abs(PiS - Pi) < 0.0001, + + % Test integer to float conversion + Int2 = id(2), + IntNeg2 = id(-2), + Int32 = id(32), + <<64,0,0,0,0,0,0,0>> = <>, + <<192,0,0,0,0,0,0,0>> = <>, + <<66,0,0,0>> = <>, + + % 16-bit floats are supported in OTP 24+ and AtomVM + Has16BitFloats = + case erlang:system_info(machine) of + "BEAM" -> + erlang:system_info(otp_release) >= "24"; + "ATOM" -> + true + end, + if + Has16BitFloats -> + % Test that 16-bit floats work + Pi16 = id(3.14), + <<66,72>> = <>, + <<66,72>> = <>, + <<72,66>> = <>, + <> = <<66, 72, 3, 14>>, + <> = <<72, 66, 3, 14>>, + true = abs(Pi16B - Pi16) < 0.001, + ok; + true -> + ok + end, + + ok = test_create_with_invalid_float_value(), + ok = test_create_with_invalid_float_size(), + ok. + +test_create_with_invalid_float_value() -> + ok = expect_error(fun() -> create_float_binary(foo, id(64)) end, badarg), + ok = expect_error(fun() -> create_float_binary([1, 2, 3], id(32)) end, badarg), + ok = expect_error(fun() -> create_float_binary(<<"binary">>, id(64)) end, badarg), + ok. 
+ +test_create_with_invalid_float_size() -> + % These sizes are invalid in both BEAM and AtomVM + ok = expect_error(fun() -> create_float_binary(3.14, id(8)) end, badarg), + ok = expect_error(fun() -> create_float_binary(3.14, id(128)) end, badarg), + ok = expect_error(fun() -> create_float_binary(3.14, id(foo)) end, badarg), + ok. + +create_float_binary(Value, Size) -> + <>. + check_x86_64_jt(<<>>) -> ok; check_x86_64_jt(<<16#e9, _Offset:32/little, Tail/binary>>) -> check_x86_64_jt(Tail); check_x86_64_jt(Bin) -> {unexpected, Bin}. diff --git a/src/platforms/rp2/src/lib/jit_stream_flash.c b/tests/jit_stream_flash_platform.h similarity index 69% rename from src/platforms/rp2/src/lib/jit_stream_flash.c rename to tests/jit_stream_flash_platform.h index 77dfcca908..b9e39dd36b 100644 --- a/src/platforms/rp2/src/lib/jit_stream_flash.c +++ b/tests/jit_stream_flash_platform.h @@ -18,17 +18,23 @@ * SPDX-License-Identifier: Apache-2.0 OR LGPL-2.1-or-later */ -#ifndef AVM_NO_JIT +#ifndef _JIT_STREAM_FLASH_PLATFORM_H_ +#define _JIT_STREAM_FLASH_PLATFORM_H_ -#include "context.h" -#include "jit.h" -#include "term.h" +#include +#include +#include -ModuleNativeEntryPoint jit_stream_entry_point(Context *ctx, term jit_stream) -{ - UNUSED(ctx); - UNUSED(jit_stream); - return NULL; -} +#ifdef __cplusplus +extern "C" { +#endif + +// Host test flash constants +#define FLASH_SECTOR_SIZE 4096 +#define FLASH_PAGE_SIZE 256 +#ifdef __cplusplus +} #endif + +#endif // _JIT_STREAM_FLASH_PLATFORM_H_ diff --git a/tests/libs/jit/CMakeLists.txt b/tests/libs/jit/CMakeLists.txt index 26ab6b4ecc..45473d9f10 100644 --- a/tests/libs/jit/CMakeLists.txt +++ b/tests/libs/jit/CMakeLists.txt @@ -30,6 +30,8 @@ set(ERLANG_MODULES jit_aarch64_asm_tests jit_armv6m_tests jit_armv6m_asm_tests + jit_riscv32_tests + jit_riscv32_asm_tests jit_x86_64_tests jit_x86_64_asm_tests ) diff --git a/tests/libs/jit/jit_aarch64_tests.erl b/tests/libs/jit/jit_aarch64_tests.erl index 23291a400c..c696f93d0b 100644 --- 
a/tests/libs/jit/jit_aarch64_tests.erl +++ b/tests/libs/jit/jit_aarch64_tests.erl @@ -88,6 +88,51 @@ call_primitive_2_args_test() -> >>, ?assertEqual(dump_to_bin(Dump), Stream). +call_primitive_5_args_test() -> + State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)), + State1 = ?BACKEND:call_primitive_last(State0, ?PRIM_ALLOCATE, [ctx, jit_state, 16, 32, 2]), + Stream = ?BACKEND:stream(State1), + Dump = + << + " 0: f9401447 ldr x7, [x2, #40]\n" + " 4: d2800202 mov x2, #0x10 // #16\n" + " 8: d2800403 mov x3, #0x20 // #32\n" + " c: d2800044 mov x4, #0x2 // #2\n" + " 10: d61f00e0 br x7" + >>, + ?assertEqual(dump_to_bin(Dump), Stream). + +call_primitive_6_args_test() -> + State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)), + % Get bin_ptr from x_reg 0 (similar to get_list_test pattern) + {State1, RegA} = ?BACKEND:move_to_native_register(State0, {x_reg, 0}), + {State2, RegA} = ?BACKEND:and_(State1, {free, RegA}, ?TERM_PRIMARY_CLEAR_MASK), + % Get another register for the last parameter to test {free, Reg} handling + {State3, OtherReg} = ?BACKEND:move_to_native_register(State2, {x_reg, 1}), + % Call PRIM_BITSTRING_EXTRACT_INTEGER with 6 arguments + {State4, _ResultReg} = ?BACKEND:call_primitive(State3, ?PRIM_BITSTRING_EXTRACT_INTEGER, [ + ctx, jit_state, {free, RegA}, 64, 8, {free, OtherReg} + ]), + Stream = ?BACKEND:stream(State4), + Dump = + << + " 0: f9401807 ldr x7, [x0, #48]\n" + " 4: 927ef4e7 and x7, x7, #0xfffffffffffffffc\n" + " 8: f9401c08 ldr x8, [x0, #56]\n" + " c: f940b850 ldr x16, [x2, #368]\n" + " 10: a9bf03fe stp x30, x0, [sp, #-16]!\n" + " 14: a9bf0be1 stp x1, x2, [sp, #-16]!\n" + " 18: aa0703e2 mov x2, x7\n" + " 1c: d2800803 mov x3, #0x40 // #64\n" + " 20: d2800104 mov x4, #0x8 // #8\n" + " 24: aa0803e5 mov x5, x8\n" + " 28: d63f0200 blr x16\n" + " 2c: aa0003e7 mov x7, x0\n" + " 30: a8c10be1 ldp x1, x2, [sp], #16\n" + " 34: a8c103fe ldp x30, x0, [sp], #16" + >>, + 
?assertEqual(dump_to_bin(Dump), Stream). + call_primitive_extended_regs_test() -> State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)), {State1, RegA} = ?BACKEND:call_primitive(State0, ?PRIM_EXTENDED_REGISTER_PTR, [ctx, 19]), @@ -146,6 +191,44 @@ call_primitive_extended_regs_test() -> >>, ?assertEqual(dump_to_bin(Dump), Stream). +call_primitive_few_free_regs_test() -> + State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)), + {State1, Reg1} = ?BACKEND:move_to_native_register(State0, 1), + {State2, Reg2} = ?BACKEND:move_to_native_register(State1, 2), + {State3, Reg3} = ?BACKEND:move_to_native_register(State2, 3), + {State4, Reg4} = ?BACKEND:move_to_native_register(State3, 4), + {State5, Reg5} = ?BACKEND:move_to_native_register(State4, 5), + {State6, ResultReg} = ?BACKEND:call_primitive(State5, ?PRIM_BITSTRING_INSERT_INTEGER, [ + Reg2, Reg1, {free, Reg4}, Reg3, {free, Reg5} + ]), + State7 = ?BACKEND:free_native_registers(State6, [ResultReg, Reg2, Reg1, Reg3]), + ?BACKEND:assert_all_native_free(State7), + Stream = ?BACKEND:stream(State7), + Dump = << + " 0: d2800027 mov x7, #0x1 // #1\n" + " 4: d2800048 mov x8, #0x2 // #2\n" + " 8: d2800069 mov x9, #0x3 // #3\n" + " c: d280008a mov x10, #0x4 // #4\n" + " 10: d28000ab mov x11, #0x5 // #5\n" + " 14: f940e450 ldr x16, [x2, #456]\n" + " 18: a9bf03fe stp x30, x0, [sp, #-16]!\n" + " 1c: a9bf0be1 stp x1, x2, [sp, #-16]!\n" + " 20: a9bf23e9 stp x9, x8, [sp, #-16]!\n" + " 24: f81f0fe7 str x7, [sp, #-16]!\n" + " 28: aa0803e0 mov x0, x8\n" + " 2c: aa0703e1 mov x1, x7\n" + " 30: aa0a03e2 mov x2, x10\n" + " 34: aa0903e3 mov x3, x9\n" + " 38: aa0b03e4 mov x4, x11\n" + " 3c: d63f0200 blr x16\n" + " 40: aa0003ea mov x10, x0\n" + " 44: f84107e7 ldr x7, [sp], #16\n" + " 48: a8c123e9 ldp x9, x8, [sp], #16\n" + " 4c: a8c10be1 ldp x1, x2, [sp], #16\n" + " 50: a8c103fe ldp x30, x0, [sp], #16" + >>, + ?assertEqual(dump_to_bin(Dump), Stream). 
+ call_ext_only_test() -> State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)), State1 = ?BACKEND:decrement_reductions_and_maybe_schedule_next(State0), @@ -168,6 +251,23 @@ call_ext_only_test() -> >>, ?assertEqual(dump_to_bin(Dump), Stream). +call_primitive_last_5_args_test() -> + State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)), + {State1, RegA} = ?BACKEND:move_to_native_register(State0, {x_reg, 0}), + State2 = ?BACKEND:call_primitive_last(State1, ?PRIM_RAISE_ERROR_TUPLE, [ + ctx, jit_state, offset, ?CASE_CLAUSE_ATOM, {free, RegA} + ]), + Stream = ?BACKEND:stream(State2), + Dump = << + " 0: f9401807 ldr x7, [x0, #48]\n" + " 4: f9404c48 ldr x8, [x2, #152]\n" + " 8: d2800102 mov x2, #0x8 // #8\n" + " c: d2805963 mov x3, #0x2cb // #715\n" + " 10: aa0703e4 mov x4, x7\n" + " 14: d61f0100 br x8" + >>, + ?assertEqual(dump_to_bin(Dump), Stream). + call_ext_last_test() -> State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)), State1 = ?BACKEND:decrement_reductions_and_maybe_schedule_next(State0), @@ -729,6 +829,82 @@ if_block_test_() -> >>, ?assertEqual(dump_to_bin(Dump), Stream), ?assertEqual([RegB], ?BACKEND:used_regs(State1)) + end), + ?_test(begin + State1 = ?BACKEND:if_block( + State0, + {100, '<', RegA}, + fun(BSt0) -> + ?BACKEND:add(BSt0, RegB, 2) + end + ), + Stream = ?BACKEND:stream(State1), + Dump = << + " 0: f9401807 ldr x7, [x0, #48]\n" + " 4: f9401c08 ldr x8, [x0, #56]\n" + " 8: f10190ff cmp x7, #0x64\n" + " c: 5400004d b.le 0x14\n" + " 10: 91000908 add x8, x8, #0x2" + >>, + ?assertEqual(dump_to_bin(Dump), Stream), + ?assertEqual([RegB, RegA], ?BACKEND:used_regs(State1)) + end), + ?_test(begin + State1 = ?BACKEND:if_block( + State0, + {100, '<', {free, RegA}}, + fun(BSt0) -> + ?BACKEND:add(BSt0, RegB, 2) + end + ), + Stream = ?BACKEND:stream(State1), + Dump = << + " 0: f9401807 ldr x7, [x0, #48]\n" + " 4: f9401c08 ldr x8, [x0, #56]\n" + " 8: f10190ff cmp x7, 
#0x64\n" + " c: 5400004d b.le 0x14\n" + " 10: 91000908 add x8, x8, #0x2" + >>, + ?assertEqual(dump_to_bin(Dump), Stream), + ?assertEqual([RegB], ?BACKEND:used_regs(State1)) + end), + ?_test(begin + State1 = ?BACKEND:if_block( + State0, + {RegA, '<', 100}, + fun(BSt0) -> + ?BACKEND:add(BSt0, RegB, 2) + end + ), + Stream = ?BACKEND:stream(State1), + Dump = << + " 0: f9401807 ldr x7, [x0, #48]\n" + " 4: f9401c08 ldr x8, [x0, #56]\n" + " 8: f10190ff cmp x7, #0x64\n" + " c: 5400004a b.ge 0x14 // b.tcont\n" + " 10: 91000908 add x8, x8, #0x2" + >>, + ?assertEqual(dump_to_bin(Dump), Stream), + ?assertEqual([RegB, RegA], ?BACKEND:used_regs(State1)) + end), + ?_test(begin + State1 = ?BACKEND:if_block( + State0, + {{free, RegA}, '<', 100}, + fun(BSt0) -> + ?BACKEND:add(BSt0, RegB, 2) + end + ), + Stream = ?BACKEND:stream(State1), + Dump = << + " 0: f9401807 ldr x7, [x0, #48]\n" + " 4: f9401c08 ldr x8, [x0, #56]\n" + " 8: f10190ff cmp x7, #0x64\n" + " c: 5400004a b.ge 0x14 // b.tcont\n" + " 10: 91000908 add x8, x8, #0x2" + >>, + ?assertEqual(dump_to_bin(Dump), Stream), + ?assertEqual([RegB], ?BACKEND:used_regs(State1)) end) ] end}. @@ -833,6 +1009,38 @@ call_only_or_schedule_next_and_label_relocation_test() -> >>, ?assertEqual(dump_to_bin(Dump), Stream). 
+call_only_or_schedule_next_known_label_test() -> + State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)), + State1 = ?BACKEND:jump_table(State0, 2), + State2 = ?BACKEND:add_label(State1, 1), + State3 = ?BACKEND:add_label(State2, 2, 16#2c), + State4 = ?BACKEND:call_only_or_schedule_next(State3, 2), + State5 = ?BACKEND:call_primitive_last(State4, 0, [ctx, jit_state]), + % OP_INT_CALL_END + State6 = ?BACKEND:add_label(State5, 0), + State7 = ?BACKEND:call_primitive_last(State6, 1, [ctx, jit_state]), + State8 = ?BACKEND:update_branches(State7), + Stream = ?BACKEND:stream(State8), + Dump = + << + " 0: 1400000d b 0x34\n" + " 4: 14000002 b 0xc\n" + " 8: 14000009 b 0x2c\n" + " c: b9401027 ldr w7, [x1, #16]\n" + " 10: f10004e7 subs x7, x7, #0x1\n" + " 14: b9001027 str w7, [x1, #16]\n" + " 18: 540000a1 b.ne 0x2c // b.any\n" + " 1c: 10000087 adr x7, 0x2c\n" + " 20: f9000427 str x7, [x1, #8]\n" + " 24: f9400847 ldr x7, [x2, #16]\n" + " 28: d61f00e0 br x7\n" + " 2c: f9400047 ldr x7, [x2]\n" + " 30: d61f00e0 br x7\n" + " 34: f9400447 ldr x7, [x2, #8]\n" + " 38: d61f00e0 br x7" + >>, + ?assertEqual(dump_to_bin(Dump), Stream). 
+ call_bif_with_large_literal_integer_test() -> State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)), {State1, FuncPtr} = ?BACKEND:call_primitive(State0, 8, [jit_state, 2]), @@ -892,7 +1100,7 @@ call_bif_with_large_literal_integer_test() -> get_list_test() -> State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)), {State1, Reg} = ?BACKEND:move_to_native_register(State0, {x_reg, 0}), - State2 = ?BACKEND:and_(State1, Reg, -4), + {State2, Reg} = ?BACKEND:and_(State1, {free, Reg}, -4), State3 = ?BACKEND:move_array_element(State2, Reg, 1, {y_reg, 1}), State4 = ?BACKEND:move_array_element(State3, Reg, 0, {y_reg, 0}), State5 = ?BACKEND:free_native_registers(State4, [Reg]), @@ -912,17 +1120,18 @@ get_list_test() -> is_integer_test() -> State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)), + State1 = ?BACKEND:jump_table(State0, 1), Label = 1, Arg1 = {x_reg, 0}, - {State1, Reg} = ?BACKEND:move_to_native_register(State0, Arg1), - State2 = ?BACKEND:if_block( - State1, {Reg, '&', ?TERM_IMMED_TAG_MASK, '!=', ?TERM_INTEGER_TAG}, fun(MSt0) -> + {State2, Reg} = ?BACKEND:move_to_native_register(State1, Arg1), + State3 = ?BACKEND:if_block( + State2, {Reg, '&', ?TERM_IMMED_TAG_MASK, '!=', ?TERM_INTEGER_TAG}, fun(MSt0) -> MSt1 = ?BACKEND:if_block( MSt0, {Reg, '&', ?TERM_PRIMARY_MASK, '!=', ?TERM_PRIMARY_BOXED}, fun(BSt0) -> ?BACKEND:jump_to_label(BSt0, Label) end ), - MSt2 = ?BACKEND:and_(MSt1, Reg, ?TERM_PRIMARY_CLEAR_MASK), + {MSt2, Reg} = ?BACKEND:and_(MSt1, {free, Reg}, ?TERM_PRIMARY_CLEAR_MASK), MSt3 = ?BACKEND:move_array_element(MSt2, Reg, 0, Reg), ?BACKEND:if_block( MSt3, @@ -933,27 +1142,29 @@ is_integer_test() -> ) end ), - State3 = ?BACKEND:free_native_registers(State2, [Reg]), - ?BACKEND:assert_all_native_free(State3), - Offset = ?BACKEND:offset(State3), - State4 = ?BACKEND:add_label(State3, Label, Offset + 16#100), - State5 = ?BACKEND:update_branches(State4), - Stream = 
?BACKEND:stream(State5), + State4 = ?BACKEND:free_native_registers(State3, [Reg]), + ?BACKEND:assert_all_native_free(State4), + Offset = ?BACKEND:offset(State4), + State5 = ?BACKEND:add_label(State4, Label, Offset + 16#100), + State6 = ?BACKEND:update_branches(State5), + Stream = ?BACKEND:stream(State6), Dump = << - " 0: f9401807 ldr x7, [x0, #48]\n" - " 4: 92400ce8 and x8, x7, #0xf\n" - " 8: f1003d1f cmp x8, #0xf\n" - " c: 54000160 b.eq 0x38 // b.none\n" - " 10: 924004e8 and x8, x7, #0x3\n" - " 14: f100091f cmp x8, #0x2\n" - " 18: 54000040 b.eq 0x20 // b.none\n" - " 1c: 14000047 b 0x138\n" - " 20: 927ef4e7 and x7, x7, #0xfffffffffffffffc\n" - " 24: f94000e7 ldr x7, [x7]\n" - " 28: 924014e7 and x7, x7, #0x3f\n" - " 2c: f10020ff cmp x7, #0x8\n" - " 30: 54000040 b.eq 0x38 // b.none\n" - " 34: 14000041 b 0x138" + " 0: 14000000 b 0x0\n" + " 4: 1400004f b 0x140\n" + " 8: f9401807 ldr x7, [x0, #48]\n" + " c: 92400ce8 and x8, x7, #0xf\n" + " 10: f1003d1f cmp x8, #0xf\n" + " 14: 54000160 b.eq 0x40\n" + " 18: 924004e8 and x8, x7, #0x3\n" + " 1c: f100091f cmp x8, #0x2\n" + " 20: 54000040 b.eq 0x28\n" + " 24: 14000047 b 0x140\n" + " 28: 927ef4e7 and x7, x7, #0xfffffffffffffffc\n" + " 2c: f94000e7 ldr x7, [x7]\n" + " 30: 924014e7 and x7, x7, #0x3f\n" + " 34: f10020ff cmp x7, #0x8\n" + " 38: 54000040 b.eq 0x40\n" + " 3c: 14000041 b 0x140" >>, ?assertEqual(dump_to_bin(Dump), Stream). 
@@ -964,15 +1175,16 @@ cond_jump_to_label(Cond, Label, MMod, MSt0) -> is_number_test() -> State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)), + State1 = ?BACKEND:jump_table(State0, 1), Label = 1, Arg1 = {x_reg, 0}, - {State1, Reg} = ?BACKEND:move_to_native_register(State0, Arg1), - State2 = ?BACKEND:if_block( - State1, {Reg, '&', ?TERM_IMMED_TAG_MASK, '!=', ?TERM_INTEGER_TAG}, fun(BSt0) -> + {State2, Reg} = ?BACKEND:move_to_native_register(State1, Arg1), + State3 = ?BACKEND:if_block( + State2, {Reg, '&', ?TERM_IMMED_TAG_MASK, '!=', ?TERM_INTEGER_TAG}, fun(BSt0) -> BSt1 = cond_jump_to_label( {Reg, '&', ?TERM_PRIMARY_MASK, '!=', ?TERM_PRIMARY_BOXED}, Label, ?BACKEND, BSt0 ), - BSt2 = ?BACKEND:and_(BSt1, Reg, ?TERM_PRIMARY_CLEAR_MASK), + {BSt2, Reg} = ?BACKEND:and_(BSt1, {free, Reg}, ?TERM_PRIMARY_CLEAR_MASK), BSt3 = ?BACKEND:move_array_element(BSt2, Reg, 0, Reg), cond_jump_to_label( {'and', [ @@ -985,56 +1197,262 @@ is_number_test() -> ) end ), - State3 = ?BACKEND:free_native_registers(State2, [Reg]), - ?BACKEND:assert_all_native_free(State3), - Offset = ?BACKEND:offset(State3), - State4 = ?BACKEND:add_label(State3, Label, Offset + 16#100), - State5 = ?BACKEND:update_branches(State4), - Stream = ?BACKEND:stream(State5), + State4 = ?BACKEND:free_native_registers(State3, [Reg]), + ?BACKEND:assert_all_native_free(State4), + Offset = ?BACKEND:offset(State4), + State5 = ?BACKEND:add_label(State4, Label, Offset + 16#100), + State6 = ?BACKEND:update_branches(State5), + Stream = ?BACKEND:stream(State6), Dump = << - " 0: f9401807 ldr x7, [x0, #48]\n" - " 4: 92400ce8 and x8, x7, #0xf\n" - " 8: f1003d1f cmp x8, #0xf\n" - " c: 540001c0 b.eq 0x44 // b.none\n" - " 10: 924004e8 and x8, x7, #0x3\n" - " 14: f100091f cmp x8, #0x2\n" - " 18: 54000040 b.eq 0x20 // b.none\n" - " 1c: 1400004a b 0x144\n" - " 20: 927ef4e7 and x7, x7, #0xfffffffffffffffc\n" - " 24: f94000e7 ldr x7, [x7]\n" - " 28: 924014e8 and x8, x7, #0x3f\n" - " 2c: f100211f cmp x8, 
#0x8\n" - " 30: 540000a0 b.eq 0x44 // b.none\n" - " 34: 924014e7 and x7, x7, #0x3f\n" - " 38: f10060ff cmp x7, #0x18\n" - " 3c: 54000040 b.eq 0x44 // b.none\n" - " 40: 14000041 b 0x144" + " 0: 14000000 b 0x0\n" + " 4: 14000052 b 0x14c\n" + " 8: f9401807 ldr x7, [x0, #48]\n" + " c: 92400ce8 and x8, x7, #0xf\n" + " 10: f1003d1f cmp x8, #0xf\n" + " 14: 540001c0 b.eq 0x4c\n" + " 18: 924004e8 and x8, x7, #0x3\n" + " 1c: f100091f cmp x8, #0x2\n" + " 20: 54000040 b.eq 0x28\n" + " 24: 1400004a b 0x14c\n" + " 28: 927ef4e7 and x7, x7, #0xfffffffffffffffc\n" + " 2c: f94000e7 ldr x7, [x7]\n" + " 30: 924014e8 and x8, x7, #0x3f\n" + " 34: f100211f cmp x8, #0x8\n" + " 38: 540000a0 b.eq 0x4c\n" + " 3c: 924014e7 and x7, x7, #0x3f\n" + " 40: f10060ff cmp x7, #0x18\n" + " 44: 54000040 b.eq 0x4c\n" + " 48: 14000041 b 0x14c" >>, ?assertEqual(dump_to_bin(Dump), Stream). is_boolean_test() -> State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)), + State1 = ?BACKEND:jump_table(State0, 1), Label = 1, - {State1, Reg} = ?BACKEND:move_to_native_register(State0, {x_reg, 0}), - State2 = ?BACKEND:if_block(State1, {Reg, '!=', ?TRUE_ATOM}, fun(BSt0) -> + {State2, Reg} = ?BACKEND:move_to_native_register(State1, {x_reg, 0}), + State3 = ?BACKEND:if_block(State2, {Reg, '!=', ?TRUE_ATOM}, fun(BSt0) -> ?BACKEND:if_block(BSt0, {Reg, '!=', ?FALSE_ATOM}, fun(BSt1) -> ?BACKEND:jump_to_label(BSt1, Label) end) end), - State3 = ?BACKEND:free_native_registers(State2, [Reg]), - ?BACKEND:assert_all_native_free(State3), - Offset = ?BACKEND:offset(State3), - State4 = ?BACKEND:add_label(State3, Label, Offset + 16#100), + State4 = ?BACKEND:free_native_registers(State3, [Reg]), + ?BACKEND:assert_all_native_free(State4), + Offset = ?BACKEND:offset(State4), + State5 = ?BACKEND:add_label(State4, Label, Offset + 16#100), + State6 = ?BACKEND:update_branches(State5), + Stream = ?BACKEND:stream(State6), + Dump = << + " 0: 14000000 b 0x0\n" + " 4: 14000047 b 0x120\n" + " 8: f9401807 ldr x7, [x0, 
#48]\n" + " c: f1012cff cmp x7, #0x4b\n" + " 10: 54000080 b.eq 0x20\n" + " 14: f1002cff cmp x7, #0xb\n" + " 18: 54000040 b.eq 0x20\n" + " 1c: 14000041 b 0x120" + >>, + ?assertEqual(dump_to_bin(Dump), Stream). + +%% Test OP_WAIT_TIMEOUT pattern +wait_timeout_test() -> + State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)), + + Label = 42, + {State1, OffsetRef0} = ?BACKEND:set_continuation_to_offset(State0), + {State2, TimeoutReg} = ?BACKEND:move_to_native_register(State1, 5000), + State3 = ?BACKEND:call_primitive_last(State2, ?PRIM_WAIT_TIMEOUT, [ + ctx, jit_state, {free, TimeoutReg}, Label + ]), + State4 = ?BACKEND:add_label(State3, OffsetRef0), + State5 = ?BACKEND:continuation_entry_point(State4), + {State6, ResultReg0} = ?BACKEND:call_primitive(State5, ?PRIM_PROCESS_SIGNAL_MESSAGES, [ + ctx, jit_state + ]), + State7 = ?BACKEND:return_if_not_equal_to_ctx(State6, {free, ResultReg0}), + % ?WAITING_TIMEOUT_EXPIRED + {State8, ResultReg1} = ?BACKEND:call_primitive(State7, ?PRIM_CONTEXT_GET_FLAGS, [ctx, 2]), + State9 = ?BACKEND:if_block(State8, {{free, ResultReg1}, '==', 0}, fun(BlockSt) -> + ?BACKEND:call_primitive_last(BlockSt, ?PRIM_WAIT_TIMEOUT_TRAP_HANDLER, [ + ctx, jit_state, Label + ]) + end), + State10 = ?BACKEND:update_branches(State9), + + Stream = ?BACKEND:stream(State10), + Dump = << + " 0: 100000e7 adr x7, 0x1c\n" + " 4: f9000427 str x7, [x1, #8]\n" + " 8: d2827107 mov x7, #0x1388 // #5000\n" + " c: f9407848 ldr x8, [x2, #240]\n" + " 10: aa0703e2 mov x2, x7\n" + " 14: d2800543 mov x3, #0x2a // #42\n" + " 18: d61f0100 br x8\n" + " 1c: f9405450 ldr x16, [x2, #168]\n" + " 20: a9bf03fe stp x30, x0, [sp, #-16]!\n" + " 24: a9bf0be1 stp x1, x2, [sp, #-16]!\n" + " 28: d63f0200 blr x16\n" + " 2c: aa0003e7 mov x7, x0\n" + " 30: a8c10be1 ldp x1, x2, [sp], #16\n" + " 34: a8c103fe ldp x30, x0, [sp], #16\n" + " 38: eb0000ff cmp x7, x0\n" + " 3c: 54000060 b.eq 0x48 // b.none\n" + " 40: aa0703e0 mov x0, x7\n" + " 44: d65f03c0 ret\n" + " 48: 
f9408450 ldr x16, [x2, #264]\n" + " 4c: a9bf03fe stp x30, x0, [sp, #-16]!\n" + " 50: a9bf0be1 stp x1, x2, [sp, #-16]!\n" + " 54: d2800041 mov x1, #0x2 // #2\n" + " 58: d63f0200 blr x16\n" + " 5c: aa0003e7 mov x7, x0\n" + " 60: a8c10be1 ldp x1, x2, [sp], #16\n" + " 64: a8c103fe ldp x30, x0, [sp], #16\n" + " 68: b5000087 cbnz x7, 0x78\n" + " 6c: f9407c47 ldr x7, [x2, #248]\n" + " 70: d2800542 mov x2, #0x2a // #42\n" + " 74: d61f00e0 br x7" + >>, + ?assertEqual(dump_to_bin(Dump), Stream). + +%% Test OP_WAIT pattern that uses set_continuation_to_label +wait_test() -> + State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)), + + State1 = ?BACKEND:jump_table(State0, 5), + State2 = ?BACKEND:add_label(State1, 1), + Label = 2, + State3 = ?BACKEND:set_continuation_to_label(State2, Label), + State4 = ?BACKEND:call_primitive_last(State3, ?PRIM_SCHEDULE_WAIT_CP, [ctx, jit_state]), + State5 = ?BACKEND:add_label(State4, Label, 16#100), + State6 = ?BACKEND:update_branches(State5), + + Stream = ?BACKEND:stream(State6), + Dump = << + " 0: 14000000 b 0x0\n" + " 4: 14000005 b 0x18\n" + " 8: 1400003e b 0x100\n" + " c: 14000000 b 0xc\n" + " 10: 14000000 b 0x10\n" + " 14: 14000000 b 0x14\n" + " 18: 10000747 adr x7, 0x100\n" + " 1c: f9000427 str x7, [x1, #8]\n" + " 20: f9407447 ldr x7, [x2, #232]\n" + " 24: d61f00e0 br x7" + >>, + ?assertEqual(dump_to_bin(Dump), Stream). 
+ +%% Test set_continuation_to_label with known label +wait_known_test() -> + State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)), + + State1 = ?BACKEND:jump_table(State0, 5), + State2 = ?BACKEND:add_label(State1, 1), + Label = 2, + State3 = ?BACKEND:add_label(State2, Label, 16#100), + State4 = ?BACKEND:set_continuation_to_label(State3, Label), + State5 = ?BACKEND:call_primitive_last(State4, ?PRIM_SCHEDULE_WAIT_CP, [ctx, jit_state]), + State6 = ?BACKEND:update_branches(State5), + + Stream = ?BACKEND:stream(State6), + Dump = << + " 0: 14000000 b 0x0\n" + " 4: 14000005 b 0x18\n" + " 8: 1400003e b 0x100\n" + " c: 14000000 b 0xc\n" + " 10: 14000000 b 0x10\n" + " 14: 14000000 b 0x14\n" + " 18: 10000747 adr x7, 0x100\n" + " 1c: f9000427 str x7, [x1, #8]\n" + " 20: f9407447 ldr x7, [x2, #232]\n" + " 24: d61f00e0 br x7" + >>, + ?assertEqual(dump_to_bin(Dump), Stream). + +return_labels_and_lines_test() -> + State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)), + State1 = ?BACKEND:jump_table(State0, 2), + + % Test return_labels_and_lines with some sample labels and lines + State2 = ?BACKEND:add_label(State1, 1, 16), + % {Line, Offset} pairs + SortedLines = [{10, 16}, {20, 32}], + + State3 = ?BACKEND:add_label(State2, 0), + + State4 = ?BACKEND:return_labels_and_lines(State3, SortedLines), State5 = ?BACKEND:update_branches(State4), Stream = ?BACKEND:stream(State5), - Offset = ?BACKEND:offset(State3), + + ?assert(byte_size(Stream) >= 44), + Dump = << - " 0: f9401807 ldr x7, [x0, #48]\n" - " 4: f1012cff cmp x7, #0x4b\n" - " 8: 54000080 b.eq 0x18 // b.none\n" - " c: f1002cff cmp x7, #0xb\n" - " 10: 54000040 b.eq 0x18 // b.none\n" - " 14: 14000041 b 0x118" + " 0: 14000003 b 0xc\n" + " 4: 14000003 b 0x10\n" + " 8: 14000000 b 0x8\n" + " c: 10000040 adr x0, 0x14\n" + " 10: d65f03c0 ret\n" + " 14: 00000200 .inst 0x00000200\n" + " 18: 0c000000 st4 {v0.8b-v3.8b}, [x0]\n" + " 1c: 00000100 .inst 0x00000100\n" + " 20: 
02001000 .inst 0x02001000\n" + " 24: 00000a00 .inst 0x00000a00\n" + " 28: 14001000 b 0x4028\n" + " 2c: 20000000 .inst 0x20000000" + >>, + ?assertEqual(dump_to_bin(Dump), Stream). + +%% Test call_primitive with {free, {x_reg, X}} +gc_bif2_test() -> + State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)), + {State1, FuncPtr} = ?BACKEND:call_primitive(State0, ?PRIM_GET_IMPORTED_BIF, [jit_state, 42]), + {State2, _ResultReg} = ?BACKEND:call_func_ptr(State1, {free, FuncPtr}, [ + ctx, 0, 3, {y_reg, 0}, {free, {x_reg, 0}} + ]), + + Stream = ?BACKEND:stream(State2), + Dump = << + " 0: f9402050 ldr x16, [x2, #64]\n" + " 4: a9bf03fe stp x30, x0, [sp, #-16]!\n" + " 8: a9bf0be1 stp x1, x2, [sp, #-16]!\n" + " c: aa0103e0 mov x0, x1\n" + " 10: d2800541 mov x1, #0x2a // #42\n" + " 14: d63f0200 blr x16\n" + " 18: aa0003e7 mov x7, x0\n" + " 1c: a8c10be1 ldp x1, x2, [sp], #16\n" + " 20: a8c103fe ldp x30, x0, [sp], #16\n" + " 24: a9bf03fe stp x30, x0, [sp, #-16]!\n" + " 28: a9bf0be1 stp x1, x2, [sp, #-16]!\n" + " 2c: d2800001 mov x1, #0x0 // #0\n" + " 30: d2800062 mov x2, #0x3 // #3\n" + " 34: f9401403 ldr x3, [x0, #40]\n" + " 38: f9400063 ldr x3, [x3]\n" + " 3c: f9401804 ldr x4, [x0, #48]\n" + " 40: d63f00e0 blr x7\n" + " 44: aa0003e7 mov x7, x0\n" + " 48: a8c10be1 ldp x1, x2, [sp], #16\n" + " 4c: a8c103fe ldp x30, x0, [sp], #16" + >>, + ?assertEqual(dump_to_bin(Dump), Stream). 
+ +%% Test case where parameter value is in r1 +memory_ensure_free_with_roots_test() -> + State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)), + {State1, _FuncPtr} = ?BACKEND:call_primitive(State0, ?PRIM_MEMORY_ENSURE_FREE_WITH_ROOTS, [ + ctx, jit_state, {free, r1}, 4, 1 + ]), + + Stream = ?BACKEND:stream(State1), + Dump = << + " 0: f940b050 ldr x16, [x2, #352]\n" + " 4: a9bf03fe stp x30, x0, [sp, #-16]!\n" + " 8: a9bf0be1 stp x1, x2, [sp, #-16]!\n" + " c: aa0103e2 mov x2, x1\n" + " 10: d2800083 mov x3, #0x4 // #4\n" + " 14: d2800024 mov x4, #0x1 // #1\n" + " 18: d63f0200 blr x16\n" + " 1c: aa0003e7 mov x7, x0\n" + " 20: a8c10be1 ldp x1, x2, [sp], #16\n" + " 24: a8c103fe ldp x30, x0, [sp], #16" >>, ?assertEqual(dump_to_bin(Dump), Stream). @@ -1081,7 +1499,7 @@ call_fun_test() -> ]) end ), - State5 = ?BACKEND:and_(State4, RegCopy, ?TERM_PRIMARY_CLEAR_MASK), + {State5, RegCopy} = ?BACKEND:and_(State4, {free, RegCopy}, ?TERM_PRIMARY_CLEAR_MASK), State6 = ?BACKEND:move_array_element(State5, RegCopy, 0, RegCopy), State7 = ?BACKEND:if_block( State6, {RegCopy, '&', ?TERM_BOXED_TAG_MASK, '!=', ?TERM_BOXED_FUN}, fun(BSt0) -> @@ -1536,8 +1954,8 @@ move_to_array_element_test_() -> end), %% move_to_array_element/5: x_reg to reg[x+offset] ?_test(begin - State1 = setelement(6, State0, ?BACKEND:available_regs(State0) -- [r8, r9]), - State2 = setelement(7, State1, [r8, r9]), + State1 = setelement(7, State0, ?BACKEND:available_regs(State0) -- [r8, r9]), + State2 = setelement(8, State1, [r8, r9]), [r8, r9] = ?BACKEND:used_regs(State2), State3 = ?BACKEND:move_to_array_element(State2, {x_reg, 0}, r8, r9, 1), Stream = ?BACKEND:stream(State3), @@ -1550,8 +1968,8 @@ move_to_array_element_test_() -> end), %% move_to_array_element/5: imm to reg[x+offset] ?_test(begin - State1 = setelement(6, State0, ?BACKEND:available_regs(State0) -- [r8, r9]), - State2 = setelement(7, State1, [r8, r9]), + State1 = setelement(7, State0, ?BACKEND:available_regs(State0) -- 
[r8, r9]), + State2 = setelement(8, State1, [r8, r9]), [r8, r9] = ?BACKEND:used_regs(State2), State3 = ?BACKEND:move_to_array_element(State2, 42, r8, r9, 1), Stream = ?BACKEND:stream(State3), @@ -1662,6 +2080,66 @@ move_to_native_register_test_() -> ] end}. +add_test0(State0, Reg, Imm, Dump) -> + State1 = ?BACKEND:add(State0, Reg, Imm), + Stream = ?BACKEND:stream(State1), + ?assertEqual(dump_to_bin(Dump), Stream). + +add_test_() -> + {setup, + fun() -> + ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)) + end, + fun(State0) -> + [ + ?_test(begin + add_test0(State0, r2, 2, << + " 0: 91000842 add x2, x2, #0x2" + >>) + end), + ?_test(begin + add_test0(State0, r2, 256, << + " 0: 91040042 add x2, x2, #0x100" + >>) + end), + ?_test(begin + add_test0(State0, r2, r3, << + " 0: 8b030042 add x2, x2, x3" + >>) + end) + ] + end}. + +sub_test0(State0, Reg, Imm, Dump) -> + State1 = ?BACKEND:sub(State0, Reg, Imm), + Stream = ?BACKEND:stream(State1), + ?assertEqual(dump_to_bin(Dump), Stream). + +sub_test_() -> + {setup, + fun() -> + ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)) + end, + fun(State0) -> + [ + ?_test(begin + sub_test0(State0, r2, 2, << + " 0: d1000842 sub x2, x2, #0x2" + >>) + end), + ?_test(begin + sub_test0(State0, r2, 256, << + " 0: d1040042 sub x2, x2, #0x100" + >>) + end), + ?_test(begin + sub_test0(State0, r2, r3, << + " 0: cb030042 sub x2, x2, x3" + >>) + end) + ] + end}. + mul_test0(State0, Reg, Imm, Dump) -> State1 = ?BACKEND:mul(State0, Reg, Imm), Stream = ?BACKEND:stream(State1), diff --git a/tests/libs/jit/jit_armv6m_tests.erl b/tests/libs/jit/jit_armv6m_tests.erl index ceaf926d7d..50098b44cf 100644 --- a/tests/libs/jit/jit_armv6m_tests.erl +++ b/tests/libs/jit/jit_armv6m_tests.erl @@ -32,7 +32,7 @@ -define(BACKEND, jit_armv6m). 
% disassembly obtained with: -% arm-elf-objdump -b binary -D dump.bin -M arm +% arm-elf-objdump -D -b binary -marm --disassembler-options=force-thumb -z call_primitive_0_test() -> State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)), @@ -107,7 +107,7 @@ call_primitive_6_args_test() -> State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)), % Get bin_ptr from x_reg 0 (similar to get_list_test pattern) {State1, RegA} = ?BACKEND:move_to_native_register(State0, {x_reg, 0}), - State2 = ?BACKEND:and_(State1, RegA, ?TERM_PRIMARY_CLEAR_MASK), + {State2, RegA} = ?BACKEND:and_(State1, {free, RegA}, ?TERM_PRIMARY_CLEAR_MASK), % Get another register for the last parameter to test {free, Reg} handling {State3, OtherReg} = ?BACKEND:move_to_native_register(State2, {x_reg, 1}), % Call PRIM_BITSTRING_EXTRACT_INTEGER with 6 arguments @@ -312,13 +312,13 @@ call_primitive_last_5_args_test() -> " 6: 9700 str r7, [sp, #0]\n" " 8: 9902 ldr r1, [sp, #8]\n" " a: 2204 movs r2, #4\n" - " c: 4b00 ldr r3, [pc, #0] ; (0x10)\n" - " e: e001 b.n 0x14\n" - " 10: 02cb lsrs r3, r1, #16\n" - " 12: 0000 movs r0, r0\n" - " 14: 47b0 blx r6\n" - " 16: b002 add sp, #8\n" - " 18: bdf2 pop {r1, r4, r5, r6, r7, pc}" + " c: 4b01 ldr r3, [pc, #4] ; (0x14)\n" + " e: 47b0 blx r6\n" + " 10: b002 add sp, #8\n" + " 12: bdf2 pop {r1, r4, r5, r6, r7, pc}\n" + % Literal pool + " 14: 02cb lsls r3, r1, #11\n" + " 16: 0000 movs r0, r0" >>, ?assertEqual(dump_to_bin(Dump), Stream). 
@@ -535,17 +535,19 @@ if_block_test_() -> ?BACKEND:add(BSt0, RegB, 2) end ), - Stream = ?BACKEND:stream(State1), + State2 = ?BACKEND:jump_to_offset(State1, 16#100), + Stream = ?BACKEND:stream(State2), Dump = << " 0: 6987 ldr r7, [r0, #24]\n" " 2: 69c6 ldr r6, [r0, #28]\n" - " 4: 4d00 ldr r5, [pc, #0] ; (0x8)\n" - " 6: da04 bge.n 0x12\n" - " 8: 0400 lsls r0, r0, #16\n" - " a: 0000 movs r0, r0\n" - " c: 42af cmp r7, r5\n" - " e: dafe bge.n 0xe\n" - " 10: 3602 adds r6, #2" + " 4: 4d02 ldr r5, [pc, #8] ; (0x10)\n" + " 6: da01 bge.n 0xc\n" + " 8: ffff ; to be rewritten\n" + " a: 3602 adds r6, #2\n" + " c: e078 b.n 0x100\n" + " e: 0000 movs r0, r0\n" + " 10: 0400 lsls r0, r0, #16\n" + " 12: 0000 movs r0, r0" >>, ?assertEqual(dump_to_bin(Dump), Stream), ?assertEqual([RegB, RegA], ?BACKEND:used_regs(State1)) @@ -713,17 +715,19 @@ if_block_test_() -> ?BACKEND:add(BSt0, RegB, 1) end ), - Stream = ?BACKEND:stream(State1), + State2 = ?BACKEND:jump_to_offset(State1, 16#100), + Stream = ?BACKEND:stream(State2), Dump = << " 0: 6987 ldr r7, [r0, #24]\n" " 2: 69c6 ldr r6, [r0, #28]\n" - " 4: 4d00 ldr r5, [pc, #0] ; (0x8)\n" - " 6: e001 b.n 0xc\n" - " 8: 07cb lsls r3, r1, #31\n" - " a: 0000 movs r0, r0\n" - " c: 42af cmp r7, r5\n" - " e: d000 beq.n 0x12\n" - " 10: 3601 adds r6, #1" + " 4: 4d02 ldr r5, [pc, #8] ; (0x10)\n" + " 6: 42af cmp r7, r5\n" + " 8: d000 beq.n 0xc\n" + " a: 3601 adds r6, #1\n" + " c: e078 b.n 0x100\n" + " e: 0000 movs r0, r0\n" + " 10: 07cb lsls r3, r1, #31\n" + " 12: 0000 movs r0, r0" >>, ?assertEqual(dump_to_bin(Dump), Stream) end), @@ -1063,6 +1067,94 @@ if_block_test_() -> >>, ?assertEqual(dump_to_bin(Dump), Stream), ?assertEqual([RegB], ?BACKEND:used_regs(State1)) + end), + ?_test(begin + State1 = ?BACKEND:if_block( + State0, + {100, '<', RegA}, + fun(BSt0) -> + ?BACKEND:add(BSt0, RegB, 2) + end + ), + Stream = ?BACKEND:stream(State1), + Dump = << + " 0: 6987 ldr r7, [r0, #24]\n" + " 2: 69c6 ldr r6, [r0, #28]\n" + " 4: 2f64 cmp r7, #100 ; 0x64\n" + " 6: 
dd00 ble.n 0xa\n" + " 8: 3602 adds r6, #2" + >>, + ?assertEqual(dump_to_bin(Dump), Stream), + ?assertEqual([RegB, RegA], ?BACKEND:used_regs(State1)) + end), + ?_test(begin + State1 = ?BACKEND:if_block( + State0, + {100, '<', {free, RegA}}, + fun(BSt0) -> + ?BACKEND:add(BSt0, RegB, 2) + end + ), + Stream = ?BACKEND:stream(State1), + Dump = << + " 0: 6987 ldr r7, [r0, #24]\n" + " 2: 69c6 ldr r6, [r0, #28]\n" + " 4: 2f64 cmp r7, #100 ; 0x64\n" + " 6: dd00 ble.n 0xa\n" + " 8: 3602 adds r6, #2" + >>, + ?assertEqual(dump_to_bin(Dump), Stream), + ?assertEqual([RegB], ?BACKEND:used_regs(State1)) + end), + ?_test(begin + State1 = ?BACKEND:if_block( + State0, + {1024, '<', RegA}, + fun(BSt0) -> + ?BACKEND:add(BSt0, RegB, 2) + end + ), + State2 = ?BACKEND:jump_to_offset(State1, 16#100), + Stream = ?BACKEND:stream(State2), + Dump = << + " 0: 6987 ldr r7, [r0, #24]\n" + " 2: 69c6 ldr r6, [r0, #28]\n" + " 4: 4d02 ldr r5, [pc, #8] ; (0x10)\n" + " 6: dd01 ble.n 0xc\n" + " 8: ffff ; to be rewritten\n" + " a: 3602 adds r6, #2\n" + " c: e078 b.n 0x100\n" + " e: 0000 movs r0, r0\n" + " 10: 0400 lsls r0, r0, #16\n" + " 12: 0000 movs r0, r0" + >>, + ?assertEqual(dump_to_bin(Dump), Stream), + ?assertEqual([RegB, RegA], ?BACKEND:used_regs(State1)) + end), + ?_test(begin + State1 = ?BACKEND:if_block( + State0, + {1024, '<', {free, RegA}}, + fun(BSt0) -> + ?BACKEND:add(BSt0, RegB, 2) + end + ), + State2 = ?BACKEND:jump_to_offset(State1, 16#100), + Stream = ?BACKEND:stream(State2), + Dump = << + " 0: 6987 ldr r7, [r0, #24]\n" + " 2: 69c6 ldr r6, [r0, #28]\n" + " 4: 4d02 ldr r5, [pc, #8] ; (0x10)\n" + " 6: dd01 ble.n 0xc\n" + " 8: ffff ; to be rewritten\n" + " a: 3602 adds r6, #2\n" + " c: e078 b.n 0x100\n" + " e: 0000 movs r0, r0\n" + " 10: 0400 lsls r0, r0, #16\n" + " 12: 0000 movs r0, r0" + >>, + ?assertEqual(dump_to_bin(Dump), Stream), + ?assertEqual([RegB], ?BACKEND:used_regs(State1)) end) ] end}. 
@@ -1395,35 +1487,33 @@ call_only_or_schedule_next_and_label_relocation_large_gap_test() -> " 128: 3f01 subs r7, #1\n" " 12a: 60b7 str r7, [r6, #8]\n" " 12c: d004 beq.n 0x138\n" - " 12e: e011 b.n 0x154\n" + " 12e: e00f b.n 0x150\n" " 130: 46c0 nop ; (mov r8, r8)\n" " 132: 46c0 nop ; (mov r8, r8)\n" " 134: 46c0 nop ; (mov r8, r8)\n" " 136: 46c0 nop ; (mov r8, r8)\n" " 138: a700 add r7, pc, #0 ; (adr r7, 0x13c)\n" - " 13a: 4e01 ldr r6, [pc, #4] ; (0x140)\n" - " 13c: e002 b.n 0x144\n" - " 13e: 0000 movs r0, r0\n" - " 140: fedd ffff stcl2 15, cr13, [sp, #-1020] ; 0xfffffc04\n" - " 144: 19f6 adds r6, r6, r7\n" - " 146: 9f00 ldr r7, [sp, #0]\n" - " 148: 607e str r6, [r7, #4]\n" - " 14a: 6897 ldr r7, [r2, #8]\n" - " 14c: 9e05 ldr r6, [sp, #20]\n" - " 14e: 9705 str r7, [sp, #20]\n" - " 150: 46b6 mov lr, r6\n" - " 152: bdf2 pop {r1, r4, r5, r6, r7, pc}\n" - " 154: 6817 ldr r7, [r2, #0]\n" - " 156: 9e05 ldr r6, [sp, #20]\n" - " 158: 9705 str r7, [sp, #20]\n" - " 15a: 46b6 mov lr, r6\n" - " 15c: bdf2 pop {r1, r4, r5, r6, r7, pc}\n" - " 15e: 46c0 nop ; (mov r8, r8)\n" - " 160: 6857 ldr r7, [r2, #4]\n" - " 162: 9e05 ldr r6, [sp, #20]\n" - " 164: 9705 str r7, [sp, #20]\n" - " 166: 46b6 mov lr, r6\n" - " 168: bdf2 pop {r1, r4, r5, r6, r7, pc}" + " 13a: 4e04 ldr r6, [pc, #16] ; (0x14c)\n" + " 13c: 19f6 adds r6, r6, r7\n" + " 13e: 9f00 ldr r7, [sp, #0]\n" + " 140: 607e str r6, [r7, #4]\n" + " 142: 6897 ldr r7, [r2, #8]\n" + " 144: 9e05 ldr r6, [sp, #20]\n" + " 146: 9705 str r7, [sp, #20]\n" + " 148: 46b6 mov lr, r6\n" + " 14a: bdf2 pop {r1, r4, r5, r6, r7, pc}\n" + " 14c: fedd ffff mrc2 15, 6, pc, cr13, cr15, {7}\n" + " 150: 6817 ldr r7, [r2, #0]\n" + " 152: 9e05 ldr r6, [sp, #20]\n" + " 154: 9705 str r7, [sp, #20]\n" + " 156: 46b6 mov lr, r6\n" + " 158: bdf2 pop {r1, r4, r5, r6, r7, pc}\n" + " 15a: 46c0 nop ; (mov r8, r8)\n" + " 15c: 6857 ldr r7, [r2, #4]\n" + " 15e: 9e05 ldr r6, [sp, #20]\n" + " 160: 9705 str r7, [sp, #20]\n" + " 162: 46b6 mov lr, r6\n" + " 164: bdf2 pop {r1, r4, 
r5, r6, r7, pc}" >>, {_, RelevantBinary} = split_binary(Stream, 16#124), ?assertEqual(dump_to_bin(Dump), RelevantBinary). @@ -1459,35 +1549,33 @@ call_only_or_schedule_next_and_label_relocation_large_gap_unaligned_test() -> " 128: 3f01 subs r7, #1\n" " 12a: 60b7 str r7, [r6, #8]\n" " 12c: d004 beq.n 0x138\n" - " 12e: e011 b.n 0x154\n" + " 12e: e00f b.n 0x150\n" " 130: 46c0 nop ; (mov r8, r8)\n" " 132: 46c0 nop ; (mov r8, r8)\n" " 134: 46c0 nop ; (mov r8, r8)\n" " 136: 46c0 nop ; (mov r8, r8)\n" " 138: a700 add r7, pc, #0 ; (adr r7, 0x13c)\n" - " 13a: 4e01 ldr r6, [pc, #4] ; (0x140)\n" - " 13c: e002 b.n 0x144\n" - " 13e: 0000 movs r0, r0\n" - " 140: fedd ffff stcl2 15, cr13, [sp, #-1020] ; 0xfffffc04\n" - " 144: 19f6 adds r6, r6, r7\n" - " 146: 9f00 ldr r7, [sp, #0]\n" - " 148: 607e str r6, [r7, #4]\n" - " 14a: 6897 ldr r7, [r2, #8]\n" - " 14c: 9e05 ldr r6, [sp, #20]\n" - " 14e: 9705 str r7, [sp, #20]\n" - " 150: 46b6 mov lr, r6\n" - " 152: bdf2 pop {r1, r4, r5, r6, r7, pc}\n" - " 154: 6817 ldr r7, [r2, #0]\n" - " 156: 9e05 ldr r6, [sp, #20]\n" - " 158: 9705 str r7, [sp, #20]\n" - " 15a: 46b6 mov lr, r6\n" - " 15c: bdf2 pop {r1, r4, r5, r6, r7, pc}\n" - " 15e: 46c0 nop ; (mov r8, r8)\n" - " 160: 6857 ldr r7, [r2, #4]\n" - " 162: 9e05 ldr r6, [sp, #20]\n" - " 164: 9705 str r7, [sp, #20]\n" - " 166: 46b6 mov lr, r6\n" - " 168: bdf2 pop {r1, r4, r5, r6, r7, pc}" + " 13a: 4e04 ldr r6, [pc, #16] ; (0x14c)\n" + " 13c: 19f6 adds r6, r6, r7\n" + " 13e: 9f00 ldr r7, [sp, #0]\n" + " 140: 607e str r6, [r7, #4]\n" + " 142: 6897 ldr r7, [r2, #8]\n" + " 144: 9e05 ldr r6, [sp, #20]\n" + " 146: 9705 str r7, [sp, #20]\n" + " 148: 46b6 mov lr, r6\n" + " 14a: bdf2 pop {r1, r4, r5, r6, r7, pc}\n" + " 14c: fedd ffff mrc2 15, 6, pc, cr13, cr15, {7}\n" + " 150: 6817 ldr r7, [r2, #0]\n" + " 152: 9e05 ldr r6, [sp, #20]\n" + " 154: 9705 str r7, [sp, #20]\n" + " 156: 46b6 mov lr, r6\n" + " 158: bdf2 pop {r1, r4, r5, r6, r7, pc}\n" + " 15a: 46c0 nop ; (mov r8, r8)\n" + " 15c: 6857 ldr r7, [r2, 
#4]\n" + " 15e: 9e05 ldr r6, [sp, #20]\n" + " 160: 9705 str r7, [sp, #20]\n" + " 162: 46b6 mov lr, r6\n" + " 164: bdf2 pop {r1, r4, r5, r6, r7, pc}" >>, {_, RelevantBinary} = split_binary(Stream, 16#122), ?assertEqual(dump_to_bin(Dump), RelevantBinary). @@ -1517,39 +1605,38 @@ call_bif_with_large_literal_integer_test() -> " c: bc05 pop {r0, r2}\n" " e: 6bd6 ldr r6, [r2, #60] ; 0x3c\n" " 10: b4c5 push {r0, r2, r6, r7}\n" - " 12: 4901 ldr r1, [pc, #4] ; (0x18)\n" - " 14: e002 b.n 0x1c\n" - " 16: 0000 movs r0, r0\n" - " 18: e895 3b7f ldmia.w r5, {r0, r1, r2, r3, r4, r5, r6, r8, r9, fp, ip, sp}\n" - " 1c: 47b0 blx r6\n" - " 1e: 4605 mov r5, r0\n" - " 20: bcc5 pop {r0, r2, r6, r7}\n" - " 22: b405 push {r0, r2}\n" - " 24: b082 sub sp, #8\n" - " 26: 9500 str r5, [sp, #0]\n" - " 28: 2100 movs r1, #0\n" - " 2a: 2201 movs r2, #1\n" - " 2c: 6983 ldr r3, [r0, #24]\n" - " 2e: 47b8 blx r7\n" - " 30: 4607 mov r7, r0\n" - " 32: b002 add sp, #8\n" - " 34: bc05 pop {r0, r2}\n" - " 36: 2f00 cmp r7, #0\n" - " 38: d105 bne.n 0x46\n" - " 3a: 6997 ldr r7, [r2, #24]\n" - " 3c: 223c movs r2, #60 ; 0x3c\n" - " 3e: 9e05 ldr r6, [sp, #20]\n" - " 40: 9705 str r7, [sp, #20]\n" - " 42: 46b6 mov lr, r6\n" - " 44: bdf2 pop {r1, r4, r5, r6, r7, pc}\n" - " 46: 6187 str r7, [r0, #24]" + " 12: 490b ldr r1, [pc, #44] ; (0x40)\n" + " 14: 47b0 blx r6\n" + " 16: 4605 mov r5, r0\n" + " 18: bcc5 pop {r0, r2, r6, r7}\n" + " 1a: b405 push {r0, r2}\n" + " 1c: b082 sub sp, #8\n" + " 1e: 9500 str r5, [sp, #0]\n" + " 20: 2100 movs r1, #0\n" + " 22: 2201 movs r2, #1\n" + " 24: 6983 ldr r3, [r0, #24]\n" + " 26: 47b8 blx r7\n" + " 28: 4607 mov r7, r0\n" + " 2a: b002 add sp, #8\n" + " 2c: bc05 pop {r0, r2}\n" + " 2e: 2f00 cmp r7, #0\n" + " 30: d108 bne.n 0x44\n" + " 32: 6997 ldr r7, [r2, #24]\n" + " 34: 2234 movs r2, #52 ; 0x34\n" + " 36: 9e05 ldr r6, [sp, #20]\n" + " 38: 9705 str r7, [sp, #20]\n" + " 3a: 46b6 mov lr, r6\n" + " 3c: bdf2 pop {r1, r4, r5, r6, r7, pc}\n" + " 3e: 0000 movs r0, r0\n" + " 40: e895 3b7f 
ldmia.w r5, {r0, r1, r2, r3, r4, r5, r6, r8, r9, fp, ip, sp}\n" + " 44: 6187 str r7, [r0, #24]" >>, ?assertEqual(dump_to_bin(Dump), Stream). get_list_test() -> State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)), {State1, Reg} = ?BACKEND:move_to_native_register(State0, {x_reg, 0}), - State2 = ?BACKEND:and_(State1, Reg, ?TERM_PRIMARY_CLEAR_MASK), + {State2, Reg} = ?BACKEND:and_(State1, {free, Reg}, ?TERM_PRIMARY_CLEAR_MASK), State3 = ?BACKEND:move_array_element(State2, Reg, 1, {y_reg, 1}), State4 = ?BACKEND:move_array_element(State3, Reg, 0, {y_reg, 0}), State5 = ?BACKEND:free_native_registers(State4, [Reg]), @@ -1580,7 +1667,7 @@ is_integer_test() -> ?BACKEND:jump_to_label(BSt0, Label) end ), - MSt2 = ?BACKEND:and_(MSt1, Reg, ?TERM_PRIMARY_CLEAR_MASK), + {MSt2, Reg} = ?BACKEND:and_(MSt1, {free, Reg}, ?TERM_PRIMARY_CLEAR_MASK), MSt3 = ?BACKEND:move_array_element(MSt2, Reg, 0, Reg), ?BACKEND:if_block( MSt3, @@ -1642,7 +1729,7 @@ is_number_test() -> BSt1 = cond_jump_to_label( {Reg, '&', ?TERM_PRIMARY_MASK, '!=', ?TERM_PRIMARY_BOXED}, Label, ?BACKEND, BSt0 ), - BSt2 = ?BACKEND:and_(BSt1, Reg, ?TERM_PRIMARY_CLEAR_MASK), + {BSt2, Reg} = ?BACKEND:and_(BSt1, {free, Reg}, ?TERM_PRIMARY_CLEAR_MASK), BSt3 = ?BACKEND:move_array_element(BSt2, Reg, 0, Reg), cond_jump_to_label( {'and', [ @@ -1698,57 +1785,83 @@ is_number_test() -> is_boolean_test() -> State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)), + State1 = ?BACKEND:jump_table(State0, 1), Label = 1, - {State1, Reg} = ?BACKEND:move_to_native_register(State0, {x_reg, 0}), - State2 = ?BACKEND:if_block(State1, {Reg, '!=', ?TRUE_ATOM}, fun(BSt0) -> + {State2, Reg} = ?BACKEND:move_to_native_register(State1, {x_reg, 0}), + State3 = ?BACKEND:if_block(State2, {Reg, '!=', ?TRUE_ATOM}, fun(BSt0) -> ?BACKEND:if_block(BSt0, {Reg, '!=', ?FALSE_ATOM}, fun(BSt1) -> ?BACKEND:jump_to_label(BSt1, Label) end) end), - State3 = ?BACKEND:free_native_registers(State2, [Reg]), - 
?BACKEND:assert_all_native_free(State3), - State4 = ?BACKEND:add_label(State3, Label, 16#100), - State5 = ?BACKEND:update_branches(State4), - Stream = ?BACKEND:stream(State5), + State4 = ?BACKEND:free_native_registers(State3, [Reg]), + ?BACKEND:assert_all_native_free(State4), + State5 = ?BACKEND:add_label(State4, Label, 16#100), + State6 = ?BACKEND:update_branches(State5), + Stream = ?BACKEND:stream(State6), Dump = << - " 0: 6987 ldr r7, [r0, #24]\n" - " 2: 2f4b cmp r7, #75 ; 0x4b\n" - " 4: d006 beq.n 0x14\n" - " 6: 2f0b cmp r7, #11\n" - " 8: d004 beq.n 0x14\n" - " a: e079 b.n 0x100\n" - " c: 46c0 nop ; (mov r8, r8)\n" - " e: 46c0 nop ; (mov r8, r8)\n" - " 10: 46c0 nop ; (mov r8, r8)\n" - " 12: 46c0 nop ; (mov r8, r8)" + " 0: 4b01 ldr r3, [pc, #4]\n" + " 2: b5f2 push {r1, r4, r5, r6, r7, lr}\n" + " 4: 449f add pc, r3\n" + " 6: 46c0 nop\n" + " 8: ffff .short 0xffff\n" + " a: ffff .short 0xffff\n" + " c: 4b01 ldr r3, [pc, #4]\n" + " e: b5f2 push {r1, r4, r5, r6, r7, lr}\n" + " 10: 449f add pc, r3\n" + " 12: 46c0 nop\n" + " 14: 00ec lsls r4, r5, #3\n" + " 16: 0000 movs r0, r0\n" + " 18: 6987 ldr r7, [r0, #24]\n" + " 1a: 2f4b cmp r7, #75\n" + " 1c: d006 beq.n 0x2c\n" + " 1e: 2f0b cmp r7, #11\n" + " 20: d004 beq.n 0x2c\n" + " 22: e06d b.n 0x100\n" + " 24: 46c0 nop\n" + " 26: 46c0 nop\n" + " 28: 46c0 nop\n" + " 2a: 46c0 nop" >>, ?assertEqual(dump_to_bin(Dump), Stream). 
is_boolean_far_test() -> State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)), + State1 = ?BACKEND:jump_table(State0, 1), Label = 1, - {State1, Reg} = ?BACKEND:move_to_native_register(State0, {x_reg, 0}), - State2 = ?BACKEND:if_block(State1, {Reg, '!=', ?TRUE_ATOM}, fun(BSt0) -> + {State2, Reg} = ?BACKEND:move_to_native_register(State1, {x_reg, 0}), + State3 = ?BACKEND:if_block(State2, {Reg, '!=', ?TRUE_ATOM}, fun(BSt0) -> ?BACKEND:if_block(BSt0, {Reg, '!=', ?FALSE_ATOM}, fun(BSt1) -> ?BACKEND:jump_to_label(BSt1, Label) end) end), - State3 = ?BACKEND:free_native_registers(State2, [Reg]), - ?BACKEND:assert_all_native_free(State3), - State4 = ?BACKEND:add_label(State3, Label, 16#1000), - State5 = ?BACKEND:update_branches(State4), - Stream = ?BACKEND:stream(State5), + State4 = ?BACKEND:free_native_registers(State3, [Reg]), + ?BACKEND:assert_all_native_free(State4), + State5 = ?BACKEND:add_label(State4, Label, 16#1000), + State6 = ?BACKEND:update_branches(State5), + Stream = ?BACKEND:stream(State6), Dump = << - " 0: 6987 ldr r7, [r0, #24]\n" - " 2: 2f4b cmp r7, #75 ; 0x4b\n" - " 4: d006 beq.n 0x14\n" - " 6: 2f0b cmp r7, #11\n" - " 8: d004 beq.n 0x14\n" - " a: 4e01 ldr r6, [pc, #4] ; (0x10)\n" - " c: 447e add r6, pc\n" - " e: 4730 bx r6\n" - " 10: 0ff1 lsrs r0, r6, #31\n" - " 12: 0000 movs r0, r0" + " 0: 4b01 ldr r3, [pc, #4]\n" + " 2: b5f2 push {r1, r4, r5, r6, r7, lr}\n" + " 4: 449f add pc, r3\n" + " 6: 46c0 nop\n" + " 8: ffff .short 0xffff\n" + " a: ffff .short 0xffff\n" + " c: 4b01 ldr r3, [pc, #4]\n" + " e: b5f2 push {r1, r4, r5, r6, r7, lr}\n" + " 10: 449f add pc, r3\n" + " 12: 46c0 nop\n" + " 14: 0fec lsrs r4, r5, #31\n" + " 16: 0000 movs r0, r0\n" + " 18: 6987 ldr r7, [r0, #24]\n" + " 1a: 2f4b cmp r7, #75\n" + " 1c: d006 beq.n 0x2c\n" + " 1e: 2f0b cmp r7, #11\n" + " 20: d004 beq.n 0x2c\n" + " 22: 4e01 ldr r6, [pc, #4]\n" + " 24: 447e add r6, pc\n" + " 26: 4730 bx r6\n" + " 28: 0fd9 lsrs r1, r3, #31\n" + " 2a: 0000 movs r0, r0" >>, 
?assertEqual(dump_to_bin(Dump), Stream). @@ -1790,29 +1903,42 @@ is_boolean_far_unaligned_test() -> is_boolean_far_known_test() -> State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)), + State1 = ?BACKEND:jump_table(State0, 1), Label = 1, - State1 = ?BACKEND:add_label(State0, Label, 16#1000), - {State2, Reg} = ?BACKEND:move_to_native_register(State1, {x_reg, 0}), - State3 = ?BACKEND:if_block(State2, {Reg, '!=', ?TRUE_ATOM}, fun(BSt0) -> + State2 = ?BACKEND:add_label(State1, Label, 16#1000), + {State3, Reg} = ?BACKEND:move_to_native_register(State2, {x_reg, 0}), + State4 = ?BACKEND:if_block(State3, {Reg, '!=', ?TRUE_ATOM}, fun(BSt0) -> ?BACKEND:if_block(BSt0, {Reg, '!=', ?FALSE_ATOM}, fun(BSt1) -> ?BACKEND:jump_to_label(BSt1, Label) end) end), - State4 = ?BACKEND:free_native_registers(State3, [Reg]), - ?BACKEND:assert_all_native_free(State4), - State5 = ?BACKEND:update_branches(State4), - Stream = ?BACKEND:stream(State5), + State5 = ?BACKEND:free_native_registers(State4, [Reg]), + ?BACKEND:assert_all_native_free(State5), + State6 = ?BACKEND:update_branches(State5), + Stream = ?BACKEND:stream(State6), Dump = << - " 0: 6987 ldr r7, [r0, #24]\n" - " 2: 2f4b cmp r7, #75 ; 0x4b\n" - " 4: d006 beq.n 0x14\n" - " 6: 2f0b cmp r7, #11\n" - " 8: d004 beq.n 0x14\n" - " a: 4e01 ldr r6, [pc, #4] ; (0x10)\n" - " c: 447e add r6, pc\n" - " e: 4730 bx r6\n" - " 10: 0ff1 lsrs r1, r6, #31\n" - " 12: 0000 movs r0, r0" + " 0: 4b01 ldr r3, [pc, #4]\n" + " 2: b5f2 push {r1, r4, r5, r6, r7, lr}\n" + " 4: 449f add pc, r3\n" + " 6: 46c0 nop\n" + " 8: ffff .short 0xffff\n" + " a: ffff .short 0xffff\n" + " c: 4b01 ldr r3, [pc, #4]\n" + " e: b5f2 push {r1, r4, r5, r6, r7, lr}\n" + " 10: 449f add pc, r3\n" + " 12: 46c0 nop\n" + " 14: 0fec lsrs r4, r5, #31\n" + " 16: 0000 movs r0, r0\n" + " 18: 6987 ldr r7, [r0, #24]\n" + " 1a: 2f4b cmp r7, #75\n" + " 1c: d006 beq.n 0x2c\n" + " 1e: 2f0b cmp r7, #11\n" + " 20: d004 beq.n 0x2c\n" + " 22: 4e01 ldr r6, [pc, #4]\n" + " 
24: 447e add r6, pc\n" + " 26: 4730 bx r6\n" + " 28: 0fd9 lsrs r1, r3, #31\n" + " 2a: 0000 movs r0, r0" >>, ?assertEqual(dump_to_bin(Dump), Stream). @@ -1823,32 +1949,45 @@ is_boolean_far_known_unaligned_test() -> TempState = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)), TempStream = jit_stream_binary:append(?BACKEND:stream(TempState), PaddingInstruction), State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, TempStream), + State1 = ?BACKEND:jump_table(State0, 1), Label = 1, - State1 = ?BACKEND:add_label(State0, Label, 16#1000), - {State2, Reg} = ?BACKEND:move_to_native_register(State1, {x_reg, 0}), - State3 = ?BACKEND:if_block(State2, {Reg, '!=', ?TRUE_ATOM}, fun(BSt0) -> + State2 = ?BACKEND:add_label(State1, Label, 16#1000), + {State3, Reg} = ?BACKEND:move_to_native_register(State2, {x_reg, 0}), + State4 = ?BACKEND:if_block(State3, {Reg, '!=', ?TRUE_ATOM}, fun(BSt0) -> ?BACKEND:if_block(BSt0, {Reg, '!=', ?FALSE_ATOM}, fun(BSt1) -> ?BACKEND:jump_to_label(BSt1, Label) end) end), - State4 = ?BACKEND:free_native_registers(State3, [Reg]), - ?BACKEND:assert_all_native_free(State4), - State5 = ?BACKEND:update_branches(State4), - Stream = ?BACKEND:stream(State5), + State5 = ?BACKEND:free_native_registers(State4, [Reg]), + ?BACKEND:assert_all_native_free(State5), + State6 = ?BACKEND:update_branches(State5), + Stream = ?BACKEND:stream(State6), Dump = << " 0: 4770 bx lr\n" - " 2: 6987 ldr r7, [r0, #24]\n" - " 4: 2f4b cmp r7, #75 ; 0x4b\n" - " 6: d007 beq.n 0x18\n" - " 8: 2f0b cmp r7, #11\n" - " a: d005 beq.n 0x18\n" - " c: 4e01 ldr r6, [pc, #4] ; (0x14)\n" - " e: 447e add r6, pc\n" - " 10: 4730 bx r6\n" - " 12: 46c0 nop ; (mov r8, r8)\n" - " 14: 0fef lsrs r7, r5, #31\n" - " 16: 0000 movs r0, r0" + " 2: 4b01 ldr r3, [pc, #4]\n" + " 4: b5f2 push {r1, r4, r5, r6, r7, lr}\n" + " 6: 449f add pc, r3\n" + " 8: 46c0 nop\n" + " a: ffff .short 0xffff\n" + " c: ffff .short 0xffff\n" + " e: 4b01 ldr r3, [pc, #4]\n" + " 10: b5f2 push {r1, r4, r5, r6, 
r7, lr}\n" + " 12: 449f add pc, r3\n" + " 14: 46c0 nop\n" + " 16: 0fea lsrs r2, r5, #31\n" + " 18: 0000 movs r0, r0\n" + " 1a: 6987 ldr r7, [r0, #24]\n" + " 1c: 2f4b cmp r7, #75\n" + " 1e: d007 beq.n 0x30\n" + " 20: 2f0b cmp r7, #11\n" + " 22: d005 beq.n 0x30\n" + " 24: 4e01 ldr r6, [pc, #4]\n" + " 26: 447e add r6, pc\n" + " 28: 4730 bx r6\n" + " 2a: 46c0 nop\n" + " 2c: 0fd7 lsrs r7, r2, #31\n" + " 2e: 0000 movs r0, r0" >>, ?assertEqual(dump_to_bin(Dump), Stream). @@ -1879,48 +2018,46 @@ wait_timeout_test() -> Stream = ?BACKEND:stream(State10), Dump = << - " 0: a707 add r7, pc, #28 ; (adr r7, 0x22)\n" + " 0: a706 add r7, pc, #24 ; (adr r7, 0x1c)\n" " 2: 3701 adds r7, #1\n" " 4: 9e00 ldr r6, [sp, #0]\n" " 6: 6077 str r7, [r6, #4]\n" - " 8: 4f00 ldr r7, [pc, #0] ; (0xc)\n" - " a: e001 b.n 0x10\n" - " c: 1388 asrs r0, r1, #14\n" - " e: 0000 movs r0, r0\n" - " 10: 6f96 ldr r6, [r2, #120] ; 0x78\n" - " 14: 463a mov r2, r7\n" - " 16: 232a movs r3, #42 ; 0x2a\n" - " 18: 9f05 ldr r7, [sp, #20]\n" - " 1a: 9605 str r6, [sp, #20]\n" - " 1c: 46be mov lr, r7\n" - " 1e: bdf2 pop {r1, r4, r5, r6, r7, pc}\n" - " 20: 46c0 nop ; (mov r8, r8)\n" - " 22: b5f2 push {r1, r4, r5, r6, r7, lr}\n" - " 24: 6d57 ldr r7, [r2, #84] ; 0x54\n" - " 26: b405 push {r0, r2}\n" - " 28: 9902 ldr r1, [sp, #8]\n" - " 2a: 47b8 blx r7\n" - " 2c: 4607 mov r7, r0\n" - " 2e: bc05 pop {r0, r2}\n" - " 30: 4287 cmp r7, r0\n" - " 32: d001 beq.n 0x38\n" - " 34: 4638 mov r0, r7\n" - " 36: bdf2 pop {r1, r4, r5, r6, r7, pc}\n" - " 38: 2784 movs r7, #132 ; 0x84\n" - " 3a: 59d7 ldr r7, [r2, r7]\n" - " 3c: b405 push {r0, r2}\n" - " 3e: 2102 movs r1, #2\n" - " 40: 47b8 blx r7\n" - " 42: 4607 mov r7, r0\n" - " 44: bc05 pop {r0, r2}\n" - " 46: 2f00 cmp r7, #0\n" - " 48: d105 bne.n 0x56\n" - " 4a: 6fd7 ldr r7, [r2, #124] ; 0x7c\n" - " 4c: 222a movs r2, #42 ; 0x2a\n" - " 4e: 9e05 ldr r6, [sp, #20]\n" - " 50: 9705 str r7, [sp, #20]\n" - " 52: 46b6 mov lr, r6\n" - " 54: bdf2 pop {r1, r4, r5, r6, r7, pc}" + " 8: 4f03 ldr r7, 
[pc, #12] ; (0x18)\n" + " a: 6f96 ldr r6, [r2, #120] ; 0x78\n" + " c: 463a mov r2, r7\n" + " e: 232a movs r3, #42 ; 0x2a\n" + " 10: 9f05 ldr r7, [sp, #20]\n" + " 12: 9605 str r6, [sp, #20]\n" + " 14: 46be mov lr, r7\n" + " 16: bdf2 pop {r1, r4, r5, r6, r7, pc}\n" + " 18: 1388 asrs r0, r1, #14\n" + " 1a: 0000 movs r0, r0\n" + " 1c: b5f2 push {r1, r4, r5, r6, r7, lr}\n" + " 1e: 6d57 ldr r7, [r2, #84] ; 0x54\n" + " 20: b405 push {r0, r2}\n" + " 22: 9902 ldr r1, [sp, #8]\n" + " 24: 47b8 blx r7\n" + " 26: 4607 mov r7, r0\n" + " 28: bc05 pop {r0, r2}\n" + " 2a: 4287 cmp r7, r0\n" + " 2c: d001 beq.n 0x32\n" + " 2e: 4638 mov r0, r7\n" + " 30: bdf2 pop {r1, r4, r5, r6, r7, pc}\n" + " 32: 2784 movs r7, #132 ; 0x84\n" + " 34: 59d7 ldr r7, [r2, r7]\n" + " 36: b405 push {r0, r2}\n" + " 38: 2102 movs r1, #2\n" + " 3a: 47b8 blx r7\n" + " 3c: 4607 mov r7, r0\n" + " 3e: bc05 pop {r0, r2}\n" + " 40: 2f00 cmp r7, #0\n" + " 42: d105 bne.n 0x50\n" + " 44: 6fd7 ldr r7, [r2, #124] ; 0x7c\n" + " 46: 222a movs r2, #42 ; 0x2a\n" + " 48: 9e05 ldr r6, [sp, #20]\n" + " 4a: 9705 str r7, [sp, #20]\n" + " 4c: 46b6 mov lr, r6\n" + " 4e: bdf2 pop {r1, r4, r5, r6, r7, pc}" >>, ?assertEqual(dump_to_bin(Dump), Stream). 
@@ -1928,102 +2065,106 @@ wait_timeout_test() -> wait_test() -> State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)), - State1 = ?BACKEND:jump_table(State0, 5), + State1 = ?BACKEND:jump_table(State0, 2), State2 = ?BACKEND:add_label(State1, 1), Label = 2, State3 = ?BACKEND:set_continuation_to_label(State2, Label), State4 = ?BACKEND:call_primitive_last(State3, ?PRIM_SCHEDULE_WAIT_CP, [ctx, jit_state]), + State5 = ?BACKEND:add_label(State4, 2), + State6 = ?BACKEND:add_label(State5, 0), + State7 = ?BACKEND:update_branches(State6), - Stream = ?BACKEND:stream(State4), + Stream = ?BACKEND:stream(State7), Dump = << " 0: 4b01 ldr r3, [pc, #4] ; (0x8)\n" " 2: b5f2 push {r1, r4, r5, r6, r7, lr}\n" " 4: 449f add pc, r3\n" " 6: 46c0 nop ; (mov r8, r8)\n" - " 8: 0000 movs r0, r0\n" + " 8: 0034 movs r4, r6\n" " a: 0000 movs r0, r0\n" " c: 4b01 ldr r3, [pc, #4] ; (0x14)\n" " e: b5f2 push {r1, r4, r5, r6, r7, lr}\n" " 10: 449f add pc, r3\n" " 12: 46c0 nop ; (mov r8, r8)\n" - " 14: 0000 movs r0, r0\n" + " 14: 0010 movs r0, r2\n" " 16: 0000 movs r0, r0\n" " 18: 4b01 ldr r3, [pc, #4] ; (0x20)\n" " 1a: b5f2 push {r1, r4, r5, r6, r7, lr}\n" " 1c: 449f add pc, r3\n" " 1e: 46c0 nop ; (mov r8, r8)\n" - " 20: 0000 movs r0, r0\n" + " 20: 001c movs r4, r3\n" " 22: 0000 movs r0, r0\n" - " 24: 4b01 ldr r3, [pc, #4] ; (0x2c)\n" - " 26: b5f2 push {r1, r4, r5, r6, r7, lr}\n" - " 28: 449f add pc, r3\n" - " 2a: 46c0 nop ; (mov r8, r8)\n" - " 2c: 0000 movs r0, r0\n" - " 2e: 0000 movs r0, r0\n" - " 30: 4b01 ldr r3, [pc, #4] ; (0x38)\n" - " 32: b5f2 push {r1, r4, r5, r6, r7, lr}\n" - " 34: 449f add pc, r3\n" - " 36: 46c0 nop ; (mov r8, r8)\n" - " 38: 0000 movs r0, r0\n" - " 3a: 0000 movs r0, r0\n" - " 3c: 4b01 ldr r3, [pc, #4] ; (0x44)\n" - " 3e: b5f2 push {r1, r4, r5, r6, r7, lr}\n" - " 40: 449f add pc, r3\n" - " 42: 46c0 nop ; (mov r8, r8)\n" - " 44: 0000 movs r0, r0\n" - " 46: 0000 movs r0, r0\n" - " 48: a700 add r7, pc, #0 ; (adr r7, 0x4c)\n" - " 4a: 2633 movs r6, 
#51 ; 0x33\n" - " 4c: 4276 negs r6, r6\n" - " 4e: 19f6 adds r6, r6, r7\n" - " 50: 9f00 ldr r7, [sp, #0]\n" - " 52: 607e str r6, [r7, #4]\n" - " 54: 6f57 ldr r7, [r2, #116] ; 0x74\n" - " 56: 9e05 ldr r6, [sp, #20]\n" - " 58: 9705 str r7, [sp, #20]\n" - " 5a: 46b6 mov lr, r6\n" - " 5c: bdf2 pop {r1, r4, r5, r6, r7, pc}" + " 24: a700 add r7, pc, #0 ; (adr r7, 0x28)\n" + " 26: 260f movs r6, #15\n" + " 28: 4276 negs r6, r6\n" + " 2a: 19f6 adds r6, r6, r7\n" + " 2c: 9f00 ldr r7, [sp, #0]\n" + " 2e: 607e str r6, [r7, #4]\n" + " 30: 6f57 ldr r7, [r2, #116] ; 0x74\n" + " 32: 9e05 ldr r6, [sp, #20]\n" + " 34: 9705 str r7, [sp, #20]\n" + " 36: 46b6 mov lr, r6\n" + " 38: bdf2 pop {r1, r4, r5, r6, r7, pc}\n" + " 3a: 46c0 nop ; (mov r8, r8)" >>, ?assertEqual(dump_to_bin(Dump), Stream). %% Test return_labels_and_lines/2 function return_labels_and_lines_test() -> State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)), + State1 = ?BACKEND:jump_table(State0, 2), % Test return_labels_and_lines with some sample labels and lines - State1 = ?BACKEND:add_label(State0, 2, 32), - State2 = ?BACKEND:add_label(State1, 1, 16), + State2 = ?BACKEND:add_label(State1, 2, 32), + State3 = ?BACKEND:add_label(State2, 1, 16), % {Line, Offset} pairs SortedLines = [{10, 16}, {20, 32}], - State3 = ?BACKEND:return_labels_and_lines(State2, SortedLines), - Stream = ?BACKEND:stream(State3), + State4 = ?BACKEND:return_labels_and_lines(State3, SortedLines), + Stream = ?BACKEND:stream(State4), % Should have generated adr + pop {r1,r4,r5,r6,r7,pc} + labels table + lines table % adr = 4 bytes, pop = 2 bytes, labels table = 6*2 = 12 bytes, lines table = 6*2 = 12 bytes % Total minimum: 30 bytes ?assert(byte_size(Stream) >= 30), - % Expected: adr r0, + pop {r1,r4,r5,r6,r7,pc} + labels table + lines table - % The data tables start at offset 4, so adr should be adr r0, 4 not adr r0, 8 + % Expected: jump table (3 entries) + adr r0, + pop {r1,r4,r5,r6,r7,pc} + labels table + lines table Dump 
= << - " 0: a000 add r0, pc, #0 ; (adr r0, 0x4)\n" - " 2: bdf2 pop {r1, r4, r5, r6, r7, pc}\n" - " 4: 0200 lsls r0, r0, #8\n" - " 6: 0100 lsls r0, r0, #4\n" - " 8: 0000 movs r0, r0\n" - " a: 1000 asrs r0, r0, #32\n" - " c: 0200 lsls r0, r0, #8\n" - " e: 0000 movs r0, r0\n" - " 10: 2000 movs r0, #0\n" - " 12: 0200 lsls r0, r0, #8\n" - " 14: 0a00 lsrs r0, r0, #8\n" - " 16: 0000 movs r0, r0\n" - " 18: 1000 asrs r0, r0, #32\n" - " 1a: 1400 asrs r0, r0, #16\n" - " 1c: 0000 movs r0, r0\n" - " 1e: 2000 movs r0, #0" + " 0: 4b01 ldr r3, [pc, #4]\n" + " 2: b5f2 push {r1, r4, r5, r6, r7, lr}\n" + " 4: 449f add pc, r3\n" + " 6: 46c0 nop\n" + " 8: ffff .short 0xffff\n" + " a: ffff .short 0xffff\n" + " c: 4b01 ldr r3, [pc, #4]\n" + " e: b5f2 push {r1, r4, r5, r6, r7, lr}\n" + " 10: 449f add pc, r3\n" + " 12: 46c0 nop\n" + " 14: fffc .short 0xfffc\n" + " 16: ffff .short 0xffff\n" + " 18: 4b01 ldr r3, [pc, #4]\n" + " 1a: b5f2 push {r1, r4, r5, r6, r7, lr}\n" + " 1c: 449f add pc, r3\n" + " 1e: 46c0 nop\n" + " 20: 0000 movs r0, r0\n" + " 22: 0000 movs r0, r0\n" + " 24: a000 add r0, pc, #0\n" + " 26: bdf2 pop {r1, r4, r5, r6, r7, pc}\n" + " 28: 0200 lsls r0, r0, #8\n" + " 2a: 0100 lsls r0, r0, #4\n" + " 2c: 0000 movs r0, r0\n" + " 2e: 1000 asrs r0, r0, #32\n" + " 30: 0200 lsls r0, r0, #8\n" + " 32: 0000 movs r0, r0\n" + " 34: 2000 movs r0, #0\n" + " 36: 0200 lsls r0, r0, #8\n" + " 38: 0a00 lsrs r0, r0, #8\n" + " 3a: 0000 movs r0, r0\n" + " 3c: 1000 asrs r0, r0, #32\n" + " 3e: 1400 asrs r0, r0, #16\n" + " 40: 0000 movs r0, r0\n" + " 42: 2000 movs r0, #0" >>, ?assertEqual(dump_to_bin(Dump), Stream). 
@@ -2035,36 +2176,55 @@ return_labels_and_lines_unaligned_test() -> TempState = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)), TempStream = jit_stream_binary:append(?BACKEND:stream(TempState), PaddingInstruction), State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, TempStream), + State1 = ?BACKEND:jump_table(State0, 2), % Test return_labels_and_lines with some sample labels and lines - State1 = ?BACKEND:add_label(State0, 2, 32), - State2 = ?BACKEND:add_label(State1, 1, 16), + State2 = ?BACKEND:add_label(State1, 2, 32), + State3 = ?BACKEND:add_label(State2, 1, 16), % {Line, Offset} pairs SortedLines = [{10, 16}, {20, 32}], - State3 = ?BACKEND:return_labels_and_lines(State2, SortedLines), - Stream = ?BACKEND:stream(State3), + State4 = ?BACKEND:return_labels_and_lines(State3, SortedLines), + Stream = ?BACKEND:stream(State4), Dump = << " 0: 4770 bx lr\n" - "2: a001 add r0, pc, #4 ; (adr r0, 0x8)\n" - "4: bdf2 pop {r1, r4, r5, r6, r7, pc}\n" - "6: 0000 movs r0, r0\n" - "8: 0200 lsls r0, r0, #8\n" - "a: 0100 lsls r0, r0, #4\n" - "c: 0000 movs r0, r0\n" - "e: 1000 asrs r0, r0, #32\n" - "10: 0200 lsls r0, r0, #8\n" - "12: 0000 movs r0, r0\n" - "14: 2000 movs r0, #0\n" - "16: 0200 lsls r0, r0, #8\n" - "18: 0a00 lsrs r0, r0, #8\n" - "1a: 0000 movs r0, r0\n" - "1c: 1000 asrs r0, r0, #32\n" - "1e: 1400 asrs r0, r0, #16\n" - "20: 0000 movs r0, r0\n" - "22: 2000 movs r0, #0" + " 2: 4b01 ldr r3, [pc, #4]\n" + " 4: b5f2 push {r1, r4, r5, r6, r7, lr}\n" + " 6: 449f add pc, r3\n" + " 8: 46c0 nop\n" + " a: ffff .short 0xffff\n" + " c: ffff .short 0xffff\n" + " e: 4b01 ldr r3, [pc, #4]\n" + " 10: b5f2 push {r1, r4, r5, r6, r7, lr}\n" + " 12: 449f add pc, r3\n" + " 14: 46c0 nop\n" + " 16: fffa .short 0xfffa\n" + " 18: ffff .short 0xffff\n" + " 1a: 4b01 ldr r3, [pc, #4]\n" + " 1c: b5f2 push {r1, r4, r5, r6, r7, lr}\n" + " 1e: 449f add pc, r3\n" + " 20: 46c0 nop\n" + " 22: fffe .short 0xfffe\n" + " 24: ffff .short 0xffff\n" + " 26: a001 add r0, pc, 
#4\n" + " 28: bdf2 pop {r1, r4, r5, r6, r7, pc}\n" + " 2a: 0000 movs r0, r0\n" + " 2c: 0200 lsls r0, r0, #8\n" + " 2e: 0100 lsls r0, r0, #4\n" + " 30: 0000 movs r0, r0\n" + " 32: 1000 asrs r0, r0, #32\n" + " 34: 0200 lsls r0, r0, #8\n" + " 36: 0000 movs r0, r0\n" + " 38: 2000 movs r0, #0\n" + " 3a: 0200 lsls r0, r0, #8\n" + " 3c: 0a00 lsrs r0, r0, #8\n" + " 3e: 0000 movs r0, r0\n" + " 40: 1000 asrs r0, r0, #32\n" + " 42: 1400 asrs r0, r0, #16\n" + " 44: 0000 movs r0, r0\n" + " 46: 2000 movs r0, #0" >>, ?assertEqual(dump_to_bin(Dump), Stream). @@ -2187,7 +2347,7 @@ call_fun_test() -> ]) end ), - State5 = ?BACKEND:and_(State4, RegCopy, ?TERM_PRIMARY_CLEAR_MASK), + {State5, RegCopy} = ?BACKEND:and_(State4, {free, RegCopy}, ?TERM_PRIMARY_CLEAR_MASK), State6 = ?BACKEND:move_array_element(State5, RegCopy, 0, RegCopy), State7 = ?BACKEND:if_block( State6, {RegCopy, '&', ?TERM_BOXED_TAG_MASK, '!=', ?TERM_BOXED_FUN}, fun(BSt0) -> @@ -2224,55 +2384,55 @@ call_fun_test() -> " 24: 2403 movs r4, #3\n" " 26: 4025 ands r5, r4\n" " 28: 2d02 cmp r5, #2\n" - " 2a: d00c beq.n 0x46\n" + " 2a: d00b beq.n 0x44\n" " 2c: 6cd7 ldr r7, [r2, #76] ; 0x4c\n" " 2e: b082 sub sp, #8\n" " 30: 9600 str r6, [sp, #0]\n" " 32: 9902 ldr r1, [sp, #8]\n" " 34: 222e movs r2, #46 ; 0x2e\n" - " 36: 4b01 ldr r3, [pc, #4] ; (0x3c)\n" - " 38: e002 b.n 0x40\n" - " 3a: 0000 movs r0, r0\n" - " 3c: 018b lsls r3, r1, #6\n" + " 36: 4b02 ldr r3, [pc, #8] ; (0x40)\n" + " 38: 47b8 blx r7\n" + " 3a: b002 add sp, #8\n" + " 3c: bdf2 pop {r1, r4, r5, r6, r7, pc}\n" " 3e: 0000 movs r0, r0\n" - " 40: 47b8 blx r7\n" - " 42: b002 add sp, #8\n" - " 44: bdf2 pop {r1, r4, r5, r6, r7, pc}\n" - " 46: 2503 movs r5, #3\n" - " 48: 43ae bics r6, r5\n" - " 4a: 6836 ldr r6, [r6, #0]\n" - " 4c: 4635 mov r5, r6\n" - " 4e: 243f movs r4, #63 ; 0x3f\n" - " 50: 4025 ands r5, r4\n" - " 52: 2d14 cmp r5, #20\n" - " 54: d00b beq.n 0x6e\n" - " 56: 6cd7 ldr r7, [r2, #76] ; 0x4c\n" - " 58: b082 sub sp, #8\n" - " 5a: 9600 str r6, [sp, #0]\n" - " 5c: 
9902 ldr r1, [sp, #8]\n" - " 5e: 2258 movs r2, #88 ; 0x58\n" - " 60: 4b00 ldr r3, [pc, #0] ; (0x64)\n" - " 62: e001 b.n 0x68\n" - " 64: 018b lsls r3, r1, #6\n" + " 40: 018b lsls r3, r1, #6\n" + " 42: 0000 movs r0, r0\n" + " 44: 2503 movs r5, #3\n" + " 46: 43ae bics r6, r5\n" + " 48: 6836 ldr r6, [r6, #0]\n" + " 4a: 4635 mov r5, r6\n" + " 4c: 243f movs r4, #63 ; 0x3f\n" + " 4e: 4025 ands r5, r4\n" + " 50: 2d14 cmp r5, #20\n" + " 52: d00b beq.n 0x6c\n" + " 54: 6cd7 ldr r7, [r2, #76] ; 0x4c\n" + " 56: b082 sub sp, #8\n" + " 58: 9600 str r6, [sp, #0]\n" + " 5a: 9902 ldr r1, [sp, #8]\n" + " 5c: 2256 movs r2, #86 ; 0x56\n" + " 5e: 4b02 ldr r3, [pc, #8] ; (0x68)\n" + " 60: 47b8 blx r7\n" + " 62: b002 add sp, #8\n" + " 64: bdf2 pop {r1, r4, r5, r6, r7, pc}\n" " 66: 0000 movs r0, r0\n" - " 68: 47b8 blx r7\n" - " 6a: b002 add sp, #8\n" - " 6c: bdf2 pop {r1, r4, r5, r6, r7, pc}\n" - " 6e: 9d00 ldr r5, [sp, #0]\n" - " 70: 682e ldr r6, [r5, #0]\n" - " 72: 6836 ldr r6, [r6, #0]\n" - " 74: 0636 lsls r6, r6, #24\n" - " 76: 4d05 ldr r5, [pc, #20] ; (0x8c)\n" - " 78: 432e orrs r6, r5\n" - " 7a: 65c6 str r6, [r0, #92] ; 0x5c\n" - " 7c: 2680 movs r6, #128 ; 0x80\n" - " 7e: 5996 ldr r6, [r2, r6]\n" - " 80: 463a mov r2, r7\n" - " 82: 2300 movs r3, #0\n" - " 84: 9f05 ldr r7, [sp, #20]\n" - " 86: 9605 str r6, [sp, #20]\n" - " 88: 46be mov lr, r7\n" - " 8a: bdf2 pop {r1, r4, r5, r6, r7, pc}\n" + " 68: 018b lsls r3, r1, #6\n" + " 6a: 0000 movs r0, r0\n" + " 6c: 9d00 ldr r5, [sp, #0]\n" + " 6e: 682e ldr r6, [r5, #0]\n" + " 70: 6836 ldr r6, [r6, #0]\n" + " 72: 0636 lsls r6, r6, #24\n" + " 74: 4d05 ldr r5, [pc, #20] ; (0x8c)\n" + " 76: 432e orrs r6, r5\n" + " 78: 65c6 str r6, [r0, #92] ; 0x5c\n" + " 7a: 2680 movs r6, #128 ; 0x80\n" + " 7c: 5996 ldr r6, [r2, r6]\n" + " 7e: 463a mov r2, r7\n" + " 80: 2300 movs r3, #0\n" + " 82: 9f05 ldr r7, [sp, #20]\n" + " 84: 9605 str r6, [sp, #20]\n" + " 86: 46be mov lr, r7\n" + " 88: bdf2 pop {r1, r4, r5, r6, r7, pc}\n" + " 8a: 0000 movs r0, r0\n" " 8c: 0240 
lsls r0, r0, #9\n" " 8e: 0000 movs r0, r0\n" " 90: b5f2 push {r1, r4, r5, r6, r7, lr}" @@ -2281,7 +2441,8 @@ call_fun_test() -> move_to_vm_register_test0(State, Source, Dest, Dump) -> State1 = ?BACKEND:move_to_vm_register(State, Source, Dest), - Stream = ?BACKEND:stream(State1), + State2 = ?BACKEND:jump_to_offset(State1, 16#100), + Stream = ?BACKEND:stream(State2), ?assertEqual(dump_to_bin(Dump), Stream). move_to_vm_register_test_() -> @@ -2294,88 +2455,101 @@ move_to_vm_register_test_() -> ?_test(begin move_to_vm_register_test0(State0, 0, {x_reg, 0}, << " 0: 2700 movs r7, #0\n" - " 2: 6187 str r7, [r0, #24]" + " 2: 6187 str r7, [r0, #24]\n" + " 4: e07c b.n 0x100" >>) end), ?_test(begin move_to_vm_register_test0(State0, 0, {x_reg, extra}, << " 0: 2700 movs r7, #0\n" - " 2: 6587 str r7, [r0, #88] ; 0x58" + " 2: 6587 str r7, [r0, #88] ; 0x58\n" + " 4: e07c b.n 0x100" >>) end), ?_test(begin move_to_vm_register_test0(State0, 0, {ptr, r6}, << " 0: 2700 movs r7, #0\n" - " 2: 6037 str r7, [r6, #0]" + " 2: 6037 str r7, [r6, #0]\n" + " 4: e07c b.n 0x100" >>) end), ?_test(begin move_to_vm_register_test0(State0, 0, {y_reg, 2}, << " 0: 2600 movs r6, #0\n" " 2: 6947 ldr r7, [r0, #20]\n" - " 4: 60be str r6, [r7, #8]" + " 4: 60be str r6, [r7, #8]\n" + " 6: e07b b.n 0x100" >>) end), ?_test(begin move_to_vm_register_test0(State0, 0, {y_reg, 20}, << " 0: 2600 movs r6, #0\n" " 2: 6947 ldr r7, [r0, #20]\n" - " 4: 653e str r6, [r7, #80] ; 0x50" + " 4: 653e str r6, [r7, #80] ; 0x50\n" + " 6: e07b b.n 0x100" >>) end), %% Test: Immediate to x_reg ?_test(begin move_to_vm_register_test0(State0, 42, {x_reg, 0}, << " 0: 272a movs r7, #42 ; 0x2a\n" - " 2: 6187 str r7, [r0, #24]" + " 2: 6187 str r7, [r0, #24]\n" + " 4: e07c b.n 0x100" >>) end), ?_test(begin move_to_vm_register_test0(State0, 42, {x_reg, extra}, << " 0: 272a movs r7, #42 ; 0x2a\n" - " 2: 6587 str r7, [r0, #88] ; 0x58" + " 2: 6587 str r7, [r0, #88] ; 0x58\n" + " 4: e07c b.n 0x100" >>) end), ?_test(begin 
move_to_vm_register_test0(State0, 42, {y_reg, 2}, << " 0: 262a movs r6, #42 ; 0x2a\n" " 2: 6947 ldr r7, [r0, #20]\n" - " 4: 60be str r6, [r7, #8]" + " 4: 60be str r6, [r7, #8]\n" + " 6: e07b b.n 0x100" >>) end), ?_test(begin move_to_vm_register_test0(State0, 42, {y_reg, 20}, << " 0: 262a movs r6, #42 ; 0x2a\n" " 2: 6947 ldr r7, [r0, #20]\n" - " 4: 653e str r6, [r7, #80] ; 0x50" + " 4: 653e str r6, [r7, #80] ; 0x50\n" + " 6: e07b b.n 0x100" >>) end), %% Test: Immediate to ptr ?_test(begin move_to_vm_register_test0(State0, 99, {ptr, r3}, << " 0: 2763 movs r7, #99 ; 0x63\n" - " 2: 601f str r7, [r3, #0]" + " 2: 601f str r7, [r3, #0]\n" + " 4: e07c b.n 0x100" >>) end), %% Test: x_reg to x_reg ?_test(begin move_to_vm_register_test0(State0, {x_reg, 1}, {x_reg, 2}, << " 0: 69c7 ldr r7, [r0, #28]\n" - " 2: 6207 str r7, [r0, #32]" + " 2: 6207 str r7, [r0, #32]\n" + " 4: e07c b.n 0x100" >>) end), %% Test: x_reg to ptr ?_test(begin move_to_vm_register_test0(State0, {x_reg, 1}, {ptr, r1}, << " 0: 69c7 ldr r7, [r0, #28]\n" - " 2: 600f str r7, [r1, #0]" + " 2: 600f str r7, [r1, #0]\n" + " 4: e07c b.n 0x100" >>) end), %% Test: ptr to x_reg ?_test(begin move_to_vm_register_test0(State0, {ptr, r4}, {x_reg, 3}, << " 0: 6827 ldr r7, [r4, #0]\n" - " 2: 6247 str r7, [r0, #36] ; 0x24" + " 2: 6247 str r7, [r0, #36] ; 0x24\n" + " 4: e07c b.n 0x100" >>) end), %% Test: x_reg to y_reg @@ -2383,7 +2557,8 @@ move_to_vm_register_test_() -> move_to_vm_register_test0(State0, {x_reg, 0}, {y_reg, 1}, << " 0: 6987 ldr r7, [r0, #24]\n" " 2: 6946 ldr r6, [r0, #20]\n" - " 4: 6077 str r7, [r6, #4]" + " 4: 6077 str r7, [r6, #4]\n" + " 6: e07b b.n 0x100" >>) end), %% Test: y_reg to x_reg @@ -2391,7 +2566,8 @@ move_to_vm_register_test_() -> move_to_vm_register_test0(State0, {y_reg, 0}, {x_reg, 3}, << " 0: 6946 ldr r6, [r0, #20]\n" " 2: 6837 ldr r7, [r6, #0]\n" - " 4: 6247 str r7, [r0, #36] ; 0x24" + " 4: 6247 str r7, [r0, #36] ; 0x24\n" + " 6: e07b b.n 0x100" >>) end), %% Test: y_reg to y_reg @@ -2399,41 
+2575,47 @@ move_to_vm_register_test_() -> move_to_vm_register_test0(State0, {y_reg, 1}, {x_reg, 3}, << " 0: 6946 ldr r6, [r0, #20]\n" " 2: 6877 ldr r7, [r6, #4]\n" - " 4: 6247 str r7, [r0, #36] ; 0x24" + " 4: 6247 str r7, [r0, #36] ; 0x24\n" + " 6: e07b b.n 0x100" >>) end), %% Test: Native register to x_reg ?_test(begin move_to_vm_register_test0(State0, r5, {x_reg, 0}, << - " 0: 6185 str r5, [r0, #24]" + " 0: 6185 str r5, [r0, #24]\n" + " 2: e07d b.n 0x100" >>) end), ?_test(begin move_to_vm_register_test0(State0, r6, {x_reg, extra}, << - " 0: 6586 str r6, [r0, #88] ; 0x58" + " 0: 6586 str r6, [r0, #88] ; 0x58\n" + " 2: e07d b.n 0x100" >>) end), %% Test: Native register to ptr ?_test(begin move_to_vm_register_test0(State0, r4, {ptr, r3}, << - " 0: 601c str r4, [r3, #0]" + " 0: 601c str r4, [r3, #0]\n" + " 2: e07d b.n 0x100" >>) end), %% Test: Native register to y_reg ?_test(begin move_to_vm_register_test0(State0, r1, {y_reg, 0}, << " 0: 6947 ldr r7, [r0, #20]\n" - " 2: 6039 str r1, [r7, #0]" + " 2: 6039 str r1, [r7, #0]\n" + " 4: e07c b.n 0x100" >>) end), %% Test: Large immediate to x_reg (32-bit literal pool, aligned case) ?_test(begin move_to_vm_register_test0(State0, 16#12345678, {x_reg, 0}, << - " 0: 4f00 ldr r7, [pc, #0] ; (0x4)\n" - " 2: e001 b.n 0x8\n" - " 4: 5678 ldrsb r0, [r7, r1]\n" - " 6: 1234 asrs r4, r6, #8\n" - " 8: 6187 str r7, [r0, #24]" + " 0: 4f01 ldr r7, [pc, #4] ; (0x8)\n" + " 2: 6187 str r7, [r0, #24]\n" + " 4: e07c b.n 0x100\n" + " 6: 0000 movs r0, r0\n" + " 8: 5678 ldrsb r0, [r7, r1]\n" + " a: 1234 asrs r4, r6, #8" >>) end), %% Test: Large immediate to x_reg (32-bit literal pool, unaligned case) @@ -2442,55 +2624,57 @@ move_to_vm_register_test_() -> State1 = ?BACKEND:move_to_vm_register(State0, r1, {ptr, r3}), %% Then do large immediate which should handle unaligned case State2 = ?BACKEND:move_to_vm_register(State1, 16#12345678, {x_reg, 0}), - Stream = ?BACKEND:stream(State2), + State3 = ?BACKEND:jump_to_offset(State2, 16#100), + Stream = 
?BACKEND:stream(State3), Expected = dump_to_bin(<< " 0: 6019 str r1, [r3, #0]\n" " 2: 4f01 ldr r7, [pc, #4] ; (0x8)\n" - " 4: e002 b.n 0xc\n" - " 6: 0000 movs r0, r0\n" + " 4: 6187 str r7, [r0, #24]\n" + " 6: e07b b.n 0x100\n" " 8: 5678 ldrsb r0, [r7, r1]\n" - " a: 1234 asrs r4, r6, #8\n" - " c: 6187 str r7, [r0, #24]" + " a: 1234 asrs r4, r6, #8" >>), ?assertEqual(Expected, Stream) end), ?_test(begin move_to_vm_register_test0(State0, 16#12345678, {x_reg, extra}, << - " 0: 4f00 ldr r7, [pc, #0] ; (0x4)\n" - " 2: e001 b.n 0x8\n" - " 4: 5678 ldrsb r0, [r7, r1]\n" - " 6: 1234 asrs r4, r6, #8\n" - " 8: 6587 str r7, [r0, #88] ; 0x58" + " 0: 4f01 ldr r7, [pc, #4] ; (0x8)\n" + " 2: 6587 str r7, [r0, #88] ; 0x58\n" + " 4: e07c b.n 0x100\n" + " 6: 0000 movs r0, r0\n" + " 8: 5678 ldrsb r0, [r7, r1]\n" + " a: 1234 asrs r4, r6, #8" >>) end), ?_test(begin move_to_vm_register_test0(State0, 16#12345678, {y_reg, 2}, << - " 0: 4f00 ldr r7, [pc, #0] ; (0x4)\n" - " 2: e001 b.n 0x8\n" - " 4: 5678 ldrsb r0, [r7, r1]\n" - " 6: 1234 asrs r4, r6, #8\n" - " 8: 6946 ldr r6, [r0, #20]\n" - " a: 60b7 str r7, [r6, #8]" + " 0: 4f01 ldr r7, [pc, #4] ; (0x8)\n" + " 2: 6946 ldr r6, [r0, #20]\n" + " 4: 60b7 str r7, [r6, #8]\n" + " 6: e07b b.n 0x100\n" + " 8: 5678 ldrsb r0, [r7, r1]\n" + " a: 1234 asrs r4, r6, #8" >>) end), ?_test(begin move_to_vm_register_test0(State0, 16#12345678, {y_reg, 20}, << - " 0: 4f00 ldr r7, [pc, #0] ; (0x4)\n" - " 2: e001 b.n 0x8\n" - " 4: 5678 ldrsb r0, [r7, r1]\n" - " 6: 1234 asrs r4, r6, #8\n" - " 8: 6946 ldr r6, [r0, #20]\n" - " a: 6537 str r7, [r6, #80] ; 0x50" + " 0: 4f01 ldr r7, [pc, #4] ; (0x8)\n" + " 2: 6946 ldr r6, [r0, #20]\n" + " 4: 6537 str r7, [r6, #80] ; 0x50\n" + " 6: e07b b.n 0x100\n" + " 8: 5678 ldrsb r0, [r7, r1]\n" + " a: 1234 asrs r4, r6, #8" >>) end), %% Test: Large immediate to ptr ?_test(begin move_to_vm_register_test0(State0, 16#12345678, {ptr, r3}, << - " 0: 4f00 ldr r7, [pc, #0] ; (0x4)\n" - " 2: e001 b.n 0x8\n" - " 4: 5678 ldrsb r0, [r7, r1]\n" 
- " 6: 1234 asrs r4, r6, #8\n" - " 8: 601f str r7, [r3, #0]" + " 0: 4f01 ldr r7, [pc, #4] ; (0x8)\n" + " 2: 601f str r7, [r3, #0]\n" + " 4: e07c b.n 0x100\n" + " 6: 0000 movs r0, r0\n" + " 8: 5678 ldrsb r0, [r7, r1]\n" + " a: 1234 asrs r4, r6, #8" >>) end), %% Test: x_reg to y_reg (high index) @@ -2498,7 +2682,8 @@ move_to_vm_register_test_() -> move_to_vm_register_test0(State0, {x_reg, 15}, {y_reg, 31}, << " 0: 6d47 ldr r7, [r0, #84] ; 0x54\n" " 2: 6946 ldr r6, [r0, #20]\n" - " 4: 67f7 str r7, [r6, #124] ; 0x7c" + " 4: 67f7 str r7, [r6, #124] ; 0x7c\n" + " 6: e07b b.n 0x100" >>) end), %% Test: y_reg to x_reg (high index) @@ -2506,7 +2691,8 @@ move_to_vm_register_test_() -> move_to_vm_register_test0(State0, {y_reg, 31}, {x_reg, 15}, << " 0: 6946 ldr r6, [r0, #20]\n" " 2: 6ff7 ldr r7, [r6, #124] ; 0x7c\n" - " 4: 6547 str r7, [r0, #84] ; 0x54" + " 4: 6547 str r7, [r0, #84] ; 0x54\n" + " 6: e07b b.n 0x100" >>) end), %% Test: Large y_reg index (32) that exceeds str immediate offset limit @@ -2516,7 +2702,8 @@ move_to_vm_register_test_() -> " 2: 6947 ldr r7, [r0, #20]\n" " 4: 2580 movs r5, #128 ; 0x80\n" " 6: 443d add r5, r7\n" - " 8: 602e str r6, [r5, #0]" + " 8: 602e str r6, [r5, #0]\n" + " a: e079 b.n 0x100" >>) end), %% Test: Negative immediate to x_reg @@ -2524,7 +2711,8 @@ move_to_vm_register_test_() -> move_to_vm_register_test0(State0, -1, {x_reg, 0}, << " 0: 2701 movs r7, #1\n" " 2: 427f negs r7, r7\n" - " 4: 6187 str r7, [r0, #24]" + " 4: 6187 str r7, [r0, #24]\n" + " 6: e07b b.n 0x100" >>) end) ] @@ -2624,6 +2812,34 @@ move_array_element_test_() -> " 2: 68be ldr r6, [r7, #8]\n" " 4: 62c6 str r6, [r0, #44] ; 0x2c" >>) + end), + %% move_array_element: reg[32] to x_reg (large offset, index 32, offset 128) + ?_test(begin + move_array_element_test0(State0, r3, 32, {x_reg, 0}, << + " 0: 2704 movs r7, #4\n" + " 2: 441f add r7, r3\n" + " 4: 6ffe ldr r6, [r7, #124] ; 0x7c\n" + " 6: 6186 str r6, [r0, #24]" + >>) + end), + %% move_array_element: reg[32] to ptr (large 
offset) + ?_test(begin + move_array_element_test0(State0, r3, 32, {ptr, r5}, << + " 0: 2704 movs r7, #4\n" + " 2: 441f add r7, r3\n" + " 4: 6fff ldr r7, [r7, #124] ; 0x7c\n" + " 6: 602f str r7, [r5, #0]" + >>) + end), + %% move_array_element: reg[32] to y_reg (large offset) + ?_test(begin + move_array_element_test0(State0, r3, 32, {y_reg, 2}, << + " 0: 2604 movs r6, #4\n" + " 2: 441e add r6, r3\n" + " 4: 6ff6 ldr r6, [r6, #124] ; 0x7c\n" + " 6: 6947 ldr r7, [r0, #20]\n" + " 8: 60be str r6, [r7, #8]" + >>) end) ] end}. @@ -2644,6 +2860,19 @@ get_array_element_test_() -> >>, ?assertEqual(dump_to_bin(Dump), Stream), ?assertEqual(r7, Reg) + end), + %% get_array_element: reg[x] with large offset (index 32, offset 128) + %% For offset 128, we use ldr with max offset 124 + temp register for remainder (4) + ?_test(begin + {State1, Reg} = ?BACKEND:get_array_element(State0, r4, 32), + Stream = ?BACKEND:stream(State1), + Dump = << + " 0: 2604 movs r6, #4\n" + " 2: 4426 add r6, r4\n" + " 4: 6ff7 ldr r7, [r6, #124] ; 0x7c" + >>, + ?assertEqual(dump_to_bin(Dump), Stream), + ?assertEqual(r7, Reg) end) ] end}. 
@@ -2665,6 +2894,18 @@ move_to_array_element_test_() -> >>, ?assertEqual(dump_to_bin(Dump), Stream) end), + %% move_to_array_element/4: x_reg to reg[x], larger immediate offset + ?_test(begin + State1 = ?BACKEND:move_to_array_element(State0, {x_reg, 0}, r3, 32), + Stream = ?BACKEND:stream(State1), + Dump = << + " 0: 6987 ldr r7, [r0, #24]\n" + " 2: 2604 movs r6, #4\n" + " 4: 441e add r6, r3\n" + " 6: 67f7 str r7, [r6, #124] ; 0x7c" + >>, + ?assertEqual(dump_to_bin(Dump), Stream) + end), %% move_to_array_element/4: x_reg to reg[reg] ?_test(begin State1 = ?BACKEND:move_to_array_element(State0, {x_reg, 0}, r3, r4), @@ -2714,8 +2955,8 @@ move_to_array_element_test_() -> end), %% move_to_array_element/5: x_reg to reg[x+offset] ?_test(begin - State1 = setelement(6, State0, ?BACKEND:available_regs(State0) -- [r3, r4]), - State2 = setelement(7, State1, [r3, r4]), + State1 = setelement(7, State0, ?BACKEND:available_regs(State0) -- [r3, r4]), + State2 = setelement(8, State1, [r3, r4]), [r3, r4] = ?BACKEND:used_regs(State2), State3 = ?BACKEND:move_to_array_element(State2, {x_reg, 0}, r3, r4, 1), Stream = ?BACKEND:stream(State3), @@ -2729,8 +2970,8 @@ move_to_array_element_test_() -> end), %% move_to_array_element/5: imm to reg[x+offset] ?_test(begin - State1 = setelement(6, State0, ?BACKEND:available_regs(State0) -- [r3, r4]), - State2 = setelement(7, State1, [r3, r4]), + State1 = setelement(7, State0, ?BACKEND:available_regs(State0) -- [r3, r4]), + State2 = setelement(8, State1, [r3, r4]), [r3, r4] = ?BACKEND:used_regs(State2), State3 = ?BACKEND:move_to_array_element(State2, 42, r3, r4, 1), Stream = ?BACKEND:stream(State3), @@ -2787,11 +3028,12 @@ move_to_native_register_test_() -> %% move_to_native_register/2: -256 (boundary case, should use literal pool) ?_test(begin {State1, Reg} = ?BACKEND:move_to_native_register(State0, -256), - Stream = ?BACKEND:stream(State1), + State2 = ?BACKEND:jump_to_offset(State1, 16#100), + Stream = ?BACKEND:stream(State2), ?assertEqual(r7, 
Reg), Dump = << " 0: 4f00 ldr r7, [pc, #0] ; (0x4)\n" - " 2: e001 b.n 0x8\n" + " 2: e07d b.n 0x100\n" " 4: ff00 ffff vmaxnm.f32 , q8, " >>, ?assertEqual(dump_to_bin(Dump), Stream) @@ -2895,7 +3137,9 @@ move_to_native_register_test_() -> add_test0(State0, Reg, Imm, Dump) -> State1 = ?BACKEND:add(State0, Reg, Imm), - Stream = ?BACKEND:stream(State1), + % Force emission of literal pool + State2 = ?BACKEND:jump_to_offset(State1, 16#100), + Stream = ?BACKEND:stream(State2), ?assertEqual(dump_to_bin(Dump), Stream). add_test_() -> @@ -2907,21 +3151,24 @@ add_test_() -> [ ?_test(begin add_test0(State0, r2, 2, << - " 0: 3202 adds r2, #2" + " 0: 3202 adds r2, #2\n" + " 2: e07d b.n 0x100" >>) end), ?_test(begin add_test0(State0, r2, 256, << - " 0: 4f00 ldr r7, [pc, #0] ; (0x4)\n" - " 2: e001 b.n 0x8\n" - " 4: 0100 lsls r0, r0, #4\n" + " 0: 4f01 ldr r7, [pc, #4] ; (0x8)\n" + " 2: 19d2 adds r2, r2, r7\n" + " 4: e07c b.n 0x100\n" " 6: 0000 movs r0, r0\n" - " 8: 19d2 adds r2, r2, r7" + " 8: 0100 lsls r0, r0, #4\n" + " a: 0000 movs r0, r0" >>) end), ?_test(begin add_test0(State0, r2, r3, << - " 0: 18d2 adds r2, r2, r3" + " 0: 18d2 adds r2, r2, r3\n" + " 2: e07d b.n 0x100" >>) end) ] @@ -2929,7 +3176,9 @@ add_test_() -> sub_test0(State0, Reg, Imm, Dump) -> State1 = ?BACKEND:sub(State0, Reg, Imm), - Stream = ?BACKEND:stream(State1), + % Force emission of literal pool + State2 = ?BACKEND:jump_to_offset(State1, 16#100), + Stream = ?BACKEND:stream(State2), ?assertEqual(dump_to_bin(Dump), Stream). 
sub_test_() -> @@ -2941,21 +3190,24 @@ sub_test_() -> [ ?_test(begin sub_test0(State0, r2, 2, << - " 0: 3a02 subs r2, #2" + " 0: 3a02 subs r2, #2\n" + " 2: e07d b.n 0x100" >>) end), ?_test(begin sub_test0(State0, r2, 256, << - " 0: 4f00 ldr r7, [pc, #0] ; (0x4)\n" - " 2: e001 b.n 0x8\n" - " 4: 0100 lsls r0, r0, #4\n" + " 0: 4f01 ldr r7, [pc, #4] @ (0xc)\n" + " 2: 1bd2 subs r2, r2, r7\n" + " 4: e07c b.n 0x100\n" " 6: 0000 movs r0, r0\n" - " 8: 1bd2 subs r2, r2, r7" + " 8: 0100 lsls r0, r0, #4\n" + " a: 0000 movs r0, r0" >>) end), ?_test(begin sub_test0(State0, r2, r3, << - " 0: 1ad2 subs r2, r2, r3" + " 0: 1ad2 subs r2, r2, r3\n" + " 2: e07d b.n 0x110" >>) end) ] @@ -3184,7 +3436,7 @@ and_register_exhaustion_negative_test() -> {State5, r3} = ?BACKEND:move_to_native_register(State4, {x_reg, 4}), {StateNoRegs, r1} = ?BACKEND:move_to_native_register(State5, {x_reg, 5}), % Test negative immediate (-4) which should use BICS with r0 as temp - StateResult = ?BACKEND:and_(StateNoRegs, r7, -4), + {StateResult, r7} = ?BACKEND:and_(StateNoRegs, {free, r7}, -4), Stream = ?BACKEND:stream(StateResult), ExpectedDump = << " 0: 6987 ldr r7, [r0, #24]\n" @@ -3210,7 +3462,7 @@ and_register_exhaustion_positive_test() -> {State5, r3} = ?BACKEND:move_to_native_register(State4, {x_reg, 4}), {StateNoRegs, r1} = ?BACKEND:move_to_native_register(State5, {x_reg, 5}), % Test positive immediate (0x3F) which should use ANDS with r0 as temp - StateResult = ?BACKEND:and_(StateNoRegs, r7, 16#3F), + {StateResult, r7} = ?BACKEND:and_(StateNoRegs, {free, r7}, 16#3F), Stream = ?BACKEND:stream(StateResult), ExpectedDump = << " 0: 6987 ldr r7, [r0, #24]\n" @@ -3256,23 +3508,36 @@ alloc_boxed_integer_fragment_large_test() -> {State1, ResultReg} = ?BACKEND:call_primitive(State0, ?PRIM_ALLOC_BOXED_INTEGER_FRAGMENT, [ ctx, {avm_int64_t, 16#123456789ABCDEF0} ]), + % Add a call primitive last to emit literal pool + State2 = ?BACKEND:call_primitive_last(State1, ?PRIM_RAISE_ERROR_TUPLE, [ + ctx, jit_state, 
offset, ?BADMATCH_ATOM, {free, ResultReg} + ]), ?assertEqual(r7, ResultReg), - Stream = ?BACKEND:stream(State1), + Stream = ?BACKEND:stream(State2), Dump = << - " 0: 6bd7 ldr r7, [r2, #60] ; 0x3c\n" + " 0: 6bd7 ldr r7, [r2, #60] @ 0x3c\n" " 2: b405 push {r0, r2}\n" - " 4: 4a00 ldr r2, [pc, #0] ; (0x8)\n" - " 6: e001 b.n 0xc\n" - " 8: def0 udf #240 ; 0xf0\n" - " a: 9abc ldr r2, [sp, #752] ; 0x2f0\n" - " c: 4b00 ldr r3, [pc, #0] ; (0x10)\n" - " e: e001 b.n 0x14\n" - " 10: 5678 ldrsb r0, [r7, r1]\n" - " 12: 1234 asrs r4, r6, #8\n" - " 14: 47b8 blx r7\n" - " 16: 4607 mov r7, r0\n" - " 18: bc05 pop {r0, r2}" + " 4: 4a06 ldr r2, [pc, #24] @ (0x20)\n" + " 6: 4b07 ldr r3, [pc, #28] @ (0x24)\n" + " 8: 47b8 blx r7\n" + " a: 4607 mov r7, r0\n" + " c: bc05 pop {r0, r2}\n" + " e: 6cd6 ldr r6, [r2, #76] @ 0x4c\n" + " 10: b082 sub sp, #8\n" + " 12: 9700 str r7, [sp, #0]\n" + " 14: 9902 ldr r1, [sp, #8]\n" + " 16: 2210 movs r2, #16\n" + " 18: 4b03 ldr r3, [pc, #12] @ (0x28)\n" + " 1a: 47b0 blx r6\n" + " 1c: b002 add sp, #8\n" + " 1e: bdf2 pop {r1, r4, r5, r6, r7, pc}\n" + " 20: def0 udf #240 @ 0xf0\n" + " 22: 9abc ldr r2, [sp, #752] @ 0x2f0\n" + " 24: 5678 ldrsb r0, [r7, r1]\n" + " 26: 1234 asrs r4, r6, #8\n" + " 28: 028b lsls r3, r1, #10\n" + " 2a: 0000 movs r0, r0" >>, ?assertEqual(dump_to_bin(Dump), Stream). diff --git a/tests/libs/jit/jit_riscv32_asm_tests.erl b/tests/libs/jit/jit_riscv32_asm_tests.erl new file mode 100644 index 0000000000..28a0f4fa58 --- /dev/null +++ b/tests/libs/jit/jit_riscv32_asm_tests.erl @@ -0,0 +1,900 @@ +% +% This file is part of AtomVM. +% +% Copyright 2025 Paul Guyot +% +% Licensed under the Apache License, Version 2.0 (the "License"); +% you may not use this file except in compliance with the License. 
+% You may obtain a copy of the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, +% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +% See the License for the specific language governing permissions and +% limitations under the License. +% +% SPDX-License-Identifier: Apache-2.0 OR LGPL-2.1-or-later +% + +-module(jit_riscv32_asm_tests). + +-include_lib("eunit/include/eunit.hrl"). + +-define(_assertAsmEqual(Bin, Str, Value), + ?_assertEqual(jit_tests_common:asm(riscv32, Bin, Str), Value) +). + +%%----------------------------------------------------------------------------- +%% R-type arithmetic and logical instruction tests +%%----------------------------------------------------------------------------- + +add_test_() -> + [ + ?_assertAsmEqual( + <<16#00628533:32/little>>, "add a0, t0, t1", jit_riscv32_asm:add(a0, t0, t1) + ), + ?_assertAsmEqual( + <<16#95aa:16/little>>, "add a1, a1, a0", jit_riscv32_asm:add(a1, a1, a0) + ), + ?_assertAsmEqual( + <<16#97fa:16/little>>, "add a5, a5, t5", jit_riscv32_asm:add(a5, a5, t5) + ) + ]. + +sub_test_() -> + [ + ?_assertAsmEqual( + <<16#40628533:32/little>>, "sub a0, t0, t1", jit_riscv32_asm:sub(a0, t0, t1) + ), + ?_assertAsmEqual( + <<16#8d89:16/little>>, "sub a1, a1, a0", jit_riscv32_asm:sub(a1, a1, a0) + ), + ?_assertAsmEqual( + <<16#41e787b3:32/little>>, "sub a5, a5, t5", jit_riscv32_asm:sub(a5, a5, t5) + ) + ]. + +and_test_() -> + [ + ?_assertAsmEqual( + <<16#0062f533:32/little>>, "and a0, t0, t1", jit_riscv32_asm:and_(a0, t0, t1) + ), + ?_assertAsmEqual( + <<16#8df1:16/little>>, "and a1, a1, a2", jit_riscv32_asm:and_(a1, a1, a2) + ) + ]. 
+ +or_test_() -> + [ + ?_assertAsmEqual( + <<16#0062e533:32/little>>, "or a0, t0, t1", jit_riscv32_asm:or_(a0, t0, t1) + ), + ?_assertAsmEqual( + <<16#8dd1:16/little>>, "or a1, a1, a2", jit_riscv32_asm:or_(a1, a1, a2) + ) + ]. + +xor_test_() -> + [ + ?_assertAsmEqual( + <<16#0062c533:32/little>>, "xor a0, t0, t1", jit_riscv32_asm:xor_(a0, t0, t1) + ), + ?_assertAsmEqual( + <<16#8db1:16/little>>, "xor a1, a1, a2", jit_riscv32_asm:xor_(a1, a1, a2) + ) + ]. + +sll_test_() -> + [ + ?_assertAsmEqual( + <<16#00629533:32/little>>, "sll a0, t0, t1", jit_riscv32_asm:sll(a0, t0, t1) + ), + ?_assertAsmEqual( + <<16#00c59633:32/little>>, "sll a2, a1, a2", jit_riscv32_asm:sll(a2, a1, a2) + ) + ]. + +srl_test_() -> + [ + ?_assertAsmEqual( + <<16#0062d533:32/little>>, "srl a0, t0, t1", jit_riscv32_asm:srl(a0, t0, t1) + ), + ?_assertAsmEqual( + <<16#00c5d633:32/little>>, "srl a2, a1, a2", jit_riscv32_asm:srl(a2, a1, a2) + ) + ]. + +sra_test_() -> + [ + ?_assertAsmEqual( + <<16#4062d533:32/little>>, "sra a0, t0, t1", jit_riscv32_asm:sra(a0, t0, t1) + ), + ?_assertAsmEqual( + <<16#40c5d633:32/little>>, "sra a2, a1, a2", jit_riscv32_asm:sra(a2, a1, a2) + ) + ]. + +slt_test_() -> + [ + ?_assertAsmEqual( + <<16#0062a533:32/little>>, "slt a0, t0, t1", jit_riscv32_asm:slt(a0, t0, t1) + ), + ?_assertAsmEqual( + <<16#00c5a633:32/little>>, "slt a2, a1, a2", jit_riscv32_asm:slt(a2, a1, a2) + ) + ]. + +sltu_test_() -> + [ + ?_assertAsmEqual( + <<16#0062b533:32/little>>, "sltu a0, t0, t1", jit_riscv32_asm:sltu(a0, t0, t1) + ), + ?_assertAsmEqual( + <<16#00c5b633:32/little>>, "sltu a2, a1, a2", jit_riscv32_asm:sltu(a2, a1, a2) + ) + ]. 
+ +%%----------------------------------------------------------------------------- +%% I-type immediate instruction tests +%%----------------------------------------------------------------------------- + +addi_test_() -> + [ + ?_assertAsmEqual( + <<16#01428513:32/little>>, "addi a0, t0, 20", jit_riscv32_asm:addi(a0, t0, 20) + ), + ?_assertAsmEqual( + <<16#15fd:16/little>>, "addi a1, a1, -1", jit_riscv32_asm:addi(a1, a1, -1) + ), + ?_assertAsmEqual( + <<16#7ff00513:32/little>>, "addi a0, zero, 2047", jit_riscv32_asm:addi(a0, zero, 2047) + ), + ?_assertAsmEqual( + <<16#80000593:32/little>>, "addi a1, zero, -2048", jit_riscv32_asm:addi(a1, zero, -2048) + ) + ]. + +andi_test_() -> + [ + ?_assertAsmEqual( + <<16#0ff2f513:32/little>>, "andi a0, t0, 255", jit_riscv32_asm:andi(a0, t0, 255) + ), + ?_assertAsmEqual( + <<16#89bd:16/little>>, "andi a1, a1, 15", jit_riscv32_asm:andi(a1, a1, 15) + ) + ]. + +ori_test_() -> + [ + ?_assertAsmEqual( + <<16#0ff2e513:32/little>>, "ori a0, t0, 255", jit_riscv32_asm:ori(a0, t0, 255) + ), + ?_assertAsmEqual( + <<16#00f5e593:32/little>>, "ori a1, a1, 15", jit_riscv32_asm:ori(a1, a1, 15) + ) + ]. + +xori_test_() -> + [ + ?_assertAsmEqual( + <<16#0ff2c513:32/little>>, "xori a0, t0, 255", jit_riscv32_asm:xori(a0, t0, 255) + ), + ?_assertAsmEqual( + <<16#fff5c593:32/little>>, "xori a1, a1, -1", jit_riscv32_asm:xori(a1, a1, -1) + ) + ]. + +slli_test_() -> + [ + ?_assertAsmEqual( + <<16#00329513:32/little>>, "slli a0, t0, 3", jit_riscv32_asm:slli(a0, t0, 3) + ), + ?_assertAsmEqual( + <<16#05fe:16/little>>, "slli a1, a1, 31", jit_riscv32_asm:slli(a1, a1, 31) + ), + ?_assertAsmEqual( + <<16#00051513:32/little>>, "slli a0, a0, 0", jit_riscv32_asm:slli(a0, a0, 0) + ) + ]. + +srli_test_() -> + [ + ?_assertAsmEqual( + <<16#0032d513:32/little>>, "srli a0, t0, 3", jit_riscv32_asm:srli(a0, t0, 3) + ), + ?_assertAsmEqual( + <<16#81fd:16/little>>, "srli a1, a1, 31", jit_riscv32_asm:srli(a1, a1, 31) + ) + ]. 
+ +srai_test_() -> + [ + ?_assertAsmEqual( + <<16#4032d513:32/little>>, "srai a0, t0, 3", jit_riscv32_asm:srai(a0, t0, 3) + ), + ?_assertAsmEqual( + <<16#85fd:16/little>>, "srai a1, a1, 31", jit_riscv32_asm:srai(a1, a1, 31) + ) + ]. + +slti_test_() -> + [ + ?_assertAsmEqual( + <<16#0142a513:32/little>>, "slti a0, t0, 20", jit_riscv32_asm:slti(a0, t0, 20) + ), + ?_assertAsmEqual( + <<16#fff5a593:32/little>>, "slti a1, a1, -1", jit_riscv32_asm:slti(a1, a1, -1) + ) + ]. + +sltiu_test_() -> + [ + ?_assertAsmEqual( + <<16#0142b513:32/little>>, "sltiu a0, t0, 20", jit_riscv32_asm:sltiu(a0, t0, 20) + ), + ?_assertAsmEqual( + <<16#00153513:32/little>>, "sltiu a0, a0, 1", jit_riscv32_asm:sltiu(a0, a0, 1) + ) + ]. + +%%----------------------------------------------------------------------------- +%% Load instruction tests +%%----------------------------------------------------------------------------- + +lw_test_() -> + [ + ?_assertAsmEqual(<<16#4108:16/little>>, "lw a0, 0(a0)", jit_riscv32_asm:lw(a0, a0, 0)), + ?_assertAsmEqual(<<16#4108:16/little>>, "lw a0, 0(a0)", jit_riscv32_asm:lw(a0, a0)), + ?_assertAsmEqual(<<16#414c:16/little>>, "lw a1, 4(a0)", jit_riscv32_asm:lw(a1, a0, 4)), + ?_assertAsmEqual( + <<16#ffc52503:32/little>>, "lw a0, -4(a0)", jit_riscv32_asm:lw(a0, a0, -4) + ), + ?_assertAsmEqual( + <<16#7ff52503:32/little>>, "lw a0, 2047(a0)", jit_riscv32_asm:lw(a0, a0, 2047) + ) + ]. + +lh_test_() -> + [ + ?_assertAsmEqual(<<16#00051503:32/little>>, "lh a0, 0(a0)", jit_riscv32_asm:lh(a0, a0, 0)), + ?_assertAsmEqual(<<16#00051503:32/little>>, "lh a0, 0(a0)", jit_riscv32_asm:lh(a0, a0)), + ?_assertAsmEqual(<<16#00251583:32/little>>, "lh a1, 2(a0)", jit_riscv32_asm:lh(a1, a0, 2)) + ]. 
+ +lhu_test_() -> + [ + ?_assertAsmEqual( + <<16#00055503:32/little>>, "lhu a0, 0(a0)", jit_riscv32_asm:lhu(a0, a0, 0) + ), + ?_assertAsmEqual(<<16#00055503:32/little>>, "lhu a0, 0(a0)", jit_riscv32_asm:lhu(a0, a0)), + ?_assertAsmEqual(<<16#00255583:32/little>>, "lhu a1, 2(a0)", jit_riscv32_asm:lhu(a1, a0, 2)) + ]. + +lb_test_() -> + [ + ?_assertAsmEqual(<<16#00050503:32/little>>, "lb a0, 0(a0)", jit_riscv32_asm:lb(a0, a0, 0)), + ?_assertAsmEqual(<<16#00050503:32/little>>, "lb a0, 0(a0)", jit_riscv32_asm:lb(a0, a0)), + ?_assertAsmEqual(<<16#00150583:32/little>>, "lb a1, 1(a0)", jit_riscv32_asm:lb(a1, a0, 1)) + ]. + +lbu_test_() -> + [ + ?_assertAsmEqual( + <<16#00054503:32/little>>, "lbu a0, 0(a0)", jit_riscv32_asm:lbu(a0, a0, 0) + ), + ?_assertAsmEqual(<<16#00054503:32/little>>, "lbu a0, 0(a0)", jit_riscv32_asm:lbu(a0, a0)), + ?_assertAsmEqual(<<16#00154583:32/little>>, "lbu a1, 1(a0)", jit_riscv32_asm:lbu(a1, a0, 1)) + ]. + +%%----------------------------------------------------------------------------- +%% Store instruction tests +%%----------------------------------------------------------------------------- + +sw_test_() -> + [ + ?_assertAsmEqual(<<16#c10c:16/little>>, "sw a1, 0(a0)", jit_riscv32_asm:sw(a0, a1, 0)), + ?_assertAsmEqual(<<16#c10c:16/little>>, "sw a1, 0(a0)", jit_riscv32_asm:sw(a1, a0)), + ?_assertAsmEqual(<<16#c14c:16/little>>, "sw a1, 4(a0)", jit_riscv32_asm:sw(a0, a1, 4)), + ?_assertAsmEqual(<<16#feb52e23:32/little>>, "sw a1, -4(a0)", jit_riscv32_asm:sw(a0, a1, -4)) + ]. + +sh_test_() -> + [ + ?_assertAsmEqual(<<16#00b51023:32/little>>, "sh a1, 0(a0)", jit_riscv32_asm:sh(a0, a1, 0)), + ?_assertAsmEqual(<<16#00b51023:32/little>>, "sh a1, 0(a0)", jit_riscv32_asm:sh(a1, a0)), + ?_assertAsmEqual(<<16#00b51123:32/little>>, "sh a1, 2(a0)", jit_riscv32_asm:sh(a0, a1, 2)) + ]. 
+ +sb_test_() -> + [ + ?_assertAsmEqual(<<16#00b50023:32/little>>, "sb a1, 0(a0)", jit_riscv32_asm:sb(a0, a1, 0)), + ?_assertAsmEqual(<<16#00b50023:32/little>>, "sb a1, 0(a0)", jit_riscv32_asm:sb(a1, a0)), + ?_assertAsmEqual(<<16#00b500a3:32/little>>, "sb a1, 1(a0)", jit_riscv32_asm:sb(a0, a1, 1)) + ]. + +%%----------------------------------------------------------------------------- +%% Branch instruction tests +%%----------------------------------------------------------------------------- + +beq_test_() -> + [ + ?_assertAsmEqual( + <<16#00628463:32/little>>, "beq t0, t1, .+8", jit_riscv32_asm:beq(t0, t1, 8) + ), + ?_assertAsmEqual( + <<16#feb50ee3:32/little>>, "beq a0, a1, .-4", jit_riscv32_asm:beq(a0, a1, -4) + ), + ?_assertAsmEqual( + <<16#c101:16/little>>, "beq a0, zero, .", jit_riscv32_asm:beq(a0, zero, 0) + ) + ]. + +bne_test_() -> + [ + ?_assertAsmEqual( + <<16#00629463:32/little>>, "bne t0, t1, .+8", jit_riscv32_asm:bne(t0, t1, 8) + ), + ?_assertAsmEqual( + <<16#feb51ee3:32/little>>, "bne a0, a1, .-4", jit_riscv32_asm:bne(a0, a1, -4) + ) + ]. + +blt_test_() -> + [ + ?_assertAsmEqual( + <<16#0062c463:32/little>>, "blt t0, t1, .+8", jit_riscv32_asm:blt(t0, t1, 8) + ), + ?_assertAsmEqual( + <<16#feb54ee3:32/little>>, "blt a0, a1, .-4", jit_riscv32_asm:blt(a0, a1, -4) + ) + ]. + +bge_test_() -> + [ + ?_assertAsmEqual( + <<16#0062d463:32/little>>, "bge t0, t1, .+8", jit_riscv32_asm:bge(t0, t1, 8) + ), + ?_assertAsmEqual( + <<16#feb55ee3:32/little>>, "bge a0, a1, .-4", jit_riscv32_asm:bge(a0, a1, -4) + ) + ]. + +bltu_test_() -> + [ + ?_assertAsmEqual( + <<16#0062e463:32/little>>, "bltu t0, t1, .+8", jit_riscv32_asm:bltu(t0, t1, 8) + ), + ?_assertAsmEqual( + <<16#feb56ee3:32/little>>, "bltu a0, a1, .-4", jit_riscv32_asm:bltu(a0, a1, -4) + ) + ]. 
+ +bgeu_test_() -> + [ + ?_assertAsmEqual( + <<16#0062f463:32/little>>, "bgeu t0, t1, .+8", jit_riscv32_asm:bgeu(t0, t1, 8) + ), + ?_assertAsmEqual( + <<16#feb57ee3:32/little>>, "bgeu a0, a1, .-4", jit_riscv32_asm:bgeu(a0, a1, -4) + ) + ]. + +%%----------------------------------------------------------------------------- +%% Jump instruction tests +%%----------------------------------------------------------------------------- + +jal_test_() -> + [ + ?_assertAsmEqual( + <<16#2021:16/little>>, "jal .+8", jit_riscv32_asm:jal(ra, 8) + ), + ?_assertAsmEqual( + <<16#3ff5:16/little>>, "jal .-4", jit_riscv32_asm:jal(ra, -4) + ), + ?_assertAsmEqual( + <<16#00000517:32/little, 16#9502:16/little>>, + "auipc a0, 0\njalr a0", + jit_riscv32_asm:call(a0, 0) + ), + ?_assertAsmEqual( + <<16#00002517:32/little, 16#800500e7:32/little>>, + "auipc a0, 0x2\njalr -2048(a0)", + jit_riscv32_asm:call(a0, 16#1800) + ) + ]. + +jalr_test_() -> + [ + ?_assertAsmEqual(<<16#9502:16/little>>, "jalr a0", jit_riscv32_asm:jalr(ra, a0, 0)), + ?_assertAsmEqual(<<16#9502:16/little>>, "jalr a0", jit_riscv32_asm:jalr(ra, a0)), + ?_assertAsmEqual(<<16#004500e7:32/little>>, "jalr 4(a0)", jit_riscv32_asm:jalr(ra, a0, 4)) + ]. + +%%----------------------------------------------------------------------------- +%% Upper immediate instruction tests +%%----------------------------------------------------------------------------- + +lui_test_() -> + [ + ?_assertAsmEqual(<<16#65c9:16/little>>, "lui a1, 18", jit_riscv32_asm:lui(a1, 18)), + ?_assertAsmEqual(<<16#6505:16/little>>, "lui a0, 1", jit_riscv32_asm:lui(a0, 1)), + ?_assertAsmEqual(<<16#75fd:16/little>>, "lui a1, 0xfffff", jit_riscv32_asm:lui(a1, -1)) + ]. + +auipc_test_() -> + [ + ?_assertAsmEqual(<<16#00012597:32/little>>, "auipc a1, 18", jit_riscv32_asm:auipc(a1, 18)), + ?_assertAsmEqual(<<16#00001517:32/little>>, "auipc a0, 1", jit_riscv32_asm:auipc(a0, 1)) + ]. 
+ +%%----------------------------------------------------------------------------- +%% Pseudo-instruction tests +%%----------------------------------------------------------------------------- + +nop_test_() -> + [ + % We want a 4-byte NOP for padding, so use .option norvc to force non-compressed + ?_assertAsmEqual(<<16#00000013:32/little>>, ".option norvc\nnop", jit_riscv32_asm:nop()) + ]. + +li_small_test_() -> + [ + ?_assertAsmEqual(<<16#4529:16/little>>, "li a0, 10", jit_riscv32_asm:li(a0, 10)), + ?_assertAsmEqual(<<16#557d:16/little>>, "li a0, -1", jit_riscv32_asm:li(a0, -1)), + ?_assertAsmEqual(<<16#7ff00513:32/little>>, "li a0, 2047", jit_riscv32_asm:li(a0, 2047)) + ]. + +li_large_test_() -> + [ + % 0x12345 = 74565 - requires lui + addi + ?_assertAsmEqual( + <<16#6549:16/little, 16#34550513:32/little>>, + "lui a0, 0x12\naddi a0, a0, 0x345", + jit_riscv32_asm:li(a0, 16#12345) + ), + % 0x80000000 = -2147483648 (minimum 32-bit signed) + ?_assertAsmEqual( + <<16#800005b7:32/little, 16#0581:16/little>>, + "lui a1, 0x80000\nc.addi a1, 0", + jit_riscv32_asm:li(a1, -16#80000000) + ), + % 0x7FFFFFFF = 2147483647 (maximum 32-bit signed) + ?_assertAsmEqual( + <<16#80000537:32/little, 16#157d:16/little>>, + "lui a0, 0x80000\naddi a0, a0, -1", + jit_riscv32_asm:li(a0, 16#7FFFFFFF) + ) + ]. + +mv_test_() -> + [ + ?_assertAsmEqual(<<16#852a:16/little>>, "mv a0, a0", jit_riscv32_asm:mv(a0, a0)), + ?_assertAsmEqual(<<16#85ae:16/little>>, "mv a1, a1", jit_riscv32_asm:mv(a1, a1)) + ]. + +not_test_() -> + [ + ?_assertAsmEqual(<<16#fff54513:32/little>>, "not a0, a0", jit_riscv32_asm:not_(a0, a0)), + ?_assertAsmEqual(<<16#fff5c593:32/little>>, "not a1, a1", jit_riscv32_asm:not_(a1, a1)) + ]. + +neg_test_() -> + [ + ?_assertAsmEqual(<<16#40a00533:32/little>>, "neg a0, a0", jit_riscv32_asm:neg(a0, a0)), + ?_assertAsmEqual(<<16#40b005b3:32/little>>, "neg a1, a1", jit_riscv32_asm:neg(a1, a1)) + ]. 
+ +j_test_() -> + [ + ?_assertAsmEqual( + <<16#a021:16/little>>, "j .+8", jit_riscv32_asm:j(8) + ), + ?_assertAsmEqual( + <<16#bff5:16/little>>, "j .-4", jit_riscv32_asm:j(-4) + ) + ]. + +jr_test_() -> + [ + ?_assertAsmEqual(<<16#8502:16/little>>, "jr a0", jit_riscv32_asm:jr(a0)), + ?_assertAsmEqual(<<16#8282:16/little>>, "jr t0", jit_riscv32_asm:jr(t0)) + ]. + +ret_test_() -> + [ + ?_assertAsmEqual(<<16#8082:16/little>>, "ret", jit_riscv32_asm:ret()) + ]. + +%%----------------------------------------------------------------------------- +%% M Extension (Multiply/Divide) instruction tests +%%----------------------------------------------------------------------------- + +mul_test_() -> + [ + ?_assertAsmEqual( + <<16#02f50533:32/little>>, "mul a0, a0, a5", jit_riscv32_asm:mul(a0, a0, a5) + ), + ?_assertAsmEqual( + <<16#03f60633:32/little>>, "mul a2, a2, t6", jit_riscv32_asm:mul(a2, a2, t6) + ), + ?_assertAsmEqual( + <<16#026585b3:32/little>>, "mul a1, a1, t1", jit_riscv32_asm:mul(a1, a1, t1) + ), + ?_assertAsmEqual( + <<16#02d282b3:32/little>>, "mul t0, t0, a3", jit_riscv32_asm:mul(t0, t0, a3) + ) + ]. + +%%----------------------------------------------------------------------------- +%% System instruction tests +%%----------------------------------------------------------------------------- + +c_ebreak_test_() -> + [ + ?_assertAsmEqual( + <<16#9002:16/little>>, "c.ebreak", jit_riscv32_asm:c_ebreak() + ) + ]. + +%%----------------------------------------------------------------------------- +%% C Extension - Arithmetic and Logical instruction tests +%%----------------------------------------------------------------------------- + +c_add_test_() -> + [ + ?_assertAsmEqual( + <<16#9532:16/little>>, "c.add a0, a2", jit_riscv32_asm:c_add(a0, a2) + ), + ?_assertAsmEqual( + <<16#95be:16/little>>, "c.add a1, a5", jit_riscv32_asm:c_add(a1, a5) + ), + ?_assertAsmEqual( + <<16#9522:16/little>>, "c.add a0, s0", jit_riscv32_asm:c_add(a0, s0) + ) + ]. 
+
+c_mv_test_() -> % c.mv cases: each is {expected 16-bit little-endian binary, assembly text, jit_riscv32_asm:c_mv/2 call}
+    [
+        ?_assertAsmEqual(
+            <<16#8532:16/little>>, "c.mv a0, a2", jit_riscv32_asm:c_mv(a0, a2)
+        ),
+        ?_assertAsmEqual(
+            <<16#85be:16/little>>, "c.mv a1, a5", jit_riscv32_asm:c_mv(a1, a5)
+        ),
+        ?_assertAsmEqual(
+            <<16#842a:16/little>>, "c.mv s0, a0", jit_riscv32_asm:c_mv(s0, a0)
+        )
+    ].
+
+c_sub_test_() -> % c.sub cases for jit_riscv32_asm:c_sub/2 (a0/a0, a1/a1, s0/a1)
+    [
+        ?_assertAsmEqual(
+            <<16#8d09:16/little>>, "c.sub a0, a0", jit_riscv32_asm:c_sub(a0, a0)
+        ),
+        ?_assertAsmEqual(
+            <<16#8d8d:16/little>>, "c.sub a1, a1", jit_riscv32_asm:c_sub(a1, a1)
+        ),
+        ?_assertAsmEqual(
+            <<16#8c0d:16/little>>, "c.sub s0, a1", jit_riscv32_asm:c_sub(s0, a1)
+        )
+    ].
+
+c_and_test_() -> % c.and cases for jit_riscv32_asm:c_and/2 (a0, a5 and s0 as destination, a1 as source)
+    [
+        ?_assertAsmEqual(
+            <<16#8d6d:16/little>>, "c.and a0, a1", jit_riscv32_asm:c_and(a0, a1)
+        ),
+        ?_assertAsmEqual(
+            <<16#8fed:16/little>>, "c.and a5, a1", jit_riscv32_asm:c_and(a5, a1)
+        ),
+        ?_assertAsmEqual(
+            <<16#8c6d:16/little>>, "c.and s0, a1", jit_riscv32_asm:c_and(s0, a1)
+        )
+    ].
+
+c_or_test_() -> % c.or cases for jit_riscv32_asm:c_or/2, same register pairs as c_and_test_
+    [
+        ?_assertAsmEqual(
+            <<16#8d4d:16/little>>, "c.or a0, a1", jit_riscv32_asm:c_or(a0, a1)
+        ),
+        ?_assertAsmEqual(
+            <<16#8fcd:16/little>>, "c.or a5, a1", jit_riscv32_asm:c_or(a5, a1)
+        ),
+        ?_assertAsmEqual(
+            <<16#8c4d:16/little>>, "c.or s0, a1", jit_riscv32_asm:c_or(s0, a1)
+        )
+    ].
+
+c_xor_test_() -> % c.xor cases for jit_riscv32_asm:c_xor/2, same register pairs as c_and_test_
+    [
+        ?_assertAsmEqual(
+            <<16#8d2d:16/little>>, "c.xor a0, a1", jit_riscv32_asm:c_xor(a0, a1)
+        ),
+        ?_assertAsmEqual(
+            <<16#8fad:16/little>>, "c.xor a5, a1", jit_riscv32_asm:c_xor(a5, a1)
+        ),
+        ?_assertAsmEqual(
+            <<16#8c2d:16/little>>, "c.xor s0, a1", jit_riscv32_asm:c_xor(s0, a1)
+        )
+    ].
+ +%%----------------------------------------------------------------------------- +%% C Extension - Immediate instruction tests +%%----------------------------------------------------------------------------- + +c_addi_test_() -> + [ + ?_assertAsmEqual( + <<16#0511:16/little>>, "c.addi a0, 4", jit_riscv32_asm:c_addi(a0, 4) + ), + ?_assertAsmEqual( + <<16#15fd:16/little>>, "c.addi a1, -1", jit_riscv32_asm:c_addi(a1, -1) + ), + ?_assertAsmEqual( + <<16#0541:16/little>>, "c.addi a0, 16", jit_riscv32_asm:c_addi(a0, 16) + ), + ?_assertAsmEqual( + <<16#1561:16/little>>, "c.addi a0, -8", jit_riscv32_asm:c_addi(a0, -8) + ) + ]. + +c_andi_test_() -> + [ + ?_assertAsmEqual( + <<16#8929:16/little>>, "c.andi a0, 10", jit_riscv32_asm:c_andi(a0, 10) + ), + ?_assertAsmEqual( + <<16#99fd:16/little>>, "c.andi a1, -1", jit_riscv32_asm:c_andi(a1, -1) + ), + ?_assertAsmEqual( + <<16#8941:16/little>>, "c.andi a0, 16", jit_riscv32_asm:c_andi(a0, 16) + ) + ]. + +c_li_test_() -> + [ + ?_assertAsmEqual( + <<16#4529:16/little>>, "c.li a0, 10", jit_riscv32_asm:c_li(a0, 10) + ), + ?_assertAsmEqual( + <<16#55fd:16/little>>, "c.li a1, -1", jit_riscv32_asm:c_li(a1, -1) + ), + ?_assertAsmEqual( + <<16#4505:16/little>>, "c.li a0, 1", jit_riscv32_asm:c_li(a0, 1) + ), + ?_assertAsmEqual( + <<16#5501:16/little>>, "c.li a0, -32", jit_riscv32_asm:c_li(a0, -32) + ) + ]. + +c_lui_test_() -> + [ + ?_assertAsmEqual( + <<16#6529:16/little>>, "c.lui a0, 10", jit_riscv32_asm:c_lui(a0, 10) + ), + ?_assertAsmEqual( + <<16#75fd:16/little>>, "c.lui a1, 0xfffff", jit_riscv32_asm:c_lui(a1, -1) + ), + ?_assertAsmEqual( + <<16#6505:16/little>>, "c.lui a0, 1", jit_riscv32_asm:c_lui(a0, 1) + ) + ]. 
+ +c_addi16sp_test_() -> + [ + ?_assertAsmEqual( + <<16#6141:16/little>>, "c.addi16sp sp, 16", jit_riscv32_asm:c_addi16sp(16) + ), + ?_assertAsmEqual( + <<16#7101:16/little>>, "c.addi16sp sp, -512", jit_riscv32_asm:c_addi16sp(-512) + ), + ?_assertAsmEqual( + <<16#6161:16/little>>, "c.addi16sp sp, 80", jit_riscv32_asm:c_addi16sp(80) + ) + ]. + +c_addi4spn_test_() -> + [ + ?_assertAsmEqual( + <<16#0048:16/little>>, "c.addi4spn a0, sp, 4", jit_riscv32_asm:c_addi4spn(a0, 4) + ), + ?_assertAsmEqual( + <<16#1010:16/little>>, "c.addi4spn a2, sp, 32", jit_riscv32_asm:c_addi4spn(a2, 32) + ), + ?_assertAsmEqual( + <<16#1ffc:16/little>>, + "c.addi4spn a5, sp, 1020", + jit_riscv32_asm:c_addi4spn(a5, 1020) + ) + ]. + +%%----------------------------------------------------------------------------- +%% C Extension - Shift instruction tests +%%----------------------------------------------------------------------------- + +c_slli_test_() -> + [ + ?_assertAsmEqual( + <<16#050e:16/little>>, "c.slli a0, 3", jit_riscv32_asm:c_slli(a0, 3) + ), + ?_assertAsmEqual( + <<16#05fe:16/little>>, "c.slli a1, 31", jit_riscv32_asm:c_slli(a1, 31) + ), + ?_assertAsmEqual( + <<16#0542:16/little>>, "c.slli a0, 16", jit_riscv32_asm:c_slli(a0, 16) + ) + ]. + +c_srli_test_() -> + [ + ?_assertAsmEqual( + <<16#810d:16/little>>, "c.srli a0, 3", jit_riscv32_asm:c_srli(a0, 3) + ), + ?_assertAsmEqual( + <<16#81fd:16/little>>, "c.srli a1, 31", jit_riscv32_asm:c_srli(a1, 31) + ), + ?_assertAsmEqual( + <<16#8141:16/little>>, "c.srli a0, 16", jit_riscv32_asm:c_srli(a0, 16) + ) + ]. + +c_srai_test_() -> + [ + ?_assertAsmEqual( + <<16#850d:16/little>>, "c.srai a0, 3", jit_riscv32_asm:c_srai(a0, 3) + ), + ?_assertAsmEqual( + <<16#85fd:16/little>>, "c.srai a1, 31", jit_riscv32_asm:c_srai(a1, 31) + ), + ?_assertAsmEqual( + <<16#8541:16/little>>, "c.srai a0, 16", jit_riscv32_asm:c_srai(a0, 16) + ) + ]. 
+ +%%----------------------------------------------------------------------------- +%% C Extension - Load/Store instruction tests +%%----------------------------------------------------------------------------- + +c_lw_test_() -> + [ + ?_assertAsmEqual( + <<16#4188:16/little>>, "c.lw a0, 0(a1)", jit_riscv32_asm:c_lw(a0, {a1, 0}) + ), + ?_assertAsmEqual( + <<16#41d8:16/little>>, "c.lw a4, 4(a1)", jit_riscv32_asm:c_lw(a4, {a1, 4}) + ), + ?_assertAsmEqual( + <<16#5ffc:16/little>>, "c.lw a5, 124(a5)", jit_riscv32_asm:c_lw(a5, {a5, 124}) + ) + ]. + +c_sw_test_() -> + [ + ?_assertAsmEqual( + <<16#c188:16/little>>, "c.sw a0, 0(a1)", jit_riscv32_asm:c_sw(a0, {a1, 0}) + ), + ?_assertAsmEqual( + <<16#c1d8:16/little>>, "c.sw a4, 4(a1)", jit_riscv32_asm:c_sw(a4, {a1, 4}) + ), + ?_assertAsmEqual( + <<16#dffc:16/little>>, "c.sw a5, 124(a5)", jit_riscv32_asm:c_sw(a5, {a5, 124}) + ) + ]. + +c_lwsp_test_() -> + [ + ?_assertAsmEqual( + <<16#4502:16/little>>, "c.lwsp a0, 0(sp)", jit_riscv32_asm:c_lwsp(a0, 0) + ), + ?_assertAsmEqual( + <<16#4512:16/little>>, "c.lwsp a0, 4(sp)", jit_riscv32_asm:c_lwsp(a0, 4) + ), + ?_assertAsmEqual( + <<16#50fe:16/little>>, "c.lwsp ra, 252(sp)", jit_riscv32_asm:c_lwsp(ra, 252) + ) + ]. + +c_swsp_test_() -> + [ + ?_assertAsmEqual( + <<16#c02a:16/little>>, "c.swsp a0, 0(sp)", jit_riscv32_asm:c_swsp(a0, 0) + ), + ?_assertAsmEqual( + <<16#c22a:16/little>>, "c.swsp a0, 4(sp)", jit_riscv32_asm:c_swsp(a0, 4) + ), + ?_assertAsmEqual( + <<16#dfe6:16/little>>, "c.swsp s9, 252(sp)", jit_riscv32_asm:c_swsp(s9, 252) + ) + ]. 
+ +%%----------------------------------------------------------------------------- +%% C Extension - Branch and Jump instruction tests +%%----------------------------------------------------------------------------- + +c_beqz_test_() -> + [ + ?_assertAsmEqual( + <<16#c111:16/little>>, "c.beqz a0, .+4", jit_riscv32_asm:c_beqz(a0, 4) + ), + ?_assertAsmEqual( + <<16#dced:16/little>>, "c.beqz s1, .-6", jit_riscv32_asm:c_beqz(s1, -6) + ), + ?_assertAsmEqual( + <<16#c101:16/little>>, "c.beqz a0, .", jit_riscv32_asm:c_beqz(a0, 0) + ) + ]. + +c_bnez_test_() -> + [ + ?_assertAsmEqual( + <<16#e111:16/little>>, "c.bnez a0, .+4", jit_riscv32_asm:c_bnez(a0, 4) + ), + ?_assertAsmEqual( + <<16#fced:16/little>>, "c.bnez s1, .-6", jit_riscv32_asm:c_bnez(s1, -6) + ), + ?_assertAsmEqual( + <<16#e101:16/little>>, "c.bnez a0, .", jit_riscv32_asm:c_bnez(a0, 0) + ) + ]. + +c_j_test_() -> + [ + ?_assertAsmEqual( + <<16#a011:16/little>>, "c.j .+4", jit_riscv32_asm:c_j(4) + ), + ?_assertAsmEqual( + <<16#bfed:16/little>>, "c.j .-6", jit_riscv32_asm:c_j(-6) + ), + ?_assertAsmEqual( + <<16#a001:16/little>>, "c.j .", jit_riscv32_asm:c_j(0) + ) + ]. + +c_jal_test_() -> + [ + ?_assertAsmEqual( + <<16#2021:16/little>>, "c.jal .+8", jit_riscv32_asm:c_jal(8) + ), + ?_assertAsmEqual( + <<16#3ff5:16/little>>, "c.jal .-4", jit_riscv32_asm:c_jal(-4) + ), + ?_assertAsmEqual( + <<16#2001:16/little>>, "c.jal .", jit_riscv32_asm:c_jal(0) + ) + ]. + +c_jr_test_() -> + [ + ?_assertAsmEqual( + <<16#8502:16/little>>, "c.jr a0", jit_riscv32_asm:c_jr(a0) + ), + ?_assertAsmEqual( + <<16#8402:16/little>>, "c.jr s0", jit_riscv32_asm:c_jr(s0) + ), + ?_assertAsmEqual( + <<16#8082:16/little>>, "c.jr ra", jit_riscv32_asm:c_jr(ra) + ) + ]. + +c_jalr_test_() -> + [ + ?_assertAsmEqual( + <<16#9502:16/little>>, "c.jalr a0", jit_riscv32_asm:c_jalr(a0) + ), + ?_assertAsmEqual( + <<16#9402:16/little>>, "c.jalr s0", jit_riscv32_asm:c_jalr(s0) + ) + ]. 
+ +%%----------------------------------------------------------------------------- +%% C Extension - Pseudo-instruction tests +%%----------------------------------------------------------------------------- + +c_nop_test_() -> + [ + ?_assertAsmEqual( + <<16#0001:16/little>>, "c.nop", jit_riscv32_asm:c_nop() + ) + ]. diff --git a/tests/libs/jit/jit_riscv32_tests.erl b/tests/libs/jit/jit_riscv32_tests.erl new file mode 100644 index 0000000000..ab13f91c28 --- /dev/null +++ b/tests/libs/jit/jit_riscv32_tests.erl @@ -0,0 +1,3640 @@ +% +% This file is part of AtomVM. +% +% Copyright 2025 Paul Guyot +% +% Licensed under the Apache License, Version 2.0 (the "License"); +% you may not use this file except in compliance with the License. +% You may obtain a copy of the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, +% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +% See the License for the specific language governing permissions and +% limitations under the License. +% +% SPDX-License-Identifier: Apache-2.0 OR LGPL-2.1-or-later +% + +-module(jit_riscv32_tests). + +-ifdef(TEST). +-include_lib("eunit/include/eunit.hrl"). +-endif. + +-include("jit/include/jit.hrl"). +-include("jit/src/term.hrl"). +-include("jit/src/default_atoms.hrl"). +-include("jit/src/primitives.hrl"). + +-define(BACKEND, jit_riscv32). 
+ +% disassembly can be reproduced with: +% riscv32-unknown-linux-gnu-objdump -b binary -m riscv:rv32 -D dump.bin + +call_primitive_0_test() -> + State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)), + {State1, ResultReg} = ?BACKEND:call_primitive(State0, 0, [ctx, jit_state]), + ?assertEqual(t6, ResultReg), + Stream = ?BACKEND:stream(State1), + Dump = + << + " 0: 00062f83 lw t6,0(a2)\n" + " 4: 1141 addi sp,sp,-16\n" + " 6: c006 sw ra,0(sp)\n" + " 8: c22a sw a0,4(sp)\n" + " a: c42e sw a1,8(sp)\n" + " c: c632 sw a2,12(sp)\n" + " e: 9f82 jalr t6\n" + " 10: 8faa mv t6,a0\n" + " 12: 4082 lw ra,0(sp)\n" + " 14: 4512 lw a0,4(sp)\n" + " 16: 45a2 lw a1,8(sp)\n" + " 18: 4632 lw a2,12(sp)\n" + " 1a: 0141 addi sp,sp,16" + >>, + ?assertEqual(dump_to_bin(Dump), Stream). + +call_primitive_1_test() -> + State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)), + {State1, ResultReg} = ?BACKEND:call_primitive(State0, 1, [ctx, jit_state]), + ?assertEqual(t6, ResultReg), + Stream = ?BACKEND:stream(State1), + Dump = + << + " 0: 00462f83 lw t6,4(a2)\n" + " 4: 1141 addi sp,sp,-16\n" + " 6: c006 sw ra,0(sp)\n" + " 8: c22a sw a0,4(sp)\n" + " a: c42e sw a1,8(sp)\n" + " c: c632 sw a2,12(sp)\n" + " e: 9f82 jalr t6\n" + " 10: 8faa mv t6,a0\n" + " 12: 4082 lw ra,0(sp)\n" + " 14: 4512 lw a0,4(sp)\n" + " 16: 45a2 lw a1,8(sp)\n" + " 18: 4632 lw a2,12(sp)\n" + " 1a: 0141 addi sp,sp,16" + >>, + ?assertEqual(dump_to_bin(Dump), Stream).
+
+call_primitive_2_args_test() -> % call_primitive/3 with primitive index 2 and args [ctx, 42, 43, 44]
+    State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)),
+    {State1, ResultReg} = ?BACKEND:call_primitive(State0, 2, [ctx, 42, 43, 44]),
+    ?assertEqual(t6, ResultReg), % the call result is expected in t6
+    Stream = ?BACKEND:stream(State1),
+    Dump =
+        <<
+            " 0: 00862f83 lw t6,8(a2)\n" % t6 <- word at 8(a2); it is the jalr target below
+            " 4: 1141 addi sp,sp,-16\n" % 16-byte frame holding ra, a0, a1, a2
+            " 6: c006 sw ra,0(sp)\n"
+            " 8: c22a sw a0,4(sp)\n"
+            " a: c42e sw a1,8(sp)\n"
+            " c: c632 sw a2,12(sp)\n"
+            " e: 02a00593 li a1,42\n" % the three immediates go to a1, a2, a3
+            " 12: 02b00613 li a2,43\n"
+            " 16: 02c00693 li a3,44\n"
+            " 1a: 9f82 jalr t6\n"
+            " 1c: 8faa mv t6,a0\n" % return value copied out of a0 before a0 is reloaded
+            " 1e: 4082 lw ra,0(sp)\n"
+            " 20: 4512 lw a0,4(sp)\n"
+            " 22: 45a2 lw a1,8(sp)\n"
+            " 24: 4632 lw a2,12(sp)\n"
+            " 26: 0141 addi sp,sp,16"
+        >>,
+    ?assertEqual(dump_to_bin(Dump), Stream).
+
+call_primitive_5_args_test() -> % call_primitive_last/3 with ?PRIM_ALLOCATE and five arguments
+    State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)),
+    State1 = ?BACKEND:call_primitive_last(State0, ?PRIM_ALLOCATE, [ctx, jit_state, 16, 32, 2]),
+    Stream = ?BACKEND:stream(State1),
+    Dump =
+        <<
+            " 0: 01462f83 lw t6,20(a2)\n" % t6 <- word at 20(a2); it is the jr target below
+            " 4: 4641 li a2,16\n" % only the three immediates are materialised (a2, a3, a4)
+            " 6: 02000693 li a3,32\n"
+            " a: 4709 li a4,2\n"
+            " c: 8f82 jr t6" % plain jump: no frame is pushed and nothing is restored
+        >>,
+    ?assertEqual(dump_to_bin(Dump), Stream).
+ +call_primitive_6_args_test() -> + State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)), + % Get bin_ptr from x_reg 0 (similar to get_list_test pattern) + {State1, RegA} = ?BACKEND:move_to_native_register(State0, {x_reg, 0}), + {State2, RegA} = ?BACKEND:and_(State1, {free, RegA}, ?TERM_PRIMARY_CLEAR_MASK), + % Get another register for the last parameter to test {free, Reg} handling + {State3, OtherReg} = ?BACKEND:move_to_native_register(State2, {x_reg, 1}), + % Call PRIM_BITSTRING_EXTRACT_INTEGER with 6 arguments + {State4, _ResultReg} = ?BACKEND:call_primitive(State3, ?PRIM_BITSTRING_EXTRACT_INTEGER, [ + ctx, jit_state, {free, RegA}, 64, 8, {free, OtherReg} + ]), + Stream = ?BACKEND:stream(State4), + Dump = + << + " 0: 01852f83 lw t6,24(a0)\n" + " 4: 4f0d li t5,3\n" + " 6: ffff4f13 not t5,t5\n" + " a: 01efffb3 and t6,t6,t5\n" + " e: 01c52f03 lw t5,28(a0)\n" + " 12: 0b800e93 li t4,184\n" + " 16: 9eb2 add t4,t4,a2\n" + " 18: 000eae83 lw t4,0(t4)\n" + " 1c: 1141 addi sp,sp,-16\n" + " 1e: c006 sw ra,0(sp)\n" + " 20: c22a sw a0,4(sp)\n" + " 22: c42e sw a1,8(sp)\n" + " 24: c632 sw a2,12(sp)\n" + " 26: 867e mv a2,t6\n" + " 28: 04000693 li a3,64\n" + " 2c: 4721 li a4,8\n" + " 2e: 87fa mv a5,t5\n" + " 30: 9e82 jalr t4\n" + " 32: 8eaa mv t4,a0\n" + " 34: 4082 lw ra,0(sp)\n" + " 36: 4512 lw a0,4(sp)\n" + " 38: 45a2 lw a1,8(sp)\n" + " 3a: 4632 lw a2,12(sp)\n" + " 3c: 0141 addi sp,sp,16" + >>, + ?assertEqual(dump_to_bin(Dump), Stream). 
+
+call_primitive_extended_regs_test() -> % three ?PRIM_EXTENDED_REGISTER_PTR calls, then ?PRIM_PUT_LIST on the first two results
+    State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)),
+    {State1, RegA} = ?BACKEND:call_primitive(State0, ?PRIM_EXTENDED_REGISTER_PTR, [ctx, 19]),
+    {State2, RegB} = ?BACKEND:call_primitive(State1, ?PRIM_EXTENDED_REGISTER_PTR, [ctx, 20]),
+    {State3, RegC} = ?BACKEND:call_primitive(State2, ?PRIM_EXTENDED_REGISTER_PTR, [ctx, 19]),
+    {State4, ResultReg} = ?BACKEND:call_primitive(State3, ?PRIM_PUT_LIST, [
+        ctx, {free, {ptr, RegA}}, {free, {ptr, RegB}}
+    ]),
+    State5 = ?BACKEND:move_to_vm_register(State4, ResultReg, {ptr, RegC}),
+    State6 = ?BACKEND:free_native_registers(State5, [ResultReg, {ptr, RegC}]),
+    ?BACKEND:assert_all_native_free(State6), % every native register must be released at this point
+    Stream = ?BACKEND:stream(State6),
+    Dump = <<
+        " 0: 04862f83 lw t6,72(a2)\n" % first call: nothing live yet, 16-byte frame
+        " 4: 1141 addi sp,sp,-16\n"
+        " 6: c006 sw ra,0(sp)\n"
+        " 8: c22a sw a0,4(sp)\n"
+        " a: c42e sw a1,8(sp)\n"
+        " c: c632 sw a2,12(sp)\n"
+        " e: 45cd li a1,19\n"
+        " 10: 9f82 jalr t6\n"
+        " 12: 8faa mv t6,a0\n"
+        " 14: 4082 lw ra,0(sp)\n"
+        " 16: 4512 lw a0,4(sp)\n"
+        " 18: 45a2 lw a1,8(sp)\n"
+        " 1a: 4632 lw a2,12(sp)\n"
+        " 1c: 0141 addi sp,sp,16\n"
+        " 1e: 04862f03 lw t5,72(a2)\n" % second call: t6 is live, so it is saved in a 32-byte frame
+        " 22: 1101 addi sp,sp,-32\n"
+        " 24: c006 sw ra,0(sp)\n"
+        " 26: c22a sw a0,4(sp)\n"
+        " 28: c42e sw a1,8(sp)\n"
+        " 2a: c632 sw a2,12(sp)\n"
+        " 2c: c87e sw t6,16(sp)\n"
+        " 2e: 45d1 li a1,20\n"
+        " 30: 9f02 jalr t5\n"
+        " 32: 8f2a mv t5,a0\n"
+        " 34: 4082 lw ra,0(sp)\n"
+        " 36: 4512 lw a0,4(sp)\n"
+        " 38: 45a2 lw a1,8(sp)\n"
+        " 3a: 4632 lw a2,12(sp)\n"
+        " 3c: 4fc2 lw t6,16(sp)\n"
+        " 3e: 02010113 addi sp,sp,32\n"
+        " 42: 04862e83 lw t4,72(a2)\n" % third call: both t5 and t6 are saved
+        " 46: 1101 addi sp,sp,-32\n"
+        " 48: c006 sw ra,0(sp)\n"
+        " 4a: c22a sw a0,4(sp)\n"
+        " 4c: c42e sw a1,8(sp)\n"
+        " 4e: c632 sw a2,12(sp)\n"
+        " 50: c87a sw t5,16(sp)\n"
+        " 52: ca7e sw t6,20(sp)\n"
+        " 54: 45cd li a1,19\n"
+        " 56: 9e82 jalr t4\n"
+        " 58: 8eaa mv t4,a0\n"
+        " 5a: 4082 lw ra,0(sp)\n"
+        " 5c: 4512 lw a0,4(sp)\n"
+        " 5e: 45a2 lw a1,8(sp)\n"
+        " 60: 4632 lw a2,12(sp)\n"
+        " 62: 4f42 lw t5,16(sp)\n"
+        " 64: 4fd2 lw t6,20(sp)\n"
+        " 66: 02010113 addi sp,sp,32\n"
+        " 6a: 03462e03 lw t3,52(a2)\n" % ?PRIM_PUT_LIST call: only t4 is saved, t5/t6 are passed as {free, {ptr, _}}
+        " 6e: 1101 addi sp,sp,-32\n"
+        " 70: c006 sw ra,0(sp)\n"
+        " 72: c22a sw a0,4(sp)\n"
+        " 74: c42e sw a1,8(sp)\n"
+        " 76: c632 sw a2,12(sp)\n"
+        " 78: c876 sw t4,16(sp)\n"
+        " 7a: 000fa583 lw a1,0(t6)\n" % {ptr, Reg} arguments are loaded through the register
+        " 7e: 000f2603 lw a2,0(t5)\n"
+        " 82: 9e02 jalr t3\n"
+        " 84: 8e2a mv t3,a0\n"
+        " 86: 4082 lw ra,0(sp)\n"
+        " 88: 4512 lw a0,4(sp)\n"
+        " 8a: 45a2 lw a1,8(sp)\n"
+        " 8c: 4632 lw a2,12(sp)\n"
+        " 8e: 4ec2 lw t4,16(sp)\n"
+        " 90: 02010113 addi sp,sp,32\n"
+        " 94: 01cea023 sw t3,0(t4)" % move_to_vm_register: result stored through {ptr, RegC}
+    >>,
+    ?assertEqual(dump_to_bin(Dump), Stream).
+
+call_primitive_few_free_regs_test() -> % five native registers (t6..t2) are taken before the call
+    State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)),
+    {State1, t6} = ?BACKEND:move_to_native_register(State0, 1),
+    {State2, t5} = ?BACKEND:move_to_native_register(State1, 2),
+    {State3, t4} = ?BACKEND:move_to_native_register(State2, 3),
+    {State4, t3} = ?BACKEND:move_to_native_register(State3, 4),
+    {State5, t2} = ?BACKEND:move_to_native_register(State4, 5),
+    {State6, ResultReg} = ?BACKEND:call_primitive(State5, ?PRIM_BITSTRING_INSERT_INTEGER, [
+        t5, t6, {free, t3}, t4, {free, t2}
+    ]),
+    State7 = ?BACKEND:free_native_registers(State6, [ResultReg, t5, t6, t4]),
+    ?BACKEND:assert_all_native_free(State7),
+    Stream = ?BACKEND:stream(State7),
+    Dump = <<
+        " 0: 4f85 li t6,1\n"
+        " 2: 4f09 li t5,2\n"
+        " 4: 4e8d li t4,3\n"
+        " 6: 4e11 li t3,4\n"
+        " 8: 4395 li t2,5\n"
+        " a: 0e400313 li t1,228\n" % offset 228 is built in t1 and added to a2 before the load
+        " e: 9332 add t1,t1,a2\n"
+        " 10: 00032303 lw t1,0(t1)\n"
+        " 14: 1101 addi sp,sp,-32\n"
+        " 16: c006 sw ra,0(sp)\n"
+        " 18: c22a sw a0,4(sp)\n"
+        " 1a: c42e sw a1,8(sp)\n"
+        " 1c: c632 sw a2,12(sp)\n"
+        " 1e: c876 sw t4,16(sp)\n" % t4, t5, t6 are saved; t3 and t2 were passed as {free, _}
+        " 20: ca7a sw t5,20(sp)\n"
+        " 22: cc7e sw t6,24(sp)\n"
+        " 24: 857a mv a0,t5\n"
+        " 26: 85fe mv a1,t6\n"
+        " 28: 8672 mv a2,t3\n"
+        " 2a: 86f6 mv a3,t4\n"
+        " 2c: 871e mv a4,t2\n"
+        " 2e: 9302 jalr t1\n"
+        " 30: 832a mv t1,a0\n"
+        " 32: 4082 lw ra,0(sp)\n"
+        " 34: 4512 lw a0,4(sp)\n"
+        " 36: 45a2 lw a1,8(sp)\n"
+        " 38: 4632 lw a2,12(sp)\n"
+        " 3a: 4ec2 lw t4,16(sp)\n"
+        " 3c: 4f52 lw t5,20(sp)\n"
+        " 3e: 4fe2 lw t6,24(sp)\n"
+        " 40: 02010113 addi sp,sp,32"
+    >>,
+    ?assertEqual(dump_to_bin(Dump), Stream).
+
+call_ext_only_test() -> % reduction decrement followed by call_primitive_last/3 on ?PRIM_CALL_EXT, last argument -1
+    State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)),
+    State1 = ?BACKEND:decrement_reductions_and_maybe_schedule_next(State0),
+    State2 = ?BACKEND:call_primitive_last(State1, ?PRIM_CALL_EXT, [ctx, jit_state, offset, 2, 2, -1]),
+    Stream = ?BACKEND:stream(State2),
+    Dump = <<
+        " 0: 0085af83 lw t6,8(a1)\n" % word at 8(a1) is decremented and written back
+        " 4: 1ffd addi t6,t6,-1\n"
+        " 6: 01f5a423 sw t6,8(a1)\n"
+        " a: 000f9b63 bnez t6,0x20\n" % non-zero: skip ahead to 0x20
+        " e: 00000f97 auipc t6,0x0\n" % zero: address 0x20 is stored at 4(a1), then jump via 8(a2)
+        " 12: 0fc9 addi t6,t6,18 # 0x20\n"
+        " 14: 0001 nop\n"
+        " 16: 01f5a223 sw t6,4(a1)\n"
+        " 1a: 00862f83 lw t6,8(a2)\n"
+        " 1e: 8f82 jr t6\n"
+        " 20: 01062f83 lw t6,16(a2)\n"
+        " 24: 02400613 li a2,36\n" % the `offset` argument is emitted as the immediate 36
+        " 28: 4689 li a3,2\n"
+        " 2a: 4709 li a4,2\n"
+        " 2c: 57fd li a5,-1\n"
+        " 2e: 8f82 jr t6"
+    >>,
+    ?assertEqual(dump_to_bin(Dump), Stream).
+
+call_primitive_last_5_args_test() -> % call_primitive_last/3 where the last argument is a freed native register
+    State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)),
+    {State1, RegA} = ?BACKEND:move_to_native_register(State0, {x_reg, 0}),
+    State2 = ?BACKEND:call_primitive_last(State1, ?PRIM_RAISE_ERROR_TUPLE, [
+        ctx, jit_state, offset, ?CASE_CLAUSE_ATOM, {free, RegA}
+    ]),
+    Stream = ?BACKEND:stream(State2),
+    Dump = <<
+        " 0: 01852f83 lw t6,24(a0)\n" % {x_reg, 0} is read from 24(a0) into t6
+        " 4: 04c62f03 lw t5,76(a2)\n"
+        " 8: 4621 li a2,8\n" % the `offset` argument is emitted as the immediate 8
+        " a: 2cb00693 li a3,715\n"
+        " e: 877e mv a4,t6\n"
+        " 10: 8f02 jr t5"
+    >>,
+    ?assertEqual(dump_to_bin(Dump), Stream).
+
+call_ext_last_test() -> % reduction decrement followed by call_primitive_last/3 on ?PRIM_CALL_EXT, last argument 10
+    State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)),
+    State1 = ?BACKEND:decrement_reductions_and_maybe_schedule_next(State0),
+    State2 = ?BACKEND:call_primitive_last(State1, ?PRIM_CALL_EXT, [ctx, jit_state, offset, 2, 2, 10]),
+    Stream = ?BACKEND:stream(State2),
+    Dump = <<
+        " 0: 0085af83 lw t6,8(a1)\n" % word at 8(a1) is decremented and written back
+        " 4: 1ffd addi t6,t6,-1\n"
+        " 6: 01f5a423 sw t6,8(a1)\n"
+        " a: 000f9b63 bnez t6,0x20\n" % non-zero: skip ahead to 0x20
+        " e: 00000f97 auipc t6,0x0\n" % zero: address 0x20 is stored at 4(a1), then jump via 8(a2)
+        " 12: 0fc9 addi t6,t6,18 # 0x20\n"
+        " 14: 0001 nop\n"
+        " 16: 01f5a223 sw t6,4(a1)\n"
+        " 1a: 00862f83 lw t6,8(a2)\n"
+        " 1e: 8f82 jr t6\n"
+        " 20: 01062f83 lw t6,16(a2)\n"
+        " 24: 02400613 li a2,36\n" % the `offset` argument is emitted as the immediate 36
+        " 28: 4689 li a3,2\n"
+        " 2a: 4709 li a4,2\n"
+        " 2c: 47a9 li a5,10\n"
+        " 2e: 8f82 jr t6"
+    >>,
+    ?assertEqual(dump_to_bin(Dump), Stream).
+
+call_primitive_last_test() -> % call_primitive_last/3 with primitive index 0 and one immediate argument
+    State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)),
+    State1 = ?BACKEND:call_primitive_last(State0, 0, [ctx, jit_state, 42]),
+    Stream = ?BACKEND:stream(State1),
+    Dump =
+        <<
+            " 0: 00062f83 lw t6,0(a2)\n" % t6 <- word at 0(a2); it is the jr target below
+            " 4: 02a00613 li a2,42\n" % only the immediate is materialised
+            " 8: 8f82 jr t6"
+        >>,
+    ?assertEqual(dump_to_bin(Dump), Stream).
+
+return_if_not_equal_to_ctx_test_() -> % eunit setup fixture: both cases start from the same fresh backend state
+    {setup,
+        fun() ->
+            ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0))
+        end,
+        fun(State0) ->
+            [
+                ?_test(begin % case 1: the call result register itself is tested and freed
+                    {State1, ResultReg} = ?BACKEND:call_primitive(
+                        State0, ?PRIM_PROCESS_SIGNAL_MESSAGES, [
+                            ctx, jit_state
+                        ]
+                    ),
+                    ?assertEqual(t6, ResultReg),
+                    State2 = ?BACKEND:return_if_not_equal_to_ctx(State1, {free, ResultReg}),
+                    Stream = ?BACKEND:stream(State2),
+                    Dump =
+                        <<
+                            " 0: 05462f83 lw t6,84(a2)\n"
+                            " 4: 1141 addi sp,sp,-16\n"
+                            " 6: c006 sw ra,0(sp)\n"
+                            " 8: c22a sw a0,4(sp)\n"
+                            " a: c42e sw a1,8(sp)\n"
+                            " c: c632 sw a2,12(sp)\n"
+                            " e: 9f82 jalr t6\n"
+                            " 10: 8faa mv t6,a0\n"
+                            " 12: 4082 lw ra,0(sp)\n"
+                            " 14: 4512 lw a0,4(sp)\n"
+                            " 16: 45a2 lw a1,8(sp)\n"
+                            " 18: 4632 lw a2,12(sp)\n"
+                            " 1a: 0141 addi sp,sp,16\n"
+                            " 1c: 00af8463 beq t6,a0,0x24\n" % equal to a0: skip past the mv/ret pair
+                            " 20: 857e mv a0,t6\n" % otherwise the value is moved to a0 and returned
+                            " 22: 8082 ret"
+                        >>,
+                    ?assertEqual(dump_to_bin(Dump), Stream)
+                end),
+                ?_test(begin % case 2: the result is first copied to another register (t5), which is then tested
+                    {State1, ResultReg} = ?BACKEND:call_primitive(
+                        State0, ?PRIM_PROCESS_SIGNAL_MESSAGES, [
+                            ctx, jit_state
+                        ]
+                    ),
+                    ?assertEqual(t6, ResultReg),
+                    {State2, OtherReg} = ?BACKEND:copy_to_native_register(State1, ResultReg),
+                    ?assertEqual(t5, OtherReg),
+                    State3 = ?BACKEND:return_if_not_equal_to_ctx(State2, {free, OtherReg}),
+                    Stream = ?BACKEND:stream(State3),
+                    Dump =
+                        <<
+                            " 0: 05462f83 lw t6,84(a2)\n"
+                            " 4: 1141 addi sp,sp,-16\n"
+                            " 6: c006 sw ra,0(sp)\n"
+                            " 8: c22a sw a0,4(sp)\n"
+                            " a: c42e sw a1,8(sp)\n"
+                            " c: c632 sw a2,12(sp)\n"
+                            " e: 9f82 jalr t6\n"
+                            " 10: 8faa mv t6,a0\n"
+                            " 12: 4082 lw ra,0(sp)\n"
+                            " 14: 4512 lw a0,4(sp)\n"
+                            " 16: 45a2 lw a1,8(sp)\n"
+                            " 18: 4632 lw a2,12(sp)\n"
+                            " 1a: 0141 addi sp,sp,16\n"
+                            " 1c: 8f7e mv t5,t6\n" % copy_to_native_register: t6 duplicated into t5
+                            " 1e: 00af0463 beq t5,a0,0x26\n"
+                            " 22: 857a mv a0,t5\n"
+                            " 24: 8082 ret"
+                        >>,
+                    ?assertEqual(dump_to_bin(Dump), Stream)
+                end)
+            ]
+        end}.
+ +move_to_cp_test() -> + State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)), + State1 = ?BACKEND:move_to_cp(State0, {y_reg, 0}), + Stream = ?BACKEND:stream(State1), + Dump = + << + " 0: 01452f03 lw t5,20(a0)\n" + " 4: 000f2f83 lw t6,0(t5)\n" + " 8: 05f52e23 sw t6,92(a0)" + >>, + ?assertEqual(dump_to_bin(Dump), Stream). + +increment_sp_test() -> + State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)), + State1 = ?BACKEND:increment_sp(State0, 7), + Stream = ?BACKEND:stream(State1), + Dump = + << + " 0: 01452f83 lw t6,20(a0)\n" + " 4: 0ff1 addi t6,t6,28\n" + " 6: 01f52a23 sw t6,20(a0)" + >>, + ?assertEqual(dump_to_bin(Dump), Stream). + +if_block_test_() -> + {setup, + fun() -> + State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)), + {State1, RegA} = ?BACKEND:move_to_native_register(State0, {x_reg, 0}), + {State2, RegB} = ?BACKEND:move_to_native_register(State1, {x_reg, 1}), + {State2, RegA, RegB} + end, + fun({State0, RegA, RegB}) -> + [ + ?_test(begin + State1 = ?BACKEND:if_block( + State0, + {RegA, '<', 0}, + fun(BSt0) -> + ?BACKEND:add(BSt0, RegB, 2) + end + ), + Stream = ?BACKEND:stream(State1), + Dump = << + " 0: 01852f83 lw t6,24(a0)\n" + " 4: 01c52f03 lw t5,28(a0)\n" + " 8: 000fd363 bgez t6,0xe\n" + " c: 0f09 addi t5,t5,2" + >>, + ?assertEqual(dump_to_bin(Dump), Stream), + ?assertEqual([RegB, RegA], ?BACKEND:used_regs(State1)) + end), + ?_test(begin + State1 = ?BACKEND:if_block( + State0, + {RegA, '<', RegB}, + fun(BSt0) -> + ?BACKEND:add(BSt0, RegB, 2) + end + ), + Stream = ?BACKEND:stream(State1), + Dump = << + " 0: 01852f83 lw t6,24(a0)\n" + " 4: 01c52f03 lw t5,28(a0)\n" + " 8: 01efd363 bge t6,t5,0xe\n" + " c: 0f09 addi t5,t5,2" + >>, + ?assertEqual(dump_to_bin(Dump), Stream), + ?assertEqual([RegB, RegA], ?BACKEND:used_regs(State1)) + end), + ?_test(begin + State1 = ?BACKEND:if_block( + State0, + {RegA, '<', 42}, + fun(BSt0) -> + ?BACKEND:add(BSt0, RegB, 2) + 
end + ), + Stream = ?BACKEND:stream(State1), + Dump = << + " 0: 01852f83 lw t6,24(a0)\n" + " 4: 01c52f03 lw t5,28(a0)\n" + " 8: 02a00e93 li t4,42\n" + " c: 01dfd363 bge t6,t4,0x12\n" + " 10: 0f09 addi t5,t5,2" + >>, + ?assertEqual(dump_to_bin(Dump), Stream), + ?assertEqual([RegB, RegA], ?BACKEND:used_regs(State1)) + end), + ?_test(begin + State1 = ?BACKEND:if_block( + State0, + {RegA, '<', 1024}, + fun(BSt0) -> + ?BACKEND:add(BSt0, RegB, 2) + end + ), + State2 = ?BACKEND:jump_to_offset(State1, 16#100), + Stream = ?BACKEND:stream(State2), + Dump = << + " 0: 01852f83 lw t6,24(a0)\n" + " 4: 01c52f03 lw t5,28(a0)\n" + " 8: 40000e93 li t4,1024\n" + " c: 01dfd363 bge t6,t4,0x12\n" + " 10: 0f09 addi t5,t5,2\n" + " 12: a0fd j 0x100" + >>, + ?assertEqual(dump_to_bin(Dump), Stream), + ?assertEqual([RegB, RegA], ?BACKEND:used_regs(State1)) + end), + ?_test(begin + State1 = ?BACKEND:if_block( + State0, + {RegA, '==', 0}, + fun(BSt0) -> + ?BACKEND:add(BSt0, RegB, 2) + end + ), + Stream = ?BACKEND:stream(State1), + Dump = << + " 0: 01852f83 lw t6,24(a0)\n" + " 4: 01c52f03 lw t5,28(a0)\n" + " 8: 000f9363 bnez t6,0xe\n" + " c: 0f09 addi t5,t5,2" + >>, + ?assertEqual(dump_to_bin(Dump), Stream), + ?assertEqual([RegB, RegA], ?BACKEND:used_regs(State1)) + end), + ?_test(begin + State1 = ?BACKEND:if_block( + State0, + {{free, RegA}, '==', 0}, + fun(BSt0) -> + ?BACKEND:add(BSt0, RegB, 2) + end + ), + Stream = ?BACKEND:stream(State1), + Dump = << + " 0: 01852f83 lw t6,24(a0)\n" + " 4: 01c52f03 lw t5,28(a0)\n" + " 8: 000f9363 bnez t6,0xe\n" + " c: 0f09 addi t5,t5,2" + >>, + ?assertEqual(dump_to_bin(Dump), Stream), + ?assertEqual([RegB], ?BACKEND:used_regs(State1)) + end), + ?_test(begin + State1 = ?BACKEND:if_block( + State0, + {RegA, '==', -1}, + fun(BSt0) -> + ?BACKEND:add(BSt0, RegB, 2) + end + ), + Stream = ?BACKEND:stream(State1), + Dump = << + " 0: 01852f83 lw t6,24(a0)\n" + " 4: 01c52f03 lw t5,28(a0)\n" + " 8: 5efd li t4,-1\n" + " a: 01df9363 bne t6,t4,0x10\n" + " e: 0f09 addi 
t5,t5,2" + >>, + ?assertEqual(dump_to_bin(Dump), Stream), + ?assertEqual([RegB, RegA], ?BACKEND:used_regs(State1)) + end), + ?_test(begin + State1 = ?BACKEND:if_block( + State0, + {'(int)', RegA, '==', 0}, + fun(BSt0) -> + ?BACKEND:add(BSt0, RegB, 2) + end + ), + Stream = ?BACKEND:stream(State1), + Dump = << + " 0: 01852f83 lw t6,24(a0)\n" + " 4: 01c52f03 lw t5,28(a0)\n" + " 8: 000f9363 bnez t6,0xe\n" + " c: 0f09 addi t5,t5,2" + >>, + ?assertEqual(dump_to_bin(Dump), Stream), + ?assertEqual([RegB, RegA], ?BACKEND:used_regs(State1)) + end), + ?_test(begin + State1 = ?BACKEND:if_block( + State0, + {'(int)', {free, RegA}, '==', 0}, + fun(BSt0) -> + ?BACKEND:add(BSt0, RegB, 2) + end + ), + Stream = ?BACKEND:stream(State1), + Dump = << + " 0: 01852f83 lw t6,24(a0)\n" + " 4: 01c52f03 lw t5,28(a0)\n" + " 8: 000f9363 bnez t6,0xe\n" + " c: 0f09 addi t5,t5,2" + >>, + ?assertEqual(dump_to_bin(Dump), Stream), + ?assertEqual([RegB], ?BACKEND:used_regs(State1)) + end), + ?_test(begin + State1 = ?BACKEND:if_block( + State0, + {RegA, '!=', ?TERM_NIL}, + fun(BSt0) -> + ?BACKEND:add(BSt0, RegB, 2) + end + ), + Stream = ?BACKEND:stream(State1), + Dump = << + " 0: 01852f83 lw t6,24(a0)\n" + " 4: 01c52f03 lw t5,28(a0)\n" + " 8: 03b00e93 li t4,59\n" + " c: 01df8363 beq t6,t4,0x12\n" + " 10: 0f09 addi t5,t5,2" + >>, + ?assertEqual(dump_to_bin(Dump), Stream), + ?assertEqual([RegB, RegA], ?BACKEND:used_regs(State1)) + end), + ?_test(begin + State1 = ?BACKEND:if_block( + State0, + {{free, RegA}, '!=', ?TERM_NIL}, + fun(BSt0) -> + ?BACKEND:add(BSt0, RegB, 2) + end + ), + Stream = ?BACKEND:stream(State1), + Dump = << + " 0: 01852f83 lw t6,24(a0)\n" + " 4: 01c52f03 lw t5,28(a0)\n" + " 8: 03b00e93 li t4,59\n" + " c: 01df8363 beq t6,t4,0x12\n" + " 10: 0f09 addi t5,t5,2" + >>, + ?assertEqual(dump_to_bin(Dump), Stream), + ?assertEqual([RegB], ?BACKEND:used_regs(State1)) + end), + ?_test(begin + State1 = ?BACKEND:if_block( + State0, + {'(int)', RegA, '!=', 42}, + fun(BSt0) -> + ?BACKEND:add(BSt0, 
RegB, 2) + end + ), + Stream = ?BACKEND:stream(State1), + Dump = << + " 0: 01852f83 lw t6,24(a0)\n" + " 4: 01c52f03 lw t5,28(a0)\n" + " 8: 02a00e93 li t4,42\n" + " c: 01df8363 beq t6,t4,0x12\n" + " 10: 0f09 addi t5,t5,2" + >>, + ?assertEqual(dump_to_bin(Dump), Stream), + ?assertEqual([RegB, RegA], ?BACKEND:used_regs(State1)) + end), + ?_test(begin + % Test large immediate (1995) that requires temporary register + State1 = ?BACKEND:if_block( + State0, + {RegA, '!=', 1995}, + fun(BSt0) -> + ?BACKEND:add(BSt0, RegB, 1) + end + ), + State2 = ?BACKEND:jump_to_offset(State1, 16#100), + Stream = ?BACKEND:stream(State2), + Dump = << + " 0: 01852f83 lw t6,24(a0)\n" + " 4: 01c52f03 lw t5,28(a0)\n" + " 8: 7cb00e93 li t4,1995\n" + " c: 01df8363 beq t6,t4,0x12\n" + " 10: 0f05 addi t5,t5,1\n" + " 12: a0fd j 0x100" + >>, + ?assertEqual(dump_to_bin(Dump), Stream) + end), + ?_test(begin + State1 = ?BACKEND:if_block( + State0, + {'(int)', {free, RegA}, '!=', 42}, + fun(BSt0) -> + ?BACKEND:add(BSt0, RegB, 2) + end + ), + Stream = ?BACKEND:stream(State1), + Dump = << + " 0: 01852f83 lw t6,24(a0)\n" + " 4: 01c52f03 lw t5,28(a0)\n" + " 8: 02a00e93 li t4,42\n" + " c: 01df8363 beq t6,t4,0x12\n" + " 10: 0f09 addi t5,t5,2" + >>, + ?assertEqual(dump_to_bin(Dump), Stream), + ?assertEqual([RegB], ?BACKEND:used_regs(State1)) + end), + ?_test(begin + State1 = ?BACKEND:if_block( + State0, + {RegA, '==', ?TERM_NIL}, + fun(BSt0) -> + ?BACKEND:add(BSt0, RegB, 2) + end + ), + Stream = ?BACKEND:stream(State1), + Dump = << + " 0: 01852f83 lw t6,24(a0)\n" + " 4: 01c52f03 lw t5,28(a0)\n" + " 8: 03b00e93 li t4,59\n" + " c: 01df9363 bne t6,t4,0x12\n" + " 10: 0f09 addi t5,t5,2" + >>, + ?assertEqual(dump_to_bin(Dump), Stream), + ?assertEqual([RegB, RegA], ?BACKEND:used_regs(State1)) + end), + ?_test(begin + State1 = ?BACKEND:if_block( + State0, + {{free, RegA}, '==', ?TERM_NIL}, + fun(BSt0) -> + ?BACKEND:add(BSt0, RegB, 2) + end + ), + Stream = ?BACKEND:stream(State1), + Dump = << + " 0: 01852f83 lw 
t6,24(a0)\n" + " 4: 01c52f03 lw t5,28(a0)\n" + " 8: 03b00e93 li t4,59\n" + " c: 01df9363 bne t6,t4,0x12\n" + " 10: 0f09 addi t5,t5,2" + >>, + ?assertEqual(dump_to_bin(Dump), Stream), + ?assertEqual([RegB], ?BACKEND:used_regs(State1)) + end), + ?_test(begin + State1 = ?BACKEND:if_block( + State0, + {'(int)', RegA, '==', 42}, + fun(BSt0) -> + ?BACKEND:add(BSt0, RegB, 2) + end + ), + Stream = ?BACKEND:stream(State1), + Dump = << + " 0: 01852f83 lw t6,24(a0)\n" + " 4: 01c52f03 lw t5,28(a0)\n" + " 8: 02a00e93 li t4,42\n" + " c: 01df9363 bne t6,t4,0x12\n" + " 10: 0f09 addi t5,t5,2" + >>, + ?assertEqual(dump_to_bin(Dump), Stream), + ?assertEqual([RegB, RegA], ?BACKEND:used_regs(State1)) + end), + ?_test(begin + State1 = ?BACKEND:if_block( + State0, + {'(int)', {free, RegA}, '==', 42}, + fun(BSt0) -> + ?BACKEND:add(BSt0, RegB, 2) + end + ), + Stream = ?BACKEND:stream(State1), + Dump = << + " 0: 01852f83 lw t6,24(a0)\n" + " 4: 01c52f03 lw t5,28(a0)\n" + " 8: 02a00e93 li t4,42\n" + " c: 01df9363 bne t6,t4,0x12\n" + " 10: 0f09 addi t5,t5,2" + >>, + ?assertEqual(dump_to_bin(Dump), Stream), + ?assertEqual([RegB], ?BACKEND:used_regs(State1)) + end), + ?_test(begin + State1 = ?BACKEND:if_block( + State0, + {'(bool)', RegA, '==', false}, + fun(BSt0) -> + ?BACKEND:add(BSt0, RegB, 2) + end + ), + Stream = ?BACKEND:stream(State1), + Dump = << + " 0: 01852f83 lw t6,24(a0)\n" + " 4: 01c52f03 lw t5,28(a0)\n" + " 8: 01ff9e93 slli t4,t6,0x1f\n" + " c: 000ec363 bltz t4,0x12\n" + " 10: 0f09 addi t5,t5,2" + >>, + ?assertEqual(dump_to_bin(Dump), Stream), + ?assertEqual([RegB, RegA], ?BACKEND:used_regs(State1)) + end), + ?_test(begin + State1 = ?BACKEND:if_block( + State0, + {'(bool)', {free, RegA}, '==', false}, + fun(BSt0) -> + ?BACKEND:add(BSt0, RegB, 2) + end + ), + Stream = ?BACKEND:stream(State1), + Dump = << + " 0: 01852f83 lw t6,24(a0)\n" + " 4: 01c52f03 lw t5,28(a0)\n" + " 8: 01ff9e93 slli t4,t6,0x1f\n" + " c: 000ec363 bltz t4,0x12\n" + " 10: 0f09 addi t5,t5,2" + >>, + 
?assertEqual(dump_to_bin(Dump), Stream), + ?assertEqual([RegB], ?BACKEND:used_regs(State1)) + end), + ?_test(begin + State1 = ?BACKEND:if_block( + State0, + {'(bool)', RegA, '!=', false}, + fun(BSt0) -> + ?BACKEND:add(BSt0, RegB, 2) + end + ), + Stream = ?BACKEND:stream(State1), + Dump = << + " 0: 01852f83 lw t6,24(a0)\n" + " 4: 01c52f03 lw t5,28(a0)\n" + " 8: 01ff9e93 slli t4,t6,0x1f\n" + " c: 000ed363 bgez t4,0x12\n" + " 10: 0f09 addi t5,t5,2" + >>, + ?assertEqual(dump_to_bin(Dump), Stream), + ?assertEqual([RegB, RegA], ?BACKEND:used_regs(State1)) + end), + ?_test(begin + State1 = ?BACKEND:if_block( + State0, + {'(bool)', {free, RegA}, '!=', false}, + fun(BSt0) -> + ?BACKEND:add(BSt0, RegB, 2) + end + ), + Stream = ?BACKEND:stream(State1), + Dump = << + " 0: 01852f83 lw t6,24(a0)\n" + " 4: 01c52f03 lw t5,28(a0)\n" + " 8: 01ff9e93 slli t4,t6,0x1f\n" + " c: 000ed363 bgez t4,0x12\n" + " 10: 0f09 addi t5,t5,2" + >>, + ?assertEqual(dump_to_bin(Dump), Stream), + ?assertEqual([RegB], ?BACKEND:used_regs(State1)) + end), + ?_test(begin + State1 = ?BACKEND:if_block( + State0, + {RegA, '&', 16#7, '!=', 0}, + fun(BSt0) -> + ?BACKEND:add(BSt0, RegB, 2) + end + ), + Stream = ?BACKEND:stream(State1), + Dump = << + " 0: 01852f83 lw t6,24(a0)\n" + " 4: 01c52f03 lw t5,28(a0)\n" + " 8: 007ffe93 andi t4,t6,7\n" + " c: 000e8363 beqz t4,0x12\n" + " 10: 0f09 addi t5,t5,2" + >>, + ?assertEqual(dump_to_bin(Dump), Stream), + ?assertEqual([RegB, RegA], ?BACKEND:used_regs(State1)) + end), + ?_test(begin + State1 = ?BACKEND:if_block( + State0, + {RegA, '&', 16#5, '!=', 0}, + fun(BSt0) -> + ?BACKEND:add(BSt0, RegB, 2) + end + ), + Stream = ?BACKEND:stream(State1), + Dump = << + " 0: 01852f83 lw t6,24(a0)\n" + " 4: 01c52f03 lw t5,28(a0)\n" + " 8: 005ffe93 andi t4,t6,5\n" + " c: 000e8363 beqz t4,0x12\n" + " 10: 0f09 addi t5,t5,2" + >>, + ?assertEqual(dump_to_bin(Dump), Stream), + ?assertEqual([RegB, RegA], ?BACKEND:used_regs(State1)) + end), + ?_test(begin + State1 = ?BACKEND:if_block( + 
State0, + {{free, RegA}, '&', 16#7, '!=', 0}, + fun(BSt0) -> + ?BACKEND:add(BSt0, RegB, 2) + end + ), + Stream = ?BACKEND:stream(State1), + Dump = << + " 0: 01852f83 lw t6,24(a0)\n" + " 4: 01c52f03 lw t5,28(a0)\n" + " 8: 007ffe93 andi t4,t6,7\n" + " c: 000e8363 beqz t4,0x12\n" + " 10: 0f09 addi t5,t5,2" + >>, + ?assertEqual(dump_to_bin(Dump), Stream), + ?assertEqual([RegB], ?BACKEND:used_regs(State1)) + end), + ?_test(begin + State1 = ?BACKEND:if_block( + State0, + {RegA, '&', ?TERM_IMMED_TAG_MASK, '!=', ?TERM_INTEGER_TAG}, + fun(BSt0) -> + ?BACKEND:add(BSt0, RegB, 2) + end + ), + Stream = ?BACKEND:stream(State1), + Dump = << + " 0: 01852f83 lw t6,24(a0)\n" + " 4: 01c52f03 lw t5,28(a0)\n" + " 8: ffffce93 not t4,t6\n" + " c: 0ef2 slli t4,t4,0x1c\n" + " e: 000e8363 beqz t4,0x14\n" + " 12: 0f09 addi t5,t5,2" + >>, + ?assertEqual(dump_to_bin(Dump), Stream), + ?assertEqual([RegB, RegA], ?BACKEND:used_regs(State1)) + end), + ?_test(begin + State1 = ?BACKEND:if_block( + State0, + {{free, RegA}, '&', ?TERM_IMMED_TAG_MASK, '!=', ?TERM_INTEGER_TAG}, + fun(BSt0) -> + ?BACKEND:add(BSt0, RegB, 2) + end + ), + Stream = ?BACKEND:stream(State1), + Dump = << + " 0: 01852f83 lw t6,24(a0)\n" + " 4: 01c52f03 lw t5,28(a0)\n" + " 8: ffffcf93 not t6,t6\n" + " c: 0ff2 slli t6,t6,0x1c\n" + " e: 000f8363 beqz t6,0x14\n" + " 12: 0f09 addi t5,t5,2" + >>, + ?assertEqual(dump_to_bin(Dump), Stream), + ?assertEqual([RegB], ?BACKEND:used_regs(State1)) + end), + ?_test(begin + State1 = ?BACKEND:if_block( + State0, + {RegA, '&', ?TERM_BOXED_TAG_MASK, '!=', ?TERM_BOXED_POSITIVE_INTEGER}, + fun(BSt0) -> + ?BACKEND:add(BSt0, RegB, 2) + end + ), + Stream = ?BACKEND:stream(State1), + Dump = << + " 0: 01852f83 lw t6,24(a0)\n" + " 4: 01c52f03 lw t5,28(a0)\n" + " 8: 8efe mv t4,t6\n" + " a: 03f00e13 li t3,63\n" + " e: 01cefeb3 and t4,t4,t3\n" + " 12: 4e21 li t3,8\n" + " 14: 01ce8363 beq t4,t3,0x1a\n" + " 18: 0f09 addi t5,t5,2" + >>, + ?assertEqual(dump_to_bin(Dump), Stream), + ?assertEqual([RegB, RegA], 
?BACKEND:used_regs(State1)) + end), + ?_test(begin + State1 = ?BACKEND:if_block( + State0, + {{free, RegA}, '<', RegB}, + fun(BSt0) -> + ?BACKEND:add(BSt0, RegB, 2) + end + ), + Stream = ?BACKEND:stream(State1), + Dump = << + " 0: 01852f83 lw t6,24(a0)\n" + " 4: 01c52f03 lw t5,28(a0)\n" + " 8: 01efd363 bge t6,t5,0xe\n" + " c: 0f09 addi t5,t5,2" + >>, + ?assertEqual(dump_to_bin(Dump), Stream), + ?assertEqual([RegB], ?BACKEND:used_regs(State1)) + end), + ?_test(begin + State1 = ?BACKEND:if_block( + State0, + { + {free, RegA}, + '&', + ?TERM_BOXED_TAG_MASK, + '!=', + ?TERM_BOXED_POSITIVE_INTEGER + }, + fun(BSt0) -> + ?BACKEND:add(BSt0, RegB, 2) + end + ), + Stream = ?BACKEND:stream(State1), + Dump = << + " 0: 01852f83 lw t6,24(a0)\n" + " 4: 01c52f03 lw t5,28(a0)\n" + " 8: 03f00e93 li t4,63\n" + " c: 01dfffb3 and t6,t6,t4\n" + " 10: 4ea1 li t4,8\n" + " 12: 01df8363 beq t6,t4,0x18\n" + " 16: 0f09 addi t5,t5,2" + >>, + ?assertEqual(dump_to_bin(Dump), Stream), + ?assertEqual([RegB], ?BACKEND:used_regs(State1)) + end), + %% Test {RegA, '&', 16#3, '!=', 0} using ANDI instruction + ?_test(begin + State1 = ?BACKEND:if_block( + State0, + {RegA, '&', 16#3, '!=', 0}, + fun(BSt0) -> + ?BACKEND:add(BSt0, RegB, 2) + end + ), + Stream = ?BACKEND:stream(State1), + Dump = << + " 0: 01852f83 lw t6,24(a0)\n" + " 4: 01c52f03 lw t5,28(a0)\n" + " 8: 003ffe93 andi t4,t6,3\n" + " c: 000e8363 beqz t4,0x12\n" + " 10: 0f09 addi t5,t5,2" + >>, + ?assertEqual(dump_to_bin(Dump), Stream), + ?assertEqual([RegB, RegA], ?BACKEND:used_regs(State1)) + end), + ?_test(begin + State1 = ?BACKEND:if_block( + State0, + {100, '<', RegA}, + fun(BSt0) -> + ?BACKEND:add(BSt0, RegB, 2) + end + ), + Stream = ?BACKEND:stream(State1), + Dump = << + " 0: 01852f83 lw t6,24(a0)\n" + " 4: 01c52f03 lw t5,28(a0)\n" + " 8: 06400e93 li t4,100\n" + " c: 01fed363 bge t4,t6,0x12\n" + " 10: 0f09 addi t5,t5,2" + >>, + ?assertEqual(dump_to_bin(Dump), Stream), + ?assertEqual([RegB, RegA], ?BACKEND:used_regs(State1)) + end), + 
?_test(begin + State1 = ?BACKEND:if_block( + State0, + {100, '<', {free, RegA}}, + fun(BSt0) -> + ?BACKEND:add(BSt0, RegB, 2) + end + ), + Stream = ?BACKEND:stream(State1), + Dump = << + " 0: 01852f83 lw t6,24(a0)\n" + " 4: 01c52f03 lw t5,28(a0)\n" + " 8: 06400e93 li t4,100\n" + " c: 01fed363 bge t4,t6,0x12\n" + " 10: 0f09 addi t5,t5,2" + >>, + ?assertEqual(dump_to_bin(Dump), Stream), + ?assertEqual([RegB], ?BACKEND:used_regs(State1)) + end), + ?_test(begin + State1 = ?BACKEND:if_block( + State0, + {1024, '<', RegA}, + fun(BSt0) -> + ?BACKEND:add(BSt0, RegB, 2) + end + ), + Stream = ?BACKEND:stream(State1), + Dump = << + " 0: 01852f83 lw t6,24(a0)\n" + " 4: 01c52f03 lw t5,28(a0)\n" + " 8: 40000e93 li t4,1024\n" + " c: 01fed363 bge t4,t6,0x12\n" + " 10: 0f09 addi t5,t5,2" + >>, + ?assertEqual(dump_to_bin(Dump), Stream), + ?assertEqual([RegB, RegA], ?BACKEND:used_regs(State1)) + end), + ?_test(begin + State1 = ?BACKEND:if_block( + State0, + {1024, '<', {free, RegA}}, + fun(BSt0) -> + ?BACKEND:add(BSt0, RegB, 2) + end + ), + Stream = ?BACKEND:stream(State1), + Dump = << + " 0: 01852f83 lw t6,24(a0)\n" + " 4: 01c52f03 lw t5,28(a0)\n" + " 8: 40000e93 li t4,1024\n" + " c: 01fed363 bge t4,t6,0x12\n" + " 10: 0f09 addi t5,t5,2" + >>, + ?assertEqual(dump_to_bin(Dump), Stream), + ?assertEqual([RegB], ?BACKEND:used_regs(State1)) + end) + ] + end}. 
+ +if_else_block_test() -> + State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)), + {State1, Reg1} = ?BACKEND:move_to_native_register(State0, {x_reg, 0}), + {State2, Reg2} = ?BACKEND:move_to_native_register(State1, {x_reg, 1}), + State3 = ?BACKEND:if_else_block( + State2, + {Reg1, '==', ?TERM_NIL}, + fun(BSt0) -> + ?BACKEND:add(BSt0, Reg2, 2) + end, + fun(BSt0) -> + ?BACKEND:add(BSt0, Reg2, 4) + end + ), + Stream = ?BACKEND:stream(State3), + Dump = + << + "0: 01852f83 lw t6,24(a0)\n" + "4: 01c52f03 lw t5,28(a0)\n" + "8: 03b00e93 li t4,59\n" + "c: 01df9463 bne t6,t4,0x14\n" + "10: 0f09 addi t5,t5,2\n" + "12: a011 j 0x16\n" + "14: 0f11 addi t5,t5,4" + >>, + ?assertEqual(dump_to_bin(Dump), Stream). + +shift_right_test_() -> + [ + ?_test(begin + State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)), + {State1, Reg} = ?BACKEND:move_to_native_register(State0, {x_reg, 0}), + {State2, Reg} = ?BACKEND:shift_right(State1, {free, Reg}, 3), + Stream = ?BACKEND:stream(State2), + Dump = + << + " 0: 01852f83 lw t6,24(a0)\n" + " 4: 003fdf93 srli t6,t6,0x3" + >>, + ?assertEqual(dump_to_bin(Dump), Stream) + end), + ?_test(begin + State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)), + {State1, Reg} = ?BACKEND:move_to_native_register(State0, {x_reg, 0}), + {State2, OtherReg} = ?BACKEND:shift_right(State1, Reg, 3), + ?assertNotEqual(OtherReg, Reg), + Stream = ?BACKEND:stream(State2), + Dump = + << + " 0: 01852f83 lw t6,24(a0)\n" + " 4: 003fdf13 srli t5,t6,0x3" + >>, + ?assertEqual(dump_to_bin(Dump), Stream) + end) + ]. 
+ +shift_left_test() -> + State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)), + {State1, Reg} = ?BACKEND:move_to_native_register(State0, {x_reg, 0}), + State2 = ?BACKEND:shift_left(State1, Reg, 3), + Stream = ?BACKEND:stream(State2), + Dump = + << + "0: 01852f83 lw t6,24(a0)\n" + "4: 0f8e slli t6,t6,0x3" + >>, + ?assertEqual(dump_to_bin(Dump), Stream). + +call_only_or_schedule_next_and_label_relocation_test() -> + State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)), + State1 = ?BACKEND:jump_table(State0, 2), + State2 = ?BACKEND:add_label(State1, 1), + State3 = ?BACKEND:call_only_or_schedule_next(State2, 2), + State4 = ?BACKEND:add_label(State3, 2), + State5 = ?BACKEND:call_primitive_last(State4, 0, [ctx, jit_state]), + % OP_INT_CALL_END + State6 = ?BACKEND:add_label(State5, 0), + State7 = ?BACKEND:call_primitive_last(State6, 1, [ctx, jit_state]), + State8 = ?BACKEND:update_branches(State7), + Stream = ?BACKEND:stream(State8), + Dump = + << + " 0: 00000697 auipc a3,0x0\n" + " 4: 04668067 jr 70(a3) # 0x46\n" + " 8: 00000697 auipc a3,0x0\n" + " c: 01068067 jr 16(a3) # 0x18\n" + " 10: 00000697 auipc a3,0x0\n" + " 14: 03068067 jr 48(a3) # 0x40\n" + " 18: 0085af83 lw t6,8(a1)\n" + " 1c: 1ffd addi t6,t6,-1\n" + " 1e: 01f5a423 sw t6,8(a1)\n" + " 22: 000f8663 beqz t6,0x2e\n" + " 26: a829 j 0x40\n" + " 28: 0001 nop\n" + " 2a: 00000013 nop\n" + " 2e: 00000f97 auipc t6,0x0\n" + " 32: 0fc9 addi t6,t6,18 # 0x40\n" + " 34: 0001 nop\n" + " 36: 01f5a223 sw t6,4(a1)\n" + " 3a: 00862f83 lw t6,8(a2)\n" + " 3e: 8f82 jr t6\n" + " 40: 00062f83 lw t6,0(a2)\n" + " 44: 8f82 jr t6\n" + " 46: 00462f83 lw t6,4(a2)\n" + " 4a: 8f82 jr t6" + >>, + ?assertEqual(dump_to_bin(Dump), Stream). 
+ +call_only_or_schedule_next_known_label_test() -> + State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)), + State1 = ?BACKEND:jump_table(State0, 2), + State2 = ?BACKEND:add_label(State1, 1), + State3 = ?BACKEND:add_label(State2, 2, 16#36), + State4 = ?BACKEND:call_only_or_schedule_next(State3, 2), + State5 = ?BACKEND:call_primitive_last(State4, 0, [ctx, jit_state]), + % OP_INT_CALL_END + State6 = ?BACKEND:add_label(State5, 0), + State7 = ?BACKEND:call_primitive_last(State6, 1, [ctx, jit_state]), + State8 = ?BACKEND:update_branches(State7), + Stream = ?BACKEND:stream(State8), + Dump = + << + " 0: 00000697 auipc a3,0x0\n" + " 4: 03c68067 jr 60(a3) # 0x3c\n" + " 8: 00000697 auipc a3,0x0\n" + " c: 01068067 jr 16(a3) # 0x18\n" + " 10: 00000697 auipc a3,0x0\n" + " 14: 02668067 jr 38(a3) # 0x36\n" + " 18: 0085af83 lw t6,8(a1)\n" + " 1c: 1ffd addi t6,t6,-1\n" + " 1e: 01f5a423 sw t6,8(a1)\n" + " 22: 000f9a63 bnez t6,0x36\n" + " 26: 00000f97 auipc t6,0x0\n" + " 2a: 0fc1 addi t6,t6,16 # 0x36\n" + " 2c: 01f5a223 sw t6,4(a1)\n" + " 30: 00862f83 lw t6,8(a2)\n" + " 34: 8f82 jr t6\n" + " 36: 00062f83 lw t6,0(a2)\n" + " 3a: 8f82 jr t6\n" + " 3c: 00462f83 lw t6,4(a2)\n" + " 40: 8f82 jr t6" + >>, + ?assertEqual(dump_to_bin(Dump), Stream). 
+ +%% Test with large gap (256+ bytes) to force mov_immediate path +call_only_or_schedule_next_and_label_relocation_large_gap_test() -> + State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)), + State1 = ?BACKEND:jump_table(State0, 2), + % Add large padding by emitting many move_to_native_register operations + % This creates a large gap between the jump table and the actual function bodies + % Each operation emits ~2 bytes, so 128 operations = ~256 bytes + StatePadded = lists:foldl( + fun(_, S) -> + ?BACKEND:move_to_native_register(S, {x_reg, 2}, a3) + end, + State1, + lists:seq(1, 128) + ), + State2 = ?BACKEND:add_label(StatePadded, 1), + State3 = ?BACKEND:call_only_or_schedule_next(State2, 2), + State4 = ?BACKEND:add_label(State3, 2), + State5 = ?BACKEND:call_primitive_last(State4, 0, [ctx, jit_state]), + % OP_INT_CALL_END + State6 = ?BACKEND:add_label(State5, 0), + State7 = ?BACKEND:call_primitive_last(State6, 1, [ctx, jit_state]), + State8 = ?BACKEND:update_branches(State7), + Stream = ?BACKEND:stream(State8), + % Extract the final section starting at 0x118 (after jump table 24 bytes + 128 loads 256 bytes) + % RISC-V: Jump table is 3×8=24 bytes, loads are 2 bytes each (compressed) + Dump = << + " 0: 0085af83 lw t6,8(a1)\n" + " 4: 1ffd addi t6,t6,-1\n" + " 6: 01f5a423 sw t6,8(a1)\n" + " a: 000f8663 beqz t6,0x16\n" + " e: a829 j 0x28\n" + " 10: 0001 nop\n" + " 12: 00000013 nop\n" + " 16: 00000f97 auipc t6,0x0\n" + " 1a: 0fc9 addi t6,t6,18 # 0x28\n" + " 1c: 0001 nop\n" + " 1e: 01f5a223 sw t6,4(a1)\n" + " 22: 00862f83 lw t6,8(a2)\n" + " 26: 8f82 jr t6\n" + " 28: 00062f83 lw t6,0(a2)\n" + " 2c: 8f82 jr t6\n" + " 2e: 00462f83 lw t6,4(a2)\n" + " 32: 8f82 jr t6" + >>, + {_, RelevantBinary} = split_binary(Stream, 16#118), + ?assertEqual(dump_to_bin(Dump), RelevantBinary). 
+ +call_bif_with_large_literal_integer_test() -> + State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)), + {State1, FuncPtr} = ?BACKEND:call_primitive(State0, 8, [jit_state, 2]), + {State2, ArgReg} = ?BACKEND:call_primitive(State1, 15, [ctx, 998238357]), + {State3, ResultReg} = ?BACKEND:call_func_ptr(State2, {free, FuncPtr}, [ + ctx, 0, 1, {free, {x_reg, 0}}, {free, ArgReg} + ]), + State4 = ?BACKEND:if_block(State3, {ResultReg, '==', 0}, fun(BSt0) -> + ?BACKEND:call_primitive_last(BSt0, ?PRIM_HANDLE_ERROR, [ctx, jit_state, offset]) + end), + State5 = ?BACKEND:move_to_vm_register(State4, ResultReg, {x_reg, 0}), + State6 = ?BACKEND:free_native_registers(State5, [ResultReg]), + ?BACKEND:assert_all_native_free(State6), + Stream = ?BACKEND:stream(State6), + Dump = + << + " 0: 02062f83 lw t6,32(a2)\n" + " 4: 1141 addi sp,sp,-16\n" + " 6: c006 sw ra,0(sp)\n" + " 8: c22a sw a0,4(sp)\n" + " a: c42e sw a1,8(sp)\n" + " c: c632 sw a2,12(sp)\n" + " e: 852e mv a0,a1\n" + " 10: 4589 li a1,2\n" + " 12: 9f82 jalr t6\n" + " 14: 8faa mv t6,a0\n" + " 16: 4082 lw ra,0(sp)\n" + " 18: 4512 lw a0,4(sp)\n" + " 1a: 45a2 lw a1,8(sp)\n" + " 1c: 4632 lw a2,12(sp)\n" + " 1e: 0141 addi sp,sp,16\n" + " 20: 03c62f03 lw t5,60(a2)\n" + " 24: 1101 addi sp,sp,-32\n" + " 26: c006 sw ra,0(sp)\n" + " 28: c22a sw a0,4(sp)\n" + " 2a: c42e sw a1,8(sp)\n" + " 2c: c632 sw a2,12(sp)\n" + " 2e: c87e sw t6,16(sp)\n" + " 30: 3b7ff5b7 lui a1,0x3b7ff\n" + " 34: 89558593 addi a1,a1,-1899 # 0x3b7fe895\n" + " 38: 9f02 jalr t5\n" + " 3a: 8f2a mv t5,a0\n" + " 3c: 4082 lw ra,0(sp)\n" + " 3e: 4512 lw a0,4(sp)\n" + " 40: 45a2 lw a1,8(sp)\n" + " 42: 4632 lw a2,12(sp)\n" + " 44: 4fc2 lw t6,16(sp)\n" + " 46: 02010113 addi sp,sp,32\n" + " 4a: 1141 addi sp,sp,-16\n" + " 4c: c006 sw ra,0(sp)\n" + " 4e: c22a sw a0,4(sp)\n" + " 50: c42e sw a1,8(sp)\n" + " 52: c632 sw a2,12(sp)\n" + " 54: 4581 li a1,0\n" + " 56: 4605 li a2,1\n" + " 58: 4d14 lw a3,24(a0)\n" + " 5a: 877a mv a4,t5\n" + " 5c: 9f82 jalr 
t6\n" + " 5e: 8faa mv t6,a0\n" + " 60: 4082 lw ra,0(sp)\n" + " 62: 4512 lw a0,4(sp)\n" + " 64: 45a2 lw a1,8(sp)\n" + " 66: 4632 lw a2,12(sp)\n" + " 68: 0141 addi sp,sp,16\n" + " 6a: 000f9763 bnez t6,0x78\n" + " 6e: 01862f83 lw t6,24(a2)\n" + " 72: 07200613 li a2,114\n" + " 76: 8f82 jr t6\n" + " 78: 01f52c23 sw t6,24(a0)" + >>, + ?assertEqual(dump_to_bin(Dump), Stream). + +get_list_test() -> + State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)), + {State1, Reg} = ?BACKEND:move_to_native_register(State0, {x_reg, 0}), + {State2, Reg} = ?BACKEND:and_(State1, {free, Reg}, ?TERM_PRIMARY_CLEAR_MASK), + State3 = ?BACKEND:move_array_element(State2, Reg, 1, {y_reg, 1}), + State4 = ?BACKEND:move_array_element(State3, Reg, 0, {y_reg, 0}), + State5 = ?BACKEND:free_native_registers(State4, [Reg]), + ?BACKEND:assert_all_native_free(State5), + Stream = ?BACKEND:stream(State5), + Dump = + << + "0: 01852f83 lw t6,24(a0)\n" + "4: 4f0d li t5,3\n" + "6: ffff4f13 not t5,t5\n" + "a: 01efffb3 and t6,t6,t5\n" + "e: 004fae83 lw t4,4(t6)\n" + "12: 01452f03 lw t5,20(a0)\n" + "16: 01df2223 sw t4,4(t5)\n" + "1a: 000fae83 lw t4,0(t6)\n" + "1e: 01452f03 lw t5,20(a0)\n" + "22: 01df2023 sw t4,0(t5)" + >>, + ?assertEqual(dump_to_bin(Dump), Stream). 
+ +is_integer_test() -> + State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)), + State1 = ?BACKEND:jump_table(State0, 1), + Label = 1, + Arg1 = {x_reg, 0}, + {State2, Reg} = ?BACKEND:move_to_native_register(State1, Arg1), + State3 = ?BACKEND:if_block( + State2, {Reg, '&', ?TERM_IMMED_TAG_MASK, '!=', ?TERM_INTEGER_TAG}, fun(MSt0) -> + MSt1 = ?BACKEND:if_block( + MSt0, {Reg, '&', ?TERM_PRIMARY_MASK, '!=', ?TERM_PRIMARY_BOXED}, fun(BSt0) -> + ?BACKEND:jump_to_label(BSt0, Label) + end + ), + {MSt2, Reg} = ?BACKEND:and_(MSt1, {free, Reg}, ?TERM_PRIMARY_CLEAR_MASK), + MSt3 = ?BACKEND:move_array_element(MSt2, Reg, 0, Reg), + ?BACKEND:if_block( + MSt3, + {{free, Reg}, '&', ?TERM_BOXED_TAG_MASK, '!=', ?TERM_BOXED_POSITIVE_INTEGER}, + fun(BSt0) -> + ?BACKEND:jump_to_label(BSt0, Label) + end + ) + end + ), + State4 = ?BACKEND:free_native_registers(State3, [Reg]), + ?BACKEND:assert_all_native_free(State4), + State5 = ?BACKEND:add_label(State4, Label, 16#100), + State6 = ?BACKEND:update_branches(State5), + Stream = ?BACKEND:stream(State6), + Dump = + << + " 0: ffff .insn 2, 0xffff\n" + " 2: ffff .insn 2, 0xffff\n" + " 4: ffff .insn 2, 0xffff\n" + " 6: ffff .insn 2, 0xffff\n" + " 8: 00000697 auipc a3,0x0\n" + " c: 0f868067 jr 248(a3) # 0x100\n" + " 10: 01852f83 lw t6,24(a0)\n" + " 14: ffffcf13 not t5,t6\n" + " 18: 0f72 slli t5,t5,0x1c\n" + " 1a: 020f0f63 beqz t5,0x58\n" + " 1e: 8f7e mv t5,t6\n" + " 20: 4e8d li t4,3\n" + " 22: 01df7f33 and t5,t5,t4\n" + " 26: 4e89 li t4,2\n" + " 28: 01df0663 beq t5,t4,0x34\n" + " 2c: a8d1 j 0x100\n" + " 2e: 0001 nop\n" + " 30: 00000013 nop\n" + " 34: 4f0d li t5,3\n" + " 36: ffff4f13 not t5,t5\n" + " 3a: 01efffb3 and t6,t6,t5\n" + " 3e: 000faf83 lw t6,0(t6)\n" + " 42: 03f00f13 li t5,63\n" + " 46: 01efffb3 and t6,t6,t5\n" + " 4a: 4f21 li t5,8\n" + " 4c: 01ef8663 beq t6,t5,0x58\n" + " 50: a845 j 0x100\n" + " 52: 0001 nop\n" + " 54: 00000013 nop" + >>, + ?assertEqual(dump_to_bin(Dump), Stream). 
+ +cond_jump_to_label(Cond, Label, MMod, MSt0) -> + MMod:if_block(MSt0, Cond, fun(BSt0) -> + MMod:jump_to_label(BSt0, Label) + end). + +%% Keep the unoptimized version to test the and case. +is_number_test() -> + State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)), + State1 = ?BACKEND:jump_table(State0, 1), + Label = 1, + Arg1 = {x_reg, 0}, + {State2, Reg} = ?BACKEND:move_to_native_register(State1, Arg1), + State3 = ?BACKEND:if_block( + State2, {Reg, '&', ?TERM_IMMED_TAG_MASK, '!=', ?TERM_INTEGER_TAG}, fun(BSt0) -> + BSt1 = cond_jump_to_label( + {Reg, '&', ?TERM_PRIMARY_MASK, '!=', ?TERM_PRIMARY_BOXED}, Label, ?BACKEND, BSt0 + ), + {BSt2, Reg} = ?BACKEND:and_(BSt1, {free, Reg}, ?TERM_PRIMARY_CLEAR_MASK), + BSt3 = ?BACKEND:move_array_element(BSt2, Reg, 0, Reg), + cond_jump_to_label( + {'and', [ + {Reg, '&', ?TERM_BOXED_TAG_MASK, '!=', ?TERM_BOXED_POSITIVE_INTEGER}, + {{free, Reg}, '&', ?TERM_BOXED_TAG_MASK, '!=', ?TERM_BOXED_FLOAT} + ]}, + Label, + ?BACKEND, + BSt3 + ) + end + ), + State4 = ?BACKEND:free_native_registers(State3, [Reg]), + ?BACKEND:assert_all_native_free(State4), + State5 = ?BACKEND:add_label(State4, Label, 16#100), + State6 = ?BACKEND:update_branches(State5), + Stream = ?BACKEND:stream(State6), + Dump = + << + " 0: ffff .insn 2, 0xffff\n" + " 2: ffff .insn 2, 0xffff\n" + " 4: ffff .insn 2, 0xffff\n" + " 6: ffff .insn 2, 0xffff\n" + " 8: 00000697 auipc a3,0x0\n" + " c: 0f868067 jr 248(a3) # 0x100\n" + " 10: 01852f83 lw t6,24(a0)\n" + " 14: ffffcf13 not t5,t6\n" + " 18: 0f72 slli t5,t5,0x1c\n" + " 1a: 040f0763 beqz t5,0x68\n" + " 1e: 8f7e mv t5,t6\n" + " 20: 4e8d li t4,3\n" + " 22: 01df7f33 and t5,t5,t4\n" + " 26: 4e89 li t4,2\n" + " 28: 01df0663 beq t5,t4,0x34\n" + " 2c: a8d1 j 0x100\n" + " 2e: 0001 nop\n" + " 30: 00000013 nop\n" + " 34: 4f0d li t5,3\n" + " 36: ffff4f13 not t5,t5\n" + " 3a: 01efffb3 and t6,t6,t5\n" + " 3e: 000faf83 lw t6,0(t6)\n" + " 42: 8f7e mv t5,t6\n" + " 44: 03f00e93 li t4,63\n" + " 48: 01df7f33 
and t5,t5,t4\n" + " 4c: 4ea1 li t4,8\n" + " 4e: 01df0d63 beq t5,t4,0x68\n" + " 52: 03f00f13 li t5,63\n" + " 56: 01efffb3 and t6,t6,t5\n" + " 5a: 4f61 li t5,24\n" + " 5c: 01ef8663 beq t6,t5,0x68\n" + " 60: a045 j 0x100\n" + " 62: 0001 nop\n" + " 64: 00000013 nop" + >>, + ?assertEqual(dump_to_bin(Dump), Stream). + +is_boolean_test() -> + State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)), + State1 = ?BACKEND:jump_table(State0, 1), + Label = 1, + {State2, Reg} = ?BACKEND:move_to_native_register(State1, {x_reg, 0}), + State3 = ?BACKEND:if_block(State2, {Reg, '!=', ?TRUE_ATOM}, fun(BSt0) -> + ?BACKEND:if_block(BSt0, {Reg, '!=', ?FALSE_ATOM}, fun(BSt1) -> + ?BACKEND:jump_to_label(BSt1, Label) + end) + end), + State4 = ?BACKEND:free_native_registers(State3, [Reg]), + ?BACKEND:assert_all_native_free(State4), + State5 = ?BACKEND:add_label(State4, Label, 16#100), + State6 = ?BACKEND:update_branches(State5), + Stream = ?BACKEND:stream(State6), + Dump = << + " 0: ffff .insn 2, 0xffff\n" + " 2: ffff .insn 2, 0xffff\n" + " 4: ffff .insn 2, 0xffff\n" + " 6: ffff .insn 2, 0xffff\n" + " 8: 00000697 auipc a3,0x0\n" + " c: 0f868067 jr 248(a3) # 0x100\n" + " 10: 01852f83 lw t6,24(a0)\n" + " 14: 04b00f13 li t5,75\n" + " 18: 01ef8963 beq t6,t5,0x2a\n" + " 1c: 4f2d li t5,11\n" + " 1e: 01ef8663 beq t6,t5,0x2a\n" + " 22: a8f9 j 0x100\n" + " 24: 0001 nop\n" + " 26: 00000013 nop" + >>, + ?assertEqual(dump_to_bin(Dump), Stream). 
+ +is_boolean_far_test() -> + State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)), + Label = 1, + State1 = ?BACKEND:jump_table(State0, 1), + {State2, Reg} = ?BACKEND:move_to_native_register(State1, {x_reg, 0}), + State3 = ?BACKEND:if_block(State2, {Reg, '!=', ?TRUE_ATOM}, fun(BSt0) -> + ?BACKEND:if_block(BSt0, {Reg, '!=', ?FALSE_ATOM}, fun(BSt1) -> + ?BACKEND:jump_to_label(BSt1, Label) + end) + end), + State4 = ?BACKEND:free_native_registers(State3, [Reg]), + ?BACKEND:assert_all_native_free(State4), + State5 = ?BACKEND:add_label(State4, Label, 16#1000), + State6 = ?BACKEND:update_branches(State5), + Stream = ?BACKEND:stream(State6), + Dump = + << + " 0: ffff .insn 2, 0xffff\n" + " 2: ffff .insn 2, 0xffff\n" + " 4: ffff .insn 2, 0xffff\n" + " 6: ffff .insn 2, 0xffff\n" + " 8: 00001697 auipc a3,0x1\n" + " c: ff868067 jr -8(a3) # 0x1000\n" + " 10: 01852f83 lw t6,24(a0)\n" + " 14: 04b00f13 li t5,75\n" + " 18: 01ef8963 beq t6,t5,0x2a\n" + " 1c: 4f2d li t5,11\n" + " 1e: 01ef8663 beq t6,t5,0x2a\n" + " 22: 7df0006f j 0x1000\n" + " 26: 00000013 nop" + >>, + ?assertEqual(dump_to_bin(Dump), Stream). 
+ +is_boolean_far_known_test() -> + State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)), + State1 = ?BACKEND:jump_table(State0, 1), + Label = 1, + State2 = ?BACKEND:add_label(State1, Label, 16#1000), + {State3, Reg} = ?BACKEND:move_to_native_register(State2, {x_reg, 0}), + State4 = ?BACKEND:if_block(State3, {Reg, '!=', ?TRUE_ATOM}, fun(BSt0) -> + ?BACKEND:if_block(BSt0, {Reg, '!=', ?FALSE_ATOM}, fun(BSt1) -> + ?BACKEND:jump_to_label(BSt1, Label) + end) + end), + State5 = ?BACKEND:free_native_registers(State4, [Reg]), + ?BACKEND:assert_all_native_free(State5), + State6 = ?BACKEND:update_branches(State5), + Stream = ?BACKEND:stream(State6), + Dump = + << + " 0: ffff .insn 2, 0xffff\n" + " 2: ffff .insn 2, 0xffff\n" + " 4: ffff .insn 2, 0xffff\n" + " 6: ffff .insn 2, 0xffff\n" + " 8: 00001697 auipc a3,0x1\n" + " c: ff868067 jr -8(a3) # 0x1000\n" + " 10: 01852f83 lw t6,24(a0)\n" + " 14: 04b00f13 li t5,75\n" + " 18: 01ef8963 beq t6,t5,0x2a\n" + " 1c: 4f2d li t5,11\n" + " 1e: 01ef8663 beq t6,t5,0x2a\n" + " 22: 00001f17 auipc t5,0x1\n" + " 26: fdef0067 jr -34(t5) # 0x1000" + >>, + ?assertEqual(dump_to_bin(Dump), Stream). 
+
+%% Test OP_WAIT_TIMEOUT pattern that uses set_continuation_to_offset and continuation_entry_point
+wait_timeout_test() ->
+    State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)),
+
+    Label = 42,
+    {State1, OffsetRef0} = ?BACKEND:set_continuation_to_offset(State0),
+    {State2, TimeoutReg} = ?BACKEND:move_to_native_register(State1, 5000),
+    State3 = ?BACKEND:call_primitive_last(State2, ?PRIM_WAIT_TIMEOUT, [
+        ctx, jit_state, {free, TimeoutReg}, Label
+    ]),
+    State4 = ?BACKEND:add_label(State3, OffsetRef0), % binds the reference returned by set_continuation_to_offset
+    State5 = ?BACKEND:continuation_entry_point(State4),
+    {State6, ResultReg0} = ?BACKEND:call_primitive(State5, ?PRIM_PROCESS_SIGNAL_MESSAGES, [
+        ctx, jit_state
+    ]),
+    State7 = ?BACKEND:return_if_not_equal_to_ctx(State6, {free, ResultReg0}),
+    % The literal 2 passed below presumably stands for ?WAITING_TIMEOUT_EXPIRED - TODO confirm
+    {State8, ResultReg1} = ?BACKEND:call_primitive(State7, ?PRIM_CONTEXT_GET_FLAGS, [ctx, 2]),
+    State9 = ?BACKEND:if_block(State8, {{free, ResultReg1}, '==', 0}, fun(BlockSt) ->
+        ?BACKEND:call_primitive_last(BlockSt, ?PRIM_WAIT_TIMEOUT_TRAP_HANDLER, [
+            ctx, jit_state, Label
+        ])
+    end),
+    State10 = ?BACKEND:update_branches(State9),
+
+    Stream = ?BACKEND:stream(State10),
+    Dump =
+        <<
+            " 0: 00000f97 auipc t6,0x0\n"
+            " 4: 0ff9 addi t6,t6,30 # 0x1e\n"
+            " 6: 0001 nop\n"
+            " 8: 01f5a223 sw t6,4(a1)\n"
+            " c: 6f85 lui t6,0x1\n"
+            " e: 388f8f93 addi t6,t6,904 # 0x1388\n"
+            " 12: 07862f03 lw t5,120(a2)\n"
+            " 16: 867e mv a2,t6\n"
+            " 18: 02a00693 li a3,42\n"
+            " 1c: 8f02 jr t5\n"
+            " 1e: 05462f83 lw t6,84(a2)\n"
+            " 22: 1141 addi sp,sp,-16\n"
+            " 24: c006 sw ra,0(sp)\n"
+            " 26: c22a sw a0,4(sp)\n"
+            " 28: c42e sw a1,8(sp)\n"
+            " 2a: c632 sw a2,12(sp)\n"
+            " 2c: 9f82 jalr t6\n"
+            " 2e: 8faa mv t6,a0\n"
+            " 30: 4082 lw ra,0(sp)\n"
+            " 32: 4512 lw a0,4(sp)\n"
+            " 34: 45a2 lw a1,8(sp)\n"
+            " 36: 4632 lw a2,12(sp)\n"
+            " 38: 0141 addi sp,sp,16\n"
+            " 3a: 00af8463 beq t6,a0,0x42\n"
+            " 3e: 857e mv a0,t6\n"
+            " 40: 8082 ret\n"
+            " 42: 08400f93 li t6,132\n"
+            " 46: 9fb2 add t6,t6,a2\n"
+            " 48: 000faf83 lw t6,0(t6)\n"
+            " 4c: 1141 addi sp,sp,-16\n"
+            " 4e: c006 sw ra,0(sp)\n"
+            " 50: c22a sw a0,4(sp)\n"
+            " 52: c42e sw a1,8(sp)\n"
+            " 54: c632 sw a2,12(sp)\n"
+            " 56: 4589 li a1,2\n"
+            " 58: 9f82 jalr t6\n"
+            " 5a: 8faa mv t6,a0\n"
+            " 5c: 4082 lw ra,0(sp)\n"
+            " 5e: 4512 lw a0,4(sp)\n"
+            " 60: 45a2 lw a1,8(sp)\n"
+            " 62: 4632 lw a2,12(sp)\n"
+            " 64: 0141 addi sp,sp,16\n"
+            " 66: 000f9763 bnez t6,0x74\n"
+            " 6a: 07c62f83 lw t6,124(a2)\n"
+            " 6e: 02a00613 li a2,42\n"
+            " 72: 8f82 jr t6"
+        >>,
+    ?assertEqual(dump_to_bin(Dump), Stream).
+
+%% Test OP_WAIT pattern that uses set_continuation_to_label with a label whose offset (16#100) is only added afterwards
+wait_test() ->
+    State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)),
+
+    State1 = ?BACKEND:jump_table(State0, 5),
+    State2 = ?BACKEND:add_label(State1, 1),
+    Label = 2,
+    State3 = ?BACKEND:set_continuation_to_label(State2, Label),
+    State4 = ?BACKEND:call_primitive_last(State3, ?PRIM_SCHEDULE_WAIT_CP, [ctx, jit_state]),
+    State5 = ?BACKEND:add_label(State4, Label, 16#100), % label offset becomes known only here
+    State6 = ?BACKEND:update_branches(State5),
+
+    Stream = ?BACKEND:stream(State6),
+    Dump =
+        <<
+            " 0: ffff .insn 2, 0xffff\n"
+            " 2: ffff .insn 2, 0xffff\n"
+            " 4: ffff .insn 2, 0xffff\n"
+            " 6: ffff .insn 2, 0xffff\n"
+            " 8: 00000697 auipc a3,0x0\n"
+            " c: 02868067 jr 40(a3) # 0x30\n"
+            " 10: 00000697 auipc a3,0x0\n"
+            " 14: 0f068067 jr 240(a3) # 0x100\n"
+            " 18: ffff .insn 2, 0xffff\n"
+            " 1a: ffff .insn 2, 0xffff\n"
+            " 1c: ffff .insn 2, 0xffff\n"
+            " 1e: ffff .insn 2, 0xffff\n"
+            " 20: ffff .insn 2, 0xffff\n"
+            " 22: ffff .insn 2, 0xffff\n"
+            " 24: ffff .insn 2, 0xffff\n"
+            " 26: ffff .insn 2, 0xffff\n"
+            " 28: ffff .insn 2, 0xffff\n"
+            " 2a: ffff .insn 2, 0xffff\n"
+            " 2c: ffff .insn 2, 0xffff\n"
+            " 2e: ffff .insn 2, 0xffff\n"
+            " 30: 00000f97 auipc t6,0x0\n"
+            " 34: 0d0f8f93 addi t6,t6,208 # 0x100\n"
+            " 38: 01f5a223 sw t6,4(a1)\n"
+            " 3c: 07462f83 lw t6,116(a2)\n"
+            " 40: 8f82 jr t6"
+        >>,
+    ?assertEqual(dump_to_bin(Dump), Stream).
+
+%% Test set_continuation_to_label with a label that is already known (added at 16#100 before the call)
+wait_known_test() ->
+    State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)),
+
+    State1 = ?BACKEND:jump_table(State0, 5),
+    State2 = ?BACKEND:add_label(State1, 1),
+    Label = 2,
+    State3 = ?BACKEND:add_label(State2, Label, 16#100), % label offset is known before it is referenced
+    State4 = ?BACKEND:set_continuation_to_label(State3, Label),
+    State5 = ?BACKEND:call_primitive_last(State4, ?PRIM_SCHEDULE_WAIT_CP, [ctx, jit_state]),
+    State6 = ?BACKEND:update_branches(State5),
+
+    Stream = ?BACKEND:stream(State6),
+    Dump =
+        <<
+            " 0: ffff .insn 2, 0xffff\n"
+            " 2: ffff .insn 2, 0xffff\n"
+            " 4: ffff .insn 2, 0xffff\n"
+            " 6: ffff .insn 2, 0xffff\n"
+            " 8: 00000697 auipc a3,0x0\n"
+            " c: 02868067 jr 40(a3) # 0x30\n"
+            " 10: 00000697 auipc a3,0x0\n"
+            " 14: 0f068067 jr 240(a3) # 0x100\n"
+            " 18: ffff .insn 2, 0xffff\n"
+            " 1a: ffff .insn 2, 0xffff\n"
+            " 1c: ffff .insn 2, 0xffff\n"
+            " 1e: ffff .insn 2, 0xffff\n"
+            " 20: ffff .insn 2, 0xffff\n"
+            " 22: ffff .insn 2, 0xffff\n"
+            " 24: ffff .insn 2, 0xffff\n"
+            " 26: ffff .insn 2, 0xffff\n"
+            " 28: ffff .insn 2, 0xffff\n"
+            " 2a: ffff .insn 2, 0xffff\n"
+            " 2c: ffff .insn 2, 0xffff\n"
+            " 2e: ffff .insn 2, 0xffff\n"
+            " 30: 00000f97 auipc t6,0x0\n"
+            " 34: 0d0f8f93 addi t6,t6,208 # 0x100\n"
+            " 38: 01f5a223 sw t6,4(a1)\n"
+            " 3c: 07462f83 lw t6,116(a2)\n"
+            " 40: 8f82 jr t6"
+        >>,
+    ?assertEqual(dump_to_bin(Dump), Stream).
+
+%% Test return_labels_and_lines/2 function
+return_labels_and_lines_test() ->
+    State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)),
+    State1 = ?BACKEND:jump_table(State0, 2),
+
+    % Register two labels (added out of order: label 2 at offset 32, then label 1 at offset 16)
+    State2 = ?BACKEND:add_label(State1, 2, 32),
+    State3 = ?BACKEND:add_label(State2, 1, 16),
+
+    % {Line, Offset} pairs
+    SortedLines = [{10, 16}, {20, 32}],
+
+    State4 = ?BACKEND:return_labels_and_lines(State3, SortedLines),
+    Stream = ?BACKEND:stream(State4),
+
+    % Should have jump table + generated code with label/line tables
+    ?assert(byte_size(Stream) >= 32),
+
+    % Expected: jump table (3 entries, 24 bytes) + auipc + addi + ret + padding + labels table + lines table; from 0x22 on the mnemonics are just how the table data disassembles
+    Dump =
+        <<
+            " 0: ffff .insn 2, 0xffff\n"
+            " 2: ffff .insn 2, 0xffff\n"
+            " 4: ffff .insn 2, 0xffff\n"
+            " 6: ffff .insn 2, 0xffff\n"
+            " 8: 00000697 auipc a3,0x0\n"
+            " c: 00868067 jr 8(a3) # 0x10\n"
+            " 10: 00000697 auipc a3,0x0\n"
+            " 14: 01068067 jr 16(a3) # 0x20\n"
+            " 18: 00000517 auipc a0,0x0\n"
+            " 1c: 0529 addi a0,a0,10 # 0x22\n"
+            " 1e: 8082 ret\n"
+            " 20: ffff .insn 2, 0xffff\n"
+            " 22: 0200 addi s0,sp,256\n"
+            " 24: 0100 addi s0,sp,128\n"
+            " 26: 0000 unimp\n"
+            " 28: 1000 addi s0,sp,32\n"
+            " 2a: 0200 addi s0,sp,256\n"
+            " 2c: 0000 unimp\n"
+            " 2e: 2000 fld fs0,0(s0)\n"
+            " 30: 0200 addi s0,sp,256\n"
+            " 32: 0a00 addi s0,sp,272\n"
+            " 34: 0000 unimp\n"
+            " 36: 1000 addi s0,sp,32\n"
+            " 38: 1400 addi s0,sp,544\n"
+            " 3a: 0000 unimp\n"
+            " 3c: 2000 fld fs0,0(s0)"
+        >>,
+    ?assertEqual(dump_to_bin(Dump), Stream).
+
+%% Test call_func_ptr with a {free, {x_reg, X}} argument; the function pointer itself comes from call_primitive
+gc_bif2_test() ->
+    State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)),
+    {State1, FuncPtr} = ?BACKEND:call_primitive(State0, ?PRIM_GET_IMPORTED_BIF, [jit_state, 42]),
+    {State2, _ResultReg} = ?BACKEND:call_func_ptr(State1, {free, FuncPtr}, [
+        ctx, 0, 3, {y_reg, 0}, {free, {x_reg, 0}}
+    ]),
+
+    Stream = ?BACKEND:stream(State2),
+    Dump =
+        <<
+            " 0: 02062f83 lw t6,32(a2)\n"
+            " 4: 1141 addi sp,sp,-16\n"
+            " 6: c006 sw ra,0(sp)\n"
+            " 8: c22a sw a0,4(sp)\n"
+            " a: c42e sw a1,8(sp)\n"
+            " c: c632 sw a2,12(sp)\n"
+            " e: 852e mv a0,a1\n"
+            " 10: 02a00593 li a1,42\n"
+            " 14: 9f82 jalr t6\n"
+            " 16: 8faa mv t6,a0\n"
+            " 18: 4082 lw ra,0(sp)\n"
+            " 1a: 4512 lw a0,4(sp)\n"
+            " 1c: 45a2 lw a1,8(sp)\n"
+            " 1e: 4632 lw a2,12(sp)\n"
+            " 20: 0141 addi sp,sp,16\n"
+            " 22: 1141 addi sp,sp,-16\n"
+            " 24: c006 sw ra,0(sp)\n"
+            " 26: c22a sw a0,4(sp)\n"
+            " 28: c42e sw a1,8(sp)\n"
+            " 2a: c632 sw a2,12(sp)\n"
+            " 2c: 4581 li a1,0\n"
+            " 2e: 460d li a2,3\n"
+            " 30: 01452f03 lw t5,20(a0)\n"
+            " 34: 000f2683 lw a3,0(t5)\n"
+            " 38: 4d18 lw a4,24(a0)\n"
+            " 3a: 9f82 jalr t6\n"
+            " 3c: 8faa mv t6,a0\n"
+            " 3e: 4082 lw ra,0(sp)\n"
+            " 40: 4512 lw a0,4(sp)\n"
+            " 42: 45a2 lw a1,8(sp)\n"
+            " 44: 4632 lw a2,12(sp)\n"
+            " 46: 0141 addi sp,sp,16"
+        >>,
+    ?assertEqual(dump_to_bin(Dump), Stream).
+
+%% Test call_primitive when a {free, Reg} argument value is in a1 (the expected code stages it through t5 before moving it to a2)
+memory_ensure_free_with_roots_test() ->
+    State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)),
+    {State1, _FuncPtr} = ?BACKEND:call_primitive(State0, ?PRIM_MEMORY_ENSURE_FREE_WITH_ROOTS, [
+        ctx, jit_state, {free, a1}, 4, 1
+    ]),
+
+    Stream = ?BACKEND:stream(State1),
+    Dump =
+        <<
+            " 0: 0b000f93 li t6,176\n"
+            " 4: 9fb2 add t6,t6,a2\n"
+            " 6: 000faf83 lw t6,0(t6)\n"
+            " a: 1141 addi sp,sp,-16\n"
+            " c: c006 sw ra,0(sp)\n"
+            " e: c22a sw a0,4(sp)\n"
+            " 10: c42e sw a1,8(sp)\n"
+            " 12: c632 sw a2,12(sp)\n"
+            " 14: 8f2e mv t5,a1\n"
+            " 16: 867a mv a2,t5\n"
+            " 18: 4691 li a3,4\n"
+            " 1a: 4705 li a4,1\n"
+            " 1c: 9f82 jalr t6\n"
+            " 1e: 8faa mv t6,a0\n"
+            " 20: 4082 lw ra,0(sp)\n"
+            " 22: 4512 lw a0,4(sp)\n"
+            " 24: 45a2 lw a1,8(sp)\n"
+            " 26: 4632 lw a2,12(sp)\n"
+            " 28: 0141 addi sp,sp,16"
+        >>,
+    ?assertEqual(dump_to_bin(Dump), Stream).
+
+call_ext_test() -> % reduction decrement/reschedule check followed by call_primitive_with_cp on primitive 4
+    State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)),
+    State1 = ?BACKEND:decrement_reductions_and_maybe_schedule_next(State0),
+    State2 = ?BACKEND:call_primitive_with_cp(State1, 4, [ctx, jit_state, 2, 5, -1]),
+    ?BACKEND:assert_all_native_free(State2),
+    Stream = ?BACKEND:stream(State2),
+    Dump =
+        <<
+            " 0: 0085af83 lw t6,8(a1)\n"
+            " 4: 1ffd addi t6,t6,-1\n"
+            " 6: 01f5a423 sw t6,8(a1)\n"
+            " a: 000f9b63 bnez t6,0x20\n"
+            " e: 00000f97 auipc t6,0x0\n"
+            " 12: 0fc9 addi t6,t6,18 # 0x20\n"
+            " 14: 0001 nop\n"
+            " 16: 01f5a223 sw t6,4(a1)\n"
+            " 1a: 00862f83 lw t6,8(a2)\n"
+            " 1e: 8f82 jr t6\n"
+            " 20: 0005af03 lw t5,0(a1)\n"
+            " 24: 000f2f03 lw t5,0(t5)\n"
+            " 28: 0f62 slli t5,t5,0x18\n"
+            " 2a: 11800f93 li t6,280\n"
+            " 2e: 00000013 nop\n"
+            " 32: 01ff6f33 or t5,t5,t6\n"
+            " 36: 05e52e23 sw t5,92(a0)\n"
+            " 3a: 01062f83 lw t6,16(a2)\n"
+            " 3e: 4609 li a2,2\n"
+            " 40: 4695 li a3,5\n"
+            " 42: 577d li a4,-1\n"
+            " 44: 8f82 jr t6"
+        >>,
+    ?assertEqual(dump_to_bin(Dump), Stream).
+
+call_fun_test() -> % boxed/fun tag checks on x_reg 0 (raising badfun on mismatch), then PRIM_CALL_FUN via call_primitive_with_cp
+    State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)),
+    State1 = ?BACKEND:decrement_reductions_and_maybe_schedule_next(State0),
+    FuncReg = {x_reg, 0},
+    ArgsCount = 0,
+    {State2, Reg} = ?BACKEND:move_to_native_register(State1, FuncReg),
+    {State3, RegCopy} = ?BACKEND:copy_to_native_register(State2, Reg),
+    State4 = ?BACKEND:if_block(
+        State3, {RegCopy, '&', ?TERM_PRIMARY_MASK, '!=', ?TERM_PRIMARY_BOXED}, fun(BSt0) ->
+            ?BACKEND:call_primitive_last(BSt0, ?PRIM_RAISE_ERROR_TUPLE, [
+                ctx, jit_state, offset, ?BADFUN_ATOM, RegCopy
+            ])
+        end
+    ),
+    {State5, RegCopy} = ?BACKEND:and_(State4, {free, RegCopy}, ?TERM_PRIMARY_CLEAR_MASK), % match asserts and_ returns the same register
+    State6 = ?BACKEND:move_array_element(State5, RegCopy, 0, RegCopy),
+    State7 = ?BACKEND:if_block(
+        State6, {RegCopy, '&', ?TERM_BOXED_TAG_MASK, '!=', ?TERM_BOXED_FUN}, fun(BSt0) ->
+            ?BACKEND:call_primitive_last(BSt0, ?PRIM_RAISE_ERROR_TUPLE, [
+                ctx, jit_state, offset, ?BADFUN_ATOM, RegCopy
+            ])
+        end
+    ),
+    State8 = ?BACKEND:free_native_registers(State7, [RegCopy]),
+    State9 = ?BACKEND:call_primitive_with_cp(State8, ?PRIM_CALL_FUN, [
+        ctx, jit_state, Reg, ArgsCount
+    ]),
+    ?BACKEND:assert_all_native_free(State9),
+    Stream = ?BACKEND:stream(State9),
+    Dump =
+        <<
+            " 0: 0085af83 lw t6,8(a1)\n"
+            " 4: 1ffd addi t6,t6,-1\n"
+            " 6: 01f5a423 sw t6,8(a1)\n"
+            " a: 000f9b63 bnez t6,0x20\n"
+            " e: 00000f97 auipc t6,0x0\n"
+            " 12: 0fc9 addi t6,t6,18 # 0x20\n"
+            " 14: 0001 nop\n"
+            " 16: 01f5a223 sw t6,4(a1)\n"
+            " 1a: 00862f83 lw t6,8(a2)\n"
+            " 1e: 8f82 jr t6\n"
+            " 20: 01852f83 lw t6,24(a0)\n"
+            " 24: 8f7e mv t5,t6\n"
+            " 26: 8efa mv t4,t5\n"
+            " 28: 4e0d li t3,3\n"
+            " 2a: 01cefeb3 and t4,t4,t3\n"
+            " 2e: 4e09 li t3,2\n"
+            " 30: 01ce8a63 beq t4,t3,0x44\n"
+            " 34: 04c62f83 lw t6,76(a2)\n"
+            " 38: 03800613 li a2,56\n"
+            " 3c: 18b00693 li a3,395\n"
+            " 40: 877a mv a4,t5\n"
+            " 42: 8f82 jr t6\n"
+            " 44: 4e8d li t4,3\n"
+            " 46: fffece93 not t4,t4\n"
+            " 4a: 01df7f33 and t5,t5,t4\n"
+            " 4e: 000f2f03 lw t5,0(t5)\n"
+            " 52: 8efa mv t4,t5\n"
+            " 54: 03f00e13 li t3,63\n"
+            " 58: 01cefeb3 and t4,t4,t3\n"
+            " 5c: 4e51 li t3,20\n"
+            " 5e: 01ce8a63 beq t4,t3,0x72\n"
+            " 62: 04c62f83 lw t6,76(a2)\n"
+            " 66: 06600613 li a2,102\n"
+            " 6a: 18b00693 li a3,395\n"
+            " 6e: 877a mv a4,t5\n"
+            " 70: 8f82 jr t6\n"
+            " 72: 0005ae83 lw t4,0(a1)\n"
+            " 76: 000eae83 lw t4,0(t4)\n"
+            " 7a: 0ee2 slli t4,t4,0x18\n"
+            " 7c: 27000f13 li t5,624\n"
+            " 80: 00000013 nop\n"
+            " 84: 01eeeeb3 or t4,t4,t5\n"
+            " 88: 05d52e23 sw t4,92(a0)\n"
+            " 8c: 08000f13 li t5,128\n"
+            " 90: 9f32 add t5,t5,a2\n"
+            " 92: 000f2f03 lw t5,0(t5)\n"
+            " 96: 867e mv a2,t6\n"
+            " 98: 4681 li a3,0\n"
+            " 9a: 8f02 jr t5"
+        >>,
+    ?assertEqual(dump_to_bin(Dump), Stream).
+
+move_to_vm_register_test0(State, Source, Dest, Dump) -> % helper: emit the move plus a jump to 16#100, compare with Dump
+    State1 = ?BACKEND:move_to_vm_register(State, Source, Dest),
+    State2 = ?BACKEND:jump_to_offset(State1, 16#100),
+    Stream = ?BACKEND:stream(State2),
+    ?assertEqual(dump_to_bin(Dump), Stream).
+
+move_to_vm_register_test_() -> % EUnit setup fixture: every case receives the state built by the setup fun
+    {setup,
+        fun() ->
+            ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0))
+        end,
+        fun(State0) ->
+            [
+                ?_test(begin
+                    move_to_vm_register_test0(State0, 0, {x_reg, 0}, <<
+                        " 0: 4f81 li t6,0\n"
+                        " 2: 01f52c23 sw t6,24(a0)\n"
+                        " 6: a8ed j 0x100"
+                    >>)
+                end),
+                ?_test(begin
+                    move_to_vm_register_test0(State0, 0, {x_reg, extra}, <<
+                        " 0: 4f81 li t6,0\n"
+                        " 2: 05f52c23 sw t6,88(a0)\n"
+                        " 6: a8ed j 0x100"
+                    >>)
+                end),
+                ?_test(begin
+                    move_to_vm_register_test0(State0, 0, {ptr, t5}, <<
+                        " 0: 4f81 li t6,0\n"
+                        " 2: 01ff2023 sw t6,0(t5)\n"
+                        " 6: a8ed j 0x100"
+                    >>)
+                end),
+                ?_test(begin
+                    move_to_vm_register_test0(State0, 0, {y_reg, 2}, <<
+                        " 0: 4f01 li t5,0\n"
+                        " 2: 01452f83 lw t6,20(a0)\n"
+                        " 6: 01efa423 sw t5,8(t6)\n"
+                        " a: a8dd j 0x100"
+                    >>)
+                end),
+                ?_test(begin
+                    move_to_vm_register_test0(State0, 0, {y_reg, 20}, <<
+                        " 0: 4f01 li t5,0\n"
+                        " 2: 01452f83 lw t6,20(a0)\n"
+                        " 6: 05efa823 sw t5,80(t6)\n"
+                        " a: a8dd j 0x100"
+                    >>)
+                end),
+                %% Test: Immediate to x_reg
+                ?_test(begin
+                    move_to_vm_register_test0(State0, 42, {x_reg, 0}, <<
+                        " 0: 02a00f93 li t6,42\n"
+                        " 4: 01f52c23 sw t6,24(a0)\n"
+                        " 8: a8e5 j 0x100"
+                    >>)
+                end),
+                ?_test(begin
+                    move_to_vm_register_test0(State0, 42, {x_reg, extra}, <<
+                        " 0: 02a00f93 li t6,42\n"
+                        " 4: 05f52c23 sw t6,88(a0)\n"
+                        " 8: a8e5 j 0x100"
+                    >>)
+                end),
+                ?_test(begin
+                    move_to_vm_register_test0(State0, 42, {y_reg, 2}, <<
+                        " 0: 02a00f13 li t5,42\n"
+                        " 4: 01452f83 lw t6,20(a0)\n"
+                        " 8: 01efa423 sw t5,8(t6)\n"
+                        " c: a8d5 j 0x100"
+                    >>)
+                end),
+                ?_test(begin
+                    move_to_vm_register_test0(State0, 42, {y_reg, 20}, <<
+                        " 0: 02a00f13 li t5,42\n"
+                        " 4: 01452f83 lw t6,20(a0)\n"
+                        " 8: 05efa823 sw t5,80(t6)\n"
+                        " c: a8d5 j 0x100"
+                    >>)
+                end),
+                %% Test: Immediate to ptr
+                ?_test(begin
+                    move_to_vm_register_test0(State0, 99, {ptr, a3}, <<
+                        " 0: 06300f93 li t6,99\n"
+                        " 4: 01f6a023 sw t6,0(a3)\n"
+                        " 8: a8e5 j 0x100"
+                    >>)
+                end),
+                %% Test: x_reg to x_reg
+                ?_test(begin
+                    move_to_vm_register_test0(State0, {x_reg, 1}, {x_reg, 2}, <<
+                        " 0: 01c52f83 lw t6,28(a0)\n"
+                        " 4: 03f52023 sw t6,32(a0)\n"
+                        " 8: a8e5 j 0x100"
+                    >>)
+                end),
+                %% Test: x_reg to ptr
+                ?_test(begin
+                    move_to_vm_register_test0(State0, {x_reg, 1}, {ptr, a1}, <<
+                        " 0: 01c52f83 lw t6,28(a0)\n"
+                        " 4: 01f5a023 sw t6,0(a1)\n"
+                        " 8: a8e5 j 0x100"
+                    >>)
+                end),
+                %% Test: ptr to x_reg
+                ?_test(begin
+                    move_to_vm_register_test0(State0, {ptr, t3}, {x_reg, 3}, <<
+                        " 0: 000e2f83 lw t6,0(t3)\n"
+                        " 4: 03f52223 sw t6,36(a0)\n"
+                        " 8: a8e5 j 0x100"
+                    >>)
+                end),
+                %% Test: x_reg to y_reg
+                ?_test(begin
+                    move_to_vm_register_test0(State0, {x_reg, 0}, {y_reg, 1}, <<
+                        " 0: 01852f83 lw t6,24(a0)\n"
+                        " 4: 01452f03 lw t5,20(a0)\n"
+                        " 8: 01ff2223 sw t6,4(t5)\n"
+                        " c: a8d5 j 0x100"
+                    >>)
+                end),
+                %% Test: y_reg to x_reg
+                ?_test(begin
+                    move_to_vm_register_test0(State0, {y_reg, 0}, {x_reg, 3}, <<
+                        " 0: 01452f03 lw t5,20(a0)\n"
+                        " 4: 000f2f83 lw t6,0(t5)\n"
+                        " 8: 03f52223 sw t6,36(a0)\n"
+                        " c: a8d5 j 0x100"
+                    >>)
+                end),
+                %% Test: y_reg to x_reg with a non-zero y_reg index (NOTE(review): this case was labelled "y_reg to y_reg" but its destination is {x_reg, 3})
+                ?_test(begin
+                    move_to_vm_register_test0(State0, {y_reg, 1}, {x_reg, 3}, <<
+                        " 0: 01452f03 lw t5,20(a0)\n"
+                        " 4: 004f2f83 lw t6,4(t5)\n"
+                        " 8: 03f52223 sw t6,36(a0)\n"
+                        " c: a8d5 j 0x100"
+                    >>)
+                end),
+                %% Test: Native register to x_reg
+                ?_test(begin
+                    move_to_vm_register_test0(State0, t4, {x_reg, 0}, <<
+                        " 0: 01d52c23 sw t4,24(a0)\n"
+                        " 4: a8f5 j 0x100"
+                    >>)
+                end),
+                ?_test(begin
+                    move_to_vm_register_test0(State0, t5, {x_reg, extra}, <<
+                        " 0: 05e52c23 sw t5,88(a0)\n"
+                        " 4: a8f5 j 0x100"
+                    >>)
+                end),
+                %% Test: Native register to ptr
+                ?_test(begin
+                    move_to_vm_register_test0(State0, t3, {ptr, a3}, <<
+                        " 0: 01c6a023 sw t3,0(a3)\n"
+                        " 4: a8f5 j 0x100"
+                    >>)
+                end),
+                %% Test: Native register to y_reg
+                ?_test(begin
+                    move_to_vm_register_test0(State0, a1, {y_reg, 0}, <<
+                        " 0: 01452f83 lw t6,20(a0)\n"
+                        " 4: 00bfa023 sw a1,0(t6)\n"
+                        " 8: a8e5 j 0x100"
+                    >>)
+                end),
+                %% Test: Large immediate to x_reg (uses lui + addi in RISC-V)
+                ?_test(begin
+                    move_to_vm_register_test0(State0, 16#12345678, {x_reg, 0}, <<
+                        " 0: 12345fb7 lui t6,0x12345\n"
+                        " 4: 678f8f93 addi t6,t6,1656 # 0x12345678\n"
+                        " 8: 01f52c23 sw t6,24(a0)\n"
+                        " c: a8d5 j 0x100"
+                    >>)
+                end),
+                ?_test(begin
+                    move_to_vm_register_test0(State0, 16#12345678, {x_reg, extra}, <<
+                        " 0: 12345fb7 lui t6,0x12345\n"
+                        " 4: 678f8f93 addi t6,t6,1656 # 0x12345678\n"
+                        " 8: 05f52c23 sw t6,88(a0)\n"
+                        " c: a8d5 j 0x100"
+                    >>)
+                end),
+                ?_test(begin
+                    move_to_vm_register_test0(State0, 16#12345678, {y_reg, 2}, <<
+                        " 0: 12345fb7 lui t6,0x12345\n"
+                        " 4: 678f8f93 addi t6,t6,1656 # 0x12345678\n"
+                        " 8: 01452f03 lw t5,20(a0)\n"
+                        " c: 01ff2423 sw t6,8(t5)\n"
+                        " 10: a8c5 j 0x100"
+                    >>)
+                end),
+                ?_test(begin
+                    move_to_vm_register_test0(State0, 16#12345678, {y_reg, 20}, <<
+                        " 0: 12345fb7 lui t6,0x12345\n"
+                        " 4: 678f8f93 addi t6,t6,1656 # 0x12345678\n"
+                        " 8: 01452f03 lw t5,20(a0)\n"
+                        " c: 05ff2823 sw t6,80(t5)\n"
+                        " 10: a8c5 j 0x100"
+                    >>)
+                end),
+                %% Test: Large immediate to ptr
+                ?_test(begin
+                    move_to_vm_register_test0(State0, 16#12345678, {ptr, a3}, <<
+                        " 0: 12345fb7 lui t6,0x12345\n"
+                        " 4: 678f8f93 addi t6,t6,1656 # 0x12345678\n"
+                        " 8: 01f6a023 sw t6,0(a3)\n"
+                        " c: a8d5 j 0x100"
+                    >>)
+                end),
+                %% Test: x_reg to y_reg (high index)
+                ?_test(begin
+                    move_to_vm_register_test0(State0, {x_reg, 15}, {y_reg, 31}, <<
+                        " 0: 05452f83 lw t6,84(a0)\n"
+                        " 4: 01452f03 lw t5,20(a0)\n"
+                        " 8: 07ff2e23 sw t6,124(t5)\n"
+                        " c: a8d5 j 0x100"
+                    >>)
+                end),
+                %% Test: y_reg to x_reg (high index)
+                ?_test(begin
+                    move_to_vm_register_test0(State0, {y_reg, 31}, {x_reg, 15}, <<
+                        " 0: 01452f03 lw t5,20(a0)\n"
+                        " 4: 07cf2f83 lw t6,124(t5)\n"
+                        " 8: 05f52a23 sw t6,84(a0)\n"
+                        " c: a8d5 j 0x100"
+                    >>)
+                end),
+                %% Test: Large y_reg index (32): offset 128 is not encoded in the sw itself, the address is computed with li + add
+                ?_test(begin
+                    move_to_vm_register_test0(State0, 42, {y_reg, 32}, <<
+                        " 0: 02a00f13 li t5,42\n"
+                        " 4: 01452f83 lw t6,20(a0)\n"
+                        " 8: 08000e93 li t4,128\n"
+                        " c: 9efe add t4,t4,t6\n"
+                        " e: 01eea023 sw t5,0(t4)\n"
+                        " 12: a0fd j 0x100"
+                    >>)
+                end),
+                %% Test: Negative immediate to x_reg
+                ?_test(begin
+                    move_to_vm_register_test0(State0, -1, {x_reg, 0}, <<
+                        " 0: 5ffd li t6,-1\n"
+                        " 2: 01f52c23 sw t6,24(a0)\n"
+                        " 6: a8ed j 0x100"
+                    >>)
+                end),
+                ?_test(begin
+                    move_to_vm_register_test0(State0, -100, {x_reg, 0}, <<
+                        " 0: f9c00f93 li t6,-100\n"
+                        " 4: 01f52c23 sw t6,24(a0)\n"
+                        " 8: a8e5 j 0x100"
+                    >>)
+                end),
+                ?_test(begin
+                    move_to_vm_register_test0(State0, -1000, {x_reg, 0}, <<
+                        " 0: c1800f93 li t6,-1000\n"
+                        " 4: 01f52c23 sw t6,24(a0)\n"
+                        " 8: a8e5 j 0x100"
+                    >>)
+                end)
+            ]
+        end}.
+
+move_array_element_test0(State, Reg, Index, Dest, Dump) -> % helper: emit move_array_element and compare the stream with Dump
+    State1 = ?BACKEND:move_array_element(State, Reg, Index, Dest),
+    Stream = ?BACKEND:stream(State1),
+    ?assertEqual(dump_to_bin(Dump), Stream).
+
+move_array_element_test_() -> % EUnit setup fixture: every case receives the state built by the setup fun
+    {setup,
+        fun() ->
+            ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0))
+        end,
+        fun(State0) ->
+            [
+                %% move_array_element: reg[x] to x_reg
+                ?_test(begin
+                    move_array_element_test0(State0, a3, 2, {x_reg, 0}, <<
+                        " 0: 0086af83 lw t6,8(a3)\n"
+                        " 4: 01f52c23 sw t6,24(a0)"
+                    >>)
+                end),
+                %% move_array_element: reg[x] to ptr
+                ?_test(begin
+                    move_array_element_test0(State0, a3, 3, {ptr, t4}, <<
+                        " 0: 00c6af83 lw t6,12(a3)\n"
+                        " 4: 01fea023 sw t6,0(t4)"
+                    >>)
+                end),
+                %% move_array_element: reg[x] to y_reg
+                ?_test(begin
+                    move_array_element_test0(State0, a3, 1, {y_reg, 2}, <<
+                        " 0: 0046af03 lw t5,4(a3)\n"
+                        " 4: 01452f83 lw t6,20(a0)\n"
+                        " 8: 01efa423 sw t5,8(t6)"
+                    >>)
+                end),
+                %% move_array_element: reg[x] to native reg (t4)
+                ?_test(begin
+                    move_array_element_test0(State0, a3, 1, t4, <<
+                        " 0: 0046ae83 lw t4,4(a3)"
+                    >>)
+                end),
+                %% move_array_element: reg[x] to y_reg (high y_reg index)
+                ?_test(begin
+                    move_array_element_test0(State0, a3, 7, {y_reg, 31}, <<
+                        " 0: 01c6af03 lw t5,28(a3)\n"
+                        " 4: 01452f83 lw t6,20(a0)\n"
+                        " 8: 07efae23 sw t5,124(t6)"
+                    >>)
+                end),
+                %% move_array_element: reg[x] to x_reg (high x_reg index)
+                ?_test(begin
+                    move_array_element_test0(State0, a3, 7, {x_reg, 15}, <<
+                        " 0: 01c6af83 lw t6,28(a3)\n"
+                        " 4: 05f52a23 sw t6,84(a0)"
+                    >>)
+                end),
+                %% move_array_element: reg_x[reg_y] to x_reg
+                ?_test(begin
+                    {State1, Reg} = ?BACKEND:get_array_element(State0, a3, 4),
+                    move_array_element_test0(State1, a3, {free, Reg}, {x_reg, 2}, <<
+                        " 0: 0106af83 lw t6,16(a3)\n"
+                        " 4: 0f8a slli t6,t6,0x2\n"
+                        " 6: 01f68fb3 add t6,a3,t6\n"
+                        " a: 000faf83 lw t6,0(t6)\n"
+                        " e: 03f52023 sw t6,32(a0)"
+                    >>)
+                end),
+                %% move_array_element: reg_x[reg_y] to ptr
+                ?_test(begin
+                    {State1, Reg} = ?BACKEND:get_array_element(State0, a3, 4),
+                    move_array_element_test0(State1, a3, {free, Reg}, {ptr, t4}, <<
+                        " 0: 0106af83 lw t6,16(a3)\n"
+                        " 4: 0f8a slli t6,t6,0x2\n"
+                        " 6: 01f68fb3 add t6,a3,t6\n"
+                        " a: 000faf83 lw t6,0(t6)\n"
+                        " e: 01fea023 sw t6,0(t4)"
+                    >>)
+                end),
+                %% move_array_element: reg_x[reg_y] to y_reg
+                ?_test(begin
+                    {State1, Reg} = ?BACKEND:get_array_element(State0, a3, 4),
+                    move_array_element_test0(State1, a3, {free, Reg}, {y_reg, 31}, <<
+                        " 0: 0106af83 lw t6,16(a3)\n"
+                        " 4: 0f8a slli t6,t6,0x2\n"
+                        " 6: 01f68fb3 add t6,a3,t6\n"
+                        " a: 000faf83 lw t6,0(t6)\n"
+                        " e: 01452f03 lw t5,20(a0)\n"
+                        " 12: 07ff2e23 sw t6,124(t5)"
+                    >>)
+                end),
+                %% move_array_element with integer index and x_reg destination
+                ?_test(begin
+                    {State1, BaseReg} = ?BACKEND:move_to_native_register(State0, {x_reg, 0}),
+                    move_array_element_test0(State1, BaseReg, 2, {x_reg, 5}, <<
+                        " 0: 01852f83 lw t6,24(a0)\n"
+                        " 4: 008faf03 lw t5,8(t6)\n"
+                        " 8: 03e52623 sw t5,44(a0)"
+                    >>)
+                end)
+            ]
+        end}.
+
+get_array_element_test_() -> % EUnit setup fixture with a single case
+    {setup,
+        fun() ->
+            ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0))
+        end,
+        fun(State0) ->
+            [
+                %% get_array_element: reg[x] to new native reg
+                ?_test(begin
+                    {State1, Reg} = ?BACKEND:get_array_element(State0, t3, 4),
+                    Stream = ?BACKEND:stream(State1),
+                    Dump = <<
+                        " 0: 010e2f83 lw t6,16(t3)"
+                    >>,
+                    ?assertEqual(dump_to_bin(Dump), Stream),
+                    ?assertEqual(t6, Reg)
+                end)
+            ]
+        end}.
+
+move_to_array_element_test_() -> % EUnit setup fixture covering move_to_array_element/4 and /5
+    {setup,
+        fun() ->
+            ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0))
+        end,
+        fun(State0) ->
+            [
+                %% move_to_array_element/4: x_reg to reg[x]
+                ?_test(begin
+                    State1 = ?BACKEND:move_to_array_element(State0, {x_reg, 0}, a3, 2),
+                    Stream = ?BACKEND:stream(State1),
+                    Dump = <<
+                        " 0: 01852f83 lw t6,24(a0)\n"
+                        " 4: 01f6a423 sw t6,8(a3)"
+                    >>,
+                    ?assertEqual(dump_to_bin(Dump), Stream)
+                end),
+                %% move_to_array_element/4: x_reg to reg[reg]
+                ?_test(begin
+                    State1 = ?BACKEND:move_to_array_element(State0, {x_reg, 0}, a3, t3),
+                    Stream = ?BACKEND:stream(State1),
+                    Dump = <<
+                        " 0: 01852f83 lw t6,24(a0)\n"
+                        " 4: 8f72 mv t5,t3\n"
+                        " 6: 0f0a slli t5,t5,0x2\n"
+                        " 8: 01e68f33 add t5,a3,t5\n"
+                        " c: 01ff2023 sw t6,0(t5)"
+                    >>,
+                    ?assertEqual(dump_to_bin(Dump), Stream)
+                end),
+                %% move_to_array_element/4: ptr to reg[reg]
+                ?_test(begin
+                    State1 = ?BACKEND:move_to_array_element(State0, {ptr, t6}, a3, t3),
+                    Stream = ?BACKEND:stream(State1),
+                    Dump = <<
+                        " 0: 000faf83 lw t6,0(t6)\n"
+                        " 4: 8f72 mv t5,t3\n"
+                        " 6: 0f0a slli t5,t5,0x2\n"
+                        " 8: 01e68f33 add t5,a3,t5\n"
+                        " c: 01ff2023 sw t6,0(t5)"
+                    >>,
+                    ?assertEqual(dump_to_bin(Dump), Stream)
+                end),
+                %% move_to_array_element/4: y_reg to reg[reg]
+                ?_test(begin
+                    State1 = ?BACKEND:move_to_array_element(State0, {y_reg, 2}, a3, t3),
+                    Stream = ?BACKEND:stream(State1),
+                    Dump = <<
+                        " 0: 01452f03 lw t5,20(a0)\n"
+                        " 4: 008f2f83 lw t6,8(t5)\n"
+                        " 8: 8f72 mv t5,t3\n"
+                        " a: 0f0a slli t5,t5,0x2\n"
+                        " c: 01e68f33 add t5,a3,t5\n"
+                        " 10: 01ff2023 sw t6,0(t5)"
+                    >>,
+                    ?assertEqual(dump_to_bin(Dump), Stream)
+                end),
+                %% move_to_array_element/5: x_reg to reg[x+offset] (integer index 2, offset 1)
+                ?_test(begin
+                    State1 = ?BACKEND:move_to_array_element(State0, {x_reg, 0}, a3, 2, 1),
+                    Stream = ?BACKEND:stream(State1),
+                    Dump = <<
+                        " 0: 01852f83 lw t6,24(a0)\n"
+                        " 4: 01f6a423 sw t6,8(a3)"
+                    >>,
+                    ?assertEqual(dump_to_bin(Dump), Stream)
+                end),
+                %% move_to_array_element/5: x_reg to reg[reg+offset] (index held in t3, with a3/t3 marked as used)
+                ?_test(begin
+                    State1 = setelement(7, State0, ?BACKEND:available_regs(State0) -- [a3, t3]), % NOTE(review): writes state tuple fields 7/8 directly (presumably available/used regs) - brittle
+                    State2 = setelement(8, State1, [a3, t3]),
+                    [a3, t3] = ?BACKEND:used_regs(State2),
+                    State3 = ?BACKEND:move_to_array_element(State2, {x_reg, 0}, a3, t3, 1),
+                    Stream = ?BACKEND:stream(State3),
+                    Dump = <<
+                        " 0: 01852f83 lw t6,24(a0)\n"
+                        " 4: 001e0f13 addi t5,t3,1\n"
+                        " 8: 0f0a slli t5,t5,0x2\n"
+                        " a: 01e68f33 add t5,a3,t5\n"
+                        " e: 01ff2023 sw t6,0(t5)"
+                    >>,
+                    ?assertEqual(dump_to_bin(Dump), Stream)
+                end),
+                %% move_to_array_element/5: imm to reg[reg+offset] (index held in t3, with a3/t3 marked as used)
+                ?_test(begin
+                    State1 = setelement(7, State0, ?BACKEND:available_regs(State0) -- [a3, t3]), % NOTE(review): same direct tuple-field write as in the previous case
+                    State2 = setelement(8, State1, [a3, t3]),
+                    [a3, t3] = ?BACKEND:used_regs(State2),
+                    State3 = ?BACKEND:move_to_array_element(State2, 42, a3, t3, 1),
+                    Stream = ?BACKEND:stream(State3),
+                    Dump = <<
+                        " 0: 02a00f93 li t6,42\n"
+                        " 4: 001e0f13 addi t5,t3,1\n"
+                        " 8: 0f0a slli t5,t5,0x2\n"
+                        " a: 01e68f33 add t5,a3,t5\n"
+                        " e: 01ff2023 sw t6,0(t5)"
+                    >>,
+                    ?assertEqual(dump_to_bin(Dump), Stream)
+                end)
+            ]
+        end}.
+
+move_to_native_register_test_() -> % EUnit setup fixture covering move_to_native_register/2 and /3
+    {setup,
+        fun() ->
+            ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0))
+        end,
+        fun(State0) ->
+            [
+                %% move_to_native_register/2: imm
+                ?_test(begin
+                    {State1, Reg} = ?BACKEND:move_to_native_register(State0, 42),
+                    Stream = ?BACKEND:stream(State1),
+                    ?assertEqual(t6, Reg),
+                    Dump = <<
+                        " 0: 02a00f93 li t6,42"
+                    >>,
+                    ?assertEqual(dump_to_bin(Dump), Stream)
+                end),
+                %% move_to_native_register/2: negative value
+                ?_test(begin
+                    {State1, Reg} = ?BACKEND:move_to_native_register(State0, -42),
+                    Stream = ?BACKEND:stream(State1),
+                    ?assertEqual(t6, Reg),
+                    Dump = <<
+                        " 0: fd600f93 li t6,-42"
+                    >>,
+                    ?assertEqual(dump_to_bin(Dump), Stream)
+                end),
+                %% move_to_native_register/2: -255 (boundary case)
+                ?_test(begin
+                    {State1, Reg} = ?BACKEND:move_to_native_register(State0, -255),
+                    Stream = ?BACKEND:stream(State1),
+                    ?assertEqual(t6, Reg),
+                    Dump = <<
+                        " 0: f0100f93 li t6,-255"
+                    >>,
+                    ?assertEqual(dump_to_bin(Dump), Stream)
+                end),
+                %% move_to_native_register/2: -256 (boundary case, fits in immediate for RISC-V)
+                ?_test(begin
+                    {State1, Reg} = ?BACKEND:move_to_native_register(State0, -256),
+                    State2 = ?BACKEND:jump_to_offset(State1, 16#100),
+                    Stream = ?BACKEND:stream(State2),
+                    ?assertEqual(t6, Reg),
+                    Dump = <<
+                        " 0: f0000f93 li t6,-256\n"
+                        " 4: a8f5 j 0x100"
+                    >>,
+                    ?assertEqual(dump_to_bin(Dump), Stream)
+                end),
+                %% move_to_native_register/2: {ptr, reg} (the value is loaded into the same register, t5)
+                ?_test(begin
+                    {State1, Reg} = ?BACKEND:move_to_native_register(State0, {ptr, t5}),
+                    Stream = ?BACKEND:stream(State1),
+                    ?assertEqual(t5, Reg),
+                    Dump = <<
+                        " 0: 000f2f03 lw t5,0(t5)"
+                    >>,
+                    ?assertEqual(dump_to_bin(Dump), Stream)
+                end),
+                %% move_to_native_register/2: {x_reg, N}
+                ?_test(begin
+                    {State1, Reg} = ?BACKEND:move_to_native_register(State0, {x_reg, 5}),
+                    Stream = ?BACKEND:stream(State1),
+                    ?assertEqual(t6, Reg),
+                    Dump = <<
+                        " 0: 02c52f83 lw t6,44(a0)"
+                    >>,
+                    ?assertEqual(dump_to_bin(Dump), Stream)
+                end),
+                %% move_to_native_register/2: {y_reg, N}
+                ?_test(begin
+                    {State1, Reg} = ?BACKEND:move_to_native_register(State0, {y_reg, 3}),
+                    Stream = ?BACKEND:stream(State1),
+                    ?assertEqual(t6, Reg),
+                    Dump = <<
+                        " 0: 01452f03 lw t5,20(a0)\n"
+                        " 4: 00cf2f83 lw t6,12(t5)"
+                    >>,
+                    ?assertEqual(dump_to_bin(Dump), Stream)
+                end),
+                %% move_to_native_register/3: imm to reg
+                ?_test(begin
+                    State1 = ?BACKEND:move_to_native_register(State0, 42, t5),
+                    Stream = ?BACKEND:stream(State1),
+                    Dump = <<
+                        " 0: 02a00f13 li t5,42"
+                    >>,
+                    ?assertEqual(dump_to_bin(Dump), Stream)
+                end),
+                %% move_to_native_register/3: reg to reg
+                ?_test(begin
+                    State1 = ?BACKEND:move_to_native_register(State0, t6, t4),
+                    Stream = ?BACKEND:stream(State1),
+                    Dump = <<
+                        " 0: 8efe mv t4,t6"
+                    >>,
+                    ?assertEqual(dump_to_bin(Dump), Stream)
+                end),
+                %% move_to_native_register/3: {ptr, reg} to reg
+                ?_test(begin
+                    State1 = ?BACKEND:move_to_native_register(State0, {ptr, t6}, t3),
+                    Stream = ?BACKEND:stream(State1),
+                    Dump = <<
+                        " 0: 000fae03 lw t3,0(t6)"
+                    >>,
+                    ?assertEqual(dump_to_bin(Dump), Stream)
+                end),
+                %% move_to_native_register/3: {x_reg, x} to reg
+                ?_test(begin
+                    State1 = ?BACKEND:move_to_native_register(State0, {x_reg, 2}, a3),
+                    Stream = ?BACKEND:stream(State1),
+                    Dump = <<
+                        " 0: 5114 lw a3,32(a0)"
+                    >>,
+                    ?assertEqual(dump_to_bin(Dump), Stream)
+                end),
+                %% move_to_native_register/3: {y_reg, y} to reg
+                ?_test(begin
+                    State1 = ?BACKEND:move_to_native_register(State0, {y_reg, 2}, a1),
+                    Stream = ?BACKEND:stream(State1),
+                    Dump = <<
+                        " 0: 01452f83 lw t6,20(a0)\n"
+                        " 4: 008fa583 lw a1,8(t6)"
+                    >>,
+                    ?assertEqual(dump_to_bin(Dump), Stream)
+                end),
+                %% Test: ptr with offset to fp_reg (term_to_float)
+                ?_test(begin
+                    {State1, RegA} = ?BACKEND:move_to_native_register(State0, {x_reg, 0}),
+                    State2 = ?BACKEND:move_to_vm_register(
+                        State1, {free, {ptr, RegA, 1}}, {fp_reg, 3}
+                    ),
+                    Stream = ?BACKEND:stream(State2),
+                    Dump = <<
+                        " 0: 01852f83 lw t6,24(a0)\n"
+                        " 4: 06052f03 lw t5,96(a0)\n"
+                        " 8: 004fae83 lw t4,4(t6)\n"
+                        " c: 01df2c23 sw t4,24(t5)\n"
+                        " 10: 008fae83 lw t4,8(t6)\n"
+                        " 14: 01df2e23 sw t4,28(t5)"
+                    >>,
+                    ?assertEqual(dump_to_bin(Dump), Stream)
+                end)
+            ]
+        end}.
+
+add_test0(State0, Reg, Imm, Dump) -> % helper: emit add(Reg, Imm) plus a jump to 16#100, compare with Dump
+    State1 = ?BACKEND:add(State0, Reg, Imm),
+    % Original note: force emission of literal pool. NOTE(review): no literal pool appears in the expected dumps of add_test_ - confirm the jump is still needed
+    State2 = ?BACKEND:jump_to_offset(State1, 16#100),
+    Stream = ?BACKEND:stream(State2),
+    ?assertEqual(dump_to_bin(Dump), Stream).
+
+add_test_() -> % cases: small immediate (addi), immediate 256 (li + add), register operand
+    {setup,
+        fun() ->
+            ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0))
+        end,
+        fun(State0) ->
+            [
+                ?_test(begin
+                    add_test0(State0, a2, 2, <<
+                        " 0: 0609 addi a2,a2,2\n"
+                        " 2: a8fd j 0x100"
+                    >>)
+                end),
+                ?_test(begin
+                    add_test0(State0, a2, 256, <<
+                        " 0: 10000f93 li t6,256\n"
+                        " 4: 967e add a2,a2,t6\n"
+                        " 6: a8ed j 0x100"
+                    >>)
+                end),
+                ?_test(begin
+                    add_test0(State0, a2, a3, <<
+                        " 0: 9636 add a2,a2,a3\n"
+                        " 2: a8fd j 0x100"
+                    >>)
+                end)
+            ]
+        end}.
+
+sub_test0(State0, Reg, Imm, Dump) -> % helper: emit sub(Reg, Imm) plus a jump to 16#100, compare with Dump
+    State1 = ?BACKEND:sub(State0, Reg, Imm),
+    % Original note: force emission of literal pool. NOTE(review): no literal pool appears in the expected dumps of sub_test_ - confirm the jump is still needed
+    State2 = ?BACKEND:jump_to_offset(State1, 16#100),
+    Stream = ?BACKEND:stream(State2),
+    ?assertEqual(dump_to_bin(Dump), Stream).
+
+sub_test_() -> % cases: small immediate (addi with negated value), immediate 256 (li + sub), register operand
+    {setup,
+        fun() ->
+            ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0))
+        end,
+        fun(State0) ->
+            [
+                ?_test(begin
+                    sub_test0(State0, a2, 2, <<
+                        " 0: 1679 addi a2,a2,-2\n"
+                        " 2: a8fd j 0x100"
+                    >>)
+                end),
+                ?_test(begin
+                    sub_test0(State0, a2, 256, <<
+                        " 0: 10000f93 li t6,256\n"
+                        " 4: 41f60633 sub a2,a2,t6\n"
+                        " 8: a8e5 j 0x100"
+                    >>)
+                end),
+                ?_test(begin
+                    sub_test0(State0, a2, a3, <<
+                        " 0: 8e15 sub a2,a2,a3\n"
+                        " 2: a8fd j 0x100"
+                    >>)
+                end)
+            ]
+        end}.
+
+mul_test0(State0, Reg, Imm, Dump) -> % helper: emit mul(Reg, Imm) and compare the stream with Dump
+    State1 = ?BACKEND:mul(State0, Reg, Imm),
+    Stream = ?BACKEND:stream(State1),
+    ?assertEqual(dump_to_bin(Dump), Stream).
+
+mul_test_() -> % multipliers 2..10 are expected as shift/add/sub sequences; 11 is expected as li + mul
+    {setup,
+        fun() ->
+            ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0))
+        end,
+        fun(State0) ->
+            [
+                ?_test(begin
+                    mul_test0(State0, a2, 2, <<
+                        " 0: 0606 slli a2,a2,0x1"
+                    >>)
+                end),
+                ?_test(begin
+                    mul_test0(State0, a2, 3, <<
+                        " 0: 00161f93 slli t6,a2,0x1\n"
+                        " 4: 00cf8633 add a2,t6,a2"
+                    >>)
+                end),
+                ?_test(begin
+                    mul_test0(State0, a2, 4, <<
+                        " 0: 060a slli a2,a2,0x2"
+                    >>)
+                end),
+                ?_test(begin
+                    mul_test0(State0, a2, 5, <<
+                        " 0: 00261f93 slli t6,a2,0x2\n"
+                        " 4: 00cf8633 add a2,t6,a2"
+                    >>)
+                end),
+                ?_test(begin
+                    mul_test0(State0, a2, 6, <<
+                        " 0: 00161f93 slli t6,a2,0x1\n"
+                        " 4: 00cf8633 add a2,t6,a2\n"
+                        " 8: 0606 slli a2,a2,0x1"
+                    >>)
+                end),
+                ?_test(begin
+                    mul_test0(State0, a2, 7, <<
+                        " 0: 00361f93 slli t6,a2,0x3\n"
+                        " 4: 40cf8633 sub a2,t6,a2"
+                    >>)
+                end),
+                ?_test(begin
+                    mul_test0(State0, a2, 8, <<
+                        " 0: 060e slli a2,a2,0x3"
+                    >>)
+                end),
+                ?_test(begin
+                    mul_test0(State0, a2, 9, <<
+                        " 0: 00361f93 slli t6,a2,0x3\n"
+                        " 4: 00cf8633 add a2,t6,a2"
+                    >>)
+                end),
+                ?_test(begin
+                    mul_test0(State0, a2, 10, <<
+                        " 0: 00261f93 slli t6,a2,0x2\n"
+                        " 4: 00cf8633 add a2,t6,a2\n"
+                        " 8: 0606 slli a2,a2,0x1"
+                    >>)
+                end),
+                ?_test(begin
+                    mul_test0(State0, a2, 11, <<
+                        " 0: 4fad li t6,11\n"
+                        " 2: 03f60633 mul a2,a2,t6"
+                    >>)
+                end)
+            ]
+        end}.
+
+%% Test set_args1 with y_reg pattern
+set_args1_y_reg_test() ->
+    State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)),
+
+    % Call primitive with y_reg argument to trigger {y_reg, X} pattern in set_args1
+    % This mirrors: {MSt2, Value} = MMod:call_primitive(MSt1, ?PRIM_BITSTRING_GET_UTF8, [{free, Src}])
+    % but with {y_reg, 5} instead of {free, Src}
+    {State1, _ResultReg} = ?BACKEND:call_primitive(State0, ?PRIM_BITSTRING_GET_UTF8, [
+        {y_reg, 5}
+    ]),
+
+    Stream = ?BACKEND:stream(State1),
+    % Expected disassembly for loading from y_reg and calling primitive
+    Dump = <<
+        " 0: 04300f93 li t6,67\n"
+        " 4: 0f8a slli t6,t6,0x2\n"
+        " 6: 9fb2 add t6,t6,a2\n"
+        " 8: 000faf83 lw t6,0(t6)\n"
+        " c: 1141 addi sp,sp,-16\n"
+        " e: c006 sw ra,0(sp)\n"
+        " 10: c22a sw a0,4(sp)\n"
+        " 12: c42e sw a1,8(sp)\n"
+        " 14: c632 sw a2,12(sp)\n"
+        " 16: 01452f03 lw t5,20(a0)\n"
+        " 1a: 014f2503 lw a0,20(t5)\n"
+        " 1e: 9f82 jalr t6\n"
+        " 20: 8faa mv t6,a0\n"
+        " 22: 4082 lw ra,0(sp)\n"
+        " 24: 4512 lw a0,4(sp)\n"
+        " 26: 45a2 lw a1,8(sp)\n"
+        " 28: 4632 lw a2,12(sp)\n"
+        " 2a: 0141 addi sp,sp,16"
+    >>,
+    ?assertEqual(dump_to_bin(Dump), Stream).
+
+%% Test large Y register read (Y=123, offset=492, which the expected code does not encode directly in the lw)
+large_y_reg_read_test() ->
+    State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)),
+    % Move from a large Y register (123 * 4 = 492 bytes; NOTE(review): presumably above the backend's direct-offset cut-off - confirm)
+    {State1, Reg} = ?BACKEND:move_to_native_register(State0, {y_reg, 123}),
+    Stream = ?BACKEND:stream(State1),
+    % Expected: offset is materialised with li and added to the y-register base before the load
+    Dump = <<
+        " 0: 01452f03 lw t5,20(a0)\n"
+        " 4: 1ec00f93 li t6,492\n"
+        " 8: 9ffa add t6,t6,t5\n"
+        " a: 000faf83 lw t6,0(t6)"
+    >>,
+    ?assertEqual(dump_to_bin(Dump), Stream),
+    ?assertEqual(t6, Reg).
+ +%% Test large Y register write with immediate value +large_y_reg_write_test() -> + State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)), + % Move immediate to a large Y register (123 * 4 = 492 bytes) + State1 = ?BACKEND:move_to_vm_register(State0, 42, {y_reg, 123}), + Stream = ?BACKEND:stream(State1), + % Expected: uses helper with temp registers for large offset + Dump = << + " 0: 02a00f13 li t5,42\n" + " 4: 01452f83 lw t6,20(a0)\n" + " 8: 1ec00e93 li t4,492\n" + " c: 9efe add t4,t4,t6\n" + " e: 01eea023 sw t5,0(t4)" + >>, + ?assertEqual(dump_to_bin(Dump), Stream). + +%% Test large Y register read with limited registers (uses IP_REG fallback) +large_y_reg_read_register_exhaustion_test() -> + State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)), + % Allocate most available registers to simulate near-exhaustion (leave 1 for the y_reg helper) + {State1, _} = ?BACKEND:move_to_native_register(State0, {x_reg, 0}), + {State2, _} = ?BACKEND:move_to_native_register(State1, {x_reg, 1}), + {State3, _} = ?BACKEND:move_to_native_register(State2, {x_reg, 2}), + {State4, _} = ?BACKEND:move_to_native_register(State3, {x_reg, 3}), + {State5, _} = ?BACKEND:move_to_native_register(State4, {x_reg, 4}), + % Leave one register available so the y_reg helper can work, but it will need IP_REG fallback + {StateFinal, ResultReg} = ?BACKEND:move_to_native_register(State5, {y_reg, 35}), + Stream = ?BACKEND:stream(StateFinal), + % Expected: uses t0+t1 fallback sequence when temps are exhausted + Dump = << + " 0: 01852f83 lw t6,24(a0)\n" + " 4: 01c52f03 lw t5,28(a0)\n" + " 8: 02052e83 lw t4,32(a0)\n" + " c: 02452e03 lw t3,36(a0)\n" + " 10: 02852383 lw t2,40(a0)\n" + " 14: 01452283 lw t0,20(a0)\n" + " 18: 08c00313 li t1,140\n" + " 1c: 9316 add t1,t1,t0\n" + " 1e: 00032303 lw t1,0(t1)" + >>, + ?assertEqual(dump_to_bin(Dump), Stream), + ?assertEqual(t1, ResultReg). 
+ +%% Test large Y register write with register exhaustion (uses t1/t0 fallback) +large_y_reg_write_register_exhaustion_test() -> + State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)), + % Get a source register first + {State1, SrcReg} = ?BACKEND:move_to_native_register(State0, {x_reg, 0}), + % Allocate most remaining registers to simulate exhaustion + {State2, t5} = ?BACKEND:move_to_native_register(State1, {x_reg, 1}), + {State3, t4} = ?BACKEND:move_to_native_register(State2, {x_reg, 2}), + {State4, t3} = ?BACKEND:move_to_native_register(State3, {x_reg, 3}), + {State5, t2} = ?BACKEND:move_to_native_register(State4, {x_reg, 4}), + % Try to write to large Y register when only one temp register is available + StateFinal = ?BACKEND:move_to_vm_register(State5, SrcReg, {y_reg, 50}), + Stream = ?BACKEND:stream(StateFinal), + % Expected: uses t1/t0 fallback sequence + Dump = << + " 0: 01852f83 lw t6,24(a0)\n" + " 4: 01c52f03 lw t5,28(a0)\n" + " 8: 02052e83 lw t4,32(a0)\n" + " c: 02452e03 lw t3,36(a0)\n" + " 10: 02852383 lw t2,40(a0)\n" + " 14: 01452303 lw t1,20(a0)\n" + " 18: 0c800293 li t0,200\n" + " 1c: 929a add t0,t0,t1\n" + " 1e: 01f2a023 sw t6,0(t0)" + >>, + ?assertEqual(dump_to_bin(Dump), Stream). + +%% Test boundary case: Y=31 (124 bytes, exactly at limit, should use direct addressing) +y_reg_boundary_direct_test() -> + State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)), + {State1, Reg} = ?BACKEND:move_to_native_register(State0, {y_reg, 31}), + Stream = ?BACKEND:stream(State1), + % Expected: uses direct addressing since 31 * 4 = 124 < 2048 + Dump = << + " 0: 01452f03 lw t5,20(a0)\n" + " 4: 07cf2f83 lw t6,124(t5)" + >>, + ?assertEqual(dump_to_bin(Dump), Stream), + ?assertEqual(t6, Reg). 
+ +%% Test debugger function +debugger_test() -> + State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)), + State1 = ?BACKEND:debugger(State0), + Stream = ?BACKEND:stream(State1), + Dump = << + " 0: 9002 ebreak" + >>, + ?assertEqual(dump_to_bin(Dump), Stream). + +and_register_exhaustion_negative_test() -> + State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)), + % Allocate all available registers to simulate register exhaustion + {State1, t6} = ?BACKEND:move_to_native_register(State0, {x_reg, 0}), + {State2, t5} = ?BACKEND:move_to_native_register(State1, {x_reg, 1}), + {State3, t4} = ?BACKEND:move_to_native_register(State2, {x_reg, 2}), + {State4, t3} = ?BACKEND:move_to_native_register(State3, {x_reg, 3}), + {State5, t2} = ?BACKEND:move_to_native_register(State4, {x_reg, 4}), + {StateNoRegs, t1} = ?BACKEND:move_to_native_register(State5, {x_reg, 5}), + % Test negative immediate (-4) which should use NOT+AND with t0 as temp + {StateResult, t6} = ?BACKEND:and_(StateNoRegs, {free, t6}, -4), + Stream = ?BACKEND:stream(StateResult), + ExpectedDump = << + " 0: 01852f83 lw t6,24(a0)\n" + " 4: 01c52f03 lw t5,28(a0)\n" + " 8: 02052e83 lw t4,32(a0)\n" + " c: 02452e03 lw t3,36(a0)\n" + " 10: 02852383 lw t2,40(a0)\n" + " 14: 02c52303 lw t1,44(a0)\n" + " 18: 428d li t0,3\n" + " 1a: fff2c293 not t0,t0\n" + " 1e: 005fffb3 and t6,t6,t0" + >>, + ?assertEqual(dump_to_bin(ExpectedDump), Stream). 
+ +and_register_exhaustion_positive_test() -> + State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)), + % Allocate all available registers to simulate register exhaustion + {State1, t6} = ?BACKEND:move_to_native_register(State0, {x_reg, 0}), + {State2, t5} = ?BACKEND:move_to_native_register(State1, {x_reg, 1}), + {State3, t4} = ?BACKEND:move_to_native_register(State2, {x_reg, 2}), + {State4, t3} = ?BACKEND:move_to_native_register(State3, {x_reg, 3}), + {State5, t2} = ?BACKEND:move_to_native_register(State4, {x_reg, 4}), + {StateNoRegs, t1} = ?BACKEND:move_to_native_register(State5, {x_reg, 5}), + % Test positive immediate (0x3F) which should use AND with t0 as temp + {StateResult, t6} = ?BACKEND:and_(StateNoRegs, {free, t6}, 16#3F), + Stream = ?BACKEND:stream(StateResult), + ExpectedDump = << + " 0: 01852f83 lw t6,24(a0)\n" + " 4: 01c52f03 lw t5,28(a0)\n" + " 8: 02052e83 lw t4,32(a0)\n" + " c: 02452e03 lw t3,36(a0)\n" + " 10: 02852383 lw t2,40(a0)\n" + " 14: 02c52303 lw t1,44(a0)\n" + " 18: 03f00293 li t0,63\n" + " 1c: 005fffb3 and t6,t6,t0" + >>, + ?assertEqual(dump_to_bin(ExpectedDump), Stream). + +jump_table_large_labels_test() -> + State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)), + State1 = ?BACKEND:jump_table(State0, 512), + Stream = ?BACKEND:stream(State1), + % RISC-V: Each jump table entry is 8 bytes (AUIPC + JALR) + ?assertEqual((512 + 1) * 8, byte_size(Stream)). 
+ +alloc_boxed_integer_fragment_small_test() -> + State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)), + {State1, ResultReg} = ?BACKEND:call_primitive(State0, ?PRIM_ALLOC_BOXED_INTEGER_FRAGMENT, [ + ctx, {avm_int64_t, 42} + ]), + ?assertEqual(t6, ResultReg), + Stream = ?BACKEND:stream(State1), + Dump = + << + " 0: 03c62f83 lw t6,60(a2)\n" + " 4: 1141 addi sp,sp,-16\n" + " 6: c006 sw ra,0(sp)\n" + " 8: c22a sw a0,4(sp)\n" + " a: c42e sw a1,8(sp)\n" + " c: c632 sw a2,12(sp)\n" + " e: 02a00593 li a1,42\n" + " 12: 4601 li a2,0\n" + " 14: 9f82 jalr t6\n" + " 16: 8faa mv t6,a0\n" + " 18: 4082 lw ra,0(sp)\n" + " 1a: 4512 lw a0,4(sp)\n" + " 1c: 45a2 lw a1,8(sp)\n" + " 1e: 4632 lw a2,12(sp)\n" + " 20: 0141 addi sp,sp,16" + >>, + ?assertEqual(dump_to_bin(Dump), Stream). + +alloc_boxed_integer_fragment_large_test() -> + State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)), + {State1, ResultReg} = ?BACKEND:call_primitive(State0, ?PRIM_ALLOC_BOXED_INTEGER_FRAGMENT, [ + ctx, {avm_int64_t, 16#123456789ABCDEF0} + ]), + % Add a call primitive last to emit literal pool + State2 = ?BACKEND:call_primitive_last(State1, ?PRIM_RAISE_ERROR_TUPLE, [ + ctx, jit_state, offset, ?BADMATCH_ATOM, {free, ResultReg} + ]), + ?assertEqual(t6, ResultReg), + Stream = ?BACKEND:stream(State2), + Dump = + << + " 0: 03c62f83 lw t6,60(a2)\n" + " 4: 1141 addi sp,sp,-16\n" + " 6: c006 sw ra,0(sp)\n" + " 8: c22a sw a0,4(sp)\n" + " a: c42e sw a1,8(sp)\n" + " c: c632 sw a2,12(sp)\n" + " e: 9abce5b7 lui a1,0x9abce\n" + " 12: ef058593 addi a1,a1,-272 # 0x9abcdef0\n" + " 16: 12345637 lui a2,0x12345\n" + " 1a: 67860613 addi a2,a2,1656 # 0x12345678\n" + " 1e: 9f82 jalr t6\n" + " 20: 8faa mv t6,a0\n" + " 22: 4082 lw ra,0(sp)\n" + " 24: 4512 lw a0,4(sp)\n" + " 26: 45a2 lw a1,8(sp)\n" + " 28: 4632 lw a2,12(sp)\n" + " 2a: 0141 addi sp,sp,16\n" + " 2c: 04c62f03 lw t5,76(a2)\n" + " 30: 03000613 li a2,48\n" + " 34: 28b00693 li a3,651\n" + " 38: 877e mv 
a4,t6\n" + " 3a: 8f02 jr t5" + >>, + ?assertEqual(dump_to_bin(Dump), Stream). + +%% Test for stack alignment issue in call_func_ptr +%% RISC-V maintains 16-byte stack alignment (RISC-V calling convention) +call_func_ptr_stack_alignment_test() -> + State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)), + {State1, t6} = ?BACKEND:move_to_native_register(State0, {x_reg, 0}), + {State2, t5} = ?BACKEND:move_to_native_register(State1, {x_reg, 1}), + {State3, t4} = ?BACKEND:move_to_native_register(State2, {x_reg, 2}), + {State4, t3} = ?BACKEND:move_to_native_register(State3, {x_reg, 3}), + {State5, _ResultReg} = ?BACKEND:call_func_ptr(State4, {free, t3}, [42]), + Stream = ?BACKEND:stream(State5), + Dump = + << + " 0: 01852f83 lw t6,24(a0)\n" + " 4: 01c52f03 lw t5,28(a0)\n" + " 8: 02052e83 lw t4,32(a0)\n" + " c: 02452e03 lw t3,36(a0)\n" + " 10: 1101 addi sp,sp,-32\n" + " 12: c006 sw ra,0(sp)\n" + " 14: c22a sw a0,4(sp)\n" + " 16: c42e sw a1,8(sp)\n" + " 18: c632 sw a2,12(sp)\n" + " 1a: c876 sw t4,16(sp)\n" + " 1c: ca7a sw t5,20(sp)\n" + " 1e: cc7e sw t6,24(sp)\n" + " 20: 02a00513 li a0,42\n" + " 24: 9e02 jalr t3\n" + " 26: 8e2a mv t3,a0\n" + " 28: 4082 lw ra,0(sp)\n" + " 2a: 4512 lw a0,4(sp)\n" + " 2c: 45a2 lw a1,8(sp)\n" + " 2e: 4632 lw a2,12(sp)\n" + " 30: 4ec2 lw t4,16(sp)\n" + " 32: 4f52 lw t5,20(sp)\n" + " 34: 4fe2 lw t6,24(sp)\n" + " 36: 02010113 addi sp,sp,32" + >>, + ?assertEqual(dump_to_bin(Dump), Stream). 
+ +%% Test for register exhaustion issue in call_func_ptr with 5+ arguments +%% When all registers are used and we call a function with 5+ args, +%% set_args needs temporary registers but none are available +call_func_ptr_register_exhaustion_test_() -> + {setup, + fun() -> + State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)), + + % Allocate all available registers to simulate register pressure + {State1, t6} = ?BACKEND:move_to_native_register(State0, {x_reg, 0}), + {State2, t5} = ?BACKEND:move_to_native_register(State1, {x_reg, 1}), + {State3, t4} = ?BACKEND:move_to_native_register(State2, {x_reg, 2}), + {State4, t3} = ?BACKEND:move_to_native_register(State3, {x_reg, 3}), + {State5, t2} = ?BACKEND:move_to_native_register(State4, {x_reg, 4}), + {State6, t1} = ?BACKEND:move_to_native_register(State5, {x_reg, 5}), + State6 + end, + fun(State6) -> + [ + ?_test(begin + {State7, _ResultReg} = ?BACKEND:call_func_ptr( + State6, + {free, t5}, + [ctx, jit_state, {free, t2}, 3, 1] + ), + Stream = ?BACKEND:stream(State7), + Dump = + << + " 0: 01852f83 lw t6,24(a0)\n" + " 4: 01c52f03 lw t5,28(a0)\n" + " 8: 02052e83 lw t4,32(a0)\n" + " c: 02452e03 lw t3,36(a0)\n" + " 10: 02852383 lw t2,40(a0)\n" + " 14: 02c52303 lw t1,44(a0)\n" + " 18: 1101 addi sp,sp,-32\n" + " 1a: c006 sw ra,0(sp)\n" + " 1c: c22a sw a0,4(sp)\n" + " 1e: c42e sw a1,8(sp)\n" + " 20: c632 sw a2,12(sp)\n" + " 22: c81a sw t1,16(sp)\n" + " 24: ca72 sw t3,20(sp)\n" + " 26: cc76 sw t4,24(sp)\n" + " 28: ce7e sw t6,28(sp)\n" + " 2a: 861e mv a2,t2\n" + " 2c: 468d li a3,3\n" + " 2e: 4705 li a4,1\n" + " 30: 9f02 jalr t5\n" + " 32: 8f2a mv t5,a0\n" + " 34: 4082 lw ra,0(sp)\n" + " 36: 4512 lw a0,4(sp)\n" + " 38: 45a2 lw a1,8(sp)\n" + " 3a: 4632 lw a2,12(sp)\n" + " 3c: 4342 lw t1,16(sp)\n" + " 3e: 4e52 lw t3,20(sp)\n" + " 40: 4ee2 lw t4,24(sp)\n" + " 42: 4ff2 lw t6,28(sp)\n" + " 44: 02010113 addi sp,sp,32" + >>, + ?assertEqual(dump_to_bin(Dump), Stream) + end), + ?_test(begin + {State7, 
_ResultReg} = ?BACKEND:call_func_ptr( + State6, + {free, t5}, + [ctx, jit_state, {free, t2}, 1, t1] + ), + Stream = ?BACKEND:stream(State7), + Dump = + << + " 0: 01852f83 lw t6,24(a0)\n" + " 4: 01c52f03 lw t5,28(a0)\n" + " 8: 02052e83 lw t4,32(a0)\n" + " c: 02452e03 lw t3,36(a0)\n" + " 10: 02852383 lw t2,40(a0)\n" + " 14: 02c52303 lw t1,44(a0)\n" + " 18: 1101 addi sp,sp,-32\n" + " 1a: c006 sw ra,0(sp)\n" + " 1c: c22a sw a0,4(sp)\n" + " 1e: c42e sw a1,8(sp)\n" + " 20: c632 sw a2,12(sp)\n" + " 22: c81a sw t1,16(sp)\n" + " 24: ca72 sw t3,20(sp)\n" + " 26: cc76 sw t4,24(sp)\n" + " 28: ce7e sw t6,28(sp)\n" + " 2a: 861e mv a2,t2\n" + " 2c: 4685 li a3,1\n" + " 2e: 871a mv a4,t1\n" + " 30: 9f02 jalr t5\n" + " 32: 8f2a mv t5,a0\n" + " 34: 4082 lw ra,0(sp)\n" + " 36: 4512 lw a0,4(sp)\n" + " 38: 45a2 lw a1,8(sp)\n" + " 3a: 4632 lw a2,12(sp)\n" + " 3c: 4342 lw t1,16(sp)\n" + " 3e: 4e52 lw t3,20(sp)\n" + " 40: 4ee2 lw t4,24(sp)\n" + " 42: 4ff2 lw t6,28(sp)\n" + " 44: 02010113 addi sp,sp,32" + >>, + ?assertEqual(dump_to_bin(Dump), Stream) + end), + ?_test(begin + {State7, ResultReg} = ?BACKEND:call_func_ptr( + State6, + {free, t5}, + [ctx, jit_state, {free, t2}, t1, 1] + ), + Stream = ?BACKEND:stream(State7), + Dump = + << + " 0: 01852f83 lw t6,24(a0)\n" + " 4: 01c52f03 lw t5,28(a0)\n" + " 8: 02052e83 lw t4,32(a0)\n" + " c: 02452e03 lw t3,36(a0)\n" + " 10: 02852383 lw t2,40(a0)\n" + " 14: 02c52303 lw t1,44(a0)\n" + " 18: 1101 addi sp,sp,-32\n" + " 1a: c006 sw ra,0(sp)\n" + " 1c: c22a sw a0,4(sp)\n" + " 1e: c42e sw a1,8(sp)\n" + " 20: c632 sw a2,12(sp)\n" + " 22: c81a sw t1,16(sp)\n" + " 24: ca72 sw t3,20(sp)\n" + " 26: cc76 sw t4,24(sp)\n" + " 28: ce7e sw t6,28(sp)\n" + " 2a: 861e mv a2,t2\n" + " 2c: 869a mv a3,t1\n" + " 2e: 4705 li a4,1\n" + " 30: 9f02 jalr t5\n" + " 32: 8f2a mv t5,a0\n" + " 34: 4082 lw ra,0(sp)\n" + " 36: 4512 lw a0,4(sp)\n" + " 38: 45a2 lw a1,8(sp)\n" + " 3a: 4632 lw a2,12(sp)\n" + " 3c: 4342 lw t1,16(sp)\n" + " 3e: 4e52 lw t3,20(sp)\n" + " 40: 4ee2 lw 
t4,24(sp)\n" + " 42: 4ff2 lw t6,28(sp)\n" + " 44: 02010113 addi sp,sp,32" + >>, + ?assertEqual(dump_to_bin(Dump), Stream), + ?assertEqual(t5, ResultReg) + end), + ?_test(begin + {State7, _ResultReg} = ?BACKEND:call_func_ptr( + State6, + {free, a1}, + [t5, a3] + ), + Stream = ?BACKEND:stream(State7), + Dump = + << + " 0: 01852f83 lw t6,24(a0)\n" + " 4: 01c52f03 lw t5,28(a0)\n" + " 8: 02052e83 lw t4,32(a0)\n" + " c: 02452e03 lw t3,36(a0)\n" + " 10: 02852383 lw t2,40(a0)\n" + " 14: 02c52303 lw t1,44(a0)\n" + " 18: fd010113 addi sp,sp,-48\n" + " 1c: c006 sw ra,0(sp)\n" + " 1e: c22a sw a0,4(sp)\n" + " 20: c42e sw a1,8(sp)\n" + " 22: c632 sw a2,12(sp)\n" + " 24: c81a sw t1,16(sp)\n" + " 26: ca1e sw t2,20(sp)\n" + " 28: cc72 sw t3,24(sp)\n" + " 2a: ce76 sw t4,28(sp)\n" + " 2c: d07a sw t5,32(sp)\n" + " 2e: d27e sw t6,36(sp)\n" + " 30: 832e mv t1,a1\n" + " 32: 857a mv a0,t5\n" + " 34: 85b6 mv a1,a3\n" + " 36: 9302 jalr t1\n" + " 38: c42a sw a0,8(sp)\n" + " 3a: 4082 lw ra,0(sp)\n" + " 3c: 4512 lw a0,4(sp)\n" + " 3e: 45a2 lw a1,8(sp)\n" + " 40: 4632 lw a2,12(sp)\n" + " 42: 4342 lw t1,16(sp)\n" + " 44: 43d2 lw t2,20(sp)\n" + " 46: 4e62 lw t3,24(sp)\n" + " 48: 4ef2 lw t4,28(sp)\n" + " 4a: 5f02 lw t5,32(sp)\n" + " 4c: 5f92 lw t6,36(sp)\n" + " 4e: 03010113 addi sp,sp,48" + >>, + ?assertEqual(dump_to_bin(Dump), Stream) + end), + ?_test(begin + {State7, ResultReg} = ?BACKEND:call_func_ptr( + State6, + {primitive, 2}, + [{free, t5}, a3] + ), + ?assertEqual(ResultReg, t5), + Stream = ?BACKEND:stream(State7), + Dump = + << + " 0: 01852f83 lw t6,24(a0)\n" + " 4: 01c52f03 lw t5,28(a0)\n" + " 8: 02052e83 lw t4,32(a0)\n" + " c: 02452e03 lw t3,36(a0)\n" + " 10: 02852383 lw t2,40(a0)\n" + " 14: 02c52303 lw t1,44(a0)\n" + " 18: fd010113 addi sp,sp,-48\n" + " 1c: c006 sw ra,0(sp)\n" + " 1e: c22a sw a0,4(sp)\n" + " 20: c42e sw a1,8(sp)\n" + " 22: c632 sw a2,12(sp)\n" + " 24: c81a sw t1,16(sp)\n" + " 26: ca1e sw t2,20(sp)\n" + " 28: cc72 sw t3,24(sp)\n" + " 2a: ce76 sw t4,28(sp)\n" + " 2c: d07e 
sw t6,32(sp)\n" + " 2e: 00862303 lw t1,8(a2)\n" + " 32: 857a mv a0,t5\n" + " 34: 85b6 mv a1,a3\n" + " 36: 9302 jalr t1\n" + " 38: 8f2a mv t5,a0\n" + " 3a: 4082 lw ra,0(sp)\n" + " 3c: 4512 lw a0,4(sp)\n" + " 3e: 45a2 lw a1,8(sp)\n" + " 40: 4632 lw a2,12(sp)\n" + " 42: 4342 lw t1,16(sp)\n" + " 44: 43d2 lw t2,20(sp)\n" + " 46: 4e62 lw t3,24(sp)\n" + " 48: 4ef2 lw t4,28(sp)\n" + " 4a: 5f82 lw t6,32(sp)\n" + " 4c: 03010113 addi sp,sp,48" + >>, + ?assertEqual(dump_to_bin(Dump), Stream) + end) + ] + end}. + +%% Test jump_to_continuation optimization for intra-module returns +jump_to_continuation_test_() -> + [ + ?_test(begin + % Test 1: jump_to_continuation at offset 0 + State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)), + State1 = ?BACKEND:jump_to_continuation(State0, {free, a0}), + Stream = ?BACKEND:stream(State1), + % Expected: riscv32 PIC sequence + Dump = + << + " 0: 00000f97 auipc t6,0x0\n" + " 4: 9faa add t6,t6,a0\n" + " 6: 8f82 jr t6" + >>, + ?assertEqual(dump_to_bin(Dump), Stream) + end), + ?_test(begin + % Test 2: jump_to_continuation after jump table (non-zero relative address) + State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)), + % Generate a jump table for 3 labels (4 entries * 8 bytes = 32 bytes) + State1 = ?BACKEND:jump_table(State0, 3), + State2 = ?BACKEND:jump_to_continuation(State1, {free, a0}), + Stream = ?BACKEND:stream(State2), + % Expected: jump table (32 bytes) + jump_to_continuation + % NetOffset = 0 - 32 = -32 (0xFFFFFFE0) + Dump = + << + " 0: ffffffff .insn 4, 0xffffffff\n" + " 4: ffffffff .insn 4, 0xffffffff\n" + " 8: ffffffff .insn 4, 0xffffffff\n" + " c: ffffffff .insn 4, 0xffffffff\n" + " 10: ffffffff .insn 4, 0xffffffff\n" + " 14: ffffffff .insn 4, 0xffffffff\n" + " 18: ffffffff .insn 4, 0xffffffff\n" + " 1c: ffffffff .insn 4, 0xffffffff\n" + " 20: 00000f97 auipc t6,0x0\n" + " 24: 1f81 addi t6,t6,-32 # 0x0\n" + " 26: 9faa add t6,t6,a0\n" + " 28: 8f82 jr t6" + >>, + 
?assertEqual(dump_to_bin(Dump), Stream) + end) + ]. + +%% Mimic part of add.beam +add_beam_test() -> + State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)), + State1 = ?BACKEND:jump_table(State0, 3), + State2 = ?BACKEND:add_label(State1, 1), + State3 = ?BACKEND:move_to_vm_register(State2, 16#9f, {x_reg, 1}), + State4 = ?BACKEND:move_to_vm_register(State3, 16#8f, {x_reg, 0}), + State5 = ?BACKEND:call_only_or_schedule_next(State4, 2), + State6 = ?BACKEND:add_label(State5, 2), + {State7, ResultReg} = ?BACKEND:call_primitive(State6, ?PRIM_ALLOCATE, [ + ctx, jit_state, 1, 0, 1 + ]), + State8 = ?BACKEND:if_block(State7, {'(bool)', {free, ResultReg}, '==', false}, fun(BSt0) -> + ?BACKEND:call_primitive_last(BSt0, ?PRIM_HANDLE_ERROR, [ctx, jit_state, offset]) + end), + State9 = ?BACKEND:move_to_vm_register(State8, ?TERM_NIL, {y_reg, 0}), + State10 = ?BACKEND:call_or_schedule_next(State9, 3), + State11 = ?BACKEND:add_label(State10, 3), + State12 = ?BACKEND:call_primitive_last(State11, ?PRIM_RETURN, [ + ctx, jit_state + ]), + % OP_INT_CALL_END + State13 = ?BACKEND:add_label(State12, 0), + State14 = ?BACKEND:call_primitive_last(State13, 1, [ctx, jit_state]), + State15 = ?BACKEND:update_branches(State14), + Stream = ?BACKEND:stream(State15), + Dump = + << + % jump table (new 8-byte format) + " 0: 00000697 auipc a3,0x0\n" + " 4: 0e068067 jr 224(a3) # 0xe0\n" + " 8: 00000697 auipc a3,0x0\n" + " c: 01868067 jr 24(a3) # 0x20\n" + " 10: 00000697 auipc a3,0x0\n" + " 14: 04868067 jr 72(a3) # 0x58\n" + " 18: 00000697 auipc a3,0x0\n" + " 1c: 0c268067 jr 194(a3) # 0xda\n" + % label 1 + % {move,{integer,9},{x,1}}. + " 20: 09f00f93 li t6,159\n" + " 24: 01f52e23 sw t6,28(a0)\n" + % {move,{integer,8},{x,0}} + " 28: 08f00f93 li t6,143\n" + " 2c: 01f52c23 sw t6,24(a0)\n" + % {call_only,2,{f,2}}. 
+ " 30: 0085af83 lw t6,8(a1)\n" + " 34: 1ffd addi t6,t6,-1\n" + " 36: 01f5a423 sw t6,8(a1)\n" + " 3a: 000f8663 beqz t6,0x46\n" + " 3e: a829 j 0x58\n" + " 40: 0001 nop\n" + " 42: 00000013 nop\n" + " 46: 00000f97 auipc t6,0x0\n" + " 4a: 0fc9 addi t6,t6,18 # 0x58\n" + " 4c: 0001 nop\n" + " 4e: 01f5a223 sw t6,4(a1)\n" + " 52: 00862f83 lw t6,8(a2)\n" + " 56: 8f82 jr t6\n" + % label 2 + % {allocate,1,1}. + " 58: 01462f83 lw t6,20(a2)\n" + " 5c: 1141 addi sp,sp,-16\n" + " 5e: c006 sw ra,0(sp)\n" + " 60: c22a sw a0,4(sp)\n" + " 62: c42e sw a1,8(sp)\n" + " 64: c632 sw a2,12(sp)\n" + " 66: 4605 li a2,1\n" + " 68: 4681 li a3,0\n" + " 6a: 4705 li a4,1\n" + " 6c: 9f82 jalr t6\n" + " 6e: 8faa mv t6,a0\n" + " 70: 4082 lw ra,0(sp)\n" + " 72: 4512 lw a0,4(sp)\n" + " 74: 45a2 lw a1,8(sp)\n" + " 76: 4632 lw a2,12(sp)\n" + " 78: 0141 addi sp,sp,16\n" + " 7a: 01ff9f13 slli t5,t6,0x1f\n" + " 7e: 000f4763 bltz t5,0x8c\n" + " 82: 01862f83 lw t6,24(a2)\n" + " 86: 08600613 li a2,134\n" + " 8a: 8f82 jr t6\n" + % {init_yregs,{list,[{y,0}]}}. 
+ %% move_to_vm_register(State8, ?TERM_NIL, {y_reg, 0}), + " 8c: 03b00f13 li t5,59\n" + " 90: 01452f83 lw t6,20(a0)\n" + " 94: 01efa023 sw t5,0(t6)\n" + % {call,1,{f,3}} + %% call_or_schedule_next(State9, 3), + " 98: 0005af03 lw t5,0(a1)\n" + " 9c: 000f2f03 lw t5,0(t5)\n" + " a0: 0f62 slli t5,t5,0x18\n" + " a2: 36800f93 li t6,872\n" + " a6: 00000013 nop\n" + " aa: 01ff6f33 or t5,t5,t6\n" + " ae: 05e52e23 sw t5,92(a0)\n" + " b2: 0085af83 lw t6,8(a1)\n" + " b6: 1ffd addi t6,t6,-1\n" + " b8: 01f5a423 sw t6,8(a1)\n" + " bc: 000f8663 beqz t6,0xc8\n" + " c0: a829 j 0xda\n" + " c2: 0001 nop\n" + " c4: 00000013 nop\n" + " c8: 00000f97 auipc t6,0x0\n" + " cc: 0fc9 addi t6,t6,18 # 0xda\n" + " ce: 0001 nop\n" + " d0: 01f5a223 sw t6,4(a1)\n" + " d4: 00862f83 lw t6,8(a2)\n" + " d8: 8f82 jr t6\n" + %% (continuation) + % label 3 + " da: 00462f83 lw t6,4(a2)\n" + " de: 8f82 jr t6\n" + % label 0 + " e0: 00462f83 lw t6,4(a2)\n" + " e4: 8f82 jr t6" + >>, + ?assertEqual(dump_to_bin(Dump), Stream). + +dump_to_bin(Dump) -> + dump_to_bin0(Dump, addr, []). + +-define(IS_HEX_DIGIT(C), + ((C >= $0 andalso C =< $9) orelse (C >= $a andalso C =< $f) orelse (C >= $A andalso C =< $F)) +). 
+ +dump_to_bin0(<>, addr, Acc) when ?IS_HEX_DIGIT(N) -> + dump_to_bin0(Tail, hex, Acc); +dump_to_bin0(<>, addr, Acc) when ?IS_HEX_DIGIT(N) -> + dump_to_bin0(Tail, addr, Acc); +dump_to_bin0(<<$\n, Tail/binary>>, addr, Acc) -> + dump_to_bin0(Tail, addr, Acc); +dump_to_bin0(<<$\s, Tail/binary>>, addr, Acc) -> + dump_to_bin0(Tail, addr, Acc); +dump_to_bin0(<<$\t, Tail/binary>>, addr, Acc) -> + dump_to_bin0(Tail, addr, Acc); +dump_to_bin0(<<$\s, Tail/binary>>, hex, Acc) -> + dump_to_bin0(Tail, hex, Acc); +dump_to_bin0(<<$\t, Tail/binary>>, hex, Acc) -> + dump_to_bin0(Tail, hex, Acc); +%% Handle RISC-V 32-bit instructions (8 consecutive hex digits) +dump_to_bin0(<>, hex, Acc) when + (Sp =:= $\t orelse Sp =:= $\s) andalso + ?IS_HEX_DIGIT(H1) andalso + ?IS_HEX_DIGIT(H2) andalso + ?IS_HEX_DIGIT(H3) andalso + ?IS_HEX_DIGIT(H4) andalso + ?IS_HEX_DIGIT(H5) andalso + ?IS_HEX_DIGIT(H6) andalso + ?IS_HEX_DIGIT(H7) andalso + ?IS_HEX_DIGIT(H8) +-> + %% RISC-V instructions are 32-bit little-endian + Instr = list_to_integer([H1, H2, H3, H4, H5, H6, H7, H8], 16), + dump_to_bin0(Rest, instr, [<> | Acc]); +%% Handle 32-bits undefined instruction (ARM format with space: "1234 5678") +dump_to_bin0(<>, hex, Acc) when + (Sp =:= $\t orelse Sp =:= $\s) andalso + ?IS_HEX_DIGIT(H1) andalso + ?IS_HEX_DIGIT(H2) andalso + ?IS_HEX_DIGIT(H3) andalso + ?IS_HEX_DIGIT(H4) andalso + ?IS_HEX_DIGIT(H5) andalso + ?IS_HEX_DIGIT(H6) andalso + ?IS_HEX_DIGIT(H7) andalso + ?IS_HEX_DIGIT(H8) +-> + InstrA = list_to_integer([H1, H2, H3, H4], 16), + InstrB = list_to_integer([H5, H6, H7, H8], 16), + dump_to_bin0(Rest, instr, [<>, <> | Acc]); +%% Handle 16-bit ARM32 Thumb instructions (4 hex digits) +dump_to_bin0(<>, hex, Acc) when + (Sp =:= $\t orelse Sp =:= $\s) andalso + ?IS_HEX_DIGIT(H1) andalso + ?IS_HEX_DIGIT(H2) andalso + ?IS_HEX_DIGIT(H3) andalso + ?IS_HEX_DIGIT(H4) +-> + %% Parse 4 hex digits (ARM32 Thumb 16-bit instruction) + Instr = list_to_integer([H1, H2, H3, H4], 16), + dump_to_bin0(Rest, instr, [<> | 
Acc]); +dump_to_bin0(<<$\n, Tail/binary>>, hex, Acc) -> + dump_to_bin0(Tail, addr, Acc); +dump_to_bin0(<<$\n, Tail/binary>>, instr, Acc) -> + dump_to_bin0(Tail, addr, Acc); +dump_to_bin0(<<_Other, Tail/binary>>, instr, Acc) -> + dump_to_bin0(Tail, instr, Acc); +dump_to_bin0(<<>>, _, Acc) -> + list_to_binary(lists:reverse(Acc)). diff --git a/tests/libs/jit/jit_tests.erl b/tests/libs/jit/jit_tests.erl index 72a356ae3c..5b1dfdae3a 100644 --- a/tests/libs/jit/jit_tests.erl +++ b/tests/libs/jit/jit_tests.erl @@ -74,6 +74,7 @@ compile_minimal_x86_64_test() -> fun(_) -> undefined end, fun(_) -> undefined end, fun(_) -> any end, + fun(_) -> undefined end, jit_x86_64, Stream2 ), @@ -117,10 +118,11 @@ term_to_int_verify_is_match_state_typed_optimization_x86_64_test() -> AtomResolver = jit_precompile:atom_resolver(?ATU8_CHUNK_1), LiteralResolver = fun(_) -> test_literal end, TypeResolver = jit_precompile:type_resolver(?TYPE_CHUNK_1), + ImportResolver = fun(_) -> test_function end, % Compile with typed register support {_LabelsCount, Stream3} = jit:compile( - ?CODE_CHUNK_1, AtomResolver, LiteralResolver, TypeResolver, jit_x86_64, Stream2 + ?CODE_CHUNK_1, AtomResolver, LiteralResolver, TypeResolver, ImportResolver, jit_x86_64, Stream2 ), CompiledCode = jit_x86_64:stream(Stream3), @@ -194,10 +196,11 @@ verify_is_function_typed_optimization_x86_64_test() -> AtomResolver = jit_precompile:atom_resolver(?ATU8_CHUNK_2), LiteralResolver = fun(_) -> test_literal end, TypeResolver = jit_precompile:type_resolver(?TYPE_CHUNK_2), + ImportResolver = fun(_) -> test_function end, % Compile with typed register support {_LabelsCount, Stream3} = jit:compile( - ?CODE_CHUNK_2, AtomResolver, LiteralResolver, TypeResolver, jit_x86_64, Stream2 + ?CODE_CHUNK_2, AtomResolver, LiteralResolver, TypeResolver, ImportResolver, jit_x86_64, Stream2 ), CompiledCode = jit_x86_64:stream(Stream3), diff --git a/tests/libs/jit/jit_tests_common.erl b/tests/libs/jit/jit_tests_common.erl index cfabfcf15f..cf989e746d 
100644 --- a/tests/libs/jit/jit_tests_common.erl +++ b/tests/libs/jit/jit_tests_common.erl @@ -77,6 +77,8 @@ asm(Arch, Bin, Str) -> find_binutils(Arch) -> ArchStr = atom_to_list(Arch), BinutilsList = [ + {ArchStr ++ "-esp-elf-as", ArchStr ++ "-esp-elf-objdump"}, + {ArchStr ++ "-unknown-elf-as", ArchStr ++ "-unknown-elf-objdump"}, {ArchStr ++ "-elf-as", ArchStr ++ "-elf-objdump"}, {ArchStr ++ "-none-eabi-as", ArchStr ++ "-none-eabi-objdump"}, {ArchStr ++ "-linux-gnu-as", ArchStr ++ "-linux-gnu-objdump"} @@ -104,6 +106,8 @@ get_asm_header(arm) -> get_asm_header(aarch64) -> ".text\n"; get_asm_header(x86_64) -> + ".text\n"; +get_asm_header(riscv32) -> ".text\n". %% Get architecture-specific assembler flags @@ -113,7 +117,9 @@ get_as_flags(arm) -> get_as_flags(aarch64) -> ""; get_as_flags(x86_64) -> - "--64". + "--64"; +get_as_flags(riscv32) -> + "-march=rv32imac". %% Parse objdump output lines and extract binary data -spec asm_lines([binary()], binary(), atom()) -> binary(). diff --git a/tests/libs/jit/jit_x86_64_asm_tests.erl b/tests/libs/jit/jit_x86_64_asm_tests.erl index 797ed9077c..a1c9bb949f 100644 --- a/tests/libs/jit/jit_x86_64_asm_tests.erl +++ b/tests/libs/jit/jit_x86_64_asm_tests.erl @@ -866,6 +866,19 @@ jge_rel8_test_() -> ) ]. +jle_test_() -> + [ + ?_assertAsmEqual(<<16#7e, 16#f4>>, "jle .-10", jit_x86_64_asm:jle(-10)) + ]. + +jle_rel8_test_() -> + [ + ?_assertEqual( + {1, jit_tests_common:asm(x86_64, <<16#7e, 16#05>>, "jle .+7")}, + jit_x86_64_asm:jle_rel8(7) + ) + ]. 
+ jmp_rel8_test_() -> [ ?_assertEqual( @@ -914,9 +927,50 @@ andb_test_() -> subq_test_() -> [ + % Register-register forms ?_assertAsmEqual(<<16#48, 16#29, 16#c1>>, "subq %rax, %rcx", jit_x86_64_asm:subq(rax, rcx)), ?_assertAsmEqual(<<16#49, 16#29, 16#c2>>, "subq %rax, %r10", jit_x86_64_asm:subq(rax, r10)), - ?_assertAsmEqual(<<16#4c, 16#29, 16#c1>>, "subq %r8, %rcx", jit_x86_64_asm:subq(r8, rcx)) + ?_assertAsmEqual(<<16#4c, 16#29, 16#c1>>, "subq %r8, %rcx", jit_x86_64_asm:subq(r8, rcx)), + % 8-bit immediate forms + ?_assertAsmEqual( + <<16#48, 16#83, 16#e8, 16#0a>>, "subq $10, %rax", jit_x86_64_asm:subq(10, rax) + ), + ?_assertAsmEqual( + <<16#48, 16#83, 16#e9, 16#05>>, "subq $5, %rcx", jit_x86_64_asm:subq(5, rcx) + ), + ?_assertAsmEqual( + <<16#49, 16#83, 16#ea, 16#08>>, "subq $8, %r10", jit_x86_64_asm:subq(8, r10) + ), + ?_assertAsmEqual( + <<16#49, 16#83, 16#eb, 16#7f>>, "subq $127, %r11", jit_x86_64_asm:subq(127, r11) + ), + % 32-bit immediate, special short form for %rax + ?_assertAsmEqual( + <<16#48, 16#2d, 16#00, 16#01, 16#00, 16#00>>, + "subq $256, %rax", + jit_x86_64_asm:subq(256, rax) + ), + ?_assertAsmEqual( + <<16#48, 16#2d, 16#00, 16#04, 16#00, 16#00>>, + "subq $1024, %rax", + jit_x86_64_asm:subq(1024, rax) + ), + % 32-bit immediate forms for other registers + ?_assertAsmEqual( + <<16#48, 16#81, 16#e9, 16#00, 16#01, 16#00, 16#00>>, + "subq $256, %rcx", + jit_x86_64_asm:subq(256, rcx) + ), + ?_assertAsmEqual( + <<16#49, 16#81, 16#ea, 16#00, 16#04, 16#00, 16#00>>, + "subq $1024, %r10", + jit_x86_64_asm:subq(1024, r10) + ), + ?_assertAsmEqual( + <<16#49, 16#81, 16#eb, 16#00, 16#10, 16#00, 16#00>>, + "subq $4096, %r11", + jit_x86_64_asm:subq(4096, r11) + ) ]. 
decl_test_() -> diff --git a/tests/libs/jit/jit_x86_64_tests.erl b/tests/libs/jit/jit_x86_64_tests.erl index 9aa86b6427..87ca4cefae 100644 --- a/tests/libs/jit/jit_x86_64_tests.erl +++ b/tests/libs/jit/jit_x86_64_tests.erl @@ -789,6 +789,166 @@ if_block_test_() -> >>, ?assertEqual(dump_to_bin(Dump), Stream), ?assertEqual([RegB], ?BACKEND:used_regs(State1)) + end), + ?_test(begin + State1 = ?BACKEND:if_block( + State0, + {100, '<', RegA}, + fun(BSt0) -> + ?BACKEND:add(BSt0, RegB, 2) + end + ), + Stream = ?BACKEND:stream(State1), + Dump = << + " 0: 48 8b 47 30 mov 0x30(%rdi),%rax\n" + " 4: 4c 8b 5f 38 mov 0x38(%rdi),%r11\n" + " 8: 48 83 f8 64 cmp $0x64,%rax\n" + " c: 7e 04 jle 0x12\n" + " e: 49 83 c3 02 add $0x2,%r11" + >>, + ?assertEqual(dump_to_bin(Dump), Stream), + ?assertEqual([RegB, RegA], ?BACKEND:used_regs(State1)) + end), + ?_test(begin + State1 = ?BACKEND:if_block( + State0, + {100, '<', {free, RegA}}, + fun(BSt0) -> + ?BACKEND:add(BSt0, RegB, 2) + end + ), + Stream = ?BACKEND:stream(State1), + Dump = << + " 0: 48 8b 47 30 mov 0x30(%rdi),%rax\n" + " 4: 4c 8b 5f 38 mov 0x38(%rdi),%r11\n" + " 8: 48 83 f8 64 cmp $0x64,%rax\n" + " c: 7e 04 jle 0x12\n" + " e: 49 83 c3 02 add $0x2,%r11" + >>, + ?assertEqual(dump_to_bin(Dump), Stream), + ?assertEqual([RegB], ?BACKEND:used_regs(State1)) + end), + ?_test(begin + State1 = ?BACKEND:if_block( + State0, + {RegA, '<', 100}, + fun(BSt0) -> + ?BACKEND:add(BSt0, RegB, 2) + end + ), + Stream = ?BACKEND:stream(State1), + Dump = << + " 0: 48 8b 47 30 mov 0x30(%rdi),%rax\n" + " 4: 4c 8b 5f 38 mov 0x38(%rdi),%r11\n" + " 8: 48 83 f8 64 cmp $0x64,%rax\n" + " c: 7d 04 jge 0x12\n" + " e: 49 83 c3 02 add $0x2,%r11" + >>, + ?assertEqual(dump_to_bin(Dump), Stream), + ?assertEqual([RegB, RegA], ?BACKEND:used_regs(State1)) + end), + ?_test(begin + State1 = ?BACKEND:if_block( + State0, + {{free, RegA}, '<', 100}, + fun(BSt0) -> + ?BACKEND:add(BSt0, RegB, 2) + end + ), + Stream = ?BACKEND:stream(State1), + Dump = << + " 0: 48 8b 47 30 mov 
0x30(%rdi),%rax\n" + " 4: 4c 8b 5f 38 mov 0x38(%rdi),%r11\n" + " 8: 48 83 f8 64 cmp $0x64,%rax\n" + " c: 7d 04 jge 0x12\n" + " e: 49 83 c3 02 add $0x2,%r11" + >>, + ?assertEqual(dump_to_bin(Dump), Stream), + ?assertEqual([RegB], ?BACKEND:used_regs(State1)) + end), + ?_test(begin + State1 = ?BACKEND:if_block( + State0, + {RegA, '<', 16#100000000}, + fun(BSt0) -> + ?BACKEND:add(BSt0, RegB, 2) + end + ), + Stream = ?BACKEND:stream(State1), + Dump = << + " 0: 48 8b 47 30 mov 0x30(%rdi),%rax\n" + " 4: 4c 8b 5f 38 mov 0x38(%rdi),%r11\n" + " 8: 49 bb 00 00 00 00 01 movabs $0x100000000,%r11\n" + " f: 00 00 00 \n" + " 12: 4c 39 d8 cmp %r11,%rax\n" + " 15: 7d 04 jge 0x1b\n" + " 17: 49 83 c3 02 add $0x2,%r11" + >>, + ?assertEqual(dump_to_bin(Dump), Stream), + ?assertEqual([RegB, RegA], ?BACKEND:used_regs(State1)) + end), + ?_test(begin + State1 = ?BACKEND:if_block( + State0, + {{free, RegA}, '<', 16#100000000}, + fun(BSt0) -> + ?BACKEND:add(BSt0, RegB, 2) + end + ), + Stream = ?BACKEND:stream(State1), + Dump = << + " 0: 48 8b 47 30 mov 0x30(%rdi),%rax\n" + " 4: 4c 8b 5f 38 mov 0x38(%rdi),%r11\n" + " 8: 49 bb 00 00 00 00 01 movabs $0x100000000,%r11\n" + " f: 00 00 00 \n" + " 12: 4c 39 d8 cmp %r11,%rax\n" + " 15: 7d 04 jge 0x1b\n" + " 17: 49 83 c3 02 add $0x2,%r11" + >>, + ?assertEqual(dump_to_bin(Dump), Stream), + ?assertEqual([RegB], ?BACKEND:used_regs(State1)) + end), + ?_test(begin + State1 = ?BACKEND:if_block( + State0, + {16#100000000, '<', RegA}, + fun(BSt0) -> + ?BACKEND:add(BSt0, RegB, 2) + end + ), + Stream = ?BACKEND:stream(State1), + Dump = << + " 0: 48 8b 47 30 mov 0x30(%rdi),%rax\n" + " 4: 4c 8b 5f 38 mov 0x38(%rdi),%r11\n" + " 8: 49 bb 00 00 00 00 01 movabs $0x100000000,%r11\n" + " f: 00 00 00 \n" + " 12: 4c 39 d8 cmp %r11,%rax\n" + " 15: 7e 04 jle 0x1b\n" + " 17: 49 83 c3 02 add $0x2,%r11" + >>, + ?assertEqual(dump_to_bin(Dump), Stream), + ?assertEqual([RegB, RegA], ?BACKEND:used_regs(State1)) + end), + ?_test(begin + State1 = ?BACKEND:if_block( + State0, + 
{16#100000000, '<', {free, RegA}}, + fun(BSt0) -> + ?BACKEND:add(BSt0, RegB, 2) + end + ), + Stream = ?BACKEND:stream(State1), + Dump = << + " 0: 48 8b 47 30 mov 0x30(%rdi),%rax\n" + " 4: 4c 8b 5f 38 mov 0x38(%rdi),%r11\n" + " 8: 49 bb 00 00 00 00 01 movabs $0x100000000,%r11\n" + " f: 00 00 00 \n" + " 12: 4c 39 d8 cmp %r11,%rax\n" + " 15: 7e 04 jle 0x1b\n" + " 17: 49 83 c3 02 add $0x2,%r11" + >>, + ?assertEqual(dump_to_bin(Dump), Stream), + ?assertEqual([RegB], ?BACKEND:used_regs(State1)) end) ] end}. @@ -893,6 +1053,37 @@ call_only_or_schedule_next_and_label_relocation_test() -> >>, ?assertEqual(dump_to_bin(Dump), Stream). +call_only_or_schedule_next_known_label_test() -> + State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)), + State1 = ?BACKEND:jump_table(State0, 2), + State2 = ?BACKEND:add_label(State1, 1), + State3 = ?BACKEND:add_label(State2, 2, 16#2a), + State4 = ?BACKEND:call_only_or_schedule_next(State3, 2), + State5 = ?BACKEND:call_primitive_last(State4, 0, [ctx, jit_state]), + % OP_INT_CALL_END + State6 = ?BACKEND:add_label(State5, 0), + State7 = ?BACKEND:call_primitive_last(State6, 1, [ctx, jit_state]), + State8 = ?BACKEND:update_branches(State7), + Stream = ?BACKEND:stream(State8), + Dump = + << + " 0: e9 2a 00 00 00 jmpq 0x2f\n" + " 5: e9 05 00 00 00 jmpq 0xf\n" + " a: e9 1b 00 00 00 jmpq 0x2a\n" + " f: ff 4e 10 decl 0x10(%rsi)\n" + " 12: 74 05 je 0x19\n" + " 14: e9 11 00 00 00 jmpq 0x2a\n" + " 19: 48 8d 05 0a 00 00 00 lea 0xa(%rip),%rax # 0x2a\n" + " 20: 48 89 46 08 mov %rax,0x8(%rsi)\n" + " 24: 48 8b 42 10 mov 0x10(%rdx),%rax\n" + " 28: ff e0 jmpq *%rax\n" + " 2a: 48 8b 02 mov (%rdx),%rax\n" + " 2d: ff e0 jmpq *%rax\n" + " 2f: 48 8b 42 08 mov 0x8(%rdx),%rax\n" + " 33: ff e0 jmpq *%rax\n" + >>, + ?assertEqual(dump_to_bin(Dump), Stream). 
+ call_bif_with_large_literal_integer_test() -> State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)), {State1, FuncPtr} = ?BACKEND:call_primitive(State0, 8, [jit_state, 2]), @@ -957,7 +1148,7 @@ call_bif_with_large_literal_integer_test() -> get_list_test() -> State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)), {State1, Reg} = ?BACKEND:move_to_native_register(State0, {x_reg, 0}), - State2 = ?BACKEND:and_(State1, Reg, -4), + {State2, Reg} = ?BACKEND:and_(State1, {free, Reg}, -4), State3 = ?BACKEND:move_array_element(State2, Reg, 1, {y_reg, 1}), State4 = ?BACKEND:move_array_element(State3, Reg, 0, {y_reg, 0}), State5 = ?BACKEND:free_native_registers(State4, [Reg]), @@ -977,17 +1168,18 @@ get_list_test() -> is_integer_test() -> State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)), + State1 = ?BACKEND:jump_table(State0, 1), Label = 1, Arg1 = {x_reg, 0}, - {State1, Reg} = ?BACKEND:move_to_native_register(State0, Arg1), - State2 = ?BACKEND:if_block( - State1, {Reg, '&', ?TERM_IMMED_TAG_MASK, '!=', ?TERM_INTEGER_TAG}, fun(MSt0) -> + {State2, Reg} = ?BACKEND:move_to_native_register(State1, Arg1), + State3 = ?BACKEND:if_block( + State2, {Reg, '&', ?TERM_IMMED_TAG_MASK, '!=', ?TERM_INTEGER_TAG}, fun(MSt0) -> MSt1 = ?BACKEND:if_block( MSt0, {Reg, '&', ?TERM_PRIMARY_MASK, '!=', ?TERM_PRIMARY_BOXED}, fun(BSt0) -> ?BACKEND:jump_to_label(BSt0, Label) end ), - MSt2 = ?BACKEND:and_(MSt1, Reg, ?TERM_PRIMARY_CLEAR_MASK), + {MSt2, Reg} = ?BACKEND:and_(MSt1, {free, Reg}, ?TERM_PRIMARY_CLEAR_MASK), MSt3 = ?BACKEND:move_array_element(MSt2, Reg, 0, Reg), ?BACKEND:if_block( MSt3, @@ -998,29 +1190,31 @@ is_integer_test() -> ) end ), - State3 = ?BACKEND:free_native_registers(State2, [Reg]), - ?BACKEND:assert_all_native_free(State3), - Offset = ?BACKEND:offset(State3), - State4 = ?BACKEND:add_label(State3, Label, Offset + 16#100), - State5 = ?BACKEND:update_branches(State4), - Stream = 
?BACKEND:stream(State5), + State4 = ?BACKEND:free_native_registers(State3, [Reg]), + ?BACKEND:assert_all_native_free(State4), + Offset = ?BACKEND:offset(State4), + State5 = ?BACKEND:add_label(State4, Label, Offset + 16#100), + State6 = ?BACKEND:update_branches(State5), + Stream = ?BACKEND:stream(State6), Dump = << - " 0: 48 8b 47 30 mov 0x30(%rdi),%rax\n" - " 4: 49 89 c3 mov %rax,%r11\n" - " 7: 41 80 e3 0f and $0xf,%r11b\n" - " b: 41 80 fb 0f cmp $0xf,%r11b\n" - " f: 74 25 je 0x36\n" - " 11: 49 89 c3 mov %rax,%r11\n" - " 14: 41 80 e3 03 and $0x3,%r11b\n" - " 18: 41 80 fb 02 cmp $0x2,%r11b\n" - " 1c: 74 05 je 0x23\n" - " 1e: e9 13 01 00 00 jmpq 0x136\n" - " 23: 48 83 e0 fc and $0xfffffffffffffffc,%rax\n" - " 27: 48 8b 00 mov (%rax),%rax\n" - " 2a: 24 3f and $0x3f,%al\n" - " 2c: 80 f8 08 cmp $0x8,%al\n" - " 2f: 74 05 je 0x36\n" - " 31: e9 00 01 00 00 jmpq 0x136" + " 0: e9 ff ff ff ff jmpq 0x4\n" + " 5: e9 36 01 00 00 jmpq 0x140\n" + " a: 48 8b 47 30 mov 0x30(%rdi),%rax\n" + " e: 49 89 c3 mov %rax,%r11\n" + " 11: 41 80 e3 0f and $0xf,%r11b\n" + " 15: 41 80 fb 0f cmp $0xf,%r11b\n" + " 19: 74 25 je 0x40\n" + " 1b: 49 89 c3 mov %rax,%r11\n" + " 1e: 41 80 e3 03 and $0x3,%r11b\n" + " 22: 41 80 fb 02 cmp $0x2,%r11b\n" + " 26: 74 05 je 0x2d\n" + " 28: e9 13 01 00 00 jmpq 0x140\n" + " 2d: 48 83 e0 fc and $0xfffffffffffffffc,%rax\n" + " 31: 48 8b 00 mov (%rax),%rax\n" + " 34: 24 3f and $0x3f,%al\n" + " 36: 80 f8 08 cmp $0x8,%al\n" + " 39: 74 05 je 0x40\n" + " 3b: e9 00 01 00 00 jmpq 0x140" >>, ?assertEqual(dump_to_bin(Dump), Stream). 
@@ -1031,15 +1225,16 @@ cond_jump_to_label(Cond, Label, MMod, MSt0) -> is_number_test() -> State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)), + State1 = ?BACKEND:jump_table(State0, 1), Label = 1, Arg1 = {x_reg, 0}, - {State1, Reg} = ?BACKEND:move_to_native_register(State0, Arg1), - State2 = ?BACKEND:if_block( - State1, {Reg, '&', ?TERM_IMMED_TAG_MASK, '!=', ?TERM_INTEGER_TAG}, fun(BSt0) -> + {State2, Reg} = ?BACKEND:move_to_native_register(State1, Arg1), + State3 = ?BACKEND:if_block( + State2, {Reg, '&', ?TERM_IMMED_TAG_MASK, '!=', ?TERM_INTEGER_TAG}, fun(BSt0) -> BSt1 = cond_jump_to_label( {Reg, '&', ?TERM_PRIMARY_MASK, '!=', ?TERM_PRIMARY_BOXED}, Label, ?BACKEND, BSt0 ), - BSt2 = ?BACKEND:and_(BSt1, Reg, ?TERM_PRIMARY_CLEAR_MASK), + {BSt2, Reg} = ?BACKEND:and_(BSt1, {free, Reg}, ?TERM_PRIMARY_CLEAR_MASK), BSt3 = ?BACKEND:move_array_element(BSt2, Reg, 0, Reg), cond_jump_to_label( {'and', [ @@ -1052,58 +1247,63 @@ is_number_test() -> ) end ), - State3 = ?BACKEND:free_native_registers(State2, [Reg]), - ?BACKEND:assert_all_native_free(State3), - Offset = ?BACKEND:offset(State3), - State4 = ?BACKEND:add_label(State3, Label, Offset + 16#100), - State5 = ?BACKEND:update_branches(State4), - Stream = ?BACKEND:stream(State5), + State4 = ?BACKEND:free_native_registers(State3, [Reg]), + ?BACKEND:assert_all_native_free(State4), + Offset = ?BACKEND:offset(State4), + State5 = ?BACKEND:add_label(State4, Label, Offset + 16#100), + State6 = ?BACKEND:update_branches(State5), + Stream = ?BACKEND:stream(State6), Dump = << - " 0: 48 8b 47 30 mov 0x30(%rdi),%rax\n" - " 4: 49 89 c3 mov %rax,%r11\n" - " 7: 41 80 e3 0f and $0xf,%r11b\n" - " b: 41 80 fb 0f cmp $0xf,%r11b\n" - " f: 74 32 je 0x43\n" - " 11: 49 89 c3 mov %rax,%r11\n" - " 14: 41 80 e3 03 and $0x3,%r11b\n" - " 18: 41 80 fb 02 cmp $0x2,%r11b\n" - " 1c: 74 05 je 0x23\n" - " 1e: e9 20 01 00 00 jmpq 0x143\n" - " 23: 48 83 e0 fc and $0xfffffffffffffffc,%rax\n" - " 27: 48 8b 00 mov (%rax),%rax\n" - 
" 2a: 49 89 c3 mov %rax,%r11\n" - " 2d: 41 80 e3 3f and $0x3f,%r11b\n" - " 31: 41 80 fb 08 cmp $0x8,%r11b\n" - " 35: 74 0c je 0x43\n" - " 37: 24 3f and $0x3f,%al\n" - " 39: 80 f8 18 cmp $0x18,%al\n" - " 3c: 74 05 je 0x43\n" - " 3e: e9 00 01 00 00 jmpq 0x143" + " 0: e9 ff ff ff ff jmpq 0x4\n" + " 5: e9 43 01 00 00 jmpq 0x14d\n" + " a: 48 8b 47 30 mov 0x30(%rdi),%rax\n" + " e: 49 89 c3 mov %rax,%r11\n" + " 11: 41 80 e3 0f and $0xf,%r11b\n" + " 15: 41 80 fb 0f cmp $0xf,%r11b\n" + " 19: 74 32 je 0x4d\n" + " 1b: 49 89 c3 mov %rax,%r11\n" + " 1e: 41 80 e3 03 and $0x3,%r11b\n" + " 22: 41 80 fb 02 cmp $0x2,%r11b\n" + " 26: 74 05 je 0x2d\n" + " 28: e9 20 01 00 00 jmpq 0x14d\n" + " 2d: 48 83 e0 fc and $0xfffffffffffffffc,%rax\n" + " 31: 48 8b 00 mov (%rax),%rax\n" + " 34: 49 89 c3 mov %rax,%r11\n" + " 37: 41 80 e3 3f and $0x3f,%r11b\n" + " 3b: 41 80 fb 08 cmp $0x8,%r11b\n" + " 3f: 74 0c je 0x4d\n" + " 41: 24 3f and $0x3f,%al\n" + " 43: 80 f8 18 cmp $0x18,%al\n" + " 46: 74 05 je 0x4d\n" + " 48: e9 00 01 00 00 jmpq 0x14d" >>, ?assertEqual(dump_to_bin(Dump), Stream). 
is_boolean_test() -> State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)), + State1 = ?BACKEND:jump_table(State0, 1), Label = 1, - {State1, Reg} = ?BACKEND:move_to_native_register(State0, {x_reg, 0}), - State2 = ?BACKEND:if_block(State1, {Reg, '!=', ?TRUE_ATOM}, fun(BSt0) -> + {State2, Reg} = ?BACKEND:move_to_native_register(State1, {x_reg, 0}), + State3 = ?BACKEND:if_block(State2, {Reg, '!=', ?TRUE_ATOM}, fun(BSt0) -> ?BACKEND:if_block(BSt0, {Reg, '!=', ?FALSE_ATOM}, fun(BSt1) -> ?BACKEND:jump_to_label(BSt1, Label) end) end), - State3 = ?BACKEND:free_native_registers(State2, [Reg]), - ?BACKEND:assert_all_native_free(State3), - Offset = ?BACKEND:offset(State3), - State4 = ?BACKEND:add_label(State3, Label, Offset + 16#100), - State5 = ?BACKEND:update_branches(State4), - Stream = ?BACKEND:stream(State5), + State4 = ?BACKEND:free_native_registers(State3, [Reg]), + ?BACKEND:assert_all_native_free(State4), + Offset = ?BACKEND:offset(State4), + State5 = ?BACKEND:add_label(State4, Label, Offset + 16#100), + State6 = ?BACKEND:update_branches(State5), + Stream = ?BACKEND:stream(State6), Dump = << - " 0: 48 8b 47 30 mov 0x30(%rdi),%rax\n" - " 4: 48 83 f8 4b cmp $0x4b,%rax\n" - " 8: 74 0b je 0x15\n" - " a: 48 83 f8 0b cmp $0xb,%rax\n" - " e: 74 05 je 0x15\n" - " 10: e9 00 01 00 00 jmpq 0x115\n" + " 0: e9 ff ff ff ff jmpq 0x4\n" + " 5: e9 15 01 00 00 jmpq 0x11f\n" + " a: 48 8b 47 30 mov 0x30(%rdi),%rax\n" + " e: 48 83 f8 4b cmp $0x4b,%rax\n" + " 12: 74 0b je 0x1f\n" + " 14: 48 83 f8 0b cmp $0xb,%rax\n" + " 18: 74 05 je 0x1f\n" + " 1a: e9 00 01 00 00 jmpq 0x11f\n" >>, ?assertEqual(dump_to_bin(Dump), Stream). 
@@ -1148,7 +1348,7 @@ call_fun_test() -> ]) end ), - State5 = ?BACKEND:and_(State4, RegCopy, ?TERM_PRIMARY_CLEAR_MASK), + {State5, RegCopy} = ?BACKEND:and_(State4, {free, RegCopy}, ?TERM_PRIMARY_CLEAR_MASK), State6 = ?BACKEND:move_array_element(State5, RegCopy, 0, RegCopy), State7 = ?BACKEND:if_block( State6, {RegCopy, '&', ?TERM_BOXED_TAG_MASK, '!=', ?TERM_BOXED_FUN}, fun(BSt0) -> @@ -1592,6 +1792,62 @@ jump_to_continuation_test() -> >>, ?assertEqual(dump_to_bin(Dump), Stream). +%% Test set_continuation_to_label with unknown label +wait_test() -> + State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)), + + State1 = ?BACKEND:jump_table(State0, 5), + State2 = ?BACKEND:add_label(State1, 1), + Label = 2, + State3 = ?BACKEND:set_continuation_to_label(State2, Label), + State4 = ?BACKEND:call_primitive_last(State3, ?PRIM_SCHEDULE_WAIT_CP, [ctx, jit_state]), + State5 = ?BACKEND:add_label(State4, Label, 16#100), + State6 = ?BACKEND:update_branches(State5), + + Stream = ?BACKEND:stream(State6), + Dump = + << + " 0: e9 ff ff ff ff jmpq 0x4\n" + " 5: e9 14 00 00 00 jmpq 0x1e\n" + " a: e9 f1 00 00 00 jmpq 0x100\n" + " f: e9 ff ff ff ff jmpq 0x13\n" + " 14: e9 ff ff ff ff jmpq 0x18\n" + " 19: e9 ff ff ff ff jmpq 0x1d\n" + " 1e: 48 8d 05 db 00 00 00 lea 0xdb(%rip),%rax\n" + " 25: 48 89 46 08 mov %rax,0x8(%rsi)\n" + " 29: 48 8b 82 e8 00 00 00 mov 0xe8(%rdx),%rax\n" + " 30: ff e0 jmpq *%rax" + >>, + ?assertEqual(dump_to_bin(Dump), Stream). 
+ +%% Test set_continuation_to_label with known label +wait_known_test() -> + State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)), + + State1 = ?BACKEND:jump_table(State0, 5), + State2 = ?BACKEND:add_label(State1, 1), + Label = 2, + State3 = ?BACKEND:add_label(State2, Label, 16#100), + State4 = ?BACKEND:set_continuation_to_label(State3, Label), + State5 = ?BACKEND:call_primitive_last(State4, ?PRIM_SCHEDULE_WAIT_CP, [ctx, jit_state]), + State6 = ?BACKEND:update_branches(State5), + + Stream = ?BACKEND:stream(State6), + Dump = + << + " 0: e9 ff ff ff ff jmpq 0x4\n" + " 5: e9 14 00 00 00 jmpq 0x1e\n" + " a: e9 f1 00 00 00 jmpq 0x100\n" + " f: e9 ff ff ff ff jmpq 0x13\n" + " 14: e9 ff ff ff ff jmpq 0x18\n" + " 19: e9 ff ff ff ff jmpq 0x1d\n" + " 1e: 48 8d 05 db 00 00 00 lea 0xdb(%rip),%rax\n" + " 25: 48 89 46 08 mov %rax,0x8(%rsi)\n" + " 29: 48 8b 82 e8 00 00 00 mov 0xe8(%rdx),%rax\n" + " 30: ff e0 jmpq *%rax" + >>, + ?assertEqual(dump_to_bin(Dump), Stream). + dump_to_bin(Dump) -> dump_to_bin0(Dump, addr, []). diff --git a/tests/libs/jit/tests.erl b/tests/libs/jit/tests.erl index ff272f6eac..2d130cad03 100644 --- a/tests/libs/jit/tests.erl +++ b/tests/libs/jit/tests.erl @@ -31,6 +31,8 @@ start() -> jit_aarch64_asm_tests, jit_armv6m_tests, jit_armv6m_asm_tests, + jit_riscv32_tests, + jit_riscv32_asm_tests, jit_x86_64_tests, jit_x86_64_asm_tests ]). diff --git a/tests/test-jit_stream_flash.c b/tests/test-jit_stream_flash.c new file mode 100644 index 0000000000..d35b565584 --- /dev/null +++ b/tests/test-jit_stream_flash.c @@ -0,0 +1,858 @@ +/* + * This file is part of AtomVM. + * + * Copyright 2025 by Paul Guyot + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ * SPDX-License-Identifier: Apache-2.0 OR LGPL-2.1-or-later
+ */
+
+#include <assert.h>
+#include <stdbool.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "avmpack.h"
+#include "context.h"
+#include "globalcontext.h"
+#include "jit_stream_flash.h"
+#include "jit_stream_flash_platform.h"
+#include "scheduler.h"
+#include "synclist.h"
+#include "term.h"
+#include "utils.h"
+
+// Mock flash memory - simulate 64KB of flash
+#define MOCK_FLASH_SIZE (64 * 1024)
+// Align to sector boundary for proper flash simulation
+static uint8_t mock_flash[MOCK_FLASH_SIZE] __attribute__((aligned(FLASH_SECTOR_SIZE)));
+
+// JIT entry header (copied from jit_stream_flash.c for testing)
+// The tests use sizeof(struct JITEntry) and the size field to locate and
+// checksum an entry, so this layout has to stay in sync with the original.
+struct JITEntry
+{
+    uint16_t magic;
+    uint16_t version;
+    uint32_t code;
+    uint32_t labels;
+    uint32_t size;
+};
+
+// CRC32 for verification (copied from jit_stream_flash.c)
+// Bit-by-bit CRC over len bytes of data: starts from 0xFFFFFFFF, uses the
+// reflected polynomial 0xEDB88320 and returns the bitwise complement of the
+// final register.
+static uint32_t crc32(const uint8_t *data, size_t len)
+{
+    uint32_t crc = 0xFFFFFFFF;
+    for (size_t i = 0; i < len; i++) {
+        crc ^= data[i];
+        for (int j = 0; j < 8; j++) {
+            // -(crc & 1) is all ones when the low bit is set, zero otherwise,
+            // so the polynomial is XORed in only for a set low bit
+            crc = (crc >> 1) ^ (0xEDB88320 & -(crc & 1));
+        }
+    }
+    return ~crc;
+}
+
+// Platform context (opaque)
+struct JSFlashPlatformContext
+{
+    uintptr_t base_addr;
+};
+
+// Forward declarations of mock platform functions
+struct JSFlashPlatformContext *jit_stream_flash_platform_init(void);
+void jit_stream_flash_platform_destroy(struct JSFlashPlatformContext *ctx);
+bool jit_stream_flash_platform_erase_sector(struct JSFlashPlatformContext *ctx, uintptr_t addr);
+bool jit_stream_flash_platform_write_page(struct JSFlashPlatformContext *ctx, uintptr_t
addr, const uint8_t *data);
+uintptr_t jit_stream_flash_platform_ptr_to_executable(uintptr_t addr);
+
+// Mock platform implementation
+
+// Allocate a platform context whose base address is the in-memory mock flash.
+// Returns NULL when the allocation fails.
+struct JSFlashPlatformContext *jit_stream_flash_platform_init(void)
+{
+    struct JSFlashPlatformContext *ctx = malloc(sizeof(struct JSFlashPlatformContext));
+    if (!ctx) {
+        return NULL;
+    }
+
+    // DO NOT erase flash here - it should persist across multiple stream creations
+    // Flash initialization happens once at test startup
+
+    ctx->base_addr = (uintptr_t) mock_flash;
+    return ctx;
+}
+
+// Release a context obtained from jit_stream_flash_platform_init.
+void jit_stream_flash_platform_destroy(struct JSFlashPlatformContext *ctx)
+{
+    free(ctx);
+}
+
+// Simulate a sector erase: fill FLASH_SECTOR_SIZE bytes starting at addr with 0xFF.
+// Logs to stderr and returns false when addr is not sector-aligned relative to
+// the flash base or lies outside the mock flash; returns true otherwise.
+bool jit_stream_flash_platform_erase_sector(struct JSFlashPlatformContext *ctx, uintptr_t addr)
+{
+    assert(ctx);
+
+    // Check alignment
+    if ((addr - ctx->base_addr) % FLASH_SECTOR_SIZE != 0) {
+        fprintf(stderr, "Erase address 0x%lx not sector-aligned\n", (unsigned long) addr);
+        return false;
+    }
+
+    // An address below base_addr wraps to a huge offset and is rejected here
+    size_t offset = addr - ctx->base_addr;
+    if (offset >= MOCK_FLASH_SIZE) {
+        fprintf(stderr, "Erase address 0x%lx out of bounds\n", (unsigned long) addr);
+        return false;
+    }
+
+    // Erase the sector
+    memset(&mock_flash[offset], 0xFF, FLASH_SECTOR_SIZE);
+
+    return true;
+}
+
+// Simulate programming one page: copy FLASH_PAGE_SIZE bytes from data to addr.
+// Logs to stderr and returns false, leaving the flash untouched, when addr is
+// not page-aligned relative to the flash base, when the page does not fit in
+// the mock flash, or when any byte would need a bit to go from 0 to 1.
+// Returns true once the page has been written.
+bool jit_stream_flash_platform_write_page(struct JSFlashPlatformContext *ctx, uintptr_t addr, const uint8_t *data)
+{
+    assert(ctx);
+
+    // Check alignment
+    if ((addr - ctx->base_addr) % FLASH_PAGE_SIZE != 0) {
+        fprintf(stderr, "Write address 0x%lx not page-aligned (base_addr=0x%lx, offset=0x%lx)\n",
+            (unsigned long) addr, (unsigned long) ctx->base_addr,
+            (unsigned long) (addr - ctx->base_addr));
+        return false;
+    }
+
+    size_t offset = addr - ctx->base_addr;
+    // Compare against the space that is left rather than computing
+    // offset + FLASH_PAGE_SIZE: for an address exactly one page below
+    // base_addr that sum wraps around to 0 and the write would be accepted.
+    if (offset >= MOCK_FLASH_SIZE || MOCK_FLASH_SIZE - offset < FLASH_PAGE_SIZE) {
+        fprintf(stderr, "Write at offset 0x%zx would exceed flash bounds\n", offset);
+        return false;
+    }
+
+    // Validate write - flash can only transition bits from 1→0 without erase
+    for (size_t i = 0; i < FLASH_PAGE_SIZE; i++) {
+        uint8_t current = mock_flash[offset + i];
+        uint8_t new_val = data[i];
+
+        // Check if we're trying to set any bits from 0→1
+        if ((~current & new_val) != 0) {
+            fprintf(stderr, "FLASH VALIDATION ERROR at offset 0x%zx:\n", offset + i);
+            fprintf(stderr, " Attempting to set bits 0→1 without erase\n");
+            fprintf(stderr, " Current: 0x%02x, New: 0x%02x, Invalid bits: 0x%02x\n",
+                current, new_val, ~current & new_val);
+            return false;
+        }
+    }
+
+    // Write the page
+    memcpy(&mock_flash[offset], data, FLASH_PAGE_SIZE);
+
+    return true;
+}
+
+uintptr_t jit_stream_flash_platform_ptr_to_executable(uintptr_t addr)
+{
+    // For host testing, no conversion needed
+    return addr;
+}
+
+uintptr_t jit_stream_flash_platform_executable_to_ptr(uintptr_t addr)
+{
+    // For host testing, no conversion needed
+    return addr;
+}
+
+// Store value at dst as four bytes, most significant first, regardless of the
+// host byte order. Byte stores also avoid accessing the uint8_t flash array
+// through a uint32_t pointer.
+static void put_be32(uint8_t *dst, uint32_t value)
+{
+    dst[0] = (uint8_t) (value >> 24);
+    dst[1] = (uint8_t) (value >> 16);
+    dst[2] = (uint8_t) (value >> 8);
+    dst[3] = (uint8_t) value;
+}
+
+// Create a minimal AVM pack for testing
+// Writes a 24-byte pack header followed by a single "end" section into
+// mock_flash at offset 0x100. Always returns 0.
+static uint8_t create_minimal_avmpack(void)
+{
+    // Create a minimal AVM pack with an "end" section
+    uint8_t *pack = mock_flash + 0x100; // Place pack at offset 0x100
+
+    // AVM Pack header: "#!/usr/bin/env AtomVM\n" is 22 characters; copying
+    // 23 bytes includes the string's terminating NUL, and one more zero byte
+    // pads the header to 24 bytes
+    const char header_str[] = "#!/usr/bin/env AtomVM\n";
+    memcpy(pack, header_str, 23);
+    pack[23] = 0; // Padding to align to 4 bytes
+
+    // Section header for "end" section
+    uint8_t *section = pack + 24;
+
+    // Section format: size (4) + flags (4) + reserved (4) + name (null-terminated)
+    // Write size in big-endian (total section size including header)
+    uint32_t section_size = 4 + 4 + 4 + 4; // size + flags + reserved + "end\0"
+    put_be32(section, section_size);
+
+    // Write flags in big-endian
+    uint32_t flags = END_OF_FILE;
+    put_be32(section + 4, flags);
+
+    // Write reserved field (seems to be 0)
+    put_be32(section + 8, 0);
+
+    // Write null-terminated name starting at offset 12
+    memcpy(section + 12, "end", 4); // includes null terminator
+
+    return 0;
+}
+
+// Register AVM pack with global context
+static void
register_test_avmpack(GlobalContext *glb)
+{
+    create_minimal_avmpack();
+
+    // Create AVMPackData
+    struct ConstAVMPack *pack = malloc(sizeof(struct ConstAVMPack));
+    // Stop here with a clear failure rather than dereferencing NULL below
+    assert(pack != NULL);
+    avmpack_data_init(&pack->base, &const_avm_pack_info);
+    pack->base.data = mock_flash + 0x100; // where create_minimal_avmpack wrote the pack
+    pack->base.in_use = true;
+
+    // Add to global context's avmpack list
+    synclist_append(&glb->avmpack_data, &pack->base.avmpack_head);
+}
+
+// Test helper: create binary term with proper GC rooting
+// Ensures heap space for a len-byte binary while passing roots/num_roots to
+// memory_ensure_free_with_roots, then builds the binary from data.
+// Returns term_invalid_term() when that call does not report MEMORY_GC_OK.
+static term make_binary_rooted(Context *ctx, const uint8_t *data, size_t len, term *roots, int num_roots)
+{
+    if (UNLIKELY(memory_ensure_free_with_roots(ctx, term_binary_heap_size(len), num_roots, roots, MEMORY_CAN_SHRINK) != MEMORY_GC_OK)) {
+        return term_invalid_term();
+    }
+    return term_from_literal_binary(data, len, &ctx->heap, ctx->global);
+}
+
+// Test helper: get NIF function
+typedef term (*nif_function)(Context *ctx, int argc, term argv[]);
+
+// Look up a jit_stream_flash NIF by its "module:function/arity" name.
+// Returns NULL when the name is unknown or the entry is not of NIFFunctionType.
+static nif_function get_nif(const char *name)
+{
+    const struct Nif *nif = jit_stream_flash_get_nif(name);
+    if (!nif || nif->base.type != NIFFunctionType) {
+        return NULL;
+    }
+    return nif->nif_ptr;
+}
+
+// Test 1: Basic append and flush
+void test_basic_append_flush(void)
+{
+    fprintf(stderr, "\n=== Test: Basic Append and Flush ===\n");
+
+    // Reset flash for this test
+    memset(mock_flash, 0x00, MOCK_FLASH_SIZE);
+    memset(&mock_flash[0], 0xFF, FLASH_SECTOR_SIZE); // first sector (where the test AVM pack lives)
+
+    GlobalContext *glb = globalcontext_new();
+    Context *ctx = context_new(glb);
+
+    register_test_avmpack(glb);
+    jit_stream_flash_init(glb);
+
+    nif_function new_nif = get_nif("jit_stream_flash:new/1");
+    nif_function append_nif = get_nif("jit_stream_flash:append/2");
+    nif_function flush_nif = get_nif("jit_stream_flash:flush/1");
+
+    assert(new_nif != NULL);
+    assert(append_nif != NULL);
+    assert(flush_nif != NULL);
+
+    // Create stream
+    term argv[3];
+    argv[0] = term_from_int(10); // label count
+    term stream = new_nif(ctx, 1, argv);
+    assert(term_is_binary(stream)); // Resource is a binary
+
+    // Append some data - root the stream during binary allocation
+    uint8_t data[100];
+    memset(data, 0xAA, sizeof(data));
+    argv[0] = stream;
+    argv[1] = make_binary_rooted(ctx, data, sizeof(data), &argv[0], 1); // Root argv[0] (stream)
+    stream = append_nif(ctx, 2, argv); // Update stream in case GC moved it
+    assert(stream == argv[0]); // Should return the stream
+
+    // Flush
+    argv[0] = stream;
+    stream = flush_nif(ctx, 1, argv); // Update stream
+    assert(stream == argv[0]);
+
+    scheduler_terminate(ctx);
+    globalcontext_destroy(glb);
+
+    fprintf(stderr, "PASS: Basic append and flush\n");
+}
+
+// Test 2: Multiple appends crossing page boundaries
+void test_multiple_appends(void)
+{
+    fprintf(stderr, "\n=== Test: Multiple Appends Crossing Pages ===\n");
+
+    // Reset flash for this test
+    memset(mock_flash, 0x00, MOCK_FLASH_SIZE);
+    memset(&mock_flash[0], 0xFF, FLASH_SECTOR_SIZE); // first sector (where the test AVM pack lives)
+
+    GlobalContext *glb = globalcontext_new();
+    Context *ctx = context_new(glb);
+
+    register_test_avmpack(glb);
+    jit_stream_flash_init(glb);
+
+    nif_function new_nif = get_nif("jit_stream_flash:new/1");
+    nif_function append_nif = get_nif("jit_stream_flash:append/2");
+    nif_function flush_nif = get_nif("jit_stream_flash:flush/1");
+
+    // A failed lookup should fail the test here, not crash on a NULL call
+    assert(new_nif != NULL);
+    assert(append_nif != NULL);
+    assert(flush_nif != NULL);
+
+    // Create stream
+    term argv[3];
+    argv[0] = term_from_int(10);
+    term stream = new_nif(ctx, 1, argv);
+
+    // Append multiple chunks to cross page boundaries
+    for (int i = 0; i < 10; i++) {
+        uint8_t data[100];
+        memset(data, 0xA0 + i, sizeof(data));
+        argv[0] = stream;
+        argv[1] = make_binary_rooted(ctx, data, sizeof(data), &argv[0], 1);
+        stream = append_nif(ctx, 2, argv);
+        argv[0] = stream; // Update for next iteration
+    }
+
+    // Flush
+    argv[0] = stream;
+    flush_nif(ctx, 1, argv);
+
+    scheduler_terminate(ctx);
+    globalcontext_destroy(glb);
+
+    fprintf(stderr, "PASS: Multiple appends crossing pages\n");
+}
+
+// Test 3: Replace operation
+void test_replace(void)
+{
+    fprintf(stderr, "\n=== Test: Replace Operation ===\n");
+
+    // Reset flash for this test
+    memset(mock_flash, 0x00, MOCK_FLASH_SIZE);
+    memset(&mock_flash[0], 0xFF, FLASH_SECTOR_SIZE); // first sector (where the test AVM pack lives)
+
+    GlobalContext *glb = globalcontext_new();
+    Context *ctx = context_new(glb);
+
+    register_test_avmpack(glb);
+    jit_stream_flash_init(glb);
+
+    nif_function new_nif = get_nif("jit_stream_flash:new/1");
+    nif_function append_nif = get_nif("jit_stream_flash:append/2");
+    nif_function replace_nif = get_nif("jit_stream_flash:replace/3");
+    nif_function flush_nif = get_nif("jit_stream_flash:flush/1");
+
+    // A failed lookup should fail the test here, not crash on a NULL call
+    assert(new_nif != NULL);
+    assert(append_nif != NULL);
+    assert(replace_nif != NULL);
+    assert(flush_nif != NULL);
+
+    // Create stream
+    term argv[3];
+    argv[0] = term_from_int(10);
+    term stream = new_nif(ctx, 1, argv);
+
+    // Append initial data
+    uint8_t data[200];
+    memset(data, 0xAA, sizeof(data));
+    argv[0] = stream;
+    argv[1] = make_binary_rooted(ctx, data, sizeof(data), &argv[0], 1);
+    stream = append_nif(ctx, 2, argv); // Update stream
+
+    // Replace some bytes in the middle
+    uint8_t replace_data[] = { 0x11, 0x22, 0x33, 0x44 };
+    argv[0] = stream;
+    argv[1] = term_from_int(50); // offset
+    argv[2] = make_binary_rooted(ctx, replace_data, sizeof(replace_data), &argv[0], 1);
+    stream = replace_nif(ctx, 3, argv); // Update stream
+
+    // Flush
+    argv[0] = stream;
+    stream = flush_nif(ctx, 1, argv); // Update stream
+
+    scheduler_terminate(ctx);
+    globalcontext_destroy(glb);
+
+    fprintf(stderr, "PASS: Replace operation\n");
+}
+
+// Test 4: Second module bug scenario - this is the critical test!
+void test_second_module_bug(void)
+{
+    fprintf(stderr, "\n=== Test: Second Module Bug Scenario (THE ACTUAL BUG) ===\n");
+
+    // Reset flash for this test
+    memset(mock_flash, 0x00, MOCK_FLASH_SIZE);
+    memset(&mock_flash[0], 0xFF, FLASH_SECTOR_SIZE); // first sector (where the test AVM pack lives)
+
+    GlobalContext *glb = globalcontext_new();
+    Context *ctx = context_new(glb);
+
+    register_test_avmpack(glb);
+    jit_stream_flash_init(glb);
+
+    nif_function new_nif = get_nif("jit_stream_flash:new/1");
+    nif_function append_nif = get_nif("jit_stream_flash:append/2");
+    nif_function flush_nif = get_nif("jit_stream_flash:flush/1");
+
+    // Simulate first module compilation - fill most of first sector
+    fprintf(stderr, "Simulating first module compilation...\n");
+    term argv[3];
+    argv[0] = term_from_int(100);
+    term stream1 = new_nif(ctx, 1, argv);
+
+    // Write 3.5KB of code (leaves 0.5KB in first sector)
+    for (int i = 0; i < 35; i++) {
+        uint8_t data[100];
+        memset(data, 0xA0 + (i % 16), sizeof(data));
+        argv[0] = stream1;
+        argv[1] = make_binary_rooted(ctx, data, sizeof(data), &argv[0], 1);
+        stream1 = append_nif(ctx, 2, argv); // Update stream1
+    }
+
+    argv[0] = stream1;
+
+    stream1 = flush_nif(ctx, 1, argv); // Update stream1
+
+    fprintf(stderr, "First module compiled and flushed\n");
+
+    // Finalize the first module to mark it as valid and prepare for the second
+    ModuleNativeEntryPoint entry1 = jit_stream_flash_entry_point(ctx, stream1);
+    Module fake_mod1;
+    fake_mod1.code = (CodeChunk *) 0x12345678; // Fake code pointer for testing
+
+    globalcontext_set_cache_native_code(glb, &fake_mod1, 1, entry1, 100);
+
+    // Now simulate second module - this should trigger the bug
+    // The bug was: when creating a new stream, if we're in a new sector
+    // that hasn't been erased, we need to erase it before writing
+    fprintf(stderr, "\nSimulating second module compilation...\n");
+    argv[0] = term_from_int(50);
+    term stream2 = new_nif(ctx, 1, argv);
+
+    // Append data - this will cross into next sector
+    for (int i = 0; i < 20; i++) {
+        uint8_t data[100];
+        memset(data, 0xB0 + (i % 16), sizeof(data));
+        argv[0] = stream2;
+        argv[1] = make_binary_rooted(ctx, data, sizeof(data), &argv[0], 1);
+        stream2 = append_nif(ctx, 2, argv); // Update stream2
+    }
+
+    argv[0] = stream2;
+    stream2 = flush_nif(ctx, 1, argv); // Update stream2
+
+    fprintf(stderr, "Second module compiled and flushed successfully!\n");
+
+    // Finalize the second module
+    ModuleNativeEntryPoint entry2 = jit_stream_flash_entry_point(ctx, stream2);
+    Module fake_mod2;
+    fake_mod2.code = (CodeChunk *) 0x87654321; // Fake code pointer for testing
+    globalcontext_set_cache_native_code(glb, &fake_mod2, 1, entry2, 50);
+
+    scheduler_terminate(ctx);
+    globalcontext_destroy(glb);
+
+    fprintf(stderr, "PASS: Second module bug scenario - bug is FIXED!\n");
+}
+
+// Pre-fills the flash so that the two bytes at offset 0x1000 read 0xFFFF while
+// the rest of that sector holds 0x97, then creates one stream, appends 100
+// bytes and flushes it. The test has no explicit assertions; it relies on the
+// mock flash rejecting a write to a page that was not erased first.
+void test_magic_0xffff_but_garbage_bug(void)
+{
+    fprintf(stderr, "\n=== Test: Magic is 0xFFFF but Sector Has Garbage ===\n");
+
+    // Simulate ESP32 scenario where first JIT entry is at start of sector
+    // and magic happens to be 0xFFFF but rest has garbage
+    memset(mock_flash, 0x00, MOCK_FLASH_SIZE);
+    memset(&mock_flash[0], 0xFF, FLASH_SECTOR_SIZE); // first sector with AVM
+
+    // Set magic to 0xFFFF at start of sector 1, but rest is garbage (0x97)
+    // NOTE(review): the literal 0x1000 assumes FLASH_SECTOR_SIZE is 0x1000 - confirm
+    uint16_t *magic_ptr = (uint16_t *) (mock_flash + 0x1000);
+    *magic_ptr = 0xFFFF;
+    // Fill rest of sector with garbage
+    for (size_t i = 2; i < FLASH_SECTOR_SIZE; i++) {
+        mock_flash[0x1000 + i] = 0x97;
+    }
+
+    fprintf(stderr, "Sector 1: magic=0xFFFF at offset 0, but rest has garbage (0x97)\n");
+
+    GlobalContext *glb = globalcontext_new();
+    Context *ctx = context_new(glb);
+
+    register_test_avmpack(glb);
+    jit_stream_flash_init(glb);
+
+    nif_function new_nif = get_nif("jit_stream_flash:new/1");
+    nif_function append_nif = get_nif("jit_stream_flash:append/2");
+    nif_function flush_nif = get_nif("jit_stream_flash:flush/1");
+
+    // Compile a small module - should detect garbage and erase
+    fprintf(stderr, "Compiling module (should detect garbage despite magic=0xFFFF)...\n");
+    term argv[3];
+    argv[0] = term_from_int(100);
+    term stream1 = new_nif(ctx, 1, argv);
+
+    // Append some data
+    uint8_t data[100];
+    memset(data, 0xAA, sizeof(data));
+    argv[0] = stream1;
+    argv[1] = make_binary_rooted(ctx, data, sizeof(data), &argv[0], 1);
+    stream1 = append_nif(ctx, 2, argv);
+
+    argv[0] = stream1;
+    stream1 = flush_nif(ctx, 1, argv);
+    fprintf(stderr, "Module compiled successfully!\n");
+
+    scheduler_terminate(ctx);
+    globalcontext_destroy(glb);
+
+    fprintf(stderr, "PASS: Magic 0xFFFF but garbage test\n");
+}
+
+// Leaves everything after the first sector filled with 0x00 (not erased), then
+// writes 82 blocks of 100 bytes through a single stream, flushes it and
+// registers the resulting entry point as cached native code.
+void test_garbage_flash_bug(void)
+{
+    fprintf(stderr, "\n=== Test: Garbage Flash Bug - JIT Sectors Not Erased After AVM Flash ===\n");
+
+    // Reset flash for this test
+    memset(mock_flash, 0x00, MOCK_FLASH_SIZE);
+    memset(&mock_flash[0], 0xFF, FLASH_SECTOR_SIZE); // first sector (where the test AVM pack lives)
+
+    fprintf(stderr, "Flash state: Sector 0 erased (0xFF), sectors 1+ have garbage (0x00)\n");
+
+    GlobalContext *glb = globalcontext_new();
+    Context *ctx = context_new(glb);
+
+    register_test_avmpack(glb);
+    jit_stream_flash_init(glb);
+
+    nif_function new_nif = get_nif("jit_stream_flash:new/1");
+    nif_function append_nif = get_nif("jit_stream_flash:append/2");
+    nif_function flush_nif = get_nif("jit_stream_flash:flush/1");
+
+    // Compile first module that spans two sectors (like benchmark: 8254 bytes)
+    fprintf(stderr, "Compiling first module spanning sectors 1-2 (8254 bytes)...\n");
+    term argv[3];
+    argv[0] = term_from_int(100);
+    term stream1 = new_nif(ctx, 1, argv);
+
+    // Write 82 blocks of 100 bytes = 8200 bytes + 16 byte header = 8216 bytes
+    for (int i = 0; i < 82; i++) {
+        uint8_t data[100];
+        memset(data, 0xAA, sizeof(data));
+        argv[0] = stream1;
+        argv[1] = make_binary_rooted(ctx, data, sizeof(data), &argv[0], 1);
+        stream1 = append_nif(ctx, 2, argv);
+    }
+
+    argv[0] = stream1;
+    stream1 = flush_nif(ctx, 1, argv);
+    fprintf(stderr, "First module compiled and flushed\n");
+
+    ModuleNativeEntryPoint entry1 = jit_stream_flash_entry_point(ctx, stream1);
+    Module fake_mod1;
+    fake_mod1.code = (CodeChunk *) 0x12345678;
+    globalcontext_set_cache_native_code(glb, &fake_mod1, 1, entry1, 100);
+
+    scheduler_terminate(ctx);
+    globalcontext_destroy(glb);
+
+    fprintf(stderr, "PASS: Garbage flash bug test\n");
+}
+
+// Writes and finalizes a first module of 82 x 100 bytes, then writes and
+// finalizes a second module of 67 x 100 bytes in the same flash. The test has
+// no explicit assertions; it relies on the mock flash rejecting invalid writes.
+void test_esp32_crash_bug(void)
+{
+    fprintf(stderr, "\n=== Test: ESP32 Crash Bug - Module Spanning Multiple Sectors ===\n");
+
+    // Reset flash for this test
+    memset(mock_flash, 0x00, MOCK_FLASH_SIZE);
+    memset(&mock_flash[0], 0xFF, FLASH_SECTOR_SIZE); // first sector (where the test AVM pack lives)
+
+    GlobalContext *glb = globalcontext_new();
+    Context *ctx = context_new(glb);
+
+    register_test_avmpack(glb);
+    jit_stream_flash_init(glb);
+
+    nif_function new_nif = get_nif("jit_stream_flash:new/1");
+    nif_function append_nif = get_nif("jit_stream_flash:append/2");
+    nif_function flush_nif = get_nif("jit_stream_flash:flush/1");
+
+    // Simulate first module like ESP32 benchmark: ~8254 bytes
+    // This will span sectors 0, 1, and part of sector 2
+    fprintf(stderr, "First module: writing ~8254 bytes (spans 3 sectors)...\n");
+    term argv[3];
+    argv[0] = term_from_int(100);
+    term stream1 = new_nif(ctx, 1, argv);
+
+    // Write 82 blocks of 100 bytes = 8200 bytes + 16 byte header = 8216 bytes
+    for (int i = 0; i < 82; i++) {
+        uint8_t data[100];
+        memset(data, 0xAA, sizeof(data));
+        argv[0] = stream1;
+        argv[1] = make_binary_rooted(ctx, data, sizeof(data), &argv[0], 1);
+        stream1 = append_nif(ctx, 2, argv);
+    }
+
+    argv[0] = stream1;
+    stream1 = flush_nif(ctx, 1, argv);
+    fprintf(stderr, "First module flushed\n");
+
+    ModuleNativeEntryPoint entry1 = jit_stream_flash_entry_point(ctx, stream1);
+    Module fake_mod1;
+    fake_mod1.code = (CodeChunk *) 0x12345678;
+    globalcontext_set_cache_native_code(glb, &fake_mod1, 1, entry1, 100);
+
+    // Second module like ESP32 pingpong: ~6690 bytes
+    // This will start in sector 2 (which already has tail of first module!)
+    fprintf(stderr, "Second module: writing ~6690 bytes...\n");
+    argv[0] = term_from_int(50);
+    term stream2 = new_nif(ctx, 1, argv);
+
+    // Write 67 blocks of 100 bytes = 6700 bytes
+    for (int i = 0; i < 67; i++) {
+        uint8_t data[100];
+        memset(data, 0xBB, sizeof(data));
+        argv[0] = stream2;
+        argv[1] = make_binary_rooted(ctx, data, sizeof(data), &argv[0], 1);
+        stream2 = append_nif(ctx, 2, argv);
+    }
+
+    argv[0] = stream2;
+    stream2 = flush_nif(ctx, 1, argv);
+    fprintf(stderr, "Second module flushed\n");
+
+    ModuleNativeEntryPoint entry2 = jit_stream_flash_entry_point(ctx, stream2);
+    Module fake_mod2;
+    fake_mod2.code = (CodeChunk *) 0x87654321;
+    globalcontext_set_cache_native_code(glb, &fake_mod2, 1, entry2, 50);
+
+    scheduler_terminate(ctx);
+    globalcontext_destroy(glb);
+
+    fprintf(stderr, "PASS: ESP32 crash bug test\n");
+}
+
+// Test for the tail corruption bug: when first module extends into next sector,
+// creating the second module should NOT erase the sector containing the first module's tail
+// The check compares a CRC of the first module (header plus code) taken
+// before and after the second module is written; a mismatch exits with status 1.
+static void test_tail_corruption_bug(void)
+{
+    fprintf(stderr, "\n=== Test: Tail Corruption Bug - Module Tail in Next Sector ===\n");
+
+    // Initialize flash: sector 0 erased (AVM), rest is garbage
+    memset(mock_flash, 0x00, MOCK_FLASH_SIZE);
+    memset(&mock_flash[0], 0xFF, FLASH_SECTOR_SIZE);
+
+    create_minimal_avmpack();
+
+    GlobalContext *glb = globalcontext_new();
+    Context *ctx = context_new(glb);
+
+    register_test_avmpack(glb);
+    jit_stream_flash_init(glb);
+
+    nif_function new_nif = get_nif("jit_stream_flash:new/1");
+    nif_function append_nif = get_nif("jit_stream_flash:append/2");
+    nif_function flush_nif = get_nif("jit_stream_flash:flush/1");
+
+    // Create first module that will extend into the next sector
+    // Module size: 8270 bytes (like benchmark on ESP32)
+    // Entry header: 16 bytes at 0x0 in sector 0x1000
+    // Native code: 8254 bytes, extends from sector 0x1000 into sector 0x2000
+    // Module ends at: 0x1000 + 16 + 8254 = 0x304E (in sector 0x2000)
+    // Next entry would be at: 0x3050 (also in sector 0x2000)
+
+    term argv[3];
+    argv[0] = term_from_int(10);
+    term stream1 = new_nif(ctx, 1, argv);
+
+    // Append 8254 bytes of native code
+    uint8_t code1[8254];
+    memset(code1, 0xAB, sizeof(code1));
+    argv[0] = stream1;
+    argv[1] = make_binary_rooted(ctx, code1, sizeof(code1), &argv[0], 1);
+    stream1 = append_nif(ctx, 2, argv);
+
+    argv[0] = stream1;
+    term stream1_flushed = flush_nif(ctx, 1, argv);
+    ModuleNativeEntryPoint entry1 = jit_stream_flash_entry_point(ctx, stream1_flushed);
+
+    Module fake_mod1;
+    fake_mod1.code = (CodeChunk *) 0x12345678;
+    globalcontext_set_cache_native_code(glb, &fake_mod1, 1, entry1, 30);
+
+    // Compute CRC of first module for verification
+    // The JITEntry header is taken to sit immediately before the entry point
+    uintptr_t data_addr1 = jit_stream_flash_platform_executable_to_ptr((uintptr_t) entry1);
+    struct JITEntry *jit_entry1 = (struct JITEntry *) (data_addr1 - sizeof(struct JITEntry));
+    uint32_t crc1_after_finalize = crc32((const uint8_t *) jit_entry1, sizeof(struct JITEntry) + jit_entry1->size);
+    fprintf(stderr, "First module: entry=%p size=%u CRC=0x%08x\n",
+        (void *) jit_entry1, (unsigned int) jit_entry1->size, (unsigned int) crc1_after_finalize);
+
+    // Verify first module extends into sector 0x2000
+    uintptr_t entry1_addr = (uintptr_t) jit_entry1;
+    uintptr_t entry1_end = entry1_addr + sizeof(struct JITEntry) + jit_entry1->size;
+    uintptr_t entry1_sector = entry1_addr & ~(FLASH_SECTOR_SIZE - 1);
+    uintptr_t entry1_end_sector = entry1_end & ~(FLASH_SECTOR_SIZE - 1);
+    fprintf(stderr, "First module: starts in sector 0x%lx, ends at 0x%lx (sector 0x%lx)\n",
+        (unsigned long) entry1_sector, (unsigned long) entry1_end,
+        (unsigned long) entry1_end_sector);
+
+    if (entry1_sector == entry1_end_sector) {
+        fprintf(stderr, "FAIL: Test setup error - first module should span sectors\n");
+        exit(1);
+    }
+
+    // Create second module - THIS SHOULD NOT CORRUPT THE FIRST MODULE
+    argv[0] = term_from_int(10);
+    term stream2 = new_nif(ctx, 1, argv);
+
+    uint8_t code2[100];
+    memset(code2, 0xCD, sizeof(code2));
+    argv[0] = stream2;
+    argv[1] = make_binary_rooted(ctx, code2, sizeof(code2), &argv[0], 1);
+    stream2 = append_nif(ctx, 2, argv);
+
+    argv[0] = stream2;
+    term stream2_flushed = flush_nif(ctx, 1, argv);
+    ModuleNativeEntryPoint entry2 = jit_stream_flash_entry_point(ctx, stream2_flushed);
+
+    Module fake_mod2;
+    fake_mod2.code = (CodeChunk *) 0x87654321;
+    globalcontext_set_cache_native_code(glb, &fake_mod2, 1, entry2, 20);
+
+    // Verify first module's CRC is still intact
+    uint32_t crc1_after_second = crc32((const uint8_t *) jit_entry1, sizeof(struct JITEntry) + jit_entry1->size);
+    fprintf(stderr, "First module after second: CRC=0x%08x (expected 0x%08x)\n",
+        (unsigned int) crc1_after_second, (unsigned int) crc1_after_finalize);
+
+    if (crc1_after_second != crc1_after_finalize) {
+        fprintf(stderr, "FAIL: First module corrupted after creating second module!\n");
+        fprintf(stderr, "Expected CRC: 0x%08x, Got: 0x%08x\n",
+            (unsigned int) crc1_after_finalize, (unsigned int) crc1_after_second);
+        exit(1);
+    }
+
+    scheduler_terminate(ctx);
+    globalcontext_destroy(glb);
+
+    fprintf(stderr, "PASS: Tail corruption bug test\n");
+}
+
+// Test 9: Stale data cleanup after failed compilation
+static void test_stale_data_cleanup(void)
+{
+    fprintf(stderr, "\n=== Test: Stale Data Cleanup After Failed Compilation ===\n");
+
+    // Initialize flash: sector 0 erased (AVM), rest is garbage
+    memset(mock_flash, 0x00, MOCK_FLASH_SIZE);
+    memset(&mock_flash[0], 0xFF, FLASH_SECTOR_SIZE);
+
+    create_minimal_avmpack();
+
+    GlobalContext *glb = globalcontext_new();
+    Context *ctx = context_new(glb);
+
+    register_test_avmpack(glb);
+    jit_stream_flash_init(glb);
+
+    nif_function new_nif = get_nif("jit_stream_flash:new/1");
+    nif_function append_nif = get_nif("jit_stream_flash:append/2");
+    nif_function flush_nif = get_nif("jit_stream_flash:flush/1");
+
+    // Create first module and finalize it (small, stays in first sector after AVM)
+    term
argv[3]; + argv[0] = term_from_int(10); + term stream1 = new_nif(ctx, 1, argv); + + uint8_t code1[500]; + memset(code1, 0xAA, sizeof(code1)); + argv[0] = stream1; + argv[1] = make_binary_rooted(ctx, code1, sizeof(code1), &argv[0], 1); + stream1 = append_nif(ctx, 2, argv); + + argv[0] = stream1; + term stream1_flushed = flush_nif(ctx, 1, argv); + ModuleNativeEntryPoint entry1 = jit_stream_flash_entry_point(ctx, stream1_flushed); + + Module fake_mod1; + fake_mod1.code = (CodeChunk *) 0x12345678; + globalcontext_set_cache_native_code(glb, &fake_mod1, 1, entry1, 30); + + // Compute CRC of first module + uintptr_t data_addr1 = jit_stream_flash_platform_executable_to_ptr((uintptr_t) entry1); + struct JITEntry *jit_entry1 = (struct JITEntry *) (data_addr1 - sizeof(struct JITEntry)); + uint32_t crc1_original = crc32((const uint8_t *) jit_entry1, sizeof(struct JITEntry) + jit_entry1->size); + fprintf(stderr, "First module: CRC=0x%08x, size=%u bytes\n", + (unsigned int) crc1_original, (unsigned int) jit_entry1->size); + + // Start creating a second module but DON'T finalize (simulate crash/OOM) + argv[0] = term_from_int(10); + term stream2_attempt1 = new_nif(ctx, 1, argv); + + uint8_t code2[200]; + memset(code2, 0xBB, sizeof(code2)); + argv[0] = stream2_attempt1; + argv[1] = make_binary_rooted(ctx, code2, sizeof(code2), &argv[0], 1); + stream2_attempt1 = append_nif(ctx, 2, argv); + + // DON'T flush or finalize - this simulates a failed compilation + // Now there's stale data in flash after the first module + + fprintf(stderr, "Simulated failed compilation - stale data left in flash\n"); + + // Try to create the second module again - should detect and clean up stale data + argv[0] = term_from_int(10); + term stream2_attempt2 = new_nif(ctx, 1, argv); + + memset(code2, 0xCC, sizeof(code2)); + argv[0] = stream2_attempt2; + argv[1] = make_binary_rooted(ctx, code2, sizeof(code2), &argv[0], 1); + stream2_attempt2 = append_nif(ctx, 2, argv); + + argv[0] = stream2_attempt2; + term 
stream2_flushed = flush_nif(ctx, 1, argv); + ModuleNativeEntryPoint entry2 = jit_stream_flash_entry_point(ctx, stream2_flushed); + + Module fake_mod2; + fake_mod2.code = (CodeChunk *) 0x87654321; + globalcontext_set_cache_native_code(glb, &fake_mod2, 1, entry2, 20); + + fprintf(stderr, "Second module successfully created after cleanup\n"); + + // Verify first module's CRC is still intact + uint32_t crc1_after_cleanup = crc32((const uint8_t *) jit_entry1, sizeof(struct JITEntry) + jit_entry1->size); + fprintf(stderr, "First module after cleanup: CRC=0x%08x (expected 0x%08x)\n", + (unsigned int) crc1_after_cleanup, (unsigned int) crc1_original); + + if (crc1_after_cleanup != crc1_original) { + fprintf(stderr, "FAIL: First module corrupted during stale data cleanup!\n"); + exit(1); + } + + scheduler_terminate(ctx); + globalcontext_destroy(glb); + + fprintf(stderr, "PASS: Stale data cleanup test\n"); +} + +int main(int argc, char **argv) +{ + UNUSED(argc); + UNUSED(argv); + + fprintf(stderr, "Starting jit_stream_flash tests...\n"); + + test_basic_append_flush(); + test_multiple_appends(); + test_replace(); + test_second_module_bug(); + test_magic_0xffff_but_garbage_bug(); + test_garbage_flash_bug(); + test_esp32_crash_bug(); + test_tail_corruption_bug(); + test_stale_data_cleanup(); + + fprintf(stderr, "\nAll tests passed!\n"); + return EXIT_SUCCESS; +} diff --git a/tests/test.c b/tests/test.c index 577572c967..accbe9534a 100644 --- a/tests/test.c +++ b/tests/test.c @@ -607,6 +607,8 @@ struct Test tests[] = { TEST_CASE(test_lists_keymember), TEST_CASE(test_lists_keyfind), + TEST_CASE(test_inline_arith), + // TEST CRASHES HERE: TEST_CASE(memlimit), { NULL, 0, false, false } @@ -713,6 +715,11 @@ int test_modules_execution(bool beam, bool skip, int count, char **item) perror("Error: cannot find armv6m directory"); return EXIT_FAILURE; } +#elif JIT_ARCH_TARGET == JIT_ARCH_RISCV32 + if (chdir("riscv32") != 0) { + perror("Error: cannot find riscv32 directory"); + return 
EXIT_FAILURE; + } #else #error Unknown JIT target #endif