From b60722b4de500a68a913a03eda8b838d953fb189 Mon Sep 17 00:00:00 2001 From: Tom Short Date: Sat, 6 Mar 2021 07:55:06 -0500 Subject: [PATCH 001/159] Basic rewrite --- Project.toml | 11 ++------ README.md | 40 ++-------------------------- src/StaticCompiler.jl | 62 ++++++++++++++++++++++++++++--------------- test/runtests.jl | 17 +++--------- 4 files changed, 47 insertions(+), 83 deletions(-) diff --git a/Project.toml b/Project.toml index 583ae23..e7c1344 100644 --- a/Project.toml +++ b/Project.toml @@ -4,19 +4,12 @@ authors = ["Tom Short"] version = "0.1.0" [deps] -Cassette = "7057c7e9-c182-5462-911a-8362d720325c" -DataStructures = "864edb3b-99cc-5e75-8d2d-829cb0a9cfe8" +GPUCompiler = "61eb1bfa-7361-4325-ad38-22787b887f55" LLVM = "929cbde3-209d-540e-8aea-75f648917ca0" +LLVM_full_jll = "a3ccf953-465e-511d-b87f-60a6490c289d" Libdl = "8f399da3-3557-5675-b5ff-fb832c97cbdb" -MacroTools = "1914dd2f-81c6-5fcd-8719-6d5c9610ff09" -TypedCodeUtils = "687fb87b-adea-59d5-9be9-82253b54685d" [compat] -DataStructures = "0.17" -Cassette = "0.3" -LLVM = "1.3" -TypedCodeUtils = "0.1" -MacroTools = "0.5" julia = "1.2" [extras] diff --git a/README.md b/README.md index 8b09b28..5b0ab32 100644 --- a/README.md +++ b/README.md @@ -9,6 +9,7 @@ This is an experimental package to compile Julia code to standalone libraries. A system image is not needed. It is also meant for cross compilation, so Julia code can be compiled for other targets, including WebAssembly and embedded targets. 
## Installation and Usage + ```julia using Pkg Pkg.add(PackageSpec( url = "https://github.com/tshort/StaticCompiler.jl", rev = "master")) @@ -16,45 +17,8 @@ Pkg.add(PackageSpec( url = "https://github.com/tshort/StaticCompiler.jl", rev = ```julia using StaticCompiler ``` -**Documentation**: [![Dev](https://img.shields.io/badge/docs-dev-blue.svg)](https://tshort.github.io/StaticCompiler.jl/dev) ## Approach -This package uses the [LLVM package](https://github.com/maleadt/LLVM.jl) to generate code in the same fashion as [CUDAnative](https://github.com/JuliaGPU/CUDAnative.jl). - -Some of the key details of this approach are: - -* **ccalls and cglobal** -- When Julia compiles code CUDAnative style, `ccall` and `cglobal` references get compiled to a direct pointer. `StaticCompiler` converts these to symbol references for later linking. For `ccall` with a tuple call to a symbol in a library, `Cassette` is used to convert that to just a symbol reference (no dynamic library loading). - -* **Global variables** -- A lot of code gets compiled with global variables, and these get compiled to a direct pointer. `StaticCompiler` includes a basic serialize/deserialize approach. Right now, this is fairly basic, and it takes shortcuts for some objects by swapping in wrong types. This can work because many times, the objects are not really used in the code. Finding the global variable can be a little tricky because the pointer is converted to a Julia object with `unsafe_pointer_to_objref`, and that segfaults for some addresses. How to best handle cases like that is still to be determined. - -* **Initialization** -- If libjulia is used, some init code needs to be run to set up garbage collection and other things. For this, a basic `blank.ji` file is used to feed `jl_init_with_image`. - -Long term, a better approach may be to use Julia's standard compilation techniques with "tree shaking" to generate a reduced system image (see [here](https://github.com/JuliaLang/julia/issues/33670)). 
- -## Example -The API still needs work, but here is the general approach right now: - -```julia -using StaticCompiler -m = irgen(cos, Tuple{Float64}) -write(m, "cos.bc") -write_object(m, "cos.o") -``` - -`cos.o` should contain a function called `cos`. From there, you need to convert to link as needed with `libjulia`. - -See the `test` directory for more information and types of code that currently run. The most advanced example that works is a call to an ODE solution using modified code from [ODE.jl](https://github.com/JuliaDiffEq/ODE.jl). For information on compiling and linking to an executable, see [test/standalone-exe.jl](./test/standalone-exe.jl). - -## Known limitations - -* It won't work for recursive code. Jameson's [codegen-norecursion](https://github.com/JuliaLang/julia/tree/jn/codegen-norecursion) should fix that when merged. - -* `cfunction` is not supported. - -* Generic code that uses `jl_apply_generic` does not work. One strategy for this is to use Cassette to swap out known code that uses dynamic calls. Another approach is to write something like `jl_apply_generic` to implement dynamic calls. - -* The use of Cassette makes it more difficult for Julia to infer some things, and only type-stable code can be statically compiled with this approach. -* It's only been tested on Linux and Windows. +This package uses the [GPUCompiler package](https://github.com/JuliaGPU/GPUCompiler.jl) to generate code. -Finally, this whole approach is young and likely brittle. Do not expect it to work for your code. 
diff --git a/src/StaticCompiler.jl b/src/StaticCompiler.jl index 5f16b62..6c3f0f1 100644 --- a/src/StaticCompiler.jl +++ b/src/StaticCompiler.jl @@ -1,28 +1,46 @@ module StaticCompiler -export irgen, write_object, @extern - +import GPUCompiler +import LLVM +import LLVM_full_jll import Libdl -using LLVM -using LLVM.Interop -using TypedCodeUtils -import TypedCodeUtils: reflect, lookthrough, canreflect, - DefaultConsumer, Reflection, Callsite, - identify_invoke, identify_call, identify_foreigncall, - process_invoke, process_call -using MacroTools -using DataStructures: MultiDict - - -include("serialize.jl") -include("utils.jl") -include("ccalls.jl") -include("globals.jl") -include("overdub.jl") -include("irgen.jl") -include("extern.jl") - -include("helpers/helpers.jl") +export generate_shlib_fptr + +module TestRuntime + # dummy methods + signal_exception() = return + malloc(sz) = C_NULL + report_oom(sz) = return + report_exception(ex) = return + report_exception_name(ex) = return + report_exception_frame(idx, func, file, line) = return + + # for validation + sin(x) = Base.sin(x) +end + +struct TestCompilerParams <: GPUCompiler.AbstractCompilerParams end +GPUCompiler.runtime_module(::GPUCompiler.CompilerJob{<:Any,TestCompilerParams}) = TestRuntime + +function generate_shlib_fptr(f, tt, name = GPUCompiler.safe_name(repr(f))) + mktemp() do path, io + target = GPUCompiler.NativeCompilerTarget(;reloc=LLVM.API.LLVMRelocPIC, extern=true) + source = GPUCompiler.FunctionSpec(f, Base.to_tuple_type(tt), false, name) + params = TestCompilerParams() + job = GPUCompiler.CompilerJob(target, source, params) + obj, _ = GPUCompiler.codegen(:obj, job; strip=true, only_entry=false, validate=false) + write(io, obj) + flush(io) + # FIXME: Be more portable + run(`ld -shared -o $path.$(Libdl.dlext) $path`) + ptr = Libdl.dlopen("$path.$(Libdl.dlext)", Libdl.RTLD_LOCAL) + fptr = Libdl.dlsym(ptr, "julia_$name") + @assert fptr != C_NULL + atexit(()->rm("$path.$(Libdl.dlext)")) + fptr + end +end 
+ end # module diff --git a/test/runtests.jl b/test/runtests.jl index 3381a87..24470e0 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -1,21 +1,10 @@ using StaticCompiler using Test -using LLVM -using Libdl -cd(@__DIR__) -@testset "ccalls" begin - include("ccalls.jl") -end -@testset "globals" begin - include("globals.jl") -end -@testset "others" begin - include("others.jl") +@testset "basics" begin + f1(x) = x+1 + @test ccall(generate_shlib_fptr(f1, (Int,)), Int, (Int,), 1) == 2 end -@testset "standalone" begin - include("standalone-exe.jl") -end From d3b8f5aa50dea700cf58aafaf43f08eebeea2cb2 Mon Sep 17 00:00:00 2001 From: Tom Short Date: Sat, 6 Mar 2021 09:18:56 -0500 Subject: [PATCH 002/159] Include the Manifest --- .gitignore | 3 - Manifest.toml | 211 ++++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 211 insertions(+), 3 deletions(-) create mode 100644 Manifest.toml diff --git a/.gitignore b/.gitignore index 2ef6f28..6e716b0 100644 --- a/.gitignore +++ b/.gitignore @@ -2,11 +2,8 @@ *.jl.cov *.jl.mem .DS_Store -Manifest.toml -!helpers/Manifest.toml /dev/ /test/standalone -/test/Manifest.toml /test/test.* test.o diff --git a/Manifest.toml b/Manifest.toml new file mode 100644 index 0000000..5808f59 --- /dev/null +++ b/Manifest.toml @@ -0,0 +1,211 @@ +# This file is machine-generated - editing it directly is not advised + +[[ArgTools]] +uuid = "0dad84c5-d112-42e6-8d28-ef12dabb789f" + +[[Artifacts]] +uuid = "56f22d72-fd6d-98f1-02f0-08ddc0907c33" + +[[Base64]] +uuid = "2a0f44e3-6c83-55bd-87e4-b1978d98bd5f" + +[[CEnum]] +git-tree-sha1 = "215a9aa4a1f23fbd05b92769fdd62559488d70e9" +uuid = "fa961155-64e5-5f13-b03f-caf6b980ea82" +version = "0.4.1" + +[[Compat]] +deps = ["Base64", "Dates", "DelimitedFiles", "Distributed", "InteractiveUtils", "LibGit2", "Libdl", "LinearAlgebra", "Markdown", "Mmap", "Pkg", "Printf", "REPL", "Random", "SHA", "Serialization", "SharedArrays", "Sockets", "SparseArrays", "Statistics", "Test", "UUIDs", "Unicode"] 
+git-tree-sha1 = "919c7f3151e79ff196add81d7f4e45d91bbf420b" +uuid = "34da2185-b29b-5c13-b0c7-acf172513d20" +version = "3.25.0" + +[[DataStructures]] +deps = ["Compat", "InteractiveUtils", "OrderedCollections"] +git-tree-sha1 = "4437b64df1e0adccc3e5d1adbc3ac741095e4677" +uuid = "864edb3b-99cc-5e75-8d2d-829cb0a9cfe8" +version = "0.18.9" + +[[Dates]] +deps = ["Printf"] +uuid = "ade2ca70-3891-5945-98fb-dc099432e06a" + +[[DelimitedFiles]] +deps = ["Mmap"] +uuid = "8bb1440f-4735-579b-a4ab-409b98df4dab" + +[[Distributed]] +deps = ["Random", "Serialization", "Sockets"] +uuid = "8ba89e20-285c-5b6f-9357-94700520ee1b" + +[[Downloads]] +deps = ["ArgTools", "LibCURL", "NetworkOptions"] +uuid = "f43a241f-c20a-4ad4-852c-f6b1247861c6" + +[[ExprTools]] +git-tree-sha1 = "10407a39b87f29d47ebaca8edbc75d7c302ff93e" +uuid = "e2ba6199-217a-4e67-a87a-7c52f15ade04" +version = "0.1.3" + +[[GPUCompiler]] +deps = ["DataStructures", "ExprTools", "InteractiveUtils", "LLVM", "Libdl", "Logging", "Scratch", "Serialization", "TimerOutputs", "UUIDs"] +git-tree-sha1 = "7db5fa8c3f7da5ef25ba5740802b19a87c962a02" +repo-rev = "jps/static-compile" +repo-url = "https://github.com/JuliaGPU/GPUCompiler.jl.git" +uuid = "61eb1bfa-7361-4325-ad38-22787b887f55" +version = "0.10.1" + +[[InteractiveUtils]] +deps = ["Markdown"] +uuid = "b77e0a4c-d291-57a0-90e8-8db25a27a240" + +[[JLLWrappers]] +git-tree-sha1 = "a431f5f2ca3f4feef3bd7a5e94b8b8d4f2f647a0" +uuid = "692b3bcd-3c85-4b1f-b108-f13ce0eb3210" +version = "1.2.0" + +[[LLVM]] +deps = ["CEnum", "Libdl", "Printf", "Unicode"] +git-tree-sha1 = "b616937c31337576360cb9fb872ec7633af7b194" +uuid = "929cbde3-209d-540e-8aea-75f648917ca0" +version = "3.6.0" + +[[LLVM_full_jll]] +deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg"] +git-tree-sha1 = "bbd0d3ef31d8e819035537227b0e6cac8c900913" +uuid = "a3ccf953-465e-511d-b87f-60a6490c289d" +version = "11.0.1+3" + +[[LibCURL]] +deps = ["LibCURL_jll", "MozillaCACerts_jll"] +uuid = "b27032c2-a3e7-50c8-80cd-2d36dbcbfd21" + 
+[[LibCURL_jll]] +deps = ["Artifacts", "LibSSH2_jll", "Libdl", "MbedTLS_jll", "Zlib_jll", "nghttp2_jll"] +uuid = "deac9b47-8bc7-5906-a0fe-35ac56dc84c0" + +[[LibGit2]] +deps = ["Base64", "NetworkOptions", "Printf", "SHA"] +uuid = "76f85450-5226-5b5a-8eaa-529ad045b433" + +[[LibSSH2_jll]] +deps = ["Artifacts", "Libdl", "MbedTLS_jll"] +uuid = "29816b5a-b9ab-546f-933c-edad1886dfa8" + +[[Libdl]] +uuid = "8f399da3-3557-5675-b5ff-fb832c97cbdb" + +[[LinearAlgebra]] +deps = ["Libdl", "libblastrampoline_jll"] +uuid = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e" + +[[Logging]] +uuid = "56ddb016-857b-54e1-b83d-db4d58db5568" + +[[Markdown]] +deps = ["Base64"] +uuid = "d6f4376e-aef5-505a-96c1-9c027394607a" + +[[MbedTLS_jll]] +deps = ["Artifacts", "Libdl"] +uuid = "c8ffd9c3-330d-5841-b78e-0817d7145fa1" + +[[Mmap]] +uuid = "a63ad114-7e13-5084-954f-fe012c677804" + +[[MozillaCACerts_jll]] +uuid = "14a3606d-f60d-562e-9121-12d972cd8159" + +[[NetworkOptions]] +uuid = "ca575930-c2e3-43a9-ace4-1e988b2c1908" + +[[OrderedCollections]] +git-tree-sha1 = "4fa2ba51070ec13fcc7517db714445b4ab986bdf" +uuid = "bac558e1-5e72-5ebc-8fee-abe8a469f55d" +version = "1.4.0" + +[[Pkg]] +deps = ["Artifacts", "Dates", "Downloads", "LibGit2", "Libdl", "Logging", "Markdown", "Printf", "REPL", "Random", "SHA", "Serialization", "TOML", "Tar", "UUIDs", "p7zip_jll"] +uuid = "44cfe95a-1eb2-52ea-b672-e2afdf69b78f" + +[[Printf]] +deps = ["Unicode"] +uuid = "de0858da-6303-5e67-8744-51eddeeeb8d7" + +[[REPL]] +deps = ["InteractiveUtils", "Markdown", "Sockets", "Unicode"] +uuid = "3fa0cd96-eef1-5676-8a61-b3b8758bbffb" + +[[Random]] +deps = ["Serialization"] +uuid = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c" + +[[SHA]] +uuid = "ea8e919c-243c-51af-8825-aaa63cd721ce" + +[[Scratch]] +deps = ["Dates"] +git-tree-sha1 = "ad4b278adb62d185bbcb6864dc24959ab0627bf6" +uuid = "6c6a2e73-6563-6170-7368-637461726353" +version = "1.0.3" + +[[Serialization]] +uuid = "9e88b42a-f829-5b0c-bbe9-9e923198166b" + +[[SharedArrays]] +deps = ["Distributed", 
"Mmap", "Random", "Serialization"] +uuid = "1a1011a3-84de-559e-8e89-a11a2f7dc383" + +[[Sockets]] +uuid = "6462fe0b-24de-5631-8697-dd941f90decc" + +[[SparseArrays]] +deps = ["LinearAlgebra", "Random"] +uuid = "2f01184e-e22b-5df5-ae63-d93ebab69eaf" + +[[Statistics]] +deps = ["LinearAlgebra", "SparseArrays"] +uuid = "10745b16-79ce-11e8-11f9-7d13ad32a3b2" + +[[TOML]] +deps = ["Dates"] +uuid = "fa267f1f-6049-4f14-aa54-33bafae1ed76" + +[[Tar]] +deps = ["ArgTools", "SHA"] +uuid = "a4e569a6-e804-4fa4-b0f3-eef7a1d5b13e" + +[[Test]] +deps = ["InteractiveUtils", "Logging", "Random", "Serialization"] +uuid = "8dfed614-e22c-5e08-85e1-65c5234f0b40" + +[[TimerOutputs]] +deps = ["Printf"] +git-tree-sha1 = "32cdbe6cd2d214c25a0b88f985c9e0092877c236" +uuid = "a759f4b9-e2f1-59dc-863e-4aeb61b1ea8f" +version = "0.5.8" + +[[UUIDs]] +deps = ["Random", "SHA"] +uuid = "cf7118a7-6976-5b1a-9a39-7adc72f591a4" + +[[Unicode]] +uuid = "4ec0a83e-493e-50e2-b9ac-8f72acf5a8f5" + +[[Zlib_jll]] +deps = ["Libdl"] +uuid = "83775a58-1f1d-513f-b197-d71354ab007a" + +[[libblastrampoline_jll]] +deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg"] +git-tree-sha1 = "411ee75a5364426ef54afa88482ca84f35937923" +uuid = "8e850b90-86db-534c-a0d3-1478176c7d93" +version = "3.0.2+0" + +[[nghttp2_jll]] +deps = ["Artifacts", "Libdl"] +uuid = "8e850ede-7688-5339-a07c-302acd2aaf8d" + +[[p7zip_jll]] +deps = ["Artifacts", "Libdl"] +uuid = "3f19e933-33d8-53b3-aaab-bd5110c3b7a0" From 04115fab38c52bbcc2546bb7e140dad631382fa7 Mon Sep 17 00:00:00 2001 From: Tom Short Date: Sat, 6 Mar 2021 09:30:48 -0500 Subject: [PATCH 003/159] Use LLVM's linker --- src/StaticCompiler.jl | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/StaticCompiler.jl b/src/StaticCompiler.jl index 6c3f0f1..b210544 100644 --- a/src/StaticCompiler.jl +++ b/src/StaticCompiler.jl @@ -23,6 +23,8 @@ end struct TestCompilerParams <: GPUCompiler.AbstractCompilerParams end 
GPUCompiler.runtime_module(::GPUCompiler.CompilerJob{<:Any,TestCompilerParams}) = TestRuntime +const linker = Sys.isunix() ? "ld.lld" : Sys.isapple() ? "ld64.lld" : "lld-link" + function generate_shlib_fptr(f, tt, name = GPUCompiler.safe_name(repr(f))) mktemp() do path, io target = GPUCompiler.NativeCompilerTarget(;reloc=LLVM.API.LLVMRelocPIC, extern=true) @@ -33,7 +35,8 @@ function generate_shlib_fptr(f, tt, name = GPUCompiler.safe_name(repr(f))) write(io, obj) flush(io) # FIXME: Be more portable - run(`ld -shared -o $path.$(Libdl.dlext) $path`) + # run(`ld -shared -o $path.$(Libdl.dlext) $path`) + run(`$(StaticCompiler.LLVM_full_jll.PATH)/$linker -shared -o $path.$(Libdl.dlext) $path`) ptr = Libdl.dlopen("$path.$(Libdl.dlext)", Libdl.RTLD_LOCAL) fptr = Libdl.dlsym(ptr, "julia_$name") @assert fptr != C_NULL From 73413c8107954c0a5000c34e07db82d2d5f73e27 Mon Sep 17 00:00:00 2001 From: Tom Short Date: Sat, 6 Mar 2021 09:33:51 -0500 Subject: [PATCH 004/159] Fixup CI and compatibility --- .github/workflows/ci.yml | 3 +-- .travis.yml | 1 - Project.toml | 4 ++-- 3 files changed, 3 insertions(+), 5 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index cb97e17..b9510c3 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -10,11 +10,10 @@ jobs: fail-fast: false matrix: version: - - '1.3' - 'nightly' os: - ubuntu-latest - # - macOS-latest + - macOS-latest - windows-latest arch: - x64 diff --git a/.travis.yml b/.travis.yml index d735ad5..9011d24 100644 --- a/.travis.yml +++ b/.travis.yml @@ -4,7 +4,6 @@ os: - linux - windows julia: - - 1.3 - nightly matrix: # extra linux test diff --git a/Project.toml b/Project.toml index e7c1344..098e7e2 100644 --- a/Project.toml +++ b/Project.toml @@ -1,7 +1,7 @@ name = "StaticCompiler" uuid = "81625895-6c0f-48fc-b932-11a18313743c" authors = ["Tom Short"] -version = "0.1.0" +version = "0.2.0" [deps] GPUCompiler = "61eb1bfa-7361-4325-ad38-22787b887f55" @@ -10,7 +10,7 @@ LLVM_full_jll = 
"a3ccf953-465e-511d-b87f-60a6490c289d" Libdl = "8f399da3-3557-5675-b5ff-fb832c97cbdb" [compat] -julia = "1.2" +julia = "1.7" [extras] Formatting = "59287772-0a20-5a39-b81b-1366585eb4c0" From 5db52a25c512cc7b8fa4c4268e67a9ab2c1f1c17 Mon Sep 17 00:00:00 2001 From: Tom Short Date: Sat, 6 Mar 2021 09:39:57 -0500 Subject: [PATCH 005/159] Cleanup --- src/StaticCompiler.jl | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/StaticCompiler.jl b/src/StaticCompiler.jl index b210544..74b8076 100644 --- a/src/StaticCompiler.jl +++ b/src/StaticCompiler.jl @@ -34,8 +34,6 @@ function generate_shlib_fptr(f, tt, name = GPUCompiler.safe_name(repr(f))) obj, _ = GPUCompiler.codegen(:obj, job; strip=true, only_entry=false, validate=false) write(io, obj) flush(io) - # FIXME: Be more portable - # run(`ld -shared -o $path.$(Libdl.dlext) $path`) run(`$(StaticCompiler.LLVM_full_jll.PATH)/$linker -shared -o $path.$(Libdl.dlext) $path`) ptr = Libdl.dlopen("$path.$(Libdl.dlext)", Libdl.RTLD_LOCAL) fptr = Libdl.dlsym(ptr, "julia_$name") From aa9eef57601fcb941e8b525bb7d35a2494a74198 Mon Sep 17 00:00:00 2001 From: Tom Short Date: Sat, 6 Mar 2021 09:49:13 -0500 Subject: [PATCH 006/159] Add `compile` to return an LLVM module --- src/StaticCompiler.jl | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/src/StaticCompiler.jl b/src/StaticCompiler.jl index 74b8076..b1fe6d7 100644 --- a/src/StaticCompiler.jl +++ b/src/StaticCompiler.jl @@ -5,7 +5,7 @@ import LLVM import LLVM_full_jll import Libdl -export generate_shlib_fptr +export generate_shlib_fptr, compile module TestRuntime # dummy methods @@ -44,4 +44,15 @@ function generate_shlib_fptr(f, tt, name = GPUCompiler.safe_name(repr(f))) end +# Return an LLVM module +function compile(f, tt, name = GPUCompiler.safe_name(repr(f))) + target = GPUCompiler.NativeCompilerTarget(;reloc=LLVM.API.LLVMRelocPIC, extern=true) + source = GPUCompiler.FunctionSpec(f, Base.to_tuple_type(tt), false, name) + params = TestCompilerParams() + 
job = GPUCompiler.CompilerJob(target, source, params) + m, _ = GPUCompiler.codegen(:llvm, job; strip=true, only_entry=false, validate=false) + return m +end + + end # module From ecc1b8cb547c6e66ff24a3b4b8479163f786b705 Mon Sep 17 00:00:00 2001 From: Tom Short Date: Sun, 7 Mar 2021 07:08:46 -0500 Subject: [PATCH 007/159] Split up methods (Mason's suggestion) --- README.md | 16 ++++++++++++++++ src/StaticCompiler.jl | 28 ++++++++++++++++++++-------- 2 files changed, 36 insertions(+), 8 deletions(-) diff --git a/README.md b/README.md index 5b0ab32..48a0e2b 100644 --- a/README.md +++ b/README.md @@ -14,8 +14,24 @@ This is an experimental package to compile Julia code to standalone libraries. A using Pkg Pkg.add(PackageSpec( url = "https://github.com/tshort/StaticCompiler.jl", rev = "master")) ``` + ```julia using StaticCompiler +f(x) = 2x + +# compile `f` and return an LLVM module +m = compile(f, (Int,)) + +# compile `f` and write to a shared library ("f.so" or "f.dll") +generate_shlib(f, (Int,), "libf") +# find a function pointer for this shared library +fptr = generate_shlib_fptr("libf", "f") +ccall(fptr, Int, (Int,), 2) + +# do this in one step (this time with a temporary shared library) +fptr = generate_shlib_fptr(f, (Int,)) +ccall(fptr, Int, (Int,), 2) + ``` ## Approach diff --git a/src/StaticCompiler.jl b/src/StaticCompiler.jl index b1fe6d7..ad044a2 100644 --- a/src/StaticCompiler.jl +++ b/src/StaticCompiler.jl @@ -5,7 +5,7 @@ import LLVM import LLVM_full_jll import Libdl -export generate_shlib_fptr, compile +export generate_shlib, generate_shlib_fptr, compile module TestRuntime # dummy methods @@ -25,8 +25,8 @@ GPUCompiler.runtime_module(::GPUCompiler.CompilerJob{<:Any,TestCompilerParams}) const linker = Sys.isunix() ? "ld.lld" : Sys.isapple() ? 
"ld64.lld" : "lld-link" -function generate_shlib_fptr(f, tt, name = GPUCompiler.safe_name(repr(f))) - mktemp() do path, io +function generate_shlib(f, tt, path::String = tempname(), name = GPUCompiler.safe_name(repr(f))) + open(path, "w") do io target = GPUCompiler.NativeCompilerTarget(;reloc=LLVM.API.LLVMRelocPIC, extern=true) source = GPUCompiler.FunctionSpec(f, Base.to_tuple_type(tt), false, name) params = TestCompilerParams() @@ -35,14 +35,26 @@ function generate_shlib_fptr(f, tt, name = GPUCompiler.safe_name(repr(f))) write(io, obj) flush(io) run(`$(StaticCompiler.LLVM_full_jll.PATH)/$linker -shared -o $path.$(Libdl.dlext) $path`) - ptr = Libdl.dlopen("$path.$(Libdl.dlext)", Libdl.RTLD_LOCAL) - fptr = Libdl.dlsym(ptr, "julia_$name") - @assert fptr != C_NULL + rm(path) + end + path, name +end +function generate_shlib_fptr(f, tt, path::String=tempname(), name = GPUCompiler.safe_name(repr(f)); temp::Bool=true) + generate_shlib(f, tt, path, name) + ptr = Libdl.dlopen("$(abspath(path)).$(Libdl.dlext)", Libdl.RTLD_LOCAL) + fptr = Libdl.dlsym(ptr, "julia_$name") + @assert fptr != C_NULL + if temp atexit(()->rm("$path.$(Libdl.dlext)")) - fptr end + fptr +end +function generate_shlib_fptr(path::String, name) + ptr = Libdl.dlopen("$(abspath(path)).$(Libdl.dlext)", Libdl.RTLD_LOCAL) + fptr = Libdl.dlsym(ptr, "julia_$name") + @assert fptr != C_NULL + fptr end - # Return an LLVM module function compile(f, tt, name = GPUCompiler.safe_name(repr(f))) From b1ec2e5cbadad8c123bb123c4704113e2233c068 Mon Sep 17 00:00:00 2001 From: Jorge Alberto Vieyra Salas Date: Sun, 7 Mar 2021 14:12:15 +0100 Subject: [PATCH 008/159] Added simple tests to start testing. Most of them segfault. 
--- test/runtests.jl | 115 +++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 112 insertions(+), 3 deletions(-) diff --git a/test/runtests.jl b/test/runtests.jl index 24470e0..96e0d27 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -1,10 +1,119 @@ using StaticCompiler using Test +using Libdl +using LinearAlgebra +@testset "Basics" begin + simple_sum(x) = x + one(typeof(x)) + + # This probably needs a macro + @test ccall(generate_shlib_fptr(simple_sum, (Int,)), Int, (Int,), 1) == Int(2) + @test ccall(generate_shlib_fptr(simple_sum, (Float64,)), Float64, (Float64 ,), 1) == Float64(2) + + @test ccall(generate_shlib_fptr(simple_sum, (Int32,)), Int32, (Int32,), 1) == Int32(2) + @test ccall(generate_shlib_fptr(simple_sum, (Float32,)), Float32, (Float32 ,), 1) == Float16(2) + + @test ccall(generate_shlib_fptr(simple_sum, (Int16,)), Int16, (Int16,), 1) == Int16(2) + @test ccall(generate_shlib_fptr(simple_sum, (Float16,)), Float16, (Float16 ,), 1) == Float16(2) -@testset "basics" begin - f1(x) = x+1 - @test ccall(generate_shlib_fptr(f1, (Int,)), Int, (Int,), 1) == 2 end +@testset "Recursion" begin + fib(n) = n <= 1 ? n : fib(n - 1) + fib(n - 2) + # This works on the REPL but fails here + @test_skip ccall(generate_shlib_fptr(fib, (Int,)), Int, (Int,), 10) == 55 +end + +# Call binaries for testing +# Need Mason's code +# @testset "Generate binary" begin +# fib(n) = n <= 1 ? n : fib(n - 1) + fib(n - 2) +# libname = tempname() * "." 
* Libdl.dlext +# generate_shlib(fib, (Int,), libname) +# ptr = Libdl.dlopen(libname, Libdl.RTLD_LOCAL) +# fptr = Libdl.dlsym(ptr, "julia_fib") +# @assert fptr != C_NULL +# @test_skip ccall(fptr, Int, (Int,), 10) == 55 +# end + +@testset "Loops" begin + function sum_first_N_int(N) + s = 0 + for a in 1:N + s += a + end + s + end + @test ccall(generate_shlib_fptr(sum_first_N_int, (Int,)), Int, (Int,), 10) == 55 + + function sum_first_N_float64(N) + s = Float64(0) + for a in 1:N + s += Float64(a) + end + s + end + @test ccall(generate_shlib_fptr(sum_first_N_float64, (Int,)), Float64, (Int,), 10) == 55. + + function sum_first_N_int_inbounds(N) + s = 0 + @inbounds for a in 1:N + s += a + end + s + end + @test ccall(generate_shlib_fptr(sum_first_N_int_inbounds, (Int,)), Int, (Int,), 10) == 55 + + + function sum_first_N_float64_inbounds(N) + s = Float64(0) + @inbounds for a in 1:N + s += Float64(a) + end + s + end + @test ccall(generate_shlib_fptr(sum_first_N_float64_inbounds, (Int,)), Float64, (Int,), 10) == 55. + +end + +# Arrays with different input types Int32, Int64, Float32, Float64, Complex? +@testset "Arrays" begin + + arr = collect(1:10) + function array_sum(n, A) + s = zero(eltype(A)) + for i in 1:n + s += A[i] + end + s + end + + #This segfaults, not sure if this is how you pass around arrays + @test_skip ccall(generate_shlib_fptr(array_sum, (Csize_t, Ptr{Float64})), Int, (Csize_t, Ptr{Float64}), length(arr), arr) == 55 + +end + +# Just to call external libraries +@testset "BLAS" begin + function mydot(N) + a = Float64.(1:N) + BLAS.dot(N, a, 1, a, 1) + end + @test_skip ccall(generate_shlib_fptr(mydot, (Int,)), Float64, (Int,), 2) == 5. +end + +@testset "Hello World" begin + function hello(N) + println("Hello World $N") + N + end + # How do I test this? + # Also ... this segfaults + @test_skip ccall(generate_shlib_fptr(hello, (Int,)), Int, (Int,), 1) == 1 +end + + +# data structures, dictionaries, tuples, named tuples +# passing pointers? 
+# @inbounds LoopVectorization From dc83554f07e6ce60a8253a3ebf3d169e091aa1ba Mon Sep 17 00:00:00 2001 From: Jorge Alberto Vieyra Salas Date: Sun, 7 Mar 2021 14:24:52 +0100 Subject: [PATCH 009/159] Added simple tests to start testing. Most of them segfault. --- test/runtests.jl | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/test/runtests.jl b/test/runtests.jl index 96e0d27..731caa0 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -26,16 +26,16 @@ end end # Call binaries for testing -# Need Mason's code -# @testset "Generate binary" begin -# fib(n) = n <= 1 ? n : fib(n - 1) + fib(n - 2) -# libname = tempname() * "." * Libdl.dlext -# generate_shlib(fib, (Int,), libname) -# ptr = Libdl.dlopen(libname, Libdl.RTLD_LOCAL) -# fptr = Libdl.dlsym(ptr, "julia_fib") -# @assert fptr != C_NULL -# @test_skip ccall(fptr, Int, (Int,), 10) == 55 -# end +@testset "Generate binary" begin + fib(n) = n <= 1 ? n : fib(n - 1) + fib(n - 2) + libname = tempname() + generate_shlib(fib, (Int,), libname) + ptr = Libdl.dlopen(libname * "." * Libdl.dlext, Libdl.RTLD_LOCAL) + fptr = Libdl.dlsym(ptr, "julia_fib") + @assert fptr != C_NULL + # This works on REPL + @test_skip ccall(fptr, Int, (Int,), 10) == 55 +end @testset "Loops" begin function sum_first_N_int(N) From 00772b5c40a1a4191f9b2bcb0b1c2096aed56630 Mon Sep 17 00:00:00 2001 From: Jorge Alberto Vieyra Salas Date: Sun, 7 Mar 2021 14:48:00 +0100 Subject: [PATCH 010/159] Commented the Generate binary test, because it was completly broken --- test/runtests.jl | 21 +++++++++++---------- 1 file changed, 11 insertions(+), 10 deletions(-) diff --git a/test/runtests.jl b/test/runtests.jl index 731caa0..b721ea6 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -26,16 +26,17 @@ end end # Call binaries for testing -@testset "Generate binary" begin - fib(n) = n <= 1 ? n : fib(n - 1) + fib(n - 2) - libname = tempname() - generate_shlib(fib, (Int,), libname) - ptr = Libdl.dlopen(libname * "." 
* Libdl.dlext, Libdl.RTLD_LOCAL) - fptr = Libdl.dlsym(ptr, "julia_fib") - @assert fptr != C_NULL - # This works on REPL - @test_skip ccall(fptr, Int, (Int,), 10) == 55 -end +# @testset "Generate binary" begin +# fib(n) = n <= 1 ? n : fib(n - 1) + fib(n - 2) +# libname = tempname() +# generate_shlib(fib, (Int,), libname) +# ptr = Libdl.dlopen(libname * "." * Libdl.dlext, Libdl.RTLD_LOCAL) +# fptr = Libdl.dlsym(ptr, "julia_fib") +# @assert fptr != C_NULL +# # This works on REPL +# @test_skip ccall(fptr, Int, (Int,), 10) == 55 +# end + @testset "Loops" begin function sum_first_N_int(N) From 23831ae8c1f741ae2f6626c5aec922c6982cc8c5 Mon Sep 17 00:00:00 2001 From: MasonProtter Date: Tue, 1 Feb 2022 18:04:38 -0700 Subject: [PATCH 011/159] update, remove junk, fix tests, use `gcc` for linking --- .appveyor.yml | 31 ---- .travis.yml | 23 --- Manifest.toml | 179 +++++++------------- Project.toml | 3 +- README.md | 12 +- docs/src/backend.md | 2 +- docs/src/helpers.md | 23 --- docs/src/index.md | 64 -------- src/StaticCompiler.jl | 15 +- src/ccalls.jl | 90 ---------- src/extern.jl | 16 -- src/globals.jl | 164 ------------------- src/helpers/README.md | 12 -- src/helpers/helpers.jl | 37 ----- src/helpers/jlrun.jl | 61 ------- src/helpers/juliaconfig.jl | 63 ------- src/helpers/standalone-exe.jl | 148 ----------------- src/irgen.jl | 300 ---------------------------------- src/overdub.jl | 33 ---- src/serialize.jl | 242 --------------------------- src/utils.jl | 56 ------- test/ccalls.jl | 35 ---- test/globals.jl | 36 ---- test/ode.jl | 182 --------------------- test/others.jl | 73 --------- test/runtests.jl | 28 +++- test/standalone-exe.jl | 41 ----- 27 files changed, 102 insertions(+), 1867 deletions(-) delete mode 100644 .appveyor.yml delete mode 100644 .travis.yml delete mode 100644 src/ccalls.jl delete mode 100644 src/extern.jl delete mode 100644 src/globals.jl delete mode 100644 src/helpers/README.md delete mode 100644 src/helpers/helpers.jl delete mode 100644 
src/helpers/jlrun.jl delete mode 100644 src/helpers/juliaconfig.jl delete mode 100644 src/helpers/standalone-exe.jl delete mode 100644 src/irgen.jl delete mode 100644 src/overdub.jl delete mode 100644 src/serialize.jl delete mode 100644 src/utils.jl delete mode 100644 test/ccalls.jl delete mode 100644 test/globals.jl delete mode 100644 test/ode.jl delete mode 100644 test/others.jl delete mode 100644 test/standalone-exe.jl diff --git a/.appveyor.yml b/.appveyor.yml deleted file mode 100644 index 16bb9b0..0000000 --- a/.appveyor.yml +++ /dev/null @@ -1,31 +0,0 @@ -# Documentation: https://github.com/JuliaCI/Appveyor.jl -environment: - matrix: - - julia_version: 1.3 - - julia_version: nightly -platform: - - x86 - - x64 -matrix: - allow_failures: - - julia_version: nightly -branches: - only: - - master - - /release-.*/ -notifications: - - provider: Email - on_build_success: false - on_build_failure: false - on_build_status_changed: false -install: - - ps: iex ((new-object net.webclient).DownloadString("https://raw.githubusercontent.com/JuliaCI/Appveyor.jl/version-1/bin/install.ps1")) -build_script: - - echo "%JL_BUILD_SCRIPT%" - - C:\julia\bin\julia -e "%JL_BUILD_SCRIPT%" -test_script: - - echo "%JL_TEST_SCRIPT%" - - C:\julia\bin\julia -e "%JL_TEST_SCRIPT%" -on_success: - - echo "%JL_CODECOV_SCRIPT%" - - C:\julia\bin\julia -e "%JL_CODECOV_SCRIPT%" diff --git a/.travis.yml b/.travis.yml deleted file mode 100644 index 9011d24..0000000 --- a/.travis.yml +++ /dev/null @@ -1,23 +0,0 @@ -# Documentation: http://docs.travis-ci.com/user/languages/julia/ -language: julia -os: -- linux -- windows -julia: - - nightly -matrix: - # extra linux test - include: - - os: linux - dist: bionic - - allow_failures: - - julia: nightly - fast_finish: true - -notifications: - email: false - -after_success: - - julia -e 'using Pkg; Pkg.add("Coverage"); using Coverage; Codecov.submit(process_folder())' - - julia -e 'using Pkg; Pkg.add("Coverage"); using Coverage; 
Coveralls.submit(process_folder())' diff --git a/Manifest.toml b/Manifest.toml index 5808f59..44dbe94 100644 --- a/Manifest.toml +++ b/Manifest.toml @@ -1,211 +1,158 @@ # This file is machine-generated - editing it directly is not advised -[[ArgTools]] +manifest_format = "2.0" + +[[deps.ArgTools]] uuid = "0dad84c5-d112-42e6-8d28-ef12dabb789f" -[[Artifacts]] +[[deps.Artifacts]] uuid = "56f22d72-fd6d-98f1-02f0-08ddc0907c33" -[[Base64]] +[[deps.Base64]] uuid = "2a0f44e3-6c83-55bd-87e4-b1978d98bd5f" -[[CEnum]] +[[deps.CEnum]] git-tree-sha1 = "215a9aa4a1f23fbd05b92769fdd62559488d70e9" uuid = "fa961155-64e5-5f13-b03f-caf6b980ea82" version = "0.4.1" -[[Compat]] -deps = ["Base64", "Dates", "DelimitedFiles", "Distributed", "InteractiveUtils", "LibGit2", "Libdl", "LinearAlgebra", "Markdown", "Mmap", "Pkg", "Printf", "REPL", "Random", "SHA", "Serialization", "SharedArrays", "Sockets", "SparseArrays", "Statistics", "Test", "UUIDs", "Unicode"] -git-tree-sha1 = "919c7f3151e79ff196add81d7f4e45d91bbf420b" -uuid = "34da2185-b29b-5c13-b0c7-acf172513d20" -version = "3.25.0" - -[[DataStructures]] -deps = ["Compat", "InteractiveUtils", "OrderedCollections"] -git-tree-sha1 = "4437b64df1e0adccc3e5d1adbc3ac741095e4677" -uuid = "864edb3b-99cc-5e75-8d2d-829cb0a9cfe8" -version = "0.18.9" - -[[Dates]] +[[deps.Dates]] deps = ["Printf"] uuid = "ade2ca70-3891-5945-98fb-dc099432e06a" -[[DelimitedFiles]] -deps = ["Mmap"] -uuid = "8bb1440f-4735-579b-a4ab-409b98df4dab" - -[[Distributed]] -deps = ["Random", "Serialization", "Sockets"] -uuid = "8ba89e20-285c-5b6f-9357-94700520ee1b" - -[[Downloads]] +[[deps.Downloads]] deps = ["ArgTools", "LibCURL", "NetworkOptions"] uuid = "f43a241f-c20a-4ad4-852c-f6b1247861c6" -[[ExprTools]] -git-tree-sha1 = "10407a39b87f29d47ebaca8edbc75d7c302ff93e" +[[deps.ExprTools]] +git-tree-sha1 = "56559bbef6ca5ea0c0818fa5c90320398a6fbf8d" uuid = "e2ba6199-217a-4e67-a87a-7c52f15ade04" -version = "0.1.3" +version = "0.1.8" -[[GPUCompiler]] -deps = ["DataStructures", "ExprTools", 
"InteractiveUtils", "LLVM", "Libdl", "Logging", "Scratch", "Serialization", "TimerOutputs", "UUIDs"] -git-tree-sha1 = "7db5fa8c3f7da5ef25ba5740802b19a87c962a02" -repo-rev = "jps/static-compile" -repo-url = "https://github.com/JuliaGPU/GPUCompiler.jl.git" +[[deps.GPUCompiler]] +deps = ["ExprTools", "InteractiveUtils", "LLVM", "Libdl", "Logging", "TimerOutputs", "UUIDs"] +git-tree-sha1 = "abd824e1f2ecd18d33811629c781441e94a24e81" uuid = "61eb1bfa-7361-4325-ad38-22787b887f55" -version = "0.10.1" +version = "0.13.11" -[[InteractiveUtils]] +[[deps.InteractiveUtils]] deps = ["Markdown"] uuid = "b77e0a4c-d291-57a0-90e8-8db25a27a240" -[[JLLWrappers]] -git-tree-sha1 = "a431f5f2ca3f4feef3bd7a5e94b8b8d4f2f647a0" +[[deps.JLLWrappers]] +deps = ["Preferences"] +git-tree-sha1 = "abc9885a7ca2052a736a600f7fa66209f96506e1" uuid = "692b3bcd-3c85-4b1f-b108-f13ce0eb3210" -version = "1.2.0" +version = "1.4.1" -[[LLVM]] -deps = ["CEnum", "Libdl", "Printf", "Unicode"] -git-tree-sha1 = "b616937c31337576360cb9fb872ec7633af7b194" +[[deps.LLVM]] +deps = ["CEnum", "LLVMExtra_jll", "Libdl", "Printf", "Unicode"] +git-tree-sha1 = "f8dcd7adfda0dddaf944e62476d823164cccc217" uuid = "929cbde3-209d-540e-8aea-75f648917ca0" -version = "3.6.0" +version = "4.7.1" -[[LLVM_full_jll]] +[[deps.LLVMExtra_jll]] deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg"] -git-tree-sha1 = "bbd0d3ef31d8e819035537227b0e6cac8c900913" -uuid = "a3ccf953-465e-511d-b87f-60a6490c289d" -version = "11.0.1+3" +git-tree-sha1 = "62115afed394c016c2d3096c5b85c407b48be96b" +uuid = "dad2f222-ce93-54a1-a47d-0025e8a3acab" +version = "0.0.13+1" -[[LibCURL]] +[[deps.LibCURL]] deps = ["LibCURL_jll", "MozillaCACerts_jll"] uuid = "b27032c2-a3e7-50c8-80cd-2d36dbcbfd21" -[[LibCURL_jll]] +[[deps.LibCURL_jll]] deps = ["Artifacts", "LibSSH2_jll", "Libdl", "MbedTLS_jll", "Zlib_jll", "nghttp2_jll"] uuid = "deac9b47-8bc7-5906-a0fe-35ac56dc84c0" -[[LibGit2]] +[[deps.LibGit2]] deps = ["Base64", "NetworkOptions", "Printf", "SHA"] uuid = 
"76f85450-5226-5b5a-8eaa-529ad045b433" -[[LibSSH2_jll]] +[[deps.LibSSH2_jll]] deps = ["Artifacts", "Libdl", "MbedTLS_jll"] uuid = "29816b5a-b9ab-546f-933c-edad1886dfa8" -[[Libdl]] +[[deps.Libdl]] uuid = "8f399da3-3557-5675-b5ff-fb832c97cbdb" -[[LinearAlgebra]] -deps = ["Libdl", "libblastrampoline_jll"] -uuid = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e" - -[[Logging]] +[[deps.Logging]] uuid = "56ddb016-857b-54e1-b83d-db4d58db5568" -[[Markdown]] +[[deps.Markdown]] deps = ["Base64"] uuid = "d6f4376e-aef5-505a-96c1-9c027394607a" -[[MbedTLS_jll]] +[[deps.MbedTLS_jll]] deps = ["Artifacts", "Libdl"] uuid = "c8ffd9c3-330d-5841-b78e-0817d7145fa1" -[[Mmap]] -uuid = "a63ad114-7e13-5084-954f-fe012c677804" - -[[MozillaCACerts_jll]] +[[deps.MozillaCACerts_jll]] uuid = "14a3606d-f60d-562e-9121-12d972cd8159" -[[NetworkOptions]] +[[deps.NetworkOptions]] uuid = "ca575930-c2e3-43a9-ace4-1e988b2c1908" -[[OrderedCollections]] -git-tree-sha1 = "4fa2ba51070ec13fcc7517db714445b4ab986bdf" -uuid = "bac558e1-5e72-5ebc-8fee-abe8a469f55d" -version = "1.4.0" - -[[Pkg]] +[[deps.Pkg]] deps = ["Artifacts", "Dates", "Downloads", "LibGit2", "Libdl", "Logging", "Markdown", "Printf", "REPL", "Random", "SHA", "Serialization", "TOML", "Tar", "UUIDs", "p7zip_jll"] uuid = "44cfe95a-1eb2-52ea-b672-e2afdf69b78f" -[[Printf]] +[[deps.Preferences]] +deps = ["TOML"] +git-tree-sha1 = "2cf929d64681236a2e074ffafb8d568733d2e6af" +uuid = "21216c6a-2e73-6563-6e65-726566657250" +version = "1.2.3" + +[[deps.Printf]] deps = ["Unicode"] uuid = "de0858da-6303-5e67-8744-51eddeeeb8d7" -[[REPL]] +[[deps.REPL]] deps = ["InteractiveUtils", "Markdown", "Sockets", "Unicode"] uuid = "3fa0cd96-eef1-5676-8a61-b3b8758bbffb" -[[Random]] -deps = ["Serialization"] +[[deps.Random]] +deps = ["SHA", "Serialization"] uuid = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c" -[[SHA]] +[[deps.SHA]] uuid = "ea8e919c-243c-51af-8825-aaa63cd721ce" -[[Scratch]] -deps = ["Dates"] -git-tree-sha1 = "ad4b278adb62d185bbcb6864dc24959ab0627bf6" -uuid = 
"6c6a2e73-6563-6170-7368-637461726353" -version = "1.0.3" - -[[Serialization]] +[[deps.Serialization]] uuid = "9e88b42a-f829-5b0c-bbe9-9e923198166b" -[[SharedArrays]] -deps = ["Distributed", "Mmap", "Random", "Serialization"] -uuid = "1a1011a3-84de-559e-8e89-a11a2f7dc383" - -[[Sockets]] +[[deps.Sockets]] uuid = "6462fe0b-24de-5631-8697-dd941f90decc" -[[SparseArrays]] -deps = ["LinearAlgebra", "Random"] -uuid = "2f01184e-e22b-5df5-ae63-d93ebab69eaf" - -[[Statistics]] -deps = ["LinearAlgebra", "SparseArrays"] -uuid = "10745b16-79ce-11e8-11f9-7d13ad32a3b2" - -[[TOML]] +[[deps.TOML]] deps = ["Dates"] uuid = "fa267f1f-6049-4f14-aa54-33bafae1ed76" -[[Tar]] +[[deps.Tar]] deps = ["ArgTools", "SHA"] uuid = "a4e569a6-e804-4fa4-b0f3-eef7a1d5b13e" -[[Test]] -deps = ["InteractiveUtils", "Logging", "Random", "Serialization"] -uuid = "8dfed614-e22c-5e08-85e1-65c5234f0b40" - -[[TimerOutputs]] -deps = ["Printf"] -git-tree-sha1 = "32cdbe6cd2d214c25a0b88f985c9e0092877c236" +[[deps.TimerOutputs]] +deps = ["ExprTools", "Printf"] +git-tree-sha1 = "97e999be94a7147d0609d0b9fc9feca4bf24d76b" uuid = "a759f4b9-e2f1-59dc-863e-4aeb61b1ea8f" -version = "0.5.8" +version = "0.5.15" -[[UUIDs]] +[[deps.UUIDs]] deps = ["Random", "SHA"] uuid = "cf7118a7-6976-5b1a-9a39-7adc72f591a4" -[[Unicode]] +[[deps.Unicode]] uuid = "4ec0a83e-493e-50e2-b9ac-8f72acf5a8f5" -[[Zlib_jll]] +[[deps.Zlib_jll]] deps = ["Libdl"] uuid = "83775a58-1f1d-513f-b197-d71354ab007a" -[[libblastrampoline_jll]] -deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg"] -git-tree-sha1 = "411ee75a5364426ef54afa88482ca84f35937923" -uuid = "8e850b90-86db-534c-a0d3-1478176c7d93" -version = "3.0.2+0" - -[[nghttp2_jll]] +[[deps.nghttp2_jll]] deps = ["Artifacts", "Libdl"] uuid = "8e850ede-7688-5339-a07c-302acd2aaf8d" -[[p7zip_jll]] +[[deps.p7zip_jll]] deps = ["Artifacts", "Libdl"] uuid = "3f19e933-33d8-53b3-aaab-bd5110c3b7a0" diff --git a/Project.toml b/Project.toml index 098e7e2..81a4e92 100644 --- a/Project.toml +++ b/Project.toml @@ -6,11 +6,12 
@@ version = "0.2.0" [deps] GPUCompiler = "61eb1bfa-7361-4325-ad38-22787b887f55" LLVM = "929cbde3-209d-540e-8aea-75f648917ca0" -LLVM_full_jll = "a3ccf953-465e-511d-b87f-60a6490c289d" Libdl = "8f399da3-3557-5675-b5ff-fb832c97cbdb" [compat] julia = "1.7" +GPUCompiler = "0.13" +LLVM = "4" [extras] Formatting = "59287772-0a20-5a39-b81b-1366585eb4c0" diff --git a/README.md b/README.md index 48a0e2b..a5395f2 100644 --- a/README.md +++ b/README.md @@ -26,15 +26,21 @@ m = compile(f, (Int,)) generate_shlib(f, (Int,), "libf") # find a function pointer for this shared library fptr = generate_shlib_fptr("libf", "f") -ccall(fptr, Int, (Int,), 2) +@ccall $fptr(2::Int)::Int # do this in one step (this time with a temporary shared library) fptr = generate_shlib_fptr(f, (Int,)) -ccall(fptr, Int, (Int,), 2) - +@ccall $fptr(2::Int)::Int ``` ## Approach This package uses the [GPUCompiler package](https://github.com/JuliaGPU/GPUCompiler.jl) to generate code. +## Limitations + +* This package currently requires that you have `gcc` installed and in your system's `PATH`. This is probably pretty easy to fix, we only use `gcc` for linking. In theory Clang_jll or LLVM_full_jll should be able to do this, and be managed through Julia's package manager. +* No heap allocations (e.g. creating an array or a string) are allowed inside a statically compiled function body. If you try to run such a function, you will get a segfault. +** It's sometimes possible you won't get a segfault if you define and run the function in the same session, but trying to call the compiled function in a new julia session will definitely segfault. +* Lots of other limitations too. E.g. there's an example in tests/runtests.jl where summing a vector of `Complex{Float32}` is fine, but segfaults on `Complex{Float64}`. 
+ diff --git a/docs/src/backend.md b/docs/src/backend.md index 523901c..1e08a76 100644 --- a/docs/src/backend.md +++ b/docs/src/backend.md @@ -7,4 +7,4 @@ Pages = ["backend.md"] ```@autodocs Modules = [StaticCompiler] Pages = readdir("../src") -``` +``` \ No newline at end of file diff --git a/docs/src/helpers.md b/docs/src/helpers.md index c07d4df..e69de29 100644 --- a/docs/src/helpers.md +++ b/docs/src/helpers.md @@ -1,23 +0,0 @@ -# Helpers -Note that the helpers defined here are used in tests, and they are useful to test out code in the REPL. - -```julia -twox(x) = 2x -# run code in the REPL -@jlrun twox(3) -# compile to an executable in a `standalone` directory -exegen([ (twox, Tuple{Int}, 4) ]) -``` - -These are not meant to be a permanent part of the API. They are just for testing. - - -```@index -Modules = [StaticCompiler] -Pages = ["helpers.md"] -``` - -```@autodocs -Modules = [StaticCompiler] -Pages = readdir("../src/helpers") -``` diff --git a/docs/src/index.md b/docs/src/index.md index 760d5e6..e69de29 100644 --- a/docs/src/index.md +++ b/docs/src/index.md @@ -1,64 +0,0 @@ -```@meta -CurrentModule = StaticCompiler -``` - -# StaticCompiler - -[![Dev](https://img.shields.io/badge/docs-dev-blue.svg)](https://tshort.github.io/StaticCompiler.jl/dev) -[![Build Status](https://travis-ci.com/tshort/StaticCompiler.jl.svg?branch=master)](https://travis-ci.com/tshort/StaticCompiler.jl) -[![Build Status](https://ci.appveyor.com/api/projects/status/github/tshort/StaticCompiler.jl?svg=true)](https://ci.appveyor.com/project/tshort/StaticCompiler-jl) -[![Codecov](https://codecov.io/gh/tshort/StaticCompiler.jl/branch/master/graph/badge.svg)](https://codecov.io/gh/tshort/StaticCompiler.jl) -[![Coveralls](https://coveralls.io/repos/github/tshort/StaticCompiler.jl/badge.svg?branch=master)](https://coveralls.io/github/tshort/StaticCompiler.jl?branch=master) - -This is an experimental package to compile Julia code to standalone libraries. A system image is not needed. 
It is also meant for cross compilation, so Julia code can be compiled for other targets, including WebAssembly and embedded targets. - -## Installation and Usage -```julia -using Pkg -Pkg.add(PackageSpec( url = "https://github.com/tshort/StaticCompiler.jl", rev = "master")) -``` -```julia -using StaticCompiler -``` -**Documentation**: [![Dev](https://img.shields.io/badge/docs-dev-blue.svg)](https://tshort.github.io/StaticCompiler.jl/dev) - -## Approach -This package uses the [LLVM package](https://github.com/maleadt/LLVM.jl) to generate code in the same fashion as [CUDAnative](https://github.com/JuliaGPU/CUDAnative.jl). - -Some of the key details of this approach are: - -* **ccalls and cglobal** -- When Julia compiles code CUDAnative style, `ccall` and `cglobal` references get compiled to a direct pointer. `StaticCompiler` converts these to symbol references for later linking. For `ccall` with a tuple call to a symbol in a library, `Cassette` is used to convert that to just a symbol reference (no dynamic library loading). - -* **Global variables** -- A lot of code gets compiled with global variables, and these get compiled to a direct pointer. `StaticCompiler` includes a basic serialize/deserialize approach. Right now, this is fairly basic, and it takes shortcuts for some objects by swapping in wrong types. This can work because many times, the objects are not really used in the code. Finding the global variable can be a little tricky because the pointer is converted to a Julia object with `unsafe_pointer_to_objref`, and that segfaults for some addresses. How to best handle cases like that is still to be determined. - -* **Initialization** -- If libjulia is used, some init code needs to be run to set up garbage collection and other things. For this, a basic `blank.ji` file is used to feed `jl_init_with_image`. 
- -Long term, a better approach may be to use Julia's standard compilation techniques with "tree shaking" to generate a reduced system image (see [here](https://github.com/JuliaLang/julia/issues/33670)). - -## Example -The API still needs work, but here is the general approach right now: - -```julia -using StaticCompiler -m = irgen(cos, Tuple{Float64}) -write(m, "cos.bc") -write_object(m, "cos.o") -``` - -`cos.o` should contain a function called `cos`. From there, you need to convert to link as needed with `libjulia`. - -See the `test` directory for more information and types of code that currently run. The most advanced example that works is a call to an ODE solution using modified code from [ODE.jl](https://github.com/JuliaDiffEq/ODE.jl). For information on compiling and linking to an executable, see [test/standalone-exe.jl](https://github.com/tshort/StaticCompiler.jl/blob/master/test/standalone-exe.jl). - -## Known limitations - -* It won't work for recursive code. Jameson's [codegen-norecursion](https://github.com/JuliaLang/julia/tree/jn/codegen-norecursion) should fix that when merged. - -* `cfunction` is not supported. - -* Generic code that uses `jl_apply_generic` does not work. One strategy for this is to use Cassette to swap out known code that uses dynamic calls. Another approach is to write something like `jl_apply_generic` to implement dynamic calls. - -* The use of Cassette makes it more difficult for Julia to infer some things, and only type-stable code can be statically compiled with this approach. - -* It's only been tested on Linux and Windows. - -Finally, this whole approach is young and likely brittle. Do not expect it to work for your code. 
diff --git a/src/StaticCompiler.jl b/src/StaticCompiler.jl index ad044a2..c544fa4 100644 --- a/src/StaticCompiler.jl +++ b/src/StaticCompiler.jl @@ -1,9 +1,9 @@ module StaticCompiler -import GPUCompiler -import LLVM -import LLVM_full_jll -import Libdl +using GPUCompiler: GPUCompiler +using LLVM: LLVM +using Libdl: Libdl + export generate_shlib, generate_shlib_fptr, compile @@ -23,18 +23,19 @@ end struct TestCompilerParams <: GPUCompiler.AbstractCompilerParams end GPUCompiler.runtime_module(::GPUCompiler.CompilerJob{<:Any,TestCompilerParams}) = TestRuntime -const linker = Sys.isunix() ? "ld.lld" : Sys.isapple() ? "ld64.lld" : "lld-link" +# const linker = Sys.isunix() ? "ld.lld" : Sys.isapple() ? "ld64.lld" : "lld-link" function generate_shlib(f, tt, path::String = tempname(), name = GPUCompiler.safe_name(repr(f))) open(path, "w") do io - target = GPUCompiler.NativeCompilerTarget(;reloc=LLVM.API.LLVMRelocPIC, extern=true) + target = GPUCompiler.NativeCompilerTarget(;always_inline=true) source = GPUCompiler.FunctionSpec(f, Base.to_tuple_type(tt), false, name) params = TestCompilerParams() job = GPUCompiler.CompilerJob(target, source, params) obj, _ = GPUCompiler.codegen(:obj, job; strip=true, only_entry=false, validate=false) write(io, obj) flush(io) - run(`$(StaticCompiler.LLVM_full_jll.PATH)/$linker -shared -o $path.$(Libdl.dlext) $path`) + # run(`$(Clang_jll.PATH[])/clang -shared -o $path.$(Libdl.dlext) $path`) + run(`gcc -shared -o $path.$(Libdl.dlext) $path`) rm(path) end path, name diff --git a/src/ccalls.jl b/src/ccalls.jl deleted file mode 100644 index 4314fab..0000000 --- a/src/ccalls.jl +++ /dev/null @@ -1,90 +0,0 @@ - -""" - find_ccalls(f, tt) - -Returns a `Dict` mapping function addresses to symbol names for all `ccall`s and -`cglobal`s called from the method. This descends into other invocations -within the method. 
-""" -find_ccalls(@nospecialize(f), @nospecialize(tt)) = find_ccalls(reflect(f, tt)) - -function find_ccalls(ref::Reflection) - result = Dict{Ptr{Nothing}, Symbol}() - idx = VERSION > v"1.2" ? 5 : 4 - foreigncalls = TypedCodeUtils.filter((c) -> lookthrough((c) -> c.head === :foreigncall && !(c.args[idx] isa QuoteNode && c.args[idx].value == :llvmcall), c), ref.CI.code) - # foreigncalls = TypedCodeUtils.filter((c) -> lookthrough((c) -> c.head === :foreigncall, c), ref.CI.code) - for fc in foreigncalls - sym = getsym(fc[2].args[1]) - address = eval(:(cglobal($(sym)))) - result[address] = Symbol(sym isa Tuple ? sym[1] : sym.value) - end - cglobals = TypedCodeUtils.filter((c) -> lookthrough(c -> c.head === :call && iscglobal(c.args[1]), c), ref.CI.code) - for fc in cglobals - sym = getsym(fc[2].args[2]) - address = eval(:(cglobal($(sym)))) - result[address] = Symbol(sym isa Tuple ? sym[1] : sym.value) - end - invokes = TypedCodeUtils.filter((c) -> lookthrough(identify_invoke, c), ref.CI.code) - invokes = map((arg) -> process_invoke(DefaultConsumer(), ref, arg...), invokes) - for fi in invokes - canreflect(fi) || continue - merge!(result, find_ccalls(reflect(fi))) - end - return result -end - -getsym(x) = x -getsym(x::String) = QuoteNode(Symbol(x)) -getsym(x::QuoteNode) = x -getsym(x::Expr) = eval.((x.args[2], x.args[3])) - -iscglobal(x) = x == cglobal || x isa GlobalRef && x.name == :cglobal - - -""" - fix_ccalls!(mod::LLVM.Module, d) - -Replace function addresses with symbol names in `mod`. The symbol names are -meant to be linked to `libjulia` or other libraries. -`d` is a `Dict` mapping a function address to symbol name for `ccall`s. 
-""" -function fix_ccalls!(mod::LLVM.Module, d) - for fun in functions(mod), blk in blocks(fun), instr in instructions(blk) - if instr isa LLVM.CallInst - dest = called_value(instr) - if dest isa ConstantExpr && occursin("inttoptr", string(dest)) - # @show instr - # @show dest - argtypes = [llvmtype(op) for op in operands(instr)] - nargs = length(parameters(eltype(argtypes[end]))) - # num_extra_args = 1 + length(collect(eachmatch(r"jl_roots", string(instr)))) - ptr = Ptr{Cvoid}(convert(Int, first(operands(dest)))) - if haskey(d, ptr) - sym = d[ptr] - newdest = LLVM.Function(mod, string(sym), LLVM.FunctionType(llvmtype(instr), argtypes[1:nargs])) - LLVM.linkage!(newdest, LLVM.API.LLVMExternalLinkage) - replace_uses!(dest, newdest) - end - end - elseif instr isa LLVM.LoadInst && occursin("inttoptr", string(instr)) - # dest = called_value(instr) - for op in operands(instr) - lastop = op - if occursin("inttoptr", string(op)) - # @show instr - if occursin("addrspacecast", string(op)) || occursin("getelementptr", string(op)) - op = first(operands(op)) - end - first(operands(op)) isa LLVM.ConstantInt || continue - ptr = Ptr{Cvoid}(convert(Int, first(operands(op)))) - if haskey(d, ptr) - obj = d[ptr] - newdest = GlobalVariable(mod, llvmtype(instr), string(d[ptr])) - LLVM.linkage!(newdest, LLVM.API.LLVMExternalLinkage) - replace_uses!(op, newdest) - end - end - end - end - end -end diff --git a/src/extern.jl b/src/extern.jl deleted file mode 100644 index 6c0424a..0000000 --- a/src/extern.jl +++ /dev/null @@ -1,16 +0,0 @@ -""" - @extern(fun, returntype, argtypes, args...) - -Creates a call to an external function meant to be included at link time. -Use the same conventions as `ccall`. - -This transforms into the following `ccall`: - - ccall("extern fun", llvmcall, returntype, argtypes, args...) -""" -macro extern(name, rettyp, argtyp, args...) - externfun = string("extern ", name isa AbstractString || name isa Symbol ? 
name : name.value) - Expr(:call, :ccall, externfun, esc(:llvmcall), esc(rettyp), - Expr(:tuple, esc.(argtyp.args)...), esc.(args)...) -end - diff --git a/src/globals.jl b/src/globals.jl deleted file mode 100644 index e258338..0000000 --- a/src/globals.jl +++ /dev/null @@ -1,164 +0,0 @@ -struct GlobalsContext - invokes::Set{Any} -end -GlobalsContext() = GlobalsContext(Set()) - - -""" - fix_globals!(mod::LLVM.Module) - -Replace function addresses in `mod` with references to global data structures. -For each global variable, two LLVM global objects are created: - -* `jl.global.data` -- An LLVM 'i8' vector holding a serialized version of the Julia object. -* `jl.global` -- A pointer to the unserialized Julia object. - -The `inttopt` with the function address is replaced by `jl.global`. - -A function `jl_init_globals` is added to `mod`. This function deserializes the data in -`jl.global.data` and updates `jl.global`. -""" - -_opcode(x::LLVM.ConstantExpr) = LLVM.API.LLVMGetConstOpcode(LLVM.ref(x)) - -function fix_globals!(mod::LLVM.Module) - # Create a `jl_init_globals` function. 
- jl_init_globals_func = LLVM.Function(mod, "jl_init_globals", - LLVM.FunctionType(julia_to_llvm(Cvoid), LLVMType[])) - jl_init_global_entry = BasicBlock(jl_init_globals_func, "entry", context(mod)) - - # Definitions for utility functions - func_type = LLVM.FunctionType(julia_to_llvm(Any), LLVMType[LLVM.PointerType(julia_to_llvm(Int8))]) - deserialize_funs = Dict() - - uint8_t = julia_to_llvm(UInt8) - - ctx = SerializeContext() - es = [] - objs = Set() - gptridx = Dict() - instrs = [] - gptrs = [] - j = 1 # counter for position in gptridx - Builder(context(mod)) do builder - toinstr!(x) = x - function toinstr!(x::LLVM.ConstantExpr) - if _opcode(x) == LLVM.API.LLVMAddrSpaceCast - val = toinstr!(first(operands(x))) - ret = addrspacecast!(builder, val, llvmtype(x)) - return ret - elseif _opcode(x) == LLVM.API.LLVMGetElementPtr - ops = operands(x) - val = toinstr!(first(ops)) - ret = gep!(builder, val, [ops[i] for i in 2:length(ops)]) - return ret - elseif _opcode(x) == LLVM.API.LLVMBitCast - ops = operands(x) - val = toinstr!(first(ops)) - ret = pointercast!(builder, val, llvmtype(x)) - return ret - elseif _opcode(x) == LLVM.API.LLVMIntToPtr - ptr = Ptr{Any}(convert(Int, first(operands(x)))) - obj = unsafe_pointer_to_objref(ptr) - if !in(obj, objs) - push!(es, serialize(ctx, obj)) - push!(objs, obj) - # Create pointers to the data. 
- gptr = GlobalVariable(mod, julia_to_llvm(Any), "jl.global") - linkage!(gptr, LLVM.API.LLVMInternalLinkage) - LLVM.API.LLVMSetInitializer(LLVM.ref(gptr), LLVM.ref(null(julia_to_llvm(Any)))) - push!(gptrs, gptr) - gptridx[obj] = j - j += 1 - end - gptr = gptrs[gptridx[obj]] - gptr2 = load!(builder, gptr) - ret = pointercast!(builder, gptr2, llvmtype(x)) - return ret - end - return x - end - for fun in functions(mod) - if startswith(LLVM.name(fun), "jfptr") - unsafe_delete!(mod, fun) - continue - end - - for blk in blocks(fun), instr in instructions(blk) - # Set up functions to walk the operands of the instruction - # and convert appropriate ConstantExpr's to instructions. - # Look for `LLVMIntToPtr` expressions. - position!(builder, instr) - ops = operands(instr) - N = opcode(instr) == LLVM.API.LLVMCall ? length(ops) - 1 : length(ops) - if opcode(instr) == LLVM.API.LLVMCall && name(last(operands(instr))) == "jl_type_error" - continue - end - for i in 1:N - try - if opcode(instr) == LLVM.API.LLVMPHI - position!(builder, last(instructions(LLVM.incoming(instr)[i][2]))) - end - ops[i] = toinstr!(ops[i]) - catch x - end - end - end - end - end - nglobals = length(es) - #@show mod - #verify(mod) - for i in 1:nglobals - # Assign the appropriate function argument to the appropriate global. - es[i] = :(unsafe_store!($((Symbol("global", i))), $(es[i]))) - end - # Define the deserializing function. - fune = quote - function _deserialize_globals(Vptr, $((Symbol("global", i) for i in 1:nglobals)...)) - $(ctx.init...) - $(es...) - return - end - end - # @show fune - # Execute the deserializing function. 
- deser_fun = eval(fune) - v = take!(ctx.io) - gv_typ = LLVM.ArrayType(uint8_t, length(v)) - data = LLVM.GlobalVariable(mod, gv_typ, "jl.global.data") - linkage!(data, LLVM.API.LLVMExternalLinkage) - constant!(data, true) - LLVM.API.LLVMSetInitializer(LLVM.ref(data), - LLVM.API.LLVMConstArray(LLVM.ref(uint8_t), - [LLVM.ref(ConstantInt(uint8_t, x)) for x in v], - UInt32(length(v)))) - Builder(context(mod)) do builder - dataptr = gep!(builder, data, [ConstantInt(0, context(mod)), ConstantInt(0, context(mod))]) - - # Create the Julia object from `data` and include that in `init_fun`. - position!(builder, jl_init_global_entry) - gfunc_type = LLVM.FunctionType(julia_to_llvm(Cvoid), - LLVMType[LLVM.PointerType(julia_to_llvm(Int8)), - Iterators.repeated(LLVM.FunctionType(julia_to_llvm(Any)), nglobals)...]) - deserialize_globals_func = LLVM.Function(mod, "_deserialize_globals", gfunc_type) - LLVM.linkage!(deserialize_globals_func, LLVM.API.LLVMExternalLinkage) - for i in 1:nglobals - # The following fix is to match the argument types which are an integer, not a %jl_value_t**. 
- gptrs[i] = LLVM.ptrtoint!(builder, gptrs[i], julia_to_llvm(Csize_t)) - end - LLVM.call!(builder, deserialize_globals_func, LLVM.Value[dataptr, gptrs...]) - ret!(builder) - end - tt = Tuple{Ptr{UInt8}, Iterators.repeated(Ptr{Any}, nglobals)...} - deser_mod = irgen(deser_fun, tt, overdub = false, fix_globals = false, optimize_llvm = false) - d = find_ccalls(deser_fun, tt) - fix_ccalls!(deser_mod, d) - # rename deserialization function to "_deserialize_globals" - fun = first(TypedCodeUtils.filter(x -> LLVM.name(x) == "_deserialize_globals", functions(deser_mod)))[2] - # LLVM.name!(fun, "_deserialize_globals") - linkage!(fun, LLVM.API.LLVMExternalLinkage) - # link into the main module - LLVM.link!(mod, deser_mod) - return -end diff --git a/src/helpers/README.md b/src/helpers/README.md deleted file mode 100644 index e443db6..0000000 --- a/src/helpers/README.md +++ /dev/null @@ -1,12 +0,0 @@ -# Helpers -Note that the helpers defined here are used in tests, and they are useful to test out code in the REPL. - -```julia -twox(x) = 2x -# run code in the REPL -@jlrun twox(3) -# compile to an executable in a `standalone` directory -exegen([ (twox, Tuple{Int}, 4) ]) -``` - -These are not meant to be a permanent part of the API. They are just for testing. diff --git a/src/helpers/helpers.jl b/src/helpers/helpers.jl deleted file mode 100644 index 1f04154..0000000 --- a/src/helpers/helpers.jl +++ /dev/null @@ -1,37 +0,0 @@ -""" -Returns shellcmd string for different OS. Optionally, checks for gcc installation. 
-""" -function _shellcmd(checkInstallation::Bool = false) - - if Sys.isunix() - shellcmd = "gcc" - elseif Sys.iswindows() - shellcmd = ["cmd", "/c", "gcc"] - else - error("run command not defined") - end - - if checkInstallation - # Checking gcc installation - try - run(`$shellcmd -v`) - catch - @warn "Make sure gcc compiler is installed: https://gcc.gnu.org/install/binaries.html and is on the path, othetwise some of the functions will return errors" - return nothing - end - end - - return shellcmd -end - -shellcmd = _shellcmd(true) # is used in @jlrun and exegen() - - -export @jlrun -include("jlrun.jl") - -export ldflags, ldlibs, cflags # are used in exegen -include("juliaconfig.jl") - -export exegen -include("standalone-exe.jl") diff --git a/src/helpers/jlrun.jl b/src/helpers/jlrun.jl deleted file mode 100644 index f2e8e16..0000000 --- a/src/helpers/jlrun.jl +++ /dev/null @@ -1,61 +0,0 @@ -using Libdl, LLVM - -function show_inttoptr(mod) - for fun in LLVM.functions(mod), - blk in LLVM.blocks(fun), - instr in LLVM.instructions(blk) - - s = string(instr) - if occursin("inttoptr", s) && occursin(r"[0-9]{8,30}", s) - println(LLVM.name(fun), " ---------------------------") - @show instr - println() - end - end -end - -""" -Compiles function call provided and calls it with `ccall` using the shared library that was created. -""" -macro jlrun(e) - - fun = e.args[1] - efun = esc(fun) - args = length(e.args) > 1 ? 
e.args[2:end] : Any[] - libpath = abspath("test.o") - dylibpath = abspath("test.so") - tt = Tuple{(typeof(eval(a)) for a in args)...} - if length(e.args) > 1 - ct = code_typed(Base.eval(__module__, fun), tt) - else - ct = code_typed(Base.eval(__module__, fun)) - end - rettype = ct[1][2] - bindir = joinpath(dirname(Sys.BINDIR), "tools") - libdir = joinpath(dirname(Sys.BINDIR), "lib") - - runCommand = :(run( - $(`$shellcmd -shared -fPIC -o test.so -L$libdir test.o -ljulia`), - wait = true, - )) - - quote - m = irgen($efun, $tt) - # m = irgen($efun, $tt, overdub = false) - LLVM.verify(m) - # show_inttoptr(m) - write(m, "test.bc") - write_object(m, "test.o") - $runCommand - dylib = Libdl.dlopen($dylibpath) - ccall(Libdl.dlsym(dylib, "jl_init_globals"), Cvoid, ()) - res = ccall( - Libdl.dlsym(dylib, $(Meta.quot(fun))), - $rettype, - ($((typeof(eval(a)) for a in args)...),), - $(eval.(args)...), - ) - Libdl.dlclose(dylib) - res - end -end diff --git a/src/helpers/juliaconfig.jl b/src/helpers/juliaconfig.jl deleted file mode 100644 index 039e090..0000000 --- a/src/helpers/juliaconfig.jl +++ /dev/null @@ -1,63 +0,0 @@ -# from PackageCompilerX: https://github.com/KristofferC/PackageCompilerX.jl/blob/c1a90edfaa28907edf2edbbc734ef8afdeeaca80/src/juliaconfig.jl -# adopted from https://github.com/JuliaLang/julia/blob/release-0.6/contrib/julia-config.jl - -function shell_escape(str) - str = replace(str, "'" => "'\''") - return "'$str'" -end - -function julia_libdir() - return if ccall(:jl_is_debugbuild, Cint, ()) != 0 - dirname(abspath(Libdl.dlpath("libjulia-debug"))) - else - dirname(abspath(Libdl.dlpath("libjulia"))) - end -end - -function julia_private_libdir() - @static if Sys.iswindows() - return julia_libdir() - else - return abspath(Sys.BINDIR, Base.PRIVATE_LIBDIR) - end -end - -julia_includedir() = abspath(Sys.BINDIR, Base.INCLUDEDIR, "julia") - -function ldflags() - fl = "-L$(shell_escape(julia_libdir()))" - if Sys.iswindows() - fl = fl * " -Wl,--stack,8388608" - fl = 
fl * " -Wl,--export-all-symbols" - elseif Sys.islinux() - fl = fl * " -Wl,--export-dynamic" - end - return fl -end - -# TODO -function ldlibs(relative_path=nothing) - libname = if ccall(:jl_is_debugbuild, Cint, ()) != 0 - "julia-debug" - else - "julia" - end - if Sys.islinux() - return "-Wl,-rpath-link,$(shell_escape(julia_libdir())) -Wl,-rpath-link,$(shell_escape(julia_private_libdir())) -l$libname" - elseif Sys.iswindows() - return "-l$libname -lopenlibm" - else - return "-l$libname" - end -end - -function cflags() - flags = IOBuffer() - print(flags, "-std=gnu99") - include = shell_escape(julia_includedir()) - print(flags, " -I", include) - if Sys.isunix() - print(flags, " -fPIC") - end - return String(take!(flags)) -end diff --git a/src/helpers/standalone-exe.jl b/src/helpers/standalone-exe.jl deleted file mode 100644 index 40d026b..0000000 --- a/src/helpers/standalone-exe.jl +++ /dev/null @@ -1,148 +0,0 @@ -Ctemplate = """ -#include -#include -extern CRETTYPE FUNNAME(CARGTYPES); -extern void jl_init_with_image(const char *, const char *); -extern void jl_init_globals(void); -int main() -{ - jl_init_with_image(".", "blank.ji"); - jl_init_globals(); - printf("RETFORMAT", FUNNAME(FUNARG)); - jl_atexit_hook(0); - return 0; -} -""" - -# "signed" is removed from signed types -# duplicates will remove automatically -Cmap = Dict( - Cchar => "char", #Int8 - Cuchar => "unsigned char", #UInt8 - Cshort => "short", #Int16 - # Cstring => - Cushort => "unsigned short", #UInt16 - Cint => "int", #Int32 - Cuint => "unsigned int", #UInt32 - Clong => "long", #Int32 - Culong => "unsigned long", #UInt32 - Clonglong => "long long", #Int64 - Culonglong => "unsigned long long", #UInt64 - # Cintmax_t => "intmax_t", #Int64 - # Cuintmax_t => "uintmax_t", #UInt64 - # Csize_t => "size_t", #UInt - # Cssize_t => "ssize_t", #Int - # Cptrdiff_t => "ptrdiff_t", #Int - # Cwchar_t => "wchar_t", #Int32 - # Cwstring => - Cfloat => "float", #Float32 - Cdouble => "double", #Float64 - Nothing => 
"void", -) - -Cformatmap = Dict( - Cchar => "%c", #Int8 - # Cuchar => "unsigned char", #UInt8 - # Cshort => "short", #Int16 - Cstring => "%s", - # Cushort => "unsigned short", #UInt16 - Cint => "%d", #"i" #Int32 - Cuint => "%u", #UInt32 - Clong => "%ld", #Int32 - # Culong => "unsigned long", #UInt32 - Clonglong => "%lld", #Int64 - # Culonglong => "unsigned long long", #UInt64 - # Cintmax_t => "intmax_t", #Int64 - # Cuintmax_t => "uintmax_t", #UInt64 - # Csize_t => "size_t", #UInt - # Cssize_t => "ssize_t", #Int - # Cptrdiff_t => "ptrdiff_t", #Int - # Cwchar_t => "wchar_t", #Int32 - # Cwstring => - # Cfloat => "%f", #Float32 - Cdouble => "%f", #%e #Float64 -) - -""" -converts to text. returns "" for Nothing and empty Tuple. -""" -totext(x) = string(x) -totext(x::Nothing) = "" -totext(x::Tuple{}) = "" - -""" -Makes standalone executable. -""" -function exegen(funcalls) - - cd(mkpath("standalone")) do - # create `blank.ji` for initialization - julia_path = joinpath(Sys.BINDIR, Base.julia_exename()) - base_dir = dirname(Base.find_source_file("sysimg.jl")) - wd = pwd() - open(println, "blank.jl", "w") - cd(base_dir) do - run(`$(julia_path) --output-ji $(wd)/blank.ji $(wd)/blank.jl`) - end - - dir = pwd() - standalonedir = dir - bindir = string(Sys.BINDIR) - libdir = joinpath(dirname(Sys.BINDIR), "lib") - includedir = joinpath(dirname(Sys.BINDIR), "include", "julia") - if Sys.iswindows() - for fn in readdir(bindir) - if splitext(fn)[end] == ".dll" - cp(joinpath(bindir, fn), fn, force = true) - end - end - end - - flags = join((cflags(), ldflags(), ldlibs()), " ") - flags = Base.shell_split(flags) - wrapper = joinpath(@__DIR__, "embedding_wrapper.c") - if Sys.iswindows() - rpath = `` - elseif Sys.isapple() - rpath = `-Wl,-rpath,'@executable_path' -Wl,-rpath,'@executable_path/../lib'` - else - rpath = `-Wl,-rpath,\$ORIGIN:\$ORIGIN/../lib` - end - - for (func, tt, val) in funcalls - fname = nameof(func) - rettype = Base.return_types(func, tt)[1] - argtype = length(tt.types) 
> 0 ? tt.types[1] : Nothing - fmt = Cformatmap[rettype] - Ctxt = foldl(replace, - ( - "FUNNAME" => fname, - "CRETTYPE" => Cmap[rettype], - "RETFORMAT" => fmt, - "CARGTYPES" => Cmap[argtype], - "FUNARG" => totext(val), - ), - init = Ctemplate) - write("$fname.c", Ctxt) - m = StaticCompiler.irgen(func, tt) - # StaticCompiler.show_inttoptr(m) - # @show m - dlext = Libdl.dlext - exeext = Sys.iswindows() ? ".exe" : "" - if Sys.isapple() - o_file = `-Wl,-all_load $fname.o` - else - o_file = `-Wl,--whole-archive $fname.o -Wl,--no-whole-archive` - end - extra = Sys.iswindows() ? `-Wl,--export-all-symbols` : `` - write(m, "$fname.bc") - write_object(m, "$fname.o") - - run(`$shellcmd -shared -fpic -L$libdir -o lib$fname.$dlext $o_file -Wl,-rpath,$libdir -ljulia $extra`) - run(`$shellcmd -c -std=gnu99 -I$includedir -DJULIA_ENABLE_THREADING=1 -fPIC $fname.c`) - #run(`$shellcmd -o $fname $fname.o -L$libdir -L$standalonedir -Wl,--unresolved-symbols=ignore-in-object-files -Wl,-rpath,'.' -Wl,-rpath,$libdir -ljulia -l$fname -O2 $rpath $flags`) - run(`$shellcmd -o $fname $fname.o -L$libdir -L$standalonedir -Wl,-rpath,'.' -Wl,-rpath,$libdir -ljulia -l$fname -O2 $rpath $flags`) - end - end - -end diff --git a/src/irgen.jl b/src/irgen.jl deleted file mode 100644 index cc9811c..0000000 --- a/src/irgen.jl +++ /dev/null @@ -1,300 +0,0 @@ - -struct LLVMNativeCode # thin wrapper - p::Ptr{Cvoid} -end - -function xlinfo(f, tt) - # get the method instance - world = typemax(UInt) - g = (args...) -> Cassette.overdub(ctx, f, args...) 
- meth = which(g, tt) - sig_tt = Tuple{typeof(g), tt.parameters...} - (ti, env) = ccall(:jl_type_intersection_with_env, Any, - (Any, Any), sig_tt, meth.sig)::Core.SimpleVector - - if VERSION >= v"1.2.0-DEV.320" - meth = Base.func_for_method_checked(meth, ti, env) - else - meth = Base.func_for_method_checked(meth, ti) - end - - return ccall(:jl_specializations_get_linfo, Ref{Core.MethodInstance}, - (Any, Any, Any, UInt), meth, ti, env, world) -end - -""" -Returns an LLVMNativeCode object for the function call `f` with TupleTypes `tt`. -""" -function raise_exception(insblock::BasicBlock, ex::Value) -end - -# const jlctx = Ref{LLVM.Context}() - -# function __init__() -# jlctx[] = LLVM.Context(convert(LLVM.API.LLVMContextRef, -# cglobal(:jl_LLVMContext, Nothing))) -# end - -""" - irgen(func, tt; - optimize = true, - optimize_llvm = true, - fix_globals = true, - overdub = true, - module_setup = (m) -> nothing) - -Generates Julia IR targeted for static compilation. -`ccall` and `cglobal` uses have pointer references changed to symbols -meant to be linked with libjulia and other libraries. - -`optimize` controls Julia-side optimization. `optimize_llvm` controls -optimization on the LLVM side. - -If `overdub == true` (the default), Cassette is used to swap out -`ccall`s with a tuple of library and symbol. - -`module_setup` is an optional function to control setup of modules. It takes an LLVM -module as input. -""" -function irgen(@nospecialize(func), @nospecialize(tt); - optimize = true, - optimize_llvm = true, - fix_globals = true, - overdub = true, - module_setup = (m) -> nothing) - # get the method instance - isa(func, Core.Builtin) && error("function is not a generic function") - world = typemax(UInt) - gfunc = overdub ? (args...) -> Cassette.overdub(ctx, func, args...) 
: func - meth = which(gfunc, tt) - sig_tt = Tuple{typeof(gfunc), tt.parameters...} - (ti, env) = ccall(:jl_type_intersection_with_env, Any, - (Any, Any), sig_tt, meth.sig)::Core.SimpleVector - - if VERSION >= v"1.2.0-DEV.320" - meth = Base.func_for_method_checked(meth, ti, env) - else - meth = Base.func_for_method_checked(meth, ti) - end - - linfo = ccall(:jl_specializations_get_linfo, Ref{Core.MethodInstance}, - (Any, Any, Any, UInt), meth, ti, env, world) - - current_method = nothing - last_method_instance = nothing - call_stack = Vector{Core.MethodInstance}() - global method_map = Dict{String,Core.MethodInstance}() - global dependencies = MultiDict{Core.MethodInstance,LLVM.Function}() - # set-up the compiler interface - function hook_module_setup(ref::Ptr{Cvoid}) - ref = convert(LLVM.API.LLVMModuleRef, ref) - module_setup(LLVM.Module(ref)) - end - function hook_raise_exception(insblock::Ptr{Cvoid}, ex::Ptr{Cvoid}) - insblock = convert(LLVM.API.LLVMValueRef, insblock) - ex = convert(LLVM.API.LLVMValueRef, ex) - raise_exception(BasicBlock(insblock), Value(ex)) - end - function postprocess(ir) - # get rid of jfptr wrappers - for llvmf in functions(ir) - startswith(LLVM.name(llvmf), "jfptr_") && unsafe_delete!(ir, llvmf) - end - - return - end - function hook_module_activation(ref::Ptr{Cvoid}) - ref = convert(LLVM.API.LLVMModuleRef, ref) - global ir = LLVM.Module(ref) - postprocess(ir) - - # find the function that this module defines - llvmfs = filter(llvmf -> !isdeclaration(llvmf) && - linkage(llvmf) == LLVM.API.LLVMExternalLinkage, - collect(functions(ir))) - llvmf = nothing - if length(llvmfs) == 1 - llvmf = first(llvmfs) - elseif length(llvmfs) > 1 - llvmfs = filter!(llvmf -> startswith(LLVM.name(llvmf), "julia_"), llvmfs) - if length(llvmfs) == 1 - llvmf = first(llvmfs) - end - end - insert!(dependencies, last_method_instance, llvmf) - method_map[name(llvmf)] = current_method - end - function hook_emit_function(method_instance, code, world) - push!(call_stack, 
method_instance) - end - function hook_emitted_function(method, code, world) - current_method = method - last_method_instance = pop!(call_stack) - # @show code - # dump(method, maxdepth=2) - # global mymeth = method - end - - params = Base.CodegenParams(cached=false, - track_allocations=false, - code_coverage=false, - static_alloc=false, - prefer_specsig=true, - module_setup=hook_module_setup, - module_activation=hook_module_activation, - raise_exception=hook_raise_exception, - emit_function=hook_emit_function, - emitted_function=hook_emitted_function, - ) - - # get the code - mod = let - ref = ccall(:jl_get_llvmf_defn, LLVM.API.LLVMValueRef, - (Any, UInt, Bool, Bool, Base.CodegenParams), - linfo, world, #=wrapper=#false, #=optimize=#false, params) - if ref == C_NULL - # error(jlctx[], "the Julia compiler could not generate LLVM IR") - end - - llvmf = LLVM.Function(ref) - LLVM.parent(llvmf) - end - - # the main module should contain a single jfptr_ function definition, - # e.g. jlcall_kernel_vadd_62977 - - # definitions = filter(f->!isdeclaration(f), functions(mod)) - definitions = Iterators.filter(f->!isdeclaration(f), collect(functions(mod))) - # definitions = collect(functions(mod)) - wrapper = let - fs = collect(Iterators.filter(f->startswith(LLVM.name(f), "jfptr_"), definitions)) - @assert length(fs) == 1 - fs[1] - end - - # the jlcall wrapper function should point us to the actual entry-point, - # e.g. 
julia_kernel_vadd_62984 - entry_tag = let - m = match(r"jfptr_(.+)_\d+", LLVM.name(wrapper)) - @assert m != nothing - m.captures[1] - end - unsafe_delete!(mod, wrapper) - entry = let - re = Regex("julia_$(entry_tag)_\\d+") - llvmcall_re = Regex("julia_$(entry_tag)_\\d+u\\d+") - fs = collect(Iterators.filter(f->occursin(re, LLVM.name(f)) && - !occursin(llvmcall_re, LLVM.name(f)), definitions)) - if length(fs) != 1 - compiler_error(func, tt, cap, "could not find single entry-point"; - entry=>entry_tag, available=>[LLVM.name.(definitions)]) - end - fs[1] - end - - LLVM.name!(entry, string(nameof(func))) - - # link in dependent modules - cache = Dict{String,String}() - for called_method_instance in keys(dependencies) - llvmfs = dependencies[called_method_instance] - - # link the first module - llvmf = popfirst!(llvmfs) - llvmfn = LLVM.name(llvmf) - link!(mod, LLVM.parent(llvmf)) - # process subsequent duplicate modules - for dup_llvmf in llvmfs - if Base.JLOptions().debug_level >= 2 - # link them too, to ensure accurate backtrace reconstruction - link!(mod, LLVM.parent(dup_llvmf)) - else - # don't link them, but note the called function name in a cache - dup_llvmfn = LLVM.name(dup_llvmf) - cache[dup_llvmfn] = llvmfn - end - end - end - # resolve function declarations with cached entries - for llvmf in filter(isdeclaration, collect(functions(mod))) - llvmfn = LLVM.name(llvmf) - if haskey(cache, llvmfn) - def_llvmfn = cache[llvmfn] - replace_uses!(llvmf, functions(mod)[def_llvmfn]) - unsafe_delete!(LLVM.parent(llvmf), llvmf) - end - end - # rename functions to something easier to decipher - # especially helps with overdubbed functions - for (fname, mi) in method_map - id = split(fname, "_")[end] - basename = mi.def.name - args = join(collect(mi.specTypes.parameters)[2:end], "_") - if basename == :overdub # special handling for Cassette - basename = string(mi.specTypes.parameters[3]) - basename = replace(basename, r"^typeof\(" => "") - basename = replace(basename, r"\)$" 
=> "") - args = join(collect(mi.specTypes.parameters)[4:end], "_") - end - newname = join([basename, args, id], "_") - if haskey(functions(mod), fname) - name!(functions(mod)[fname], newname) - end - end - - d = find_ccalls(gfunc, tt) - fix_ccalls!(mod, d) - if fix_globals - fix_globals!(mod) - end - if optimize_llvm - optimize!(mod) - end - return mod -end - - -""" - optimize!(mod::LLVM.Module) - -Optimize the LLVM module `mod`. Crude for now. -Returns nothing. -""" -function optimize!(mod::LLVM.Module) - for llvmf in functions(mod) - startswith(LLVM.name(llvmf), "jfptr_") && unsafe_delete!(mod, llvmf) - startswith(LLVM.name(llvmf), "julia_") && LLVM.linkage!(llvmf, LLVM.API.LLVMExternalLinkage) - end - # triple = "wasm32-unknown-unknown-wasm" - # triple!(mod, triple) - # datalayout!(mod, "e-m:e-p:32:32-i64:64-n32:64-S128") - # LLVM.API.@apicall(:LLVMInitializeWebAssemblyTarget, Cvoid, ()) - # LLVM.API.@apicall(:LLVMInitializeWebAssemblyTargetMC, Cvoid, ()) - # LLVM.API.@apicall(:LLVMInitializeWebAssemblyTargetInfo, Cvoid, ()) - triple = "i686-pc-linux-gnu" - tm = TargetMachine(Target(triple), triple) - - ModulePassManager() do pm - # add_library_info!(pm, triple(mod)) - add_transform_info!(pm, tm) - ccall(:jl_add_optimization_passes, Cvoid, - (LLVM.API.LLVMPassManagerRef, Cint, Cint), - LLVM.ref(pm), Base.JLOptions().opt_level, 1) - - dead_arg_elimination!(pm) - global_optimizer!(pm) - global_dce!(pm) - strip_dead_prototypes!(pm) - - run!(pm, mod) - end - mod -end - -function write_object(mod::LLVM.Module, path) - host_triple = triple() - host_t = Target(host_triple) - TargetMachine(host_t, host_triple, "", "", LLVM.API.LLVMCodeGenLevelDefault, LLVM.API.LLVMRelocPIC) do tm - emit(tm, mod, LLVM.API.LLVMObjectFile, path) - end -end diff --git a/src/overdub.jl b/src/overdub.jl deleted file mode 100644 index 9a0015b..0000000 --- a/src/overdub.jl +++ /dev/null @@ -1,33 +0,0 @@ -# Implements contextual dispatch through Cassette.jl - -using Cassette - -## -# Convert 
two-arg `ccall` to single arg. -## -function transform(ctx, ref) - CI = ref.code_info - ismatch = x -> begin - Base.Meta.isexpr(x, :foreigncall) && - Base.Meta.isexpr(x.args[1], :call) - end - replace = x -> begin - y = Expr(x.head, Any[x.args[1].args[2], x.args[2:end]...]) - Expr(x.head, x.args[1].args[2], x.args[2:end]...) - end - Cassette.replace_match!(replace, ismatch, CI.code) - return CI -end - -const Pass = Cassette.@pass transform - -Cassette.@context Ctx -const ctx = Cassette.disablehooks(Ctx(pass = Pass)) - -### -# Rewrite functions -### - -#@inline Cassette.overdub(ctx::Ctx, ::typeof(+), a::T, b::T) where T<:Union{Float32, Float64} = add_float_contract(a, b) - -contextualize(f::F) where F = (args...) -> Cassette.overdub(ctx, f, args...) diff --git a/src/serialize.jl b/src/serialize.jl deleted file mode 100644 index 5725650..0000000 --- a/src/serialize.jl +++ /dev/null @@ -1,242 +0,0 @@ - -""" -A context structure for holding state related to serializing Julia -objects. A key component is an `IOBuffer` used to hold the serialized -result. 
-""" -struct SerializeContext - io::IOBuffer - store::Dict{Any,Any} # Meant to map Julia object to variable name - init::Vector{Any} # Expressions to run initially -end -SerializeContext(io::IOBuffer = IOBuffer()) = SerializeContext(io, Dict(), Vector{Expr}()) - -const _td = IdDict( - Any => :jl_any_type, - Float64 => :jl_float64_type, - Float32 => :jl_float32_type, - Int64 => :jl_int64_type, - Int32 => :jl_int32_type, - Int16 => :jl_int16_type, - Int8 => :jl_int8_type, - UInt64 => :jl_uint64_type, - UInt32 => :jl_uint32_type, - UInt16 => :jl_uint16_type, - UInt8 => :jl_uint8_type, - Cint => :jl_int32_type, - Cvoid => :jl_any_type, - Array => :jl_array_type, - Array{Any,1} => :jl_array_any_type, - Array{Int32,1} => :jl_array_int32_type, - Array{UInt8,1} => :jl_array_uint8_type, - ErrorException => :jl_errorexception_type, - DataType => :jl_datatype_type, - UnionAll => :jl_unionall_type, - Union => :jl_union_type, - Core.TypeofBottom => :jl_typeofbottom_type, - TypeVar => :jl_tvar_type, -) - -const _t = IdDict() - -for (t,s) in _td - _t[t] = :(unsafe_load(cglobal($(QuoteNode(s)), Type))) -end - -const _gd = IdDict( - Core => :jl_core_module, - Main => :jl_main_module, - nothing => :jl_nothing, - () => :jl_emptytuple, - Core.svec() => :jl_emptysvec, - UndefRefError() => :jl_undefref_exception, -) - -const _g = IdDict() - -for (x,s) in _gd - _g[x] = :(unsafe_load(cglobal($(QuoteNode(s)), Any))) -end - -""" - serialize(ctx::SerializeContext, x) - -Serialize `x` into the context object `ctx`. `ctx.io` is the `IOBuffer` where the -serialized results are stored. Get the result with `take!(ctx.io)`. - -This function returns an expression that will deserialize the object. Several `serialize` -methods can be called recursively to build up deserialization code for nested objects. -The expression returned is meant to be `eval`ed into a function that can be called -to do the serialization. 
- -The deserialization code should be pretty low-level code that can be compiled -relatively easily. It especially shouldn't use global variables. - -Serialization / deserialization code can use `ctx` to hold state information. - -Some simple types like boxed variables do not need to write anything to `ctx.io`. -They can return an expression that directly creates the object. -""" -function serialize(ctx::SerializeContext, @nospecialize(x)) - haskey(_g, x) && return _g[x] - # TODO: fix this major kludge. - if nfields(x) > 0 - return Expr(:tuple, (serialize(ctx, getfield(x,i)) for i in 1:nfields(x))...) - end - return :(unsafe_load(cglobal(:jl_emptytuple, Any))) -end - -function serialize(ctx::SerializeContext, @nospecialize(t::DataType)) - if haskey(_t, t) - return _t[t] - elseif haskey(ctx.store, t) - return ctx.store[t] - else - # primary = unwrap_unionall(t.wrapper) - name = gensym(Symbol(:type, "-", t.name.name)) - ctx.store[t] = name - e = quote - $name = let - local tn = $(serialize(ctx, t.name)) - # names = $(serialize(ctx, t.names)) - local super = $(serialize(ctx, t.super)) - local parameters = $(serialize(ctx, t.parameters)) - local types = $(serialize(ctx, t.types)) - local ndt = ccall(:jl_new_datatype, Any, - (Any, Any, Any, Any, Any, Any, Cint, Cint, Cint), - tn, tn.module, super, parameters, #=names=# unsafe_load(cglobal(:jl_any_type, Any)), types, - $(t.abstract), $(t.mutable), $(t.ninitialized)) - # tn.wrapper = ndt.name.wrapper - # ccall(:jl_set_const, Cvoid, (Any, Any, Any), tn.module, tn.name, tn.wrapper) - ndt - # ty = tn.wrapper - # $(ctx.types[string(t)]) = ndt - # hasinstance = serialize(ctx, ) - # $(if isdefined(primary, :instance) && !isdefined(t, :instance) - # # use setfield! 
directly to avoid `fieldtype` lowering expecting to see a Singleton object already on ty - # :(Core.setfield!(ty, :instance, ccall(:jl_new_struct, Any, (Any, Any...), ty))) - # end) - end - end - push!(ctx.init, e) - return name - end -end - -function serialize(ctx::SerializeContext, tn::Core.TypeName) - haskey(ctx.store, tn) && return ctx.store[tn] - name = gensym(Symbol(:typename, "-", tn.name)) - ctx.store[tn] = name - e = quote - $name = ccall(:jl_new_typename_in, Ref{Core.TypeName}, (Any, Any), - # $(serialize(ctx, tn.name)), Main #=__deserialized_types__ =# ) - $(serialize(ctx, tn.name)), unsafe_load(cglobal(:jl_main_module, Any)) #=__deserialized_types__ =# ) - end - push!(ctx.init, e) - return name -end - -function serialize(ctx::SerializeContext, mi::Core.MethodInstance) - return :(unsafe_load(cglobal(:jl_emptytuple, Any))) -end - -function serialize(ctx::SerializeContext, x::String) - advance!(ctx.io) - v = Vector{UInt8}(x) - ioptr = ctx.io.ptr - write(ctx.io, v) - quote - unsafe_string(Vptr + $(ioptr - 1), $(length(v))) - end -end - -function serialize(ctx::SerializeContext, x::Symbol) - haskey(ctx.store, x) && return ctx.store[x] - name = gensym(Symbol(:symbol, "-", x)) - ctx.store[x] = name - e = quote - $name = ccall(:jl_symbol_n, Any, (Ptr{UInt8}, Csize_t), $(serialize(ctx, string(x))), $(length(string(x)))) - # ccall(:jl_set_global, Cvoid, (Any, Any, Any), unsafe_load(cglobal(:jl_main_module, Any)), $(QuoteNode(name)), x) - end - push!(ctx.init, e) - return name -end - - - -# Define functions that return an expression. 
Example: -# serialize(ctx::SerializeContext, x::Int) = :(ccall(:jl_box_int64, Any, (Int,), $x)) -for (fun, type) in (:jl_box_int64 => Int64, :jl_box_int32 => Int32, :jl_box_int8 => Int16, :jl_box_int8 => Int8, - :jl_box_uint64 => UInt64, :jl_box_uint32 => UInt32, :jl_box_uint8 => UInt16, :jl_box_uint8 => UInt8, - :jl_box_voidpointer => Ptr{Cvoid}, - :jl_box_float64 => Float64, :jl_box_float32 => Float32) - @eval serialize(ctx::SerializeContext, x::$type) = Expr(:call, :ccall, QuoteNode($(QuoteNode(fun))), Any, Expr(:tuple, $type), x) -end -serialize(ctx::SerializeContext, x::Char) = :(ccall(:jl_box_char, Any, (UInt32,), $x)) -serialize(ctx::SerializeContext, x::Bool) = :(ccall(:jl_box_bool, Any, (UInt8,), $x)) - -function serialize(ctx::SerializeContext, a::Tuple) - length(a) == 0 && return :(unsafe_load(cglobal(:jl_emptytuple, Any))) - Expr(:tuple, (serialize(ctx, x) for x in a)...) -end - -function serialize(ctx::SerializeContext, a::Core.SimpleVector) - length(a) == 0 && return :(unsafe_load(cglobal(:jl_emptysvec, Any))) - Expr(:call, Expr(:., :Core, QuoteNode(:svec)), (serialize(ctx, x) for x in a)...) 
-end - -advance!(io) = write(io, repeat('\0', -rem(io.ptr - 1, 8, RoundUp))) # Align data to 8 bytes - -function serialize(ctx::SerializeContext, a::Array{T,N}) where {T,N} - elty = eltype(a) - aty = typeof(a) - dims = size(a) - atys = serialize(ctx, aty) - if isbitstype(elty) - advance!(ctx.io) - ioptr = ctx.io.ptr - write(ctx.io, a) - if N == 1 - advance!(ctx.io) - ioptr = ctx.io.ptr - write(ctx.io, a) - quote - p = Vptr + $ioptr - 1 - ccall(:jl_ptr_to_array_1d, $aty, (Any, Ptr{Cvoid}, Csize_t, Cint), $atys, p, $(length(a)), false) - end - else - dms = serialize(ctx, dims) - advance!(ctx.io) - ioptr = ctx.io.ptr - write(ctx.io, a) - quote - p = Vptr + $ioptr - 1 - ccall(:jl_ptr_to_array, $aty, (Any, Ptr{Cvoid}, Any, Int32), $atys, p, $dms, false) - end - end - else - idx = Int[] - e = Array{Any}(undef, length(a)) - @inbounds for i in eachindex(a) - if isassigned(a, i) - e[i] = serialize(ctx, a[i]) - push!(idx, i) - end - end - aname = gensym() - resulte = [quote - # $aname = Array{$elty, $(length(dims))}(undef, $dims) - $aname = ccall(:jl_new_array, $aty, (Any, Any), $atys, $(serialize(ctx, dims))) - end] - for i in idx - push!(resulte, quote - # unsafe_store!(pointer($aname), $(e[i]), $i) - unsafe_store!(convert(Ptr{Any}, pointer($aname)), $(e[i]), $i) - # unsafe_store!(convert(Ptr{Csize_t}, pointer($aname)), pointer_from_objref($(e[i])), $i) - # @inbounds $aname[$i] = $(e[i]) - end) - end - push!(resulte, :($aname = $aname)) - Expr(:block, resulte...) 
- end -end diff --git a/src/utils.jl b/src/utils.jl deleted file mode 100644 index 0fdf7a7..0000000 --- a/src/utils.jl +++ /dev/null @@ -1,56 +0,0 @@ - -function julia_to_llvm(@nospecialize x) - isboxed = Ref{UInt8}() - # LLVMType(ccall(:jl_type_to_llvm, LLVM.API.LLVMTypeRef, (Any, Ref{UInt8}), x, isboxed)) # noserialize - LLVMType(ccall(:julia_type_to_llvm, LLVM.API.LLVMTypeRef, (Any, Ref{UInt8}), x, isboxed)) # julia v1.1.1 -end - -const jl_value_t_ptr = julia_to_llvm(Any) -const jl_value_t = eltype(jl_value_t_ptr) -# const jl_value_t_ptr_ptr = LLVM.PointerType(jl_value_t_ptr) -# # cheat on these for now: -# const jl_datatype_t_ptr = jl_value_t_ptr -# const jl_unionall_t_ptr = jl_value_t_ptr -# const jl_typename_t_ptr = jl_value_t_ptr -# const jl_sym_t_ptr = jl_value_t_ptr -# const jl_svec_t_ptr = jl_value_t_ptr -# const jl_module_t_ptr = jl_value_t_ptr -# const jl_array_t_ptr = jl_value_t_ptr -# -# const bool_t = julia_to_llvm(Bool) -# const int8_t = julia_to_llvm(Int8) -# const int16_t = julia_to_llvm(Int16) -# const int32_t = julia_to_llvm(Int32) -# const int64_t = julia_to_llvm(Int64) -# const uint8_t = julia_to_llvm(UInt8) -# const uint16_t = julia_to_llvm(UInt16) -# const uint32_t = julia_to_llvm(UInt32) -# const uint64_t = julia_to_llvm(UInt64) -# const float_t = julia_to_llvm(Float32) -# const double_t = julia_to_llvm(Float64) -# const float32_t = julia_to_llvm(Float32) -# const float64_t = julia_to_llvm(Float64) -# const void_t = julia_to_llvm(Nothing) -# const size_t = julia_to_llvm(Int) -# -# const int8_t_ptr = LLVM.PointerType(int8_t) -# const void_t_ptr = LLVM.PointerType(void_t) - -function module_setup(mod::LLVM.Module) -# triple!(mod, "wasm32-unknown-unknown-wasm") -# datalayout!(mod, "e-m:e-p:32:32-i64:64-n32:64-S128") -end - -llvmmod(native_code) = - LLVM.Module(ccall(:jl_get_llvm_module, LLVM.API.LLVMModuleRef, - (Ptr{Cvoid},), native_code.p)) - -function Base.write(mod::LLVM.Module, path::String) - open(io -> write(io, mod), path, "w") -end - 
- -walk(f, x) = true -# walk(f, x::Instruction) = foreach(c->walk(f,c), operands(x)) -# walk(f, x::Instruction) = f(x) || foreach(c->walk(f,c), operands(x)) -walk(f, x::ConstantExpr) = f(x) || foreach(c->walk(f,c), operands(x)) diff --git a/test/ccalls.jl b/test/ccalls.jl deleted file mode 100644 index dd5663c..0000000 --- a/test/ccalls.jl +++ /dev/null @@ -1,35 +0,0 @@ -# d = find_ccalls(Threads.nthreads, Tuple{}) -# d = find_ccalls(time, Tuple{}) -# d = find_ccalls(muladd, Tuple{Array{Float64,2},Array{Float64,2},Array{Float64,2}}) - -f1() = ccall(:jl_errno, Int, (Int,), 11) -f2() = ccall(:jl_errno, Int, (Int, Int), 21, 22) -f3() = ccall(:jl_errno, Int, (Int, Int, Int), 31, 32, 33) - -@testset "ccalls" begin - m1 = irgen(f1, Tuple{}) - m2 = irgen(f2, Tuple{}) - m3 = irgen(f3, Tuple{}) - LLVM.verify(m1) - LLVM.verify(m2) - LLVM.verify(m3) -end - - -function f() - n = Int(unsafe_load(cglobal(:jl_n_threads, Cint))) - return 2n -end - -@testset "cglobal" begin - m = irgen(f, Tuple{}) - LLVM.verify(m) - @test f() == @jlrun f() -end - -@testset "extern" begin - f() = @extern(:time, Cvoid, (Ptr{Cvoid},), C_NULL) - m = irgen(f, Tuple{}) - LLVM.verify(m) - @test "time" in [name(f) for f in LLVM.functions(m)] -end diff --git a/test/globals.jl b/test/globals.jl deleted file mode 100644 index 43c70e8..0000000 --- a/test/globals.jl +++ /dev/null @@ -1,36 +0,0 @@ -# @testset "serialize" begin -# ctx = StaticCompiler.SerializeContext() -# a = Any["abcdg", ["hi", "bye"], 3333, Int32(44), 314f0, 3.14, (1, 3.3f0), Core.svec(9.9, 9), :sym, :sym, :a] -# e = StaticCompiler.serialize(ctx, a) -# g = eval(:(Vptr -> $e)) -# v = take!(ctx.io) -# GC.enable(false) -# res = g(pointer(v)) -# GC.enable(true) -# @test res == a -# end - - -# const a = ["abcdg", "asdfl", 123, 3.14, ["a", "asdf"], (1, 3.63), [1, 3.63]] -const a = ["abcdg", "asdxf"] -const b = "B" -const x = [1.33, 35.0] -const xi = [3, 5] - -f(x) = @inbounds a[1][3] > b[1] ? 2x : x -g(i) = @inbounds x[1] > x[2] ? 
2i : i -h(i) = @inbounds xi[1] == 3 ? i : 2i - -@testset "globals" begin - @test f(3) == @jlrun f(3) - @test g(3) == @jlrun g(3) - @test h(3) == @jlrun h(3) -end - -f() = Complex{Float64} -g(@nospecialize(x)) = isa(x, Number) ? 1 : 0 - -@testset "type" begin - @test string(@jlrun f()) == "Complex{Float64}" - res = g(4.0im) -end diff --git a/test/ode.jl b/test/ode.jl deleted file mode 100644 index b270a54..0000000 --- a/test/ode.jl +++ /dev/null @@ -1,182 +0,0 @@ -# Adapted from: https://github.com/JuliaDiffEq/ODE.jl/blob/8954872f956116e78b6c04690f899fe2db696b4e/src/ODE.jl#L84-L360 -# MIT license -# Copyright (c) 2009-2015: various contributors: https://github.com/JuliaLang/ODE.jl/contributors - -using LinearAlgebra - -function hinit(F, x0, t0::T, tend, p, reltol, abstol) where T - # Returns first step, direction of integration and F evaluated at t0 - tdir = sign(tend-t0) - tdir==0 && error("Zero time span") - tau = max(reltol*norm(x0, Inf), abstol) - d0 = norm(x0, Inf)/tau - f0 = F(t0, x0) - d1 = norm(f0, Inf)/tau - if d0 < 1e-5 || d1 < 1e-5 - h0 = 1e-6 - else - h0 = 0.01*(d0/d1) - end - h0 = convert(T,h0) - # perform Euler step - x1 = x0 + tdir*h0*f0 - f1 = F(t0 + tdir*h0, x1) - # estimate second derivative - d2 = norm(f1 - f0, Inf)/(tau*h0) - if max(d1, d2) <= 1e-15 - h1 = max(T(10)^(-6), T(10)^(-3)*h0) - else - pow = -(2 + log10(max(d1, d2)))/(p + 1) - h1 = 10^pow - end - h1 = convert(T,h1) - return tdir*min(100*h0, h1, tdir*(tend-t0)), tdir, f0 -end - -function fdjacobian(F, x::Number, t) - ftx = F(t, x) - - # The 100 below is heuristic - dx = (x .+ (x==0))./100 - dFdx = (F(t,x+dx)-ftx)./dx - - return dFdx -end - -function fdjacobian(F, x, t) - ftx = F(t, x) - lx = max(length(x),1) - dFdx = zeros(eltype(x), lx, lx) - for j = 1:lx - # The 100 below is heuristic - dx = zeros(eltype(x), lx) - dx[j] = (x[j] .+ (x[j]==0))./100 - dFdx[:,j] = (F(t,x+dx)-ftx)./dx[j] - end - return dFdx -end - -# ODE23S Solve stiff systems based on a modified Rosenbrock triple -# (also 
used by MATLAB's ODE23s); see Sec. 4.1 in -# -# [SR97] L.F. Shampine and M.W. Reichelt: "The MATLAB ODE Suite," SIAM Journal on Scientific Computing, Vol. 18, 1997, pp. 1–22 -# -# supports keywords: points = :all | :specified (using dense output) -# jacobian = G(t,y)::Function | nothing (FD) -function ode23s(F, y0, tspan; - reltol = 1.0e-5, abstol = 1.0e-8, - jacobian=nothing, - points=:all, - norm=LinearAlgebra.norm, - minstep=abs(tspan[end] - tspan[1])/1e18, - maxstep=abs(tspan[end] - tspan[1])/2.5, - initstep=0.) - - # select method for computing the Jacobian - if typeof(jacobian) == Function - jac = jacobian - else - # fallback finite-difference - jac = (t, y)->fdjacobian(F, y, t) - end - - # constants - d = 1/(2 + sqrt(2)) - e32 = 6 + sqrt(2) - - - # initialization - t = tspan[1] - - tfinal = tspan[end] - - h = initstep - if h == 0. - # initial guess at a step size - h, tdir, F0 = hinit(F, y0, t, tfinal, 3, reltol, abstol) - else - tdir = sign(tfinal - t) - F0 = F(t,y0) - end - h = tdir * min(abs(h), maxstep) - - y = y0 - tout = [t] # first output time - yout = [deepcopy(y)] # first output solution - - J = jac(t,y) # get Jacobian of F wrt y -# Core.print(t, " ", tfinal, " ", minstep, " ", h) - while abs(t - tfinal) > 0 && minstep < abs(h) - if abs(t-tfinal) < abs(h) - h = tfinal - t - end - - if size(J,1) == 1 - W = I - h*d*J - else - # note: if there is a mass matrix M on the lhs of the ODE, i.e., - # M * dy/dt = F(t,y) - # we can simply replace eye(J) by M in the following expression - # (see Sec. 
5 in [SR97]) - - W = lu( I - h*d*J ) - end - - # approximate time-derivative of F - T = h*d*(F(t + h/100, y) - F0)/(h/100) - - # modified Rosenbrock formula - k1 = W\(F0 + T) - F1 = F(t + 0.5*h, y + 0.5*h*k1) - k2 = W\(F1 - k1) + k1 - ynew = y + h*k2 - F2 = F(t + h, ynew) - k3 = W\(F2 - e32*(k2 - F1) - 2*(k1 - F0) + T ) - - err = (abs(h)/6)*norm(k1 - 2*k2 + k3) # error estimate - delta = max(reltol*max(norm(y),norm(ynew)), abstol) # allowable error - - # check if new solution is acceptable - if err <= delta - - # # if points==:specified || points==:all - # # only points in tspan are requested - # # -> find relevant points in (t,t+h] - # for toi in tspan[(tspan.>t) .& (tspan.<=t+h)] - # # rescale to (0,1] - # s = (toi-t)/h - - # # use interpolation formula to get solutions at t=toi - # push!(tout, toi) - # push!(yout, y + h*( k1*s*(1-s)/(1-2*d) + k2*s*(s-2*d)/(1-2*d))) - # end - # # Core.print("First\n") - # # end - # if points==:all - if (tout[end]!=t+h) - # # add the intermediate points - push!(tout, t + h) - push!(yout, ynew) - end - - # update solution - t = t + h - y = ynew - - F0 = F2 # use FSAL property - J = jac(t,y) # get Jacobian of F wrt y - # for new solution - end - - # update of the step size - h = tdir*min( maxstep, abs(h)*0.8*(delta/err)^(1/3) ) - end - - return tout, yout -end - - -# fode() = ode23s((t,y)->2.0t^2, 0.0, Float64[0:.001:2;], initstep = 1e-4)[2][end] -# fode() = ode23s((t,y)->2.0t^2, 0.0, [0:.001:2;], initstep = 1e-4)[2][end] - -# @show fode() diff --git a/test/others.jl b/test/others.jl deleted file mode 100644 index 4481e77..0000000 --- a/test/others.jl +++ /dev/null @@ -1,73 +0,0 @@ -mutable struct AAA - aaa::Int - bbb::Int -end -@noinline ssum(x) = x.aaa + x.bbb -fstruct(x) = ssum(AAA(x, 99)) -@test fstruct(10) == @jlrun fstruct(10) - -module ZZ -mutable struct AAA - aaa::Int - bbb::Int -end -@noinline ssum(x) = x.aaa + x.bbb -fstruct(x) = ssum(AAA(x, 99)) -end # module -ffstruct(x) = ZZ.fstruct(x) -@test ffstruct(10) == @jlrun 
ffstruct(10) - -const ag = Ref(0x80808080) -jglobal() = ag -@show bg = @jlrun jglobal() -# @test jglobal()[] == bg[] # Something's broken with mutable's - -arraysum(x) = sum([x, 1]) -# @test arraysum(6) == @jlrun arraysum(6) - -fsin(x) = sin(x) -@test fsin(0.5) == @jlrun fsin(0.5) - -fccall() = ccall(:jl_ver_major, Cint, ()) -@test fccall() == @jlrun fccall() - -fcglobal() = cglobal(:jl_n_threads, Cint) -@test fcglobal() == @jlrun fcglobal() - -const sv = Core.svec(1,2,3,4) -fsv() = sv -@test fsv() == @jlrun fsv() - -const arr = [9,9,9,9] -farray() = arr -@show @jlrun farray() -@show farray() -# @test farray() == @jlrun farray() - -@noinline f0(x) = 3x -@noinline fop(f, x) = 2f(x) -funcall(x) = fop(f0, x) -@test funcall(2) == @jlrun funcall(2) - -hi() = print(Core.stdout, 'X') -@jlrun hi() - -hello() = print(Core.stdout, "Hello world...\n") -@jlrun hello() - -function gx(i) - a = 2.0:0.1:10.0 - @inbounds i > 3 ? a[1] : a[5] -end - -@test_skip gx(4) == @jlrun gx(4) - -fsimple() = [0:.001:2;][end] - -@test fsimple() == @jlrun fsimple() - -@noinline function fsym(x; l = :hello, s = :x) - s == :asdf ? x : 2x -end -gsym(x) = fsym(x, l = :hello, s = :asdf) + 1 -@test gsym(3) == @jlrun gsym(3) diff --git a/test/runtests.jl b/test/runtests.jl index b721ea6..21286a2 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -19,10 +19,12 @@ using LinearAlgebra end +fib(n) = n <= 1 ? n : fib(n - 1) + fib(n - 2) # for some reason, if this is defined in the testset, it segfaults + @testset "Recursion" begin - fib(n) = n <= 1 ? n : fib(n - 1) + fib(n - 2) # This works on the REPL but fails here - @test_skip ccall(generate_shlib_fptr(fib, (Int,)), Int, (Int,), 10) == 55 + fib_ptr = generate_shlib_fptr(fib, (Int,)) + @test @ccall( $fib_ptr(10::Int) :: Int ) == 55 end # Call binaries for testing @@ -80,8 +82,6 @@ end # Arrays with different input types Int32, Int64, Float32, Float64, Complex? 
@testset "Arrays" begin - - arr = collect(1:10) function array_sum(n, A) s = zero(eltype(A)) for i in 1:n @@ -89,21 +89,31 @@ end end s end + + array_sum_ptr = generate_shlib_fptr(array_sum, Tuple{Int, Vector{Int}}) + @test ( @ccall $array_sum_ptr(10::Int, collect(1:10)::Vector{Int})::Int ) == 55 - #This segfaults, not sure if this is how you pass around arrays - @test_skip ccall(generate_shlib_fptr(array_sum, (Csize_t, Ptr{Float64})), Int, (Csize_t, Ptr{Float64}), length(arr), arr) == 55 + # this will segfault on my machine if I use 64 bit complex numbers! + array_sum_complex_ptr = generate_shlib_fptr(array_sum, Tuple{Int, Vector{Complex{Float32}}}) + @test ( @ccall $array_sum_complex_ptr(2::Int, [1f0+im, 1f0-im]::Vector{Complex{Float32}})::Complex{Float32} ) ≈ 2.0 + #This will segfault + array_sum_complex64_ptr = generate_shlib_fptr(array_sum, Tuple{Int, Vector{Complex{Float642}}}) + @test_skip ( @ccall $array_sum_complex_ptr(2::Int, [1.0+im, 1.0-im]::Vector{Complex{Float64}})::Complex{Float64} ) ≈ 2.0 end # Just to call external libraries @testset "BLAS" begin - function mydot(N) - a = Float64.(1:N) + function mydot(a::Vector{Float64}) + N = length(a) BLAS.dot(N, a, 1, a, 1) end - @test_skip ccall(generate_shlib_fptr(mydot, (Int,)), Float64, (Int,), 2) == 5. + a = [1.0, 2.0] + mydot_ptr = generate_shlib_fptr(mydot, Tuple{Vector{Float64}}) + @test @ccall( $mydot_ptr(a::Vector{Float64})::Float64 ) == 5.0 end + @testset "Hello World" begin function hello(N) println("Hello World $N") diff --git a/test/standalone-exe.jl b/test/standalone-exe.jl deleted file mode 100644 index fb97de5..0000000 --- a/test/standalone-exe.jl +++ /dev/null @@ -1,41 +0,0 @@ -# Definitions of functions to compile -twox(x) = 2x - -const aa = [4, 5] -arrayfun(x) = x + aa[1] + aa[2] - -jsin(x) = sin(x) - -function arridx(i) - a = collect(1.0:0.1:10.0) - @inbounds i > 3 ? 
a[1] : a[5] -end - -fsimple() = [0:.001:2;][end] - -include("ode.jl") -fode() = ode23s((t,y)->2.0t^2, 0.0, [0:.001:2;], initstep = 1e-4)[2][end] - -# Functions to compile and arguments to pass -funcalls = [ - (twox, Tuple{Int}, 4), - (arrayfun, Tuple{Int}, 4), - (jsin, Tuple{Float64}, 0.5), - (arridx, Tuple{Int}, 4), - (fsimple, Tuple{}, ()), - (fode, Tuple{}, ()), # Broken on Julia v1.2.0; works on Julia v1.3.0-rc3 -] - -StaticCompiler.exegen(funcalls) - -using Formatting -@testset "exegen" begin - cd("standalone") do - for (func, tt, val) in funcalls - fname = nameof(func) - rettype = Base.return_types(func, tt)[1] - fmt = StaticCompiler.Cformatmap[rettype] - @test Formatting.sprintf1(fmt, func(val...)) == read(`./$fname`, String) - end - end -end From ffde87606a4e781779dc767985b38cd6bdf2b4ab Mon Sep 17 00:00:00 2001 From: Tom Short Date: Wed, 2 Feb 2022 08:48:30 -0500 Subject: [PATCH 012/159] Fix test typo --- test/runtests.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/runtests.jl b/test/runtests.jl index 21286a2..e008315 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -98,7 +98,7 @@ end @test ( @ccall $array_sum_complex_ptr(2::Int, [1f0+im, 1f0-im]::Vector{Complex{Float32}})::Complex{Float32} ) ≈ 2.0 #This will segfault - array_sum_complex64_ptr = generate_shlib_fptr(array_sum, Tuple{Int, Vector{Complex{Float642}}}) + array_sum_complex64_ptr = generate_shlib_fptr(array_sum, Tuple{Int, Vector{Complex{Float64}}}) @test_skip ( @ccall $array_sum_complex_ptr(2::Int, [1.0+im, 1.0-im]::Vector{Complex{Float64}})::Complex{Float64} ) ≈ 2.0 end From 01be968e8ae23b69e2dabc4efc9a4244300400d3 Mon Sep 17 00:00:00 2001 From: MasonProtter Date: Wed, 2 Feb 2022 11:26:33 -0700 Subject: [PATCH 013/159] remove windows from CI --- .github/workflows/ci.yml | 1 - 1 file changed, 1 deletion(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index b9510c3..14c8967 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ 
-14,7 +14,6 @@ jobs: os: - ubuntu-latest - macOS-latest - - windows-latest arch: - x64 steps: From 058220577b8f494ed7619b9965a774acec2a3bbe Mon Sep 17 00:00:00 2001 From: MasonProtter Date: Wed, 2 Feb 2022 11:27:14 -0700 Subject: [PATCH 014/159] note that this doesn't work on windows --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index a5395f2..5578005 100644 --- a/README.md +++ b/README.md @@ -43,4 +43,4 @@ This package uses the [GPUCompiler package](https://github.com/JuliaGPU/GPUCompi * No heap allocations (e.g. creating an array or a string) are allowed inside a statically compiled function body. If you try to run such a function, you will get a segfault. ** It's sometimes possible you won't get a segfault if you define and run the function in the same session, but trying to call the compiled function in a new julia session will definitely segfault. * Lots of other limitations too. E.g. there's an example in tests/runtests.jl where summing a vector of `Complex{Float32}` is fine, but segfaults on `Complex{Float64}`. - +* Doesn't currently work on Windows \ No newline at end of file From 6248d8d690a6e0ef4d8cf04940a6aefadfe8ccd4 Mon Sep 17 00:00:00 2001 From: MasonProtter Date: Wed, 2 Feb 2022 14:05:17 -0700 Subject: [PATCH 015/159] fix `compile`, add `native_code_llvm`, `native_code_typed` --- src/StaticCompiler.jl | 47 ++++++++++++++++++++++++++++--------------- 1 file changed, 31 insertions(+), 16 deletions(-) diff --git a/src/StaticCompiler.jl b/src/StaticCompiler.jl index c544fa4..dfc9445 100644 --- a/src/StaticCompiler.jl +++ b/src/StaticCompiler.jl @@ -10,28 +10,44 @@ export generate_shlib, generate_shlib_fptr, compile module TestRuntime # dummy methods signal_exception() = return - malloc(sz) = C_NULL + # HACK: if malloc returns 0 or traps, all calling functions (like jl_box_*) + # get reduced to a trap, which really messes with our test suite. 
+ malloc(sz) = Ptr{Cvoid}(Int(0xDEADBEEF)) report_oom(sz) = return report_exception(ex) = return report_exception_name(ex) = return report_exception_frame(idx, func, file, line) = return - - # for validation - sin(x) = Base.sin(x) end struct TestCompilerParams <: GPUCompiler.AbstractCompilerParams end GPUCompiler.runtime_module(::GPUCompiler.CompilerJob{<:Any,TestCompilerParams}) = TestRuntime + +function native_job(@nospecialize(func), @nospecialize(types); kernel::Bool=false, name=GPUCompiler.safe_name(repr(func)), kwargs...) + source = GPUCompiler.FunctionSpec(func, Base.to_tuple_type(types), kernel, name) + target = GPUCompiler.NativeCompilerTarget(always_inline=true) + params = TestCompilerParams() + GPUCompiler.CompilerJob(target, source, params), kwargs +end + +function native_code_llvm(@nospecialize(func), @nospecialize(types); kwargs...) + job, kwargs = native_job(func, types; kwargs...) + GPUCompiler.code_llvm(stdout, job; kwargs...) +end + +function native_code_typed(@nospecialize(func), @nospecialize(types); kwargs...) + job, kwargs = native_job(func, types; kwargs...) + GPUCompiler.code_typed(job; kwargs...) +end + + # const linker = Sys.isunix() ? "ld.lld" : Sys.isapple() ? "ld64.lld" : "lld-link" -function generate_shlib(f, tt, path::String = tempname(), name = GPUCompiler.safe_name(repr(f))) +function generate_shlib(f, tt, path::String = tempname(), name = GPUCompiler.safe_name(repr(f)); kwargs...) open(path, "w") do io - target = GPUCompiler.NativeCompilerTarget(;always_inline=true) - source = GPUCompiler.FunctionSpec(f, Base.to_tuple_type(tt), false, name) - params = TestCompilerParams() - job = GPUCompiler.CompilerJob(target, source, params) + job, kwargs = native_job(f, tt; name, kwargs...) 
obj, _ = GPUCompiler.codegen(:obj, job; strip=true, only_entry=false, validate=false) + write(io, obj) flush(io) # run(`$(Clang_jll.PATH[])/clang -shared -o $path.$(Libdl.dlext) $path`) @@ -40,8 +56,9 @@ function generate_shlib(f, tt, path::String = tempname(), name = GPUCompiler.saf end path, name end -function generate_shlib_fptr(f, tt, path::String=tempname(), name = GPUCompiler.safe_name(repr(f)); temp::Bool=true) - generate_shlib(f, tt, path, name) + +function generate_shlib_fptr(f, tt, path::String=tempname(), name = GPUCompiler.safe_name(repr(f)); temp::Bool=true, kwargs...) + generate_shlib(f, tt, path, name; kwargs...) ptr = Libdl.dlopen("$(abspath(path)).$(Libdl.dlext)", Libdl.RTLD_LOCAL) fptr = Libdl.dlsym(ptr, "julia_$name") @assert fptr != C_NULL @@ -50,6 +67,7 @@ function generate_shlib_fptr(f, tt, path::String=tempname(), name = GPUCompiler. end fptr end + function generate_shlib_fptr(path::String, name) ptr = Libdl.dlopen("$(abspath(path)).$(Libdl.dlext)", Libdl.RTLD_LOCAL) fptr = Libdl.dlsym(ptr, "julia_$name") @@ -58,11 +76,8 @@ function generate_shlib_fptr(path::String, name) end # Return an LLVM module -function compile(f, tt, name = GPUCompiler.safe_name(repr(f))) - target = GPUCompiler.NativeCompilerTarget(;reloc=LLVM.API.LLVMRelocPIC, extern=true) - source = GPUCompiler.FunctionSpec(f, Base.to_tuple_type(tt), false, name) - params = TestCompilerParams() - job = GPUCompiler.CompilerJob(target, source, params) +function compile(f, tt, name = GPUCompiler.safe_name(repr(f)); kwargs...) + job, kwargs = native_job(f, tt; name, kwargs...) 
m, _ = GPUCompiler.codegen(:llvm, job; strip=true, only_entry=false, validate=false) return m end From 61a12b6ba21f504877c33a98f7f2265c47c1b1f4 Mon Sep 17 00:00:00 2001 From: MasonProtter Date: Wed, 2 Feb 2022 15:31:09 -0700 Subject: [PATCH 016/159] clarify fib problem, add LoopVectorization test :) --- test/Project.toml | 1 + test/runtests.jl | 56 +++++++++++++++++++++++++++++++++++++++++++---- 2 files changed, 53 insertions(+), 4 deletions(-) diff --git a/test/Project.toml b/test/Project.toml index 6e781cc..5b86d43 100644 --- a/test/Project.toml +++ b/test/Project.toml @@ -4,3 +4,4 @@ LLVM = "929cbde3-209d-540e-8aea-75f648917ca0" Libdl = "8f399da3-3557-5675-b5ff-fb832c97cbdb" LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e" Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40" +LoopVectorization = "bdcacae8-1622-11e9-2a5c-532679323890" \ No newline at end of file diff --git a/test/runtests.jl b/test/runtests.jl index e008315..7e687b4 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -2,6 +2,7 @@ using StaticCompiler using Test using Libdl using LinearAlgebra +using LoopVectorization @testset "Basics" begin @@ -19,12 +20,20 @@ using LinearAlgebra end -fib(n) = n <= 1 ? n : fib(n - 1) + fib(n - 2) # for some reason, if this is defined in the testset, it segfaults + +fib(n) = n <= 1 ? n : fib(n - 1) + fib(n - 2) # This needs to be defined globally due to https://github.com/JuliaLang/julia/issues/40990 @testset "Recursion" begin - # This works on the REPL but fails here fib_ptr = generate_shlib_fptr(fib, (Int,)) @test @ccall( $fib_ptr(10::Int) :: Int ) == 55 + + # Trick to work around #40990 + _fib2(_fib2, n) = n <= 1 ? 
n : _fib2(_fib2, n-1) + _fib2(_fib2, n-2) + fib2(n) = _fib2(_fib2, n) + + fib2_ptr = generate_shlib_fptr(fib2, (Int,)) + @test @ccall( $fib2_ptr(20::Int) :: Int ) == 6765 + end # Call binaries for testing @@ -102,6 +111,25 @@ end @test_skip ( @ccall $array_sum_complex_ptr(2::Int, [1.0+im, 1.0-im]::Vector{Complex{Float64}})::Complex{Float64} ) ≈ 2.0 end + +# Julia wants to treat Tuple (and other things like it) as plain bits, but LLVM wants to treat it as something with a pointer. +# We need to be careful to not send, nor receive an unwrapped Tuple to a compiled function +@testset "Send and receive Tuple" begin + foo(u::Tuple) = 2 .* reverse(u) .- 1 # we can't just compile this as is. + + # Make a mutating function that places the output into a Ref for the caller to grab: + foo!(out::Ref{<:Tuple}, u::Tuple) = (out[] = foo(u); return nothing) + + foo_ptr = generate_shlib_fptr(foo!, Tuple{Base.RefValue{NTuple{3, Int}}, NTuple{3, Int}}) + out = Ref{NTuple{3, Int}}() + # we wrap u in a ref when we send it to the binary because LLVM expects that :( + u = Ref((1, 2, 3)) + (@ccall $foo_ptr(out::Ref{NTuple{3, Int}}, u::Ref{NTuple{3, Int}}) :: Nothing) + + @test out[] == foo(u[]) +end + + # Just to call external libraries @testset "BLAS" begin function mydot(a::Vector{Float64}) @@ -124,7 +152,27 @@ end @test_skip ccall(generate_shlib_fptr(hello, (Int,)), Int, (Int,), 1) == 1 end +@testset "LoopVectorization" begin + function mul!(C, A, B) + @turbo for n ∈ indices((C,B), 2), m ∈ indices((C,A), 1) + Cmn = zero(eltype(C)) + for k ∈ indices((A,B), (2,1)) + Cmn += A[m,k] * B[k,n] + end + C[m,n] = Cmn + end + end + mul_ptr! = generate_shlib_fptr(mul!, Tuple{Matrix{Float64}, Matrix{Float64}, Matrix{Float64}}) + + C = Array{Float64}(undef, 10, 12) + A = rand(10, 11) + B = rand(11, 12) + + @ccall $mul_ptr!(C::Matrix{Float64}, A::Matrix{Float64}, B::Matrix{Float64}) :: Nothing + @test C ≈ A*B +end + + + # data structures, dictionaries, tuples, named tuples -# passing pointers? 
-# @inbounds LoopVectorization From df3f23f43b307dd1a25053d5e1d58bf1cf03fa1f Mon Sep 17 00:00:00 2001 From: MasonProtter Date: Wed, 2 Feb 2022 15:46:10 -0700 Subject: [PATCH 017/159] export `native_code_*` --- src/StaticCompiler.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/StaticCompiler.jl b/src/StaticCompiler.jl index dfc9445..69c47b1 100644 --- a/src/StaticCompiler.jl +++ b/src/StaticCompiler.jl @@ -5,7 +5,7 @@ using LLVM: LLVM using Libdl: Libdl -export generate_shlib, generate_shlib_fptr, compile +export generate_shlib, generate_shlib_fptr, compile, native_code_llvm, native_code_typed module TestRuntime # dummy methods From 85c24566e92c9718e177ac733e7f11355e4526f6 Mon Sep 17 00:00:00 2001 From: MasonProtter Date: Wed, 2 Feb 2022 15:46:47 -0700 Subject: [PATCH 018/159] add tests for stack allocated mutable arrays --- test/Project.toml | 4 +++- test/runtests.jl | 23 ++++++++++++++++++++++- 2 files changed, 25 insertions(+), 2 deletions(-) diff --git a/test/Project.toml b/test/Project.toml index 5b86d43..3dee0e5 100644 --- a/test/Project.toml +++ b/test/Project.toml @@ -4,4 +4,6 @@ LLVM = "929cbde3-209d-540e-8aea-75f648917ca0" Libdl = "8f399da3-3557-5675-b5ff-fb832c97cbdb" LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e" Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40" -LoopVectorization = "bdcacae8-1622-11e9-2a5c-532679323890" \ No newline at end of file +LoopVectorization = "bdcacae8-1622-11e9-2a5c-532679323890" +ManualMemory = "d125e4d3-2237-4719-b19c-fa641b8a4667" +StrideArraysCore = "7792a7ef-975c-4747-a70f-980b88e8d1da" \ No newline at end of file diff --git a/test/runtests.jl b/test/runtests.jl index 7e687b4..78d62bf 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -3,6 +3,8 @@ using Test using Libdl using LinearAlgebra using LoopVectorization +using ManualMemory +using StrideArraysCore @testset "Basics" begin @@ -152,8 +154,10 @@ end @test_skip ccall(generate_shlib_fptr(hello, (Int,)), Int, (Int,), 1) == 1 end +# I 
can't beleive this works. @testset "LoopVectorization" begin function mul!(C, A, B) + # note: @tturbo does NOT work @turbo for n ∈ indices((C,B), 2), m ∈ indices((C,A), 1) Cmn = zero(eltype(C)) for k ∈ indices((A,B), (2,1)) @@ -172,7 +176,24 @@ end @test C ≈ A*B end - +# This is a trick to get stack allocated arrays inside a function body (so long as they don't escape). +# This lets us have intermediate, mutable stack allocated arrays inside our +@testset "Alloca" begin + function f(N) + # this can hold at most 100 Int values, if you use it for more, you'll segfault + buf = ManualMemory.MemoryBuffer{100, Int}(undef) + GC.@preserve buf begin + # wrap the first N values in a PtrArray + arr = PtrArray(pointer(buf), (N,)) + arr .= 1 # mutate the array to be all 1s + sum(arr) # compute the sum. It is very imporatant that no references to arr escape the function body + end + end + + fptr = generate_shlib_fptr(f, Tuple{Int}) + @test (@ccall $fptr(20::Int) :: Int) == 20 + +end # data structures, dictionaries, tuples, named tuples From 775b5f2be8e48311b8d9e2352fbbb241efb4fbc7 Mon Sep 17 00:00:00 2001 From: MasonProtter Date: Wed, 2 Feb 2022 15:49:50 -0700 Subject: [PATCH 019/159] test on version 1.7 --- .github/workflows/ci.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 14c8967..15f005a 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -10,6 +10,7 @@ jobs: fail-fast: false matrix: version: + - '1.7' - 'nightly' os: - ubuntu-latest From 773cc329b4cbc035fe5523d2ebb7cd8132873836 Mon Sep 17 00:00:00 2001 From: MasonProtter Date: Wed, 2 Feb 2022 15:57:08 -0700 Subject: [PATCH 020/159] fix malformed .yml --- .github/workflows/ci.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 15f005a..03f8476 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -10,7 +10,7 @@ jobs: fail-fast: false matrix: 
version: - - '1.7' + - '1.7' - 'nightly' os: - ubuntu-latest From d1f53696b523532eaf9794f8f1f00b469741743c Mon Sep 17 00:00:00 2001 From: MasonProtter Date: Wed, 2 Feb 2022 16:05:48 -0700 Subject: [PATCH 021/159] rename `compile` to `native_llvm_module` --- src/StaticCompiler.jl | 30 ++++++++++++++---------------- 1 file changed, 14 insertions(+), 16 deletions(-) diff --git a/src/StaticCompiler.jl b/src/StaticCompiler.jl index 69c47b1..0de1f67 100644 --- a/src/StaticCompiler.jl +++ b/src/StaticCompiler.jl @@ -5,7 +5,7 @@ using LLVM: LLVM using Libdl: Libdl -export generate_shlib, generate_shlib_fptr, compile, native_code_llvm, native_code_typed +export generate_shlib, generate_shlib_fptr, compile, native_code_llvm, native_code_typed, native_llvm_module module TestRuntime # dummy methods @@ -30,19 +30,6 @@ function native_job(@nospecialize(func), @nospecialize(types); kernel::Bool=fals GPUCompiler.CompilerJob(target, source, params), kwargs end -function native_code_llvm(@nospecialize(func), @nospecialize(types); kwargs...) - job, kwargs = native_job(func, types; kwargs...) - GPUCompiler.code_llvm(stdout, job; kwargs...) -end - -function native_code_typed(@nospecialize(func), @nospecialize(types); kwargs...) - job, kwargs = native_job(func, types; kwargs...) - GPUCompiler.code_typed(job; kwargs...) -end - - -# const linker = Sys.isunix() ? "ld.lld" : Sys.isapple() ? "ld64.lld" : "lld-link" - function generate_shlib(f, tt, path::String = tempname(), name = GPUCompiler.safe_name(repr(f)); kwargs...) open(path, "w") do io job, kwargs = native_job(f, tt; name, kwargs...) 
@@ -50,7 +37,6 @@ function generate_shlib(f, tt, path::String = tempname(), name = GPUCompiler.saf write(io, obj) flush(io) - # run(`$(Clang_jll.PATH[])/clang -shared -o $path.$(Libdl.dlext) $path`) run(`gcc -shared -o $path.$(Libdl.dlext) $path`) rm(path) end @@ -75,8 +61,20 @@ function generate_shlib_fptr(path::String, name) fptr end + + +function native_code_llvm(@nospecialize(func), @nospecialize(types); kwargs...) + job, kwargs = native_job(func, types; kwargs...) + GPUCompiler.code_llvm(stdout, job; kwargs...) +end + +function native_code_typed(@nospecialize(func), @nospecialize(types); kwargs...) + job, kwargs = native_job(func, types; kwargs...) + GPUCompiler.code_typed(job; kwargs...) +end + # Return an LLVM module -function compile(f, tt, name = GPUCompiler.safe_name(repr(f)); kwargs...) +function native_llvm_module(f, tt, name = GPUCompiler.safe_name(repr(f)); kwargs...) job, kwargs = native_job(f, tt; name, kwargs...) m, _ = GPUCompiler.codegen(:llvm, job; strip=true, only_entry=false, validate=false) return m From 6e20094ac2f54a3845df9a7b1f7e61241846e9e8 Mon Sep 17 00:00:00 2001 From: Tom Short Date: Wed, 2 Feb 2022 19:16:41 -0500 Subject: [PATCH 022/159] Basic rewrite for GPUCompiler (#46) * Basic rewrite using @jpsamaroo's generate_shlib_fptr. 
* update, remove junk, fix tests, use `gcc` for linking * remove windows from CI * note that this doesn't work on windows * fix `compile`, add `native_code_llvm`, `native_code_typed` * clarify fib problem, add LoopVectorization test :) * add tests for stack allocated mutable arrays * test on version 1.7 * rename `compile` to `native_llvm_module` Co-authored-by: Jorge Alberto Vieyra Salas Co-authored-by: Mason Protter --- .appveyor.yml | 31 ---- .github/workflows/ci.yml | 5 +- .gitignore | 3 - .travis.yml | 24 --- Manifest.toml | 158 ++++++++++++++++++ Project.toml | 16 +- README.md | 56 +++---- docs/src/backend.md | 2 +- docs/src/helpers.md | 23 --- docs/src/index.md | 64 -------- src/StaticCompiler.jl | 104 +++++++++--- src/ccalls.jl | 90 ---------- src/extern.jl | 16 -- src/globals.jl | 164 ------------------- src/helpers/README.md | 12 -- src/helpers/helpers.jl | 37 ----- src/helpers/jlrun.jl | 61 ------- src/helpers/juliaconfig.jl | 63 ------- src/helpers/standalone-exe.jl | 148 ----------------- src/irgen.jl | 300 ---------------------------------- src/overdub.jl | 33 ---- src/serialize.jl | 242 --------------------------- src/utils.jl | 56 ------- test/Project.toml | 3 + test/ccalls.jl | 35 ---- test/globals.jl | 36 ---- test/ode.jl | 182 --------------------- test/others.jl | 73 --------- test/runtests.jl | 198 ++++++++++++++++++++-- test/standalone-exe.jl | 41 ----- 30 files changed, 458 insertions(+), 1818 deletions(-) delete mode 100644 .appveyor.yml delete mode 100644 .travis.yml create mode 100644 Manifest.toml delete mode 100644 src/ccalls.jl delete mode 100644 src/extern.jl delete mode 100644 src/globals.jl delete mode 100644 src/helpers/README.md delete mode 100644 src/helpers/helpers.jl delete mode 100644 src/helpers/jlrun.jl delete mode 100644 src/helpers/juliaconfig.jl delete mode 100644 src/helpers/standalone-exe.jl delete mode 100644 src/irgen.jl delete mode 100644 src/overdub.jl delete mode 100644 src/serialize.jl delete mode 100644 
src/utils.jl delete mode 100644 test/ccalls.jl delete mode 100644 test/globals.jl delete mode 100644 test/ode.jl delete mode 100644 test/others.jl delete mode 100644 test/standalone-exe.jl diff --git a/.appveyor.yml b/.appveyor.yml deleted file mode 100644 index 16bb9b0..0000000 --- a/.appveyor.yml +++ /dev/null @@ -1,31 +0,0 @@ -# Documentation: https://github.com/JuliaCI/Appveyor.jl -environment: - matrix: - - julia_version: 1.3 - - julia_version: nightly -platform: - - x86 - - x64 -matrix: - allow_failures: - - julia_version: nightly -branches: - only: - - master - - /release-.*/ -notifications: - - provider: Email - on_build_success: false - on_build_failure: false - on_build_status_changed: false -install: - - ps: iex ((new-object net.webclient).DownloadString("https://raw.githubusercontent.com/JuliaCI/Appveyor.jl/version-1/bin/install.ps1")) -build_script: - - echo "%JL_BUILD_SCRIPT%" - - C:\julia\bin\julia -e "%JL_BUILD_SCRIPT%" -test_script: - - echo "%JL_TEST_SCRIPT%" - - C:\julia\bin\julia -e "%JL_TEST_SCRIPT%" -on_success: - - echo "%JL_CODECOV_SCRIPT%" - - C:\julia\bin\julia -e "%JL_CODECOV_SCRIPT%" diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index cb97e17..03f8476 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -10,12 +10,11 @@ jobs: fail-fast: false matrix: version: - - '1.3' + - '1.7' - 'nightly' os: - ubuntu-latest - # - macOS-latest - - windows-latest + - macOS-latest arch: - x64 steps: diff --git a/.gitignore b/.gitignore index 2ef6f28..6e716b0 100644 --- a/.gitignore +++ b/.gitignore @@ -2,11 +2,8 @@ *.jl.cov *.jl.mem .DS_Store -Manifest.toml -!helpers/Manifest.toml /dev/ /test/standalone -/test/Manifest.toml /test/test.* test.o diff --git a/.travis.yml b/.travis.yml deleted file mode 100644 index d735ad5..0000000 --- a/.travis.yml +++ /dev/null @@ -1,24 +0,0 @@ -# Documentation: http://docs.travis-ci.com/user/languages/julia/ -language: julia -os: -- linux -- windows -julia: - - 1.3 - - nightly 
-matrix: - # extra linux test - include: - - os: linux - dist: bionic - - allow_failures: - - julia: nightly - fast_finish: true - -notifications: - email: false - -after_success: - - julia -e 'using Pkg; Pkg.add("Coverage"); using Coverage; Codecov.submit(process_folder())' - - julia -e 'using Pkg; Pkg.add("Coverage"); using Coverage; Coveralls.submit(process_folder())' diff --git a/Manifest.toml b/Manifest.toml new file mode 100644 index 0000000..44dbe94 --- /dev/null +++ b/Manifest.toml @@ -0,0 +1,158 @@ +# This file is machine-generated - editing it directly is not advised + +manifest_format = "2.0" + +[[deps.ArgTools]] +uuid = "0dad84c5-d112-42e6-8d28-ef12dabb789f" + +[[deps.Artifacts]] +uuid = "56f22d72-fd6d-98f1-02f0-08ddc0907c33" + +[[deps.Base64]] +uuid = "2a0f44e3-6c83-55bd-87e4-b1978d98bd5f" + +[[deps.CEnum]] +git-tree-sha1 = "215a9aa4a1f23fbd05b92769fdd62559488d70e9" +uuid = "fa961155-64e5-5f13-b03f-caf6b980ea82" +version = "0.4.1" + +[[deps.Dates]] +deps = ["Printf"] +uuid = "ade2ca70-3891-5945-98fb-dc099432e06a" + +[[deps.Downloads]] +deps = ["ArgTools", "LibCURL", "NetworkOptions"] +uuid = "f43a241f-c20a-4ad4-852c-f6b1247861c6" + +[[deps.ExprTools]] +git-tree-sha1 = "56559bbef6ca5ea0c0818fa5c90320398a6fbf8d" +uuid = "e2ba6199-217a-4e67-a87a-7c52f15ade04" +version = "0.1.8" + +[[deps.GPUCompiler]] +deps = ["ExprTools", "InteractiveUtils", "LLVM", "Libdl", "Logging", "TimerOutputs", "UUIDs"] +git-tree-sha1 = "abd824e1f2ecd18d33811629c781441e94a24e81" +uuid = "61eb1bfa-7361-4325-ad38-22787b887f55" +version = "0.13.11" + +[[deps.InteractiveUtils]] +deps = ["Markdown"] +uuid = "b77e0a4c-d291-57a0-90e8-8db25a27a240" + +[[deps.JLLWrappers]] +deps = ["Preferences"] +git-tree-sha1 = "abc9885a7ca2052a736a600f7fa66209f96506e1" +uuid = "692b3bcd-3c85-4b1f-b108-f13ce0eb3210" +version = "1.4.1" + +[[deps.LLVM]] +deps = ["CEnum", "LLVMExtra_jll", "Libdl", "Printf", "Unicode"] +git-tree-sha1 = "f8dcd7adfda0dddaf944e62476d823164cccc217" +uuid = 
"929cbde3-209d-540e-8aea-75f648917ca0" +version = "4.7.1" + +[[deps.LLVMExtra_jll]] +deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg"] +git-tree-sha1 = "62115afed394c016c2d3096c5b85c407b48be96b" +uuid = "dad2f222-ce93-54a1-a47d-0025e8a3acab" +version = "0.0.13+1" + +[[deps.LibCURL]] +deps = ["LibCURL_jll", "MozillaCACerts_jll"] +uuid = "b27032c2-a3e7-50c8-80cd-2d36dbcbfd21" + +[[deps.LibCURL_jll]] +deps = ["Artifacts", "LibSSH2_jll", "Libdl", "MbedTLS_jll", "Zlib_jll", "nghttp2_jll"] +uuid = "deac9b47-8bc7-5906-a0fe-35ac56dc84c0" + +[[deps.LibGit2]] +deps = ["Base64", "NetworkOptions", "Printf", "SHA"] +uuid = "76f85450-5226-5b5a-8eaa-529ad045b433" + +[[deps.LibSSH2_jll]] +deps = ["Artifacts", "Libdl", "MbedTLS_jll"] +uuid = "29816b5a-b9ab-546f-933c-edad1886dfa8" + +[[deps.Libdl]] +uuid = "8f399da3-3557-5675-b5ff-fb832c97cbdb" + +[[deps.Logging]] +uuid = "56ddb016-857b-54e1-b83d-db4d58db5568" + +[[deps.Markdown]] +deps = ["Base64"] +uuid = "d6f4376e-aef5-505a-96c1-9c027394607a" + +[[deps.MbedTLS_jll]] +deps = ["Artifacts", "Libdl"] +uuid = "c8ffd9c3-330d-5841-b78e-0817d7145fa1" + +[[deps.MozillaCACerts_jll]] +uuid = "14a3606d-f60d-562e-9121-12d972cd8159" + +[[deps.NetworkOptions]] +uuid = "ca575930-c2e3-43a9-ace4-1e988b2c1908" + +[[deps.Pkg]] +deps = ["Artifacts", "Dates", "Downloads", "LibGit2", "Libdl", "Logging", "Markdown", "Printf", "REPL", "Random", "SHA", "Serialization", "TOML", "Tar", "UUIDs", "p7zip_jll"] +uuid = "44cfe95a-1eb2-52ea-b672-e2afdf69b78f" + +[[deps.Preferences]] +deps = ["TOML"] +git-tree-sha1 = "2cf929d64681236a2e074ffafb8d568733d2e6af" +uuid = "21216c6a-2e73-6563-6e65-726566657250" +version = "1.2.3" + +[[deps.Printf]] +deps = ["Unicode"] +uuid = "de0858da-6303-5e67-8744-51eddeeeb8d7" + +[[deps.REPL]] +deps = ["InteractiveUtils", "Markdown", "Sockets", "Unicode"] +uuid = "3fa0cd96-eef1-5676-8a61-b3b8758bbffb" + +[[deps.Random]] +deps = ["SHA", "Serialization"] +uuid = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c" + +[[deps.SHA]] +uuid = 
"ea8e919c-243c-51af-8825-aaa63cd721ce" + +[[deps.Serialization]] +uuid = "9e88b42a-f829-5b0c-bbe9-9e923198166b" + +[[deps.Sockets]] +uuid = "6462fe0b-24de-5631-8697-dd941f90decc" + +[[deps.TOML]] +deps = ["Dates"] +uuid = "fa267f1f-6049-4f14-aa54-33bafae1ed76" + +[[deps.Tar]] +deps = ["ArgTools", "SHA"] +uuid = "a4e569a6-e804-4fa4-b0f3-eef7a1d5b13e" + +[[deps.TimerOutputs]] +deps = ["ExprTools", "Printf"] +git-tree-sha1 = "97e999be94a7147d0609d0b9fc9feca4bf24d76b" +uuid = "a759f4b9-e2f1-59dc-863e-4aeb61b1ea8f" +version = "0.5.15" + +[[deps.UUIDs]] +deps = ["Random", "SHA"] +uuid = "cf7118a7-6976-5b1a-9a39-7adc72f591a4" + +[[deps.Unicode]] +uuid = "4ec0a83e-493e-50e2-b9ac-8f72acf5a8f5" + +[[deps.Zlib_jll]] +deps = ["Libdl"] +uuid = "83775a58-1f1d-513f-b197-d71354ab007a" + +[[deps.nghttp2_jll]] +deps = ["Artifacts", "Libdl"] +uuid = "8e850ede-7688-5339-a07c-302acd2aaf8d" + +[[deps.p7zip_jll]] +deps = ["Artifacts", "Libdl"] +uuid = "3f19e933-33d8-53b3-aaab-bd5110c3b7a0" diff --git a/Project.toml b/Project.toml index 583ae23..81a4e92 100644 --- a/Project.toml +++ b/Project.toml @@ -1,23 +1,17 @@ name = "StaticCompiler" uuid = "81625895-6c0f-48fc-b932-11a18313743c" authors = ["Tom Short"] -version = "0.1.0" +version = "0.2.0" [deps] -Cassette = "7057c7e9-c182-5462-911a-8362d720325c" -DataStructures = "864edb3b-99cc-5e75-8d2d-829cb0a9cfe8" +GPUCompiler = "61eb1bfa-7361-4325-ad38-22787b887f55" LLVM = "929cbde3-209d-540e-8aea-75f648917ca0" Libdl = "8f399da3-3557-5675-b5ff-fb832c97cbdb" -MacroTools = "1914dd2f-81c6-5fcd-8719-6d5c9610ff09" -TypedCodeUtils = "687fb87b-adea-59d5-9be9-82253b54685d" [compat] -DataStructures = "0.17" -Cassette = "0.3" -LLVM = "1.3" -TypedCodeUtils = "0.1" -MacroTools = "0.5" -julia = "1.2" +julia = "1.7" +GPUCompiler = "0.13" +LLVM = "4" [extras] Formatting = "59287772-0a20-5a39-b81b-1366585eb4c0" diff --git a/README.md b/README.md index 8b09b28..5578005 100644 --- a/README.md +++ b/README.md @@ -9,52 +9,38 @@ This is an experimental package to 
compile Julia code to standalone libraries. A system image is not needed. It is also meant for cross compilation, so Julia code can be compiled for other targets, including WebAssembly and embedded targets. ## Installation and Usage + ```julia using Pkg Pkg.add(PackageSpec( url = "https://github.com/tshort/StaticCompiler.jl", rev = "master")) ``` -```julia -using StaticCompiler -``` -**Documentation**: [![Dev](https://img.shields.io/badge/docs-dev-blue.svg)](https://tshort.github.io/StaticCompiler.jl/dev) - -## Approach -This package uses the [LLVM package](https://github.com/maleadt/LLVM.jl) to generate code in the same fashion as [CUDAnative](https://github.com/JuliaGPU/CUDAnative.jl). - -Some of the key details of this approach are: - -* **ccalls and cglobal** -- When Julia compiles code CUDAnative style, `ccall` and `cglobal` references get compiled to a direct pointer. `StaticCompiler` converts these to symbol references for later linking. For `ccall` with a tuple call to a symbol in a library, `Cassette` is used to convert that to just a symbol reference (no dynamic library loading). - -* **Global variables** -- A lot of code gets compiled with global variables, and these get compiled to a direct pointer. `StaticCompiler` includes a basic serialize/deserialize approach. Right now, this is fairly basic, and it takes shortcuts for some objects by swapping in wrong types. This can work because many times, the objects are not really used in the code. Finding the global variable can be a little tricky because the pointer is converted to a Julia object with `unsafe_pointer_to_objref`, and that segfaults for some addresses. How to best handle cases like that is still to be determined. - -* **Initialization** -- If libjulia is used, some init code needs to be run to set up garbage collection and other things. For this, a basic `blank.ji` file is used to feed `jl_init_with_image`. 
- -Long term, a better approach may be to use Julia's standard compilation techniques with "tree shaking" to generate a reduced system image (see [here](https://github.com/JuliaLang/julia/issues/33670)). - -## Example -The API still needs work, but here is the general approach right now: ```julia using StaticCompiler -m = irgen(cos, Tuple{Float64}) -write(m, "cos.bc") -write_object(m, "cos.o") -``` +f(x) = 2x -`cos.o` should contain a function called `cos`. From there, you need to convert to link as needed with `libjulia`. +# compile `f` and return an LLVM module +m = compile(f, (Int,)) -See the `test` directory for more information and types of code that currently run. The most advanced example that works is a call to an ODE solution using modified code from [ODE.jl](https://github.com/JuliaDiffEq/ODE.jl). For information on compiling and linking to an executable, see [test/standalone-exe.jl](./test/standalone-exe.jl). +# compile `f` and write to a shared library ("f.so" or "f.dll") +generate_shlib(f, (Int,), "libf") +# find a function pointer for this shared library +fptr = generate_shlib_fptr("libf", "f") +@ccall $fptr(2::Int)::Int -## Known limitations - -* It won't work for recursive code. Jameson's [codegen-norecursion](https://github.com/JuliaLang/julia/tree/jn/codegen-norecursion) should fix that when merged. - -* `cfunction` is not supported. +# do this in one step (this time with a temporary shared library) +fptr = generate_shlib_fptr(f, (Int,)) +@ccall $fptr(2::Int)::Int +``` -* Generic code that uses `jl_apply_generic` does not work. One strategy for this is to use Cassette to swap out known code that uses dynamic calls. Another approach is to write something like `jl_apply_generic` to implement dynamic calls. +## Approach -* The use of Cassette makes it more difficult for Julia to infer some things, and only type-stable code can be statically compiled with this approach. 
+This package uses the [GPUCompiler package](https://github.com/JuliaGPU/GPUCompiler.jl) to generate code. -* It's only been tested on Linux and Windows. +## Limitations -Finally, this whole approach is young and likely brittle. Do not expect it to work for your code. +* This package currently requires that you have `gcc` installed and in your system's `PATH`. This is probably pretty easy to fix, we only use `gcc` for linking. In theory Clang_jll or LLVM_full_jll should be able to do this, and be managed through Julia's package manager. +* No heap allocations (e.g. creating an array or a string) are allowed inside a statically compiled function body. If you try to run such a function, you will get a segfault. +** It's sometimes possible you won't get a segfault if you define and run the function in the same session, but trying to call the compiled function in a new julia session will definitely segfault. +* Lots of other limitations too. E.g. there's an example in tests/runtests.jl where summing a vector of `Complex{Float32}` is fine, but segfaults on `Complex{Float64}`. +* Doesn't currently work on Windows \ No newline at end of file diff --git a/docs/src/backend.md b/docs/src/backend.md index 523901c..1e08a76 100644 --- a/docs/src/backend.md +++ b/docs/src/backend.md @@ -7,4 +7,4 @@ Pages = ["backend.md"] ```@autodocs Modules = [StaticCompiler] Pages = readdir("../src") -``` +``` \ No newline at end of file diff --git a/docs/src/helpers.md b/docs/src/helpers.md index c07d4df..e69de29 100644 --- a/docs/src/helpers.md +++ b/docs/src/helpers.md @@ -1,23 +0,0 @@ -# Helpers -Note that the helpers defined here are used in tests, and they are useful to test out code in the REPL. - -```julia -twox(x) = 2x -# run code in the REPL -@jlrun twox(3) -# compile to an executable in a `standalone` directory -exegen([ (twox, Tuple{Int}, 4) ]) -``` - -These are not meant to be a permanent part of the API. They are just for testing. 
- - -```@index -Modules = [StaticCompiler] -Pages = ["helpers.md"] -``` - -```@autodocs -Modules = [StaticCompiler] -Pages = readdir("../src/helpers") -``` diff --git a/docs/src/index.md b/docs/src/index.md index 760d5e6..e69de29 100644 --- a/docs/src/index.md +++ b/docs/src/index.md @@ -1,64 +0,0 @@ -```@meta -CurrentModule = StaticCompiler -``` - -# StaticCompiler - -[![Dev](https://img.shields.io/badge/docs-dev-blue.svg)](https://tshort.github.io/StaticCompiler.jl/dev) -[![Build Status](https://travis-ci.com/tshort/StaticCompiler.jl.svg?branch=master)](https://travis-ci.com/tshort/StaticCompiler.jl) -[![Build Status](https://ci.appveyor.com/api/projects/status/github/tshort/StaticCompiler.jl?svg=true)](https://ci.appveyor.com/project/tshort/StaticCompiler-jl) -[![Codecov](https://codecov.io/gh/tshort/StaticCompiler.jl/branch/master/graph/badge.svg)](https://codecov.io/gh/tshort/StaticCompiler.jl) -[![Coveralls](https://coveralls.io/repos/github/tshort/StaticCompiler.jl/badge.svg?branch=master)](https://coveralls.io/github/tshort/StaticCompiler.jl?branch=master) - -This is an experimental package to compile Julia code to standalone libraries. A system image is not needed. It is also meant for cross compilation, so Julia code can be compiled for other targets, including WebAssembly and embedded targets. - -## Installation and Usage -```julia -using Pkg -Pkg.add(PackageSpec( url = "https://github.com/tshort/StaticCompiler.jl", rev = "master")) -``` -```julia -using StaticCompiler -``` -**Documentation**: [![Dev](https://img.shields.io/badge/docs-dev-blue.svg)](https://tshort.github.io/StaticCompiler.jl/dev) - -## Approach -This package uses the [LLVM package](https://github.com/maleadt/LLVM.jl) to generate code in the same fashion as [CUDAnative](https://github.com/JuliaGPU/CUDAnative.jl). 
- -Some of the key details of this approach are: - -* **ccalls and cglobal** -- When Julia compiles code CUDAnative style, `ccall` and `cglobal` references get compiled to a direct pointer. `StaticCompiler` converts these to symbol references for later linking. For `ccall` with a tuple call to a symbol in a library, `Cassette` is used to convert that to just a symbol reference (no dynamic library loading). - -* **Global variables** -- A lot of code gets compiled with global variables, and these get compiled to a direct pointer. `StaticCompiler` includes a basic serialize/deserialize approach. Right now, this is fairly basic, and it takes shortcuts for some objects by swapping in wrong types. This can work because many times, the objects are not really used in the code. Finding the global variable can be a little tricky because the pointer is converted to a Julia object with `unsafe_pointer_to_objref`, and that segfaults for some addresses. How to best handle cases like that is still to be determined. - -* **Initialization** -- If libjulia is used, some init code needs to be run to set up garbage collection and other things. For this, a basic `blank.ji` file is used to feed `jl_init_with_image`. - -Long term, a better approach may be to use Julia's standard compilation techniques with "tree shaking" to generate a reduced system image (see [here](https://github.com/JuliaLang/julia/issues/33670)). - -## Example -The API still needs work, but here is the general approach right now: - -```julia -using StaticCompiler -m = irgen(cos, Tuple{Float64}) -write(m, "cos.bc") -write_object(m, "cos.o") -``` - -`cos.o` should contain a function called `cos`. From there, you need to convert to link as needed with `libjulia`. - -See the `test` directory for more information and types of code that currently run. The most advanced example that works is a call to an ODE solution using modified code from [ODE.jl](https://github.com/JuliaDiffEq/ODE.jl). 
For information on compiling and linking to an executable, see [test/standalone-exe.jl](https://github.com/tshort/StaticCompiler.jl/blob/master/test/standalone-exe.jl). - -## Known limitations - -* It won't work for recursive code. Jameson's [codegen-norecursion](https://github.com/JuliaLang/julia/tree/jn/codegen-norecursion) should fix that when merged. - -* `cfunction` is not supported. - -* Generic code that uses `jl_apply_generic` does not work. One strategy for this is to use Cassette to swap out known code that uses dynamic calls. Another approach is to write something like `jl_apply_generic` to implement dynamic calls. - -* The use of Cassette makes it more difficult for Julia to infer some things, and only type-stable code can be statically compiled with this approach. - -* It's only been tested on Linux and Windows. - -Finally, this whole approach is young and likely brittle. Do not expect it to work for your code. diff --git a/src/StaticCompiler.jl b/src/StaticCompiler.jl index 5f16b62..0de1f67 100644 --- a/src/StaticCompiler.jl +++ b/src/StaticCompiler.jl @@ -1,28 +1,84 @@ module StaticCompiler -export irgen, write_object, @extern - -import Libdl - -using LLVM -using LLVM.Interop -using TypedCodeUtils -import TypedCodeUtils: reflect, lookthrough, canreflect, - DefaultConsumer, Reflection, Callsite, - identify_invoke, identify_call, identify_foreigncall, - process_invoke, process_call -using MacroTools -using DataStructures: MultiDict - - -include("serialize.jl") -include("utils.jl") -include("ccalls.jl") -include("globals.jl") -include("overdub.jl") -include("irgen.jl") -include("extern.jl") - -include("helpers/helpers.jl") +using GPUCompiler: GPUCompiler +using LLVM: LLVM +using Libdl: Libdl + + +export generate_shlib, generate_shlib_fptr, compile, native_code_llvm, native_code_typed, native_llvm_module + +module TestRuntime + # dummy methods + signal_exception() = return + # HACK: if malloc returns 0 or traps, all calling functions (like jl_box_*) + # 
get reduced to a trap, which really messes with our test suite. + malloc(sz) = Ptr{Cvoid}(Int(0xDEADBEEF)) + report_oom(sz) = return + report_exception(ex) = return + report_exception_name(ex) = return + report_exception_frame(idx, func, file, line) = return +end + +struct TestCompilerParams <: GPUCompiler.AbstractCompilerParams end +GPUCompiler.runtime_module(::GPUCompiler.CompilerJob{<:Any,TestCompilerParams}) = TestRuntime + + +function native_job(@nospecialize(func), @nospecialize(types); kernel::Bool=false, name=GPUCompiler.safe_name(repr(func)), kwargs...) + source = GPUCompiler.FunctionSpec(func, Base.to_tuple_type(types), kernel, name) + target = GPUCompiler.NativeCompilerTarget(always_inline=true) + params = TestCompilerParams() + GPUCompiler.CompilerJob(target, source, params), kwargs +end + +function generate_shlib(f, tt, path::String = tempname(), name = GPUCompiler.safe_name(repr(f)); kwargs...) + open(path, "w") do io + job, kwargs = native_job(f, tt; name, kwargs...) + obj, _ = GPUCompiler.codegen(:obj, job; strip=true, only_entry=false, validate=false) + + write(io, obj) + flush(io) + run(`gcc -shared -o $path.$(Libdl.dlext) $path`) + rm(path) + end + path, name +end + +function generate_shlib_fptr(f, tt, path::String=tempname(), name = GPUCompiler.safe_name(repr(f)); temp::Bool=true, kwargs...) + generate_shlib(f, tt, path, name; kwargs...) + ptr = Libdl.dlopen("$(abspath(path)).$(Libdl.dlext)", Libdl.RTLD_LOCAL) + fptr = Libdl.dlsym(ptr, "julia_$name") + @assert fptr != C_NULL + if temp + atexit(()->rm("$path.$(Libdl.dlext)")) + end + fptr +end + +function generate_shlib_fptr(path::String, name) + ptr = Libdl.dlopen("$(abspath(path)).$(Libdl.dlext)", Libdl.RTLD_LOCAL) + fptr = Libdl.dlsym(ptr, "julia_$name") + @assert fptr != C_NULL + fptr +end + + + +function native_code_llvm(@nospecialize(func), @nospecialize(types); kwargs...) + job, kwargs = native_job(func, types; kwargs...) + GPUCompiler.code_llvm(stdout, job; kwargs...) 
+end + +function native_code_typed(@nospecialize(func), @nospecialize(types); kwargs...) + job, kwargs = native_job(func, types; kwargs...) + GPUCompiler.code_typed(job; kwargs...) +end + +# Return an LLVM module +function native_llvm_module(f, tt, name = GPUCompiler.safe_name(repr(f)); kwargs...) + job, kwargs = native_job(f, tt; name, kwargs...) + m, _ = GPUCompiler.codegen(:llvm, job; strip=true, only_entry=false, validate=false) + return m +end + end # module diff --git a/src/ccalls.jl b/src/ccalls.jl deleted file mode 100644 index 4314fab..0000000 --- a/src/ccalls.jl +++ /dev/null @@ -1,90 +0,0 @@ - -""" - find_ccalls(f, tt) - -Returns a `Dict` mapping function addresses to symbol names for all `ccall`s and -`cglobal`s called from the method. This descends into other invocations -within the method. -""" -find_ccalls(@nospecialize(f), @nospecialize(tt)) = find_ccalls(reflect(f, tt)) - -function find_ccalls(ref::Reflection) - result = Dict{Ptr{Nothing}, Symbol}() - idx = VERSION > v"1.2" ? 5 : 4 - foreigncalls = TypedCodeUtils.filter((c) -> lookthrough((c) -> c.head === :foreigncall && !(c.args[idx] isa QuoteNode && c.args[idx].value == :llvmcall), c), ref.CI.code) - # foreigncalls = TypedCodeUtils.filter((c) -> lookthrough((c) -> c.head === :foreigncall, c), ref.CI.code) - for fc in foreigncalls - sym = getsym(fc[2].args[1]) - address = eval(:(cglobal($(sym)))) - result[address] = Symbol(sym isa Tuple ? sym[1] : sym.value) - end - cglobals = TypedCodeUtils.filter((c) -> lookthrough(c -> c.head === :call && iscglobal(c.args[1]), c), ref.CI.code) - for fc in cglobals - sym = getsym(fc[2].args[2]) - address = eval(:(cglobal($(sym)))) - result[address] = Symbol(sym isa Tuple ? 
sym[1] : sym.value) - end - invokes = TypedCodeUtils.filter((c) -> lookthrough(identify_invoke, c), ref.CI.code) - invokes = map((arg) -> process_invoke(DefaultConsumer(), ref, arg...), invokes) - for fi in invokes - canreflect(fi) || continue - merge!(result, find_ccalls(reflect(fi))) - end - return result -end - -getsym(x) = x -getsym(x::String) = QuoteNode(Symbol(x)) -getsym(x::QuoteNode) = x -getsym(x::Expr) = eval.((x.args[2], x.args[3])) - -iscglobal(x) = x == cglobal || x isa GlobalRef && x.name == :cglobal - - -""" - fix_ccalls!(mod::LLVM.Module, d) - -Replace function addresses with symbol names in `mod`. The symbol names are -meant to be linked to `libjulia` or other libraries. -`d` is a `Dict` mapping a function address to symbol name for `ccall`s. -""" -function fix_ccalls!(mod::LLVM.Module, d) - for fun in functions(mod), blk in blocks(fun), instr in instructions(blk) - if instr isa LLVM.CallInst - dest = called_value(instr) - if dest isa ConstantExpr && occursin("inttoptr", string(dest)) - # @show instr - # @show dest - argtypes = [llvmtype(op) for op in operands(instr)] - nargs = length(parameters(eltype(argtypes[end]))) - # num_extra_args = 1 + length(collect(eachmatch(r"jl_roots", string(instr)))) - ptr = Ptr{Cvoid}(convert(Int, first(operands(dest)))) - if haskey(d, ptr) - sym = d[ptr] - newdest = LLVM.Function(mod, string(sym), LLVM.FunctionType(llvmtype(instr), argtypes[1:nargs])) - LLVM.linkage!(newdest, LLVM.API.LLVMExternalLinkage) - replace_uses!(dest, newdest) - end - end - elseif instr isa LLVM.LoadInst && occursin("inttoptr", string(instr)) - # dest = called_value(instr) - for op in operands(instr) - lastop = op - if occursin("inttoptr", string(op)) - # @show instr - if occursin("addrspacecast", string(op)) || occursin("getelementptr", string(op)) - op = first(operands(op)) - end - first(operands(op)) isa LLVM.ConstantInt || continue - ptr = Ptr{Cvoid}(convert(Int, first(operands(op)))) - if haskey(d, ptr) - obj = d[ptr] - newdest = 
GlobalVariable(mod, llvmtype(instr), string(d[ptr])) - LLVM.linkage!(newdest, LLVM.API.LLVMExternalLinkage) - replace_uses!(op, newdest) - end - end - end - end - end -end diff --git a/src/extern.jl b/src/extern.jl deleted file mode 100644 index 6c0424a..0000000 --- a/src/extern.jl +++ /dev/null @@ -1,16 +0,0 @@ -""" - @extern(fun, returntype, argtypes, args...) - -Creates a call to an external function meant to be included at link time. -Use the same conventions as `ccall`. - -This transforms into the following `ccall`: - - ccall("extern fun", llvmcall, returntype, argtypes, args...) -""" -macro extern(name, rettyp, argtyp, args...) - externfun = string("extern ", name isa AbstractString || name isa Symbol ? name : name.value) - Expr(:call, :ccall, externfun, esc(:llvmcall), esc(rettyp), - Expr(:tuple, esc.(argtyp.args)...), esc.(args)...) -end - diff --git a/src/globals.jl b/src/globals.jl deleted file mode 100644 index e258338..0000000 --- a/src/globals.jl +++ /dev/null @@ -1,164 +0,0 @@ -struct GlobalsContext - invokes::Set{Any} -end -GlobalsContext() = GlobalsContext(Set()) - - -""" - fix_globals!(mod::LLVM.Module) - -Replace function addresses in `mod` with references to global data structures. -For each global variable, two LLVM global objects are created: - -* `jl.global.data` -- An LLVM 'i8' vector holding a serialized version of the Julia object. -* `jl.global` -- A pointer to the unserialized Julia object. - -The `inttopt` with the function address is replaced by `jl.global`. - -A function `jl_init_globals` is added to `mod`. This function deserializes the data in -`jl.global.data` and updates `jl.global`. -""" - -_opcode(x::LLVM.ConstantExpr) = LLVM.API.LLVMGetConstOpcode(LLVM.ref(x)) - -function fix_globals!(mod::LLVM.Module) - # Create a `jl_init_globals` function. 
- jl_init_globals_func = LLVM.Function(mod, "jl_init_globals", - LLVM.FunctionType(julia_to_llvm(Cvoid), LLVMType[])) - jl_init_global_entry = BasicBlock(jl_init_globals_func, "entry", context(mod)) - - # Definitions for utility functions - func_type = LLVM.FunctionType(julia_to_llvm(Any), LLVMType[LLVM.PointerType(julia_to_llvm(Int8))]) - deserialize_funs = Dict() - - uint8_t = julia_to_llvm(UInt8) - - ctx = SerializeContext() - es = [] - objs = Set() - gptridx = Dict() - instrs = [] - gptrs = [] - j = 1 # counter for position in gptridx - Builder(context(mod)) do builder - toinstr!(x) = x - function toinstr!(x::LLVM.ConstantExpr) - if _opcode(x) == LLVM.API.LLVMAddrSpaceCast - val = toinstr!(first(operands(x))) - ret = addrspacecast!(builder, val, llvmtype(x)) - return ret - elseif _opcode(x) == LLVM.API.LLVMGetElementPtr - ops = operands(x) - val = toinstr!(first(ops)) - ret = gep!(builder, val, [ops[i] for i in 2:length(ops)]) - return ret - elseif _opcode(x) == LLVM.API.LLVMBitCast - ops = operands(x) - val = toinstr!(first(ops)) - ret = pointercast!(builder, val, llvmtype(x)) - return ret - elseif _opcode(x) == LLVM.API.LLVMIntToPtr - ptr = Ptr{Any}(convert(Int, first(operands(x)))) - obj = unsafe_pointer_to_objref(ptr) - if !in(obj, objs) - push!(es, serialize(ctx, obj)) - push!(objs, obj) - # Create pointers to the data. 
- gptr = GlobalVariable(mod, julia_to_llvm(Any), "jl.global") - linkage!(gptr, LLVM.API.LLVMInternalLinkage) - LLVM.API.LLVMSetInitializer(LLVM.ref(gptr), LLVM.ref(null(julia_to_llvm(Any)))) - push!(gptrs, gptr) - gptridx[obj] = j - j += 1 - end - gptr = gptrs[gptridx[obj]] - gptr2 = load!(builder, gptr) - ret = pointercast!(builder, gptr2, llvmtype(x)) - return ret - end - return x - end - for fun in functions(mod) - if startswith(LLVM.name(fun), "jfptr") - unsafe_delete!(mod, fun) - continue - end - - for blk in blocks(fun), instr in instructions(blk) - # Set up functions to walk the operands of the instruction - # and convert appropriate ConstantExpr's to instructions. - # Look for `LLVMIntToPtr` expressions. - position!(builder, instr) - ops = operands(instr) - N = opcode(instr) == LLVM.API.LLVMCall ? length(ops) - 1 : length(ops) - if opcode(instr) == LLVM.API.LLVMCall && name(last(operands(instr))) == "jl_type_error" - continue - end - for i in 1:N - try - if opcode(instr) == LLVM.API.LLVMPHI - position!(builder, last(instructions(LLVM.incoming(instr)[i][2]))) - end - ops[i] = toinstr!(ops[i]) - catch x - end - end - end - end - end - nglobals = length(es) - #@show mod - #verify(mod) - for i in 1:nglobals - # Assign the appropriate function argument to the appropriate global. - es[i] = :(unsafe_store!($((Symbol("global", i))), $(es[i]))) - end - # Define the deserializing function. - fune = quote - function _deserialize_globals(Vptr, $((Symbol("global", i) for i in 1:nglobals)...)) - $(ctx.init...) - $(es...) - return - end - end - # @show fune - # Execute the deserializing function. 
- deser_fun = eval(fune) - v = take!(ctx.io) - gv_typ = LLVM.ArrayType(uint8_t, length(v)) - data = LLVM.GlobalVariable(mod, gv_typ, "jl.global.data") - linkage!(data, LLVM.API.LLVMExternalLinkage) - constant!(data, true) - LLVM.API.LLVMSetInitializer(LLVM.ref(data), - LLVM.API.LLVMConstArray(LLVM.ref(uint8_t), - [LLVM.ref(ConstantInt(uint8_t, x)) for x in v], - UInt32(length(v)))) - Builder(context(mod)) do builder - dataptr = gep!(builder, data, [ConstantInt(0, context(mod)), ConstantInt(0, context(mod))]) - - # Create the Julia object from `data` and include that in `init_fun`. - position!(builder, jl_init_global_entry) - gfunc_type = LLVM.FunctionType(julia_to_llvm(Cvoid), - LLVMType[LLVM.PointerType(julia_to_llvm(Int8)), - Iterators.repeated(LLVM.FunctionType(julia_to_llvm(Any)), nglobals)...]) - deserialize_globals_func = LLVM.Function(mod, "_deserialize_globals", gfunc_type) - LLVM.linkage!(deserialize_globals_func, LLVM.API.LLVMExternalLinkage) - for i in 1:nglobals - # The following fix is to match the argument types which are an integer, not a %jl_value_t**. 
- gptrs[i] = LLVM.ptrtoint!(builder, gptrs[i], julia_to_llvm(Csize_t)) - end - LLVM.call!(builder, deserialize_globals_func, LLVM.Value[dataptr, gptrs...]) - ret!(builder) - end - tt = Tuple{Ptr{UInt8}, Iterators.repeated(Ptr{Any}, nglobals)...} - deser_mod = irgen(deser_fun, tt, overdub = false, fix_globals = false, optimize_llvm = false) - d = find_ccalls(deser_fun, tt) - fix_ccalls!(deser_mod, d) - # rename deserialization function to "_deserialize_globals" - fun = first(TypedCodeUtils.filter(x -> LLVM.name(x) == "_deserialize_globals", functions(deser_mod)))[2] - # LLVM.name!(fun, "_deserialize_globals") - linkage!(fun, LLVM.API.LLVMExternalLinkage) - # link into the main module - LLVM.link!(mod, deser_mod) - return -end diff --git a/src/helpers/README.md b/src/helpers/README.md deleted file mode 100644 index e443db6..0000000 --- a/src/helpers/README.md +++ /dev/null @@ -1,12 +0,0 @@ -# Helpers -Note that the helpers defined here are used in tests, and they are useful to test out code in the REPL. - -```julia -twox(x) = 2x -# run code in the REPL -@jlrun twox(3) -# compile to an executable in a `standalone` directory -exegen([ (twox, Tuple{Int}, 4) ]) -``` - -These are not meant to be a permanent part of the API. They are just for testing. diff --git a/src/helpers/helpers.jl b/src/helpers/helpers.jl deleted file mode 100644 index 1f04154..0000000 --- a/src/helpers/helpers.jl +++ /dev/null @@ -1,37 +0,0 @@ -""" -Returns shellcmd string for different OS. Optionally, checks for gcc installation. 
-""" -function _shellcmd(checkInstallation::Bool = false) - - if Sys.isunix() - shellcmd = "gcc" - elseif Sys.iswindows() - shellcmd = ["cmd", "/c", "gcc"] - else - error("run command not defined") - end - - if checkInstallation - # Checking gcc installation - try - run(`$shellcmd -v`) - catch - @warn "Make sure gcc compiler is installed: https://gcc.gnu.org/install/binaries.html and is on the path, othetwise some of the functions will return errors" - return nothing - end - end - - return shellcmd -end - -shellcmd = _shellcmd(true) # is used in @jlrun and exegen() - - -export @jlrun -include("jlrun.jl") - -export ldflags, ldlibs, cflags # are used in exegen -include("juliaconfig.jl") - -export exegen -include("standalone-exe.jl") diff --git a/src/helpers/jlrun.jl b/src/helpers/jlrun.jl deleted file mode 100644 index f2e8e16..0000000 --- a/src/helpers/jlrun.jl +++ /dev/null @@ -1,61 +0,0 @@ -using Libdl, LLVM - -function show_inttoptr(mod) - for fun in LLVM.functions(mod), - blk in LLVM.blocks(fun), - instr in LLVM.instructions(blk) - - s = string(instr) - if occursin("inttoptr", s) && occursin(r"[0-9]{8,30}", s) - println(LLVM.name(fun), " ---------------------------") - @show instr - println() - end - end -end - -""" -Compiles function call provided and calls it with `ccall` using the shared library that was created. -""" -macro jlrun(e) - - fun = e.args[1] - efun = esc(fun) - args = length(e.args) > 1 ? 
e.args[2:end] : Any[] - libpath = abspath("test.o") - dylibpath = abspath("test.so") - tt = Tuple{(typeof(eval(a)) for a in args)...} - if length(e.args) > 1 - ct = code_typed(Base.eval(__module__, fun), tt) - else - ct = code_typed(Base.eval(__module__, fun)) - end - rettype = ct[1][2] - bindir = joinpath(dirname(Sys.BINDIR), "tools") - libdir = joinpath(dirname(Sys.BINDIR), "lib") - - runCommand = :(run( - $(`$shellcmd -shared -fPIC -o test.so -L$libdir test.o -ljulia`), - wait = true, - )) - - quote - m = irgen($efun, $tt) - # m = irgen($efun, $tt, overdub = false) - LLVM.verify(m) - # show_inttoptr(m) - write(m, "test.bc") - write_object(m, "test.o") - $runCommand - dylib = Libdl.dlopen($dylibpath) - ccall(Libdl.dlsym(dylib, "jl_init_globals"), Cvoid, ()) - res = ccall( - Libdl.dlsym(dylib, $(Meta.quot(fun))), - $rettype, - ($((typeof(eval(a)) for a in args)...),), - $(eval.(args)...), - ) - Libdl.dlclose(dylib) - res - end -end diff --git a/src/helpers/juliaconfig.jl b/src/helpers/juliaconfig.jl deleted file mode 100644 index 039e090..0000000 --- a/src/helpers/juliaconfig.jl +++ /dev/null @@ -1,63 +0,0 @@ -# from PackageCompilerX: https://github.com/KristofferC/PackageCompilerX.jl/blob/c1a90edfaa28907edf2edbbc734ef8afdeeaca80/src/juliaconfig.jl -# adopted from https://github.com/JuliaLang/julia/blob/release-0.6/contrib/julia-config.jl - -function shell_escape(str) - str = replace(str, "'" => "'\''") - return "'$str'" -end - -function julia_libdir() - return if ccall(:jl_is_debugbuild, Cint, ()) != 0 - dirname(abspath(Libdl.dlpath("libjulia-debug"))) - else - dirname(abspath(Libdl.dlpath("libjulia"))) - end -end - -function julia_private_libdir() - @static if Sys.iswindows() - return julia_libdir() - else - return abspath(Sys.BINDIR, Base.PRIVATE_LIBDIR) - end -end - -julia_includedir() = abspath(Sys.BINDIR, Base.INCLUDEDIR, "julia") - -function ldflags() - fl = "-L$(shell_escape(julia_libdir()))" - if Sys.iswindows() - fl = fl * " -Wl,--stack,8388608" - fl = 
fl * " -Wl,--export-all-symbols" - elseif Sys.islinux() - fl = fl * " -Wl,--export-dynamic" - end - return fl -end - -# TODO -function ldlibs(relative_path=nothing) - libname = if ccall(:jl_is_debugbuild, Cint, ()) != 0 - "julia-debug" - else - "julia" - end - if Sys.islinux() - return "-Wl,-rpath-link,$(shell_escape(julia_libdir())) -Wl,-rpath-link,$(shell_escape(julia_private_libdir())) -l$libname" - elseif Sys.iswindows() - return "-l$libname -lopenlibm" - else - return "-l$libname" - end -end - -function cflags() - flags = IOBuffer() - print(flags, "-std=gnu99") - include = shell_escape(julia_includedir()) - print(flags, " -I", include) - if Sys.isunix() - print(flags, " -fPIC") - end - return String(take!(flags)) -end diff --git a/src/helpers/standalone-exe.jl b/src/helpers/standalone-exe.jl deleted file mode 100644 index 40d026b..0000000 --- a/src/helpers/standalone-exe.jl +++ /dev/null @@ -1,148 +0,0 @@ -Ctemplate = """ -#include -#include -extern CRETTYPE FUNNAME(CARGTYPES); -extern void jl_init_with_image(const char *, const char *); -extern void jl_init_globals(void); -int main() -{ - jl_init_with_image(".", "blank.ji"); - jl_init_globals(); - printf("RETFORMAT", FUNNAME(FUNARG)); - jl_atexit_hook(0); - return 0; -} -""" - -# "signed" is removed from signed types -# duplicates will remove automatically -Cmap = Dict( - Cchar => "char", #Int8 - Cuchar => "unsigned char", #UInt8 - Cshort => "short", #Int16 - # Cstring => - Cushort => "unsigned short", #UInt16 - Cint => "int", #Int32 - Cuint => "unsigned int", #UInt32 - Clong => "long", #Int32 - Culong => "unsigned long", #UInt32 - Clonglong => "long long", #Int64 - Culonglong => "unsigned long long", #UInt64 - # Cintmax_t => "intmax_t", #Int64 - # Cuintmax_t => "uintmax_t", #UInt64 - # Csize_t => "size_t", #UInt - # Cssize_t => "ssize_t", #Int - # Cptrdiff_t => "ptrdiff_t", #Int - # Cwchar_t => "wchar_t", #Int32 - # Cwstring => - Cfloat => "float", #Float32 - Cdouble => "double", #Float64 - Nothing => 
"void", -) - -Cformatmap = Dict( - Cchar => "%c", #Int8 - # Cuchar => "unsigned char", #UInt8 - # Cshort => "short", #Int16 - Cstring => "%s", - # Cushort => "unsigned short", #UInt16 - Cint => "%d", #"i" #Int32 - Cuint => "%u", #UInt32 - Clong => "%ld", #Int32 - # Culong => "unsigned long", #UInt32 - Clonglong => "%lld", #Int64 - # Culonglong => "unsigned long long", #UInt64 - # Cintmax_t => "intmax_t", #Int64 - # Cuintmax_t => "uintmax_t", #UInt64 - # Csize_t => "size_t", #UInt - # Cssize_t => "ssize_t", #Int - # Cptrdiff_t => "ptrdiff_t", #Int - # Cwchar_t => "wchar_t", #Int32 - # Cwstring => - # Cfloat => "%f", #Float32 - Cdouble => "%f", #%e #Float64 -) - -""" -converts to text. returns "" for Nothing and empty Tuple. -""" -totext(x) = string(x) -totext(x::Nothing) = "" -totext(x::Tuple{}) = "" - -""" -Makes standalone executable. -""" -function exegen(funcalls) - - cd(mkpath("standalone")) do - # create `blank.ji` for initialization - julia_path = joinpath(Sys.BINDIR, Base.julia_exename()) - base_dir = dirname(Base.find_source_file("sysimg.jl")) - wd = pwd() - open(println, "blank.jl", "w") - cd(base_dir) do - run(`$(julia_path) --output-ji $(wd)/blank.ji $(wd)/blank.jl`) - end - - dir = pwd() - standalonedir = dir - bindir = string(Sys.BINDIR) - libdir = joinpath(dirname(Sys.BINDIR), "lib") - includedir = joinpath(dirname(Sys.BINDIR), "include", "julia") - if Sys.iswindows() - for fn in readdir(bindir) - if splitext(fn)[end] == ".dll" - cp(joinpath(bindir, fn), fn, force = true) - end - end - end - - flags = join((cflags(), ldflags(), ldlibs()), " ") - flags = Base.shell_split(flags) - wrapper = joinpath(@__DIR__, "embedding_wrapper.c") - if Sys.iswindows() - rpath = `` - elseif Sys.isapple() - rpath = `-Wl,-rpath,'@executable_path' -Wl,-rpath,'@executable_path/../lib'` - else - rpath = `-Wl,-rpath,\$ORIGIN:\$ORIGIN/../lib` - end - - for (func, tt, val) in funcalls - fname = nameof(func) - rettype = Base.return_types(func, tt)[1] - argtype = length(tt.types) 
> 0 ? tt.types[1] : Nothing - fmt = Cformatmap[rettype] - Ctxt = foldl(replace, - ( - "FUNNAME" => fname, - "CRETTYPE" => Cmap[rettype], - "RETFORMAT" => fmt, - "CARGTYPES" => Cmap[argtype], - "FUNARG" => totext(val), - ), - init = Ctemplate) - write("$fname.c", Ctxt) - m = StaticCompiler.irgen(func, tt) - # StaticCompiler.show_inttoptr(m) - # @show m - dlext = Libdl.dlext - exeext = Sys.iswindows() ? ".exe" : "" - if Sys.isapple() - o_file = `-Wl,-all_load $fname.o` - else - o_file = `-Wl,--whole-archive $fname.o -Wl,--no-whole-archive` - end - extra = Sys.iswindows() ? `-Wl,--export-all-symbols` : `` - write(m, "$fname.bc") - write_object(m, "$fname.o") - - run(`$shellcmd -shared -fpic -L$libdir -o lib$fname.$dlext $o_file -Wl,-rpath,$libdir -ljulia $extra`) - run(`$shellcmd -c -std=gnu99 -I$includedir -DJULIA_ENABLE_THREADING=1 -fPIC $fname.c`) - #run(`$shellcmd -o $fname $fname.o -L$libdir -L$standalonedir -Wl,--unresolved-symbols=ignore-in-object-files -Wl,-rpath,'.' -Wl,-rpath,$libdir -ljulia -l$fname -O2 $rpath $flags`) - run(`$shellcmd -o $fname $fname.o -L$libdir -L$standalonedir -Wl,-rpath,'.' -Wl,-rpath,$libdir -ljulia -l$fname -O2 $rpath $flags`) - end - end - -end diff --git a/src/irgen.jl b/src/irgen.jl deleted file mode 100644 index cc9811c..0000000 --- a/src/irgen.jl +++ /dev/null @@ -1,300 +0,0 @@ - -struct LLVMNativeCode # thin wrapper - p::Ptr{Cvoid} -end - -function xlinfo(f, tt) - # get the method instance - world = typemax(UInt) - g = (args...) -> Cassette.overdub(ctx, f, args...) 
- meth = which(g, tt) - sig_tt = Tuple{typeof(g), tt.parameters...} - (ti, env) = ccall(:jl_type_intersection_with_env, Any, - (Any, Any), sig_tt, meth.sig)::Core.SimpleVector - - if VERSION >= v"1.2.0-DEV.320" - meth = Base.func_for_method_checked(meth, ti, env) - else - meth = Base.func_for_method_checked(meth, ti) - end - - return ccall(:jl_specializations_get_linfo, Ref{Core.MethodInstance}, - (Any, Any, Any, UInt), meth, ti, env, world) -end - -""" -Returns an LLVMNativeCode object for the function call `f` with TupleTypes `tt`. -""" -function raise_exception(insblock::BasicBlock, ex::Value) -end - -# const jlctx = Ref{LLVM.Context}() - -# function __init__() -# jlctx[] = LLVM.Context(convert(LLVM.API.LLVMContextRef, -# cglobal(:jl_LLVMContext, Nothing))) -# end - -""" - irgen(func, tt; - optimize = true, - optimize_llvm = true, - fix_globals = true, - overdub = true, - module_setup = (m) -> nothing) - -Generates Julia IR targeted for static compilation. -`ccall` and `cglobal` uses have pointer references changed to symbols -meant to be linked with libjulia and other libraries. - -`optimize` controls Julia-side optimization. `optimize_llvm` controls -optimization on the LLVM side. - -If `overdub == true` (the default), Cassette is used to swap out -`ccall`s with a tuple of library and symbol. - -`module_setup` is an optional function to control setup of modules. It takes an LLVM -module as input. -""" -function irgen(@nospecialize(func), @nospecialize(tt); - optimize = true, - optimize_llvm = true, - fix_globals = true, - overdub = true, - module_setup = (m) -> nothing) - # get the method instance - isa(func, Core.Builtin) && error("function is not a generic function") - world = typemax(UInt) - gfunc = overdub ? (args...) -> Cassette.overdub(ctx, func, args...) 
: func - meth = which(gfunc, tt) - sig_tt = Tuple{typeof(gfunc), tt.parameters...} - (ti, env) = ccall(:jl_type_intersection_with_env, Any, - (Any, Any), sig_tt, meth.sig)::Core.SimpleVector - - if VERSION >= v"1.2.0-DEV.320" - meth = Base.func_for_method_checked(meth, ti, env) - else - meth = Base.func_for_method_checked(meth, ti) - end - - linfo = ccall(:jl_specializations_get_linfo, Ref{Core.MethodInstance}, - (Any, Any, Any, UInt), meth, ti, env, world) - - current_method = nothing - last_method_instance = nothing - call_stack = Vector{Core.MethodInstance}() - global method_map = Dict{String,Core.MethodInstance}() - global dependencies = MultiDict{Core.MethodInstance,LLVM.Function}() - # set-up the compiler interface - function hook_module_setup(ref::Ptr{Cvoid}) - ref = convert(LLVM.API.LLVMModuleRef, ref) - module_setup(LLVM.Module(ref)) - end - function hook_raise_exception(insblock::Ptr{Cvoid}, ex::Ptr{Cvoid}) - insblock = convert(LLVM.API.LLVMValueRef, insblock) - ex = convert(LLVM.API.LLVMValueRef, ex) - raise_exception(BasicBlock(insblock), Value(ex)) - end - function postprocess(ir) - # get rid of jfptr wrappers - for llvmf in functions(ir) - startswith(LLVM.name(llvmf), "jfptr_") && unsafe_delete!(ir, llvmf) - end - - return - end - function hook_module_activation(ref::Ptr{Cvoid}) - ref = convert(LLVM.API.LLVMModuleRef, ref) - global ir = LLVM.Module(ref) - postprocess(ir) - - # find the function that this module defines - llvmfs = filter(llvmf -> !isdeclaration(llvmf) && - linkage(llvmf) == LLVM.API.LLVMExternalLinkage, - collect(functions(ir))) - llvmf = nothing - if length(llvmfs) == 1 - llvmf = first(llvmfs) - elseif length(llvmfs) > 1 - llvmfs = filter!(llvmf -> startswith(LLVM.name(llvmf), "julia_"), llvmfs) - if length(llvmfs) == 1 - llvmf = first(llvmfs) - end - end - insert!(dependencies, last_method_instance, llvmf) - method_map[name(llvmf)] = current_method - end - function hook_emit_function(method_instance, code, world) - push!(call_stack, 
method_instance) - end - function hook_emitted_function(method, code, world) - current_method = method - last_method_instance = pop!(call_stack) - # @show code - # dump(method, maxdepth=2) - # global mymeth = method - end - - params = Base.CodegenParams(cached=false, - track_allocations=false, - code_coverage=false, - static_alloc=false, - prefer_specsig=true, - module_setup=hook_module_setup, - module_activation=hook_module_activation, - raise_exception=hook_raise_exception, - emit_function=hook_emit_function, - emitted_function=hook_emitted_function, - ) - - # get the code - mod = let - ref = ccall(:jl_get_llvmf_defn, LLVM.API.LLVMValueRef, - (Any, UInt, Bool, Bool, Base.CodegenParams), - linfo, world, #=wrapper=#false, #=optimize=#false, params) - if ref == C_NULL - # error(jlctx[], "the Julia compiler could not generate LLVM IR") - end - - llvmf = LLVM.Function(ref) - LLVM.parent(llvmf) - end - - # the main module should contain a single jfptr_ function definition, - # e.g. jlcall_kernel_vadd_62977 - - # definitions = filter(f->!isdeclaration(f), functions(mod)) - definitions = Iterators.filter(f->!isdeclaration(f), collect(functions(mod))) - # definitions = collect(functions(mod)) - wrapper = let - fs = collect(Iterators.filter(f->startswith(LLVM.name(f), "jfptr_"), definitions)) - @assert length(fs) == 1 - fs[1] - end - - # the jlcall wrapper function should point us to the actual entry-point, - # e.g. 
julia_kernel_vadd_62984 - entry_tag = let - m = match(r"jfptr_(.+)_\d+", LLVM.name(wrapper)) - @assert m != nothing - m.captures[1] - end - unsafe_delete!(mod, wrapper) - entry = let - re = Regex("julia_$(entry_tag)_\\d+") - llvmcall_re = Regex("julia_$(entry_tag)_\\d+u\\d+") - fs = collect(Iterators.filter(f->occursin(re, LLVM.name(f)) && - !occursin(llvmcall_re, LLVM.name(f)), definitions)) - if length(fs) != 1 - compiler_error(func, tt, cap, "could not find single entry-point"; - entry=>entry_tag, available=>[LLVM.name.(definitions)]) - end - fs[1] - end - - LLVM.name!(entry, string(nameof(func))) - - # link in dependent modules - cache = Dict{String,String}() - for called_method_instance in keys(dependencies) - llvmfs = dependencies[called_method_instance] - - # link the first module - llvmf = popfirst!(llvmfs) - llvmfn = LLVM.name(llvmf) - link!(mod, LLVM.parent(llvmf)) - # process subsequent duplicate modules - for dup_llvmf in llvmfs - if Base.JLOptions().debug_level >= 2 - # link them too, to ensure accurate backtrace reconstruction - link!(mod, LLVM.parent(dup_llvmf)) - else - # don't link them, but note the called function name in a cache - dup_llvmfn = LLVM.name(dup_llvmf) - cache[dup_llvmfn] = llvmfn - end - end - end - # resolve function declarations with cached entries - for llvmf in filter(isdeclaration, collect(functions(mod))) - llvmfn = LLVM.name(llvmf) - if haskey(cache, llvmfn) - def_llvmfn = cache[llvmfn] - replace_uses!(llvmf, functions(mod)[def_llvmfn]) - unsafe_delete!(LLVM.parent(llvmf), llvmf) - end - end - # rename functions to something easier to decipher - # especially helps with overdubbed functions - for (fname, mi) in method_map - id = split(fname, "_")[end] - basename = mi.def.name - args = join(collect(mi.specTypes.parameters)[2:end], "_") - if basename == :overdub # special handling for Cassette - basename = string(mi.specTypes.parameters[3]) - basename = replace(basename, r"^typeof\(" => "") - basename = replace(basename, r"\)$" 
=> "") - args = join(collect(mi.specTypes.parameters)[4:end], "_") - end - newname = join([basename, args, id], "_") - if haskey(functions(mod), fname) - name!(functions(mod)[fname], newname) - end - end - - d = find_ccalls(gfunc, tt) - fix_ccalls!(mod, d) - if fix_globals - fix_globals!(mod) - end - if optimize_llvm - optimize!(mod) - end - return mod -end - - -""" - optimize!(mod::LLVM.Module) - -Optimize the LLVM module `mod`. Crude for now. -Returns nothing. -""" -function optimize!(mod::LLVM.Module) - for llvmf in functions(mod) - startswith(LLVM.name(llvmf), "jfptr_") && unsafe_delete!(mod, llvmf) - startswith(LLVM.name(llvmf), "julia_") && LLVM.linkage!(llvmf, LLVM.API.LLVMExternalLinkage) - end - # triple = "wasm32-unknown-unknown-wasm" - # triple!(mod, triple) - # datalayout!(mod, "e-m:e-p:32:32-i64:64-n32:64-S128") - # LLVM.API.@apicall(:LLVMInitializeWebAssemblyTarget, Cvoid, ()) - # LLVM.API.@apicall(:LLVMInitializeWebAssemblyTargetMC, Cvoid, ()) - # LLVM.API.@apicall(:LLVMInitializeWebAssemblyTargetInfo, Cvoid, ()) - triple = "i686-pc-linux-gnu" - tm = TargetMachine(Target(triple), triple) - - ModulePassManager() do pm - # add_library_info!(pm, triple(mod)) - add_transform_info!(pm, tm) - ccall(:jl_add_optimization_passes, Cvoid, - (LLVM.API.LLVMPassManagerRef, Cint, Cint), - LLVM.ref(pm), Base.JLOptions().opt_level, 1) - - dead_arg_elimination!(pm) - global_optimizer!(pm) - global_dce!(pm) - strip_dead_prototypes!(pm) - - run!(pm, mod) - end - mod -end - -function write_object(mod::LLVM.Module, path) - host_triple = triple() - host_t = Target(host_triple) - TargetMachine(host_t, host_triple, "", "", LLVM.API.LLVMCodeGenLevelDefault, LLVM.API.LLVMRelocPIC) do tm - emit(tm, mod, LLVM.API.LLVMObjectFile, path) - end -end diff --git a/src/overdub.jl b/src/overdub.jl deleted file mode 100644 index 9a0015b..0000000 --- a/src/overdub.jl +++ /dev/null @@ -1,33 +0,0 @@ -# Implements contextual dispatch through Cassette.jl - -using Cassette - -## -# Convert 
two-arg `ccall` to single arg. -## -function transform(ctx, ref) - CI = ref.code_info - ismatch = x -> begin - Base.Meta.isexpr(x, :foreigncall) && - Base.Meta.isexpr(x.args[1], :call) - end - replace = x -> begin - y = Expr(x.head, Any[x.args[1].args[2], x.args[2:end]...]) - Expr(x.head, x.args[1].args[2], x.args[2:end]...) - end - Cassette.replace_match!(replace, ismatch, CI.code) - return CI -end - -const Pass = Cassette.@pass transform - -Cassette.@context Ctx -const ctx = Cassette.disablehooks(Ctx(pass = Pass)) - -### -# Rewrite functions -### - -#@inline Cassette.overdub(ctx::Ctx, ::typeof(+), a::T, b::T) where T<:Union{Float32, Float64} = add_float_contract(a, b) - -contextualize(f::F) where F = (args...) -> Cassette.overdub(ctx, f, args...) diff --git a/src/serialize.jl b/src/serialize.jl deleted file mode 100644 index 5725650..0000000 --- a/src/serialize.jl +++ /dev/null @@ -1,242 +0,0 @@ - -""" -A context structure for holding state related to serializing Julia -objects. A key component is an `IOBuffer` used to hold the serialized -result. 
-""" -struct SerializeContext - io::IOBuffer - store::Dict{Any,Any} # Meant to map Julia object to variable name - init::Vector{Any} # Expressions to run initially -end -SerializeContext(io::IOBuffer = IOBuffer()) = SerializeContext(io, Dict(), Vector{Expr}()) - -const _td = IdDict( - Any => :jl_any_type, - Float64 => :jl_float64_type, - Float32 => :jl_float32_type, - Int64 => :jl_int64_type, - Int32 => :jl_int32_type, - Int16 => :jl_int16_type, - Int8 => :jl_int8_type, - UInt64 => :jl_uint64_type, - UInt32 => :jl_uint32_type, - UInt16 => :jl_uint16_type, - UInt8 => :jl_uint8_type, - Cint => :jl_int32_type, - Cvoid => :jl_any_type, - Array => :jl_array_type, - Array{Any,1} => :jl_array_any_type, - Array{Int32,1} => :jl_array_int32_type, - Array{UInt8,1} => :jl_array_uint8_type, - ErrorException => :jl_errorexception_type, - DataType => :jl_datatype_type, - UnionAll => :jl_unionall_type, - Union => :jl_union_type, - Core.TypeofBottom => :jl_typeofbottom_type, - TypeVar => :jl_tvar_type, -) - -const _t = IdDict() - -for (t,s) in _td - _t[t] = :(unsafe_load(cglobal($(QuoteNode(s)), Type))) -end - -const _gd = IdDict( - Core => :jl_core_module, - Main => :jl_main_module, - nothing => :jl_nothing, - () => :jl_emptytuple, - Core.svec() => :jl_emptysvec, - UndefRefError() => :jl_undefref_exception, -) - -const _g = IdDict() - -for (x,s) in _gd - _g[x] = :(unsafe_load(cglobal($(QuoteNode(s)), Any))) -end - -""" - serialize(ctx::SerializeContext, x) - -Serialize `x` into the context object `ctx`. `ctx.io` is the `IOBuffer` where the -serialized results are stored. Get the result with `take!(ctx.io)`. - -This function returns an expression that will deserialize the object. Several `serialize` -methods can be called recursively to build up deserialization code for nested objects. -The expression returned is meant to be `eval`ed into a function that can be called -to do the serialization. 
- -The deserialization code should be pretty low-level code that can be compiled -relatively easily. It especially shouldn't use global variables. - -Serialization / deserialization code can use `ctx` to hold state information. - -Some simple types like boxed variables do not need to write anything to `ctx.io`. -They can return an expression that directly creates the object. -""" -function serialize(ctx::SerializeContext, @nospecialize(x)) - haskey(_g, x) && return _g[x] - # TODO: fix this major kludge. - if nfields(x) > 0 - return Expr(:tuple, (serialize(ctx, getfield(x,i)) for i in 1:nfields(x))...) - end - return :(unsafe_load(cglobal(:jl_emptytuple, Any))) -end - -function serialize(ctx::SerializeContext, @nospecialize(t::DataType)) - if haskey(_t, t) - return _t[t] - elseif haskey(ctx.store, t) - return ctx.store[t] - else - # primary = unwrap_unionall(t.wrapper) - name = gensym(Symbol(:type, "-", t.name.name)) - ctx.store[t] = name - e = quote - $name = let - local tn = $(serialize(ctx, t.name)) - # names = $(serialize(ctx, t.names)) - local super = $(serialize(ctx, t.super)) - local parameters = $(serialize(ctx, t.parameters)) - local types = $(serialize(ctx, t.types)) - local ndt = ccall(:jl_new_datatype, Any, - (Any, Any, Any, Any, Any, Any, Cint, Cint, Cint), - tn, tn.module, super, parameters, #=names=# unsafe_load(cglobal(:jl_any_type, Any)), types, - $(t.abstract), $(t.mutable), $(t.ninitialized)) - # tn.wrapper = ndt.name.wrapper - # ccall(:jl_set_const, Cvoid, (Any, Any, Any), tn.module, tn.name, tn.wrapper) - ndt - # ty = tn.wrapper - # $(ctx.types[string(t)]) = ndt - # hasinstance = serialize(ctx, ) - # $(if isdefined(primary, :instance) && !isdefined(t, :instance) - # # use setfield! 
directly to avoid `fieldtype` lowering expecting to see a Singleton object already on ty - # :(Core.setfield!(ty, :instance, ccall(:jl_new_struct, Any, (Any, Any...), ty))) - # end) - end - end - push!(ctx.init, e) - return name - end -end - -function serialize(ctx::SerializeContext, tn::Core.TypeName) - haskey(ctx.store, tn) && return ctx.store[tn] - name = gensym(Symbol(:typename, "-", tn.name)) - ctx.store[tn] = name - e = quote - $name = ccall(:jl_new_typename_in, Ref{Core.TypeName}, (Any, Any), - # $(serialize(ctx, tn.name)), Main #=__deserialized_types__ =# ) - $(serialize(ctx, tn.name)), unsafe_load(cglobal(:jl_main_module, Any)) #=__deserialized_types__ =# ) - end - push!(ctx.init, e) - return name -end - -function serialize(ctx::SerializeContext, mi::Core.MethodInstance) - return :(unsafe_load(cglobal(:jl_emptytuple, Any))) -end - -function serialize(ctx::SerializeContext, x::String) - advance!(ctx.io) - v = Vector{UInt8}(x) - ioptr = ctx.io.ptr - write(ctx.io, v) - quote - unsafe_string(Vptr + $(ioptr - 1), $(length(v))) - end -end - -function serialize(ctx::SerializeContext, x::Symbol) - haskey(ctx.store, x) && return ctx.store[x] - name = gensym(Symbol(:symbol, "-", x)) - ctx.store[x] = name - e = quote - $name = ccall(:jl_symbol_n, Any, (Ptr{UInt8}, Csize_t), $(serialize(ctx, string(x))), $(length(string(x)))) - # ccall(:jl_set_global, Cvoid, (Any, Any, Any), unsafe_load(cglobal(:jl_main_module, Any)), $(QuoteNode(name)), x) - end - push!(ctx.init, e) - return name -end - - - -# Define functions that return an expression. 
Example: -# serialize(ctx::SerializeContext, x::Int) = :(ccall(:jl_box_int64, Any, (Int,), $x)) -for (fun, type) in (:jl_box_int64 => Int64, :jl_box_int32 => Int32, :jl_box_int8 => Int16, :jl_box_int8 => Int8, - :jl_box_uint64 => UInt64, :jl_box_uint32 => UInt32, :jl_box_uint8 => UInt16, :jl_box_uint8 => UInt8, - :jl_box_voidpointer => Ptr{Cvoid}, - :jl_box_float64 => Float64, :jl_box_float32 => Float32) - @eval serialize(ctx::SerializeContext, x::$type) = Expr(:call, :ccall, QuoteNode($(QuoteNode(fun))), Any, Expr(:tuple, $type), x) -end -serialize(ctx::SerializeContext, x::Char) = :(ccall(:jl_box_char, Any, (UInt32,), $x)) -serialize(ctx::SerializeContext, x::Bool) = :(ccall(:jl_box_bool, Any, (UInt8,), $x)) - -function serialize(ctx::SerializeContext, a::Tuple) - length(a) == 0 && return :(unsafe_load(cglobal(:jl_emptytuple, Any))) - Expr(:tuple, (serialize(ctx, x) for x in a)...) -end - -function serialize(ctx::SerializeContext, a::Core.SimpleVector) - length(a) == 0 && return :(unsafe_load(cglobal(:jl_emptysvec, Any))) - Expr(:call, Expr(:., :Core, QuoteNode(:svec)), (serialize(ctx, x) for x in a)...) 
-end - -advance!(io) = write(io, repeat('\0', -rem(io.ptr - 1, 8, RoundUp))) # Align data to 8 bytes - -function serialize(ctx::SerializeContext, a::Array{T,N}) where {T,N} - elty = eltype(a) - aty = typeof(a) - dims = size(a) - atys = serialize(ctx, aty) - if isbitstype(elty) - advance!(ctx.io) - ioptr = ctx.io.ptr - write(ctx.io, a) - if N == 1 - advance!(ctx.io) - ioptr = ctx.io.ptr - write(ctx.io, a) - quote - p = Vptr + $ioptr - 1 - ccall(:jl_ptr_to_array_1d, $aty, (Any, Ptr{Cvoid}, Csize_t, Cint), $atys, p, $(length(a)), false) - end - else - dms = serialize(ctx, dims) - advance!(ctx.io) - ioptr = ctx.io.ptr - write(ctx.io, a) - quote - p = Vptr + $ioptr - 1 - ccall(:jl_ptr_to_array, $aty, (Any, Ptr{Cvoid}, Any, Int32), $atys, p, $dms, false) - end - end - else - idx = Int[] - e = Array{Any}(undef, length(a)) - @inbounds for i in eachindex(a) - if isassigned(a, i) - e[i] = serialize(ctx, a[i]) - push!(idx, i) - end - end - aname = gensym() - resulte = [quote - # $aname = Array{$elty, $(length(dims))}(undef, $dims) - $aname = ccall(:jl_new_array, $aty, (Any, Any), $atys, $(serialize(ctx, dims))) - end] - for i in idx - push!(resulte, quote - # unsafe_store!(pointer($aname), $(e[i]), $i) - unsafe_store!(convert(Ptr{Any}, pointer($aname)), $(e[i]), $i) - # unsafe_store!(convert(Ptr{Csize_t}, pointer($aname)), pointer_from_objref($(e[i])), $i) - # @inbounds $aname[$i] = $(e[i]) - end) - end - push!(resulte, :($aname = $aname)) - Expr(:block, resulte...) 
- end -end diff --git a/src/utils.jl b/src/utils.jl deleted file mode 100644 index 0fdf7a7..0000000 --- a/src/utils.jl +++ /dev/null @@ -1,56 +0,0 @@ - -function julia_to_llvm(@nospecialize x) - isboxed = Ref{UInt8}() - # LLVMType(ccall(:jl_type_to_llvm, LLVM.API.LLVMTypeRef, (Any, Ref{UInt8}), x, isboxed)) # noserialize - LLVMType(ccall(:julia_type_to_llvm, LLVM.API.LLVMTypeRef, (Any, Ref{UInt8}), x, isboxed)) # julia v1.1.1 -end - -const jl_value_t_ptr = julia_to_llvm(Any) -const jl_value_t = eltype(jl_value_t_ptr) -# const jl_value_t_ptr_ptr = LLVM.PointerType(jl_value_t_ptr) -# # cheat on these for now: -# const jl_datatype_t_ptr = jl_value_t_ptr -# const jl_unionall_t_ptr = jl_value_t_ptr -# const jl_typename_t_ptr = jl_value_t_ptr -# const jl_sym_t_ptr = jl_value_t_ptr -# const jl_svec_t_ptr = jl_value_t_ptr -# const jl_module_t_ptr = jl_value_t_ptr -# const jl_array_t_ptr = jl_value_t_ptr -# -# const bool_t = julia_to_llvm(Bool) -# const int8_t = julia_to_llvm(Int8) -# const int16_t = julia_to_llvm(Int16) -# const int32_t = julia_to_llvm(Int32) -# const int64_t = julia_to_llvm(Int64) -# const uint8_t = julia_to_llvm(UInt8) -# const uint16_t = julia_to_llvm(UInt16) -# const uint32_t = julia_to_llvm(UInt32) -# const uint64_t = julia_to_llvm(UInt64) -# const float_t = julia_to_llvm(Float32) -# const double_t = julia_to_llvm(Float64) -# const float32_t = julia_to_llvm(Float32) -# const float64_t = julia_to_llvm(Float64) -# const void_t = julia_to_llvm(Nothing) -# const size_t = julia_to_llvm(Int) -# -# const int8_t_ptr = LLVM.PointerType(int8_t) -# const void_t_ptr = LLVM.PointerType(void_t) - -function module_setup(mod::LLVM.Module) -# triple!(mod, "wasm32-unknown-unknown-wasm") -# datalayout!(mod, "e-m:e-p:32:32-i64:64-n32:64-S128") -end - -llvmmod(native_code) = - LLVM.Module(ccall(:jl_get_llvm_module, LLVM.API.LLVMModuleRef, - (Ptr{Cvoid},), native_code.p)) - -function Base.write(mod::LLVM.Module, path::String) - open(io -> write(io, mod), path, "w") -end - 
- -walk(f, x) = true -# walk(f, x::Instruction) = foreach(c->walk(f,c), operands(x)) -# walk(f, x::Instruction) = f(x) || foreach(c->walk(f,c), operands(x)) -walk(f, x::ConstantExpr) = f(x) || foreach(c->walk(f,c), operands(x)) diff --git a/test/Project.toml b/test/Project.toml index 6e781cc..3dee0e5 100644 --- a/test/Project.toml +++ b/test/Project.toml @@ -4,3 +4,6 @@ LLVM = "929cbde3-209d-540e-8aea-75f648917ca0" Libdl = "8f399da3-3557-5675-b5ff-fb832c97cbdb" LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e" Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40" +LoopVectorization = "bdcacae8-1622-11e9-2a5c-532679323890" +ManualMemory = "d125e4d3-2237-4719-b19c-fa641b8a4667" +StrideArraysCore = "7792a7ef-975c-4747-a70f-980b88e8d1da" \ No newline at end of file diff --git a/test/ccalls.jl b/test/ccalls.jl deleted file mode 100644 index dd5663c..0000000 --- a/test/ccalls.jl +++ /dev/null @@ -1,35 +0,0 @@ -# d = find_ccalls(Threads.nthreads, Tuple{}) -# d = find_ccalls(time, Tuple{}) -# d = find_ccalls(muladd, Tuple{Array{Float64,2},Array{Float64,2},Array{Float64,2}}) - -f1() = ccall(:jl_errno, Int, (Int,), 11) -f2() = ccall(:jl_errno, Int, (Int, Int), 21, 22) -f3() = ccall(:jl_errno, Int, (Int, Int, Int), 31, 32, 33) - -@testset "ccalls" begin - m1 = irgen(f1, Tuple{}) - m2 = irgen(f2, Tuple{}) - m3 = irgen(f3, Tuple{}) - LLVM.verify(m1) - LLVM.verify(m2) - LLVM.verify(m3) -end - - -function f() - n = Int(unsafe_load(cglobal(:jl_n_threads, Cint))) - return 2n -end - -@testset "cglobal" begin - m = irgen(f, Tuple{}) - LLVM.verify(m) - @test f() == @jlrun f() -end - -@testset "extern" begin - f() = @extern(:time, Cvoid, (Ptr{Cvoid},), C_NULL) - m = irgen(f, Tuple{}) - LLVM.verify(m) - @test "time" in [name(f) for f in LLVM.functions(m)] -end diff --git a/test/globals.jl b/test/globals.jl deleted file mode 100644 index 43c70e8..0000000 --- a/test/globals.jl +++ /dev/null @@ -1,36 +0,0 @@ -# @testset "serialize" begin -# ctx = StaticCompiler.SerializeContext() -# a = 
Any["abcdg", ["hi", "bye"], 3333, Int32(44), 314f0, 3.14, (1, 3.3f0), Core.svec(9.9, 9), :sym, :sym, :a] -# e = StaticCompiler.serialize(ctx, a) -# g = eval(:(Vptr -> $e)) -# v = take!(ctx.io) -# GC.enable(false) -# res = g(pointer(v)) -# GC.enable(true) -# @test res == a -# end - - -# const a = ["abcdg", "asdfl", 123, 3.14, ["a", "asdf"], (1, 3.63), [1, 3.63]] -const a = ["abcdg", "asdxf"] -const b = "B" -const x = [1.33, 35.0] -const xi = [3, 5] - -f(x) = @inbounds a[1][3] > b[1] ? 2x : x -g(i) = @inbounds x[1] > x[2] ? 2i : i -h(i) = @inbounds xi[1] == 3 ? i : 2i - -@testset "globals" begin - @test f(3) == @jlrun f(3) - @test g(3) == @jlrun g(3) - @test h(3) == @jlrun h(3) -end - -f() = Complex{Float64} -g(@nospecialize(x)) = isa(x, Number) ? 1 : 0 - -@testset "type" begin - @test string(@jlrun f()) == "Complex{Float64}" - res = g(4.0im) -end diff --git a/test/ode.jl b/test/ode.jl deleted file mode 100644 index b270a54..0000000 --- a/test/ode.jl +++ /dev/null @@ -1,182 +0,0 @@ -# Adapted from: https://github.com/JuliaDiffEq/ODE.jl/blob/8954872f956116e78b6c04690f899fe2db696b4e/src/ODE.jl#L84-L360 -# MIT license -# Copyright (c) 2009-2015: various contributors: https://github.com/JuliaLang/ODE.jl/contributors - -using LinearAlgebra - -function hinit(F, x0, t0::T, tend, p, reltol, abstol) where T - # Returns first step, direction of integration and F evaluated at t0 - tdir = sign(tend-t0) - tdir==0 && error("Zero time span") - tau = max(reltol*norm(x0, Inf), abstol) - d0 = norm(x0, Inf)/tau - f0 = F(t0, x0) - d1 = norm(f0, Inf)/tau - if d0 < 1e-5 || d1 < 1e-5 - h0 = 1e-6 - else - h0 = 0.01*(d0/d1) - end - h0 = convert(T,h0) - # perform Euler step - x1 = x0 + tdir*h0*f0 - f1 = F(t0 + tdir*h0, x1) - # estimate second derivative - d2 = norm(f1 - f0, Inf)/(tau*h0) - if max(d1, d2) <= 1e-15 - h1 = max(T(10)^(-6), T(10)^(-3)*h0) - else - pow = -(2 + log10(max(d1, d2)))/(p + 1) - h1 = 10^pow - end - h1 = convert(T,h1) - return tdir*min(100*h0, h1, tdir*(tend-t0)), tdir, 
f0 -end - -function fdjacobian(F, x::Number, t) - ftx = F(t, x) - - # The 100 below is heuristic - dx = (x .+ (x==0))./100 - dFdx = (F(t,x+dx)-ftx)./dx - - return dFdx -end - -function fdjacobian(F, x, t) - ftx = F(t, x) - lx = max(length(x),1) - dFdx = zeros(eltype(x), lx, lx) - for j = 1:lx - # The 100 below is heuristic - dx = zeros(eltype(x), lx) - dx[j] = (x[j] .+ (x[j]==0))./100 - dFdx[:,j] = (F(t,x+dx)-ftx)./dx[j] - end - return dFdx -end - -# ODE23S Solve stiff systems based on a modified Rosenbrock triple -# (also used by MATLAB's ODE23s); see Sec. 4.1 in -# -# [SR97] L.F. Shampine and M.W. Reichelt: "The MATLAB ODE Suite," SIAM Journal on Scientific Computing, Vol. 18, 1997, pp. 1–22 -# -# supports keywords: points = :all | :specified (using dense output) -# jacobian = G(t,y)::Function | nothing (FD) -function ode23s(F, y0, tspan; - reltol = 1.0e-5, abstol = 1.0e-8, - jacobian=nothing, - points=:all, - norm=LinearAlgebra.norm, - minstep=abs(tspan[end] - tspan[1])/1e18, - maxstep=abs(tspan[end] - tspan[1])/2.5, - initstep=0.) - - # select method for computing the Jacobian - if typeof(jacobian) == Function - jac = jacobian - else - # fallback finite-difference - jac = (t, y)->fdjacobian(F, y, t) - end - - # constants - d = 1/(2 + sqrt(2)) - e32 = 6 + sqrt(2) - - - # initialization - t = tspan[1] - - tfinal = tspan[end] - - h = initstep - if h == 0. 
- # initial guess at a step size - h, tdir, F0 = hinit(F, y0, t, tfinal, 3, reltol, abstol) - else - tdir = sign(tfinal - t) - F0 = F(t,y0) - end - h = tdir * min(abs(h), maxstep) - - y = y0 - tout = [t] # first output time - yout = [deepcopy(y)] # first output solution - - J = jac(t,y) # get Jacobian of F wrt y -# Core.print(t, " ", tfinal, " ", minstep, " ", h) - while abs(t - tfinal) > 0 && minstep < abs(h) - if abs(t-tfinal) < abs(h) - h = tfinal - t - end - - if size(J,1) == 1 - W = I - h*d*J - else - # note: if there is a mass matrix M on the lhs of the ODE, i.e., - # M * dy/dt = F(t,y) - # we can simply replace eye(J) by M in the following expression - # (see Sec. 5 in [SR97]) - - W = lu( I - h*d*J ) - end - - # approximate time-derivative of F - T = h*d*(F(t + h/100, y) - F0)/(h/100) - - # modified Rosenbrock formula - k1 = W\(F0 + T) - F1 = F(t + 0.5*h, y + 0.5*h*k1) - k2 = W\(F1 - k1) + k1 - ynew = y + h*k2 - F2 = F(t + h, ynew) - k3 = W\(F2 - e32*(k2 - F1) - 2*(k1 - F0) + T ) - - err = (abs(h)/6)*norm(k1 - 2*k2 + k3) # error estimate - delta = max(reltol*max(norm(y),norm(ynew)), abstol) # allowable error - - # check if new solution is acceptable - if err <= delta - - # # if points==:specified || points==:all - # # only points in tspan are requested - # # -> find relevant points in (t,t+h] - # for toi in tspan[(tspan.>t) .& (tspan.<=t+h)] - # # rescale to (0,1] - # s = (toi-t)/h - - # # use interpolation formula to get solutions at t=toi - # push!(tout, toi) - # push!(yout, y + h*( k1*s*(1-s)/(1-2*d) + k2*s*(s-2*d)/(1-2*d))) - # end - # # Core.print("First\n") - # # end - # if points==:all - if (tout[end]!=t+h) - # # add the intermediate points - push!(tout, t + h) - push!(yout, ynew) - end - - # update solution - t = t + h - y = ynew - - F0 = F2 # use FSAL property - J = jac(t,y) # get Jacobian of F wrt y - # for new solution - end - - # update of the step size - h = tdir*min( maxstep, abs(h)*0.8*(delta/err)^(1/3) ) - end - - return tout, yout -end - - 
-# fode() = ode23s((t,y)->2.0t^2, 0.0, Float64[0:.001:2;], initstep = 1e-4)[2][end] -# fode() = ode23s((t,y)->2.0t^2, 0.0, [0:.001:2;], initstep = 1e-4)[2][end] - -# @show fode() diff --git a/test/others.jl b/test/others.jl deleted file mode 100644 index 4481e77..0000000 --- a/test/others.jl +++ /dev/null @@ -1,73 +0,0 @@ -mutable struct AAA - aaa::Int - bbb::Int -end -@noinline ssum(x) = x.aaa + x.bbb -fstruct(x) = ssum(AAA(x, 99)) -@test fstruct(10) == @jlrun fstruct(10) - -module ZZ -mutable struct AAA - aaa::Int - bbb::Int -end -@noinline ssum(x) = x.aaa + x.bbb -fstruct(x) = ssum(AAA(x, 99)) -end # module -ffstruct(x) = ZZ.fstruct(x) -@test ffstruct(10) == @jlrun ffstruct(10) - -const ag = Ref(0x80808080) -jglobal() = ag -@show bg = @jlrun jglobal() -# @test jglobal()[] == bg[] # Something's broken with mutable's - -arraysum(x) = sum([x, 1]) -# @test arraysum(6) == @jlrun arraysum(6) - -fsin(x) = sin(x) -@test fsin(0.5) == @jlrun fsin(0.5) - -fccall() = ccall(:jl_ver_major, Cint, ()) -@test fccall() == @jlrun fccall() - -fcglobal() = cglobal(:jl_n_threads, Cint) -@test fcglobal() == @jlrun fcglobal() - -const sv = Core.svec(1,2,3,4) -fsv() = sv -@test fsv() == @jlrun fsv() - -const arr = [9,9,9,9] -farray() = arr -@show @jlrun farray() -@show farray() -# @test farray() == @jlrun farray() - -@noinline f0(x) = 3x -@noinline fop(f, x) = 2f(x) -funcall(x) = fop(f0, x) -@test funcall(2) == @jlrun funcall(2) - -hi() = print(Core.stdout, 'X') -@jlrun hi() - -hello() = print(Core.stdout, "Hello world...\n") -@jlrun hello() - -function gx(i) - a = 2.0:0.1:10.0 - @inbounds i > 3 ? a[1] : a[5] -end - -@test_skip gx(4) == @jlrun gx(4) - -fsimple() = [0:.001:2;][end] - -@test fsimple() == @jlrun fsimple() - -@noinline function fsym(x; l = :hello, s = :x) - s == :asdf ? 
x : 2x -end -gsym(x) = fsym(x, l = :hello, s = :asdf) + 1 -@test gsym(3) == @jlrun gsym(3) diff --git a/test/runtests.jl b/test/runtests.jl index 3381a87..78d62bf 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -1,21 +1,199 @@ using StaticCompiler using Test -using LLVM using Libdl +using LinearAlgebra +using LoopVectorization +using ManualMemory +using StrideArraysCore -cd(@__DIR__) -@testset "ccalls" begin - include("ccalls.jl") +@testset "Basics" begin + + simple_sum(x) = x + one(typeof(x)) + + # This probably needs a macro + @test ccall(generate_shlib_fptr(simple_sum, (Int,)), Int, (Int,), 1) == Int(2) + @test ccall(generate_shlib_fptr(simple_sum, (Float64,)), Float64, (Float64 ,), 1) == Float64(2) + + @test ccall(generate_shlib_fptr(simple_sum, (Int32,)), Int32, (Int32,), 1) == Int32(2) + @test ccall(generate_shlib_fptr(simple_sum, (Float32,)), Float32, (Float32 ,), 1) == Float16(2) + + @test ccall(generate_shlib_fptr(simple_sum, (Int16,)), Int16, (Int16,), 1) == Int16(2) + @test ccall(generate_shlib_fptr(simple_sum, (Float16,)), Float16, (Float16 ,), 1) == Float16(2) + +end + + +fib(n) = n <= 1 ? n : fib(n - 1) + fib(n - 2) # This needs to be defined globally due to https://github.com/JuliaLang/julia/issues/40990 + +@testset "Recursion" begin + fib_ptr = generate_shlib_fptr(fib, (Int,)) + @test @ccall( $fib_ptr(10::Int) :: Int ) == 55 + + # Trick to work around #40990 + _fib2(_fib2, n) = n <= 1 ? n : _fib2(_fib2, n-1) + _fib2(_fib2, n-2) + fib2(n) = _fib2(_fib2, n) + + fib2_ptr = generate_shlib_fptr(fib2, (Int,)) + @test @ccall( $fib2_ptr(20::Int) :: Int ) == 6765 + +end + +# Call binaries for testing +# @testset "Generate binary" begin +# fib(n) = n <= 1 ? n : fib(n - 1) + fib(n - 2) +# libname = tempname() +# generate_shlib(fib, (Int,), libname) +# ptr = Libdl.dlopen(libname * "." 
* Libdl.dlext, Libdl.RTLD_LOCAL) +# fptr = Libdl.dlsym(ptr, "julia_fib") +# @assert fptr != C_NULL +# # This works on REPL +# @test_skip ccall(fptr, Int, (Int,), 10) == 55 +# end + + +@testset "Loops" begin + function sum_first_N_int(N) + s = 0 + for a in 1:N + s += a + end + s + end + @test ccall(generate_shlib_fptr(sum_first_N_int, (Int,)), Int, (Int,), 10) == 55 + + function sum_first_N_float64(N) + s = Float64(0) + for a in 1:N + s += Float64(a) + end + s + end + @test ccall(generate_shlib_fptr(sum_first_N_float64, (Int,)), Float64, (Int,), 10) == 55. + + function sum_first_N_int_inbounds(N) + s = 0 + @inbounds for a in 1:N + s += a + end + s + end + @test ccall(generate_shlib_fptr(sum_first_N_int_inbounds, (Int,)), Int, (Int,), 10) == 55 + + + function sum_first_N_float64_inbounds(N) + s = Float64(0) + @inbounds for a in 1:N + s += Float64(a) + end + s + end + @test ccall(generate_shlib_fptr(sum_first_N_float64_inbounds, (Int,)), Float64, (Int,), 10) == 55. + +end + +# Arrays with different input types Int32, Int64, Float32, Float64, Complex? +@testset "Arrays" begin + function array_sum(n, A) + s = zero(eltype(A)) + for i in 1:n + s += A[i] + end + s + end + + array_sum_ptr = generate_shlib_fptr(array_sum, Tuple{Int, Vector{Int}}) + @test ( @ccall $array_sum_ptr(10::Int, collect(1:10)::Vector{Int})::Int ) == 55 + + # this will segfault on my machine if I use 64 bit complex numbers! 
+ array_sum_complex_ptr = generate_shlib_fptr(array_sum, Tuple{Int, Vector{Complex{Float32}}}) + @test ( @ccall $array_sum_complex_ptr(2::Int, [1f0+im, 1f0-im]::Vector{Complex{Float32}})::Complex{Float32} ) ≈ 2.0 + + #This will segfault + array_sum_complex64_ptr = generate_shlib_fptr(array_sum, Tuple{Int, Vector{Complex{Float64}}}) + @test_skip ( @ccall $array_sum_complex_ptr(2::Int, [1.0+im, 1.0-im]::Vector{Complex{Float64}})::Complex{Float64} ) ≈ 2.0 +end + + +# Julia wants to treat Tuple (and other things like it) as plain bits, but LLVM wants to treat it as something with a pointer. +# We need to be careful to not send, nor receive an unwrapped Tuple to a compiled function +@testset "Send and receive Tuple" begin + foo(u::Tuple) = 2 .* reverse(u) .- 1 # we can't just compile this as is. + + # Make a mutating function that places the output into a Ref for the caller to grab: + foo!(out::Ref{<:Tuple}, u::Tuple) = (out[] = foo(u); return nothing) + + foo_ptr = generate_shlib_fptr(foo!, Tuple{Base.RefValue{NTuple{3, Int}}, NTuple{3, Int}}) + out = Ref{NTuple{3, Int}}() + # we wrap u in a ref when we send it to the binary because LLVM expects that :( + u = Ref((1, 2, 3)) + (@ccall $foo_ptr(out::Ref{NTuple{3, Int}}, u::Ref{NTuple{3, Int}}) :: Nothing) + + @test out[] == foo(u[]) end -@testset "globals" begin - include("globals.jl") + +# Just to call external libraries +@testset "BLAS" begin + function mydot(a::Vector{Float64}) + N = length(a) + BLAS.dot(N, a, 1, a, 1) + end + a = [1.0, 2.0] + mydot_ptr = generate_shlib_fptr(mydot, Tuple{Vector{Float64}}) + @test @ccall( $mydot_ptr(a::Vector{Float64})::Float64 ) == 5.0 end -@testset "others" begin - include("others.jl") + +@testset "Hello World" begin + function hello(N) + println("Hello World $N") + N + end + # How do I test this? + # Also ... 
this segfaults + @test_skip ccall(generate_shlib_fptr(hello, (Int,)), Int, (Int,), 1) == 1 end -@testset "standalone" begin - include("standalone-exe.jl") +# I can't beleive this works. +@testset "LoopVectorization" begin + function mul!(C, A, B) + # note: @tturbo does NOT work + @turbo for n ∈ indices((C,B), 2), m ∈ indices((C,A), 1) + Cmn = zero(eltype(C)) + for k ∈ indices((A,B), (2,1)) + Cmn += A[m,k] * B[k,n] + end + C[m,n] = Cmn + end + end + mul_ptr! = generate_shlib_fptr(mul!, Tuple{Matrix{Float64}, Matrix{Float64}, Matrix{Float64}}) + + C = Array{Float64}(undef, 10, 12) + A = rand(10, 11) + B = rand(11, 12) + + @ccall $mul_ptr!(C::Matrix{Float64}, A::Matrix{Float64}, B::Matrix{Float64}) :: Nothing + @test C ≈ A*B end + +# This is a trick to get stack allocated arrays inside a function body (so long as they don't escape). +# This lets us have intermediate, mutable stack allocated arrays inside our +@testset "Alloca" begin + function f(N) + # this can hold at most 100 Int values, if you use it for more, you'll segfault + buf = ManualMemory.MemoryBuffer{100, Int}(undef) + GC.@preserve buf begin + # wrap the first N values in a PtrArray + arr = PtrArray(pointer(buf), (N,)) + arr .= 1 # mutate the array to be all 1s + sum(arr) # compute the sum. It is very imporatant that no references to arr escape the function body + end + end + + fptr = generate_shlib_fptr(f, Tuple{Int}) + @test (@ccall $fptr(20::Int) :: Int) == 20 + +end + + +# data structures, dictionaries, tuples, named tuples diff --git a/test/standalone-exe.jl b/test/standalone-exe.jl deleted file mode 100644 index fb97de5..0000000 --- a/test/standalone-exe.jl +++ /dev/null @@ -1,41 +0,0 @@ -# Definitions of functions to compile -twox(x) = 2x - -const aa = [4, 5] -arrayfun(x) = x + aa[1] + aa[2] - -jsin(x) = sin(x) - -function arridx(i) - a = collect(1.0:0.1:10.0) - @inbounds i > 3 ? 
a[1] : a[5] -end - -fsimple() = [0:.001:2;][end] - -include("ode.jl") -fode() = ode23s((t,y)->2.0t^2, 0.0, [0:.001:2;], initstep = 1e-4)[2][end] - -# Functions to compile and arguments to pass -funcalls = [ - (twox, Tuple{Int}, 4), - (arrayfun, Tuple{Int}, 4), - (jsin, Tuple{Float64}, 0.5), - (arridx, Tuple{Int}, 4), - (fsimple, Tuple{}, ()), - (fode, Tuple{}, ()), # Broken on Julia v1.2.0; works on Julia v1.3.0-rc3 -] - -StaticCompiler.exegen(funcalls) - -using Formatting -@testset "exegen" begin - cd("standalone") do - for (func, tt, val) in funcalls - fname = nameof(func) - rettype = Base.return_types(func, tt)[1] - fmt = StaticCompiler.Cformatmap[rettype] - @test Formatting.sprintf1(fmt, func(val...)) == read(`./$fname`, String) - end - end -end From fea8b4b8df29074ad1adc9e5ffd11d5754c9c4e9 Mon Sep 17 00:00:00 2001 From: MasonProtter Date: Wed, 2 Feb 2022 19:59:39 -0700 Subject: [PATCH 023/159] new friendly `compile` interface --- Manifest.toml | 1 + Project.toml | 3 +- README.md | 34 +++++++++----- src/StaticCompiler.jl | 107 +++++++++++++++++++++++++++++++++++++++++- test/runtests.jl | 72 +++++++++------------------- 5 files changed, 152 insertions(+), 65 deletions(-) diff --git a/Manifest.toml b/Manifest.toml index 44dbe94..ab4efd7 100644 --- a/Manifest.toml +++ b/Manifest.toml @@ -1,5 +1,6 @@ # This file is machine-generated - editing it directly is not advised +julia_version = "1.7.1" manifest_format = "2.0" [[deps.ArgTools]] diff --git a/Project.toml b/Project.toml index 81a4e92..461d8dd 100644 --- a/Project.toml +++ b/Project.toml @@ -7,11 +7,12 @@ version = "0.2.0" GPUCompiler = "61eb1bfa-7361-4325-ad38-22787b887f55" LLVM = "929cbde3-209d-540e-8aea-75f648917ca0" Libdl = "8f399da3-3557-5675-b5ff-fb832c97cbdb" +Serialization = "9e88b42a-f829-5b0c-bbe9-9e923198166b" [compat] -julia = "1.7" GPUCompiler = "0.13" LLVM = "4" +julia = "1.7" [extras] Formatting = "59287772-0a20-5a39-b81b-1366585eb4c0" diff --git a/README.md b/README.md index 5578005..c5f188d 
100644 --- a/README.md +++ b/README.md @@ -16,21 +16,29 @@ Pkg.add(PackageSpec( url = "https://github.com/tshort/StaticCompiler.jl", rev = ``` ```julia -using StaticCompiler -f(x) = 2x +julia> using StaticCompiler -# compile `f` and return an LLVM module -m = compile(f, (Int,)) +julia> fib(n) = n <= 1 ? n : fib(n - 1) + fib(n - 2) +fib (generic function with 1 method) -# compile `f` and write to a shared library ("f.so" or "f.dll") -generate_shlib(f, (Int,), "libf") -# find a function pointer for this shared library -fptr = generate_shlib_fptr("libf", "f") -@ccall $fptr(2::Int)::Int +julia> path, fib_compiled = compile(fib, Tuple{Int}, "fib") +("fib.cjl", StaticCompiler.StaticCompiledFunction{Int64, Tuple{Int64}}(:fib, Ptr{Nothing} @0x00007fc4ec032130)) -# do this in one step (this time with a temporary shared library) -fptr = generate_shlib_fptr(f, (Int,)) -@ccall $fptr(2::Int)::Int +julia> fib_compiled(10) +55 +``` +Now we can quite this session and load a new one where `fib` is not defined: +```julia +julia> using StaticCompiler + +julia> fib +ERROR: UndefVarError: fib not defined + +julia> fib_compiled = load_function("fib.cjl") +StaticCompiler.StaticCompiledFunction{Int64, Tuple{Int64}}(:fib, Ptr{Nothing} @0x00007f9ee8050130) + +julia> fib_compiled(10) +55 ``` ## Approach @@ -41,6 +49,6 @@ This package uses the [GPUCompiler package](https://github.com/JuliaGPU/GPUCompi * This package currently requires that you have `gcc` installed and in your system's `PATH`. This is probably pretty easy to fix, we only use `gcc` for linking. In theory Clang_jll or LLVM_full_jll should be able to do this, and be managed through Julia's package manager. * No heap allocations (e.g. creating an array or a string) are allowed inside a statically compiled function body. If you try to run such a function, you will get a segfault. 
-** It's sometimes possible you won't get a segfault if you define and run the function in the same session, but trying to call the compiled function in a new julia session will definitely segfault. +** It's sometimes possible you won't get a segfault if you define and run the function in the same session, but trying to call the compiled function in a new julia session will definitely segfault if you allocate memory. * Lots of other limitations too. E.g. there's an example in tests/runtests.jl where summing a vector of `Complex{Float32}` is fine, but segfaults on `Complex{Float64}`. * Doesn't currently work on Windows \ No newline at end of file diff --git a/src/StaticCompiler.jl b/src/StaticCompiler.jl index 0de1f67..06f71a5 100644 --- a/src/StaticCompiler.jl +++ b/src/StaticCompiler.jl @@ -3,9 +3,110 @@ module StaticCompiler using GPUCompiler: GPUCompiler using LLVM: LLVM using Libdl: Libdl +using Base: RefValue +using Serialization: serialize, deserialize +export compile, load_function +export native_code_llvm, native_code_typed, native_llvm_module -export generate_shlib, generate_shlib_fptr, compile, native_code_llvm, native_code_typed, native_llvm_module +""" + compile(f, types, path::String = tempname()) --> (obj_path, compiled_f) + + !!! Warning: this will fail on programs that heap allocate any memory, or have dynamic dispatch !!! + +Statically compile the method of a function `f` specialized to arguments of the type given by `types`. + +This will save a shared object file (i.e. a `.so` or `.dylib`) at the specified path, and will save a +`LazyStaticCompiledFunction` object at the same path with the extension `.cjl`. This +`LazyStaticCompiledFunction` can be deserialized with the function `load_function`. Once it is +instantiated in a julia session, it will be of type `StaticCompiledFunction` and may be called with +arguments of type `types` as if it were a function with a single method (the method determined by `types`). 
+ +`compile` will return a `obj_path` which is the location of the serialized `LazyStaticCompiledFunction`, and +an already instantiated `StaticCompiledFunction` object. + +Example: + +Define and compile a `fib` function: +```julia +julia> using StaticCompiler + +julia> fib(n) = n <= 1 ? n : fib(n - 1) + fib(n - 2) +fib (generic function with 1 method) + +julia> path, fib_compiled = compile(fib, Tuple{Int}, "fib") +("fib.cjl", StaticCompiler.StaticCompiledFunction{Int64, Tuple{Int64}}(:fib, Ptr{Nothing} @0x00007fc4ec032130)) + +julia> fib_compiled(10) +55 +``` +Now we can quite this session and load a new one where `fib` is not defined: +```julia +julia> using StaticCompiler + +julia> fib +ERROR: UndefVarError: fib not defined + +julia> fib_compiled = load_function("fib.cjl") +StaticCompiler.StaticCompiledFunction{Int64, Tuple{Int64}}(:fib, Ptr{Nothing} @0x00007f9ee8050130) + +julia> fib_compiled(10) +55 +``` +Tada! +""" +function compile(f, _tt, path::String = tempname(); name = GPUCompiler.safe_name(repr(f)), kwargs...) + tt = Base.to_tuple_type(_tt) + isconcretetype(tt) || error("input type signature $_tt is not concrete") + + rt = only(native_code_typed(f, tt))[2] + isconcretetype(rt) || error("$f on $_tt did not infer to a concrete type. Got $rt") + + # Would be nice to use a compiler pass or something to check if there are any heap allocations or references to globals + # Keep an eye on https://github.com/JuliaLang/julia/pull/43747 for this + + f_wrap!(out::Ref, args::Ref{<:Tuple}) = (out[] = f(args[]...); nothing) + + generate_shlib(f_wrap!, Tuple{RefValue{rt}, RefValue{tt}}, path, name; kwargs...) + + lf = LazyStaticCompiledFunction{rt, tt}(Symbol(f), path, name) + serialize(path*".cjl", lf) + path*".cjl", instantiate(lf) +end + + +""" + load_function(path) --> compiled_f + +load a `StaticCompiledFunction` from a given path. This object is callable. 
+""" +load_function(path) = instantiate(deserialize(path) :: LazyStaticCompiledFunction) + + +struct LazyStaticCompiledFunction{rt, tt} + f::Symbol + path::String + name::String +end + +function instantiate(p::LazyStaticCompiledFunction{rt, tt}) where {rt, tt} + StaticCompiledFunction{rt, tt}(p.f, generate_shlib_fptr(p.path::String, p.name)) +end + +struct StaticCompiledFunction{rt, tt} + f::Symbol + ptr::Ptr{Nothing} +end + +function (f::StaticCompiledFunction{rt, tt})(args...) where {rt, tt} + Tuple{typeof.(args)...} == tt || error("Input types don't match compiled target $((tt.parameters...,)). Got arguments of type $(typeof.(args))") + out = RefValue{rt}() + refargs = Ref(args) + ccall(f.ptr, Nothing, (Ref{rt}, Ref{tt}), out, refargs) + out[] +end + +instantiate(f::StaticCompiledFunction) = f module TestRuntime # dummy methods @@ -81,4 +182,8 @@ function native_llvm_module(f, tt, name = GPUCompiler.safe_name(repr(f)); kwargs end + + + + end # module diff --git a/test/runtests.jl b/test/runtests.jl index 78d62bf..85eb6f8 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -11,31 +11,22 @@ using StrideArraysCore simple_sum(x) = x + one(typeof(x)) # This probably needs a macro - @test ccall(generate_shlib_fptr(simple_sum, (Int,)), Int, (Int,), 1) == Int(2) - @test ccall(generate_shlib_fptr(simple_sum, (Float64,)), Float64, (Float64 ,), 1) == Float64(2) - - @test ccall(generate_shlib_fptr(simple_sum, (Int32,)), Int32, (Int32,), 1) == Int32(2) - @test ccall(generate_shlib_fptr(simple_sum, (Float32,)), Float32, (Float32 ,), 1) == Float16(2) - - @test ccall(generate_shlib_fptr(simple_sum, (Int16,)), Int16, (Int16,), 1) == Int16(2) - @test ccall(generate_shlib_fptr(simple_sum, (Float16,)), Float16, (Float16 ,), 1) == Float16(2) - + for T ∈ (Int, Float64, Int32, Float32, Int16, Float16) + @test compile(simple_sum, (T,))[2]( T(1) ) == T(2) + end end fib(n) = n <= 1 ? 
n : fib(n - 1) + fib(n - 2) # This needs to be defined globally due to https://github.com/JuliaLang/julia/issues/40990 @testset "Recursion" begin - fib_ptr = generate_shlib_fptr(fib, (Int,)) - @test @ccall( $fib_ptr(10::Int) :: Int ) == 55 + @test compile(fib, (Int,))[2](10) == fib(10) # Trick to work around #40990 _fib2(_fib2, n) = n <= 1 ? n : _fib2(_fib2, n-1) + _fib2(_fib2, n-2) fib2(n) = _fib2(_fib2, n) - fib2_ptr = generate_shlib_fptr(fib2, (Int,)) - @test @ccall( $fib2_ptr(20::Int) :: Int ) == 6765 - + @test compile(fib2, (Int,))[2](20) == fib(20) end # Call binaries for testing @@ -59,8 +50,8 @@ end end s end - @test ccall(generate_shlib_fptr(sum_first_N_int, (Int,)), Int, (Int,), 10) == 55 - + @test compile(sum_first_N_int, (Int,))[2](10) == 55 + function sum_first_N_float64(N) s = Float64(0) for a in 1:N @@ -68,7 +59,7 @@ end end s end - @test ccall(generate_shlib_fptr(sum_first_N_float64, (Int,)), Float64, (Int,), 10) == 55. + @test compile(sum_first_N_float64, (Int,))[2](10) == 55. function sum_first_N_int_inbounds(N) s = 0 @@ -77,8 +68,7 @@ end end s end - @test ccall(generate_shlib_fptr(sum_first_N_int_inbounds, (Int,)), Int, (Int,), 10) == 55 - + @test compile(sum_first_N_int_inbounds, (Int,))[2](10) == 55 function sum_first_N_float64_inbounds(N) s = Float64(0) @@ -87,7 +77,7 @@ end end s end - @test ccall(generate_shlib_fptr(sum_first_N_float64_inbounds, (Int,)), Float64, (Int,), 10) == 55. + @test compile(sum_first_N_float64_inbounds, (Int,))[2](10) == 55. end @@ -100,35 +90,20 @@ end end s end - - array_sum_ptr = generate_shlib_fptr(array_sum, Tuple{Int, Vector{Int}}) - @test ( @ccall $array_sum_ptr(10::Int, collect(1:10)::Vector{Int})::Int ) == 55 - # this will segfault on my machine if I use 64 bit complex numbers! 
- array_sum_complex_ptr = generate_shlib_fptr(array_sum, Tuple{Int, Vector{Complex{Float32}}}) - @test ( @ccall $array_sum_complex_ptr(2::Int, [1f0+im, 1f0-im]::Vector{Complex{Float32}})::Complex{Float32} ) ≈ 2.0 - - #This will segfault - array_sum_complex64_ptr = generate_shlib_fptr(array_sum, Tuple{Int, Vector{Complex{Float64}}}) - @test_skip ( @ccall $array_sum_complex_ptr(2::Int, [1.0+im, 1.0-im]::Vector{Complex{Float64}})::Complex{Float64} ) ≈ 2.0 + @test compile(array_sum, (Int, Vector{Int}))[2](10, Int.(1:10)) == 55 + @test compile(array_sum, (Int, Vector{Complex{Float32}}))[2](10, Complex{Float32}.(1:10)) == 55f0 + 0f0im + @test compile(array_sum, (Int, Vector{Complex{Float64}}))[2](10, Complex{Float64}.(1:10)) == 55f0 + 0f0im end # Julia wants to treat Tuple (and other things like it) as plain bits, but LLVM wants to treat it as something with a pointer. -# We need to be careful to not send, nor receive an unwrapped Tuple to a compiled function +# We need to be careful to not send, nor receive an unwrapped Tuple to a compiled function. +# The interface made in `compile` should handle this fine. @testset "Send and receive Tuple" begin foo(u::Tuple) = 2 .* reverse(u) .- 1 # we can't just compile this as is. 
- # Make a mutating function that places the output into a Ref for the caller to grab: - foo!(out::Ref{<:Tuple}, u::Tuple) = (out[] = foo(u); return nothing) - - foo_ptr = generate_shlib_fptr(foo!, Tuple{Base.RefValue{NTuple{3, Int}}, NTuple{3, Int}}) - out = Ref{NTuple{3, Int}}() - # we wrap u in a ref when we send it to the binary because LLVM expects that :( - u = Ref((1, 2, 3)) - (@ccall $foo_ptr(out::Ref{NTuple{3, Int}}, u::Ref{NTuple{3, Int}}) :: Nothing) - - @test out[] == foo(u[]) + @test compile(foo, (NTuple{3, Int},))[2]((1, 2, 3)) == (5, 3, 1) end @@ -139,8 +114,8 @@ end BLAS.dot(N, a, 1, a, 1) end a = [1.0, 2.0] - mydot_ptr = generate_shlib_fptr(mydot, Tuple{Vector{Float64}}) - @test @ccall( $mydot_ptr(a::Vector{Float64})::Float64 ) == 5.0 + + @test compile(mydot, (Vector{Float64},))[2](a) == 5.0 end @@ -166,13 +141,12 @@ end C[m,n] = Cmn end end - mul_ptr! = generate_shlib_fptr(mul!, Tuple{Matrix{Float64}, Matrix{Float64}, Matrix{Float64}}) - + C = Array{Float64}(undef, 10, 12) A = rand(10, 11) B = rand(11, 12) - @ccall $mul_ptr!(C::Matrix{Float64}, A::Matrix{Float64}, B::Matrix{Float64}) :: Nothing + compile(mul!, (Matrix{Float64}, Matrix{Float64}, Matrix{Float64},))[2](C, A, B) @test C ≈ A*B end @@ -189,10 +163,8 @@ end sum(arr) # compute the sum. 
It is very imporatant that no references to arr escape the function body end end - - fptr = generate_shlib_fptr(f, Tuple{Int}) - @test (@ccall $fptr(20::Int) :: Int) == 20 - + + @test compile(f, (Int,))[2](20) == 20 end From 73311bb0073f8981a32b3d3e3be186f78e6f77ce Mon Sep 17 00:00:00 2001 From: MasonProtter Date: Wed, 2 Feb 2022 20:08:09 -0700 Subject: [PATCH 024/159] fix typos --- README.md | 2 +- src/StaticCompiler.jl | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index c5f188d..eccd83e 100644 --- a/README.md +++ b/README.md @@ -27,7 +27,7 @@ julia> path, fib_compiled = compile(fib, Tuple{Int}, "fib") julia> fib_compiled(10) 55 ``` -Now we can quite this session and load a new one where `fib` is not defined: +Now we can quit this session and load a new one where `fib` is not defined: ```julia julia> using StaticCompiler diff --git a/src/StaticCompiler.jl b/src/StaticCompiler.jl index 06f71a5..bb85fcc 100644 --- a/src/StaticCompiler.jl +++ b/src/StaticCompiler.jl @@ -40,7 +40,7 @@ julia> path, fib_compiled = compile(fib, Tuple{Int}, "fib") julia> fib_compiled(10) 55 ``` -Now we can quite this session and load a new one where `fib` is not defined: +Now we can quit this session and load a new one where `fib` is not defined: ```julia julia> using StaticCompiler From 3658b05b8b552fbeed5e3a278c66048d35fc51e1 Mon Sep 17 00:00:00 2001 From: MasonProtter Date: Wed, 2 Feb 2022 20:08:32 -0700 Subject: [PATCH 025/159] bump version number --- Project.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Project.toml b/Project.toml index 461d8dd..a1faac6 100644 --- a/Project.toml +++ b/Project.toml @@ -1,7 +1,7 @@ name = "StaticCompiler" uuid = "81625895-6c0f-48fc-b932-11a18313743c" authors = ["Tom Short"] -version = "0.2.0" +version = "0.3.0" [deps] GPUCompiler = "61eb1bfa-7361-4325-ad38-22787b887f55" From ad2aba1b97f43c673f7d061a5754da0f899dc9f1 Mon Sep 17 00:00:00 2001 From: Mason Protter Date: Wed, 2 Feb 2022 
20:34:26 -0700 Subject: [PATCH 026/159] fix code block --- README.md | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index 6802bc2..2e369c4 100644 --- a/README.md +++ b/README.md @@ -39,6 +39,7 @@ StaticCompiler.StaticCompiledFunction{Int64, Tuple{Int64}}(:fib, Ptr{Nothing} @0 julia> fib_compiled(10) 55 +``` ## Approach @@ -50,4 +51,4 @@ do * No heap allocations (e.g. creating an array or a string) are allowed inside a statically compiled function body. If you try to run such a function, you will get a segfault. ** It's sometimes possible you won't get a segfault if you define and run the function in the same session, but trying to call the compiled function in a new julia session will definitely segfault if you allocate memory. * Lots of other limitations too. E.g. there's an example in tests/runtests.jl where summing a vector of `Complex{Float32}` is fine, but segfaults on `Complex{Float64}`. -* Doesn't currently work on Windows \ No newline at end of file +* Doesn't currently work on Windows From e005ac5509ff67459a81a411261aed7db63809c2 Mon Sep 17 00:00:00 2001 From: MasonProtter Date: Wed, 2 Feb 2022 20:35:52 -0700 Subject: [PATCH 027/159] remove accidental word --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 2e369c4..17921a8 100644 --- a/README.md +++ b/README.md @@ -44,7 +44,7 @@ julia> fib_compiled(10) ## Approach This package uses the [GPUCompiler package](https://github.com/JuliaGPU/GPUCompiler.jl) to generate code. -do + ## Limitations * This package currently requires that you have `gcc` installed and in your system's `PATH`. This is probably pretty easy to fix, we only use `gcc` for linking. In theory Clang_jll or LLVM_full_jll should be able to do this, and be managed through Julia's package manager. 
From 0096d0be58b4ad57b61365745333adb127618ae0 Mon Sep 17 00:00:00 2001 From: MasonProtter Date: Wed, 2 Feb 2022 20:36:58 -0700 Subject: [PATCH 028/159] update limitations --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 17921a8..0018a83 100644 --- a/README.md +++ b/README.md @@ -50,5 +50,5 @@ This package uses the [GPUCompiler package](https://github.com/JuliaGPU/GPUCompi * This package currently requires that you have `gcc` installed and in your system's `PATH`. This is probably pretty easy to fix, we only use `gcc` for linking. In theory Clang_jll or LLVM_full_jll should be able to do this, and be managed through Julia's package manager. * No heap allocations (e.g. creating an array or a string) are allowed inside a statically compiled function body. If you try to run such a function, you will get a segfault. ** It's sometimes possible you won't get a segfault if you define and run the function in the same session, but trying to call the compiled function in a new julia session will definitely segfault if you allocate memory. -* Lots of other limitations too. E.g. there's an example in tests/runtests.jl where summing a vector of `Complex{Float32}` is fine, but segfaults on `Complex{Float64}`. * Doesn't currently work on Windows +* If you find any other limitations, let us know. There's probably lots. 
From 9426a050095824c931393056d72af738e8c8f514 Mon Sep 17 00:00:00 2001 From: MasonProtter Date: Wed, 2 Feb 2022 21:22:42 -0700 Subject: [PATCH 029/159] use Clang_jll to make sure there is a compiler available --- Manifest.toml | 10 ++++++++++ Project.toml | 1 + src/StaticCompiler.jl | 8 ++++---- 3 files changed, 15 insertions(+), 4 deletions(-) diff --git a/Manifest.toml b/Manifest.toml index ab4efd7..43eef3e 100644 --- a/Manifest.toml +++ b/Manifest.toml @@ -17,6 +17,12 @@ git-tree-sha1 = "215a9aa4a1f23fbd05b92769fdd62559488d70e9" uuid = "fa961155-64e5-5f13-b03f-caf6b980ea82" version = "0.4.1" +[[deps.Clang_jll]] +deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg", "Zlib_jll", "libLLVM_jll"] +git-tree-sha1 = "8cf7e67e264dedc5d321ec87e78525e958aea057" +uuid = "0ee61d77-7f21-5576-8119-9fcc46b10100" +version = "12.0.1+3" + [[deps.Dates]] deps = ["Printf"] uuid = "ade2ca70-3891-5945-98fb-dc099432e06a" @@ -150,6 +156,10 @@ uuid = "4ec0a83e-493e-50e2-b9ac-8f72acf5a8f5" deps = ["Libdl"] uuid = "83775a58-1f1d-513f-b197-d71354ab007a" +[[deps.libLLVM_jll]] +deps = ["Artifacts", "Libdl"] +uuid = "8f36deef-c2a5-5394-99ed-8e07531fb29a" + [[deps.nghttp2_jll]] deps = ["Artifacts", "Libdl"] uuid = "8e850ede-7688-5339-a07c-302acd2aaf8d" diff --git a/Project.toml b/Project.toml index a1faac6..5293f7b 100644 --- a/Project.toml +++ b/Project.toml @@ -4,6 +4,7 @@ authors = ["Tom Short"] version = "0.3.0" [deps] +Clang_jll = "0ee61d77-7f21-5576-8119-9fcc46b10100" GPUCompiler = "61eb1bfa-7361-4325-ad38-22787b887f55" LLVM = "929cbde3-209d-540e-8aea-75f648917ca0" Libdl = "8f399da3-3557-5675-b5ff-fb832c97cbdb" diff --git a/src/StaticCompiler.jl b/src/StaticCompiler.jl index 29da3e6..952d687 100644 --- a/src/StaticCompiler.jl +++ b/src/StaticCompiler.jl @@ -5,6 +5,7 @@ using LLVM: LLVM using Libdl: Libdl using Base: RefValue using Serialization: serialize, deserialize +using Clang_jll: clang export compile, load_function export native_code_llvm, native_code_typed, native_llvm_module @@ 
-82,7 +83,6 @@ load a `StaticCompiledFunction` from a given path. This object is callable. """ load_function(path) = instantiate(deserialize(path) :: LazyStaticCompiledFunction) - struct LazyStaticCompiledFunction{rt, tt} f::Symbol path::String @@ -138,7 +138,9 @@ function generate_shlib(f, tt, path::String = tempname(), name = GPUCompiler.saf write(io, obj) flush(io) - run(`gcc -shared -o $path.$(Libdl.dlext) $path`) + clang() do exe + run(`$exe -shared -o $path.$(Libdl.dlext) $path`) + end rm(path) end path, name @@ -162,8 +164,6 @@ function generate_shlib_fptr(path::String, name) fptr end - - function native_code_llvm(@nospecialize(func), @nospecialize(types); kwargs...) job, kwargs = native_job(func, types; kwargs...) GPUCompiler.code_llvm(stdout, job; kwargs...) From d877bbfa90a06c0749b0f58dc6172c4c0ce1ee3a Mon Sep 17 00:00:00 2001 From: MasonProtter Date: Wed, 2 Feb 2022 21:23:24 -0700 Subject: [PATCH 030/159] no longer just call out to `gcc` willy nilly --- README.md | 1 - 1 file changed, 1 deletion(-) diff --git a/README.md b/README.md index 0018a83..f979e87 100644 --- a/README.md +++ b/README.md @@ -47,7 +47,6 @@ This package uses the [GPUCompiler package](https://github.com/JuliaGPU/GPUCompi ## Limitations -* This package currently requires that you have `gcc` installed and in your system's `PATH`. This is probably pretty easy to fix, we only use `gcc` for linking. In theory Clang_jll or LLVM_full_jll should be able to do this, and be managed through Julia's package manager. * No heap allocations (e.g. creating an array or a string) are allowed inside a statically compiled function body. If you try to run such a function, you will get a segfault. ** It's sometimes possible you won't get a segfault if you define and run the function in the same session, but trying to call the compiled function in a new julia session will definitely segfault if you allocate memory. 
* Doesn't currently work on Windows From 798c2d0349183af4c9ee8bfee7e38de286c54bcb Mon Sep 17 00:00:00 2001 From: MasonProtter Date: Wed, 2 Feb 2022 22:12:02 -0700 Subject: [PATCH 031/159] put compilation artifacts into one directory. --- README.md | 10 ++++----- src/StaticCompiler.jl | 48 ++++++++++++++++++++++++++----------------- test/runtests.jl | 28 ++++++++++++------------- 3 files changed, 48 insertions(+), 38 deletions(-) diff --git a/README.md b/README.md index f979e87..c8139ec 100644 --- a/README.md +++ b/README.md @@ -21,8 +21,8 @@ julia> using StaticCompiler julia> fib(n) = n <= 1 ? n : fib(n - 1) + fib(n - 2) fib (generic function with 1 method) -julia> path, fib_compiled = compile(fib, Tuple{Int}, "fib") -("fib.cjl", StaticCompiler.StaticCompiledFunction{Int64, Tuple{Int64}}(:fib, Ptr{Nothing} @0x00007fc4ec032130)) +julia> fib_compiled, path = compile(fib, Tuple{Int}, "fib") +(f = fib(::Int64) :: Int64, path = "fib") julia> fib_compiled(10) 55 @@ -34,10 +34,10 @@ julia> using StaticCompiler julia> fib ERROR: UndefVarError: fib not defined -julia> fib_compiled = load_function("fib.cjl") -StaticCompiler.StaticCompiledFunction{Int64, Tuple{Int64}}(:fib, Ptr{Nothing} @0x00007f9ee8050130) +julia> fib_compiled = load_function("fib") +fib(::Int64) :: Int64 -julia> fib_compiled(10) +julia> fib_compiled(10) 55 ``` diff --git a/src/StaticCompiler.jl b/src/StaticCompiler.jl index 952d687..a16c193 100644 --- a/src/StaticCompiler.jl +++ b/src/StaticCompiler.jl @@ -11,20 +11,20 @@ export compile, load_function export native_code_llvm, native_code_typed, native_llvm_module """ - compile(f, types, path::String = tempname()) --> (obj_path, compiled_f) + compile(f, types, path::String = tempname()) --> (compiled_f, path) !!! Warning: this will fail on programs that heap allocate any memory, or have dynamic dispatch !!! Statically compile the method of a function `f` specialized to arguments of the type given by `types`. -This will save a shared object file (i.e. 
a `.so` or `.dylib`) at the specified path, and will save a -`LazyStaticCompiledFunction` object at the same path with the extension `.cjl`. This -`LazyStaticCompiledFunction` can be deserialized with the function `load_function`. Once it is -instantiated in a julia session, it will be of type `StaticCompiledFunction` and may be called with -arguments of type `types` as if it were a function with a single method (the method determined by `types`). +This will create a directory at the specified path with a shared object file (i.e. a `.so` or `.dylib`), +and will save a `LazyStaticCompiledFunction` object in the same directory with the extension `.cjl`. This +`LazyStaticCompiledFunction` can be deserialized with `load_function(path)`. Once it is instantiated in +a julia session, it will be of type `StaticCompiledFunction` and may be called with arguments of type +`types` as if it were a function with a single method (the method determined by `types`). -`compile` will return a `obj_path` which is the location of the serialized `LazyStaticCompiledFunction`, and -an already instantiated `StaticCompiledFunction` object. +`compile` will return an already instantiated `StaticCompiledFunction` object and `obj_path` which is the +location of the directory containing the compilation artifacts. Example: @@ -35,8 +35,8 @@ julia> using StaticCompiler julia> fib(n) = n <= 1 ? 
n : fib(n - 1) + fib(n - 2) fib (generic function with 1 method) -julia> path, fib_compiled = compile(fib, Tuple{Int}, "fib") -("fib.cjl", StaticCompiler.StaticCompiledFunction{Int64, Tuple{Int64}}(:fib, Ptr{Nothing} @0x00007fc4ec032130)) +julia> fib_compiled, path = compile(fib, Tuple{Int}, "fib") +(f = fib(::Int64) :: Int64, path = "fib") julia> fib_compiled(10) 55 @@ -49,7 +49,7 @@ julia> fib ERROR: UndefVarError: fib not defined julia> fib_compiled = load_function("fib.cjl") -StaticCompiler.StaticCompiledFunction{Int64, Tuple{Int64}}(:fib, Ptr{Nothing} @0x00007f9ee8050130) +fib(::Int64) :: Int64 julia> fib_compiled(10) 55 @@ -71,8 +71,9 @@ function compile(f, _tt, path::String = tempname(); name = GPUCompiler.safe_nam generate_shlib(f_wrap!, Tuple{RefValue{rt}, RefValue{tt}}, path, name; kwargs...) lf = LazyStaticCompiledFunction{rt, tt}(Symbol(f), path, name) - serialize(path*".cjl", lf) - path*".cjl", instantiate(lf) + cjl_path = joinpath(path, "obj.cjl") + serialize(cjl_path, lf) + (; f = instantiate(lf), path) end @@ -81,7 +82,7 @@ end load a `StaticCompiledFunction` from a given path. This object is callable. """ -load_function(path) = instantiate(deserialize(path) :: LazyStaticCompiledFunction) +load_function(path) = instantiate(deserialize(joinpath(path, "obj.cjl")) :: LazyStaticCompiledFunction) struct LazyStaticCompiledFunction{rt, tt} f::Symbol @@ -98,6 +99,11 @@ struct StaticCompiledFunction{rt, tt} ptr::Ptr{Nothing} end +function Base.show(io::IO, f::StaticCompiledFunction{rt, tt}) where {rt, tt} + types = [tt.parameters...] + print(io, String(f.f), "(", join(("::$T" for T ∈ tt.parameters), ',') ,") :: $rt") +end + function (f::StaticCompiledFunction{rt, tt})(args...) where {rt, tt} Tuple{typeof.(args)...} == tt || error("Input types don't match compiled target $((tt.parameters...,)). 
Got arguments of type $(typeof.(args))") out = RefValue{rt}() @@ -132,33 +138,37 @@ function native_job(@nospecialize(func), @nospecialize(types); kernel::Bool=fals end function generate_shlib(f, tt, path::String = tempname(), name = GPUCompiler.safe_name(repr(f)); kwargs...) - open(path, "w") do io + mkpath(path) + obj_path = joinpath(path, "obj") + lib_path = joinpath(path, "obj.$(Libdl.dlext)") + open(obj_path, "w") do io job, kwargs = native_job(f, tt; name, kwargs...) obj, _ = GPUCompiler.codegen(:obj, job; strip=true, only_entry=false, validate=false) write(io, obj) flush(io) clang() do exe - run(`$exe -shared -o $path.$(Libdl.dlext) $path`) + run(`$exe -shared -o $lib_path $obj_path`) end - rm(path) end path, name end function generate_shlib_fptr(f, tt, path::String=tempname(), name = GPUCompiler.safe_name(repr(f)); temp::Bool=true, kwargs...) generate_shlib(f, tt, path, name; kwargs...) + lib_path = joinpath(abspath(path), "obj.$(Libdl.dlext)") ptr = Libdl.dlopen("$(abspath(path)).$(Libdl.dlext)", Libdl.RTLD_LOCAL) fptr = Libdl.dlsym(ptr, "julia_$name") @assert fptr != C_NULL if temp - atexit(()->rm("$path.$(Libdl.dlext)")) + atexit(()->rm(path; recursive=true)) end fptr end function generate_shlib_fptr(path::String, name) - ptr = Libdl.dlopen("$(abspath(path)).$(Libdl.dlext)", Libdl.RTLD_LOCAL) + lib_path = joinpath(abspath(path), "obj.$(Libdl.dlext)") + ptr = Libdl.dlopen(lib_path, Libdl.RTLD_LOCAL) fptr = Libdl.dlsym(ptr, "julia_$name") @assert fptr != C_NULL fptr diff --git a/test/runtests.jl b/test/runtests.jl index 51a8832..c0fcaf3 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -12,7 +12,7 @@ using StrideArraysCore # This probably needs a macro for T ∈ (Int, Float64, Int32, Float32, Int16, Float16) - @test compile(simple_sum, (T,))[2]( T(1) ) == T(2) + @test compile(simple_sum, (T,))[1]( T(1) ) == T(2) end end @@ -20,13 +20,13 @@ end fib(n) = n <= 1 ? 
n : fib(n - 1) + fib(n - 2) # This needs to be defined globally due to https://github.com/JuliaLang/julia/issues/40990 @testset "Recursion" begin - @test compile(fib, (Int,))[2](10) == fib(10) + @test compile(fib, (Int,))[1](10) == fib(10) # Trick to work around #40990 _fib2(_fib2, n) = n <= 1 ? n : _fib2(_fib2, n-1) + _fib2(_fib2, n-2) fib2(n) = _fib2(_fib2, n) - @test compile(fib2, (Int,))[2](20) == fib(20) + @test compile(fib2, (Int,))[1](20) == fib(20) end # Call binaries for testing @@ -50,7 +50,7 @@ end end s end - @test compile(sum_first_N_int, (Int,))[2](10) == 55 + @test compile(sum_first_N_int, (Int,))[1](10) == 55 function sum_first_N_float64(N) s = Float64(0) @@ -59,7 +59,7 @@ end end s end - @test compile(sum_first_N_float64, (Int,))[2](10) == 55. + @test compile(sum_first_N_float64, (Int,))[1](10) == 55. function sum_first_N_int_inbounds(N) s = 0 @@ -68,7 +68,7 @@ end end s end - @test compile(sum_first_N_int_inbounds, (Int,))[2](10) == 55 + @test compile(sum_first_N_int_inbounds, (Int,))[1](10) == 55 function sum_first_N_float64_inbounds(N) s = Float64(0) @@ -77,7 +77,7 @@ end end s end - @test compile(sum_first_N_float64_inbounds, (Int,))[2](10) == 55. + @test compile(sum_first_N_float64_inbounds, (Int,))[1](10) == 55. 
end @@ -91,9 +91,9 @@ end s end - @test compile(array_sum, (Int, Vector{Int}))[2](10, Int.(1:10)) == 55 - @test compile(array_sum, (Int, Vector{Complex{Float32}}))[2](10, Complex{Float32}.(1:10)) == 55f0 + 0f0im - @test compile(array_sum, (Int, Vector{Complex{Float64}}))[2](10, Complex{Float64}.(1:10)) == 55f0 + 0f0im + @test compile(array_sum, (Int, Vector{Int}))[1](10, Int.(1:10)) == 55 + @test compile(array_sum, (Int, Vector{Complex{Float32}}))[1](10, Complex{Float32}.(1:10)) == 55f0 + 0f0im + @test compile(array_sum, (Int, Vector{Complex{Float64}}))[1](10, Complex{Float64}.(1:10)) == 55f0 + 0f0im end @@ -103,7 +103,7 @@ end @testset "Send and receive Tuple" begin foo(u::Tuple) = 2 .* reverse(u) .- 1 # we can't just compile this as is. - @test compile(foo, (NTuple{3, Int},))[2]((1, 2, 3)) == (5, 3, 1) + @test compile(foo, (NTuple{3, Int},))[1]((1, 2, 3)) == (5, 3, 1) end @@ -115,7 +115,7 @@ end end a = [1.0, 2.0] - @test compile(mydot, (Vector{Float64},))[2](a) == 5.0 + @test compile(mydot, (Vector{Float64},))[1](a) == 5.0 end @@ -146,7 +146,7 @@ end A = rand(10, 11) B = rand(11, 12) - compile(mul!, (Matrix{Float64}, Matrix{Float64}, Matrix{Float64},))[2](C, A, B) + compile(mul!, (Matrix{Float64}, Matrix{Float64}, Matrix{Float64},))[1](C, A, B) @test C ≈ A*B end @@ -164,7 +164,7 @@ end end end - @test compile(f, (Int,))[2](20) == 20 + @test compile(f, (Int,))[1](20) == 20 end From c4e440116dc0b99da6e7741166726af353f5e84f Mon Sep 17 00:00:00 2001 From: MasonProtter Date: Wed, 2 Feb 2022 22:25:42 -0700 Subject: [PATCH 032/159] try testing on windows again --- .github/workflows/ci.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 03f8476..e645408 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -15,6 +15,7 @@ jobs: os: - ubuntu-latest - macOS-latest + - windows-latest arch: - x64 steps: From 57197cee8d995b2dba25bc2ece8985786014649c Mon Sep 17 00:00:00 2001 From: MasonProtter Date: 
Wed, 2 Feb 2022 22:49:20 -0700 Subject: [PATCH 033/159] try falling back to `gcc` --- src/StaticCompiler.jl | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/src/StaticCompiler.jl b/src/StaticCompiler.jl index a16c193..9d3aced 100644 --- a/src/StaticCompiler.jl +++ b/src/StaticCompiler.jl @@ -147,8 +147,13 @@ function generate_shlib(f, tt, path::String = tempname(), name = GPUCompiler.saf write(io, obj) flush(io) - clang() do exe - run(`$exe -shared -o $lib_path $obj_path`) + try + clang() do exe + run(`$exe -shared -o $lib_path $obj_path`) + end + catch e; + # if Clang_jll fails, check if gcc is available + run(`gcc -shared -o $lib_path $obj_path`) end end path, name From 8b5268332888e90bc4ceab062d5a283f512a7c34 Mon Sep 17 00:00:00 2001 From: MasonProtter Date: Wed, 2 Feb 2022 23:17:56 -0700 Subject: [PATCH 034/159] redirect clang output to devnull --- src/StaticCompiler.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/StaticCompiler.jl b/src/StaticCompiler.jl index 9d3aced..2166b3e 100644 --- a/src/StaticCompiler.jl +++ b/src/StaticCompiler.jl @@ -149,7 +149,7 @@ function generate_shlib(f, tt, path::String = tempname(), name = GPUCompiler.saf flush(io) try clang() do exe - run(`$exe -shared -o $lib_path $obj_path`) + run(pipeline(`$exe -shared -o $lib_path $obj_path`, stdout=devnull)) #get rid of devnull for debugging end catch e; # if Clang_jll fails, check if gcc is available From 90c150b888463014232e93e9565d277f6cb7e806 Mon Sep 17 00:00:00 2001 From: MasonProtter Date: Wed, 2 Feb 2022 23:19:04 -0700 Subject: [PATCH 035/159] don't test on windows --- .github/workflows/ci.yml | 1 - 1 file changed, 1 deletion(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index e645408..03f8476 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -15,7 +15,6 @@ jobs: os: - ubuntu-latest - macOS-latest - - windows-latest arch: - x64 steps: From b5e123ad0d00482c086ffd42bb1619727e92f9cc Mon Sep 
17 00:00:00 2001 From: MasonProtter Date: Wed, 2 Feb 2022 23:25:58 -0700 Subject: [PATCH 036/159] fix --- src/StaticCompiler.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/StaticCompiler.jl b/src/StaticCompiler.jl index 2166b3e..e272fab 100644 --- a/src/StaticCompiler.jl +++ b/src/StaticCompiler.jl @@ -149,7 +149,7 @@ function generate_shlib(f, tt, path::String = tempname(), name = GPUCompiler.saf flush(io) try clang() do exe - run(pipeline(`$exe -shared -o $lib_path $obj_path`, stdout=devnull)) #get rid of devnull for debugging + run(pipeline(`$exe -shared -o $lib_path $obj_path`, stderr=devnull)) #get rid of devnull when debugging end catch e; # if Clang_jll fails, check if gcc is available From 2de06f4ca3a3cf4a3c9752e90b90e016f6e5fa19 Mon Sep 17 00:00:00 2001 From: MasonProtter Date: Thu, 3 Feb 2022 08:21:03 -0700 Subject: [PATCH 037/159] fix generate_shlib_fptr --- src/StaticCompiler.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/StaticCompiler.jl b/src/StaticCompiler.jl index e272fab..7cef288 100644 --- a/src/StaticCompiler.jl +++ b/src/StaticCompiler.jl @@ -162,7 +162,7 @@ end function generate_shlib_fptr(f, tt, path::String=tempname(), name = GPUCompiler.safe_name(repr(f)); temp::Bool=true, kwargs...) generate_shlib(f, tt, path, name; kwargs...) 
lib_path = joinpath(abspath(path), "obj.$(Libdl.dlext)") - ptr = Libdl.dlopen("$(abspath(path)).$(Libdl.dlext)", Libdl.RTLD_LOCAL) + ptr = Libdl.dlopen(lib_path, Libdl.RTLD_LOCAL) fptr = Libdl.dlsym(ptr, "julia_$name") @assert fptr != C_NULL if temp From 310ffeaabf0b8254eb93e295c49a66381abd9182 Mon Sep 17 00:00:00 2001 From: MasonProtter Date: Thu, 3 Feb 2022 10:24:05 -0700 Subject: [PATCH 038/159] return abspath --- src/StaticCompiler.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/StaticCompiler.jl b/src/StaticCompiler.jl index 7cef288..84ead38 100644 --- a/src/StaticCompiler.jl +++ b/src/StaticCompiler.jl @@ -73,7 +73,7 @@ function compile(f, _tt, path::String = tempname(); name = GPUCompiler.safe_nam lf = LazyStaticCompiledFunction{rt, tt}(Symbol(f), path, name) cjl_path = joinpath(path, "obj.cjl") serialize(cjl_path, lf) - (; f = instantiate(lf), path) + (; f = instantiate(lf), path=abspath(path)) end From 70625560afe15cc8f108b6a023b1473802f1c66f Mon Sep 17 00:00:00 2001 From: MasonProtter Date: Thu, 3 Feb 2022 15:20:16 -0700 Subject: [PATCH 039/159] do tests with a binary loaded on another process. 
This will catch reloactability problems --- test/Project.toml | 3 ++- test/runtests.jl | 61 +++++++++++++++++++++++++++++++---------------- 2 files changed, 43 insertions(+), 21 deletions(-) diff --git a/test/Project.toml b/test/Project.toml index 3dee0e5..b0cf1dc 100644 --- a/test/Project.toml +++ b/test/Project.toml @@ -6,4 +6,5 @@ LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e" Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40" LoopVectorization = "bdcacae8-1622-11e9-2a5c-532679323890" ManualMemory = "d125e4d3-2237-4719-b19c-fa641b8a4667" -StrideArraysCore = "7792a7ef-975c-4747-a70f-980b88e8d1da" \ No newline at end of file +StrideArraysCore = "7792a7ef-975c-4747-a70f-980b88e8d1da" +Distributed = "8ba89e20-285c-5b6f-9357-94700520ee1b" \ No newline at end of file diff --git a/test/runtests.jl b/test/runtests.jl index c0fcaf3..9c1191b 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -5,6 +5,12 @@ using LinearAlgebra using LoopVectorization using ManualMemory using StrideArraysCore +using Distributed + +addprocs(1) +@everywhere using StaticCompiler + +remote_load_call(path, args...) = fetch(@spawnat 2 load_function(path)(args...)) @testset "Basics" begin @@ -12,7 +18,8 @@ using StrideArraysCore # This probably needs a macro for T ∈ (Int, Float64, Int32, Float32, Int16, Float16) - @test compile(simple_sum, (T,))[1]( T(1) ) == T(2) + _, path, = compile(simple_sum, (T,)) + @test remote_load_call(path, T(1)) == T(2) end end @@ -20,13 +27,16 @@ end fib(n) = n <= 1 ? n : fib(n - 1) + fib(n - 2) # This needs to be defined globally due to https://github.com/JuliaLang/julia/issues/40990 @testset "Recursion" begin - @test compile(fib, (Int,))[1](10) == fib(10) + _, path = compile(fib, (Int,)) + @test remote_load_call(path, 10) == fib(10) # Trick to work around #40990 _fib2(_fib2, n) = n <= 1 ? 
n : _fib2(_fib2, n-1) + _fib2(_fib2, n-2) fib2(n) = _fib2(_fib2, n) - - @test compile(fib2, (Int,))[1](20) == fib(20) + + _, path = compile(fib2, (Int,)) + @test remote_load_call(path, 20) == fib(20) + #@test compile(fib2, (Int,))[1](20) == fib(20) end # Call binaries for testing @@ -50,7 +60,8 @@ end end s end - @test compile(sum_first_N_int, (Int,))[1](10) == 55 + _, path = compile(sum_first_N_int, (Int,)) + @test remote_load_call(path, 10) == 55 function sum_first_N_float64(N) s = Float64(0) @@ -59,7 +70,8 @@ end end s end - @test compile(sum_first_N_float64, (Int,))[1](10) == 55. + _, path = compile(sum_first_N_float64, (Int,)) + @test remote_load_call(path, 10) == 55. function sum_first_N_int_inbounds(N) s = 0 @@ -68,7 +80,8 @@ end end s end - @test compile(sum_first_N_int_inbounds, (Int,))[1](10) == 55 + _, path = compile(sum_first_N_int_inbounds, (Int,)) + @test remote_load_call(path, 10) == 55 function sum_first_N_float64_inbounds(N) s = Float64(0) @@ -77,8 +90,8 @@ end end s end - @test compile(sum_first_N_float64_inbounds, (Int,))[1](10) == 55. - + _, path = compile(sum_first_N_float64_inbounds, (Int,)) + @test remote_load_call(path, 10) == 55. end # Arrays with different input types Int32, Int64, Float32, Float64, Complex? 
@@ -90,10 +103,14 @@ end end s end + for T ∈ (Int, Complex{Float32}, Complex{Float64}) + _, path = compile(array_sum, (Int, Vector{T})) + @test remote_load_call(path, 10, T.(1:10)) == T(55) + end - @test compile(array_sum, (Int, Vector{Int}))[1](10, Int.(1:10)) == 55 - @test compile(array_sum, (Int, Vector{Complex{Float32}}))[1](10, Complex{Float32}.(1:10)) == 55f0 + 0f0im - @test compile(array_sum, (Int, Vector{Complex{Float64}}))[1](10, Complex{Float64}.(1:10)) == 55f0 + 0f0im + # @test (10, Int.(1:10)) == 55 + # @test compile(array_sum, (Int, Vector{Complex{Float32}}))[1](10, Complex{Float32}.(1:10)) == 55f0 + 0f0im + # @test compile(array_sum, (Int, Vector{Complex{Float64}}))[1](10, Complex{Float64}.(1:10)) == 55f0 + 0f0im end @@ -101,9 +118,10 @@ end # We need to be careful to not send, nor receive an unwrapped Tuple to a compiled function. # The interface made in `compile` should handle this fine. @testset "Send and receive Tuple" begin - foo(u::Tuple) = 2 .* reverse(u) .- 1 # we can't just compile this as is. - - @test compile(foo, (NTuple{3, Int},))[1]((1, 2, 3)) == (5, 3, 1) + foo(u::Tuple) = 2 .* reverse(u) .- 1 + + _, path = compile(foo, (NTuple{3, Int},)) + @test remote_load_call(path, (1, 2, 3)) == (5, 3, 1) end @@ -114,8 +132,9 @@ end BLAS.dot(N, a, 1, a, 1) end a = [1.0, 2.0] - - @test compile(mydot, (Vector{Float64},))[1](a) == 5.0 + mydot_compiled, path = compile(mydot, (Vector{Float64},)) + @test_skip remote_load_call(path, a) == 5.0 # this needs a relocatable pointer to work + @test mydot_compiled(a) == 5.0 end @@ -146,7 +165,9 @@ end A = rand(10, 11) B = rand(11, 12) - compile(mul!, (Matrix{Float64}, Matrix{Float64}, Matrix{Float64},))[1](C, A, B) + _, path = compile(mul!, (Matrix{Float64}, Matrix{Float64}, Matrix{Float64},)) + # remote_load_call(path, C, A, B) This won't work because @spawnat copies C + C .= fetch(@spawnat 2 (load_function(path)(C, A, B); C)) @test C ≈ A*B end @@ -163,8 +184,8 @@ end sum(arr) # compute the sum. 
It is very imporatant that no references to arr escape the function body end end - - @test compile(f, (Int,))[1](20) == 20 + _, path = compile(f, (Int,)) + @test remote_load_call(path, 20) == 20 end From 7f6477475af93e56e9323d81d579560e0d9a49d6 Mon Sep 17 00:00:00 2001 From: MasonProtter Date: Thu, 3 Feb 2022 15:22:29 -0700 Subject: [PATCH 040/159] use cc instead of gcc --- src/StaticCompiler.jl | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/StaticCompiler.jl b/src/StaticCompiler.jl index 84ead38..78b8596 100644 --- a/src/StaticCompiler.jl +++ b/src/StaticCompiler.jl @@ -153,7 +153,7 @@ function generate_shlib(f, tt, path::String = tempname(), name = GPUCompiler.saf end catch e; # if Clang_jll fails, check if gcc is available - run(`gcc -shared -o $lib_path $obj_path`) + run(`cc -shared -o $lib_path $obj_path`) end end path, name @@ -196,5 +196,4 @@ function native_llvm_module(f, tt, name = GPUCompiler.safe_name(repr(f)); kwargs return m end - end # module From e86be065467b16e3b0c4e2916e49d9fa5fdf4855 Mon Sep 17 00:00:00 2001 From: "C.Brenhin Keller" Date: Thu, 3 Feb 2022 21:21:45 -0500 Subject: [PATCH 041/159] Use .bc extension for LLVM bitcode --- src/StaticCompiler.jl | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/src/StaticCompiler.jl b/src/StaticCompiler.jl index 78b8596..a8bfa7f 100644 --- a/src/StaticCompiler.jl +++ b/src/StaticCompiler.jl @@ -15,15 +15,15 @@ export native_code_llvm, native_code_typed, native_llvm_module !!! Warning: this will fail on programs that heap allocate any memory, or have dynamic dispatch !!! -Statically compile the method of a function `f` specialized to arguments of the type given by `types`. +Statically compile the method of a function `f` specialized to arguments of the type given by `types`. -This will create a directory at the specified path with a shared object file (i.e. 
a `.so` or `.dylib`), -and will save a `LazyStaticCompiledFunction` object in the same directory with the extension `.cjl`. This -`LazyStaticCompiledFunction` can be deserialized with `load_function(path)`. Once it is instantiated in -a julia session, it will be of type `StaticCompiledFunction` and may be called with arguments of type +This will create a directory at the specified path with a shared object file (i.e. a `.so` or `.dylib`), +and will save a `LazyStaticCompiledFunction` object in the same directory with the extension `.cjl`. This +`LazyStaticCompiledFunction` can be deserialized with `load_function(path)`. Once it is instantiated in +a julia session, it will be of type `StaticCompiledFunction` and may be called with arguments of type `types` as if it were a function with a single method (the method determined by `types`). -`compile` will return an already instantiated `StaticCompiledFunction` object and `obj_path` which is the +`compile` will return an already instantiated `StaticCompiledFunction` object and `obj_path` which is the location of the directory containing the compilation artifacts. Example: @@ -67,7 +67,7 @@ function compile(f, _tt, path::String = tempname(); name = GPUCompiler.safe_nam # Keep an eye on https://github.com/JuliaLang/julia/pull/43747 for this f_wrap!(out::Ref, args::Ref{<:Tuple}) = (out[] = f(args[]...); nothing) - + generate_shlib(f_wrap!, Tuple{RefValue{rt}, RefValue{tt}}, path, name; kwargs...) lf = LazyStaticCompiledFunction{rt, tt}(Symbol(f), path, name) @@ -139,12 +139,12 @@ end function generate_shlib(f, tt, path::String = tempname(), name = GPUCompiler.safe_name(repr(f)); kwargs...) mkpath(path) - obj_path = joinpath(path, "obj") + obj_path = joinpath(path, "obj.bc") lib_path = joinpath(path, "obj.$(Libdl.dlext)") open(obj_path, "w") do io job, kwargs = native_job(f, tt; name, kwargs...) 
obj, _ = GPUCompiler.codegen(:obj, job; strip=true, only_entry=false, validate=false) - + write(io, obj) flush(io) try From 4e3faf3880f9e4a21318bc9adc9476fcb96874d1 Mon Sep 17 00:00:00 2001 From: "C.Brenhin Keller" Date: Thu, 3 Feb 2022 22:22:08 -0500 Subject: [PATCH 042/159] Add docstring for generate_shlib --- src/StaticCompiler.jl | 39 ++++++++++++++++++++++++++++++++++++++- 1 file changed, 38 insertions(+), 1 deletion(-) diff --git a/src/StaticCompiler.jl b/src/StaticCompiler.jl index a8bfa7f..463747a 100644 --- a/src/StaticCompiler.jl +++ b/src/StaticCompiler.jl @@ -26,7 +26,7 @@ a julia session, it will be of type `StaticCompiledFunction` and may be called w `compile` will return an already instantiated `StaticCompiledFunction` object and `obj_path` which is the location of the directory containing the compilation artifacts. -Example: +### Examples: Define and compile a `fib` function: ```julia @@ -137,6 +137,43 @@ function native_job(@nospecialize(func), @nospecialize(types); kernel::Bool=fals GPUCompiler.CompilerJob(target, source, params), kwargs end + +""" +```julia +generate_shlib(f, tt, path::String, name::String; kwargs...) +``` +Low level interface for compiling a shared object / dynamically loaded library + (`.so` / `.dylib`) for function `f` given a tuple type `tt` characterizing +the types of the arguments for which the function will be compiled. 
+ +### Examples +```julia +julia> function test(n) + r = 0.0 + for i=1:n + r += log(sqrt(i)) + end + return r/n + end +test (generic function with 1 method) + +julia> path, name = StaticCompiler.generate_shlib(test, Tuple{Int64}, "./test") +("./test", "test") + +shell> tree \$path +./test +|-- obj.bc +`-- obj.dylib + +0 directories, 2 files + +julia> test(100_000) +5.256496109495593 + +julia> ccall(StaticCompiler.generate_shlib_fptr(path, name), Float64, (Int64,), 100_000) +5.256496109495593 +``` +""" function generate_shlib(f, tt, path::String = tempname(), name = GPUCompiler.safe_name(repr(f)); kwargs...) mkpath(path) obj_path = joinpath(path, "obj.bc") From e82f0fd6c9316c8dba4d3f39c33f5dc3ebf2c60f Mon Sep 17 00:00:00 2001 From: "C.Brenhin Keller" Date: Thu, 3 Feb 2022 22:48:30 -0500 Subject: [PATCH 043/159] Add docstring for `generate_shlib_fptr` --- src/StaticCompiler.jl | 39 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 39 insertions(+) diff --git a/src/StaticCompiler.jl b/src/StaticCompiler.jl index 463747a..c49f0b8 100644 --- a/src/StaticCompiler.jl +++ b/src/StaticCompiler.jl @@ -146,6 +146,8 @@ Low level interface for compiling a shared object / dynamically loaded library (`.so` / `.dylib`) for function `f` given a tuple type `tt` characterizing the types of the arguments for which the function will be compiled. +See also `StaticCompiler.generate_shlib_fptr`. + ### Examples ```julia julia> function test(n) @@ -196,6 +198,7 @@ function generate_shlib(f, tt, path::String = tempname(), name = GPUCompiler.saf path, name end + function generate_shlib_fptr(f, tt, path::String=tempname(), name = GPUCompiler.safe_name(repr(f)); temp::Bool=true, kwargs...) generate_shlib(f, tt, path, name; kwargs...) lib_path = joinpath(abspath(path), "obj.$(Libdl.dlext)") @@ -208,6 +211,42 @@ function generate_shlib_fptr(f, tt, path::String=tempname(), name = GPUCompiler. 
fptr end +""" +```julia +generate_shlib_fptr(path::String, name) +``` +Low level interface for obtaining a function pointer by `dlopen`ing a shared +library given the `path` and `name` of a `.so`/`.dylib` already compiled by +`generate_shlib`. + +See also `StaticCompiler.enerate_shlib`. + +### Examples +```julia +julia> function test(n) + r = 0.0 + for i=1:n + r += log(sqrt(i)) + end + return r/n + end +test (generic function with 1 method) + +julia> path, name = StaticCompiler.generate_shlib(test, Tuple{Int64}, "./test"); + +julia> test_ptr = StaticCompiler.generate_shlib_fptr(path, name) +Ptr{Nothing} @0x000000015209f600 + +julia> ccall(test_ptr, Float64, (Int64,), 100_000) +5.256496109495593 + +julia> @ccall \$test_ptr(100_000::Int64)::Float64 # Equivalently +5.256496109495593 + +julia> test(100_000) +5.256496109495593 +``` +""" function generate_shlib_fptr(path::String, name) lib_path = joinpath(abspath(path), "obj.$(Libdl.dlext)") ptr = Libdl.dlopen(lib_path, Libdl.RTLD_LOCAL) From f94c0556827dd2b6629fc6f682cebbc15b6ba0c8 Mon Sep 17 00:00:00 2001 From: "C.Brenhin Keller" Date: Thu, 3 Feb 2022 23:04:25 -0500 Subject: [PATCH 044/159] Special case for apple, allow compiler errors through again in other cases --- src/StaticCompiler.jl | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/src/StaticCompiler.jl b/src/StaticCompiler.jl index c49f0b8..6cc90d6 100644 --- a/src/StaticCompiler.jl +++ b/src/StaticCompiler.jl @@ -186,13 +186,15 @@ function generate_shlib(f, tt, path::String = tempname(), name = GPUCompiler.saf write(io, obj) flush(io) + + # Pick a Clang + cc = Sys.isapple() ? `cc` : clang() + # Compile! 
try - clang() do exe - run(pipeline(`$exe -shared -o $lib_path $obj_path`, stderr=devnull)) #get rid of devnull when debugging - end - catch e; - # if Clang_jll fails, check if gcc is available - run(`cc -shared -o $lib_path $obj_path`) + run(`$cc -shared -o $lib_path $obj_path`) + catch + # If all Clangs fail, try system gcc + run(`gcc -shared -o $lib_path $obj_path`) end end path, name From 195960343d17e16b2874baf902c18985b0075fc2 Mon Sep 17 00:00:00 2001 From: Mason Protter Date: Thu, 3 Feb 2022 21:34:46 -0700 Subject: [PATCH 045/159] Update StaticCompiler.jl --- src/StaticCompiler.jl | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/src/StaticCompiler.jl b/src/StaticCompiler.jl index 6cc90d6..5842b72 100644 --- a/src/StaticCompiler.jl +++ b/src/StaticCompiler.jl @@ -190,12 +190,7 @@ function generate_shlib(f, tt, path::String = tempname(), name = GPUCompiler.saf # Pick a Clang cc = Sys.isapple() ? `cc` : clang() # Compile! - try - run(`$cc -shared -o $lib_path $obj_path`) - catch - # If all Clangs fail, try system gcc - run(`gcc -shared -o $lib_path $obj_path`) - end + run(`$cc -shared -o $lib_path $obj_path`) end path, name end From bfdd7ba3a7a149cacdf64eacf941de65d20e5c16 Mon Sep 17 00:00:00 2001 From: MasonProtter Date: Thu, 3 Feb 2022 21:45:23 -0700 Subject: [PATCH 046/159] rename `.bc` to `.o` --- src/StaticCompiler.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/StaticCompiler.jl b/src/StaticCompiler.jl index 5842b72..5d3edb2 100644 --- a/src/StaticCompiler.jl +++ b/src/StaticCompiler.jl @@ -178,7 +178,7 @@ julia> ccall(StaticCompiler.generate_shlib_fptr(path, name), Float64, (Int64,), """ function generate_shlib(f, tt, path::String = tempname(), name = GPUCompiler.safe_name(repr(f)); kwargs...) mkpath(path) - obj_path = joinpath(path, "obj.bc") + obj_path = joinpath(path, "obj.o") lib_path = joinpath(path, "obj.$(Libdl.dlext)") open(obj_path, "w") do io job, kwargs = native_job(f, tt; name, kwargs...) 
From f710f81fe435dfb0f5323cef9e711fe68f3d4855 Mon Sep 17 00:00:00 2001 From: MasonProtter Date: Fri, 4 Feb 2022 11:49:46 -0700 Subject: [PATCH 047/159] don't bother building docs for now --- .github/workflows/{docs.yml => docsyml_backup} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename .github/workflows/{docs.yml => docsyml_backup} (100%) diff --git a/.github/workflows/docs.yml b/.github/workflows/docsyml_backup similarity index 100% rename from .github/workflows/docs.yml rename to .github/workflows/docsyml_backup From 438f02b1d93cb80b4ad984ef696b991657660ede Mon Sep 17 00:00:00 2001 From: MasonProtter Date: Fri, 4 Feb 2022 11:50:05 -0700 Subject: [PATCH 048/159] fix typo --- README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index c8139ec..e458edd 100644 --- a/README.md +++ b/README.md @@ -34,10 +34,10 @@ julia> using StaticCompiler julia> fib ERROR: UndefVarError: fib not defined -julia> fib_comiled = load_function("fib") +julia> fib_compiled = load_function("fib") fib(::Int64) :: Int64 -julia> fib_comiled(10) +julia> fib_compiled(10) 55 ``` From 4d348e52a430365a3e13aa8da6222a267e3e51d2 Mon Sep 17 00:00:00 2001 From: MasonProtter Date: Fri, 4 Feb 2022 11:50:12 -0700 Subject: [PATCH 049/159] iterate on docs --- src/StaticCompiler.jl | 46 +++++++++++++++++++++++++++++++------------ 1 file changed, 33 insertions(+), 13 deletions(-) diff --git a/src/StaticCompiler.jl b/src/StaticCompiler.jl index 5d3edb2..8d0ec0e 100644 --- a/src/StaticCompiler.jl +++ b/src/StaticCompiler.jl @@ -17,16 +17,16 @@ export native_code_llvm, native_code_typed, native_llvm_module Statically compile the method of a function `f` specialized to arguments of the type given by `types`. -This will create a directory at the specified path with a shared object file (i.e. a `.so` or `.dylib`), -and will save a `LazyStaticCompiledFunction` object in the same directory with the extension `.cjl`. 
This -`LazyStaticCompiledFunction` can be deserialized with `load_function(path)`. Once it is instantiated in -a julia session, it will be of type `StaticCompiledFunction` and may be called with arguments of type -`types` as if it were a function with a single method (the method determined by `types`). +This will create a directory at the specified path (or in a temporary directory if you exclude that argument) +that contains the files needed for your static compiled function. `compile` will return a +`StaticCompiledFunction` object and `obj_path` which is the absolute path of the directory containing the +compilation artifacts. The `StaticCompiledFunction` can be treated as if it is a function with a single +method corresponding to the types you specified when it was compiled. -`compile` will return an already instantiated `StaticCompiledFunction` object and `obj_path` which is the -location of the directory containing the compilation artifacts. +To deserialize and instantiate a previously compiled function, simply execute `load_function(path)`, which +returns a callable `StaticCompiledFunction`. -### Examples: +### Example: Define and compile a `fib` function: ```julia @@ -43,11 +43,11 @@ julia> fib_compiled(10) ``` Now we can quit this session and load a new one where `fib` is not defined: ```julia -julia> using StaticCompiler - julia> fib ERROR: UndefVarError: fib not defined +julia> using StaticCompiler + julia> fib_compiled = load_function("fib.cjl") fib(::Int64) :: Int64 @@ -55,6 +55,27 @@ julia> fib_compiled(10) 55 ``` Tada! + +### Details: + +Here is the structure of the directory created by `compile` in the above example: +```julia +shell> tree fib +path +├── obj.cjl +├── obj.o +└── obj.so + +0 directories, 3 files +```` +* `obj.so` (or `.dylib` on MacOS) is a shared object file that can be linked to in order to execute your +compiled julia function. 
+* `obj.cjl` is a serialized `LazyStaticCompiledFunction` object which will be deserialized and instantiated +with `load_function(path)`. `LazyStaticcompiledfunction`s contain the requisite information needed to link to the +`obj.so` inside a julia session. Once it is instantiated in a julia session (i.e. by +`instantiate(::LazyStaticCompiledFunction)`, this happens automatically in `load_function`), it will be of type +`StaticCompiledFunction` and may be called with arguments of type `types` as if it were a function with a +single method (the method determined by `types`). """ function compile(f, _tt, path::String = tempname(); name = GPUCompiler.safe_name(repr(f)), kwargs...) tt = Base.to_tuple_type(_tt) @@ -76,7 +97,6 @@ function compile(f, _tt, path::String = tempname(); name = GPUCompiler.safe_nam (; f = instantiate(lf), path=abspath(path)) end - """ load_function(path) --> compiled_f @@ -164,8 +184,8 @@ julia> path, name = StaticCompiler.generate_shlib(test, Tuple{Int64}, "./test") shell> tree \$path ./test -|-- obj.bc -`-- obj.dylib +|-- obj.o +`-- obj.so 0 directories, 2 files From 567168a81b5d887c6fc436f00bde6d529b6690d6 Mon Sep 17 00:00:00 2001 From: "C. 
Brenhin Keller" Date: Thu, 10 Feb 2022 01:22:48 -0500 Subject: [PATCH 050/159] Add `compile_executable` (#56) * Allow changeing filename base in generate_shlib, though leave obj as default * Add `generate_executable` * Add `compile_executable` * Update docstring for compile_executable * Add tests for compile_executable * llvmcall used in standalone test only works on Julia 1.8+ * Looks like clang_jll ain't gonna cut it for the standalone executables even on linux * Try annotating compiled function with Base.@ccallable * Let's try some inlining * Ah, so `@ccallable` requires a return type * Ok, so no `@ccallable` * Potential workaround on non-apple systems: use minimal wrapper * We can probably use clang_jll after all * Probably about time for a version bump * Add more minimal test for v1.7, fix specifier on advanced test * Implement changes requested in code review --- src/StaticCompiler.jl | 171 +++++++++++++++++++++++++++++++++++++----- test/runtests.jl | 63 ++++++++++++++-- 2 files changed, 206 insertions(+), 28 deletions(-) diff --git a/src/StaticCompiler.jl b/src/StaticCompiler.jl index 8d0ec0e..c3cd855 100644 --- a/src/StaticCompiler.jl +++ b/src/StaticCompiler.jl @@ -7,20 +7,20 @@ using Base: RefValue using Serialization: serialize, deserialize using Clang_jll: clang -export compile, load_function +export compile, load_function, compile_executable export native_code_llvm, native_code_typed, native_llvm_module """ compile(f, types, path::String = tempname()) --> (compiled_f, path) - !!! Warning: this will fail on programs that heap allocate any memory, or have dynamic dispatch !!! + !!! Warning: this will fail on programs that heap allocate any memory tracked by the GC, or have dynamic dispatch !!! Statically compile the method of a function `f` specialized to arguments of the type given by `types`. 
This will create a directory at the specified path (or in a temporary directory if you exclude that argument) that contains the files needed for your static compiled function. `compile` will return a -`StaticCompiledFunction` object and `obj_path` which is the absolute path of the directory containing the -compilation artifacts. The `StaticCompiledFunction` can be treated as if it is a function with a single +`StaticCompiledFunction` object and `obj_path` which is the absolute path of the directory containing the +compilation artifacts. The `StaticCompiledFunction` can be treated as if it is a function with a single method corresponding to the types you specified when it was compiled. To deserialize and instantiate a previously compiled function, simply execute `load_function(path)`, which @@ -68,14 +68,14 @@ path 0 directories, 3 files ```` -* `obj.so` (or `.dylib` on MacOS) is a shared object file that can be linked to in order to execute your -compiled julia function. +* `obj.so` (or `.dylib` on MacOS) is a shared object file that can be linked to in order to execute your +compiled julia function. * `obj.cjl` is a serialized `LazyStaticCompiledFunction` object which will be deserialized and instantiated -with `load_function(path)`. `LazyStaticcompiledfunction`s contain the requisite information needed to link to the -`obj.so` inside a julia session. Once it is instantiated in a julia session (i.e. by -`instantiate(::LazyStaticCompiledFunction)`, this happens automatically in `load_function`), it will be of type -`StaticCompiledFunction` and may be called with arguments of type `types` as if it were a function with a -single method (the method determined by `types`). +with `load_function(path)`. `LazyStaticcompiledfunction`s contain the requisite information needed to link to the +`obj.so` inside a julia session. Once it is instantiated in a julia session (i.e. 
by +`instantiate(::LazyStaticCompiledFunction)`, this happens automatically in `load_function`), it will be of type +`StaticCompiledFunction` and may be called with arguments of type `types` as if it were a function with a +single method (the method determined by `types`). """ function compile(f, _tt, path::String = tempname(); name = GPUCompiler.safe_name(repr(f)), kwargs...) tt = Base.to_tuple_type(_tt) @@ -134,6 +134,81 @@ end instantiate(f::StaticCompiledFunction) = f + +""" +```julia +compile_executable(f, types::Tuple, path::String, name::String=repr(f); filename::String=name, kwargs...) +``` +Attempt to compile a standalone executable that runs function `f` with a type signature given by the tuple of `types`. + +### Examples +```julia +julia> using StaticCompiler + +julia> function puts(s::Ptr{UInt8}) # Can't use Base.println because it allocates. + # Note, this `llvmcall` requires Julia 1.8+ + Base.llvmcall((\""" + ; External declaration of the puts function + declare i32 @puts(i8* nocapture) nounwind + + define i32 @main(i8*) { + entry: + %call = call i32 (i8*) @puts(i8* %0) + ret i32 0 + } + \""", "main"), Int32, Tuple{Ptr{UInt8}}, s) + end +puts (generic function with 1 method) + +julia> function print_args(argc::Int, argv::Ptr{Ptr{UInt8}}) + for i=1:argc + # Get pointer + p = unsafe_load(argv, i) + # Print string at pointer location (which fortunately already exists isn't tracked by the GC) + puts(p) + end + return 0 + end + +julia> compile_executable(print_args, (Int, Ptr{Ptr{UInt8}})) +"/Users/foo/code/StaticCompiler.jl/print_args" + +shell> ./print_args 1 2 3 4 Five +./print_args +1 +2 +3 +4 +Five +``` +```julia +julia> using StaticTools # So you don't have to define `puts` and friends every time + +julia> hello() = println(c"Hello, world!") # c"..." makes a stack-allocated StaticString + +julia> compile_executable(hello) +"/Users/foo/code/StaticCompiler.jl/hello" + +shell> ./hello +Hello, world! 
+``` +""" +function compile_executable(f, _tt=(), path::String="./", name=GPUCompiler.safe_name(repr(f)); filename=name, kwargs...) + tt = Base.to_tuple_type(_tt) + tt == Tuple{} || tt == Tuple{Int, Ptr{Ptr{UInt8}}} || error("input type signature $_tt must be either () or (Int, Ptr{Ptr{UInt8}})") + + rt = only(native_code_typed(f, tt))[2] + isconcretetype(rt) || error("$f$_tt did not infer to a concrete type. Got $rt") + + # Would be nice to use a compiler pass or something to check if there are any heap allocations or references to globals + # Keep an eye on https://github.com/JuliaLang/julia/pull/43747 for this + + generate_executable(f, tt, path, name, filename; kwargs...) + + joinpath(abspath(path), filename) +end + + module TestRuntime # dummy methods signal_exception() = return @@ -160,7 +235,7 @@ end """ ```julia -generate_shlib(f, tt, path::String, name::String; kwargs...) +generate_shlib(f, tt, path::String, name::String, filenamebase::String="obj"; kwargs...) ``` Low level interface for compiling a shared object / dynamically loaded library (`.so` / `.dylib`) for function `f` given a tuple type `tt` characterizing @@ -196,10 +271,10 @@ julia> ccall(StaticCompiler.generate_shlib_fptr(path, name), Float64, (Int64,), 5.256496109495593 ``` """ -function generate_shlib(f, tt, path::String = tempname(), name = GPUCompiler.safe_name(repr(f)); kwargs...) +function generate_shlib(f, tt, path::String = tempname(), name = GPUCompiler.safe_name(repr(f)), filenamebase::String="obj"; kwargs...) mkpath(path) - obj_path = joinpath(path, "obj.o") - lib_path = joinpath(path, "obj.$(Libdl.dlext)") + obj_path = joinpath(path, "$filenamebase.o") + lib_path = joinpath(path, "$filenamebase.$(Libdl.dlext)") open(obj_path, "w") do io job, kwargs = native_job(f, tt; name, kwargs...) 
obj, _ = GPUCompiler.codegen(:obj, job; strip=true, only_entry=false, validate=false) @@ -216,9 +291,9 @@ function generate_shlib(f, tt, path::String = tempname(), name = GPUCompiler.saf end -function generate_shlib_fptr(f, tt, path::String=tempname(), name = GPUCompiler.safe_name(repr(f)); temp::Bool=true, kwargs...) +function generate_shlib_fptr(f, tt, path::String=tempname(), name = GPUCompiler.safe_name(repr(f)), filenamebase::String="obj"; temp::Bool=true, kwargs...) generate_shlib(f, tt, path, name; kwargs...) - lib_path = joinpath(abspath(path), "obj.$(Libdl.dlext)") + lib_path = joinpath(abspath(path), "$filenamebase.$(Libdl.dlext)") ptr = Libdl.dlopen(lib_path, Libdl.RTLD_LOCAL) fptr = Libdl.dlsym(ptr, "julia_$name") @assert fptr != C_NULL @@ -236,7 +311,7 @@ Low level interface for obtaining a function pointer by `dlopen`ing a shared library given the `path` and `name` of a `.so`/`.dylib` already compiled by `generate_shlib`. -See also `StaticCompiler.enerate_shlib`. +See also `StaticCompiler.generate_shlib`. ### Examples ```julia @@ -264,14 +339,70 @@ julia> test(100_000) 5.256496109495593 ``` """ -function generate_shlib_fptr(path::String, name) - lib_path = joinpath(abspath(path), "obj.$(Libdl.dlext)") +function generate_shlib_fptr(path::String, name, filenamebase::String="obj") + lib_path = joinpath(abspath(path), "$filenamebase.$(Libdl.dlext)") ptr = Libdl.dlopen(lib_path, Libdl.RTLD_LOCAL) fptr = Libdl.dlsym(ptr, "julia_$name") @assert fptr != C_NULL fptr end +""" +```julia +generate_executable(f, tt, path::String, name, filename=string(name); kwargs...) +``` +Attempt to compile a standalone executable that runs `f`. 
+ +### Examples +```julia +julia> function test(n) + r = 0.0 + for i=1:n + r += log(sqrt(i)) + end + return r/n + end +test (generic function with 1 method) + +julia> path, name = StaticCompiler.generate_executable(test, Tuple{Int64}, "./scratch") +``` +""" +function generate_executable(f, tt, path::String = tempname(), name = GPUCompiler.safe_name(repr(f)), filename::String=string(name); kwargs...) + mkpath(path) + obj_path = joinpath(path, "$filename.o") + exec_path = joinpath(path, filename) + open(obj_path, "w") do io + job, kwargs = native_job(f, tt; name, kwargs...) + obj, _ = GPUCompiler.codegen(:obj, job; strip=true, only_entry=false, validate=false) + + write(io, obj) + flush(io) + + # Pick a compiler + cc = Sys.isapple() ? `cc` : clang() + # Compile! + if Sys.isapple() + # Apple no longer uses _start, so we can just specify a custom entry + entry = "_julia_$name" + run(`$cc -e $entry $obj_path -o $exec_path`) + else + # Write a minimal wrapper to avoid having to specify a custom entry + wrapper_path = joinpath(path, "wrapper.c") + f = open(wrapper_path, "w") + print(f, """int main(int argc, char** argv) + { + julia_$name(argc, argv); + return 0; + }""") + close(f) + run(`$cc $wrapper_path $obj_path -o $exec_path`) + # Clean up + run(`rm $wrapper_path`) + end + end + path, name +end + function native_code_llvm(@nospecialize(func), @nospecialize(types); kwargs...) job, kwargs = native_job(func, types; kwargs...) GPUCompiler.code_llvm(stdout, job; kwargs...) diff --git a/test/runtests.jl b/test/runtests.jl index 9c1191b..3201dbe 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -33,16 +33,16 @@ fib(n) = n <= 1 ? n : fib(n - 1) + fib(n - 2) # This needs to be defined globall # Trick to work around #40990 _fib2(_fib2, n) = n <= 1 ? 
n : _fib2(_fib2, n-1) + _fib2(_fib2, n-2) fib2(n) = _fib2(_fib2, n) - + _, path = compile(fib2, (Int,)) @test remote_load_call(path, 20) == fib(20) - #@test compile(fib2, (Int,))[1](20) == fib(20) + #@test compile(fib2, (Int,))[1](20) == fib(20) end # Call binaries for testing # @testset "Generate binary" begin # fib(n) = n <= 1 ? n : fib(n - 1) + fib(n - 2) -# libname = tempname() +# libname = tempname() # generate_shlib(fib, (Int,), libname) # ptr = Libdl.dlopen(libname * "." * Libdl.dlext, Libdl.RTLD_LOCAL) # fptr = Libdl.dlsym(ptr, "julia_fib") @@ -62,7 +62,7 @@ end end _, path = compile(sum_first_N_int, (Int,)) @test remote_load_call(path, 10) == 55 - + function sum_first_N_float64(N) s = Float64(0) for a in 1:N @@ -116,10 +116,10 @@ end # Julia wants to treat Tuple (and other things like it) as plain bits, but LLVM wants to treat it as something with a pointer. # We need to be careful to not send, nor receive an unwrapped Tuple to a compiled function. -# The interface made in `compile` should handle this fine. +# The interface made in `compile` should handle this fine. @testset "Send and receive Tuple" begin foo(u::Tuple) = 2 .* reverse(u) .- 1 - + _, path = compile(foo, (NTuple{3, Int},)) @test remote_load_call(path, (1, 2, 3)) == (5, 3, 1) end @@ -172,7 +172,7 @@ end end # This is a trick to get stack allocated arrays inside a function body (so long as they don't escape). 
-# This lets us have intermediate, mutable stack allocated arrays inside our +# This lets us have intermediate, mutable stack allocated arrays inside our @testset "Alloca" begin function f(N) # this can hold at most 100 Int values, if you use it for more, you'll segfault @@ -186,7 +186,54 @@ end end _, path = compile(f, (Int,)) @test remote_load_call(path, 20) == 20 -end +end + +@testset "Standalone Executables" begin + # Minimal test with no `llvmcall` + @inline function foo() + v = 0.0 + n = 1000 + for i=1:n + v += sqrt(n) + end + return 0 + end + + filepath = compile_executable(foo, (), tempdir()) + + r = run(`$filepath`); + @test isa(r, Base.Process) + @test r.exitcode == 0 + @static if VERSION>v"1.8.0-DEV" # The llvmcall here only works on 1.8+ + @inline function puts(s::Ptr{UInt8}) # Can't use Base.println because it allocates + Base.llvmcall((""" + ; External declaration of the puts function + declare i32 @puts(i8* nocapture) nounwind + define i32 @main(i8*) { + entry: + %call = call i32 (i8*) @puts(i8* %0) + ret i32 0 + } + """, "main"), Int32, Tuple{Ptr{UInt8}}, s) + end + + @inline function print_args(argc::Int, argv::Ptr{Ptr{UInt8}}) + for i=1:argc + # Get pointer + p = unsafe_load(argv, i) + # Print string at pointer location (which fortunately already exists isn't tracked by the GC) + puts(p) + end + return 0 + end + + filepath = compile_executable(print_args, (Int, Ptr{Ptr{UInt8}}), tempdir()) + + r = run(`$filepath Hello, world!`); + @test isa(r, Base.Process) + @test r.exitcode == 0 + end +end # data structures, dictionaries, tuples, named tuples From 536ea4410e0d04916ba722c4d7298d83a76f98aa Mon Sep 17 00:00:00 2001 From: Mason Protter Date: Thu, 10 Feb 2022 22:34:12 -0700 Subject: [PATCH 051/159] Patch and relocate pointers (#58) * Patch and relocate points * comment out erroneous `@show` * doc update and `filename` support in `compile` * Update src/StaticCompiler.jl Co-authored-by: Julian Samaroo * Update README.md Co-authored-by: Julian Samaroo * 
Update src/StaticCompiler.jl Co-authored-by: Julian Samaroo * Update src/StaticCompiler.jl Co-authored-by: Julian Samaroo * Update src/StaticCompiler.jl Co-authored-by: Julian Samaroo * Update src/StaticCompiler.jl Co-authored-by: Julian Samaroo * Update src/pointer_patching.jl Co-authored-by: Julian Samaroo * Apply suggestions from code review Co-authored-by: Julian Samaroo * reenable blas test * go back to serialization stdlib * note on gvs * try using JITTargetMachine for the dynamic linker * try running CI on windows * do some code reorganization * refine warning * allow windows failures * add options to strip the llvm / asm code * remove `@showln` * add `pointer_patching_diff` function to quickly see the effect of `relocation_table!` * improve pointer relocation * consolidate logic and reduce segfaults during compilation * remove unnecessary stuff * don't duplicate global values, switch from Dict to IdDict for reloc * avoid re-creation of global variables * more tests * add dummy test for error handing * test with ErrorTypes.jl * bump version Co-authored-by: Julian Samaroo --- .github/workflows/ci.yml | 2 + Manifest.toml | 97 +++++++++++++++++++++- Project.toml | 3 +- README.md | 10 ++- src/StaticCompiler.jl | 132 ++++++++++------------------- src/code_loading.jl | 77 +++++++++++++++++ src/pointer_patching.jl | 174 +++++++++++++++++++++++++++++++++++++++ src/target.jl | 43 ++++++++++ test/Project.toml | 3 +- test/runtests.jl | 86 +++++++++++++++++-- 10 files changed, 523 insertions(+), 104 deletions(-) create mode 100644 src/code_loading.jl create mode 100644 src/pointer_patching.jl create mode 100644 src/target.jl diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 03f8476..0ea99ae 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -6,6 +6,7 @@ jobs: test: name: Julia ${{ matrix.version }} - ${{ matrix.os }} - ${{ matrix.arch }} runs-on: ${{ matrix.os }} + continue-on-error: ${{ matrix.os == 'windows-latest' }} 
strategy: fail-fast: false matrix: @@ -15,6 +16,7 @@ jobs: os: - ubuntu-latest - macOS-latest + - windows-latest arch: - x64 steps: diff --git a/Manifest.toml b/Manifest.toml index 43eef3e..a31f820 100644 --- a/Manifest.toml +++ b/Manifest.toml @@ -1,6 +1,6 @@ # This file is machine-generated - editing it directly is not advised -julia_version = "1.7.1" +julia_version = "1.7.2" manifest_format = "2.0" [[deps.ArgTools]] @@ -23,10 +23,34 @@ git-tree-sha1 = "8cf7e67e264dedc5d321ec87e78525e958aea057" uuid = "0ee61d77-7f21-5576-8119-9fcc46b10100" version = "12.0.1+3" +[[deps.Compat]] +deps = ["Base64", "Dates", "DelimitedFiles", "Distributed", "InteractiveUtils", "LibGit2", "Libdl", "LinearAlgebra", "Markdown", "Mmap", "Pkg", "Printf", "REPL", "Random", "SHA", "Serialization", "SharedArrays", "Sockets", "SparseArrays", "Statistics", "Test", "UUIDs", "Unicode"] +git-tree-sha1 = "44c37b4636bc54afac5c574d2d02b625349d6582" +uuid = "34da2185-b29b-5c13-b0c7-acf172513d20" +version = "3.41.0" + +[[deps.CompilerSupportLibraries_jll]] +deps = ["Artifacts", "Libdl"] +uuid = "e66e0078-7015-5450-92f7-15fbd957f2ae" + +[[deps.DataStructures]] +deps = ["Compat", "InteractiveUtils", "OrderedCollections"] +git-tree-sha1 = "3daef5523dd2e769dad2365274f760ff5f282c7d" +uuid = "864edb3b-99cc-5e75-8d2d-829cb0a9cfe8" +version = "0.18.11" + [[deps.Dates]] deps = ["Printf"] uuid = "ade2ca70-3891-5945-98fb-dc099432e06a" +[[deps.DelimitedFiles]] +deps = ["Mmap"] +uuid = "8bb1440f-4735-579b-a4ab-409b98df4dab" + +[[deps.Distributed]] +deps = ["Random", "Serialization", "Sockets"] +uuid = "8ba89e20-285c-5b6f-9357-94700520ee1b" + [[deps.Downloads]] deps = ["ArgTools", "LibCURL", "NetworkOptions"] uuid = "f43a241f-c20a-4ad4-852c-f6b1247861c6" @@ -36,6 +60,12 @@ git-tree-sha1 = "56559bbef6ca5ea0c0818fa5c90320398a6fbf8d" uuid = "e2ba6199-217a-4e67-a87a-7c52f15ade04" version = "0.1.8" +[[deps.FileIO]] +deps = ["Pkg", "Requires", "UUIDs"] +git-tree-sha1 = "80ced645013a5dbdc52cf70329399c35ce007fae" +uuid = 
"5789e2e9-d7fb-5bc7-8068-2c6fae9b9549" +version = "1.13.0" + [[deps.GPUCompiler]] deps = ["ExprTools", "InteractiveUtils", "LLVM", "Libdl", "Logging", "TimerOutputs", "UUIDs"] git-tree-sha1 = "abd824e1f2ecd18d33811629c781441e94a24e81" @@ -46,6 +76,12 @@ version = "0.13.11" deps = ["Markdown"] uuid = "b77e0a4c-d291-57a0-90e8-8db25a27a240" +[[deps.JLD2]] +deps = ["DataStructures", "FileIO", "MacroTools", "Mmap", "Pkg", "Printf", "Reexport", "TranscodingStreams", "UUIDs"] +git-tree-sha1 = "b528d68220e2aba1d2d0c0461b6f7eda8c5c1e33" +uuid = "033835bb-8acc-5ee8-8aae-3f567f8a3819" +version = "0.4.20" + [[deps.JLLWrappers]] deps = ["Preferences"] git-tree-sha1 = "abc9885a7ca2052a736a600f7fa66209f96506e1" @@ -83,9 +119,19 @@ uuid = "29816b5a-b9ab-546f-933c-edad1886dfa8" [[deps.Libdl]] uuid = "8f399da3-3557-5675-b5ff-fb832c97cbdb" +[[deps.LinearAlgebra]] +deps = ["Libdl", "libblastrampoline_jll"] +uuid = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e" + [[deps.Logging]] uuid = "56ddb016-857b-54e1-b83d-db4d58db5568" +[[deps.MacroTools]] +deps = ["Markdown", "Random"] +git-tree-sha1 = "3d3e902b31198a27340d0bf00d6ac452866021cf" +uuid = "1914dd2f-81c6-5fcd-8719-6d5c9610ff09" +version = "0.5.9" + [[deps.Markdown]] deps = ["Base64"] uuid = "d6f4376e-aef5-505a-96c1-9c027394607a" @@ -94,12 +140,24 @@ uuid = "d6f4376e-aef5-505a-96c1-9c027394607a" deps = ["Artifacts", "Libdl"] uuid = "c8ffd9c3-330d-5841-b78e-0817d7145fa1" +[[deps.Mmap]] +uuid = "a63ad114-7e13-5084-954f-fe012c677804" + [[deps.MozillaCACerts_jll]] uuid = "14a3606d-f60d-562e-9121-12d972cd8159" [[deps.NetworkOptions]] uuid = "ca575930-c2e3-43a9-ace4-1e988b2c1908" +[[deps.OpenBLAS_jll]] +deps = ["Artifacts", "CompilerSupportLibraries_jll", "Libdl"] +uuid = "4536629a-c528-5b80-bd46-f80d51c5b363" + +[[deps.OrderedCollections]] +git-tree-sha1 = "85f8e6578bf1f9ee0d11e7bb1b1456435479d47c" +uuid = "bac558e1-5e72-5ebc-8fee-abe8a469f55d" +version = "1.4.1" + [[deps.Pkg]] deps = ["Artifacts", "Dates", "Downloads", "LibGit2", "Libdl", 
"Logging", "Markdown", "Printf", "REPL", "Random", "SHA", "Serialization", "TOML", "Tar", "UUIDs", "p7zip_jll"] uuid = "44cfe95a-1eb2-52ea-b672-e2afdf69b78f" @@ -122,15 +180,38 @@ uuid = "3fa0cd96-eef1-5676-8a61-b3b8758bbffb" deps = ["SHA", "Serialization"] uuid = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c" +[[deps.Reexport]] +git-tree-sha1 = "45e428421666073eab6f2da5c9d310d99bb12f9b" +uuid = "189a3867-3050-52da-a836-e630ba90ab69" +version = "1.2.2" + +[[deps.Requires]] +deps = ["UUIDs"] +git-tree-sha1 = "838a3a4188e2ded87a4f9f184b4b0d78a1e91cb7" +uuid = "ae029012-a4dd-5104-9daa-d747884805df" +version = "1.3.0" + [[deps.SHA]] uuid = "ea8e919c-243c-51af-8825-aaa63cd721ce" [[deps.Serialization]] uuid = "9e88b42a-f829-5b0c-bbe9-9e923198166b" +[[deps.SharedArrays]] +deps = ["Distributed", "Mmap", "Random", "Serialization"] +uuid = "1a1011a3-84de-559e-8e89-a11a2f7dc383" + [[deps.Sockets]] uuid = "6462fe0b-24de-5631-8697-dd941f90decc" +[[deps.SparseArrays]] +deps = ["LinearAlgebra", "Random"] +uuid = "2f01184e-e22b-5df5-ae63-d93ebab69eaf" + +[[deps.Statistics]] +deps = ["LinearAlgebra", "SparseArrays"] +uuid = "10745b16-79ce-11e8-11f9-7d13ad32a3b2" + [[deps.TOML]] deps = ["Dates"] uuid = "fa267f1f-6049-4f14-aa54-33bafae1ed76" @@ -139,12 +220,22 @@ uuid = "fa267f1f-6049-4f14-aa54-33bafae1ed76" deps = ["ArgTools", "SHA"] uuid = "a4e569a6-e804-4fa4-b0f3-eef7a1d5b13e" +[[deps.Test]] +deps = ["InteractiveUtils", "Logging", "Random", "Serialization"] +uuid = "8dfed614-e22c-5e08-85e1-65c5234f0b40" + [[deps.TimerOutputs]] deps = ["ExprTools", "Printf"] git-tree-sha1 = "97e999be94a7147d0609d0b9fc9feca4bf24d76b" uuid = "a759f4b9-e2f1-59dc-863e-4aeb61b1ea8f" version = "0.5.15" +[[deps.TranscodingStreams]] +deps = ["Random", "Test"] +git-tree-sha1 = "216b95ea110b5972db65aa90f88d8d89dcb8851c" +uuid = "3bb67fe8-82b1-5028-8e26-92a6c54297fa" +version = "0.9.6" + [[deps.UUIDs]] deps = ["Random", "SHA"] uuid = "cf7118a7-6976-5b1a-9a39-7adc72f591a4" @@ -160,6 +251,10 @@ uuid = 
"83775a58-1f1d-513f-b197-d71354ab007a" deps = ["Artifacts", "Libdl"] uuid = "8f36deef-c2a5-5394-99ed-8e07531fb29a" +[[deps.libblastrampoline_jll]] +deps = ["Artifacts", "Libdl", "OpenBLAS_jll"] +uuid = "8e850b90-86db-534c-a0d3-1478176c7d93" + [[deps.nghttp2_jll]] deps = ["Artifacts", "Libdl"] uuid = "8e850ede-7688-5339-a07c-302acd2aaf8d" diff --git a/Project.toml b/Project.toml index 5293f7b..ef7854a 100644 --- a/Project.toml +++ b/Project.toml @@ -1,11 +1,12 @@ name = "StaticCompiler" uuid = "81625895-6c0f-48fc-b932-11a18313743c" authors = ["Tom Short"] -version = "0.3.0" +version = "0.4.0" [deps] Clang_jll = "0ee61d77-7f21-5576-8119-9fcc46b10100" GPUCompiler = "61eb1bfa-7361-4325-ad38-22787b887f55" +JLD2 = "033835bb-8acc-5ee8-8aae-3f567f8a3819" LLVM = "929cbde3-209d-540e-8aea-75f648917ca0" Libdl = "8f399da3-3557-5675-b5ff-fb832c97cbdb" Serialization = "9e88b42a-f829-5b0c-bbe9-9e923198166b" diff --git a/README.md b/README.md index e458edd..0c9d6d4 100644 --- a/README.md +++ b/README.md @@ -47,7 +47,9 @@ This package uses the [GPUCompiler package](https://github.com/JuliaGPU/GPUCompi ## Limitations -* No heap allocations (e.g. creating an array or a string) are allowed inside a statically compiled function body. If you try to run such a function, you will get a segfault. -** It's sometimes possible you won't get a segfault if you define and run the function in the same session, but trying to call the compiled function in a new julia session will definitely segfault if you allocate memory. -* Doesn't currently work on Windows -* If you find any other limitations, let us know. There's probably lots. +* GC-tracked allocations and global variables do work with `compile`, but the way they are implemented is brittle and can be dangerous. Allocate with care. +* GC-tracked allocations and global varaibles do *not* work with `compile_executable` (yet). +* Type unstable code is not yet supported. +* Throwing errors is not currently supported. 
In the meantime, consider wrapping possible errors with [ErrorTypes.jl](https://github.com/jakobnissen/ErrorTypes.jl) +* Doesn't currently work on Windows. +* If you find any other limitations, let us know. There's probably lots. \ No newline at end of file diff --git a/src/StaticCompiler.jl b/src/StaticCompiler.jl index c3cd855..611bc92 100644 --- a/src/StaticCompiler.jl +++ b/src/StaticCompiler.jl @@ -8,12 +8,16 @@ using Serialization: serialize, deserialize using Clang_jll: clang export compile, load_function, compile_executable -export native_code_llvm, native_code_typed, native_llvm_module +export native_code_llvm, native_code_typed, native_llvm_module, native_code_native + +include("target.jl") +include("pointer_patching.jl") +include("code_loading.jl") """ compile(f, types, path::String = tempname()) --> (compiled_f, path) - !!! Warning: this will fail on programs that heap allocate any memory tracked by the GC, or have dynamic dispatch !!! + !!! Warning: this will fail on programs that have dynamic dispatch !!! Statically compile the method of a function `f` specialized to arguments of the type given by `types`. @@ -63,77 +67,38 @@ Here is the structure of the directory created by `compile` in the above example shell> tree fib path ├── obj.cjl -├── obj.o -└── obj.so +└── obj.o 0 directories, 3 files ```` -* `obj.so` (or `.dylib` on MacOS) is a shared object file that can be linked to in order to execute your -compiled julia function. +* `obj.o` contains statically compiled code in the form of an LLVM generated object file. * `obj.cjl` is a serialized `LazyStaticCompiledFunction` object which will be deserialized and instantiated with `load_function(path)`. `LazyStaticcompiledfunction`s contain the requisite information needed to link to the -`obj.so` inside a julia session. Once it is instantiated in a julia session (i.e. by +`obj.o` inside a julia session. Once it is instantiated in a julia session (i.e. 
by `instantiate(::LazyStaticCompiledFunction)`, this happens automatically in `load_function`), it will be of type `StaticCompiledFunction` and may be called with arguments of type `types` as if it were a function with a single method (the method determined by `types`). """ -function compile(f, _tt, path::String = tempname(); name = GPUCompiler.safe_name(repr(f)), kwargs...) +function compile(f, _tt, path::String = tempname(); name = GPUCompiler.safe_name(repr(f)), filename="obj", + strip_llvm = false, + strip_asm = true, + kwargs...) tt = Base.to_tuple_type(_tt) isconcretetype(tt) || error("input type signature $_tt is not concrete") rt = only(native_code_typed(f, tt))[2] isconcretetype(rt) || error("$f on $_tt did not infer to a concrete type. Got $rt") - - # Would be nice to use a compiler pass or something to check if there are any heap allocations or references to globals - # Keep an eye on https://github.com/JuliaLang/julia/pull/43747 for this - + f_wrap!(out::Ref, args::Ref{<:Tuple}) = (out[] = f(args[]...); nothing) - - generate_shlib(f_wrap!, Tuple{RefValue{rt}, RefValue{tt}}, path, name; kwargs...) - - lf = LazyStaticCompiledFunction{rt, tt}(Symbol(f), path, name) - cjl_path = joinpath(path, "obj.cjl") + _, _, table = generate_shlib(f_wrap!, Tuple{RefValue{rt}, RefValue{tt}}, path, name; strip_llvm, strip_asm, filename, kwargs...) + + lf = LazyStaticCompiledFunction{rt, tt}(Symbol(f), path, name, filename, table) + cjl_path = joinpath(path, "$filename.cjl") serialize(cjl_path, lf) - (; f = instantiate(lf), path=abspath(path)) -end - -""" - load_function(path) --> compiled_f - -load a `StaticCompiledFunction` from a given path. This object is callable. 
-""" -load_function(path) = instantiate(deserialize(joinpath(path, "obj.cjl")) :: LazyStaticCompiledFunction) -struct LazyStaticCompiledFunction{rt, tt} - f::Symbol - path::String - name::String -end - -function instantiate(p::LazyStaticCompiledFunction{rt, tt}) where {rt, tt} - StaticCompiledFunction{rt, tt}(p.f, generate_shlib_fptr(p.path::String, p.name)) -end - -struct StaticCompiledFunction{rt, tt} - f::Symbol - ptr::Ptr{Nothing} -end - -function Base.show(io::IO, f::StaticCompiledFunction{rt, tt}) where {rt, tt} - types = [tt.parameters...] - print(io, String(f.f), "(", join(("::$T" for T ∈ tt.parameters), ',') ,") :: $rt") -end - -function (f::StaticCompiledFunction{rt, tt})(args...) where {rt, tt} - Tuple{typeof.(args)...} == tt || error("Input types don't match compiled target $((tt.parameters...,)). Got arguments of type $(typeof.(args))") - out = RefValue{rt}() - refargs = Ref(args) - ccall(f.ptr, Nothing, (Ref{rt}, Ref{tt}), out, refargs) - out[] + (; f = instantiate(lf), path=abspath(path)) end -instantiate(f::StaticCompiledFunction) = f - """ ```julia @@ -208,31 +173,6 @@ function compile_executable(f, _tt=(), path::String="./", name=GPUCompiler.safe_ joinpath(abspath(path), filename) end - -module TestRuntime - # dummy methods - signal_exception() = return - # HACK: if malloc returns 0 or traps, all calling functions (like jl_box_*) - # get reduced to a trap, which really messes with our test suite. - malloc(sz) = Ptr{Cvoid}(Int(0xDEADBEEF)) - report_oom(sz) = return - report_exception(ex) = return - report_exception_name(ex) = return - report_exception_frame(idx, func, file, line) = return -end - -struct TestCompilerParams <: GPUCompiler.AbstractCompilerParams end -GPUCompiler.runtime_module(::GPUCompiler.CompilerJob{<:Any,TestCompilerParams}) = TestRuntime - - -function native_job(@nospecialize(func), @nospecialize(types); kernel::Bool=false, name=GPUCompiler.safe_name(repr(func)), kwargs...) 
- source = GPUCompiler.FunctionSpec(func, Base.to_tuple_type(types), kernel, name) - target = GPUCompiler.NativeCompilerTarget(always_inline=true) - params = TestCompilerParams() - GPUCompiler.CompilerJob(target, source, params), kwargs -end - - """ ```julia generate_shlib(f, tt, path::String, name::String, filenamebase::String="obj"; kwargs...) @@ -271,23 +211,25 @@ julia> ccall(StaticCompiler.generate_shlib_fptr(path, name), Float64, (Int64,), 5.256496109495593 ``` """ -function generate_shlib(f, tt, path::String = tempname(), name = GPUCompiler.safe_name(repr(f)), filenamebase::String="obj"; kwargs...) +function generate_shlib(f, tt, path::String = tempname(), name = GPUCompiler.safe_name(repr(f)), filenamebase::String="obj"; + strip_llvm = false, + strip_asm = true, + kwargs...) mkpath(path) obj_path = joinpath(path, "$filenamebase.o") lib_path = joinpath(path, "$filenamebase.$(Libdl.dlext)") - open(obj_path, "w") do io - job, kwargs = native_job(f, tt; name, kwargs...) - obj, _ = GPUCompiler.codegen(:obj, job; strip=true, only_entry=false, validate=false) - write(io, obj) - flush(io) + job, kwargs = native_job(f, tt; name, kwargs...) + mod, meta = GPUCompiler.codegen(:llvm, job; strip=strip_llvm, only_entry=false, validate=false) - # Pick a Clang - cc = Sys.isapple() ? `cc` : clang() - # Compile! - run(`$cc -shared -o $lib_path $obj_path`) + table = relocation_table!(mod) + + obj, _ = GPUCompiler.emit_asm(job, mod; strip=strip_asm, validate=false, format=LLVM.API.LLVMObjectFile) + + open(obj_path, "w") do io + write(io, obj) end - path, name + path, name, table end @@ -420,4 +362,14 @@ function native_llvm_module(f, tt, name = GPUCompiler.safe_name(repr(f)); kwargs return m end +function native_code_native(@nospecialize(f), @nospecialize(tt), name = GPUCompiler.safe_name(repr(f)); kwargs...) + job, kwargs = native_job(f, tt; name, kwargs...) + GPUCompiler.code_native(stdout, job; kwargs...) 
+end + + + + + + end # module diff --git a/src/code_loading.jl b/src/code_loading.jl new file mode 100644 index 0000000..b720548 --- /dev/null +++ b/src/code_loading.jl @@ -0,0 +1,77 @@ +""" + load_function(path) --> compiled_f + +load a `StaticCompiledFunction` from a given path. This object is callable. +""" +function load_function(path; filename="obj") + instantiate(deserialize(joinpath(path, "$filename.cjl"))) +end + +struct LazyStaticCompiledFunction{rt, tt} + f::Symbol + path::String + name::String + filename::String + reloc::IdDict{Any,String} +end + +function instantiate(p::LazyStaticCompiledFunction{rt, tt}) where {rt, tt} + # LLVM.load_library_permantly(dirname(Libdl.dlpath(Libdl.dlopen("libjulia")))) + lljit = LLVM.LLJIT(;tm=LLVM.JITTargetMachine()) + jd = LLVM.JITDylib(lljit) + flags = LLVM.API.LLVMJITSymbolFlags(LLVM.API.LLVMJITSymbolGenericFlagsExported, 0) + ofile = LLVM.MemoryBufferFile(joinpath(p.path, "$(p.filename).o")) #$(Libdl.dlext) + + + # Set all the uninitialized global variables to point to julia values from the relocation table + for (val, name) ∈ p.reloc + if !ismutable(val) + # Sometimes Julia embeds functions like `Base.string` into code, and this doesn't have a pointer + # so we need to give it one manually, and put the ref in the dict to make sure it doesn't expire. + delete!(p.reloc, val) + val = Ref(val) + p.reloc[val] = name + end + address = LLVM.API.LLVMOrcJITTargetAddress(reinterpret(UInt, pointer_from_objref(val))) + + symbol = LLVM.API.LLVMJITEvaluatedSymbol(address, flags) + gv = LLVM.API.LLVMJITCSymbolMapPair(LLVM.mangle(lljit, name), symbol) + mu = absolute_symbols(Ref(gv)) + LLVM.define(jd, mu) + end + # consider switching to one mu for all gvs instead of one per gv. 
+ # I tried that already, but I got an error saying + # JIT session error: Symbols not found: [ __Type_Vector_Float64___274 ] + + # Link to libjulia + prefix = LLVM.get_prefix(lljit) + dg = LLVM.CreateDynamicLibrarySearchGeneratorForProcess(prefix) + LLVM.add!(jd, dg) + + LLVM.add!(lljit, jd, ofile) + fptr = pointer(LLVM.lookup(lljit, "julia_" * p.name)) + + StaticCompiledFunction{rt, tt}(p.f, fptr, lljit, p.reloc) +end + +struct StaticCompiledFunction{rt, tt} + f::Symbol + ptr::Ptr{Nothing} + jit::LLVM.LLJIT + reloc::IdDict{Any, String} +end + +function Base.show(io::IO, f::StaticCompiledFunction{rt, tt}) where {rt, tt} + types = [tt.parameters...] + print(io, String(f.f), "(", join(("::$T" for T ∈ tt.parameters), ',') ,") :: $rt") +end + +function (f::StaticCompiledFunction{rt, tt})(args...) where {rt, tt} + Tuple{typeof.(args)...} == tt || error("Input types don't match compiled target $((tt.parameters...,)). Got arguments of type $(typeof.(args))") + out = RefValue{rt}() + refargs = Ref(args) + ccall(f.ptr, Nothing, (Ptr{rt}, Ref{tt}), pointer_from_objref(out), refargs) + out[] +end + +instantiate(f::StaticCompiledFunction) = f diff --git a/src/pointer_patching.jl b/src/pointer_patching.jl new file mode 100644 index 0000000..c882f8e --- /dev/null +++ b/src/pointer_patching.jl @@ -0,0 +1,174 @@ +function relocation_table!(mod) + i64 = LLVM.IntType(64; ctx=LLVM.context(mod)) + jl_t = LLVM.PointerType(LLVM.StructType(LLVM.LLVMType[]; ctx=LLVM.context(mod))) + d = IdDict{Any, Tuple{String, LLVM.GlobalVariable}}() + + for func ∈ LLVM.functions(mod), bb ∈ LLVM.blocks(func), inst ∈ LLVM.instructions(bb) + if isa(inst, LLVM.LoadInst) && occursin("inttoptr", string(inst)) + get_pointers!(d, mod, inst) + elseif isa(inst, LLVM.StoreInst) && occursin("inttoptr", string(inst)) + @debug "Relocating StoreInst" inst + get_pointers!(d, mod, inst) + elseif inst isa LLVM.RetInst && occursin("inttoptr", string(inst)) + @debug "Relocating RetInst" inst LLVM.operands(inst) + 
get_pointers!(d, mod, inst) + elseif isa(inst, LLVM.BitCastInst) && occursin("inttoptr", string(inst)) + @debug "Relocating BitCastInst" inst LLVM.operands(inst) + get_pointers!(d, mod, inst) + elseif isa(inst, LLVM.CallInst) + @debug "Relocating CallInst" inst LLVM.operands(inst) + dest = LLVM.called_value(inst) + if occursin("inttoptr", string(dest)) && length(LLVM.operands(dest)) > 0 + @debug "Relocating CallInst inttoptr" dest LLVM.operands(dest) LLVM.operands(inst) + ptr_arg = first(LLVM.operands(dest)) + ptr_val = convert(Int, ptr_arg) + ptr = Ptr{Cvoid}(ptr_val) + + frames = ccall(:jl_lookup_code_address, Any, (Ptr{Cvoid}, Cint,), ptr, 0) + if length(frames) >= 1 + fn, file, line, linfo, fromC, inlined = last(frames) + fn = string(fn) + if ptr == cglobal(:jl_alloc_array_1d) + fn = "jl_alloc_array_1d" + end + if ptr == cglobal(:jl_alloc_array_2d) + fn = "jl_alloc_array_2d" + end + if ptr == cglobal(:jl_alloc_array_3d) + fn = "jl_alloc_array_3d" + end + if ptr == cglobal(:jl_new_array) + fn = "jl_new_array" + end + if ptr == cglobal(:jl_array_copy) + fn = "jl_array_copy" + end + if ptr == cglobal(:jl_alloc_string) + fn = "jl_alloc_string" + end + if ptr == cglobal(:jl_in_threaded_region) + fn = "jl_in_threaded_region" + end + if ptr == cglobal(:jl_enter_threaded_region) + fn = "jl_enter_threaded_region" + end + if ptr == cglobal(:jl_exit_threaded_region) + fn = "jl_exit_threaded_region" + end + if ptr == cglobal(:jl_set_task_tid) + fn = "jl_set_task_tid" + end + if ptr == cglobal(:jl_new_task) + fn = "jl_new_task" + end + if ptr == cglobal(:malloc) + fn = "malloc" + end + if ptr == cglobal(:memmove) + fn = "memmove" + end + if ptr == cglobal(:jl_array_grow_beg) + fn = "jl_array_grow_beg" + end + if ptr == cglobal(:jl_array_grow_end) + fn = "jl_array_grow_end" + end + if ptr == cglobal(:jl_array_grow_at) + fn = "jl_array_grow_at" + end + if ptr == cglobal(:jl_array_del_beg) + fn = "jl_array_del_beg" + end + if ptr == cglobal(:jl_array_del_end) + fn = 
"jl_array_del_end" + end + if ptr == cglobal(:jl_array_del_at) + fn = "jl_array_del_at" + end + if ptr == cglobal(:jl_array_ptr) + fn = "jl_array_ptr" + end + if ptr == cglobal(:jl_value_ptr) + fn = "jl_value_ptr" + end + if ptr == cglobal(:jl_get_ptls_states) + fn = "jl_get_ptls_states" + end + if ptr == cglobal(:jl_gc_add_finalizer_th) + fn = "jl_gc_add_finalizer_th" + end + if ptr == cglobal(:jl_symbol_n) + fn = "jl_symbol_n" + end + end + + if length(fn) > 1 && fromC + mod = LLVM.parent(LLVM.parent(LLVM.parent(inst))) + lfn = LLVM.API.LLVMGetNamedFunction(mod, fn) + + if lfn == C_NULL + lfn = LLVM.API.LLVMAddFunction(mod, fn, LLVM.API.LLVMGetCalledFunctionType(inst)) + else + lfn = LLVM.API.LLVMConstBitCast(lfn, LLVM.PointerType(LLVM.FunctionType(LLVM.API.LLVMGetCalledFunctionType(inst)))) + end + LLVM.API.LLVMSetOperand(inst, LLVM.API.LLVMGetNumOperands(inst)-1, lfn) + end + end + get_pointers!(d, mod, inst) + end + end + IdDict{Any, String}(val => name for (val, (name, _)) ∈ d) +end + +function get_pointers!(d, mod, inst) + jl_t = LLVM.PointerType(LLVM.StructType(LLVM.LLVMType[]; ctx=LLVM.context(mod))) + for (i, arg) ∈ enumerate(LLVM.operands(inst)) + if occursin("inttoptr", string(arg)) && arg isa LLVM.ConstantExpr + op1 = LLVM.Value(LLVM.API.LLVMGetOperand(arg, 0)) + ptr = Ptr{Cvoid}(convert(Int, op1)) + + frames = ccall(:jl_lookup_code_address, Any, (Ptr{Cvoid}, Cint,), ptr, 0) + if length(frames) >= 1 + fn, file, line, linfo, fromC, inlined = last(frames) + if isempty(String(fn)) || fn == :jl_system_image_data + val = unsafe_pointer_to_objref(ptr) + if val ∈ keys(d) + _, gv = d[val] + LLVM.API.LLVMSetOperand(inst, i-1, gv) + else + gv_name = GPUCompiler.safe_name(String(gensym(repr(Core.Typeof(val))))) + gv = LLVM.GlobalVariable(mod, jl_t, gv_name) + LLVM.extinit!(gv, true) + LLVM.API.LLVMSetOperand(inst, i-1, gv) + + d[val] = (gv_name, gv) + end + else + @warn "Found data we don't know how to relocate." 
frames + end + end + end + end +end + +function absolute_symbols(symbols) + ref = LLVM.API.LLVMOrcAbsoluteSymbols(symbols, length(symbols)) + LLVM.MaterializationUnit(ref) +end + +function pointer_patching_diff(mod::LLVM.Module, path1=tempname(), path2=tempname(); show_reloc_table=false) + s1 = string(mod) + write(path1, s1) + + d = StaticCompiler.relocation_table!(mod) + if show_reloc_table + @show d + end + + s2 = string(mod) + write(path2, s2) + + run(`diff $p1 $p2`) +end + + diff --git a/src/target.jl b/src/target.jl new file mode 100644 index 0000000..3b1c623 --- /dev/null +++ b/src/target.jl @@ -0,0 +1,43 @@ +Base.@kwdef struct NativeCompilerTarget <: GPUCompiler.AbstractCompilerTarget + cpu::String=(LLVM.version() < v"8") ? "" : unsafe_string(LLVM.API.LLVMGetHostCPUName()) + features::String=(LLVM.version() < v"8") ? "" : unsafe_string(LLVM.API.LLVMGetHostCPUFeatures()) +end + +GPUCompiler.llvm_triple(::NativeCompilerTarget) = Sys.MACHINE + +function GPUCompiler.llvm_machine(target::NativeCompilerTarget) + triple = GPUCompiler.llvm_triple(target) + + t = LLVM.Target(triple=triple) + + tm = LLVM.TargetMachine(t, triple, target.cpu, target.features, reloc=LLVM.API.LLVMRelocPIC) + GPUCompiler.asm_verbosity!(tm, true) + + return tm +end + +GPUCompiler.runtime_slug(job::GPUCompiler.CompilerJob{NativeCompilerTarget}) = "native_$(job.target.cpu)-$(hash(job.target.features))" + +module StaticRuntime + # dummy methods + signal_exception() = return + # HACK: if malloc returns 0 or traps, all calling functions (like jl_box_*) + # get reduced to a trap, which really messes with our test suite. 
+ malloc(sz) = Ptr{Cvoid}(Int(0xDEADBEEF)) + report_oom(sz) = return + report_exception(ex) = return + report_exception_name(ex) = return + report_exception_frame(idx, func, file, line) = return +end + +struct StaticCompilerParams <: GPUCompiler.AbstractCompilerParams end + +GPUCompiler.runtime_module(::GPUCompiler.CompilerJob{<:Any,StaticCompilerParams}) = StaticRuntime +GPUCompiler.can_throw(job::GPUCompiler.CompilerJob{NativeCompilerTarget}) = true + +function native_job(@nospecialize(func), @nospecialize(types); kernel::Bool=false, name=GPUCompiler.safe_name(repr(func)), kwargs...) + source = GPUCompiler.FunctionSpec(func, Base.to_tuple_type(types), kernel, name) + target = NativeCompilerTarget() + params = StaticCompilerParams() + GPUCompiler.CompilerJob(target, source, params), kwargs +end diff --git a/test/Project.toml b/test/Project.toml index b0cf1dc..b00d649 100644 --- a/test/Project.toml +++ b/test/Project.toml @@ -7,4 +7,5 @@ Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40" LoopVectorization = "bdcacae8-1622-11e9-2a5c-532679323890" ManualMemory = "d125e4d3-2237-4719-b19c-fa641b8a4667" StrideArraysCore = "7792a7ef-975c-4747-a70f-980b88e8d1da" -Distributed = "8ba89e20-285c-5b6f-9357-94700520ee1b" \ No newline at end of file +Distributed = "8ba89e20-285c-5b6f-9357-94700520ee1b" +ErrorTypes = "7f495686-c6d2-4c77-9e8e-e4c865675f9d" \ No newline at end of file diff --git a/test/runtests.jl b/test/runtests.jl index 3201dbe..74b240f 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -6,12 +6,14 @@ using LoopVectorization using ManualMemory using StrideArraysCore using Distributed +using ErrorTypes addprocs(1) -@everywhere using StaticCompiler +@everywhere using StaticCompiler, ErrorTypes remote_load_call(path, args...) 
= fetch(@spawnat 2 load_function(path)(args...)) + @testset "Basics" begin simple_sum(x) = x + one(typeof(x)) @@ -107,10 +109,63 @@ end _, path = compile(array_sum, (Int, Vector{T})) @test remote_load_call(path, 10, T.(1:10)) == T(55) end +end + +@testset "Array allocations" begin + function f(N) + v = Vector{Float64}(undef, N) + for i ∈ eachindex(v) + v[i] = i*i + end + v + end + _, path = compile(f, (Int,)) + @test remote_load_call(path, 5) == [1.0, 4.0, 9.0, 16.0, 25.0] +end + +# This is also a good test of loading and storing from the same object +@testset "Load & Store Same object" begin + global const x = Ref(0) + counter() = x[] += 1 + _, path = compile(counter, ()) + @spawnat 2 global counter = load_function(path) + @test fetch(@spawnat 2 counter()) == 1 + @test fetch(@spawnat 2 counter()) == 2 +end + +# This is also a good test of loading and storing from the same object +counter = let x = Ref(0) + () -> x[] += 1 +end +@testset "Closures" begin + #this currently segfaults during compilation + @test_skip begin + _, path = compile(counter, ()) + @spawnat 2 global counter_comp = load_function(path) + @test fetch(@spawnat 2 counter_comp()) == 1 + @test fetch(@spawnat 2 counter_comp()) == 2 + end +end + +@testset "Error handling" begin + # Doesn't work yet. 
Probably need the slow ABI :( + @test_skip begin + _, sqrt_path = compile(sqrt, (Int,)) + @test_throws DomainError remote_load_call(sqrt_path, -1) + end +end + - # @test (10, Int.(1:10)) == 55 - # @test compile(array_sum, (Int, Vector{Complex{Float32}}))[1](10, Complex{Float32}.(1:10)) == 55f0 + 0f0im - # @test compile(array_sum, (Int, Vector{Complex{Float64}}))[1](10, Complex{Float64}.(1:10)) == 55f0 + 0f0im +@testset "ErrorTypes handling" begin + function try_sqrt(x) :: Result{Float64, Nothing} + if x >= 0.0 + Ok(sqrt(x)) + else + Err(nothing) + end + end + _, sqrt_path = compile(try_sqrt, (Int,)) + @test remote_load_call(sqrt_path, -1) == none(Float64) end @@ -132,12 +187,25 @@ end BLAS.dot(N, a, 1, a, 1) end a = [1.0, 2.0] - mydot_compiled, path = compile(mydot, (Vector{Float64},)) - @test_skip remote_load_call(path, a) == 5.0 # this needs a relocatable pointer to work - @test mydot_compiled(a) == 5.0 + + # This used to work within a session, but now that I'm doing pointer relocation a bit better, + # it's chocking on the `BLAS` pointer call. I'm not sure yet how to relocate this properly. + @test_skip begin + mydot_compiled, path = compile(mydot, (Vector{Float64},)) + @test remote_load_call(path, a) == 5.0 + @test mydot_compiled(a) == 5.0 + end end +@testset "Strings" begin + function hello(name) + "Hello, " * name * "!" + end + hello_compiled, path = compile(hello, (String,)) + @test remote_load_call(path, "world") == "Hello, world!" +end + @testset "Hello World" begin function hello(N) println("Hello World $N") @@ -171,6 +239,8 @@ end @test C ≈ A*B end + + # This is a trick to get stack allocated arrays inside a function body (so long as they don't escape). 
# This lets us have intermediate, mutable stack allocated arrays inside our @testset "Alloca" begin @@ -236,4 +306,6 @@ end @test r.exitcode == 0 end end + + # data structures, dictionaries, tuples, named tuples From f85891a3081d849a1915562d4642250ae37a617b Mon Sep 17 00:00:00 2001 From: Mason Protter Date: Sat, 12 Feb 2022 09:56:42 -0700 Subject: [PATCH 052/159] Fixup; get errors working (#61) * tell llvm the correct types of objects; verify the IR after after modification * Update src/code_loading.jl Co-authored-by: Valentin Churavy * use julia's optimizer, enable tests for error handling * make ManualMemory and StrideArraysCore available on other process * run optimization twice so that reloc can happen on optimized code * grab `jl_add_optimization_passes` from `libjulia-codegen` on newer versions * don't test on windows for now, it's pointless * remove unnecessary func * add some comments * reorganize * catch an error pathway Co-authored-by: Valentin Churavy --- .github/workflows/ci.yml | 2 - README.md | 1 - src/StaticCompiler.jl | 153 +++++++++++++++++++++++---------------- src/code_loading.jl | 24 ++++-- src/pointer_patching.jl | 26 ++++--- src/target.jl | 11 ++- test/Project.toml | 3 +- test/runtests.jl | 66 +++++++---------- 8 files changed, 158 insertions(+), 128 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 0ea99ae..03f8476 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -6,7 +6,6 @@ jobs: test: name: Julia ${{ matrix.version }} - ${{ matrix.os }} - ${{ matrix.arch }} runs-on: ${{ matrix.os }} - continue-on-error: ${{ matrix.os == 'windows-latest' }} strategy: fail-fast: false matrix: @@ -16,7 +15,6 @@ jobs: os: - ubuntu-latest - macOS-latest - - windows-latest arch: - x64 steps: diff --git a/README.md b/README.md index 0c9d6d4..56e0368 100644 --- a/README.md +++ b/README.md @@ -50,6 +50,5 @@ This package uses the [GPUCompiler package](https://github.com/JuliaGPU/GPUCompi * GC-tracked 
allocations and global variables do work with `compile`, but the way they are implemented is brittle and can be dangerous. Allocate with care. * GC-tracked allocations and global varaibles do *not* work with `compile_executable` (yet). * Type unstable code is not yet supported. -* Throwing errors is not currently supported. In the meantime, consider wrapping possible errors with [ErrorTypes.jl](https://github.com/jakobnissen/ErrorTypes.jl) * Doesn't currently work on Windows. * If you find any other limitations, let us know. There's probably lots. \ No newline at end of file diff --git a/src/StaticCompiler.jl b/src/StaticCompiler.jl index 611bc92..49975ad 100644 --- a/src/StaticCompiler.jl +++ b/src/StaticCompiler.jl @@ -1,8 +1,10 @@ module StaticCompiler using GPUCompiler: GPUCompiler -using LLVM: LLVM -using Libdl: Libdl +using LLVM +using LLVM.Interop +using LLVM: API +using Libdl: Libdl, dlsym, dlopen using Base: RefValue using Serialization: serialize, deserialize using Clang_jll: clang @@ -82,6 +84,7 @@ single method (the method determined by `types`). function compile(f, _tt, path::String = tempname(); name = GPUCompiler.safe_name(repr(f)), filename="obj", strip_llvm = false, strip_asm = true, + opt_level=3, kwargs...) tt = Base.to_tuple_type(_tt) isconcretetype(tt) || error("input type signature $_tt is not concrete") @@ -90,7 +93,7 @@ function compile(f, _tt, path::String = tempname(); name = GPUCompiler.safe_nam isconcretetype(rt) || error("$f on $_tt did not infer to a concrete type. Got $rt") f_wrap!(out::Ref, args::Ref{<:Tuple}) = (out[] = f(args[]...); nothing) - _, _, table = generate_shlib(f_wrap!, Tuple{RefValue{rt}, RefValue{tt}}, path, name; strip_llvm, strip_asm, filename, kwargs...) + _, _, table = generate_shlib(f_wrap!, Tuple{RefValue{rt}, RefValue{tt}}, path, name; opt_level, strip_llvm, strip_asm, filename, kwargs...) 
lf = LazyStaticCompiledFunction{rt, tt}(Symbol(f), path, name, filename, table) cjl_path = joinpath(path, "$filename.cjl") @@ -99,6 +102,92 @@ function compile(f, _tt, path::String = tempname(); name = GPUCompiler.safe_nam (; f = instantiate(lf), path=abspath(path)) end +""" +```julia +generate_shlib(f, tt, path::String, name::String, filenamebase::String="obj"; kwargs...) +``` +Low level interface for compiling a shared object / dynamically loaded library + (`.so` / `.dylib`) for function `f` given a tuple type `tt` characterizing +the types of the arguments for which the function will be compiled. + +See also `StaticCompiler.generate_shlib_fptr`. + +### Examples +```julia +julia> function test(n) + r = 0.0 + for i=1:n + r += log(sqrt(i)) + end + return r/n + end +test (generic function with 1 method) + +julia> path, name = StaticCompiler.generate_shlib(test, Tuple{Int64}, "./test") +("./test", "test") + +shell> tree \$path +./test +|-- obj.o +`-- obj.so + +0 directories, 2 files + +julia> test(100_000) +5.256496109495593 + +julia> ccall(StaticCompiler.generate_shlib_fptr(path, name), Float64, (Int64,), 100_000) +5.256496109495593 +``` +""" +function generate_shlib(f, tt, path::String = tempname(), name = GPUCompiler.safe_name(repr(f)), filenamebase::String="obj"; + strip_llvm = false, + strip_asm = true, + opt_level=3, + kwargs...) + mkpath(path) + obj_path = joinpath(path, "$filenamebase.o") + lib_path = joinpath(path, "$filenamebase.$(Libdl.dlext)") + + job, kwargs = native_job(f, tt; name, kwargs...) + #Get LLVM to generated a module of code for us. We don't want GPUCompiler's optimization passes. + mod, meta = GPUCompiler.codegen(:llvm, job; strip=strip_llvm, only_entry=false, validate=false, optimize=false) + #use Julia's optimization pass on the LLVM code, but leave intrinsics alone + julia_opt_passes(mod, job; opt_level, lower_intrinsics=0) + # Scoop up all the pointers in the optimized module, and replace them with unitialized global variables. 
+ # table is a dictionary where the keys are julia objects that are needed by the function, and the values + # of the dictionary are the names of their associated LLVM GlobalVariable names. + table = relocation_table!(mod) + # Now that we've removed all the pointers from the code, we can (hopefully) safely lower all the instrinsics + julia_opt_passes(mod, job; opt_level, lower_intrinsics=1) + # Make sure we didn't make any glaring errors + LLVM.verify(mod) + # Compile the LLVM module to native code and save it to disk + obj, _ = GPUCompiler.emit_asm(job, mod; strip=strip_asm, validate=false, format=LLVM.API.LLVMObjectFile) + open(obj_path, "w") do io + write(io, obj) + end + path, name, table +end + +function julia_opt_passes(mod, job; opt_level, lower_intrinsics) + triple = GPUCompiler.llvm_triple(job.target) + tm = GPUCompiler.llvm_machine(job.target) + + lib_path = VERSION > v"1.8.0-DEV" ? "libjulia-codegen" : "libjulia" + + dlopen(lib_path) do lib + opt_func = dlsym(lib, "jl_add_optimization_passes") + ModulePassManager() do pm + add_library_info!(pm, triple) + add_transform_info!(pm, tm) + ccall(opt_func, Cvoid, + (LLVM.API.LLVMPassManagerRef, Cint, Cint), + pm, opt_level, lower_intrinsics) + run!(pm, mod) + end + end +end """ ```julia @@ -173,64 +262,6 @@ function compile_executable(f, _tt=(), path::String="./", name=GPUCompiler.safe_ joinpath(abspath(path), filename) end -""" -```julia -generate_shlib(f, tt, path::String, name::String, filenamebase::String="obj"; kwargs...) -``` -Low level interface for compiling a shared object / dynamically loaded library - (`.so` / `.dylib`) for function `f` given a tuple type `tt` characterizing -the types of the arguments for which the function will be compiled. - -See also `StaticCompiler.generate_shlib_fptr`. 
- -### Examples -```julia -julia> function test(n) - r = 0.0 - for i=1:n - r += log(sqrt(i)) - end - return r/n - end -test (generic function with 1 method) - -julia> path, name = StaticCompiler.generate_shlib(test, Tuple{Int64}, "./test") -("./test", "test") - -shell> tree \$path -./test -|-- obj.o -`-- obj.so - -0 directories, 2 files - -julia> test(100_000) -5.256496109495593 - -julia> ccall(StaticCompiler.generate_shlib_fptr(path, name), Float64, (Int64,), 100_000) -5.256496109495593 -``` -""" -function generate_shlib(f, tt, path::String = tempname(), name = GPUCompiler.safe_name(repr(f)), filenamebase::String="obj"; - strip_llvm = false, - strip_asm = true, - kwargs...) - mkpath(path) - obj_path = joinpath(path, "$filenamebase.o") - lib_path = joinpath(path, "$filenamebase.$(Libdl.dlext)") - - job, kwargs = native_job(f, tt; name, kwargs...) - mod, meta = GPUCompiler.codegen(:llvm, job; strip=strip_llvm, only_entry=false, validate=false) - - table = relocation_table!(mod) - - obj, _ = GPUCompiler.emit_asm(job, mod; strip=strip_asm, validate=false, format=LLVM.API.LLVMObjectFile) - - open(obj_path, "w") do io - write(io, obj) - end - path, name, table -end function generate_shlib_fptr(f, tt, path::String=tempname(), name = GPUCompiler.safe_name(repr(f)), filenamebase::String="obj"; temp::Bool=true, kwargs...) diff --git a/src/code_loading.jl b/src/code_loading.jl index b720548..931e6a5 100644 --- a/src/code_loading.jl +++ b/src/code_loading.jl @@ -15,6 +15,15 @@ struct LazyStaticCompiledFunction{rt, tt} reloc::IdDict{Any,String} end +""" + unsafe_pointer_from_objref(x) + +Sometimes Julia embeds immutables like `Base.string` into code, and julia +will error if you call `pointer_from_objref(string)`, claiming that it +doesn't have a pointer even though that's a lie. 
+""" +unsafe_pointer_from_objref(x) = ccall(:jl_value_ptr, Ptr{Cvoid}, (Any,), x) + function instantiate(p::LazyStaticCompiledFunction{rt, tt}) where {rt, tt} # LLVM.load_library_permantly(dirname(Libdl.dlpath(Libdl.dlopen("libjulia")))) lljit = LLVM.LLJIT(;tm=LLVM.JITTargetMachine()) @@ -25,14 +34,7 @@ function instantiate(p::LazyStaticCompiledFunction{rt, tt}) where {rt, tt} # Set all the uninitialized global variables to point to julia values from the relocation table for (val, name) ∈ p.reloc - if !ismutable(val) - # Sometimes Julia embeds functions like `Base.string` into code, and this doesn't have a pointer - # so we need to give it one manually, and put the ref in the dict to make sure it doesn't expire. - delete!(p.reloc, val) - val = Ref(val) - p.reloc[val] = name - end - address = LLVM.API.LLVMOrcJITTargetAddress(reinterpret(UInt, pointer_from_objref(val))) + address = LLVM.API.LLVMOrcJITTargetAddress(reinterpret(UInt, unsafe_pointer_from_objref(val))) symbol = LLVM.API.LLVMJITEvaluatedSymbol(address, flags) gv = LLVM.API.LLVMJITCSymbolMapPair(LLVM.mangle(lljit, name), symbol) @@ -54,6 +56,12 @@ function instantiate(p::LazyStaticCompiledFunction{rt, tt}) where {rt, tt} StaticCompiledFunction{rt, tt}(p.f, fptr, lljit, p.reloc) end +function absolute_symbols(symbols) + ref = LLVM.API.LLVMOrcAbsoluteSymbols(symbols, length(symbols)) + LLVM.MaterializationUnit(ref) +end + + struct StaticCompiledFunction{rt, tt} f::Symbol ptr::Ptr{Nothing} diff --git a/src/pointer_patching.jl b/src/pointer_patching.jl index c882f8e..18be73e 100644 --- a/src/pointer_patching.jl +++ b/src/pointer_patching.jl @@ -1,6 +1,5 @@ function relocation_table!(mod) i64 = LLVM.IntType(64; ctx=LLVM.context(mod)) - jl_t = LLVM.PointerType(LLVM.StructType(LLVM.LLVMType[]; ctx=LLVM.context(mod))) d = IdDict{Any, Tuple{String, LLVM.GlobalVariable}}() for func ∈ LLVM.functions(mod), bb ∈ LLVM.blocks(func), inst ∈ LLVM.instructions(bb) @@ -121,23 +120,26 @@ function relocation_table!(mod) end 
function get_pointers!(d, mod, inst) - jl_t = LLVM.PointerType(LLVM.StructType(LLVM.LLVMType[]; ctx=LLVM.context(mod))) + jl_t = (LLVM.StructType(LLVM.LLVMType[]; ctx=LLVM.context(mod))) for (i, arg) ∈ enumerate(LLVM.operands(inst)) if occursin("inttoptr", string(arg)) && arg isa LLVM.ConstantExpr op1 = LLVM.Value(LLVM.API.LLVMGetOperand(arg, 0)) + if op1 isa LLVM.ConstantExpr + op1 = LLVM.Value(LLVM.API.LLVMGetOperand(op1, 0)) + end ptr = Ptr{Cvoid}(convert(Int, op1)) - frames = ccall(:jl_lookup_code_address, Any, (Ptr{Cvoid}, Cint,), ptr, 0) if length(frames) >= 1 fn, file, line, linfo, fromC, inlined = last(frames) - if isempty(String(fn)) || fn == :jl_system_image_data - val = unsafe_pointer_to_objref(ptr) + if (isempty(String(fn)) && isempty(String(file))) || fn == :jl_system_image_data + val = unsafe_pointer_to_objref(ptr) if val ∈ keys(d) _, gv = d[val] LLVM.API.LLVMSetOperand(inst, i-1, gv) else gv_name = GPUCompiler.safe_name(String(gensym(repr(Core.Typeof(val))))) - gv = LLVM.GlobalVariable(mod, jl_t, gv_name) + gv = LLVM.GlobalVariable(mod, llvmeltype(arg), gv_name, LLVM.addrspace(llvmtype(arg))) + LLVM.extinit!(gv, true) LLVM.API.LLVMSetOperand(inst, i-1, gv) @@ -151,10 +153,7 @@ function get_pointers!(d, mod, inst) end end -function absolute_symbols(symbols) - ref = LLVM.API.LLVMOrcAbsoluteSymbols(symbols, length(symbols)) - LLVM.MaterializationUnit(ref) -end +llvmeltype(x::LLVM.Value) = eltype(LLVM.llvmtype(x)) function pointer_patching_diff(mod::LLVM.Module, path1=tempname(), path2=tempname(); show_reloc_table=false) s1 = string(mod) @@ -168,7 +167,12 @@ function pointer_patching_diff(mod::LLVM.Module, path1=tempname(), path2=tempnam s2 = string(mod) write(path2, s2) - run(`diff $p1 $p2`) + try + # this always ends in an error for me for some reason + run(`diff $path1 $path2`) + catch e; + nothing + end end diff --git a/src/target.jl b/src/target.jl index 3b1c623..2efb191 100644 --- a/src/target.jl +++ b/src/target.jl @@ -19,11 +19,9 @@ end 
GPUCompiler.runtime_slug(job::GPUCompiler.CompilerJob{NativeCompilerTarget}) = "native_$(job.target.cpu)-$(hash(job.target.features))" module StaticRuntime - # dummy methods + # the runtime library signal_exception() = return - # HACK: if malloc returns 0 or traps, all calling functions (like jl_box_*) - # get reduced to a trap, which really messes with our test suite. - malloc(sz) = Ptr{Cvoid}(Int(0xDEADBEEF)) + malloc(sz) = ccall("extern malloc", llvmcall, Csize_t, (Csize_t,), sz) report_oom(sz) = return report_exception(ex) = return report_exception_name(ex) = return @@ -33,6 +31,11 @@ end struct StaticCompilerParams <: GPUCompiler.AbstractCompilerParams end GPUCompiler.runtime_module(::GPUCompiler.CompilerJob{<:Any,StaticCompilerParams}) = StaticRuntime +GPUCompiler.runtime_module(::GPUCompiler.CompilerJob{NativeCompilerTarget}) = StaticRuntime +GPUCompiler.runtime_module(::GPUCompiler.CompilerJob{NativeCompilerTarget, StaticCompilerParams}) = StaticRuntime + +GPUCompiler.can_throw(job::GPUCompiler.CompilerJob{<:Any,StaticCompilerParams}) = true +GPUCompiler.can_throw(job::GPUCompiler.CompilerJob{NativeCompilerTarget, StaticCompilerParams}) = true GPUCompiler.can_throw(job::GPUCompiler.CompilerJob{NativeCompilerTarget}) = true function native_job(@nospecialize(func), @nospecialize(types); kernel::Bool=false, name=GPUCompiler.safe_name(repr(func)), kwargs...) 
diff --git a/test/Project.toml b/test/Project.toml index b00d649..b0cf1dc 100644 --- a/test/Project.toml +++ b/test/Project.toml @@ -7,5 +7,4 @@ Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40" LoopVectorization = "bdcacae8-1622-11e9-2a5c-532679323890" ManualMemory = "d125e4d3-2237-4719-b19c-fa641b8a4667" StrideArraysCore = "7792a7ef-975c-4747-a70f-980b88e8d1da" -Distributed = "8ba89e20-285c-5b6f-9357-94700520ee1b" -ErrorTypes = "7f495686-c6d2-4c77-9e8e-e4c865675f9d" \ No newline at end of file +Distributed = "8ba89e20-285c-5b6f-9357-94700520ee1b" \ No newline at end of file diff --git a/test/runtests.jl b/test/runtests.jl index 74b240f..3bd9f0c 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -4,16 +4,13 @@ using Libdl using LinearAlgebra using LoopVectorization using ManualMemory -using StrideArraysCore using Distributed -using ErrorTypes - +using StrideArraysCore addprocs(1) -@everywhere using StaticCompiler, ErrorTypes +@everywhere using StaticCompiler, StrideArraysCore remote_load_call(path, args...) = fetch(@spawnat 2 load_function(path)(args...)) - @testset "Basics" begin simple_sum(x) = x + one(typeof(x)) @@ -147,28 +144,19 @@ end end end -@testset "Error handling" begin - # Doesn't work yet. Probably need the slow ABI :( - @test_skip begin - _, sqrt_path = compile(sqrt, (Int,)) - @test_throws DomainError remote_load_call(sqrt_path, -1) - end -end - -@testset "ErrorTypes handling" begin - function try_sqrt(x) :: Result{Float64, Nothing} - if x >= 0.0 - Ok(sqrt(x)) - else - Err(nothing) +@testset "Error handling" begin + _, path = compile(sqrt, (Int,)) + tsk = @spawnat 2 begin + try + load_function(path)(-1) + catch e; + e end end - _, sqrt_path = compile(try_sqrt, (Int,)) - @test remote_load_call(sqrt_path, -1) == none(Float64) + @test fetch(tsk) isa DomainError end - # Julia wants to treat Tuple (and other things like it) as plain bits, but LLVM wants to treat it as something with a pointer. 
# We need to be careful to not send, nor receive an unwrapped Tuple to a compiled function. # The interface made in `compile` should handle this fine. @@ -216,6 +204,23 @@ end @test_skip ccall(generate_shlib_fptr(hello, (Int,)), Int, (Int,), 1) == 1 end +# This is a trick to get stack allocated arrays inside a function body (so long as they don't escape). +# This lets us have intermediate, mutable stack allocated arrays inside our +@testset "Alloca" begin + function f(N) + # this can hold at most 100 Int values, if you use it for more, you'll segfault + buf = ManualMemory.MemoryBuffer{100, Int}(undef) + GC.@preserve buf begin + # wrap the first N values in a PtrArray + arr = PtrArray(pointer(buf), (N,)) + arr .= 1 # mutate the array to be all 1s + sum(arr) # compute the sum. It is very imporatant that no references to arr escape the function body + end + end + _, path = compile(f, (Int,)) + @test remote_load_call(path, 20) == 20 +end + # I can't beleive this works. @testset "LoopVectorization" begin function mul!(C, A, B) @@ -241,23 +246,6 @@ end -# This is a trick to get stack allocated arrays inside a function body (so long as they don't escape). -# This lets us have intermediate, mutable stack allocated arrays inside our -@testset "Alloca" begin - function f(N) - # this can hold at most 100 Int values, if you use it for more, you'll segfault - buf = ManualMemory.MemoryBuffer{100, Int}(undef) - GC.@preserve buf begin - # wrap the first N values in a PtrArray - arr = PtrArray(pointer(buf), (N,)) - arr .= 1 # mutate the array to be all 1s - sum(arr) # compute the sum. 
It is very imporatant that no references to arr escape the function body - end - end - _, path = compile(f, (Int,)) - @test remote_load_call(path, 20) == 20 -end - @testset "Standalone Executables" begin # Minimal test with no `llvmcall` @inline function foo() From 998feecc1c925b4f279f6eb69c8c305ba001c197 Mon Sep 17 00:00:00 2001 From: Mason Protter Date: Mon, 14 Feb 2022 16:20:34 -0700 Subject: [PATCH 053/159] Use Enzyme's optimization and late lowering mechanisms (#63) * Use Enzyme's optimization and late lowering mechanisms * BLAS calls work again * update README * add another test case * re-enable the `@test_skip` on remotely calling BLAS --- README.md | 10 +- src/StaticCompiler.jl | 38 ++--- src/optimize.jl | 326 ++++++++++++++++++++++++++++++++++++++++++ test/runtests.jl | 22 +-- 4 files changed, 359 insertions(+), 37 deletions(-) create mode 100644 src/optimize.jl diff --git a/README.md b/README.md index 56e0368..48509ee 100644 --- a/README.md +++ b/README.md @@ -1,12 +1,9 @@ # StaticCompiler -[![Dev](https://img.shields.io/badge/docs-dev-blue.svg)](https://tshort.github.io/StaticCompiler.jl/dev) -[![Build Status](https://travis-ci.com/tshort/StaticCompiler.jl.svg?branch=master)](https://travis-ci.com/tshort/StaticCompiler.jl) -[![Build Status](https://ci.appveyor.com/api/projects/status/github/tshort/StaticCompiler.jl?svg=true)](https://ci.appveyor.com/project/tshort/StaticCompiler-jl) +[![CI](https://github.com/tshort/StaticCompiler.jl/actions/workflows/ci.yml/badge.svg)](https://github.com/tshort/StaticCompiler.jl/actions/workflows/ci.yml) [![Codecov](https://codecov.io/gh/tshort/StaticCompiler.jl/branch/master/graph/badge.svg)](https://codecov.io/gh/tshort/StaticCompiler.jl) -[![Coveralls](https://coveralls.io/repos/github/tshort/StaticCompiler.jl/badge.svg?branch=master)](https://coveralls.io/github/tshort/StaticCompiler.jl?branch=master) -This is an experimental package to compile Julia code to standalone libraries. A system image is not needed. 
It is also meant for cross compilation, so Julia code can be compiled for other targets, including WebAssembly and embedded targets. +This is an experimental package to compile Julia code to standalone libraries. A system image is not needed. ## Installation and Usage @@ -41,6 +38,9 @@ julia> fib_compiled(10) 55 ``` +See the file `tests/runtests.jl` for some examples of functions that work with static compilation (and some that don't, +marked with `@test_skip`) + ## Approach This package uses the [GPUCompiler package](https://github.com/JuliaGPU/GPUCompiler.jl) to generate code. diff --git a/src/StaticCompiler.jl b/src/StaticCompiler.jl index 49975ad..686e6b6 100644 --- a/src/StaticCompiler.jl +++ b/src/StaticCompiler.jl @@ -15,6 +15,8 @@ export native_code_llvm, native_code_typed, native_llvm_module, native_code_nati include("target.jl") include("pointer_patching.jl") include("code_loading.jl") +include("optimize.jl") + """ compile(f, types, path::String = tempname()) --> (compiled_f, path) @@ -148,20 +150,27 @@ function generate_shlib(f, tt, path::String = tempname(), name = GPUCompiler.saf mkpath(path) obj_path = joinpath(path, "$filenamebase.o") lib_path = joinpath(path, "$filenamebase.$(Libdl.dlext)") - + tm = GPUCompiler.llvm_machine(NativeCompilerTarget()) job, kwargs = native_job(f, tt; name, kwargs...) #Get LLVM to generated a module of code for us. We don't want GPUCompiler's optimization passes. mod, meta = GPUCompiler.codegen(:llvm, job; strip=strip_llvm, only_entry=false, validate=false, optimize=false) - #use Julia's optimization pass on the LLVM code, but leave intrinsics alone - julia_opt_passes(mod, job; opt_level, lower_intrinsics=0) + + # Use Enzyme's annotation and optimization pipeline + annotate!(mod) + optimize!(mod, tm) + # Scoop up all the pointers in the optimized module, and replace them with unitialized global variables. 
- # table is a dictionary where the keys are julia objects that are needed by the function, and the values + # `table` is a dictionary where the keys are julia objects that are needed by the function, and the values # of the dictionary are the names of their associated LLVM GlobalVariable names. table = relocation_table!(mod) + # Now that we've removed all the pointers from the code, we can (hopefully) safely lower all the instrinsics - julia_opt_passes(mod, job; opt_level, lower_intrinsics=1) + # (again, using Enzyme's pipeline) + post_optimize!(mod, tm) + # Make sure we didn't make any glaring errors LLVM.verify(mod) + # Compile the LLVM module to native code and save it to disk obj, _ = GPUCompiler.emit_asm(job, mod; strip=strip_asm, validate=false, format=LLVM.API.LLVMObjectFile) open(obj_path, "w") do io @@ -170,25 +179,6 @@ function generate_shlib(f, tt, path::String = tempname(), name = GPUCompiler.saf path, name, table end -function julia_opt_passes(mod, job; opt_level, lower_intrinsics) - triple = GPUCompiler.llvm_triple(job.target) - tm = GPUCompiler.llvm_machine(job.target) - - lib_path = VERSION > v"1.8.0-DEV" ? "libjulia-codegen" : "libjulia" - - dlopen(lib_path) do lib - opt_func = dlsym(lib, "jl_add_optimization_passes") - ModulePassManager() do pm - add_library_info!(pm, triple) - add_transform_info!(pm, tm) - ccall(opt_func, Cvoid, - (LLVM.API.LLVMPassManagerRef, Cint, Cint), - pm, opt_level, lower_intrinsics) - run!(pm, mod) - end - end -end - """ ```julia compile_executable(f, types::Tuple, path::String, name::String=repr(f); filename::String=name, kwargs...) 
diff --git a/src/optimize.jl b/src/optimize.jl new file mode 100644 index 0000000..f2934a1 --- /dev/null +++ b/src/optimize.jl @@ -0,0 +1,326 @@ +# stolen from https://github.com/EnzymeAD/Enzyme.jl/blob/1b187cc16953727cab26b64bc6a6dcf106c29a57/src/compiler/optimize.jl#L213 + +function optimize!(mod::LLVM.Module, tm) + # everying except unroll, slpvec, loop-vec + # then finish Julia GC + ModulePassManager() do pm + add_library_info!(pm, triple(mod)) + add_transform_info!(pm, tm) + + propagate_julia_addrsp!(pm) + scoped_no_alias_aa!(pm) + type_based_alias_analysis!(pm) + basic_alias_analysis!(pm) + cfgsimplification!(pm) + dce!(pm) +@static if isdefined(GPUCompiler, :cpu_features!) + GPUCompiler.cpu_features!(pm) +end + scalar_repl_aggregates_ssa!(pm) # SSA variant? + mem_cpy_opt!(pm) + always_inliner!(pm) + alloc_opt!(pm) + instruction_combining!(pm) + cfgsimplification!(pm) + scalar_repl_aggregates_ssa!(pm) # SSA variant? + instruction_combining!(pm) + jump_threading!(pm) + correlated_value_propagation!(pm) + instruction_combining!(pm) + reassociate!(pm) + early_cse!(pm) + alloc_opt!(pm) + loop_idiom!(pm) + loop_rotate!(pm) + lower_simdloop!(pm) + licm!(pm) + loop_unswitch!(pm) + instruction_combining!(pm) + ind_var_simplify!(pm) + loop_deletion!(pm) + loop_unroll!(pm) + alloc_opt!(pm) + scalar_repl_aggregates_ssa!(pm) # SSA variant? + gvn!(pm) + # This InstCombine needs to be after GVN + # Otherwise it will generate load chains in GPU code... 
+ instruction_combining!(pm) + mem_cpy_opt!(pm) + sccp!(pm) + instruction_combining!(pm) + jump_threading!(pm) + dead_store_elimination!(pm) + alloc_opt!(pm) + cfgsimplification!(pm) + loop_idiom!(pm) + loop_deletion!(pm) + jump_threading!(pm) + correlated_value_propagation!(pm) + # SLP_Vectorizer -- not for Enzyme + aggressive_dce!(pm) + instruction_combining!(pm) + # Loop Vectorize -- not for Enzyme + # InstCombine + + # GC passes + barrier_noop!(pm) + gc_invariant_verifier!(pm, false) + + # FIXME: Currently crashes printing + cfgsimplification!(pm) + instruction_combining!(pm) # Extra for Enzyme + #API.EnzymeAddAttributorLegacyPass(pm) + run!(pm, mod) + end + # @show "omod", mod + # flush(stdout) + # flush(stderr) +end + +# https://github.com/JuliaLang/julia/blob/2eb5da0e25756c33d1845348836a0a92984861ac/src/aotcompile.cpp#L603 +function addTargetPasses!(pm, tm) + add_library_info!(pm, LLVM.triple(tm)) + add_transform_info!(pm, tm) +end + +# https://github.com/JuliaLang/julia/blob/2eb5da0e25756c33d1845348836a0a92984861ac/src/aotcompile.cpp#L620 +function addOptimizationPasses!(pm) + constant_merge!(pm) + + propagate_julia_addrsp!(pm) + scoped_no_alias_aa!(pm) + type_based_alias_analysis!(pm) + basic_alias_analysis!(pm) + cfgsimplification!(pm) + dce!(pm) + scalar_repl_aggregates!(pm) + + # mem_cpy_opt!(pm) + + always_inliner!(pm) # Respect always_inline + + # Running `memcpyopt` between this and `sroa` seems to give `sroa` a hard time + # merging the `alloca` for the unboxed data and the `alloca` created by the `alloc_opt` + # pass. + + alloc_opt!(pm) + # consider AggressiveInstCombinePass at optlevel > 2 + + instruction_combining!(pm) + cfgsimplification!(pm) + scalar_repl_aggregates!(pm) + instruction_combining!(pm) # TODO: createInstSimplifyLegacy + jump_threading!(pm) + correlated_value_propagation!(pm) + + reassociate!(pm) + + early_cse!(pm) + + # Load forwarding above can expose allocations that aren't actually used + # remove those before optimizing loops. 
+ alloc_opt!(pm) + loop_rotate!(pm) + # moving IndVarSimplify here prevented removing the loop in perf_sumcartesian(10:-1:1) + loop_idiom!(pm) + + # LoopRotate strips metadata from terminator, so run LowerSIMD afterwards + lower_simdloop!(pm) # Annotate loop marked with "loopinfo" as LLVM parallel loop + licm!(pm) + julia_licm!(pm) + # Subsequent passes not stripping metadata from terminator + instruction_combining!(pm) # TODO: createInstSimplifyLegacy + ind_var_simplify!(pm) + loop_deletion!(pm) + loop_unroll!(pm) # TODO: in Julia createSimpleLoopUnroll + + # Run our own SROA on heap objects before LLVM's + alloc_opt!(pm) + # Re-run SROA after loop-unrolling (useful for small loops that operate, + # over the structure of an aggregate) + scalar_repl_aggregates!(pm) + instruction_combining!(pm) # TODO: createInstSimplifyLegacy + + gvn!(pm) + mem_cpy_opt!(pm) + sccp!(pm) + + # Run instcombine after redundancy elimination to exploit opportunities + # opened up by them. + # This needs to be InstCombine instead of InstSimplify to allow + # loops over Union-typed arrays to vectorize. + instruction_combining!(pm) + jump_threading!(pm) + dead_store_elimination!(pm) + + # More dead allocation (store) deletion before loop optimization + # consider removing this: + alloc_opt!(pm) + + # see if all of the constant folding has exposed more loops + # to simplification and deletion + # this helps significantly with cleaning up iteration + cfgsimplification!(pm) + loop_deletion!(pm) + instruction_combining!(pm) + loop_vectorize!(pm) + # TODO: createLoopLoadEliminationPass + cfgsimplification!(pm) + slpvectorize!(pm) + # might need this after LLVM 11: + # TODO: createVectorCombinePass() + + aggressive_dce!(pm) +end + +function addMachinePasses!(pm) + combine_mul_add!(pm) + # TODO: createDivRemPairs[] + + demote_float16!(pm) + gvn!(pm) +end + +function addJuliaLegalizationPasses!(pm, lower_intrinsics=true) + if lower_intrinsics + # LowerPTLS removes an indirect call. 
As a result, it is likely to trigger + # LLVM's devirtualization heuristics, which would result in the entire + # pass pipeline being re-exectuted. Prevent this by inserting a barrier. + barrier_noop!(pm) + lower_exc_handlers!(pm) + gc_invariant_verifier!(pm, false) + + # Needed **before** LateLowerGCFrame on LLVM < 12 + # due to bug in `CreateAlignmentAssumption`. + remove_ni!(pm) + late_lower_gc_frame!(pm) + final_lower_gc!(pm) + # We need these two passes and the instcombine below + # after GC lowering to let LLVM do some constant propagation on the tags. + # and remove some unnecessary write barrier checks. + gvn!(pm) + sccp!(pm) + # Remove dead use of ptls + dce!(pm) + lower_ptls!(pm, #=dump_native=# false) + instruction_combining!(pm) + # Clean up write barrier and ptls lowering + cfgsimplification!(pm) + else + barrier_noop!(pm) + remove_ni!(pm) + end +end + +function post_optimize!(mod, tm) + # @show "pre_post", mod + # flush(stdout) + # flush(stderr) + LLVM.ModulePassManager() do pm + addTargetPasses!(pm, tm) + addOptimizationPasses!(pm) + run!(pm, mod) + end + LLVM.ModulePassManager() do pm + addJuliaLegalizationPasses!(pm, true) + addMachinePasses!(pm) + run!(pm, mod) + end + # @show "post_mod", mod + # flush(stdout) + # flush(stderr) +end + + + + +const inactivefns = Set{String}(( + "jl_gc_queue_root", "gpu_report_exception", "gpu_signal_exception", + "julia.ptls_states", "julia.write_barrier", "julia.typeof", "jl_box_int64", "jl_box_int32", + "jl_subtype", "julia.get_pgcstack", "jl_in_threaded_region", "jl_object_id_", "jl_object_id", + "jl_breakpoint", + "llvm.julia.gc_preserve_begin","llvm.julia.gc_preserve_end", "jl_get_ptls_states", + "jl_f_fieldtype", + "jl_symbol_n", + # BIG TODO + "jl_gc_add_finalizer_th", + # "jl_" +)) + +const activefns = Set{String}(( + "jl_", +)) + +function annotate!(mod) + ctx = context(mod) + inactive = LLVM.StringAttribute("enzyme_inactive", ""; ctx) + active = LLVM.StringAttribute("enzyme_active", ""; ctx) + fns = 
functions(mod) + + for inactivefn in inactivefns + if haskey(fns, inactivefn) + fn = fns[inactivefn] + push!(function_attributes(fn), inactive) + end + end + + for activefn in activefns + if haskey(fns, activefn) + fn = fns[activefn] + push!(function_attributes(fn), active) + end + end + + for fname in ("julia.typeof",) + if haskey(fns, fname) + fn = fns[fname] + push!(function_attributes(fn), LLVM.EnumAttribute("readnone", 0; ctx)) + push!(function_attributes(fn), LLVM.StringAttribute("enzyme_shouldrecompute"; ctx)) + end + end + + for fname in ("julia.get_pgcstack", "julia.ptls_states", "jl_get_ptls_states") + if haskey(fns, fname) + fn = fns[fname] + # TODO per discussion w keno perhaps this should change to readonly / inaccessiblememonly + push!(function_attributes(fn), LLVM.EnumAttribute("readnone", 0; ctx)) + end + end + + for fname in ("julia.pointer_from_objref",) + if haskey(fns, fname) + fn = fns[fname] + push!(function_attributes(fn), LLVM.EnumAttribute("readnone", 0; ctx)) + end + end + + for boxfn in ("jl_box_float32", "jl_box_float64", "jl_box_int32", "jl_box_int64", "julia.gc_alloc_obj", "jl_alloc_array_1d", "jl_alloc_array_2d", "jl_alloc_array_3d") + if haskey(fns, boxfn) + fn = fns[boxfn] + push!(return_attributes(fn), LLVM.EnumAttribute("noalias", 0; ctx)) + push!(function_attributes(fn), LLVM.EnumAttribute("inaccessiblememonly", 0; ctx)) + end + end + + for gc in ("llvm.julia.gc_preserve_begin", "llvm.julia.gc_preserve_end") + if haskey(fns, gc) + fn = fns[gc] + push!(function_attributes(fn), LLVM.EnumAttribute("inaccessiblememonly", 0; ctx)) + end + end + + for rfn in ("jl_object_id_", "jl_object_id") + if haskey(fns, rfn) + fn = fns[rfn] + push!(function_attributes(fn), LLVM.EnumAttribute("readonly", 0; ctx)) + end + end + + for rfn in ("jl_in_threaded_region_", "jl_in_threaded_region") + if haskey(fns, rfn) + fn = fns[rfn] + push!(function_attributes(fn), LLVM.EnumAttribute("readonly", 0; ctx)) + push!(function_attributes(fn), 
LLVM.EnumAttribute("inaccessiblememonly", 0; ctx)) + end + end +end diff --git a/test/runtests.jl b/test/runtests.jl index 3bd9f0c..55f0488 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -176,12 +176,16 @@ end end a = [1.0, 2.0] - # This used to work within a session, but now that I'm doing pointer relocation a bit better, - # it's chocking on the `BLAS` pointer call. I'm not sure yet how to relocate this properly. + mydot_compiled, path = compile(mydot, (Vector{Float64},)) + # Works locally for me, but not on CI. Need some improvements to pointer relocation to be robust. + @test_skip remote_load_call(path, a) == 5.0 + @test mydot_compiled(a) ≈ 5.0 + + # This will need some more work apparently @test_skip begin - mydot_compiled, path = compile(mydot, (Vector{Float64},)) - @test remote_load_call(path, a) == 5.0 - @test mydot_compiled(a) == 5.0 + _, path = compile((*), (Matrix{Float64}, Matrix{Float64})) + A, B = rand(10, 11), rand(11, 12) + @test remote_load_call(path, A, B) ≈ A * B end end @@ -199,9 +203,11 @@ end println("Hello World $N") N end - # How do I test this? - # Also ... this segfaults - @test_skip ccall(generate_shlib_fptr(hello, (Int,)), Int, (Int,), 1) == 1 + # We'll need to be able to relocate a bunch of UV stuff for this, and deal with dynamic dispatch. + @test_skip begin + hello_compiled, path = compile(hello, (Int,)) + @test_skip remote_load_call(path, 1) == 1 + end end # This is a trick to get stack allocated arrays inside a function body (so long as they don't escape). From 57e04251246815554b11431dd18f934b33aa4be9 Mon Sep 17 00:00:00 2001 From: "C. 
Brenhin Keller" Date: Sat, 19 Feb 2022 15:11:59 -0500 Subject: [PATCH 054/159] Add `compile_dylib` (#64) * Clean up `generate_executable` * Delete unused line * Rename generate_shlib to generate_obj * Add generate_dylib * Test generate_dylib * Add and test `compile_dylib` * s/_dylib/_shlib/g to keep original terminology --- src/StaticCompiler.jl | 207 ++++++++++++++++++++++++++++-------------- test/runtests.jl | 32 ++++--- 2 files changed, 156 insertions(+), 83 deletions(-) diff --git a/src/StaticCompiler.jl b/src/StaticCompiler.jl index 686e6b6..da4b804 100644 --- a/src/StaticCompiler.jl +++ b/src/StaticCompiler.jl @@ -9,7 +9,7 @@ using Base: RefValue using Serialization: serialize, deserialize using Clang_jll: clang -export compile, load_function, compile_executable +export compile, load_function, compile_shlib, compile_executable export native_code_llvm, native_code_typed, native_llvm_module, native_code_native include("target.jl") @@ -93,10 +93,10 @@ function compile(f, _tt, path::String = tempname(); name = GPUCompiler.safe_nam rt = only(native_code_typed(f, tt))[2] isconcretetype(rt) || error("$f on $_tt did not infer to a concrete type. Got $rt") - + f_wrap!(out::Ref, args::Ref{<:Tuple}) = (out[] = f(args[]...); nothing) - _, _, table = generate_shlib(f_wrap!, Tuple{RefValue{rt}, RefValue{tt}}, path, name; opt_level, strip_llvm, strip_asm, filename, kwargs...) - + _, _, table = generate_obj(f_wrap!, Tuple{RefValue{rt}, RefValue{tt}}, path, name; opt_level, strip_llvm, strip_asm, filename, kwargs...) + lf = LazyStaticCompiledFunction{rt, tt}(Symbol(f), path, name, filename, table) cjl_path = joinpath(path, "$filename.cjl") serialize(cjl_path, lf) @@ -106,71 +106,59 @@ end """ ```julia -generate_shlib(f, tt, path::String, name::String, filenamebase::String="obj"; kwargs...) 
+generate_obj(f, tt, path::String = tempname(), name = GPUCompiler.safe_name(repr(f)), filenamebase::String="obj"; + \tstrip_llvm = false, + \tstrip_asm = true, + \topt_level=3, + \tkwargs...) ``` -Low level interface for compiling a shared object / dynamically loaded library - (`.so` / `.dylib`) for function `f` given a tuple type `tt` characterizing -the types of the arguments for which the function will be compiled. - -See also `StaticCompiler.generate_shlib_fptr`. +Low level interface for compiling object code (`.o`) for for function `f` given +a tuple type `tt` characterizing the types of the arguments for which the +function will be compiled. ### Examples ```julia -julia> function test(n) - r = 0.0 - for i=1:n - r += log(sqrt(i)) - end - return r/n - end -test (generic function with 1 method) +julia> fib(n) = n <= 1 ? n : fib(n - 1) + fib(n - 2) +fib (generic function with 1 method) -julia> path, name = StaticCompiler.generate_shlib(test, Tuple{Int64}, "./test") -("./test", "test") +julia> path, name, table = StaticCompiler.generate_obj(fib, Tuple{Int64}, "./test") +("./test", "fib", IdDict{Any, String}()) shell> tree \$path ./test -|-- obj.o -`-- obj.so - -0 directories, 2 files - -julia> test(100_000) -5.256496109495593 +└── obj.o -julia> ccall(StaticCompiler.generate_shlib_fptr(path, name), Float64, (Int64,), 100_000) -5.256496109495593 +0 directories, 1 file ``` """ -function generate_shlib(f, tt, path::String = tempname(), name = GPUCompiler.safe_name(repr(f)), filenamebase::String="obj"; +function generate_obj(f, tt, path::String = tempname(), name = GPUCompiler.safe_name(repr(f)), filenamebase::String="obj"; strip_llvm = false, strip_asm = true, opt_level=3, kwargs...) mkpath(path) obj_path = joinpath(path, "$filenamebase.o") - lib_path = joinpath(path, "$filenamebase.$(Libdl.dlext)") tm = GPUCompiler.llvm_machine(NativeCompilerTarget()) job, kwargs = native_job(f, tt; name, kwargs...) #Get LLVM to generated a module of code for us. 
We don't want GPUCompiler's optimization passes. mod, meta = GPUCompiler.codegen(:llvm, job; strip=strip_llvm, only_entry=false, validate=false, optimize=false) - + # Use Enzyme's annotation and optimization pipeline annotate!(mod) optimize!(mod, tm) - + # Scoop up all the pointers in the optimized module, and replace them with unitialized global variables. # `table` is a dictionary where the keys are julia objects that are needed by the function, and the values # of the dictionary are the names of their associated LLVM GlobalVariable names. table = relocation_table!(mod) - + # Now that we've removed all the pointers from the code, we can (hopefully) safely lower all the instrinsics # (again, using Enzyme's pipeline) post_optimize!(mod, tm) - + # Make sure we didn't make any glaring errors LLVM.verify(mod) - + # Compile the LLVM module to native code and save it to disk obj, _ = GPUCompiler.emit_asm(job, mod; strip=strip_asm, validate=false, format=LLVM.API.LLVMObjectFile) open(obj_path, "w") do io @@ -237,12 +225,15 @@ shell> ./hello Hello, world! ``` """ -function compile_executable(f, _tt=(), path::String="./", name=GPUCompiler.safe_name(repr(f)); filename=name, kwargs...) - tt = Base.to_tuple_type(_tt) - tt == Tuple{} || tt == Tuple{Int, Ptr{Ptr{UInt8}}} || error("input type signature $_tt must be either () or (Int, Ptr{Ptr{UInt8}})") +function compile_executable(f, types=(), path::String="./", name=GPUCompiler.safe_name(repr(f)); + filename=name, + kwargs...) + + tt = Base.to_tuple_type(types) + tt == Tuple{} || tt == Tuple{Int, Ptr{Ptr{UInt8}}} || error("input type signature $types must be either () or (Int, Ptr{Ptr{UInt8}})") rt = only(native_code_typed(f, tt))[2] - isconcretetype(rt) || error("$f$_tt did not infer to a concrete type. Got $rt") + isconcretetype(rt) || error("$f$types did not infer to a concrete type. 
Got $rt") # Would be nice to use a compiler pass or something to check if there are any heap allocations or references to globals # Keep an eye on https://github.com/JuliaLang/julia/pull/43747 for this @@ -253,10 +244,36 @@ function compile_executable(f, _tt=(), path::String="./", name=GPUCompiler.safe_ end +""" +```julia +compile_shlib(f, types::Tuple, path::String, name::String=repr(f); filename::String=name, kwargs...) +``` +As `compile_executable`, but compiling to a standalone `.dylib`/`.so` shared library. +""" +function compile_shlib(f, types=(), path::String="./", name=GPUCompiler.safe_name(repr(f)); + filename=name, + kwargs...) + + tt = Base.to_tuple_type(types) + isconcretetype(tt) || error("input type signature $types is not concrete") + + rt = only(native_code_typed(f, tt))[2] + isconcretetype(rt) || error("$f$types did not infer to a concrete type. Got $rt") + + # Would be nice to use a compiler pass or something to check if there are any heap allocations or references to globals + # Keep an eye on https://github.com/JuliaLang/julia/pull/43747 for this + + generate_shlib(f, tt, path, name, filename; kwargs...) + + joinpath(abspath(path), filename * "." * Libdl.dlext) +end + +function generate_shlib_fptr(f, tt, path::String=tempname(), name = GPUCompiler.safe_name(repr(f)), filename::String=name; + temp::Bool=true, + kwargs...) -function generate_shlib_fptr(f, tt, path::String=tempname(), name = GPUCompiler.safe_name(repr(f)), filenamebase::String="obj"; temp::Bool=true, kwargs...) generate_shlib(f, tt, path, name; kwargs...) 
- lib_path = joinpath(abspath(path), "$filenamebase.$(Libdl.dlext)") + lib_path = joinpath(abspath(path), "$filename.$(Libdl.dlext)") ptr = Libdl.dlopen(lib_path, Libdl.RTLD_LOCAL) fptr = Libdl.dlsym(ptr, "julia_$name") @assert fptr != C_NULL @@ -302,8 +319,8 @@ julia> test(100_000) 5.256496109495593 ``` """ -function generate_shlib_fptr(path::String, name, filenamebase::String="obj") - lib_path = joinpath(abspath(path), "$filenamebase.$(Libdl.dlext)") +function generate_shlib_fptr(path::String, name, filename::String=name) + lib_path = joinpath(abspath(path), "$filename.$(Libdl.dlext)") ptr = Libdl.dlopen(lib_path, Libdl.RTLD_LOCAL) fptr = Libdl.dlsym(ptr, "julia_$name") @assert fptr != C_NULL @@ -334,38 +351,90 @@ function generate_executable(f, tt, path::String = tempname(), name = GPUCompile mkpath(path) obj_path = joinpath(path, "$filename.o") exec_path = joinpath(path, filename) + job, kwargs = native_job(f, tt; name, kwargs...) + obj, _ = GPUCompiler.codegen(:obj, job; strip=true, only_entry=false, validate=false) + + # Write to file open(obj_path, "w") do io - job, kwargs = native_job(f, tt; name, kwargs...) - obj, _ = GPUCompiler.codegen(:obj, job; strip=true, only_entry=false, validate=false) + write(io, obj) + end + + # Pick a compiler + cc = Sys.isapple() ? `cc` : clang() + # Compile! + if Sys.isapple() + # Apple no longer uses _start, so we can just specify a custom entry + entry = "_julia_$name" + run(`$cc -e $entry $obj_path -o $exec_path`) + else + # Write a minimal wrapper to avoid having to specify a custom entry + wrapper_path = joinpath(path, "wrapper.c") + f = open(wrapper_path, "w") + print(f, """int main(int argc, char** argv) + { + julia_$name(argc, argv); + return 0; + }""") + close(f) + run(`$cc $wrapper_path $obj_path -o $exec_path`) + # Clean up + run(`rm $wrapper_path`) + end + + path, name +end + +""" +```julia +generate_shlib(f, tt, path::String, name::String, filenamebase::String="obj"; kwargs...) 
+``` +Low level interface for compiling a shared object / dynamically loaded library + (`.so` / `.dylib`) for function `f` given a tuple type `tt` characterizing +the types of the arguments for which the function will be compiled. +See also `StaticCompiler.generate_shlib_fptr`. +### Examples +```julia +julia> function test(n) + r = 0.0 + for i=1:n + r += log(sqrt(i)) + end + return r/n + end +test (generic function with 1 method) +julia> path, name = StaticCompiler.generate_shlib(test, Tuple{Int64}, "./test") +("./test", "test") +shell> tree \$path +./test +|-- obj.o +`-- obj.so +0 directories, 2 files +julia> test(100_000) +5.256496109495593 +julia> ccall(StaticCompiler.generate_shlib_fptr(path, name), Float64, (Int64,), 100_000) +5.256496109495593 +``` +""" +function generate_shlib(f, tt, path::String = tempname(), name = GPUCompiler.safe_name(repr(f)), filename::String=name; kwargs...) + mkpath(path) + obj_path = joinpath(path, "$filename.o") + lib_path = joinpath(path, "$filename.$(Libdl.dlext)") + job, kwargs = native_job(f, tt; name, kwargs...) + obj, _ = GPUCompiler.codegen(:obj, job; strip=true, only_entry=false, validate=false) + open(obj_path, "w") do io write(io, obj) - flush(io) - - # Pick a compiler - cc = Sys.isapple() ? `cc` : clang() - # Compile! - if Sys.isapple() - # Apple no longer uses _start, so we can just specify a custom entry - entry = "_julia_$name" - run(`$cc -e $entry $obj_path -o $exec_path`) - else - # Write a minimal wrapper to avoid having to specify a custom entry - wrapper_path = joinpath(path, "wrapper.c") - f = open(wrapper_path, "w") - print(f, """int main(int argc, char** argv) - { - julia_$name(argc, argv); - return 0; - }""") - close(f) - run(`$cc $wrapper_path $obj_path -o $exec_path`) - # Clean up - run(`rm $wrapper_path`) - end end + + # Pick a Clang + cc = Sys.isapple() ? `cc` : clang() + # Compile! 
+ run(`$cc -shared -o $lib_path $obj_path`) + path, name end + function native_code_llvm(@nospecialize(func), @nospecialize(types); kwargs...) job, kwargs = native_job(func, types; kwargs...) GPUCompiler.code_llvm(stdout, job; kwargs...) diff --git a/test/runtests.jl b/test/runtests.jl index 55f0488..831a3a2 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -38,18 +38,6 @@ fib(n) = n <= 1 ? n : fib(n - 1) + fib(n - 2) # This needs to be defined globall #@test compile(fib2, (Int,))[1](20) == fib(20) end -# Call binaries for testing -# @testset "Generate binary" begin -# fib(n) = n <= 1 ? n : fib(n - 1) + fib(n - 2) -# libname = tempname() -# generate_shlib(fib, (Int,), libname) -# ptr = Libdl.dlopen(libname * "." * Libdl.dlext, Libdl.RTLD_LOCAL) -# fptr = Libdl.dlsym(ptr, "julia_fib") -# @assert fptr != C_NULL -# # This works on REPL -# @test_skip ccall(fptr, Int, (Int,), 10) == 55 -# end - @testset "Loops" begin function sum_first_N_int(N) @@ -154,7 +142,7 @@ end e end end - @test fetch(tsk) isa DomainError + @test fetch(tsk) isa DomainError end # Julia wants to treat Tuple (and other things like it) as plain bits, but LLVM wants to treat it as something with a pointer. @@ -175,7 +163,7 @@ end BLAS.dot(N, a, 1, a, 1) end a = [1.0, 2.0] - + mydot_compiled, path = compile(mydot, (Vector{Float64},)) # Works locally for me, but not on CI. Need some improvements to pointer relocation to be robust. @test_skip remote_load_call(path, a) == 5.0 @@ -250,6 +238,22 @@ end @test C ≈ A*B end +@testset "Standalone Dylibs" begin + # Test function + # (already defined) + # fib(n) = n <= 1 ? 
n : fib(n - 1) + fib(n - 2) + + #Compile dylib + name = repr(fib) + filepath = compile_shlib(fib, (Int,), "./", name) + @test occursin("fib.$(Libdl.dlext)", filepath) + + # Open dylib + ptr = Libdl.dlopen(filepath, Libdl.RTLD_LOCAL) + fptr = Libdl.dlsym(ptr, "julia_$name") + @test fptr != C_NULL + @test ccall(fptr, Int, (Int,), 10) == 55 +end @testset "Standalone Executables" begin From 12f475ba737393222f52291aa837c9da7a4c53e4 Mon Sep 17 00:00:00 2001 From: Mason Protter Date: Sat, 19 Feb 2022 13:29:22 -0700 Subject: [PATCH 055/159] Increment versio. --- Project.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Project.toml b/Project.toml index ef7854a..6434109 100644 --- a/Project.toml +++ b/Project.toml @@ -1,7 +1,7 @@ name = "StaticCompiler" uuid = "81625895-6c0f-48fc-b932-11a18313743c" authors = ["Tom Short"] -version = "0.4.0" +version = "0.4.1" [deps] Clang_jll = "0ee61d77-7f21-5576-8119-9fcc46b10100" From 0409c50d5c2fb4cfc3612b321e33971891ac9a9c Mon Sep 17 00:00:00 2001 From: "C. 
Brenhin Keller" Date: Sat, 19 Feb 2022 16:05:09 -0500 Subject: [PATCH 056/159] Remove JLD2 from Project.toml (#65) * Remove JLD2 from Project.toml * Delete Manifest.toml --- Manifest.toml | 264 -------------------------------------------------- Project.toml | 1 - 2 files changed, 265 deletions(-) delete mode 100644 Manifest.toml diff --git a/Manifest.toml b/Manifest.toml deleted file mode 100644 index a31f820..0000000 --- a/Manifest.toml +++ /dev/null @@ -1,264 +0,0 @@ -# This file is machine-generated - editing it directly is not advised - -julia_version = "1.7.2" -manifest_format = "2.0" - -[[deps.ArgTools]] -uuid = "0dad84c5-d112-42e6-8d28-ef12dabb789f" - -[[deps.Artifacts]] -uuid = "56f22d72-fd6d-98f1-02f0-08ddc0907c33" - -[[deps.Base64]] -uuid = "2a0f44e3-6c83-55bd-87e4-b1978d98bd5f" - -[[deps.CEnum]] -git-tree-sha1 = "215a9aa4a1f23fbd05b92769fdd62559488d70e9" -uuid = "fa961155-64e5-5f13-b03f-caf6b980ea82" -version = "0.4.1" - -[[deps.Clang_jll]] -deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg", "Zlib_jll", "libLLVM_jll"] -git-tree-sha1 = "8cf7e67e264dedc5d321ec87e78525e958aea057" -uuid = "0ee61d77-7f21-5576-8119-9fcc46b10100" -version = "12.0.1+3" - -[[deps.Compat]] -deps = ["Base64", "Dates", "DelimitedFiles", "Distributed", "InteractiveUtils", "LibGit2", "Libdl", "LinearAlgebra", "Markdown", "Mmap", "Pkg", "Printf", "REPL", "Random", "SHA", "Serialization", "SharedArrays", "Sockets", "SparseArrays", "Statistics", "Test", "UUIDs", "Unicode"] -git-tree-sha1 = "44c37b4636bc54afac5c574d2d02b625349d6582" -uuid = "34da2185-b29b-5c13-b0c7-acf172513d20" -version = "3.41.0" - -[[deps.CompilerSupportLibraries_jll]] -deps = ["Artifacts", "Libdl"] -uuid = "e66e0078-7015-5450-92f7-15fbd957f2ae" - -[[deps.DataStructures]] -deps = ["Compat", "InteractiveUtils", "OrderedCollections"] -git-tree-sha1 = "3daef5523dd2e769dad2365274f760ff5f282c7d" -uuid = "864edb3b-99cc-5e75-8d2d-829cb0a9cfe8" -version = "0.18.11" - -[[deps.Dates]] -deps = ["Printf"] -uuid = 
"ade2ca70-3891-5945-98fb-dc099432e06a" - -[[deps.DelimitedFiles]] -deps = ["Mmap"] -uuid = "8bb1440f-4735-579b-a4ab-409b98df4dab" - -[[deps.Distributed]] -deps = ["Random", "Serialization", "Sockets"] -uuid = "8ba89e20-285c-5b6f-9357-94700520ee1b" - -[[deps.Downloads]] -deps = ["ArgTools", "LibCURL", "NetworkOptions"] -uuid = "f43a241f-c20a-4ad4-852c-f6b1247861c6" - -[[deps.ExprTools]] -git-tree-sha1 = "56559bbef6ca5ea0c0818fa5c90320398a6fbf8d" -uuid = "e2ba6199-217a-4e67-a87a-7c52f15ade04" -version = "0.1.8" - -[[deps.FileIO]] -deps = ["Pkg", "Requires", "UUIDs"] -git-tree-sha1 = "80ced645013a5dbdc52cf70329399c35ce007fae" -uuid = "5789e2e9-d7fb-5bc7-8068-2c6fae9b9549" -version = "1.13.0" - -[[deps.GPUCompiler]] -deps = ["ExprTools", "InteractiveUtils", "LLVM", "Libdl", "Logging", "TimerOutputs", "UUIDs"] -git-tree-sha1 = "abd824e1f2ecd18d33811629c781441e94a24e81" -uuid = "61eb1bfa-7361-4325-ad38-22787b887f55" -version = "0.13.11" - -[[deps.InteractiveUtils]] -deps = ["Markdown"] -uuid = "b77e0a4c-d291-57a0-90e8-8db25a27a240" - -[[deps.JLD2]] -deps = ["DataStructures", "FileIO", "MacroTools", "Mmap", "Pkg", "Printf", "Reexport", "TranscodingStreams", "UUIDs"] -git-tree-sha1 = "b528d68220e2aba1d2d0c0461b6f7eda8c5c1e33" -uuid = "033835bb-8acc-5ee8-8aae-3f567f8a3819" -version = "0.4.20" - -[[deps.JLLWrappers]] -deps = ["Preferences"] -git-tree-sha1 = "abc9885a7ca2052a736a600f7fa66209f96506e1" -uuid = "692b3bcd-3c85-4b1f-b108-f13ce0eb3210" -version = "1.4.1" - -[[deps.LLVM]] -deps = ["CEnum", "LLVMExtra_jll", "Libdl", "Printf", "Unicode"] -git-tree-sha1 = "f8dcd7adfda0dddaf944e62476d823164cccc217" -uuid = "929cbde3-209d-540e-8aea-75f648917ca0" -version = "4.7.1" - -[[deps.LLVMExtra_jll]] -deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg"] -git-tree-sha1 = "62115afed394c016c2d3096c5b85c407b48be96b" -uuid = "dad2f222-ce93-54a1-a47d-0025e8a3acab" -version = "0.0.13+1" - -[[deps.LibCURL]] -deps = ["LibCURL_jll", "MozillaCACerts_jll"] -uuid = 
"b27032c2-a3e7-50c8-80cd-2d36dbcbfd21" - -[[deps.LibCURL_jll]] -deps = ["Artifacts", "LibSSH2_jll", "Libdl", "MbedTLS_jll", "Zlib_jll", "nghttp2_jll"] -uuid = "deac9b47-8bc7-5906-a0fe-35ac56dc84c0" - -[[deps.LibGit2]] -deps = ["Base64", "NetworkOptions", "Printf", "SHA"] -uuid = "76f85450-5226-5b5a-8eaa-529ad045b433" - -[[deps.LibSSH2_jll]] -deps = ["Artifacts", "Libdl", "MbedTLS_jll"] -uuid = "29816b5a-b9ab-546f-933c-edad1886dfa8" - -[[deps.Libdl]] -uuid = "8f399da3-3557-5675-b5ff-fb832c97cbdb" - -[[deps.LinearAlgebra]] -deps = ["Libdl", "libblastrampoline_jll"] -uuid = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e" - -[[deps.Logging]] -uuid = "56ddb016-857b-54e1-b83d-db4d58db5568" - -[[deps.MacroTools]] -deps = ["Markdown", "Random"] -git-tree-sha1 = "3d3e902b31198a27340d0bf00d6ac452866021cf" -uuid = "1914dd2f-81c6-5fcd-8719-6d5c9610ff09" -version = "0.5.9" - -[[deps.Markdown]] -deps = ["Base64"] -uuid = "d6f4376e-aef5-505a-96c1-9c027394607a" - -[[deps.MbedTLS_jll]] -deps = ["Artifacts", "Libdl"] -uuid = "c8ffd9c3-330d-5841-b78e-0817d7145fa1" - -[[deps.Mmap]] -uuid = "a63ad114-7e13-5084-954f-fe012c677804" - -[[deps.MozillaCACerts_jll]] -uuid = "14a3606d-f60d-562e-9121-12d972cd8159" - -[[deps.NetworkOptions]] -uuid = "ca575930-c2e3-43a9-ace4-1e988b2c1908" - -[[deps.OpenBLAS_jll]] -deps = ["Artifacts", "CompilerSupportLibraries_jll", "Libdl"] -uuid = "4536629a-c528-5b80-bd46-f80d51c5b363" - -[[deps.OrderedCollections]] -git-tree-sha1 = "85f8e6578bf1f9ee0d11e7bb1b1456435479d47c" -uuid = "bac558e1-5e72-5ebc-8fee-abe8a469f55d" -version = "1.4.1" - -[[deps.Pkg]] -deps = ["Artifacts", "Dates", "Downloads", "LibGit2", "Libdl", "Logging", "Markdown", "Printf", "REPL", "Random", "SHA", "Serialization", "TOML", "Tar", "UUIDs", "p7zip_jll"] -uuid = "44cfe95a-1eb2-52ea-b672-e2afdf69b78f" - -[[deps.Preferences]] -deps = ["TOML"] -git-tree-sha1 = "2cf929d64681236a2e074ffafb8d568733d2e6af" -uuid = "21216c6a-2e73-6563-6e65-726566657250" -version = "1.2.3" - -[[deps.Printf]] -deps = 
["Unicode"] -uuid = "de0858da-6303-5e67-8744-51eddeeeb8d7" - -[[deps.REPL]] -deps = ["InteractiveUtils", "Markdown", "Sockets", "Unicode"] -uuid = "3fa0cd96-eef1-5676-8a61-b3b8758bbffb" - -[[deps.Random]] -deps = ["SHA", "Serialization"] -uuid = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c" - -[[deps.Reexport]] -git-tree-sha1 = "45e428421666073eab6f2da5c9d310d99bb12f9b" -uuid = "189a3867-3050-52da-a836-e630ba90ab69" -version = "1.2.2" - -[[deps.Requires]] -deps = ["UUIDs"] -git-tree-sha1 = "838a3a4188e2ded87a4f9f184b4b0d78a1e91cb7" -uuid = "ae029012-a4dd-5104-9daa-d747884805df" -version = "1.3.0" - -[[deps.SHA]] -uuid = "ea8e919c-243c-51af-8825-aaa63cd721ce" - -[[deps.Serialization]] -uuid = "9e88b42a-f829-5b0c-bbe9-9e923198166b" - -[[deps.SharedArrays]] -deps = ["Distributed", "Mmap", "Random", "Serialization"] -uuid = "1a1011a3-84de-559e-8e89-a11a2f7dc383" - -[[deps.Sockets]] -uuid = "6462fe0b-24de-5631-8697-dd941f90decc" - -[[deps.SparseArrays]] -deps = ["LinearAlgebra", "Random"] -uuid = "2f01184e-e22b-5df5-ae63-d93ebab69eaf" - -[[deps.Statistics]] -deps = ["LinearAlgebra", "SparseArrays"] -uuid = "10745b16-79ce-11e8-11f9-7d13ad32a3b2" - -[[deps.TOML]] -deps = ["Dates"] -uuid = "fa267f1f-6049-4f14-aa54-33bafae1ed76" - -[[deps.Tar]] -deps = ["ArgTools", "SHA"] -uuid = "a4e569a6-e804-4fa4-b0f3-eef7a1d5b13e" - -[[deps.Test]] -deps = ["InteractiveUtils", "Logging", "Random", "Serialization"] -uuid = "8dfed614-e22c-5e08-85e1-65c5234f0b40" - -[[deps.TimerOutputs]] -deps = ["ExprTools", "Printf"] -git-tree-sha1 = "97e999be94a7147d0609d0b9fc9feca4bf24d76b" -uuid = "a759f4b9-e2f1-59dc-863e-4aeb61b1ea8f" -version = "0.5.15" - -[[deps.TranscodingStreams]] -deps = ["Random", "Test"] -git-tree-sha1 = "216b95ea110b5972db65aa90f88d8d89dcb8851c" -uuid = "3bb67fe8-82b1-5028-8e26-92a6c54297fa" -version = "0.9.6" - -[[deps.UUIDs]] -deps = ["Random", "SHA"] -uuid = "cf7118a7-6976-5b1a-9a39-7adc72f591a4" - -[[deps.Unicode]] -uuid = "4ec0a83e-493e-50e2-b9ac-8f72acf5a8f5" - 
-[[deps.Zlib_jll]] -deps = ["Libdl"] -uuid = "83775a58-1f1d-513f-b197-d71354ab007a" - -[[deps.libLLVM_jll]] -deps = ["Artifacts", "Libdl"] -uuid = "8f36deef-c2a5-5394-99ed-8e07531fb29a" - -[[deps.libblastrampoline_jll]] -deps = ["Artifacts", "Libdl", "OpenBLAS_jll"] -uuid = "8e850b90-86db-534c-a0d3-1478176c7d93" - -[[deps.nghttp2_jll]] -deps = ["Artifacts", "Libdl"] -uuid = "8e850ede-7688-5339-a07c-302acd2aaf8d" - -[[deps.p7zip_jll]] -deps = ["Artifacts", "Libdl"] -uuid = "3f19e933-33d8-53b3-aaab-bd5110c3b7a0" diff --git a/Project.toml b/Project.toml index 6434109..e0c6cdc 100644 --- a/Project.toml +++ b/Project.toml @@ -6,7 +6,6 @@ version = "0.4.1" [deps] Clang_jll = "0ee61d77-7f21-5576-8119-9fcc46b10100" GPUCompiler = "61eb1bfa-7361-4325-ad38-22787b887f55" -JLD2 = "033835bb-8acc-5ee8-8aae-3f567f8a3819" LLVM = "929cbde3-209d-540e-8aea-75f648917ca0" Libdl = "8f399da3-3557-5675-b5ff-fb832c97cbdb" Serialization = "9e88b42a-f829-5b0c-bbe9-9e923198166b" From c63f1d8966e2833e836bb95b95b771e29967ebeb Mon Sep 17 00:00:00 2001 From: Tom Short Date: Sun, 20 Feb 2022 17:58:33 -0500 Subject: [PATCH 057/159] Update LICENSE Did I miss anyone? 
--- LICENSE | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/LICENSE b/LICENSE index acf4548..cb19b0a 100644 --- a/LICENSE +++ b/LICENSE @@ -1,4 +1,6 @@ -Copyright (c) 2019 Tom Short +Copyright (c) 2019-2022 Mason Protter, William Moses, Valentin Churavy, + Brenhin Keller, Julian Samaroo, Tom Short, and + other contributors Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal From 59efe6e0814a71d3dea0b7c4893cf2d19264fb15 Mon Sep 17 00:00:00 2001 From: Mason Protter Date: Thu, 24 Feb 2022 16:17:11 -0700 Subject: [PATCH 058/159] Bump LLVM version This should fix the breakages that happened on the MacOS master builds --- Project.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Project.toml b/Project.toml index e0c6cdc..3ead116 100644 --- a/Project.toml +++ b/Project.toml @@ -12,7 +12,7 @@ Serialization = "9e88b42a-f829-5b0c-bbe9-9e923198166b" [compat] GPUCompiler = "0.13" -LLVM = "4" +LLVM = "4.8" julia = "1.7" [extras] From f3c9133a73308974c4d540c04ba9007effa37f19 Mon Sep 17 00:00:00 2001 From: Mason Protter Date: Thu, 24 Feb 2022 16:40:11 -0700 Subject: [PATCH 059/159] Update README.md --- README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 48509ee..3ab4251 100644 --- a/README.md +++ b/README.md @@ -9,7 +9,7 @@ This is an experimental package to compile Julia code to standalone libraries. A ```julia using Pkg -Pkg.add(PackageSpec( url = "https://github.com/tshort/StaticCompiler.jl", rev = "master")) +Pkg.add("StaticCompiler") ``` ```julia @@ -51,4 +51,4 @@ This package uses the [GPUCompiler package](https://github.com/JuliaGPU/GPUCompi * GC-tracked allocations and global varaibles do *not* work with `compile_executable` (yet). * Type unstable code is not yet supported. * Doesn't currently work on Windows. -* If you find any other limitations, let us know. 
There's probably lots. \ No newline at end of file +* If you find any other limitations, let us know. There's probably lots. From e6e024e2ab0ff1646af25a84aaeb8e54495089d5 Mon Sep 17 00:00:00 2001 From: Mason Protter Date: Thu, 24 Feb 2022 16:41:20 -0700 Subject: [PATCH 060/159] bump patch version --- Project.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Project.toml b/Project.toml index 3ead116..2c8d290 100644 --- a/Project.toml +++ b/Project.toml @@ -1,7 +1,7 @@ name = "StaticCompiler" uuid = "81625895-6c0f-48fc-b932-11a18313743c" authors = ["Tom Short"] -version = "0.4.1" +version = "0.4.2" [deps] Clang_jll = "0ee61d77-7f21-5576-8119-9fcc46b10100" From 658a5cdf20cde19e97a80fbcc73d4618b783cd04 Mon Sep 17 00:00:00 2001 From: Mason Protter Date: Sun, 27 Feb 2022 15:12:25 -0700 Subject: [PATCH 061/159] add v1.8 testing --- .github/workflows/ci.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 03f8476..e1a990d 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -11,6 +11,7 @@ jobs: matrix: version: - '1.7' + - '1.8' - 'nightly' os: - ubuntu-latest From dfd5aa2e681859d0c3737a82c1935b0a7291cea2 Mon Sep 17 00:00:00 2001 From: Mason Protter Date: Sun, 27 Feb 2022 15:14:32 -0700 Subject: [PATCH 062/159] fix versionsoec --- .github/workflows/ci.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index e1a990d..1eca9e1 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -11,7 +11,7 @@ jobs: matrix: version: - '1.7' - - '1.8' + - '~1.8.0-0' - 'nightly' os: - ubuntu-latest From 3c31bae58be4a91adb4c6212f33bf13130746c8f Mon Sep 17 00:00:00 2001 From: Helge Eichhorn Date: Mon, 11 Apr 2022 13:04:17 +0200 Subject: [PATCH 063/159] Fix typo --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 3ab4251..a14add0 100644 --- a/README.md +++ 
b/README.md @@ -48,7 +48,7 @@ This package uses the [GPUCompiler package](https://github.com/JuliaGPU/GPUCompi ## Limitations * GC-tracked allocations and global variables do work with `compile`, but the way they are implemented is brittle and can be dangerous. Allocate with care. -* GC-tracked allocations and global varaibles do *not* work with `compile_executable` (yet). +* GC-tracked allocations and global variables do *not* work with `compile_executable` (yet). * Type unstable code is not yet supported. * Doesn't currently work on Windows. * If you find any other limitations, let us know. There's probably lots. From d0be3025946ab9be7838c3d0effc07ef5dc4a6fb Mon Sep 17 00:00:00 2001 From: Gabriel Baraldi Date: Fri, 20 May 2022 13:09:55 -0300 Subject: [PATCH 064/159] Add option to receive Array of functions --- src/StaticCompiler.jl | 69 ++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 68 insertions(+), 1 deletion(-) diff --git a/src/StaticCompiler.jl b/src/StaticCompiler.jl index da4b804..9b97ac2 100644 --- a/src/StaticCompiler.jl +++ b/src/StaticCompiler.jl @@ -434,7 +434,6 @@ function generate_shlib(f, tt, path::String = tempname(), name = GPUCompiler.saf path, name end - function native_code_llvm(@nospecialize(func), @nospecialize(types); kwargs...) job, kwargs = native_job(func, types; kwargs...) GPUCompiler.code_llvm(stdout, job; kwargs...) @@ -457,9 +456,77 @@ function native_code_native(@nospecialize(f), @nospecialize(tt), name = GPUCompi GPUCompiler.code_native(stdout, job; kwargs...) end +#Return an LLVM module for multiple functions +function native_llvm_module(funcs::Array; mangle_names = false, kwargs...) + f,tt = funcs[1] + mod = native_llvm_module(f,tt, kwargs...) + if length(funcs) > 1 + for func in funcs[2:end] + @show f,tt = func + tmod = native_llvm_module(f,tt, kwargs...) 
+ link!(mod,tmod) + end + end + if mangle_names + for func in functions(mod) + fname = name(func) + name!(func,fname[7:end]) + end + end + return mod +end +function generate_obj(funcs::Array, path::String = tempname(), filenamebase::String="obj", + mangle_names =false; + strip_llvm = false, + strip_asm = true, + opt_level=3, + kwargs...) + f,tt = funcs[1] + mkpath(path) + obj_path = joinpath(path, "$filenamebase.o") + fakejob, kwargs = native_job(f,tt, kwargs...) + mod = native_llvm_module(funcs, kwargs...) + obj, _ = GPUCompiler.emit_asm(fakejob, mod; strip=strip_asm, validate=false, format=LLVM.API.LLVMObjectFile) + open(obj_path, "w") do io + write(io, obj) + end + path, obj_path +end +function generate_shlib(funcs::Array, path::String = tempname(), filename::String="libfoo", mangle_names=false; kwargs...) + + lib_path = joinpath(path, "$filename.$(Libdl.dlext)") + _,obj_path = generate_obj(funcs, path, kwargs...) + # Pick a Clang + cc = Sys.isapple() ? `cc` : clang() + # Compile! + run(`$cc -shared -o $lib_path $obj_path`) + + path, name +end + +function compile_shlib(funcs::Array, path::String="./"; + filename="libfoo", + mangle_names=false, + kwargs...) + for func in funcs + f, types = func + tt = Base.to_tuple_type(types) + isconcretetype(tt) || error("input type signature $types is not concrete") + + rt = only(native_code_typed(f, tt))[2] + isconcretetype(rt) || error("$f$types did not infer to a concrete type. Got $rt") + end + +# Would be nice to use a compiler pass or something to check if there are any heap allocations or references to globals +# Keep an eye on https://github.com/JuliaLang/julia/pull/43747 for this + + generate_shlib(funcs, path, filename, mangle_names, kwargs...) + + joinpath(abspath(path), filename * "." 
* Libdl.dlext) +end end # module From 284cd2ccac411780bae7f95a99ffae579bed8ab5 Mon Sep 17 00:00:00 2001 From: Gabriel Baraldi Date: Fri, 20 May 2022 13:14:48 -0300 Subject: [PATCH 065/159] actually pass mangle_names --- src/StaticCompiler.jl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/StaticCompiler.jl b/src/StaticCompiler.jl index 9b97ac2..8c8a1c2 100644 --- a/src/StaticCompiler.jl +++ b/src/StaticCompiler.jl @@ -486,7 +486,7 @@ function generate_obj(funcs::Array, path::String = tempname(), filenamebase::Str mkpath(path) obj_path = joinpath(path, "$filenamebase.o") fakejob, kwargs = native_job(f,tt, kwargs...) - mod = native_llvm_module(funcs, kwargs...) + mod = native_llvm_module(funcs, mangle_names, kwargs...) obj, _ = GPUCompiler.emit_asm(fakejob, mod; strip=strip_asm, validate=false, format=LLVM.API.LLVMObjectFile) open(obj_path, "w") do io write(io, obj) @@ -498,7 +498,7 @@ function generate_shlib(funcs::Array, path::String = tempname(), filename::Strin lib_path = joinpath(path, "$filename.$(Libdl.dlext)") - _,obj_path = generate_obj(funcs, path, kwargs...) + _,obj_path = generate_obj(funcs, path, mangle_names, kwargs...) # Pick a Clang cc = Sys.isapple() ? `cc` : clang() # Compile! From 9ffeefee3bbca58066723d33e81b73e5a8e1d1bb Mon Sep 17 00:00:00 2001 From: Gabriel Baraldi Date: Fri, 20 May 2022 13:25:48 -0300 Subject: [PATCH 066/159] Fix keyword argument uses --- src/StaticCompiler.jl | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/src/StaticCompiler.jl b/src/StaticCompiler.jl index 8c8a1c2..950dc4d 100644 --- a/src/StaticCompiler.jl +++ b/src/StaticCompiler.jl @@ -476,8 +476,8 @@ function native_llvm_module(funcs::Array; mangle_names = false, kwargs...) 
return mod end -function generate_obj(funcs::Array, path::String = tempname(), filenamebase::String="obj", - mangle_names =false; +function generate_obj(funcs::Array, path::String = tempname(), filenamebase::String="obj"; + mangle_names =false, strip_llvm = false, strip_asm = true, opt_level=3, @@ -486,7 +486,7 @@ function generate_obj(funcs::Array, path::String = tempname(), filenamebase::Str mkpath(path) obj_path = joinpath(path, "$filenamebase.o") fakejob, kwargs = native_job(f,tt, kwargs...) - mod = native_llvm_module(funcs, mangle_names, kwargs...) + mod = native_llvm_module(funcs; mangle_names = mangle_names, kwargs...) obj, _ = GPUCompiler.emit_asm(fakejob, mod; strip=strip_asm, validate=false, format=LLVM.API.LLVMObjectFile) open(obj_path, "w") do io write(io, obj) @@ -494,11 +494,11 @@ function generate_obj(funcs::Array, path::String = tempname(), filenamebase::Str path, obj_path end -function generate_shlib(funcs::Array, path::String = tempname(), filename::String="libfoo", mangle_names=false; kwargs...) +function generate_shlib(funcs::Array, path::String = tempname(), filename::String="libfoo"; mangle_names=false, kwargs...) lib_path = joinpath(path, "$filename.$(Libdl.dlext)") - _,obj_path = generate_obj(funcs, path, mangle_names, kwargs...) + _,obj_path = generate_obj(funcs, path, filename; mangle_names=mangle_names, kwargs...) # Pick a Clang cc = Sys.isapple() ? `cc` : clang() # Compile! @@ -523,7 +523,7 @@ function compile_shlib(funcs::Array, path::String="./"; # Would be nice to use a compiler pass or something to check if there are any heap allocations or references to globals # Keep an eye on https://github.com/JuliaLang/julia/pull/43747 for this - generate_shlib(funcs, path, filename, mangle_names, kwargs...) + generate_shlib(funcs, path, filename; mangle_names=mangle_names, kwargs...) joinpath(abspath(path), filename * "." 
* Libdl.dlext) end From 95dd56d9a175554f05b8c019d2c1df6b6b6874cd Mon Sep 17 00:00:00 2001 From: "C.Brenhin Keller" Date: Sat, 21 May 2022 23:06:35 -0400 Subject: [PATCH 067/159] Separate out nightly CI, re-enable codecov --- .github/workflows/ci-julia-nightly.yml | 38 ++++++++++++++++++++++++++ .github/workflows/ci.yml | 21 ++++++++++++-- 2 files changed, 56 insertions(+), 3 deletions(-) create mode 100644 .github/workflows/ci-julia-nightly.yml diff --git a/.github/workflows/ci-julia-nightly.yml b/.github/workflows/ci-julia-nightly.yml new file mode 100644 index 0000000..98dc781 --- /dev/null +++ b/.github/workflows/ci-julia-nightly.yml @@ -0,0 +1,38 @@ +name: CI (Julia nightly) +on: + push: + branches: + - '**' + paths-ignore: + - 'README.md' + pull_request: + branches: + - main + paths-ignore: + - 'README.md' +jobs: + test-julia-nightly: + name: Julia ${{ matrix.version }} - ${{ matrix.os }} - ${{ matrix.arch }} + runs-on: ${{ matrix.os }} + strategy: + fail-fast: false + matrix: + version: + - 'nightly' + os: + - ubuntu-latest + - macOS-latest + arch: + - x64 + group: + - Core + steps: + - uses: actions/checkout@v2 + - uses: julia-actions/setup-julia@latest + with: + version: ${{ matrix.version }} + arch: ${{ matrix.arch }} + - uses: julia-actions/julia-buildpkg@latest + - uses: julia-actions/julia-runtest@latest + env: + GROUP: ${{ matrix.group }} diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 1eca9e1..4e00954 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -1,7 +1,15 @@ name: CI on: - - push - - pull_request + push: + branches: + - '**' + paths-ignore: + - 'README.md' + pull_request: + branches: + - main + paths-ignore: + - 'README.md' jobs: test: name: Julia ${{ matrix.version }} - ${{ matrix.os }} - ${{ matrix.arch }} @@ -12,12 +20,13 @@ jobs: version: - '1.7' - '~1.8.0-0' - - 'nightly' os: - ubuntu-latest - macOS-latest arch: - x64 + group: + - Core steps: - uses: actions/checkout@v2 - uses: 
julia-actions/setup-julia@latest @@ -26,3 +35,9 @@ jobs: arch: ${{ matrix.arch }} - uses: julia-actions/julia-buildpkg@latest - uses: julia-actions/julia-runtest@latest + env: + GROUP: ${{ matrix.group }} + - uses: julia-actions/julia-processcoverage@v1 + - uses: codecov/codecov-action@v2 + with: + files: lcov.info From 0d9556b90a6d8758cc3a65fe0e296cd9bf84f92e Mon Sep 17 00:00:00 2001 From: "C.Brenhin Keller" Date: Sat, 21 May 2022 23:18:26 -0400 Subject: [PATCH 068/159] Add integration tests for standalone executables --- .github/workflows/ci-integration.yml | 38 ++ test/Manifest.toml | 450 +++++++++++++++++++++ test/Project.toml | 5 +- test/runtests.jl | 571 +++++++++++++++------------ test/scripts/loopvec_matrix.jl | 54 +++ test/scripts/loopvec_product.jl | 25 ++ test/scripts/print_args.jl | 33 ++ test/scripts/rand_matrix.jl | 21 + test/scripts/times_table.jl | 26 ++ 9 files changed, 979 insertions(+), 244 deletions(-) create mode 100644 .github/workflows/ci-integration.yml create mode 100644 test/Manifest.toml create mode 100644 test/scripts/loopvec_matrix.jl create mode 100644 test/scripts/loopvec_product.jl create mode 100644 test/scripts/print_args.jl create mode 100644 test/scripts/rand_matrix.jl create mode 100644 test/scripts/times_table.jl diff --git a/.github/workflows/ci-integration.yml b/.github/workflows/ci-integration.yml new file mode 100644 index 0000000..025f250 --- /dev/null +++ b/.github/workflows/ci-integration.yml @@ -0,0 +1,38 @@ +name: CI (Integration) +on: + push: + branches: + - '**' + paths-ignore: + - 'README.md' + pull_request: + branches: + - main + paths-ignore: + - 'README.md' +jobs: + test-integration: + name: Julia ${{ matrix.version }} - ${{ matrix.os }} - ${{ matrix.arch }} + runs-on: ${{ matrix.os }} + strategy: + fail-fast: false + matrix: + version: + - '~1.8.0-0' + os: + - ubuntu-latest + - macOS-latest + arch: + - x64 + group: + - Integration + steps: + - uses: actions/checkout@v2 + - uses: julia-actions/setup-julia@latest 
+ with: + version: ${{ matrix.version }} + arch: ${{ matrix.arch }} + - uses: julia-actions/julia-buildpkg@latest + - uses: julia-actions/julia-runtest@latest + env: + GROUP: ${{ matrix.group }} diff --git a/test/Manifest.toml b/test/Manifest.toml new file mode 100644 index 0000000..2031c1d --- /dev/null +++ b/test/Manifest.toml @@ -0,0 +1,450 @@ +# This file is machine-generated - editing it directly is not advised + +julia_version = "1.8.0-beta3" +manifest_format = "2.0" +project_hash = "429ef1d09132391cc32bc872843d4edc99133ef5" + +[[deps.Adapt]] +deps = ["LinearAlgebra"] +git-tree-sha1 = "af92965fb30777147966f58acb05da51c5616b5f" +uuid = "79e6a3ab-5dfb-504d-930d-738a2a938a0e" +version = "3.3.3" + +[[deps.ArgTools]] +uuid = "0dad84c5-d112-42e6-8d28-ef12dabb789f" +version = "1.1.1" + +[[deps.ArrayInterface]] +deps = ["Compat", "IfElse", "LinearAlgebra", "Requires", "SparseArrays", "Static"] +git-tree-sha1 = "81f0cb60dc994ca17f68d9fb7c942a5ae70d9ee4" +uuid = "4fba245c-0d91-5ea0-9b3e-6abc04ee57a9" +version = "5.0.8" + +[[deps.Artifacts]] +uuid = "56f22d72-fd6d-98f1-02f0-08ddc0907c33" + +[[deps.Base64]] +uuid = "2a0f44e3-6c83-55bd-87e4-b1978d98bd5f" + +[[deps.BitTwiddlingConvenienceFunctions]] +deps = ["Static"] +git-tree-sha1 = "28bbdbf0354959db89358d1d79d421ff31ef0b5e" +uuid = "62783981-4cbd-42fc-bca8-16325de8dc4b" +version = "0.1.3" + +[[deps.CEnum]] +git-tree-sha1 = "eb4cb44a499229b3b8426dcfb5dd85333951ff90" +uuid = "fa961155-64e5-5f13-b03f-caf6b980ea82" +version = "0.4.2" + +[[deps.CPUSummary]] +deps = ["CpuId", "IfElse", "Static"] +git-tree-sha1 = "0eaf4aedad5ccc3e39481db55d72973f856dc564" +uuid = "2a0fbf3d-bb9c-48f3-b0a9-814d99fd7ab9" +version = "0.1.22" + +[[deps.ChainRulesCore]] +deps = ["Compat", "LinearAlgebra", "SparseArrays"] +git-tree-sha1 = "9950387274246d08af38f6eef8cb5480862a435f" +uuid = "d360d2e6-b24c-11e9-a2a3-2a2ae2dbcce4" +version = "1.14.0" + +[[deps.ChangesOfVariables]] +deps = ["ChainRulesCore", "LinearAlgebra", "Test"] +git-tree-sha1 = 
"1e315e3f4b0b7ce40feded39c73049692126cf53" +uuid = "9e997f8a-9a97-42d5-a9f1-ce6bfc15e2c0" +version = "0.1.3" + +[[deps.CloseOpenIntervals]] +deps = ["ArrayInterface", "Static"] +git-tree-sha1 = "f576084239e6bdf801007c80e27e2cc2cd963fe0" +uuid = "fb6a15b2-703c-40df-9091-08a04967cfa9" +version = "0.1.6" + +[[deps.CommonSubexpressions]] +deps = ["MacroTools", "Test"] +git-tree-sha1 = "7b8a93dba8af7e3b42fecabf646260105ac373f7" +uuid = "bbf7d656-a473-5ed7-a52c-81e309532950" +version = "0.3.0" + +[[deps.Compat]] +deps = ["Base64", "Dates", "DelimitedFiles", "Distributed", "InteractiveUtils", "LibGit2", "Libdl", "LinearAlgebra", "Markdown", "Mmap", "Pkg", "Printf", "REPL", "Random", "SHA", "Serialization", "SharedArrays", "Sockets", "SparseArrays", "Statistics", "Test", "UUIDs", "Unicode"] +git-tree-sha1 = "b153278a25dd42c65abbf4e62344f9d22e59191b" +uuid = "34da2185-b29b-5c13-b0c7-acf172513d20" +version = "3.43.0" + +[[deps.CompilerSupportLibraries_jll]] +deps = ["Artifacts", "Libdl"] +uuid = "e66e0078-7015-5450-92f7-15fbd957f2ae" +version = "0.5.2+0" + +[[deps.CpuId]] +deps = ["Markdown"] +git-tree-sha1 = "fcbb72b032692610bfbdb15018ac16a36cf2e406" +uuid = "adafc99b-e345-5852-983c-f28acb93d879" +version = "0.3.1" + +[[deps.Dates]] +deps = ["Printf"] +uuid = "ade2ca70-3891-5945-98fb-dc099432e06a" + +[[deps.DelimitedFiles]] +deps = ["Mmap"] +uuid = "8bb1440f-4735-579b-a4ab-409b98df4dab" + +[[deps.DiffResults]] +deps = ["StaticArrays"] +git-tree-sha1 = "c18e98cba888c6c25d1c3b048e4b3380ca956805" +uuid = "163ba53b-c6d8-5494-b064-1a9d43ac40c5" +version = "1.0.3" + +[[deps.DiffRules]] +deps = ["IrrationalConstants", "LogExpFunctions", "NaNMath", "Random", "SpecialFunctions"] +git-tree-sha1 = "28d605d9a0ac17118fe2c5e9ce0fbb76c3ceb120" +uuid = "b552c78f-8df3-52c6-915a-8e097449b14b" +version = "1.11.0" + +[[deps.Distributed]] +deps = ["Random", "Serialization", "Sockets"] +uuid = "8ba89e20-285c-5b6f-9357-94700520ee1b" + +[[deps.DocStringExtensions]] +deps = ["LibGit2"] 
+git-tree-sha1 = "b19534d1895d702889b219c382a6e18010797f0b" +uuid = "ffbed154-4ef7-542d-bbb7-c09d3a79fcae" +version = "0.8.6" + +[[deps.Downloads]] +deps = ["ArgTools", "FileWatching", "LibCURL", "NetworkOptions"] +uuid = "f43a241f-c20a-4ad4-852c-f6b1247861c6" +version = "1.6.0" + +[[deps.FileWatching]] +uuid = "7b1f6079-737a-58dc-b8bc-7a2ca5c1b5ee" + +[[deps.Formatting]] +deps = ["Printf"] +git-tree-sha1 = "8339d61043228fdd3eb658d86c926cb282ae72a8" +uuid = "59287772-0a20-5a39-b81b-1366585eb4c0" +version = "0.4.2" + +[[deps.ForwardDiff]] +deps = ["CommonSubexpressions", "DiffResults", "DiffRules", "LinearAlgebra", "LogExpFunctions", "NaNMath", "Preferences", "Printf", "Random", "SpecialFunctions", "StaticArrays"] +git-tree-sha1 = "2f18915445b248731ec5db4e4a17e451020bf21e" +uuid = "f6369f11-7733-5829-9624-2563aa707210" +version = "0.10.30" + +[[deps.HostCPUFeatures]] +deps = ["BitTwiddlingConvenienceFunctions", "IfElse", "Libdl", "Static"] +git-tree-sha1 = "18be5268cf415b5e27f34980ed25a7d34261aa83" +uuid = "3e5b6fbb-0976-4d2c-9146-d79de83f2fb0" +version = "0.1.7" + +[[deps.IfElse]] +git-tree-sha1 = "debdd00ffef04665ccbb3e150747a77560e8fad1" +uuid = "615f187c-cbe4-4ef1-ba3b-2fcf58d6d173" +version = "0.1.1" + +[[deps.InteractiveUtils]] +deps = ["Markdown"] +uuid = "b77e0a4c-d291-57a0-90e8-8db25a27a240" + +[[deps.InverseFunctions]] +deps = ["Test"] +git-tree-sha1 = "336cc738f03e069ef2cac55a104eb823455dca75" +uuid = "3587e190-3f89-42d0-90ee-14403ec27112" +version = "0.1.4" + +[[deps.IrrationalConstants]] +git-tree-sha1 = "7fd44fd4ff43fc60815f8e764c0f352b83c49151" +uuid = "92d709cd-6900-40b7-9082-c6be49f344b6" +version = "0.1.1" + +[[deps.JLLWrappers]] +deps = ["Preferences"] +git-tree-sha1 = "abc9885a7ca2052a736a600f7fa66209f96506e1" +uuid = "692b3bcd-3c85-4b1f-b108-f13ce0eb3210" +version = "1.4.1" + +[[deps.LLVM]] +deps = ["CEnum", "LLVMExtra_jll", "Libdl", "Printf", "Unicode"] +git-tree-sha1 = "c8d47589611803a0f3b4813d9e267cd4e3dbcefb" +uuid = 
"929cbde3-209d-540e-8aea-75f648917ca0" +version = "4.11.1" + +[[deps.LLVMExtra_jll]] +deps = ["Artifacts", "JLLWrappers", "LazyArtifacts", "Libdl", "Pkg", "TOML"] +git-tree-sha1 = "771bfe376249626d3ca12bcd58ba243d3f961576" +uuid = "dad2f222-ce93-54a1-a47d-0025e8a3acab" +version = "0.0.16+0" + +[[deps.LayoutPointers]] +deps = ["ArrayInterface", "LinearAlgebra", "ManualMemory", "SIMDTypes", "Static"] +git-tree-sha1 = "3da92dc6d4bce776cc850c18274a7508b8d752ef" +uuid = "10f19ff3-798f-405d-979b-55457f8fc047" +version = "0.1.7" + +[[deps.LazyArtifacts]] +deps = ["Artifacts", "Pkg"] +uuid = "4af54fe1-eca0-43a8-85a7-787d91b784e3" + +[[deps.LibCURL]] +deps = ["LibCURL_jll", "MozillaCACerts_jll"] +uuid = "b27032c2-a3e7-50c8-80cd-2d36dbcbfd21" +version = "0.6.3" + +[[deps.LibCURL_jll]] +deps = ["Artifacts", "LibSSH2_jll", "Libdl", "MbedTLS_jll", "Zlib_jll", "nghttp2_jll"] +uuid = "deac9b47-8bc7-5906-a0fe-35ac56dc84c0" +version = "7.81.0+0" + +[[deps.LibGit2]] +deps = ["Base64", "NetworkOptions", "Printf", "SHA"] +uuid = "76f85450-5226-5b5a-8eaa-529ad045b433" + +[[deps.LibSSH2_jll]] +deps = ["Artifacts", "Libdl", "MbedTLS_jll"] +uuid = "29816b5a-b9ab-546f-933c-edad1886dfa8" +version = "1.10.2+0" + +[[deps.Libdl]] +uuid = "8f399da3-3557-5675-b5ff-fb832c97cbdb" + +[[deps.LinearAlgebra]] +deps = ["Libdl", "libblastrampoline_jll"] +uuid = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e" + +[[deps.LogExpFunctions]] +deps = ["ChainRulesCore", "ChangesOfVariables", "DocStringExtensions", "InverseFunctions", "IrrationalConstants", "LinearAlgebra"] +git-tree-sha1 = "09e4b894ce6a976c354a69041a04748180d43637" +uuid = "2ab3a3ac-af41-5b50-aa03-7779005ae688" +version = "0.3.15" + +[[deps.Logging]] +uuid = "56ddb016-857b-54e1-b83d-db4d58db5568" + +[[deps.LoopVectorization]] +deps = ["ArrayInterface", "CPUSummary", "ChainRulesCore", "CloseOpenIntervals", "DocStringExtensions", "ForwardDiff", "HostCPUFeatures", "IfElse", "LayoutPointers", "LinearAlgebra", "OffsetArrays", "PolyesterWeave", 
"SIMDDualNumbers", "SLEEFPirates", "SpecialFunctions", "Static", "ThreadingUtilities", "UnPack", "VectorizationBase"] +git-tree-sha1 = "4392c19f0203df81512b6790a0a67446650bdce0" +uuid = "bdcacae8-1622-11e9-2a5c-532679323890" +version = "0.12.110" + +[[deps.MacroTools]] +deps = ["Markdown", "Random"] +git-tree-sha1 = "3d3e902b31198a27340d0bf00d6ac452866021cf" +uuid = "1914dd2f-81c6-5fcd-8719-6d5c9610ff09" +version = "0.5.9" + +[[deps.ManualMemory]] +git-tree-sha1 = "bcaef4fc7a0cfe2cba636d84cda54b5e4e4ca3cd" +uuid = "d125e4d3-2237-4719-b19c-fa641b8a4667" +version = "0.1.8" + +[[deps.Markdown]] +deps = ["Base64"] +uuid = "d6f4376e-aef5-505a-96c1-9c027394607a" + +[[deps.MbedTLS_jll]] +deps = ["Artifacts", "Libdl"] +uuid = "c8ffd9c3-330d-5841-b78e-0817d7145fa1" +version = "2.28.0+0" + +[[deps.Mmap]] +uuid = "a63ad114-7e13-5084-954f-fe012c677804" + +[[deps.MozillaCACerts_jll]] +uuid = "14a3606d-f60d-562e-9121-12d972cd8159" +version = "2022.2.1" + +[[deps.NaNMath]] +git-tree-sha1 = "737a5957f387b17e74d4ad2f440eb330b39a62c5" +uuid = "77ba4419-2d1f-58cd-9bb1-8ffee604a2e3" +version = "1.0.0" + +[[deps.NetworkOptions]] +uuid = "ca575930-c2e3-43a9-ace4-1e988b2c1908" +version = "1.2.0" + +[[deps.OffsetArrays]] +deps = ["Adapt"] +git-tree-sha1 = "52addd9e91df8a6a5781e5c7640787525fd48056" +uuid = "6fe1bfb0-de20-5000-8ca7-80f57d26f881" +version = "1.11.2" + +[[deps.OpenBLAS_jll]] +deps = ["Artifacts", "CompilerSupportLibraries_jll", "Libdl"] +uuid = "4536629a-c528-5b80-bd46-f80d51c5b363" +version = "0.3.20+0" + +[[deps.OpenLibm_jll]] +deps = ["Artifacts", "Libdl"] +uuid = "05823500-19ac-5b8b-9628-191a04bc5112" +version = "0.8.1+0" + +[[deps.OpenSpecFun_jll]] +deps = ["Artifacts", "CompilerSupportLibraries_jll", "JLLWrappers", "Libdl", "Pkg"] +git-tree-sha1 = "13652491f6856acfd2db29360e1bbcd4565d04f1" +uuid = "efe28fd5-8261-553b-a9e1-b2916fc3738e" +version = "0.5.5+0" + +[[deps.Pkg]] +deps = ["Artifacts", "Dates", "Downloads", "LibGit2", "Libdl", "Logging", "Markdown", "Printf", 
"REPL", "Random", "SHA", "Serialization", "TOML", "Tar", "UUIDs", "p7zip_jll"] +uuid = "44cfe95a-1eb2-52ea-b672-e2afdf69b78f" +version = "1.8.0" + +[[deps.PolyesterWeave]] +deps = ["BitTwiddlingConvenienceFunctions", "CPUSummary", "IfElse", "Static", "ThreadingUtilities"] +git-tree-sha1 = "7e597df97e46ffb1c8adbaddfa56908a7a20194b" +uuid = "1d0040c9-8b98-4ee7-8388-3f51789ca0ad" +version = "0.1.5" + +[[deps.Preferences]] +deps = ["TOML"] +git-tree-sha1 = "47e5f437cc0e7ef2ce8406ce1e7e24d44915f88d" +uuid = "21216c6a-2e73-6563-6e65-726566657250" +version = "1.3.0" + +[[deps.Printf]] +deps = ["Unicode"] +uuid = "de0858da-6303-5e67-8744-51eddeeeb8d7" + +[[deps.REPL]] +deps = ["InteractiveUtils", "Markdown", "Sockets", "Unicode"] +uuid = "3fa0cd96-eef1-5676-8a61-b3b8758bbffb" + +[[deps.Random]] +deps = ["SHA", "Serialization"] +uuid = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c" + +[[deps.Requires]] +deps = ["UUIDs"] +git-tree-sha1 = "838a3a4188e2ded87a4f9f184b4b0d78a1e91cb7" +uuid = "ae029012-a4dd-5104-9daa-d747884805df" +version = "1.3.0" + +[[deps.SHA]] +uuid = "ea8e919c-243c-51af-8825-aaa63cd721ce" +version = "0.7.0" + +[[deps.SIMDDualNumbers]] +deps = ["ForwardDiff", "IfElse", "SLEEFPirates", "VectorizationBase"] +git-tree-sha1 = "dd4195d308df24f33fb10dde7c22103ba88887fa" +uuid = "3cdde19b-5bb0-4aaf-8931-af3e248e098b" +version = "0.1.1" + +[[deps.SIMDTypes]] +git-tree-sha1 = "330289636fb8107c5f32088d2741e9fd7a061a5c" +uuid = "94e857df-77ce-4151-89e5-788b33177be4" +version = "0.1.0" + +[[deps.SLEEFPirates]] +deps = ["IfElse", "Static", "VectorizationBase"] +git-tree-sha1 = "ac399b5b163b9140f9c310dfe9e9aaa225617ff6" +uuid = "476501e8-09a2-5ece-8869-fb82de89a1fa" +version = "0.6.32" + +[[deps.Serialization]] +uuid = "9e88b42a-f829-5b0c-bbe9-9e923198166b" + +[[deps.SharedArrays]] +deps = ["Distributed", "Mmap", "Random", "Serialization"] +uuid = "1a1011a3-84de-559e-8e89-a11a2f7dc383" + +[[deps.Sockets]] +uuid = "6462fe0b-24de-5631-8697-dd941f90decc" + +[[deps.SparseArrays]] 
+deps = ["LinearAlgebra", "Random"] +uuid = "2f01184e-e22b-5df5-ae63-d93ebab69eaf" + +[[deps.SpecialFunctions]] +deps = ["ChainRulesCore", "IrrationalConstants", "LogExpFunctions", "OpenLibm_jll", "OpenSpecFun_jll"] +git-tree-sha1 = "bc40f042cfcc56230f781d92db71f0e21496dffd" +uuid = "276daf66-3868-5448-9aa4-cd146d93841b" +version = "2.1.5" + +[[deps.Static]] +deps = ["IfElse"] +git-tree-sha1 = "3a2a99b067090deb096edecec1dc291c5b4b31cb" +uuid = "aedffcd0-7271-4cad-89d0-dc628f76c6d3" +version = "0.6.5" + +[[deps.StaticArrays]] +deps = ["LinearAlgebra", "Random", "Statistics"] +git-tree-sha1 = "cd56bf18ed715e8b09f06ef8c6b781e6cdc49911" +uuid = "90137ffa-7385-5640-81b9-e52037218182" +version = "1.4.4" + +[[deps.StaticTools]] +deps = ["ManualMemory"] +git-tree-sha1 = "45569bece43af73ba51cbdac422e77abd7adba9f" +uuid = "86c06d3c-3f03-46de-9781-57580aa96d0a" +version = "0.3.0" + +[[deps.Statistics]] +deps = ["LinearAlgebra", "SparseArrays"] +uuid = "10745b16-79ce-11e8-11f9-7d13ad32a3b2" + +[[deps.StrideArraysCore]] +deps = ["ArrayInterface", "CloseOpenIntervals", "IfElse", "LayoutPointers", "ManualMemory", "Requires", "SIMDTypes", "Static", "ThreadingUtilities"] +git-tree-sha1 = "e03eacc0b8c1520e73aa84922ce44a14f024b210" +uuid = "7792a7ef-975c-4747-a70f-980b88e8d1da" +version = "0.3.6" + +[[deps.TOML]] +deps = ["Dates"] +uuid = "fa267f1f-6049-4f14-aa54-33bafae1ed76" +version = "1.0.0" + +[[deps.Tar]] +deps = ["ArgTools", "SHA"] +uuid = "a4e569a6-e804-4fa4-b0f3-eef7a1d5b13e" +version = "1.10.0" + +[[deps.Test]] +deps = ["InteractiveUtils", "Logging", "Random", "Serialization"] +uuid = "8dfed614-e22c-5e08-85e1-65c5234f0b40" + +[[deps.ThreadingUtilities]] +deps = ["ManualMemory"] +git-tree-sha1 = "f8629df51cab659d70d2e5618a430b4d3f37f2c3" +uuid = "8290d209-cae3-49c0-8002-c8c24d57dab5" +version = "0.5.0" + +[[deps.UUIDs]] +deps = ["Random", "SHA"] +uuid = "cf7118a7-6976-5b1a-9a39-7adc72f591a4" + +[[deps.UnPack]] +git-tree-sha1 = "387c1f73762231e86e0c9c5443ce3b4a0a9a0c2b" +uuid 
= "3a884ed6-31ef-47d7-9d2a-63182c4928ed" +version = "1.0.2" + +[[deps.Unicode]] +uuid = "4ec0a83e-493e-50e2-b9ac-8f72acf5a8f5" + +[[deps.VectorizationBase]] +deps = ["ArrayInterface", "CPUSummary", "HostCPUFeatures", "IfElse", "LayoutPointers", "Libdl", "LinearAlgebra", "SIMDTypes", "Static"] +git-tree-sha1 = "c95d242ade2d67c1510ce52d107cfca7a83e0b4e" +uuid = "3d5dd08c-fd9d-11e8-17fa-ed2836048c2f" +version = "0.21.33" + +[[deps.Zlib_jll]] +deps = ["Libdl"] +uuid = "83775a58-1f1d-513f-b197-d71354ab007a" +version = "1.2.12+1" + +[[deps.libblastrampoline_jll]] +deps = ["Artifacts", "Libdl", "OpenBLAS_jll"] +uuid = "8e850b90-86db-534c-a0d3-1478176c7d93" +version = "5.1.0+0" + +[[deps.nghttp2_jll]] +deps = ["Artifacts", "Libdl"] +uuid = "8e850ede-7688-5339-a07c-302acd2aaf8d" +version = "1.41.0+1" + +[[deps.p7zip_jll]] +deps = ["Artifacts", "Libdl"] +uuid = "3f19e933-33d8-53b3-aaab-bd5110c3b7a0" +version = "16.2.1+1" diff --git a/test/Project.toml b/test/Project.toml index b0cf1dc..b95846e 100644 --- a/test/Project.toml +++ b/test/Project.toml @@ -1,10 +1,11 @@ [deps] +Distributed = "8ba89e20-285c-5b6f-9357-94700520ee1b" Formatting = "59287772-0a20-5a39-b81b-1366585eb4c0" LLVM = "929cbde3-209d-540e-8aea-75f648917ca0" Libdl = "8f399da3-3557-5675-b5ff-fb832c97cbdb" LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e" -Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40" LoopVectorization = "bdcacae8-1622-11e9-2a5c-532679323890" ManualMemory = "d125e4d3-2237-4719-b19c-fa641b8a4667" +StaticTools = "86c06d3c-3f03-46de-9781-57580aa96d0a" StrideArraysCore = "7792a7ef-975c-4747-a70f-980b88e8d1da" -Distributed = "8ba89e20-285c-5b6f-9357-94700520ee1b" \ No newline at end of file +Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40" diff --git a/test/runtests.jl b/test/runtests.jl index 831a3a2..6b19c41 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -1,309 +1,396 @@ -using StaticCompiler using Test -using Libdl -using LinearAlgebra -using LoopVectorization -using ManualMemory -using 
Distributed -using StrideArraysCore -addprocs(1) -@everywhere using StaticCompiler, StrideArraysCore +using StaticCompiler +const GROUP = get(ENV, "GROUP", "All") + +@static if GROUP == "Core" || GROUP == "All" + using Libdl + using LinearAlgebra + using LoopVectorization + using ManualMemory + using Distributed + using StrideArraysCore + addprocs(1) + @everywhere using StaticCompiler, StrideArraysCore -remote_load_call(path, args...) = fetch(@spawnat 2 load_function(path)(args...)) + remote_load_call(path, args...) = fetch(@spawnat 2 load_function(path)(args...)) -@testset "Basics" begin + @testset "Basics" begin - simple_sum(x) = x + one(typeof(x)) + simple_sum(x) = x + one(typeof(x)) - # This probably needs a macro - for T ∈ (Int, Float64, Int32, Float32, Int16, Float16) - _, path, = compile(simple_sum, (T,)) - @test remote_load_call(path, T(1)) == T(2) + # This probably needs a macro + for T ∈ (Int, Float64, Int32, Float32, Int16, Float16) + _, path, = compile(simple_sum, (T,)) + @test remote_load_call(path, T(1)) == T(2) + end end -end -fib(n) = n <= 1 ? n : fib(n - 1) + fib(n - 2) # This needs to be defined globally due to https://github.com/JuliaLang/julia/issues/40990 + fib(n) = n <= 1 ? n : fib(n - 1) + fib(n - 2) # This needs to be defined globally due to https://github.com/JuliaLang/julia/issues/40990 -@testset "Recursion" begin - _, path = compile(fib, (Int,)) - @test remote_load_call(path, 10) == fib(10) + @testset "Recursion" begin + _, path = compile(fib, (Int,)) + @test remote_load_call(path, 10) == fib(10) - # Trick to work around #40990 - _fib2(_fib2, n) = n <= 1 ? n : _fib2(_fib2, n-1) + _fib2(_fib2, n-2) - fib2(n) = _fib2(_fib2, n) + # Trick to work around #40990 + _fib2(_fib2, n) = n <= 1 ? 
n : _fib2(_fib2, n-1) + _fib2(_fib2, n-2) + fib2(n) = _fib2(_fib2, n) - _, path = compile(fib2, (Int,)) - @test remote_load_call(path, 20) == fib(20) - #@test compile(fib2, (Int,))[1](20) == fib(20) -end + _, path = compile(fib2, (Int,)) + @test remote_load_call(path, 20) == fib(20) + #@test compile(fib2, (Int,))[1](20) == fib(20) + end -@testset "Loops" begin - function sum_first_N_int(N) - s = 0 - for a in 1:N - s += a + @testset "Loops" begin + function sum_first_N_int(N) + s = 0 + for a in 1:N + s += a + end + s end - s - end - _, path = compile(sum_first_N_int, (Int,)) - @test remote_load_call(path, 10) == 55 + _, path = compile(sum_first_N_int, (Int,)) + @test remote_load_call(path, 10) == 55 - function sum_first_N_float64(N) - s = Float64(0) - for a in 1:N - s += Float64(a) + function sum_first_N_float64(N) + s = Float64(0) + for a in 1:N + s += Float64(a) + end + s end - s - end - _, path = compile(sum_first_N_float64, (Int,)) - @test remote_load_call(path, 10) == 55. + _, path = compile(sum_first_N_float64, (Int,)) + @test remote_load_call(path, 10) == 55. - function sum_first_N_int_inbounds(N) - s = 0 - @inbounds for a in 1:N - s += a + function sum_first_N_int_inbounds(N) + s = 0 + @inbounds for a in 1:N + s += a + end + s end - s - end - _, path = compile(sum_first_N_int_inbounds, (Int,)) - @test remote_load_call(path, 10) == 55 + _, path = compile(sum_first_N_int_inbounds, (Int,)) + @test remote_load_call(path, 10) == 55 - function sum_first_N_float64_inbounds(N) - s = Float64(0) - @inbounds for a in 1:N - s += Float64(a) + function sum_first_N_float64_inbounds(N) + s = Float64(0) + @inbounds for a in 1:N + s += Float64(a) + end + s end - s + _, path = compile(sum_first_N_float64_inbounds, (Int,)) + @test remote_load_call(path, 10) == 55. end - _, path = compile(sum_first_N_float64_inbounds, (Int,)) - @test remote_load_call(path, 10) == 55. -end -# Arrays with different input types Int32, Int64, Float32, Float64, Complex? 
-@testset "Arrays" begin - function array_sum(n, A) - s = zero(eltype(A)) - for i in 1:n - s += A[i] + # Arrays with different input types Int32, Int64, Float32, Float64, Complex? + @testset "Arrays" begin + function array_sum(n, A) + s = zero(eltype(A)) + for i in 1:n + s += A[i] + end + s + end + for T ∈ (Int, Complex{Float32}, Complex{Float64}) + _, path = compile(array_sum, (Int, Vector{T})) + @test remote_load_call(path, 10, T.(1:10)) == T(55) end - s - end - for T ∈ (Int, Complex{Float32}, Complex{Float64}) - _, path = compile(array_sum, (Int, Vector{T})) - @test remote_load_call(path, 10, T.(1:10)) == T(55) end -end -@testset "Array allocations" begin - function f(N) - v = Vector{Float64}(undef, N) - for i ∈ eachindex(v) - v[i] = i*i + @testset "Array allocations" begin + function f(N) + v = Vector{Float64}(undef, N) + for i ∈ eachindex(v) + v[i] = i*i + end + v end - v + _, path = compile(f, (Int,)) + @test remote_load_call(path, 5) == [1.0, 4.0, 9.0, 16.0, 25.0] end - _, path = compile(f, (Int,)) - @test remote_load_call(path, 5) == [1.0, 4.0, 9.0, 16.0, 25.0] -end -# This is also a good test of loading and storing from the same object -@testset "Load & Store Same object" begin - global const x = Ref(0) - counter() = x[] += 1 - _, path = compile(counter, ()) - @spawnat 2 global counter = load_function(path) - @test fetch(@spawnat 2 counter()) == 1 - @test fetch(@spawnat 2 counter()) == 2 -end - -# This is also a good test of loading and storing from the same object -counter = let x = Ref(0) - () -> x[] += 1 -end -@testset "Closures" begin - #this currently segfaults during compilation - @test_skip begin + # This is also a good test of loading and storing from the same object + @testset "Load & Store Same object" begin + global const x = Ref(0) + counter() = x[] += 1 _, path = compile(counter, ()) - @spawnat 2 global counter_comp = load_function(path) - @test fetch(@spawnat 2 counter_comp()) == 1 - @test fetch(@spawnat 2 counter_comp()) == 2 + @spawnat 2 
global counter = load_function(path) + @test fetch(@spawnat 2 counter()) == 1 + @test fetch(@spawnat 2 counter()) == 2 end -end - -@testset "Error handling" begin - _, path = compile(sqrt, (Int,)) - tsk = @spawnat 2 begin - try - load_function(path)(-1) - catch e; - e + # This is also a good test of loading and storing from the same object + counter = let x = Ref(0) + () -> x[] += 1 + end + @testset "Closures" begin + #this currently segfaults during compilation + @test_skip begin + _, path = compile(counter, ()) + @spawnat 2 global counter_comp = load_function(path) + @test fetch(@spawnat 2 counter_comp()) == 1 + @test fetch(@spawnat 2 counter_comp()) == 2 end end - @test fetch(tsk) isa DomainError -end -# Julia wants to treat Tuple (and other things like it) as plain bits, but LLVM wants to treat it as something with a pointer. -# We need to be careful to not send, nor receive an unwrapped Tuple to a compiled function. -# The interface made in `compile` should handle this fine. -@testset "Send and receive Tuple" begin - foo(u::Tuple) = 2 .* reverse(u) .- 1 - _, path = compile(foo, (NTuple{3, Int},)) - @test remote_load_call(path, (1, 2, 3)) == (5, 3, 1) -end + @testset "Error handling" begin + _, path = compile(sqrt, (Int,)) + tsk = @spawnat 2 begin + try + load_function(path)(-1) + catch e; + e + end + end + @test fetch(tsk) isa DomainError + end + # Julia wants to treat Tuple (and other things like it) as plain bits, but LLVM wants to treat it as something with a pointer. + # We need to be careful to not send, nor receive an unwrapped Tuple to a compiled function. + # The interface made in `compile` should handle this fine. + @testset "Send and receive Tuple" begin + foo(u::Tuple) = 2 .* reverse(u) .- 1 -# Just to call external libraries -@testset "BLAS" begin - function mydot(a::Vector{Float64}) - N = length(a) - BLAS.dot(N, a, 1, a, 1) - end - a = [1.0, 2.0] - - mydot_compiled, path = compile(mydot, (Vector{Float64},)) - # Works locally for me, but not on CI. 
Need some improvements to pointer relocation to be robust. - @test_skip remote_load_call(path, a) == 5.0 - @test mydot_compiled(a) ≈ 5.0 - - # This will need some more work apparently - @test_skip begin - _, path = compile((*), (Matrix{Float64}, Matrix{Float64})) - A, B = rand(10, 11), rand(11, 12) - @test remote_load_call(path, A, B) ≈ A * B + _, path = compile(foo, (NTuple{3, Int},)) + @test remote_load_call(path, (1, 2, 3)) == (5, 3, 1) end -end -@testset "Strings" begin - function hello(name) - "Hello, " * name * "!" + # Just to call external libraries + @testset "BLAS" begin + function mydot(a::Vector{Float64}) + N = length(a) + BLAS.dot(N, a, 1, a, 1) + end + a = [1.0, 2.0] + + mydot_compiled, path = compile(mydot, (Vector{Float64},)) + # Works locally for me, but not on CI. Need some improvements to pointer relocation to be robust. + @test_skip remote_load_call(path, a) == 5.0 + @test mydot_compiled(a) ≈ 5.0 + + # This will need some more work apparently + @test_skip begin + _, path = compile((*), (Matrix{Float64}, Matrix{Float64})) + A, B = rand(10, 11), rand(11, 12) + @test remote_load_call(path, A, B) ≈ A * B + end end - hello_compiled, path = compile(hello, (String,)) - @test remote_load_call(path, "world") == "Hello, world!" -end -@testset "Hello World" begin - function hello(N) - println("Hello World $N") - N - end - # We'll need to be able to relocate a bunch of UV stuff for this, and deal with dynamic dispatch. - @test_skip begin - hello_compiled, path = compile(hello, (Int,)) - @test_skip remote_load_call(path, 1) == 1 - end -end -# This is a trick to get stack allocated arrays inside a function body (so long as they don't escape). 
-# This lets us have intermediate, mutable stack allocated arrays inside our -@testset "Alloca" begin - function f(N) - # this can hold at most 100 Int values, if you use it for more, you'll segfault - buf = ManualMemory.MemoryBuffer{100, Int}(undef) - GC.@preserve buf begin - # wrap the first N values in a PtrArray - arr = PtrArray(pointer(buf), (N,)) - arr .= 1 # mutate the array to be all 1s - sum(arr) # compute the sum. It is very imporatant that no references to arr escape the function body + @testset "Strings" begin + function hello(name) + "Hello, " * name * "!" end + hello_compiled, path = compile(hello, (String,)) + @test remote_load_call(path, "world") == "Hello, world!" end - _, path = compile(f, (Int,)) - @test remote_load_call(path, 20) == 20 -end -# I can't beleive this works. -@testset "LoopVectorization" begin - function mul!(C, A, B) - # note: @tturbo does NOT work - @turbo for n ∈ indices((C,B), 2), m ∈ indices((C,A), 1) - Cmn = zero(eltype(C)) - for k ∈ indices((A,B), (2,1)) - Cmn += A[m,k] * B[k,n] - end - C[m,n] = Cmn + @testset "Hello World" begin + function hello(N) + println("Hello World $N") + N + end + # We'll need to be able to relocate a bunch of UV stuff for this, and deal with dynamic dispatch. + @test_skip begin + hello_compiled, path = compile(hello, (Int,)) + @test_skip remote_load_call(path, 1) == 1 end end - C = Array{Float64}(undef, 10, 12) - A = rand(10, 11) - B = rand(11, 12) - - _, path = compile(mul!, (Matrix{Float64}, Matrix{Float64}, Matrix{Float64},)) - # remote_load_call(path, C, A, B) This won't work because @spawnat copies C - C .= fetch(@spawnat 2 (load_function(path)(C, A, B); C)) - @test C ≈ A*B -end - -@testset "Standalone Dylibs" begin - # Test function - # (already defined) - # fib(n) = n <= 1 ? 
n : fib(n - 1) + fib(n - 2) - - #Compile dylib - name = repr(fib) - filepath = compile_shlib(fib, (Int,), "./", name) - @test occursin("fib.$(Libdl.dlext)", filepath) - - # Open dylib - ptr = Libdl.dlopen(filepath, Libdl.RTLD_LOCAL) - fptr = Libdl.dlsym(ptr, "julia_$name") - @test fptr != C_NULL - @test ccall(fptr, Int, (Int,), 10) == 55 -end - - -@testset "Standalone Executables" begin - # Minimal test with no `llvmcall` - @inline function foo() - v = 0.0 - n = 1000 - for i=1:n - v += sqrt(n) + # This is a trick to get stack allocated arrays inside a function body (so long as they don't escape). + # This lets us have intermediate, mutable stack allocated arrays inside our + @testset "Alloca" begin + function f(N) + # this can hold at most 100 Int values, if you use it for more, you'll segfault + buf = ManualMemory.MemoryBuffer{100, Int}(undef) + GC.@preserve buf begin + # wrap the first N values in a PtrArray + arr = PtrArray(pointer(buf), (N,)) + arr .= 1 # mutate the array to be all 1s + sum(arr) # compute the sum. It is very imporatant that no references to arr escape the function body + end end - return 0 + _, path = compile(f, (Int,)) + @test remote_load_call(path, 20) == 20 end - filepath = compile_executable(foo, (), tempdir()) + # I can't beleive this works. 
+ @testset "LoopVectorization" begin + function mul!(C, A, B) + # note: @tturbo does NOT work + @turbo for n ∈ indices((C,B), 2), m ∈ indices((C,A), 1) + Cmn = zero(eltype(C)) + for k ∈ indices((A,B), (2,1)) + Cmn += A[m,k] * B[k,n] + end + C[m,n] = Cmn + end + end - r = run(`$filepath`); - @test isa(r, Base.Process) - @test r.exitcode == 0 + C = Array{Float64}(undef, 10, 12) + A = rand(10, 11) + B = rand(11, 12) - @static if VERSION>v"1.8.0-DEV" # The llvmcall here only works on 1.8+ - @inline function puts(s::Ptr{UInt8}) # Can't use Base.println because it allocates - Base.llvmcall((""" - ; External declaration of the puts function - declare i32 @puts(i8* nocapture) nounwind + _, path = compile(mul!, (Matrix{Float64}, Matrix{Float64}, Matrix{Float64},)) + # remote_load_call(path, C, A, B) This won't work because @spawnat copies C + C .= fetch(@spawnat 2 (load_function(path)(C, A, B); C)) + @test C ≈ A*B + end - define i32 @main(i8*) { - entry: - %call = call i32 (i8*) @puts(i8* %0) - ret i32 0 - } - """, "main"), Int32, Tuple{Ptr{UInt8}}, s) - end + @testset "Standalone Dylibs" begin + # Test function + # (already defined) + # fib(n) = n <= 1 ? 
n : fib(n - 1) + fib(n - 2) + + #Compile dylib + name = repr(fib) + filepath = compile_shlib(fib, (Int,), "./", name) + @test occursin("fib.$(Libdl.dlext)", filepath) + + # Open dylib + ptr = Libdl.dlopen(filepath, Libdl.RTLD_LOCAL) + fptr = Libdl.dlsym(ptr, "julia_$name") + @test fptr != C_NULL + @test ccall(fptr, Int, (Int,), 10) == 55 + end - @inline function print_args(argc::Int, argv::Ptr{Ptr{UInt8}}) - for i=1:argc - # Get pointer - p = unsafe_load(argv, i) - # Print string at pointer location (which fortunately already exists isn't tracked by the GC) - puts(p) + @testset "Standalone Executables" begin + # Minimal test with no `llvmcall` + @inline function foo() + v = 0.0 + n = 1000 + for i=1:n + v += sqrt(n) end return 0 end - filepath = compile_executable(print_args, (Int, Ptr{Ptr{UInt8}}), tempdir()) + filepath = compile_executable(foo, (), tempdir()) - r = run(`$filepath Hello, world!`); + r = run(`$filepath`); @test isa(r, Base.Process) @test r.exitcode == 0 + + @static if VERSION>v"1.8.0-DEV" # The llvmcall here only works on 1.8+ + @inline function _puts(s::Ptr{UInt8}) # Can't use Base.println because it allocates + Base.llvmcall((""" + ; External declaration of the puts function + declare i32 @puts(i8* nocapture) nounwind + + define i32 @main(i8*) { + entry: + %call = call i32 (i8*) @puts(i8* %0) + ret i32 0 + } + """, "main"), Int32, Tuple{Ptr{UInt8}}, s) + end + + @inline function print_args(argc::Int, argv::Ptr{Ptr{UInt8}}) + for i=1:argc + # Get pointer + p = unsafe_load(argv, i) + # Print string at pointer location (which fortunately already exists isn't tracked by the GC) + _puts(p) + end + return 0 + end + + filepath = compile_executable(print_args, (Int, Ptr{Ptr{UInt8}}), tempdir()) + + r = run(`$filepath Hello, world!`); + @test isa(r, Base.Process) + @test r.exitcode == 0 + end end + end +@static if GROUP == "Integration" || GROUP == "All" + using LoopVectorization + using StaticTools + + @testset "Standalone Executable Integration" begin + 
# Setup + testpath = pwd() + scratch = tempdir() + cd(scratch) + + # --- Times table, file IO, mallocarray + + # Compile... + # We have to start a new Julia process to get around the fact that Pkg.test + # disables `@inbounds`, but ironically we can use `--compile=min` to make that + # faster. + status = run(`julia --compile=min $testpath/scripts/times_table.jl`) + @test isa(status, Base.Process) + @test status.exitcode == 0 + + # Attempt to run + println("5x5 times table:") + status = run(`./times_table 5 5`) + @test isa(status, Base.Process) + @test status.exitcode == 0 + @test parsedlm(Int64, c"table.tsv", '\t') == (1:5)*(1:5)' + + # --- Random number generation + + # Compile... + status = run(`julia --compile=min $testpath/scripts/rand_matrix.jl`) + @test isa(status, Base.Process) + @test status.exitcode == 0 + + # Run... + println("5x5 random matrix:") + status = run(`./rand_matrix 5 5`) + @test isa(status, Base.Process) + @test status.exitcode == 0 + + # --- Test LoopVectorization integration + + # Compile... + status = run(`julia --compile=min $testpath/scripts/loopvec_product.jl`) + @test isa(status, Base.Process) + @test status.exitcode == 0 + + # Run... + println("10x10 table sum:") + status = run(`./loopvec_product 10 10`) + @test isa(status, Base.Process) + @test status.exitcode == 0 + @test parsedlm(c"product.tsv",'\t')[] == 3025 + + # Compile... + status = run(`julia --compile=min $testpath/scripts/loopvec_matrix.jl`) + @test isa(status, Base.Process) + @test status.exitcode == 0 + + # Run... + println("10x3 matrix product:") + status = run(`./loopvec_matrix 10 3`) + @test isa(status, Base.Process) + @test status.exitcode == 0 + A = (1:10) * (1:3)' + @test parsedlm(c"table.tsv",'\t') == A' * A + + # --- Test string handling + + # Compile... + status = run(`julia --compile=min $testpath/scripts/print_args.jl`) + @test isa(status, Base.Process) + @test status.exitcode == 0 + + # Run... 
+ println("String indexing and handling:") + status = run(`./print_args foo bar`) + @test isa(status, Base.Process) + @test status.exitcode == 0 + + # --- Clean up + cd(testpath) -# data structures, dictionaries, tuples, named tuples + end +end diff --git a/test/scripts/loopvec_matrix.jl b/test/scripts/loopvec_matrix.jl new file mode 100644 index 0000000..5477dd3 --- /dev/null +++ b/test/scripts/loopvec_matrix.jl @@ -0,0 +1,54 @@ +using StaticCompiler +using StaticTools +using LoopVectorization + +@inline function mul!(C::MallocArray, A::MallocArray, B::MallocArray) + @turbo for n ∈ indices((C,B), 2), m ∈ indices((C,A), 1) + Cmn = zero(eltype(C)) + for k ∈ indices((A,B), (2,1)) + Cmn += A[m,k] * B[k,n] + end + C[m,n] = Cmn + end + return C +end + +function loopvec_matrix(argc::Int, argv::Ptr{Ptr{UInt8}}) + argc == 3 || return printf(stderrp(), c"Incorrect number of command-line arguments\n") + rows = parse(Int64, argv, 2) # First command-line argument + cols = parse(Int64, argv, 3) # Second command-line argument + + # LHS + A = MallocArray{Float64}(undef, rows, cols) + @turbo for i ∈ axes(A, 1) + for j ∈ axes(A, 2) + A[i,j] = i*j + end + end + + # RHS + B = MallocArray{Float64}(undef, cols, rows) + @turbo for i ∈ axes(B, 1) + for j ∈ axes(B, 2) + B[i,j] = i*j + end + end + + # # Matrix multiplication + C = MallocArray{Float64}(undef, cols, cols) + mul!(C, B, A) + + # Print to stdout + printf(C) + # Also print to file + fp = fopen(c"table.tsv",c"w") + printf(fp, C) + fclose(fp) + # Clean up matrices + free(A) + free(B) + free(C) +end + +# Attempt to compile +path = compile_executable(loopvec_matrix, (Int64, Ptr{Ptr{UInt8}}), "./") diff --git a/test/scripts/loopvec_product.jl b/test/scripts/loopvec_product.jl new file mode 100644 index 0000000..bffa69c --- /dev/null +++ b/test/scripts/loopvec_product.jl @@ -0,0 +1,25 @@ +using StaticCompiler +using StaticTools +using LoopVectorization + +function loopvec_product(argc::Int, argv::Ptr{Ptr{UInt8}}) + argc == 3 || return 
printf(stderrp(), c"Incorrect number of command-line arguments\n") + rows = parse(Int64, argv, 2) # First command-line argument + cols = parse(Int64, argv, 3) # Second command-line argument + + s = 0 + @turbo for i=1:rows + for j=1:cols + s += i*j + end + end + # Print result to stdout + printf(s) + # Also print to file + fp = fopen(c"product.tsv",c"w") + printf(fp, s) + fclose(fp) +end + +# Attempt to compile +path = compile_executable(loopvec_product, (Int64, Ptr{Ptr{UInt8}}), "./") diff --git a/test/scripts/print_args.jl b/test/scripts/print_args.jl new file mode 100644 index 0000000..c3d05fb --- /dev/null +++ b/test/scripts/print_args.jl @@ -0,0 +1,33 @@ +using StaticCompiler +using StaticTools + +function print_args(argc::Int, argv::Ptr{Ptr{UInt8}}) + printf(c"Argument count is %d:\n", argc) + for i=1:argc + # iᵗʰ input argument string + pᵢ = unsafe_load(argv, i) # Get pointer + strᵢ = MallocString(pᵢ) # Can wrap to get high-level interface + println(strᵢ) + # No need to `free` since we didn't allocate this memory + end + newline() + println(c"Testing string indexing and substitution") + m = m"Hello world!" + println(m[1:5]) + println(m) + m[7:11] = c"there" + println(m) + free(m) + + s = m"Hello world!" 
+ println(s[1:5]) + println(s) + s[7:11] = c"there" + println(s) + + println(c"That was fun, see you next time!") + return 0 +end + +# Attempt to compile +path = compile_executable(print_args, (Int64, Ptr{Ptr{UInt8}}), "./") diff --git a/test/scripts/rand_matrix.jl b/test/scripts/rand_matrix.jl new file mode 100644 index 0000000..ce9fe46 --- /dev/null +++ b/test/scripts/rand_matrix.jl @@ -0,0 +1,21 @@ +using StaticCompiler +using StaticTools + +function rand_matrix(argc::Int, argv::Ptr{Ptr{UInt8}}) + argc == 3 || return printf(stderrp(), c"Incorrect number of command-line arguments\n") + rows = parse(Int64, argv, 2) # First command-line argument + cols = parse(Int64, argv, 3) # Second command-line argument + + M = MallocArray{Float64}(undef, rows, cols) + rng = static_rng() + @inbounds for i=1:rows + for j=1:cols + M[i,j] = rand(rng) + end + end + printf(M) + free(M) +end + +# Attempt to compile +path = compile_executable(rand_matrix, (Int64, Ptr{Ptr{UInt8}}), "./") diff --git a/test/scripts/times_table.jl b/test/scripts/times_table.jl new file mode 100644 index 0000000..2dab393 --- /dev/null +++ b/test/scripts/times_table.jl @@ -0,0 +1,26 @@ +using StaticCompiler +using StaticTools + +function times_table(argc::Int, argv::Ptr{Ptr{UInt8}}) + argc == 3 || return printf(stderrp(), c"Incorrect number of command-line arguments\n") + rows = parse(Int64, argv, 2) # First command-line argument + cols = parse(Int64, argv, 3) # Second command-line argument + + M = MallocArray{Int64}(undef, rows, cols) + @inbounds for i=1:rows + for j=1:cols + M[i,j] = i*j + end + end + # Print to stdout + printf(M) + # Also print to file + fp = fopen(c"table.tsv",c"w") + printf(fp, M) + fclose(fp) + # Clean up matrix + free(M) +end + +# Attempt to compile +path = compile_executable(times_table, (Int64, Ptr{Ptr{UInt8}}), "./") From a20c6fec2946059fa399476ba2b60024b9745215 Mon Sep 17 00:00:00 2001 From: "C.Brenhin Keller" Date: Sun, 22 May 2022 02:01:01 -0400 Subject: [PATCH 069/159] 
Reorganize tests, clean up --- .gitignore | 1 + Project.toml | 7 - codecov.yml | 12 ++ test/Manifest.toml | 8 +- test/Project.toml | 1 - test/runtests.jl | 399 ++-------------------------------------- test/testcore.jl | 294 +++++++++++++++++++++++++++++ test/testintegration.jl | 80 ++++++++ 8 files changed, 400 insertions(+), 402 deletions(-) create mode 100644 codecov.yml create mode 100644 test/testcore.jl create mode 100644 test/testintegration.jl diff --git a/.gitignore b/.gitignore index 6e716b0..872875b 100644 --- a/.gitignore +++ b/.gitignore @@ -5,6 +5,7 @@ /dev/ /test/standalone /test/test.* +Manifest.toml test.o test.so diff --git a/Project.toml b/Project.toml index 2c8d290..467c82d 100644 --- a/Project.toml +++ b/Project.toml @@ -14,10 +14,3 @@ Serialization = "9e88b42a-f829-5b0c-bbe9-9e923198166b" GPUCompiler = "0.13" LLVM = "4.8" julia = "1.7" - -[extras] -Formatting = "59287772-0a20-5a39-b81b-1366585eb4c0" -Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40" - -[targets] -test = ["Test", "Formatting"] diff --git a/codecov.yml b/codecov.yml new file mode 100644 index 0000000..3ddada3 --- /dev/null +++ b/codecov.yml @@ -0,0 +1,12 @@ +comment: false + +coverage: + status: + project: + default: + threshold: 1% + target: 1% + patch: + default: + threshold: 1% + target: 1% diff --git a/test/Manifest.toml b/test/Manifest.toml index 2031c1d..067ac97 100644 --- a/test/Manifest.toml +++ b/test/Manifest.toml @@ -2,7 +2,7 @@ julia_version = "1.8.0-beta3" manifest_format = "2.0" -project_hash = "429ef1d09132391cc32bc872843d4edc99133ef5" +project_hash = "45548a8fc84942c91e0e727fe14ec23674d9246e" [[deps.Adapt]] deps = ["LinearAlgebra"] @@ -122,12 +122,6 @@ version = "1.6.0" [[deps.FileWatching]] uuid = "7b1f6079-737a-58dc-b8bc-7a2ca5c1b5ee" -[[deps.Formatting]] -deps = ["Printf"] -git-tree-sha1 = "8339d61043228fdd3eb658d86c926cb282ae72a8" -uuid = "59287772-0a20-5a39-b81b-1366585eb4c0" -version = "0.4.2" - [[deps.ForwardDiff]] deps = ["CommonSubexpressions", "DiffResults", 
"DiffRules", "LinearAlgebra", "LogExpFunctions", "NaNMath", "Preferences", "Printf", "Random", "SpecialFunctions", "StaticArrays"] git-tree-sha1 = "2f18915445b248731ec5db4e4a17e451020bf21e" diff --git a/test/Project.toml b/test/Project.toml index b95846e..cbcf73f 100644 --- a/test/Project.toml +++ b/test/Project.toml @@ -1,6 +1,5 @@ [deps] Distributed = "8ba89e20-285c-5b6f-9357-94700520ee1b" -Formatting = "59287772-0a20-5a39-b81b-1366585eb4c0" LLVM = "929cbde3-209d-540e-8aea-75f648917ca0" Libdl = "8f399da3-3557-5675-b5ff-fb832c97cbdb" LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e" diff --git a/test/runtests.jl b/test/runtests.jl index 6b19c41..67838d9 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -1,396 +1,21 @@ using Test using StaticCompiler +using Libdl +using LinearAlgebra +using LoopVectorization +using ManualMemory +using Distributed +using StaticTools +using StrideArraysCore +addprocs(1) +@everywhere using StaticCompiler, StrideArraysCore + const GROUP = get(ENV, "GROUP", "All") @static if GROUP == "Core" || GROUP == "All" - using Libdl - using LinearAlgebra - using LoopVectorization - using ManualMemory - using Distributed - using StrideArraysCore - addprocs(1) - @everywhere using StaticCompiler, StrideArraysCore - - remote_load_call(path, args...) = fetch(@spawnat 2 load_function(path)(args...)) - - @testset "Basics" begin - - simple_sum(x) = x + one(typeof(x)) - - # This probably needs a macro - for T ∈ (Int, Float64, Int32, Float32, Int16, Float16) - _, path, = compile(simple_sum, (T,)) - @test remote_load_call(path, T(1)) == T(2) - end - end - - - fib(n) = n <= 1 ? n : fib(n - 1) + fib(n - 2) # This needs to be defined globally due to https://github.com/JuliaLang/julia/issues/40990 - - @testset "Recursion" begin - _, path = compile(fib, (Int,)) - @test remote_load_call(path, 10) == fib(10) - - # Trick to work around #40990 - _fib2(_fib2, n) = n <= 1 ? 
n : _fib2(_fib2, n-1) + _fib2(_fib2, n-2) - fib2(n) = _fib2(_fib2, n) - - _, path = compile(fib2, (Int,)) - @test remote_load_call(path, 20) == fib(20) - #@test compile(fib2, (Int,))[1](20) == fib(20) - end - - - @testset "Loops" begin - function sum_first_N_int(N) - s = 0 - for a in 1:N - s += a - end - s - end - _, path = compile(sum_first_N_int, (Int,)) - @test remote_load_call(path, 10) == 55 - - function sum_first_N_float64(N) - s = Float64(0) - for a in 1:N - s += Float64(a) - end - s - end - _, path = compile(sum_first_N_float64, (Int,)) - @test remote_load_call(path, 10) == 55. - - function sum_first_N_int_inbounds(N) - s = 0 - @inbounds for a in 1:N - s += a - end - s - end - _, path = compile(sum_first_N_int_inbounds, (Int,)) - @test remote_load_call(path, 10) == 55 - - function sum_first_N_float64_inbounds(N) - s = Float64(0) - @inbounds for a in 1:N - s += Float64(a) - end - s - end - _, path = compile(sum_first_N_float64_inbounds, (Int,)) - @test remote_load_call(path, 10) == 55. - end - - # Arrays with different input types Int32, Int64, Float32, Float64, Complex? 
- @testset "Arrays" begin - function array_sum(n, A) - s = zero(eltype(A)) - for i in 1:n - s += A[i] - end - s - end - for T ∈ (Int, Complex{Float32}, Complex{Float64}) - _, path = compile(array_sum, (Int, Vector{T})) - @test remote_load_call(path, 10, T.(1:10)) == T(55) - end - end - - @testset "Array allocations" begin - function f(N) - v = Vector{Float64}(undef, N) - for i ∈ eachindex(v) - v[i] = i*i - end - v - end - _, path = compile(f, (Int,)) - @test remote_load_call(path, 5) == [1.0, 4.0, 9.0, 16.0, 25.0] - end - - # This is also a good test of loading and storing from the same object - @testset "Load & Store Same object" begin - global const x = Ref(0) - counter() = x[] += 1 - _, path = compile(counter, ()) - @spawnat 2 global counter = load_function(path) - @test fetch(@spawnat 2 counter()) == 1 - @test fetch(@spawnat 2 counter()) == 2 - end - - # This is also a good test of loading and storing from the same object - counter = let x = Ref(0) - () -> x[] += 1 - end - @testset "Closures" begin - #this currently segfaults during compilation - @test_skip begin - _, path = compile(counter, ()) - @spawnat 2 global counter_comp = load_function(path) - @test fetch(@spawnat 2 counter_comp()) == 1 - @test fetch(@spawnat 2 counter_comp()) == 2 - end - end - - - @testset "Error handling" begin - _, path = compile(sqrt, (Int,)) - tsk = @spawnat 2 begin - try - load_function(path)(-1) - catch e; - e - end - end - @test fetch(tsk) isa DomainError - end - - # Julia wants to treat Tuple (and other things like it) as plain bits, but LLVM wants to treat it as something with a pointer. - # We need to be careful to not send, nor receive an unwrapped Tuple to a compiled function. - # The interface made in `compile` should handle this fine. 
- @testset "Send and receive Tuple" begin - foo(u::Tuple) = 2 .* reverse(u) .- 1 - - _, path = compile(foo, (NTuple{3, Int},)) - @test remote_load_call(path, (1, 2, 3)) == (5, 3, 1) - end - - - # Just to call external libraries - @testset "BLAS" begin - function mydot(a::Vector{Float64}) - N = length(a) - BLAS.dot(N, a, 1, a, 1) - end - a = [1.0, 2.0] - - mydot_compiled, path = compile(mydot, (Vector{Float64},)) - # Works locally for me, but not on CI. Need some improvements to pointer relocation to be robust. - @test_skip remote_load_call(path, a) == 5.0 - @test mydot_compiled(a) ≈ 5.0 - - # This will need some more work apparently - @test_skip begin - _, path = compile((*), (Matrix{Float64}, Matrix{Float64})) - A, B = rand(10, 11), rand(11, 12) - @test remote_load_call(path, A, B) ≈ A * B - end - end - - - @testset "Strings" begin - function hello(name) - "Hello, " * name * "!" - end - hello_compiled, path = compile(hello, (String,)) - @test remote_load_call(path, "world") == "Hello, world!" - end - - @testset "Hello World" begin - function hello(N) - println("Hello World $N") - N - end - # We'll need to be able to relocate a bunch of UV stuff for this, and deal with dynamic dispatch. - @test_skip begin - hello_compiled, path = compile(hello, (Int,)) - @test_skip remote_load_call(path, 1) == 1 - end - end - - # This is a trick to get stack allocated arrays inside a function body (so long as they don't escape). - # This lets us have intermediate, mutable stack allocated arrays inside our - @testset "Alloca" begin - function f(N) - # this can hold at most 100 Int values, if you use it for more, you'll segfault - buf = ManualMemory.MemoryBuffer{100, Int}(undef) - GC.@preserve buf begin - # wrap the first N values in a PtrArray - arr = PtrArray(pointer(buf), (N,)) - arr .= 1 # mutate the array to be all 1s - sum(arr) # compute the sum. 
It is very imporatant that no references to arr escape the function body - end - end - _, path = compile(f, (Int,)) - @test remote_load_call(path, 20) == 20 - end - - # I can't beleive this works. - @testset "LoopVectorization" begin - function mul!(C, A, B) - # note: @tturbo does NOT work - @turbo for n ∈ indices((C,B), 2), m ∈ indices((C,A), 1) - Cmn = zero(eltype(C)) - for k ∈ indices((A,B), (2,1)) - Cmn += A[m,k] * B[k,n] - end - C[m,n] = Cmn - end - end - - C = Array{Float64}(undef, 10, 12) - A = rand(10, 11) - B = rand(11, 12) - - _, path = compile(mul!, (Matrix{Float64}, Matrix{Float64}, Matrix{Float64},)) - # remote_load_call(path, C, A, B) This won't work because @spawnat copies C - C .= fetch(@spawnat 2 (load_function(path)(C, A, B); C)) - @test C ≈ A*B - end - - @testset "Standalone Dylibs" begin - # Test function - # (already defined) - # fib(n) = n <= 1 ? n : fib(n - 1) + fib(n - 2) - - #Compile dylib - name = repr(fib) - filepath = compile_shlib(fib, (Int,), "./", name) - @test occursin("fib.$(Libdl.dlext)", filepath) - - # Open dylib - ptr = Libdl.dlopen(filepath, Libdl.RTLD_LOCAL) - fptr = Libdl.dlsym(ptr, "julia_$name") - @test fptr != C_NULL - @test ccall(fptr, Int, (Int,), 10) == 55 - end - - @testset "Standalone Executables" begin - # Minimal test with no `llvmcall` - @inline function foo() - v = 0.0 - n = 1000 - for i=1:n - v += sqrt(n) - end - return 0 - end - - filepath = compile_executable(foo, (), tempdir()) - - r = run(`$filepath`); - @test isa(r, Base.Process) - @test r.exitcode == 0 - - @static if VERSION>v"1.8.0-DEV" # The llvmcall here only works on 1.8+ - @inline function _puts(s::Ptr{UInt8}) # Can't use Base.println because it allocates - Base.llvmcall((""" - ; External declaration of the puts function - declare i32 @puts(i8* nocapture) nounwind - - define i32 @main(i8*) { - entry: - %call = call i32 (i8*) @puts(i8* %0) - ret i32 0 - } - """, "main"), Int32, Tuple{Ptr{UInt8}}, s) - end - - @inline function print_args(argc::Int, 
argv::Ptr{Ptr{UInt8}}) - for i=1:argc - # Get pointer - p = unsafe_load(argv, i) - # Print string at pointer location (which fortunately already exists isn't tracked by the GC) - _puts(p) - end - return 0 - end - - filepath = compile_executable(print_args, (Int, Ptr{Ptr{UInt8}}), tempdir()) - - r = run(`$filepath Hello, world!`); - @test isa(r, Base.Process) - @test r.exitcode == 0 - end - end - + include("testcore.jl") end @static if GROUP == "Integration" || GROUP == "All" - using LoopVectorization - using StaticTools - - @testset "Standalone Executable Integration" begin - # Setup - testpath = pwd() - scratch = tempdir() - cd(scratch) - - # --- Times table, file IO, mallocarray - - # Compile... - # We have to start a new Julia process to get around the fact that Pkg.test - # disables `@inbounds`, but ironically we can use `--compile=min` to make that - # faster. - status = run(`julia --compile=min $testpath/scripts/times_table.jl`) - @test isa(status, Base.Process) - @test status.exitcode == 0 - - # Attempt to run - println("5x5 times table:") - status = run(`./times_table 5 5`) - @test isa(status, Base.Process) - @test status.exitcode == 0 - @test parsedlm(Int64, c"table.tsv", '\t') == (1:5)*(1:5)' - - # --- Random number generation - - # Compile... - status = run(`julia --compile=min $testpath/scripts/rand_matrix.jl`) - @test isa(status, Base.Process) - @test status.exitcode == 0 - - # Run... - println("5x5 random matrix:") - status = run(`./rand_matrix 5 5`) - @test isa(status, Base.Process) - @test status.exitcode == 0 - - # --- Test LoopVectorization integration - - # Compile... - status = run(`julia --compile=min $testpath/scripts/loopvec_product.jl`) - @test isa(status, Base.Process) - @test status.exitcode == 0 - - # Run... - println("10x10 table sum:") - status = run(`./loopvec_product 10 10`) - @test isa(status, Base.Process) - @test status.exitcode == 0 - @test parsedlm(c"product.tsv",'\t')[] == 3025 - - # Compile... 
- status = run(`julia --compile=min $testpath/scripts/loopvec_matrix.jl`) - @test isa(status, Base.Process) - @test status.exitcode == 0 - - # Run... - println("10x3 matrix product:") - status = run(`./loopvec_matrix 10 3`) - @test isa(status, Base.Process) - @test status.exitcode == 0 - A = (1:10) * (1:3)' - @test parsedlm(c"table.tsv",'\t') == A' * A - - # --- Test string handling - - # Compile... - status = run(`julia --compile=min $testpath/scripts/print_args.jl`) - @test isa(status, Base.Process) - @test status.exitcode == 0 - - # Run... - println("String indexing and handling:") - status = run(`./print_args foo bar`) - @test isa(status, Base.Process) - @test status.exitcode == 0 - - # --- Clean up - cd(testpath) - - end + include("testintegration.jl") end diff --git a/test/testcore.jl b/test/testcore.jl new file mode 100644 index 0000000..7f12d01 --- /dev/null +++ b/test/testcore.jl @@ -0,0 +1,294 @@ +remote_load_call(path, args...) = fetch(@spawnat 2 load_function(path)(args...)) + +@testset "Basics" begin + + simple_sum(x) = x + one(typeof(x)) + + # This probably needs a macro + for T ∈ (Int, Float64, Int32, Float32, Int16, Float16) + _, path, = compile(simple_sum, (T,)) + @test remote_load_call(path, T(1)) == T(2) + end +end + + +fib(n) = n <= 1 ? n : fib(n - 1) + fib(n - 2) # This needs to be defined globally due to https://github.com/JuliaLang/julia/issues/40990 + +@testset "Recursion" begin + _, path = compile(fib, (Int,)) + @test remote_load_call(path, 10) == fib(10) + + # Trick to work around #40990 + _fib2(_fib2, n) = n <= 1 ? 
n : _fib2(_fib2, n-1) + _fib2(_fib2, n-2) + fib2(n) = _fib2(_fib2, n) + + _, path = compile(fib2, (Int,)) + @test remote_load_call(path, 20) == fib(20) + #@test compile(fib2, (Int,))[1](20) == fib(20) +end + + +@testset "Loops" begin + function sum_first_N_int(N) + s = 0 + for a in 1:N + s += a + end + s + end + _, path = compile(sum_first_N_int, (Int,)) + @test remote_load_call(path, 10) == 55 + + function sum_first_N_float64(N) + s = Float64(0) + for a in 1:N + s += Float64(a) + end + s + end + _, path = compile(sum_first_N_float64, (Int,)) + @test remote_load_call(path, 10) == 55. + + function sum_first_N_int_inbounds(N) + s = 0 + @inbounds for a in 1:N + s += a + end + s + end + _, path = compile(sum_first_N_int_inbounds, (Int,)) + @test remote_load_call(path, 10) == 55 + + function sum_first_N_float64_inbounds(N) + s = Float64(0) + @inbounds for a in 1:N + s += Float64(a) + end + s + end + _, path = compile(sum_first_N_float64_inbounds, (Int,)) + @test remote_load_call(path, 10) == 55. +end + +# Arrays with different input types Int32, Int64, Float32, Float64, Complex? 
+@testset "Arrays" begin + function array_sum(n, A) + s = zero(eltype(A)) + for i in 1:n + s += A[i] + end + s + end + for T ∈ (Int, Complex{Float32}, Complex{Float64}) + _, path = compile(array_sum, (Int, Vector{T})) + @test remote_load_call(path, 10, T.(1:10)) == T(55) + end +end + +@testset "Array allocations" begin + function f(N) + v = Vector{Float64}(undef, N) + for i ∈ eachindex(v) + v[i] = i*i + end + v + end + _, path = compile(f, (Int,)) + @test remote_load_call(path, 5) == [1.0, 4.0, 9.0, 16.0, 25.0] +end + +# This is also a good test of loading and storing from the same object +@testset "Load & Store Same object" begin + global const x = Ref(0) + counter() = x[] += 1 + _, path = compile(counter, ()) + @spawnat 2 global counter = load_function(path) + @test fetch(@spawnat 2 counter()) == 1 + @test fetch(@spawnat 2 counter()) == 2 +end + +# This is also a good test of loading and storing from the same object +counter = let x = Ref(0) + () -> x[] += 1 +end +@testset "Closures" begin + #this currently segfaults during compilation + @test_skip begin + _, path = compile(counter, ()) + @spawnat 2 global counter_comp = load_function(path) + @test fetch(@spawnat 2 counter_comp()) == 1 + @test fetch(@spawnat 2 counter_comp()) == 2 + end +end + + +@testset "Error handling" begin + _, path = compile(sqrt, (Int,)) + tsk = @spawnat 2 begin + try + load_function(path)(-1) + catch e; + e + end + end + @test fetch(tsk) isa DomainError +end + +# Julia wants to treat Tuple (and other things like it) as plain bits, but LLVM wants to treat it as something with a pointer. +# We need to be careful to not send, nor receive an unwrapped Tuple to a compiled function. +# The interface made in `compile` should handle this fine. 
+@testset "Send and receive Tuple" begin + foo(u::Tuple) = 2 .* reverse(u) .- 1 + + _, path = compile(foo, (NTuple{3, Int},)) + @test remote_load_call(path, (1, 2, 3)) == (5, 3, 1) +end + + +# Just to call external libraries +@testset "BLAS" begin + function mydot(a::Vector{Float64}) + N = length(a) + BLAS.dot(N, a, 1, a, 1) + end + a = [1.0, 2.0] + + mydot_compiled, path = compile(mydot, (Vector{Float64},)) + # Works locally for me, but not on CI. Need some improvements to pointer relocation to be robust. + @test_skip remote_load_call(path, a) == 5.0 + @test mydot_compiled(a) ≈ 5.0 + + # This will need some more work apparently + @test_skip begin + _, path = compile((*), (Matrix{Float64}, Matrix{Float64})) + A, B = rand(10, 11), rand(11, 12) + @test remote_load_call(path, A, B) ≈ A * B + end +end + + +@testset "Strings" begin + function hello(name) + "Hello, " * name * "!" + end + hello_compiled, path = compile(hello, (String,)) + @test remote_load_call(path, "world") == "Hello, world!" +end + +@testset "Hello World" begin + function hello(N) + println("Hello World $N") + N + end + # We'll need to be able to relocate a bunch of UV stuff for this, and deal with dynamic dispatch. + @test_skip begin + hello_compiled, path = compile(hello, (Int,)) + @test_skip remote_load_call(path, 1) == 1 + end +end + +# This is a trick to get stack allocated arrays inside a function body (so long as they don't escape). +# This lets us have intermediate, mutable stack allocated arrays inside our +@testset "Alloca" begin + function f(N) + # this can hold at most 100 Int values, if you use it for more, you'll segfault + buf = ManualMemory.MemoryBuffer{100, Int}(undef) + GC.@preserve buf begin + # wrap the first N values in a PtrArray + arr = PtrArray(pointer(buf), (N,)) + arr .= 1 # mutate the array to be all 1s + sum(arr) # compute the sum. 
It is very imporatant that no references to arr escape the function body + end + end + _, path = compile(f, (Int,)) + @test remote_load_call(path, 20) == 20 +end + +# I can't beleive this works. +@testset "LoopVectorization" begin + function mul!(C, A, B) + # note: @tturbo does NOT work + @turbo for n ∈ indices((C,B), 2), m ∈ indices((C,A), 1) + Cmn = zero(eltype(C)) + for k ∈ indices((A,B), (2,1)) + Cmn += A[m,k] * B[k,n] + end + C[m,n] = Cmn + end + end + + C = Array{Float64}(undef, 10, 12) + A = rand(10, 11) + B = rand(11, 12) + + _, path = compile(mul!, (Matrix{Float64}, Matrix{Float64}, Matrix{Float64},)) + # remote_load_call(path, C, A, B) This won't work because @spawnat copies C + C .= fetch(@spawnat 2 (load_function(path)(C, A, B); C)) + @test C ≈ A*B +end + +@testset "Standalone Dylibs" begin + # Test function + # (already defined) + # fib(n) = n <= 1 ? n : fib(n - 1) + fib(n - 2) + + #Compile dylib + name = repr(fib) + filepath = compile_shlib(fib, (Int,), "./", name) + @test occursin("fib.$(Libdl.dlext)", filepath) + + # Open dylib + ptr = Libdl.dlopen(filepath, Libdl.RTLD_LOCAL) + fptr = Libdl.dlsym(ptr, "julia_$name") + @test fptr != C_NULL + @test ccall(fptr, Int, (Int,), 10) == 55 +end + +@testset "Standalone Executables" begin + # Minimal test with no `llvmcall` + @inline function foo() + v = 0.0 + n = 1000 + for i=1:n + v += sqrt(n) + end + return 0 + end + + filepath = compile_executable(foo, (), tempdir()) + + r = run(`$filepath`); + @test isa(r, Base.Process) + @test r.exitcode == 0 + + + @inline function _puts(s::Ptr{UInt8}) # Can't use Base.println because it allocates + Base.llvmcall((""" + ; External declaration of the puts function + declare i32 @puts(i8* nocapture) nounwind + + define i32 @main(i64) { + entry: + %ptr = inttoptr i64 %0 to i8* + %status = call i32 (i8*) @puts(i8* %ptr) + ret i32 %status + } + """, "main"), Int32, Tuple{Ptr{UInt8}}, s) + end + + @inline function print_args(argc::Int, argv::Ptr{Ptr{UInt8}}) + for i=1:argc + # 
Get pointer + p = unsafe_load(argv, i) + # Print string at pointer location (which fortunately already exists isn't tracked by the GC) + _puts(p) + end + return 0 + end + + filepath = compile_executable(print_args, (Int, Ptr{Ptr{UInt8}}), tempdir()) + + r = run(`$filepath Hello, world!`); + @test isa(r, Base.Process) + @test r.exitcode == 0 +end diff --git a/test/testintegration.jl b/test/testintegration.jl new file mode 100644 index 0000000..13141c8 --- /dev/null +++ b/test/testintegration.jl @@ -0,0 +1,80 @@ + +@testset "Standalone Executable Integration" begin + + testpath = pwd() + scratch = tempdir() + cd(scratch) + + # --- Times table, file IO, mallocarray + + # Compile... + # We have to start a new Julia process to get around the fact that Pkg.test + # disables `@inbounds`, but ironically we can use `--compile=min` to make that + # faster. + status = run(`julia --compile=min $testpath/scripts/times_table.jl`) + @test isa(status, Base.Process) + @test status.exitcode == 0 + + # Attempt to run + println("5x5 times table:") + status = run(`./times_table 5 5`) + @test isa(status, Base.Process) + @test status.exitcode == 0 + @test parsedlm(Int64, c"table.tsv", '\t') == (1:5)*(1:5)' + + # --- Random number generation + + # Compile... + status = run(`julia --compile=min $testpath/scripts/rand_matrix.jl`) + @test isa(status, Base.Process) + @test status.exitcode == 0 + + # Run... + println("5x5 random matrix:") + status = run(`./rand_matrix 5 5`) + @test isa(status, Base.Process) + @test status.exitcode == 0 + + # --- Test LoopVectorization integration + + # Compile... + status = run(`julia --compile=min $testpath/scripts/loopvec_product.jl`) + @test isa(status, Base.Process) + @test status.exitcode == 0 + + # Run... + println("10x10 table sum:") + status = run(`./loopvec_product 10 10`) + @test isa(status, Base.Process) + @test status.exitcode == 0 + @test parsedlm(c"product.tsv",'\t')[] == 3025 + + # Compile... 
+ status = run(`julia --compile=min $testpath/scripts/loopvec_matrix.jl`) + @test isa(status, Base.Process) + @test status.exitcode == 0 + + # Run... + println("10x3 matrix product:") + status = run(`./loopvec_matrix 10 3`) + @test isa(status, Base.Process) + @test status.exitcode == 0 + A = (1:10) * (1:3)' + @test parsedlm(c"table.tsv",'\t') == A' * A + + # --- Test string handling + + # Compile... + status = run(`julia --compile=min $testpath/scripts/print_args.jl`) + @test isa(status, Base.Process) + @test status.exitcode == 0 + + # Run... + println("String indexing and handling:") + status = run(`./print_args foo bar`) + @test isa(status, Base.Process) + @test status.exitcode == 0 + + # --- Clean up + cd(testpath) +end From bf96341b200208d9000b8fbcbf38f1c3946ede41 Mon Sep 17 00:00:00 2001 From: "C.Brenhin Keller" Date: Sun, 22 May 2022 02:24:10 -0400 Subject: [PATCH 070/159] Update badges in README --- README.md | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index a14add0..05dd9db 100644 --- a/README.md +++ b/README.md @@ -1,7 +1,9 @@ # StaticCompiler -[![CI](https://github.com/tshort/StaticCompiler.jl/actions/workflows/ci.yml/badge.svg)](https://github.com/tshort/StaticCompiler.jl/actions/workflows/ci.yml) -[![Codecov](https://codecov.io/gh/tshort/StaticCompiler.jl/branch/master/graph/badge.svg)](https://codecov.io/gh/tshort/StaticCompiler.jl) +[![CI](https://github.com/tshort/StaticCompiler.jl/workflows/CI/badge.svg)](https://github.com/tshort/StaticCompiler.jl/actions/workflows/ci.yml) +[![CI (Integration)](https://github.com/tshort/StaticCompiler.jl/workflows/CI%20(Integration)/badge.svg)](https://github.com/tshort/StaticCompiler.jl/actions/workflows/ci-integration.yml) +[![CI (Julia nightly)](https://github.com/tshort/StaticCompiler.jl/workflows/CI%20(Julia%20nightly)/badge.svg)](https://github.com/tshort/StaticCompiler.jl/actions/workflows/ci-julia-nightly.yml) 
+[![Coverage](https://codecov.io/gh/tshort/StaticCompiler.jl/branch/master/graph/badge.svg)](https://codecov.io/gh/tshort/StaticCompiler.jl) This is an experimental package to compile Julia code to standalone libraries. A system image is not needed. @@ -45,7 +47,7 @@ marked with `@test_skip`) This package uses the [GPUCompiler package](https://github.com/JuliaGPU/GPUCompiler.jl) to generate code. -## Limitations +## Limitations * GC-tracked allocations and global variables do work with `compile`, but the way they are implemented is brittle and can be dangerous. Allocate with care. * GC-tracked allocations and global variables do *not* work with `compile_executable` (yet). From ceea5f1227a53e5e719221c6c3aa68ffa1808adf Mon Sep 17 00:00:00 2001 From: "C.Brenhin Keller" Date: Sun, 22 May 2022 02:37:58 -0400 Subject: [PATCH 071/159] Update main branch name in workflows --- .github/workflows/ci-integration.yml | 2 +- .github/workflows/ci-julia-nightly.yml | 2 +- .github/workflows/ci.yml | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/ci-integration.yml b/.github/workflows/ci-integration.yml index 025f250..287ae13 100644 --- a/.github/workflows/ci-integration.yml +++ b/.github/workflows/ci-integration.yml @@ -7,7 +7,7 @@ on: - 'README.md' pull_request: branches: - - main + - master paths-ignore: - 'README.md' jobs: diff --git a/.github/workflows/ci-julia-nightly.yml b/.github/workflows/ci-julia-nightly.yml index 98dc781..d01adba 100644 --- a/.github/workflows/ci-julia-nightly.yml +++ b/.github/workflows/ci-julia-nightly.yml @@ -7,7 +7,7 @@ on: - 'README.md' pull_request: branches: - - main + - master paths-ignore: - 'README.md' jobs: diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 4e00954..5a6bcbb 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -7,7 +7,7 @@ on: - 'README.md' pull_request: branches: - - main + - master paths-ignore: - 'README.md' jobs: From 
41ca1212b924e780363b29f9f25b5b3b7c11e373 Mon Sep 17 00:00:00 2001 From: "C.Brenhin Keller" Date: Mon, 30 May 2022 22:03:35 -0400 Subject: [PATCH 072/159] Update CompatHelper workflow --- .github/workflows/CompatHelper.yml | 27 ++++++++------------------- 1 file changed, 8 insertions(+), 19 deletions(-) diff --git a/.github/workflows/CompatHelper.yml b/.github/workflows/CompatHelper.yml index 7c17290..486adf2 100644 --- a/.github/workflows/CompatHelper.yml +++ b/.github/workflows/CompatHelper.yml @@ -1,27 +1,16 @@ name: CompatHelper - on: schedule: - - cron: '0 1 * * *' - issues: - types: [opened, reopened] - + - cron: 0 0 * * 0 + workflow_dispatch: jobs: - build: - runs-on: ${{ matrix.os }} - strategy: - matrix: - julia-version: [1.3.1] - julia-arch: [x64] - os: [ubuntu-latest] + CompatHelper: + runs-on: ubuntu-latest steps: - - uses: julia-actions/setup-julia@latest - with: - version: ${{ matrix.julia-version }} - - name: Install dependencies - run: julia -e 'using Pkg; Pkg.add(Pkg.PackageSpec(name = "CompatHelper", url = "https://github.com/bcbi/CompatHelper.jl.git"))' - - name: CompatHelper.main + - name: Pkg.add("CompatHelper") + run: julia -e 'using Pkg; Pkg.add("CompatHelper")' + - name: CompatHelper.main() env: GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} - JULIA_DEBUG: CompatHelper + COMPATHELPER_PRIV: ${{ secrets.DOCUMENTER_KEY }} run: julia -e 'using CompatHelper; CompatHelper.main()' From 1fda350f718ded85ac348096fe6be82476a22141 Mon Sep 17 00:00:00 2001 From: "C.Brenhin Keller" Date: Mon, 30 May 2022 23:18:16 -0400 Subject: [PATCH 073/159] Require avx2 for loopvec_product test --- test/testintegration.jl | 2 ++ 1 file changed, 2 insertions(+) diff --git a/test/testintegration.jl b/test/testintegration.jl index 13141c8..5fd783e 100644 --- a/test/testintegration.jl +++ b/test/testintegration.jl @@ -37,6 +37,7 @@ # --- Test LoopVectorization integration +@static if LoopVectorization.VectorizationBase.has_feature(Val{:x86_64_avx2}) # Compile... 
status = run(`julia --compile=min $testpath/scripts/loopvec_product.jl`) @test isa(status, Base.Process) @@ -48,6 +49,7 @@ @test isa(status, Base.Process) @test status.exitcode == 0 @test parsedlm(c"product.tsv",'\t')[] == 3025 +end # Compile... status = run(`julia --compile=min $testpath/scripts/loopvec_matrix.jl`) From e7833c2470ed452bb1f0a387ccd426b86592b197 Mon Sep 17 00:00:00 2001 From: "C.Brenhin Keller" Date: Wed, 1 Jun 2022 00:59:30 -0400 Subject: [PATCH 074/159] Try bumping compat bounds on GPUCompiler --- Project.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Project.toml b/Project.toml index 467c82d..b3c5af2 100644 --- a/Project.toml +++ b/Project.toml @@ -11,6 +11,6 @@ Libdl = "8f399da3-3557-5675-b5ff-fb832c97cbdb" Serialization = "9e88b42a-f829-5b0c-bbe9-9e923198166b" [compat] -GPUCompiler = "0.13" +GPUCompiler = "0.16" LLVM = "4.8" julia = "1.7" From 60fe2aaaa49d19c7421dda6e9cdf841ebc1b892c Mon Sep 17 00:00:00 2001 From: "C.Brenhin Keller" Date: Wed, 1 Jun 2022 01:13:59 -0400 Subject: [PATCH 075/159] Test integration on nightly too, why not --- .github/workflows/ci-integration-nightly.yml | 38 ++++++++++++++++++++ README.md | 1 + 2 files changed, 39 insertions(+) create mode 100644 .github/workflows/ci-integration-nightly.yml diff --git a/.github/workflows/ci-integration-nightly.yml b/.github/workflows/ci-integration-nightly.yml new file mode 100644 index 0000000..00074ac --- /dev/null +++ b/.github/workflows/ci-integration-nightly.yml @@ -0,0 +1,38 @@ +name: CI (Integration nightly) +on: + push: + branches: + - '**' + paths-ignore: + - 'README.md' + pull_request: + branches: + - master + paths-ignore: + - 'README.md' +jobs: + test-integration-nightly: + name: Julia ${{ matrix.version }} - ${{ matrix.os }} - ${{ matrix.arch }} + runs-on: ${{ matrix.os }} + strategy: + fail-fast: false + matrix: + version: + - 'nightly' + os: + - ubuntu-latest + - macOS-latest + arch: + - x64 + group: + - Integration + steps: + - uses: 
actions/checkout@v2 + - uses: julia-actions/setup-julia@latest + with: + version: ${{ matrix.version }} + arch: ${{ matrix.arch }} + - uses: julia-actions/julia-buildpkg@latest + - uses: julia-actions/julia-runtest@latest + env: + GROUP: ${{ matrix.group }} diff --git a/README.md b/README.md index 05dd9db..c45e105 100644 --- a/README.md +++ b/README.md @@ -3,6 +3,7 @@ [![CI](https://github.com/tshort/StaticCompiler.jl/workflows/CI/badge.svg)](https://github.com/tshort/StaticCompiler.jl/actions/workflows/ci.yml) [![CI (Integration)](https://github.com/tshort/StaticCompiler.jl/workflows/CI%20(Integration)/badge.svg)](https://github.com/tshort/StaticCompiler.jl/actions/workflows/ci-integration.yml) [![CI (Julia nightly)](https://github.com/tshort/StaticCompiler.jl/workflows/CI%20(Julia%20nightly)/badge.svg)](https://github.com/tshort/StaticCompiler.jl/actions/workflows/ci-julia-nightly.yml) +[![CI (Integration nightly)](https://github.com/tshort/StaticCompiler.jl/workflows/CI%20(Integration%20nightly)/badge.svg)](https://github.com/tshort/StaticCompiler.jl/actions/workflows/ci-integration-nightly.yml) [![Coverage](https://codecov.io/gh/tshort/StaticCompiler.jl/branch/master/graph/badge.svg)](https://codecov.io/gh/tshort/StaticCompiler.jl) This is an experimental package to compile Julia code to standalone libraries. A system image is not needed. 
From e8d6bfc7c62156715dd7f70920dbc8d59d5a7cd3 Mon Sep 17 00:00:00 2001 From: "C.Brenhin Keller" Date: Wed, 1 Jun 2022 03:58:38 -0400 Subject: [PATCH 076/159] Provide JuliaContext to GPUCompiler.codegen --- src/StaticCompiler.jl | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/src/StaticCompiler.jl b/src/StaticCompiler.jl index da4b804..e703f47 100644 --- a/src/StaticCompiler.jl +++ b/src/StaticCompiler.jl @@ -141,8 +141,9 @@ function generate_obj(f, tt, path::String = tempname(), name = GPUCompiler.safe_ tm = GPUCompiler.llvm_machine(NativeCompilerTarget()) job, kwargs = native_job(f, tt; name, kwargs...) #Get LLVM to generated a module of code for us. We don't want GPUCompiler's optimization passes. - mod, meta = GPUCompiler.codegen(:llvm, job; strip=strip_llvm, only_entry=false, validate=false, optimize=false) - + mod, meta = GPUCompiler.JuliaContext() do context + GPUCompiler.codegen(:llvm, job; strip=strip_llvm, only_entry=false, validate=false, optimize=false, ctx=context) + end # Use Enzyme's annotation and optimization pipeline annotate!(mod) optimize!(mod, tm) @@ -448,7 +449,9 @@ end # Return an LLVM module function native_llvm_module(f, tt, name = GPUCompiler.safe_name(repr(f)); kwargs...) job, kwargs = native_job(f, tt; name, kwargs...) 
- m, _ = GPUCompiler.codegen(:llvm, job; strip=true, only_entry=false, validate=false) + m, _ = GPUCompiler.JuliaContext() do context + GPUCompiler.codegen(:llvm, job; strip=true, only_entry=false, validate=false, ctx=context) + end return m end From e9a6b6936c0d6ea31ffba793e021ea603fff18a8 Mon Sep 17 00:00:00 2001 From: "C.Brenhin Keller" Date: Wed, 1 Jun 2022 04:28:42 -0400 Subject: [PATCH 077/159] Don't restrict input types of `compile_executable` functions; we need to modify these to avoid problems with argv pointers in the future --- src/StaticCompiler.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/StaticCompiler.jl b/src/StaticCompiler.jl index e703f47..7249ffb 100644 --- a/src/StaticCompiler.jl +++ b/src/StaticCompiler.jl @@ -231,7 +231,7 @@ function compile_executable(f, types=(), path::String="./", name=GPUCompiler.saf kwargs...) tt = Base.to_tuple_type(types) - tt == Tuple{} || tt == Tuple{Int, Ptr{Ptr{UInt8}}} || error("input type signature $types must be either () or (Int, Ptr{Ptr{UInt8}})") + # tt == Tuple{} || tt == Tuple{Int, Ptr{Ptr{UInt8}}} || error("input type signature $types must be either () or (Int, Ptr{Ptr{UInt8}})") rt = only(native_code_typed(f, tt))[2] isconcretetype(rt) || error("$f$types did not infer to a concrete type. Got $rt") From 0049c88083d14d77508ed83ef374807afdc26b9a Mon Sep 17 00:00:00 2001 From: "C. 
Brenhin Keller" Date: Wed, 1 Jun 2022 17:41:23 -0400 Subject: [PATCH 078/159] Bump version to 0.4.3 to register new version --- Project.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Project.toml b/Project.toml index b3c5af2..891f7f4 100644 --- a/Project.toml +++ b/Project.toml @@ -1,7 +1,7 @@ name = "StaticCompiler" uuid = "81625895-6c0f-48fc-b932-11a18313743c" authors = ["Tom Short"] -version = "0.4.2" +version = "0.4.3" [deps] Clang_jll = "0ee61d77-7f21-5576-8119-9fcc46b10100" From 93bcc15fda5e7e89c8ff6fdb407e90759b67790e Mon Sep 17 00:00:00 2001 From: Gabriel Baraldi Date: Tue, 7 Jun 2022 15:21:05 -0300 Subject: [PATCH 079/159] Check mangling --- src/StaticCompiler.jl | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/src/StaticCompiler.jl b/src/StaticCompiler.jl index 725de2a..1d213b6 100644 --- a/src/StaticCompiler.jl +++ b/src/StaticCompiler.jl @@ -459,7 +459,7 @@ function native_code_native(@nospecialize(f), @nospecialize(tt), name = GPUCompi GPUCompiler.code_native(stdout, job; kwargs...) end -#Return an LLVM module for multiple functions +#Return an LLVM module for multiple functions function native_llvm_module(funcs::Array; mangle_names = false, kwargs...) f,tt = funcs[1] mod = native_llvm_module(f,tt, kwargs...) @@ -473,7 +473,9 @@ function native_llvm_module(funcs::Array; mangle_names = false, kwargs...) if mangle_names for func in functions(mod) fname = name(func) - name!(func,fname[7:end]) + if fname[1:6] == "julia_" + name!(func,fname[7:end]) + end end end return mod @@ -498,7 +500,7 @@ function generate_obj(funcs::Array, path::String = tempname(), filenamebase::Str end function generate_shlib(funcs::Array, path::String = tempname(), filename::String="libfoo"; mangle_names=false, kwargs...) - + lib_path = joinpath(path, "$filename.$(Libdl.dlext)") _,obj_path = generate_obj(funcs, path, filename; mangle_names=mangle_names, kwargs...) 
From 2cf98333f5db5cb1f3f0f46331e98e6f99179061 Mon Sep 17 00:00:00 2001 From: Gabriel Baraldi Date: Tue, 7 Jun 2022 16:36:03 -0300 Subject: [PATCH 080/159] Add function deduplication and tests --- src/StaticCompiler.jl | 4 ++++ test/testcore.jl | 26 ++++++++++++++++++++++++++ 2 files changed, 30 insertions(+) diff --git a/src/StaticCompiler.jl b/src/StaticCompiler.jl index 1d213b6..6e5f7a8 100644 --- a/src/StaticCompiler.jl +++ b/src/StaticCompiler.jl @@ -478,6 +478,10 @@ function native_llvm_module(funcs::Array; mangle_names = false, kwargs...) end end end + LLVM.ModulePassManager() do pass_manager #remove duplicate functions + LLVM.merge_functions!(pass_manager) + LLVM.run!(pass_manager, mod) + end return mod end diff --git a/test/testcore.jl b/test/testcore.jl index 7f12d01..cef78db 100644 --- a/test/testcore.jl +++ b/test/testcore.jl @@ -292,3 +292,29 @@ end @test isa(r, Base.Process) @test r.exitcode == 0 end + + +@testset "Multiple Function Dylibs" begin + + @noinline square(n) = n*n + + function squaresquare(n) + square(square(n)) + end + + function squaresquaresquare(n) + square(squaresquare(n)) + end + + funcs = [(squaresquare,(Float64,)), (squaresquaresquare,(Float64,))] + filepath = compile_shlib(funcs, mangle_names=true) + + ptr = Libdl.dlopen(filepath, Libdl.RTLD_LOCAL) + + fptr2 = Libdl.dlsym(ptr, "squaresquare") + @test ccall(fptr2, Float64, (Float64,), 10.) == squaresquare(10.) + + fptr = Libdl.dlsym(ptr, "squaresquaresquare") + @test ccall(fptr2, Float64, (Float64,), 10.) == squaresquaresquare(10.) 
+ #Compile dylib +end \ No newline at end of file From b09ded03aa6ec8b79ab8f1a0416e1d331d8797a9 Mon Sep 17 00:00:00 2001 From: Gabriel Baraldi Date: Tue, 7 Jun 2022 17:17:43 -0300 Subject: [PATCH 081/159] Fix typo and move tests outside the macro --- test/testcore.jl | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/test/testcore.jl b/test/testcore.jl index cef78db..9c54b5e 100644 --- a/test/testcore.jl +++ b/test/testcore.jl @@ -293,18 +293,18 @@ end @test r.exitcode == 0 end +@noinline square(n) = n*n -@testset "Multiple Function Dylibs" begin +function squaresquare(n) + square(square(n)) +end - @noinline square(n) = n*n +function squaresquaresquare(n) + square(squaresquare(n)) +end - function squaresquare(n) - square(square(n)) - end +@testset "Multiple Function Dylibs" begin - function squaresquaresquare(n) - square(squaresquare(n)) - end funcs = [(squaresquare,(Float64,)), (squaresquaresquare,(Float64,))] filepath = compile_shlib(funcs, mangle_names=true) @@ -315,6 +315,6 @@ end @test ccall(fptr2, Float64, (Float64,), 10.) == squaresquare(10.) fptr = Libdl.dlsym(ptr, "squaresquaresquare") - @test ccall(fptr2, Float64, (Float64,), 10.) == squaresquaresquare(10.) + @test ccall(fptr, Float64, (Float64,), 10.) == squaresquaresquare(10.) #Compile dylib end \ No newline at end of file From 07a44d24e11c20db219028b00fd84fe9d2a127f4 Mon Sep 17 00:00:00 2001 From: Gabriel Baraldi Date: Tue, 7 Jun 2022 20:04:32 -0300 Subject: [PATCH 082/159] Change name to demangle --- src/StaticCompiler.jl | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/src/StaticCompiler.jl b/src/StaticCompiler.jl index 6e5f7a8..b49d7b9 100644 --- a/src/StaticCompiler.jl +++ b/src/StaticCompiler.jl @@ -460,7 +460,7 @@ function native_code_native(@nospecialize(f), @nospecialize(tt), name = GPUCompi end #Return an LLVM module for multiple functions -function native_llvm_module(funcs::Array; mangle_names = false, kwargs...) 
+function native_llvm_module(funcs::Array; demangle = false, kwargs...) f,tt = funcs[1] mod = native_llvm_module(f,tt, kwargs...) if length(funcs) > 1 @@ -470,7 +470,7 @@ function native_llvm_module(funcs::Array; mangle_names = false, kwargs...) link!(mod,tmod) end end - if mangle_names + if demangle for func in functions(mod) fname = name(func) if fname[1:6] == "julia_" @@ -486,7 +486,7 @@ function native_llvm_module(funcs::Array; mangle_names = false, kwargs...) end function generate_obj(funcs::Array, path::String = tempname(), filenamebase::String="obj"; - mangle_names =false, + demangle =false, strip_llvm = false, strip_asm = true, opt_level=3, @@ -495,7 +495,7 @@ function generate_obj(funcs::Array, path::String = tempname(), filenamebase::Str mkpath(path) obj_path = joinpath(path, "$filenamebase.o") fakejob, kwargs = native_job(f,tt, kwargs...) - mod = native_llvm_module(funcs; mangle_names = mangle_names, kwargs...) + mod = native_llvm_module(funcs; demangle = demangle, kwargs...) obj, _ = GPUCompiler.emit_asm(fakejob, mod; strip=strip_asm, validate=false, format=LLVM.API.LLVMObjectFile) open(obj_path, "w") do io write(io, obj) @@ -503,11 +503,11 @@ function generate_obj(funcs::Array, path::String = tempname(), filenamebase::Str path, obj_path end -function generate_shlib(funcs::Array, path::String = tempname(), filename::String="libfoo"; mangle_names=false, kwargs...) +function generate_shlib(funcs::Array, path::String = tempname(), filename::String="libfoo"; demangle=false, kwargs...) lib_path = joinpath(path, "$filename.$(Libdl.dlext)") - _,obj_path = generate_obj(funcs, path, filename; mangle_names=mangle_names, kwargs...) + _,obj_path = generate_obj(funcs, path, filename; demangle=demangle, kwargs...) # Pick a Clang cc = Sys.isapple() ? `cc` : clang() # Compile! @@ -518,7 +518,7 @@ end function compile_shlib(funcs::Array, path::String="./"; filename="libfoo", - mangle_names=false, + demangle=false, kwargs...) 
for func in funcs f, types = func @@ -532,7 +532,7 @@ function compile_shlib(funcs::Array, path::String="./"; # Would be nice to use a compiler pass or something to check if there are any heap allocations or references to globals # Keep an eye on https://github.com/JuliaLang/julia/pull/43747 for this - generate_shlib(funcs, path, filename; mangle_names=mangle_names, kwargs...) + generate_shlib(funcs, path, filename; demangle=demangle, kwargs...) joinpath(abspath(path), filename * "." * Libdl.dlext) end From 16e047e9cd76351936565dec00490cae5f8de485 Mon Sep 17 00:00:00 2001 From: Gabriel Baraldi Date: Wed, 8 Jun 2022 17:51:08 -0300 Subject: [PATCH 083/159] Fix text --- test/testcore.jl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/test/testcore.jl b/test/testcore.jl index 9c54b5e..fe2d85e 100644 --- a/test/testcore.jl +++ b/test/testcore.jl @@ -307,7 +307,7 @@ end funcs = [(squaresquare,(Float64,)), (squaresquaresquare,(Float64,))] - filepath = compile_shlib(funcs, mangle_names=true) + filepath = compile_shlib(funcs, demangle=true) ptr = Libdl.dlopen(filepath, Libdl.RTLD_LOCAL) @@ -317,4 +317,4 @@ end fptr = Libdl.dlsym(ptr, "squaresquaresquare") @test ccall(fptr, Float64, (Float64,), 10.) == squaresquaresquare(10.) #Compile dylib -end \ No newline at end of file +end From e79b6a6ccda62e2b7b6f99949220d3f3522ce846 Mon Sep 17 00:00:00 2001 From: "C. 
Brenhin Keller" Date: Sat, 11 Jun 2022 16:31:41 -0400 Subject: [PATCH 084/159] Bump version to 0.4.4 --- Project.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Project.toml b/Project.toml index 891f7f4..22c22a3 100644 --- a/Project.toml +++ b/Project.toml @@ -1,7 +1,7 @@ name = "StaticCompiler" uuid = "81625895-6c0f-48fc-b932-11a18313743c" authors = ["Tom Short"] -version = "0.4.3" +version = "0.4.4" [deps] Clang_jll = "0ee61d77-7f21-5576-8119-9fcc46b10100" From d183a162cc5234c0b5092f1e36778ea44bc043d4 Mon Sep 17 00:00:00 2001 From: "C.Brenhin Keller" Date: Sun, 17 Jul 2022 14:08:13 -0400 Subject: [PATCH 085/159] Add TOML to test dependencies to avoid weird Clang_jll error --- test/Manifest.toml | 444 --------------------------------------------- test/Project.toml | 1 + 2 files changed, 1 insertion(+), 444 deletions(-) delete mode 100644 test/Manifest.toml diff --git a/test/Manifest.toml b/test/Manifest.toml deleted file mode 100644 index 067ac97..0000000 --- a/test/Manifest.toml +++ /dev/null @@ -1,444 +0,0 @@ -# This file is machine-generated - editing it directly is not advised - -julia_version = "1.8.0-beta3" -manifest_format = "2.0" -project_hash = "45548a8fc84942c91e0e727fe14ec23674d9246e" - -[[deps.Adapt]] -deps = ["LinearAlgebra"] -git-tree-sha1 = "af92965fb30777147966f58acb05da51c5616b5f" -uuid = "79e6a3ab-5dfb-504d-930d-738a2a938a0e" -version = "3.3.3" - -[[deps.ArgTools]] -uuid = "0dad84c5-d112-42e6-8d28-ef12dabb789f" -version = "1.1.1" - -[[deps.ArrayInterface]] -deps = ["Compat", "IfElse", "LinearAlgebra", "Requires", "SparseArrays", "Static"] -git-tree-sha1 = "81f0cb60dc994ca17f68d9fb7c942a5ae70d9ee4" -uuid = "4fba245c-0d91-5ea0-9b3e-6abc04ee57a9" -version = "5.0.8" - -[[deps.Artifacts]] -uuid = "56f22d72-fd6d-98f1-02f0-08ddc0907c33" - -[[deps.Base64]] -uuid = "2a0f44e3-6c83-55bd-87e4-b1978d98bd5f" - -[[deps.BitTwiddlingConvenienceFunctions]] -deps = ["Static"] -git-tree-sha1 = "28bbdbf0354959db89358d1d79d421ff31ef0b5e" -uuid = 
"62783981-4cbd-42fc-bca8-16325de8dc4b" -version = "0.1.3" - -[[deps.CEnum]] -git-tree-sha1 = "eb4cb44a499229b3b8426dcfb5dd85333951ff90" -uuid = "fa961155-64e5-5f13-b03f-caf6b980ea82" -version = "0.4.2" - -[[deps.CPUSummary]] -deps = ["CpuId", "IfElse", "Static"] -git-tree-sha1 = "0eaf4aedad5ccc3e39481db55d72973f856dc564" -uuid = "2a0fbf3d-bb9c-48f3-b0a9-814d99fd7ab9" -version = "0.1.22" - -[[deps.ChainRulesCore]] -deps = ["Compat", "LinearAlgebra", "SparseArrays"] -git-tree-sha1 = "9950387274246d08af38f6eef8cb5480862a435f" -uuid = "d360d2e6-b24c-11e9-a2a3-2a2ae2dbcce4" -version = "1.14.0" - -[[deps.ChangesOfVariables]] -deps = ["ChainRulesCore", "LinearAlgebra", "Test"] -git-tree-sha1 = "1e315e3f4b0b7ce40feded39c73049692126cf53" -uuid = "9e997f8a-9a97-42d5-a9f1-ce6bfc15e2c0" -version = "0.1.3" - -[[deps.CloseOpenIntervals]] -deps = ["ArrayInterface", "Static"] -git-tree-sha1 = "f576084239e6bdf801007c80e27e2cc2cd963fe0" -uuid = "fb6a15b2-703c-40df-9091-08a04967cfa9" -version = "0.1.6" - -[[deps.CommonSubexpressions]] -deps = ["MacroTools", "Test"] -git-tree-sha1 = "7b8a93dba8af7e3b42fecabf646260105ac373f7" -uuid = "bbf7d656-a473-5ed7-a52c-81e309532950" -version = "0.3.0" - -[[deps.Compat]] -deps = ["Base64", "Dates", "DelimitedFiles", "Distributed", "InteractiveUtils", "LibGit2", "Libdl", "LinearAlgebra", "Markdown", "Mmap", "Pkg", "Printf", "REPL", "Random", "SHA", "Serialization", "SharedArrays", "Sockets", "SparseArrays", "Statistics", "Test", "UUIDs", "Unicode"] -git-tree-sha1 = "b153278a25dd42c65abbf4e62344f9d22e59191b" -uuid = "34da2185-b29b-5c13-b0c7-acf172513d20" -version = "3.43.0" - -[[deps.CompilerSupportLibraries_jll]] -deps = ["Artifacts", "Libdl"] -uuid = "e66e0078-7015-5450-92f7-15fbd957f2ae" -version = "0.5.2+0" - -[[deps.CpuId]] -deps = ["Markdown"] -git-tree-sha1 = "fcbb72b032692610bfbdb15018ac16a36cf2e406" -uuid = "adafc99b-e345-5852-983c-f28acb93d879" -version = "0.3.1" - -[[deps.Dates]] -deps = ["Printf"] -uuid = 
"ade2ca70-3891-5945-98fb-dc099432e06a" - -[[deps.DelimitedFiles]] -deps = ["Mmap"] -uuid = "8bb1440f-4735-579b-a4ab-409b98df4dab" - -[[deps.DiffResults]] -deps = ["StaticArrays"] -git-tree-sha1 = "c18e98cba888c6c25d1c3b048e4b3380ca956805" -uuid = "163ba53b-c6d8-5494-b064-1a9d43ac40c5" -version = "1.0.3" - -[[deps.DiffRules]] -deps = ["IrrationalConstants", "LogExpFunctions", "NaNMath", "Random", "SpecialFunctions"] -git-tree-sha1 = "28d605d9a0ac17118fe2c5e9ce0fbb76c3ceb120" -uuid = "b552c78f-8df3-52c6-915a-8e097449b14b" -version = "1.11.0" - -[[deps.Distributed]] -deps = ["Random", "Serialization", "Sockets"] -uuid = "8ba89e20-285c-5b6f-9357-94700520ee1b" - -[[deps.DocStringExtensions]] -deps = ["LibGit2"] -git-tree-sha1 = "b19534d1895d702889b219c382a6e18010797f0b" -uuid = "ffbed154-4ef7-542d-bbb7-c09d3a79fcae" -version = "0.8.6" - -[[deps.Downloads]] -deps = ["ArgTools", "FileWatching", "LibCURL", "NetworkOptions"] -uuid = "f43a241f-c20a-4ad4-852c-f6b1247861c6" -version = "1.6.0" - -[[deps.FileWatching]] -uuid = "7b1f6079-737a-58dc-b8bc-7a2ca5c1b5ee" - -[[deps.ForwardDiff]] -deps = ["CommonSubexpressions", "DiffResults", "DiffRules", "LinearAlgebra", "LogExpFunctions", "NaNMath", "Preferences", "Printf", "Random", "SpecialFunctions", "StaticArrays"] -git-tree-sha1 = "2f18915445b248731ec5db4e4a17e451020bf21e" -uuid = "f6369f11-7733-5829-9624-2563aa707210" -version = "0.10.30" - -[[deps.HostCPUFeatures]] -deps = ["BitTwiddlingConvenienceFunctions", "IfElse", "Libdl", "Static"] -git-tree-sha1 = "18be5268cf415b5e27f34980ed25a7d34261aa83" -uuid = "3e5b6fbb-0976-4d2c-9146-d79de83f2fb0" -version = "0.1.7" - -[[deps.IfElse]] -git-tree-sha1 = "debdd00ffef04665ccbb3e150747a77560e8fad1" -uuid = "615f187c-cbe4-4ef1-ba3b-2fcf58d6d173" -version = "0.1.1" - -[[deps.InteractiveUtils]] -deps = ["Markdown"] -uuid = "b77e0a4c-d291-57a0-90e8-8db25a27a240" - -[[deps.InverseFunctions]] -deps = ["Test"] -git-tree-sha1 = "336cc738f03e069ef2cac55a104eb823455dca75" -uuid = 
"3587e190-3f89-42d0-90ee-14403ec27112" -version = "0.1.4" - -[[deps.IrrationalConstants]] -git-tree-sha1 = "7fd44fd4ff43fc60815f8e764c0f352b83c49151" -uuid = "92d709cd-6900-40b7-9082-c6be49f344b6" -version = "0.1.1" - -[[deps.JLLWrappers]] -deps = ["Preferences"] -git-tree-sha1 = "abc9885a7ca2052a736a600f7fa66209f96506e1" -uuid = "692b3bcd-3c85-4b1f-b108-f13ce0eb3210" -version = "1.4.1" - -[[deps.LLVM]] -deps = ["CEnum", "LLVMExtra_jll", "Libdl", "Printf", "Unicode"] -git-tree-sha1 = "c8d47589611803a0f3b4813d9e267cd4e3dbcefb" -uuid = "929cbde3-209d-540e-8aea-75f648917ca0" -version = "4.11.1" - -[[deps.LLVMExtra_jll]] -deps = ["Artifacts", "JLLWrappers", "LazyArtifacts", "Libdl", "Pkg", "TOML"] -git-tree-sha1 = "771bfe376249626d3ca12bcd58ba243d3f961576" -uuid = "dad2f222-ce93-54a1-a47d-0025e8a3acab" -version = "0.0.16+0" - -[[deps.LayoutPointers]] -deps = ["ArrayInterface", "LinearAlgebra", "ManualMemory", "SIMDTypes", "Static"] -git-tree-sha1 = "3da92dc6d4bce776cc850c18274a7508b8d752ef" -uuid = "10f19ff3-798f-405d-979b-55457f8fc047" -version = "0.1.7" - -[[deps.LazyArtifacts]] -deps = ["Artifacts", "Pkg"] -uuid = "4af54fe1-eca0-43a8-85a7-787d91b784e3" - -[[deps.LibCURL]] -deps = ["LibCURL_jll", "MozillaCACerts_jll"] -uuid = "b27032c2-a3e7-50c8-80cd-2d36dbcbfd21" -version = "0.6.3" - -[[deps.LibCURL_jll]] -deps = ["Artifacts", "LibSSH2_jll", "Libdl", "MbedTLS_jll", "Zlib_jll", "nghttp2_jll"] -uuid = "deac9b47-8bc7-5906-a0fe-35ac56dc84c0" -version = "7.81.0+0" - -[[deps.LibGit2]] -deps = ["Base64", "NetworkOptions", "Printf", "SHA"] -uuid = "76f85450-5226-5b5a-8eaa-529ad045b433" - -[[deps.LibSSH2_jll]] -deps = ["Artifacts", "Libdl", "MbedTLS_jll"] -uuid = "29816b5a-b9ab-546f-933c-edad1886dfa8" -version = "1.10.2+0" - -[[deps.Libdl]] -uuid = "8f399da3-3557-5675-b5ff-fb832c97cbdb" - -[[deps.LinearAlgebra]] -deps = ["Libdl", "libblastrampoline_jll"] -uuid = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e" - -[[deps.LogExpFunctions]] -deps = ["ChainRulesCore", 
"ChangesOfVariables", "DocStringExtensions", "InverseFunctions", "IrrationalConstants", "LinearAlgebra"] -git-tree-sha1 = "09e4b894ce6a976c354a69041a04748180d43637" -uuid = "2ab3a3ac-af41-5b50-aa03-7779005ae688" -version = "0.3.15" - -[[deps.Logging]] -uuid = "56ddb016-857b-54e1-b83d-db4d58db5568" - -[[deps.LoopVectorization]] -deps = ["ArrayInterface", "CPUSummary", "ChainRulesCore", "CloseOpenIntervals", "DocStringExtensions", "ForwardDiff", "HostCPUFeatures", "IfElse", "LayoutPointers", "LinearAlgebra", "OffsetArrays", "PolyesterWeave", "SIMDDualNumbers", "SLEEFPirates", "SpecialFunctions", "Static", "ThreadingUtilities", "UnPack", "VectorizationBase"] -git-tree-sha1 = "4392c19f0203df81512b6790a0a67446650bdce0" -uuid = "bdcacae8-1622-11e9-2a5c-532679323890" -version = "0.12.110" - -[[deps.MacroTools]] -deps = ["Markdown", "Random"] -git-tree-sha1 = "3d3e902b31198a27340d0bf00d6ac452866021cf" -uuid = "1914dd2f-81c6-5fcd-8719-6d5c9610ff09" -version = "0.5.9" - -[[deps.ManualMemory]] -git-tree-sha1 = "bcaef4fc7a0cfe2cba636d84cda54b5e4e4ca3cd" -uuid = "d125e4d3-2237-4719-b19c-fa641b8a4667" -version = "0.1.8" - -[[deps.Markdown]] -deps = ["Base64"] -uuid = "d6f4376e-aef5-505a-96c1-9c027394607a" - -[[deps.MbedTLS_jll]] -deps = ["Artifacts", "Libdl"] -uuid = "c8ffd9c3-330d-5841-b78e-0817d7145fa1" -version = "2.28.0+0" - -[[deps.Mmap]] -uuid = "a63ad114-7e13-5084-954f-fe012c677804" - -[[deps.MozillaCACerts_jll]] -uuid = "14a3606d-f60d-562e-9121-12d972cd8159" -version = "2022.2.1" - -[[deps.NaNMath]] -git-tree-sha1 = "737a5957f387b17e74d4ad2f440eb330b39a62c5" -uuid = "77ba4419-2d1f-58cd-9bb1-8ffee604a2e3" -version = "1.0.0" - -[[deps.NetworkOptions]] -uuid = "ca575930-c2e3-43a9-ace4-1e988b2c1908" -version = "1.2.0" - -[[deps.OffsetArrays]] -deps = ["Adapt"] -git-tree-sha1 = "52addd9e91df8a6a5781e5c7640787525fd48056" -uuid = "6fe1bfb0-de20-5000-8ca7-80f57d26f881" -version = "1.11.2" - -[[deps.OpenBLAS_jll]] -deps = ["Artifacts", "CompilerSupportLibraries_jll", "Libdl"] 
-uuid = "4536629a-c528-5b80-bd46-f80d51c5b363" -version = "0.3.20+0" - -[[deps.OpenLibm_jll]] -deps = ["Artifacts", "Libdl"] -uuid = "05823500-19ac-5b8b-9628-191a04bc5112" -version = "0.8.1+0" - -[[deps.OpenSpecFun_jll]] -deps = ["Artifacts", "CompilerSupportLibraries_jll", "JLLWrappers", "Libdl", "Pkg"] -git-tree-sha1 = "13652491f6856acfd2db29360e1bbcd4565d04f1" -uuid = "efe28fd5-8261-553b-a9e1-b2916fc3738e" -version = "0.5.5+0" - -[[deps.Pkg]] -deps = ["Artifacts", "Dates", "Downloads", "LibGit2", "Libdl", "Logging", "Markdown", "Printf", "REPL", "Random", "SHA", "Serialization", "TOML", "Tar", "UUIDs", "p7zip_jll"] -uuid = "44cfe95a-1eb2-52ea-b672-e2afdf69b78f" -version = "1.8.0" - -[[deps.PolyesterWeave]] -deps = ["BitTwiddlingConvenienceFunctions", "CPUSummary", "IfElse", "Static", "ThreadingUtilities"] -git-tree-sha1 = "7e597df97e46ffb1c8adbaddfa56908a7a20194b" -uuid = "1d0040c9-8b98-4ee7-8388-3f51789ca0ad" -version = "0.1.5" - -[[deps.Preferences]] -deps = ["TOML"] -git-tree-sha1 = "47e5f437cc0e7ef2ce8406ce1e7e24d44915f88d" -uuid = "21216c6a-2e73-6563-6e65-726566657250" -version = "1.3.0" - -[[deps.Printf]] -deps = ["Unicode"] -uuid = "de0858da-6303-5e67-8744-51eddeeeb8d7" - -[[deps.REPL]] -deps = ["InteractiveUtils", "Markdown", "Sockets", "Unicode"] -uuid = "3fa0cd96-eef1-5676-8a61-b3b8758bbffb" - -[[deps.Random]] -deps = ["SHA", "Serialization"] -uuid = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c" - -[[deps.Requires]] -deps = ["UUIDs"] -git-tree-sha1 = "838a3a4188e2ded87a4f9f184b4b0d78a1e91cb7" -uuid = "ae029012-a4dd-5104-9daa-d747884805df" -version = "1.3.0" - -[[deps.SHA]] -uuid = "ea8e919c-243c-51af-8825-aaa63cd721ce" -version = "0.7.0" - -[[deps.SIMDDualNumbers]] -deps = ["ForwardDiff", "IfElse", "SLEEFPirates", "VectorizationBase"] -git-tree-sha1 = "dd4195d308df24f33fb10dde7c22103ba88887fa" -uuid = "3cdde19b-5bb0-4aaf-8931-af3e248e098b" -version = "0.1.1" - -[[deps.SIMDTypes]] -git-tree-sha1 = "330289636fb8107c5f32088d2741e9fd7a061a5c" -uuid = 
"94e857df-77ce-4151-89e5-788b33177be4" -version = "0.1.0" - -[[deps.SLEEFPirates]] -deps = ["IfElse", "Static", "VectorizationBase"] -git-tree-sha1 = "ac399b5b163b9140f9c310dfe9e9aaa225617ff6" -uuid = "476501e8-09a2-5ece-8869-fb82de89a1fa" -version = "0.6.32" - -[[deps.Serialization]] -uuid = "9e88b42a-f829-5b0c-bbe9-9e923198166b" - -[[deps.SharedArrays]] -deps = ["Distributed", "Mmap", "Random", "Serialization"] -uuid = "1a1011a3-84de-559e-8e89-a11a2f7dc383" - -[[deps.Sockets]] -uuid = "6462fe0b-24de-5631-8697-dd941f90decc" - -[[deps.SparseArrays]] -deps = ["LinearAlgebra", "Random"] -uuid = "2f01184e-e22b-5df5-ae63-d93ebab69eaf" - -[[deps.SpecialFunctions]] -deps = ["ChainRulesCore", "IrrationalConstants", "LogExpFunctions", "OpenLibm_jll", "OpenSpecFun_jll"] -git-tree-sha1 = "bc40f042cfcc56230f781d92db71f0e21496dffd" -uuid = "276daf66-3868-5448-9aa4-cd146d93841b" -version = "2.1.5" - -[[deps.Static]] -deps = ["IfElse"] -git-tree-sha1 = "3a2a99b067090deb096edecec1dc291c5b4b31cb" -uuid = "aedffcd0-7271-4cad-89d0-dc628f76c6d3" -version = "0.6.5" - -[[deps.StaticArrays]] -deps = ["LinearAlgebra", "Random", "Statistics"] -git-tree-sha1 = "cd56bf18ed715e8b09f06ef8c6b781e6cdc49911" -uuid = "90137ffa-7385-5640-81b9-e52037218182" -version = "1.4.4" - -[[deps.StaticTools]] -deps = ["ManualMemory"] -git-tree-sha1 = "45569bece43af73ba51cbdac422e77abd7adba9f" -uuid = "86c06d3c-3f03-46de-9781-57580aa96d0a" -version = "0.3.0" - -[[deps.Statistics]] -deps = ["LinearAlgebra", "SparseArrays"] -uuid = "10745b16-79ce-11e8-11f9-7d13ad32a3b2" - -[[deps.StrideArraysCore]] -deps = ["ArrayInterface", "CloseOpenIntervals", "IfElse", "LayoutPointers", "ManualMemory", "Requires", "SIMDTypes", "Static", "ThreadingUtilities"] -git-tree-sha1 = "e03eacc0b8c1520e73aa84922ce44a14f024b210" -uuid = "7792a7ef-975c-4747-a70f-980b88e8d1da" -version = "0.3.6" - -[[deps.TOML]] -deps = ["Dates"] -uuid = "fa267f1f-6049-4f14-aa54-33bafae1ed76" -version = "1.0.0" - -[[deps.Tar]] -deps = ["ArgTools", "SHA"] 
-uuid = "a4e569a6-e804-4fa4-b0f3-eef7a1d5b13e" -version = "1.10.0" - -[[deps.Test]] -deps = ["InteractiveUtils", "Logging", "Random", "Serialization"] -uuid = "8dfed614-e22c-5e08-85e1-65c5234f0b40" - -[[deps.ThreadingUtilities]] -deps = ["ManualMemory"] -git-tree-sha1 = "f8629df51cab659d70d2e5618a430b4d3f37f2c3" -uuid = "8290d209-cae3-49c0-8002-c8c24d57dab5" -version = "0.5.0" - -[[deps.UUIDs]] -deps = ["Random", "SHA"] -uuid = "cf7118a7-6976-5b1a-9a39-7adc72f591a4" - -[[deps.UnPack]] -git-tree-sha1 = "387c1f73762231e86e0c9c5443ce3b4a0a9a0c2b" -uuid = "3a884ed6-31ef-47d7-9d2a-63182c4928ed" -version = "1.0.2" - -[[deps.Unicode]] -uuid = "4ec0a83e-493e-50e2-b9ac-8f72acf5a8f5" - -[[deps.VectorizationBase]] -deps = ["ArrayInterface", "CPUSummary", "HostCPUFeatures", "IfElse", "LayoutPointers", "Libdl", "LinearAlgebra", "SIMDTypes", "Static"] -git-tree-sha1 = "c95d242ade2d67c1510ce52d107cfca7a83e0b4e" -uuid = "3d5dd08c-fd9d-11e8-17fa-ed2836048c2f" -version = "0.21.33" - -[[deps.Zlib_jll]] -deps = ["Libdl"] -uuid = "83775a58-1f1d-513f-b197-d71354ab007a" -version = "1.2.12+1" - -[[deps.libblastrampoline_jll]] -deps = ["Artifacts", "Libdl", "OpenBLAS_jll"] -uuid = "8e850b90-86db-534c-a0d3-1478176c7d93" -version = "5.1.0+0" - -[[deps.nghttp2_jll]] -deps = ["Artifacts", "Libdl"] -uuid = "8e850ede-7688-5339-a07c-302acd2aaf8d" -version = "1.41.0+1" - -[[deps.p7zip_jll]] -deps = ["Artifacts", "Libdl"] -uuid = "3f19e933-33d8-53b3-aaab-bd5110c3b7a0" -version = "16.2.1+1" diff --git a/test/Project.toml b/test/Project.toml index cbcf73f..e5a4823 100644 --- a/test/Project.toml +++ b/test/Project.toml @@ -7,4 +7,5 @@ LoopVectorization = "bdcacae8-1622-11e9-2a5c-532679323890" ManualMemory = "d125e4d3-2237-4719-b19c-fa641b8a4667" StaticTools = "86c06d3c-3f03-46de-9781-57580aa96d0a" StrideArraysCore = "7792a7ef-975c-4747-a70f-980b88e8d1da" +TOML = "fa267f1f-6049-4f14-aa54-33bafae1ed76" Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40" From 2245ac462c04583b4fff579e64f1746ee93f6784 Mon Sep 17 
00:00:00 2001 From: "C.Brenhin Keller" Date: Mon, 18 Jul 2022 03:00:09 -0400 Subject: [PATCH 086/159] Add `cflags` option to `compile_executable` & co --- src/StaticCompiler.jl | 45 ++++++++++++++++++++++++++++--------------- 1 file changed, 30 insertions(+), 15 deletions(-) diff --git a/src/StaticCompiler.jl b/src/StaticCompiler.jl index b49d7b9..276dd23 100644 --- a/src/StaticCompiler.jl +++ b/src/StaticCompiler.jl @@ -227,8 +227,10 @@ Hello, world! ``` """ function compile_executable(f, types=(), path::String="./", name=GPUCompiler.safe_name(repr(f)); - filename=name, - kwargs...) + filename=name, + cflags=``, + kwargs... + ) tt = Base.to_tuple_type(types) # tt == Tuple{} || tt == Tuple{Int, Ptr{Ptr{UInt8}}} || error("input type signature $types must be either () or (Int, Ptr{Ptr{UInt8}})") @@ -239,7 +241,7 @@ function compile_executable(f, types=(), path::String="./", name=GPUCompiler.saf # Would be nice to use a compiler pass or something to check if there are any heap allocations or references to globals # Keep an eye on https://github.com/JuliaLang/julia/pull/43747 for this - generate_executable(f, tt, path, name, filename; kwargs...) + generate_executable(f, tt, path, name, filename; cflags=cflags, kwargs...) joinpath(abspath(path), filename) end @@ -252,8 +254,10 @@ compile_shlib(f, types::Tuple, path::String, name::String=repr(f); filename::Str As `compile_executable`, but compiling to a standalone `.dylib`/`.so` shared library. """ function compile_shlib(f, types=(), path::String="./", name=GPUCompiler.safe_name(repr(f)); - filename=name, - kwargs...) + filename=name, + cflags=``, + kwargs... 
+ ) tt = Base.to_tuple_type(types) isconcretetype(tt) || error("input type signature $types is not concrete") @@ -263,8 +267,7 @@ function compile_shlib(f, types=(), path::String="./", name=GPUCompiler.safe_nam # Would be nice to use a compiler pass or something to check if there are any heap allocations or references to globals # Keep an eye on https://github.com/JuliaLang/julia/pull/43747 for this - - generate_shlib(f, tt, path, name, filename; kwargs...) + generate_shlib(f, tt, path, name, filename; cflags=cflags, kwargs...) joinpath(abspath(path), filename * "." * Libdl.dlext) end @@ -348,7 +351,10 @@ test (generic function with 1 method) julia> path, name = StaticCompiler.generate_executable(test, Tuple{Int64}, "./scratch") ``` """ -function generate_executable(f, tt, path::String = tempname(), name = GPUCompiler.safe_name(repr(f)), filename::String=string(name); kwargs...) +function generate_executable(f, tt, path=tempname(), name=GPUCompiler.safe_name(repr(f)), filename=string(name); + cflags=``, + kwargs... 
+ ) mkpath(path) obj_path = joinpath(path, "$filename.o") exec_path = joinpath(path, filename) @@ -366,7 +372,7 @@ function generate_executable(f, tt, path::String = tempname(), name = GPUCompile if Sys.isapple() # Apple no longer uses _start, so we can just specify a custom entry entry = "_julia_$name" - run(`$cc -e $entry $obj_path -o $exec_path`) + run(`$cc -e $entry $cflags $obj_path -o $exec_path`) else # Write a minimal wrapper to avoid having to specify a custom entry wrapper_path = joinpath(path, "wrapper.c") @@ -377,7 +383,7 @@ function generate_executable(f, tt, path::String = tempname(), name = GPUCompile return 0; }""") close(f) - run(`$cc $wrapper_path $obj_path -o $exec_path`) + run(`$cc $wrapper_path $cflags $obj_path -o $exec_path`) # Clean up run(`rm $wrapper_path`) end @@ -416,7 +422,11 @@ julia> ccall(StaticCompiler.generate_shlib_fptr(path, name), Float64, (Int64,), 5.256496109495593 ``` """ -function generate_shlib(f, tt, path::String = tempname(), name = GPUCompiler.safe_name(repr(f)), filename::String=name; kwargs...) +function generate_shlib(f, tt, path=tempname(), name=GPUCompiler.safe_name(repr(f)), filename=name; + cflags=``, + kwargs... + ) + mkpath(path) obj_path = joinpath(path, "$filename.o") lib_path = joinpath(path, "$filename.$(Libdl.dlext)") @@ -430,7 +440,7 @@ function generate_shlib(f, tt, path::String = tempname(), name = GPUCompiler.saf # Pick a Clang cc = Sys.isapple() ? `cc` : clang() # Compile! - run(`$cc -shared -o $lib_path $obj_path`) + run(`$cc -shared $cflags $obj_path -o $lib_path`) path, name end @@ -503,7 +513,11 @@ function generate_obj(funcs::Array, path::String = tempname(), filenamebase::Str path, obj_path end -function generate_shlib(funcs::Array, path::String = tempname(), filename::String="libfoo"; demangle=false, kwargs...) +function generate_shlib(funcs::Array, path::String = tempname(), filename::String="libfoo"; + demangle=false, + cflags=``, + kwargs... 
+ ) lib_path = joinpath(path, "$filename.$(Libdl.dlext)") @@ -511,7 +525,7 @@ function generate_shlib(funcs::Array, path::String = tempname(), filename::Strin # Pick a Clang cc = Sys.isapple() ? `cc` : clang() # Compile! - run(`$cc -shared -o $lib_path $obj_path`) + run(`$cc -shared $cflags $obj_path -o $lib_path `) path, name end @@ -519,6 +533,7 @@ end function compile_shlib(funcs::Array, path::String="./"; filename="libfoo", demangle=false, + cflags=``, kwargs...) for func in funcs f, types = func @@ -532,7 +547,7 @@ function compile_shlib(funcs::Array, path::String="./"; # Would be nice to use a compiler pass or something to check if there are any heap allocations or references to globals # Keep an eye on https://github.com/JuliaLang/julia/pull/43747 for this - generate_shlib(funcs, path, filename; demangle=demangle, kwargs...) + generate_shlib(funcs, path, filename; demangle=demangle, cflags=cflags, kwargs...) joinpath(abspath(path), filename * "." * Libdl.dlext) end From 51e90077774af471be2ca96c37268e93f21eb93e Mon Sep 17 00:00:00 2001 From: "C.Brenhin Keller" Date: Mon, 18 Jul 2022 10:24:26 -0400 Subject: [PATCH 087/159] Update tagbot for triggered workflow, adjust ci versions --- .github/workflows/TagBot.yml | 8 ++++++-- .github/workflows/ci-integration.yml | 2 +- .github/workflows/ci.yml | 2 +- 3 files changed, 8 insertions(+), 4 deletions(-) diff --git a/.github/workflows/TagBot.yml b/.github/workflows/TagBot.yml index d77d3a0..f49313b 100644 --- a/.github/workflows/TagBot.yml +++ b/.github/workflows/TagBot.yml @@ -1,11 +1,15 @@ name: TagBot on: - schedule: - - cron: 0 * * * * + issue_comment: + types: + - created + workflow_dispatch: jobs: TagBot: + if: github.event_name == 'workflow_dispatch' || github.actor == 'JuliaTagBot' runs-on: ubuntu-latest steps: - uses: JuliaRegistries/TagBot@v1 with: token: ${{ secrets.GITHUB_TOKEN }} + ssh: ${{ secrets.DOCUMENTER_KEY }} diff --git a/.github/workflows/ci-integration.yml b/.github/workflows/ci-integration.yml 
index 287ae13..6872fce 100644 --- a/.github/workflows/ci-integration.yml +++ b/.github/workflows/ci-integration.yml @@ -18,7 +18,7 @@ jobs: fail-fast: false matrix: version: - - '~1.8.0-0' + - '1.8.0-rc1' os: - ubuntu-latest - macOS-latest diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 5a6bcbb..5d5ce48 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -19,7 +19,7 @@ jobs: matrix: version: - '1.7' - - '~1.8.0-0' + - '1.8.0-rc1' os: - ubuntu-latest - macOS-latest From ca99263c78e2626b53f91a8d12eb952a02f0ae1f Mon Sep 17 00:00:00 2001 From: "C.Brenhin Keller" Date: Mon, 18 Jul 2022 16:24:11 -0400 Subject: [PATCH 088/159] Bump version to 0.4.5 --- Project.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Project.toml b/Project.toml index 22c22a3..65b6a98 100644 --- a/Project.toml +++ b/Project.toml @@ -1,7 +1,7 @@ name = "StaticCompiler" uuid = "81625895-6c0f-48fc-b932-11a18313743c" authors = ["Tom Short"] -version = "0.4.4" +version = "0.4.5" [deps] Clang_jll = "0ee61d77-7f21-5576-8119-9fcc46b10100" From 6cc898b080e059481c4e0fc99265b510da12b293 Mon Sep 17 00:00:00 2001 From: Gabriel Baraldi Date: Tue, 18 Oct 2022 23:29:17 -0300 Subject: [PATCH 089/159] Initial commit for override --- Project.toml | 1 + src/StaticCompiler.jl | 22 ++++++++--- src/quirks.jl | 89 +++++++++++++++++++++++++++++++++++++++++++ src/target.jl | 70 +++++++++++++++++++++------------- src/utils.jl | 69 +++++++++++++++++++++++++++++++++ 5 files changed, 219 insertions(+), 32 deletions(-) create mode 100644 src/quirks.jl create mode 100644 src/utils.jl diff --git a/Project.toml b/Project.toml index 65b6a98..930b80c 100644 --- a/Project.toml +++ b/Project.toml @@ -9,6 +9,7 @@ GPUCompiler = "61eb1bfa-7361-4325-ad38-22787b887f55" LLVM = "929cbde3-209d-540e-8aea-75f648917ca0" Libdl = "8f399da3-3557-5675-b5ff-fb832c97cbdb" Serialization = "9e88b42a-f829-5b0c-bbe9-9e923198166b" +StaticTools = "86c06d3c-3f03-46de-9781-57580aa96d0a" [compat] 
GPUCompiler = "0.16" diff --git a/src/StaticCompiler.jl b/src/StaticCompiler.jl index 276dd23..829e22e 100644 --- a/src/StaticCompiler.jl +++ b/src/StaticCompiler.jl @@ -8,10 +8,13 @@ using Libdl: Libdl, dlsym, dlopen using Base: RefValue using Serialization: serialize, deserialize using Clang_jll: clang +using StaticTools: @symbolcall, @c_str export compile, load_function, compile_shlib, compile_executable export native_code_llvm, native_code_typed, native_llvm_module, native_code_native +include("utils.jl") +include("quirks.jl") include("target.jl") include("pointer_patching.jl") include("code_loading.jl") @@ -95,8 +98,13 @@ function compile(f, _tt, path::String = tempname(); name = GPUCompiler.safe_nam isconcretetype(rt) || error("$f on $_tt did not infer to a concrete type. Got $rt") f_wrap!(out::Ref, args::Ref{<:Tuple}) = (out[] = f(args[]...); nothing) - _, _, table = generate_obj(f_wrap!, Tuple{RefValue{rt}, RefValue{tt}}, path, name; opt_level, strip_llvm, strip_asm, filename, kwargs...) - + @eval GPUCompiler.method_table(@nospecialize(job::GPUCompiler.CompilerJob{<:Any,StaticCompilerParams})) = nothing + @eval GPUCompiler.method_table(@nospecialize(job::GPUCompiler.CompilerJob{NativeCompilerTarget})) = nothing + @eval GPUCompiler.method_table(@nospecialize(job::GPUCompiler.CompilerJob{NativeCompilerTarget, StaticCompilerParams})) = nothing + _, _, table = generate_obj(f_wrap!, Tuple{RefValue{rt}, RefValue{tt}}, path, name; opt_level, strip_llvm, strip_asm, filename, ext = false, kwargs...) 
+ @eval GPUCompiler.method_table(@nospecialize(job::GPUCompiler.CompilerJob{<:Any,StaticCompilerParams})) = method_table + @eval GPUCompiler.method_table(@nospecialize(job::GPUCompiler.CompilerJob{NativeCompilerTarget})) = method_table + @eval GPUCompiler.method_table(@nospecialize(job::GPUCompiler.CompilerJob{NativeCompilerTarget, StaticCompilerParams})) = method_table lf = LazyStaticCompiledFunction{rt, tt}(Symbol(f), path, name, filename, table) cjl_path = joinpath(path, "$filename.cjl") serialize(cjl_path, lf) @@ -106,7 +114,7 @@ end """ ```julia -generate_obj(f, tt, path::String = tempname(), name = GPUCompiler.safe_name(repr(f)), filenamebase::String="obj"; +(f, tt, path::String = tempname(), name = GPUCompiler.safe_name(repr(f)), filenamebase::String="obj"; \tstrip_llvm = false, \tstrip_asm = true, \topt_level=3, @@ -135,11 +143,12 @@ function generate_obj(f, tt, path::String = tempname(), name = GPUCompiler.safe_ strip_llvm = false, strip_asm = true, opt_level=3, + ext = true, kwargs...) mkpath(path) obj_path = joinpath(path, "$filenamebase.o") - tm = GPUCompiler.llvm_machine(NativeCompilerTarget()) - job, kwargs = native_job(f, tt; name, kwargs...) + job, kwargs = native_job(f, tt; name,ext = ext, kwargs...) + tm = GPUCompiler.llvm_machine(job.target) #Get LLVM to generated a module of code for us. We don't want GPUCompiler's optimization passes. mod, meta = GPUCompiler.JuliaContext() do context GPUCompiler.codegen(:llvm, job; strip=strip_llvm, only_entry=false, validate=false, optimize=false, ctx=context) @@ -500,11 +509,12 @@ function generate_obj(funcs::Array, path::String = tempname(), filenamebase::Str strip_llvm = false, strip_asm = true, opt_level=3, + ext = true, kwargs...) f,tt = funcs[1] mkpath(path) obj_path = joinpath(path, "$filenamebase.o") - fakejob, kwargs = native_job(f,tt, kwargs...) + fakejob, kwargs = native_job(f,tt, ext = true, kwargs...) mod = native_llvm_module(funcs; demangle = demangle, kwargs...) 
obj, _ = GPUCompiler.emit_asm(fakejob, mod; strip=strip_asm, validate=false, format=LLVM.API.LLVMObjectFile) open(obj_path, "w") do io diff --git a/src/quirks.jl b/src/quirks.jl new file mode 100644 index 0000000..b755583 --- /dev/null +++ b/src/quirks.jl @@ -0,0 +1,89 @@ +macro print_and_throw(err) + quote + println(err) + libcexit(Int32(1)) + end +end + +# math.jl +@device_override @noinline Base.Math.throw_complex_domainerror(f::Symbol, x) = + @print_and_throw c"This operation requires a complex input to return a complex result" +@device_override @noinline Base.Math.throw_exp_domainerror(f::Symbol, x) = + @print_and_throw c"Exponentiation yielding a complex result requires a complex argument" + +# intfuncs.jl +@device_override @noinline Base.throw_domerr_powbysq(::Any, p) = + @print_and_throw c"Cannot raise an integer to a negative power" +@device_override @noinline Base.throw_domerr_powbysq(::Integer, p) = + @print_and_throw c"Cannot raise an integer to a negative power" +@device_override @noinline Base.throw_domerr_powbysq(::AbstractMatrix, p) = + @print_and_throw c"Cannot raise an integer to a negative power" +@device_override @noinline Base.__throw_gcd_overflow(a, b) = + @print_and_throw c"gcd overflow" + +# checked.jl +@device_override @noinline Base.Checked.throw_overflowerr_binaryop(op, x, y) = + @print_and_throw c"Binary operation overflowed" +@device_override @noinline Base.Checked.throw_overflowerr_negation(op, x, y) = + @print_and_throw c"Negation overflowed" +@device_override function Base.Checked.checked_abs(x::Base.Checked.SignedInt) + r = ifelse(x<0, -x, x) + r<0 && @print_and_throw(c"checked arithmetic: cannot compute |x|") + r +end + +# boot.jl +@device_override @noinline Core.throw_inexacterror(f::Symbol, ::Type{T}, val) where {T} = + @print_and_throw c"Inexact conversion" + +# abstractarray.jl +@device_override @noinline Base.throw_boundserror(A, I) = + @print_and_throw c"Out-of-bounds array access" + +# trig.jl +@device_override @noinline 
Base.Math.sincos_domain_error(x) = + @print_and_throw c"sincos(x) is only defined for finite x." + + +# range.jl +@static if VERSION >= v"1.7-" + @eval begin + @device_override function Base.StepRangeLen{T,R,S,L}(ref::R, step::S, len::Integer, + offset::Integer=1) where {T,R,S,L} + if T <: Integer && !isinteger(ref + step) + @print_and_throw(c"StepRangeLen{<:Integer} cannot have non-integer step") + end + len = convert(L, len) + len >= zero(len) || @print_and_throw(c"StepRangeLen length cannot be negative") + offset = convert(L, offset) + L1 = oneunit(typeof(len)) + L1 <= offset <= max(L1, len) || @print_and_throw(c"StepRangeLen: offset must be in [1,...]") + $( + Expr(:new, :(StepRangeLen{T,R,S,L}), :ref, :step, :len, :offset) + ) + end + end +else + @device_override function Base.StepRangeLen{T,R,S}(ref::R, step::S, len::Integer, + offset::Integer=1) where {T,R,S} + if T <: Integer && !isinteger(ref + step) + @print_and_throw(c"StepRangeLen{<:Integer} cannot have non-integer step") + end + len >= 0 || @print_and_throw(c"StepRangeLen length cannot be negative") + 1 <= offset <= max(1,len) || @print_and_throw(c"StepRangeLen: offset must be in [1,...]") + new(ref, step, len, offset) + end +end + + +# fastmath.jl +@static if VERSION <= v"1.7-" +## prevent fallbacks to libm +for f in (:acosh, :asinh, :atanh, :cbrt, :cosh, :exp2, :expm1, :log1p, :sinh, :tanh) + f_fast = Base.FastMath.fast_op[f] + @eval begin + @device_override Base.FastMath.$f_fast(x::Float32) = $f(x) + @device_override Base.FastMath.$f_fast(x::Float64) = $f(x) + end +end +end \ No newline at end of file diff --git a/src/target.jl b/src/target.jl index 2efb191..9bb9128 100644 --- a/src/target.jl +++ b/src/target.jl @@ -3,44 +3,62 @@ Base.@kwdef struct NativeCompilerTarget <: GPUCompiler.AbstractCompilerTarget features::String=(LLVM.version() < v"8") ? 
"" : unsafe_string(LLVM.API.LLVMGetHostCPUFeatures()) end -GPUCompiler.llvm_triple(::NativeCompilerTarget) = Sys.MACHINE +Base.@kwdef struct ExternalNativeCompilerTarget <: GPUCompiler.AbstractCompilerTarget + cpu::String=(LLVM.version() < v"8") ? "" : unsafe_string(LLVM.API.LLVMGetHostCPUName()) + features::String=(LLVM.version() < v"8") ? "" : unsafe_string(LLVM.API.LLVMGetHostCPUFeatures()) +end -function GPUCompiler.llvm_machine(target::NativeCompilerTarget) - triple = GPUCompiler.llvm_triple(target) +module StaticRuntime +# the runtime library +signal_exception() = return +malloc(sz) = ccall("extern malloc", llvmcall, Csize_t, (Csize_t,), sz) +report_oom(sz) = return +report_exception(ex) = return +report_exception_name(ex) = return +report_exception_frame(idx, func, file, line) = return +end - t = LLVM.Target(triple=triple) +struct StaticCompilerParams <: GPUCompiler.AbstractCompilerParams end - tm = LLVM.TargetMachine(t, triple, target.cpu, target.features, reloc=LLVM.API.LLVMRelocPIC) - GPUCompiler.asm_verbosity!(tm, true) +for target in (:NativeCompilerTarget, :ExternalNativeCompilerTarget) + @eval begin + GPUCompiler.llvm_triple(::$target) = Sys.MACHINE - return tm -end + function GPUCompiler.llvm_machine(target::$target) + triple = GPUCompiler.llvm_triple(target) -GPUCompiler.runtime_slug(job::GPUCompiler.CompilerJob{NativeCompilerTarget}) = "native_$(job.target.cpu)-$(hash(job.target.features))" + t = LLVM.Target(triple=triple) -module StaticRuntime - # the runtime library - signal_exception() = return - malloc(sz) = ccall("extern malloc", llvmcall, Csize_t, (Csize_t,), sz) - report_oom(sz) = return - report_exception(ex) = return - report_exception_name(ex) = return - report_exception_frame(idx, func, file, line) = return -end + tm = LLVM.TargetMachine(t, triple, target.cpu, target.features, reloc=LLVM.API.LLVMRelocPIC) + GPUCompiler.asm_verbosity!(tm, true) -struct StaticCompilerParams <: GPUCompiler.AbstractCompilerParams end + return tm + end 
-GPUCompiler.runtime_module(::GPUCompiler.CompilerJob{<:Any,StaticCompilerParams}) = StaticRuntime -GPUCompiler.runtime_module(::GPUCompiler.CompilerJob{NativeCompilerTarget}) = StaticRuntime -GPUCompiler.runtime_module(::GPUCompiler.CompilerJob{NativeCompilerTarget, StaticCompilerParams}) = StaticRuntime + GPUCompiler.runtime_slug(job::GPUCompiler.CompilerJob{$target}) = "native_$(job.target.cpu)-$(hash(job.target.features))" + + GPUCompiler.runtime_module(::GPUCompiler.CompilerJob{$target}) = StaticRuntime + GPUCompiler.runtime_module(::GPUCompiler.CompilerJob{$target, StaticCompilerParams}) = StaticRuntime + + GPUCompiler.can_throw(job::GPUCompiler.CompilerJob{$target, StaticCompilerParams}) = true + GPUCompiler.can_throw(job::GPUCompiler.CompilerJob{$target}) = true + end +end + +GPUCompiler.runtime_module(::GPUCompiler.CompilerJob{<:Any,StaticCompilerParams}) = StaticRuntime GPUCompiler.can_throw(job::GPUCompiler.CompilerJob{<:Any,StaticCompilerParams}) = true -GPUCompiler.can_throw(job::GPUCompiler.CompilerJob{NativeCompilerTarget, StaticCompilerParams}) = true -GPUCompiler.can_throw(job::GPUCompiler.CompilerJob{NativeCompilerTarget}) = true -function native_job(@nospecialize(func), @nospecialize(types); kernel::Bool=false, name=GPUCompiler.safe_name(repr(func)), kwargs...) 
+# GPUCompiler.method_table(@nospecialize(job::GPUCompiler.CompilerJob{<:Any,StaticCompilerParams})) = nothing +# GPUCompiler.method_table(@nospecialize(job::GPUCompiler.CompilerJob{NativeCompilerTarget})) = nothing +# GPUCompiler.method_table(@nospecialize(job::GPUCompiler.CompilerJob{NativeCompilerTarget, StaticCompilerParams})) = nothing + +GPUCompiler.method_table(@nospecialize(job::GPUCompiler.CompilerJob{ExternalNativeCompilerTarget})) = method_table +GPUCompiler.method_table(@nospecialize(job::GPUCompiler.CompilerJob{ExternalNativeCompilerTarget, StaticCompilerParams})) = method_table + +function native_job(@nospecialize(func), @nospecialize(types); kernel::Bool=false, name=GPUCompiler.safe_name(repr(func)), ext = true, kwargs...) source = GPUCompiler.FunctionSpec(func, Base.to_tuple_type(types), kernel, name) - target = NativeCompilerTarget() + target = ext ? ExternalNativeCompilerTarget() : NativeCompilerTarget() params = StaticCompilerParams() GPUCompiler.CompilerJob(target, source, params), kwargs end diff --git a/src/utils.jl b/src/utils.jl new file mode 100644 index 0000000..a581b76 --- /dev/null +++ b/src/utils.jl @@ -0,0 +1,69 @@ + + +# local method table for device functions +@static if isdefined(Base.Experimental, Symbol("@overlay")) +Base.Experimental.@MethodTable(method_table) +else +const method_table = nothing +end + +# list of overrides (only for Julia 1.6) +const overrides = Expr[] + +macro device_override(ex) + ex = macroexpand(__module__, ex) + if Meta.isexpr(ex, :call) + @show ex = eval(ex) + error() + end + code = quote + $GPUCompiler.@override(StaticCompiler.method_table, $ex) + end + if isdefined(Base.Experimental, Symbol("@overlay")) + return esc(code) + else + push!(overrides, code) + return + end +end + +macro device_function(ex) + ex = macroexpand(__module__, ex) + def = splitdef(ex) + + # generate a function that errors + def[:body] = quote + error("This function is not intended for use on the CPU") + end + + esc(quote + 
$(combinedef(def)) + @device_override $ex + end) +end + +macro device_functions(ex) + ex = macroexpand(__module__, ex) + + # recursively prepend `@device_function` to all function definitions + function rewrite(block) + out = Expr(:block) + for arg in block.args + if Meta.isexpr(arg, :block) + # descend in blocks + push!(out.args, rewrite(arg)) + elseif Meta.isexpr(arg, [:function, :(=)]) + # rewrite function definitions + push!(out.args, :(@device_function $arg)) + else + # preserve all the rest + push!(out.args, arg) + end + end + out + end + + esc(rewrite(ex)) +end + +libcexit(x::Int32) = @symbolcall exit(x::Int32)::Nothing \ No newline at end of file From d3b359b4a63afe647f087bacc8df0d1b775a6466 Mon Sep 17 00:00:00 2001 From: "C. Brenhin Keller" Date: Wed, 19 Oct 2022 19:40:34 -0400 Subject: [PATCH 090/159] Add compat bounds for StaticTools.jl --- Project.toml | 1 + 1 file changed, 1 insertion(+) diff --git a/Project.toml b/Project.toml index 930b80c..ac5159a 100644 --- a/Project.toml +++ b/Project.toml @@ -15,3 +15,4 @@ StaticTools = "86c06d3c-3f03-46de-9781-57580aa96d0a" GPUCompiler = "0.16" LLVM = "4.8" julia = "1.7" +StaticTools = "0.8" From 4a5c3b90f7d50632d75d8a7e15cd230b94baa033 Mon Sep 17 00:00:00 2001 From: "C. 
Brenhin Keller" Date: Wed, 19 Oct 2022 19:41:47 -0400 Subject: [PATCH 091/159] Apply @giordano formatting suggestions from code review MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Mosè Giordano --- src/quirks.jl | 2 +- src/utils.jl | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/quirks.jl b/src/quirks.jl index b755583..d632dd7 100644 --- a/src/quirks.jl +++ b/src/quirks.jl @@ -86,4 +86,4 @@ for f in (:acosh, :asinh, :atanh, :cbrt, :cosh, :exp2, :expm1, :log1p, :sinh, :t @device_override Base.FastMath.$f_fast(x::Float64) = $f(x) end end -end \ No newline at end of file +end diff --git a/src/utils.jl b/src/utils.jl index a581b76..636b147 100644 --- a/src/utils.jl +++ b/src/utils.jl @@ -66,4 +66,4 @@ macro device_functions(ex) esc(rewrite(ex)) end -libcexit(x::Int32) = @symbolcall exit(x::Int32)::Nothing \ No newline at end of file +libcexit(x::Int32) = @symbolcall exit(x::Int32)::Nothing From 341ceb3f0d6a5c8966bd31bfb4d764cc742490e4 Mon Sep 17 00:00:00 2001 From: "C.Brenhin Keller" Date: Wed, 19 Oct 2022 22:41:32 -0400 Subject: [PATCH 092/159] Try to avoid messing with Mason's `compile` interface --- src/StaticCompiler.jl | 413 +++++++++++++++++++++--------------------- src/target.jl | 29 +-- 2 files changed, 226 insertions(+), 216 deletions(-) diff --git a/src/StaticCompiler.jl b/src/StaticCompiler.jl index 829e22e..6641f9b 100644 --- a/src/StaticCompiler.jl +++ b/src/StaticCompiler.jl @@ -89,7 +89,7 @@ single method (the method determined by `types`). function compile(f, _tt, path::String = tempname(); name = GPUCompiler.safe_name(repr(f)), filename="obj", strip_llvm = false, strip_asm = true, - opt_level=3, + opt_level = 3, kwargs...) 
tt = Base.to_tuple_type(_tt) isconcretetype(tt) || error("input type signature $_tt is not concrete") @@ -98,13 +98,8 @@ function compile(f, _tt, path::String = tempname(); name = GPUCompiler.safe_nam isconcretetype(rt) || error("$f on $_tt did not infer to a concrete type. Got $rt") f_wrap!(out::Ref, args::Ref{<:Tuple}) = (out[] = f(args[]...); nothing) - @eval GPUCompiler.method_table(@nospecialize(job::GPUCompiler.CompilerJob{<:Any,StaticCompilerParams})) = nothing - @eval GPUCompiler.method_table(@nospecialize(job::GPUCompiler.CompilerJob{NativeCompilerTarget})) = nothing - @eval GPUCompiler.method_table(@nospecialize(job::GPUCompiler.CompilerJob{NativeCompilerTarget, StaticCompilerParams})) = nothing - _, _, table = generate_obj(f_wrap!, Tuple{RefValue{rt}, RefValue{tt}}, path, name; opt_level, strip_llvm, strip_asm, filename, ext = false, kwargs...) - @eval GPUCompiler.method_table(@nospecialize(job::GPUCompiler.CompilerJob{<:Any,StaticCompilerParams})) = method_table - @eval GPUCompiler.method_table(@nospecialize(job::GPUCompiler.CompilerJob{NativeCompilerTarget})) = method_table - @eval GPUCompiler.method_table(@nospecialize(job::GPUCompiler.CompilerJob{NativeCompilerTarget, StaticCompilerParams})) = method_table + _, _, table = generate_obj(f_wrap!, Tuple{RefValue{rt}, RefValue{tt}}, path, name; opt_level, strip_llvm, strip_asm, filename, kwargs...) + lf = LazyStaticCompiledFunction{rt, tt}(Symbol(f), path, name, filename, table) cjl_path = joinpath(path, "$filename.cjl") serialize(cjl_path, lf) @@ -114,41 +109,37 @@ end """ ```julia -(f, tt, path::String = tempname(), name = GPUCompiler.safe_name(repr(f)), filenamebase::String="obj"; +generate_obj(f, tt, path::String = tempname(), name = GPUCompiler.safe_name(repr(f)), filenamebase::String="obj"; \tstrip_llvm = false, \tstrip_asm = true, - \topt_level=3, + \topt_level = 3, \tkwargs...) 
``` Low level interface for compiling object code (`.o`) for for function `f` given a tuple type `tt` characterizing the types of the arguments for which the function will be compiled. - ### Examples ```julia julia> fib(n) = n <= 1 ? n : fib(n - 1) + fib(n - 2) fib (generic function with 1 method) - julia> path, name, table = StaticCompiler.generate_obj(fib, Tuple{Int64}, "./test") ("./test", "fib", IdDict{Any, String}()) - shell> tree \$path ./test └── obj.o - 0 directories, 1 file ``` """ function generate_obj(f, tt, path::String = tempname(), name = GPUCompiler.safe_name(repr(f)), filenamebase::String="obj"; strip_llvm = false, strip_asm = true, - opt_level=3, - ext = true, + libjulia = true, + opt_level = 3, kwargs...) mkpath(path) obj_path = joinpath(path, "$filenamebase.o") - job, kwargs = native_job(f, tt; name,ext = ext, kwargs...) - tm = GPUCompiler.llvm_machine(job.target) + tm = GPUCompiler.llvm_machine(libjulia ? NativeCompilerTarget() : ExternalNativeCompilerTarget()) + job, kwargs = native_job(f, tt; name, libjulia, kwargs...) #Get LLVM to generated a module of code for us. We don't want GPUCompiler's optimization passes. mod, meta = GPUCompiler.JuliaContext() do context GPUCompiler.codegen(:llvm, job; strip=strip_llvm, only_entry=false, validate=false, optimize=false, ctx=context) @@ -177,94 +168,16 @@ function generate_obj(f, tt, path::String = tempname(), name = GPUCompiler.safe_ path, name, table end -""" -```julia -compile_executable(f, types::Tuple, path::String, name::String=repr(f); filename::String=name, kwargs...) -``` -Attempt to compile a standalone executable that runs function `f` with a type signature given by the tuple of `types`. - -### Examples -```julia -julia> using StaticCompiler - -julia> function puts(s::Ptr{UInt8}) # Can't use Base.println because it allocates. 
- # Note, this `llvmcall` requires Julia 1.8+ - Base.llvmcall((\""" - ; External declaration of the puts function - declare i32 @puts(i8* nocapture) nounwind - - define i32 @main(i8*) { - entry: - %call = call i32 (i8*) @puts(i8* %0) - ret i32 0 - } - \""", "main"), Int32, Tuple{Ptr{UInt8}}, s) - end -puts (generic function with 1 method) - -julia> function print_args(argc::Int, argv::Ptr{Ptr{UInt8}}) - for i=1:argc - # Get pointer - p = unsafe_load(argv, i) - # Print string at pointer location (which fortunately already exists isn't tracked by the GC) - puts(p) - end - return 0 - end - -julia> compile_executable(print_args, (Int, Ptr{Ptr{UInt8}})) -"/Users/foo/code/StaticCompiler.jl/print_args" - -shell> ./print_args 1 2 3 4 Five -./print_args -1 -2 -3 -4 -Five -``` -```julia -julia> using StaticTools # So you don't have to define `puts` and friends every time - -julia> hello() = println(c"Hello, world!") # c"..." makes a stack-allocated StaticString - -julia> compile_executable(hello) -"/Users/foo/code/StaticCompiler.jl/hello" - -shell> ./hello -Hello, world! -``` -""" -function compile_executable(f, types=(), path::String="./", name=GPUCompiler.safe_name(repr(f)); - filename=name, - cflags=``, - kwargs... - ) - - tt = Base.to_tuple_type(types) - # tt == Tuple{} || tt == Tuple{Int, Ptr{Ptr{UInt8}}} || error("input type signature $types must be either () or (Int, Ptr{Ptr{UInt8}})") - - rt = only(native_code_typed(f, tt))[2] - isconcretetype(rt) || error("$f$types did not infer to a concrete type. Got $rt") - - # Would be nice to use a compiler pass or something to check if there are any heap allocations or references to globals - # Keep an eye on https://github.com/JuliaLang/julia/pull/43747 for this - - generate_executable(f, tt, path, name, filename; cflags=cflags, kwargs...) - - joinpath(abspath(path), filename) -end - - """ ```julia compile_shlib(f, types::Tuple, path::String, name::String=repr(f); filename::String=name, kwargs...) 
``` As `compile_executable`, but compiling to a standalone `.dylib`/`.so` shared library. """ -function compile_shlib(f, types=(), path::String="./", name=GPUCompiler.safe_name(repr(f)); - filename=name, - cflags=``, +function compile_shlib(f::Function, types=(), path::String="./", name=GPUCompiler.safe_name(repr(f)); + filename = name, + libjulia = false, + cflags = ``, kwargs... ) @@ -276,16 +189,17 @@ function compile_shlib(f, types=(), path::String="./", name=GPUCompiler.safe_nam # Would be nice to use a compiler pass or something to check if there are any heap allocations or references to globals # Keep an eye on https://github.com/JuliaLang/julia/pull/43747 for this - generate_shlib(f, tt, path, name, filename; cflags=cflags, kwargs...) + generate_shlib(f, tt, path, name, filename; libjulia, cflags, kwargs...) joinpath(abspath(path), filename * "." * Libdl.dlext) end function generate_shlib_fptr(f, tt, path::String=tempname(), name = GPUCompiler.safe_name(repr(f)), filename::String=name; temp::Bool=true, + libjulia=true, kwargs...) - generate_shlib(f, tt, path, name; kwargs...) + generate_shlib(f, tt, path, name; libjulia, kwargs...) lib_path = joinpath(abspath(path), "$filename.$(Libdl.dlext)") ptr = Libdl.dlopen(lib_path, Libdl.RTLD_LOCAL) fptr = Libdl.dlsym(ptr, "julia_$name") @@ -340,66 +254,6 @@ function generate_shlib_fptr(path::String, name, filename::String=name) fptr end -""" -```julia -generate_executable(f, tt, path::String, name, filename=string(name); kwargs...) -``` -Attempt to compile a standalone executable that runs `f`. - -### Examples -```julia -julia> function test(n) - r = 0.0 - for i=1:n - r += log(sqrt(i)) - end - return r/n - end -test (generic function with 1 method) - -julia> path, name = StaticCompiler.generate_executable(test, Tuple{Int64}, "./scratch") -``` -""" -function generate_executable(f, tt, path=tempname(), name=GPUCompiler.safe_name(repr(f)), filename=string(name); - cflags=``, - kwargs... 
- ) - mkpath(path) - obj_path = joinpath(path, "$filename.o") - exec_path = joinpath(path, filename) - job, kwargs = native_job(f, tt; name, kwargs...) - obj, _ = GPUCompiler.codegen(:obj, job; strip=true, only_entry=false, validate=false) - - # Write to file - open(obj_path, "w") do io - write(io, obj) - end - - # Pick a compiler - cc = Sys.isapple() ? `cc` : clang() - # Compile! - if Sys.isapple() - # Apple no longer uses _start, so we can just specify a custom entry - entry = "_julia_$name" - run(`$cc -e $entry $cflags $obj_path -o $exec_path`) - else - # Write a minimal wrapper to avoid having to specify a custom entry - wrapper_path = joinpath(path, "wrapper.c") - f = open(wrapper_path, "w") - print(f, """int main(int argc, char** argv) - { - julia_$name(argc, argv); - return 0; - }""") - close(f) - run(`$cc $wrapper_path $cflags $obj_path -o $exec_path`) - # Clean up - run(`rm $wrapper_path`) - end - - path, name -end - """ ```julia generate_shlib(f, tt, path::String, name::String, filenamebase::String="obj"; kwargs...) @@ -432,14 +286,15 @@ julia> ccall(StaticCompiler.generate_shlib_fptr(path, name), Float64, (Int64,), ``` """ function generate_shlib(f, tt, path=tempname(), name=GPUCompiler.safe_name(repr(f)), filename=name; - cflags=``, + libjulia = false, + cflags = ``, kwargs... ) mkpath(path) obj_path = joinpath(path, "$filename.o") lib_path = joinpath(path, "$filename.$(Libdl.dlext)") - job, kwargs = native_job(f, tt; name, kwargs...) + job, kwargs = native_job(f, tt; name, libjulia, kwargs...) obj, _ = GPUCompiler.codegen(:obj, job; strip=true, only_entry=false, validate=false) open(obj_path, "w") do io @@ -480,12 +335,12 @@ end #Return an LLVM module for multiple functions function native_llvm_module(funcs::Array; demangle = false, kwargs...) - f,tt = funcs[1] - mod = native_llvm_module(f,tt, kwargs...) + f,tt = first(funcs) + mod = native_llvm_module(f,tt; kwargs...) 
if length(funcs) > 1 for func in funcs[2:end] @show f,tt = func - tmod = native_llvm_module(f,tt, kwargs...) + tmod = native_llvm_module(f,tt; kwargs...) link!(mod,tmod) end end @@ -504,18 +359,60 @@ function native_llvm_module(funcs::Array; demangle = false, kwargs...) return mod end -function generate_obj(funcs::Array, path::String = tempname(), filenamebase::String="obj"; - demangle =false, +## -- compile_shlib / generate_shlib / generate_obj, but for multiple functions at once + +function compile_shlib(funcs::Vector{<:Tuple}, path::String="./"; + filename = "libfoo", + demangle = false, + libjulia = false, + cflags = ``, + kwargs... + ) + for func in funcs + f, types = func + tt = Base.to_tuple_type(types) + isconcretetype(tt) || error("input type signature $types is not concrete") + + rt = only(native_code_typed(f, tt))[2] + isconcretetype(rt) || error("$f$types did not infer to a concrete type. Got $rt") + end + + funcs_tt = [(first(f), Base.to_tuple_type(last(f))) for f in funcs] + generate_shlib(funcs_tt, path, filename; demangle, libjulia, cflags, kwargs...) + + joinpath(abspath(path), filename * "." * Libdl.dlext) +end + +function generate_shlib(funcs::Vector{<:Tuple}, path::String = tempname(), filename::String="libfoo"; + demangle = false, + libjulia = false, + cflags = ``, + kwargs... + ) + + lib_path = joinpath(path, "$filename.$(Libdl.dlext)") + + _,obj_path = generate_obj(funcs, path, filename; demangle, libjulia, kwargs...) + # Pick a Clang + cc = Sys.isapple() ? `cc` : clang() + # Compile! + run(`$cc -shared $cflags $obj_path -o $lib_path `) + + path, name +end + +function generate_obj(funcs::Vector{<:Tuple}, path::String = tempname(), filenamebase::String="obj"; + demangle = false, + libjulia = false, strip_llvm = false, - strip_asm = true, - opt_level=3, - ext = true, + strip_asm = true, + opt_level = 3, kwargs...) 
- f,tt = funcs[1] + f,tt = first(funcs) mkpath(path) obj_path = joinpath(path, "$filenamebase.o") - fakejob, kwargs = native_job(f,tt, ext = true, kwargs...) - mod = native_llvm_module(funcs; demangle = demangle, kwargs...) + fakejob, kwargs = native_job(f, tt; libjulia, kwargs...) + mod = native_llvm_module(funcs; demangle, libjulia, kwargs...) obj, _ = GPUCompiler.emit_asm(fakejob, mod; strip=strip_asm, validate=false, format=LLVM.API.LLVMObjectFile) open(obj_path, "w") do io write(io, obj) @@ -523,44 +420,156 @@ function generate_obj(funcs::Array, path::String = tempname(), filenamebase::Str path, obj_path end -function generate_shlib(funcs::Array, path::String = tempname(), filename::String="libfoo"; - demangle=false, - cflags=``, +## --- Compile standalone binaries + +""" +```julia +function compile_executable(f, types=(), path::String="./", name=GPUCompiler.safe_name(repr(f)); + filename = name, + libjulia = false, + cflags = ``, kwargs... ) +``` +Attempt to compile a standalone executable that runs function `f` with a type signature given by the tuple of `types`. - lib_path = joinpath(path, "$filename.$(Libdl.dlext)") +### Examples +```julia +julia> using StaticCompiler - _,obj_path = generate_obj(funcs, path, filename; demangle=demangle, kwargs...) - # Pick a Clang - cc = Sys.isapple() ? `cc` : clang() - # Compile! - run(`$cc -shared $cflags $obj_path -o $lib_path `) +julia> function puts(s::Ptr{UInt8}) # Can't use Base.println because it allocates. + # Note, this `llvmcall` requires Julia 1.8+ + Base.llvmcall((\""" + ; External declaration of the puts function + declare i32 @puts(i8* nocapture) nounwind - path, name -end + define i32 @main(i8*) { + entry: + %call = call i32 (i8*) @puts(i8* %0) + ret i32 0 + } + \""", "main"), Int32, Tuple{Ptr{UInt8}}, s) + end +puts (generic function with 1 method) -function compile_shlib(funcs::Array, path::String="./"; - filename="libfoo", - demangle=false, - cflags=``, - kwargs...) 
- for func in funcs - f, types = func - tt = Base.to_tuple_type(types) - isconcretetype(tt) || error("input type signature $types is not concrete") +julia> function print_args(argc::Int, argv::Ptr{Ptr{UInt8}}) + for i=1:argc + # Get pointer + p = unsafe_load(argv, i) + # Print string at pointer location (which fortunately already exists isn't tracked by the GC) + puts(p) + end + return 0 + end - rt = only(native_code_typed(f, tt))[2] - isconcretetype(rt) || error("$f$types did not infer to a concrete type. Got $rt") - end +julia> compile_executable(print_args, (Int, Ptr{Ptr{UInt8}})) +"/Users/foo/code/StaticCompiler.jl/print_args" -# Would be nice to use a compiler pass or something to check if there are any heap allocations or references to globals -# Keep an eye on https://github.com/JuliaLang/julia/pull/43747 for this +shell> ./print_args 1 2 3 4 Five +./print_args +1 +2 +3 +4 +Five +``` +```julia +julia> using StaticTools # So you don't have to define `puts` and friends every time - generate_shlib(funcs, path, filename; demangle=demangle, cflags=cflags, kwargs...) +julia> hello() = println(c"Hello, world!") # c"..." makes a stack-allocated StaticString - joinpath(abspath(path), filename * "." * Libdl.dlext) +julia> compile_executable(hello) +"/Users/foo/code/StaticCompiler.jl/hello" + +shell> ./hello +Hello, world! +``` +""" +function compile_executable(f, types=(), path::String="./", name=GPUCompiler.safe_name(repr(f)); + filename = name, + libjulia = false, + cflags = ``, + kwargs... + ) + + tt = Base.to_tuple_type(types) + # tt == Tuple{} || tt == Tuple{Int, Ptr{Ptr{UInt8}}} || error("input type signature $types must be either () or (Int, Ptr{Ptr{UInt8}})") + + rt = only(native_code_typed(f, tt))[2] + isconcretetype(rt) || error("$f$types did not infer to a concrete type. 
Got $rt") + + # Would be nice to use a compiler pass or something to check if there are any heap allocations or references to globals + # Keep an eye on https://github.com/JuliaLang/julia/pull/43747 for this + + generate_executable(f, tt, path, name, filename; libjulia, cflags, kwargs...) + + joinpath(abspath(path), filename) end +""" +```julia +function generate_executable(f, tt, path=tempname(), name=GPUCompiler.safe_name(repr(f)), filename=string(name); + cflags = ``, + libjulia = false, + kwargs... + ) +``` +Attempt to compile a standalone executable that runs `f`. + +### Examples +```julia +julia> function test(n) + r = 0.0 + for i=1:n + r += log(sqrt(i)) + end + return r/n + end +test (generic function with 1 method) + +julia> path, name = StaticCompiler.generate_executable(test, Tuple{Int64}, "./scratch") +``` +""" +function generate_executable(f, tt, path=tempname(), name=GPUCompiler.safe_name(repr(f)), filename=string(name); + cflags = ``, + libjulia = false, + kwargs... + ) + mkpath(path) + obj_path = joinpath(path, "$filename.o") + exec_path = joinpath(path, filename) + job, kwargs = native_job(f, tt; name, libjulia, kwargs...) + obj, _ = GPUCompiler.codegen(:obj, job; strip=true, only_entry=false, validate=false) + + # Write to file + open(obj_path, "w") do io + write(io, obj) + end + + # Pick a compiler + cc = Sys.isapple() ? `cc` : clang() + # Compile! 
+ if Sys.isapple() + # Apple no longer uses _start, so we can just specify a custom entry + entry = "_julia_$name" + run(`$cc -e $entry $cflags $obj_path -o $exec_path`) + else + # Write a minimal wrapper to avoid having to specify a custom entry + wrapper_path = joinpath(path, "wrapper.c") + f = open(wrapper_path, "w") + print(f, """int main(int argc, char** argv) + { + julia_$name(argc, argv); + return 0; + }""") + close(f) + run(`$cc $wrapper_path $cflags $obj_path -o $exec_path`) + # Clean up + run(`rm $wrapper_path`) + end + + path, name +end + end # module diff --git a/src/target.jl b/src/target.jl index 9bb9128..b6713b8 100644 --- a/src/target.jl +++ b/src/target.jl @@ -9,13 +9,13 @@ Base.@kwdef struct ExternalNativeCompilerTarget <: GPUCompiler.AbstractCompilerT end module StaticRuntime -# the runtime library -signal_exception() = return -malloc(sz) = ccall("extern malloc", llvmcall, Csize_t, (Csize_t,), sz) -report_oom(sz) = return -report_exception(ex) = return -report_exception_name(ex) = return -report_exception_frame(idx, func, file, line) = return + # the runtime library + signal_exception() = return + malloc(sz) = ccall("extern malloc", llvmcall, Csize_t, (Csize_t,), sz) + report_oom(sz) = return + report_exception(ex) = return + report_exception_name(ex) = return + report_exception_frame(idx, func, file, line) = return end struct StaticCompilerParams <: GPUCompiler.AbstractCompilerParams end @@ -49,16 +49,17 @@ end GPUCompiler.runtime_module(::GPUCompiler.CompilerJob{<:Any,StaticCompilerParams}) = StaticRuntime GPUCompiler.can_throw(job::GPUCompiler.CompilerJob{<:Any,StaticCompilerParams}) = true -# GPUCompiler.method_table(@nospecialize(job::GPUCompiler.CompilerJob{<:Any,StaticCompilerParams})) = nothing -# GPUCompiler.method_table(@nospecialize(job::GPUCompiler.CompilerJob{NativeCompilerTarget})) = nothing -# GPUCompiler.method_table(@nospecialize(job::GPUCompiler.CompilerJob{NativeCompilerTarget, StaticCompilerParams})) = nothing - 
GPUCompiler.method_table(@nospecialize(job::GPUCompiler.CompilerJob{ExternalNativeCompilerTarget})) = method_table GPUCompiler.method_table(@nospecialize(job::GPUCompiler.CompilerJob{ExternalNativeCompilerTarget, StaticCompilerParams})) = method_table -function native_job(@nospecialize(func), @nospecialize(types); kernel::Bool=false, name=GPUCompiler.safe_name(repr(func)), ext = true, kwargs...) - source = GPUCompiler.FunctionSpec(func, Base.to_tuple_type(types), kernel, name) - target = ext ? ExternalNativeCompilerTarget() : NativeCompilerTarget() +function native_job(@nospecialize(func::Function), @nospecialize(types::Type); + name = GPUCompiler.safe_name(repr(func)), + libjulia::Bool = true, + kernel::Bool = false, + kwargs... + ) + source = GPUCompiler.FunctionSpec(func, types, kernel, name) + target = libjulia ? NativeCompilerTarget() : ExternalNativeCompilerTarget() params = StaticCompilerParams() GPUCompiler.CompilerJob(target, source, params), kwargs end From 24b8ca2dca6976942277507eef7954767b2590d7 Mon Sep 17 00:00:00 2001 From: "C.Brenhin Keller" Date: Thu, 20 Oct 2022 01:23:23 -0400 Subject: [PATCH 093/159] Prototype julia function when compiling to avoid implicit declaration warning --- src/StaticCompiler.jl | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/StaticCompiler.jl b/src/StaticCompiler.jl index 276dd23..879a21a 100644 --- a/src/StaticCompiler.jl +++ b/src/StaticCompiler.jl @@ -377,7 +377,9 @@ function generate_executable(f, tt, path=tempname(), name=GPUCompiler.safe_name( # Write a minimal wrapper to avoid having to specify a custom entry wrapper_path = joinpath(path, "wrapper.c") f = open(wrapper_path, "w") - print(f, """int main(int argc, char** argv) + print(f, """int julia_$name(int argc, char** argv); + + int main(int argc, char** argv) { julia_$name(argc, argv); return 0; From 3ea9dac8496bde17088286db69c968d3731e252f Mon Sep 17 00:00:00 2001 From: "C.Brenhin Keller" Date: Thu, 20 Oct 2022 01:24:07 -0400 Subject: 
[PATCH 094/159] Define `__stack_chk_guard` to fix linking error on linux --- Project.toml | 4 ++-- src/StaticCompiler.jl | 3 ++- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/Project.toml b/Project.toml index 65b6a98..bfea97f 100644 --- a/Project.toml +++ b/Project.toml @@ -1,7 +1,7 @@ name = "StaticCompiler" uuid = "81625895-6c0f-48fc-b932-11a18313743c" -authors = ["Tom Short"] -version = "0.4.5" +authors = ["Tom Short and contributors"] +version = "0.4.6" [deps] Clang_jll = "0ee61d77-7f21-5576-8119-9fcc46b10100" diff --git a/src/StaticCompiler.jl b/src/StaticCompiler.jl index 879a21a..5a447cd 100644 --- a/src/StaticCompiler.jl +++ b/src/StaticCompiler.jl @@ -378,7 +378,8 @@ function generate_executable(f, tt, path=tempname(), name=GPUCompiler.safe_name( wrapper_path = joinpath(path, "wrapper.c") f = open(wrapper_path, "w") print(f, """int julia_$name(int argc, char** argv); - + void* __stack_chk_guard = (void*) $(rand(UInt) >> 1); + int main(int argc, char** argv) { julia_$name(argc, argv); From af5d9f2e6be3d178f203c5affc38f852a100a4af Mon Sep 17 00:00:00 2001 From: "C.Brenhin Keller" Date: Thu, 20 Oct 2022 12:09:49 -0400 Subject: [PATCH 095/159] Stick with known good versions in CI for now --- .github/workflows/ci-integration.yml | 2 +- .github/workflows/ci.yml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/ci-integration.yml b/.github/workflows/ci-integration.yml index 6872fce..03fa4d2 100644 --- a/.github/workflows/ci-integration.yml +++ b/.github/workflows/ci-integration.yml @@ -18,7 +18,7 @@ jobs: fail-fast: false matrix: version: - - '1.8.0-rc1' + - '1.8.0-beta3' os: - ubuntu-latest - macOS-latest diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 5d5ce48..dddaf90 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -19,7 +19,7 @@ jobs: matrix: version: - '1.7' - - '1.8.0-rc1' + - '1.8.0-beta3' os: - ubuntu-latest - macOS-latest From 
5bd9159fa4bab1bc51d97ed2475e52e9eb349df6 Mon Sep 17 00:00:00 2001 From: "C.Brenhin Keller" Date: Thu, 20 Oct 2022 12:50:18 -0400 Subject: [PATCH 096/159] Update integration tests --- test/scripts/interop.jl | 16 ++ test/scripts/loopvec_matrix.jl | 9 +- test/scripts/loopvec_matrix_stack.jl | 49 ++++ test/scripts/loopvec_product.jl | 4 +- test/scripts/rand_matrix.jl | 8 +- test/scripts/randn_matrix.jl | 22 ++ test/scripts/times_table.jl | 9 +- test/scripts/withmallocarray.jl | 31 +++ test/testintegration.jl | 342 +++++++++++++++++++++------ 9 files changed, 402 insertions(+), 88 deletions(-) create mode 100644 test/scripts/interop.jl create mode 100644 test/scripts/loopvec_matrix_stack.jl create mode 100644 test/scripts/randn_matrix.jl create mode 100644 test/scripts/withmallocarray.jl diff --git a/test/scripts/interop.jl b/test/scripts/interop.jl new file mode 100644 index 0000000..b601e09 --- /dev/null +++ b/test/scripts/interop.jl @@ -0,0 +1,16 @@ +using StaticCompiler +using StaticTools + +function interop(argc, argv) + lib = StaticTools.dlopen(c"libm") + printf(lib) + sin = StaticTools.dlsym(lib, c"sin") + printf(sin) + x = @ptrcall sin(5.0::Float64)::Float64 + printf(x) + newline() + StaticTools.dlclose(lib) +end + +# Attempt to compile +path = compile_executable(interop, (Int64, Ptr{Ptr{UInt8}}), "./", cflags=`-ldl -lm`) diff --git a/test/scripts/loopvec_matrix.jl b/test/scripts/loopvec_matrix.jl index 5477dd3..7b19ce8 100644 --- a/test/scripts/loopvec_matrix.jl +++ b/test/scripts/loopvec_matrix.jl @@ -15,8 +15,8 @@ end function loopvec_matrix(argc::Int, argv::Ptr{Ptr{UInt8}}) argc == 3 || return printf(stderrp(), c"Incorrect number of command-line arguments\n") - rows = parse(Int64, argv, 2) # First command-line argument - cols = parse(Int64, argv, 3) # Second command-line argument + rows = argparse(Int64, argv, 2) # First command-line argument + cols = argparse(Int64, argv, 3) # Second command-line argument # LHS A = MallocArray{Float64}(undef, rows, cols) 
@@ -41,9 +41,8 @@ function loopvec_matrix(argc::Int, argv::Ptr{Ptr{UInt8}}) # Print to stdout printf(C) # Also print to file - fp = fopen(c"table.tsv",c"w") - printf(fp, C) - fclose(fp) + printdlm(c"table.tsv", C, '\t') + fwrite(c"table.b", C) # Clean up matrices free(A) free(B) diff --git a/test/scripts/loopvec_matrix_stack.jl b/test/scripts/loopvec_matrix_stack.jl new file mode 100644 index 0000000..5e0c90d --- /dev/null +++ b/test/scripts/loopvec_matrix_stack.jl @@ -0,0 +1,49 @@ +using StaticCompiler +using StaticTools +using LoopVectorization + +@inline function mul!(C::StackArray, A::StackArray, B::StackArray) + @turbo for n ∈ indices((C,B), 2), m ∈ indices((C,A), 1) + Cmn = zero(eltype(C)) + for k ∈ indices((A,B), (2,1)) + Cmn += A[m,k] * B[k,n] + end + C[m,n] = Cmn + end + return C +end + +function loopvec_matrix_stack() + rows = 10 + cols = 5 + + # LHS + A = StackArray{Float64}(undef, rows, cols) + @turbo for i ∈ axes(A, 1) + for j ∈ axes(A, 2) + A[i,j] = i*j + end + end + + # RHS + B = StackArray{Float64}(undef, cols, rows) + @turbo for i ∈ axes(B, 1) + for j ∈ axes(B, 2) + B[i,j] = i*j + end + end + + # # Matrix multiplication + C = StackArray{Float64}(undef, cols, cols) + mul!(C, B, A) + + # Print to stdout + printf(C) + # Also print to file + fp = fopen(c"table.tsv",c"w") + printf(fp, C) + fclose(fp) +end + +# Attempt to compile +path = compile_executable(loopvec_matrix_stack, (), "./") diff --git a/test/scripts/loopvec_product.jl b/test/scripts/loopvec_product.jl index bffa69c..8342450 100644 --- a/test/scripts/loopvec_product.jl +++ b/test/scripts/loopvec_product.jl @@ -4,8 +4,8 @@ using LoopVectorization function loopvec_product(argc::Int, argv::Ptr{Ptr{UInt8}}) argc == 3 || return printf(stderrp(), c"Incorrect number of command-line arguments\n") - rows = parse(Int64, argv, 2) # First command-line argument - cols = parse(Int64, argv, 3) # Second command-line argument + rows = argparse(Int64, argv, 2) # First command-line argument + cols = 
argparse(Int64, argv, 3) # Second command-line argument s = 0 @turbo for i=1:rows diff --git a/test/scripts/rand_matrix.jl b/test/scripts/rand_matrix.jl index ce9fe46..604e44a 100644 --- a/test/scripts/rand_matrix.jl +++ b/test/scripts/rand_matrix.jl @@ -3,9 +3,10 @@ using StaticTools function rand_matrix(argc::Int, argv::Ptr{Ptr{UInt8}}) argc == 3 || return printf(stderrp(), c"Incorrect number of command-line arguments\n") - rows = parse(Int64, argv, 2) # First command-line argument - cols = parse(Int64, argv, 3) # Second command-line argument + rows = argparse(Int64, argv, 2) # First command-line argument + cols = argparse(Int64, argv, 3) # Second command-line argument + # Manually fil matrix M = MallocArray{Float64}(undef, rows, cols) rng = static_rng() @inbounds for i=1:rows @@ -18,4 +19,5 @@ function rand_matrix(argc::Int, argv::Ptr{Ptr{UInt8}}) end # Attempt to compile -path = compile_executable(rand_matrix, (Int64, Ptr{Ptr{UInt8}}), "./") +# cflags=`-lm`: need to explicitly include libm math library on linux +path = compile_executable(rand_matrix, (Int64, Ptr{Ptr{UInt8}}), "./", cflags=`-lm`) diff --git a/test/scripts/randn_matrix.jl b/test/scripts/randn_matrix.jl new file mode 100644 index 0000000..065d466 --- /dev/null +++ b/test/scripts/randn_matrix.jl @@ -0,0 +1,22 @@ +using StaticCompiler +using StaticTools + +function randn_matrix(argc::Int, argv::Ptr{Ptr{UInt8}}) + argc == 3 || return printf(stderrp(), c"Incorrect number of command-line arguments\n") + rows = argparse(Int64, argv, 2) # First command-line argument + cols = argparse(Int64, argv, 3) # Second command-line argument + + M = MallocArray{Float64}(undef, rows, cols) + rng = MarsagliaPolar(static_rng()) + @inbounds for i=1:rows + for j=1:cols + M[i,j] = randn(rng) + end + end + printf(M) + free(M) +end + +# Attempt to compile +# cflags=`-lm`: need to explicitly include libm math library on linux +path = compile_executable(randn_matrix, (Int64, Ptr{Ptr{UInt8}}), "./", cflags=`-lm`) diff --git 
a/test/scripts/times_table.jl b/test/scripts/times_table.jl index 2dab393..7d295eb 100644 --- a/test/scripts/times_table.jl +++ b/test/scripts/times_table.jl @@ -3,8 +3,8 @@ using StaticTools function times_table(argc::Int, argv::Ptr{Ptr{UInt8}}) argc == 3 || return printf(stderrp(), c"Incorrect number of command-line arguments\n") - rows = parse(Int64, argv, 2) # First command-line argument - cols = parse(Int64, argv, 3) # Second command-line argument + rows = argparse(Int64, argv, 2) # First command-line argument + cols = argparse(Int64, argv, 3) # Second command-line argument M = MallocArray{Int64}(undef, rows, cols) @inbounds for i=1:rows @@ -15,9 +15,8 @@ function times_table(argc::Int, argv::Ptr{Ptr{UInt8}}) # Print to stdout printf(M) # Also print to file - fp = fopen(c"table.tsv",c"w") - printf(fp, M) - fclose(fp) + fwrite(c"table.b", M) + printdlm(c"table.tsv", M) # Clean up matrix free(M) end diff --git a/test/scripts/withmallocarray.jl b/test/scripts/withmallocarray.jl new file mode 100644 index 0000000..2930121 --- /dev/null +++ b/test/scripts/withmallocarray.jl @@ -0,0 +1,31 @@ +using StaticCompiler +using StaticTools + +function withmallocarray(argc::Int, argv::Ptr{Ptr{UInt8}}) + argc == 3 || return printf(stderrp(), c"Incorrect number of command-line arguments\n") + rows = argparse(Int64, argv, 2) # First command-line argument + cols = argparse(Int64, argv, 3) # Second command-line argument + + mzeros(rows, cols) do A + printf(A) + end + mones(Int, rows, cols) do A + printf(A) + end + mfill(3.141592, rows, cols) do A + printf(A) + end + + # Random number generation + rng = MarsagliaPolar() + mrand(rng, rows, cols) do A + printf(A) + end + mrandn(rng, rows, cols) do A + printf(A) + end +end + +# Attempt to compile +# cflags=`-lm`: need to explicitly include libm math library on linux +path = compile_executable(withmallocarray, (Int64, Ptr{Ptr{UInt8}}), "./", cflags=`-lm`) diff --git a/test/testintegration.jl b/test/testintegration.jl index 
5fd783e..f54a808 100644 --- a/test/testintegration.jl +++ b/test/testintegration.jl @@ -1,82 +1,278 @@ @testset "Standalone Executable Integration" begin - + # Setup testpath = pwd() scratch = tempdir() cd(scratch) + jlpath = joinpath(Sys.BINDIR, Base.julia_exename()) # Get path to julia executable - # --- Times table, file IO, mallocarray - - # Compile... - # We have to start a new Julia process to get around the fact that Pkg.test - # disables `@inbounds`, but ironically we can use `--compile=min` to make that - # faster. - status = run(`julia --compile=min $testpath/scripts/times_table.jl`) - @test isa(status, Base.Process) - @test status.exitcode == 0 - - # Attempt to run - println("5x5 times table:") - status = run(`./times_table 5 5`) - @test isa(status, Base.Process) - @test status.exitcode == 0 - @test parsedlm(Int64, c"table.tsv", '\t') == (1:5)*(1:5)' - - # --- Random number generation - - # Compile... - status = run(`julia --compile=min $testpath/scripts/rand_matrix.jl`) - @test isa(status, Base.Process) - @test status.exitcode == 0 - - # Run... - println("5x5 random matrix:") - status = run(`./rand_matrix 5 5`) - @test isa(status, Base.Process) - @test status.exitcode == 0 - - # --- Test LoopVectorization integration - -@static if LoopVectorization.VectorizationBase.has_feature(Val{:x86_64_avx2}) - # Compile... - status = run(`julia --compile=min $testpath/scripts/loopvec_product.jl`) - @test isa(status, Base.Process) - @test status.exitcode == 0 - - # Run... - println("10x10 table sum:") - status = run(`./loopvec_product 10 10`) - @test isa(status, Base.Process) - @test status.exitcode == 0 - @test parsedlm(c"product.tsv",'\t')[] == 3025 -end + ## --- Times table, file IO, mallocarray + let + # Attempt to compile + # We have to start a new Julia process to get around the fact that Pkg.test + # disables `@inbounds`, but ironically we can use `--compile=min` to make that + # faster. 
+ status = -1 + try + isfile("times_table") && rm("times_table") + status = run(`$jlpath --compile=min $testpath/scripts/times_table.jl`) + catch e + @warn "Could not compile $testpath/scripts/times_table.jl" + println(e) + end + @test isa(status, Base.Process) + @test isa(status, Base.Process) && status.exitcode == 0 + + # Attempt to run + println("5x5 times table:") + status = -1 + try + status = run(`./times_table 5 5`) + catch e + @warn "Could not run $(scratch)/times_table" + println(e) + end + @test isa(status, Base.Process) + @test isa(status, Base.Process) && status.exitcode == 0 + # Test ascii output + @test parsedlm(Int, c"table.tsv", '\t') == (1:5)*(1:5)' + # Test binary output + @test fread!(szeros(Int, 5,5), c"table.b") == (1:5)*(1:5)' + end + + ## --- "withmallocarray"-type do-block pattern + let + # Compile... + status = -1 + try + isfile("withmallocarray") && rm("withmallocarray") + status = run(`$jlpath --compile=min $testpath/scripts/withmallocarray.jl`) + catch e + @warn "Could not compile $testpath/scripts/withmallocarray.jl" + println(e) + end + @test isa(status, Base.Process) + @test isa(status, Base.Process) && status.exitcode == 0 + + # Run... + println("3x3 malloc arrays via do-block syntax:") + status = -1 + try + status = run(`./withmallocarray 3 3`) + catch e + @warn "Could not run $(scratch)/withmallocarray" + println(e) + end + @test isa(status, Base.Process) + @test isa(status, Base.Process) && status.exitcode == 0 + end + + ## --- Random number generation + let + # Compile... + status = -1 + try + isfile("rand_matrix") && rm("rand_matrix") + status = run(`$jlpath --compile=min $testpath/scripts/rand_matrix.jl`) + catch e + @warn "Could not compile $testpath/scripts/rand_matrix.jl" + println(e) + end + @test isa(status, Base.Process) + @test isa(status, Base.Process) && status.exitcode == 0 + + # Run... 
+ println("5x5 uniform random matrix:") + status = -1 + try + status = run(`./rand_matrix 5 5`) + catch e + @warn "Could not run $(scratch)/rand_matrix" + println(e) + end + @test isa(status, Base.Process) + @test isa(status, Base.Process) && status.exitcode == 0 + end + + let + # Compile... + status = -1 + try + isfile("randn_matrix") && rm("randn_matrix") + status = run(`$jlpath --compile=min $testpath/scripts/randn_matrix.jl`) + catch e + @warn "Could not compile $testpath/scripts/randn_matrix.jl" + println(e) + end + @static if Sys.isbsd() + @test isa(status, Base.Process) + @test isa(status, Base.Process) && status.exitcode == 0 + end + + # Run... + println("5x5 Normal random matrix:") + status = -1 + try + status = run(`./randn_matrix 5 5`) + catch e + @warn "Could not run $(scratch)/randn_matrix" + println(e) + end + @static if Sys.isbsd() + @test isa(status, Base.Process) + @test isa(status, Base.Process) && status.exitcode == 0 + end + end + + ## --- Test LoopVectorization integration + @static if LoopVectorization.VectorizationBase.has_feature(Val{:x86_64_avx2}) + let + # Compile... + status = -1 + try + isfile("loopvec_product") && rm("loopvec_product") + status = run(`$jlpath --compile=min $testpath/scripts/loopvec_product.jl`) + catch e + @warn "Could not compile $testpath/scripts/loopvec_product.jl" + println(e) + end + @test isa(status, Base.Process) + @test isa(status, Base.Process) && status.exitcode == 0 + + # Run... + println("10x10 table sum:") + status = -1 + try + status = run(`./loopvec_product 10 10`) + catch e + @warn "Could not run $(scratch)/loopvec_product" + println(e) + end + @test isa(status, Base.Process) + @test isa(status, Base.Process) && status.exitcode == 0 + @test parsedlm(c"product.tsv",'\t')[] == 3025 + end + end + + let + # Compile... 
+ status = -1 + try + isfile("loopvec_matrix") && rm("loopvec_matrix") + status = run(`$jlpath --compile=min $testpath/scripts/loopvec_matrix.jl`) + catch e + @warn "Could not compile $testpath/scripts/loopvec_matrix.jl" + println(e) + end + @test isa(status, Base.Process) + @test isa(status, Base.Process) && status.exitcode == 0 + + # Run... + println("10x5 matrix product:") + status = -1 + try + status = run(`./loopvec_matrix 10 5`) + catch e + @warn "Could not run $(scratch)/loopvec_matrix" + println(e) + end + @test isa(status, Base.Process) + @test isa(status, Base.Process) && status.exitcode == 0 + A = (1:10) * (1:5)' + # Check ascii output + @test parsedlm(c"table.tsv",'\t') == A' * A + # Check binary output + @test fread!(szeros(5,5), c"table.b") == A' * A + end + + let + # Compile... + status = -1 + try + isfile("loopvec_matrix_stack") && rm("loopvec_matrix_stack") + status = run(`$jlpath --compile=min $testpath/scripts/loopvec_matrix_stack.jl`) + catch e + @warn "Could not compile $testpath/scripts/loopvec_matrix_stack.jl" + println(e) + end + @test isa(status, Base.Process) + @test isa(status, Base.Process) && status.exitcode == 0 + + # Run... + println("10x5 matrix product:") + status = -1 + try + status = run(`./loopvec_matrix_stack`) + catch e + @warn "Could not run $(scratch)/loopvec_matrix_stack" + println(e) + end + @test isa(status, Base.Process) + @test isa(status, Base.Process) && status.exitcode == 0 + A = (1:10) * (1:5)' + @test parsedlm(c"table.tsv",'\t') == A' * A + end + + + ## --- Test string handling + + let + # Compile... + status = -1 + try + isfile("print_args") && rm("print_args") + status = run(`$jlpath --compile=min $testpath/scripts/print_args.jl`) + catch e + @warn "Could not compile $testpath/scripts/print_args.jl" + println(e) + end + @test isa(status, Base.Process) + @test isa(status, Base.Process) && status.exitcode == 0 + + # Run... 
+ println("String indexing and handling:") + status = -1 + try + status = run(`./print_args foo bar`) + catch e + @warn "Could not run $(scratch)/print_args" + println(e) + end + @test isa(status, Base.Process) + @test isa(status, Base.Process) && status.exitcode == 0 + end + + ## --- Test interop + + @static if Sys.isbsd() + let + # Compile... + status = -1 + try + isfile("interop") && rm("interop") + status = run(`$jlpath --compile=min $testpath/scripts/interop.jl`) + catch e + @warn "Could not compile $testpath/scripts/interop.jl" + println(e) + end + @test isa(status, Base.Process) + @test isa(status, Base.Process) && status.exitcode == 0 + + # Run... + println("Interop:") + status = -1 + try + status = run(`./interop`) + catch e + @warn "Could not run $(scratch)/interop" + println(e) + end + @test isa(status, Base.Process) + @test isa(status, Base.Process) && status.exitcode == 0 + end + end + + ## --- Clean up - # Compile... - status = run(`julia --compile=min $testpath/scripts/loopvec_matrix.jl`) - @test isa(status, Base.Process) - @test status.exitcode == 0 - - # Run... - println("10x3 matrix product:") - status = run(`./loopvec_matrix 10 3`) - @test isa(status, Base.Process) - @test status.exitcode == 0 - A = (1:10) * (1:3)' - @test parsedlm(c"table.tsv",'\t') == A' * A - - # --- Test string handling - - # Compile... - status = run(`julia --compile=min $testpath/scripts/print_args.jl`) - @test isa(status, Base.Process) - @test status.exitcode == 0 - - # Run... - println("String indexing and handling:") - status = run(`./print_args foo bar`) - @test isa(status, Base.Process) - @test status.exitcode == 0 - - # --- Clean up cd(testpath) end From 5e4f03febc5b8a816ef24a13c5a72019133f7a79 Mon Sep 17 00:00:00 2001 From: "C. 
Brenhin Keller" Date: Thu, 20 Oct 2022 14:49:24 -0400 Subject: [PATCH 097/159] Revert "Merge branch 'errors-are-fine' into master" This reverts commit 89aded496846fdadab46b4720ebbdf367e105b20, reversing changes made to 38dbdbef14ca2f484661221ad238af4b5b75c0dc. --- Project.toml | 2 - src/StaticCompiler.jl | 407 ++++++++++++++++++++---------------------- src/quirks.jl | 89 --------- src/target.jl | 61 +++---- src/utils.jl | 69 ------- 5 files changed, 215 insertions(+), 413 deletions(-) delete mode 100644 src/quirks.jl delete mode 100644 src/utils.jl diff --git a/Project.toml b/Project.toml index 18616d2..bfea97f 100644 --- a/Project.toml +++ b/Project.toml @@ -9,10 +9,8 @@ GPUCompiler = "61eb1bfa-7361-4325-ad38-22787b887f55" LLVM = "929cbde3-209d-540e-8aea-75f648917ca0" Libdl = "8f399da3-3557-5675-b5ff-fb832c97cbdb" Serialization = "9e88b42a-f829-5b0c-bbe9-9e923198166b" -StaticTools = "86c06d3c-3f03-46de-9781-57580aa96d0a" [compat] GPUCompiler = "0.16" LLVM = "4.8" julia = "1.7" -StaticTools = "0.8" diff --git a/src/StaticCompiler.jl b/src/StaticCompiler.jl index baba6c6..5a447cd 100644 --- a/src/StaticCompiler.jl +++ b/src/StaticCompiler.jl @@ -8,13 +8,10 @@ using Libdl: Libdl, dlsym, dlopen using Base: RefValue using Serialization: serialize, deserialize using Clang_jll: clang -using StaticTools: @symbolcall, @c_str export compile, load_function, compile_shlib, compile_executable export native_code_llvm, native_code_typed, native_llvm_module, native_code_native -include("utils.jl") -include("quirks.jl") include("target.jl") include("pointer_patching.jl") include("code_loading.jl") @@ -89,7 +86,7 @@ single method (the method determined by `types`). function compile(f, _tt, path::String = tempname(); name = GPUCompiler.safe_name(repr(f)), filename="obj", strip_llvm = false, strip_asm = true, - opt_level = 3, + opt_level=3, kwargs...) 
tt = Base.to_tuple_type(_tt) isconcretetype(tt) || error("input type signature $_tt is not concrete") @@ -112,34 +109,37 @@ end generate_obj(f, tt, path::String = tempname(), name = GPUCompiler.safe_name(repr(f)), filenamebase::String="obj"; \tstrip_llvm = false, \tstrip_asm = true, - \topt_level = 3, + \topt_level=3, \tkwargs...) ``` Low level interface for compiling object code (`.o`) for for function `f` given a tuple type `tt` characterizing the types of the arguments for which the function will be compiled. + ### Examples ```julia julia> fib(n) = n <= 1 ? n : fib(n - 1) + fib(n - 2) fib (generic function with 1 method) + julia> path, name, table = StaticCompiler.generate_obj(fib, Tuple{Int64}, "./test") ("./test", "fib", IdDict{Any, String}()) + shell> tree \$path ./test └── obj.o + 0 directories, 1 file ``` """ function generate_obj(f, tt, path::String = tempname(), name = GPUCompiler.safe_name(repr(f)), filenamebase::String="obj"; strip_llvm = false, strip_asm = true, - libjulia = true, - opt_level = 3, + opt_level=3, kwargs...) mkpath(path) obj_path = joinpath(path, "$filenamebase.o") - tm = GPUCompiler.llvm_machine(libjulia ? NativeCompilerTarget() : ExternalNativeCompilerTarget()) - job, kwargs = native_job(f, tt; name, libjulia, kwargs...) + tm = GPUCompiler.llvm_machine(NativeCompilerTarget()) + job, kwargs = native_job(f, tt; name, kwargs...) #Get LLVM to generated a module of code for us. We don't want GPUCompiler's optimization passes. mod, meta = GPUCompiler.JuliaContext() do context GPUCompiler.codegen(:llvm, job; strip=strip_llvm, only_entry=false, validate=false, optimize=false, ctx=context) @@ -168,16 +168,94 @@ function generate_obj(f, tt, path::String = tempname(), name = GPUCompiler.safe_ path, name, table end +""" +```julia +compile_executable(f, types::Tuple, path::String, name::String=repr(f); filename::String=name, kwargs...) 
+``` +Attempt to compile a standalone executable that runs function `f` with a type signature given by the tuple of `types`. + +### Examples +```julia +julia> using StaticCompiler + +julia> function puts(s::Ptr{UInt8}) # Can't use Base.println because it allocates. + # Note, this `llvmcall` requires Julia 1.8+ + Base.llvmcall((\""" + ; External declaration of the puts function + declare i32 @puts(i8* nocapture) nounwind + + define i32 @main(i8*) { + entry: + %call = call i32 (i8*) @puts(i8* %0) + ret i32 0 + } + \""", "main"), Int32, Tuple{Ptr{UInt8}}, s) + end +puts (generic function with 1 method) + +julia> function print_args(argc::Int, argv::Ptr{Ptr{UInt8}}) + for i=1:argc + # Get pointer + p = unsafe_load(argv, i) + # Print string at pointer location (which fortunately already exists isn't tracked by the GC) + puts(p) + end + return 0 + end + +julia> compile_executable(print_args, (Int, Ptr{Ptr{UInt8}})) +"/Users/foo/code/StaticCompiler.jl/print_args" + +shell> ./print_args 1 2 3 4 Five +./print_args +1 +2 +3 +4 +Five +``` +```julia +julia> using StaticTools # So you don't have to define `puts` and friends every time + +julia> hello() = println(c"Hello, world!") # c"..." makes a stack-allocated StaticString + +julia> compile_executable(hello) +"/Users/foo/code/StaticCompiler.jl/hello" + +shell> ./hello +Hello, world! +``` +""" +function compile_executable(f, types=(), path::String="./", name=GPUCompiler.safe_name(repr(f)); + filename=name, + cflags=``, + kwargs... + ) + + tt = Base.to_tuple_type(types) + # tt == Tuple{} || tt == Tuple{Int, Ptr{Ptr{UInt8}}} || error("input type signature $types must be either () or (Int, Ptr{Ptr{UInt8}})") + + rt = only(native_code_typed(f, tt))[2] + isconcretetype(rt) || error("$f$types did not infer to a concrete type. 
Got $rt") + + # Would be nice to use a compiler pass or something to check if there are any heap allocations or references to globals + # Keep an eye on https://github.com/JuliaLang/julia/pull/43747 for this + + generate_executable(f, tt, path, name, filename; cflags=cflags, kwargs...) + + joinpath(abspath(path), filename) +end + + """ ```julia compile_shlib(f, types::Tuple, path::String, name::String=repr(f); filename::String=name, kwargs...) ``` As `compile_executable`, but compiling to a standalone `.dylib`/`.so` shared library. """ -function compile_shlib(f::Function, types=(), path::String="./", name=GPUCompiler.safe_name(repr(f)); - filename = name, - libjulia = false, - cflags = ``, +function compile_shlib(f, types=(), path::String="./", name=GPUCompiler.safe_name(repr(f)); + filename=name, + cflags=``, kwargs... ) @@ -189,17 +267,16 @@ function compile_shlib(f::Function, types=(), path::String="./", name=GPUCompile # Would be nice to use a compiler pass or something to check if there are any heap allocations or references to globals # Keep an eye on https://github.com/JuliaLang/julia/pull/43747 for this - generate_shlib(f, tt, path, name, filename; libjulia, cflags, kwargs...) + generate_shlib(f, tt, path, name, filename; cflags=cflags, kwargs...) joinpath(abspath(path), filename * "." * Libdl.dlext) end function generate_shlib_fptr(f, tt, path::String=tempname(), name = GPUCompiler.safe_name(repr(f)), filename::String=name; temp::Bool=true, - libjulia=true, kwargs...) - generate_shlib(f, tt, path, name; libjulia, kwargs...) + generate_shlib(f, tt, path, name; kwargs...) lib_path = joinpath(abspath(path), "$filename.$(Libdl.dlext)") ptr = Libdl.dlopen(lib_path, Libdl.RTLD_LOCAL) fptr = Libdl.dlsym(ptr, "julia_$name") @@ -254,6 +331,69 @@ function generate_shlib_fptr(path::String, name, filename::String=name) fptr end +""" +```julia +generate_executable(f, tt, path::String, name, filename=string(name); kwargs...) 
+``` +Attempt to compile a standalone executable that runs `f`. + +### Examples +```julia +julia> function test(n) + r = 0.0 + for i=1:n + r += log(sqrt(i)) + end + return r/n + end +test (generic function with 1 method) + +julia> path, name = StaticCompiler.generate_executable(test, Tuple{Int64}, "./scratch") +``` +""" +function generate_executable(f, tt, path=tempname(), name=GPUCompiler.safe_name(repr(f)), filename=string(name); + cflags=``, + kwargs... + ) + mkpath(path) + obj_path = joinpath(path, "$filename.o") + exec_path = joinpath(path, filename) + job, kwargs = native_job(f, tt; name, kwargs...) + obj, _ = GPUCompiler.codegen(:obj, job; strip=true, only_entry=false, validate=false) + + # Write to file + open(obj_path, "w") do io + write(io, obj) + end + + # Pick a compiler + cc = Sys.isapple() ? `cc` : clang() + # Compile! + if Sys.isapple() + # Apple no longer uses _start, so we can just specify a custom entry + entry = "_julia_$name" + run(`$cc -e $entry $cflags $obj_path -o $exec_path`) + else + # Write a minimal wrapper to avoid having to specify a custom entry + wrapper_path = joinpath(path, "wrapper.c") + f = open(wrapper_path, "w") + print(f, """int julia_$name(int argc, char** argv); + void* __stack_chk_guard = (void*) $(rand(UInt) >> 1); + + int main(int argc, char** argv) + { + julia_$name(argc, argv); + return 0; + }""") + close(f) + run(`$cc $wrapper_path $cflags $obj_path -o $exec_path`) + # Clean up + run(`rm $wrapper_path`) + end + + path, name +end + """ ```julia generate_shlib(f, tt, path::String, name::String, filenamebase::String="obj"; kwargs...) @@ -286,15 +426,14 @@ julia> ccall(StaticCompiler.generate_shlib_fptr(path, name), Float64, (Int64,), ``` """ function generate_shlib(f, tt, path=tempname(), name=GPUCompiler.safe_name(repr(f)), filename=name; - libjulia = false, - cflags = ``, + cflags=``, kwargs... 
) mkpath(path) obj_path = joinpath(path, "$filename.o") lib_path = joinpath(path, "$filename.$(Libdl.dlext)") - job, kwargs = native_job(f, tt; name, libjulia, kwargs...) + job, kwargs = native_job(f, tt; name, kwargs...) obj, _ = GPUCompiler.codegen(:obj, job; strip=true, only_entry=false, validate=false) open(obj_path, "w") do io @@ -335,12 +474,12 @@ end #Return an LLVM module for multiple functions function native_llvm_module(funcs::Array; demangle = false, kwargs...) - f,tt = first(funcs) - mod = native_llvm_module(f,tt; kwargs...) + f,tt = funcs[1] + mod = native_llvm_module(f,tt, kwargs...) if length(funcs) > 1 for func in funcs[2:end] @show f,tt = func - tmod = native_llvm_module(f,tt; kwargs...) + tmod = native_llvm_module(f,tt, kwargs...) link!(mod,tmod) end end @@ -359,60 +498,17 @@ function native_llvm_module(funcs::Array; demangle = false, kwargs...) return mod end -## -- compile_shlib / generate_shlib / generate_obj, but for multiple functions at once - -function compile_shlib(funcs::Vector{<:Tuple}, path::String="./"; - filename = "libfoo", - demangle = false, - libjulia = false, - cflags = ``, - kwargs... - ) - for func in funcs - f, types = func - tt = Base.to_tuple_type(types) - isconcretetype(tt) || error("input type signature $types is not concrete") - - rt = only(native_code_typed(f, tt))[2] - isconcretetype(rt) || error("$f$types did not infer to a concrete type. Got $rt") - end - - funcs_tt = [(first(f), Base.to_tuple_type(last(f))) for f in funcs] - generate_shlib(funcs_tt, path, filename; demangle, libjulia, cflags, kwargs...) - - joinpath(abspath(path), filename * "." * Libdl.dlext) -end - -function generate_shlib(funcs::Vector{<:Tuple}, path::String = tempname(), filename::String="libfoo"; - demangle = false, - libjulia = false, - cflags = ``, - kwargs... - ) - - lib_path = joinpath(path, "$filename.$(Libdl.dlext)") - - _,obj_path = generate_obj(funcs, path, filename; demangle, libjulia, kwargs...) - # Pick a Clang - cc = Sys.isapple() ? 
`cc` : clang() - # Compile! - run(`$cc -shared $cflags $obj_path -o $lib_path `) - - path, name -end - -function generate_obj(funcs::Vector{<:Tuple}, path::String = tempname(), filenamebase::String="obj"; - demangle = false, - libjulia = false, +function generate_obj(funcs::Array, path::String = tempname(), filenamebase::String="obj"; + demangle =false, strip_llvm = false, - strip_asm = true, - opt_level = 3, + strip_asm = true, + opt_level=3, kwargs...) - f,tt = first(funcs) + f,tt = funcs[1] mkpath(path) obj_path = joinpath(path, "$filenamebase.o") - fakejob, kwargs = native_job(f, tt; libjulia, kwargs...) - mod = native_llvm_module(funcs; demangle, libjulia, kwargs...) + fakejob, kwargs = native_job(f,tt, kwargs...) + mod = native_llvm_module(funcs; demangle = demangle, kwargs...) obj, _ = GPUCompiler.emit_asm(fakejob, mod; strip=strip_asm, validate=false, format=LLVM.API.LLVMObjectFile) open(obj_path, "w") do io write(io, obj) @@ -420,159 +516,44 @@ function generate_obj(funcs::Vector{<:Tuple}, path::String = tempname(), filenam path, obj_path end -## --- Compile standalone binaries - -""" -```julia -function compile_executable(f, types=(), path::String="./", name=GPUCompiler.safe_name(repr(f)); - filename = name, - libjulia = false, - cflags = ``, +function generate_shlib(funcs::Array, path::String = tempname(), filename::String="libfoo"; + demangle=false, + cflags=``, kwargs... ) -``` -Attempt to compile a standalone executable that runs function `f` with a type signature given by the tuple of `types`. - -### Examples -```julia -julia> using StaticCompiler - -julia> function puts(s::Ptr{UInt8}) # Can't use Base.println because it allocates. 
- # Note, this `llvmcall` requires Julia 1.8+ - Base.llvmcall((\""" - ; External declaration of the puts function - declare i32 @puts(i8* nocapture) nounwind - - define i32 @main(i8*) { - entry: - %call = call i32 (i8*) @puts(i8* %0) - ret i32 0 - } - \""", "main"), Int32, Tuple{Ptr{UInt8}}, s) - end -puts (generic function with 1 method) -julia> function print_args(argc::Int, argv::Ptr{Ptr{UInt8}}) - for i=1:argc - # Get pointer - p = unsafe_load(argv, i) - # Print string at pointer location (which fortunately already exists isn't tracked by the GC) - puts(p) - end - return 0 - end - -julia> compile_executable(print_args, (Int, Ptr{Ptr{UInt8}})) -"/Users/foo/code/StaticCompiler.jl/print_args" - -shell> ./print_args 1 2 3 4 Five -./print_args -1 -2 -3 -4 -Five -``` -```julia -julia> using StaticTools # So you don't have to define `puts` and friends every time - -julia> hello() = println(c"Hello, world!") # c"..." makes a stack-allocated StaticString - -julia> compile_executable(hello) -"/Users/foo/code/StaticCompiler.jl/hello" - -shell> ./hello -Hello, world! -``` -""" -function compile_executable(f, types=(), path::String="./", name=GPUCompiler.safe_name(repr(f)); - filename = name, - libjulia = false, - cflags = ``, - kwargs... - ) - - tt = Base.to_tuple_type(types) - # tt == Tuple{} || tt == Tuple{Int, Ptr{Ptr{UInt8}}} || error("input type signature $types must be either () or (Int, Ptr{Ptr{UInt8}})") - - rt = only(native_code_typed(f, tt))[2] - isconcretetype(rt) || error("$f$types did not infer to a concrete type. Got $rt") - - # Would be nice to use a compiler pass or something to check if there are any heap allocations or references to globals - # Keep an eye on https://github.com/JuliaLang/julia/pull/43747 for this + lib_path = joinpath(path, "$filename.$(Libdl.dlext)") - generate_executable(f, tt, path, name, filename; libjulia, cflags, kwargs...) + _,obj_path = generate_obj(funcs, path, filename; demangle=demangle, kwargs...) 
+ # Pick a Clang + cc = Sys.isapple() ? `cc` : clang() + # Compile! + run(`$cc -shared $cflags $obj_path -o $lib_path `) - joinpath(abspath(path), filename) + path, name end +function compile_shlib(funcs::Array, path::String="./"; + filename="libfoo", + demangle=false, + cflags=``, + kwargs...) + for func in funcs + f, types = func + tt = Base.to_tuple_type(types) + isconcretetype(tt) || error("input type signature $types is not concrete") -""" -```julia -function generate_executable(f, tt, path=tempname(), name=GPUCompiler.safe_name(repr(f)), filename=string(name); - cflags = ``, - libjulia = false, - kwargs... - ) -``` -Attempt to compile a standalone executable that runs `f`. - -### Examples -```julia -julia> function test(n) - r = 0.0 - for i=1:n - r += log(sqrt(i)) - end - return r/n - end -test (generic function with 1 method) - -julia> path, name = StaticCompiler.generate_executable(test, Tuple{Int64}, "./scratch") -``` -""" -function generate_executable(f, tt, path=tempname(), name=GPUCompiler.safe_name(repr(f)), filename=string(name); - cflags = ``, - libjulia = false, - kwargs... - ) - mkpath(path) - obj_path = joinpath(path, "$filename.o") - exec_path = joinpath(path, filename) - job, kwargs = native_job(f, tt; name, libjulia, kwargs...) - obj, _ = GPUCompiler.codegen(:obj, job; strip=true, only_entry=false, validate=false) - - # Write to file - open(obj_path, "w") do io - write(io, obj) + rt = only(native_code_typed(f, tt))[2] + isconcretetype(rt) || error("$f$types did not infer to a concrete type. Got $rt") end - # Pick a compiler - cc = Sys.isapple() ? `cc` : clang() - # Compile! 
- if Sys.isapple() - # Apple no longer uses _start, so we can just specify a custom entry - entry = "_julia_$name" - run(`$cc -e $entry $cflags $obj_path -o $exec_path`) - else - # Write a minimal wrapper to avoid having to specify a custom entry - wrapper_path = joinpath(path, "wrapper.c") - f = open(wrapper_path, "w") - print(f, """int julia_$name(int argc, char** argv); - void* __stack_chk_guard = (void*) $(rand(UInt) >> 1); +# Would be nice to use a compiler pass or something to check if there are any heap allocations or references to globals +# Keep an eye on https://github.com/JuliaLang/julia/pull/43747 for this - int main(int argc, char** argv) - { - julia_$name(argc, argv); - return 0; - }""") - close(f) - run(`$cc $wrapper_path $cflags $obj_path -o $exec_path`) - # Clean up - run(`rm $wrapper_path`) - end + generate_shlib(funcs, path, filename; demangle=demangle, cflags=cflags, kwargs...) - path, name + joinpath(abspath(path), filename * "." * Libdl.dlext) end + end # module diff --git a/src/quirks.jl b/src/quirks.jl deleted file mode 100644 index d632dd7..0000000 --- a/src/quirks.jl +++ /dev/null @@ -1,89 +0,0 @@ -macro print_and_throw(err) - quote - println(err) - libcexit(Int32(1)) - end -end - -# math.jl -@device_override @noinline Base.Math.throw_complex_domainerror(f::Symbol, x) = - @print_and_throw c"This operation requires a complex input to return a complex result" -@device_override @noinline Base.Math.throw_exp_domainerror(f::Symbol, x) = - @print_and_throw c"Exponentiation yielding a complex result requires a complex argument" - -# intfuncs.jl -@device_override @noinline Base.throw_domerr_powbysq(::Any, p) = - @print_and_throw c"Cannot raise an integer to a negative power" -@device_override @noinline Base.throw_domerr_powbysq(::Integer, p) = - @print_and_throw c"Cannot raise an integer to a negative power" -@device_override @noinline Base.throw_domerr_powbysq(::AbstractMatrix, p) = - @print_and_throw c"Cannot raise an integer to a negative 
power" -@device_override @noinline Base.__throw_gcd_overflow(a, b) = - @print_and_throw c"gcd overflow" - -# checked.jl -@device_override @noinline Base.Checked.throw_overflowerr_binaryop(op, x, y) = - @print_and_throw c"Binary operation overflowed" -@device_override @noinline Base.Checked.throw_overflowerr_negation(op, x, y) = - @print_and_throw c"Negation overflowed" -@device_override function Base.Checked.checked_abs(x::Base.Checked.SignedInt) - r = ifelse(x<0, -x, x) - r<0 && @print_and_throw(c"checked arithmetic: cannot compute |x|") - r -end - -# boot.jl -@device_override @noinline Core.throw_inexacterror(f::Symbol, ::Type{T}, val) where {T} = - @print_and_throw c"Inexact conversion" - -# abstractarray.jl -@device_override @noinline Base.throw_boundserror(A, I) = - @print_and_throw c"Out-of-bounds array access" - -# trig.jl -@device_override @noinline Base.Math.sincos_domain_error(x) = - @print_and_throw c"sincos(x) is only defined for finite x." - - -# range.jl -@static if VERSION >= v"1.7-" - @eval begin - @device_override function Base.StepRangeLen{T,R,S,L}(ref::R, step::S, len::Integer, - offset::Integer=1) where {T,R,S,L} - if T <: Integer && !isinteger(ref + step) - @print_and_throw(c"StepRangeLen{<:Integer} cannot have non-integer step") - end - len = convert(L, len) - len >= zero(len) || @print_and_throw(c"StepRangeLen length cannot be negative") - offset = convert(L, offset) - L1 = oneunit(typeof(len)) - L1 <= offset <= max(L1, len) || @print_and_throw(c"StepRangeLen: offset must be in [1,...]") - $( - Expr(:new, :(StepRangeLen{T,R,S,L}), :ref, :step, :len, :offset) - ) - end - end -else - @device_override function Base.StepRangeLen{T,R,S}(ref::R, step::S, len::Integer, - offset::Integer=1) where {T,R,S} - if T <: Integer && !isinteger(ref + step) - @print_and_throw(c"StepRangeLen{<:Integer} cannot have non-integer step") - end - len >= 0 || @print_and_throw(c"StepRangeLen length cannot be negative") - 1 <= offset <= max(1,len) || 
@print_and_throw(c"StepRangeLen: offset must be in [1,...]") - new(ref, step, len, offset) - end -end - - -# fastmath.jl -@static if VERSION <= v"1.7-" -## prevent fallbacks to libm -for f in (:acosh, :asinh, :atanh, :cbrt, :cosh, :exp2, :expm1, :log1p, :sinh, :tanh) - f_fast = Base.FastMath.fast_op[f] - @eval begin - @device_override Base.FastMath.$f_fast(x::Float32) = $f(x) - @device_override Base.FastMath.$f_fast(x::Float64) = $f(x) - end -end -end diff --git a/src/target.jl b/src/target.jl index b6713b8..2efb191 100644 --- a/src/target.jl +++ b/src/target.jl @@ -3,11 +3,21 @@ Base.@kwdef struct NativeCompilerTarget <: GPUCompiler.AbstractCompilerTarget features::String=(LLVM.version() < v"8") ? "" : unsafe_string(LLVM.API.LLVMGetHostCPUFeatures()) end -Base.@kwdef struct ExternalNativeCompilerTarget <: GPUCompiler.AbstractCompilerTarget - cpu::String=(LLVM.version() < v"8") ? "" : unsafe_string(LLVM.API.LLVMGetHostCPUName()) - features::String=(LLVM.version() < v"8") ? "" : unsafe_string(LLVM.API.LLVMGetHostCPUFeatures()) +GPUCompiler.llvm_triple(::NativeCompilerTarget) = Sys.MACHINE + +function GPUCompiler.llvm_machine(target::NativeCompilerTarget) + triple = GPUCompiler.llvm_triple(target) + + t = LLVM.Target(triple=triple) + + tm = LLVM.TargetMachine(t, triple, target.cpu, target.features, reloc=LLVM.API.LLVMRelocPIC) + GPUCompiler.asm_verbosity!(tm, true) + + return tm end +GPUCompiler.runtime_slug(job::GPUCompiler.CompilerJob{NativeCompilerTarget}) = "native_$(job.target.cpu)-$(hash(job.target.features))" + module StaticRuntime # the runtime library signal_exception() = return @@ -20,46 +30,17 @@ end struct StaticCompilerParams <: GPUCompiler.AbstractCompilerParams end -for target in (:NativeCompilerTarget, :ExternalNativeCompilerTarget) - @eval begin - GPUCompiler.llvm_triple(::$target) = Sys.MACHINE - - function GPUCompiler.llvm_machine(target::$target) - triple = GPUCompiler.llvm_triple(target) - - t = LLVM.Target(triple=triple) - - tm = 
LLVM.TargetMachine(t, triple, target.cpu, target.features, reloc=LLVM.API.LLVMRelocPIC) - GPUCompiler.asm_verbosity!(tm, true) - - return tm - end - - GPUCompiler.runtime_slug(job::GPUCompiler.CompilerJob{$target}) = "native_$(job.target.cpu)-$(hash(job.target.features))" - - GPUCompiler.runtime_module(::GPUCompiler.CompilerJob{$target}) = StaticRuntime - GPUCompiler.runtime_module(::GPUCompiler.CompilerJob{$target, StaticCompilerParams}) = StaticRuntime - - - GPUCompiler.can_throw(job::GPUCompiler.CompilerJob{$target, StaticCompilerParams}) = true - GPUCompiler.can_throw(job::GPUCompiler.CompilerJob{$target}) = true - end -end - GPUCompiler.runtime_module(::GPUCompiler.CompilerJob{<:Any,StaticCompilerParams}) = StaticRuntime -GPUCompiler.can_throw(job::GPUCompiler.CompilerJob{<:Any,StaticCompilerParams}) = true +GPUCompiler.runtime_module(::GPUCompiler.CompilerJob{NativeCompilerTarget}) = StaticRuntime +GPUCompiler.runtime_module(::GPUCompiler.CompilerJob{NativeCompilerTarget, StaticCompilerParams}) = StaticRuntime -GPUCompiler.method_table(@nospecialize(job::GPUCompiler.CompilerJob{ExternalNativeCompilerTarget})) = method_table -GPUCompiler.method_table(@nospecialize(job::GPUCompiler.CompilerJob{ExternalNativeCompilerTarget, StaticCompilerParams})) = method_table +GPUCompiler.can_throw(job::GPUCompiler.CompilerJob{<:Any,StaticCompilerParams}) = true +GPUCompiler.can_throw(job::GPUCompiler.CompilerJob{NativeCompilerTarget, StaticCompilerParams}) = true +GPUCompiler.can_throw(job::GPUCompiler.CompilerJob{NativeCompilerTarget}) = true -function native_job(@nospecialize(func::Function), @nospecialize(types::Type); - name = GPUCompiler.safe_name(repr(func)), - libjulia::Bool = true, - kernel::Bool = false, - kwargs... - ) - source = GPUCompiler.FunctionSpec(func, types, kernel, name) - target = libjulia ? 
NativeCompilerTarget() : ExternalNativeCompilerTarget() +function native_job(@nospecialize(func), @nospecialize(types); kernel::Bool=false, name=GPUCompiler.safe_name(repr(func)), kwargs...) + source = GPUCompiler.FunctionSpec(func, Base.to_tuple_type(types), kernel, name) + target = NativeCompilerTarget() params = StaticCompilerParams() GPUCompiler.CompilerJob(target, source, params), kwargs end diff --git a/src/utils.jl b/src/utils.jl deleted file mode 100644 index 636b147..0000000 --- a/src/utils.jl +++ /dev/null @@ -1,69 +0,0 @@ - - -# local method table for device functions -@static if isdefined(Base.Experimental, Symbol("@overlay")) -Base.Experimental.@MethodTable(method_table) -else -const method_table = nothing -end - -# list of overrides (only for Julia 1.6) -const overrides = Expr[] - -macro device_override(ex) - ex = macroexpand(__module__, ex) - if Meta.isexpr(ex, :call) - @show ex = eval(ex) - error() - end - code = quote - $GPUCompiler.@override(StaticCompiler.method_table, $ex) - end - if isdefined(Base.Experimental, Symbol("@overlay")) - return esc(code) - else - push!(overrides, code) - return - end -end - -macro device_function(ex) - ex = macroexpand(__module__, ex) - def = splitdef(ex) - - # generate a function that errors - def[:body] = quote - error("This function is not intended for use on the CPU") - end - - esc(quote - $(combinedef(def)) - @device_override $ex - end) -end - -macro device_functions(ex) - ex = macroexpand(__module__, ex) - - # recursively prepend `@device_function` to all function definitions - function rewrite(block) - out = Expr(:block) - for arg in block.args - if Meta.isexpr(arg, :block) - # descend in blocks - push!(out.args, rewrite(arg)) - elseif Meta.isexpr(arg, [:function, :(=)]) - # rewrite function definitions - push!(out.args, :(@device_function $arg)) - else - # preserve all the rest - push!(out.args, arg) - end - end - out - end - - esc(rewrite(ex)) -end - -libcexit(x::Int32) = @symbolcall exit(x::Int32)::Nothing 
From 63eed827707ee27f35af70b1d8d5dde9f6f48c14 Mon Sep 17 00:00:00 2001 From: Mason Protter Date: Thu, 20 Oct 2022 12:55:53 -0600 Subject: [PATCH 098/159] test 1.8.0-beta3 -> 1.8 --- .github/workflows/ci.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index dddaf90..341215a 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -19,7 +19,7 @@ jobs: matrix: version: - '1.7' - - '1.8.0-beta3' + - '1.8' os: - ubuntu-latest - macOS-latest From 6679965c071109d902ef7d5aaba4f996ac377ada Mon Sep 17 00:00:00 2001 From: Mason Protter Date: Thu, 20 Oct 2022 12:56:30 -0600 Subject: [PATCH 099/159] 1.8.0-beta 3 -> 1.8 --- .github/workflows/ci-integration.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/ci-integration.yml b/.github/workflows/ci-integration.yml index 03fa4d2..3b9024d 100644 --- a/.github/workflows/ci-integration.yml +++ b/.github/workflows/ci-integration.yml @@ -18,7 +18,7 @@ jobs: fail-fast: false matrix: version: - - '1.8.0-beta3' + - '1.8' os: - ubuntu-latest - macOS-latest From c8cfd3ff36c26df230deed76366c780d72060dc5 Mon Sep 17 00:00:00 2001 From: Mason Protter Date: Thu, 20 Oct 2022 13:31:10 -0600 Subject: [PATCH 100/159] run without startup files --- test/testintegration.jl | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/test/testintegration.jl b/test/testintegration.jl index f54a808..6eb2c08 100644 --- a/test/testintegration.jl +++ b/test/testintegration.jl @@ -15,7 +15,7 @@ status = -1 try isfile("times_table") && rm("times_table") - status = run(`$jlpath --compile=min $testpath/scripts/times_table.jl`) + status = run(`$jlpath --startup=no --compile=min $testpath/scripts/times_table.jl`) catch e @warn "Could not compile $testpath/scripts/times_table.jl" println(e) @@ -46,7 +46,7 @@ status = -1 try isfile("withmallocarray") && rm("withmallocarray") - status = run(`$jlpath --compile=min 
$testpath/scripts/withmallocarray.jl`) + status = run(`$jlpath --startup=no --compile=min $testpath/scripts/withmallocarray.jl`) catch e @warn "Could not compile $testpath/scripts/withmallocarray.jl" println(e) @@ -73,7 +73,7 @@ status = -1 try isfile("rand_matrix") && rm("rand_matrix") - status = run(`$jlpath --compile=min $testpath/scripts/rand_matrix.jl`) + status = run(`$jlpath --startup=no --compile=min $testpath/scripts/rand_matrix.jl`) catch e @warn "Could not compile $testpath/scripts/rand_matrix.jl" println(e) @@ -99,7 +99,7 @@ status = -1 try isfile("randn_matrix") && rm("randn_matrix") - status = run(`$jlpath --compile=min $testpath/scripts/randn_matrix.jl`) + status = run(`$jlpath --startup=no --compile=min $testpath/scripts/randn_matrix.jl`) catch e @warn "Could not compile $testpath/scripts/randn_matrix.jl" println(e) @@ -131,7 +131,7 @@ status = -1 try isfile("loopvec_product") && rm("loopvec_product") - status = run(`$jlpath --compile=min $testpath/scripts/loopvec_product.jl`) + status = run(`$jlpath --startup=no --compile=min $testpath/scripts/loopvec_product.jl`) catch e @warn "Could not compile $testpath/scripts/loopvec_product.jl" println(e) @@ -159,7 +159,7 @@ status = -1 try isfile("loopvec_matrix") && rm("loopvec_matrix") - status = run(`$jlpath --compile=min $testpath/scripts/loopvec_matrix.jl`) + status = run(`$jlpath --startup=no --compile=min $testpath/scripts/loopvec_matrix.jl`) catch e @warn "Could not compile $testpath/scripts/loopvec_matrix.jl" println(e) @@ -190,7 +190,7 @@ status = -1 try isfile("loopvec_matrix_stack") && rm("loopvec_matrix_stack") - status = run(`$jlpath --compile=min $testpath/scripts/loopvec_matrix_stack.jl`) + status = run(`$jlpath --startup=no --compile=min $testpath/scripts/loopvec_matrix_stack.jl`) catch e @warn "Could not compile $testpath/scripts/loopvec_matrix_stack.jl" println(e) @@ -221,7 +221,7 @@ status = -1 try isfile("print_args") && rm("print_args") - status = run(`$jlpath --compile=min 
$testpath/scripts/print_args.jl`) + status = run(`$jlpath --startup=no --compile=min $testpath/scripts/print_args.jl`) catch e @warn "Could not compile $testpath/scripts/print_args.jl" println(e) @@ -250,7 +250,7 @@ status = -1 try isfile("interop") && rm("interop") - status = run(`$jlpath --compile=min $testpath/scripts/interop.jl`) + status = run(`$jlpath --startup=no --compile=min $testpath/scripts/interop.jl`) catch e @warn "Could not compile $testpath/scripts/interop.jl" println(e) From d5e15321140b0c6124ab470da164915f849dfb4e Mon Sep 17 00:00:00 2001 From: Mason Protter Date: Thu, 20 Oct 2022 14:26:00 -0600 Subject: [PATCH 101/159] Update README.md --- README.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index c45e105..cdffd9f 100644 --- a/README.md +++ b/README.md @@ -1,9 +1,9 @@ # StaticCompiler -[![CI](https://github.com/tshort/StaticCompiler.jl/workflows/CI/badge.svg)](https://github.com/tshort/StaticCompiler.jl/actions/workflows/ci.yml) -[![CI (Integration)](https://github.com/tshort/StaticCompiler.jl/workflows/CI%20(Integration)/badge.svg)](https://github.com/tshort/StaticCompiler.jl/actions/workflows/ci-integration.yml) +[![CI](https://github.com/tshort/StaticCompiler.jl/actions/workflows/ci.yml/badge.svg)](https://github.com/tshort/StaticCompiler.jl/actions/workflows/ci.yml) +[![CI (Integration)](https://github.com/tshort/StaticCompiler.jl/actions/workflows/ci-integration.yml/badge.svg)](https://github.com/tshort/StaticCompiler.jl/actions/workflows/ci-integration.yml) [![CI (Julia nightly)](https://github.com/tshort/StaticCompiler.jl/workflows/CI%20(Julia%20nightly)/badge.svg)](https://github.com/tshort/StaticCompiler.jl/actions/workflows/ci-julia-nightly.yml) -[![CI (Integration nightly)](https://github.com/tshort/StaticCompiler.jl/workflows/CI%20(Integration%20nightly)/badge.svg)](https://github.com/tshort/StaticCompiler.jl/actions/workflows/ci-integration-nightly.yml) +[![CI (Integration 
nightly)](https://github.com/tshort/StaticCompiler.jl/actions/workflows/ci-integration-nightly.yml/badge.svg)](https://github.com/tshort/StaticCompiler.jl/actions/workflows/ci-integration-nightly.yml) [![Coverage](https://codecov.io/gh/tshort/StaticCompiler.jl/branch/master/graph/badge.svg)](https://codecov.io/gh/tshort/StaticCompiler.jl) This is an experimental package to compile Julia code to standalone libraries. A system image is not needed. From 1bbff0c7e7ce7b5dea85c0fefab10bd593c18742 Mon Sep 17 00:00:00 2001 From: Mason Protter Date: Thu, 20 Oct 2022 15:04:47 -0600 Subject: [PATCH 102/159] change pointer_patching_diff --- src/pointer_patching.jl | 21 ++++++++++++++------- 1 file changed, 14 insertions(+), 7 deletions(-) diff --git a/src/pointer_patching.jl b/src/pointer_patching.jl index 18be73e..4c59f4f 100644 --- a/src/pointer_patching.jl +++ b/src/pointer_patching.jl @@ -155,7 +155,17 @@ end llvmeltype(x::LLVM.Value) = eltype(LLVM.llvmtype(x)) -function pointer_patching_diff(mod::LLVM.Module, path1=tempname(), path2=tempname(); show_reloc_table=false) +function pointer_patching_diff(f, tt, path1=tempname(), path2=tempname(); show_reloc_table=false) + tm = GPUCompiler.llvm_machine(NativeCompilerTarget()) + job, kwargs = native_job(f, tt; name=GPUCompiler.safe_name(repr(f))) + #Get LLVM to generated a module of code for us. We don't want GPUCompiler's optimization passes. 
+ mod, meta = GPUCompiler.JuliaContext() do context + GPUCompiler.codegen(:llvm, job; strip=true, only_entry=false, validate=false, optimize=false, ctx=context) + end + # Use Enzyme's annotation and optimization pipeline + annotate!(mod) + optimize!(mod, tm) + s1 = string(mod) write(path1, s1) @@ -167,12 +177,9 @@ function pointer_patching_diff(mod::LLVM.Module, path1=tempname(), path2=tempnam s2 = string(mod) write(path2, s2) - try - # this always ends in an error for me for some reason - run(`diff $path1 $path2`) - catch e; - nothing - end + pdiff = run(Cmd(`diff $path1 $path2`, ignorestatus=true)) + pdiff.exitcode == 2 && error("Showing diff caused an error") + nothing end From c825a4d6fa7b2ab5e29cf9d2a8778dc481e28a85 Mon Sep 17 00:00:00 2001 From: "C. Brenhin Keller" Date: Thu, 20 Oct 2022 20:38:47 -0400 Subject: [PATCH 103/159] Update readme to explain both compilation interfaces --- README.md | 23 ++++++++++++++++++++--- 1 file changed, 20 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index cdffd9f..6c08808 100644 --- a/README.md +++ b/README.md @@ -9,12 +9,13 @@ This is an experimental package to compile Julia code to standalone libraries. A system image is not needed. ## Installation and Usage - +Installation is the same as any other registered Julia package ```julia using Pkg Pkg.add("StaticCompiler") ``` +There are two main ways to use this package. The first is via the `compile` function, which can be used when you want to compile a Julia function for later use from within Julia: ```julia julia> using StaticCompiler @@ -40,9 +41,25 @@ fib(::Int64) :: Int64 julia> fib_compiled(10) 55 ``` +See the file `tests/runtests.jl` for some examples of functions that work with `compile` (and some that don't, marked with `@test_skip`). 
+ +The second way to use this package is via the `compile_executable` and `compile_shlib` functions, for use when you want to compile a Julia function to a native executable or shared library for use from outside of Julia: +```julia +julia> using StaticCompiler, StaticTools + +julia> hello() = println(c"Hello, world!") +hello (generic function with 1 method) -See the file `tests/runtests.jl` for some examples of functions that work with static compilation (and some that don't, -marked with `@test_skip`) +julia> compile_executable(hello, (), "./") +"/Users/user/hello" + +shell> ls -alh hello +-rwxrwxr-x. 1 user user 8.4K Oct 20 20:36 hello + +shell> ./hello +Hello, world! +``` +This latter approach comes with substantially more limitations, as you cannot rely on `libjulia` (see, e.g., [StaticTools.jl](https://github.com/brenhinkeller/StaticTools.jl) for some ways to work around these limitations). ## Approach From 1cb977f303cd3809b3325ceb41a83a4c0b5a167c Mon Sep 17 00:00:00 2001 From: "C. 
Brenhin Keller" Date: Sat, 11 Mar 2023 12:23:24 -0500 Subject: [PATCH 104/159] Update ci versions (#96) * Also test integration on 1.9 prerelease version * Also run main tests on Julia 1.9 prerelease --- .github/workflows/ci-integration-nightly.yml | 1 + .github/workflows/ci-julia-nightly.yml | 1 + 2 files changed, 2 insertions(+) diff --git a/.github/workflows/ci-integration-nightly.yml b/.github/workflows/ci-integration-nightly.yml index 00074ac..6d4cb3e 100644 --- a/.github/workflows/ci-integration-nightly.yml +++ b/.github/workflows/ci-integration-nightly.yml @@ -19,6 +19,7 @@ jobs: matrix: version: - 'nightly' + - '~1.9.0-0' os: - ubuntu-latest - macOS-latest diff --git a/.github/workflows/ci-julia-nightly.yml b/.github/workflows/ci-julia-nightly.yml index d01adba..dfb2ad8 100644 --- a/.github/workflows/ci-julia-nightly.yml +++ b/.github/workflows/ci-julia-nightly.yml @@ -19,6 +19,7 @@ jobs: matrix: version: - 'nightly' + - '~1.9.0-0' os: - ubuntu-latest - macOS-latest From 25f612344e9c4e79b1f8c4c1d1761cf4bd877573 Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" <41898282+github-actions[bot]@users.noreply.github.com> Date: Sat, 11 Mar 2023 12:25:46 -0500 Subject: [PATCH 105/159] CompatHelper: bump compat for GPUCompiler to 0.17, (keep existing compat) (#94) * CompatHelper: bump compat for GPUCompiler to 0.17, (keep existing compat) * Try running CI * Rerun CI --------- Co-authored-by: CompatHelper Julia Co-authored-by: C. 
Brenhin Keller --- Project.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Project.toml b/Project.toml index bfea97f..12f6833 100644 --- a/Project.toml +++ b/Project.toml @@ -11,6 +11,6 @@ Libdl = "8f399da3-3557-5675-b5ff-fb832c97cbdb" Serialization = "9e88b42a-f829-5b0c-bbe9-9e923198166b" [compat] -GPUCompiler = "0.16" +GPUCompiler = "0.16, 0.17" LLVM = "4.8" julia = "1.7" From 7e4bbe44ec81ee348ad545f1460a7b91a788b943 Mon Sep 17 00:00:00 2001 From: Gabriel Baraldi Date: Tue, 14 Mar 2023 21:39:25 -0300 Subject: [PATCH 106/159] Errors are fine! (#97) Hooray! --- Project.toml | 2 + src/StaticCompiler.jl | 39 +++++++++--------- src/pointer_patching.jl | 18 ++++----- src/quirks.jl | 46 +++++++++++++++++++++ src/target.jl | 88 ++++++++++++++++++++++++++++++++--------- 5 files changed, 147 insertions(+), 46 deletions(-) create mode 100644 src/quirks.jl diff --git a/Project.toml b/Project.toml index 12f6833..4b60b47 100644 --- a/Project.toml +++ b/Project.toml @@ -9,8 +9,10 @@ GPUCompiler = "61eb1bfa-7361-4325-ad38-22787b887f55" LLVM = "929cbde3-209d-540e-8aea-75f648917ca0" Libdl = "8f399da3-3557-5675-b5ff-fb832c97cbdb" Serialization = "9e88b42a-f829-5b0c-bbe9-9e923198166b" +StaticTools = "86c06d3c-3f03-46de-9781-57580aa96d0a" [compat] GPUCompiler = "0.16, 0.17" LLVM = "4.8" +StaticTools ="0.8" julia = "1.7" diff --git a/src/StaticCompiler.jl b/src/StaticCompiler.jl index 5a447cd..0255d2c 100644 --- a/src/StaticCompiler.jl +++ b/src/StaticCompiler.jl @@ -8,6 +8,8 @@ using Libdl: Libdl, dlsym, dlopen using Base: RefValue using Serialization: serialize, deserialize using Clang_jll: clang +using StaticTools +using StaticTools: @symbolcall, @c_str, println export compile, load_function, compile_shlib, compile_executable export native_code_llvm, native_code_typed, native_llvm_module, native_code_native @@ -16,6 +18,7 @@ include("target.jl") include("pointer_patching.jl") include("code_loading.jl") include("optimize.jl") +include("quirks.jl") """ @@ -95,7 
+98,7 @@ function compile(f, _tt, path::String = tempname(); name = GPUCompiler.safe_nam isconcretetype(rt) || error("$f on $_tt did not infer to a concrete type. Got $rt") f_wrap!(out::Ref, args::Ref{<:Tuple}) = (out[] = f(args[]...); nothing) - _, _, table = generate_obj(f_wrap!, Tuple{RefValue{rt}, RefValue{tt}}, path, name; opt_level, strip_llvm, strip_asm, filename, kwargs...) + _, _, table = generate_obj(f_wrap!, Tuple{RefValue{rt}, RefValue{tt}}, false, path, name; opt_level, strip_llvm, strip_asm, filename, kwargs...) lf = LazyStaticCompiledFunction{rt, tt}(Symbol(f), path, name, filename, table) cjl_path = joinpath(path, "$filename.cjl") @@ -131,15 +134,15 @@ shell> tree \$path 0 directories, 1 file ``` """ -function generate_obj(f, tt, path::String = tempname(), name = GPUCompiler.safe_name(repr(f)), filenamebase::String="obj"; +function generate_obj(f, tt, external = true, path::String = tempname(), name = GPUCompiler.safe_name(repr(f)), filenamebase::String="obj"; strip_llvm = false, strip_asm = true, opt_level=3, kwargs...) mkpath(path) obj_path = joinpath(path, "$filenamebase.o") - tm = GPUCompiler.llvm_machine(NativeCompilerTarget()) - job, kwargs = native_job(f, tt; name, kwargs...) + tm = GPUCompiler.llvm_machine(external ? ExternalNativeCompilerTarget() : NativeCompilerTarget()) + job, kwargs = native_job(f, tt, external; name, kwargs...) #Get LLVM to generated a module of code for us. We don't want GPUCompiler's optimization passes. 
mod, meta = GPUCompiler.JuliaContext() do context GPUCompiler.codegen(:llvm, job; strip=strip_llvm, only_entry=false, validate=false, optimize=false, ctx=context) @@ -267,7 +270,7 @@ function compile_shlib(f, types=(), path::String="./", name=GPUCompiler.safe_nam # Would be nice to use a compiler pass or something to check if there are any heap allocations or references to globals # Keep an eye on https://github.com/JuliaLang/julia/pull/43747 for this - generate_shlib(f, tt, path, name, filename; cflags=cflags, kwargs...) + generate_shlib(f, tt, true, path, name, filename; cflags=cflags, kwargs...) joinpath(abspath(path), filename * "." * Libdl.dlext) end @@ -276,7 +279,7 @@ function generate_shlib_fptr(f, tt, path::String=tempname(), name = GPUCompiler. temp::Bool=true, kwargs...) - generate_shlib(f, tt, path, name; kwargs...) + generate_shlib(f, tt, false, path, name; kwargs...) lib_path = joinpath(abspath(path), "$filename.$(Libdl.dlext)") ptr = Libdl.dlopen(lib_path, Libdl.RTLD_LOCAL) fptr = Libdl.dlsym(ptr, "julia_$name") @@ -358,7 +361,7 @@ function generate_executable(f, tt, path=tempname(), name=GPUCompiler.safe_name( mkpath(path) obj_path = joinpath(path, "$filename.o") exec_path = joinpath(path, filename) - job, kwargs = native_job(f, tt; name, kwargs...) + job, kwargs = native_job(f, tt, true; name, kwargs...) obj, _ = GPUCompiler.codegen(:obj, job; strip=true, only_entry=false, validate=false) # Write to file @@ -425,7 +428,7 @@ julia> ccall(StaticCompiler.generate_shlib_fptr(path, name), Float64, (Int64,), 5.256496109495593 ``` """ -function generate_shlib(f, tt, path=tempname(), name=GPUCompiler.safe_name(repr(f)), filename=name; +function generate_shlib(f, tt, external = true, path=tempname(), name=GPUCompiler.safe_name(repr(f)), filename=name; cflags=``, kwargs... 
) @@ -433,7 +436,7 @@ function generate_shlib(f, tt, path=tempname(), name=GPUCompiler.safe_name(repr( mkpath(path) obj_path = joinpath(path, "$filename.o") lib_path = joinpath(path, "$filename.$(Libdl.dlext)") - job, kwargs = native_job(f, tt; name, kwargs...) + job, kwargs = native_job(f, tt, external; name, kwargs...) obj, _ = GPUCompiler.codegen(:obj, job; strip=true, only_entry=false, validate=false) open(obj_path, "w") do io @@ -449,18 +452,18 @@ function generate_shlib(f, tt, path=tempname(), name=GPUCompiler.safe_name(repr( end function native_code_llvm(@nospecialize(func), @nospecialize(types); kwargs...) - job, kwargs = native_job(func, types; kwargs...) + job, kwargs = native_job(func, types, true; kwargs...) GPUCompiler.code_llvm(stdout, job; kwargs...) end function native_code_typed(@nospecialize(func), @nospecialize(types); kwargs...) - job, kwargs = native_job(func, types; kwargs...) + job, kwargs = native_job(func, types, true; kwargs...) GPUCompiler.code_typed(job; kwargs...) end # Return an LLVM module function native_llvm_module(f, tt, name = GPUCompiler.safe_name(repr(f)); kwargs...) - job, kwargs = native_job(f, tt; name, kwargs...) + job, kwargs = native_job(f, tt, true; name, kwargs...) m, _ = GPUCompiler.JuliaContext() do context GPUCompiler.codegen(:llvm, job; strip=true, only_entry=false, validate=false, ctx=context) end @@ -468,7 +471,7 @@ function native_llvm_module(f, tt, name = GPUCompiler.safe_name(repr(f)); kwargs end function native_code_native(@nospecialize(f), @nospecialize(tt), name = GPUCompiler.safe_name(repr(f)); kwargs...) - job, kwargs = native_job(f, tt; name, kwargs...) + job, kwargs = native_job(f, tt, true; name, kwargs...) GPUCompiler.code_native(stdout, job; kwargs...) end @@ -498,7 +501,7 @@ function native_llvm_module(funcs::Array; demangle = false, kwargs...) 
return mod end -function generate_obj(funcs::Array, path::String = tempname(), filenamebase::String="obj"; +function generate_obj(funcs::Array, external, path::String = tempname(), filenamebase::String="obj"; demangle =false, strip_llvm = false, strip_asm = true, @@ -507,7 +510,7 @@ function generate_obj(funcs::Array, path::String = tempname(), filenamebase::Str f,tt = funcs[1] mkpath(path) obj_path = joinpath(path, "$filenamebase.o") - fakejob, kwargs = native_job(f,tt, kwargs...) + fakejob, kwargs = native_job(f,tt, external, kwargs...) mod = native_llvm_module(funcs; demangle = demangle, kwargs...) obj, _ = GPUCompiler.emit_asm(fakejob, mod; strip=strip_asm, validate=false, format=LLVM.API.LLVMObjectFile) open(obj_path, "w") do io @@ -516,7 +519,7 @@ function generate_obj(funcs::Array, path::String = tempname(), filenamebase::Str path, obj_path end -function generate_shlib(funcs::Array, path::String = tempname(), filename::String="libfoo"; +function generate_shlib(funcs::Array, external = true, path::String = tempname(), filename::String="libfoo"; demangle=false, cflags=``, kwargs... @@ -524,7 +527,7 @@ function generate_shlib(funcs::Array, path::String = tempname(), filename::Strin lib_path = joinpath(path, "$filename.$(Libdl.dlext)") - _,obj_path = generate_obj(funcs, path, filename; demangle=demangle, kwargs...) + _,obj_path = generate_obj(funcs, external, path, filename; demangle=demangle, kwargs...) # Pick a Clang cc = Sys.isapple() ? `cc` : clang() # Compile! @@ -550,7 +553,7 @@ function compile_shlib(funcs::Array, path::String="./"; # Would be nice to use a compiler pass or something to check if there are any heap allocations or references to globals # Keep an eye on https://github.com/JuliaLang/julia/pull/43747 for this - generate_shlib(funcs, path, filename; demangle=demangle, cflags=cflags, kwargs...) + generate_shlib(funcs, true, path, filename; demangle=demangle, cflags=cflags, kwargs...) joinpath(abspath(path), filename * "." 
* Libdl.dlext) end diff --git a/src/pointer_patching.jl b/src/pointer_patching.jl index 4c59f4f..822d8e9 100644 --- a/src/pointer_patching.jl +++ b/src/pointer_patching.jl @@ -1,11 +1,11 @@ function relocation_table!(mod) i64 = LLVM.IntType(64; ctx=LLVM.context(mod)) d = IdDict{Any, Tuple{String, LLVM.GlobalVariable}}() - + for func ∈ LLVM.functions(mod), bb ∈ LLVM.blocks(func), inst ∈ LLVM.instructions(bb) - if isa(inst, LLVM.LoadInst) && occursin("inttoptr", string(inst)) + if isa(inst, LLVM.LoadInst) && occursin("inttoptr", string(inst)) get_pointers!(d, mod, inst) - elseif isa(inst, LLVM.StoreInst) && occursin("inttoptr", string(inst)) + elseif isa(inst, LLVM.StoreInst) && occursin("inttoptr", string(inst)) @debug "Relocating StoreInst" inst get_pointers!(d, mod, inst) elseif inst isa LLVM.RetInst && occursin("inttoptr", string(inst)) @@ -101,7 +101,7 @@ function relocation_table!(mod) end end - if length(fn) > 1 && fromC + if length(fn) > 1 && fromC mod = LLVM.parent(LLVM.parent(LLVM.parent(inst))) lfn = LLVM.API.LLVMGetNamedFunction(mod, fn) @@ -139,7 +139,7 @@ function get_pointers!(d, mod, inst) else gv_name = GPUCompiler.safe_name(String(gensym(repr(Core.Typeof(val))))) gv = LLVM.GlobalVariable(mod, llvmeltype(arg), gv_name, LLVM.addrspace(llvmtype(arg))) - + LLVM.extinit!(gv, true) LLVM.API.LLVMSetOperand(inst, i-1, gv) @@ -157,7 +157,7 @@ llvmeltype(x::LLVM.Value) = eltype(LLVM.llvmtype(x)) function pointer_patching_diff(f, tt, path1=tempname(), path2=tempname(); show_reloc_table=false) tm = GPUCompiler.llvm_machine(NativeCompilerTarget()) - job, kwargs = native_job(f, tt; name=GPUCompiler.safe_name(repr(f))) + job, kwargs = native_job(f, tt, false; name=GPUCompiler.safe_name(repr(f))) #Get LLVM to generated a module of code for us. We don't want GPUCompiler's optimization passes. 
mod, meta = GPUCompiler.JuliaContext() do context GPUCompiler.codegen(:llvm, job; strip=true, only_entry=false, validate=false, optimize=false, ctx=context) @@ -165,15 +165,15 @@ function pointer_patching_diff(f, tt, path1=tempname(), path2=tempname(); show_r # Use Enzyme's annotation and optimization pipeline annotate!(mod) optimize!(mod, tm) - + s1 = string(mod) write(path1, s1) - + d = StaticCompiler.relocation_table!(mod) if show_reloc_table @show d end - + s2 = string(mod) write(path2, s2) diff --git a/src/quirks.jl b/src/quirks.jl new file mode 100644 index 0000000..ffa25ac --- /dev/null +++ b/src/quirks.jl @@ -0,0 +1,46 @@ +libcexit(x::Int32) = @symbolcall exit(x::Int32)::Nothing +macro print_and_throw(err) + quote + println($err) + libcexit(Int32(1)) + end +end + +# math.jl +@device_override @noinline Base.Math.throw_complex_domainerror(f::Symbol, x) = + @print_and_throw c"This operation requires a complex input to return a complex result" +@device_override @noinline Base.Math.throw_exp_domainerror(f::Symbol, x) = + @print_and_throw c"Exponentiation yielding a complex result requires a complex argument" + +# intfuncs.jl +@device_override @noinline Base.throw_domerr_powbysq(::Any, p) = + @print_and_throw c"Cannot raise an integer to a negative power" +@device_override @noinline Base.throw_domerr_powbysq(::Integer, p) = + @print_and_throw c"Cannot raise an integer to a negative power" +@device_override @noinline Base.throw_domerr_powbysq(::AbstractMatrix, p) = + @print_and_throw c"Cannot raise an integer to a negative power" +@device_override @noinline Base.__throw_gcd_overflow(a, b) = + @print_and_throw c"gcd overflow" + +# checked.jl +@device_override @noinline Base.Checked.throw_overflowerr_binaryop(op, x, y) = + @print_and_throw c"Binary operation overflowed" +@device_override @noinline Base.Checked.throw_overflowerr_negation(op, x, y) = + @print_and_throw c"Negation overflowed" +@device_override function 
Base.Checked.checked_abs(x::Base.Checked.SignedInt) + r = ifelse(x < 0, -x, x) + r < 0 && @print_and_throw(c"checked arithmetic: cannot compute |x|") + r +end + +# boot.jl +@device_override @noinline Core.throw_inexacterror(f::Symbol, ::Type{T}, val) where {T} = + @print_and_throw c"Inexact conversion" + +# abstractarray.jl +@device_override @noinline Base.throw_boundserror(A, I) = + @print_and_throw c"Out-of-bounds array access" + +# trig.jl +@device_override @noinline Base.Math.sincos_domain_error(x) = + @print_and_throw c"sincos(x) is only defined for finite x." diff --git a/src/target.jl b/src/target.jl index 2efb191..a14af90 100644 --- a/src/target.jl +++ b/src/target.jl @@ -1,22 +1,39 @@ -Base.@kwdef struct NativeCompilerTarget <: GPUCompiler.AbstractCompilerTarget - cpu::String=(LLVM.version() < v"8") ? "" : unsafe_string(LLVM.API.LLVMGetHostCPUName()) - features::String=(LLVM.version() < v"8") ? "" : unsafe_string(LLVM.API.LLVMGetHostCPUFeatures()) +@static if isdefined(Base.Experimental, Symbol("@overlay")) + Base.Experimental.@MethodTable(method_table) +else + const method_table = nothing end -GPUCompiler.llvm_triple(::NativeCompilerTarget) = Sys.MACHINE +const overrides = quote end -function GPUCompiler.llvm_machine(target::NativeCompilerTarget) - triple = GPUCompiler.llvm_triple(target) - t = LLVM.Target(triple=triple) +macro device_override(ex) + ex = macroexpand(__module__, ex) + if Meta.isexpr(ex, :call) + @show ex = eval(ex) + error() + end + code = quote + $GPUCompiler.@override(StaticCompiler.method_table, $ex) + end + if isdefined(Base.Experimental, Symbol("@overlay")) + return esc(code) + else + push!(overrides, code) + return + end +end - tm = LLVM.TargetMachine(t, triple, target.cpu, target.features, reloc=LLVM.API.LLVMRelocPIC) - GPUCompiler.asm_verbosity!(tm, true) - return tm +Base.@kwdef struct NativeCompilerTarget <: GPUCompiler.AbstractCompilerTarget + cpu::String=(LLVM.version() < v"8") ? 
"" : unsafe_string(LLVM.API.LLVMGetHostCPUName()) + features::String=(LLVM.version() < v"8") ? "" : unsafe_string(LLVM.API.LLVMGetHostCPUFeatures()) end -GPUCompiler.runtime_slug(job::GPUCompiler.CompilerJob{NativeCompilerTarget}) = "native_$(job.target.cpu)-$(hash(job.target.features))" +Base.@kwdef struct ExternalNativeCompilerTarget <: GPUCompiler.AbstractCompilerTarget + cpu::String=(LLVM.version() < v"8") ? "" : unsafe_string(LLVM.API.LLVMGetHostCPUName()) + features::String=(LLVM.version() < v"8") ? "" : unsafe_string(LLVM.API.LLVMGetHostCPUFeatures()) +end module StaticRuntime # the runtime library @@ -30,17 +47,50 @@ end struct StaticCompilerParams <: GPUCompiler.AbstractCompilerParams end -GPUCompiler.runtime_module(::GPUCompiler.CompilerJob{<:Any,StaticCompilerParams}) = StaticRuntime -GPUCompiler.runtime_module(::GPUCompiler.CompilerJob{NativeCompilerTarget}) = StaticRuntime -GPUCompiler.runtime_module(::GPUCompiler.CompilerJob{NativeCompilerTarget, StaticCompilerParams}) = StaticRuntime +for target in (:NativeCompilerTarget, :ExternalNativeCompilerTarget) + @eval begin + GPUCompiler.llvm_triple(::$target) = Sys.MACHINE + + function GPUCompiler.llvm_machine(target::$target) + triple = GPUCompiler.llvm_triple(target) + + t = LLVM.Target(triple=triple) + + tm = LLVM.TargetMachine(t, triple, target.cpu, target.features, reloc=LLVM.API.LLVMRelocPIC) + GPUCompiler.asm_verbosity!(tm, true) + + return tm + end + + GPUCompiler.runtime_slug(job::GPUCompiler.CompilerJob{$target}) = "native_$(job.target.cpu)-$(hash(job.target.features))" + + GPUCompiler.runtime_module(::GPUCompiler.CompilerJob{$target}) = StaticRuntime + GPUCompiler.runtime_module(::GPUCompiler.CompilerJob{$target, StaticCompilerParams}) = StaticRuntime + + + GPUCompiler.can_throw(job::GPUCompiler.CompilerJob{$target, StaticCompilerParams}) = true + GPUCompiler.can_throw(job::GPUCompiler.CompilerJob{$target}) = true + end +end + 
+GPUCompiler.method_table(@nospecialize(job::GPUCompiler.CompilerJob{ExternalNativeCompilerTarget})) = method_table +GPUCompiler.method_table(@nospecialize(job::GPUCompiler.CompilerJob{ExternalNativeCompilerTarget, StaticCompilerParams})) = method_table + +function native_job(@nospecialize(func::Function), @nospecialize(types::Type), external::Bool; + name = GPUCompiler.safe_name(repr(func)), + kernel::Bool = false, + kwargs... + ) + source = GPUCompiler.FunctionSpec(func, types, kernel, name) + target = external ? ExternalNativeCompilerTarget() : NativeCompilerTarget() + params = StaticCompilerParams() + GPUCompiler.CompilerJob(target, source, params), kwargs +end -GPUCompiler.can_throw(job::GPUCompiler.CompilerJob{<:Any,StaticCompilerParams}) = true -GPUCompiler.can_throw(job::GPUCompiler.CompilerJob{NativeCompilerTarget, StaticCompilerParams}) = true -GPUCompiler.can_throw(job::GPUCompiler.CompilerJob{NativeCompilerTarget}) = true -function native_job(@nospecialize(func), @nospecialize(types); kernel::Bool=false, name=GPUCompiler.safe_name(repr(func)), kwargs...) +function native_job(@nospecialize(func), @nospecialize(types), external; kernel::Bool=false, name=GPUCompiler.safe_name(repr(func)), kwargs...) source = GPUCompiler.FunctionSpec(func, Base.to_tuple_type(types), kernel, name) - target = NativeCompilerTarget() + target = external ? ExternalNativeCompilerTarget() : NativeCompilerTarget() params = StaticCompilerParams() GPUCompiler.CompilerJob(target, source, params), kwargs end From 080181f40fa3c99d3efab3309d3e24f5988cb21b Mon Sep 17 00:00:00 2001 From: "C. 
Brenhin Keller" Date: Tue, 14 Mar 2023 21:21:39 -0400 Subject: [PATCH 107/159] Error handling tests (#98) * Add simple core and integration test for new error handling code * Bump version to 0.4.7 --- Project.toml | 2 +- test/scripts/throw_errors.jl | 15 +++++++++++++++ test/testcore.jl | 13 +++++++++++++ test/testintegration.jl | 35 +++++++++++++++++++++++++++++++++++ 4 files changed, 64 insertions(+), 1 deletion(-) create mode 100644 test/scripts/throw_errors.jl diff --git a/Project.toml b/Project.toml index 4b60b47..58fa736 100644 --- a/Project.toml +++ b/Project.toml @@ -1,7 +1,7 @@ name = "StaticCompiler" uuid = "81625895-6c0f-48fc-b932-11a18313743c" authors = ["Tom Short and contributors"] -version = "0.4.6" +version = "0.4.7" [deps] Clang_jll = "0ee61d77-7f21-5576-8119-9fcc46b10100" diff --git a/test/scripts/throw_errors.jl b/test/scripts/throw_errors.jl new file mode 100644 index 0000000..fb823a0 --- /dev/null +++ b/test/scripts/throw_errors.jl @@ -0,0 +1,15 @@ +using StaticCompiler +using StaticTools + +function maybe_throw(argc::Int, argv::Ptr{Ptr{UInt8}}) + printf(c"Argument count is %d:\n", argc) + argc > 1 || return printf(stderrp(), c"Too few command-line arguments\n") + n = argparse(Int64, argv, 2) # First command-line argument + printf((c"Input:\n", n, c"\n")) + printf(c"\nAttempting to represent input as UInt64:\n") + x = UInt64(n) + printf(x) +end + +# Attempt to compile +path = compile_executable(maybe_throw, (Int64, Ptr{Ptr{UInt8}}), "./") diff --git a/test/testcore.jl b/test/testcore.jl index fe2d85e..e74cfbe 100644 --- a/test/testcore.jl +++ b/test/testcore.jl @@ -291,6 +291,19 @@ end r = run(`$filepath Hello, world!`); @test isa(r, Base.Process) @test r.exitcode == 0 + + # Compile a function that definitely fails + @inline foo_err() = UInt64(-1) + filepath = compile_executable(print_args, (Int, Ptr{Ptr{UInt8}}), tempdir()) + @test isfile(filepath) + status = -1 + try + status = run(`filepath`) + catch + @info "foo_err: Task failed 
successfully!" + end + @test status === -1 + end @noinline square(n) = n*n diff --git a/test/testintegration.jl b/test/testintegration.jl index 6eb2c08..15012df 100644 --- a/test/testintegration.jl +++ b/test/testintegration.jl @@ -242,6 +242,41 @@ @test isa(status, Base.Process) && status.exitcode == 0 end + ## --- Test error throwing + + let + # Compile... + status = -1 + try + isfile("maybe_throw") && rm("maybe_throw") + status = run(`$jlpath --startup=no --compile=min $testpath/scripts/throw_errors.jl`) + catch e + @warn "Could not compile $testpath/scripts/throw_errors.jl" + println(e) + end + @test isa(status, Base.Process) + @test isa(status, Base.Process) && status.exitcode == 0 + + # Run... + println("Error handling:") + status = -1 + try + status = run(`./maybe_throw 10`) + catch e + @warn "Could not run $(scratch)/maybe_throw" + println(e) + end + @test isa(status, Base.Process) + @test isa(status, Base.Process) && status.exitcode == 0 + status = -1 + try + status = run(`./maybe_throw -10`) + catch e + @info "maybe_throw: task failed sucessfully!" + end + @test status === -1 + end + ## --- Test interop @static if Sys.isbsd() From 156210c8a35f5b4b159791c2e9654d3758b45b67 Mon Sep 17 00:00:00 2001 From: "C. 
Brenhin Keller" Date: Mon, 20 Mar 2023 19:39:53 -0400 Subject: [PATCH 108/159] Add warnings if return type is not a native type in standalone binaries/shlibs (#102) * Add warnings for if return type is not a native type in `compile_shlib` and `compile_executable` * Make warning spookier * Bump version to 0.4.8 --- Project.toml | 2 +- src/StaticCompiler.jl | 27 +++++++++++++++++---------- 2 files changed, 18 insertions(+), 11 deletions(-) diff --git a/Project.toml b/Project.toml index 58fa736..d86a206 100644 --- a/Project.toml +++ b/Project.toml @@ -1,7 +1,7 @@ name = "StaticCompiler" uuid = "81625895-6c0f-48fc-b932-11a18313743c" authors = ["Tom Short and contributors"] -version = "0.4.7" +version = "0.4.8" [deps] Clang_jll = "0ee61d77-7f21-5576-8119-9fcc46b10100" diff --git a/src/StaticCompiler.jl b/src/StaticCompiler.jl index 0255d2c..b4ef719 100644 --- a/src/StaticCompiler.jl +++ b/src/StaticCompiler.jl @@ -94,7 +94,7 @@ function compile(f, _tt, path::String = tempname(); name = GPUCompiler.safe_nam tt = Base.to_tuple_type(_tt) isconcretetype(tt) || error("input type signature $_tt is not concrete") - rt = only(native_code_typed(f, tt))[2] + rt = last(only(native_code_typed(f, tt))) isconcretetype(rt) || error("$f on $_tt did not infer to a concrete type. Got $rt") f_wrap!(out::Ref, args::Ref{<:Tuple}) = (out[] = f(args[]...); nothing) @@ -236,10 +236,13 @@ function compile_executable(f, types=(), path::String="./", name=GPUCompiler.saf ) tt = Base.to_tuple_type(types) - # tt == Tuple{} || tt == Tuple{Int, Ptr{Ptr{UInt8}}} || error("input type signature $types must be either () or (Int, Ptr{Ptr{UInt8}})") + isexecutableargtype = tt == Tuple{} || tt == Tuple{Int, Ptr{Ptr{UInt8}}} + isexecutableargtype || @warn "input type signature $types should be either `()` or `(Int, Ptr{Ptr{UInt8}})` for standard executables" - rt = only(native_code_typed(f, tt))[2] - isconcretetype(rt) || error("$f$types did not infer to a concrete type. 
Got $rt") + rt = last(only(native_code_typed(f, tt))) + isconcretetype(rt) || error("`$f$types` did not infer to a concrete type. Got `$rt`") + nativetype = isprimitivetype(rt) || isa(rt, Ptr) + nativetype || @warn "Return type `$rt` of `$f$types` does not appear to be a native type. Consider returning only a single value of a native machine type (i.e., a single float, int/uint, bool, or pointer). \n\nIgnoring this warning may result in Undefined Behavior!" # Would be nice to use a compiler pass or something to check if there are any heap allocations or references to globals # Keep an eye on https://github.com/JuliaLang/julia/pull/43747 for this @@ -263,10 +266,12 @@ function compile_shlib(f, types=(), path::String="./", name=GPUCompiler.safe_nam ) tt = Base.to_tuple_type(types) - isconcretetype(tt) || error("input type signature $types is not concrete") + isconcretetype(tt) || error("input type signature `$types` is not concrete") - rt = only(native_code_typed(f, tt))[2] - isconcretetype(rt) || error("$f$types did not infer to a concrete type. Got $rt") + rt = last(only(native_code_typed(f, tt))) + isconcretetype(rt) || error("`$f$types` did not infer to a concrete type. Got `$rt`") + nativetype = isprimitivetype(rt) || isa(rt, Ptr) + nativetype || @warn "Return type `$rt` of `$f$types` does not appear to be a native type. Consider returning only a single value of a native machine type (i.e., a single float, int/uint, bool, or pointer). \n\nIgnoring this warning may result in Undefined Behavior!" 
# Would be nice to use a compiler pass or something to check if there are any heap allocations or references to globals # Keep an eye on https://github.com/JuliaLang/julia/pull/43747 for this @@ -544,10 +549,12 @@ function compile_shlib(funcs::Array, path::String="./"; for func in funcs f, types = func tt = Base.to_tuple_type(types) - isconcretetype(tt) || error("input type signature $types is not concrete") + isconcretetype(tt) || error("input type signature `$types` is not concrete") - rt = only(native_code_typed(f, tt))[2] - isconcretetype(rt) || error("$f$types did not infer to a concrete type. Got $rt") + rt = last(only(native_code_typed(f, tt))) + isconcretetype(rt) || error("`$f$types` did not infer to a concrete type. Got `$rt`") + nativetype = isprimitivetype(rt) || isa(rt, Ptr) + nativetype || @warn "Return type `$rt` of `$f$types` does not appear to be a native type. Consider returning only a single value of a native machine type (i.e., a single float, int/uint, bool, or pointer). \n\nIgnoring this warning may result in Undefined Behavior!" end # Would be nice to use a compiler pass or something to check if there are any heap allocations or references to globals From 7788852a5efd5e33f4239a892eb5714dced0e384 Mon Sep 17 00:00:00 2001 From: "C. Brenhin Keller" Date: Mon, 20 Mar 2023 22:58:12 -0400 Subject: [PATCH 109/159] Cleanup (#104) * Reorganize main compilation functions, update docstrings and README * Document and export `@device_overload` * Typo --- README.md | 16 ++-- src/StaticCompiler.jl | 204 ++++++++++++++++++++++++------------------ src/target.jl | 14 ++- test/testcore.jl | 11 ++- 4 files changed, 145 insertions(+), 100 deletions(-) diff --git a/README.md b/README.md index 6c08808..6269635 100644 --- a/README.md +++ b/README.md @@ -15,7 +15,10 @@ using Pkg Pkg.add("StaticCompiler") ``` -There are two main ways to use this package. 
The first is via the `compile` function, which can be used when you want to compile a Julia function for later use from within Julia: +There are two main ways to use this package: + +### Linked compilation +The first option is via the `compile` function, which can be used when you want to compile a Julia function for later use from within Julia: ```julia julia> using StaticCompiler @@ -43,7 +46,8 @@ julia> fib_compiled(10) ``` See the file `tests/runtests.jl` for some examples of functions that work with `compile` (and some that don't, marked with `@test_skip`). -The second way to use this package is via the `compile_executable` and `compile_shlib` functions, for use when you want to compile a Julia function to a native executable or shared library for use from outside of Julia: +### Standalone compilation +The second way to use this package is via the `compile_executable` and `compile_shlib` functions, for when you want to compile a Julia function to a native executable or shared library for use from outside of Julia: ```julia julia> using StaticCompiler, StaticTools @@ -68,7 +72,7 @@ This package uses the [GPUCompiler package](https://github.com/JuliaGPU/GPUCompi ## Limitations * GC-tracked allocations and global variables do work with `compile`, but the way they are implemented is brittle and can be dangerous. Allocate with care. -* GC-tracked allocations and global variables do *not* work with `compile_executable` (yet). -* Type unstable code is not yet supported. -* Doesn't currently work on Windows. -* If you find any other limitations, let us know. There's probably lots. +* GC-tracked allocations and global variables do *not* work with `compile_executable` or `compile_shlib`. This has some interesting consequences, including that all functions _within_ the function you want to compile must either be inlined or return only native types (otherwise Julia would have to allocate a place to put the results, which will fail). 
+* Since error handling relies on libjulia, you can only throw errors from standalone-compiled (`compile_executable` / `compile_shlib`) code if an explicit overload has been defined for that particular error with `@device_override` (see [quirks.jl](src/quirks.jl)). +* Type instability. Type unstable code cannot currently be statically compiled via this package. +* Doesn't work on Windows. PRs welcome. diff --git a/src/StaticCompiler.jl b/src/StaticCompiler.jl index b4ef719..ca8f696 100644 --- a/src/StaticCompiler.jl +++ b/src/StaticCompiler.jl @@ -13,6 +13,7 @@ using StaticTools: @symbolcall, @c_str, println export compile, load_function, compile_shlib, compile_executable export native_code_llvm, native_code_typed, native_llvm_module, native_code_native +export @device_override, @print_and_throw include("target.jl") include("pointer_patching.jl") @@ -173,7 +174,11 @@ end """ ```julia -compile_executable(f, types::Tuple, path::String, name::String=repr(f); filename::String=name, kwargs...) +compile_executable(f::Function, types::Tuple, path::String, [name::String=repr(f)]; + filename::String=name, + cflags=``, # Specify libraries you would like to link against, and other compiler options here + kwargs... +) ``` Attempt to compile a standalone executable that runs function `f` with a type signature given by the tuple of `types`. @@ -207,7 +212,7 @@ julia> function print_args(argc::Int, argv::Ptr{Ptr{UInt8}}) end julia> compile_executable(print_args, (Int, Ptr{Ptr{UInt8}})) -"/Users/foo/code/StaticCompiler.jl/print_args" +""/Users/user/print_args"" shell> ./print_args 1 2 3 4 Five ./print_args @@ -223,13 +228,16 @@ julia> using StaticTools # So you don't have to define `puts` and friends every julia> hello() = println(c"Hello, world!") # c"..." 
makes a stack-allocated StaticString julia> compile_executable(hello) -"/Users/foo/code/StaticCompiler.jl/hello" +"/Users/cbkeller/hello" + +shell> ls -alh hello +-rwxr-xr-x 1 user staff 33K Mar 20 21:11 hello shell> ./hello Hello, world! ``` """ -function compile_executable(f, types=(), path::String="./", name=GPUCompiler.safe_name(repr(f)); +function compile_executable(f::Function, types=(), path::String="./", name=GPUCompiler.safe_name(repr(f)); filename=name, cflags=``, kwargs... @@ -244,9 +252,6 @@ function compile_executable(f, types=(), path::String="./", name=GPUCompiler.saf nativetype = isprimitivetype(rt) || isa(rt, Ptr) nativetype || @warn "Return type `$rt` of `$f$types` does not appear to be a native type. Consider returning only a single value of a native machine type (i.e., a single float, int/uint, bool, or pointer). \n\nIgnoring this warning may result in Undefined Behavior!" - # Would be nice to use a compiler pass or something to check if there are any heap allocations or references to globals - # Keep an eye on https://github.com/JuliaLang/julia/pull/43747 for this - generate_executable(f, tt, path, name, filename; cflags=cflags, kwargs...) joinpath(abspath(path), filename) @@ -255,11 +260,35 @@ end """ ```julia -compile_shlib(f, types::Tuple, path::String, name::String=repr(f); filename::String=name, kwargs...) +compile_shlib(f::Function, types::Tuple, [path::String="./"], [name::String=repr(f)]; filename::String=name, cflags=``, kwargs...) +compile_shlib(funcs::Array, [path::String="./"]; filename="libfoo", demangle=false, cflags=``, kwargs...) ``` As `compile_executable`, but compiling to a standalone `.dylib`/`.so` shared library. 
+ +### Examples +```julia +julia> using StaticCompiler, LoopVectorization + +julia> function test(n) + r = 0.0 + @turbo for i=1:n + r += log(sqrt(i)) + end + return r/n + end +test (generic function with 1 method) + +julia> compile_shlib(test, (Int,)) +"/Users/user/test.dylib" + +julia> test(100_000) +5.2564961094956075 + +julia> ccall(("julia_test", "test.dylib"), Float64, (Int64,), 100_000) +5.2564961094956075 +``` """ -function compile_shlib(f, types=(), path::String="./", name=GPUCompiler.safe_name(repr(f)); +function compile_shlib(f::Function, types=(), path::String="./", name=GPUCompiler.safe_name(repr(f)); filename=name, cflags=``, kwargs... @@ -273,28 +302,34 @@ function compile_shlib(f, types=(), path::String="./", name=GPUCompiler.safe_nam nativetype = isprimitivetype(rt) || isa(rt, Ptr) nativetype || @warn "Return type `$rt` of `$f$types` does not appear to be a native type. Consider returning only a single value of a native machine type (i.e., a single float, int/uint, bool, or pointer). \n\nIgnoring this warning may result in Undefined Behavior!" - # Would be nice to use a compiler pass or something to check if there are any heap allocations or references to globals - # Keep an eye on https://github.com/JuliaLang/julia/pull/43747 for this generate_shlib(f, tt, true, path, name, filename; cflags=cflags, kwargs...) joinpath(abspath(path), filename * "." * Libdl.dlext) end +# As above, but taking an array of functions and returning a single shlib +function compile_shlib(funcs::Array, path::String="./"; + filename="libfoo", + demangle=false, + cflags=``, + kwargs... + ) + for func in funcs + f, types = func + tt = Base.to_tuple_type(types) + isconcretetype(tt) || error("input type signature `$types` is not concrete") -function generate_shlib_fptr(f, tt, path::String=tempname(), name = GPUCompiler.safe_name(repr(f)), filename::String=name; - temp::Bool=true, - kwargs...) - - generate_shlib(f, tt, false, path, name; kwargs...) 
- lib_path = joinpath(abspath(path), "$filename.$(Libdl.dlext)") - ptr = Libdl.dlopen(lib_path, Libdl.RTLD_LOCAL) - fptr = Libdl.dlsym(ptr, "julia_$name") - @assert fptr != C_NULL - if temp - atexit(()->rm(path; recursive=true)) + rt = last(only(native_code_typed(f, tt))) + isconcretetype(rt) || error("`$f$types` did not infer to a concrete type. Got `$rt`") + nativetype = isprimitivetype(rt) || isa(rt, Ptr) + nativetype || @warn "Return type `$rt` of `$f$types` does not appear to be a native type. Consider returning only a single value of a native machine type (i.e., a single float, int/uint, bool, or pointer). \n\nIgnoring this warning may result in Undefined Behavior!" end - fptr + + generate_shlib(funcs, true, path, filename; demangle=demangle, cflags=cflags, kwargs...) + + joinpath(abspath(path), filename * "." * Libdl.dlext) end + """ ```julia generate_shlib_fptr(path::String, name) @@ -338,25 +373,40 @@ function generate_shlib_fptr(path::String, name, filename::String=name) @assert fptr != C_NULL fptr end +# As above, but also compile (maybe remove this method in the future?) +function generate_shlib_fptr(f, tt, path::String=tempname(), name = GPUCompiler.safe_name(repr(f)), filename::String=name; + temp::Bool=true, + kwargs...) + + generate_shlib(f, tt, false, path, name; kwargs...) + lib_path = joinpath(abspath(path), "$filename.$(Libdl.dlext)") + ptr = Libdl.dlopen(lib_path, Libdl.RTLD_LOCAL) + fptr = Libdl.dlsym(ptr, "julia_$name") + @assert fptr != C_NULL + if temp + atexit(()->rm(path; recursive=true)) + end + fptr +end """ ```julia generate_executable(f, tt, path::String, name, filename=string(name); kwargs...) ``` Attempt to compile a standalone executable that runs `f`. +Low-level interface; you should generally use `compile_executable` instead. 
### Examples ```julia -julia> function test(n) - r = 0.0 - for i=1:n - r += log(sqrt(i)) - end - return r/n - end -test (generic function with 1 method) +julia> using StaticCompiler, StaticTools -julia> path, name = StaticCompiler.generate_executable(test, Tuple{Int64}, "./scratch") +julia> hello() = println(c"Hello, world!") + +julia> path, name = StaticCompiler.generate_executable(hello, Tuple{}, "./") +("./", "hello") + +shell> ./hello +Hello, world! ``` """ function generate_executable(f, tt, path=tempname(), name=GPUCompiler.safe_name(repr(f)), filename=string(name); @@ -404,36 +454,43 @@ end """ ```julia -generate_shlib(f, tt, path::String, name::String, filenamebase::String="obj"; kwargs...) +generate_shlib(f::Function, tt, [external::Bool=true], [path::String], [name], [filename]; kwargs...) +generate_shlib(funcs::Array, [external::Bool=true], [path::String], [filename::String]; demangle=false, kwargs...) ``` Low level interface for compiling a shared object / dynamically loaded library (`.so` / `.dylib`) for function `f` given a tuple type `tt` characterizing the types of the arguments for which the function will be compiled. -See also `StaticCompiler.generate_shlib_fptr`. 
+ ### Examples ```julia +julia> using StaticCompiler, LoopVectorization + julia> function test(n) r = 0.0 - for i=1:n + @turbo for i=1:n r += log(sqrt(i)) end return r/n end test (generic function with 1 method) -julia> path, name = StaticCompiler.generate_shlib(test, Tuple{Int64}, "./test") -("./test", "test") + +julia> path, name = StaticCompiler.generate_shlib(test, Tuple{Int64}, true, "./example") +("./example", "test") + shell> tree \$path -./test -|-- obj.o -`-- obj.so +./example +|-- test.dylib +`-- test.o 0 directories, 2 files + julia> test(100_000) -5.256496109495593 -julia> ccall(StaticCompiler.generate_shlib_fptr(path, name), Float64, (Int64,), 100_000) -5.256496109495593 +5.2564961094956075 + +julia> ccall(("julia_test", "example/test.dylib"), Float64, (Int64,), 100_000) +5.2564961094956075 ``` """ -function generate_shlib(f, tt, external = true, path=tempname(), name=GPUCompiler.safe_name(repr(f)), filename=name; +function generate_shlib(f::Function, tt, external::Bool=true, path::String=tempname(), name=GPUCompiler.safe_name(repr(f)), filename=name; cflags=``, kwargs... ) @@ -455,6 +512,23 @@ function generate_shlib(f, tt, external = true, path=tempname(), name=GPUCompile path, name end +# As above, but taking an array of functions and returning a single shlib +function generate_shlib(funcs::Array, external::Bool=true, path::String=tempname(), filename::String="libfoo"; + demangle=false, + cflags=``, + kwargs... + ) + + lib_path = joinpath(path, "$filename.$(Libdl.dlext)") + + _,obj_path = generate_obj(funcs, external, path, filename; demangle=demangle, kwargs...) + # Pick a Clang + cc = Sys.isapple() ? `cc` : clang() + # Compile! + run(`$cc -shared $cflags $obj_path -o $lib_path `) + + path, name +end function native_code_llvm(@nospecialize(func), @nospecialize(types); kwargs...) job, kwargs = native_job(func, types, true; kwargs...) 
@@ -524,46 +598,4 @@ function generate_obj(funcs::Array, external, path::String = tempname(), filenam path, obj_path end -function generate_shlib(funcs::Array, external = true, path::String = tempname(), filename::String="libfoo"; - demangle=false, - cflags=``, - kwargs... - ) - - lib_path = joinpath(path, "$filename.$(Libdl.dlext)") - - _,obj_path = generate_obj(funcs, external, path, filename; demangle=demangle, kwargs...) - # Pick a Clang - cc = Sys.isapple() ? `cc` : clang() - # Compile! - run(`$cc -shared $cflags $obj_path -o $lib_path `) - - path, name -end - -function compile_shlib(funcs::Array, path::String="./"; - filename="libfoo", - demangle=false, - cflags=``, - kwargs...) - for func in funcs - f, types = func - tt = Base.to_tuple_type(types) - isconcretetype(tt) || error("input type signature `$types` is not concrete") - - rt = last(only(native_code_typed(f, tt))) - isconcretetype(rt) || error("`$f$types` did not infer to a concrete type. Got `$rt`") - nativetype = isprimitivetype(rt) || isa(rt, Ptr) - nativetype || @warn "Return type `$rt` of `$f$types` does not appear to be a native type. Consider returning only a single value of a native machine type (i.e., a single float, int/uint, bool, or pointer). \n\nIgnoring this warning may result in Undefined Behavior!" - end - -# Would be nice to use a compiler pass or something to check if there are any heap allocations or references to globals -# Keep an eye on https://github.com/JuliaLang/julia/pull/43747 for this - - generate_shlib(funcs, true, path, filename; demangle=demangle, cflags=cflags, kwargs...) - - joinpath(abspath(path), filename * "." * Libdl.dlext) -end - - end # module diff --git a/src/target.jl b/src/target.jl index a14af90..734a0cc 100644 --- a/src/target.jl +++ b/src/target.jl @@ -6,7 +6,18 @@ end const overrides = quote end - +""" +```julia +@device_override old_bad_method(arg1::Type1, arg2::Type2) = new_good_method(arg1, arg2) +``` +Override a non-static-compilable method (e.g. 
`old_bad_method(::Type1, ::Type2)`) +with a more compileable replacement. +### Examples +``` +@device_override @noinline Core.throw_inexacterror(f::Symbol, ::Type{T}, val) where {T} = + @print_and_throw c"Inexact conversion" +``` +""" macro device_override(ex) ex = macroexpand(__module__, ex) if Meta.isexpr(ex, :call) @@ -24,7 +35,6 @@ macro device_override(ex) end end - Base.@kwdef struct NativeCompilerTarget <: GPUCompiler.AbstractCompilerTarget cpu::String=(LLVM.version() < v"8") ? "" : unsafe_string(LLVM.API.LLVMGetHostCPUName()) features::String=(LLVM.version() < v"8") ? "" : unsafe_string(LLVM.API.LLVMGetHostCPUFeatures()) diff --git a/test/testcore.jl b/test/testcore.jl index e74cfbe..450fbda 100644 --- a/test/testcore.jl +++ b/test/testcore.jl @@ -173,15 +173,14 @@ end end hello_compiled, path = compile(hello, (String,)) @test remote_load_call(path, "world") == "Hello, world!" -end -@testset "Hello World" begin - function hello(N) - println("Hello World $N") - N - end # We'll need to be able to relocate a bunch of UV stuff for this, and deal with dynamic dispatch. @test_skip begin + function hello(N) + println("Hello World $N") + N + end + hello_compiled, path = compile(hello, (Int,)) @test_skip remote_load_call(path, 1) == 1 end From fed55dd28d46b8807ce1eb84c0becb4f4ed78feb Mon Sep 17 00:00:00 2001 From: Fredrik Bagge Carlson Date: Tue, 21 Mar 2023 07:14:13 +0100 Subject: [PATCH 110/159] Document symbol naming convention (#105) * Document symbol naming convention * A bit more context on name mangling --------- Co-authored-by: C. Brenhin Keller --- src/StaticCompiler.jl | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/src/StaticCompiler.jl b/src/StaticCompiler.jl index ca8f696..07368d1 100644 --- a/src/StaticCompiler.jl +++ b/src/StaticCompiler.jl @@ -265,6 +265,11 @@ compile_shlib(funcs::Array, [path::String="./"]; filename="libfoo", demangle=fal ``` As `compile_executable`, but compiling to a standalone `.dylib`/`.so` shared library. 
+The compiled function is by default given the symbol name `julia_$(name)`, i.e., +the function `test` in the example below is called `julia_test` in the shared library. +The keword argument `demangle=true` will remove this prefix, but is currently only +supported the second (multi-function-shlib) method. + ### Examples ```julia julia> using StaticCompiler, LoopVectorization From 141e1abacce529f2de1213fe390369b8db65a87b Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" <41898282+github-actions[bot]@users.noreply.github.com> Date: Sat, 1 Apr 2023 23:02:36 -0400 Subject: [PATCH 111/159] CompatHelper: bump compat for LLVM to 5, (keep existing compat) (#107) * CompatHelper: bump compat for LLVM to 5, (keep existing compat) * Run CI --------- Co-authored-by: CompatHelper Julia Co-authored-by: C. Brenhin Keller --- Project.toml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/Project.toml b/Project.toml index d86a206..5cd8574 100644 --- a/Project.toml +++ b/Project.toml @@ -1,7 +1,7 @@ name = "StaticCompiler" uuid = "81625895-6c0f-48fc-b932-11a18313743c" authors = ["Tom Short and contributors"] -version = "0.4.8" +version = "0.4.9" [deps] Clang_jll = "0ee61d77-7f21-5576-8119-9fcc46b10100" @@ -13,6 +13,6 @@ StaticTools = "86c06d3c-3f03-46de-9781-57580aa96d0a" [compat] GPUCompiler = "0.16, 0.17" -LLVM = "4.8" -StaticTools ="0.8" +LLVM = "4.8, 5" +StaticTools = "0.8" julia = "1.7" From b1240f09458e044f2a1c4f9d5a2d337c5bae6b9c Mon Sep 17 00:00:00 2001 From: Tom Short Date: Wed, 12 Apr 2023 17:21:29 -0400 Subject: [PATCH 112/159] Cassette-type functionality with Mixtape (#69) --- .github/workflows/ci-integration-nightly.yml | 39 ----- .github/workflows/ci-integration.yml | 38 ----- .github/workflows/ci-julia-nightly.yml | 39 ----- .github/workflows/ci.yml | 2 +- LICENSE | 4 +- Project.toml | 18 ++- README.md | 8 ++ src/StaticCompiler.jl | 30 ++-- src/code_loading.jl | 1 - src/interpreter.jl | 141 +++++++++++++++++++ src/mixtape.jl | 77 ++++++++++ 
src/target.jl | 18 ++- test/Project.toml | 9 +- test/runtests.jl | 3 + test/testintegration.jl | 72 ++++++++++ 15 files changed, 358 insertions(+), 141 deletions(-) delete mode 100644 .github/workflows/ci-integration-nightly.yml delete mode 100644 .github/workflows/ci-integration.yml delete mode 100644 .github/workflows/ci-julia-nightly.yml create mode 100644 src/interpreter.jl create mode 100644 src/mixtape.jl diff --git a/.github/workflows/ci-integration-nightly.yml b/.github/workflows/ci-integration-nightly.yml deleted file mode 100644 index 6d4cb3e..0000000 --- a/.github/workflows/ci-integration-nightly.yml +++ /dev/null @@ -1,39 +0,0 @@ -name: CI (Integration nightly) -on: - push: - branches: - - '**' - paths-ignore: - - 'README.md' - pull_request: - branches: - - master - paths-ignore: - - 'README.md' -jobs: - test-integration-nightly: - name: Julia ${{ matrix.version }} - ${{ matrix.os }} - ${{ matrix.arch }} - runs-on: ${{ matrix.os }} - strategy: - fail-fast: false - matrix: - version: - - 'nightly' - - '~1.9.0-0' - os: - - ubuntu-latest - - macOS-latest - arch: - - x64 - group: - - Integration - steps: - - uses: actions/checkout@v2 - - uses: julia-actions/setup-julia@latest - with: - version: ${{ matrix.version }} - arch: ${{ matrix.arch }} - - uses: julia-actions/julia-buildpkg@latest - - uses: julia-actions/julia-runtest@latest - env: - GROUP: ${{ matrix.group }} diff --git a/.github/workflows/ci-integration.yml b/.github/workflows/ci-integration.yml deleted file mode 100644 index 3b9024d..0000000 --- a/.github/workflows/ci-integration.yml +++ /dev/null @@ -1,38 +0,0 @@ -name: CI (Integration) -on: - push: - branches: - - '**' - paths-ignore: - - 'README.md' - pull_request: - branches: - - master - paths-ignore: - - 'README.md' -jobs: - test-integration: - name: Julia ${{ matrix.version }} - ${{ matrix.os }} - ${{ matrix.arch }} - runs-on: ${{ matrix.os }} - strategy: - fail-fast: false - matrix: - version: - - '1.8' - os: - - ubuntu-latest - - 
macOS-latest - arch: - - x64 - group: - - Integration - steps: - - uses: actions/checkout@v2 - - uses: julia-actions/setup-julia@latest - with: - version: ${{ matrix.version }} - arch: ${{ matrix.arch }} - - uses: julia-actions/julia-buildpkg@latest - - uses: julia-actions/julia-runtest@latest - env: - GROUP: ${{ matrix.group }} diff --git a/.github/workflows/ci-julia-nightly.yml b/.github/workflows/ci-julia-nightly.yml deleted file mode 100644 index dfb2ad8..0000000 --- a/.github/workflows/ci-julia-nightly.yml +++ /dev/null @@ -1,39 +0,0 @@ -name: CI (Julia nightly) -on: - push: - branches: - - '**' - paths-ignore: - - 'README.md' - pull_request: - branches: - - master - paths-ignore: - - 'README.md' -jobs: - test-julia-nightly: - name: Julia ${{ matrix.version }} - ${{ matrix.os }} - ${{ matrix.arch }} - runs-on: ${{ matrix.os }} - strategy: - fail-fast: false - matrix: - version: - - 'nightly' - - '~1.9.0-0' - os: - - ubuntu-latest - - macOS-latest - arch: - - x64 - group: - - Core - steps: - - uses: actions/checkout@v2 - - uses: julia-actions/setup-julia@latest - with: - version: ${{ matrix.version }} - arch: ${{ matrix.arch }} - - uses: julia-actions/julia-buildpkg@latest - - uses: julia-actions/julia-runtest@latest - env: - GROUP: ${{ matrix.group }} diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 341215a..d312a54 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -18,8 +18,8 @@ jobs: fail-fast: false matrix: version: - - '1.7' - '1.8' + - '^1.9.0-rc2' os: - ubuntu-latest - macOS-latest diff --git a/LICENSE b/LICENSE index cb19b0a..b4e5ab8 100644 --- a/LICENSE +++ b/LICENSE @@ -1,6 +1,6 @@ Copyright (c) 2019-2022 Mason Protter, William Moses, Valentin Churavy, - Brenhin Keller, Julian Samaroo, Tom Short, and - other contributors + McCoy R. 
Becker, Brenhin Keller, Julian Samaroo, + Tom Short, and other contributors Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/Project.toml b/Project.toml index 5cd8574..212d07a 100644 --- a/Project.toml +++ b/Project.toml @@ -1,18 +1,30 @@ name = "StaticCompiler" uuid = "81625895-6c0f-48fc-b932-11a18313743c" authors = ["Tom Short and contributors"] -version = "0.4.9" +version = "0.4.10" [deps] Clang_jll = "0ee61d77-7f21-5576-8119-9fcc46b10100" +CodeInfoTools = "bc773b8a-8374-437a-b9f2-0e9785855863" GPUCompiler = "61eb1bfa-7361-4325-ad38-22787b887f55" +InteractiveUtils = "b77e0a4c-d291-57a0-90e8-8db25a27a240" LLVM = "929cbde3-209d-540e-8aea-75f648917ca0" Libdl = "8f399da3-3557-5675-b5ff-fb832c97cbdb" +MacroTools = "1914dd2f-81c6-5fcd-8719-6d5c9610ff09" Serialization = "9e88b42a-f829-5b0c-bbe9-9e923198166b" StaticTools = "86c06d3c-3f03-46de-9781-57580aa96d0a" [compat] -GPUCompiler = "0.16, 0.17" +CodeInfoTools = "0.3" +GPUCompiler = "0.17" LLVM = "4.8, 5" StaticTools = "0.8" -julia = "1.7" +MacroTools = "0.5" +julia = "1.8, 1.9" + +[extras] +Formatting = "59287772-0a20-5a39-b81b-1366585eb4c0" +Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40" + +[targets] +test = ["Test", "Formatting"] diff --git a/README.md b/README.md index 6269635..6ddb978 100644 --- a/README.md +++ b/README.md @@ -65,6 +65,14 @@ Hello, world! ``` This latter approach comes with substantially more limitations, as you cannot rely on `libjulia` (see, e.g., [StaticTools.jl](https://github.com/brenhinkeller/StaticTools.jl) for some ways to work around these limitations). +### Mixtape + +This feature allows one to change functionality when statically compiling. This uses code and API from [Mixtape](https://github.com/JuliaCompilerPlugins/Mixtape.jl) to transform lowered code much like [Cassette](https://github.com/JuliaLabs/Cassette.jl). 
+ +To use the Mixtape feature, define a `CompilationContext` struct and pass this to any of the compilation functions with the `mixtape` keyword. Define `transform` and `allow` functions for this `CompilationContext` to define the transformation to be done. + +See [here](https://github.com/tshort/StaticCompiler.jl/blob/master/test/testintegration.jl#L329) for an example. + ## Approach This package uses the [GPUCompiler package](https://github.com/JuliaGPU/GPUCompiler.jl) to generate code. diff --git a/src/StaticCompiler.jl b/src/StaticCompiler.jl index 07368d1..6e1b2b7 100644 --- a/src/StaticCompiler.jl +++ b/src/StaticCompiler.jl @@ -1,5 +1,5 @@ module StaticCompiler - +using InteractiveUtils using GPUCompiler: GPUCompiler using LLVM using LLVM.Interop @@ -15,6 +15,8 @@ export compile, load_function, compile_shlib, compile_executable export native_code_llvm, native_code_typed, native_llvm_module, native_code_native export @device_override, @print_and_throw +include("mixtape.jl") +include("interpreter.jl") include("target.jl") include("pointer_patching.jl") include("code_loading.jl") @@ -87,7 +89,10 @@ with `load_function(path)`. `LazyStaticcompiledfunction`s contain the requisite `StaticCompiledFunction` and may be called with arguments of type `types` as if it were a function with a single method (the method determined by `types`). 
""" -function compile(f, _tt, path::String = tempname(); name = GPUCompiler.safe_name(repr(f)), filename="obj", +function compile(f, _tt, path::String = tempname(); + mixtape = NoContext(), + name = GPUCompiler.safe_name(repr(f)), + filename = "obj", strip_llvm = false, strip_asm = true, opt_level=3, @@ -95,11 +100,10 @@ function compile(f, _tt, path::String = tempname(); name = GPUCompiler.safe_nam tt = Base.to_tuple_type(_tt) isconcretetype(tt) || error("input type signature $_tt is not concrete") - rt = last(only(native_code_typed(f, tt))) + rt = last(only(native_code_typed(f, tt, mixtape = mixtape))) isconcretetype(rt) || error("$f on $_tt did not infer to a concrete type. Got $rt") - f_wrap!(out::Ref, args::Ref{<:Tuple}) = (out[] = f(args[]...); nothing) - _, _, table = generate_obj(f_wrap!, Tuple{RefValue{rt}, RefValue{tt}}, false, path, name; opt_level, strip_llvm, strip_asm, filename, kwargs...) + _, _, table = generate_obj(f_wrap!, Tuple{RefValue{rt}, RefValue{tt}}, false, path, name; mixtape = mixtape, opt_level, strip_llvm, strip_asm, filename, kwargs...) lf = LazyStaticCompiledFunction{rt, tt}(Symbol(f), path, name, filename, table) cjl_path = joinpath(path, "$filename.cjl") @@ -136,6 +140,7 @@ shell> tree \$path ``` """ function generate_obj(f, tt, external = true, path::String = tempname(), name = GPUCompiler.safe_name(repr(f)), filenamebase::String="obj"; + mixtape = NoContext(), strip_llvm = false, strip_asm = true, opt_level=3, @@ -143,11 +148,18 @@ function generate_obj(f, tt, external = true, path::String = tempname(), name = mkpath(path) obj_path = joinpath(path, "$filenamebase.o") tm = GPUCompiler.llvm_machine(external ? ExternalNativeCompilerTarget() : NativeCompilerTarget()) - job, kwargs = native_job(f, tt, external; name, kwargs...) #Get LLVM to generated a module of code for us. We don't want GPUCompiler's optimization passes. 
+ job = CompilerJob(NativeCompilerTarget(), + FunctionSpec(f, tt, false, name), + StaticCompilerParams(; + opt = true, + mixtape = mixtape, + optlevel = Base.JLOptions().opt_level)) + mod, meta = GPUCompiler.JuliaContext() do context GPUCompiler.codegen(:llvm, job; strip=strip_llvm, only_entry=false, validate=false, optimize=false, ctx=context) - end + end + # Use Enzyme's annotation and optimization pipeline annotate!(mod) optimize!(mod, tm) @@ -247,7 +259,7 @@ function compile_executable(f::Function, types=(), path::String="./", name=GPUCo isexecutableargtype = tt == Tuple{} || tt == Tuple{Int, Ptr{Ptr{UInt8}}} isexecutableargtype || @warn "input type signature $types should be either `()` or `(Int, Ptr{Ptr{UInt8}})` for standard executables" - rt = last(only(native_code_typed(f, tt))) + rt = last(only(native_code_typed(f, tt; kwargs...))) isconcretetype(rt) || error("`$f$types` did not infer to a concrete type. Got `$rt`") nativetype = isprimitivetype(rt) || isa(rt, Ptr) nativetype || @warn "Return type `$rt` of `$f$types` does not appear to be a native type. Consider returning only a single value of a native machine type (i.e., a single float, int/uint, bool, or pointer). \n\nIgnoring this warning may result in Undefined Behavior!" @@ -302,7 +314,7 @@ function compile_shlib(f::Function, types=(), path::String="./", name=GPUCompile tt = Base.to_tuple_type(types) isconcretetype(tt) || error("input type signature `$types` is not concrete") - rt = last(only(native_code_typed(f, tt))) + rt = last(only(native_code_typed(f, tt; kwargs...))) isconcretetype(rt) || error("`$f$types` did not infer to a concrete type. Got `$rt`") nativetype = isprimitivetype(rt) || isa(rt, Ptr) nativetype || @warn "Return type `$rt` of `$f$types` does not appear to be a native type. Consider returning only a single value of a native machine type (i.e., a single float, int/uint, bool, or pointer). \n\nIgnoring this warning may result in Undefined Behavior!" 
diff --git a/src/code_loading.jl b/src/code_loading.jl index 931e6a5..665df22 100644 --- a/src/code_loading.jl +++ b/src/code_loading.jl @@ -49,7 +49,6 @@ function instantiate(p::LazyStaticCompiledFunction{rt, tt}) where {rt, tt} prefix = LLVM.get_prefix(lljit) dg = LLVM.CreateDynamicLibrarySearchGeneratorForProcess(prefix) LLVM.add!(jd, dg) - LLVM.add!(lljit, jd, ofile) fptr = pointer(LLVM.lookup(lljit, "julia_" * p.name)) diff --git a/src/interpreter.jl b/src/interpreter.jl new file mode 100644 index 0000000..09ffd4a --- /dev/null +++ b/src/interpreter.jl @@ -0,0 +1,141 @@ +## interpreter + +using Core.Compiler: + AbstractInterpreter, InferenceResult, InferenceParams, InferenceState, MethodInstance, OptimizationParams, WorldView +using GPUCompiler: + @safe_debug, AbstractCompilerParams, CodeCache, CompilerJob, FunctionSpec +using CodeInfoTools +using CodeInfoTools: resolve + +struct StaticInterpreter{M} <: AbstractInterpreter + global_cache::CodeCache + method_table::Union{Nothing,Core.MethodTable} + + # Cache of inference results for this particular interpreter + local_cache::Vector{InferenceResult} + # The world age we're working inside of + world::UInt + + # Parameters for inference and optimization + inf_params::InferenceParams + opt_params::OptimizationParams + + # Mixtape context + mixtape::M + + function StaticInterpreter(cache::CodeCache, mt::Union{Nothing,Core.MethodTable}, world::UInt, ip::InferenceParams, op::OptimizationParams, mixtape::CompilationContext) + @assert world <= Base.get_world_counter() + + return new{typeof(mixtape)}( + cache, + mt, + + # Initially empty cache + Vector{InferenceResult}(), + + # world age counter + world, + + # parameters for inference and optimization + ip, + op, + + # Mixtape context + mixtape + ) + end +end + + +Core.Compiler.InferenceParams(interp::StaticInterpreter) = interp.inf_params +Core.Compiler.OptimizationParams(interp::StaticInterpreter) = interp.opt_params 
+Core.Compiler.get_world_counter(interp::StaticInterpreter) = interp.world +Core.Compiler.get_inference_cache(interp::StaticInterpreter) = interp.local_cache +Core.Compiler.code_cache(interp::StaticInterpreter) = WorldView(interp.global_cache, interp.world) + +# No need to do any locking since we're not putting our results into the runtime cache +Core.Compiler.lock_mi_inference(interp::StaticInterpreter, mi::MethodInstance) = nothing +Core.Compiler.unlock_mi_inference(interp::StaticInterpreter, mi::MethodInstance) = nothing + +function Core.Compiler.add_remark!(interp::StaticInterpreter, sv::InferenceState, msg) + @safe_debug "Inference remark during static compilation of $(sv.linfo): $msg" +end + + +##### +##### Pre-inference +##### + +function resolve_generic(a) + if a isa Type && a <: Function && isdefined(a, :instance) + return a.instance + else + return resolve(a) + end +end + +function custom_pass!(interp::StaticInterpreter, result::InferenceResult, mi::Core.MethodInstance, src) + src === nothing && return src + mi.specTypes isa UnionAll && return src + sig = Tuple(mi.specTypes.parameters) + as = map(resolve_generic, sig) + if allow(interp.mixtape, mi.def.module, as...) 
+ src = transform(interp.mixtape, src, sig) + end + return src +end + +function InferenceState(result::InferenceResult, cache::Symbol, interp::StaticInterpreter) + src = Core.Compiler.retrieve_code_info(result.linfo) + mi = result.linfo + src = custom_pass!(interp, result, mi, src) + src === nothing && return nothing + Core.Compiler.validate_code_in_debug_mode(result.linfo, src, "lowered") + return InferenceState(result, src, cache, interp) +end + +Core.Compiler.may_optimize(interp::StaticInterpreter) = true +Core.Compiler.may_compress(interp::StaticInterpreter) = true +Core.Compiler.may_discard_trees(interp::StaticInterpreter) = true +if VERSION >= v"1.7.0-DEV.577" +Core.Compiler.verbose_stmt_info(interp::StaticInterpreter) = false +end + +if isdefined(Base.Experimental, Symbol("@overlay")) +using Core.Compiler: OverlayMethodTable +if v"1.8-beta2" <= VERSION < v"1.9-" || VERSION >= v"1.9.0-DEV.120" +Core.Compiler.method_table(interp::StaticInterpreter) = + OverlayMethodTable(interp.world, interp.method_table) +else +Core.Compiler.method_table(interp::StaticInterpreter, sv::InferenceState) = + OverlayMethodTable(interp.world, interp.method_table) +end +else +Core.Compiler.method_table(interp::StaticInterpreter, sv::InferenceState) = + WorldOverlayMethodTable(interp.world) +end + +# semi-concrete interpretation is broken with overlays (JuliaLang/julia#47349) +@static if VERSION >= v"1.9.0-DEV.1248" +function Core.Compiler.concrete_eval_eligible(interp::StaticInterpreter, + @nospecialize(f), result::Core.Compiler.MethodCallResult, arginfo::Core.Compiler.ArgInfo) + ret = @invoke Core.Compiler.concrete_eval_eligible(interp::AbstractInterpreter, + f::Any, result::Core.Compiler.MethodCallResult, arginfo::Core.Compiler.ArgInfo) + ret === false && return nothing + return ret +end +end + +struct StaticCompilerParams <: AbstractCompilerParams + opt::Bool + optlevel::Int + mixtape::CompilationContext + cache::CodeCache +end + +function StaticCompilerParams(; opt = false, +
optlevel = Base.JLOptions().opt_level, + mixtape = NoContext(), + cache = CodeCache()) + return StaticCompilerParams(opt, optlevel, mixtape, cache) +end diff --git a/src/mixtape.jl b/src/mixtape.jl new file mode 100644 index 0000000..5e6098a --- /dev/null +++ b/src/mixtape.jl @@ -0,0 +1,77 @@ + +##### +##### Exports +##### + +export CompilationContext, + NoContext, + allow, + transform + +##### +##### Compilation context +##### + +# User-extended context allows parametrization of the pipeline through +# our subtype of AbstractInterpreter +abstract type CompilationContext end + +struct NoContext <: CompilationContext end + +@doc( +""" + abstract type CompilationContext end + +Parametrize the Mixtape pipeline by inheriting from `CompilationContext`. Similar to the context objects in [Cassette.jl](https://julia.mit.edu/Cassette.jl/stable/contextualdispatch.html). By using the interface methods [`transform`](@ref) and [`optimize!`](@ref) -- the user can control different parts of the compilation pipeline. +""", CompilationContext) + +transform(ctx::CompilationContext, b) = b +transform(ctx::CompilationContext, b, sig) = transform(ctx, b) + +@doc( +""" + transform(ctx::CompilationContext, b::Core.CodeInfo)::Core.CodeInfo + transform(ctx::CompilationContext, b::Core.CodeInfo, sig::Tuple)::Core.CodeInfo + +User-defined transform which operates on lowered `Core.CodeInfo`. There's two versions: (1) ignores the signature of the current method body under consideration and (2) provides the signature as `sig`. + +Transforms might typically follow a simple "swap" format using `CodeInfoTools.Builder`: + +```julia +function transform(::MyCtx, src) + b = CodeInfoTools.Builder(b) + for (k, st) in b + b[k] = swap(st)) + end + return CodeInfoTools.finish(b) +end +``` + +but more advanced formats are possible. For further utilities, please see [CodeInfoTools.jl](https://github.com/JuliaCompilerPlugins/CodeInfoTools.jl). +""", transform) + + +allow(f::C, args...) 
where {C <: CompilationContext} = false +function allow(ctx::CompilationContext, mod::Module, fn, args...) + return allow(ctx, mod) || allow(ctx, fn, args...) +end + +@doc( +""" + allow(f::CompilationContext, args...)::Bool + +Determines whether the user-defined [`transform`](@ref) and [`optimize!`](@ref) are allowed to look at a lowered `Core.CodeInfo` or `Core.Compiler.IRCode` instance. + +The user is allowed to greenlight modules: + +```julia +allow(::MyCtx, m::Module) == m == SomeModule +``` + +or even specific signatures + +```julia +allow(::MyCtx, fn::typeof(rand), args...) = true +``` +""", allow) + diff --git a/src/target.jl b/src/target.jl index 734a0cc..83872a0 100644 --- a/src/target.jl +++ b/src/target.jl @@ -55,8 +55,6 @@ module StaticRuntime report_exception_frame(idx, func, file, line) = return end -struct StaticCompilerParams <: GPUCompiler.AbstractCompilerParams end - for target in (:NativeCompilerTarget, :ExternalNativeCompilerTarget) @eval begin GPUCompiler.llvm_triple(::$target) = Sys.MACHINE @@ -80,6 +78,12 @@ for target in (:NativeCompilerTarget, :ExternalNativeCompilerTarget) GPUCompiler.can_throw(job::GPUCompiler.CompilerJob{$target, StaticCompilerParams}) = true GPUCompiler.can_throw(job::GPUCompiler.CompilerJob{$target}) = true + + GPUCompiler.get_interpreter(job::GPUCompiler.CompilerJob{$target, StaticCompilerParams}) = + StaticInterpreter(job.params.cache, GPUCompiler.method_table(job), job.source.world, + GPUCompiler.inference_params(job), GPUCompiler.optimization_params(job), + job.params.mixtape) + GPUCompiler.ci_cache(job::GPUCompiler.CompilerJob{$target, StaticCompilerParams}) = job.params.cache end end @@ -87,20 +91,20 @@ GPUCompiler.method_table(@nospecialize(job::GPUCompiler.CompilerJob{ExternalNati GPUCompiler.method_table(@nospecialize(job::GPUCompiler.CompilerJob{ExternalNativeCompilerTarget, StaticCompilerParams})) = method_table function native_job(@nospecialize(func::Function), @nospecialize(types::Type), external::Bool; + 
mixtape = NoContext(), name = GPUCompiler.safe_name(repr(func)), kernel::Bool = false, kwargs... ) source = GPUCompiler.FunctionSpec(func, types, kernel, name) target = external ? ExternalNativeCompilerTarget() : NativeCompilerTarget() - params = StaticCompilerParams() - GPUCompiler.CompilerJob(target, source, params), kwargs + params = StaticCompilerParams(mixtape = mixtape) + StaticCompiler.CompilerJob(target, source, params), kwargs end - -function native_job(@nospecialize(func), @nospecialize(types), external; kernel::Bool=false, name=GPUCompiler.safe_name(repr(func)), kwargs...) +function native_job(@nospecialize(func), @nospecialize(types), external; mixtape = NoContext(), kernel::Bool=false, name=GPUCompiler.safe_name(repr(func)), kwargs...) source = GPUCompiler.FunctionSpec(func, Base.to_tuple_type(types), kernel, name) target = external ? ExternalNativeCompilerTarget() : NativeCompilerTarget() - params = StaticCompilerParams() + params = StaticCompilerParams(mixtape = mixtape) GPUCompiler.CompilerJob(target, source, params), kwargs end diff --git a/test/Project.toml b/test/Project.toml index e5a4823..c9d1de5 100644 --- a/test/Project.toml +++ b/test/Project.toml @@ -1,11 +1,16 @@ [deps] +CodeInfoTools = "bc773b8a-8374-437a-b9f2-0e9785855863" Distributed = "8ba89e20-285c-5b6f-9357-94700520ee1b" -LLVM = "929cbde3-209d-540e-8aea-75f648917ca0" +Formatting = "59287772-0a20-5a39-b81b-1366585eb4c0" +GPUCompiler = "61eb1bfa-7361-4325-ad38-22787b887f55" +Infiltrator = "5903a43b-9cc3-4c30-8d17-598619ec4e9b" Libdl = "8f399da3-3557-5675-b5ff-fb832c97cbdb" LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e" +LLVM = "929cbde3-209d-540e-8aea-75f648917ca0" LoopVectorization = "bdcacae8-1622-11e9-2a5c-532679323890" +MacroTools = "1914dd2f-81c6-5fcd-8719-6d5c9610ff09" ManualMemory = "d125e4d3-2237-4719-b19c-fa641b8a4667" StaticTools = "86c06d3c-3f03-46de-9781-57580aa96d0a" StrideArraysCore = "7792a7ef-975c-4747-a70f-980b88e8d1da" -TOML = 
"fa267f1f-6049-4f14-aa54-33bafae1ed76" Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40" +TOML = "fa267f1f-6049-4f14-aa54-33bafae1ed76" diff --git a/test/runtests.jl b/test/runtests.jl index 67838d9..d117aa5 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -7,6 +7,9 @@ using ManualMemory using Distributed using StaticTools using StrideArraysCore +using CodeInfoTools +using MacroTools + addprocs(1) @everywhere using StaticCompiler, StrideArraysCore diff --git a/test/testintegration.jl b/test/testintegration.jl index 15012df..6f98b5f 100644 --- a/test/testintegration.jl +++ b/test/testintegration.jl @@ -311,3 +311,75 @@ cd(testpath) end + +# Mixtape + +module SubFoo + +function f() + x = rand() + y = rand() + return x + y +end + +function stringfun(s1, s2) + return s1 * s2 +end + +function teststring() + return stringfun("ab", "c") == "abc" +end + +end + +struct MyMix <: CompilationContext end + +@testset "Mixtape" begin + # 101: How2Mix + + # A few little utility functions for working with Expr instances. + swap(e) = e + function swap(e::Expr) + new = MacroTools.postwalk(e) do s + isexpr(s, :call) || return s + s.args[1] == Base.rand || return s + return 4 + end + return new + end + + # This is pre-inference - you get to see a CodeInfoTools.Builder instance. + function StaticCompiler.transform(::MyMix, src) + b = CodeInfoTools.Builder(src) + for (v, st) in b + b[v] = swap(st) + end + return CodeInfoTools.finish(b) + end + + # MyMix will only transform functions which you explicitly allow. + # You can also greenlight modules. 
+ StaticCompiler.allow(ctx::MyMix, m::Module) = m == SubFoo + + _, path = compile(SubFoo.f, (), mixtape = MyMix()) + @test load_function(path)() == 8 + @test SubFoo.f() != 8 + + # redefine swap to test caching and add StaticString substitution + function swap(e::Expr) + new = MacroTools.postwalk(e) do s + s isa String && return StaticTools.StaticString(tuple(codeunits(s)..., 0x00)) + isexpr(s, :call) || return s + s.args[1] == Base.rand || return s + return 2 + end + return new + end + _, path = compile(SubFoo.f, (), mixtape = MyMix()) + @test load_function(path)() == 4 + + _, path = compile(SubFoo.teststring, (), mixtape = MyMix()) + @test load_function(path)() + +end + From 84e1b908e007d3ff49dd429d6e54b00daf7a0957 Mon Sep 17 00:00:00 2001 From: Tom Short Date: Mon, 24 Apr 2023 18:31:03 -0400 Subject: [PATCH 113/159] Allow compilation to other target triples (#109) --- README.md | 2 ++ src/StaticCompiler.jl | 14 ++++++++++++-- src/target.jl | 11 +++++++---- test/testintegration.jl | 6 ++++++ 4 files changed, 27 insertions(+), 6 deletions(-) diff --git a/README.md b/README.md index 6ddb978..bcb1cee 100644 --- a/README.md +++ b/README.md @@ -65,6 +65,8 @@ Hello, world! ``` This latter approach comes with substantially more limitations, as you cannot rely on `libjulia` (see, e.g., [StaticTools.jl](https://github.com/brenhinkeller/StaticTools.jl) for some ways to work around these limitations). +The low-level function `StaticCompiler.generate_obj` (not exported) generates object files. This can be used for more control of compilation. This can be used to cross-compile to other targets. + ### Mixtape This feature allows one to change functionality when statically compiling. This uses code and API from [Mixtape](https://github.com/JuliaCompilerPlugins/Mixtape.jl) to transform lowered code much like [Cassette](https://github.com/JuliaLabs/Cassette.jl). 
diff --git a/src/StaticCompiler.jl b/src/StaticCompiler.jl index 6e1b2b7..4c23698 100644 --- a/src/StaticCompiler.jl +++ b/src/StaticCompiler.jl @@ -115,8 +115,10 @@ end """ ```julia generate_obj(f, tt, path::String = tempname(), name = GPUCompiler.safe_name(repr(f)), filenamebase::String="obj"; + \tmixtape = NoContext(), \tstrip_llvm = false, \tstrip_asm = true, + \ttarget = (), \topt_level=3, \tkwargs...) ``` @@ -124,6 +126,13 @@ Low level interface for compiling object code (`.o`) for for function `f` given a tuple type `tt` characterizing the types of the arguments for which the function will be compiled. +`mixtape` defines a context that can be used to transform IR prior to compilation using +[Mixtape](https://github.com/JuliaCompilerPlugins/Mixtape.jl) features. + +`target` can be used to change the output target. This is useful for compiling to WebAssembly and embedded targets. +This is a named tuple with fields `triple`, `cpu`, and `features` (each of these are strings). +The defaults compile to the native target. + ### Examples ```julia julia> fib(n) = n <= 1 ? n : fib(n - 1) + fib(n - 2) @@ -144,12 +153,12 @@ function generate_obj(f, tt, external = true, path::String = tempname(), name = strip_llvm = false, strip_asm = true, opt_level=3, + target = (), kwargs...) mkpath(path) obj_path = joinpath(path, "$filenamebase.o") - tm = GPUCompiler.llvm_machine(external ? ExternalNativeCompilerTarget() : NativeCompilerTarget()) #Get LLVM to generated a module of code for us. We don't want GPUCompiler's optimization passes. - job = CompilerJob(NativeCompilerTarget(), + job = CompilerJob(NativeCompilerTarget(target...), FunctionSpec(f, tt, false, name), StaticCompilerParams(; opt = true, @@ -162,6 +171,7 @@ function generate_obj(f, tt, external = true, path::String = tempname(), name = # Use Enzyme's annotation and optimization pipeline annotate!(mod) + tm = GPUCompiler.llvm_machine(external ? ExternalNativeCompilerTarget(target...) 
: NativeCompilerTarget(target...)) optimize!(mod, tm) # Scoop up all the pointers in the optimized module, and replace them with unitialized global variables. diff --git a/src/target.jl b/src/target.jl index 83872a0..28be261 100644 --- a/src/target.jl +++ b/src/target.jl @@ -36,11 +36,13 @@ macro device_override(ex) end Base.@kwdef struct NativeCompilerTarget <: GPUCompiler.AbstractCompilerTarget + triple::String=Sys.MACHINE cpu::String=(LLVM.version() < v"8") ? "" : unsafe_string(LLVM.API.LLVMGetHostCPUName()) features::String=(LLVM.version() < v"8") ? "" : unsafe_string(LLVM.API.LLVMGetHostCPUFeatures()) end Base.@kwdef struct ExternalNativeCompilerTarget <: GPUCompiler.AbstractCompilerTarget + triple::String=Sys.MACHINE cpu::String=(LLVM.version() < v"8") ? "" : unsafe_string(LLVM.API.LLVMGetHostCPUName()) features::String=(LLVM.version() < v"8") ? "" : unsafe_string(LLVM.API.LLVMGetHostCPUFeatures()) end @@ -57,7 +59,7 @@ end for target in (:NativeCompilerTarget, :ExternalNativeCompilerTarget) @eval begin - GPUCompiler.llvm_triple(::$target) = Sys.MACHINE + GPUCompiler.llvm_triple(target::$target) = target.triple function GPUCompiler.llvm_machine(target::$target) triple = GPUCompiler.llvm_triple(target) @@ -94,17 +96,18 @@ function native_job(@nospecialize(func::Function), @nospecialize(types::Type), e mixtape = NoContext(), name = GPUCompiler.safe_name(repr(func)), kernel::Bool = false, + target = (), kwargs... ) source = GPUCompiler.FunctionSpec(func, types, kernel, name) - target = external ? ExternalNativeCompilerTarget() : NativeCompilerTarget() + target = external ? ExternalNativeCompilerTarget(;target...) : NativeCompilerTarget(;target...) params = StaticCompilerParams(mixtape = mixtape) StaticCompiler.CompilerJob(target, source, params), kwargs end -function native_job(@nospecialize(func), @nospecialize(types), external; mixtape = NoContext(), kernel::Bool=false, name=GPUCompiler.safe_name(repr(func)), kwargs...) 
+function native_job(@nospecialize(func), @nospecialize(types), external; mixtape = NoContext(), kernel::Bool=false, name=GPUCompiler.safe_name(repr(func)), target = (), kwargs...) source = GPUCompiler.FunctionSpec(func, Base.to_tuple_type(types), kernel, name) - target = external ? ExternalNativeCompilerTarget() : NativeCompilerTarget() + target = external ? ExternalNativeCompilerTarget(;target...) : NativeCompilerTarget(;target...) params = StaticCompilerParams(mixtape = mixtape) GPUCompiler.CompilerJob(target, source, params), kwargs end diff --git a/test/testintegration.jl b/test/testintegration.jl index 6f98b5f..4828c61 100644 --- a/test/testintegration.jl +++ b/test/testintegration.jl @@ -383,3 +383,9 @@ struct MyMix <: CompilationContext end end +@testset "Cross compiling" begin + + x, obj_path = StaticCompiler.generate_obj(x -> 2x, Tuple{Float64}, true, tempname(); target = (triple = "wasm32-unknown-wasi", cpu = "", features = "")) + +end + From 0034adc704d675ad6ee742f92a7eb96fa19b0048 Mon Sep 17 00:00:00 2001 From: Tom Short Date: Tue, 25 Apr 2023 20:59:43 -0400 Subject: [PATCH 114/159] Upgrade to GPUCompiler v0.19 (#113) --- .github/workflows/ci.yml | 4 ++++ .gitignore | 4 ++++ Project.toml | 7 +++--- src/StaticCompiler.jl | 47 ++++++++++++++++++++-------------------- src/code_loading.jl | 2 +- src/interpreter.jl | 2 +- src/pointer_patching.jl | 4 ++-- src/target.jl | 22 ++++++++++--------- test/Project.toml | 6 ++--- test/runtests.jl | 1 + test/testcore.jl | 4 ++-- test/testintegration.jl | 32 ++++++++++++++++----------- 12 files changed, 76 insertions(+), 59 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index d312a54..65cbcf3 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -27,6 +27,10 @@ jobs: - x64 group: - Core + include: + - arch: x86 + version: '1' + os: ubuntu-latest steps: - uses: actions/checkout@v2 - uses: julia-actions/setup-julia@latest diff --git a/.gitignore b/.gitignore index 
872875b..7d7ea3b 100644 --- a/.gitignore +++ b/.gitignore @@ -7,6 +7,10 @@ /test/test.* Manifest.toml +*.wasm +*.dll +*.o +*.so test.o test.so test.bc diff --git a/Project.toml b/Project.toml index 212d07a..340a75a 100644 --- a/Project.toml +++ b/Project.toml @@ -8,6 +8,7 @@ Clang_jll = "0ee61d77-7f21-5576-8119-9fcc46b10100" CodeInfoTools = "bc773b8a-8374-437a-b9f2-0e9785855863" GPUCompiler = "61eb1bfa-7361-4325-ad38-22787b887f55" InteractiveUtils = "b77e0a4c-d291-57a0-90e8-8db25a27a240" +LLD_jll = "d55e3150-da41-5e91-b323-ecfd1eec6109" LLVM = "929cbde3-209d-540e-8aea-75f648917ca0" Libdl = "8f399da3-3557-5675-b5ff-fb832c97cbdb" MacroTools = "1914dd2f-81c6-5fcd-8719-6d5c9610ff09" @@ -16,10 +17,10 @@ StaticTools = "86c06d3c-3f03-46de-9781-57580aa96d0a" [compat] CodeInfoTools = "0.3" -GPUCompiler = "0.17" -LLVM = "4.8, 5" -StaticTools = "0.8" +GPUCompiler = "0.19" +LLVM = "5" MacroTools = "0.5" +StaticTools = "0.8" julia = "1.8, 1.9" [extras] diff --git a/src/StaticCompiler.jl b/src/StaticCompiler.jl index 4c23698..7fe8284 100644 --- a/src/StaticCompiler.jl +++ b/src/StaticCompiler.jl @@ -23,6 +23,7 @@ include("code_loading.jl") include("optimize.jl") include("quirks.jl") +fix_name(s) = string("julia_", GPUCompiler.safe_name(s)) """ compile(f, types, path::String = tempname()) --> (compiled_f, path) @@ -91,12 +92,13 @@ single method (the method determined by `types`). """ function compile(f, _tt, path::String = tempname(); mixtape = NoContext(), - name = GPUCompiler.safe_name(repr(f)), + name = fix_name(repr(f)), filename = "obj", strip_llvm = false, strip_asm = true, opt_level=3, kwargs...) 
+ tt = Base.to_tuple_type(_tt) isconcretetype(tt) || error("input type signature $_tt is not concrete") @@ -114,7 +116,7 @@ end """ ```julia -generate_obj(f, tt, path::String = tempname(), name = GPUCompiler.safe_name(repr(f)), filenamebase::String="obj"; +generate_obj(f, tt, path::String = tempname(), name = fix_name(repr(f)), filenamebase::String="obj"; \tmixtape = NoContext(), \tstrip_llvm = false, \tstrip_asm = true, @@ -148,7 +150,7 @@ shell> tree \$path 0 directories, 1 file ``` """ -function generate_obj(f, tt, external = true, path::String = tempname(), name = GPUCompiler.safe_name(repr(f)), filenamebase::String="obj"; +function generate_obj(f, tt, external = true, path::String = tempname(), name = fix_name(repr(f)), filenamebase::String="obj"; mixtape = NoContext(), strip_llvm = false, strip_asm = true, @@ -158,12 +160,9 @@ function generate_obj(f, tt, external = true, path::String = tempname(), name = mkpath(path) obj_path = joinpath(path, "$filenamebase.o") #Get LLVM to generated a module of code for us. We don't want GPUCompiler's optimization passes. - job = CompilerJob(NativeCompilerTarget(target...), - FunctionSpec(f, tt, false, name), - StaticCompilerParams(; - opt = true, - mixtape = mixtape, - optlevel = Base.JLOptions().opt_level)) + params = StaticCompilerParams(opt = true, mixtape = mixtape, optlevel = Base.JLOptions().opt_level) + config = GPUCompiler.CompilerConfig(NativeCompilerTarget(target...), params, name = name, kernel = false) + job = GPUCompiler.CompilerJob(GPUCompiler.methodinstance(typeof(f), tt), config) mod, meta = GPUCompiler.JuliaContext() do context GPUCompiler.codegen(:llvm, job; strip=strip_llvm, only_entry=false, validate=false, optimize=false, ctx=context) @@ -259,7 +258,7 @@ shell> ./hello Hello, world! 
``` """ -function compile_executable(f::Function, types=(), path::String="./", name=GPUCompiler.safe_name(repr(f)); +function compile_executable(f::Function, types=(), path::String="./", name=fix_name(repr(f)); filename=name, cflags=``, kwargs... @@ -315,7 +314,7 @@ julia> ccall(("julia_test", "test.dylib"), Float64, (Int64,), 100_000) 5.2564961094956075 ``` """ -function compile_shlib(f::Function, types=(), path::String="./", name=GPUCompiler.safe_name(repr(f)); +function compile_shlib(f::Function, types=(), path::String="./", name=fix_name(repr(f)); filename=name, cflags=``, kwargs... @@ -396,19 +395,19 @@ julia> test(100_000) function generate_shlib_fptr(path::String, name, filename::String=name) lib_path = joinpath(abspath(path), "$filename.$(Libdl.dlext)") ptr = Libdl.dlopen(lib_path, Libdl.RTLD_LOCAL) - fptr = Libdl.dlsym(ptr, "julia_$name") + fptr = Libdl.dlsym(ptr, name) @assert fptr != C_NULL fptr end # As above, but also compile (maybe remove this method in the future?) -function generate_shlib_fptr(f, tt, path::String=tempname(), name = GPUCompiler.safe_name(repr(f)), filename::String=name; +function generate_shlib_fptr(f, tt, path::String=tempname(), name = fix_name(repr(f)), filename::String=name; temp::Bool=true, kwargs...) generate_shlib(f, tt, false, path, name; kwargs...) lib_path = joinpath(abspath(path), "$filename.$(Libdl.dlext)") ptr = Libdl.dlopen(lib_path, Libdl.RTLD_LOCAL) - fptr = Libdl.dlsym(ptr, "julia_$name") + fptr = Libdl.dlsym(ptr, name) @assert fptr != C_NULL if temp atexit(()->rm(path; recursive=true)) @@ -436,7 +435,7 @@ shell> ./hello Hello, world! ``` """ -function generate_executable(f, tt, path=tempname(), name=GPUCompiler.safe_name(repr(f)), filename=string(name); +function generate_executable(f, tt, path=tempname(), name=fix_name(repr(f)), filename=string(name); cflags=``, kwargs... ) @@ -456,18 +455,18 @@ function generate_executable(f, tt, path=tempname(), name=GPUCompiler.safe_name( # Compile! 
if Sys.isapple() # Apple no longer uses _start, so we can just specify a custom entry - entry = "_julia_$name" + entry = "_$name" run(`$cc -e $entry $cflags $obj_path -o $exec_path`) else # Write a minimal wrapper to avoid having to specify a custom entry wrapper_path = joinpath(path, "wrapper.c") f = open(wrapper_path, "w") - print(f, """int julia_$name(int argc, char** argv); + print(f, """int $name(int argc, char** argv); void* __stack_chk_guard = (void*) $(rand(UInt) >> 1); int main(int argc, char** argv) { - julia_$name(argc, argv); + $name(argc, argv); return 0; }""") close(f) @@ -517,7 +516,7 @@ julia> ccall(("julia_test", "example/test.dylib"), Float64, (Int64,), 100_000) 5.2564961094956075 ``` """ -function generate_shlib(f::Function, tt, external::Bool=true, path::String=tempname(), name=GPUCompiler.safe_name(repr(f)), filename=name; +function generate_shlib(f::Function, tt, external::Bool=true, path::String=tempname(), name=fix_name(repr(f)), filename=name; cflags=``, kwargs... ) @@ -568,7 +567,7 @@ function native_code_typed(@nospecialize(func), @nospecialize(types); kwargs...) end # Return an LLVM module -function native_llvm_module(f, tt, name = GPUCompiler.safe_name(repr(f)); kwargs...) +function native_llvm_module(f, tt, name = fix_name(repr(f)); kwargs...) job, kwargs = native_job(f, tt, true; name, kwargs...) m, _ = GPUCompiler.JuliaContext() do context GPUCompiler.codegen(:llvm, job; strip=true, only_entry=false, validate=false, ctx=context) @@ -576,7 +575,7 @@ function native_llvm_module(f, tt, name = GPUCompiler.safe_name(repr(f)); kwargs return m end -function native_code_native(@nospecialize(f), @nospecialize(tt), name = GPUCompiler.safe_name(repr(f)); kwargs...) +function native_code_native(@nospecialize(f), @nospecialize(tt), name = fix_name(repr(f)); kwargs...) job, kwargs = native_job(f, tt, true; name, kwargs...) GPUCompiler.code_native(stdout, job; kwargs...) 
end @@ -587,7 +586,7 @@ function native_llvm_module(funcs::Array; demangle = false, kwargs...) mod = native_llvm_module(f,tt, kwargs...) if length(funcs) > 1 for func in funcs[2:end] - @show f,tt = func + f,tt = func tmod = native_llvm_module(f,tt, kwargs...) link!(mod,tmod) end @@ -613,10 +612,10 @@ function generate_obj(funcs::Array, external, path::String = tempname(), filenam strip_asm = true, opt_level=3, kwargs...) - f,tt = funcs[1] + f, tt = funcs[1] mkpath(path) obj_path = joinpath(path, "$filenamebase.o") - fakejob, kwargs = native_job(f,tt, external, kwargs...) + fakejob, kwargs = native_job(f, tt, external, kwargs...) mod = native_llvm_module(funcs; demangle = demangle, kwargs...) obj, _ = GPUCompiler.emit_asm(fakejob, mod; strip=strip_asm, validate=false, format=LLVM.API.LLVMObjectFile) open(obj_path, "w") do io diff --git a/src/code_loading.jl b/src/code_loading.jl index 665df22..8fc1ae6 100644 --- a/src/code_loading.jl +++ b/src/code_loading.jl @@ -50,7 +50,7 @@ function instantiate(p::LazyStaticCompiledFunction{rt, tt}) where {rt, tt} dg = LLVM.CreateDynamicLibrarySearchGeneratorForProcess(prefix) LLVM.add!(jd, dg) LLVM.add!(lljit, jd, ofile) - fptr = pointer(LLVM.lookup(lljit, "julia_" * p.name)) + fptr = pointer(LLVM.lookup(lljit, p.name)) StaticCompiledFunction{rt, tt}(p.f, fptr, lljit, p.reloc) end diff --git a/src/interpreter.jl b/src/interpreter.jl index 09ffd4a..89433c9 100644 --- a/src/interpreter.jl +++ b/src/interpreter.jl @@ -3,7 +3,7 @@ using Core.Compiler: AbstractInterpreter, InferenceResult, InferenceParams, InferenceState, MethodInstance, OptimizationParams, WorldView using GPUCompiler: - @safe_debug, AbstractCompilerParams, CodeCache, CompilerJob, FunctionSpec + @safe_debug, AbstractCompilerParams, CodeCache, CompilerJob, methodinstance using CodeInfoTools using CodeInfoTools: resolve diff --git a/src/pointer_patching.jl b/src/pointer_patching.jl index 822d8e9..1feebe8 100644 --- a/src/pointer_patching.jl +++ 
b/src/pointer_patching.jl @@ -137,7 +137,7 @@ function get_pointers!(d, mod, inst) _, gv = d[val] LLVM.API.LLVMSetOperand(inst, i-1, gv) else - gv_name = GPUCompiler.safe_name(String(gensym(repr(Core.Typeof(val))))) + gv_name = fix_name(String(gensym(repr(Core.Typeof(val))))) gv = LLVM.GlobalVariable(mod, llvmeltype(arg), gv_name, LLVM.addrspace(llvmtype(arg))) LLVM.extinit!(gv, true) @@ -157,7 +157,7 @@ llvmeltype(x::LLVM.Value) = eltype(LLVM.llvmtype(x)) function pointer_patching_diff(f, tt, path1=tempname(), path2=tempname(); show_reloc_table=false) tm = GPUCompiler.llvm_machine(NativeCompilerTarget()) - job, kwargs = native_job(f, tt, false; name=GPUCompiler.safe_name(repr(f))) + job, kwargs = native_job(f, tt, false; name=fix_name(repr(f))) #Get LLVM to generated a module of code for us. We don't want GPUCompiler's optimization passes. mod, meta = GPUCompiler.JuliaContext() do context GPUCompiler.codegen(:llvm, job; strip=true, only_entry=false, validate=false, optimize=false, ctx=context) diff --git a/src/target.jl b/src/target.jl index 28be261..8ea16ca 100644 --- a/src/target.jl +++ b/src/target.jl @@ -72,7 +72,7 @@ for target in (:NativeCompilerTarget, :ExternalNativeCompilerTarget) return tm end - GPUCompiler.runtime_slug(job::GPUCompiler.CompilerJob{$target}) = "native_$(job.target.cpu)-$(hash(job.target.features))" + GPUCompiler.runtime_slug(job::GPUCompiler.CompilerJob{$target}) = "native_$(job.config.target.cpu)-$(hash(job.config.target.features))" GPUCompiler.runtime_module(::GPUCompiler.CompilerJob{$target}) = StaticRuntime GPUCompiler.runtime_module(::GPUCompiler.CompilerJob{$target, StaticCompilerParams}) = StaticRuntime @@ -82,10 +82,10 @@ for target in (:NativeCompilerTarget, :ExternalNativeCompilerTarget) GPUCompiler.can_throw(job::GPUCompiler.CompilerJob{$target}) = true GPUCompiler.get_interpreter(job::GPUCompiler.CompilerJob{$target, StaticCompilerParams}) = - StaticInterpreter(job.params.cache, GPUCompiler.method_table(job), 
job.source.world, + StaticInterpreter(job.config.params.cache, GPUCompiler.method_table(job), job.world, GPUCompiler.inference_params(job), GPUCompiler.optimization_params(job), - job.params.mixtape) - GPUCompiler.ci_cache(job::GPUCompiler.CompilerJob{$target, StaticCompilerParams}) = job.params.cache + job.config.params.mixtape) + GPUCompiler.ci_cache(job::GPUCompiler.CompilerJob{$target, StaticCompilerParams}) = job.config.params.cache end end @@ -94,20 +94,22 @@ GPUCompiler.method_table(@nospecialize(job::GPUCompiler.CompilerJob{ExternalNati function native_job(@nospecialize(func::Function), @nospecialize(types::Type), external::Bool; mixtape = NoContext(), - name = GPUCompiler.safe_name(repr(func)), + name = fix_name(repr(func)), kernel::Bool = false, target = (), kwargs... ) - source = GPUCompiler.FunctionSpec(func, types, kernel, name) + source = methodinstance(typeof(func), Base.to_tuple_type(types)) target = external ? ExternalNativeCompilerTarget(;target...) : NativeCompilerTarget(;target...) params = StaticCompilerParams(mixtape = mixtape) - StaticCompiler.CompilerJob(target, source, params), kwargs + config = GPUCompiler.CompilerConfig(target, params, name = name, kernel = kernel) + StaticCompiler.CompilerJob(source, config), kwargs end -function native_job(@nospecialize(func), @nospecialize(types), external; mixtape = NoContext(), kernel::Bool=false, name=GPUCompiler.safe_name(repr(func)), target = (), kwargs...) - source = GPUCompiler.FunctionSpec(func, Base.to_tuple_type(types), kernel, name) +function native_job(@nospecialize(func), @nospecialize(types), external; mixtape = NoContext(), kernel::Bool=false, name=fix_name(repr(func)), target = (), kwargs...) + source = methodinstance(typeof(func), Base.to_tuple_type(types)) target = external ? ExternalNativeCompilerTarget(;target...) : NativeCompilerTarget(;target...) 
params = StaticCompilerParams(mixtape = mixtape) - GPUCompiler.CompilerJob(target, source, params), kwargs + config = GPUCompiler.CompilerConfig(target, params, name = name, kernel = kernel) + GPUCompiler.CompilerJob(source, config), kwargs end diff --git a/test/Project.toml b/test/Project.toml index c9d1de5..cc88d4e 100644 --- a/test/Project.toml +++ b/test/Project.toml @@ -3,14 +3,14 @@ CodeInfoTools = "bc773b8a-8374-437a-b9f2-0e9785855863" Distributed = "8ba89e20-285c-5b6f-9357-94700520ee1b" Formatting = "59287772-0a20-5a39-b81b-1366585eb4c0" GPUCompiler = "61eb1bfa-7361-4325-ad38-22787b887f55" -Infiltrator = "5903a43b-9cc3-4c30-8d17-598619ec4e9b" +LLD_jll = "d55e3150-da41-5e91-b323-ecfd1eec6109" +LLVM = "929cbde3-209d-540e-8aea-75f648917ca0" Libdl = "8f399da3-3557-5675-b5ff-fb832c97cbdb" LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e" -LLVM = "929cbde3-209d-540e-8aea-75f648917ca0" LoopVectorization = "bdcacae8-1622-11e9-2a5c-532679323890" MacroTools = "1914dd2f-81c6-5fcd-8719-6d5c9610ff09" ManualMemory = "d125e4d3-2237-4719-b19c-fa641b8a4667" StaticTools = "86c06d3c-3f03-46de-9781-57580aa96d0a" StrideArraysCore = "7792a7ef-975c-4747-a70f-980b88e8d1da" -Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40" TOML = "fa267f1f-6049-4f14-aa54-33bafae1ed76" +Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40" diff --git a/test/runtests.jl b/test/runtests.jl index d117aa5..697ef7e 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -9,6 +9,7 @@ using StaticTools using StrideArraysCore using CodeInfoTools using MacroTools +using LLD_jll addprocs(1) @everywhere using StaticCompiler, StrideArraysCore diff --git a/test/testcore.jl b/test/testcore.jl index 450fbda..68555df 100644 --- a/test/testcore.jl +++ b/test/testcore.jl @@ -232,13 +232,13 @@ end # fib(n) = n <= 1 ? 
n : fib(n - 1) + fib(n - 2) #Compile dylib - name = repr(fib) + name = "julia_" * repr(fib) filepath = compile_shlib(fib, (Int,), "./", name) @test occursin("fib.$(Libdl.dlext)", filepath) # Open dylib ptr = Libdl.dlopen(filepath, Libdl.RTLD_LOCAL) - fptr = Libdl.dlsym(ptr, "julia_$name") + fptr = Libdl.dlsym(ptr, name) @test fptr != C_NULL @test ccall(fptr, Int, (Int,), 10) == 55 end diff --git a/test/testintegration.jl b/test/testintegration.jl index 4828c61..cc5818d 100644 --- a/test/testintegration.jl +++ b/test/testintegration.jl @@ -14,7 +14,7 @@ # faster. status = -1 try - isfile("times_table") && rm("times_table") + isfile("julia_times_table") && rm("julia_times_table") status = run(`$jlpath --startup=no --compile=min $testpath/scripts/times_table.jl`) catch e @warn "Could not compile $testpath/scripts/times_table.jl" @@ -27,7 +27,7 @@ println("5x5 times table:") status = -1 try - status = run(`./times_table 5 5`) + status = run(`./julia_times_table 5 5`) catch e @warn "Could not run $(scratch)/times_table" println(e) @@ -58,7 +58,7 @@ println("3x3 malloc arrays via do-block syntax:") status = -1 try - status = run(`./withmallocarray 3 3`) + status = run(`./julia_withmallocarray 3 3`) catch e @warn "Could not run $(scratch)/withmallocarray" println(e) @@ -85,7 +85,7 @@ println("5x5 uniform random matrix:") status = -1 try - status = run(`./rand_matrix 5 5`) + status = run(`./julia_rand_matrix 5 5`) catch e @warn "Could not run $(scratch)/rand_matrix" println(e) @@ -113,7 +113,7 @@ println("5x5 Normal random matrix:") status = -1 try - status = run(`./randn_matrix 5 5`) + status = run(`./julia_randn_matrix 5 5`) catch e @warn "Could not run $(scratch)/randn_matrix" println(e) @@ -143,7 +143,7 @@ println("10x10 table sum:") status = -1 try - status = run(`./loopvec_product 10 10`) + status = run(`./julia_loopvec_product 10 10`) catch e @warn "Could not run $(scratch)/loopvec_product" println(e) @@ -171,7 +171,7 @@ println("10x5 matrix product:") status = -1 
try - status = run(`./loopvec_matrix 10 5`) + status = run(`./julia_loopvec_matrix 10 5`) catch e @warn "Could not run $(scratch)/loopvec_matrix" println(e) @@ -202,7 +202,7 @@ println("10x5 matrix product:") status = -1 try - status = run(`./loopvec_matrix_stack`) + status = run(`./julia_loopvec_matrix_stack`) catch e @warn "Could not run $(scratch)/loopvec_matrix_stack" println(e) @@ -233,7 +233,7 @@ println("String indexing and handling:") status = -1 try - status = run(`./print_args foo bar`) + status = run(`./julia_print_args foo bar`) catch e @warn "Could not run $(scratch)/print_args" println(e) @@ -261,7 +261,7 @@ println("Error handling:") status = -1 try - status = run(`./maybe_throw 10`) + status = run(`./julia_maybe_throw 10`) catch e @warn "Could not run $(scratch)/maybe_throw" println(e) @@ -297,7 +297,7 @@ println("Interop:") status = -1 try - status = run(`./interop`) + status = run(`./julia_interop`) catch e @warn "Could not run $(scratch)/interop" println(e) @@ -383,9 +383,15 @@ struct MyMix <: CompilationContext end end -@testset "Cross compiling" begin +@testset "Cross compiling to WebAssembly" begin - x, obj_path = StaticCompiler.generate_obj(x -> 2x, Tuple{Float64}, true, tempname(); target = (triple = "wasm32-unknown-wasi", cpu = "", features = "")) + m2(x) = 2x + obj_path, name = StaticCompiler.generate_obj(m2, Tuple{Float64}, true, tempname(); target = (triple = "wasm32-unknown-unknown", cpu = "", features = "")) + # link with `lld` from LLD_jll + run(`$(lld()) -flavor wasm --no-entry --export-all $obj_path/obj.o -o $name.wasm`) + # On Julia v1.9, link with included linker + # run(`$(Base.Linking.lld()) -flavor wasm --no-entry --export-all $obj_path/obj.o -o $name.wasm`) + # run(`wasm2wat $name.wasm`) # to see a text representation (wasm2wat isn't included) end From 2ef9d757f33190936e0c0aae59bb5c4307fcd7c3 Mon Sep 17 00:00:00 2001 From: Tom Short Date: Fri, 5 May 2023 17:35:28 -0400 Subject: [PATCH 115/159] Helpers to compile to WebAssembly 
(#114) --- .github/workflows/ci.yml | 2 +- src/StaticCompiler.jl | 46 +++++++++++++++++++++++++++++++++++----- src/optimize.jl | 7 ++++-- test/testintegration.jl | 12 +++++------ 4 files changed, 53 insertions(+), 14 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 65cbcf3..12dae36 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -19,7 +19,7 @@ jobs: matrix: version: - '1.8' - - '^1.9.0-rc2' + - '^1.9.0-rc3' os: - ubuntu-latest - macOS-latest diff --git a/src/StaticCompiler.jl b/src/StaticCompiler.jl index 7fe8284..0c42dbe 100644 --- a/src/StaticCompiler.jl +++ b/src/StaticCompiler.jl @@ -8,10 +8,11 @@ using Libdl: Libdl, dlsym, dlopen using Base: RefValue using Serialization: serialize, deserialize using Clang_jll: clang +using LLD_jll: lld using StaticTools using StaticTools: @symbolcall, @c_str, println -export compile, load_function, compile_shlib, compile_executable +export compile, load_function, compile_shlib, compile_executable, compile_wasm export native_code_llvm, native_code_typed, native_llvm_module, native_code_native export @device_override, @print_and_throw @@ -154,7 +155,8 @@ function generate_obj(f, tt, external = true, path::String = tempname(), name = mixtape = NoContext(), strip_llvm = false, strip_asm = true, - opt_level=3, + opt_level = 3, + remove_julia_addrspaces = false, target = (), kwargs...) mkpath(path) @@ -180,7 +182,7 @@ function generate_obj(f, tt, external = true, path::String = tempname(), name = # Now that we've removed all the pointers from the code, we can (hopefully) safely lower all the instrinsics # (again, using Enzyme's pipeline) - post_optimize!(mod, tm) + post_optimize!(mod, tm; remove_julia_addrspaces) # Make sure we didn't make any glaring errors LLVM.verify(mod) @@ -355,6 +357,40 @@ function compile_shlib(funcs::Array, path::String="./"; joinpath(abspath(path), filename * "." 
* Libdl.dlext) end +""" +```julia +compile_wasm(f::Function, types::Tuple, [path::String="./"], [name::String=repr(f)]; filename::String=name, flags=``, kwargs...) +compile_wasm(funcs::Array, [path::String="./"]; filename="libfoo", demangle=false, flags=``, kwargs...) +``` +As `compile_shlib`, but compiling to a WebAssembly library. + +The compiled function is by default given the symbol name `julia_$(name)`, i.e., +the function `test` in the example below is called `julia_test` in the shared library. +The keword argument `demangle=true` will remove this prefix, but is currently only +supported the second (multi-function-shlib) method. +``` +""" +function compile_wasm(f::Function, types=(); + path::String="./", + filename=fix_name(repr(f)), + flags=``, + kwargs... + ) + tt = Base.to_tuple_type(types) + obj_path, name = generate_obj(f, tt, true, path, filename; target = (triple = "wasm32-unknown-wasi", cpu = "", features = ""), remove_julia_addrspaces = true, kwargs...) + run(`$(lld()) -flavor wasm --no-entry --export-all $flags $obj_path/obj.o -o $path/$name.wasm`) + joinpath(abspath(path), filename * ".wasm") +end +function compile_wasm(funcs::Array; + path::String="./", + filename="libfoo", + flags=``, + kwargs... + ) + obj_path, name = generate_obj(funcs, true; target = (triple = "wasm32-unknown-wasi", cpu = "", features = ""), remove_julia_addrspaces = true, kwargs...) + run(`$(lld()) -flavor wasm --no-entry --export-all $flags $obj_path/obj.o -o $path/$filename.wasm`) + joinpath(abspath(path), filename * ".wasm") +end """ ```julia @@ -606,7 +642,7 @@ function native_llvm_module(funcs::Array; demangle = false, kwargs...) 
return mod end -function generate_obj(funcs::Array, external, path::String = tempname(), filenamebase::String="obj"; +function generate_obj(funcs::Array, external::Bool, path::String = tempname(), filenamebase::String="obj"; demangle =false, strip_llvm = false, strip_asm = true, @@ -615,7 +651,7 @@ function generate_obj(funcs::Array, external, path::String = tempname(), filenam f, tt = funcs[1] mkpath(path) obj_path = joinpath(path, "$filenamebase.o") - fakejob, kwargs = native_job(f, tt, external, kwargs...) + fakejob, kwargs = native_job(f, tt, external; kwargs...) mod = native_llvm_module(funcs; demangle = demangle, kwargs...) obj, _ = GPUCompiler.emit_asm(fakejob, mod; strip=strip_asm, validate=false, format=LLVM.API.LLVMObjectFile) open(obj_path, "w") do io diff --git a/src/optimize.jl b/src/optimize.jl index f2934a1..0bc4646 100644 --- a/src/optimize.jl +++ b/src/optimize.jl @@ -211,8 +211,8 @@ function addJuliaLegalizationPasses!(pm, lower_intrinsics=true) remove_ni!(pm) end end - -function post_optimize!(mod, tm) + +function post_optimize!(mod, tm; remove_julia_addrspaces = false) # @show "pre_post", mod # flush(stdout) # flush(stderr) @@ -224,6 +224,9 @@ function post_optimize!(mod, tm) LLVM.ModulePassManager() do pm addJuliaLegalizationPasses!(pm, true) addMachinePasses!(pm) + if remove_julia_addrspaces + remove_julia_addrspaces!(pm) + end run!(pm, mod) end # @show "post_mod", mod diff --git a/test/testintegration.jl b/test/testintegration.jl index cc5818d..7582df0 100644 --- a/test/testintegration.jl +++ b/test/testintegration.jl @@ -386,12 +386,12 @@ end @testset "Cross compiling to WebAssembly" begin m2(x) = 2x - obj_path, name = StaticCompiler.generate_obj(m2, Tuple{Float64}, true, tempname(); target = (triple = "wasm32-unknown-unknown", cpu = "", features = "")) - # link with `lld` from LLD_jll - run(`$(lld()) -flavor wasm --no-entry --export-all $obj_path/obj.o -o $name.wasm`) - # On Julia v1.9, link with included linker - # 
run(`$(Base.Linking.lld()) -flavor wasm --no-entry --export-all $obj_path/obj.o -o $name.wasm`) - # run(`wasm2wat $name.wasm`) # to see a text representation (wasm2wat isn't included) + m3(x) = 3x + wasm_path = compile_wasm(m2, Tuple{Float64}) + wasm_path2 = compile_wasm([(m2, Tuple{Float64}), (m3, Tuple{Float64})]) + + wasm_path = compile_wasm(m2, (Float64,)) + wasm_path2 = compile_wasm([(m2, (Float64,)), (m3, (Float64,))]) end From 7a141cbe0c1a0ab17c13308098e6eb10f6d346da Mon Sep 17 00:00:00 2001 From: Mason Protter Date: Fri, 5 May 2023 18:46:20 -0600 Subject: [PATCH 116/159] actually test `maybe_throw` --- test/testcore.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/testcore.jl b/test/testcore.jl index 68555df..be2eed3 100644 --- a/test/testcore.jl +++ b/test/testcore.jl @@ -293,7 +293,7 @@ end # Compile a function that definitely fails @inline foo_err() = UInt64(-1) - filepath = compile_executable(print_args, (Int, Ptr{Ptr{UInt8}}), tempdir()) + filepath = compile_executable(maybe_throw, (Int, Ptr{Ptr{UInt8}}), tempdir()) @test isfile(filepath) status = -1 try From 99d5455875be80f10df5c04e0e74e5a42802026d Mon Sep 17 00:00:00 2001 From: Mason Protter Date: Fri, 5 May 2023 18:51:53 -0600 Subject: [PATCH 117/159] Update testcore.jl --- test/testcore.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/testcore.jl b/test/testcore.jl index be2eed3..83a6b64 100644 --- a/test/testcore.jl +++ b/test/testcore.jl @@ -293,7 +293,7 @@ end # Compile a function that definitely fails @inline foo_err() = UInt64(-1) - filepath = compile_executable(maybe_throw, (Int, Ptr{Ptr{UInt8}}), tempdir()) + filepath = compile_executable(foo_err, (Int, Ptr{Ptr{UInt8}}), tempdir()) @test isfile(filepath) status = -1 try From 4f10d57c25f6742cdd461f852ff96d8dd1f27610 Mon Sep 17 00:00:00 2001 From: Mason Protter Date: Fri, 5 May 2023 18:55:29 -0600 Subject: [PATCH 118/159] provide correct signature --- test/testcore.jl | 2 +- 1 file changed, 1 
insertion(+), 1 deletion(-) diff --git a/test/testcore.jl b/test/testcore.jl index 83a6b64..4c0bc5f 100644 --- a/test/testcore.jl +++ b/test/testcore.jl @@ -293,7 +293,7 @@ end # Compile a function that definitely fails @inline foo_err() = UInt64(-1) - filepath = compile_executable(foo_err, (Int, Ptr{Ptr{UInt8}}), tempdir()) + filepath = compile_executable(foo_err, (), tempdir()) @test isfile(filepath) status = -1 try From c3f7340a1ed0c29733ffe543d3297234102ce058 Mon Sep 17 00:00:00 2001 From: Mason Protter Date: Mon, 8 May 2023 15:59:16 -0600 Subject: [PATCH 119/159] Re-add nightly tests and integration tests (#117) * Re-add ci-julia-nightly.yml * Create ci-integration.yml * Create ci-integration-nightly.yml --- .github/workflows/ci-integration-nightly.yml | 39 ++++++++++++++++++++ .github/workflows/ci-integration.yml | 38 +++++++++++++++++++ .github/workflows/ci-julia-nightly.yml | 37 +++++++++++++++++++ 3 files changed, 114 insertions(+) create mode 100644 .github/workflows/ci-integration-nightly.yml create mode 100644 .github/workflows/ci-integration.yml create mode 100644 .github/workflows/ci-julia-nightly.yml diff --git a/.github/workflows/ci-integration-nightly.yml b/.github/workflows/ci-integration-nightly.yml new file mode 100644 index 0000000..6d4cb3e --- /dev/null +++ b/.github/workflows/ci-integration-nightly.yml @@ -0,0 +1,39 @@ +name: CI (Integration nightly) +on: + push: + branches: + - '**' + paths-ignore: + - 'README.md' + pull_request: + branches: + - master + paths-ignore: + - 'README.md' +jobs: + test-integration-nightly: + name: Julia ${{ matrix.version }} - ${{ matrix.os }} - ${{ matrix.arch }} + runs-on: ${{ matrix.os }} + strategy: + fail-fast: false + matrix: + version: + - 'nightly' + - '~1.9.0-0' + os: + - ubuntu-latest + - macOS-latest + arch: + - x64 + group: + - Integration + steps: + - uses: actions/checkout@v2 + - uses: julia-actions/setup-julia@latest + with: + version: ${{ matrix.version }} + arch: ${{ matrix.arch }} + - uses: 
julia-actions/julia-buildpkg@latest + - uses: julia-actions/julia-runtest@latest + env: + GROUP: ${{ matrix.group }} diff --git a/.github/workflows/ci-integration.yml b/.github/workflows/ci-integration.yml new file mode 100644 index 0000000..3b9024d --- /dev/null +++ b/.github/workflows/ci-integration.yml @@ -0,0 +1,38 @@ +name: CI (Integration) +on: + push: + branches: + - '**' + paths-ignore: + - 'README.md' + pull_request: + branches: + - master + paths-ignore: + - 'README.md' +jobs: + test-integration: + name: Julia ${{ matrix.version }} - ${{ matrix.os }} - ${{ matrix.arch }} + runs-on: ${{ matrix.os }} + strategy: + fail-fast: false + matrix: + version: + - '1.8' + os: + - ubuntu-latest + - macOS-latest + arch: + - x64 + group: + - Integration + steps: + - uses: actions/checkout@v2 + - uses: julia-actions/setup-julia@latest + with: + version: ${{ matrix.version }} + arch: ${{ matrix.arch }} + - uses: julia-actions/julia-buildpkg@latest + - uses: julia-actions/julia-runtest@latest + env: + GROUP: ${{ matrix.group }} diff --git a/.github/workflows/ci-julia-nightly.yml b/.github/workflows/ci-julia-nightly.yml new file mode 100644 index 0000000..4cdbcfa --- /dev/null +++ b/.github/workflows/ci-julia-nightly.yml @@ -0,0 +1,37 @@ +name: CI (Julia nightly) +on: + push: + branches: + - '**' + paths-ignore: + - 'README.md' + pull_request: + branches: + - master + paths-ignore: + - 'README.md' +jobs: + test-julia-nightly: + name: Julia ${{ matrix.version }} - ${{ matrix.os }} - ${{ matrix.arch }} + runs-on: ${{ matrix.os }} + strategy: + fail-fast: false + matrix: + version: + - 'nightly' + os: + - ubuntu-latest + arch: + - x64 + group: + - Core + steps: + - uses: actions/checkout@v2 + - uses: julia-actions/setup-julia@latest + with: + version: ${{ matrix.version }} + arch: ${{ matrix.arch }} + - uses: julia-actions/julia-buildpkg@latest + - uses: julia-actions/julia-runtest@latest + env: + GROUP: ${{ matrix.group }} From a076a623b2c8dcac449b9e29e42a23c9fe84878a Mon 
Sep 17 00:00:00 2001 From: Mason Protter Date: Mon, 8 May 2023 18:22:36 -0600 Subject: [PATCH 120/159] Fixup with current versions (#120) * add more escaping slashes to docstring * update llvmtype -> value_type * properly splat kwargs * test 1.9.0-rc3 in the integration tests * remove 1.9.0 from the nightly integration tests * fix for julia master * remove some --compile=min * [noci] bump version --- .github/workflows/ci-integration-nightly.yml | 1 - .github/workflows/ci-integration.yml | 1 + Project.toml | 2 +- src/StaticCompiler.jl | 8 ++++---- src/interpreter.jl | 11 ++++++++--- src/pointer_patching.jl | 4 ++-- test/testintegration.jl | 4 ++-- 7 files changed, 18 insertions(+), 13 deletions(-) diff --git a/.github/workflows/ci-integration-nightly.yml b/.github/workflows/ci-integration-nightly.yml index 6d4cb3e..00074ac 100644 --- a/.github/workflows/ci-integration-nightly.yml +++ b/.github/workflows/ci-integration-nightly.yml @@ -19,7 +19,6 @@ jobs: matrix: version: - 'nightly' - - '~1.9.0-0' os: - ubuntu-latest - macOS-latest diff --git a/.github/workflows/ci-integration.yml b/.github/workflows/ci-integration.yml index 3b9024d..da4da2d 100644 --- a/.github/workflows/ci-integration.yml +++ b/.github/workflows/ci-integration.yml @@ -18,6 +18,7 @@ jobs: fail-fast: false matrix: version: + - '1.9.0-rc3' - '1.8' os: - ubuntu-latest diff --git a/Project.toml b/Project.toml index 340a75a..4037514 100644 --- a/Project.toml +++ b/Project.toml @@ -1,7 +1,7 @@ name = "StaticCompiler" uuid = "81625895-6c0f-48fc-b932-11a18313743c" authors = ["Tom Short and contributors"] -version = "0.4.10" +version = "0.4.11" [deps] Clang_jll = "0ee61d77-7f21-5576-8119-9fcc46b10100" diff --git a/src/StaticCompiler.jl b/src/StaticCompiler.jl index 0c42dbe..79f7c86 100644 --- a/src/StaticCompiler.jl +++ b/src/StaticCompiler.jl @@ -211,7 +211,7 @@ julia> using StaticCompiler julia> function puts(s::Ptr{UInt8}) # Can't use Base.println because it allocates. 
# Note, this `llvmcall` requires Julia 1.8+ - Base.llvmcall((\""" + Base.llvmcall((\"\"\" ; External declaration of the puts function declare i32 @puts(i8* nocapture) nounwind @@ -220,7 +220,7 @@ julia> function puts(s::Ptr{UInt8}) # Can't use Base.println because it allocate %call = call i32 (i8*) @puts(i8* %0) ret i32 0 } - \""", "main"), Int32, Tuple{Ptr{UInt8}}, s) + \"\"\", "main"), Int32, Tuple{Ptr{UInt8}}, s) end puts (generic function with 1 method) @@ -619,11 +619,11 @@ end #Return an LLVM module for multiple functions function native_llvm_module(funcs::Array; demangle = false, kwargs...) f,tt = funcs[1] - mod = native_llvm_module(f,tt, kwargs...) + mod = native_llvm_module(f,tt; kwargs...) if length(funcs) > 1 for func in funcs[2:end] f,tt = func - tmod = native_llvm_module(f,tt, kwargs...) + tmod = native_llvm_module(f,tt; kwargs...) link!(mod,tmod) end end diff --git a/src/interpreter.jl b/src/interpreter.jl index 89433c9..c0e00c3 100644 --- a/src/interpreter.jl +++ b/src/interpreter.jl @@ -1,7 +1,7 @@ ## interpreter using Core.Compiler: - AbstractInterpreter, InferenceResult, InferenceParams, InferenceState, MethodInstance, OptimizationParams, WorldView + AbstractInterpreter, InferenceResult, InferenceParams, InferenceState, MethodInstance, OptimizationParams, WorldView, get_world_counter using GPUCompiler: @safe_debug, AbstractCompilerParams, CodeCache, CompilerJob, methodinstance using CodeInfoTools @@ -85,8 +85,13 @@ function custom_pass!(interp::StaticInterpreter, result::InferenceResult, mi::Co return src end -function InferenceState(result::InferenceResult, cache::Symbol, interp::StaticInterpreter) - src = Core.Compiler.retrieve_code_info(result.linfo) +function Core.Compiler.InferenceState(result::InferenceResult, cache::Symbol, interp::StaticInterpreter) + world = get_world_counter(interp) + src = @static if VERSION >= v"1.10.0-DEV.873" + Core.Compiler.retrieve_code_info(result.linfo, world) + else + 
Core.Compiler.retrieve_code_info(result.linfo) + end mi = result.linfo src = custom_pass!(interp, result, mi, src) src === nothing && return nothing diff --git a/src/pointer_patching.jl b/src/pointer_patching.jl index 1feebe8..c361c3d 100644 --- a/src/pointer_patching.jl +++ b/src/pointer_patching.jl @@ -138,7 +138,7 @@ function get_pointers!(d, mod, inst) LLVM.API.LLVMSetOperand(inst, i-1, gv) else gv_name = fix_name(String(gensym(repr(Core.Typeof(val))))) - gv = LLVM.GlobalVariable(mod, llvmeltype(arg), gv_name, LLVM.addrspace(llvmtype(arg))) + gv = LLVM.GlobalVariable(mod, llvmeltype(arg), gv_name, LLVM.addrspace(value_type(arg))) LLVM.extinit!(gv, true) LLVM.API.LLVMSetOperand(inst, i-1, gv) @@ -153,7 +153,7 @@ function get_pointers!(d, mod, inst) end end -llvmeltype(x::LLVM.Value) = eltype(LLVM.llvmtype(x)) +llvmeltype(x::LLVM.Value) = eltype(LLVM.value_type(x)) function pointer_patching_diff(f, tt, path1=tempname(), path2=tempname(); show_reloc_table=false) tm = GPUCompiler.llvm_machine(NativeCompilerTarget()) diff --git a/test/testintegration.jl b/test/testintegration.jl index 7582df0..0dacb82 100644 --- a/test/testintegration.jl +++ b/test/testintegration.jl @@ -159,7 +159,7 @@ status = -1 try isfile("loopvec_matrix") && rm("loopvec_matrix") - status = run(`$jlpath --startup=no --compile=min $testpath/scripts/loopvec_matrix.jl`) + status = run(`$jlpath --startup=no $testpath/scripts/loopvec_matrix.jl`) catch e @warn "Could not compile $testpath/scripts/loopvec_matrix.jl" println(e) @@ -190,7 +190,7 @@ status = -1 try isfile("loopvec_matrix_stack") && rm("loopvec_matrix_stack") - status = run(`$jlpath --startup=no --compile=min $testpath/scripts/loopvec_matrix_stack.jl`) + status = run(`$jlpath --startup=no $testpath/scripts/loopvec_matrix_stack.jl`) catch e @warn "Could not compile $testpath/scripts/loopvec_matrix_stack.jl" println(e) From 70d406566e7c24c7a3487a9b11e742e78c9dfcd8 Mon Sep 17 00:00:00 2001 From: Mason Protter Date: Wed, 10 May 2023 13:52:22 
-0600 Subject: [PATCH 121/159] Cleanup and more support for arrays of functions (reland) (#122) * Cleanup and more support for arrays of functions * switch wasm back to generate_obj_for_compile --- src/StaticCompiler.jl | 140 +++++++++++++++++++++++++++++++----------- 1 file changed, 103 insertions(+), 37 deletions(-) diff --git a/src/StaticCompiler.jl b/src/StaticCompiler.jl index 79f7c86..7c9c103 100644 --- a/src/StaticCompiler.jl +++ b/src/StaticCompiler.jl @@ -106,7 +106,7 @@ function compile(f, _tt, path::String = tempname(); rt = last(only(native_code_typed(f, tt, mixtape = mixtape))) isconcretetype(rt) || error("$f on $_tt did not infer to a concrete type. Got $rt") f_wrap!(out::Ref, args::Ref{<:Tuple}) = (out[] = f(args[]...); nothing) - _, _, table = generate_obj(f_wrap!, Tuple{RefValue{rt}, RefValue{tt}}, false, path, name; mixtape = mixtape, opt_level, strip_llvm, strip_asm, filename, kwargs...) + _, _, table = generate_obj_for_compile(f_wrap!, Tuple{RefValue{rt}, RefValue{tt}}, false, path, name; mixtape = mixtape, opt_level, strip_llvm, strip_asm, filename, kwargs...) lf = LazyStaticCompiledFunction{rt, tt}(Symbol(f), path, name, filename, table) cjl_path = joinpath(path, "$filename.cjl") @@ -117,7 +117,7 @@ end """ ```julia -generate_obj(f, tt, path::String = tempname(), name = fix_name(repr(f)), filenamebase::String="obj"; +generate_obj_for_compile(f, tt, path::String = tempname(), name = fix_name(repr(f)), filenamebase::String="obj"; \tmixtape = NoContext(), \tstrip_llvm = false, \tstrip_asm = true, @@ -141,7 +141,7 @@ The defaults compile to the native target. julia> fib(n) = n <= 1 ? 
n : fib(n - 1) + fib(n - 2) fib (generic function with 1 method) -julia> path, name, table = StaticCompiler.generate_obj(fib, Tuple{Int64}, "./test") +julia> path, name, table = StaticCompiler.generate_obj_for_compile(fib, Tuple{Int64}, "./test") ("./test", "fib", IdDict{Any, String}()) shell> tree \$path @@ -151,7 +151,7 @@ shell> tree \$path 0 directories, 1 file ``` """ -function generate_obj(f, tt, external = true, path::String = tempname(), name = fix_name(repr(f)), filenamebase::String="obj"; +function generate_obj_for_compile(f, tt, external = true, path::String = tempname(), name = fix_name(repr(f)), filenamebase::String="obj"; mixtape = NoContext(), strip_llvm = false, strip_asm = true, @@ -200,10 +200,13 @@ end compile_executable(f::Function, types::Tuple, path::String, [name::String=repr(f)]; filename::String=name, cflags=``, # Specify libraries you would like to link against, and other compiler options here + also_expose=[], kwargs... ) ``` Attempt to compile a standalone executable that runs function `f` with a type signature given by the tuple of `types`. +If there are extra methods you would like to protect from name mangling in the produced binary for whatever reason, +you can provide them as a vector of tuples of functions and types, i.e. `[(f1, types1), (f2, types2), ...]` ### Examples ```julia @@ -261,11 +264,21 @@ Hello, world! ``` """ function compile_executable(f::Function, types=(), path::String="./", name=fix_name(repr(f)); + also_expose=[], + filename=name, + cflags=``, + kwargs...) + compile_executable(vcat([(f, types)], also_expose), path, name; filename, cflags, kwargs...) +end + + +function compile_executable(funcs::Array, path::String="./", name=fix_name(repr(funcs[1][1])); filename=name, cflags=``, kwargs... 
) + (f, types) = funcs[1] tt = Base.to_tuple_type(types) isexecutableargtype = tt == Tuple{} || tt == Tuple{Int, Ptr{Ptr{UInt8}}} isexecutableargtype || @warn "input type signature $types should be either `()` or `(Int, Ptr{Ptr{UInt8}})` for standard executables" @@ -274,13 +287,11 @@ function compile_executable(f::Function, types=(), path::String="./", name=fix_n isconcretetype(rt) || error("`$f$types` did not infer to a concrete type. Got `$rt`") nativetype = isprimitivetype(rt) || isa(rt, Ptr) nativetype || @warn "Return type `$rt` of `$f$types` does not appear to be a native type. Consider returning only a single value of a native machine type (i.e., a single float, int/uint, bool, or pointer). \n\nIgnoring this warning may result in Undefined Behavior!" - - generate_executable(f, tt, path, name, filename; cflags=cflags, kwargs...) - + + generate_executable(funcs, path, name, filename; cflags=cflags, kwargs...) joinpath(abspath(path), filename) end - """ ```julia compile_shlib(f::Function, types::Tuple, [path::String="./"], [name::String=repr(f)]; filename::String=name, cflags=``, kwargs...) @@ -377,7 +388,7 @@ function compile_wasm(f::Function, types=(); kwargs... ) tt = Base.to_tuple_type(types) - obj_path, name = generate_obj(f, tt, true, path, filename; target = (triple = "wasm32-unknown-wasi", cpu = "", features = ""), remove_julia_addrspaces = true, kwargs...) + obj_path, name = generate_obj_for_compile(f, tt, true, path, filename; target = (triple = "wasm32-unknown-wasi", cpu = "", features = ""), remove_julia_addrspaces = true, kwargs...) run(`$(lld()) -flavor wasm --no-entry --export-all $flags $obj_path/obj.o -o $path/$name.wasm`) joinpath(abspath(path), filename * ".wasm") end @@ -471,21 +482,19 @@ shell> ./hello Hello, world! ``` """ -function generate_executable(f, tt, path=tempname(), name=fix_name(repr(f)), filename=string(name); - cflags=``, - kwargs... 
- ) - mkpath(path) - obj_path = joinpath(path, "$filename.o") - exec_path = joinpath(path, filename) - job, kwargs = native_job(f, tt, true; name, kwargs...) - obj, _ = GPUCompiler.codegen(:obj, job; strip=true, only_entry=false, validate=false) - - # Write to file - open(obj_path, "w") do io - write(io, obj) - end +function generate_executable(f, tt, args...; kwargs...) + generate_executable([(f, tt)], args...; kwargs...) +end +function generate_executable(funcs::Array, path=tempname(), name=fix_name(repr(funcs[1][1])), filename=string(name); + demangle=false, + cflags=``, + kwargs... + ) + lib_path = joinpath(path, "$filename.$(Libdl.dlext)") + exec_path = joinpath(path, filename) + external = true + _, obj_path = generate_obj(funcs, external, path, filename; demangle=demangle, kwargs...) # Pick a compiler cc = Sys.isapple() ? `cc` : clang() # Compile! @@ -510,10 +519,10 @@ function generate_executable(f, tt, path=tempname(), name=fix_name(repr(f)), fil # Clean up run(`rm $wrapper_path`) end - path, name end + """ ```julia generate_shlib(f::Function, tt, [external::Bool=true], [path::String], [name], [filename]; kwargs...) @@ -553,23 +562,15 @@ julia> ccall(("julia_test", "example/test.dylib"), Float64, (Int64,), 100_000) ``` """ function generate_shlib(f::Function, tt, external::Bool=true, path::String=tempname(), name=fix_name(repr(f)), filename=name; - cflags=``, + cflags=``, demangle=false, kwargs... ) - - mkpath(path) - obj_path = joinpath(path, "$filename.o") lib_path = joinpath(path, "$filename.$(Libdl.dlext)") - job, kwargs = native_job(f, tt, external; name, kwargs...) - obj, _ = GPUCompiler.codegen(:obj, job; strip=true, only_entry=false, validate=false) - - open(obj_path, "w") do io - write(io, obj) - end - + _, obj_path = generate_obj([(f, tt)], external, path, filename; demangle=demangle, kwargs...) + # Pick a Clang cc = Sys.isapple() ? `cc` : clang() - # Compile! 
+ # Compile run(`$cc -shared $cflags $obj_path -o $lib_path`) path, name @@ -583,7 +584,7 @@ function generate_shlib(funcs::Array, external::Bool=true, path::String=tempname lib_path = joinpath(path, "$filename.$(Libdl.dlext)") - _,obj_path = generate_obj(funcs, external, path, filename; demangle=demangle, kwargs...) + _, obj_path = generate_obj(funcs, external, path, filename; demangle=demangle, kwargs...) # Pick a Clang cc = Sys.isapple() ? `cc` : clang() # Compile! @@ -642,6 +643,71 @@ function native_llvm_module(funcs::Array; demangle = false, kwargs...) return mod end + +""" +```julia +generate_obj(f, tt, external::Bool, path::String = tempname(), filenamebase::String="obj"; + mixtape = NoContext(), + target = (), + demangle =false, + strip_llvm = false, + strip_asm = true, + opt_level=3, + kwargs...) +``` +Low level interface for compiling object code (`.o`) for for function `f` given +a tuple type `tt` characterizing the types of the arguments for which the +function will be compiled. + +`mixtape` defines a context that can be used to transform IR prior to compilation using +[Mixtape](https://github.com/JuliaCompilerPlugins/Mixtape.jl) features. + +`target` can be used to change the output target. This is useful for compiling to WebAssembly and embedded targets. +This is a named tuple with fields `triple`, `cpu`, and `features` (each of these are strings). +The defaults compile to the native target. + +### Examples +```julia +julia> fib(n) = n <= 1 ? n : fib(n - 1) + fib(n - 2) +fib (generic function with 1 method) + +julia> path, name, table = StaticCompiler.generate_obj_for_compile(fib, Tuple{Int64}, "./test") +("./test", "fib", IdDict{Any, String}()) + +shell> tree \$path +./test +└── obj.o + +0 directories, 1 file +``` +""" +function generate_obj(f, tt, args...; kwargs...) + generate_obj([(f, tt)], args...; kwargs...) 
+end + + +""" +```julia +generate_obj(funcs::Array, external::Bool, path::String = tempname(), filenamebase::String="obj"; + mixtape = NoContext(), + target = (), + demangle =false, + strip_llvm = false, + strip_asm = true, + opt_level=3, + kwargs...) +``` +Low level interface for compiling object code (`.o`) for an array of Tuples +(f, tt) where each function `f` and tuple type `tt` determine the set of methods +which will be compiled. + +`mixtape` defines a context that can be used to transform IR prior to compilation using +[Mixtape](https://github.com/JuliaCompilerPlugins/Mixtape.jl) features. + +`target` can be used to change the output target. This is useful for compiling to WebAssembly and embedded targets. +This is a named tuple with fields `triple`, `cpu`, and `features` (each of these are strings). +The defaults compile to the native target. +""" function generate_obj(funcs::Array, external::Bool, path::String = tempname(), filenamebase::String="obj"; demangle =false, strip_llvm = false, From a571cb1d473b4bfa4ba293fbfc71a8b5c2e41176 Mon Sep 17 00:00:00 2001 From: "C. 
Brenhin Keller" Date: Sun, 14 May 2023 11:41:54 +0100 Subject: [PATCH 122/159] Demangle (#123) * Attempt to fix demangling * Fixup demangling, also general cleanup * Check names after constructing llvm module * Equivalent demangling on non-mac systems * Bump version to 0.4.12 * Test against released version of 1.9 --- .github/workflows/ci-integration.yml | 2 +- .github/workflows/ci.yml | 4 +- Project.toml | 2 +- src/StaticCompiler.jl | 164 ++++++++++++--------------- src/target.jl | 14 +-- test/testcore.jl | 37 ++++-- test/testintegration.jl | 46 ++++---- 7 files changed, 139 insertions(+), 130 deletions(-) diff --git a/.github/workflows/ci-integration.yml b/.github/workflows/ci-integration.yml index da4da2d..be2fadd 100644 --- a/.github/workflows/ci-integration.yml +++ b/.github/workflows/ci-integration.yml @@ -18,8 +18,8 @@ jobs: fail-fast: false matrix: version: - - '1.9.0-rc3' - '1.8' + - '1.9' os: - ubuntu-latest - macOS-latest diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 12dae36..eb6a3fa 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -19,7 +19,7 @@ jobs: matrix: version: - '1.8' - - '^1.9.0-rc3' + - '1.9' os: - ubuntu-latest - macOS-latest @@ -30,7 +30,7 @@ jobs: include: - arch: x86 version: '1' - os: ubuntu-latest + os: ubuntu-latest steps: - uses: actions/checkout@v2 - uses: julia-actions/setup-julia@latest diff --git a/Project.toml b/Project.toml index 4037514..1fcd778 100644 --- a/Project.toml +++ b/Project.toml @@ -1,7 +1,7 @@ name = "StaticCompiler" uuid = "81625895-6c0f-48fc-b932-11a18313743c" authors = ["Tom Short and contributors"] -version = "0.4.11" +version = "0.4.12" [deps] Clang_jll = "0ee61d77-7f21-5576-8119-9fcc46b10100" diff --git a/src/StaticCompiler.jl b/src/StaticCompiler.jl index 7c9c103..96189b0 100644 --- a/src/StaticCompiler.jl +++ b/src/StaticCompiler.jl @@ -24,7 +24,8 @@ include("code_loading.jl") include("optimize.jl") include("quirks.jl") -fix_name(s) = string("julia_", 
GPUCompiler.safe_name(s)) +fix_name(f::Function) = fix_name(repr(f)) +fix_name(s) = String(GPUCompiler.safe_name(s)) """ compile(f, types, path::String = tempname()) --> (compiled_f, path) @@ -91,9 +92,9 @@ with `load_function(path)`. `LazyStaticcompiledfunction`s contain the requisite `StaticCompiledFunction` and may be called with arguments of type `types` as if it were a function with a single method (the method determined by `types`). """ -function compile(f, _tt, path::String = tempname(); - mixtape = NoContext(), - name = fix_name(repr(f)), +function compile(f, _tt, path::String = tempname(); + mixtape = NoContext(), + name = fix_name(f), filename = "obj", strip_llvm = false, strip_asm = true, @@ -117,7 +118,7 @@ end """ ```julia -generate_obj_for_compile(f, tt, path::String = tempname(), name = fix_name(repr(f)), filenamebase::String="obj"; +generate_obj_for_compile(f, tt, path::String = tempname(), name = fix_name(f), filenamebase::String="obj"; \tmixtape = NoContext(), \tstrip_llvm = false, \tstrip_asm = true, @@ -129,11 +130,11 @@ Low level interface for compiling object code (`.o`) for for function `f` given a tuple type `tt` characterizing the types of the arguments for which the function will be compiled. -`mixtape` defines a context that can be used to transform IR prior to compilation using +`mixtape` defines a context that can be used to transform IR prior to compilation using [Mixtape](https://github.com/JuliaCompilerPlugins/Mixtape.jl) features. `target` can be used to change the output target. This is useful for compiling to WebAssembly and embedded targets. -This is a named tuple with fields `triple`, `cpu`, and `features` (each of these are strings). +This is a named tuple with fields `triple`, `cpu`, and `features` (each of these are strings). The defaults compile to the native target. 
### Examples @@ -151,7 +152,7 @@ shell> tree \$path 0 directories, 1 file ``` """ -function generate_obj_for_compile(f, tt, external = true, path::String = tempname(), name = fix_name(repr(f)), filenamebase::String="obj"; +function generate_obj_for_compile(f, tt, external = true, path::String = tempname(), name = fix_name(f), filenamebase::String="obj"; mixtape = NoContext(), strip_llvm = false, strip_asm = true, @@ -168,7 +169,7 @@ function generate_obj_for_compile(f, tt, external = true, path::String = tempnam mod, meta = GPUCompiler.JuliaContext() do context GPUCompiler.codegen(:llvm, job; strip=strip_llvm, only_entry=false, validate=false, optimize=false, ctx=context) - end + end # Use Enzyme's annotation and optimization pipeline annotate!(mod) @@ -263,17 +264,15 @@ shell> ./hello Hello, world! ``` """ -function compile_executable(f::Function, types=(), path::String="./", name=fix_name(repr(f)); - also_expose=[], - filename=name, - cflags=``, +function compile_executable(f::Function, types=(), path::String="./", name=fix_name(f); + also_expose=Tuple{Function, Tuple{DataType}}[], kwargs...) - compile_executable(vcat([(f, types)], also_expose), path, name; filename, cflags, kwargs...) + compile_executable(vcat([(f, types)], also_expose), path, name; kwargs...) end - -function compile_executable(funcs::Array, path::String="./", name=fix_name(repr(funcs[1][1])); +function compile_executable(funcs::Union{Array,Tuple}, path::String="./", name=fix_name(first(first(funcs))); filename=name, + demangle=false, cflags=``, kwargs... ) @@ -287,8 +286,8 @@ function compile_executable(funcs::Array, path::String="./", name=fix_name(repr( isconcretetype(rt) || error("`$f$types` did not infer to a concrete type. Got `$rt`") nativetype = isprimitivetype(rt) || isa(rt, Ptr) nativetype || @warn "Return type `$rt` of `$f$types` does not appear to be a native type. Consider returning only a single value of a native machine type (i.e., a single float, int/uint, bool, or pointer). 
\n\nIgnoring this warning may result in Undefined Behavior!" - - generate_executable(funcs, path, name, filename; cflags=cflags, kwargs...) + + generate_executable(funcs, path, name, filename; demangle, cflags, kwargs...) joinpath(abspath(path), filename) end @@ -327,26 +326,14 @@ julia> ccall(("julia_test", "test.dylib"), Float64, (Int64,), 100_000) 5.2564961094956075 ``` """ -function compile_shlib(f::Function, types=(), path::String="./", name=fix_name(repr(f)); +function compile_shlib(f::Function, types=(), path::String="./", name=fix_name(f); filename=name, - cflags=``, kwargs... ) - - tt = Base.to_tuple_type(types) - isconcretetype(tt) || error("input type signature `$types` is not concrete") - - rt = last(only(native_code_typed(f, tt; kwargs...))) - isconcretetype(rt) || error("`$f$types` did not infer to a concrete type. Got `$rt`") - nativetype = isprimitivetype(rt) || isa(rt, Ptr) - nativetype || @warn "Return type `$rt` of `$f$types` does not appear to be a native type. Consider returning only a single value of a native machine type (i.e., a single float, int/uint, bool, or pointer). \n\nIgnoring this warning may result in Undefined Behavior!" - - generate_shlib(f, tt, true, path, name, filename; cflags=cflags, kwargs...) - - joinpath(abspath(path), filename * "." * Libdl.dlext) + compile_shlib(((f, types),), path; filename, kwargs...) end # As above, but taking an array of functions and returning a single shlib -function compile_shlib(funcs::Array, path::String="./"; +function compile_shlib(funcs::Union{Array,Tuple}, path::String="./"; filename="libfoo", demangle=false, cflags=``, @@ -363,7 +350,7 @@ function compile_shlib(funcs::Array, path::String="./"; nativetype || @warn "Return type `$rt` of `$f$types` does not appear to be a native type. Consider returning only a single value of a native machine type (i.e., a single float, int/uint, bool, or pointer). \n\nIgnoring this warning may result in Undefined Behavior!" 
end - generate_shlib(funcs, true, path, filename; demangle=demangle, cflags=cflags, kwargs...) + generate_shlib(funcs, true, path, filename; demangle, cflags, kwargs...) joinpath(abspath(path), filename * "." * Libdl.dlext) end @@ -371,7 +358,7 @@ end """ ```julia compile_wasm(f::Function, types::Tuple, [path::String="./"], [name::String=repr(f)]; filename::String=name, flags=``, kwargs...) -compile_wasm(funcs::Array, [path::String="./"]; filename="libfoo", demangle=false, flags=``, kwargs...) +compile_wasm(funcs::Union{Array,Tuple}, [path::String="./"]; filename="libfoo", demangle=false, flags=``, kwargs...) ``` As `compile_shlib`, but compiling to a WebAssembly library. @@ -381,9 +368,9 @@ The keword argument `demangle=true` will remove this prefix, but is currently on supported the second (multi-function-shlib) method. ``` """ -function compile_wasm(f::Function, types=(); +function compile_wasm(f::Function, types=(); path::String="./", - filename=fix_name(repr(f)), + filename=fix_name(f), flags=``, kwargs... ) @@ -392,7 +379,7 @@ function compile_wasm(f::Function, types=(); run(`$(lld()) -flavor wasm --no-entry --export-all $flags $obj_path/obj.o -o $path/$name.wasm`) joinpath(abspath(path), filename * ".wasm") end -function compile_wasm(funcs::Array; +function compile_wasm(funcs::Union{Array,Tuple}; path::String="./", filename="libfoo", flags=``, @@ -447,7 +434,7 @@ function generate_shlib_fptr(path::String, name, filename::String=name) fptr end # As above, but also compile (maybe remove this method in the future?) -function generate_shlib_fptr(f, tt, path::String=tempname(), name = fix_name(repr(f)), filename::String=name; +function generate_shlib_fptr(f, tt, path::String=tempname(), name=fix_name(f), filename::String=name; temp::Bool=true, kwargs...) @@ -482,11 +469,8 @@ shell> ./hello Hello, world! ``` """ -function generate_executable(f, tt, args...; kwargs...) - generate_executable([(f, tt)], args...; kwargs...) 
-end - -function generate_executable(funcs::Array, path=tempname(), name=fix_name(repr(funcs[1][1])), filename=string(name); +generate_executable(f, tt, args...; kwargs...) = generate_executable(((f, tt),), args...; kwargs...) +function generate_executable(funcs::Union{Array,Tuple}, path=tempname(), name=fix_name(first(first(funcs))), filename=name; demangle=false, cflags=``, kwargs... @@ -494,24 +478,25 @@ function generate_executable(funcs::Array, path=tempname(), name=fix_name(repr(f lib_path = joinpath(path, "$filename.$(Libdl.dlext)") exec_path = joinpath(path, filename) external = true - _, obj_path = generate_obj(funcs, external, path, filename; demangle=demangle, kwargs...) + _, obj_path = generate_obj(funcs, external, path, filename; demangle, kwargs...) # Pick a compiler cc = Sys.isapple() ? `cc` : clang() # Compile! if Sys.isapple() # Apple no longer uses _start, so we can just specify a custom entry - entry = "_$name" + entry = demangle ? "_$name" : "_julia_$name" run(`$cc -e $entry $cflags $obj_path -o $exec_path`) else + fn = demangle ? "$name" : "julia_$name" # Write a minimal wrapper to avoid having to specify a custom entry wrapper_path = joinpath(path, "wrapper.c") f = open(wrapper_path, "w") - print(f, """int $name(int argc, char** argv); + print(f, """int $fn(int argc, char** argv); void* __stack_chk_guard = (void*) $(rand(UInt) >> 1); int main(int argc, char** argv) { - $name(argc, argv); + $fn(argc, argv); return 0; }""") close(f) @@ -561,22 +546,11 @@ julia> ccall(("julia_test", "example/test.dylib"), Float64, (Int64,), 100_000) 5.2564961094956075 ``` """ -function generate_shlib(f::Function, tt, external::Bool=true, path::String=tempname(), name=fix_name(repr(f)), filename=name; - cflags=``, demangle=false, - kwargs... - ) - lib_path = joinpath(path, "$filename.$(Libdl.dlext)") - _, obj_path = generate_obj([(f, tt)], external, path, filename; demangle=demangle, kwargs...) - - # Pick a Clang - cc = Sys.isapple() ? 
`cc` : clang() - # Compile - run(`$cc -shared $cflags $obj_path -o $lib_path`) - - path, name +function generate_shlib(f::Function, tt, external::Bool=true, path::String=tempname(), name=fix_name(f), filename=name; kwargs...) + generate_shlib(((f, tt),), external, path, filename; kwargs...) end # As above, but taking an array of functions and returning a single shlib -function generate_shlib(funcs::Array, external::Bool=true, path::String=tempname(), filename::String="libfoo"; +function generate_shlib(funcs::Union{Array,Tuple}, external::Bool=true, path::String=tempname(), filename::String="libfoo"; demangle=false, cflags=``, kwargs... @@ -584,7 +558,7 @@ function generate_shlib(funcs::Array, external::Bool=true, path::String=tempname lib_path = joinpath(path, "$filename.$(Libdl.dlext)") - _, obj_path = generate_obj(funcs, external, path, filename; demangle=demangle, kwargs...) + _, obj_path = generate_obj(funcs, external, path, filename; demangle, kwargs...) # Pick a Clang cc = Sys.isapple() ? `cc` : clang() # Compile! @@ -603,8 +577,16 @@ function native_code_typed(@nospecialize(func), @nospecialize(types); kwargs...) GPUCompiler.code_typed(job; kwargs...) end +function native_code_native(@nospecialize(f), @nospecialize(tt), fname=fix_name(f); kwargs...) + job, kwargs = native_job(f, tt, true; fname, kwargs...) + GPUCompiler.code_native(stdout, job; kwargs...) +end + # Return an LLVM module -function native_llvm_module(f, tt, name = fix_name(repr(f)); kwargs...) +function native_llvm_module(f, tt, name=fix_name(f); demangle, kwargs...) + if !demangle + name = "julia_"*name + end job, kwargs = native_job(f, tt, true; name, kwargs...) m, _ = GPUCompiler.JuliaContext() do context GPUCompiler.codegen(:llvm, job; strip=true, only_entry=false, validate=false, ctx=context) @@ -612,28 +594,24 @@ function native_llvm_module(f, tt, name = fix_name(repr(f)); kwargs...) 
return m end -function native_code_native(@nospecialize(f), @nospecialize(tt), name = fix_name(repr(f)); kwargs...) - job, kwargs = native_job(f, tt, true; name, kwargs...) - GPUCompiler.code_native(stdout, job; kwargs...) -end - #Return an LLVM module for multiple functions -function native_llvm_module(funcs::Array; demangle = false, kwargs...) +function native_llvm_module(funcs::Union{Array,Tuple}; demangle=false, kwargs...) f,tt = funcs[1] - mod = native_llvm_module(f,tt; kwargs...) + mod = native_llvm_module(f,tt; demangle, kwargs...) if length(funcs) > 1 for func in funcs[2:end] f,tt = func - tmod = native_llvm_module(f,tt; kwargs...) + tmod = native_llvm_module(f,tt; demangle, kwargs...) link!(mod,tmod) end end - if demangle - for func in functions(mod) - fname = name(func) - if fname[1:6] == "julia_" - name!(func,fname[7:end]) - end + # Just to be sure + for (modfunc, func) in zip(functions(mod), funcs) + fname = name(modfunc) + expectedname = (demangle ? "" : "julia_") * fix_name(func) + d = prefixlen(fname) - prefixlen(expectedname) + 1 + if d > 1 + name!(modfunc,fname[d:end]) end end LLVM.ModulePassManager() do pass_manager #remove duplicate functions @@ -643,6 +621,14 @@ function native_llvm_module(funcs::Array; demangle = false, kwargs...) return mod end +function prefixlen(s) + m = match(r"^(?:julia_)+", s) + if m isa RegexMatch + length(m.match) + else + 0 + end +end """ ```julia @@ -659,11 +645,11 @@ Low level interface for compiling object code (`.o`) for for function `f` given a tuple type `tt` characterizing the types of the arguments for which the function will be compiled. -`mixtape` defines a context that can be used to transform IR prior to compilation using +`mixtape` defines a context that can be used to transform IR prior to compilation using [Mixtape](https://github.com/JuliaCompilerPlugins/Mixtape.jl) features. `target` can be used to change the output target. This is useful for compiling to WebAssembly and embedded targets. 
-This is a named tuple with fields `triple`, `cpu`, and `features` (each of these are strings). +This is a named tuple with fields `triple`, `cpu`, and `features` (each of these are strings). The defaults compile to the native target. ### Examples @@ -682,13 +668,13 @@ shell> tree \$path ``` """ function generate_obj(f, tt, args...; kwargs...) - generate_obj([(f, tt)], args...; kwargs...) + generate_obj(((f, tt),), args...; kwargs...) end """ ```julia -generate_obj(funcs::Array, external::Bool, path::String = tempname(), filenamebase::String="obj"; +generate_obj(funcs::Union{Array,Tuple}, external::Bool, path::String = tempname(), filenamebase::String="obj"; mixtape = NoContext(), target = (), demangle =false, @@ -701,24 +687,24 @@ Low level interface for compiling object code (`.o`) for an array of Tuples (f, tt) where each function `f` and tuple type `tt` determine the set of methods which will be compiled. -`mixtape` defines a context that can be used to transform IR prior to compilation using +`mixtape` defines a context that can be used to transform IR prior to compilation using [Mixtape](https://github.com/JuliaCompilerPlugins/Mixtape.jl) features. `target` can be used to change the output target. This is useful for compiling to WebAssembly and embedded targets. -This is a named tuple with fields `triple`, `cpu`, and `features` (each of these are strings). +This is a named tuple with fields `triple`, `cpu`, and `features` (each of these are strings). The defaults compile to the native target. """ -function generate_obj(funcs::Array, external::Bool, path::String = tempname(), filenamebase::String="obj"; - demangle =false, +function generate_obj(funcs::Union{Array,Tuple}, external::Bool, path::String = tempname(), filenamebase::String="obj"; + demangle = false, strip_llvm = false, - strip_asm = true, - opt_level=3, + strip_asm = true, + opt_level = 3, kwargs...) 
f, tt = funcs[1] mkpath(path) obj_path = joinpath(path, "$filenamebase.o") fakejob, kwargs = native_job(f, tt, external; kwargs...) - mod = native_llvm_module(funcs; demangle = demangle, kwargs...) + mod = native_llvm_module(funcs; demangle, kwargs...) obj, _ = GPUCompiler.emit_asm(fakejob, mod; strip=strip_asm, validate=false, format=LLVM.API.LLVMObjectFile) open(obj_path, "w") do io write(io, obj) diff --git a/src/target.jl b/src/target.jl index 8ea16ca..d9647b4 100644 --- a/src/target.jl +++ b/src/target.jl @@ -82,10 +82,10 @@ for target in (:NativeCompilerTarget, :ExternalNativeCompilerTarget) GPUCompiler.can_throw(job::GPUCompiler.CompilerJob{$target}) = true GPUCompiler.get_interpreter(job::GPUCompiler.CompilerJob{$target, StaticCompilerParams}) = - StaticInterpreter(job.config.params.cache, GPUCompiler.method_table(job), job.world, - GPUCompiler.inference_params(job), GPUCompiler.optimization_params(job), - job.config.params.mixtape) - GPUCompiler.ci_cache(job::GPUCompiler.CompilerJob{$target, StaticCompilerParams}) = job.config.params.cache + StaticInterpreter(job.config.params.cache, GPUCompiler.method_table(job), job.world, + GPUCompiler.inference_params(job), GPUCompiler.optimization_params(job), + job.config.params.mixtape) + GPUCompiler.ci_cache(job::GPUCompiler.CompilerJob{$target, StaticCompilerParams}) = job.config.params.cache end end @@ -94,12 +94,12 @@ GPUCompiler.method_table(@nospecialize(job::GPUCompiler.CompilerJob{ExternalNati function native_job(@nospecialize(func::Function), @nospecialize(types::Type), external::Bool; mixtape = NoContext(), - name = fix_name(repr(func)), + name = fix_name(func), kernel::Bool = false, target = (), kwargs... ) - source = methodinstance(typeof(func), Base.to_tuple_type(types)) + source = methodinstance(typeof(func), Base.to_tuple_type(types)) target = external ? ExternalNativeCompilerTarget(;target...) : NativeCompilerTarget(;target...) 
params = StaticCompilerParams(mixtape = mixtape) config = GPUCompiler.CompilerConfig(target, params, name = name, kernel = kernel) @@ -107,7 +107,7 @@ function native_job(@nospecialize(func::Function), @nospecialize(types::Type), e end function native_job(@nospecialize(func), @nospecialize(types), external; mixtape = NoContext(), kernel::Bool=false, name=fix_name(repr(func)), target = (), kwargs...) - source = methodinstance(typeof(func), Base.to_tuple_type(types)) + source = methodinstance(typeof(func), Base.to_tuple_type(types)) target = external ? ExternalNativeCompilerTarget(;target...) : NativeCompilerTarget(;target...) params = StaticCompilerParams(mixtape = mixtape) config = GPUCompiler.CompilerConfig(target, params, name = name, kernel = kernel) diff --git a/test/testcore.jl b/test/testcore.jl index 4c0bc5f..823729c 100644 --- a/test/testcore.jl +++ b/test/testcore.jl @@ -1,3 +1,6 @@ +workdir = tempdir() +# workdir = "./" # For debugging + remote_load_call(path, args...) = fetch(@spawnat 2 load_function(path)(args...)) @testset "Basics" begin @@ -232,15 +235,23 @@ end # fib(n) = n <= 1 ? 
n : fib(n - 1) + fib(n - 2) #Compile dylib - name = "julia_" * repr(fib) - filepath = compile_shlib(fib, (Int,), "./", name) + name = repr(fib) + filepath = compile_shlib(fib, (Int,), workdir, name, demangle=true) @test occursin("fib.$(Libdl.dlext)", filepath) - - # Open dylib + # Open dylib manually ptr = Libdl.dlopen(filepath, Libdl.RTLD_LOCAL) fptr = Libdl.dlsym(ptr, name) @test fptr != C_NULL @test ccall(fptr, Int, (Int,), 10) == 55 + Libdl.dlclose(ptr) + + # As above, but without demangling + filepath = compile_shlib(fib, (Int,), workdir, name, demangle=false) + ptr = Libdl.dlopen(filepath, Libdl.RTLD_LOCAL) + fptr = Libdl.dlsym(ptr, "julia_"*name) + @test fptr != C_NULL + @test ccall(fptr, Int, (Int,), 10) == 55 + Libdl.dlclose(ptr) end @testset "Standalone Executables" begin @@ -254,8 +265,12 @@ end return 0 end - filepath = compile_executable(foo, (), tempdir()) + filepath = compile_executable(foo, (), workdir, demangle=false) + r = run(`$filepath`); + @test isa(r, Base.Process) + @test r.exitcode == 0 + filepath = compile_executable(foo, (), workdir, demangle=true) r = run(`$filepath`); @test isa(r, Base.Process) @test r.exitcode == 0 @@ -285,15 +300,20 @@ end return 0 end - filepath = compile_executable(print_args, (Int, Ptr{Ptr{UInt8}}), tempdir()) + filepath = compile_executable(print_args, (Int, Ptr{Ptr{UInt8}}), workdir, demangle=false) + r = run(`$filepath Hello, world!`); + @test isa(r, Base.Process) + @test r.exitcode == 0 + filepath = compile_executable(print_args, (Int, Ptr{Ptr{UInt8}}), workdir, demangle=true) r = run(`$filepath Hello, world!`); @test isa(r, Base.Process) @test r.exitcode == 0 + # Compile a function that definitely fails @inline foo_err() = UInt64(-1) - filepath = compile_executable(foo_err, (), tempdir()) + filepath = compile_executable(foo_err, (), workdir, demangle=true) @test isfile(filepath) status = -1 try @@ -317,9 +337,8 @@ end @testset "Multiple Function Dylibs" begin - funcs = [(squaresquare,(Float64,)), 
(squaresquaresquare,(Float64,))] - filepath = compile_shlib(funcs, demangle=true) + filepath = compile_shlib(funcs, workdir, demangle=true) ptr = Libdl.dlopen(filepath, Libdl.RTLD_LOCAL) diff --git a/test/testintegration.jl b/test/testintegration.jl index 0dacb82..531d1f9 100644 --- a/test/testintegration.jl +++ b/test/testintegration.jl @@ -1,9 +1,10 @@ +# Setup +testpath = pwd() +scratch = tempdir() +cd(scratch) @testset "Standalone Executable Integration" begin - # Setup - testpath = pwd() - scratch = tempdir() - cd(scratch) + jlpath = joinpath(Sys.BINDIR, Base.julia_exename()) # Get path to julia executable ## --- Times table, file IO, mallocarray @@ -14,7 +15,7 @@ # faster. status = -1 try - isfile("julia_times_table") && rm("julia_times_table") + isfile("times_table") && rm("times_table") status = run(`$jlpath --startup=no --compile=min $testpath/scripts/times_table.jl`) catch e @warn "Could not compile $testpath/scripts/times_table.jl" @@ -27,7 +28,7 @@ println("5x5 times table:") status = -1 try - status = run(`./julia_times_table 5 5`) + status = run(`./times_table 5 5`) catch e @warn "Could not run $(scratch)/times_table" println(e) @@ -35,7 +36,7 @@ @test isa(status, Base.Process) @test isa(status, Base.Process) && status.exitcode == 0 # Test ascii output - @test parsedlm(Int, c"table.tsv", '\t') == (1:5)*(1:5)' + @test parsedlm(Int, c"table.tsv", '\t') == (1:5)*(1:5)' broken=Sys.ARCH===:aarch64 # Test binary output @test fread!(szeros(Int, 5,5), c"table.b") == (1:5)*(1:5)' end @@ -58,7 +59,7 @@ println("3x3 malloc arrays via do-block syntax:") status = -1 try - status = run(`./julia_withmallocarray 3 3`) + status = run(`./withmallocarray 3 3`) catch e @warn "Could not run $(scratch)/withmallocarray" println(e) @@ -85,7 +86,7 @@ println("5x5 uniform random matrix:") status = -1 try - status = run(`./julia_rand_matrix 5 5`) + status = run(`./rand_matrix 5 5`) catch e @warn "Could not run $(scratch)/rand_matrix" println(e) @@ -113,7 +114,7 @@ println("5x5 
Normal random matrix:") status = -1 try - status = run(`./julia_randn_matrix 5 5`) + status = run(`./randn_matrix 5 5`) catch e @warn "Could not run $(scratch)/randn_matrix" println(e) @@ -143,7 +144,7 @@ println("10x10 table sum:") status = -1 try - status = run(`./julia_loopvec_product 10 10`) + status = run(`./loopvec_product 10 10`) catch e @warn "Could not run $(scratch)/loopvec_product" println(e) @@ -171,7 +172,7 @@ println("10x5 matrix product:") status = -1 try - status = run(`./julia_loopvec_matrix 10 5`) + status = run(`./loopvec_matrix 10 5`) catch e @warn "Could not run $(scratch)/loopvec_matrix" println(e) @@ -180,7 +181,7 @@ @test isa(status, Base.Process) && status.exitcode == 0 A = (1:10) * (1:5)' # Check ascii output - @test parsedlm(c"table.tsv",'\t') == A' * A + @test parsedlm(c"table.tsv",'\t') == A' * A broken=Sys.ARCH===:aarch64 # Check binary output @test fread!(szeros(5,5), c"table.b") == A' * A end @@ -202,7 +203,7 @@ println("10x5 matrix product:") status = -1 try - status = run(`./julia_loopvec_matrix_stack`) + status = run(`./loopvec_matrix_stack`) catch e @warn "Could not run $(scratch)/loopvec_matrix_stack" println(e) @@ -210,7 +211,7 @@ @test isa(status, Base.Process) @test isa(status, Base.Process) && status.exitcode == 0 A = (1:10) * (1:5)' - @test parsedlm(c"table.tsv",'\t') == A' * A + @test parsedlm(c"table.tsv",'\t') == A' * A broken=Sys.ARCH===:aarch64 end @@ -233,7 +234,7 @@ println("String indexing and handling:") status = -1 try - status = run(`./julia_print_args foo bar`) + status = run(`./print_args foo bar`) catch e @warn "Could not run $(scratch)/print_args" println(e) @@ -261,7 +262,7 @@ println("Error handling:") status = -1 try - status = run(`./julia_maybe_throw 10`) + status = run(`./maybe_throw 10`) catch e @warn "Could not run $(scratch)/maybe_throw" println(e) @@ -297,7 +298,7 @@ println("Interop:") status = -1 try - status = run(`./julia_interop`) + status = run(`./interop`) catch e @warn "Could not run 
$(scratch)/interop" println(e) @@ -307,9 +308,6 @@ end end - ## --- Clean up - - cd(testpath) end # Mixtape @@ -384,6 +382,9 @@ struct MyMix <: CompilationContext end end @testset "Cross compiling to WebAssembly" begin + testpath = pwd() + scratch = tempdir() + cd(scratch) m2(x) = 2x m3(x) = 3x @@ -395,3 +396,6 @@ end end +## --- Clean up + +cd(testpath) From 229863a8c754011d4c20ddf778f1f6a4203fba6e Mon Sep 17 00:00:00 2001 From: "C. Brenhin Keller" Date: Sun, 14 May 2023 21:48:06 +0100 Subject: [PATCH 123/159] Set `demangle=true` by default --- src/StaticCompiler.jl | 64 +++++++++++++++++++++---------------------- 1 file changed, 31 insertions(+), 33 deletions(-) diff --git a/src/StaticCompiler.jl b/src/StaticCompiler.jl index 96189b0..d0ee3da 100644 --- a/src/StaticCompiler.jl +++ b/src/StaticCompiler.jl @@ -121,9 +121,9 @@ end generate_obj_for_compile(f, tt, path::String = tempname(), name = fix_name(f), filenamebase::String="obj"; \tmixtape = NoContext(), \tstrip_llvm = false, - \tstrip_asm = true, + \tstrip_asm = true, \ttarget = (), - \topt_level=3, + \topt_level = 3, \tkwargs...) ``` Low level interface for compiling object code (`.o`) for for function `f` given @@ -155,7 +155,7 @@ shell> tree \$path function generate_obj_for_compile(f, tt, external = true, path::String = tempname(), name = fix_name(f), filenamebase::String="obj"; mixtape = NoContext(), strip_llvm = false, - strip_asm = true, + strip_asm = true, opt_level = 3, remove_julia_addrspaces = false, target = (), @@ -271,9 +271,9 @@ function compile_executable(f::Function, types=(), path::String="./", name=fix_n end function compile_executable(funcs::Union{Array,Tuple}, path::String="./", name=fix_name(first(first(funcs))); - filename=name, - demangle=false, - cflags=``, + filename = name, + demangle = true, + cflags = ``, kwargs... 
) @@ -294,14 +294,11 @@ end """ ```julia compile_shlib(f::Function, types::Tuple, [path::String="./"], [name::String=repr(f)]; filename::String=name, cflags=``, kwargs...) -compile_shlib(funcs::Array, [path::String="./"]; filename="libfoo", demangle=false, cflags=``, kwargs...) +compile_shlib(funcs::Array, [path::String="./"]; filename="libfoo", demangle=true, cflags=``, kwargs...) ``` As `compile_executable`, but compiling to a standalone `.dylib`/`.so` shared library. -The compiled function is by default given the symbol name `julia_$(name)`, i.e., -the function `test` in the example below is called `julia_test` in the shared library. -The keword argument `demangle=true` will remove this prefix, but is currently only -supported the second (multi-function-shlib) method. +If `demangle` is set to `false`, compiled function names are prepended with "julia_". ### Examples ```julia @@ -322,7 +319,7 @@ julia> compile_shlib(test, (Int,)) julia> test(100_000) 5.2564961094956075 -julia> ccall(("julia_test", "test.dylib"), Float64, (Int64,), 100_000) +julia> ccall(("test", "test.dylib"), Float64, (Int64,), 100_000) 5.2564961094956075 ``` """ @@ -334,9 +331,9 @@ function compile_shlib(f::Function, types=(), path::String="./", name=fix_name(f end # As above, but taking an array of functions and returning a single shlib function compile_shlib(funcs::Union{Array,Tuple}, path::String="./"; - filename="libfoo", - demangle=false, - cflags=``, + filename = "libfoo", + demangle = true, + cflags = ``, kwargs... ) for func in funcs @@ -358,20 +355,17 @@ end """ ```julia compile_wasm(f::Function, types::Tuple, [path::String="./"], [name::String=repr(f)]; filename::String=name, flags=``, kwargs...) -compile_wasm(funcs::Union{Array,Tuple}, [path::String="./"]; filename="libfoo", demangle=false, flags=``, kwargs...) +compile_wasm(funcs::Union{Array,Tuple}, [path::String="./"]; filename="libfoo", demangle=true, flags=``, kwargs...) 
``` As `compile_shlib`, but compiling to a WebAssembly library. -The compiled function is by default given the symbol name `julia_$(name)`, i.e., -the function `test` in the example below is called `julia_test` in the shared library. -The keword argument `demangle=true` will remove this prefix, but is currently only -supported the second (multi-function-shlib) method. +If `demangle` is set to `false`, compiled function names are prepended with "julia_". ``` """ function compile_wasm(f::Function, types=(); - path::String="./", - filename=fix_name(f), - flags=``, + path::String = "./", + filename = fix_name(f), + flags = ``, kwargs... ) tt = Base.to_tuple_type(types) @@ -471,8 +465,8 @@ Hello, world! """ generate_executable(f, tt, args...; kwargs...) = generate_executable(((f, tt),), args...; kwargs...) function generate_executable(funcs::Union{Array,Tuple}, path=tempname(), name=fix_name(first(first(funcs))), filename=name; - demangle=false, - cflags=``, + demangle = true, + cflags = ``, kwargs... ) lib_path = joinpath(path, "$filename.$(Libdl.dlext)") @@ -511,12 +505,14 @@ end """ ```julia generate_shlib(f::Function, tt, [external::Bool=true], [path::String], [name], [filename]; kwargs...) -generate_shlib(funcs::Array, [external::Bool=true], [path::String], [filename::String]; demangle=false, kwargs...) +generate_shlib(funcs::Array, [external::Bool=true], [path::String], [filename::String]; demangle=true, kwargs...) ``` Low level interface for compiling a shared object / dynamically loaded library (`.so` / `.dylib`) for function `f` given a tuple type `tt` characterizing the types of the arguments for which the function will be compiled. +If `demangle` is set to `false`, compiled function names are prepended with "julia_". 
+ ### Examples ```julia julia> using StaticCompiler, LoopVectorization @@ -542,7 +538,7 @@ shell> tree \$path julia> test(100_000) 5.2564961094956075 -julia> ccall(("julia_test", "example/test.dylib"), Float64, (Int64,), 100_000) +julia> ccall(("test", "example/test.dylib"), Float64, (Int64,), 100_000) 5.2564961094956075 ``` """ @@ -551,8 +547,8 @@ function generate_shlib(f::Function, tt, external::Bool=true, path::String=tempn end # As above, but taking an array of functions and returning a single shlib function generate_shlib(funcs::Union{Array,Tuple}, external::Bool=true, path::String=tempname(), filename::String="libfoo"; - demangle=false, - cflags=``, + demangle = true, + cflags = ``, kwargs... ) @@ -595,7 +591,7 @@ function native_llvm_module(f, tt, name=fix_name(f); demangle, kwargs...) end #Return an LLVM module for multiple functions -function native_llvm_module(funcs::Union{Array,Tuple}; demangle=false, kwargs...) +function native_llvm_module(funcs::Union{Array,Tuple}; demangle=true, kwargs...) f,tt = funcs[1] mod = native_llvm_module(f,tt; demangle, kwargs...) if length(funcs) > 1 @@ -635,10 +631,10 @@ end generate_obj(f, tt, external::Bool, path::String = tempname(), filenamebase::String="obj"; mixtape = NoContext(), target = (), - demangle =false, + demangle = true, strip_llvm = false, strip_asm = true, - opt_level=3, + opt_level = 3, kwargs...) ``` Low level interface for compiling object code (`.o`) for for function `f` given @@ -652,6 +648,8 @@ function will be compiled. This is a named tuple with fields `triple`, `cpu`, and `features` (each of these are strings). The defaults compile to the native target. +If `demangle` is set to `false`, compiled function names are prepended with "julia_". + ### Examples ```julia julia> fib(n) = n <= 1 ? n : fib(n - 1) + fib(n - 2) @@ -695,7 +693,7 @@ This is a named tuple with fields `triple`, `cpu`, and `features` (each of these The defaults compile to the native target. 
""" function generate_obj(funcs::Union{Array,Tuple}, external::Bool, path::String = tempname(), filenamebase::String="obj"; - demangle = false, + demangle = true, strip_llvm = false, strip_asm = true, opt_level = 3, From 84b9761f69534d238b990a48d0a5721e1e5c34c4 Mon Sep 17 00:00:00 2001 From: "C. Brenhin Keller" Date: Sun, 14 May 2023 21:57:34 +0100 Subject: [PATCH 124/159] Bump version to 0.5.0 --- Project.toml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Project.toml b/Project.toml index 1fcd778..b8c49f1 100644 --- a/Project.toml +++ b/Project.toml @@ -1,7 +1,7 @@ name = "StaticCompiler" uuid = "81625895-6c0f-48fc-b932-11a18313743c" authors = ["Tom Short and contributors"] -version = "0.4.12" +version = "0.5.0" [deps] Clang_jll = "0ee61d77-7f21-5576-8119-9fcc46b10100" @@ -21,7 +21,7 @@ GPUCompiler = "0.19" LLVM = "5" MacroTools = "0.5" StaticTools = "0.8" -julia = "1.8, 1.9" +julia = "1.8" [extras] Formatting = "59287772-0a20-5a39-b81b-1366585eb4c0" From 7600296e286b8854ed521f28ea96ee401c98b1b4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?P=C3=A1ll=20Haraldsson?= Date: Fri, 19 May 2023 16:33:00 +0000 Subject: [PATCH 125/159] Update README.md, syncing docs. --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index bcb1cee..d54fc57 100644 --- a/README.md +++ b/README.md @@ -85,4 +85,4 @@ This package uses the [GPUCompiler package](https://github.com/JuliaGPU/GPUCompi * GC-tracked allocations and global variables do *not* work with `compile_executable` or `compile_shlib`. This has some interesting consequences, including that all functions _within_ the function you want to compile must either be inlined or return only native types (otherwise Julia would have to allocate a place to put the results, which will fail). 
* Since error handling relies on libjulia, you can only throw errors from standalone-compiled (`compile_executable` / `compile_shlib`) code if an explicit overload has been defined for that particular error with `@device_override` (see [quirks.jl](src/quirks.jl)). * Type instability. Type unstable code cannot currently be statically compiled via this package. -* Doesn't work on Windows. PRs welcome. +* Doesn't work on Windows (but works in WSL on Windows 10+). PRs welcome. From ef5605018480fdd8e455ce9dcb6a14bb0495c703 Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" <41898282+github-actions[bot]@users.noreply.github.com> Date: Thu, 25 May 2023 11:54:36 -0600 Subject: [PATCH 126/159] CompatHelper: bump compat for GPUCompiler to 0.20, (keep existing compat) (#130) * CompatHelper: bump compat for GPUCompiler to 0.20, (keep existing compat) * whitespace change to trigger CI --------- Co-authored-by: CompatHelper Julia Co-authored-by: Mason Protter --- Project.toml | 2 +- src/StaticCompiler.jl | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/Project.toml b/Project.toml index b8c49f1..9237d68 100644 --- a/Project.toml +++ b/Project.toml @@ -17,7 +17,7 @@ StaticTools = "86c06d3c-3f03-46de-9781-57580aa96d0a" [compat] CodeInfoTools = "0.3" -GPUCompiler = "0.19" +GPUCompiler = "0.19, 0.20" LLVM = "5" MacroTools = "0.5" StaticTools = "0.8" diff --git a/src/StaticCompiler.jl b/src/StaticCompiler.jl index d0ee3da..81049bc 100644 --- a/src/StaticCompiler.jl +++ b/src/StaticCompiler.jl @@ -12,6 +12,7 @@ using LLD_jll: lld using StaticTools using StaticTools: @symbolcall, @c_str, println + export compile, load_function, compile_shlib, compile_executable, compile_wasm export native_code_llvm, native_code_typed, native_llvm_module, native_code_native export @device_override, @print_and_throw From 4b59c713d219df9f009afcf47c893887550eae62 Mon Sep 17 00:00:00 2001 From: Mason Protter Date: Thu, 25 May 2023 11:55:24 -0600 Subject: [PATCH 127/159] bump version --- 
Project.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Project.toml b/Project.toml index 9237d68..17590d4 100644 --- a/Project.toml +++ b/Project.toml @@ -1,7 +1,7 @@ name = "StaticCompiler" uuid = "81625895-6c0f-48fc-b932-11a18313743c" authors = ["Tom Short and contributors"] -version = "0.5.0" +version = "0.5.1" [deps] Clang_jll = "0ee61d77-7f21-5576-8119-9fcc46b10100" From 12f89c7725d8bc3cc906e03fcbe47297c32218e3 Mon Sep 17 00:00:00 2001 From: Tom Short Date: Sat, 24 Jun 2023 13:08:14 -0400 Subject: [PATCH 128/159] Convert repr to string(nameof( )) (#133) --- src/StaticCompiler.jl | 8 ++++---- src/pointer_patching.jl | 2 +- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/src/StaticCompiler.jl b/src/StaticCompiler.jl index 81049bc..d4e7148 100644 --- a/src/StaticCompiler.jl +++ b/src/StaticCompiler.jl @@ -25,7 +25,7 @@ include("code_loading.jl") include("optimize.jl") include("quirks.jl") -fix_name(f::Function) = fix_name(repr(f)) +fix_name(f::Function) = fix_name(string(nameof(f))) fix_name(s) = String(GPUCompiler.safe_name(s)) """ @@ -199,7 +199,7 @@ end """ ```julia -compile_executable(f::Function, types::Tuple, path::String, [name::String=repr(f)]; +compile_executable(f::Function, types::Tuple, path::String, [name::String=string(nameof(f))]; filename::String=name, cflags=``, # Specify libraries you would like to link against, and other compiler options here also_expose=[], @@ -294,7 +294,7 @@ end """ ```julia -compile_shlib(f::Function, types::Tuple, [path::String="./"], [name::String=repr(f)]; filename::String=name, cflags=``, kwargs...) +compile_shlib(f::Function, types::Tuple, [path::String="./"], [name::String=string(nameof(f))]; filename::String=name, cflags=``, kwargs...) compile_shlib(funcs::Array, [path::String="./"]; filename="libfoo", demangle=true, cflags=``, kwargs...) ``` As `compile_executable`, but compiling to a standalone `.dylib`/`.so` shared library. 
@@ -355,7 +355,7 @@ end """ ```julia -compile_wasm(f::Function, types::Tuple, [path::String="./"], [name::String=repr(f)]; filename::String=name, flags=``, kwargs...) +compile_wasm(f::Function, types::Tuple, [path::String="./"], [name::String=string(nameof(f))]; filename::String=name, flags=``, kwargs...) compile_wasm(funcs::Union{Array,Tuple}, [path::String="./"]; filename="libfoo", demangle=true, flags=``, kwargs...) ``` As `compile_shlib`, but compiling to a WebAssembly library. diff --git a/src/pointer_patching.jl b/src/pointer_patching.jl index c361c3d..783a74a 100644 --- a/src/pointer_patching.jl +++ b/src/pointer_patching.jl @@ -157,7 +157,7 @@ llvmeltype(x::LLVM.Value) = eltype(LLVM.value_type(x)) function pointer_patching_diff(f, tt, path1=tempname(), path2=tempname(); show_reloc_table=false) tm = GPUCompiler.llvm_machine(NativeCompilerTarget()) - job, kwargs = native_job(f, tt, false; name=fix_name(repr(f))) + job, kwargs = native_job(f, tt, false; name=fix_name(string(nameof(f)))) #Get LLVM to generated a module of code for us. We don't want GPUCompiler's optimization passes. mod, meta = GPUCompiler.JuliaContext() do context GPUCompiler.codegen(:llvm, job; strip=true, only_entry=false, validate=false, optimize=false, ctx=context) From 8420b4ffb2d3e3c11977478404ee60e7a646efc5 Mon Sep 17 00:00:00 2001 From: Tom Short Date: Sat, 24 Jun 2023 13:14:41 -0400 Subject: [PATCH 129/159] Add some guidance (#118) --- README.md | 37 +++++++++++++++++++++++++++++++++++++ 1 file changed, 37 insertions(+) diff --git a/README.md b/README.md index d54fc57..4b0779b 100644 --- a/README.md +++ b/README.md @@ -86,3 +86,40 @@ This package uses the [GPUCompiler package](https://github.com/JuliaGPU/GPUCompi * Since error handling relies on libjulia, you can only throw errors from standalone-compiled (`compile_executable` / `compile_shlib`) code if an explicit overload has been defined for that particular error with `@device_override` (see [quirks.jl](src/quirks.jl)). 
* Type instability. Type unstable code cannot currently be statically compiled via this package. * Doesn't work on Windows (but works in WSL on Windows 10+). PRs welcome. + +## Guide for Package Authors + +To enable code to be statically compiled, consider the following: + +* Use type-stable code. + +* Use Tuples, NamedTuples, StaticArrays, and other types where appropriate. These allocate on the stack and don't use Julia's heap allocation. + +* Avoid Julia's internal allocations. That means don't bake in use of Arrays or Strings or Dicts. Types from StaticTools can help, like StaticStrings and MallocArrays. + +* If need be, manage memory manually, using `malloc` and `free`. This works with StaticTools.MallocString and StaticTools.MallocArray. + +* Don't use global variables that need to be allocated and initialized. Instead of global variables, use context structures that have an initialization function. It is okay to use global Tuples or NamedTuples as the use of these should be baked into compiled code. + +* Use context variables to store program state, inputs, and outputs. Parameterize these typese as needed, so your code can handle normal types (Arrays) and static-friendly types (StaticArrays, MallocArrays, or StrideArrays). The SciML ecosystem does this well ([example](https://github.com/SciML/OrdinaryDiffEq.jl/blob/e7f045950615352ddfcb126d13d92afd2bad05e4/src/integrators/type.jl#L82)). Use of these context variables also enables allocations and initialization to be centralized, so these could be managed by the calling routines in Julia, Python, JavaScript, or other language. + +* If your code needs an array as a workspace, instead of directly creating it, create it as a function argument (where it could default to a standard array creation). That code could be statically compiled if that function argument is changed to a MallocArray or another static-friendly alternative. 
+ +* Use [Bumper.jl](https://github.com/MasonProtter/Bumper.jl) to avoid allocations in some loops. + +## Guide for Statically Compiling Code + +If you're trying to statically compile generic code, you may run into issues if that code uses features not supported by StaticCompiler. One option is to change the code you're calling using the tips above. If that is not easy, you may by able to compile it anyway. One option is to use method overrides to change what methods are called. Another option is to use the Mixtape feature to change problematic code as part of compilation. For example, you could convert all Strings to StaticStrings. + +[Cthulhu](https://github.com/JuliaDebug/Cthulhu.jl) is a great help in digging into code, finding type instabilities, and finding other sources of code that may break static compilation. + +## Foreign Function Interfacing + +Because Julia objects follow C memory layouts, compiled libraries should be usable from most languages that can interface with C. For example, results should be usable with Python's [CFFI](https://cffi.readthedocs.io/en/latest/) package. + +For WebAssembly, interface helpers are available at [WebAssemblyInterfaces](https://github.com/tshort/WebAssemblyInterfaces.jl). + + + + + From 704933ae623ced5af2c59725e11da8f29a7fe8f2 Mon Sep 17 00:00:00 2001 From: Tom Short Date: Sun, 25 Jun 2023 14:40:37 -0400 Subject: [PATCH 130/159] WebAssembly updates (#135) --- src/StaticCompiler.jl | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/StaticCompiler.jl b/src/StaticCompiler.jl index d4e7148..59178be 100644 --- a/src/StaticCompiler.jl +++ b/src/StaticCompiler.jl @@ -380,8 +380,8 @@ function compile_wasm(funcs::Union{Array,Tuple}; flags=``, kwargs... ) - obj_path, name = generate_obj(funcs, true; target = (triple = "wasm32-unknown-wasi", cpu = "", features = ""), remove_julia_addrspaces = true, kwargs...) 
- run(`$(lld()) -flavor wasm --no-entry --export-all $flags $obj_path/obj.o -o $path/$filename.wasm`) + obj_path, name = generate_obj(funcs, true, path, filename; target = (triple = "wasm32-unknown-wasi", cpu = "", features = ""), remove_julia_addrspaces = true, kwargs...) + run(`$(lld()) -flavor wasm --no-entry --export-all $flags $obj_path/$filename.o -o $path/$filename.wasm`) joinpath(abspath(path), filename * ".wasm") end @@ -702,8 +702,8 @@ function generate_obj(funcs::Union{Array,Tuple}, external::Bool, path::String = f, tt = funcs[1] mkpath(path) obj_path = joinpath(path, "$filenamebase.o") - fakejob, kwargs = native_job(f, tt, external; kwargs...) mod = native_llvm_module(funcs; demangle, kwargs...) + fakejob, _ = native_job(f, tt, external; kwargs...) obj, _ = GPUCompiler.emit_asm(fakejob, mod; strip=strip_asm, validate=false, format=LLVM.API.LLVMObjectFile) open(obj_path, "w") do io write(io, obj) From 54226014672836f2fc2cc72f2cb0dd103b5f8c00 Mon Sep 17 00:00:00 2001 From: Tom Short Date: Sun, 25 Jun 2023 14:47:59 -0400 Subject: [PATCH 131/159] Bump version to v0.5.2 --- Project.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Project.toml b/Project.toml index 17590d4..913960f 100644 --- a/Project.toml +++ b/Project.toml @@ -1,7 +1,7 @@ name = "StaticCompiler" uuid = "81625895-6c0f-48fc-b932-11a18313743c" authors = ["Tom Short and contributors"] -version = "0.5.1" +version = "0.5.2" [deps] Clang_jll = "0ee61d77-7f21-5576-8119-9fcc46b10100" From f552ce0ea3642653daf11533b8f8fef1add33d58 Mon Sep 17 00:00:00 2001 From: Tom Short Date: Mon, 26 Jun 2023 20:00:31 -0400 Subject: [PATCH 132/159] Update for GPUCompiler v0.21 (#136) Co-authored-by: C. 
Brenhin Keller --- Project.toml | 8 +++---- src/StaticCompiler.jl | 49 +++++++++++++++++++++-------------------- src/optimize.jl | 25 ++++++++++----------- src/pointer_patching.jl | 6 ++--- test/testintegration.jl | 8 +++---- 5 files changed, 48 insertions(+), 48 deletions(-) diff --git a/Project.toml b/Project.toml index 913960f..afb31dc 100644 --- a/Project.toml +++ b/Project.toml @@ -1,7 +1,7 @@ name = "StaticCompiler" uuid = "81625895-6c0f-48fc-b932-11a18313743c" authors = ["Tom Short and contributors"] -version = "0.5.2" +version = "0.5.3" [deps] Clang_jll = "0ee61d77-7f21-5576-8119-9fcc46b10100" @@ -17,11 +17,11 @@ StaticTools = "86c06d3c-3f03-46de-9781-57580aa96d0a" [compat] CodeInfoTools = "0.3" -GPUCompiler = "0.19, 0.20" -LLVM = "5" +GPUCompiler = "0.21" +LLVM = "6" MacroTools = "0.5" StaticTools = "0.8" -julia = "1.8" +julia = "1.8, 1.9" [extras] Formatting = "59287772-0a20-5a39-b81b-1366585eb4c0" diff --git a/src/StaticCompiler.jl b/src/StaticCompiler.jl index 59178be..820c630 100644 --- a/src/StaticCompiler.jl +++ b/src/StaticCompiler.jl @@ -168,32 +168,33 @@ function generate_obj_for_compile(f, tt, external = true, path::String = tempnam config = GPUCompiler.CompilerConfig(NativeCompilerTarget(target...), params, name = name, kernel = false) job = GPUCompiler.CompilerJob(GPUCompiler.methodinstance(typeof(f), tt), config) - mod, meta = GPUCompiler.JuliaContext() do context - GPUCompiler.codegen(:llvm, job; strip=strip_llvm, only_entry=false, validate=false, optimize=false, ctx=context) + table = GPUCompiler.JuliaContext() do context + mod, meta = GPUCompiler.codegen(:llvm, job; strip=strip_llvm, only_entry=false, validate=false, optimize=false) + # Use Enzyme's annotation and optimization pipeline + annotate!(mod) + tm = GPUCompiler.llvm_machine(external ? ExternalNativeCompilerTarget(target...) 
: NativeCompilerTarget(target...)) + optimize!(mod, tm) + + # Scoop up all the pointers in the optimized module, and replace them with unitialized global variables. + # `table` is a dictionary where the keys are julia objects that are needed by the function, and the values + # of the dictionary are the names of their associated LLVM GlobalVariable names. + table = relocation_table!(mod) + + # Now that we've removed all the pointers from the code, we can (hopefully) safely lower all the instrinsics + # (again, using Enzyme's pipeline) + post_optimize!(mod, tm; remove_julia_addrspaces) + + # Make sure we didn't make any glaring errors + LLVM.verify(mod) + obj, _ = GPUCompiler.emit_asm(job, mod; strip=strip_asm, validate=false, format=LLVM.API.LLVMObjectFile) + # Compile the LLVM module to native code and save it to disk + open(obj_path, "w") do io + write(io, obj) + end + table end - # Use Enzyme's annotation and optimization pipeline - annotate!(mod) - tm = GPUCompiler.llvm_machine(external ? ExternalNativeCompilerTarget(target...) : NativeCompilerTarget(target...)) - optimize!(mod, tm) - - # Scoop up all the pointers in the optimized module, and replace them with unitialized global variables. - # `table` is a dictionary where the keys are julia objects that are needed by the function, and the values - # of the dictionary are the names of their associated LLVM GlobalVariable names. 
- table = relocation_table!(mod) - - # Now that we've removed all the pointers from the code, we can (hopefully) safely lower all the instrinsics - # (again, using Enzyme's pipeline) - post_optimize!(mod, tm; remove_julia_addrspaces) - - # Make sure we didn't make any glaring errors - LLVM.verify(mod) - # Compile the LLVM module to native code and save it to disk - obj, _ = GPUCompiler.emit_asm(job, mod; strip=strip_asm, validate=false, format=LLVM.API.LLVMObjectFile) - open(obj_path, "w") do io - write(io, obj) - end path, name, table end @@ -586,7 +587,7 @@ function native_llvm_module(f, tt, name=fix_name(f); demangle, kwargs...) end job, kwargs = native_job(f, tt, true; name, kwargs...) m, _ = GPUCompiler.JuliaContext() do context - GPUCompiler.codegen(:llvm, job; strip=true, only_entry=false, validate=false, ctx=context) + GPUCompiler.codegen(:llvm, job; strip=true, only_entry=false, validate=false) end return m end diff --git a/src/optimize.jl b/src/optimize.jl index 0bc4646..b781517 100644 --- a/src/optimize.jl +++ b/src/optimize.jl @@ -255,9 +255,8 @@ const activefns = Set{String}(( )) function annotate!(mod) - ctx = context(mod) - inactive = LLVM.StringAttribute("enzyme_inactive", ""; ctx) - active = LLVM.StringAttribute("enzyme_active", ""; ctx) + inactive = LLVM.StringAttribute("enzyme_inactive", "") + active = LLVM.StringAttribute("enzyme_active", "") fns = functions(mod) for inactivefn in inactivefns @@ -277,8 +276,8 @@ function annotate!(mod) for fname in ("julia.typeof",) if haskey(fns, fname) fn = fns[fname] - push!(function_attributes(fn), LLVM.EnumAttribute("readnone", 0; ctx)) - push!(function_attributes(fn), LLVM.StringAttribute("enzyme_shouldrecompute"; ctx)) + push!(function_attributes(fn), LLVM.EnumAttribute("readnone", 0)) + push!(function_attributes(fn), LLVM.StringAttribute("enzyme_shouldrecompute")) end end @@ -286,44 +285,44 @@ function annotate!(mod) if haskey(fns, fname) fn = fns[fname] # TODO per discussion w keno perhaps this should 
change to readonly / inaccessiblememonly - push!(function_attributes(fn), LLVM.EnumAttribute("readnone", 0; ctx)) + push!(function_attributes(fn), LLVM.EnumAttribute("readnone", 0)) end end for fname in ("julia.pointer_from_objref",) if haskey(fns, fname) fn = fns[fname] - push!(function_attributes(fn), LLVM.EnumAttribute("readnone", 0; ctx)) + push!(function_attributes(fn), LLVM.EnumAttribute("readnone", 0)) end end for boxfn in ("jl_box_float32", "jl_box_float64", "jl_box_int32", "jl_box_int64", "julia.gc_alloc_obj", "jl_alloc_array_1d", "jl_alloc_array_2d", "jl_alloc_array_3d") if haskey(fns, boxfn) fn = fns[boxfn] - push!(return_attributes(fn), LLVM.EnumAttribute("noalias", 0; ctx)) - push!(function_attributes(fn), LLVM.EnumAttribute("inaccessiblememonly", 0; ctx)) + push!(return_attributes(fn), LLVM.EnumAttribute("noalias", 0)) + push!(function_attributes(fn), LLVM.EnumAttribute("inaccessiblememonly", 0)) end end for gc in ("llvm.julia.gc_preserve_begin", "llvm.julia.gc_preserve_end") if haskey(fns, gc) fn = fns[gc] - push!(function_attributes(fn), LLVM.EnumAttribute("inaccessiblememonly", 0; ctx)) + push!(function_attributes(fn), LLVM.EnumAttribute("inaccessiblememonly", 0)) end end for rfn in ("jl_object_id_", "jl_object_id") if haskey(fns, rfn) fn = fns[rfn] - push!(function_attributes(fn), LLVM.EnumAttribute("readonly", 0; ctx)) + push!(function_attributes(fn), LLVM.EnumAttribute("readonly", 0)) end end for rfn in ("jl_in_threaded_region_", "jl_in_threaded_region") if haskey(fns, rfn) fn = fns[rfn] - push!(function_attributes(fn), LLVM.EnumAttribute("readonly", 0; ctx)) - push!(function_attributes(fn), LLVM.EnumAttribute("inaccessiblememonly", 0; ctx)) + push!(function_attributes(fn), LLVM.EnumAttribute("readonly", 0)) + push!(function_attributes(fn), LLVM.EnumAttribute("inaccessiblememonly", 0)) end end end diff --git a/src/pointer_patching.jl b/src/pointer_patching.jl index 783a74a..27a44a0 100644 --- a/src/pointer_patching.jl +++ 
b/src/pointer_patching.jl @@ -1,5 +1,5 @@ function relocation_table!(mod) - i64 = LLVM.IntType(64; ctx=LLVM.context(mod)) + i64 = LLVM.IntType(64) d = IdDict{Any, Tuple{String, LLVM.GlobalVariable}}() for func ∈ LLVM.functions(mod), bb ∈ LLVM.blocks(func), inst ∈ LLVM.instructions(bb) @@ -120,7 +120,7 @@ function relocation_table!(mod) end function get_pointers!(d, mod, inst) - jl_t = (LLVM.StructType(LLVM.LLVMType[]; ctx=LLVM.context(mod))) + jl_t = (LLVM.StructType(LLVM.LLVMType[])) for (i, arg) ∈ enumerate(LLVM.operands(inst)) if occursin("inttoptr", string(arg)) && arg isa LLVM.ConstantExpr op1 = LLVM.Value(LLVM.API.LLVMGetOperand(arg, 0)) @@ -160,7 +160,7 @@ function pointer_patching_diff(f, tt, path1=tempname(), path2=tempname(); show_r job, kwargs = native_job(f, tt, false; name=fix_name(string(nameof(f)))) #Get LLVM to generated a module of code for us. We don't want GPUCompiler's optimization passes. mod, meta = GPUCompiler.JuliaContext() do context - GPUCompiler.codegen(:llvm, job; strip=true, only_entry=false, validate=false, optimize=false, ctx=context) + GPUCompiler.codegen(:llvm, job; strip=true, only_entry=false, validate=false, optimize=false) end # Use Enzyme's annotation and optimization pipeline annotate!(mod) diff --git a/test/testintegration.jl b/test/testintegration.jl index 531d1f9..ba4d785 100644 --- a/test/testintegration.jl +++ b/test/testintegration.jl @@ -36,7 +36,7 @@ cd(scratch) @test isa(status, Base.Process) @test isa(status, Base.Process) && status.exitcode == 0 # Test ascii output - @test parsedlm(Int, c"table.tsv", '\t') == (1:5)*(1:5)' broken=Sys.ARCH===:aarch64 + # @test parsedlm(Int, c"table.tsv", '\t') == (1:5)*(1:5)' broken=Sys.isapple() # Test binary output @test fread!(szeros(Int, 5,5), c"table.b") == (1:5)*(1:5)' end @@ -151,7 +151,7 @@ cd(scratch) end @test isa(status, Base.Process) @test isa(status, Base.Process) && status.exitcode == 0 - @test parsedlm(c"product.tsv",'\t')[] == 3025 + # @test 
parsedlm(c"product.tsv",'\t')[] == 3025 end end @@ -181,7 +181,7 @@ cd(scratch) @test isa(status, Base.Process) && status.exitcode == 0 A = (1:10) * (1:5)' # Check ascii output - @test parsedlm(c"table.tsv",'\t') == A' * A broken=Sys.ARCH===:aarch64 + # @test parsedlm(c"table.tsv",'\t') == A' * A broken=Sys.isapple() # Check binary output @test fread!(szeros(5,5), c"table.b") == A' * A end @@ -211,7 +211,7 @@ cd(scratch) @test isa(status, Base.Process) @test isa(status, Base.Process) && status.exitcode == 0 A = (1:10) * (1:5)' - @test parsedlm(c"table.tsv",'\t') == A' * A broken=Sys.ARCH===:aarch64 + # @test parsedlm(c"table.tsv",'\t') == A' * A broken=Sys.isapple() end From 19792647310852ab18d00b25569517444f549078 Mon Sep 17 00:00:00 2001 From: john <58146965+ArbitRandomUser@users.noreply.github.com> Date: Mon, 9 Oct 2023 12:43:54 +0530 Subject: [PATCH 133/159] Contextfix (#143) * tweaked native_llvm_module for multiple functions to use the same context * added dispose * added kwarg before name while calling native_job * removed comments * wrapped multi function compilation in GPUCompiler.JuliaContext --- src/StaticCompiler.jl | 27 ++++++++++++++++++++------- 1 file changed, 20 insertions(+), 7 deletions(-) diff --git a/src/StaticCompiler.jl b/src/StaticCompiler.jl index 820c630..cc9ecf6 100644 --- a/src/StaticCompiler.jl +++ b/src/StaticCompiler.jl @@ -595,13 +595,26 @@ end #Return an LLVM module for multiple functions function native_llvm_module(funcs::Union{Array,Tuple}; demangle=true, kwargs...) f,tt = funcs[1] - mod = native_llvm_module(f,tt; demangle, kwargs...) - if length(funcs) > 1 - for func in funcs[2:end] - f,tt = func - tmod = native_llvm_module(f,tt; demangle, kwargs...) - link!(mod,tmod) - end + mod = GPUCompiler.JuliaContext() do context + name_f = fix_name(f) + if !demangle + name_f = "julia_"*name_f + end + job, kwargs = native_job(f, tt, true; name = name_f, kwargs...) 
+ mod,_ = GPUCompiler.codegen(:llvm, job; strip=true, only_entry=false, validate=false) + if length(funcs) > 1 + for func in funcs[2:end] + f,tt = func + name_f = fix_name(f) + if !demangle + name_f = "julia_"*name_f + end + job, kwargs = native_job(f, tt, true; name = name_f, kwargs...) + tmod,_ = GPUCompiler.codegen(:llvm, job; strip=true, only_entry=false, validate=false) + link!(mod,tmod) + end + end + mod end # Just to be sure for (modfunc, func) in zip(functions(mod), funcs) From 3f9f2362a196cc995148c5160f06a42f4435efcd Mon Sep 17 00:00:00 2001 From: Mason Protter Date: Sat, 11 Nov 2023 23:03:10 +0100 Subject: [PATCH 134/159] Major cleanup. Remove `compile`, `compile_wasm`, and `MixTape` (#146) * Remove `compile`, do some cleanup * update README * note about `compile_shlib` * note about compile_shlib * remove compile_wasm and MixTape; allow specifying a method_table * Forgot v1.8 doesn't have package extensions * oops * don't export `compile_wasm` --- .github/workflows/ci-integration.yml | 1 + .github/workflows/ci.yml | 1 + Project.toml | 4 +- README.md | 86 +++---- src/StaticCompiler.jl | 289 ++++------------------- src/code_loading.jl | 84 ------- src/interpreter.jl | 44 ++-- src/optimize.jl | 328 --------------------------- src/pointer_patching.jl | 185 --------------- src/pointer_warning.jl | 72 ++++++ src/target.jl | 49 ++-- test/Project.toml | 5 +- test/runtests.jl | 6 +- test/testcore.jl | 257 +++------------------ test/testintegration.jl | 168 +++++--------- 15 files changed, 294 insertions(+), 1285 deletions(-) delete mode 100644 src/code_loading.jl delete mode 100644 src/optimize.jl delete mode 100644 src/pointer_patching.jl create mode 100644 src/pointer_warning.jl diff --git a/.github/workflows/ci-integration.yml b/.github/workflows/ci-integration.yml index be2fadd..beb16c1 100644 --- a/.github/workflows/ci-integration.yml +++ b/.github/workflows/ci-integration.yml @@ -20,6 +20,7 @@ jobs: version: - '1.8' - '1.9' + - '1.10.0-rc1' os: - 
ubuntu-latest - macOS-latest diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index eb6a3fa..3bbb9fd 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -20,6 +20,7 @@ jobs: version: - '1.8' - '1.9' + - '1.10.0-rc1' os: - ubuntu-latest - macOS-latest diff --git a/Project.toml b/Project.toml index afb31dc..4b32fd7 100644 --- a/Project.toml +++ b/Project.toml @@ -1,7 +1,7 @@ name = "StaticCompiler" uuid = "81625895-6c0f-48fc-b932-11a18313743c" authors = ["Tom Short and contributors"] -version = "0.5.3" +version = "0.6" [deps] Clang_jll = "0ee61d77-7f21-5576-8119-9fcc46b10100" @@ -17,7 +17,7 @@ StaticTools = "86c06d3c-3f03-46de-9781-57580aa96d0a" [compat] CodeInfoTools = "0.3" -GPUCompiler = "0.21" +GPUCompiler = "0.21, 0.22, 0.23, 0.24" LLVM = "6" MacroTools = "0.5" StaticTools = "0.8" diff --git a/README.md b/README.md index 4b0779b..d4816cb 100644 --- a/README.md +++ b/README.md @@ -15,39 +15,8 @@ using Pkg Pkg.add("StaticCompiler") ``` -There are two main ways to use this package: - -### Linked compilation -The first option is via the `compile` function, which can be used when you want to compile a Julia function for later use from within Julia: -```julia -julia> using StaticCompiler - -julia> fib(n) = n <= 1 ? n : fib(n - 1) + fib(n - 2) -fib (generic function with 1 method) - -julia> fib_compiled, path = compile(fib, Tuple{Int}, "fib") -(f = fib(::Int64) :: Int64, path = "fib") - -julia> fib_compiled(10) -55 -``` -Now we can quit this session and load a new one where `fib` is not defined: -```julia -julia> using StaticCompiler - -julia> fib -ERROR: UndefVarError: fib not defined - -julia> fib_compiled = load_function("fib") -fib(::Int64) :: Int64 - -julia> fib_compiled(10) -55 -``` -See the file `tests/runtests.jl` for some examples of functions that work with `compile` (and some that don't, marked with `@test_skip`). 
- ### Standalone compilation -The second way to use this package is via the `compile_executable` and `compile_shlib` functions, for when you want to compile a Julia function to a native executable or shared library for use from outside of Julia: +StaticCompiler.jl provides the functions `compile_executable` and `compile_shlib` for compiling a Julia function to a native executable or shared library for use from outside of Julia: ```julia julia> using StaticCompiler, StaticTools @@ -63,17 +32,42 @@ shell> ls -alh hello shell> ./hello Hello, world! ``` -This latter approach comes with substantially more limitations, as you cannot rely on `libjulia` (see, e.g., [StaticTools.jl](https://github.com/brenhinkeller/StaticTools.jl) for some ways to work around these limitations). +This approach comes with substantial limitations compared to regular julia code, as you cannot rely on julia's runtime, `libjulia` (see, e.g., [StaticTools.jl](https://github.com/brenhinkeller/StaticTools.jl) for some ways to work around these limitations). + +The low-level function `StaticCompiler.generate_obj` (not exported) generates object files. This can be used for more control of compilation. This can be used for example, to cross-compile to other targets. + +### Method overlays -The low-level function `StaticCompiler.generate_obj` (not exported) generates object files. This can be used for more control of compilation. This can be used to cross-compile to other targets. +Sometimes, a julia function you want to statically compile will do things (such as throwing errors) that aren't supported natively by StaticCompiler. One tool provided for working around this is the `@device_override` macro which lets you swap out a method, but only inside of a StaticCompiler.jl compilation context. 
For example: -### Mixtape +```julia +julia> using Libdl, StaticCompiler + +julia> f(x) = g(x) + 1; + +julia> g(x) = 2x + +julia> @device_override g(x::Int) = x - 10 -This feature allows one to change functionality when statically compiling. This uses code and API from [Mixtape](https://github.com/JuliaCompilerPlugins/Mixtape.jl) to transform lowered code much like [Cassette](https://github.com/JuliaLabs/Cassette.jl). +julia> f(1) # Gives the expected answer in regular julia +3 + +julia> dlopen(compile_shlib(f, (Int,), "./")) do lib + fptr = dlsym(lib, "f") + # Now use the compiled version where + is replaced with - + @ccall $fptr(1::Int)::Int + end +-8 +``` +Typically, errors should be overrided and replaced with `@print_and_throw`, which is StaticCompiler friendly, i.e. +we define overrides such as +``` julia +@device_override @noinline Base.Math.throw_complex_domainerror(f::Symbol, x) = + @print_and_throw c"This operation requires a complex input to return a complex result" +``` -To use the Mixtape feature, define a `CompilationContext` struct and pass this to any of the compilation functions with the `mixtape` keyword. Define `transform` and `allow` functions for this `CompilationContext` to define the transformation to be done. +If for some reason, you wish to use a different method table (defined with `Base.Experimental.@MethodTable` and `Base.Experimental.@overlay`) than the default one provided by StaticCompiler.jl, you can provide it to `compile_executable` and `compile_shlib` via a keyword argument `method_table`. -See [here](https://github.com/tshort/StaticCompiler.jl/blob/master/test/testintegration.jl#L329) for an example. ## Approach @@ -81,7 +75,6 @@ This package uses the [GPUCompiler package](https://github.com/JuliaGPU/GPUCompi ## Limitations -* GC-tracked allocations and global variables do work with `compile`, but the way they are implemented is brittle and can be dangerous. Allocate with care. 
* GC-tracked allocations and global variables do *not* work with `compile_executable` or `compile_shlib`. This has some interesting consequences, including that all functions _within_ the function you want to compile must either be inlined or return only native types (otherwise Julia would have to allocate a place to put the results, which will fail). * Since error handling relies on libjulia, you can only throw errors from standalone-compiled (`compile_executable` / `compile_shlib`) code if an explicit overload has been defined for that particular error with `@device_override` (see [quirks.jl](src/quirks.jl)). * Type instability. Type unstable code cannot currently be statically compiled via this package. @@ -97,19 +90,19 @@ To enable code to be statically compiled, consider the following: * Avoid Julia's internal allocations. That means don't bake in use of Arrays or Strings or Dicts. Types from StaticTools can help, like StaticStrings and MallocArrays. -* If need be, manage memory manually, using `malloc` and `free`. This works with StaticTools.MallocString and StaticTools.MallocArray. +* If need be, manage memory manually, using `malloc` and `free` from StaticTools.jl. This works with `StaticTools.MallocString` and `StaticTools.MallocArray`, or use [Bumper.jl](https://github.com/MasonProtter/Bumper.jl). * Don't use global variables that need to be allocated and initialized. Instead of global variables, use context structures that have an initialization function. It is okay to use global Tuples or NamedTuples as the use of these should be baked into compiled code. * Use context variables to store program state, inputs, and outputs. Parameterize these typese as needed, so your code can handle normal types (Arrays) and static-friendly types (StaticArrays, MallocArrays, or StrideArrays). The SciML ecosystem does this well ([example](https://github.com/SciML/OrdinaryDiffEq.jl/blob/e7f045950615352ddfcb126d13d92afd2bad05e4/src/integrators/type.jl#L82)). 
Use of these context variables also enables allocations and initialization to be centralized, so these could be managed by the calling routines in Julia, Python, JavaScript, or other language. -* If your code needs an array as a workspace, instead of directly creating it, create it as a function argument (where it could default to a standard array creation). That code could be statically compiled if that function argument is changed to a MallocArray or another static-friendly alternative. +* Arguments and returned values from `compile_shlib` must be native objects such as `Int`, `Float64`, or `Ptr`. They cannot be things like `Tuple{Int, Int}` because that is not natively sized. Such objects need to be passed by reference instead of by value. -* Use [Bumper.jl](https://github.com/MasonProtter/Bumper.jl) to avoid allocations in some loops. +* If your code needs an array as a workspace, instead of directly creating it, create it as a function argument (where it could default to a standard array creation). That code could be statically compiled if that function argument is changed to a MallocArray or another static-friendly alternative. ## Guide for Statically Compiling Code -If you're trying to statically compile generic code, you may run into issues if that code uses features not supported by StaticCompiler. One option is to change the code you're calling using the tips above. If that is not easy, you may by able to compile it anyway. One option is to use method overrides to change what methods are called. Another option is to use the Mixtape feature to change problematic code as part of compilation. For example, you could convert all Strings to StaticStrings. +If you're trying to statically compile generic code, you may run into issues if that code uses features not supported by StaticCompiler. One option is to change the code you're calling using the tips above. If that is not easy, you may by able to compile it anyway. 
One option is to use method overlays to change what methods are called. [Cthulhu](https://github.com/JuliaDebug/Cthulhu.jl) is a great help in digging into code, finding type instabilities, and finding other sources of code that may break static compilation. @@ -117,9 +110,4 @@ If you're trying to statically compile generic code, you may run into issues if Because Julia objects follow C memory layouts, compiled libraries should be usable from most languages that can interface with C. For example, results should be usable with Python's [CFFI](https://cffi.readthedocs.io/en/latest/) package. -For WebAssembly, interface helpers are available at [WebAssemblyInterfaces](https://github.com/tshort/WebAssemblyInterfaces.jl). - - - - - +For WebAssembly, interface helpers are available at [WebAssemblyInterfaces](https://github.com/tshort/WebAssemblyInterfaces.jl), and users should also see [WebAssemblyCompiler](https://github.com/tshort/WebAssemblyCompiler.jl) for a package more focused on compilation of WebAssebly in general. diff --git a/src/StaticCompiler.jl b/src/StaticCompiler.jl index cc9ecf6..a44af51 100644 --- a/src/StaticCompiler.jl +++ b/src/StaticCompiler.jl @@ -11,192 +11,20 @@ using Clang_jll: clang using LLD_jll: lld using StaticTools using StaticTools: @symbolcall, @c_str, println +using Core: MethodTable - -export compile, load_function, compile_shlib, compile_executable, compile_wasm +export load_function, compile_shlib, compile_executable export native_code_llvm, native_code_typed, native_llvm_module, native_code_native export @device_override, @print_and_throw -include("mixtape.jl") include("interpreter.jl") include("target.jl") -include("pointer_patching.jl") -include("code_loading.jl") -include("optimize.jl") +include("pointer_warning.jl") include("quirks.jl") fix_name(f::Function) = fix_name(string(nameof(f))) fix_name(s) = String(GPUCompiler.safe_name(s)) -""" - compile(f, types, path::String = tempname()) --> (compiled_f, path) - - !!! 
Warning: this will fail on programs that have dynamic dispatch !!! - -Statically compile the method of a function `f` specialized to arguments of the type given by `types`. - -This will create a directory at the specified path (or in a temporary directory if you exclude that argument) -that contains the files needed for your static compiled function. `compile` will return a -`StaticCompiledFunction` object and `obj_path` which is the absolute path of the directory containing the -compilation artifacts. The `StaticCompiledFunction` can be treated as if it is a function with a single -method corresponding to the types you specified when it was compiled. - -To deserialize and instantiate a previously compiled function, simply execute `load_function(path)`, which -returns a callable `StaticCompiledFunction`. - -### Example: - -Define and compile a `fib` function: -```julia -julia> using StaticCompiler - -julia> fib(n) = n <= 1 ? n : fib(n - 1) + fib(n - 2) -fib (generic function with 1 method) - -julia> fib_compiled, path = compile(fib, Tuple{Int}, "fib") -(f = fib(::Int64) :: Int64, path = "fib") - -julia> fib_compiled(10) -55 -``` -Now we can quit this session and load a new one where `fib` is not defined: -```julia -julia> fib -ERROR: UndefVarError: fib not defined - -julia> using StaticCompiler - -julia> fib_compiled = load_function("fib.cjl") -fib(::Int64) :: Int64 - -julia> fib_compiled(10) -55 -``` -Tada! - -### Details: - -Here is the structure of the directory created by `compile` in the above example: -```julia -shell> tree fib -path -├── obj.cjl -└── obj.o - -0 directories, 3 files -```` -* `obj.o` contains statically compiled code in the form of an LLVM generated object file. -* `obj.cjl` is a serialized `LazyStaticCompiledFunction` object which will be deserialized and instantiated -with `load_function(path)`. `LazyStaticcompiledfunction`s contain the requisite information needed to link to the -`obj.o` inside a julia session. 
Once it is instantiated in a julia session (i.e. by -`instantiate(::LazyStaticCompiledFunction)`, this happens automatically in `load_function`), it will be of type -`StaticCompiledFunction` and may be called with arguments of type `types` as if it were a function with a -single method (the method determined by `types`). -""" -function compile(f, _tt, path::String = tempname(); - mixtape = NoContext(), - name = fix_name(f), - filename = "obj", - strip_llvm = false, - strip_asm = true, - opt_level=3, - kwargs...) - - tt = Base.to_tuple_type(_tt) - isconcretetype(tt) || error("input type signature $_tt is not concrete") - - rt = last(only(native_code_typed(f, tt, mixtape = mixtape))) - isconcretetype(rt) || error("$f on $_tt did not infer to a concrete type. Got $rt") - f_wrap!(out::Ref, args::Ref{<:Tuple}) = (out[] = f(args[]...); nothing) - _, _, table = generate_obj_for_compile(f_wrap!, Tuple{RefValue{rt}, RefValue{tt}}, false, path, name; mixtape = mixtape, opt_level, strip_llvm, strip_asm, filename, kwargs...) - - lf = LazyStaticCompiledFunction{rt, tt}(Symbol(f), path, name, filename, table) - cjl_path = joinpath(path, "$filename.cjl") - serialize(cjl_path, lf) - - (; f = instantiate(lf), path=abspath(path)) -end - -""" -```julia -generate_obj_for_compile(f, tt, path::String = tempname(), name = fix_name(f), filenamebase::String="obj"; - \tmixtape = NoContext(), - \tstrip_llvm = false, - \tstrip_asm = true, - \ttarget = (), - \topt_level = 3, - \tkwargs...) -``` -Low level interface for compiling object code (`.o`) for for function `f` given -a tuple type `tt` characterizing the types of the arguments for which the -function will be compiled. - -`mixtape` defines a context that can be used to transform IR prior to compilation using -[Mixtape](https://github.com/JuliaCompilerPlugins/Mixtape.jl) features. - -`target` can be used to change the output target. This is useful for compiling to WebAssembly and embedded targets. 
-This is a named tuple with fields `triple`, `cpu`, and `features` (each of these are strings). -The defaults compile to the native target. - -### Examples -```julia -julia> fib(n) = n <= 1 ? n : fib(n - 1) + fib(n - 2) -fib (generic function with 1 method) - -julia> path, name, table = StaticCompiler.generate_obj_for_compile(fib, Tuple{Int64}, "./test") -("./test", "fib", IdDict{Any, String}()) - -shell> tree \$path -./test -└── obj.o - -0 directories, 1 file -``` -""" -function generate_obj_for_compile(f, tt, external = true, path::String = tempname(), name = fix_name(f), filenamebase::String="obj"; - mixtape = NoContext(), - strip_llvm = false, - strip_asm = true, - opt_level = 3, - remove_julia_addrspaces = false, - target = (), - kwargs...) - mkpath(path) - obj_path = joinpath(path, "$filenamebase.o") - #Get LLVM to generated a module of code for us. We don't want GPUCompiler's optimization passes. - params = StaticCompilerParams(opt = true, mixtape = mixtape, optlevel = Base.JLOptions().opt_level) - config = GPUCompiler.CompilerConfig(NativeCompilerTarget(target...), params, name = name, kernel = false) - job = GPUCompiler.CompilerJob(GPUCompiler.methodinstance(typeof(f), tt), config) - - table = GPUCompiler.JuliaContext() do context - mod, meta = GPUCompiler.codegen(:llvm, job; strip=strip_llvm, only_entry=false, validate=false, optimize=false) - # Use Enzyme's annotation and optimization pipeline - annotate!(mod) - tm = GPUCompiler.llvm_machine(external ? ExternalNativeCompilerTarget(target...) : NativeCompilerTarget(target...)) - optimize!(mod, tm) - - # Scoop up all the pointers in the optimized module, and replace them with unitialized global variables. - # `table` is a dictionary where the keys are julia objects that are needed by the function, and the values - # of the dictionary are the names of their associated LLVM GlobalVariable names. 
- table = relocation_table!(mod) - - # Now that we've removed all the pointers from the code, we can (hopefully) safely lower all the instrinsics - # (again, using Enzyme's pipeline) - post_optimize!(mod, tm; remove_julia_addrspaces) - - # Make sure we didn't make any glaring errors - LLVM.verify(mod) - obj, _ = GPUCompiler.emit_asm(job, mod; strip=strip_asm, validate=false, format=LLVM.API.LLVMObjectFile) - # Compile the LLVM module to native code and save it to disk - open(obj_path, "w") do io - write(io, obj) - end - table - end - - - path, name, table -end """ ```julia @@ -204,6 +32,7 @@ compile_executable(f::Function, types::Tuple, path::String, [name::String=string filename::String=name, cflags=``, # Specify libraries you would like to link against, and other compiler options here also_expose=[], + method_table=StaticCompiler.method_table, kwargs... ) ``` @@ -295,11 +124,23 @@ end """ ```julia -compile_shlib(f::Function, types::Tuple, [path::String="./"], [name::String=string(nameof(f))]; filename::String=name, cflags=``, kwargs...) -compile_shlib(funcs::Array, [path::String="./"]; filename="libfoo", demangle=true, cflags=``, kwargs...) +compile_shlib(f::Function, types::Tuple, [path::String="./"], [name::String=string(nameof(f))]; + filename::String=name, + cflags=``, + method_table=StaticCompiler.method_table, + kwargs...) + +compile_shlib(funcs::Array, [path::String="./"]; + filename="libfoo", + demangle=true, + cflags=``, + method_table=StaticCompiler.method_table, + kwargs...) ``` As `compile_executable`, but compiling to a standalone `.dylib`/`.so` shared library. +Arguments and returned values from `compile_shlib` must be native objects such as `Int`, `Float64`, or `Ptr`. They cannot be things like `Tuple{Int, Int}` because that is not natively sized. Such objects need to be passed by reference instead of by value. + If `demangle` is set to `false`, compiled function names are prepended with "julia_". 
### Examples @@ -353,38 +194,7 @@ function compile_shlib(funcs::Union{Array,Tuple}, path::String="./"; joinpath(abspath(path), filename * "." * Libdl.dlext) end - -""" -```julia -compile_wasm(f::Function, types::Tuple, [path::String="./"], [name::String=string(nameof(f))]; filename::String=name, flags=``, kwargs...) -compile_wasm(funcs::Union{Array,Tuple}, [path::String="./"]; filename="libfoo", demangle=true, flags=``, kwargs...) -``` -As `compile_shlib`, but compiling to a WebAssembly library. - -If `demangle` is set to `false`, compiled function names are prepended with "julia_". -``` -""" -function compile_wasm(f::Function, types=(); - path::String = "./", - filename = fix_name(f), - flags = ``, - kwargs... - ) - tt = Base.to_tuple_type(types) - obj_path, name = generate_obj_for_compile(f, tt, true, path, filename; target = (triple = "wasm32-unknown-wasi", cpu = "", features = ""), remove_julia_addrspaces = true, kwargs...) - run(`$(lld()) -flavor wasm --no-entry --export-all $flags $obj_path/obj.o -o $path/$name.wasm`) - joinpath(abspath(path), filename * ".wasm") -end -function compile_wasm(funcs::Union{Array,Tuple}; - path::String="./", - filename="libfoo", - flags=``, - kwargs... - ) - obj_path, name = generate_obj(funcs, true, path, filename; target = (triple = "wasm32-unknown-wasi", cpu = "", features = ""), remove_julia_addrspaces = true, kwargs...) - run(`$(lld()) -flavor wasm --no-entry --export-all $flags $obj_path/$filename.o -o $path/$filename.wasm`) - joinpath(abspath(path), filename * ".wasm") -end + """ ```julia @@ -429,6 +239,7 @@ function generate_shlib_fptr(path::String, name, filename::String=name) @assert fptr != C_NULL fptr end + # As above, but also compile (maybe remove this method in the future?) function generate_shlib_fptr(f, tt, path::String=tempname(), name=fix_name(f), filename::String=name; temp::Bool=true, @@ -586,8 +397,10 @@ function native_llvm_module(f, tt, name=fix_name(f); demangle, kwargs...) 
name = "julia_"*name end job, kwargs = native_job(f, tt, true; name, kwargs...) - m, _ = GPUCompiler.JuliaContext() do context - GPUCompiler.codegen(:llvm, job; strip=true, only_entry=false, validate=false) + m = GPUCompiler.JuliaContext() do context + m, _ = GPUCompiler.codegen(:llvm, job; strip=true, only_entry=false, validate=false) + locate_pointers_and_runtime_calls(m) + m end return m end @@ -596,25 +409,26 @@ end function native_llvm_module(funcs::Union{Array,Tuple}; demangle=true, kwargs...) f,tt = funcs[1] mod = GPUCompiler.JuliaContext() do context - name_f = fix_name(f) - if !demangle - name_f = "julia_"*name_f - end - job, kwargs = native_job(f, tt, true; name = name_f, kwargs...) - mod,_ = GPUCompiler.codegen(:llvm, job; strip=true, only_entry=false, validate=false) - if length(funcs) > 1 - for func in funcs[2:end] - f,tt = func - name_f = fix_name(f) - if !demangle - name_f = "julia_"*name_f - end - job, kwargs = native_job(f, tt, true; name = name_f, kwargs...) - tmod,_ = GPUCompiler.codegen(:llvm, job; strip=true, only_entry=false, validate=false) - link!(mod,tmod) - end - end - mod + name_f = fix_name(f) + if !demangle + name_f = "julia_"*name_f + end + job, kwargs = native_job(f, tt, true; name = name_f, kwargs...) + mod,_ = GPUCompiler.codegen(:llvm, job; strip=true, only_entry=false, validate=false) + if length(funcs) > 1 + for func in funcs[2:end] + f,tt = func + name_f = fix_name(f) + if !demangle + name_f = "julia_"*name_f + end + job, kwargs = native_job(f, tt, true; name = name_f, kwargs...) 
+ tmod,_ = GPUCompiler.codegen(:llvm, job; strip=true, only_entry=false, validate=false) + link!(mod,tmod) + end + end + locate_pointers_and_runtime_calls(mod) + mod end # Just to be sure for (modfunc, func) in zip(functions(mod), funcs) @@ -644,7 +458,6 @@ end """ ```julia generate_obj(f, tt, external::Bool, path::String = tempname(), filenamebase::String="obj"; - mixtape = NoContext(), target = (), demangle = true, strip_llvm = false, @@ -656,9 +469,6 @@ Low level interface for compiling object code (`.o`) for for function `f` given a tuple type `tt` characterizing the types of the arguments for which the function will be compiled. -`mixtape` defines a context that can be used to transform IR prior to compilation using -[Mixtape](https://github.com/JuliaCompilerPlugins/Mixtape.jl) features. - `target` can be used to change the output target. This is useful for compiling to WebAssembly and embedded targets. This is a named tuple with fields `triple`, `cpu`, and `features` (each of these are strings). The defaults compile to the native target. @@ -688,7 +498,6 @@ end """ ```julia generate_obj(funcs::Union{Array,Tuple}, external::Bool, path::String = tempname(), filenamebase::String="obj"; - mixtape = NoContext(), target = (), demangle =false, strip_llvm = false, @@ -700,9 +509,6 @@ Low level interface for compiling object code (`.o`) for an array of Tuples (f, tt) where each function `f` and tuple type `tt` determine the set of methods which will be compiled. -`mixtape` defines a context that can be used to transform IR prior to compilation using -[Mixtape](https://github.com/JuliaCompilerPlugins/Mixtape.jl) features. - `target` can be used to change the output target. This is useful for compiling to WebAssembly and embedded targets. This is a named tuple with fields `triple`, `cpu`, and `features` (each of these are strings). The defaults compile to the native target. 
@@ -717,8 +523,11 @@ function generate_obj(funcs::Union{Array,Tuple}, external::Bool, path::String = mkpath(path) obj_path = joinpath(path, "$filenamebase.o") mod = native_llvm_module(funcs; demangle, kwargs...) - fakejob, _ = native_job(f, tt, external; kwargs...) - obj, _ = GPUCompiler.emit_asm(fakejob, mod; strip=strip_asm, validate=false, format=LLVM.API.LLVMObjectFile) + obj = GPUCompiler.JuliaContext() do ctx + fakejob, _ = native_job(f, tt, external; kwargs...) + obj, _ = GPUCompiler.emit_asm(fakejob, mod; strip=strip_asm, validate=false, format=LLVM.API.LLVMObjectFile) + obj + end open(obj_path, "w") do io write(io, obj) end diff --git a/src/code_loading.jl b/src/code_loading.jl deleted file mode 100644 index 8fc1ae6..0000000 --- a/src/code_loading.jl +++ /dev/null @@ -1,84 +0,0 @@ -""" - load_function(path) --> compiled_f - -load a `StaticCompiledFunction` from a given path. This object is callable. -""" -function load_function(path; filename="obj") - instantiate(deserialize(joinpath(path, "$filename.cjl"))) -end - -struct LazyStaticCompiledFunction{rt, tt} - f::Symbol - path::String - name::String - filename::String - reloc::IdDict{Any,String} -end - -""" - unsafe_pointer_from_objref(x) - -Sometimes Julia embeds immutables like `Base.string` into code, and julia -will error if you call `pointer_from_objref(string)`, claiming that it -doesn't have a pointer even though that's a lie. 
-""" -unsafe_pointer_from_objref(x) = ccall(:jl_value_ptr, Ptr{Cvoid}, (Any,), x) - -function instantiate(p::LazyStaticCompiledFunction{rt, tt}) where {rt, tt} - # LLVM.load_library_permantly(dirname(Libdl.dlpath(Libdl.dlopen("libjulia")))) - lljit = LLVM.LLJIT(;tm=LLVM.JITTargetMachine()) - jd = LLVM.JITDylib(lljit) - flags = LLVM.API.LLVMJITSymbolFlags(LLVM.API.LLVMJITSymbolGenericFlagsExported, 0) - ofile = LLVM.MemoryBufferFile(joinpath(p.path, "$(p.filename).o")) #$(Libdl.dlext) - - - # Set all the uninitialized global variables to point to julia values from the relocation table - for (val, name) ∈ p.reloc - address = LLVM.API.LLVMOrcJITTargetAddress(reinterpret(UInt, unsafe_pointer_from_objref(val))) - - symbol = LLVM.API.LLVMJITEvaluatedSymbol(address, flags) - gv = LLVM.API.LLVMJITCSymbolMapPair(LLVM.mangle(lljit, name), symbol) - mu = absolute_symbols(Ref(gv)) - LLVM.define(jd, mu) - end - # consider switching to one mu for all gvs instead of one per gv. - # I tried that already, but I got an error saying - # JIT session error: Symbols not found: [ __Type_Vector_Float64___274 ] - - # Link to libjulia - prefix = LLVM.get_prefix(lljit) - dg = LLVM.CreateDynamicLibrarySearchGeneratorForProcess(prefix) - LLVM.add!(jd, dg) - LLVM.add!(lljit, jd, ofile) - fptr = pointer(LLVM.lookup(lljit, p.name)) - - StaticCompiledFunction{rt, tt}(p.f, fptr, lljit, p.reloc) -end - -function absolute_symbols(symbols) - ref = LLVM.API.LLVMOrcAbsoluteSymbols(symbols, length(symbols)) - LLVM.MaterializationUnit(ref) -end - - -struct StaticCompiledFunction{rt, tt} - f::Symbol - ptr::Ptr{Nothing} - jit::LLVM.LLJIT - reloc::IdDict{Any, String} -end - -function Base.show(io::IO, f::StaticCompiledFunction{rt, tt}) where {rt, tt} - types = [tt.parameters...] - print(io, String(f.f), "(", join(("::$T" for T ∈ tt.parameters), ',') ,") :: $rt") -end - -function (f::StaticCompiledFunction{rt, tt})(args...) 
where {rt, tt} - Tuple{typeof.(args)...} == tt || error("Input types don't match compiled target $((tt.parameters...,)). Got arguments of type $(typeof.(args))") - out = RefValue{rt}() - refargs = Ref(args) - ccall(f.ptr, Nothing, (Ptr{rt}, Ref{tt}), pointer_from_objref(out), refargs) - out[] -end - -instantiate(f::StaticCompiledFunction) = f diff --git a/src/interpreter.jl b/src/interpreter.jl index c0e00c3..344cc53 100644 --- a/src/interpreter.jl +++ b/src/interpreter.jl @@ -7,7 +7,7 @@ using GPUCompiler: using CodeInfoTools using CodeInfoTools: resolve -struct StaticInterpreter{M} <: AbstractInterpreter +struct StaticInterpreter <: AbstractInterpreter global_cache::CodeCache method_table::Union{Nothing,Core.MethodTable} @@ -20,13 +20,10 @@ struct StaticInterpreter{M} <: AbstractInterpreter inf_params::InferenceParams opt_params::OptimizationParams - # Mixtape context - mixtape::M - - function StaticInterpreter(cache::CodeCache, mt::Union{Nothing,Core.MethodTable}, world::UInt, ip::InferenceParams, op::OptimizationParams, mixtape::CompilationContext) + function StaticInterpreter(cache::CodeCache, mt::Union{Nothing,Core.MethodTable}, world::UInt, ip::InferenceParams, op::OptimizationParams) @assert world <= Base.get_world_counter() - return new{typeof(mixtape)}( + return new( cache, mt, @@ -38,10 +35,7 @@ struct StaticInterpreter{M} <: AbstractInterpreter # parameters for inference and optimization ip, - op, - - # Mixtape context - mixtape + op ) end end @@ -79,9 +73,6 @@ function custom_pass!(interp::StaticInterpreter, result::InferenceResult, mi::Co mi.specTypes isa UnionAll && return src sig = Tuple(mi.specTypes.parameters) as = map(resolve_generic, sig) - if allow(interp.mixtape, mi.def.module, as...) 
- src = transform(interp.mixtape, src, sig) - end return src end @@ -102,22 +93,21 @@ end Core.Compiler.may_optimize(interp::StaticInterpreter) = true Core.Compiler.may_compress(interp::StaticInterpreter) = true Core.Compiler.may_discard_trees(interp::StaticInterpreter) = true -if VERSION >= v"1.7.0-DEV.577" Core.Compiler.verbose_stmt_info(interp::StaticInterpreter) = false -end + if isdefined(Base.Experimental, Symbol("@overlay")) -using Core.Compiler: OverlayMethodTable -if v"1.8-beta2" <= VERSION < v"1.9-" || VERSION >= v"1.9.0-DEV.120" -Core.Compiler.method_table(interp::StaticInterpreter) = - OverlayMethodTable(interp.world, interp.method_table) -else -Core.Compiler.method_table(interp::StaticInterpreter, sv::InferenceState) = - OverlayMethodTable(interp.world, interp.method_table) -end + using Core.Compiler: OverlayMethodTable + if v"1.8-beta2" <= VERSION < v"1.9-" || VERSION >= v"1.9.0-DEV.120" + Core.Compiler.method_table(interp::StaticInterpreter) = + OverlayMethodTable(interp.world, interp.method_table) + else + Core.Compiler.method_table(interp::StaticInterpreter, sv::InferenceState) = + OverlayMethodTable(interp.world, interp.method_table) + end else -Core.Compiler.method_table(interp::StaticInterpreter, sv::InferenceState) = - WorldOverlayMethodTable(interp.world) + Core.Compiler.method_table(interp::StaticInterpreter, sv::InferenceState) = + WorldOverlayMethodTable(interp.world) end # semi-concrete interepretation is broken with overlays (JuliaLang/julia#47349) @@ -134,13 +124,11 @@ end struct StaticCompilerParams <: AbstractCompilerParams opt::Bool optlevel::Int - mixtape::CompilationContext cache::CodeCache end function StaticCompilerParams(; opt = false, optlevel = Base.JLOptions().opt_level, - mixtape = NoContext(), cache = CodeCache()) - return StaticCompilerParams(opt, optlevel, mixtape, cache) + return StaticCompilerParams(opt, optlevel, cache) end diff --git a/src/optimize.jl b/src/optimize.jl deleted file mode 100644 index b781517..0000000 
--- a/src/optimize.jl +++ /dev/null @@ -1,328 +0,0 @@ -# stolen from https://github.com/EnzymeAD/Enzyme.jl/blob/1b187cc16953727cab26b64bc6a6dcf106c29a57/src/compiler/optimize.jl#L213 - -function optimize!(mod::LLVM.Module, tm) - # everying except unroll, slpvec, loop-vec - # then finish Julia GC - ModulePassManager() do pm - add_library_info!(pm, triple(mod)) - add_transform_info!(pm, tm) - - propagate_julia_addrsp!(pm) - scoped_no_alias_aa!(pm) - type_based_alias_analysis!(pm) - basic_alias_analysis!(pm) - cfgsimplification!(pm) - dce!(pm) -@static if isdefined(GPUCompiler, :cpu_features!) - GPUCompiler.cpu_features!(pm) -end - scalar_repl_aggregates_ssa!(pm) # SSA variant? - mem_cpy_opt!(pm) - always_inliner!(pm) - alloc_opt!(pm) - instruction_combining!(pm) - cfgsimplification!(pm) - scalar_repl_aggregates_ssa!(pm) # SSA variant? - instruction_combining!(pm) - jump_threading!(pm) - correlated_value_propagation!(pm) - instruction_combining!(pm) - reassociate!(pm) - early_cse!(pm) - alloc_opt!(pm) - loop_idiom!(pm) - loop_rotate!(pm) - lower_simdloop!(pm) - licm!(pm) - loop_unswitch!(pm) - instruction_combining!(pm) - ind_var_simplify!(pm) - loop_deletion!(pm) - loop_unroll!(pm) - alloc_opt!(pm) - scalar_repl_aggregates_ssa!(pm) # SSA variant? - gvn!(pm) - # This InstCombine needs to be after GVN - # Otherwise it will generate load chains in GPU code... 
- instruction_combining!(pm) - mem_cpy_opt!(pm) - sccp!(pm) - instruction_combining!(pm) - jump_threading!(pm) - dead_store_elimination!(pm) - alloc_opt!(pm) - cfgsimplification!(pm) - loop_idiom!(pm) - loop_deletion!(pm) - jump_threading!(pm) - correlated_value_propagation!(pm) - # SLP_Vectorizer -- not for Enzyme - aggressive_dce!(pm) - instruction_combining!(pm) - # Loop Vectorize -- not for Enzyme - # InstCombine - - # GC passes - barrier_noop!(pm) - gc_invariant_verifier!(pm, false) - - # FIXME: Currently crashes printing - cfgsimplification!(pm) - instruction_combining!(pm) # Extra for Enzyme - #API.EnzymeAddAttributorLegacyPass(pm) - run!(pm, mod) - end - # @show "omod", mod - # flush(stdout) - # flush(stderr) -end - -# https://github.com/JuliaLang/julia/blob/2eb5da0e25756c33d1845348836a0a92984861ac/src/aotcompile.cpp#L603 -function addTargetPasses!(pm, tm) - add_library_info!(pm, LLVM.triple(tm)) - add_transform_info!(pm, tm) -end - -# https://github.com/JuliaLang/julia/blob/2eb5da0e25756c33d1845348836a0a92984861ac/src/aotcompile.cpp#L620 -function addOptimizationPasses!(pm) - constant_merge!(pm) - - propagate_julia_addrsp!(pm) - scoped_no_alias_aa!(pm) - type_based_alias_analysis!(pm) - basic_alias_analysis!(pm) - cfgsimplification!(pm) - dce!(pm) - scalar_repl_aggregates!(pm) - - # mem_cpy_opt!(pm) - - always_inliner!(pm) # Respect always_inline - - # Running `memcpyopt` between this and `sroa` seems to give `sroa` a hard time - # merging the `alloca` for the unboxed data and the `alloca` created by the `alloc_opt` - # pass. - - alloc_opt!(pm) - # consider AggressiveInstCombinePass at optlevel > 2 - - instruction_combining!(pm) - cfgsimplification!(pm) - scalar_repl_aggregates!(pm) - instruction_combining!(pm) # TODO: createInstSimplifyLegacy - jump_threading!(pm) - correlated_value_propagation!(pm) - - reassociate!(pm) - - early_cse!(pm) - - # Load forwarding above can expose allocations that aren't actually used - # remove those before optimizing loops. 
- alloc_opt!(pm) - loop_rotate!(pm) - # moving IndVarSimplify here prevented removing the loop in perf_sumcartesian(10:-1:1) - loop_idiom!(pm) - - # LoopRotate strips metadata from terminator, so run LowerSIMD afterwards - lower_simdloop!(pm) # Annotate loop marked with "loopinfo" as LLVM parallel loop - licm!(pm) - julia_licm!(pm) - # Subsequent passes not stripping metadata from terminator - instruction_combining!(pm) # TODO: createInstSimplifyLegacy - ind_var_simplify!(pm) - loop_deletion!(pm) - loop_unroll!(pm) # TODO: in Julia createSimpleLoopUnroll - - # Run our own SROA on heap objects before LLVM's - alloc_opt!(pm) - # Re-run SROA after loop-unrolling (useful for small loops that operate, - # over the structure of an aggregate) - scalar_repl_aggregates!(pm) - instruction_combining!(pm) # TODO: createInstSimplifyLegacy - - gvn!(pm) - mem_cpy_opt!(pm) - sccp!(pm) - - # Run instcombine after redundancy elimination to exploit opportunities - # opened up by them. - # This needs to be InstCombine instead of InstSimplify to allow - # loops over Union-typed arrays to vectorize. - instruction_combining!(pm) - jump_threading!(pm) - dead_store_elimination!(pm) - - # More dead allocation (store) deletion before loop optimization - # consider removing this: - alloc_opt!(pm) - - # see if all of the constant folding has exposed more loops - # to simplification and deletion - # this helps significantly with cleaning up iteration - cfgsimplification!(pm) - loop_deletion!(pm) - instruction_combining!(pm) - loop_vectorize!(pm) - # TODO: createLoopLoadEliminationPass - cfgsimplification!(pm) - slpvectorize!(pm) - # might need this after LLVM 11: - # TODO: createVectorCombinePass() - - aggressive_dce!(pm) -end - -function addMachinePasses!(pm) - combine_mul_add!(pm) - # TODO: createDivRemPairs[] - - demote_float16!(pm) - gvn!(pm) -end - -function addJuliaLegalizationPasses!(pm, lower_intrinsics=true) - if lower_intrinsics - # LowerPTLS removes an indirect call. 
As a result, it is likely to trigger - # LLVM's devirtualization heuristics, which would result in the entire - # pass pipeline being re-exectuted. Prevent this by inserting a barrier. - barrier_noop!(pm) - lower_exc_handlers!(pm) - gc_invariant_verifier!(pm, false) - - # Needed **before** LateLowerGCFrame on LLVM < 12 - # due to bug in `CreateAlignmentAssumption`. - remove_ni!(pm) - late_lower_gc_frame!(pm) - final_lower_gc!(pm) - # We need these two passes and the instcombine below - # after GC lowering to let LLVM do some constant propagation on the tags. - # and remove some unnecessary write barrier checks. - gvn!(pm) - sccp!(pm) - # Remove dead use of ptls - dce!(pm) - lower_ptls!(pm, #=dump_native=# false) - instruction_combining!(pm) - # Clean up write barrier and ptls lowering - cfgsimplification!(pm) - else - barrier_noop!(pm) - remove_ni!(pm) - end -end - -function post_optimize!(mod, tm; remove_julia_addrspaces = false) - # @show "pre_post", mod - # flush(stdout) - # flush(stderr) - LLVM.ModulePassManager() do pm - addTargetPasses!(pm, tm) - addOptimizationPasses!(pm) - run!(pm, mod) - end - LLVM.ModulePassManager() do pm - addJuliaLegalizationPasses!(pm, true) - addMachinePasses!(pm) - if remove_julia_addrspaces - remove_julia_addrspaces!(pm) - end - run!(pm, mod) - end - # @show "post_mod", mod - # flush(stdout) - # flush(stderr) -end - - - - -const inactivefns = Set{String}(( - "jl_gc_queue_root", "gpu_report_exception", "gpu_signal_exception", - "julia.ptls_states", "julia.write_barrier", "julia.typeof", "jl_box_int64", "jl_box_int32", - "jl_subtype", "julia.get_pgcstack", "jl_in_threaded_region", "jl_object_id_", "jl_object_id", - "jl_breakpoint", - "llvm.julia.gc_preserve_begin","llvm.julia.gc_preserve_end", "jl_get_ptls_states", - "jl_f_fieldtype", - "jl_symbol_n", - # BIG TODO - "jl_gc_add_finalizer_th", - # "jl_" -)) - -const activefns = Set{String}(( - "jl_", -)) - -function annotate!(mod) - inactive = LLVM.StringAttribute("enzyme_inactive", 
"") - active = LLVM.StringAttribute("enzyme_active", "") - fns = functions(mod) - - for inactivefn in inactivefns - if haskey(fns, inactivefn) - fn = fns[inactivefn] - push!(function_attributes(fn), inactive) - end - end - - for activefn in activefns - if haskey(fns, activefn) - fn = fns[activefn] - push!(function_attributes(fn), active) - end - end - - for fname in ("julia.typeof",) - if haskey(fns, fname) - fn = fns[fname] - push!(function_attributes(fn), LLVM.EnumAttribute("readnone", 0)) - push!(function_attributes(fn), LLVM.StringAttribute("enzyme_shouldrecompute")) - end - end - - for fname in ("julia.get_pgcstack", "julia.ptls_states", "jl_get_ptls_states") - if haskey(fns, fname) - fn = fns[fname] - # TODO per discussion w keno perhaps this should change to readonly / inaccessiblememonly - push!(function_attributes(fn), LLVM.EnumAttribute("readnone", 0)) - end - end - - for fname in ("julia.pointer_from_objref",) - if haskey(fns, fname) - fn = fns[fname] - push!(function_attributes(fn), LLVM.EnumAttribute("readnone", 0)) - end - end - - for boxfn in ("jl_box_float32", "jl_box_float64", "jl_box_int32", "jl_box_int64", "julia.gc_alloc_obj", "jl_alloc_array_1d", "jl_alloc_array_2d", "jl_alloc_array_3d") - if haskey(fns, boxfn) - fn = fns[boxfn] - push!(return_attributes(fn), LLVM.EnumAttribute("noalias", 0)) - push!(function_attributes(fn), LLVM.EnumAttribute("inaccessiblememonly", 0)) - end - end - - for gc in ("llvm.julia.gc_preserve_begin", "llvm.julia.gc_preserve_end") - if haskey(fns, gc) - fn = fns[gc] - push!(function_attributes(fn), LLVM.EnumAttribute("inaccessiblememonly", 0)) - end - end - - for rfn in ("jl_object_id_", "jl_object_id") - if haskey(fns, rfn) - fn = fns[rfn] - push!(function_attributes(fn), LLVM.EnumAttribute("readonly", 0)) - end - end - - for rfn in ("jl_in_threaded_region_", "jl_in_threaded_region") - if haskey(fns, rfn) - fn = fns[rfn] - push!(function_attributes(fn), LLVM.EnumAttribute("readonly", 0)) - 
push!(function_attributes(fn), LLVM.EnumAttribute("inaccessiblememonly", 0)) - end - end -end diff --git a/src/pointer_patching.jl b/src/pointer_patching.jl deleted file mode 100644 index 27a44a0..0000000 --- a/src/pointer_patching.jl +++ /dev/null @@ -1,185 +0,0 @@ -function relocation_table!(mod) - i64 = LLVM.IntType(64) - d = IdDict{Any, Tuple{String, LLVM.GlobalVariable}}() - - for func ∈ LLVM.functions(mod), bb ∈ LLVM.blocks(func), inst ∈ LLVM.instructions(bb) - if isa(inst, LLVM.LoadInst) && occursin("inttoptr", string(inst)) - get_pointers!(d, mod, inst) - elseif isa(inst, LLVM.StoreInst) && occursin("inttoptr", string(inst)) - @debug "Relocating StoreInst" inst - get_pointers!(d, mod, inst) - elseif inst isa LLVM.RetInst && occursin("inttoptr", string(inst)) - @debug "Relocating RetInst" inst LLVM.operands(inst) - get_pointers!(d, mod, inst) - elseif isa(inst, LLVM.BitCastInst) && occursin("inttoptr", string(inst)) - @debug "Relocating BitCastInst" inst LLVM.operands(inst) - get_pointers!(d, mod, inst) - elseif isa(inst, LLVM.CallInst) - @debug "Relocating CallInst" inst LLVM.operands(inst) - dest = LLVM.called_value(inst) - if occursin("inttoptr", string(dest)) && length(LLVM.operands(dest)) > 0 - @debug "Relocating CallInst inttoptr" dest LLVM.operands(dest) LLVM.operands(inst) - ptr_arg = first(LLVM.operands(dest)) - ptr_val = convert(Int, ptr_arg) - ptr = Ptr{Cvoid}(ptr_val) - - frames = ccall(:jl_lookup_code_address, Any, (Ptr{Cvoid}, Cint,), ptr, 0) - if length(frames) >= 1 - fn, file, line, linfo, fromC, inlined = last(frames) - fn = string(fn) - if ptr == cglobal(:jl_alloc_array_1d) - fn = "jl_alloc_array_1d" - end - if ptr == cglobal(:jl_alloc_array_2d) - fn = "jl_alloc_array_2d" - end - if ptr == cglobal(:jl_alloc_array_3d) - fn = "jl_alloc_array_3d" - end - if ptr == cglobal(:jl_new_array) - fn = "jl_new_array" - end - if ptr == cglobal(:jl_array_copy) - fn = "jl_array_copy" - end - if ptr == cglobal(:jl_alloc_string) - fn = "jl_alloc_string" - 
end - if ptr == cglobal(:jl_in_threaded_region) - fn = "jl_in_threaded_region" - end - if ptr == cglobal(:jl_enter_threaded_region) - fn = "jl_enter_threaded_region" - end - if ptr == cglobal(:jl_exit_threaded_region) - fn = "jl_exit_threaded_region" - end - if ptr == cglobal(:jl_set_task_tid) - fn = "jl_set_task_tid" - end - if ptr == cglobal(:jl_new_task) - fn = "jl_new_task" - end - if ptr == cglobal(:malloc) - fn = "malloc" - end - if ptr == cglobal(:memmove) - fn = "memmove" - end - if ptr == cglobal(:jl_array_grow_beg) - fn = "jl_array_grow_beg" - end - if ptr == cglobal(:jl_array_grow_end) - fn = "jl_array_grow_end" - end - if ptr == cglobal(:jl_array_grow_at) - fn = "jl_array_grow_at" - end - if ptr == cglobal(:jl_array_del_beg) - fn = "jl_array_del_beg" - end - if ptr == cglobal(:jl_array_del_end) - fn = "jl_array_del_end" - end - if ptr == cglobal(:jl_array_del_at) - fn = "jl_array_del_at" - end - if ptr == cglobal(:jl_array_ptr) - fn = "jl_array_ptr" - end - if ptr == cglobal(:jl_value_ptr) - fn = "jl_value_ptr" - end - if ptr == cglobal(:jl_get_ptls_states) - fn = "jl_get_ptls_states" - end - if ptr == cglobal(:jl_gc_add_finalizer_th) - fn = "jl_gc_add_finalizer_th" - end - if ptr == cglobal(:jl_symbol_n) - fn = "jl_symbol_n" - end - end - - if length(fn) > 1 && fromC - mod = LLVM.parent(LLVM.parent(LLVM.parent(inst))) - lfn = LLVM.API.LLVMGetNamedFunction(mod, fn) - - if lfn == C_NULL - lfn = LLVM.API.LLVMAddFunction(mod, fn, LLVM.API.LLVMGetCalledFunctionType(inst)) - else - lfn = LLVM.API.LLVMConstBitCast(lfn, LLVM.PointerType(LLVM.FunctionType(LLVM.API.LLVMGetCalledFunctionType(inst)))) - end - LLVM.API.LLVMSetOperand(inst, LLVM.API.LLVMGetNumOperands(inst)-1, lfn) - end - end - get_pointers!(d, mod, inst) - end - end - IdDict{Any, String}(val => name for (val, (name, _)) ∈ d) -end - -function get_pointers!(d, mod, inst) - jl_t = (LLVM.StructType(LLVM.LLVMType[])) - for (i, arg) ∈ enumerate(LLVM.operands(inst)) - if occursin("inttoptr", string(arg)) 
&& arg isa LLVM.ConstantExpr - op1 = LLVM.Value(LLVM.API.LLVMGetOperand(arg, 0)) - if op1 isa LLVM.ConstantExpr - op1 = LLVM.Value(LLVM.API.LLVMGetOperand(op1, 0)) - end - ptr = Ptr{Cvoid}(convert(Int, op1)) - frames = ccall(:jl_lookup_code_address, Any, (Ptr{Cvoid}, Cint,), ptr, 0) - if length(frames) >= 1 - fn, file, line, linfo, fromC, inlined = last(frames) - if (isempty(String(fn)) && isempty(String(file))) || fn == :jl_system_image_data - val = unsafe_pointer_to_objref(ptr) - if val ∈ keys(d) - _, gv = d[val] - LLVM.API.LLVMSetOperand(inst, i-1, gv) - else - gv_name = fix_name(String(gensym(repr(Core.Typeof(val))))) - gv = LLVM.GlobalVariable(mod, llvmeltype(arg), gv_name, LLVM.addrspace(value_type(arg))) - - LLVM.extinit!(gv, true) - LLVM.API.LLVMSetOperand(inst, i-1, gv) - - d[val] = (gv_name, gv) - end - else - @warn "Found data we don't know how to relocate." frames - end - end - end - end -end - -llvmeltype(x::LLVM.Value) = eltype(LLVM.value_type(x)) - -function pointer_patching_diff(f, tt, path1=tempname(), path2=tempname(); show_reloc_table=false) - tm = GPUCompiler.llvm_machine(NativeCompilerTarget()) - job, kwargs = native_job(f, tt, false; name=fix_name(string(nameof(f)))) - #Get LLVM to generated a module of code for us. We don't want GPUCompiler's optimization passes. 
- mod, meta = GPUCompiler.JuliaContext() do context - GPUCompiler.codegen(:llvm, job; strip=true, only_entry=false, validate=false, optimize=false) - end - # Use Enzyme's annotation and optimization pipeline - annotate!(mod) - optimize!(mod, tm) - - s1 = string(mod) - write(path1, s1) - - d = StaticCompiler.relocation_table!(mod) - if show_reloc_table - @show d - end - - s2 = string(mod) - write(path2, s2) - - pdiff = run(Cmd(`diff $path1 $path2`, ignorestatus=true)) - pdiff.exitcode == 2 && error("Showing diff caused an error") - nothing -end - - diff --git a/src/pointer_warning.jl b/src/pointer_warning.jl new file mode 100644 index 0000000..9f8f30c --- /dev/null +++ b/src/pointer_warning.jl @@ -0,0 +1,72 @@ +function locate_pointers_and_runtime_calls(mod) + i64 = LLVM.IntType(64) + # d = IdDict{Any, Tuple{String, LLVM.GlobalVariable}}() + for func ∈ LLVM.functions(mod), bb ∈ LLVM.blocks(func), inst ∈ LLVM.instructions(bb) + warned = false + if isa(inst, LLVM.LoadInst) && occursin("inttoptr", string(inst)) + warned = inspect_pointers(mod, inst) + elseif isa(inst, LLVM.StoreInst) && occursin("inttoptr", string(inst)) + @debug "Inspecting StoreInst" inst + warned = inspect_pointers(mod, inst) + elseif inst isa LLVM.RetInst && occursin("inttoptr", string(inst)) + @debug "Inspecting RetInst" inst LLVM.operands(inst) + warned = inspect_pointers(mod, inst) + elseif isa(inst, LLVM.BitCastInst) && occursin("inttoptr", string(inst)) + @debug "Inspecting BitCastInst" inst LLVM.operands(inst) + warned = inspect_pointers(mod, inst) + elseif isa(inst, LLVM.CallInst) + @debug "Inspecting CallInst" inst LLVM.operands(inst) + dest = LLVM.called_operand(inst) + if occursin("inttoptr", string(dest)) && length(LLVM.operands(dest)) > 0 + @debug "Inspecting CallInst inttoptr" dest LLVM.operands(dest) LLVM.operands(inst) + ptr_arg = first(LLVM.operands(dest)) + ptr_val = convert(Int, ptr_arg) + ptr = Ptr{Cvoid}(ptr_val) + + frames = ccall(:jl_lookup_code_address, Any, (Ptr{Cvoid}, 
Cint,), ptr, 0) + + data_warnings(inst, frames) + warned = true + end + end + if warned + @warn("LLVM function generated warnings due to raw pointers embedded in the code. This will likely cause errors or undefined behaviour.", + func = func) + end + end +end + +function inspect_pointers(mod, inst) + warned = false + jl_t = (LLVM.StructType(LLVM.LLVMType[])) + for (i, arg) ∈ enumerate(LLVM.operands(inst)) + if occursin("inttoptr", string(arg)) && arg isa LLVM.ConstantExpr + op1 = LLVM.Value(LLVM.API.LLVMGetOperand(arg, 0)) + if op1 isa LLVM.ConstantExpr + op1 = LLVM.Value(LLVM.API.LLVMGetOperand(op1, 0)) + end + ptr = Ptr{Cvoid}(convert(Int, op1)) + frames = ccall(:jl_lookup_code_address, Any, (Ptr{Cvoid}, Cint,), ptr, 0) + data_warnings(inst, frames) + warned = true + end + end + warned +end + +data_warnings(inst, frames) = for frame ∈ frames + fn, file, line, linfo, fromC, inlined = frame + @warn("Found pointer references to julia data", + "llvm instruction" = inst, + name = fn, + file = file, + line = line, + fromC = fromC, + inlined = inlined) +end + +llvmeltype(x::LLVM.Value) = eltype(LLVM.value_type(x)) + + + + diff --git a/src/target.jl b/src/target.jl index d9647b4..777960a 100644 --- a/src/target.jl +++ b/src/target.jl @@ -4,7 +4,6 @@ else const method_table = nothing end -const overrides = quote end """ ```julia @@ -25,26 +24,23 @@ macro device_override(ex) error() end code = quote - $GPUCompiler.@override(StaticCompiler.method_table, $ex) - end - if isdefined(Base.Experimental, Symbol("@overlay")) - return esc(code) - else - push!(overrides, code) - return + $Base.Experimental.@overlay($StaticCompiler.method_table, $ex) end + return esc(code) end -Base.@kwdef struct NativeCompilerTarget <: GPUCompiler.AbstractCompilerTarget +Base.@kwdef struct NativeCompilerTarget{MT} <: GPUCompiler.AbstractCompilerTarget triple::String=Sys.MACHINE cpu::String=(LLVM.version() < v"8") ? 
"" : unsafe_string(LLVM.API.LLVMGetHostCPUName()) features::String=(LLVM.version() < v"8") ? "" : unsafe_string(LLVM.API.LLVMGetHostCPUFeatures()) + method_table::MT = method_table end -Base.@kwdef struct ExternalNativeCompilerTarget <: GPUCompiler.AbstractCompilerTarget +Base.@kwdef struct ExternalNativeCompilerTarget{MT} <: GPUCompiler.AbstractCompilerTarget triple::String=Sys.MACHINE cpu::String=(LLVM.version() < v"8") ? "" : unsafe_string(LLVM.API.LLVMGetHostCPUName()) features::String=(LLVM.version() < v"8") ? "" : unsafe_string(LLVM.API.LLVMGetHostCPUFeatures()) + method_table::MT = method_table end module StaticRuntime @@ -72,44 +68,43 @@ for target in (:NativeCompilerTarget, :ExternalNativeCompilerTarget) return tm end - GPUCompiler.runtime_slug(job::GPUCompiler.CompilerJob{$target}) = "native_$(job.config.target.cpu)-$(hash(job.config.target.features))" + GPUCompiler.runtime_slug(job::GPUCompiler.CompilerJob{<:$target}) = "native_$(job.config.target.cpu)-$(hash(job.config.target.features))" - GPUCompiler.runtime_module(::GPUCompiler.CompilerJob{$target}) = StaticRuntime - GPUCompiler.runtime_module(::GPUCompiler.CompilerJob{$target, StaticCompilerParams}) = StaticRuntime + GPUCompiler.runtime_module(::GPUCompiler.CompilerJob{<:$target}) = StaticRuntime + GPUCompiler.runtime_module(::GPUCompiler.CompilerJob{<:$target, StaticCompilerParams}) = StaticRuntime - GPUCompiler.can_throw(job::GPUCompiler.CompilerJob{$target, StaticCompilerParams}) = true - GPUCompiler.can_throw(job::GPUCompiler.CompilerJob{$target}) = true + GPUCompiler.can_throw(job::GPUCompiler.CompilerJob{<:$target, StaticCompilerParams}) = true + GPUCompiler.can_throw(job::GPUCompiler.CompilerJob{<:$target}) = true - GPUCompiler.get_interpreter(job::GPUCompiler.CompilerJob{$target, StaticCompilerParams}) = + GPUCompiler.get_interpreter(job::GPUCompiler.CompilerJob{<:$target, StaticCompilerParams}) = StaticInterpreter(job.config.params.cache, GPUCompiler.method_table(job), job.world, - 
GPUCompiler.inference_params(job), GPUCompiler.optimization_params(job), - job.config.params.mixtape) - GPUCompiler.ci_cache(job::GPUCompiler.CompilerJob{$target, StaticCompilerParams}) = job.config.params.cache + GPUCompiler.inference_params(job), GPUCompiler.optimization_params(job)) + GPUCompiler.ci_cache(job::GPUCompiler.CompilerJob{<:$target, StaticCompilerParams}) = job.config.params.cache + GPUCompiler.method_table(@nospecialize(job::GPUCompiler.CompilerJob{<:$target})) = job.config.target.method_table end end -GPUCompiler.method_table(@nospecialize(job::GPUCompiler.CompilerJob{ExternalNativeCompilerTarget})) = method_table -GPUCompiler.method_table(@nospecialize(job::GPUCompiler.CompilerJob{ExternalNativeCompilerTarget, StaticCompilerParams})) = method_table - function native_job(@nospecialize(func::Function), @nospecialize(types::Type), external::Bool; - mixtape = NoContext(), name = fix_name(func), kernel::Bool = false, - target = (), + target = (;), + method_table=method_table, kwargs... ) + target = merge(target, (;method_table)) source = methodinstance(typeof(func), Base.to_tuple_type(types)) target = external ? ExternalNativeCompilerTarget(;target...) : NativeCompilerTarget(;target...) - params = StaticCompilerParams(mixtape = mixtape) + params = StaticCompilerParams() config = GPUCompiler.CompilerConfig(target, params, name = name, kernel = kernel) StaticCompiler.CompilerJob(source, config), kwargs end -function native_job(@nospecialize(func), @nospecialize(types), external; mixtape = NoContext(), kernel::Bool=false, name=fix_name(repr(func)), target = (), kwargs...) +function native_job(@nospecialize(func), @nospecialize(types), external; kernel::Bool=false, name=fix_name(repr(func)), target = (;), method_table=method_table, kwargs...) + target = merge(target, (; method_table)) source = methodinstance(typeof(func), Base.to_tuple_type(types)) target = external ? ExternalNativeCompilerTarget(;target...) : NativeCompilerTarget(;target...) 
- params = StaticCompilerParams(mixtape = mixtape) + params = StaticCompilerParams() config = GPUCompiler.CompilerConfig(target, params, name = name, kernel = kernel) GPUCompiler.CompilerJob(source, config), kwargs end diff --git a/test/Project.toml b/test/Project.toml index cc88d4e..5498846 100644 --- a/test/Project.toml +++ b/test/Project.toml @@ -1,5 +1,4 @@ [deps] -CodeInfoTools = "bc773b8a-8374-437a-b9f2-0e9785855863" Distributed = "8ba89e20-285c-5b6f-9357-94700520ee1b" Formatting = "59287772-0a20-5a39-b81b-1366585eb4c0" GPUCompiler = "61eb1bfa-7361-4325-ad38-22787b887f55" @@ -14,3 +13,7 @@ StaticTools = "86c06d3c-3f03-46de-9781-57580aa96d0a" StrideArraysCore = "7792a7ef-975c-4747-a70f-980b88e8d1da" TOML = "fa267f1f-6049-4f14-aa54-33bafae1ed76" Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40" +Bumper = "8ce10254-0962-460f-a3d8-1f77fea1446e" + +[compat] +Bumper = "0.5.1" \ No newline at end of file diff --git a/test/runtests.jl b/test/runtests.jl index 697ef7e..542659c 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -7,19 +7,19 @@ using ManualMemory using Distributed using StaticTools using StrideArraysCore -using CodeInfoTools using MacroTools using LLD_jll +using Bumper addprocs(1) @everywhere using StaticCompiler, StrideArraysCore const GROUP = get(ENV, "GROUP", "All") -@static if GROUP == "Core" || GROUP == "All" +if GROUP == "Core" || GROUP == "All" include("testcore.jl") end -@static if GROUP == "Integration" || GROUP == "All" +if GROUP == "Integration" || GROUP == "All" include("testintegration.jl") end diff --git a/test/testcore.jl b/test/testcore.jl index 823729c..e705ed6 100644 --- a/test/testcore.jl +++ b/test/testcore.jl @@ -1,234 +1,9 @@ workdir = tempdir() -# workdir = "./" # For debugging -remote_load_call(path, args...) 
= fetch(@spawnat 2 load_function(path)(args...)) - -@testset "Basics" begin - - simple_sum(x) = x + one(typeof(x)) - - # This probably needs a macro - for T ∈ (Int, Float64, Int32, Float32, Int16, Float16) - _, path, = compile(simple_sum, (T,)) - @test remote_load_call(path, T(1)) == T(2) - end -end fib(n) = n <= 1 ? n : fib(n - 1) + fib(n - 2) # This needs to be defined globally due to https://github.com/JuliaLang/julia/issues/40990 -@testset "Recursion" begin - _, path = compile(fib, (Int,)) - @test remote_load_call(path, 10) == fib(10) - - # Trick to work around #40990 - _fib2(_fib2, n) = n <= 1 ? n : _fib2(_fib2, n-1) + _fib2(_fib2, n-2) - fib2(n) = _fib2(_fib2, n) - - _, path = compile(fib2, (Int,)) - @test remote_load_call(path, 20) == fib(20) - #@test compile(fib2, (Int,))[1](20) == fib(20) -end - - -@testset "Loops" begin - function sum_first_N_int(N) - s = 0 - for a in 1:N - s += a - end - s - end - _, path = compile(sum_first_N_int, (Int,)) - @test remote_load_call(path, 10) == 55 - - function sum_first_N_float64(N) - s = Float64(0) - for a in 1:N - s += Float64(a) - end - s - end - _, path = compile(sum_first_N_float64, (Int,)) - @test remote_load_call(path, 10) == 55. - - function sum_first_N_int_inbounds(N) - s = 0 - @inbounds for a in 1:N - s += a - end - s - end - _, path = compile(sum_first_N_int_inbounds, (Int,)) - @test remote_load_call(path, 10) == 55 - - function sum_first_N_float64_inbounds(N) - s = Float64(0) - @inbounds for a in 1:N - s += Float64(a) - end - s - end - _, path = compile(sum_first_N_float64_inbounds, (Int,)) - @test remote_load_call(path, 10) == 55. -end - -# Arrays with different input types Int32, Int64, Float32, Float64, Complex? 
-@testset "Arrays" begin - function array_sum(n, A) - s = zero(eltype(A)) - for i in 1:n - s += A[i] - end - s - end - for T ∈ (Int, Complex{Float32}, Complex{Float64}) - _, path = compile(array_sum, (Int, Vector{T})) - @test remote_load_call(path, 10, T.(1:10)) == T(55) - end -end - -@testset "Array allocations" begin - function f(N) - v = Vector{Float64}(undef, N) - for i ∈ eachindex(v) - v[i] = i*i - end - v - end - _, path = compile(f, (Int,)) - @test remote_load_call(path, 5) == [1.0, 4.0, 9.0, 16.0, 25.0] -end - -# This is also a good test of loading and storing from the same object -@testset "Load & Store Same object" begin - global const x = Ref(0) - counter() = x[] += 1 - _, path = compile(counter, ()) - @spawnat 2 global counter = load_function(path) - @test fetch(@spawnat 2 counter()) == 1 - @test fetch(@spawnat 2 counter()) == 2 -end - -# This is also a good test of loading and storing from the same object -counter = let x = Ref(0) - () -> x[] += 1 -end -@testset "Closures" begin - #this currently segfaults during compilation - @test_skip begin - _, path = compile(counter, ()) - @spawnat 2 global counter_comp = load_function(path) - @test fetch(@spawnat 2 counter_comp()) == 1 - @test fetch(@spawnat 2 counter_comp()) == 2 - end -end - - -@testset "Error handling" begin - _, path = compile(sqrt, (Int,)) - tsk = @spawnat 2 begin - try - load_function(path)(-1) - catch e; - e - end - end - @test fetch(tsk) isa DomainError -end - -# Julia wants to treat Tuple (and other things like it) as plain bits, but LLVM wants to treat it as something with a pointer. -# We need to be careful to not send, nor receive an unwrapped Tuple to a compiled function. -# The interface made in `compile` should handle this fine. 
-@testset "Send and receive Tuple" begin - foo(u::Tuple) = 2 .* reverse(u) .- 1 - - _, path = compile(foo, (NTuple{3, Int},)) - @test remote_load_call(path, (1, 2, 3)) == (5, 3, 1) -end - - -# Just to call external libraries -@testset "BLAS" begin - function mydot(a::Vector{Float64}) - N = length(a) - BLAS.dot(N, a, 1, a, 1) - end - a = [1.0, 2.0] - - mydot_compiled, path = compile(mydot, (Vector{Float64},)) - # Works locally for me, but not on CI. Need some improvements to pointer relocation to be robust. - @test_skip remote_load_call(path, a) == 5.0 - @test mydot_compiled(a) ≈ 5.0 - - # This will need some more work apparently - @test_skip begin - _, path = compile((*), (Matrix{Float64}, Matrix{Float64})) - A, B = rand(10, 11), rand(11, 12) - @test remote_load_call(path, A, B) ≈ A * B - end -end - - -@testset "Strings" begin - function hello(name) - "Hello, " * name * "!" - end - hello_compiled, path = compile(hello, (String,)) - @test remote_load_call(path, "world") == "Hello, world!" - - # We'll need to be able to relocate a bunch of UV stuff for this, and deal with dynamic dispatch. - @test_skip begin - function hello(N) - println("Hello World $N") - N - end - - hello_compiled, path = compile(hello, (Int,)) - @test_skip remote_load_call(path, 1) == 1 - end -end - -# This is a trick to get stack allocated arrays inside a function body (so long as they don't escape). -# This lets us have intermediate, mutable stack allocated arrays inside our -@testset "Alloca" begin - function f(N) - # this can hold at most 100 Int values, if you use it for more, you'll segfault - buf = ManualMemory.MemoryBuffer{100, Int}(undef) - GC.@preserve buf begin - # wrap the first N values in a PtrArray - arr = PtrArray(pointer(buf), (N,)) - arr .= 1 # mutate the array to be all 1s - sum(arr) # compute the sum. 
It is very imporatant that no references to arr escape the function body - end - end - _, path = compile(f, (Int,)) - @test remote_load_call(path, 20) == 20 -end - -# I can't beleive this works. -@testset "LoopVectorization" begin - function mul!(C, A, B) - # note: @tturbo does NOT work - @turbo for n ∈ indices((C,B), 2), m ∈ indices((C,A), 1) - Cmn = zero(eltype(C)) - for k ∈ indices((A,B), (2,1)) - Cmn += A[m,k] * B[k,n] - end - C[m,n] = Cmn - end - end - - C = Array{Float64}(undef, 10, 12) - A = rand(10, 11) - B = rand(11, 12) - - _, path = compile(mul!, (Matrix{Float64}, Matrix{Float64}, Matrix{Float64},)) - # remote_load_call(path, C, A, B) This won't work because @spawnat copies C - C .= fetch(@spawnat 2 (load_function(path)(C, A, B); C)) - @test C ≈ A*B -end - @testset "Standalone Dylibs" begin # Test function # (already defined) @@ -349,3 +124,35 @@ end @test ccall(fptr, Float64, (Float64,), 10.) == squaresquaresquare(10.) #Compile dylib end + + +# Overlays + +module SubFoo + +rand(args...) = Base.rand(args...) 
+ +function f() + x = rand() + y = rand() + return x + y +end + +end + +@device_override SubFoo.rand() = 2 + +# Lets test having another method table around +Base.Experimental.@MethodTable AnotherTable +Base.Experimental.@overlay AnotherTable SubFoo.rand() = 3 + +@testset "Overlays" begin + Libdl.dlopen(compile_shlib(SubFoo.f, (), workdir)) do lib + fptr = Libdl.dlsym(lib, "f") + @test @ccall($fptr()::Int) == 4 + end + Libdl.dlopen(compile_shlib(SubFoo.f, (), workdir; method_table=AnotherTable)) do lib + fptr = Libdl.dlsym(lib, "f") + @test @ccall($fptr()::Int) == 6 + end +end diff --git a/test/testintegration.jl b/test/testintegration.jl index ba4d785..3422e63 100644 --- a/test/testintegration.jl +++ b/test/testintegration.jl @@ -3,6 +3,34 @@ testpath = pwd() scratch = tempdir() cd(scratch) +if VERSION >= v"1.9" + # Bumper uses PackageExtensions to work with StaticCompiler, so let's just skip this test on 1.8 + function bumper_test(N::Int) + buf = AllocBuffer(MallocVector, sizeof(Float64) * N) + s = 0.0 + for i ∈ 1:N + # some excuse to reuse the same memory a bunch of times + @no_escape buf begin + v = @alloc(Float64, N) + v .= i + s += sum(v) + end + end + free(buf) + s + end + + @testset "Bumper.jl integration" begin + + path = compile_shlib(bumper_test, (Int,), "./") + ptr = Libdl.dlopen(path, Libdl.RTLD_LOCAL) + + fptr = Libdl.dlsym(ptr, "bumper_test") + + @test bumper_test(8) == @ccall($fptr(8::Int)::Float64) + end +end + @testset "Standalone Executable Integration" begin jlpath = joinpath(Sys.BINDIR, Base.julia_exename()) # Get path to julia executable @@ -105,7 +133,7 @@ cd(scratch) @warn "Could not compile $testpath/scripts/randn_matrix.jl" println(e) end - @static if Sys.isbsd() + if Sys.isbsd() @test isa(status, Base.Process) @test isa(status, Base.Process) && status.exitcode == 0 end @@ -119,14 +147,14 @@ cd(scratch) @warn "Could not run $(scratch)/randn_matrix" println(e) end - @static if Sys.isbsd() + if Sys.isbsd() @test isa(status, Base.Process) 
@test isa(status, Base.Process) && status.exitcode == 0 end end ## --- Test LoopVectorization integration - @static if LoopVectorization.VectorizationBase.has_feature(Val{:x86_64_avx2}) + if Bool(LoopVectorization.VectorizationBase.has_feature(Val{:x86_64_avx2})) let # Compile... status = -1 @@ -280,119 +308,33 @@ cd(scratch) ## --- Test interop - @static if Sys.isbsd() - let - # Compile... - status = -1 - try - isfile("interop") && rm("interop") - status = run(`$jlpath --startup=no --compile=min $testpath/scripts/interop.jl`) - catch e - @warn "Could not compile $testpath/scripts/interop.jl" - println(e) - end - @test isa(status, Base.Process) - @test isa(status, Base.Process) && status.exitcode == 0 - - # Run... - println("Interop:") - status = -1 - try - status = run(`./interop`) - catch e - @warn "Could not run $(scratch)/interop" - println(e) - end - @test isa(status, Base.Process) - @test isa(status, Base.Process) && status.exitcode == 0 - end - end - -end - -# Mixtape - -module SubFoo - -function f() - x = rand() - y = rand() - return x + y -end - -function stringfun(s1, s2) - return s1 * s2 -end - -function teststring() - return stringfun("ab", "c") == "abc" -end - -end - -struct MyMix <: CompilationContext end - -@testset "Mixtape" begin - # 101: How2Mix - - # A few little utility functions for working with Expr instances. - swap(e) = e - function swap(e::Expr) - new = MacroTools.postwalk(e) do s - isexpr(s, :call) || return s - s.args[1] == Base.rand || return s - return 4 - end - return new - end - - # This is pre-inference - you get to see a CodeInfoTools.Builder instance. - function StaticCompiler.transform(::MyMix, src) - b = CodeInfoTools.Builder(src) - for (v, st) in b - b[v] = swap(st) - end - return CodeInfoTools.finish(b) - end + if Sys.isbsd() + let + # Compile... 
+ status = -1 + try + isfile("interop") && rm("interop") + status = run(`$jlpath --startup=no --compile=min $testpath/scripts/interop.jl`) + catch e + @warn "Could not compile $testpath/scripts/interop.jl" + println(e) + end + @test isa(status, Base.Process) + @test isa(status, Base.Process) && status.exitcode == 0 - # MyMix will only transform functions which you explicitly allow. - # You can also greenlight modules. - StaticCompiler.allow(ctx::MyMix, m::Module) = m == SubFoo - - _, path = compile(SubFoo.f, (), mixtape = MyMix()) - @test load_function(path)() == 8 - @test SubFoo.f() != 8 - - # redefine swap to test caching and add StaticString substitution - function swap(e::Expr) - new = MacroTools.postwalk(e) do s - s isa String && return StaticTools.StaticString(tuple(codeunits(s)..., 0x00)) - isexpr(s, :call) || return s - s.args[1] == Base.rand || return s - return 2 + # Run... + println("Interop:") + status = -1 + try + status = run(`./interop`) + catch e + @warn "Could not run $(scratch)/interop" + println(e) + end + @test isa(status, Base.Process) + @test isa(status, Base.Process) && status.exitcode == 0 end - return new end - _, path = compile(SubFoo.f, (), mixtape = MyMix()) - @test load_function(path)() == 4 - - _, path = compile(SubFoo.teststring, (), mixtape = MyMix()) - @test load_function(path)() - -end - -@testset "Cross compiling to WebAssembly" begin - testpath = pwd() - scratch = tempdir() - cd(scratch) - - m2(x) = 2x - m3(x) = 3x - wasm_path = compile_wasm(m2, Tuple{Float64}) - wasm_path2 = compile_wasm([(m2, Tuple{Float64}), (m3, Tuple{Float64})]) - - wasm_path = compile_wasm(m2, (Float64,)) - wasm_path2 = compile_wasm([(m2, (Float64,)), (m3, (Float64,))]) end From b66e19139b25f831b9495dad8e12cbd4225a55e9 Mon Sep 17 00:00:00 2001 From: Mason Protter Date: Sat, 11 Nov 2023 23:04:24 +0100 Subject: [PATCH 135/159] Update README.md --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 
d4816cb..d7c4ed0 100644 --- a/README.md +++ b/README.md @@ -54,7 +54,7 @@ julia> f(1) # Gives the expected answer in regular julia julia> dlopen(compile_shlib(f, (Int,), "./")) do lib fptr = dlsym(lib, "f") - # Now use the compiled version where + is replaced with - + # Now use the compiled version where g(x) = 2x is replaced with g(x) = x - 10 @ccall $fptr(1::Int)::Int end -8 From ae99357b65ad9c63e46858a07c5e919bee73ff6e Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" <41898282+github-actions[bot]@users.noreply.github.com> Date: Mon, 13 Nov 2023 12:50:27 +0100 Subject: [PATCH 136/159] CompatHelper: bump compat for GPUCompiler to 0.25, (keep existing compat) (#147) * CompatHelper: bump compat for GPUCompiler to 0.25, (keep existing compat) * trigger CI --------- Co-authored-by: CompatHelper Julia Co-authored-by: Mason Protter --- Project.toml | 2 +- src/StaticCompiler.jl | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/Project.toml b/Project.toml index 4b32fd7..ac268d6 100644 --- a/Project.toml +++ b/Project.toml @@ -17,7 +17,7 @@ StaticTools = "86c06d3c-3f03-46de-9781-57580aa96d0a" [compat] CodeInfoTools = "0.3" -GPUCompiler = "0.21, 0.22, 0.23, 0.24" +GPUCompiler = "0.21, 0.22, 0.23, 0.24, 0.25" LLVM = "6" MacroTools = "0.5" StaticTools = "0.8" diff --git a/src/StaticCompiler.jl b/src/StaticCompiler.jl index a44af51..41995f3 100644 --- a/src/StaticCompiler.jl +++ b/src/StaticCompiler.jl @@ -98,6 +98,7 @@ Hello, world! function compile_executable(f::Function, types=(), path::String="./", name=fix_name(f); also_expose=Tuple{Function, Tuple{DataType}}[], kwargs...) + compile_executable(vcat([(f, types)], also_expose), path, name; kwargs...) 
end From 808c9f9fb786b5f9757f543c92d4820909820a86 Mon Sep 17 00:00:00 2001 From: Mason Protter Date: Mon, 13 Nov 2023 12:51:20 +0100 Subject: [PATCH 137/159] bump version --- Project.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Project.toml b/Project.toml index ac268d6..6be72b4 100644 --- a/Project.toml +++ b/Project.toml @@ -1,7 +1,7 @@ name = "StaticCompiler" uuid = "81625895-6c0f-48fc-b932-11a18313743c" authors = ["Tom Short and contributors"] -version = "0.6" +version = "0.6.1" [deps] Clang_jll = "0ee61d77-7f21-5576-8119-9fcc46b10100" From b11a0f78d5a61f4cf8f21821d40e7db0a08d1b66 Mon Sep 17 00:00:00 2001 From: Mason Protter Date: Tue, 14 Nov 2023 08:18:08 +0100 Subject: [PATCH 138/159] Delete src/mixtape.jl --- src/mixtape.jl | 77 -------------------------------------------------- 1 file changed, 77 deletions(-) delete mode 100644 src/mixtape.jl diff --git a/src/mixtape.jl b/src/mixtape.jl deleted file mode 100644 index 5e6098a..0000000 --- a/src/mixtape.jl +++ /dev/null @@ -1,77 +0,0 @@ - -##### -##### Exports -##### - -export CompilationContext, - NoContext, - allow, - transform - -##### -##### Compilation context -##### - -# User-extended context allows parametrization of the pipeline through -# our subtype of AbstractInterpreter -abstract type CompilationContext end - -struct NoContext <: CompilationContext end - -@doc( -""" - abstract type CompilationContext end - -Parametrize the Mixtape pipeline by inheriting from `CompilationContext`. Similar to the context objects in [Cassette.jl](https://julia.mit.edu/Cassette.jl/stable/contextualdispatch.html). By using the interface methods [`transform`](@ref) and [`optimize!`](@ref) -- the user can control different parts of the compilation pipeline. 
-""", CompilationContext) - -transform(ctx::CompilationContext, b) = b -transform(ctx::CompilationContext, b, sig) = transform(ctx, b) - -@doc( -""" - transform(ctx::CompilationContext, b::Core.CodeInfo)::Core.CodeInfo - transform(ctx::CompilationContext, b::Core.CodeInfo, sig::Tuple)::Core.CodeInfo - -User-defined transform which operates on lowered `Core.CodeInfo`. There's two versions: (1) ignores the signature of the current method body under consideration and (2) provides the signature as `sig`. - -Transforms might typically follow a simple "swap" format using `CodeInfoTools.Builder`: - -```julia -function transform(::MyCtx, src) - b = CodeInfoTools.Builder(b) - for (k, st) in b - b[k] = swap(st)) - end - return CodeInfoTools.finish(b) -end -``` - -but more advanced formats are possible. For further utilities, please see [CodeInfoTools.jl](https://github.com/JuliaCompilerPlugins/CodeInfoTools.jl). -""", transform) - - -allow(f::C, args...) where {C <: CompilationContext} = false -function allow(ctx::CompilationContext, mod::Module, fn, args...) - return allow(ctx, mod) || allow(ctx, fn, args...) -end - -@doc( -""" - allow(f::CompilationContext, args...)::Bool - -Determines whether the user-defined [`transform`](@ref) and [`optimize!`](@ref) are allowed to look at a lowered `Core.CodeInfo` or `Core.Compiler.IRCode` instance. - -The user is allowed to greenlight modules: - -```julia -allow(::MyCtx, m::Module) == m == SomeModule -``` - -or even specific signatures - -```julia -allow(::MyCtx, fn::typeof(rand), args...) 
= true -``` -""", allow) - From 34ef8bbcaa507368a630f5a4ac297e1d40366754 Mon Sep 17 00:00:00 2001 From: Mason Protter Date: Thu, 16 Nov 2023 09:18:58 +0100 Subject: [PATCH 139/159] Add some device overrides for Bumper.jl (#149) * Update quirks.jl * Update Project.toml --- Project.toml | 2 +- src/quirks.jl | 12 ++++++++++++ 2 files changed, 13 insertions(+), 1 deletion(-) diff --git a/Project.toml b/Project.toml index 6be72b4..4be5322 100644 --- a/Project.toml +++ b/Project.toml @@ -1,7 +1,7 @@ name = "StaticCompiler" uuid = "81625895-6c0f-48fc-b932-11a18313743c" authors = ["Tom Short and contributors"] -version = "0.6.1" +version = "0.6.2" [deps] Clang_jll = "0ee61d77-7f21-5576-8119-9fcc46b10100" diff --git a/src/quirks.jl b/src/quirks.jl index ffa25ac..65ebc7b 100644 --- a/src/quirks.jl +++ b/src/quirks.jl @@ -44,3 +44,15 @@ end # trig.jl @device_override @noinline Base.Math.sincos_domain_error(x) = @print_and_throw c"sincos(x) is only defined for finite x." + +@static if isdefined(StaticTools, :Bumper) + Bumper = StaticTools.Bumper + @device_override @noinline Bumper.AllocBufferImpl.oom_error() = + @print_and_throw c"alloc: Buffer out of memory. This might be a sign of a memory leak." + @device_override @noinline Bumper.Internals.esc_err() = + @print_and_throw c"Tried to return a PtrArray from a `no_escape` block. If you really want to do this, evaluate Bumper.allow_ptrarray_to_escape() = true" + + # Just to make the compiler's life a little easier, let's not make it fetch and elide the current task + # since tasks don't actually exist on-device. 
+ @device_override Bumper.Internals.get_task() = 0 +end From 8c46be1e7a821271d4146ffaa2b8ad86e8bdd25e Mon Sep 17 00:00:00 2001 From: Gabriel Baraldi Date: Tue, 28 Nov 2023 17:00:50 -0300 Subject: [PATCH 140/159] Refactor code to make it more amenable to cross compilation Also cleanup some of it --- src/StaticCompiler.jl | 77 +++++++++++++++------------ src/target.jl | 114 ++++++++++++++++++++++++---------------- test/scripts/interop.jl | 2 +- 3 files changed, 111 insertions(+), 82 deletions(-) diff --git a/src/StaticCompiler.jl b/src/StaticCompiler.jl index 41995f3..6bca325 100644 --- a/src/StaticCompiler.jl +++ b/src/StaticCompiler.jl @@ -12,9 +12,10 @@ using LLD_jll: lld using StaticTools using StaticTools: @symbolcall, @c_str, println using Core: MethodTable +using Base:BinaryPlatforms.Platform, BinaryPlatforms.HostPlatform, BinaryPlatforms.arch, BinaryPlatforms.os, BinaryPlatforms.libc_str export load_function, compile_shlib, compile_executable -export native_code_llvm, native_code_typed, native_llvm_module, native_code_native +export static_code_llvm, static_code_typed, static_llvm_module, static_code_native export @device_override, @print_and_throw include("interpreter.jl") @@ -32,6 +33,7 @@ compile_executable(f::Function, types::Tuple, path::String, [name::String=string filename::String=name, cflags=``, # Specify libraries you would like to link against, and other compiler options here also_expose=[], + target::StaticTarget=StaticTarget(), method_table=StaticCompiler.method_table, kwargs... ) @@ -96,16 +98,16 @@ Hello, world! ``` """ function compile_executable(f::Function, types=(), path::String="./", name=fix_name(f); - also_expose=Tuple{Function, Tuple{DataType}}[], + also_expose=Tuple{Function, Tuple{DataType}}[], target::StaticTarget=StaticTarget(), kwargs...) - - compile_executable(vcat([(f, types)], also_expose), path, name; kwargs...) + compile_executable(vcat([(f, types)], also_expose), path, name; target, kwargs...) 
end function compile_executable(funcs::Union{Array,Tuple}, path::String="./", name=fix_name(first(first(funcs))); filename = name, demangle = true, cflags = ``, + target::StaticTarget=StaticTarget(), kwargs... ) @@ -114,12 +116,12 @@ function compile_executable(funcs::Union{Array,Tuple}, path::String="./", name=f isexecutableargtype = tt == Tuple{} || tt == Tuple{Int, Ptr{Ptr{UInt8}}} isexecutableargtype || @warn "input type signature $types should be either `()` or `(Int, Ptr{Ptr{UInt8}})` for standard executables" - rt = last(only(native_code_typed(f, tt; kwargs...))) + rt = last(only(static_code_typed(f, tt; target, kwargs...))) isconcretetype(rt) || error("`$f$types` did not infer to a concrete type. Got `$rt`") nativetype = isprimitivetype(rt) || isa(rt, Ptr) nativetype || @warn "Return type `$rt` of `$f$types` does not appear to be a native type. Consider returning only a single value of a native machine type (i.e., a single float, int/uint, bool, or pointer). \n\nIgnoring this warning may result in Undefined Behavior!" - generate_executable(funcs, path, name, filename; demangle, cflags, kwargs...) + generate_executable(funcs, path, name, filename; demangle, cflags, target, kwargs...) joinpath(abspath(path), filename) end @@ -129,6 +131,7 @@ compile_shlib(f::Function, types::Tuple, [path::String="./"], [name::String=stri filename::String=name, cflags=``, method_table=StaticCompiler.method_table, + target::StaticTarget=StaticTarget(), kwargs...) compile_shlib(funcs::Array, [path::String="./"]; @@ -136,6 +139,7 @@ compile_shlib(funcs::Array, [path::String="./"]; demangle=true, cflags=``, method_table=StaticCompiler.method_table, + target::StaticTarget=StaticTarget(), kwargs...) ``` As `compile_executable`, but compiling to a standalone `.dylib`/`.so` shared library. 
@@ -169,15 +173,17 @@ julia> ccall(("test", "test.dylib"), Float64, (Int64,), 100_000) """ function compile_shlib(f::Function, types=(), path::String="./", name=fix_name(f); filename=name, + target::StaticTarget=StaticTarget(), kwargs... ) - compile_shlib(((f, types),), path; filename, kwargs...) + compile_shlib(((f, types),), path; filename, target, kwargs...) end # As above, but taking an array of functions and returning a single shlib function compile_shlib(funcs::Union{Array,Tuple}, path::String="./"; filename = "libfoo", demangle = true, cflags = ``, + target::StaticTarget=StaticTarget(), kwargs... ) for func in funcs @@ -185,17 +191,17 @@ function compile_shlib(funcs::Union{Array,Tuple}, path::String="./"; tt = Base.to_tuple_type(types) isconcretetype(tt) || error("input type signature `$types` is not concrete") - rt = last(only(native_code_typed(f, tt))) + rt = last(only(static_code_typed(f, tt; target, kwargs...))) isconcretetype(rt) || error("`$f$types` did not infer to a concrete type. Got `$rt`") nativetype = isprimitivetype(rt) || isa(rt, Ptr) nativetype || @warn "Return type `$rt` of `$f$types` does not appear to be a native type. Consider returning only a single value of a native machine type (i.e., a single float, int/uint, bool, or pointer). \n\nIgnoring this warning may result in Undefined Behavior!" end - generate_shlib(funcs, true, path, filename; demangle, cflags, kwargs...) + generate_shlib(funcs, path, filename; demangle, cflags, target, kwargs...) joinpath(abspath(path), filename * "." * Libdl.dlext) end - + """ ```julia @@ -281,12 +287,11 @@ generate_executable(f, tt, args...; kwargs...) = generate_executable(((f, tt),), function generate_executable(funcs::Union{Array,Tuple}, path=tempname(), name=fix_name(first(first(funcs))), filename=name; demangle = true, cflags = ``, + target::StaticTarget=StaticTarget(), kwargs... 
) - lib_path = joinpath(path, "$filename.$(Libdl.dlext)") exec_path = joinpath(path, filename) - external = true - _, obj_path = generate_obj(funcs, external, path, filename; demangle, kwargs...) + _, obj_path = generate_obj(funcs, path, filename; demangle, target, kwargs...) # Pick a compiler cc = Sys.isapple() ? `cc` : clang() # Compile! @@ -318,8 +323,8 @@ end """ ```julia -generate_shlib(f::Function, tt, [external::Bool=true], [path::String], [name], [filename]; kwargs...) -generate_shlib(funcs::Array, [external::Bool=true], [path::String], [filename::String]; demangle=true, kwargs...) +generate_shlib(f::Function, tt, [path::String], [name], [filename]; kwargs...) +generate_shlib(funcs::Array, [path::String], [filename::String]; demangle=true, target::StaticTarget=StaticTarget(), kwargs...) ``` Low level interface for compiling a shared object / dynamically loaded library (`.so` / `.dylib`) for function `f` given a tuple type `tt` characterizing @@ -356,19 +361,20 @@ julia> ccall(("test", "example/test.dylib"), Float64, (Int64,), 100_000) 5.2564961094956075 ``` """ -function generate_shlib(f::Function, tt, external::Bool=true, path::String=tempname(), name=fix_name(f), filename=name; kwargs...) - generate_shlib(((f, tt),), external, path, filename; kwargs...) +function generate_shlib(f::Function, tt, path::String=tempname(), name=fix_name(f), filename=name; target=StaticTarget(), kwargs...) + generate_shlib(((f, tt),), path, filename; target, kwargs...) end # As above, but taking an array of functions and returning a single shlib -function generate_shlib(funcs::Union{Array,Tuple}, external::Bool=true, path::String=tempname(), filename::String="libfoo"; +function generate_shlib(funcs::Union{Array,Tuple}, path::String=tempname(), filename::String="libfoo"; demangle = true, cflags = ``, + target::StaticTarget=StaticTarget(), kwargs... 
) lib_path = joinpath(path, "$filename.$(Libdl.dlext)") - _, obj_path = generate_obj(funcs, external, path, filename; demangle, kwargs...) + _, obj_path = generate_obj(funcs, path, filename; target, demangle, kwargs...) # Pick a Clang cc = Sys.isapple() ? `cc` : clang() # Compile! @@ -377,27 +383,27 @@ function generate_shlib(funcs::Union{Array,Tuple}, external::Bool=true, path::St path, name end -function native_code_llvm(@nospecialize(func), @nospecialize(types); kwargs...) - job, kwargs = native_job(func, types, true; kwargs...) +function static_code_llvm(@nospecialize(func), @nospecialize(types); target::StaticTarget=StaticTarget(), kwargs...) + job, kwargs = static_job(func, types; target, kwargs...) GPUCompiler.code_llvm(stdout, job; kwargs...) end -function native_code_typed(@nospecialize(func), @nospecialize(types); kwargs...) - job, kwargs = native_job(func, types, true; kwargs...) +function static_code_typed(@nospecialize(func), @nospecialize(types); target::StaticTarget=StaticTarget(), kwargs...) + job, kwargs = static_job(func, types; target, kwargs...) GPUCompiler.code_typed(job; kwargs...) end -function native_code_native(@nospecialize(f), @nospecialize(tt), fname=fix_name(f); kwargs...) - job, kwargs = native_job(f, tt, true; fname, kwargs...) +function static_code_native(@nospecialize(f), @nospecialize(tt), fname=fix_name(f); target::StaticTarget=StaticTarget(), kwargs...) + job, kwargs = static_job(f, tt; fname, target, kwargs...) GPUCompiler.code_native(stdout, job; kwargs...) end # Return an LLVM module -function native_llvm_module(f, tt, name=fix_name(f); demangle, kwargs...) +function static_llvm_module(f, tt, name=fix_name(f); demangle, target::StaticTarget=StaticTarget(), kwargs...) if !demangle name = "julia_"*name end - job, kwargs = native_job(f, tt, true; name, kwargs...) + job, kwargs = static_job(f, tt; name, target, kwargs...) 
m = GPUCompiler.JuliaContext() do context m, _ = GPUCompiler.codegen(:llvm, job; strip=true, only_entry=false, validate=false) locate_pointers_and_runtime_calls(m) @@ -407,14 +413,14 @@ function native_llvm_module(f, tt, name=fix_name(f); demangle, kwargs...) end #Return an LLVM module for multiple functions -function native_llvm_module(funcs::Union{Array,Tuple}; demangle=true, kwargs...) +function static_llvm_module(funcs::Union{Array,Tuple}; demangle=true, target::StaticTarget=StaticTarget(), kwargs...) f,tt = funcs[1] mod = GPUCompiler.JuliaContext() do context name_f = fix_name(f) if !demangle name_f = "julia_"*name_f end - job, kwargs = native_job(f, tt, true; name = name_f, kwargs...) + job, kwargs = static_job(f, tt; name = name_f, target, kwargs...) mod,_ = GPUCompiler.codegen(:llvm, job; strip=true, only_entry=false, validate=false) if length(funcs) > 1 for func in funcs[2:end] @@ -423,7 +429,7 @@ function native_llvm_module(funcs::Union{Array,Tuple}; demangle=true, kwargs...) if !demangle name_f = "julia_"*name_f end - job, kwargs = native_job(f, tt, true; name = name_f, kwargs...) + job, kwargs = static_job(f, tt; name = name_f, target, kwargs...) tmod,_ = GPUCompiler.codegen(:llvm, job; strip=true, only_entry=false, validate=false) link!(mod,tmod) end @@ -458,7 +464,7 @@ end """ ```julia -generate_obj(f, tt, external::Bool, path::String = tempname(), filenamebase::String="obj"; +generate_obj(f, tt, path::String = tempname(), filenamebase::String="obj"; target = (), demangle = true, strip_llvm = false, @@ -498,7 +504,7 @@ end """ ```julia -generate_obj(funcs::Union{Array,Tuple}, external::Bool, path::String = tempname(), filenamebase::String="obj"; +generate_obj(funcs::Union{Array,Tuple}, path::String = tempname(), filenamebase::String="obj"; target = (), demangle =false, strip_llvm = false, @@ -514,18 +520,19 @@ which will be compiled. This is a named tuple with fields `triple`, `cpu`, and `features` (each of these are strings). 
The defaults compile to the native target. """ -function generate_obj(funcs::Union{Array,Tuple}, external::Bool, path::String = tempname(), filenamebase::String="obj"; +function generate_obj(funcs::Union{Array,Tuple}, path::String = tempname(), filenamebase::String="obj"; demangle = true, strip_llvm = false, strip_asm = true, opt_level = 3, + target::StaticTarget=StaticTarget(), kwargs...) f, tt = funcs[1] mkpath(path) obj_path = joinpath(path, "$filenamebase.o") - mod = native_llvm_module(funcs; demangle, kwargs...) + mod = static_llvm_module(funcs; demangle, kwargs...) obj = GPUCompiler.JuliaContext() do ctx - fakejob, _ = native_job(f, tt, external; kwargs...) + fakejob, _ = static_job(f, tt; target, kwargs...) obj, _ = GPUCompiler.emit_asm(fakejob, mod; strip=strip_asm, validate=false, format=LLVM.API.LLVMObjectFile) obj end diff --git a/src/target.jl b/src/target.jl index 777960a..2f14e2b 100644 --- a/src/target.jl +++ b/src/target.jl @@ -4,6 +4,31 @@ else const method_table = nothing end +""" +```julia + StaticTarget() # Native target + StaticTarget(platform::Base.BinaryPlatforms.Platform) # Specific target with generic CPU + StaticTarget(platform::Platform, cpu::String) # Specific target with specific CPU + StaticTarget(platform::Platform, cpu::String, features::String) # Specific target with specific CPU and features +``` +Struct that defines a target for the compilation +Beware that currently the compilation assumes that the code is on the host so platform specific code like: +```julia + Sys.isapple() ... +``` +does not behave as expected. +By default `StaticTarget()` is the native target. 
+""" +struct StaticTarget + platform::Platform + tm::LLVM.TargetMachine +end + +clean_triple(platform::Platform) = arch(platform) * "-" * os(platform) * libc_str(platform) +StaticTarget() = StaticTarget(HostPlatform(), unsafe_string(LLVM.API.LLVMGetHostCPUName()), unsafe_string(LLVM.API.LLVMGetHostCPUFeatures())) +StaticTarget(platform::Platform) = StaticTarget(platform, LLVM.TargetMachine(LLVM.Target(triple = clean_triple(platform)), clean_triple(platform))) +StaticTarget(platform::Platform, cpu::String) = StaticTarget(platform, LLVM.TargetMachine(LLVM.Target(triple = clean_triple(platform)), clean_triple(platform), cpu)) +StaticTarget(platform::Platform, cpu::String, features::String) = StaticTarget(platform, LLVM.TargetMachine(LLVM.Target(triple = clean_triple(platform)), clean_triple(platform), cpu, features)) """ ```julia @@ -29,18 +54,12 @@ macro device_override(ex) return esc(code) end -Base.@kwdef struct NativeCompilerTarget{MT} <: GPUCompiler.AbstractCompilerTarget - triple::String=Sys.MACHINE - cpu::String=(LLVM.version() < v"8") ? "" : unsafe_string(LLVM.API.LLVMGetHostCPUName()) - features::String=(LLVM.version() < v"8") ? "" : unsafe_string(LLVM.API.LLVMGetHostCPUFeatures()) - method_table::MT = method_table -end - -Base.@kwdef struct ExternalNativeCompilerTarget{MT} <: GPUCompiler.AbstractCompilerTarget - triple::String=Sys.MACHINE - cpu::String=(LLVM.version() < v"8") ? "" : unsafe_string(LLVM.API.LLVMGetHostCPUName()) - features::String=(LLVM.version() < v"8") ? 
"" : unsafe_string(LLVM.API.LLVMGetHostCPUFeatures()) - method_table::MT = method_table +# Default to native +struct StaticCompilerTarget{MT} <: GPUCompiler.AbstractCompilerTarget + triple::String + cpu::String + features::String + method_table::MT end module StaticRuntime @@ -53,58 +72,61 @@ module StaticRuntime report_exception_frame(idx, func, file, line) = return end -for target in (:NativeCompilerTarget, :ExternalNativeCompilerTarget) - @eval begin - GPUCompiler.llvm_triple(target::$target) = target.triple - function GPUCompiler.llvm_machine(target::$target) - triple = GPUCompiler.llvm_triple(target) +GPUCompiler.llvm_triple(target::StaticCompilerTarget) = target.triple - t = LLVM.Target(triple=triple) +function GPUCompiler.llvm_machine(target::StaticCompilerTarget) + triple = GPUCompiler.llvm_triple(target) - tm = LLVM.TargetMachine(t, triple, target.cpu, target.features, reloc=LLVM.API.LLVMRelocPIC) - GPUCompiler.asm_verbosity!(tm, true) + t = LLVM.Target(triple=triple) - return tm - end + tm = LLVM.TargetMachine(t, triple, target.cpu, target.features, reloc=LLVM.API.LLVMRelocPIC) + GPUCompiler.asm_verbosity!(tm, true) - GPUCompiler.runtime_slug(job::GPUCompiler.CompilerJob{<:$target}) = "native_$(job.config.target.cpu)-$(hash(job.config.target.features))" + return tm +end - GPUCompiler.runtime_module(::GPUCompiler.CompilerJob{<:$target}) = StaticRuntime - GPUCompiler.runtime_module(::GPUCompiler.CompilerJob{<:$target, StaticCompilerParams}) = StaticRuntime +GPUCompiler.runtime_slug(job::GPUCompiler.CompilerJob{<:StaticCompilerTarget}) = "static_$(job.config.target.cpu)-$(hash(job.config.target.features))" +GPUCompiler.runtime_module(::GPUCompiler.CompilerJob{<:StaticCompilerTarget}) = StaticRuntime +GPUCompiler.runtime_module(::GPUCompiler.CompilerJob{<:StaticCompilerTarget, StaticCompilerParams}) = StaticRuntime - GPUCompiler.can_throw(job::GPUCompiler.CompilerJob{<:$target, StaticCompilerParams}) = true - 
GPUCompiler.can_throw(job::GPUCompiler.CompilerJob{<:$target}) = true - GPUCompiler.get_interpreter(job::GPUCompiler.CompilerJob{<:$target, StaticCompilerParams}) = - StaticInterpreter(job.config.params.cache, GPUCompiler.method_table(job), job.world, - GPUCompiler.inference_params(job), GPUCompiler.optimization_params(job)) - GPUCompiler.ci_cache(job::GPUCompiler.CompilerJob{<:$target, StaticCompilerParams}) = job.config.params.cache - GPUCompiler.method_table(@nospecialize(job::GPUCompiler.CompilerJob{<:$target})) = job.config.target.method_table - end -end +GPUCompiler.can_throw(job::GPUCompiler.CompilerJob{<:StaticCompilerTarget, StaticCompilerParams}) = true +GPUCompiler.can_throw(job::GPUCompiler.CompilerJob{<:StaticCompilerTarget}) = true + +GPUCompiler.get_interpreter(job::GPUCompiler.CompilerJob{<:StaticCompilerTarget, StaticCompilerParams}) = + StaticInterpreter(job.config.params.cache, GPUCompiler.method_table(job), job.world, + GPUCompiler.inference_params(job), GPUCompiler.optimization_params(job)) +GPUCompiler.ci_cache(job::GPUCompiler.CompilerJob{<:StaticCompilerTarget, StaticCompilerParams}) = job.config.params.cache +GPUCompiler.method_table(@nospecialize(job::GPUCompiler.CompilerJob{<:StaticCompilerTarget})) = job.config.target.method_table -function native_job(@nospecialize(func::Function), @nospecialize(types::Type), external::Bool; + +function static_job(@nospecialize(func::Function), @nospecialize(types::Type); name = fix_name(func), kernel::Bool = false, - target = (;), + target::StaticTarget = StaticTarget(), method_table=method_table, kwargs... ) - target = merge(target, (;method_table)) source = methodinstance(typeof(func), Base.to_tuple_type(types)) - target = external ? ExternalNativeCompilerTarget(;target...) : NativeCompilerTarget(;target...) 
+ tm = target.tm + gputarget = StaticCompilerTarget(LLVM.triple(tm), LLVM.cpu(tm), LLVM.features(tm), method_table) params = StaticCompilerParams() - config = GPUCompiler.CompilerConfig(target, params, name = name, kernel = kernel) + config = GPUCompiler.CompilerConfig(gputarget, params, name = name, kernel = kernel) StaticCompiler.CompilerJob(source, config), kwargs end - -function native_job(@nospecialize(func), @nospecialize(types), external; kernel::Bool=false, name=fix_name(repr(func)), target = (;), method_table=method_table, kwargs...) - target = merge(target, (; method_table)) +function static_job(@nospecialize(func), @nospecialize(types); + name = fix_name(func), + kernel::Bool = false, + target::StaticTarget = StaticTarget(), + method_table=method_table, + kwargs... +) source = methodinstance(typeof(func), Base.to_tuple_type(types)) - target = external ? ExternalNativeCompilerTarget(;target...) : NativeCompilerTarget(;target...) + tm = target.tm + gputarget = StaticCompilerTarget(LLVM.triple(tm), LLVM.cpu(tm), LLVM.features(tm), method_table) params = StaticCompilerParams() - config = GPUCompiler.CompilerConfig(target, params, name = name, kernel = kernel) - GPUCompiler.CompilerJob(source, config), kwargs -end + config = GPUCompiler.CompilerConfig(gputarget, params, name = name, kernel = kernel) + StaticCompiler.CompilerJob(source, config), kwargs +end \ No newline at end of file diff --git a/test/scripts/interop.jl b/test/scripts/interop.jl index b601e09..24546ec 100644 --- a/test/scripts/interop.jl +++ b/test/scripts/interop.jl @@ -13,4 +13,4 @@ function interop(argc, argv) end # Attempt to compile -path = compile_executable(interop, (Int64, Ptr{Ptr{UInt8}}), "./", cflags=`-ldl -lm`) +path = compile_executable(interop, (Int64, Ptr{Ptr{UInt8}}), "./", c_flags=`-ldl -lm`) From 0e0f8722ab468e41392aa188fa0fa5bebb81d409 Mon Sep 17 00:00:00 2001 From: Gabriel Baraldi Date: Wed, 29 Nov 2023 11:13:13 -0300 Subject: [PATCH 141/159] Fixup macos issues --- 
src/StaticCompiler.jl | 4 ++-- src/target.jl | 2 +- test/scripts/interop.jl | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/src/StaticCompiler.jl b/src/StaticCompiler.jl index 6bca325..2d67382 100644 --- a/src/StaticCompiler.jl +++ b/src/StaticCompiler.jl @@ -12,7 +12,7 @@ using LLD_jll: lld using StaticTools using StaticTools: @symbolcall, @c_str, println using Core: MethodTable -using Base:BinaryPlatforms.Platform, BinaryPlatforms.HostPlatform, BinaryPlatforms.arch, BinaryPlatforms.os, BinaryPlatforms.libc_str +using Base:BinaryPlatforms.Platform, BinaryPlatforms.HostPlatform, BinaryPlatforms.arch, BinaryPlatforms.os_str, BinaryPlatforms.libc_str export load_function, compile_shlib, compile_executable export static_code_llvm, static_code_typed, static_llvm_module, static_code_native @@ -399,7 +399,7 @@ function static_code_native(@nospecialize(f), @nospecialize(tt), fname=fix_name( end # Return an LLVM module -function static_llvm_module(f, tt, name=fix_name(f); demangle, target::StaticTarget=StaticTarget(), kwargs...) +function static_llvm_module(f, tt, name=fix_name(f); demangle=true, target::StaticTarget=StaticTarget(), kwargs...) 
if !demangle name = "julia_"*name end diff --git a/src/target.jl b/src/target.jl index 2f14e2b..26d5c69 100644 --- a/src/target.jl +++ b/src/target.jl @@ -24,7 +24,7 @@ struct StaticTarget tm::LLVM.TargetMachine end -clean_triple(platform::Platform) = arch(platform) * "-" * os(platform) * libc_str(platform) +clean_triple(platform::Platform) = arch(platform) * os_str(platform) * libc_str(platform) StaticTarget() = StaticTarget(HostPlatform(), unsafe_string(LLVM.API.LLVMGetHostCPUName()), unsafe_string(LLVM.API.LLVMGetHostCPUFeatures())) StaticTarget(platform::Platform) = StaticTarget(platform, LLVM.TargetMachine(LLVM.Target(triple = clean_triple(platform)), clean_triple(platform))) StaticTarget(platform::Platform, cpu::String) = StaticTarget(platform, LLVM.TargetMachine(LLVM.Target(triple = clean_triple(platform)), clean_triple(platform), cpu)) diff --git a/test/scripts/interop.jl b/test/scripts/interop.jl index 24546ec..422f682 100644 --- a/test/scripts/interop.jl +++ b/test/scripts/interop.jl @@ -13,4 +13,4 @@ function interop(argc, argv) end # Attempt to compile -path = compile_executable(interop, (Int64, Ptr{Ptr{UInt8}}), "./", c_flags=`-ldl -lm`) +path = compile_executable(interop, (Int64, Ptr{Ptr{UInt8}}), "./",cflags=`-ldl -lm`) From 0d537cdadafa9236d7838ea9854ccd9e68a92f53 Mon Sep 17 00:00:00 2001 From: Gabriel Baraldi Date: Wed, 29 Nov 2023 12:09:47 -0300 Subject: [PATCH 142/159] Add wasm test and fixup macos --- src/StaticCompiler.jl | 11 ++++++----- src/target.jl | 3 ++- test/scripts/wasm.jl | 16 ++++++++++++++++ 3 files changed, 24 insertions(+), 6 deletions(-) create mode 100644 test/scripts/wasm.jl diff --git a/src/StaticCompiler.jl b/src/StaticCompiler.jl index 2d67382..b84cc2f 100644 --- a/src/StaticCompiler.jl +++ b/src/StaticCompiler.jl @@ -17,6 +17,7 @@ using Base:BinaryPlatforms.Platform, BinaryPlatforms.HostPlatform, BinaryPlatfor export load_function, compile_shlib, compile_executable export static_code_llvm, static_code_typed, 
static_llvm_module, static_code_native export @device_override, @print_and_throw +export StaticTarget include("interpreter.jl") include("target.jl") @@ -465,7 +466,7 @@ end """ ```julia generate_obj(f, tt, path::String = tempname(), filenamebase::String="obj"; - target = (), + target::StaticTarget=StaticTarget(), demangle = true, strip_llvm = false, strip_asm = true, @@ -477,7 +478,7 @@ a tuple type `tt` characterizing the types of the arguments for which the function will be compiled. `target` can be used to change the output target. This is useful for compiling to WebAssembly and embedded targets. -This is a named tuple with fields `triple`, `cpu`, and `features` (each of these are strings). +This is a struct of the type StaticTarget() The defaults compile to the native target. If `demangle` is set to `false`, compiled function names are prepended with "julia_". @@ -487,7 +488,7 @@ If `demangle` is set to `false`, compiled function names are prepended with "jul julia> fib(n) = n <= 1 ? n : fib(n - 1) + fib(n - 2) fib (generic function with 1 method) -julia> path, name, table = StaticCompiler.generate_obj_for_compile(fib, Tuple{Int64}, "./test") +julia> path, name, table = StaticCompiler.generate_obj(fib, Tuple{Int64}, "./test") ("./test", "fib", IdDict{Any, String}()) shell> tree \$path @@ -505,7 +506,7 @@ end """ ```julia generate_obj(funcs::Union{Array,Tuple}, path::String = tempname(), filenamebase::String="obj"; - target = (), + target::StaticTarget=StaticTarget(), demangle =false, strip_llvm = false, strip_asm = true, @@ -517,7 +518,7 @@ Low level interface for compiling object code (`.o`) for an array of Tuples which will be compiled. `target` can be used to change the output target. This is useful for compiling to WebAssembly and embedded targets. -This is a named tuple with fields `triple`, `cpu`, and `features` (each of these are strings). +This is a struct of the type StaticTarget() The defaults compile to the native target. 
""" function generate_obj(funcs::Union{Array,Tuple}, path::String = tempname(), filenamebase::String="obj"; diff --git a/src/target.jl b/src/target.jl index 26d5c69..3d54563 100644 --- a/src/target.jl +++ b/src/target.jl @@ -20,7 +20,7 @@ does not behave as expected. By default `StaticTarget()` is the native target. """ struct StaticTarget - platform::Platform + platform::Union{Platform,Nothing} tm::LLVM.TargetMachine end @@ -29,6 +29,7 @@ StaticTarget() = StaticTarget(HostPlatform(), unsafe_string(LLVM.API.LLVMGetHost StaticTarget(platform::Platform) = StaticTarget(platform, LLVM.TargetMachine(LLVM.Target(triple = clean_triple(platform)), clean_triple(platform))) StaticTarget(platform::Platform, cpu::String) = StaticTarget(platform, LLVM.TargetMachine(LLVM.Target(triple = clean_triple(platform)), clean_triple(platform), cpu)) StaticTarget(platform::Platform, cpu::String, features::String) = StaticTarget(platform, LLVM.TargetMachine(LLVM.Target(triple = clean_triple(platform)), clean_triple(platform), cpu, features)) +StaticTarget(triple::String, cpu::String, features::String) = StaticTarget(nothing, LLVM.TargetMachine(LLVM.Target(triple = triple), triple, cpu, features)) """ ```julia diff --git a/test/scripts/wasm.jl b/test/scripts/wasm.jl new file mode 100644 index 0000000..9222660 --- /dev/null +++ b/test/scripts/wasm.jl @@ -0,0 +1,16 @@ +# Test that we can compile an object to wasm +# WebAssemblyCompiler.jl is a better tool for this, but this exercises the cross compilation pipeline + +using StaticCompiler +using LLVM +InitializeAllTargets() +InitializeAllTargetInfos() +InitializeAllAsmPrinters() +InitializeAllAsmParsers() +InitializeAllTargetMCs() + +fib(n) = n <= 1 ? 
n : fib(n - 1) + fib(n - 2) + +target=StaticTarget("wasm32","","") + +StaticCompiler.generate_obj(fib, Tuple{Int64}, "./test", target=target) \ No newline at end of file From bf0740bf858d2ad025e9a06bbcdad6a14c60992d Mon Sep 17 00:00:00 2001 From: Gabriel Baraldi Date: Wed, 29 Nov 2023 12:11:05 -0300 Subject: [PATCH 143/159] Whitespace --- test/scripts/interop.jl | 2 +- test/scripts/wasm.jl | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/test/scripts/interop.jl b/test/scripts/interop.jl index 422f682..b601e09 100644 --- a/test/scripts/interop.jl +++ b/test/scripts/interop.jl @@ -13,4 +13,4 @@ function interop(argc, argv) end # Attempt to compile -path = compile_executable(interop, (Int64, Ptr{Ptr{UInt8}}), "./",cflags=`-ldl -lm`) +path = compile_executable(interop, (Int64, Ptr{Ptr{UInt8}}), "./", cflags=`-ldl -lm`) diff --git a/test/scripts/wasm.jl b/test/scripts/wasm.jl index 9222660..beff3b7 100644 --- a/test/scripts/wasm.jl +++ b/test/scripts/wasm.jl @@ -13,4 +13,4 @@ fib(n) = n <= 1 ? 
n : fib(n - 1) + fib(n - 2) target=StaticTarget("wasm32","","") -StaticCompiler.generate_obj(fib, Tuple{Int64}, "./test", target=target) \ No newline at end of file +StaticCompiler.generate_obj(fib, Tuple{Int64}, "./test", target=target) From 2bf702662d1bce18d925a0d4130602dfd7b52de1 Mon Sep 17 00:00:00 2001 From: gbaraldi Date: Tue, 5 Dec 2023 13:15:18 -0500 Subject: [PATCH 144/159] Add way to set compiler for cross target and also bump major version --- Project.toml | 2 +- src/StaticCompiler.jl | 22 +++++++++++++++++----- src/target.jl | 29 ++++++++++++++++++++++++----- 3 files changed, 42 insertions(+), 11 deletions(-) diff --git a/Project.toml b/Project.toml index 4be5322..8daa749 100644 --- a/Project.toml +++ b/Project.toml @@ -1,7 +1,7 @@ name = "StaticCompiler" uuid = "81625895-6c0f-48fc-b932-11a18313743c" authors = ["Tom Short and contributors"] -version = "0.6.2" +version = "0.7.0" [deps] Clang_jll = "0ee61d77-7f21-5576-8119-9fcc46b10100" diff --git a/src/StaticCompiler.jl b/src/StaticCompiler.jl index b84cc2f..e0fc69c 100644 --- a/src/StaticCompiler.jl +++ b/src/StaticCompiler.jl @@ -13,7 +13,7 @@ using StaticTools using StaticTools: @symbolcall, @c_str, println using Core: MethodTable using Base:BinaryPlatforms.Platform, BinaryPlatforms.HostPlatform, BinaryPlatforms.arch, BinaryPlatforms.os_str, BinaryPlatforms.libc_str - +using Base:BinaryPlatforms.platform_dlext export load_function, compile_shlib, compile_executable export static_code_llvm, static_code_typed, static_llvm_module, static_code_native export @device_override, @print_and_throw @@ -294,7 +294,12 @@ function generate_executable(funcs::Union{Array,Tuple}, path=tempname(), name=fi exec_path = joinpath(path, filename) _, obj_path = generate_obj(funcs, path, filename; demangle, target, kwargs...) # Pick a compiler - cc = Sys.isapple() ? `cc` : clang() + if !isnothing(target.compiler) + cc = `$(target.compiler)` + else + cc = Sys.isapple() ? `cc` : clang() + end + # Compile! 
if Sys.isapple() # Apple no longer uses _start, so we can just specify a custom entry @@ -372,12 +377,19 @@ function generate_shlib(funcs::Union{Array,Tuple}, path::String=tempname(), file target::StaticTarget=StaticTarget(), kwargs... ) - - lib_path = joinpath(path, "$filename.$(Libdl.dlext)") + if !isnothing(target.platform) + lib_path = joinpath(path, "$filename.$(platform_dlext(target.platform))") + else + lib_path = joinpath(path, "$filename.$(Libdl.dlext)") + end _, obj_path = generate_obj(funcs, path, filename; target, demangle, kwargs...) # Pick a Clang - cc = Sys.isapple() ? `cc` : clang() + if !isnothing(target.compiler) + cc = `$(target.compiler)` + else + cc = Sys.isapple() ? `cc` : clang() + end # Compile! run(`$cc -shared $cflags $obj_path -o $lib_path `) diff --git a/src/target.jl b/src/target.jl index 3d54563..df53b9c 100644 --- a/src/target.jl +++ b/src/target.jl @@ -18,18 +18,37 @@ Beware that currently the compilation assumes that the code is on the host so pl ``` does not behave as expected. By default `StaticTarget()` is the native target. + +For cross-compilation of executables and shared libraries, one also needs to call `set_compiler!` with the path to a valid C compiler +for the target platform. 
For example, to cross-compile for aarch64 using a compiler from homebrew, one can use: +```julia + set_compiler!(StaticTarget(parse(Platform,"aarch64-gnu-linux")), "/opt/homebrew/bin/aarch64-unknown-linux-gnu-gcc") +``` """ -struct StaticTarget +mutable struct StaticTarget platform::Union{Platform,Nothing} tm::LLVM.TargetMachine + compiler::Union{String,Nothing} end clean_triple(platform::Platform) = arch(platform) * os_str(platform) * libc_str(platform) StaticTarget() = StaticTarget(HostPlatform(), unsafe_string(LLVM.API.LLVMGetHostCPUName()), unsafe_string(LLVM.API.LLVMGetHostCPUFeatures())) -StaticTarget(platform::Platform) = StaticTarget(platform, LLVM.TargetMachine(LLVM.Target(triple = clean_triple(platform)), clean_triple(platform))) -StaticTarget(platform::Platform, cpu::String) = StaticTarget(platform, LLVM.TargetMachine(LLVM.Target(triple = clean_triple(platform)), clean_triple(platform), cpu)) -StaticTarget(platform::Platform, cpu::String, features::String) = StaticTarget(platform, LLVM.TargetMachine(LLVM.Target(triple = clean_triple(platform)), clean_triple(platform), cpu, features)) -StaticTarget(triple::String, cpu::String, features::String) = StaticTarget(nothing, LLVM.TargetMachine(LLVM.Target(triple = triple), triple, cpu, features)) +StaticTarget(platform::Platform) = StaticTarget(platform, LLVM.TargetMachine(LLVM.Target(triple = clean_triple(platform)), clean_triple(platform)), nothing) +StaticTarget(platform::Platform, cpu::String) = StaticTarget(platform, LLVM.TargetMachine(LLVM.Target(triple = clean_triple(platform)), clean_triple(platform), cpu), nothing) +StaticTarget(platform::Platform, cpu::String, features::String) = StaticTarget(platform, LLVM.TargetMachine(LLVM.Target(triple = clean_triple(platform)), clean_triple(platform), cpu, features), nothing) + +function StaticTarget(triple::String, cpu::String, features::String) + platform = tryparse(Platform, triple) + StaticTarget(platform, LLVM.TargetMachine(LLVM.Target(triple = triple), 
triple, cpu, features), nothing) +end + +""" +Set the compiler for cross compilation + ```julia + set_compiler!(StaticTarget(parse(Platform,"aarch64-gnu-linux")), "/opt/homebrew/bin/aarch64-elf-gcc") +``` +""" +set_compiler!(target::StaticTarget, compiler::String) = (target.compiler = compiler) """ ```julia From ac2d56f4d91962193b5ef809e27e428b1d225fef Mon Sep 17 00:00:00 2001 From: gbaraldi Date: Tue, 5 Dec 2023 13:25:39 -0500 Subject: [PATCH 145/159] Bump Bumper :) --- test/Project.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/Project.toml b/test/Project.toml index 5498846..a36e208 100644 --- a/test/Project.toml +++ b/test/Project.toml @@ -16,4 +16,4 @@ Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40" Bumper = "8ce10254-0962-460f-a3d8-1f77fea1446e" [compat] -Bumper = "0.5.1" \ No newline at end of file +Bumper = "0.6" \ No newline at end of file From e3412cb6be4766ef08c328e24f9b892085e4a804 Mon Sep 17 00:00:00 2001 From: gbaraldi Date: Tue, 5 Dec 2023 16:02:30 -0500 Subject: [PATCH 146/159] Prepare code for future endeavours into adding the GC --- src/StaticCompiler.jl | 10 +++++----- src/target.jl | 16 +++++++++++----- 2 files changed, 16 insertions(+), 10 deletions(-) diff --git a/src/StaticCompiler.jl b/src/StaticCompiler.jl index e0fc69c..7277b2f 100644 --- a/src/StaticCompiler.jl +++ b/src/StaticCompiler.jl @@ -398,7 +398,7 @@ end function static_code_llvm(@nospecialize(func), @nospecialize(types); target::StaticTarget=StaticTarget(), kwargs...) job, kwargs = static_job(func, types; target, kwargs...) - GPUCompiler.code_llvm(stdout, job; kwargs...) + GPUCompiler.code_llvm(stdout, job; libraries=false, kwargs...) end function static_code_typed(@nospecialize(func), @nospecialize(types); target::StaticTarget=StaticTarget(), kwargs...) @@ -408,7 +408,7 @@ end function static_code_native(@nospecialize(f), @nospecialize(tt), fname=fix_name(f); target::StaticTarget=StaticTarget(), kwargs...) 
job, kwargs = static_job(f, tt; fname, target, kwargs...) - GPUCompiler.code_native(stdout, job; kwargs...) + GPUCompiler.code_native(stdout, job; libraries=false, kwargs...) end # Return an LLVM module @@ -418,7 +418,7 @@ function static_llvm_module(f, tt, name=fix_name(f); demangle=true, target::Stat end job, kwargs = static_job(f, tt; name, target, kwargs...) m = GPUCompiler.JuliaContext() do context - m, _ = GPUCompiler.codegen(:llvm, job; strip=true, only_entry=false, validate=false) + m, _ = GPUCompiler.codegen(:llvm, job; strip=true, only_entry=false, validate=false, libraries=false) locate_pointers_and_runtime_calls(m) m end @@ -434,7 +434,7 @@ function static_llvm_module(funcs::Union{Array,Tuple}; demangle=true, target::St name_f = "julia_"*name_f end job, kwargs = static_job(f, tt; name = name_f, target, kwargs...) - mod,_ = GPUCompiler.codegen(:llvm, job; strip=true, only_entry=false, validate=false) + mod,_ = GPUCompiler.codegen(:llvm, job; strip=true, only_entry=false, validate=false, libraries=false) if length(funcs) > 1 for func in funcs[2:end] f,tt = func @@ -443,7 +443,7 @@ function static_llvm_module(funcs::Union{Array,Tuple}; demangle=true, target::St name_f = "julia_"*name_f end job, kwargs = static_job(f, tt; name = name_f, target, kwargs...) 
- tmod,_ = GPUCompiler.codegen(:llvm, job; strip=true, only_entry=false, validate=false) + tmod,_ = GPUCompiler.codegen(:llvm, job; strip=true, only_entry=false, validate=false, libraries=false) link!(mod,tmod) end end diff --git a/src/target.jl b/src/target.jl index df53b9c..5faec0d 100644 --- a/src/target.jl +++ b/src/target.jl @@ -29,13 +29,14 @@ mutable struct StaticTarget platform::Union{Platform,Nothing} tm::LLVM.TargetMachine compiler::Union{String,Nothing} + julia_runtime::Bool end clean_triple(platform::Platform) = arch(platform) * os_str(platform) * libc_str(platform) StaticTarget() = StaticTarget(HostPlatform(), unsafe_string(LLVM.API.LLVMGetHostCPUName()), unsafe_string(LLVM.API.LLVMGetHostCPUFeatures())) -StaticTarget(platform::Platform) = StaticTarget(platform, LLVM.TargetMachine(LLVM.Target(triple = clean_triple(platform)), clean_triple(platform)), nothing) -StaticTarget(platform::Platform, cpu::String) = StaticTarget(platform, LLVM.TargetMachine(LLVM.Target(triple = clean_triple(platform)), clean_triple(platform), cpu), nothing) -StaticTarget(platform::Platform, cpu::String, features::String) = StaticTarget(platform, LLVM.TargetMachine(LLVM.Target(triple = clean_triple(platform)), clean_triple(platform), cpu, features), nothing) +StaticTarget(platform::Platform) = StaticTarget(platform, LLVM.TargetMachine(LLVM.Target(triple = clean_triple(platform)), clean_triple(platform)), nothing, false) +StaticTarget(platform::Platform, cpu::String) = StaticTarget(platform, LLVM.TargetMachine(LLVM.Target(triple = clean_triple(platform)), clean_triple(platform), cpu), nothing, false) +StaticTarget(platform::Platform, cpu::String, features::String) = StaticTarget(platform, LLVM.TargetMachine(LLVM.Target(triple = clean_triple(platform)), clean_triple(platform), cpu, features), nothing, false) function StaticTarget(triple::String, cpu::String, features::String) platform = tryparse(Platform, triple) @@ -50,6 +51,9 @@ Set the compiler for cross compilation """ 
set_compiler!(target::StaticTarget, compiler::String) = (target.compiler = compiler) + +set_runtime!(target::StaticTarget, julia_runtime::Bool) = (target.julia_runtime = julia_runtime) + """ ```julia @device_override old_bad_method(arg1::Type1, arg2::Type2) = new_good_method(arg1, arg2) @@ -79,6 +83,7 @@ struct StaticCompilerTarget{MT} <: GPUCompiler.AbstractCompilerTarget triple::String cpu::String features::String + julia_runtime::Bool method_table::MT end @@ -115,6 +120,7 @@ GPUCompiler.runtime_module(::GPUCompiler.CompilerJob{<:StaticCompilerTarget, Sta GPUCompiler.can_throw(job::GPUCompiler.CompilerJob{<:StaticCompilerTarget, StaticCompilerParams}) = true GPUCompiler.can_throw(job::GPUCompiler.CompilerJob{<:StaticCompilerTarget}) = true +GPUCompiler.uses_julia_runtime(job::GPUCompiler.CompilerJob{<:StaticCompilerTarget}) = job.config.target.julia_runtime GPUCompiler.get_interpreter(job::GPUCompiler.CompilerJob{<:StaticCompilerTarget, StaticCompilerParams}) = StaticInterpreter(job.config.params.cache, GPUCompiler.method_table(job), job.world, GPUCompiler.inference_params(job), GPUCompiler.optimization_params(job)) @@ -131,7 +137,7 @@ function static_job(@nospecialize(func::Function), @nospecialize(types::Type); ) source = methodinstance(typeof(func), Base.to_tuple_type(types)) tm = target.tm - gputarget = StaticCompilerTarget(LLVM.triple(tm), LLVM.cpu(tm), LLVM.features(tm), method_table) + gputarget = StaticCompilerTarget(LLVM.triple(tm), LLVM.cpu(tm), LLVM.features(tm), target.julia_runtime, method_table) params = StaticCompilerParams() config = GPUCompiler.CompilerConfig(gputarget, params, name = name, kernel = kernel) StaticCompiler.CompilerJob(source, config), kwargs @@ -145,7 +151,7 @@ function static_job(@nospecialize(func), @nospecialize(types); ) source = methodinstance(typeof(func), Base.to_tuple_type(types)) tm = target.tm - gputarget = StaticCompilerTarget(LLVM.triple(tm), LLVM.cpu(tm), LLVM.features(tm), method_table) + gputarget = 
StaticCompilerTarget(LLVM.triple(tm), LLVM.cpu(tm), LLVM.features(tm), target.julia_runtime, method_table) params = StaticCompilerParams() config = GPUCompiler.CompilerConfig(gputarget, params, name = name, kernel = kernel) StaticCompiler.CompilerJob(source, config), kwargs From afd6f6623f71756cb2514cb3b177144ff05fcda7 Mon Sep 17 00:00:00 2001 From: CompatHelper Julia Date: Sun, 25 Feb 2024 00:58:26 +0000 Subject: [PATCH 147/159] CompatHelper: bump compat for GPUCompiler to 0.26, (keep existing compat) --- Project.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Project.toml b/Project.toml index 4be5322..6eab8fb 100644 --- a/Project.toml +++ b/Project.toml @@ -17,7 +17,7 @@ StaticTools = "86c06d3c-3f03-46de-9781-57580aa96d0a" [compat] CodeInfoTools = "0.3" -GPUCompiler = "0.21, 0.22, 0.23, 0.24, 0.25" +GPUCompiler = "0.21, 0.22, 0.23, 0.24, 0.25, 0.26" LLVM = "6" MacroTools = "0.5" StaticTools = "0.8" From a18f913f9a346804eb07e26baa0e483101bb7550 Mon Sep 17 00:00:00 2001 From: "C. Brenhin Keller" Date: Mon, 4 Mar 2024 09:47:18 -0500 Subject: [PATCH 148/159] Bump version to trigger CI --- Project.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Project.toml b/Project.toml index 6eab8fb..6593f96 100644 --- a/Project.toml +++ b/Project.toml @@ -1,7 +1,7 @@ name = "StaticCompiler" uuid = "81625895-6c0f-48fc-b932-11a18313743c" authors = ["Tom Short and contributors"] -version = "0.6.2" +version = "0.6.3" [deps] Clang_jll = "0ee61d77-7f21-5576-8119-9fcc46b10100" From e672f352352097108e3049383e1edfcf508c57af Mon Sep 17 00:00:00 2001 From: Thomas R Date: Mon, 4 Mar 2024 21:27:33 +0100 Subject: [PATCH 149/159] adaption to Windows (#151) * adaption to windows: use clang to generate executable, skip GPUCompiler * a number of simplifications and bugfixes --------- Co-authored-by: C. 
Brenhin Keller --- .github/workflows/ci.yml | 2 +- Project.toml | 2 +- src/StaticCompiler.jl | 70 +++++++++++++++++++++++++++------------- test/testcore.jl | 10 ++++++ 4 files changed, 59 insertions(+), 25 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 3bbb9fd..3dc141f 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -20,7 +20,7 @@ jobs: version: - '1.8' - '1.9' - - '1.10.0-rc1' + - '1.10' os: - ubuntu-latest - macOS-latest diff --git a/Project.toml b/Project.toml index 96c94ea..4f41e3e 100644 --- a/Project.toml +++ b/Project.toml @@ -1,7 +1,7 @@ name = "StaticCompiler" uuid = "81625895-6c0f-48fc-b932-11a18313743c" authors = ["Tom Short and contributors"] -version = "0.7.0" +version = "0.7.1" [deps] diff --git a/src/StaticCompiler.jl b/src/StaticCompiler.jl index 7277b2f..38547fe 100644 --- a/src/StaticCompiler.jl +++ b/src/StaticCompiler.jl @@ -35,6 +35,7 @@ compile_executable(f::Function, types::Tuple, path::String, [name::String=string cflags=``, # Specify libraries you would like to link against, and other compiler options here also_expose=[], target::StaticTarget=StaticTarget(), + llvm_to_clang = Sys.iswindows(), method_table=StaticCompiler.method_table, kwargs... ) @@ -98,17 +99,18 @@ shell> ./hello Hello, world! ``` """ -function compile_executable(f::Function, types=(), path::String="./", name=fix_name(f); +function compile_executable(f::Function, types=(), path::String=pwd(), name=fix_name(f); also_expose=Tuple{Function, Tuple{DataType}}[], target::StaticTarget=StaticTarget(), kwargs...) compile_executable(vcat([(f, types)], also_expose), path, name; target, kwargs...) 
end -function compile_executable(funcs::Union{Array,Tuple}, path::String="./", name=fix_name(first(first(funcs))); +function compile_executable(funcs::Union{Array,Tuple}, path::String=pwd(), name=fix_name(first(first(funcs))); filename = name, demangle = true, cflags = ``, target::StaticTarget=StaticTarget(), + llvm_to_clang = Sys.iswindows(), kwargs... ) @@ -122,20 +124,20 @@ function compile_executable(funcs::Union{Array,Tuple}, path::String="./", name=f nativetype = isprimitivetype(rt) || isa(rt, Ptr) nativetype || @warn "Return type `$rt` of `$f$types` does not appear to be a native type. Consider returning only a single value of a native machine type (i.e., a single float, int/uint, bool, or pointer). \n\nIgnoring this warning may result in Undefined Behavior!" - generate_executable(funcs, path, name, filename; demangle, cflags, target, kwargs...) + generate_executable(funcs, path, name, filename; demangle, cflags, target, llvm_to_clang, kwargs...) joinpath(abspath(path), filename) end """ ```julia -compile_shlib(f::Function, types::Tuple, [path::String="./"], [name::String=string(nameof(f))]; +compile_shlib(f::Function, types::Tuple, [path::String=pwd()], [name::String=string(nameof(f))]; filename::String=name, cflags=``, method_table=StaticCompiler.method_table, target::StaticTarget=StaticTarget(), kwargs...) -compile_shlib(funcs::Array, [path::String="./"]; +compile_shlib(funcs::Array, [path::String=pwd()]; filename="libfoo", demangle=true, cflags=``, @@ -172,7 +174,7 @@ julia> ccall(("test", "test.dylib"), Float64, (Int64,), 100_000) 5.2564961094956075 ``` """ -function compile_shlib(f::Function, types=(), path::String="./", name=fix_name(f); +function compile_shlib(f::Function, types=(), path::String=pwd(), name=fix_name(f); filename=name, target::StaticTarget=StaticTarget(), kwargs... @@ -180,7 +182,7 @@ function compile_shlib(f::Function, types=(), path::String="./", name=fix_name(f compile_shlib(((f, types),), path; filename, target, kwargs...) 
end # As above, but taking an array of functions and returning a single shlib -function compile_shlib(funcs::Union{Array,Tuple}, path::String="./"; +function compile_shlib(funcs::Union{Array,Tuple}, path::String=pwd(); filename = "libfoo", demangle = true, cflags = ``, @@ -289,10 +291,11 @@ function generate_executable(funcs::Union{Array,Tuple}, path=tempname(), name=fi demangle = true, cflags = ``, target::StaticTarget=StaticTarget(), + llvm_to_clang::Bool = Sys.iswindows(), kwargs... ) exec_path = joinpath(path, filename) - _, obj_path = generate_obj(funcs, path, filename; demangle, target, kwargs...) + _, obj_or_ir_path = generate_obj(funcs, path, filename; demangle, target, emit_llvm_only=llvm_to_clang, kwargs...) # Pick a compiler if !isnothing(target.compiler) cc = `$(target.compiler)` @@ -301,10 +304,10 @@ function generate_executable(funcs::Union{Array,Tuple}, path=tempname(), name=fi end # Compile! - if Sys.isapple() + if Sys.isapple() && !llvm_to_clang # Apple no longer uses _start, so we can just specify a custom entry entry = demangle ? "_$name" : "_julia_$name" - run(`$cc -e $entry $cflags $obj_path -o $exec_path`) + run(`$cc -e $entry $cflags $obj_or_ir_path -o $exec_path`) else fn = demangle ? 
"$name" : "julia_$name" # Write a minimal wrapper to avoid having to specify a custom entry @@ -319,9 +322,22 @@ function generate_executable(funcs::Union{Array,Tuple}, path=tempname(), name=fi return 0; }""") close(f) - run(`$cc $wrapper_path $cflags $obj_path -o $exec_path`) + if llvm_to_clang # (required on Windows) + # Use clang (llc) to generate an executable from the LLVM IR + cclang = if Sys.iswindows() + `cmd \c clang` # Not clear if the `cmd \c` is necessary + elseif Sys.isapple() + `clang` + else + clang() + end + run(`$cclang -Wno-override-module $wrapper_path $obj_or_ir_path -o $exec_path`) + else + run(`$cc $wrapper_path $cflags $obj_or_ir_path -o $exec_path`) + end + # Clean up - run(`rm $wrapper_path`) + rm(wrapper_path) end path, name end @@ -482,7 +498,6 @@ generate_obj(f, tt, path::String = tempname(), filenamebase::String="obj"; demangle = true, strip_llvm = false, strip_asm = true, - opt_level = 3, kwargs...) ``` Low level interface for compiling object code (`.o`) for for function `f` given @@ -519,10 +534,10 @@ end ```julia generate_obj(funcs::Union{Array,Tuple}, path::String = tempname(), filenamebase::String="obj"; target::StaticTarget=StaticTarget(), - demangle =false, + demangle = false, + emit_llvm_only = false, strip_llvm = false, strip_asm = true, - opt_level=3, kwargs...) ``` Low level interface for compiling object code (`.o`) for an array of Tuples @@ -534,25 +549,34 @@ This is a struct of the type StaticTarget() The defaults compile to the native target. """ function generate_obj(funcs::Union{Array,Tuple}, path::String = tempname(), filenamebase::String="obj"; + target::StaticTarget=StaticTarget(), demangle = true, + emit_llvm_only = false, strip_llvm = false, strip_asm = true, - opt_level = 3, - target::StaticTarget=StaticTarget(), kwargs...) f, tt = funcs[1] mkpath(path) - obj_path = joinpath(path, "$filenamebase.o") mod = static_llvm_module(funcs; demangle, kwargs...) 
- obj = GPUCompiler.JuliaContext() do ctx + + if emit_llvm_only # (Required on Windows) + ir_path = joinpath(path, "$filenamebase.ll") + open(ir_path, "w") do io + write(io, string(mod)) + end + return path, ir_path + else + obj_path = joinpath(path, "$filenamebase.o") + obj = GPUCompiler.JuliaContext() do ctx fakejob, _ = static_job(f, tt; target, kwargs...) obj, _ = GPUCompiler.emit_asm(fakejob, mod; strip=strip_asm, validate=false, format=LLVM.API.LLVMObjectFile) obj + end + open(obj_path, "w") do io + write(io, obj) + end + return path, obj_path end - open(obj_path, "w") do io - write(io, obj) - end - path, obj_path end end # module diff --git a/test/testcore.jl b/test/testcore.jl index e705ed6..064f010 100644 --- a/test/testcore.jl +++ b/test/testcore.jl @@ -50,6 +50,11 @@ end @test isa(r, Base.Process) @test r.exitcode == 0 + filepath = compile_executable(foo, (), workdir, llvm_to_clang=true) + r = run(`$filepath`); + @test isa(r, Base.Process) + @test r.exitcode == 0 + @inline function _puts(s::Ptr{UInt8}) # Can't use Base.println because it allocates Base.llvmcall((""" @@ -85,6 +90,11 @@ end @test isa(r, Base.Process) @test r.exitcode == 0 + filepath = compile_executable(print_args, (Int, Ptr{Ptr{UInt8}}), workdir, llvm_to_clang=true) + r = run(`$filepath Hello, world!`); + @test isa(r, Base.Process) + @test r.exitcode == 0 + # Compile a function that definitely fails @inline foo_err() = UInt64(-1) From c91dc620c01bebd79fc750b97bb51cc517bbfee4 Mon Sep 17 00:00:00 2001 From: Chris Elrod Date: Mon, 1 Apr 2024 13:09:05 -0400 Subject: [PATCH 150/159] `throw_overflowerr_negation` takes 1 argument --- src/quirks.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/quirks.jl b/src/quirks.jl index 65ebc7b..b5c8571 100644 --- a/src/quirks.jl +++ b/src/quirks.jl @@ -25,7 +25,7 @@ end # checked.jl @device_override @noinline Base.Checked.throw_overflowerr_binaryop(op, x, y) = @print_and_throw c"Binary operation overflowed" -@device_override 
@noinline Base.Checked.throw_overflowerr_negation(op, x, y) = +@device_override @noinline Base.Checked.throw_overflowerr_negation(x) = @print_and_throw c"Negation overflowed" @device_override function Base.Checked.checked_abs(x::Base.Checked.SignedInt) r = ifelse(x < 0, -x, x) From 4c073c425a5408bd4f8618c13feb5571c52504f4 Mon Sep 17 00:00:00 2001 From: Chris Elrod Date: Mon, 1 Apr 2024 14:57:41 -0400 Subject: [PATCH 151/159] Similar update for `exp_domainerror` --- src/quirks.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/quirks.jl b/src/quirks.jl index b5c8571..f279d94 100644 --- a/src/quirks.jl +++ b/src/quirks.jl @@ -9,7 +9,7 @@ end # math.jl @device_override @noinline Base.Math.throw_complex_domainerror(f::Symbol, x) = @print_and_throw c"This operation requires a complex input to return a complex result" -@device_override @noinline Base.Math.throw_exp_domainerror(f::Symbol, x) = +@device_override @noinline Base.Math.throw_exp_domainerror(x) = @print_and_throw c"Exponentiation yielding a complex result requires a complex argument" # intfuncs.jl From 72a90e14b6c28e06ec0941f9875c93e910676a6e Mon Sep 17 00:00:00 2001 From: Daizu Date: Sun, 19 May 2024 00:00:27 +0900 Subject: [PATCH 152/159] Add dllexport.jl --- src/StaticCompiler.jl | 30 ++++++++++++++++++++++++------ src/dllexport.jl | 11 +++++++++++ 2 files changed, 35 insertions(+), 6 deletions(-) create mode 100644 src/dllexport.jl diff --git a/src/StaticCompiler.jl b/src/StaticCompiler.jl index 38547fe..90e3404 100644 --- a/src/StaticCompiler.jl +++ b/src/StaticCompiler.jl @@ -23,6 +23,7 @@ include("interpreter.jl") include("target.jl") include("pointer_warning.jl") include("quirks.jl") +include("dllexport.jl") fix_name(f::Function) = fix_name(string(nameof(f))) fix_name(s) = String(GPUCompiler.safe_name(s)) @@ -125,6 +126,7 @@ function compile_executable(funcs::Union{Array,Tuple}, path::String=pwd(), name= nativetype || @warn "Return type `$rt` of `$f$types` does not appear to be a 
native type. Consider returning only a single value of a native machine type (i.e., a single float, int/uint, bool, or pointer). \n\nIgnoring this warning may result in Undefined Behavior!" generate_executable(funcs, path, name, filename; demangle, cflags, target, llvm_to_clang, kwargs...) + Sys.iswindows() && (filename *= ".exe") joinpath(abspath(path), filename) end @@ -187,6 +189,7 @@ function compile_shlib(funcs::Union{Array,Tuple}, path::String=pwd(); demangle = true, cflags = ``, target::StaticTarget=StaticTarget(), + llvm_to_clang = Sys.iswindows(), kwargs... ) for func in funcs @@ -200,7 +203,7 @@ function compile_shlib(funcs::Union{Array,Tuple}, path::String=pwd(); nativetype || @warn "Return type `$rt` of `$f$types` does not appear to be a native type. Consider returning only a single value of a native machine type (i.e., a single float, int/uint, bool, or pointer). \n\nIgnoring this warning may result in Undefined Behavior!" end - generate_shlib(funcs, path, filename; demangle, cflags, target, kwargs...) + generate_shlib(funcs, path, filename; demangle, cflags, target, llvm_to_clang, kwargs...) joinpath(abspath(path), filename * "." 
* Libdl.dlext) end @@ -325,17 +328,18 @@ function generate_executable(funcs::Union{Array,Tuple}, path=tempname(), name=fi if llvm_to_clang # (required on Windows) # Use clang (llc) to generate an executable from the LLVM IR cclang = if Sys.iswindows() - `cmd \c clang` # Not clear if the `cmd \c` is necessary + exec_path *= ".exe" + `clang` elseif Sys.isapple() `clang` else clang() end - run(`$cclang -Wno-override-module $wrapper_path $obj_or_ir_path -o $exec_path`) + run(`$cclang -Wno-override-module $wrapper_path $obj_or_ir_path -o $exec_path`) else run(`$cc $wrapper_path $cflags $obj_or_ir_path -o $exec_path`) end - + # Clean up rm(wrapper_path) end @@ -391,6 +395,7 @@ function generate_shlib(funcs::Union{Array,Tuple}, path::String=tempname(), file demangle = true, cflags = ``, target::StaticTarget=StaticTarget(), + llvm_to_clang::Bool = Sys.iswindows(), kwargs... ) if !isnothing(target.platform) @@ -399,7 +404,7 @@ function generate_shlib(funcs::Union{Array,Tuple}, path::String=tempname(), file lib_path = joinpath(path, "$filename.$(Libdl.dlext)") end - _, obj_path = generate_obj(funcs, path, filename; target, demangle, kwargs...) + _, obj_or_ir_path = generate_obj(funcs, path, filename; demangle, target, emit_llvm_only=llvm_to_clang, kwargs...) # Pick a Clang if !isnothing(target.compiler) cc = `$(target.compiler)` @@ -407,7 +412,20 @@ function generate_shlib(funcs::Union{Array,Tuple}, path::String=tempname(), file cc = Sys.isapple() ? `cc` : clang() end # Compile! 
- run(`$cc -shared $cflags $obj_path -o $lib_path `) + if llvm_to_clang # (required on Windows) + # Use clang (llc) to generate an executable from the LLVM IR + cclang = if Sys.iswindows() + add_dllexport(funcs, obj_or_ir_path; demangle) + `clang` + elseif Sys.isapple() + `clang` + else + clang() + end + run(`$cclang -shared -Wno-override-module $obj_or_ir_path -o $lib_path`) + else + run(`$cc -shared $cflags $obj_or_ir_path -o $lib_path `) + end path, name end diff --git a/src/dllexport.jl b/src/dllexport.jl new file mode 100644 index 0000000..b957e7b --- /dev/null +++ b/src/dllexport.jl @@ -0,0 +1,11 @@ +function add_dllexport(funcs, ir_path; demangle=true) + ir = read(ir_path, String) + + for (f, _) in funcs + name_f = (demangle ? "" : "julia_") * fix_name(f) + pattern = Regex("^define(.*?@$name_f\\()", "m") + ir = replace(ir, pattern => s"define dllexport\1") + end + + write(ir_path, ir) +end \ No newline at end of file From 8ad178a44602eec90ee0f0aff36ad4a01af791eb Mon Sep 17 00:00:00 2001 From: Daizu Date: Sun, 19 May 2024 09:29:10 +0900 Subject: [PATCH 153/159] Update tests for Windows --- test/runtests.jl | 6 +++++- test/testintegration.jl | 11 ++++++++--- test/testintegration_windows.jl | 17 +++++++++++++++++ 3 files changed, 30 insertions(+), 4 deletions(-) create mode 100644 test/testintegration_windows.jl diff --git a/test/runtests.jl b/test/runtests.jl index 542659c..cf2964c 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -21,5 +21,9 @@ if GROUP == "Core" || GROUP == "All" end if GROUP == "Integration" || GROUP == "All" - include("testintegration.jl") + if Sys.iswindows() + include("testintegration_windows.jl") + else + include("testintegration.jl") + end end diff --git a/test/testintegration.jl b/test/testintegration.jl index 3422e63..e52a154 100644 --- a/test/testintegration.jl +++ b/test/testintegration.jl @@ -9,7 +9,7 @@ if VERSION >= v"1.9" buf = AllocBuffer(MallocVector, sizeof(Float64) * N) s = 0.0 for i ∈ 1:N - # some excuse to reuse the 
same memory a bunch of times + # some excuse to reuse the same memory a bunch of times @no_escape buf begin v = @alloc(Float64, N) v .= i @@ -24,7 +24,7 @@ if VERSION >= v"1.9" path = compile_shlib(bumper_test, (Int,), "./") ptr = Libdl.dlopen(path, Libdl.RTLD_LOCAL) - + fptr = Libdl.dlsym(ptr, "bumper_test") @test bumper_test(8) == @ccall($fptr(8::Int)::Float64) @@ -303,7 +303,12 @@ end catch e @info "maybe_throw: task failed sucessfully!" end - @test status === -1 + if Sys.iswindows() + @info "maybe_throw: task doesn't fail in Windows." + @test status.exitcode == 0 + else + @test status === -1 + end end ## --- Test interop diff --git a/test/testintegration_windows.jl b/test/testintegration_windows.jl new file mode 100644 index 0000000..3e6a887 --- /dev/null +++ b/test/testintegration_windows.jl @@ -0,0 +1,17 @@ +# Currently, `StaticTools.stderrp()` used in `test/scripts` doesn't work in Windows. +# This temporary file deletes `stderrp()` and run tests. + +mkpath("scripts_windows") + +for file in readdir("scripts") + script = read("scripts/$file", String) + script = replace(script, "printf(stderrp(), " => "printf(") + write("scripts_windows/$file", script) +end + +script = read("testintegration.jl", String) +script = replace(script, "testpath/scripts/" => "testpath/scripts_windows/") + +include_string(Main, script) + +rm("scripts_windows"; recursive=true) \ No newline at end of file From 489689a06d14b8fe599a5d33db4627e2283e2884 Mon Sep 17 00:00:00 2001 From: Daizu Date: Sun, 19 May 2024 10:38:13 +0900 Subject: [PATCH 154/159] change comments --- test/testintegration.jl | 2 +- test/testintegration_windows.jl | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/test/testintegration.jl b/test/testintegration.jl index e52a154..d2a98da 100644 --- a/test/testintegration.jl +++ b/test/testintegration.jl @@ -304,7 +304,7 @@ end @info "maybe_throw: task failed sucessfully!" end if Sys.iswindows() - @info "maybe_throw: task doesn't fail in Windows." 
+ @info "maybe_throw: task doesn't fail on Windows." @test status.exitcode == 0 else @test status === -1 diff --git a/test/testintegration_windows.jl b/test/testintegration_windows.jl index 3e6a887..7ac95e5 100644 --- a/test/testintegration_windows.jl +++ b/test/testintegration_windows.jl @@ -1,4 +1,4 @@ -# Currently, `StaticTools.stderrp()` used in `test/scripts` doesn't work in Windows. +# Currently, `StaticTools.stderrp()` used in `test/scripts` doesn't work on Windows. # This temporary file deletes `stderrp()` and run tests. mkpath("scripts_windows") From 76bff06fbac59d74bcb647fb1e097c31503f2f4e Mon Sep 17 00:00:00 2001 From: Daizu Date: Sun, 19 May 2024 11:05:53 +0900 Subject: [PATCH 155/159] Use llvm action --- .github/workflows/ci.yml | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 3dc141f..499223c 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -24,6 +24,7 @@ jobs: os: - ubuntu-latest - macOS-latest + - windows-latest arch: - x64 group: @@ -34,6 +35,10 @@ jobs: os: ubuntu-latest steps: - uses: actions/checkout@v2 + - uses: KyleMayes/install-llvm-action@v2 + with: + version: "17" + if: matrix.os == 'windows-latest' - uses: julia-actions/setup-julia@latest with: version: ${{ matrix.version }} From a57094c765bfb32bdf3970f33b77fcd53120daca Mon Sep 17 00:00:00 2001 From: Daizu Date: Sun, 19 May 2024 11:12:39 +0900 Subject: [PATCH 156/159] Use llvm action (Integration) --- .github/workflows/ci-integration.yml | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/.github/workflows/ci-integration.yml b/.github/workflows/ci-integration.yml index beb16c1..544b0a4 100644 --- a/.github/workflows/ci-integration.yml +++ b/.github/workflows/ci-integration.yml @@ -24,12 +24,17 @@ jobs: os: - ubuntu-latest - macOS-latest + - windows-latest arch: - x64 group: - Integration steps: - uses: actions/checkout@v2 + - uses: KyleMayes/install-llvm-action@v2 + with: + version: "17" + if: 
matrix.os == 'windows-latest' - uses: julia-actions/setup-julia@latest with: version: ${{ matrix.version }} From 7d53475cc263498764deb72e1d400fb3ec90394a Mon Sep 17 00:00:00 2001 From: Daizu Date: Tue, 21 May 2024 05:41:00 +0900 Subject: [PATCH 157/159] Delete `testintegration_windows.jl` --- test/runtests.jl | 6 +----- test/testintegration_windows.jl | 17 ----------------- 2 files changed, 1 insertion(+), 22 deletions(-) delete mode 100644 test/testintegration_windows.jl diff --git a/test/runtests.jl b/test/runtests.jl index cf2964c..542659c 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -21,9 +21,5 @@ if GROUP == "Core" || GROUP == "All" end if GROUP == "Integration" || GROUP == "All" - if Sys.iswindows() - include("testintegration_windows.jl") - else - include("testintegration.jl") - end + include("testintegration.jl") end diff --git a/test/testintegration_windows.jl b/test/testintegration_windows.jl deleted file mode 100644 index 7ac95e5..0000000 --- a/test/testintegration_windows.jl +++ /dev/null @@ -1,17 +0,0 @@ -# Currently, `StaticTools.stderrp()` used in `test/scripts` doesn't work on Windows. -# This temporary file deletes `stderrp()` and run tests. - -mkpath("scripts_windows") - -for file in readdir("scripts") - script = read("scripts/$file", String) - script = replace(script, "printf(stderrp(), " => "printf(") - write("scripts_windows/$file", script) -end - -script = read("testintegration.jl", String) -script = replace(script, "testpath/scripts/" => "testpath/scripts_windows/") - -include_string(Main, script) - -rm("scripts_windows"; recursive=true) \ No newline at end of file From 574db90bf816271129abf06e0371c2f8c00df844 Mon Sep 17 00:00:00 2001 From: "C. 
Brenhin Keller" Date: Sun, 16 Jun 2024 16:55:56 -0400 Subject: [PATCH 158/159] Bump version to 0.7.2 --- Project.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Project.toml b/Project.toml index 4f41e3e..b4d7d40 100644 --- a/Project.toml +++ b/Project.toml @@ -1,7 +1,7 @@ name = "StaticCompiler" uuid = "81625895-6c0f-48fc-b932-11a18313743c" authors = ["Tom Short and contributors"] -version = "0.7.1" +version = "0.7.2" [deps] From d379369d758d0663de8bf26f886dbde4d6cacb32 Mon Sep 17 00:00:00 2001 From: "C. Brenhin Keller" Date: Mon, 30 Sep 2024 15:02:35 -0400 Subject: [PATCH 159/159] Update README section on Windows support --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index d7c4ed0..7889470 100644 --- a/README.md +++ b/README.md @@ -78,7 +78,7 @@ This package uses the [GPUCompiler package](https://github.com/JuliaGPU/GPUCompi * GC-tracked allocations and global variables do *not* work with `compile_executable` or `compile_shlib`. This has some interesting consequences, including that all functions _within_ the function you want to compile must either be inlined or return only native types (otherwise Julia would have to allocate a place to put the results, which will fail). * Since error handling relies on libjulia, you can only throw errors from standalone-compiled (`compile_executable` / `compile_shlib`) code if an explicit overload has been defined for that particular error with `@device_override` (see [quirks.jl](src/quirks.jl)). * Type instability. Type unstable code cannot currently be statically compiled via this package. -* Doesn't work on Windows (but works in WSL on Windows 10+). PRs welcome. +* Extra experimental on Windows (PRs welcome if you encounter issues). Should work in WSL on Windows 10+. ## Guide for Package Authors