From f51aea5d997ba46858f909a3b39566bb53961349 Mon Sep 17 00:00:00 2001 From: Tim Besard Date: Tue, 24 Jun 2025 11:31:06 +0200 Subject: [PATCH 1/6] Add support for using FileCheck in tests. --- test/Project.toml | 2 + test/helpers/bpf.jl | 9 ++++ test/helpers/gcn.jl | 9 ++++ test/helpers/metal.jl | 9 ++++ test/helpers/ptx.jl | 9 ++++ test/helpers/spirv.jl | 9 ++++ test/helpers/test.jl | 120 ++++++++++++++++++++++++++++++++++++++++++ test/setup.jl | 1 + test/utils.jl | 23 ++++++++ 9 files changed, 191 insertions(+) diff --git a/test/Project.toml b/test/Project.toml index 7354409e..71311c11 100644 --- a/test/Project.toml +++ b/test/Project.toml @@ -2,8 +2,10 @@ Aqua = "4c88cf16-eb10-579e-8560-4a9242c79595" Dates = "ade2ca70-3891-5945-98fb-dc099432e06a" Distributed = "8ba89e20-285c-5b6f-9357-94700520ee1b" +IOCapture = "b5f81e59-6552-4d32-b1f0-c071b021bf89" InteractiveUtils = "b77e0a4c-d291-57a0-90e8-8db25a27a240" LLVM = "929cbde3-209d-540e-8aea-75f648917ca0" +LLVM_jll = "86de99a1-58d6-5da7-8064-bd56ce2e322c" Logging = "56ddb016-857b-54e1-b83d-db4d58db5568" PrecompileTools = "aea7be01-6a6a-4083-8856-8a6e6704d82a" Printf = "de0858da-6303-5e67-8744-51eddeeeb8d7" diff --git a/test/helpers/bpf.jl b/test/helpers/bpf.jl index d66b6b48..15eaa121 100644 --- a/test/helpers/bpf.jl +++ b/test/helpers/bpf.jl @@ -25,6 +25,15 @@ function code_native(io::IO, @nospecialize(func), @nospecialize(types); kwargs.. GPUCompiler.code_native(io, job; kwargs...) end +# aliases without ::IO argument +for method in (:code_llvm, :code_native) + method = Symbol("$(method)") + @eval begin + $method(@nospecialize(func), @nospecialize(types); kwargs...) = + $method(stdout, func, types; kwargs...) + end +end + # simulates codegen for a kernel function: validates by default function code_execution(@nospecialize(func), @nospecialize(types); kwargs...) job, kwargs = create_job(func, types; kwargs...) 
diff --git a/test/helpers/gcn.jl b/test/helpers/gcn.jl index f7f54f85..c894fbd3 100644 --- a/test/helpers/gcn.jl +++ b/test/helpers/gcn.jl @@ -35,6 +35,15 @@ function code_native(io::IO, @nospecialize(func), @nospecialize(types); kwargs.. GPUCompiler.code_native(io, job; kwargs...) end +# aliases without ::IO argument +for method in (:code_warntype, :code_llvm, :code_native) + method = Symbol("$(method)") + @eval begin + $method(@nospecialize(func), @nospecialize(types); kwargs...) = + $method(stdout, func, types; kwargs...) + end +end + # simulates codegen for a kernel function: validates by default function code_execution(@nospecialize(func), @nospecialize(types); kwargs...) job, kwargs = create_job(func, types; kernel=true, kwargs...) diff --git a/test/helpers/metal.jl b/test/helpers/metal.jl index d46f9a89..41eb0fbe 100644 --- a/test/helpers/metal.jl +++ b/test/helpers/metal.jl @@ -35,6 +35,15 @@ function code_native(io::IO, @nospecialize(func), @nospecialize(types); kwargs.. GPUCompiler.code_native(io, job; kwargs...) end +# aliases without ::IO argument +for method in (:code_warntype, :code_llvm, :code_native) + method = Symbol("$(method)") + @eval begin + $method(@nospecialize(func), @nospecialize(types); kwargs...) = + $method(stdout, func, types; kwargs...) + end +end + # simulates codegen for a kernel function: validates by default function code_execution(@nospecialize(func), @nospecialize(types); kwargs...) job, kwargs = create_job(func, types; kernel=true, kwargs...) diff --git a/test/helpers/ptx.jl b/test/helpers/ptx.jl index 5f8a3c48..e82416bc 100644 --- a/test/helpers/ptx.jl +++ b/test/helpers/ptx.jl @@ -67,6 +67,15 @@ function code_native(io::IO, @nospecialize(func), @nospecialize(types); kwargs.. GPUCompiler.code_native(io, job; kwargs...) end +# aliases without ::IO argument +for method in (:code_warntype, :code_llvm, :code_native) + method = Symbol("$(method)") + @eval begin + $method(@nospecialize(func), @nospecialize(types); kwargs...) 
= + $method(stdout, func, types; kwargs...) + end +end + # simulates codegen for a kernel function: validates by default function code_execution(@nospecialize(func), @nospecialize(types); kwargs...) job, kwargs = create_job(func, types; kernel=true, kwargs...) diff --git a/test/helpers/spirv.jl b/test/helpers/spirv.jl index 73d030d1..0144cd6a 100644 --- a/test/helpers/spirv.jl +++ b/test/helpers/spirv.jl @@ -38,6 +38,15 @@ function code_native(io::IO, @nospecialize(func), @nospecialize(types); kwargs.. GPUCompiler.code_native(io, job; kwargs...) end +# aliases without ::IO argument +for method in (:code_warntype, :code_llvm, :code_native) + method = Symbol("$(method)") + @eval begin + $method(@nospecialize(func), @nospecialize(types); kwargs...) = + $method(stdout, func, types; kwargs...) + end +end + # simulates codegen for a kernel function: validates by default function code_execution(@nospecialize(func), @nospecialize(types); kwargs...) job, kwargs = create_job(func, types; kernel=true, kwargs...) 
diff --git a/test/helpers/test.jl b/test/helpers/test.jl index eb60c9a1..9de421e4 100644 --- a/test/helpers/test.jl +++ b/test/helpers/test.jl @@ -35,3 +35,126 @@ end ret i64 %value""" return :(Base.llvmcall($llvmcall_str, T, Tuple{T}, i)) end + +# filecheck utils + +module FileCheck + import LLVM_jll + import IOCapture + using GPUCompiler, LLVM + using Test + + export filecheck, @filecheck, @check_str + + global filecheck_path::String + function __init__() + # TODO: Windows + global filecheck_path = joinpath(LLVM_jll.artifact_dir, "tools", "FileCheck") + end + + function filecheck_exe(; adjust_PATH::Bool=true, adjust_LIBPATH::Bool=true) + env = Base.invokelatest( + LLVM_jll.JLLWrappers.adjust_ENV!, + copy(ENV), + LLVM_jll.PATH[], + LLVM_jll.LIBPATH[], + adjust_PATH, + adjust_LIBPATH + ) + + return Cmd(Cmd([filecheck_path]); env) + end + + const julia_typed_pointers = JuliaContext() do ctx + supports_typed_pointers(ctx) + end + + # run `f`, capture what it prints, returns or throws, and match that against the FileCheck directives in `input`; returns `true` or raises with FileCheck's diagnostics + function filecheck(f, input) + # FileCheck assumes that the input is available as a file + mktemp() do path, input_io + write(input_io, input) + close(input_io) + + # capture the output of `f` and write it into a temporary buffer + result = IOCapture.capture(rethrow=Union{}) do + f(input) + end + output_io = IOBuffer() + write(output_io, result.output) + println(output_io) + + if result.error + # if the function errored, also render the exception and backtrace + showerror(output_io, result.value, result.backtrace) + elseif result.value !== nothing + # also show the returned value; some APIs don't print + write(output_io, string(result.value)) + end + + # determine some useful prefixes for FileCheck + prefixes = ["CHECK", + "JULIA$(VERSION.major)_$(VERSION.minor)", + "LLVM$(Base.libllvm_version.major)"] + ## whether we use typed pointers or opaque pointers + if julia_typed_pointers + push!(prefixes, "TYPED") + else + push!(prefixes, "OPAQUE") + end + ## whether we pass pointers as integers or as actual pointers + if VERSION >= 
v"1.12.0-DEV.225" + push!(prefixes, "PTR_ABI") + else + push!(prefixes, "INTPTR_ABI") + end + + # now pass the collected output to FileCheck + seekstart(output_io) + filecheck_io = Pipe() + cmd = ```$(filecheck_exe()) + --color + --allow-unused-prefixes + --check-prefixes $(join(prefixes, ',')) + $path``` + proc = run(pipeline(ignorestatus(cmd); stdin=output_io, stdout=filecheck_io, stderr=filecheck_io); wait=false) + close(filecheck_io.in) + + # collect the output of FileCheck + reader = Threads.@spawn String(read(filecheck_io)) + Base.wait(proc) + log = strip(fetch(reader)) + + # error out if FileCheck did not succeed. + # otherwise, return true so that `@test @filecheck` works as expected. + if !success(proc) + error(log) + end + return true + end + end + + # collect checks used in the @filecheck block by piggybacking on macro expansion + const checks = String[] + macro check_str(str) + push!(checks, str) + nothing + end + + macro filecheck(ex) + # start from a clean slate: a stray `check"..."` or an earlier expansion that threw would otherwise leak its checks into this block + empty!(checks) + ex = Base.macroexpand(__module__, ex) + if isempty(checks) + error("No checks provided within the @filecheck macro block") + end + check_str = join(checks, "\n") + empty!(checks) + + esc(quote + filecheck($check_str) do _ + $ex + end + end) + end +end diff --git a/test/setup.jl b/test/setup.jl index 07adf625..5744a675 100644 --- a/test/setup.jl +++ b/test/setup.jl @@ -9,6 +9,7 @@ for file in readdir(joinpath(@__DIR__, "helpers")) include(joinpath(@__DIR__, "helpers", file)) end end +using .FileCheck ## entry point diff --git a/test/utils.jl b/test/utils.jl index f0de138d..6d17ab73 100644 --- a/test/utils.jl +++ b/test/utils.jl @@ -164,3 +164,26 @@ next_world = Base.get_world_counter() @test o_sin.overlayed == true end end + +# Test FileCheck +@testset "FileCheck" begin + @test @filecheck begin + check"CHECK: works" + println("works") + end + + @test_throws "expected string not found in input" @filecheck begin + check"CHECK: works" + println("doesn't work") + end + + @test @filecheck begin + check"CHECK: errors" + 
error("errors") + end + + @test_throws "expected string not found in input" @filecheck begin + check"CHECK: works" + error("errors") + end +end From ee3ba8f6aa9bb20dbb5a8749cc12c027b38fcada Mon Sep 17 00:00:00 2001 From: Tim Besard Date: Tue, 24 Jun 2025 11:37:19 +0200 Subject: [PATCH 2/6] Convert tests to using filecheck. --- test/bpf.jl | 29 ++++-- test/gcn.jl | 101 +++++++++++-------- test/metal.jl | 87 +++++++++------- test/native.jl | 202 +++++++++++++++++++++++-------------- test/ptx.jl | 266 +++++++++++++++++++++++++++++-------------------- test/spirv.jl | 57 ++++++----- 6 files changed, 455 insertions(+), 287 deletions(-) diff --git a/test/bpf.jl b/test/bpf.jl index d94b0d32..07fa9fa7 100644 --- a/test/bpf.jl +++ b/test/bpf.jl @@ -1,20 +1,30 @@ @testset "No-op" begin kernel() = 0 - output = sprint(io->BPF.code_native(io, kernel, ())) - @test occursin("\tr0 = 0\n\texit", output) + @test @filecheck begin + check"CHECK: r0 = 0" + check"CHECK-NEXT: exit" + BPF.code_native(kernel, ()) + end end @testset "Return argument" begin kernel(x) = x - output = sprint(io->BPF.code_native(io, kernel, (UInt64,))) - @test occursin("\tr0 = r1\n\texit", output) + @test @filecheck begin + check"CHECK: r0 = r1" + check"CHECK-NEXT: exit" + BPF.code_native(kernel, (UInt64,)) + end end @testset "Addition" begin kernel(x) = x+1 - output = sprint(io->BPF.code_native(io, kernel, (UInt64,))) - @test occursin("\tr0 = r1\n\tr0 += 1\n\texit", output) + @test @filecheck begin + check"CHECK: r0 = r1" + check"CHECK-NEXT: r0 += 1" + check"CHECK-NEXT: exit" + BPF.code_native(kernel, (UInt64,)) + end end @testset "Errors" begin kernel(x) = fakefunc(x) @@ -26,8 +36,11 @@ end goodcall(x) = Base.llvmcall("%2 = call i64 inttoptr (i64 3 to i64 (i64)*)(i64 %0)\nret i64 %2", Int, Tuple{Int}, x) kernel(x) = goodcall(x) - output = sprint(io->BPF.code_native(io, kernel, (Int,))) - @test occursin(r"\tcall .*\n\texit", output) + @test @filecheck begin + check"CHECK: call" + check"CHECK-NEXT: exit" + 
BPF.code_native(kernel, (Int,)) + end end @testset "invalid" begin badcall(x) = Base.llvmcall("%2 = call i64 inttoptr (i64 3000 to i64 (i64)*)(i64 %0)\nret i64 %2", Int, Tuple{Int}, x) diff --git a/test/gcn.jl b/test/gcn.jl index c16bc5d4..4671ff8d 100644 --- a/test/gcn.jl +++ b/test/gcn.jl @@ -4,12 +4,15 @@ if :AMDGPU in LLVM.backends() @testset "kernel calling convention" begin kernel() = return - ir = sprint(io->GCN.code_llvm(io, kernel, Tuple{}; dump_module=true)) - @test !occursin("amdgpu_kernel", ir) + @test @filecheck begin + check"CHECK-NOT: amdgpu_kernel" + GCN.code_llvm(kernel, Tuple{}; dump_module=true) + end - ir = sprint(io->GCN.code_llvm(io, kernel, Tuple{}; - dump_module=true, kernel=true)) - @test occursin("amdgpu_kernel", ir) + @test @filecheck begin + check"CHECK: amdgpu_kernel" + GCN.code_llvm(kernel, Tuple{}; dump_module=true, kernel=true) + end end end @@ -27,8 +30,10 @@ end return end - asm = sprint(io->GCN.code_native(io, kernel, Tuple{})) - @test occursin("s_trap 2", asm) + @test @filecheck begin + check"CHECK: s_trap 2" + GCN.code_native(kernel, Tuple{}) + end @test_skip occursin("s_cbranch_execz", asm) if Base.libllvm_version < v"9" @test_broken occursin("v_readfirstlane", asm) @@ -44,9 +49,11 @@ end return end - asm = sprint(io->GCN.code_native(io, parent, Tuple{Int64}; dump_module=true)) - @test occursin(r"s_add_u32.*(julia|j)_child_.*@rel32@", asm) - @test occursin(r"s_addc_u32.*(julia|j)_child_.*@rel32@", asm) + @test @filecheck begin + check"CHECK: s_add_u32{{.*(julia|j)_child_.*}}@rel32@" + check"CHECK: s_addc_u32{{.*(julia|j)_child_.*}}@rel32@" + GCN.code_native(parent, Tuple{Int64}; dump_module=true) + end end @testset "kernel functions" begin @@ -56,10 +63,12 @@ end return end - asm = sprint(io->GCN.code_native(io, entry, Tuple{Int64}; dump_module=true, kernel=true)) - @test occursin(r"\.amdhsa_kernel \w*entry", asm) - @test !occursin(r"\.amdhsa_kernel \w*nonentry", asm) - @test occursin(r"\.type.*\w*nonentry\w*,@function", asm) + 
@test @filecheck begin + check"CHECK-NOT: .amdhsa_kernel {{.*}}nonentry" + check"CHECK: .type {{.*nonentry.*}},@function" + check"CHECK: .amdhsa_kernel {{.*entry.*}}" + GCN.code_native(entry, Tuple{Int64}; dump_module=true, kernel=true) + end end @testset "child function reuse" begin @@ -80,11 +89,15 @@ end end end - asm = sprint(io->GCN.code_native(io, mod.parent1, Tuple{Int}; dump_module=true)) - @test occursin(r"\.type.*(julia|j)_[[:alnum:]_.]*child_\d*,@function", asm) + @test @filecheck begin + check"CHECK: .type {{.*child.*}},@function" + GCN.code_native(mod.parent1, Tuple{Int}; dump_module=true) + end - asm = sprint(io->GCN.code_native(io, mod.parent2, Tuple{Int}; dump_module=true)) - @test occursin(r"\.type.*(julia|j)_[[:alnum:]_.]*child_\d*,@function", asm) + @test @filecheck begin + check"CHECK: .type {{.*child.*}},@function" + GCN.code_native(mod.parent2, Tuple{Int}; dump_module=true) + end end @testset "child function reuse bis" begin @@ -106,13 +119,17 @@ end end end - asm = sprint(io->GCN.code_native(io, mod.parent1, Tuple{Int}; dump_module=true)) - @test occursin(r"\.type.*(julia|j)_[[:alnum:]_.]*child1_\d*,@function", asm) - @test occursin(r"\.type.*(julia|j)_[[:alnum:]_.]*child2_\d*,@function", asm) + @test @filecheck begin + check"CHECK-DAG: .type {{.*child1.*}},@function" + check"CHECK-DAG: .type {{.*child2.*}},@function" + GCN.code_native(mod.parent1, Tuple{Int}; dump_module=true) + end - asm = sprint(io->GCN.code_native(io, mod.parent2, Tuple{Int}; dump_module=true)) - @test occursin(r"\.type.*(julia|j)_[[:alnum:]_.]*child1_\d*,@function", asm) - @test occursin(r"\.type.*(julia|j)_[[:alnum:]_.]*child2_\d*,@function", asm) + @test @filecheck begin + check"CHECK-DAG: .type {{.*child1.*}},@function" + check"CHECK-DAG: .type {{.*child2.*}},@function" + GCN.code_native(mod.parent2, Tuple{Int}; dump_module=true) + end end @testset "indirect sysimg function use" begin @@ -127,9 +144,11 @@ end return end - asm = sprint(io->GCN.code_native(io, kernel, 
Tuple{Ptr{Int32}})) - @test !occursin("jl_throw", asm) - @test !occursin("jl_invoke", asm) # forced recompilation should still not invoke + @test @filecheck begin + check"CHECK-NOT: jl_throw" + check"CHECK-NOT: jl_invoke" + GCN.code_native(kernel, Tuple{Ptr{Int32}}) + end end @testset "LLVM intrinsics" begin @@ -171,12 +190,14 @@ false && @testset "GC and TLS lowering" begin end end - asm = sprint(io->GCN.code_native(io, mod.kernel, Tuple{Int})) - @test occursin("gpu_gc_pool_alloc", asm) - @test !occursin("julia.push_gc_frame", asm) - @test !occursin("julia.pop_gc_frame", asm) - @test !occursin("julia.get_gc_frame_slot", asm) - @test !occursin("julia.new_gc_frame", asm) + @test @filecheck begin + check"CHECK-NOT: jl_push_gc_frame" + check"CHECK-NOT: jl_pop_gc_frame" + check"CHECK-NOT: jl_get_gc_frame_slot" + check"CHECK-NOT: jl_new_gc_frame" + check"CHECK: gpu_gc_pool_alloc" + GCN.code_native(mod.kernel, Tuple{Int}) + end # make sure that we can still ellide allocations function ref_kernel(ptr, i) @@ -191,10 +212,10 @@ false && @testset "GC and TLS lowering" begin return nothing end - asm = sprint(io->GCN.code_native(io, ref_kernel, Tuple{Ptr{Int64}, Int})) - - - @test !occursin("gpu_gc_pool_alloc", asm) + @test @filecheck begin + check"CHECK-NOT: gpu_gc_pool_alloc" + GCN.code_native(ref_kernel, Tuple{Ptr{Int64}, Int}) + end end @testset "float boxes" begin @@ -208,8 +229,10 @@ end return end - ir = sprint(io->GCN.code_llvm(io, kernel, Tuple{Float32,Ptr{Float32}})) - @test occursin("jl_box_float32", ir) + @test @filecheck begin + check"CHECK: jl_box_float32" + GCN.code_llvm(kernel, Tuple{Float32,Ptr{Float32}}) + end GCN.code_native(devnull, kernel, Tuple{Float32,Ptr{Float32}}) end diff --git a/test/metal.jl b/test/metal.jl index ae854353..5ddef1f7 100644 --- a/test/metal.jl +++ b/test/metal.jl @@ -4,44 +4,54 @@ @testset "byref aggregates" begin kernel(x) = return - ir = sprint(io->Metal.code_llvm(io, kernel, Tuple{Tuple{Int}})) - @test occursin(r"@\w*kernel\w*\(({ 
i64 }|\[1 x i64\])\*", ir) || - occursin(r"@\w*kernel\w*\(ptr", ir) + @test @filecheck begin + check"TYPED: @{{.*kernel.*}}({{(\{ i64 \}|\[1 x i64\])}}*" + check"OPAQUE: @{{.*kernel.*}}(ptr" + Metal.code_llvm(kernel, Tuple{Tuple{Int}}) + end # for kernels, every pointer argument needs to take an address space - ir = sprint(io->Metal.code_llvm(io, kernel, Tuple{Tuple{Int}}; kernel=true)) - @test occursin(r"@\w*kernel\w*\(({ i64 }|\[1 x i64\]) addrspace\(1\)\*", ir) || - occursin(r"@\w*kernel\w*\(ptr addrspace\(1\)", ir) + @test @filecheck begin + check"TYPED: @{{.*kernel.*}}({{(\{ i64 \}|\[1 x i64\])}} addrspace(1)*" + check"OPAQUE: @{{.*kernel.*}}(ptr addrspace(1)" + Metal.code_llvm(kernel, Tuple{Tuple{Int}}; kernel=true) + end end @testset "byref primitives" begin kernel(x) = return - ir = sprint(io->Metal.code_llvm(io, kernel, Tuple{Int})) - @test occursin(r"@\w*kernel\w*\(i64 ", ir) + @test @filecheck begin + check"CHECK: @{{.*kernel.*}}(i64 " + Metal.code_llvm(kernel, Tuple{Int}) + end # for kernels, every pointer argument needs to take an address space - ir = sprint(io->Metal.code_llvm(io, kernel, Tuple{Int}; kernel=true)) - @test occursin(r"@\w*kernel\w*\(i64 addrspace\(1\)\*", ir) || - occursin(r"@\w*kernel\w*\(ptr addrspace\(1\)", ir) + @test @filecheck begin + check"TYPED: @{{.*kernel.*}}(i64 addrspace(1)*" + check"OPAQUE: @{{.*kernel.*}}(ptr addrspace(1)" + Metal.code_llvm(kernel, Tuple{Int}; kernel=true) + end end @testset "module metadata" begin kernel() = return - ir = sprint(io->Metal.code_llvm(io, kernel, Tuple{}; - dump_module=true, kernel=true)) - @test occursin("air.version", ir) - @test occursin("air.language_version", ir) - @test occursin("air.max_device_buffers", ir) + @test @filecheck begin + check"CHECK: air.version" + check"CHECK: air.language_version" + check"CHECK: air.max_device_buffers" + Metal.code_llvm(kernel, Tuple{}; dump_module=true, kernel=true) + end end @testset "argument metadata" begin kernel(x) = return - ir = 
sprint(io->Metal.code_llvm(io, kernel, Tuple{Int}; - dump_module=true, kernel=true)) - @test occursin("air.buffer", ir) + @test @filecheck begin + check"CHECK: air.buffer" + Metal.code_llvm(kernel, Tuple{Int}; dump_module=true, kernel=true) + end # XXX: perform more exhaustive testing of argument passing metadata here, # or just defer to execution testing in Metal.jl? @@ -54,23 +64,29 @@ end return end - ir = sprint(io->Metal.code_llvm(io, kernel, Tuple{Core.LLVMPtr{Int,1}})) - @test occursin(r"@\w*kernel\w*\(.* addrspace\(1\)\* %.+\)", ir) || - occursin(r"@\w*kernel\w*\(ptr addrspace\(1\) %.+\)", ir) - @test occursin(r"call i32 @julia.air.thread_position_in_threadgroup.i32", ir) + @test @filecheck begin + check"TYPED: @{{.*kernel.*}}({{.*}} addrspace(1)* %{{.+}})" + check"OPAQUE: @{{.*kernel.*}}(ptr addrspace(1) %{{.+}})" + check"CHECK: call i32 @julia.air.thread_position_in_threadgroup.i32" + Metal.code_llvm(kernel, Tuple{Core.LLVMPtr{Int,1}}) + end - ir = sprint(io->Metal.code_llvm(io, kernel, Tuple{Core.LLVMPtr{Int,1}}; kernel=true)) - @test occursin(r"@\w*kernel\w*\(.* addrspace\(1\)\* %.+, i32 %thread_position_in_threadgroup\)", ir) || - occursin(r"@\w*kernel\w*\(ptr addrspace\(1\) %.+, i32 %thread_position_in_threadgroup\)", ir) - @test !occursin(r"call i32 @julia.air.thread_position_in_threadgroup.i32", ir) + @test @filecheck begin + check"TYPED: @{{.*kernel.*}}({{.*}} addrspace(1)* %{{.+}}, i32 %thread_position_in_threadgroup)" + check"OPAQUE: @{{.*kernel.*}}(ptr addrspace(1) %{{.+}}, i32 %thread_position_in_threadgroup)" + check"CHECK-NOT: call i32 @julia.air.thread_position_in_threadgroup.i32" + Metal.code_llvm(kernel, Tuple{Core.LLVMPtr{Int,1}}; kernel=true) + end end @testset "vector intrinsics" begin foo(x, y) = ccall("llvm.smax.v2i64", llvmcall, NTuple{2, VecElement{Int64}}, (NTuple{2, VecElement{Int64}}, NTuple{2, VecElement{Int64}}), x, y) - ir = sprint(io->Metal.code_llvm(io, foo, (NTuple{2, VecElement{Int64}}, NTuple{2, VecElement{Int64}}))) - 
@test occursin("air.max.s.v2i64", ir) + @test @filecheck begin + check"CHECK: air.max.s.v2i64" + Metal.code_llvm(foo, (NTuple{2, VecElement{Int64}}, NTuple{2, VecElement{Int64}})) + end end @testset "unsupported type detection" begin @@ -104,8 +120,10 @@ end return end - ir = sprint(io->Metal.code_llvm(io, kernel1, Tuple{Core.LLVMPtr{Float32,1}}; validate=true)) - @test occursin("@metal_os_log", ir) + @test @filecheck begin + check"CHECK: @metal_os_log" + Metal.code_llvm(kernel1, Tuple{Core.LLVMPtr{Float32,1}}; validate=true) + end function kernel2(ptr) val = unsafe_load(ptr) @@ -130,9 +148,10 @@ end end end - ir = sprint(io->Metal.code_llvm(io, mod.kernel, Tuple{Core.LLVMPtr{Float32,1}, Int}; - dump_module=true, kernel=true)) - @test occursin("addrspace(2) constant [2 x float]", ir) + @test @filecheck begin + check"CHECK: addrspace(2) constant [2 x float]" + Metal.code_llvm(mod.kernel, Tuple{Core.LLVMPtr{Float32,1}, Int}; dump_module=true, kernel=true) + end end end diff --git a/test/native.jl b/test/native.jl index 18217613..ec380f7b 100644 --- a/test/native.jl +++ b/test/native.jl @@ -6,14 +6,20 @@ ci, rt = only(GPUCompiler.code_typed(job)) @test rt === Int - ir = sprint(io->GPUCompiler.code_warntype(io, job)) - @test contains(ir, "MethodInstance for identity") + @test @filecheck begin + check"CHECK: MethodInstance for identity" + GPUCompiler.code_warntype(job) + end - ir = sprint(io->GPUCompiler.code_llvm(io, job)) - @test contains(ir, r"(julia|j)_identity") + @test @filecheck begin + check"CHECK: {{(julia|j)_identity}}" + GPUCompiler.code_llvm(job) + end - asm = sprint(io->GPUCompiler.code_native(io, job)) - @test contains(asm, r"(julia|j)_identity") + @test @filecheck begin + check"CHECK: {{(julia|j)_identity}}" + GPUCompiler.code_native(job) + end end @testset "compilation" begin @@ -74,14 +80,18 @@ end # smoke test job, _ = Native.create_job(eval(kernel), (Int64,)) - ir = sprint(io->GPUCompiler.code_llvm(io, job)) - @test contains(ir, r"add i64 %\d+, 1") + 
@test @filecheck begin + check"CHECK: add i64 %{{[0-9]+}}, 1" + GPUCompiler.code_llvm(job) + end # basic redefinition @eval $kernel(i) = $child(i)+2 job, _ = Native.create_job(eval(kernel), (Int64,)) - ir = sprint(io->GPUCompiler.code_llvm(io, job)) - @test contains(ir, r"add i64 %\d+, 2") + @test @filecheck begin + check"CHECK: add i64 %{{[0-9]+}}, 2" + GPUCompiler.code_llvm(job) + end # cached_compilation interface invocations = Ref(0) @@ -99,46 +109,56 @@ end # initial compilation source = methodinstance(ft, tt, Base.get_world_counter()) - ir = Base.invokelatest(GPUCompiler.cached_compilation, cache, source, job.config, compiler, linker) - @test contains(ir, r"add i64 %\d+, 2") + @test @filecheck begin + check"CHECK: add i64 %{{[0-9]+}}, 2" + Base.invokelatest(GPUCompiler.cached_compilation, cache, source, job.config, compiler, linker) + end @test invocations[] == 1 # cached compilation - ir = Base.invokelatest(GPUCompiler.cached_compilation, cache, source, job.config, compiler, linker) - @test contains(ir, r"add i64 %\d+, 2") + @test @filecheck begin + check"CHECK: add i64 %{{[0-9]+}}, 2" + Base.invokelatest(GPUCompiler.cached_compilation, cache, source, job.config, compiler, linker) + end @test invocations[] == 1 # redefinition @eval $kernel(i) = $child(i)+3 source = methodinstance(ft, tt, Base.get_world_counter()) - ir = Base.invokelatest(GPUCompiler.cached_compilation, cache, source, job.config, compiler, linker) - @test contains(ir, r"add i64 %\d+, 3") + @test @filecheck begin + check"CHECK: add i64 %{{[0-9]+}}, 3" + Base.invokelatest(GPUCompiler.cached_compilation, cache, source, job.config, compiler, linker) + end @test invocations[] == 2 # cached compilation - ir = Base.invokelatest(GPUCompiler.cached_compilation, cache, source, job.config, compiler, linker) - @test contains(ir, r"add i64 %\d+, 3") + @test @filecheck begin + check"CHECK: add i64 %{{[0-9]+}}, 3" + Base.invokelatest(GPUCompiler.cached_compilation, cache, source, job.config, compiler, 
linker) + end @test invocations[] == 2 # redefinition of an unrelated function @eval $unrelated(i) = 42 - ir = Base.invokelatest(GPUCompiler.cached_compilation, cache, source, job.config, compiler, linker) + Base.invokelatest(GPUCompiler.cached_compilation, cache, source, job.config, compiler, linker) @test invocations[] == 2 # redefining child functions @eval @noinline $child(i) = i+1 - ir = Base.invokelatest(GPUCompiler.cached_compilation, cache, source, job.config, compiler, linker) + Base.invokelatest(GPUCompiler.cached_compilation, cache, source, job.config, compiler, linker) @test invocations[] == 3 # cached compilation - ir = Base.invokelatest(GPUCompiler.cached_compilation, cache, source, job.config, compiler, linker) + Base.invokelatest(GPUCompiler.cached_compilation, cache, source, job.config, compiler, linker) @test invocations[] == 3 # change in configuration config = CompilerConfig(job.config; name="foobar") - ir = Base.invokelatest(GPUCompiler.cached_compilation, cache, source, config, compiler, linker) + @test @filecheck begin + check"CHECK: foobar" + Base.invokelatest(GPUCompiler.cached_compilation, cache, source, config, compiler, linker) + end @test invocations[] == 4 - @test contains(ir, "foobar") # tasks running in the background should keep on using the old version c1, c2 = Condition(), Condition() @@ -155,8 +175,10 @@ end ir = Base.invokelatest(GPUCompiler.cached_compilation, cache, source, job.config, compiler, linker) @test contains(ir, r"add i64 %\d+, 4") notify(c2) # wake up the task - ir = fetch(t) - @test contains(ir, r"add i64 %\d+, 3") + @test @filecheck begin + check"CHECK: add i64 %{{[0-9]+}}, 3" + fetch(t) + end end @testset "allowed mutable types" begin @@ -180,14 +202,14 @@ end valid_kernel() = return invalid_kernel() = 1 - ir = sprint(io->Native.code_llvm(io, valid_kernel, Tuple{}; optimize=false, dump_module=true)) - - # module should contain our function + a generic call wrapper - @test occursin(r"define\ .* void\ 
@.*(julia|j)_valid_kernel.*\(\)"x, ir) - @test !occursin("define %jl_value_t* @jlcall_", ir) - - # there should be no debug metadata - @test !occursin("!dbg", ir) + @test @filecheck begin + # module should contain our function + a generic call wrapper + check"CHECK-NOT: define %jl_value_t* @jlcall_" + check"CHECK: define{{.*}} void @{{.*(julia|j)_valid_kernel.*}}()" + # there should be no debug metadata + check"CHECK-NOT: !dbg" + Native.code_llvm(valid_kernel, Tuple{}; optimize=false, dump_module=true) + end @test Native.code_llvm(devnull, invalid_kernel, Tuple{}) == nothing @test_throws KernelError Native.code_llvm(devnull, invalid_kernel, Tuple{}; kernel=true) == nothing @@ -203,8 +225,10 @@ end @noinline child(i) = sink(i) parent(i) = child(i) - ir = sprint(io->Native.code_llvm(io, parent, Tuple{Int})) - @test occursin(r"call .+ @(julia|j).+child.+", ir) + @test @filecheck begin + check"CHECK: call {{.+}} @{{(julia|j).+child.+}}" + Native.code_llvm(parent, Tuple{Int}) + end end @testset "sysimg" begin @@ -214,8 +238,10 @@ end Base.pointerset(a, 0, mod1(i,10), 8) end - ir = sprint(io->Native.code_llvm(io, foobar, Tuple{Ptr{Int},Int})) - @test !occursin("jlsys_", ir) + @test @filecheck begin + check"CHECK-NOT: jlsys_" + Native.code_llvm(foobar, Tuple{Ptr{Int},Int}) + end end @testset "tracked pointers" begin @@ -261,21 +287,29 @@ end @testset "slow abi" begin x = 2 f = () -> x+1 - ir = sprint(io->Native.code_llvm(io, f, Tuple{}, entry_abi=:func, dump_module=true)) - @test occursin(r"define nonnull {}\* @jfptr", ir) || - occursin(r"define nonnull ptr @jfptr", ir) - @test occursin(r"define internal fastcc .+ @julia", ir) - @test occursin(r"call fastcc .+ @julia", ir) + @test @filecheck begin + check"CHECK: define internal fastcc {{.+}} @julia" + check"TYPED: define nonnull {}* @jfptr" + check"OPAQUE: define nonnull ptr @jfptr" + check"CHECK: call fastcc {{.+}} @julia" + Native.code_llvm(f, Tuple{}; entry_abi=:func, dump_module=true) + end end @testset "function 
entry safepoint emission" begin - ir = sprint(io->Native.code_llvm(io, identity, Tuple{Nothing}; entry_safepoint=false, optimize=false, dump_module=true)) - @test !occursin("%safepoint", ir) + @test @filecheck begin + check"CHECK-NOT: %safepoint" + Native.code_llvm(identity, Tuple{Nothing}; entry_safepoint=false, optimize=false, dump_module=true) + end - ir = sprint(io->Native.code_llvm(io, identity, Tuple{Nothing}; entry_safepoint=true, optimize=false, dump_module=true)) - @test occursin("%safepoint", ir) broken=(VERSION >= v"1.13.0-DEV.533") # XXX: broken by JuliaLang/julia#57010, # see https://github.com/JuliaLang/julia/pull/57010/files#r2079576894 + if VERSION < v"1.13.0-DEV.533" + @test @filecheck begin + check"CHECK: %safepoint" + Native.code_llvm(identity, Tuple{Nothing}; entry_safepoint=true, optimize=false, dump_module=true) + end + end end @testset "always_inline" begin @@ -292,19 +326,25 @@ end end end - ir = sprint(io->Native.code_llvm(io, mod.g, Tuple{Int64}; dump_module=true, kernel=true)) - @test occursin(r"^define.*(julia|j)_f_expensive"m, ir) + @test @filecheck begin + check"CHECK: define{{.*(julia|j)_f_expensive}}" + Native.code_llvm(mod.g, Tuple{Int64}; dump_module=true, kernel=true) + end - ir = sprint(io->Native.code_llvm(io, mod.g, Tuple{Int64}; dump_module=true, kernel=true, - always_inline=true)) - @test !occursin(r"^define.*(julia|j)_f_expensive"m, ir) + @test @filecheck begin + check"CHECK-NOT: define{{.*(julia|j)_f_expensive}}" + Native.code_llvm(mod.g, Tuple{Int64}; dump_module=true, kernel=true, always_inline=true) + end - ir = sprint(io->Native.code_llvm(io, mod.h, Tuple{Int64}; dump_module=true, kernel=true, - always_inline=true)) - @test !occursin(r"^define.*(julia|j)_f_expensive"m, ir) + @test @filecheck begin + check"CHECK-NOT: define{{.*(julia|j)_f_expensive}}" + Native.code_llvm(mod.h, Tuple{Int64}; dump_module=true, kernel=true, always_inline=true) + end - ir = sprint(io->Native.code_llvm(io, mod.h, Tuple{Int64}; 
dump_module=true, kernel=true)) - @test occursin(r"^define.*(julia|j)_f_expensive"m, ir) + @test @filecheck begin + check"CHECK: define{{.*(julia|j)_f_expensive}}" + Native.code_llvm(mod.h, Tuple{Int64}; dump_module=true, kernel=true) + end end @testset "function attributes" begin @@ -322,8 +362,10 @@ end Nothing, Tuple{}) end - ir = sprint(io->Native.code_llvm(io, convergent_barrier, Tuple{}; dump_module=true, raw=true)) - @test occursin(r"attributes #. = \{ convergent \}", ir) + @test @filecheck begin + check"CHECK: attributes #{{.}} = { convergent }" + Native.code_llvm(convergent_barrier, Tuple{}; dump_module=true, raw=true) + end end end @@ -495,11 +537,14 @@ end mod = @eval module $(gensym()) kernel() = child() - child() = 0 + @inline child() = 0 end - ir = sprint(io->Native.code_llvm(io, mod.kernel, Tuple{})) - @test occursin("ret i64 0", ir) + @test @filecheck begin + check"CHECK-LABEL: @julia_kernel" + check"CHECK: ret i64 0" + Native.code_llvm(mod.kernel, Tuple{}) + end mod = @eval module $(gensym()) using ..GPUCompiler @@ -507,13 +552,16 @@ end Base.Experimental.@MethodTable(method_table) kernel() = child() - child() = 0 + @inline child() = 0 Base.Experimental.@overlay method_table child() = 1 end - ir = sprint(io->Native.code_llvm(io, mod.kernel, Tuple{}; mod.method_table)) - @test occursin("ret i64 1", ir) + @test @filecheck begin + check"CHECK-LABEL: @julia_kernel" + check"CHECK: ret i64 1" + Native.code_llvm(mod.kernel, Tuple{}; mod.method_table) + end end @testset "#366: semi-concrete interpretation + overlay methods = dynamic dispatch" begin @@ -533,10 +581,12 @@ end (ccall("extern __nv_isnanf", llvmcall, Int32, (Cfloat,), x)) != 0 end - ir = sprint(io->Native.code_llvm(io, mod.kernel, Tuple{Int, Int}; - debuginfo=:none, mod.method_table)) - @test !occursin("apply_generic", ir) - @test occursin("llvm.floor", ir) + @test @filecheck begin + check"CHECK-LABEL: @julia_kernel" + check"CHECK-NOT: apply_generic" + check"CHECK: llvm.floor" + 
Native.code_llvm(mod.kernel, Tuple{Int, Int}; debuginfo=:none, mod.method_table) + end end @testset "JuliaLang/julia#48097: kwcall inference in the presence of overlay method" begin @@ -553,11 +603,13 @@ end Base.Experimental.@overlay method_table @noinline Core.throw_inexacterror(f::Symbol, ::Type{T}, val) where {T} = return end - ir = sprint(io->Native.code_llvm(io, mod.parent, Tuple{}; - debuginfo=:none, mod.method_table)) - - @test occursin("ret void", ir) - @test !any(f->occursin(f, ir), - ["jl_invoke", "apply_iterate", - "inttoptr", "apply_type"]) + @test @filecheck begin + check"CHECK-LABEL: @julia_parent" + check"CHECK-NOT: jl_invoke" + check"CHECK-NOT: apply_iterate" + check"CHECK-NOT: inttoptr" + check"CHECK-NOT: apply_type" + check"CHECK: ret void" + Native.code_llvm(mod.parent, Tuple{}; debuginfo=:none, mod.method_table) + end end diff --git a/test/ptx.jl b/test/ptx.jl index 5b2cfd8a..21db7a8e 100644 --- a/test/ptx.jl +++ b/test/ptx.jl @@ -2,12 +2,15 @@ @testset "exceptions" begin foobar() = throw(DivideError()) - ir = sprint(io->PTX.code_llvm(io, foobar, Tuple{})) - - # plain exceptions should get lowered to a call to the GPU run-time - @test occursin("gpu_report_exception", ir) - # not a jl_throw referencing a jl_value_t representing the exception - @test !occursin("jl_throw", ir) + @test @filecheck begin + check"CHECK-LABEL: foobar" + # plain exceptions should get lowered to a call to the GPU run-time + # not a jl_throw referencing a jl_value_t representing the exception + check"CHECK-NOT: jl_throw" + check"CHECK: gpu_report_exception" + + PTX.code_llvm(foobar, Tuple{}; dump_module=true) + end end @testset "kernel functions" begin @@ -20,58 +23,73 @@ end end end - ir = sprint(io->PTX.code_llvm(io, mod.kernel, Tuple{mod.Aggregate})) - @test occursin(r"@(julia|j)_kernel\w*\(({ i64 }|\[1 x i64\])\* ", ir) || - occursin(r"@(julia|j)_kernel\w*\(ptr ", ir) + @test @filecheck begin + check"CHECK-LABEL: @{{(julia|j)_kernel_[0-9]+}}" + check"TYPED-SAME: 
({{(\{ i64 \}|\[1 x i64\])}}*" + check"OPAQUE-SAME: (ptr" + PTX.code_llvm(mod.kernel, Tuple{mod.Aggregate}) + end - ir = sprint(io->PTX.code_llvm(io, mod.kernel, Tuple{mod.Aggregate}; kernel=true)) - @test occursin(r"@_Z6kernel9Aggregate\(.*({ i64 }|\[1 x i64\]) ", ir) + @test @filecheck begin + check"CHECK: @_Z6kernel9Aggregate({{.*({ i64 }|\[1 x i64\])}} " + PTX.code_llvm(mod.kernel, Tuple{mod.Aggregate}; kernel=true) + end end @testset "property_annotations" begin kernel() = return - ir = sprint(io->PTX.code_llvm(io, kernel, Tuple{}; dump_module=true)) - @test !occursin("nvvm.annotations", ir) - - ir = sprint(io->PTX.code_llvm(io, kernel, Tuple{}; - dump_module=true, kernel=true)) - @test occursin("nvvm.annotations", ir) - @test !occursin("maxntid", ir) - @test !occursin("reqntid", ir) - @test !occursin("minctasm", ir) - @test !occursin("maxnreg", ir) - - ir = sprint(io->PTX.code_llvm(io, kernel, Tuple{}; - dump_module=true, kernel=true, maxthreads=42)) - @test occursin("maxntidx\", i32 42", ir) - @test occursin("maxntidy\", i32 1", ir) - @test occursin("maxntidz\", i32 1", ir) - - ir = sprint(io->PTX.code_llvm(io, kernel, Tuple{}; - dump_module=true, kernel=true, minthreads=42)) - @test occursin("reqntidx\", i32 42", ir) - @test occursin("reqntidy\", i32 1", ir) - @test occursin("reqntidz\", i32 1", ir) - - ir = sprint(io->PTX.code_llvm(io, kernel, Tuple{}; - dump_module=true, kernel=true, blocks_per_sm=42)) - @test occursin("minctasm\", i32 42", ir) - - ir = sprint(io->PTX.code_llvm(io, kernel, Tuple{}; - dump_module=true, kernel=true, maxregs=42)) - @test occursin("maxnreg\", i32 42", ir) + @test @filecheck begin + check"CHECK-NOT: nvvm.annotations" + PTX.code_llvm(kernel, Tuple{}; dump_module=true) + end + + @test @filecheck begin + check"CHECK-NOT: maxntid" + check"CHECK-NOT: reqntid" + check"CHECK-NOT: minctasm" + check"CHECK-NOT: maxnreg" + check"CHECK: nvvm.annotations" + PTX.code_llvm(kernel, Tuple{}; dump_module=true, kernel=true) + end + + @test 
@filecheck begin + check"CHECK: maxntidx\", i32 42" + check"CHECK: maxntidy\", i32 1" + check"CHECK: maxntidz\", i32 1" + PTX.code_llvm(kernel, Tuple{}; dump_module=true, kernel=true, maxthreads=42) + end + + @test @filecheck begin + check"CHECK: reqntidx\", i32 42" + check"CHECK: reqntidy\", i32 1" + check"CHECK: reqntidz\", i32 1" + PTX.code_llvm(kernel, Tuple{}; dump_module=true, kernel=true, minthreads=42) + end + + @test @filecheck begin + check"CHECK: minctasm\", i32 42" + PTX.code_llvm(kernel, Tuple{}; dump_module=true, kernel=true, blocks_per_sm=42) + end + + @test @filecheck begin + check"CHECK: maxnreg\", i32 42" + PTX.code_llvm(kernel, Tuple{}; dump_module=true, kernel=true, maxregs=42) + end end LLVM.version() >= v"8" && @testset "calling convention" begin kernel() = return - ir = sprint(io->PTX.code_llvm(io, kernel, Tuple{}; dump_module=true)) - @test !occursin("ptx_kernel", ir) + @test @filecheck begin + check"CHECK-NOT: ptx_kernel" + PTX.code_llvm(kernel, Tuple{}; dump_module=true) + end - ir = sprint(io->PTX.code_llvm(io, kernel, Tuple{}; - dump_module=true, kernel=true)) - @test occursin("ptx_kernel", ir) + @test @filecheck begin + check"CHECK: ptx_kernel" + PTX.code_llvm(kernel, Tuple{}; dump_module=true, kernel=true) + end end @testset "kernel state" begin @@ -82,11 +100,15 @@ end kernel() = return end - ir = sprint(io->PTX.code_llvm(io, mod.kernel, Tuple{})) - @test occursin(r"@(julia|j)_kernel\w*\(\)", ir) + @test @filecheck begin + check"CHECK: @{{(julia|j)_kernel[0-9_]*}}()" + PTX.code_llvm(mod.kernel, Tuple{}) + end - ir = sprint(io->PTX.code_llvm(io, mod.kernel, Tuple{}; kernel=true)) - @test occursin("@_Z6kernel([1 x i64] %state)", ir) + @test @filecheck begin + check"CHECK: @_Z6kernel([1 x i64] %state)" + PTX.code_llvm(mod.kernel, Tuple{}; kernel=true) + end # state should only passed to device functions that use it @@ -104,20 +126,25 @@ end end end - ir = sprint(io->PTX.code_llvm(io, mod.kernel, Tuple{Ptr{Int64}}; - kernel=true, 
dump_module=true)) - # kernel should take state argument before all else - @test occursin(r"@_Z6kernelP5Int64\(\[1 x i64\] %state", ir) - + @test @filecheck begin + check"CHECK-LABEL: define ptx_kernel void @_Z6kernelP5Int64([1 x i64] %state" + check"CHECK-NOT: julia.gpu.state_getter" + PTX.code_llvm(mod.kernel, Tuple{Ptr{Int64}}; kernel=true, dump_module=true) + end # child1 doesn't use the state - @test occursin(r"@(julia|j)_child1\w*\((i64|i8\*|ptr)", ir) - + @test @filecheck begin + check"CHECK-LABEL: define internal fastcc i64 @{{(julia|j)_child1[0-9_]*}}" + check"INTPTR_ABI-SAME: (i64" + check"PTR_ABI-SAME: ({{(ptr|i8\*)}}" + PTX.code_llvm(mod.kernel, Tuple{Ptr{Int64}}; kernel=true, dump_module=true) + end # child2 does - @test occursin(r"@(julia|j)_child2\w*\(\[1 x i64\] %state", ir) - - # can't have the unlowered intrinsic - @test !occursin("julia.gpu.state_getter", ir) + @test @filecheck begin + check"CHECK-LABEL: define internal fastcc i64 @{{(julia|j)_child2[0-9_]*}}" + check"CHECK-SAME: ([1 x i64] %state" + PTX.code_llvm(mod.kernel, Tuple{Ptr{Int64}}; kernel=true, dump_module=true) + end end end @@ -142,8 +169,11 @@ if :NVPTX in LLVM.backends() end end - asm = sprint(io->PTX.code_native(io, mod.parent, Tuple{Int64})) - @test occursin(r"call.uni\s+(julia|j)_child_"m, asm) + @test @filecheck begin + check"CHECK: call.uni" + check"CHECK-NEXT: {{(julia|j)_child_}}" + PTX.code_native(mod.parent, Tuple{Int64}) + end end @testset "kernel functions" begin @@ -158,32 +188,39 @@ end end end - asm = sprint(io->PTX.code_native(io, mod.entry, Tuple{Int64}; - kernel=true, dump_module=true)) - @test occursin(".visible .entry _Z5entry5Int64", asm) - @test !occursin(r"\.visible \.func (julia|j)_nonentry", asm) - @test occursin(r"\.func (julia|j)_nonentry", asm) + @test @filecheck begin + check"CHECK-NOT: .visible .func {{(julia|j)_nonentry}}" + check"CHECK-LABEL: .visible .entry _Z5entry5Int64" + check"CHECK: {{(julia|j)_nonentry}}" + PTX.code_native(mod.entry, 
Tuple{Int64}; kernel=true, dump_module=true) + end @testset "property_annotations" begin - asm = sprint(io->PTX.code_native(io, mod.entry, Tuple{Int64}; kernel=true)) - @test !occursin("maxntid", asm) + @test @filecheck begin + check"CHECK-NOT: maxntid" + PTX.code_native(mod.entry, Tuple{Int64}; kernel=true) + end - asm = sprint(io->PTX.code_native(io, mod.entry, Tuple{Int64}; - kernel=true, maxthreads=42)) - @test occursin(".maxntid 42, 1, 1", asm) + @test @filecheck begin + check"CHECK: .maxntid 42, 1, 1" + PTX.code_native(mod.entry, Tuple{Int64}; kernel=true, maxthreads=42) + end - asm = sprint(io->PTX.code_native(io, mod.entry, Tuple{Int64}; - kernel=true, minthreads=42)) - @test occursin(".reqntid 42, 1, 1", asm) + @test @filecheck begin + check"CHECK: .reqntid 42, 1, 1" + PTX.code_native(mod.entry, Tuple{Int64}; kernel=true, minthreads=42) + end - asm = sprint(io->PTX.code_native(io, mod.entry, Tuple{Int64}; - kernel=true, blocks_per_sm=42)) - @test occursin(".minnctapersm 42", asm) + @test @filecheck begin + check"CHECK: .minnctapersm 42" + PTX.code_native(mod.entry, Tuple{Int64}; kernel=true, blocks_per_sm=42) + end if LLVM.version() >= v"4.0" - asm = sprint(io->PTX.code_native(io, mod.entry, Tuple{Int64}; - kernel=true, maxregs=42)) - @test occursin(".maxnreg 42", asm) + @test @filecheck begin + check"CHECK: .maxnreg 42" + PTX.code_native(mod.entry, Tuple{Int64}; kernel=true, maxregs=42) + end end end end @@ -207,11 +244,15 @@ end end end - asm = sprint(io->PTX.code_native(io, mod.parent1, Tuple{Int})) - @test occursin(r"\.func (julia|j)_child_", asm) + @test @filecheck begin + check"CHECK: .func {{(julia|j)_child}}" + PTX.code_native(mod.parent1, Tuple{Int}) + end - asm = sprint(io->PTX.code_native(io, mod.parent2, Tuple{Int})) - @test occursin(r"\.func (julia|j)_child_", asm) + @test @filecheck begin + check"CHECK: .func {{(julia|j)_child}}" + PTX.code_native(mod.parent2, Tuple{Int}) + end end @testset "child function reuse bis" begin @@ -234,13 +275,17 
@@ end end end - asm = sprint(io->PTX.code_native(io, mod.parent1, Tuple{Int})) - @test occursin(r"\.func (julia|j)_child1_", asm) - @test occursin(r"\.func (julia|j)_child2_", asm) + @test @filecheck begin + check"CHECK-DAG: .func {{(julia|j)_child1}}" + check"CHECK-DAG: .func {{(julia|j)_child2}}" + PTX.code_native(mod.parent1, Tuple{Int}) + end - asm = sprint(io->PTX.code_native(io, mod.parent2, Tuple{Int})) - @test occursin(r"\.func (julia|j)_child1_", asm) - @test occursin(r"\.func (julia|j)_child2_", asm) + @test @filecheck begin + check"CHECK-DAG: .func {{(julia|j)_child1}}" + check"CHECK-DAG: .func {{(julia|j)_child2}}" + PTX.code_native(mod.parent2, Tuple{Int}) + end end @testset "indirect sysimg function use" begin @@ -255,9 +300,11 @@ end return end - asm = sprint(io->PTX.code_native(io, kernel, Tuple{Ptr{Int32}})) - @test !occursin("jl_throw", asm) - @test !occursin("jl_invoke", asm) # forced recompilation should still not invoke + @test @filecheck begin + check"CHECK-NOT: jl_throw" + check"CHECK-NOT: jl_invoke" + PTX.code_native(kernel, Tuple{Ptr{Int32}}) + end end @testset "LLVM intrinsics" begin @@ -299,13 +346,14 @@ end end end - asm = sprint(io->PTX.code_native(io, mod.kernel, Tuple{Int})) - @test occursin("gpu_gc_pool_alloc", asm) - @test !occursin("julia.push_gc_frame", asm) - @test !occursin("julia.pop_gc_frame", asm) - @test !occursin("julia.get_gc_frame_slot", asm) - @test !occursin("julia.new_gc_frame", asm) - + @test @filecheck begin + check"CHECK-NOT: julia.push_gc_frame" + check"CHECK-NOT: julia.pop_gc_frame" + check"CHECK-NOT: julia.get_gc_frame_slot" + check"CHECK-NOT: julia.new_gc_frame" + check"CHECK: gpu_gc_pool_alloc" + PTX.code_native(mod.kernel, Tuple{Int}) + end # make sure that we can still ellide allocations function ref_kernel(ptr, i) @@ -320,10 +368,10 @@ end return nothing end - asm = sprint(io->PTX.code_native(io, ref_kernel, Tuple{Ptr{Int64}, Int})) - - - @test !occursin("gpu_gc_pool_alloc", asm) + @test @filecheck begin + 
check"CHECK-NOT: gpu_gc_pool_alloc" + PTX.code_native(ref_kernel, Tuple{Ptr{Int64}, Int}) + end end @testset "float boxes" begin @@ -337,8 +385,10 @@ end return end - ir = sprint(io->PTX.code_llvm(io, kernel, Tuple{Float32,Ptr{Float32}})) - @test occursin("jl_box_float32", ir) + @test @filecheck begin + check"CHECK: jl_box_float32" + PTX.code_llvm(kernel, Tuple{Float32,Ptr{Float32}}) + end PTX.code_native(devnull, kernel, Tuple{Float32,Ptr{Float32}}) end diff --git a/test/spirv.jl b/test/spirv.jl index e14ccf77..eb1be7d9 100644 --- a/test/spirv.jl +++ b/test/spirv.jl @@ -6,12 +6,15 @@ for backend in (:khronos, :llvm) @testset "calling convention" begin kernel() = return - ir = sprint(io->SPIRV.code_llvm(io, kernel, Tuple{}; backend, dump_module=true)) - @test !occursin("spir_kernel", ir) + @test @filecheck begin + check"CHECK-NOT: spir_kernel" + SPIRV.code_llvm(kernel, Tuple{}; backend, dump_module=true) + end - ir = sprint(io->SPIRV.code_llvm(io, kernel, Tuple{}; - backend, dump_module=true, kernel=true)) - @test occursin("spir_kernel", ir) + @test @filecheck begin + check"CHECK: spir_kernel" + SPIRV.code_llvm(kernel, Tuple{}; backend, dump_module=true, kernel=true) + end end @testset "byval workaround" begin @@ -20,14 +23,17 @@ end kernel(x) = return end - ir = sprint(io->SPIRV.code_llvm(io, mod.kernel, Tuple{Tuple{Int}}; backend)) - @test occursin(r"@\w*kernel\w*\(({ i64 }|\[1 x i64\])\*", ir) || - occursin(r"@\w*kernel\w*\(ptr", ir) + @test @filecheck begin + check"TYPED: @{{.*kernel.*}}([1 x i64]*" + check"OPAQUE: @{{.*kernel.*}}(ptr" + SPIRV.code_llvm(mod.kernel, Tuple{Tuple{Int}}; backend) + end - ir = sprint(io->SPIRV.code_llvm(io, mod.kernel, Tuple{Tuple{Int}}; - backend, kernel=true)) - @test occursin(r"@\w*kernel\w*\(.*{ ({ i64 }|\[1 x i64\]) }\*.+byval", ir) || - occursin(r"@\w*kernel\w*\(ptr byval", ir) + @test @filecheck begin + check"TYPED: @{{.*kernel.*}}({ [1 x i64] }* byval" + check"OPAQUE: @{{.*kernel.*}}(ptr byval" + SPIRV.code_llvm(mod.kernel, 
Tuple{Tuple{Int}}; backend, kernel=true) + end end @testset "byval bug" begin @@ -47,17 +53,20 @@ end end end - ir = sprint(io->SPIRV.code_llvm(io, mod.kernel, Tuple{Ptr{Float16}, Float16}; - backend)) - @test occursin("store half", ir) + @test @filecheck begin + check"CHECK: store half" + SPIRV.code_llvm(mod.kernel, Tuple{Ptr{Float16}, Float16}; backend) + end - ir = sprint(io->SPIRV.code_llvm(io, mod.kernel, Tuple{Ptr{Float32}, Float32}; - backend)) - @test occursin("store float", ir) + @test @filecheck begin + check"CHECK: store float" + SPIRV.code_llvm(mod.kernel, Tuple{Ptr{Float32}, Float32}; backend) + end - ir = sprint(io->SPIRV.code_llvm(io, mod.kernel, Tuple{Ptr{Float64}, Float64}; - backend)) - @test occursin("store double", ir) + @test @filecheck begin + check"CHECK: store double" + SPIRV.code_llvm(mod.kernel, Tuple{Ptr{Float64}, Float64}; backend) + end @test_throws_message(InvalidIRError, SPIRV.code_execution(mod.kernel, Tuple{Ptr{Float16}, Float16}; @@ -88,8 +97,10 @@ end return end - asm = sprint(io->SPIRV.code_native(io, kernel, Tuple{Bool}; backend, kernel=true)) - @test occursin(r"OpFunctionCall %void %(julia|j)_error", asm) + @test @filecheck begin + check"CHECK: OpFunctionCall %void %{{(julia|j)_error}}" + SPIRV.code_native(kernel, Tuple{Bool}; backend, kernel=true) + end end end From 40402d0722660bbe2114e961d766ab87137e1a60 Mon Sep 17 00:00:00 2001 From: Tim Besard Date: Tue, 24 Jun 2025 15:47:33 +0200 Subject: [PATCH 3/6] Add label checks and refactor functions into modules. 
--- test/bpf.jl | 45 +++++++--- test/gcn.jl | 123 ++++++++++++++++---------- test/metal.jl | 151 +++++++++++++++++++------------- test/native.jl | 231 +++++++++++++++++++++++++++++-------------------- test/ptx.jl | 136 +++++++++++++++-------------- test/spirv.jl | 37 ++++---- 6 files changed, 422 insertions(+), 301 deletions(-) diff --git a/test/bpf.jl b/test/bpf.jl index 07fa9fa7..a4b10f38 100644 --- a/test/bpf.jl +++ b/test/bpf.jl @@ -1,51 +1,68 @@ @testset "No-op" begin - kernel() = 0 + mod = @eval module $(gensym()) + kernel() = 0 + end @test @filecheck begin + check"CHECK-LABEL: julia_kernel_{{[0-9_]*}}:" check"CHECK: r0 = 0" check"CHECK-NEXT: exit" - BPF.code_native(kernel, ()) + BPF.code_native(mod.kernel, ()) end end @testset "Return argument" begin - kernel(x) = x + mod = @eval module $(gensym()) + kernel(x) = x + end @test @filecheck begin + check"CHECK-LABEL: julia_kernel_{{[0-9_]*}}:" check"CHECK: r0 = r1" check"CHECK-NEXT: exit" - BPF.code_native(kernel, (UInt64,)) + BPF.code_native(mod.kernel, (UInt64,)) end end @testset "Addition" begin - kernel(x) = x+1 + mod = @eval module $(gensym()) + kernel(x) = x+1 + end @test @filecheck begin + check"CHECK-LABEL: julia_kernel_{{[0-9_]*}}:" check"CHECK: r0 = r1" check"CHECK-NEXT: r0 += 1" check"CHECK-NEXT: exit" - BPF.code_native(kernel, (UInt64,)) + BPF.code_native(mod.kernel, (UInt64,)) end end @testset "Errors" begin - kernel(x) = fakefunc(x) + mod = @eval module $(gensym()) + kernel(x) = fakefunc(x) + end - @test_throws GPUCompiler.InvalidIRError BPF.code_execution(kernel, (UInt64,)) + @test_throws GPUCompiler.InvalidIRError BPF.code_execution(mod.kernel, (UInt64,)) end @testset "Function Pointers" begin @testset "valid" begin - goodcall(x) = Base.llvmcall("%2 = call i64 inttoptr (i64 3 to i64 (i64)*)(i64 %0)\nret i64 %2", Int, Tuple{Int}, x) - kernel(x) = goodcall(x) + mod = @eval module $(gensym()) + goodcall(x) = Base.llvmcall("%2 = call i64 inttoptr (i64 3 to i64 (i64)*)(i64 %0)\nret i64 %2", Int, 
Tuple{Int}, x) + kernel(x) = goodcall(x) + end @test @filecheck begin + check"CHECK-LABEL: julia_kernel_{{[0-9_]*}}:" check"CHECK: call" check"CHECK-NEXT: exit" - BPF.code_native(kernel, (Int,)) + BPF.code_native(mod.kernel, (Int,)) end end + @testset "invalid" begin - badcall(x) = Base.llvmcall("%2 = call i64 inttoptr (i64 3000 to i64 (i64)*)(i64 %0)\nret i64 %2", Int, Tuple{Int}, x) - kernel(x) = badcall(x) + mod = @eval module $(gensym()) + badcall(x) = Base.llvmcall("%2 = call i64 inttoptr (i64 3000 to i64 (i64)*)(i64 %0)\nret i64 %2", Int, Tuple{Int}, x) + kernel(x) = badcall(x) + end - @test_throws GPUCompiler.InvalidIRError BPF.code_execution(kernel, (Int,)) + @test_throws GPUCompiler.InvalidIRError BPF.code_execution(mod.kernel, (Int,)) end end diff --git a/test/gcn.jl b/test/gcn.jl index 4671ff8d..b5f61791 100644 --- a/test/gcn.jl +++ b/test/gcn.jl @@ -1,17 +1,23 @@ if :AMDGPU in LLVM.backends() + +# XXX: generic `sink` generates an instruction selection error +sink_gcn(i) = sink(i, Val(5)) + @testset "IR" begin @testset "kernel calling convention" begin - kernel() = return + mod = @eval module $(gensym()) + kernel() = return + end @test @filecheck begin check"CHECK-NOT: amdgpu_kernel" - GCN.code_llvm(kernel, Tuple{}; dump_module=true) + GCN.code_llvm(mod.kernel, Tuple{}; dump_module=true) end @test @filecheck begin check"CHECK: amdgpu_kernel" - GCN.code_llvm(kernel, Tuple{}; dump_module=true, kernel=true) + GCN.code_llvm(mod.kernel, Tuple{}; dump_module=true, kernel=true) end end @@ -21,19 +27,24 @@ end @testset "assembly" begin @testset "skip scalar trap" begin - workitem_idx_x() = ccall("llvm.amdgcn.workitem.id.x", llvmcall, Int32, ()) - trap() = ccall("llvm.trap", llvmcall, Nothing, ()) - function kernel() - if workitem_idx_x() > 1 - trap() + mod = @eval module $(gensym()) + workitem_idx_x() = ccall("llvm.amdgcn.workitem.id.x", llvmcall, Int32, ()) + trap() = ccall("llvm.trap", llvmcall, Nothing, ()) + + function kernel() + if workitem_idx_x() > 1 + 
trap() + end + return end - return end @test @filecheck begin + check"CHECK-LABEL: {{.*kernel.*}}:" check"CHECK: s_trap 2" - GCN.code_native(kernel, Tuple{}) + GCN.code_native(mod.kernel, Tuple{}) end + # XXX @test_skip occursin("s_cbranch_execz", asm) if Base.libllvm_version < v"9" @test_broken occursin("v_readfirstlane", asm) @@ -43,31 +54,38 @@ end @testset "child functions" begin # we often test using @noinline child functions, so test whether these survive # (despite not having side-effects) - @noinline child(i) = sink_gcn(i) - function parent(i) - child(i) - return + mod = @eval module $(gensym()) + import ..sink_gcn + @noinline child(i) = sink_gcn(i) + function parent(i) + child(i) + return + end end @test @filecheck begin + check"CHECK-LABEL: {{.*parent.*}}:" check"CHECK: s_add_u32{{.*(julia|j)_child_.*}}@rel32@" check"CHECK: s_addc_u32{{.*(julia|j)_child_.*}}@rel32@" - GCN.code_native(parent, Tuple{Int64}; dump_module=true) + GCN.code_native(mod.parent, Tuple{Int64}; dump_module=true) end end @testset "kernel functions" begin - @noinline nonentry(i) = sink_gcn(i) - function entry(i) - nonentry(i) - return + mod = @eval module $(gensym()) + import ..sink_gcn + @noinline nonentry(i) = sink_gcn(i) + function entry(i) + nonentry(i) + return + end end @test @filecheck begin check"CHECK-NOT: .amdhsa_kernel {{.*}}nonentry" check"CHECK: .type {{.*nonentry.*}},@function" check"CHECK: .amdhsa_kernel {{.*entry.*}}" - GCN.code_native(entry, Tuple{Int64}; dump_module=true, kernel=true) + GCN.code_native(mod.entry, Tuple{Int64}; dump_module=true, kernel=true) end end @@ -76,8 +94,7 @@ end # the child only being present once mod = @eval module $(gensym()) - export child, parent1, parent2 - + import ..sink_gcn @noinline child(i) = sink_gcn(i) function parent1(i) child(i) @@ -105,8 +122,7 @@ end # in the case of two child functions mod = @eval module $(gensym()) - export parent1, parent2, child1, child2 - + import ..sink_gcn @noinline child1(i) = sink_gcn(i) @noinline 
child2(i) = sink_gcn(i+1) function parent1(i) @@ -138,49 +154,57 @@ end # NOTE: Int32 to test for #49 - function kernel(out) - wid, lane = fldmod1(unsafe_load(out), Int32(32)) - unsafe_store!(out, wid) - return + mod = @eval module $(gensym()) + function kernel(out) + wid, lane = fldmod1(unsafe_load(out), Int32(32)) + unsafe_store!(out, wid) + return + end end @test @filecheck begin + check"CHECK-LABEL: {{.*kernel.*}}:" check"CHECK-NOT: jl_throw" check"CHECK-NOT: jl_invoke" - GCN.code_native(kernel, Tuple{Ptr{Int32}}) + GCN.code_native(mod.kernel, Tuple{Ptr{Int32}}) end end @testset "LLVM intrinsics" begin # issue #13 (a): cannot select trunc - function kernel(x) - unsafe_trunc(Int, x) - return + mod = @eval module $(gensym()) + function kernel(x) + unsafe_trunc(Int, x) + return + end end - GCN.code_native(devnull, kernel, Tuple{Float64}) + GCN.code_native(devnull, mod.kernel, Tuple{Float64}) @test "We did not crash!" != "" end # FIXME: _ZNK4llvm14TargetLowering20scalarizeVectorStoreEPNS_11StoreSDNodeERNS_12SelectionDAGE false && @testset "exception arguments" begin - function kernel(a) - unsafe_store!(a, trunc(Int, unsafe_load(a))) - return + mod = @eval module $(gensym()) + function kernel(a) + unsafe_store!(a, trunc(Int, unsafe_load(a))) + return + end end - GCN.code_native(devnull, kernel, Tuple{Ptr{Float64}}) + GCN.code_native(devnull, mod.kernel, Tuple{Ptr{Float64}}) end # FIXME: in function julia_inner_18528 void (%jl_value_t addrspace(10)*): invalid addrspacecast false && @testset "GC and TLS lowering" begin mod = @eval module $(gensym()) + import ..sink_gcn mutable struct PleaseAllocate y::Csize_t end # common pattern in Julia 0.7: outlined throw to avoid a GC frame in the calling code @noinline function inner(x) - sink(x.y) + sink_gcn(x.y) nothing end @@ -219,21 +243,24 @@ false && @testset "GC and TLS lowering" begin end @testset "float boxes" begin - function kernel(a,b) - c = Int32(a) - # the conversion to Int32 may fail, in which case the input 
Float32 is boxed in order to - # pass it to the @nospecialize exception constructor. we should really avoid that (eg. - # by avoiding @nospecialize, or optimize the unused arguments away), but for now the box - # should just work. - unsafe_store!(b, c) - return + mod = @eval module $(gensym()) + function kernel(a,b) + c = Int32(a) + # the conversion to Int32 may fail, in which case the input Float32 is boxed in order to + # pass it to the @nospecialize exception constructor. we should really avoid that (eg. + # by avoiding @nospecialize, or optimize the unused arguments away), but for now the box + # should just work. + unsafe_store!(b, c) + return + end end @test @filecheck begin + check"CHECK-LABEL: define void @{{.*kernel.*}}" check"CHECK: jl_box_float32" - GCN.code_llvm(kernel, Tuple{Float32,Ptr{Float32}}) + GCN.code_llvm(mod.kernel, Tuple{Float32,Ptr{Float32}}) end - GCN.code_native(devnull, kernel, Tuple{Float32,Ptr{Float32}}) + GCN.code_native(devnull, mod.kernel, Tuple{Float32,Ptr{Float32}}) end end diff --git a/test/metal.jl b/test/metal.jl index 5ddef1f7..0a9c1c68 100644 --- a/test/metal.jl +++ b/test/metal.jl @@ -2,55 +2,67 @@ @testset "kernel functions" begin @testset "byref aggregates" begin - kernel(x) = return + mod = @eval module $(gensym()) + kernel(x) = return + end @test @filecheck begin - check"TYPED: @{{.*kernel.*}}({{(\{ i64 \}|\[1 x i64\])}}*" - check"OPAQUE: @{{.*kernel.*}}(ptr" - Metal.code_llvm(kernel, Tuple{Tuple{Int}}) + check"CHECK-LABEL: define void @{{(julia|j)_kernel_[0-9]+}}" + check"TYPED-SAME: ({{(\{ i64 \}|\[1 x i64\])}}*" + check"OPAQUE-SAME: (ptr" + Metal.code_llvm(mod.kernel, Tuple{Tuple{Int}}) end # for kernels, every pointer argument needs to take an address space @test @filecheck begin - check"TYPED: @{{.*kernel.*}}({{(\{ i64 \}|\[1 x i64\])}} addrspace(1)*" - check"OPAQUE: @{{.*kernel.*}}(ptr addrspace(1)" - Metal.code_llvm(kernel, Tuple{Tuple{Int}}; kernel=true) + check"CHECK-LABEL: define void @_Z6kernel5TupleI5Int64E" + 
check"TYPED-SAME: ({{(\{ i64 \}|\[1 x i64\])}} addrspace(1)*" + check"OPAQUE-SAME: (ptr addrspace(1)" + Metal.code_llvm(mod.kernel, Tuple{Tuple{Int}}; kernel=true) end end @testset "byref primitives" begin - kernel(x) = return + mod = @eval module $(gensym()) + kernel(x) = return + end @test @filecheck begin - check"CHECK: @{{.*kernel.*}}(i64 " - Metal.code_llvm(kernel, Tuple{Int}) + check"CHECK-LABEL: define void @{{(julia|j)_kernel_[0-9]+}}" + check"CHECK-SAME: (i64" + Metal.code_llvm(mod.kernel, Tuple{Int}) end # for kernels, every pointer argument needs to take an address space @test @filecheck begin - check"TYPED: @{{.*kernel.*}}(i64 addrspace(1)*" - check"OPAQUE: @{{.*kernel.*}}(ptr addrspace(1)" - Metal.code_llvm(kernel, Tuple{Int}; kernel=true) + check"CHECK-LABEL: define void @_Z6kernel5Int64" + check"TYPED-SAME: (i64 addrspace(1)*" + check"OPAQUE-SAME: (ptr addrspace(1)" + Metal.code_llvm(mod.kernel, Tuple{Int}; kernel=true) end end @testset "module metadata" begin - kernel() = return + mod = @eval module $(gensym()) + kernel() = return + end @test @filecheck begin check"CHECK: air.version" check"CHECK: air.language_version" check"CHECK: air.max_device_buffers" - Metal.code_llvm(kernel, Tuple{}; dump_module=true, kernel=true) + Metal.code_llvm(mod.kernel, Tuple{}; dump_module=true, kernel=true) end end @testset "argument metadata" begin - kernel(x) = return + mod = @eval module $(gensym()) + kernel(x) = return + end @test @filecheck begin check"CHECK: air.buffer" - Metal.code_llvm(kernel, Tuple{Int}; dump_module=true, kernel=true) + Metal.code_llvm(mod.kernel, Tuple{Int}; dump_module=true, kernel=true) end # XXX: perform more exhaustive testing of argument passing metadata here, @@ -58,71 +70,82 @@ end end @testset "input arguments" begin - function kernel(ptr) - idx = ccall("extern julia.air.thread_position_in_threadgroup.i32", llvmcall, UInt32, ()) + 1 - unsafe_store!(ptr, 42, idx) - return + mod = @eval module $(gensym()) + function kernel(ptr) + idx = 
ccall("extern julia.air.thread_position_in_threadgroup.i32", + llvmcall, UInt32, ()) + 1 + unsafe_store!(ptr, 42, idx) + return + end end @test @filecheck begin - check"TYPED: @{{.*kernel.*}}({{.*}} addrspace(1)* %{{.+}})" - check"OPAQUE: @{{.*kernel.*}}(ptr addrspace(1) %{{.+}})" + check"CHECK-LABEL: define void @{{(julia|j)_kernel_[0-9]+}}" + check"TYPED-SAME: ({{.*}} addrspace(1)* %{{.+}})" + check"OPAQUE-SAME: (ptr addrspace(1) %{{.+}})" check"CHECK: call i32 @julia.air.thread_position_in_threadgroup.i32" - Metal.code_llvm(kernel, Tuple{Core.LLVMPtr{Int,1}}) + Metal.code_llvm(mod.kernel, Tuple{Core.LLVMPtr{Int,1}}) end @test @filecheck begin - check"TYPED: @{{.*kernel.*}}({{.*}} addrspace(1)* %{{.+}}, i32 %thread_position_in_threadgroup)" - check"OPAQUE: @{{.*kernel.*}}(ptr addrspace(1) %{{.+}}, i32 %thread_position_in_threadgroup)" + check"CHECK-LABEL: define void @_Z6kernel7LLVMPtrI5Int64Li1EE" + check"TYPED-SAME: ({{.*}} addrspace(1)* %{{.+}}, i32 %thread_position_in_threadgroup)" + check"OPAQUE-SAME: (ptr addrspace(1) %{{.+}}, i32 %thread_position_in_threadgroup)" check"CHECK-NOT: call i32 @julia.air.thread_position_in_threadgroup.i32" - Metal.code_llvm(kernel, Tuple{Core.LLVMPtr{Int,1}}; kernel=true) + Metal.code_llvm(mod.kernel, Tuple{Core.LLVMPtr{Int,1}}; kernel=true) end end @testset "vector intrinsics" begin - foo(x, y) = ccall("llvm.smax.v2i64", llvmcall, NTuple{2, VecElement{Int64}}, - (NTuple{2, VecElement{Int64}}, NTuple{2, VecElement{Int64}}), x, y) + mod = @eval module $(gensym()) + foo(x, y) = ccall("llvm.smax.v2i64", llvmcall, NTuple{2, VecElement{Int64}}, + (NTuple{2, VecElement{Int64}}, NTuple{2, VecElement{Int64}}), x, y) + end @test @filecheck begin + check"CHECK-LABEL: define <2 x i64> @{{(julia|j)_foo_[0-9]+}}" check"CHECK: air.max.s.v2i64" - Metal.code_llvm(foo, (NTuple{2, VecElement{Int64}}, NTuple{2, VecElement{Int64}})) + Metal.code_llvm(mod.foo, (NTuple{2, VecElement{Int64}}, NTuple{2, VecElement{Int64}})) end end @testset 
"unsupported type detection" begin - function kernel1(ptr) - buf = reinterpret(Ptr{Float32}, ptr) - val = unsafe_load(buf) - dval = Cdouble(val) - # ccall("extern metal_os_log", llvmcall, Nothing, (Float64,), dval) - Base.llvmcall((""" - declare void @llvm.va_start(i8*) - declare void @llvm.va_end(i8*) - declare void @air.os_log(i8*, i64) - - define void @metal_os_log(...) { - %1 = alloca i8* - %2 = bitcast i8** %1 to i8* - call void @llvm.va_start(i8* %2) - %3 = load i8*, i8** %1 - call void @air.os_log(i8* %3, i64 8) - call void @llvm.va_end(i8* %2) - ret void - } - - define void @entry(double %val) #0 { - call void (...) @metal_os_log(double %val) - ret void - } - - attributes #0 = { alwaysinline }""", "entry"), - Nothing, Tuple{Float64}, dval) - return + mod = @eval module $(gensym()) + function kernel(ptr) + buf = reinterpret(Ptr{Float32}, ptr) + val = unsafe_load(buf) + dval = Cdouble(val) + # ccall("extern metal_os_log", llvmcall, Nothing, (Float64,), dval) + Base.llvmcall((""" + declare void @llvm.va_start(i8*) + declare void @llvm.va_end(i8*) + declare void @air.os_log(i8*, i64) + + define void @metal_os_log(...) { + %1 = alloca i8* + %2 = bitcast i8** %1 to i8* + call void @llvm.va_start(i8* %2) + %3 = load i8*, i8** %1 + call void @air.os_log(i8* %3, i64 8) + call void @llvm.va_end(i8* %2) + ret void + } + + define void @entry(double %val) #0 { + call void (...) 
@metal_os_log(double %val) + ret void + } + + attributes #0 = { alwaysinline }""", "entry"), + Nothing, Tuple{Float64}, dval) + return + end end @test @filecheck begin + check"CHECK-LABEL: define void @{{(julia|j)_kernel_[0-9]+}}" check"CHECK: @metal_os_log" - Metal.code_llvm(kernel1, Tuple{Core.LLVMPtr{Float32,1}}; validate=true) + Metal.code_llvm(mod.kernel, Tuple{Core.LLVMPtr{Float32,1}}; validate=true) end function kernel2(ptr) @@ -132,7 +155,9 @@ end return end - @test_throws_message(InvalidIRError, Metal.code_execution(kernel2, Tuple{Core.LLVMPtr{Float64,1}})) do msg + @test_throws_message(InvalidIRError, + Metal.code_execution(kernel2, + Tuple{Core.LLVMPtr{Float64,1}})) do msg occursin("unsupported use of double value", msg) end end @@ -149,8 +174,10 @@ end end @test @filecheck begin - check"CHECK: addrspace(2) constant [2 x float]" - Metal.code_llvm(mod.kernel, Tuple{Core.LLVMPtr{Float32,1}, Int}; dump_module=true, kernel=true) + check"CHECK: @{{.+}} = {{.*}} addrspace(2) constant [2 x float]" + check"CHECK: define void @_Z6kernel7LLVMPtrI7Float32Li1EE5Int64" + Metal.code_llvm(mod.kernel, Tuple{Core.LLVMPtr{Float32,1}, Int}; + dump_module=true, kernel=true) end end diff --git a/test/native.jl b/test/native.jl index ec380f7b..5486e32c 100644 --- a/test/native.jl +++ b/test/native.jl @@ -12,62 +12,69 @@ end @test @filecheck begin - check"CHECK: {{(julia|j)_identity}}" + check"CHECK: {{.*identity.*}}" GPUCompiler.code_llvm(job) end @test @filecheck begin - check"CHECK: {{(julia|j)_identity}}" + check"CHECK: {{.*identity.*}}" GPUCompiler.code_native(job) end end @testset "compilation" begin @testset "callable structs" begin - struct MyCallable end - (::MyCallable)(a, b) = a+b + mod = @eval module $(gensym()) + struct MyCallable end + (::MyCallable)(a, b) = a+b + end - (ci, rt) = Native.code_typed(MyCallable(), (Int, Int), kernel=false)[1] - @test ci.slottypes[1] == Core.Compiler.Const(MyCallable()) + (ci, rt) = Native.code_typed(mod.MyCallable(), (Int, Int), 
kernel=false)[1] + @test ci.slottypes[1] == Core.Compiler.Const(mod.MyCallable()) end @testset "compilation database" begin - @noinline inner(x) = x+1 - function outer(x) - return inner(x) + mod = @eval module $(gensym()) + @noinline inner(x) = x+1 + function outer(x) + return inner(x) + end end - job, _ = Native.create_job(outer, (Int,)) + job, _ = Native.create_job(mod.outer, (Int,)) JuliaContext() do ctx ir, meta = GPUCompiler.compile(:llvm, job) - meth = only(methods(outer, (Int,))) + meth = only(methods(mod.outer, (Int,))) mis = filter(mi->mi.def == meth, keys(meta.compiled)) @test length(mis) == 1 other_mis = filter(mi->mi.def != meth, keys(meta.compiled)) @test length(other_mis) == 1 - @test only(other_mis).def in methods(inner) + @test only(other_mis).def in methods(mod.inner) end end @testset "advanced database" begin - @noinline inner(x) = x+1 - foo(x) = sum(inner, fill(x, 10, 10)) + mod = @eval module $(gensym()) + @noinline inner(x) = x+1 + foo(x) = sum(inner, fill(x, 10, 10)) + end - job, _ = Native.create_job(foo, (Float64,); validate=false) + job, _ = Native.create_job(mod.foo, (Float64,); validate=false) JuliaContext() do ctx # shouldn't segfault ir, meta = GPUCompiler.compile(:llvm, job) - meth = only(methods(foo, (Float64,))) + meth = only(methods(mod.foo, (Float64,))) mis = filter(mi->mi.def == meth, keys(meta.compiled)) @test length(mis) == 1 inner_methods = filter(keys(meta.compiled)) do mi - mi.def in methods(inner) && mi.specTypes == Tuple{typeof(inner), Float64} + mi.def in methods(mod.inner) && + mi.specTypes == Tuple{typeof(mod.inner), Float64} end @test length(inner_methods) == 1 end @@ -81,6 +88,7 @@ end # smoke test job, _ = Native.create_job(eval(kernel), (Int64,)) @test @filecheck begin + check"CHECK-LABEL: define {{.*}} @{{.*kernel.*}}(" check"CHECK: add i64 %{{[0-9]+}}, 1" GPUCompiler.code_llvm(job) end @@ -89,6 +97,7 @@ end @eval $kernel(i) = $child(i)+2 job, _ = Native.create_job(eval(kernel), (Int64,)) @test @filecheck begin + 
check"CHECK-LABEL: define {{.*}} @{{.*kernel.*}}(" check"CHECK: add i64 %{{[0-9]+}}, 2" GPUCompiler.code_llvm(job) end @@ -110,6 +119,7 @@ end # initial compilation source = methodinstance(ft, tt, Base.get_world_counter()) @test @filecheck begin + check"CHECK-LABEL: define {{.*}} @{{.*kernel.*}}(" check"CHECK: add i64 %{{[0-9]+}}, 2" Base.invokelatest(GPUCompiler.cached_compilation, cache, source, job.config, compiler, linker) end @@ -117,6 +127,7 @@ end # cached compilation @test @filecheck begin + check"CHECK-LABEL: define {{.*}} @{{.*kernel.*}}(" check"CHECK: add i64 %{{[0-9]+}}, 2" Base.invokelatest(GPUCompiler.cached_compilation, cache, source, job.config, compiler, linker) end @@ -126,6 +137,7 @@ end @eval $kernel(i) = $child(i)+3 source = methodinstance(ft, tt, Base.get_world_counter()) @test @filecheck begin + check"CHECK-LABEL: define {{.*}} @{{.*kernel.*}}(" check"CHECK: add i64 %{{[0-9]+}}, 3" Base.invokelatest(GPUCompiler.cached_compilation, cache, source, job.config, compiler, linker) end @@ -133,6 +145,7 @@ end # cached compilation @test @filecheck begin + check"CHECK-LABEL: define {{.*}} @{{.*kernel.*}}(" check"CHECK: add i64 %{{[0-9]+}}, 3" Base.invokelatest(GPUCompiler.cached_compilation, cache, source, job.config, compiler, linker) end @@ -155,7 +168,7 @@ end # change in configuration config = CompilerConfig(job.config; name="foobar") @test @filecheck begin - check"CHECK: foobar" + check"CHECK: define {{.*}} @foobar" Base.invokelatest(GPUCompiler.cached_compilation, cache, source, config, compiler, linker) end @test invocations[] == 4 @@ -176,6 +189,7 @@ end @test contains(ir, r"add i64 %\d+, 4") notify(c2) # wake up the task @test @filecheck begin + check"CHECK-LABEL: define {{.*}} @{{.*kernel.*}}(" check"CHECK: add i64 %{{[0-9]+}}, 3" fetch(t) end @@ -199,63 +213,71 @@ end @testset "IR" begin @testset "basic reflection" begin - valid_kernel() = return - invalid_kernel() = 1 + mod = @eval module $(gensym()) + valid_kernel() = return + 
invalid_kernel() = 1 + end @test @filecheck begin # module should contain our function + a generic call wrapper - check"CHECK-NOT: define %jl_value_t* @jlcall_" - check"CHECK: define{{.*}} void @{{.*(julia|j)_valid_kernel.*}}()" - # there should be no debug metadata - check"CHECK-NOT: !dbg" - Native.code_llvm(valid_kernel, Tuple{}; optimize=false, dump_module=true) + check"CHECK: {{.*valid_kernel.*}}" + Native.code_llvm(mod.valid_kernel, Tuple{}; optimize=false, dump_module=true) end - @test Native.code_llvm(devnull, invalid_kernel, Tuple{}) == nothing - @test_throws KernelError Native.code_llvm(devnull, invalid_kernel, Tuple{}; kernel=true) == nothing + @test Native.code_llvm(devnull, mod.invalid_kernel, Tuple{}) == nothing + @test_throws KernelError Native.code_llvm(devnull, mod.invalid_kernel, Tuple{}; kernel=true) == nothing end @testset "unbound typevars" begin - invalid_kernel() where {unbound} = return - @test_throws KernelError Native.code_llvm(devnull, invalid_kernel, Tuple{}) + mod = @eval module $(gensym()) + invalid_kernel() where {unbound} = return + end + @test_throws KernelError Native.code_llvm(devnull, mod.invalid_kernel, Tuple{}) end @testset "child functions" begin # we often test using `@noinline sink` child functions, so test whether these survive - @noinline child(i) = sink(i) - parent(i) = child(i) + mod = @eval module $(gensym()) + import ..sink + @noinline child(i) = sink(i) + parent(i) = child(i) + end @test @filecheck begin - check"CHECK: call {{.+}} @{{(julia|j).+child.+}}" - Native.code_llvm(parent, Tuple{Int}) + check"CHECK-LABEL: define {{.*}} @{{.*parent.*}}" + check"CHECK: call {{.+}} @{{.*child.*}}" + Native.code_llvm(mod.parent, Tuple{Int}) end end @testset "sysimg" begin # bug: use a system image function - - function foobar(a,i) - Base.pointerset(a, 0, mod1(i,10), 8) + mod = @eval module $(gensym()) + function foobar(a,i) + Base.pointerset(a, 0, mod1(i,10), 8) + end end @test @filecheck begin check"CHECK-NOT: jlsys_" - 
Native.code_llvm(foobar, Tuple{Ptr{Int},Int}) + Native.code_llvm(mod.foobar, Tuple{Ptr{Int},Int}) end end @testset "tracked pointers" begin - function kernel(a) - a[1] = 1 - return + mod = @eval module $(gensym()) + function kernel(a) + a[1] = 1 + return + end end # this used to throw an LLVM assertion (#223) - Native.code_llvm(devnull, kernel, Tuple{Vector{Int}}; kernel=true) + Native.code_llvm(devnull, mod.kernel, Tuple{Vector{Int}}; kernel=true) @test "We did not crash!" != "" end -@testset "CUDAjl#278" begin +@testset "CUDA.jl#278" begin # codegen idempotency # NOTE: this isn't fixed, but surfaces here due to bad inference of checked_sub # NOTE: with the fix to print_to_string this doesn't error anymore, @@ -285,19 +307,22 @@ end end @testset "slow abi" begin - x = 2 - f = () -> x+1 + mod = @eval module $(gensym()) + x = 2 + f = () -> x+1 + end @test @filecheck begin check"CHECK: define internal fastcc {{.+}} @julia" check"TYPED: define nonnull {}* @jfptr" check"OPAQUE: define nonnull ptr @jfptr" check"CHECK: call fastcc {{.+}} @julia" - Native.code_llvm(f, Tuple{}; entry_abi=:func, dump_module=true) + Native.code_llvm(mod.f, Tuple{}; entry_abi=:func, dump_module=true) end end @testset "function entry safepoint emission" begin @test @filecheck begin + check"CHECK-LABEL: define {{.*}} @{{.*identity.*}}(" check"CHECK-NOT: %safepoint" Native.code_llvm(identity, Tuple{Nothing}; entry_safepoint=false, optimize=false, dump_module=true) end @@ -306,6 +331,7 @@ end # see https://github.com/JuliaLang/julia/pull/57010/files#r2079576894 if VERSION < v"1.13.0-DEV.533" @test @filecheck begin + check"CHECK-LABEL: define {{.*}} @{{.*identity.*}}(" check"CHECK: %safepoint" Native.code_llvm(identity, Tuple{Nothing}; entry_safepoint=true, optimize=false, dump_module=true) end @@ -315,6 +341,7 @@ end @testset "always_inline" begin # XXX: broken by JuliaLang/julia#51599, see JuliaGPU/GPUCompiler.jl#527 mod = @eval module $(gensym()) + import ..sink f_expensive(x) = $(foldl((e, _) 
-> :($sink($e) + $sink(x)), 1:100; init=:x)) function g(x) f_expensive(x) @@ -327,44 +354,46 @@ end end @test @filecheck begin - check"CHECK: define{{.*(julia|j)_f_expensive}}" + check"CHECK: define {{.*f_expensive.*}}" Native.code_llvm(mod.g, Tuple{Int64}; dump_module=true, kernel=true) end @test @filecheck begin - check"CHECK-NOT: define{{.*(julia|j)_f_expensive}}" + check"CHECK-NOT: define {{.*f_expensive.*}}" Native.code_llvm(mod.g, Tuple{Int64}; dump_module=true, kernel=true, always_inline=true) end @test @filecheck begin - check"CHECK-NOT: define{{.*(julia|j)_f_expensive}}" - Native.code_llvm(mod.h, Tuple{Int64}; dump_module=true, kernel=true, always_inline=true) + check"CHECK: define {{.*f_expensive.*}}" + Native.code_llvm(mod.h, Tuple{Int64}; dump_module=true, kernel=true) end @test @filecheck begin - check"CHECK: define{{.*(julia|j)_f_expensive}}" - Native.code_llvm(mod.h, Tuple{Int64}; dump_module=true, kernel=true) + check"CHECK-NOT: define {{.*f_expensive.*}}" + Native.code_llvm(mod.h, Tuple{Int64}; dump_module=true, kernel=true, always_inline=true) end end @testset "function attributes" begin - @inline function convergent_barrier() - Base.llvmcall((""" - declare void @barrier() #1 - - define void @entry() #0 { - call void @barrier() - ret void - } - - attributes #0 = { alwaysinline } - attributes #1 = { convergent }""", "entry"), - Nothing, Tuple{}) + mod = @eval module $(gensym()) + @inline function convergent_barrier() + Base.llvmcall((""" + declare void @barrier() #1 + + define void @entry() #0 { + call void @barrier() + ret void + } + + attributes #0 = { alwaysinline } + attributes #1 = { convergent }""", "entry"), + Nothing, Tuple{}) + end end @test @filecheck begin check"CHECK: attributes #{{.}} = { convergent }" - Native.code_llvm(convergent_barrier, Tuple{}; dump_module=true, raw=true) + Native.code_llvm(mod.convergent_barrier, Tuple{}; dump_module=true, raw=true) end end @@ -375,39 +404,45 @@ end @testset "assembly" begin @testset "basic 
reflection" begin - valid_kernel() = return - invalid_kernel() = 1 + mod = @eval module $(gensym()) + valid_kernel() = return + invalid_kernel() = 1 + end - @test Native.code_native(devnull, valid_kernel, Tuple{}) == nothing - @test Native.code_native(devnull, invalid_kernel, Tuple{}) == nothing - @test_throws KernelError Native.code_native(devnull, invalid_kernel, Tuple{}; kernel=true) + @test Native.code_native(devnull, mod.valid_kernel, Tuple{}) == nothing + @test Native.code_native(devnull, mod.invalid_kernel, Tuple{}) == nothing + @test_throws KernelError Native.code_native(devnull, mod.invalid_kernel, Tuple{}; kernel=true) end @testset "idempotency" begin # bug: generate code twice for the same kernel (jl_to_ptx wasn't idempotent) - - kernel() = return - Native.code_native(devnull, kernel, Tuple{}) - Native.code_native(devnull, kernel, Tuple{}) + mod = @eval module $(gensym()) + kernel() = return + end + Native.code_native(devnull, mod.kernel, Tuple{}) + Native.code_native(devnull, mod.kernel, Tuple{}) @test "We did not crash!" 
!= "" end @testset "compile for host after gpu" begin # issue #11: re-using host functions after GPU compilation - @noinline child(i) = sink(i+1) + mod = @eval module $(gensym()) + import ..sink + @noinline child(i) = sink(i+1) - function fromhost() - child(10) - end + function fromhost() + child(10) + end - function fromptx() - child(10) - return + function fromptx() + child(10) + return + end end - Native.code_native(devnull, fromptx, Tuple{}) - @test fromhost() == 11 + Native.code_native(devnull, mod.fromptx, Tuple{}) + @test mod.fromhost() == 11 end end @@ -416,23 +451,29 @@ end @testset "errors" begin -struct CleverType{T} - x::T -end -Base.unsafe_trunc(::Type{Int}, x::CleverType) = unsafe_trunc(Int, x.x) @testset "non-isbits arguments" begin - foobar(i) = (sink(unsafe_trunc(Int,i)); return) + mod = @eval module $(gensym()) + import ..sink + foobar(i) = (sink(unsafe_trunc(Int,i)); return) + end @test_throws_message(KernelError, - Native.code_execution(foobar, Tuple{BigInt})) do msg + Native.code_execution(mod.foobar, Tuple{BigInt})) do msg occursin("passing non-bitstype argument", msg) && occursin("BigInt", msg) end # test that we get information about fields and reason why something is not isbits + mod = @eval module $(gensym()) + struct CleverType{T} + x::T + end + Base.unsafe_trunc(::Type{Int}, x::CleverType) = unsafe_trunc(Int, x.x) + foobar(i) = (sink(unsafe_trunc(Int,i)); return) + end @test_throws_message(KernelError, - Native.code_execution(foobar, Tuple{CleverType{BigInt}})) do msg + Native.code_execution(mod.foobar, Tuple{mod.CleverType{BigInt}})) do msg occursin("passing non-bitstype argument", msg) && occursin("CleverType", msg) && occursin("BigInt", msg) @@ -441,7 +482,6 @@ end @testset "invalid LLVM IR" begin mod = @eval module $(gensym()) - export foobar foobar(i) = println(i) end @@ -458,7 +498,6 @@ end @testset "invalid LLVM IR (ccall)" begin mod = @eval module $(gensym()) - export foobar function foobar(p) unsafe_store!(p, ccall(:time, Cint, 
())) return @@ -482,7 +521,6 @@ end @testset "delayed bindings" begin mod = @eval module $(gensym()) - export kernel function kernel() undefined return @@ -515,7 +553,6 @@ end @testset "dynamic call (apply)" begin mod = @eval module $(gensym()) - export func func() = println(1) end @@ -564,7 +601,8 @@ end end end -@testset "#366: semi-concrete interpretation + overlay methods = dynamic dispatch" begin +@testset "semi-concrete interpretation + overlay methods" begin + # issue 366, caused dynamic dispatch mod = @eval module $(gensym()) using ..GPUCompiler using StaticArrays @@ -589,8 +627,9 @@ end end end -@testset "JuliaLang/julia#48097: kwcall inference in the presence of overlay method" begin - # XXX: broken again by JuliaLang/julia#51092, see JuliaGPU/GPUCompiler.jl#506 +@testset "kwcall inference + overlay method" begin + # originally broken by JuliaLang/julia#48097 + # broken again by JuliaLang/julia#51092, see JuliaGPU/GPUCompiler.jl#506 mod = @eval module $(gensym()) child(; kwargs...)
= return diff --git a/test/ptx.jl b/test/ptx.jl index 21db7a8e..fbf982b8 100644 --- a/test/ptx.jl +++ b/test/ptx.jl @@ -1,15 +1,17 @@ @testset "IR" begin @testset "exceptions" begin - foobar() = throw(DivideError()) + mod = @eval module $(gensym()) + foobar() = throw(DivideError()) + end @test @filecheck begin - check"CHECK-LABEL: foobar" + check"CHECK-LABEL: define void @{{.*foobar.*}}()" # plain exceptions should get lowered to a call to the GPU run-time # not a jl_throw referencing a jl_value_t representing the exception check"CHECK-NOT: jl_throw" check"CHECK: gpu_report_exception" - PTX.code_llvm(foobar, Tuple{}; dump_module=true) + PTX.code_llvm(mod.foobar, Tuple{}; dump_module=true) end end @@ -24,24 +26,24 @@ end end @test @filecheck begin - check"CHECK-LABEL: @{{(julia|j)_kernel_[0-9]+}}" - check"TYPED-SAME: ({{(\{ i64 \}|\[1 x i64\])}}*" - check"OPAQUE-SAME: (ptr" + check"CHECK-LABEL: define void @{{.*kernel.*}}(" PTX.code_llvm(mod.kernel, Tuple{mod.Aggregate}) end @test @filecheck begin - check"CHECK: @_Z6kernel9Aggregate({{.*({ i64 }|\[1 x i64\])}} " + check"CHECK-LABEL: define ptx_kernel void @_Z6kernel9Aggregate({{.*({ i64 }|\[1 x i64\])}}" PTX.code_llvm(mod.kernel, Tuple{mod.Aggregate}; kernel=true) end end @testset "property_annotations" begin - kernel() = return + mod = @eval module $(gensym()) + kernel() = return + end @test @filecheck begin check"CHECK-NOT: nvvm.annotations" - PTX.code_llvm(kernel, Tuple{}; dump_module=true) + PTX.code_llvm(mod.kernel, Tuple{}; dump_module=true) end @test @filecheck begin @@ -50,45 +52,47 @@ end check"CHECK-NOT: minctasm" check"CHECK-NOT: maxnreg" check"CHECK: nvvm.annotations" - PTX.code_llvm(kernel, Tuple{}; dump_module=true, kernel=true) + PTX.code_llvm(mod.kernel, Tuple{}; dump_module=true, kernel=true) end @test @filecheck begin check"CHECK: maxntidx\", i32 42" check"CHECK: maxntidy\", i32 1" check"CHECK: maxntidz\", i32 1" - PTX.code_llvm(kernel, Tuple{}; dump_module=true, kernel=true, maxthreads=42) + 
PTX.code_llvm(mod.kernel, Tuple{}; dump_module=true, kernel=true, maxthreads=42) end @test @filecheck begin check"CHECK: reqntidx\", i32 42" check"CHECK: reqntidy\", i32 1" check"CHECK: reqntidz\", i32 1" - PTX.code_llvm(kernel, Tuple{}; dump_module=true, kernel=true, minthreads=42) + PTX.code_llvm(mod.kernel, Tuple{}; dump_module=true, kernel=true, minthreads=42) end @test @filecheck begin check"CHECK: minctasm\", i32 42" - PTX.code_llvm(kernel, Tuple{}; dump_module=true, kernel=true, blocks_per_sm=42) + PTX.code_llvm(mod.kernel, Tuple{}; dump_module=true, kernel=true, blocks_per_sm=42) end @test @filecheck begin check"CHECK: maxnreg\", i32 42" - PTX.code_llvm(kernel, Tuple{}; dump_module=true, kernel=true, maxregs=42) + PTX.code_llvm(mod.kernel, Tuple{}; dump_module=true, kernel=true, maxregs=42) end end LLVM.version() >= v"8" && @testset "calling convention" begin - kernel() = return + mod = @eval module $(gensym()) + kernel() = return + end @test @filecheck begin check"CHECK-NOT: ptx_kernel" - PTX.code_llvm(kernel, Tuple{}; dump_module=true) + PTX.code_llvm(mod.kernel, Tuple{}; dump_module=true) end @test @filecheck begin check"CHECK: ptx_kernel" - PTX.code_llvm(kernel, Tuple{}; dump_module=true, kernel=true) + PTX.code_llvm(mod.kernel, Tuple{}; dump_module=true, kernel=true) end end @@ -96,7 +100,6 @@ end # state should be passed by value to kernel functions mod = @eval module $(gensym()) - export kernel kernel() = return end @@ -134,15 +137,12 @@ end end # child1 doesn't use the state @test @filecheck begin - check"CHECK-LABEL: define internal fastcc i64 @{{(julia|j)_child1[0-9_]*}}" - check"INTPTR_ABI-SAME: (i64" - check"PTR_ABI-SAME: ({{(ptr|i8\*)}}" + check"CHECK-LABEL: define internal fastcc i64 @{{.*child1.*}}(" PTX.code_llvm(mod.kernel, Tuple{Ptr{Int64}}; kernel=true, dump_module=true) end # child2 does @test @filecheck begin - check"CHECK-LABEL: define internal fastcc i64 @{{(julia|j)_child2[0-9_]*}}" - check"CHECK-SAME: ([1 x i64] %state" + 
check"CHECK-LABEL: define internal fastcc i64 @{{.*child2.*}}(" PTX.code_llvm(mod.kernel, Tuple{Ptr{Int64}}; kernel=true, dump_module=true) end end @@ -160,8 +160,6 @@ if :NVPTX in LLVM.backends() mod = @eval module $(gensym()) import ..sink - export child, parent - @noinline child(i) = sink(i) function parent(i) child(i) @@ -170,6 +168,7 @@ if :NVPTX in LLVM.backends() end @test @filecheck begin + check"CHECK-LABEL: .visible .func {{(julia|j)_parent[0-9_]*}}" check"CHECK: call.uni" check"CHECK-NEXT: {{(julia|j)_child_}}" PTX.code_native(mod.parent, Tuple{Int64}) @@ -179,8 +178,6 @@ end @testset "kernel functions" begin mod = @eval module $(gensym()) import ..sink - export nonentry, entry - @noinline nonentry(i) = sink(i) function entry(i) nonentry(i) @@ -231,8 +228,6 @@ end mod = @eval module $(gensym()) import ..sink - export child, parent1, parent2 - @noinline child(i) = sink(i) function parent1(i) child(i) @@ -261,8 +256,6 @@ end mod = @eval module $(gensym()) import ..sink - export parent1, parent2, child1, child2 - @noinline child1(i) = sink(i) @noinline child2(i) = sink(i+1) function parent1(i) @@ -293,36 +286,42 @@ end # (host fldmod1->mod1 throws, so the PTX code shouldn't contain a throw) # NOTE: Int32 to test for #49 - - function kernel(out) - wid, lane = fldmod1(unsafe_load(out), Int32(32)) - unsafe_store!(out, wid) - return + mod = @eval module $(gensym()) + function kernel(out) + wid, lane = fldmod1(unsafe_load(out), Int32(32)) + unsafe_store!(out, wid) + return + end end @test @filecheck begin + check"CHECK-LABEL: .visible .func {{(julia|j)_kernel[0-9_]*}}" check"CHECK-NOT: jl_throw" check"CHECK-NOT: jl_invoke" - PTX.code_native(kernel, Tuple{Ptr{Int32}}) + PTX.code_native(mod.kernel, Tuple{Ptr{Int32}}) end end @testset "LLVM intrinsics" begin # issue #13 (a): cannot select trunc - function kernel(x) - unsafe_trunc(Int, x) - return + mod = @eval module $(gensym()) + function kernel(x) + unsafe_trunc(Int, x) + return + end end - 
PTX.code_native(devnull, kernel, Tuple{Float64}) + PTX.code_native(devnull, mod.kernel, Tuple{Float64}) @test "We did not crash!" != "" end @testset "exception arguments" begin - function kernel(a) - unsafe_store!(a, trunc(Int, unsafe_load(a))) - return + mod = @eval module $(gensym()) + function kernel(a) + unsafe_store!(a, trunc(Int, unsafe_load(a))) + return + end end - PTX.code_native(devnull, kernel, Tuple{Ptr{Float64}}) + PTX.code_native(devnull, mod.kernel, Tuple{Ptr{Float64}}) @test "We did not crash!" != "" end @@ -347,6 +346,7 @@ end end @test @filecheck begin + check"CHECK-LABEL: .visible .func {{(julia|j)_kernel[0-9_]*}}" check"CHECK-NOT: julia.push_gc_frame" check"CHECK-NOT: julia.pop_gc_frame" check"CHECK-NOT: julia.get_gc_frame_slot" @@ -356,40 +356,46 @@ end end # make sure that we can still ellide allocations - function ref_kernel(ptr, i) - data = Ref{Int64}() - data[] = 0 - if i > 1 - data[] = 1 - else - data[] = 2 + mod = @eval module $(gensym()) + function ref_kernel(ptr, i) + data = Ref{Int64}() + data[] = 0 + if i > 1 + data[] = 1 + else + data[] = 2 + end + unsafe_store!(ptr, data[], i) + return nothing end - unsafe_store!(ptr, data[], i) - return nothing end @test @filecheck begin + check"CHECK-LABEL: .visible .func {{(julia|j)_ref_kernel[0-9_]*}}" check"CHECK-NOT: gpu_gc_pool_alloc" - PTX.code_native(ref_kernel, Tuple{Ptr{Int64}, Int}) + PTX.code_native(mod.ref_kernel, Tuple{Ptr{Int64}, Int}) end end @testset "float boxes" begin - function kernel(a,b) - c = Int32(a) - # the conversion to Int32 may fail, in which case the input Float32 is boxed in order to - # pass it to the @nospecialize exception constructor. we should really avoid that (eg. - # by avoiding @nospecialize, or optimize the unused arguments away), but for now the box - # should just work. 
- unsafe_store!(b, c) - return + mod = @eval module $(gensym()) + function kernel(a,b) + c = Int32(a) + # the conversion to Int32 may fail, in which case the input Float32 is boxed in order to + # pass it to the @nospecialize exception constructor. we should really avoid that (eg. + # by avoiding @nospecialize, or optimize the unused arguments away), but for now the box + # should just work. + unsafe_store!(b, c) + return + end end @test @filecheck begin + check"CHECK-LABEL: define void @{{.*kernel.*}}(" check"CHECK: jl_box_float32" - PTX.code_llvm(kernel, Tuple{Float32,Ptr{Float32}}) + PTX.code_llvm(mod.kernel, Tuple{Float32,Ptr{Float32}}) end - PTX.code_native(devnull, kernel, Tuple{Float32,Ptr{Float32}}) + PTX.code_native(devnull, mod.kernel, Tuple{Float32,Ptr{Float32}}) end end diff --git a/test/spirv.jl b/test/spirv.jl index eb1be7d9..dcf38779 100644 --- a/test/spirv.jl +++ b/test/spirv.jl @@ -4,49 +4,49 @@ for backend in (:khronos, :llvm) @testset "kernel functions" begin @testset "calling convention" begin - kernel() = return + mod = @eval module $(gensym()) + kernel() = return + end @test @filecheck begin check"CHECK-NOT: spir_kernel" - SPIRV.code_llvm(kernel, Tuple{}; backend, dump_module=true) + SPIRV.code_llvm(mod.kernel, Tuple{}; backend, dump_module=true) end @test @filecheck begin check"CHECK: spir_kernel" - SPIRV.code_llvm(kernel, Tuple{}; backend, dump_module=true, kernel=true) + SPIRV.code_llvm(mod.kernel, Tuple{}; backend, dump_module=true, kernel=true) end end @testset "byval workaround" begin mod = @eval module $(gensym()) - export kernel kernel(x) = return end @test @filecheck begin - check"TYPED: @{{.*kernel.*}}([1 x i64]*" - check"OPAQUE: @{{.*kernel.*}}(ptr" + check"CHECK-LABEL: define void @{{.*kernel.*}}(" SPIRV.code_llvm(mod.kernel, Tuple{Tuple{Int}}; backend) end @test @filecheck begin - check"TYPED: @{{.*kernel.*}}({ [1 x i64] }* byval" - check"OPAQUE: @{{.*kernel.*}}(ptr byval" + check"CHECK-LABEL: define spir_kernel void 
@{{.*kernel.*}}(" SPIRV.code_llvm(mod.kernel, Tuple{Tuple{Int}}; backend, kernel=true) end end @testset "byval bug" begin # byval added alwaysinline, which could conflict with noinline and fail verification - @noinline kernel() = return - SPIRV.code_llvm(devnull, kernel, Tuple{}; backend, kernel=true) + mod = @eval module $(gensym()) + @noinline kernel() = return + end + SPIRV.code_llvm(devnull, mod.kernel, Tuple{}; backend, kernel=true) @test "We did not crash!" != "" end end @testset "unsupported type detection" begin mod = @eval module $(gensym()) - export kernel function kernel(ptr, val) unsafe_store!(ptr, val) return @@ -54,16 +54,19 @@ end end @test @filecheck begin + check"CHECK-LABEL: define {{.*}} @{{.*kernel.*}}(" check"CHECK: store half" SPIRV.code_llvm(mod.kernel, Tuple{Ptr{Float16}, Float16}; backend) end @test @filecheck begin + check"CHECK-LABEL: define {{.*}} @{{.*kernel.*}}(" check"CHECK: store float" SPIRV.code_llvm(mod.kernel, Tuple{Ptr{Float32}, Float32}; backend) end @test @filecheck begin + check"CHECK-LABEL: define {{.*}} @{{.*kernel.*}}(" check"CHECK: store double" SPIRV.code_llvm(mod.kernel, Tuple{Ptr{Float64}, Float64}; backend) end @@ -92,14 +95,16 @@ end @testset "asm" begin @testset "trap removal" begin - function kernel(x) - x && error() - return + mod = @eval module $(gensym()) + function kernel(x) + x && error() + return + end end @test @filecheck begin - check"CHECK: OpFunctionCall %void %{{(julia|j)_error}}" - SPIRV.code_native(kernel, Tuple{Bool}; backend, kernel=true) + check"CHECK: {{.*kernel.*}}" + SPIRV.code_native(mod.kernel, Tuple{Bool}; backend, kernel=true) end end From b0002f4565f22c9bdb58ac6f6ba1ea76ff783e2a Mon Sep 17 00:00:00 2001 From: Tim Besard Date: Tue, 24 Jun 2025 18:55:30 +0200 Subject: [PATCH 4/6] Reduce reliance on (greedy) wildcards. 
--- test/gcn.jl | 30 ++++++++++----------- test/metal.jl | 6 ++--- test/native.jl | 71 +++++++++++++++++++++++++------------------------- test/ptx.jl | 24 ++++++++++------- test/spirv.jl | 18 +++++++------ 5 files changed, 78 insertions(+), 71 deletions(-) diff --git a/test/gcn.jl b/test/gcn.jl index b5f61791..e39f6a11 100644 --- a/test/gcn.jl +++ b/test/gcn.jl @@ -40,7 +40,7 @@ end end @test @filecheck begin - check"CHECK-LABEL: {{.*kernel.*}}:" + check"CHECK-LABEL: {{(julia|j)_kernel_[0-9]+}}:" check"CHECK: s_trap 2" GCN.code_native(mod.kernel, Tuple{}) end @@ -64,9 +64,9 @@ end end @test @filecheck begin - check"CHECK-LABEL: {{.*parent.*}}:" - check"CHECK: s_add_u32{{.*(julia|j)_child_.*}}@rel32@" - check"CHECK: s_addc_u32{{.*(julia|j)_child_.*}}@rel32@" + check"CHECK-LABEL: {{(julia|j)_parent_[0-9]+}}:" + check"CHECK: s_add_u32 {{.+}} {{(julia|j)_child_[0-9]+}}@rel32@" + check"CHECK: s_addc_u32 {{.+}} {{(julia|j)_child_[0-9]+}}@rel32@" GCN.code_native(mod.parent, Tuple{Int64}; dump_module=true) end end @@ -82,9 +82,9 @@ end end @test @filecheck begin - check"CHECK-NOT: .amdhsa_kernel {{.*}}nonentry" - check"CHECK: .type {{.*nonentry.*}},@function" - check"CHECK: .amdhsa_kernel {{.*entry.*}}" + check"CHECK-NOT: .amdhsa_kernel {{(julia|j)_nonentry_[0-9]+}}" + check"CHECK: .type {{(julia|j)_nonentry_[0-9]+}},@function" + check"CHECK: .amdhsa_kernel _Z5entry5Int64" GCN.code_native(mod.entry, Tuple{Int64}; dump_module=true, kernel=true) end end @@ -107,12 +107,12 @@ end end @test @filecheck begin - check"CHECK: .type {{.*child.*}},@function" + check"CHECK: .type {{(julia|j)_child_[0-9]+}},@function" GCN.code_native(mod.parent1, Tuple{Int}; dump_module=true) end @test @filecheck begin - check"CHECK: .type {{.*child.*}},@function" + check"CHECK: .type {{(julia|j)_child_[0-9]+}},@function" GCN.code_native(mod.parent2, Tuple{Int}; dump_module=true) end end @@ -136,14 +136,14 @@ end end @test @filecheck begin - check"CHECK-DAG: .type {{.*child1.*}},@function" - 
check"CHECK-DAG: .type {{.*child2.*}},@function" + check"CHECK-DAG: .type {{(julia|j)_child1_[0-9]+}},@function" + check"CHECK-DAG: .type {{(julia|j)_child2_[0-9]+}},@function" GCN.code_native(mod.parent1, Tuple{Int}; dump_module=true) end @test @filecheck begin - check"CHECK-DAG: .type {{.*child1.*}},@function" - check"CHECK-DAG: .type {{.*child2.*}},@function" + check"CHECK-DAG: .type {{(julia|j)_child1_[0-9]+}},@function" + check"CHECK-DAG: .type {{(julia|j)_child2_[0-9]+}},@function" GCN.code_native(mod.parent2, Tuple{Int}; dump_module=true) end end @@ -163,7 +163,7 @@ end end @test @filecheck begin - check"CHECK-LABEL: {{.*kernel.*}}:" + check"CHECK-LABEL: {{(julia|j)_kernel_[0-9]+}}:" check"CHECK-NOT: jl_throw" check"CHECK-NOT: jl_invoke" GCN.code_native(mod.kernel, Tuple{Ptr{Int32}}) @@ -256,7 +256,7 @@ end end @test @filecheck begin - check"CHECK-LABEL: define void @{{.*kernel.*}}" + check"CHECK-LABEL: define void @{{(julia|j)_kernel_[0-9]+}}" check"CHECK: jl_box_float32" GCN.code_llvm(mod.kernel, Tuple{Float32,Ptr{Float32}}) end diff --git a/test/metal.jl b/test/metal.jl index 0a9c1c68..951781f1 100644 --- a/test/metal.jl +++ b/test/metal.jl @@ -81,7 +81,7 @@ end @test @filecheck begin check"CHECK-LABEL: define void @{{(julia|j)_kernel_[0-9]+}}" - check"TYPED-SAME: ({{.*}} addrspace(1)* %{{.+}})" + check"TYPED-SAME: ({{.+}} addrspace(1)* %{{.+}})" check"OPAQUE-SAME: (ptr addrspace(1) %{{.+}})" check"CHECK: call i32 @julia.air.thread_position_in_threadgroup.i32" Metal.code_llvm(mod.kernel, Tuple{Core.LLVMPtr{Int,1}}) @@ -89,7 +89,7 @@ end @test @filecheck begin check"CHECK-LABEL: define void @_Z6kernel7LLVMPtrI5Int64Li1EE" - check"TYPED-SAME: ({{.*}} addrspace(1)* %{{.+}}, i32 %thread_position_in_threadgroup)" + check"TYPED-SAME: ({{.+}} addrspace(1)* %{{.+}}, i32 %thread_position_in_threadgroup)" check"OPAQUE-SAME: (ptr addrspace(1) %{{.+}}, i32 %thread_position_in_threadgroup)" check"CHECK-NOT: call i32 @julia.air.thread_position_in_threadgroup.i32" 
Metal.code_llvm(mod.kernel, Tuple{Core.LLVMPtr{Int,1}}; kernel=true) @@ -174,7 +174,7 @@ end end @test @filecheck begin - check"CHECK: @{{.+}} = {{.*}} addrspace(2) constant [2 x float]" + check"CHECK: @{{.+}} ={{.*}} addrspace(2) constant [2 x float]" check"CHECK: define void @_Z6kernel7LLVMPtrI7Float32Li1EE5Int64" Metal.code_llvm(mod.kernel, Tuple{Core.LLVMPtr{Float32,1}, Int}; dump_module=true, kernel=true) diff --git a/test/native.jl b/test/native.jl index 5486e32c..1b0757ab 100644 --- a/test/native.jl +++ b/test/native.jl @@ -12,12 +12,12 @@ end @test @filecheck begin - check"CHECK: {{.*identity.*}}" + check"CHECK: @{{(julia|j)_identity_[0-9]+}}" GPUCompiler.code_llvm(job) end @test @filecheck begin - check"CHECK: {{.*identity.*}}" + check"CHECK: @{{(julia|j)_identity_[0-9]+}}" GPUCompiler.code_native(job) end end @@ -81,23 +81,24 @@ end end @testset "cached compilation" begin - @gensym child kernel unrelated - @eval @noinline $child(i) = i - @eval $kernel(i) = $child(i)+1 + mod = @eval module $(gensym()) + @noinline child(i) = i + kernel(i) = child(i)+1 + end # smoke test - job, _ = Native.create_job(eval(kernel), (Int64,)) + job, _ = Native.create_job(mod.kernel, (Int64,)) @test @filecheck begin - check"CHECK-LABEL: define {{.*}} @{{.*kernel.*}}(" + check"CHECK-LABEL: define i64 @{{(julia|j)_kernel_[0-9]+}}" check"CHECK: add i64 %{{[0-9]+}}, 1" GPUCompiler.code_llvm(job) end # basic redefinition - @eval $kernel(i) = $child(i)+2 - job, _ = Native.create_job(eval(kernel), (Int64,)) + @eval mod kernel(i) = child(i)+2 + job, _ = Native.create_job(mod.kernel, (Int64,)) @test @filecheck begin - check"CHECK-LABEL: define {{.*}} @{{.*kernel.*}}(" + check"CHECK-LABEL: define i64 @{{(julia|j)_kernel_[0-9]+}}" check"CHECK: add i64 %{{[0-9]+}}, 2" GPUCompiler.code_llvm(job) end @@ -113,13 +114,13 @@ end end linker(job, compiled) = compiled cache = Dict() - ft = typeof(eval(kernel)) + ft = typeof(mod.kernel) tt = Tuple{Int64} # initial compilation source = 
methodinstance(ft, tt, Base.get_world_counter()) @test @filecheck begin - check"CHECK-LABEL: define {{.*}} @{{.*kernel.*}}(" + check"CHECK-LABEL: define i64 @{{(julia|j)_kernel_[0-9]+}}" check"CHECK: add i64 %{{[0-9]+}}, 2" Base.invokelatest(GPUCompiler.cached_compilation, cache, source, job.config, compiler, linker) end @@ -127,17 +128,17 @@ end # cached compilation @test @filecheck begin - check"CHECK-LABEL: define {{.*}} @{{.*kernel.*}}(" + check"CHECK-LABEL: define i64 @{{(julia|j)_kernel_[0-9]+}}" check"CHECK: add i64 %{{[0-9]+}}, 2" Base.invokelatest(GPUCompiler.cached_compilation, cache, source, job.config, compiler, linker) end @test invocations[] == 1 # redefinition - @eval $kernel(i) = $child(i)+3 + @eval mod kernel(i) = child(i)+3 source = methodinstance(ft, tt, Base.get_world_counter()) @test @filecheck begin - check"CHECK-LABEL: define {{.*}} @{{.*kernel.*}}(" + check"CHECK-LABEL: define i64 @{{(julia|j)_kernel_[0-9]+}}" check"CHECK: add i64 %{{[0-9]+}}, 3" Base.invokelatest(GPUCompiler.cached_compilation, cache, source, job.config, compiler, linker) end @@ -145,19 +146,19 @@ end # cached compilation @test @filecheck begin - check"CHECK-LABEL: define {{.*}} @{{.*kernel.*}}(" + check"CHECK-LABEL: define i64 @{{(julia|j)_kernel_[0-9]+}}" check"CHECK: add i64 %{{[0-9]+}}, 3" Base.invokelatest(GPUCompiler.cached_compilation, cache, source, job.config, compiler, linker) end @test invocations[] == 2 # redefinition of an unrelated function - @eval $unrelated(i) = 42 + @eval mod unrelated(i) = 42 Base.invokelatest(GPUCompiler.cached_compilation, cache, source, job.config, compiler, linker) @test invocations[] == 2 # redefining child functions - @eval @noinline $child(i) = i+1 + @eval mod @noinline child(i) = i+1 Base.invokelatest(GPUCompiler.cached_compilation, cache, source, job.config, compiler, linker) @test invocations[] == 3 @@ -168,7 +169,7 @@ end # change in configuration config = CompilerConfig(job.config; name="foobar") @test @filecheck begin - 
check"CHECK: define {{.*}} @foobar" + check"CHECK: define i64 @foobar" Base.invokelatest(GPUCompiler.cached_compilation, cache, source, config, compiler, linker) end @test invocations[] == 4 @@ -183,13 +184,13 @@ end end t = @async Base.invokelatest(background, job) wait(c1) # make sure the task has started - @eval $kernel(i) = $child(i)+4 + @eval mod kernel(i) = child(i)+4 source = methodinstance(ft, tt, Base.get_world_counter()) ir = Base.invokelatest(GPUCompiler.cached_compilation, cache, source, job.config, compiler, linker) @test contains(ir, r"add i64 %\d+, 4") notify(c2) # wake up the task @test @filecheck begin - check"CHECK-LABEL: define {{.*}} @{{.*kernel.*}}(" + check"CHECK-LABEL: define i64 @{{(julia|j)_kernel_[0-9]+}}" check"CHECK: add i64 %{{[0-9]+}}, 3" fetch(t) end @@ -220,7 +221,7 @@ end @test @filecheck begin # module should contain our function + a generic call wrapper - check"CHECK: {{.*valid_kernel.*}}" + check"CHECK: @{{(julia|j)_valid_kernel_[0-9]+}}" Native.code_llvm(mod.valid_kernel, Tuple{}; optimize=false, dump_module=true) end @@ -244,8 +245,8 @@ end end @test @filecheck begin - check"CHECK-LABEL: define {{.*}} @{{.*parent.*}}" - check"CHECK: call {{.+}} @{{.*child.*}}" + check"CHECK-LABEL: define i64 @{{(julia|j)_parent_[0-9]+}}" + check"CHECK: call{{.*}} i64 @{{(julia|j)_child_[0-9]+}}" Native.code_llvm(mod.parent, Tuple{Int}) end end @@ -312,17 +313,17 @@ end f = () -> x+1 end @test @filecheck begin - check"CHECK: define internal fastcc {{.+}} @julia" + check"CHECK: define {{.+}} @julia" check"TYPED: define nonnull {}* @jfptr" check"OPAQUE: define nonnull ptr @jfptr" - check"CHECK: call fastcc {{.+}} @julia" + check"CHECK: call {{.+}} @julia" Native.code_llvm(mod.f, Tuple{}; entry_abi=:func, dump_module=true) end end @testset "function entry safepoint emission" begin @test @filecheck begin - check"CHECK-LABEL: define {{.*}} @{{.*identity.*}}(" + check"CHECK-LABEL: define void @{{(julia|j)_identity_[0-9]+}}" check"CHECK-NOT: 
%safepoint" Native.code_llvm(identity, Tuple{Nothing}; entry_safepoint=false, optimize=false, dump_module=true) end @@ -331,7 +332,7 @@ end # see https://github.com/JuliaLang/julia/pull/57010/files#r2079576894 if VERSION < v"1.13.0-DEV.533" @test @filecheck begin - check"CHECK-LABEL: define {{.*}} @{{.*identity.*}}(" + check"CHECK-LABEL: define void @{{(julia|j)_identity_[0-9]+}}" check"CHECK: %safepoint" Native.code_llvm(identity, Tuple{Nothing}; entry_safepoint=true, optimize=false, dump_module=true) end @@ -342,34 +343,34 @@ end # XXX: broken by JuliaLang/julia#51599, see JuliaGPU/GPUCompiler.jl#527 mod = @eval module $(gensym()) import ..sink - f_expensive(x) = $(foldl((e, _) -> :($sink($e) + $sink(x)), 1:100; init=:x)) + expensive(x) = $(foldl((e, _) -> :($sink($e) + $sink(x)), 1:100; init=:x)) function g(x) - f_expensive(x) + expensive(x) return end function h(x) - f_expensive(x) + expensive(x) return end end @test @filecheck begin - check"CHECK: define {{.*f_expensive.*}}" + check"CHECK: @{{(julia|j)_expensive_[0-9]+}}" Native.code_llvm(mod.g, Tuple{Int64}; dump_module=true, kernel=true) end @test @filecheck begin - check"CHECK-NOT: define {{.*f_expensive.*}}" + check"CHECK-NOT: @{{(julia|j)_expensive_[0-9]+}}" Native.code_llvm(mod.g, Tuple{Int64}; dump_module=true, kernel=true, always_inline=true) end @test @filecheck begin - check"CHECK: define {{.*f_expensive.*}}" + check"CHECK: @{{(julia|j)_expensive_[0-9]+}}" Native.code_llvm(mod.h, Tuple{Int64}; dump_module=true, kernel=true) end @test @filecheck begin - check"CHECK-NOT: define {{.*f_expensive.*}}" + check"CHECK-NOT: @{{(julia|j)_expensive_[0-9]+}}" Native.code_llvm(mod.h, Tuple{Int64}; dump_module=true, kernel=true, always_inline=true) end end diff --git a/test/ptx.jl b/test/ptx.jl index fbf982b8..18dd0e4a 100644 --- a/test/ptx.jl +++ b/test/ptx.jl @@ -5,7 +5,7 @@ foobar() = throw(DivideError()) end @test @filecheck begin - check"CHECK-LABEL: define void @{{.*foobar.*}}()" + check"CHECK-LABEL: define 
void @{{(julia|j)_foobar_[0-9]+}}" # plain exceptions should get lowered to a call to the GPU run-time # not a jl_throw referencing a jl_value_t representing the exception check"CHECK-NOT: jl_throw" @@ -26,12 +26,16 @@ end end @test @filecheck begin - check"CHECK-LABEL: define void @{{.*kernel.*}}(" + check"CHECK-LABEL: define void @{{(julia|j)_kernel_[0-9]+}}" + check"TYPED-SAME: ({{({ i64 }|\[1 x i64\])}}*" + check"OPAQUE-SAME: (ptr" PTX.code_llvm(mod.kernel, Tuple{mod.Aggregate}) end @test @filecheck begin - check"CHECK-LABEL: define ptx_kernel void @_Z6kernel9Aggregate({{.*({ i64 }|\[1 x i64\])}}" + check"CHECK-LABEL: define ptx_kernel void @_Z6kernel9Aggregate" + check"TYPED-NOT: *" + check"OPAQUE-NOT: ptr" PTX.code_llvm(mod.kernel, Tuple{mod.Aggregate}; kernel=true) end end @@ -137,12 +141,12 @@ end end # child1 doesn't use the state @test @filecheck begin - check"CHECK-LABEL: define internal fastcc i64 @{{.*child1.*}}(" + check"CHECK-LABEL: define{{.*}} i64 @{{(julia|j)_child1_[0-9]+}}" PTX.code_llvm(mod.kernel, Tuple{Ptr{Int64}}; kernel=true, dump_module=true) end # child2 does @test @filecheck begin - check"CHECK-LABEL: define internal fastcc i64 @{{.*child2.*}}(" + check"CHECK-LABEL: define{{.*}} i64 @{{(julia|j)_child2_[0-9]+}}" PTX.code_llvm(mod.kernel, Tuple{Ptr{Int64}}; kernel=true, dump_module=true) end end @@ -381,17 +385,17 @@ end mod = @eval module $(gensym()) function kernel(a,b) c = Int32(a) - # the conversion to Int32 may fail, in which case the input Float32 is boxed in order to - # pass it to the @nospecialize exception constructor. we should really avoid that (eg. - # by avoiding @nospecialize, or optimize the unused arguments away), but for now the box - # should just work. + # the conversion to Int32 may fail, in which case the input Float32 is boxed in + # order to pass it to the @nospecialize exception constructor. we should really + # avoid that (eg. 
by avoiding @nospecialize, or optimize the unused arguments + # away), but for now the box should just work. unsafe_store!(b, c) return end end @test @filecheck begin - check"CHECK-LABEL: define void @{{.*kernel.*}}(" + check"CHECK-LABEL: define void @{{(julia|j)_kernel_[0-9]+}}" check"CHECK: jl_box_float32" PTX.code_llvm(mod.kernel, Tuple{Float32,Ptr{Float32}}) end diff --git a/test/spirv.jl b/test/spirv.jl index dcf38779..be7f2651 100644 --- a/test/spirv.jl +++ b/test/spirv.jl @@ -25,12 +25,12 @@ end end @test @filecheck begin - check"CHECK-LABEL: define void @{{.*kernel.*}}(" + check"CHECK-LABEL: define void @{{(julia|j)_kernel_[0-9]+}}" SPIRV.code_llvm(mod.kernel, Tuple{Tuple{Int}}; backend) end @test @filecheck begin - check"CHECK-LABEL: define spir_kernel void @{{.*kernel.*}}(" + check"CHECK-LABEL: define spir_kernel void @_Z6kernel" SPIRV.code_llvm(mod.kernel, Tuple{Tuple{Int}}; backend, kernel=true) end end @@ -40,8 +40,10 @@ end mod = @eval module $(gensym()) @noinline kernel() = return end - SPIRV.code_llvm(devnull, mod.kernel, Tuple{}; backend, kernel=true) - @test "We did not crash!" 
!= "" + @test @filecheck begin + check"CHECK-LABEL: define spir_kernel void @_Z6kernel" + SPIRV.code_llvm(mod.kernel, Tuple{}; backend, kernel=true) + end end end @@ -54,19 +56,19 @@ end end @test @filecheck begin - check"CHECK-LABEL: define {{.*}} @{{.*kernel.*}}(" + check"CHECK-LABEL: define void @{{(julia|j)_kernel_[0-9]+}}" check"CHECK: store half" SPIRV.code_llvm(mod.kernel, Tuple{Ptr{Float16}, Float16}; backend) end @test @filecheck begin - check"CHECK-LABEL: define {{.*}} @{{.*kernel.*}}(" + check"CHECK-LABEL: define void @{{(julia|j)_kernel_[0-9]+}}" check"CHECK: store float" SPIRV.code_llvm(mod.kernel, Tuple{Ptr{Float32}, Float32}; backend) end @test @filecheck begin - check"CHECK-LABEL: define {{.*}} @{{.*kernel.*}}(" + check"CHECK-LABEL: define void @{{(julia|j)_kernel_[0-9]+}}" check"CHECK: store double" SPIRV.code_llvm(mod.kernel, Tuple{Ptr{Float64}, Float64}; backend) end @@ -103,7 +105,7 @@ end end @test @filecheck begin - check"CHECK: {{.*kernel.*}}" + check"CHECK: %_Z6kernel4Bool = OpFunction %void None" SPIRV.code_native(mod.kernel, Tuple{Bool}; backend, kernel=true) end end From 611ba7fa25f98d3c3a144ad3eecc21e604de5c0e Mon Sep 17 00:00:00 2001 From: Tim Besard Date: Tue, 24 Jun 2025 18:59:35 +0200 Subject: [PATCH 5/6] Apply suggestions from code review Co-authored-by: Valentin Churavy --- test/helpers/test.jl | 2 -- 1 file changed, 2 deletions(-) diff --git a/test/helpers/test.jl b/test/helpers/test.jl index 9de421e4..014ddb27 100644 --- a/test/helpers/test.jl +++ b/test/helpers/test.jl @@ -42,13 +42,11 @@ module FileCheck import LLVM_jll import IOCapture using GPUCompiler, LLVM - using Test export filecheck, @filecheck, @check_str global filecheck_path::String function __init__() - # TODO: Windows global filecheck_path = joinpath(LLVM_jll.artifact_dir, "tools", "FileCheck") end From 8a17cbac8aa2b2b63049acbf1653ef069217b516 Mon Sep 17 00:00:00 2001 From: Tim Besard Date: Wed, 25 Jun 2025 08:36:35 +0200 Subject: [PATCH 6/6] Fix GCN test. 
--- test/gcn.jl | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/test/gcn.jl b/test/gcn.jl index e39f6a11..0108d6a5 100644 --- a/test/gcn.jl +++ b/test/gcn.jl @@ -41,14 +41,10 @@ end @test @filecheck begin check"CHECK-LABEL: {{(julia|j)_kernel_[0-9]+}}:" + check"CHECK: s_cbranch_exec" check"CHECK: s_trap 2" GCN.code_native(mod.kernel, Tuple{}) end - # XXX - @test_skip occursin("s_cbranch_execz", asm) - if Base.libllvm_version < v"9" - @test_broken occursin("v_readfirstlane", asm) - end end @testset "child functions" begin