-
Notifications
You must be signed in to change notification settings - Fork 57
[SPIRV] convert i128 allocas to <2 x i64> #734
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: master
Are you sure you want to change the base?
Conversation
It seems like the issue is that codegen hard codes `MAX_ALIGN` based on the host platform ABI and assumes that if the host supports `i128` allocas the target will support it as well. For now just handle this by converting `i128` allocas to `<2 x i64>` allocas. Discovered while working on JuliaGPU/OpenCL.jl#379 To reproduce the issue: ```julia-repl julia> using OpenCL, SIMD julia> OpenCL.code_llvm(NTuple{2, Vec{8, Float32}}) do x... @noinline +(x...) end ; @ REPL[7]:2 within `#11` define void @julia__11_16515(ptr noalias nocapture noundef nonnull sret([1 x <8 x float>]) align 16 dereferenceable(32) %sret_return, ptr nocapture noundef nonnull readonly align 16 dereferenceable(32) %"x[1]::Vec", ptr nocapture noundef nonnull readonly align 16 dereferenceable(32) %"x[2]::Vec") local_unnamed_addr { top: %"new::Tuple" = alloca [2 x [1 x <8 x float>]], align 16 %sret_box = alloca [2 x i128], align 16 call void @llvm.memcpy.p0.p0.i64(ptr noundef nonnull align 16 dereferenceable(32) %"new::Tuple", ptr noundef nonnull align 16 dereferenceable(32) %"x[1]::Vec", i64 32, i1 false) %0 = getelementptr inbounds i8, ptr %"new::Tuple", i64 32 call void @llvm.memcpy.p0.p0.i64(ptr noundef nonnull align 16 dereferenceable(32) %0, ptr noundef nonnull align 16 dereferenceable(32) %"x[2]::Vec", i64 32, i1 false) call fastcc void @julia___16519(ptr noalias nocapture noundef sret([1 x <8 x float>]) %sret_box, ptr nocapture readonly %"new::Tuple", ptr nocapture readonly %0) call void @llvm.memcpy.p0.p0.i64(ptr noundef nonnull align 16 dereferenceable(32) %sret_return, ptr noundef nonnull align 16 dereferenceable(32) %sret_box, i64 32, i1 false) ret void } ``` A similar workaround might be needed for Metal, but I don't have a Mac to test
|
Your PR requires formatting changes to meet the project's style guidelines. Click here to view the suggested changes.diff --git a/src/spirv.jl b/src/spirv.jl
index ea552e7..acf525a 100644
--- a/src/spirv.jl
+++ b/src/spirv.jl
@@ -293,50 +293,50 @@ function convert_i128_allocas!(mod::LLVM.Module)
changed = false
@tracepoint "convert i128 allocas" begin
- for f in functions(mod), bb in blocks(f)
- for inst in instructions(bb)
- if inst isa LLVM.AllocaInst
- alloca_type = LLVMType(LLVM.API.LLVMGetAllocatedType(inst))
-
- # Check if this is an i128 or an array of i128
- if alloca_type isa LLVM.ArrayType
- T = eltype(alloca_type)
- else
- T = alloca_type
- end
- if T isa LLVM.IntegerType && width(T) == 128
- # replace i128 with <2 x i64>
- vec_type = LLVM.VectorType(LLVM.Int64Type(), 2)
+ for f in functions(mod), bb in blocks(f)
+ for inst in instructions(bb)
+ if inst isa LLVM.AllocaInst
+ alloca_type = LLVMType(LLVM.API.LLVMGetAllocatedType(inst))
+ # Check if this is an i128 or an array of i128
if alloca_type isa LLVM.ArrayType
- array_size = length(alloca_type)
- new_alloca_type = LLVM.ArrayType(vec_type, array_size)
+ T = eltype(alloca_type)
else
- new_alloca_type = vec_type
+ T = alloca_type
end
- align_val = alignment(inst)
-
- # Create new alloca with vector type
- @dispose builder=IRBuilder() begin
- position!(builder, inst)
- new_alloca = alloca!(builder, new_alloca_type)
- alignment!(new_alloca, align_val)
-
- # Bitcast the new alloca back to the original pointer type
- # XXX: The issue only seems to manifest itself on LLVM >= 18
- # where we use opaque pointers anyways, so not sure this
- # is needed
- old_ptr_type = LLVMType(LLVM.API.LLVMTypeOf(inst.ref))
- bitcast_ptr = bitcast!(builder, new_alloca, old_ptr_type)
-
- replace_uses!(inst, bitcast_ptr)
- erase!(inst)
- changed = true
+ if T isa LLVM.IntegerType && width(T) == 128
+ # replace i128 with <2 x i64>
+ vec_type = LLVM.VectorType(LLVM.Int64Type(), 2)
+
+ if alloca_type isa LLVM.ArrayType
+ array_size = length(alloca_type)
+ new_alloca_type = LLVM.ArrayType(vec_type, array_size)
+ else
+ new_alloca_type = vec_type
+ end
+ align_val = alignment(inst)
+
+ # Create new alloca with vector type
+ @dispose builder = IRBuilder() begin
+ position!(builder, inst)
+ new_alloca = alloca!(builder, new_alloca_type)
+ alignment!(new_alloca, align_val)
+
+ # Bitcast the new alloca back to the original pointer type
+ # XXX: The issue only seems to manifest itself on LLVM >= 18
+ # where we use opaque pointers anyways, so not sure this
+ # is needed
+ old_ptr_type = LLVMType(LLVM.API.LLVMTypeOf(inst.ref))
+ bitcast_ptr = bitcast!(builder, new_alloca, old_ptr_type)
+
+ replace_uses!(inst, bitcast_ptr)
+ erase!(inst)
+ changed = true
+ end
end
end
end
end
- end
end
return changed
diff --git a/test/spirv.jl b/test/spirv.jl
index e890314..de41a85 100644
--- a/test/spirv.jl
+++ b/test/spirv.jl
@@ -112,37 +112,41 @@ end
end
-@testset "replace i128 allocas" begin
- mod = @eval module $(gensym())
+ @testset "replace i128 allocas" begin
+ mod = @eval module $(gensym())
# reimplement some of SIMD.jl
struct Vec{N, T}
data::NTuple{N, Core.VecElement{T}}
end
@generated function fadd(x::Vec{N, Float32}, y::Vec{N, Float32}) where {N}
quote
- Vec(Base.llvmcall($"""
- %ret = fadd <$N x float> %0, %1
- ret <$N x float> %ret
- """, NTuple{N, Core.VecElement{Float32}}, NTuple{2, NTuple{N, Core.VecElement{Float32}}}, x.data, y.data))
+ Vec(
+ Base.llvmcall(
+ $"""
+ %ret = fadd <$N x float> %0, %1
+ ret <$N x float> %ret
+ """, NTuple{N, Core.VecElement{Float32}}, NTuple{2, NTuple{N, Core.VecElement{Float32}}}, x.data, y.data
+ )
+ )
end
end
kernel(x...) = @noinline fadd(x...)
- end
+ end
- @test @filecheck begin
- # TODO: should structs of `NTuple{VecElement{T}}` be passed by value instead of sret?
- check"CHECK-NOT: i128"
- check"CHECK-LABEL: define void @{{(julia|j)_kernel_[0-9]+}}"
- @static VERSION >= v"1.12" && check"CHECK: alloca <2 x i64>, align 16"
- SPIRV.code_llvm(mod.kernel, NTuple{2, mod.Vec{4, Float32}}; backend, dump_module=true)
- end
+ @test @filecheck begin
+ # TODO: should structs of `NTuple{VecElement{T}}` be passed by value instead of sret?
+ check"CHECK-NOT: i128"
+ check"CHECK-LABEL: define void @{{(julia|j)_kernel_[0-9]+}}"
+ @static VERSION >= v"1.12" && check"CHECK: alloca <2 x i64>, align 16"
+ SPIRV.code_llvm(mod.kernel, NTuple{2, mod.Vec{4, Float32}}; backend, dump_module = true)
+ end
- @test @filecheck begin
- check"CHECK-NOT: i128"
- check"CHECK-LABEL: define void @{{(julia|j)_kernel_[0-9]+}}"
- @static VERSION >= v"1.12" && check"CHECK: alloca [2 x <2 x i64>], align 16"
- SPIRV.code_llvm(mod.kernel, NTuple{2, mod.Vec{8, Float32}}; backend, dump_module=true)
+ @test @filecheck begin
+ check"CHECK-NOT: i128"
+ check"CHECK-LABEL: define void @{{(julia|j)_kernel_[0-9]+}}"
+ @static VERSION >= v"1.12" && check"CHECK: alloca [2 x <2 x i64>], align 16"
+ SPIRV.code_llvm(mod.kernel, NTuple{2, mod.Vec{8, Float32}}; backend, dump_module = true)
+ end
end
-end
end |
Codecov Report✅ All modified and coverable lines are covered by tests. Additional details and impacted files@@ Coverage Diff @@
## master #734 +/- ##
==========================================
- Coverage 75.59% 75.23% -0.37%
==========================================
Files 24 24
Lines 3639 3670 +31
==========================================
+ Hits 2751 2761 +10
- Misses 888 909 +21 ☔ View full report in Codecov by Sentry. 🚀 New features to boost your workflow:
|
|
Seems reasonable. Can you add a test here? |
It seems like the issue is that codegen hard codes
MAX_ALIGNbased onthe host platform ABI and assumes that if the host supports
i128allocas the target will support it as well. For now just handle this by
converting
i128allocas to<2 x i64>allocas. Discovered whileworking on JuliaGPU/OpenCL.jl#379
To reproduce the issue:
A similar workaround might be needed for Metal, but I don't have a Mac
to test