From 00dec18ee81513d53190d3c440d8bdee164068fb Mon Sep 17 00:00:00 2001 From: Simeon David Schaub Date: Tue, 4 Nov 2025 11:41:45 +0100 Subject: [PATCH 1/4] [SPIRV] convert i128 allocas to <2 x i64> It seems like the issue is that codegen hard-codes `MAX_ALIGN` based on the host platform ABI and assumes that if the host supports `i128` allocas, the target will support them as well. For now, just handle this by converting `i128` allocas to `<2 x i64>` allocas. Discovered while working on JuliaGPU/OpenCL.jl#379. To reproduce the issue: ```julia-repl julia> using OpenCL, SIMD julia> OpenCL.code_llvm(NTuple{2, Vec{8, Float32}}) do x... @noinline +(x...) end ; @ REPL[7]:2 within `#11` define void @julia__11_16515(ptr noalias nocapture noundef nonnull sret([1 x <8 x float>]) align 16 dereferenceable(32) %sret_return, ptr nocapture noundef nonnull readonly align 16 dereferenceable(32) %"x[1]::Vec", ptr nocapture noundef nonnull readonly align 16 dereferenceable(32) %"x[2]::Vec") local_unnamed_addr { top: %"new::Tuple" = alloca [2 x [1 x <8 x float>]], align 16 %sret_box = alloca [2 x i128], align 16 call void @llvm.memcpy.p0.p0.i64(ptr noundef nonnull align 16 dereferenceable(32) %"new::Tuple", ptr noundef nonnull align 16 dereferenceable(32) %"x[1]::Vec", i64 32, i1 false) %0 = getelementptr inbounds i8, ptr %"new::Tuple", i64 32 call void @llvm.memcpy.p0.p0.i64(ptr noundef nonnull align 16 dereferenceable(32) %0, ptr noundef nonnull align 16 dereferenceable(32) %"x[2]::Vec", i64 32, i1 false) call fastcc void @julia___16519(ptr noalias nocapture noundef sret([1 x <8 x float>]) %sret_box, ptr nocapture readonly %"new::Tuple", ptr nocapture readonly %0) call void @llvm.memcpy.p0.p0.i64(ptr noundef nonnull align 16 dereferenceable(32) %sret_return, ptr noundef nonnull align 16 dereferenceable(32) %sret_box, i64 32, i1 false) ret void } ``` A similar workaround might be needed for Metal, but I don't have a Mac to test this on. --- src/spirv.jl | 59 
++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 59 insertions(+) diff --git a/src/spirv.jl b/src/spirv.jl index 8eea92c6..dfed3137 100644 --- a/src/spirv.jl +++ b/src/spirv.jl @@ -110,6 +110,9 @@ function finish_ir!(job::CompilerJob{SPIRVCompilerTarget}, mod::LLVM.Module, entry = wrap_byval(job, mod, entry) end + # SPIR-V does not support i128, convert alloca arrays to vector types + convert_i128_allocas!(mod) + # add module metadata ## OpenCL 2.0 push!(metadata(mod)["opencl.ocl.version"], @@ -283,6 +286,62 @@ function rm_freeze!(mod::LLVM.Module) return changed end +# convert alloca [N x i128] to alloca [N x <2 x i64>] +# SPIR-V doesn't support i128 types, but we can represent them as vectors +function convert_i128_allocas!(mod::LLVM.Module) + job = current_job::CompilerJob + changed = false + @tracepoint "convert i128 allocas" begin + + for f in functions(mod), bb in blocks(f) + for inst in instructions(bb) + if inst isa LLVM.AllocaInst + alloca_type = LLVMType(LLVM.API.LLVMGetAllocatedType(inst)) + + # Check if this is an i128 or an array of i128 + if alloca_type isa LLVM.ArrayType + T = eltype(alloca_type) + else + T = alloca_type + end + if T isa LLVM.IntegerType && width(T) == 128 + # replace i128 with <2 x i64> + vec_type = LLVM.VectorType(LLVM.Int64Type(), 2) + + if alloca_type isa LLVM.ArrayType + array_size = length(alloca_type) + new_alloca_type = LLVM.ArrayType(vec_type, array_size) + else + new_alloca_type = vec_type + end + align_val = alignment(inst) + + # Create new alloca with vector type + @dispose builder=IRBuilder() begin + position!(builder, inst) + new_alloca = alloca!(builder, new_alloca_type) + alignment!(new_alloca, align_val) + + # Bitcast the new alloca back to the original pointer type + # XXX: The issue only seems to manifest itself on LLVM >= 18 + # where we use opaque pointers anyways, so not sure this + # is needed + old_ptr_type = LLVMType(LLVM.API.LLVMTypeOf(inst.ref)) + bitcast_ptr = bitcast!(builder, 
new_alloca, old_ptr_type) + + replace_uses!(inst, bitcast_ptr) + unsafe_delete!(bb, inst) + changed = true + end + end + end + end + end + + end + return changed +end + # wrap byval pointers in a single-value struct function wrap_byval(@nospecialize(job::CompilerJob), mod::LLVM.Module, f::LLVM.Function) ft = function_type(f)::LLVM.FunctionType From 8ce64dbe458fa2c3d554c6518b99eafdb92b5ce8 Mon Sep 17 00:00:00 2001 From: Simeon David Schaub Date: Fri, 7 Nov 2025 13:08:49 +0100 Subject: [PATCH 2/4] add tests --- test/spirv.jl | 33 +++++++++++++++++++++++++++++++++ 1 file changed, 33 insertions(+) diff --git a/test/spirv.jl b/test/spirv.jl index 772531bf..f9ee2160 100644 --- a/test/spirv.jl +++ b/test/spirv.jl @@ -112,4 +112,37 @@ end end +@testset "replace i128 allocas" begin + mod = @eval module $(gensym()) + # reimplement some of SIMD.jl + struct Vec{N, T} + data::NTuple{N, Core.VecElement{T}} + end + @generated function fadd(x::Vec{N, Float32}, y::Vec{N, Float32}) where {N} + quote + Vec(Base.llvmcall($""" + %ret = fadd <$N x float> %0, %1 + ret <$N x float> %ret + """, NTuple{N, Core.VecElement{Float32}}, NTuple{2, NTuple{N, Core.VecElement{Float32}}}, x.data, y.data)) + end + end + kernel(x...) = @noinline fadd(x...) + end + + @test @filecheck begin + # TODO: should structs of `NTuple{VecElement{T}}` be passed by value instead of sret? 
+ check"CHECK-NOT: i128" + check"CHECK-LABEL: define void @{{(julia|j)_kernel_[0-9]+}}" + check"CHECK: alloca <2 x i64>, align 16" + SPIRV.code_llvm(mod.kernel, NTuple{2, mod.Vec{4, Float32}}; backend, dump_module=true) + end + + @test @filecheck begin + check"CHECK-NOT: i128" + check"CHECK-LABEL: define void @{{(julia|j)_kernel_[0-9]+}}" + check"CHECK: alloca [2 x <2 x i64>], align 16" + SPIRV.code_llvm(mod.kernel, NTuple{2, mod.Vec{8, Float32}}; backend, dump_module=true) + end +end + end From b6065f7f965b7d3de7406ed8b22201f728274560 Mon Sep 17 00:00:00 2001 From: Simeon David Schaub Date: Fri, 7 Nov 2025 13:22:34 +0100 Subject: [PATCH 3/4] fix deprecation warning --- src/spirv.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/spirv.jl b/src/spirv.jl index dfed3137..ea552e70 100644 --- a/src/spirv.jl +++ b/src/spirv.jl @@ -330,7 +330,7 @@ function convert_i128_allocas!(mod::LLVM.Module) bitcast_ptr = bitcast!(builder, new_alloca, old_ptr_type) replace_uses!(inst, bitcast_ptr) - unsafe_delete!(bb, inst) + erase!(inst) changed = true end end From 006906f824196f728ac822079a2fe026d5a62d46 Mon Sep 17 00:00:00 2001 From: Simeon David Schaub Date: Fri, 7 Nov 2025 14:28:07 +0100 Subject: [PATCH 4/4] only look for <2 x i64> allocas on 1.12+ --- test/spirv.jl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/test/spirv.jl b/test/spirv.jl index f9ee2160..e8903140 100644 --- a/test/spirv.jl +++ b/test/spirv.jl @@ -133,14 +133,14 @@ end # TODO: should structs of `NTuple{VecElement{T}}` be passed by value instead of sret? 
check"CHECK-NOT: i128" check"CHECK-LABEL: define void @{{(julia|j)_kernel_[0-9]+}}" - check"CHECK: alloca <2 x i64>, align 16" + @static VERSION >= v"1.12" && check"CHECK: alloca <2 x i64>, align 16" SPIRV.code_llvm(mod.kernel, NTuple{2, mod.Vec{4, Float32}}; backend, dump_module=true) end @test @filecheck begin check"CHECK-NOT: i128" check"CHECK-LABEL: define void @{{(julia|j)_kernel_[0-9]+}}" - check"CHECK: alloca [2 x <2 x i64>], align 16" + @static VERSION >= v"1.12" && check"CHECK: alloca [2 x <2 x i64>], align 16" SPIRV.code_llvm(mod.kernel, NTuple{2, mod.Vec{8, Float32}}; backend, dump_module=true) end end