From 869c3c21a187961297be9c87ca750342956226e3 Mon Sep 17 00:00:00 2001 From: Avik Pal Date: Mon, 1 Sep 2025 14:26:17 -0400 Subject: [PATCH 1/6] fix: indexing for `gpu_rand` --- src/host/random.jl | 5 +++-- test/testsuite/random.jl | 2 +- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/src/host/random.jl b/src/host/random.jl index 256ecea9..cb79de3b 100644 --- a/src/host/random.jl +++ b/src/host/random.jl @@ -97,8 +97,9 @@ function Random.randn!(rng::RNG, A::AnyGPUArray{T}) where T <: Number @kernel function randn!(a, randstates) i = @index(Global, Linear) idx = 2*(i - 1) + 1 - U1 = gpu_rand(T, i, randstates) - U2 = gpu_rand(T, i, randstates) + threadidx = (i-1)%length(randstates) + 1 + U1 = gpu_rand(T, threadidx, randstates) + U2 = gpu_rand(T, threadidx, randstates) Z0 = sqrt(T(-2.0)*log(U1))*cos(T(2pi)*U2) Z1 = sqrt(T(-2.0)*log(U1))*sin(T(2pi)*U2) @inbounds a[idx] = Z0 diff --git a/test/testsuite/random.jl b/test/testsuite/random.jl index f2cf832a..cec0a74b 100644 --- a/test/testsuite/random.jl +++ b/test/testsuite/random.jl @@ -7,7 +7,7 @@ cpu_rng = Random.default_rng() @testset "rand" begin # uniform - for T in eltypes, d in (10, (10,10)) + for T in eltypes, d in (10, (10,10), (128,128)) A = AT{T}(undef, d) B = copy(A) rand!(rng, A) From ac844304e8c42425341b3341664e80a344502d16 Mon Sep 17 00:00:00 2001 From: Avik Pal Date: Mon, 1 Sep 2025 14:28:07 -0400 Subject: [PATCH 2/6] chore: fix runic formatting --- src/host/random.jl | 2 +- test/testsuite/random.jl | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/host/random.jl b/src/host/random.jl index cb79de3b..a7419e40 100644 --- a/src/host/random.jl +++ b/src/host/random.jl @@ -97,7 +97,7 @@ function Random.randn!(rng::RNG, A::AnyGPUArray{T}) where T <: Number @kernel function randn!(a, randstates) i = @index(Global, Linear) idx = 2*(i - 1) + 1 - threadidx = (i-1)%length(randstates) + 1 + threadidx = (i - 1) % length(randstates) + 1 U1 = gpu_rand(T, threadidx, randstates) U2 = 
gpu_rand(T, threadidx, randstates) Z0 = sqrt(T(-2.0)*log(U1))*cos(T(2pi)*U2) diff --git a/test/testsuite/random.jl b/test/testsuite/random.jl index cec0a74b..78156bf5 100644 --- a/test/testsuite/random.jl +++ b/test/testsuite/random.jl @@ -7,7 +7,7 @@ cpu_rng = Random.default_rng() @testset "rand" begin # uniform - for T in eltypes, d in (10, (10,10), (128,128)) + for T in eltypes, d in (10, (10, 10), (128, 128)) A = AT{T}(undef, d) B = copy(A) rand!(rng, A) From 4e37a541f49ee905a2a71302f6b169178eaf9f41 Mon Sep 17 00:00:00 2001 From: Avik Pal Date: Mon, 1 Sep 2025 16:38:14 -0400 Subject: [PATCH 3/6] test: see which testset fails --- src/host/random.jl | 2 +- test/testsuite/random.jl | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/host/random.jl b/src/host/random.jl index a7419e40..7759e841 100644 --- a/src/host/random.jl +++ b/src/host/random.jl @@ -96,8 +96,8 @@ function Random.randn!(rng::RNG, A::AnyGPUArray{T}) where T <: Number threads = (length(A) - 1) ÷ 2 + 1 @kernel function randn!(a, randstates) i = @index(Global, Linear) + threadidx = @index(Local, Linear) idx = 2*(i - 1) + 1 - threadidx = (i - 1) % length(randstates) + 1 U1 = gpu_rand(T, threadidx, randstates) U2 = gpu_rand(T, threadidx, randstates) Z0 = sqrt(T(-2.0)*log(U1))*cos(T(2pi)*U2) diff --git a/test/testsuite/random.jl b/test/testsuite/random.jl index 78156bf5..99d54958 100644 --- a/test/testsuite/random.jl +++ b/test/testsuite/random.jl @@ -7,7 +7,7 @@ cpu_rng = Random.default_rng() @testset "rand" begin # uniform - for T in eltypes, d in (10, (10, 10), (128, 128)) + for T in eltypes, d in (10, (10,10)) A = AT{T}(undef, d) B = copy(A) rand!(rng, A) @@ -44,7 +44,7 @@ @testset "randn" begin # normally-distributed # XXX: randn calls sqrt, and Base's sqrt(::Complex) performs # checked type conversions that throw boxed numbers. 
- for T in filter(isrealfloattype, eltypes), d in (2, (2,2)) + for T in filter(isrealfloattype, eltypes), d in (2, (2, 2), (128, 128)) A = AT{T}(undef, d) B = copy(A) randn!(rng, A) From dc1974df380611732a61b5de32c96b75bd35aa32 Mon Sep 17 00:00:00 2001 From: Avik Pal Date: Wed, 3 Sep 2025 09:42:31 -0400 Subject: [PATCH 4/6] test: try using approx --- test/testsuite/random.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/testsuite/random.jl b/test/testsuite/random.jl index 99d54958..f9430258 100644 --- a/test/testsuite/random.jl +++ b/test/testsuite/random.jl @@ -56,7 +56,7 @@ randn!(rng, A) Random.seed!(rng, 1) randn!(rng, B) - @test Array(A) == Array(B) + @test Array(A) ≈ Array(B) if rng != cpu_rng randn!(cpu_rng, A) From 5d9d350a6ab358540f61116d23ecd2155bd9ed2b Mon Sep 17 00:00:00 2001 From: Christian Guinard <28689358+christiangnrd@users.noreply.github.com> Date: Fri, 24 Oct 2025 13:50:54 -0300 Subject: [PATCH 5/6] Fix workgroup size determination --- lib/JLArrays/Project.toml | 2 +- lib/JLArrays/src/JLArrays.jl | 3 ++- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/lib/JLArrays/Project.toml b/lib/JLArrays/Project.toml index 700a31aa..ec263f7c 100644 --- a/lib/JLArrays/Project.toml +++ b/lib/JLArrays/Project.toml @@ -1,7 +1,7 @@ name = "JLArrays" uuid = "27aeb0d3-9eb9-45fb-866b-73c2ecf80fcb" authors = ["Tim Besard "] -version = "0.3.0" +version = "0.3.1" [deps] Adapt = "79e6a3ab-5dfb-504d-930d-738a2a938a0e" diff --git a/lib/JLArrays/src/JLArrays.jl b/lib/JLArrays/src/JLArrays.jl index 4b238fa0..8f2fe024 100644 --- a/lib/JLArrays/src/JLArrays.jl +++ b/lib/JLArrays/src/JLArrays.jl @@ -377,7 +377,7 @@ KernelAbstractions.allocate(::JLBackend, ::Type{T}, dims::Tuple) where T = JLArr end if KernelAbstractions.workgroupsize(kernel) <: DynamicSize && workgroupsize === nothing - workgroupsize = (1024,) # Vectorization, 4x unrolling, minimal grain size + workgroupsize = (MAXTHREADS,) # Vectorization, 4x unrolling, minimal grain size
end iterspace, dynamic = partition(kernel, ndrange, workgroupsize) # partition checked that the ndrange's agreed @@ -403,6 +403,7 @@ else end function (obj::Kernel{JLBackend})(args...; ndrange=nothing, workgroupsize=nothing) + ndrange, workgroupsize, _, _ = launch_config(obj, ndrange, workgroupsize) device_args = jlconvert.(args) new_obj = convert_to_cpu(obj) new_obj(device_args...; ndrange, workgroupsize) From 62797c8e4b9a5e8bc69e050ccfac490be7d3f8a3 Mon Sep 17 00:00:00 2001 From: Christian Guinard <28689358+christiangnrd@users.noreply.github.com> Date: Fri, 24 Oct 2025 10:27:26 -0300 Subject: [PATCH 6/6] Mark broken tests broken --- test/testsuite/random.jl | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/test/testsuite/random.jl b/test/testsuite/random.jl index f9430258..6ecd235f 100644 --- a/test/testsuite/random.jl +++ b/test/testsuite/random.jl @@ -6,20 +6,25 @@ end cpu_rng = Random.default_rng() + SEEDING_BROKEN = (rng != cpu_rng) && !contains(string(AT), "JLArray") + @testset "rand" begin # uniform - for T in eltypes, d in (10, (10,10)) + @testset "$d $T" for T in eltypes, d in (10, (10, 10), (1024, 1024)) A = AT{T}(undef, d) B = copy(A) rand!(rng, A) rand!(rng, B) @test Array(A) != Array(B) + A = AT(rand(T, d)) + B = AT(rand(T, d)) + Random.seed!(rng) Random.seed!(rng, 1) rand!(rng, A) Random.seed!(rng, 1) rand!(rng, B) - @test all(Array(A) .== Array(B)) + @test Array(A) == Array(B) broken=SEEDING_BROKEN && (prod(d) > length(rng.state)) if rng != cpu_rng rand!(cpu_rng, A) @@ -44,19 +49,22 @@ @testset "randn" begin # normally-distributed # XXX: randn calls sqrt, and Base's sqrt(::Complex) performs # checked type conversions that throw boxed numbers. 
- for T in filter(isrealfloattype, eltypes), d in (2, (2, 2), (128, 128)) + @testset "$d $T" for T in filter(isrealfloattype, eltypes), d in (2, (2, 2), (1024, 1024)) A = AT{T}(undef, d) B = copy(A) randn!(rng, A) randn!(rng, B) @test Array(A) != Array(B) + A = AT(rand(T, d)) + B = AT(rand(T, d)) + Random.seed!(rng) Random.seed!(rng, 1) randn!(rng, A) Random.seed!(rng, 1) randn!(rng, B) - @test Array(A) ≈ Array(B) + @test Array(A) == Array(B) broken=SEEDING_BROKEN && (prod(d) > (2 * length(rng.state))) if rng != cpu_rng randn!(cpu_rng, A)