Commit a55a95c

Respond to comments

1 parent 1ba51e9 commit a55a95c
File tree

3 files changed: +114 −39 lines


README.md

Lines changed: 58 additions & 0 deletions

@@ -31,3 +31,61 @@

types: It provides functionality and tooling to speed-up development of new GPU array types.
**This package is not intended for end users!** Instead, you should use one of the packages
that builds on GPUArrays.jl, such as [CUDA.jl](https://github.com/JuliaGPU/CUDA.jl), [oneAPI.jl](https://github.com/JuliaGPU/oneAPI.jl), [AMDGPU.jl](https://github.com/JuliaGPU/AMDGPU.jl), or [Metal.jl](https://github.com/JuliaGPU/Metal.jl).
## Interface methods

To support a new GPU backend, you will need to implement various interface methods for your backend's array types.
Some (CPU-based) examples can be seen in the testing library `JLArrays` (located in the `lib` directory of this package).

### Dense array support

### Sparse array support (optional)

`GPUArrays.jl` provides **device-side** array types for `CSC`, `CSR`, `COO`, and `BSR` matrices, as well as sparse vectors.
It also provides abstract types for these layouts that you can create concrete child types of in order to benefit from the
backend-agnostic wrappers. In particular, `GPUArrays.jl` provides out-of-the-box support for broadcasting and `mapreduce` over
GPU sparse arrays.
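
In practice this means a conforming backend gets broadcasting and `mapreduce` on its sparse types without backend-specific code. A hypothetical usage sketch with the CPU-backed `JLArrays` test backend (the `JLSparseMatrixCSC(::SparseMatrixCSC)` constructor is an assumption here; consult `lib/JLArrays` for the exact API):

```julia
using SparseArrays
using JLArrays  # CPU-backed reference backend from this repository's `lib` directory

# Assumed constructor: wrap a host SparseMatrixCSC into the backend's sparse type.
A = JLSparseMatrixCSC(sprand(Float32, 4, 4, 0.5))

B = 2 .* A                # broadcasting via the backend-agnostic wrappers
s = mapreduce(abs, +, A)  # likewise mapreduce, with no backend-specific code
```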

For **host-side** types, your custom sparse types should implement:

- `dense_array_type` - the corresponding dense array type. For example, for a `CuSparseVector` or `CuSparseMatrixCXX`, the `dense_array_type` is `CuArray`
- `sparse_array_type` - the **untyped** sparse array type corresponding to a given parametrized type. A `CuSparseVector{Tv, Ti}` would have a `sparse_array_type` of `CuSparseVector` -- note the lack of type parameters!
- `csc_type(::Type{T})` - the compressed sparse column type for your backend. A `CuSparseMatrixCSR` would have a `csc_type` of `CuSparseMatrixCSC`.
- `csr_type(::Type{T})` - the compressed sparse row type for your backend. A `CuSparseMatrixCSC` would have a `csr_type` of `CuSparseMatrixCSR`.
- `coo_type(::Type{T})` - the coordinate sparse matrix type for your backend. A `CuSparseMatrixCSC` would have a `coo_type` of `CuSparseMatrixCOO`.
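
For a hypothetical backend with types `MyArray`, `MySparseVector`, and `MySparseMatrixCSC`/`CSR`/`COO` (illustrative names, not part of this diff), these methods might look like:

```julia
import GPUArrays

# Dense counterpart of each sparse type.
GPUArrays.dense_array_type(::Type{<:MySparseVector}) = MyArray
GPUArrays.dense_array_type(::Type{<:MySparseMatrixCSC}) = MyArray

# Untyped sparse type for a parametrized one -- note: no type parameters.
GPUArrays.sparse_array_type(::Type{<:MySparseVector}) = MySparseVector
GPUArrays.sparse_array_type(::Type{<:MySparseMatrixCSC}) = MySparseMatrixCSC

# Conversions between storage layouts.
GPUArrays.csc_type(::Type{<:MySparseMatrixCSR}) = MySparseMatrixCSC
GPUArrays.csr_type(::Type{<:MySparseMatrixCSC}) = MySparseMatrixCSR
GPUArrays.coo_type(::Type{<:MySparseMatrixCSC}) = MySparseMatrixCOO
```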

Additionally, you need to teach `GPUArrays.jl` how to translate your backend's specific types onto the device. `GPUArrays.jl` provides the device-side types:

- `GPUSparseDeviceVector`
- `GPUSparseDeviceMatrixCSC`
- `GPUSparseDeviceMatrixCSR`
- `GPUSparseDeviceMatrixBSR`
- `GPUSparseDeviceMatrixCOO`

You will need to create a method of `Adapt.adapt_structure` for each format your backend supports. **Note** that if your backend supports separate address spaces,
as CUDA and ROCm do, you need to provide a parameter to these device-side arrays to indicate in which address space the underlying pointers live. An example of adapting
an array to the device-side struct:
```julia
function GPUArrays.GPUSparseDeviceVector(iPtr::MyDeviceVector{Ti, A},
                                         nzVal::MyDeviceVector{Tv, A},
                                         len::Int,
                                         nnz::Ti) where {Ti, Tv, A}
    GPUArrays.GPUSparseDeviceVector{Tv, Ti, MyDeviceVector{Ti, A}, MyDeviceVector{Tv, A}, A}(iPtr, nzVal, len, nnz)
end

function Adapt.adapt_structure(to::MyAdaptor, x::MySparseVector)
    return GPUArrays.GPUSparseDeviceVector(
        adapt(to, x.iPtr),
        adapt(to, x.nzVal),
        length(x), x.nnz
    )
end
```

You'll also need to inform `GPUArrays.jl` and `GPUCompiler.jl` how to adapt your sparse arrays by extending `KernelAbstractions.jl`'s `get_backend()`:

```julia
KA.get_backend(::MySparseVector) = MyBackend()
```

lib/JLArrays/src/JLArrays.jl

Lines changed: 39 additions & 22 deletions

```diff
@@ -13,7 +13,7 @@ using GPUArrays
 using Adapt
 using SparseArrays, LinearAlgebra
 
-import GPUArrays: _dense_array_type
+import GPUArrays: dense_array_type
 
 import KernelAbstractions
 import KernelAbstractions: Adapt, StaticArrays, Backend, Kernel, StaticSize, DynamicSize, partition, blocks, workitems, launch_config
```
```diff
@@ -198,27 +198,27 @@ function Base.getindex(A::JLSparseMatrixCSR{Tv, Ti}, i0::Integer, i1::Integer) w
 end
 
 GPUArrays.storage(a::JLArray) = a.data
-GPUArrays._dense_array_type(a::JLArray{T, N}) where {T, N} = JLArray{T, N}
-GPUArrays._dense_array_type(::Type{JLArray{T, N}}) where {T, N} = JLArray{T, N}
-GPUArrays._dense_vector_type(a::JLArray{T, N}) where {T, N} = JLArray{T, 1}
-GPUArrays._dense_vector_type(::Type{JLArray{T, N}}) where {T, N} = JLArray{T, 1}
-
-GPUArrays._sparse_array_type(sa::JLSparseMatrixCSC) = JLSparseMatrixCSC
-GPUArrays._sparse_array_type(::Type{<:JLSparseMatrixCSC}) = JLSparseMatrixCSC
-GPUArrays._sparse_array_type(sa::JLSparseMatrixCSR) = JLSparseMatrixCSR
-GPUArrays._sparse_array_type(::Type{<:JLSparseMatrixCSR}) = JLSparseMatrixCSR
-GPUArrays._sparse_array_type(sa::JLSparseVector) = JLSparseVector
-GPUArrays._sparse_array_type(::Type{<:JLSparseVector}) = JLSparseVector
-
-GPUArrays._dense_array_type(sa::JLSparseVector) = JLArray
-GPUArrays._dense_array_type(::Type{<:JLSparseVector}) = JLArray
-GPUArrays._dense_array_type(sa::JLSparseMatrixCSC) = JLArray
-GPUArrays._dense_array_type(::Type{<:JLSparseMatrixCSC}) = JLArray
-GPUArrays._dense_array_type(sa::JLSparseMatrixCSR) = JLArray
-GPUArrays._dense_array_type(::Type{<:JLSparseMatrixCSR}) = JLArray
-
-GPUArrays._csc_type(sa::JLSparseMatrixCSR) = JLSparseMatrixCSC
-GPUArrays._csr_type(sa::JLSparseMatrixCSC) = JLSparseMatrixCSR
+GPUArrays.dense_array_type(a::JLArray{T, N}) where {T, N} = JLArray{T, N}
+GPUArrays.dense_array_type(::Type{JLArray{T, N}}) where {T, N} = JLArray{T, N}
+GPUArrays.dense_vector_type(a::JLArray{T, N}) where {T, N} = JLArray{T, 1}
+GPUArrays.dense_vector_type(::Type{JLArray{T, N}}) where {T, N} = JLArray{T, 1}
+
+GPUArrays.sparse_array_type(sa::JLSparseMatrixCSC) = JLSparseMatrixCSC
+GPUArrays.sparse_array_type(::Type{<:JLSparseMatrixCSC}) = JLSparseMatrixCSC
+GPUArrays.sparse_array_type(sa::JLSparseMatrixCSR) = JLSparseMatrixCSR
+GPUArrays.sparse_array_type(::Type{<:JLSparseMatrixCSR}) = JLSparseMatrixCSR
+GPUArrays.sparse_array_type(sa::JLSparseVector) = JLSparseVector
+GPUArrays.sparse_array_type(::Type{<:JLSparseVector}) = JLSparseVector
+
+GPUArrays.dense_array_type(sa::JLSparseVector) = JLArray
+GPUArrays.dense_array_type(::Type{<:JLSparseVector}) = JLArray
+GPUArrays.dense_array_type(sa::JLSparseMatrixCSC) = JLArray
+GPUArrays.dense_array_type(::Type{<:JLSparseMatrixCSC}) = JLArray
+GPUArrays.dense_array_type(sa::JLSparseMatrixCSR) = JLArray
+GPUArrays.dense_array_type(::Type{<:JLSparseMatrixCSR}) = JLArray
+
+GPUArrays.csc_type(sa::JLSparseMatrixCSR) = JLSparseMatrixCSC
+GPUArrays.csr_type(sa::JLSparseMatrixCSC) = JLSparseMatrixCSR
 
 # conversion of untyped data to a typed Array
 function typed_data(x::JLArray{T}) where {T}
```
```diff
@@ -361,6 +361,23 @@
 Base.length(x::JLSparseMatrixCSR) = prod(x.dims)
 Base.size(x::JLSparseMatrixCSR) = x.dims
 
+function GPUArrays._spadjoint(A::JLSparseMatrixCSR)
+    Aᴴ = JLSparseMatrixCSC(A.rowPtr, A.colVal, conj(A.nzVal), reverse(size(A)))
+    JLSparseMatrixCSR(Aᴴ)
+end
+function GPUArrays._sptranspose(A::JLSparseMatrixCSR)
+    Aᵀ = JLSparseMatrixCSC(A.rowPtr, A.colVal, A.nzVal, reverse(size(A)))
+    JLSparseMatrixCSR(Aᵀ)
+end
+function _spadjoint(A::JLSparseMatrixCSC)
+    Aᴴ = JLSparseMatrixCSR(A.colPtr, A.rowVal, conj(A.nzVal), reverse(size(A)))
+    JLSparseMatrixCSC(Aᴴ)
+end
+function _sptranspose(A::JLSparseMatrixCSC)
+    Aᵀ = JLSparseMatrixCSR(A.colPtr, A.rowVal, A.nzVal, reverse(size(A)))
+    JLSparseMatrixCSC(Aᵀ)
+end
+
 # idempotency
 JLArray{T,N}(xs::JLArray{T,N}) where {T,N} = xs
```
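
The `_sptranspose`/`_spadjoint` methods added above rely on a standard identity: the raw buffers of a CSC matrix, reread as CSR buffers (`colptr` as `rowPtr`, `rowval` as `colVal`), describe the transpose, so no data movement is needed. A minimal check of that identity using only the `SparseArrays` stdlib (backend types omitted):

```julia
using SparseArrays, LinearAlgebra

A = sprand(4, 5, 0.5)

# Read A's CSC buffers (colptr, rowval, nzval) as if they were the CSR
# buffers (rowPtr, colVal, nzVal) of a matrix B of size reverse(size(A)).
m, n = reverse(size(A))
B = zeros(eltype(A), m, n)
for i in 1:m                          # CSR "row" i of B is CSC column i of A
    for k in A.colptr[i]:(A.colptr[i+1] - 1)
        B[i, A.rowval[k]] = A.nzval[k]
    end
end
@assert B == Matrix(transpose(A))     # the reread buffers describe Aᵀ
```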

src/host/sparse.jl

Lines changed: 17 additions & 17 deletions

```diff
@@ -23,21 +23,21 @@ SparseArrays.getcolptr(S::AbstractGPUSparseMatrixCSC) = S.colPtr
 
 Base.convert(T::Type{<:AbstractGPUSparseArray}, m::AbstractArray) = m isa T ? m : T(m)
 
-_dense_array_type(sa::SparseVector) = SparseVector
-_dense_array_type(::Type{SparseVector}) = SparseVector
-_sparse_array_type(sa::SparseVector) = SparseVector
-_dense_vector_type(sa::AbstractSparseArray) = Vector
-_dense_vector_type(sa::AbstractArray) = Vector
-_dense_vector_type(::Type{<:AbstractSparseArray}) = Vector
-_dense_vector_type(::Type{<:AbstractArray}) = Vector
-_dense_array_type(sa::SparseMatrixCSC) = SparseMatrixCSC
-_dense_array_type(::Type{SparseMatrixCSC}) = SparseMatrixCSC
-_sparse_array_type(sa::SparseMatrixCSC) = SparseMatrixCSC
-
-function _sparse_array_type(sa::AbstractGPUSparseArray) end
-function _dense_array_type(sa::AbstractGPUSparseArray) end
-function _coo_type(sa::AbstractGPUSparseArray) end
-_coo_type(::SA) where {SA<:AbstractGPUSparseMatrixCSC} = SA
+dense_array_type(sa::SparseVector) = SparseVector
+dense_array_type(::Type{SparseVector}) = SparseVector
+sparse_array_type(sa::SparseVector) = SparseVector
+dense_vector_type(sa::AbstractSparseArray) = Vector
+dense_vector_type(sa::AbstractArray) = Vector
+dense_vector_type(::Type{<:AbstractSparseArray}) = Vector
+dense_vector_type(::Type{<:AbstractArray}) = Vector
+dense_array_type(sa::SparseMatrixCSC) = SparseMatrixCSC
+dense_array_type(::Type{SparseMatrixCSC}) = SparseMatrixCSC
+sparse_array_type(sa::SparseMatrixCSC) = SparseMatrixCSC
+
+function sparse_array_type(sa::AbstractGPUSparseArray) end
+function dense_array_type(sa::AbstractGPUSparseArray) end
+function coo_type(sa::AbstractGPUSparseArray) end
+coo_type(::SA) where {SA<:AbstractGPUSparseMatrixCSC} = SA
 
 function _spadjoint end
 function _sptranspose end
```
```diff
@@ -908,8 +908,8 @@
 end
 ## COV_EXCL_STOP
 
-function _csc_type end
-function _csr_type end
+function csc_type end
+function csr_type end
 
 # TODO: implement mapreducedim!
 function Base.mapreduce(f, op, A::AbstractGPUSparseMatrix; dims=:, init=nothing)
```
