Skip to content

Commit 42a756a

Browse files
committed
Start on GPU extensions
1 parent 4ab4707 commit 42a756a

File tree

16 files changed

+2362
-33
lines changed

16 files changed

+2362
-33
lines changed

Project.toml

Lines changed: 21 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -18,19 +18,33 @@ TupleTools = "9d95972d-f1c8-5527-a6e0-b4b365fa01f6"
1818
VectorInterface = "409d34a3-91d5-4945-b6ec-7529ddf182d8"
1919

2020
[weakdeps]
21+
AMDGPU = "21141c5a-9bdb-4563-92ae-f87d6854732e"
22+
CUDA = "052768ef-5323-5732-b1bb-66c8b64840ba"
2123
ChainRulesCore = "d360d2e6-b24c-11e9-a2a3-2a2ae2dbcce4"
2224
FiniteDifferences = "26cc04aa-876d-5657-8c51-4c34ba976000"
25+
cuTENSOR = "011b41b2-24ef-40a8-b3eb-fa098493e9e1"
26+
27+
[sources]
28+
GPUArrays = {rev = "ksh/more_diag", url = "https://github.com/JuliaGPU/GPUArrays.jl"}
29+
MatrixAlgebraKit = {rev = "ksh/tk", url = "https://github.com/QuantumKitHub/MatrixAlgebraKit.jl"}
30+
cuTENSOR = {subdir = "lib/cutensor", url = "https://github.com/JuliaGPU/CUDA.jl"}
2331

2432
[extensions]
33+
TensorKitAMDGPUExt = "AMDGPU"
34+
TensorKitCUDAExt = ["CUDA", "cuTENSOR"]
2535
TensorKitChainRulesCoreExt = "ChainRulesCore"
2636
TensorKitFiniteDifferencesExt = "FiniteDifferences"
2737

2838
[compat]
39+
AMDGPU = "2"
40+
Adapt = "4"
2941
Aqua = "0.6, 0.7, 0.8"
42+
CUDA = "5.8.4"
3043
ChainRulesCore = "1"
3144
ChainRulesTestUtils = "1"
3245
Combinatorics = "1"
3346
FiniteDifferences = "0.12"
47+
GPUArrays = "11.2.6"
3448
LRUCache = "1.0.2"
3549
LinearAlgebra = "1"
3650
MatrixAlgebraKit = "0.5.0"
@@ -39,26 +53,31 @@ PackageExtensionCompat = "1"
3953
Random = "1"
4054
ScopedValues = "1.3.0"
4155
Strided = "2"
42-
TensorKitSectors = "0.1.4, 0.2"
56+
TensorKitSectors = "0.3"
4357
TensorOperations = "5.1"
4458
Test = "1"
4559
TestExtras = "0.2,0.3"
4660
TupleTools = "1.1"
4761
VectorInterface = "0.4.8, 0.5"
4862
Zygote = "0.7"
63+
cuTENSOR = "2"
4964
julia = "1.10"
5065

5166
[extras]
67+
Adapt = "79e6a3ab-5dfb-504d-930d-738a2a938a0e"
5268
Aqua = "4c88cf16-eb10-579e-8560-4a9242c79595"
69+
CUDA = "052768ef-5323-5732-b1bb-66c8b64840ba"
5370
ChainRulesCore = "d360d2e6-b24c-11e9-a2a3-2a2ae2dbcce4"
5471
ChainRulesTestUtils = "cdddcdb0-9152-4a09-a978-84456f9df70a"
5572
Combinatorics = "861a8166-3701-5b0c-9a16-15d98fcdc6aa"
5673
FiniteDifferences = "26cc04aa-876d-5657-8c51-4c34ba976000"
74+
GPUArrays = "0c68f7d7-f131-5f86-a1c3-88cf8149b2d7"
5775
LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
5876
TensorOperations = "6aa20fa7-93e2-5fca-9bc0-fbd0db3c71a2"
5977
Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
6078
TestExtras = "5ed8adda-3752-4e41-b88a-e8b09835ee3a"
6179
Zygote = "e88e6eb3-aa80-5325-afca-941959d7151f"
80+
cuTENSOR = "011b41b2-24ef-40a8-b3eb-fa098493e9e1"
6281

6382
[targets]
64-
test = ["Aqua", "Combinatorics", "LinearAlgebra", "TensorOperations", "Test", "TestExtras", "ChainRulesCore", "ChainRulesTestUtils", "FiniteDifferences", "Zygote"]
83+
test = ["Adapt", "Aqua", "Combinatorics", "CUDA", "cuTENSOR", "GPUArrays", "LinearAlgebra", "TensorOperations", "Test", "TestExtras", "ChainRulesCore", "ChainRulesTestUtils", "FiniteDifferences", "Zygote"]
Lines changed: 104 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,104 @@
1+
module TensorKitAMDGPUExt
2+
3+
using AMDGPU, AMDGPU.CUBLAS, LinearAlgebra
4+
using AMDGPU: @allowscalar
5+
import AMDGPU: rand as rocrand, rand! as rocrand!, randn as rocrandn, randn! as rocrandn!
6+
7+
using TensorKit
8+
import TensorKit.VectorInterface: scalartype as vi_scalartype
9+
using TensorKit.Factorizations
10+
using TensorKit.Factorizations: AbstractAlgorithm
11+
using TensorKit: SectorDict, tensormaptype, scalar, similarstoragetype, AdjointTensorMap
12+
13+
using TensorKit.MatrixAlgebraKit
14+
15+
using Random
16+
17+
include("roctensormap.jl")
18+
19+
const ROCDiagonalTensorMap{T, S} = DiagonalTensorMap{T, S, ROCVector{T, AMDGPU.DeviceMemory}}
20+
21+
"""
22+
ROCDiagonalTensorMap{T}(undef, domain::S) where {T,S<:IndexSpace}
23+
# expert mode: select storage type `A`
24+
DiagonalTensorMap{T,S,A}(undef, domain::S) where {T,S<:IndexSpace,A<:DenseVector{T}}
25+
26+
Construct a `DiagonalTensorMap` with uninitialized data.
27+
"""
28+
function ROCDiagonalTensorMap{T}(::UndefInitializer, V::TensorMapSpace) where {T}
29+
(numin(V) == numout(V) == 1 && domain(V) == codomain(V)) ||
30+
throw(ArgumentError("DiagonalTensorMap requires a space with equal domain and codomain and 2 indices"))
31+
return ROCDiagonalTensorMap{T}(undef, domain(V))
32+
end
33+
function ROCDiagonalTensorMap{T}(::UndefInitializer, V::ProductSpace) where {T}
34+
length(V) == 1 ||
35+
throw(ArgumentError("DiagonalTensorMap requires `numin(d) == numout(d) == 1`"))
36+
return ROCDiagonalTensorMap{T}(undef, only(V))
37+
end
38+
function ROCDiagonalTensorMap{T}(::UndefInitializer, V::S) where {T,S<:IndexSpace}
39+
return ROCDiagonalTensorMap{T,S}(undef, V)
40+
end
41+
ROCDiagonalTensorMap(::UndefInitializer, V::IndexSpace) = ROCDiagonalTensorMap{Float64}(undef, V)
42+
43+
function ROCDiagonalTensorMap(data::ROCVector{T}, V::S) where {T, S}
44+
return ROCDiagonalTensorMap{T, S}(data, V)
45+
end
46+
47+
function TensorKit.Factorizations.MAK.initialize_output(::typeof(svd_full!), t::ROCDiagonalTensorMap, alg::DiagonalAlgorithm)
48+
V_cod = fuse(codomain(t))
49+
V_dom = fuse(domain(t))
50+
U = similar(t, codomain(t) V_cod)
51+
S = ROCDiagonalTensorMap{real(scalartype(t))}(undef, V_cod V_dom)
52+
Vᴴ = similar(t, V_dom domain(t))
53+
return U, S, Vᴴ
54+
end
55+
56+
function TensorKit.Factorizations.MAK.initialize_output(::typeof(svd_vals!), t::ROCTensorMap, alg::AbstractAlgorithm)
57+
V_cod = infimum(fuse(codomain(t)), fuse(domain(t)))
58+
return ROCDiagonalTensorMap{real(scalartype(t))}(undef, V_cod)
59+
end
60+
61+
function TensorKit.Factorizations.MAK.initialize_output(::typeof(svd_compact!), t::ROCTensorMap, ::AbstractAlgorithm)
62+
V_cod = V_dom = infimum(fuse(codomain(t)), fuse(domain(t)))
63+
U = similar(t, codomain(t) V_cod)
64+
S = ROCDiagonalTensorMap{real(scalartype(t))}(undef, V_cod)
65+
Vᴴ = similar(t, V_dom domain(t))
66+
return U, S, Vᴴ
67+
end
68+
69+
function TensorKit.Factorizations.MAK.initialize_output(::typeof(eigh_full!), t::ROCTensorMap, ::AbstractAlgorithm)
70+
V_D = fuse(domain(t))
71+
T = real(scalartype(t))
72+
D = ROCDiagonalTensorMap{T}(undef, V_D)
73+
V = similar(t, codomain(t) V_D)
74+
return D, V
75+
end
76+
77+
function TensorKit.Factorizations.MAK.initialize_output(::typeof(eig_full!), t::ROCTensorMap, ::AbstractAlgorithm)
78+
V_D = fuse(domain(t))
79+
Tc = complex(scalartype(t))
80+
D = ROCDiagonalTensorMap{Tc}(undef, V_D)
81+
V = similar(t, Tc, codomain(t) V_D)
82+
return D, V
83+
end
84+
85+
function TensorKit.Factorizations.MAK.initialize_output(::typeof(eigh_vals!), t::ROCTensorMap, alg::AbstractAlgorithm)
86+
V_D = fuse(domain(t))
87+
T = real(scalartype(t))
88+
return D = ROCDiagonalTensorMap{Tc}(undef, V_D)
89+
end
90+
91+
function TensorKit.Factorizations.MAK.initialize_output(::typeof(eig_vals!), t::ROCTensorMap, alg::AbstractAlgorithm)
92+
V_D = fuse(domain(t))
93+
Tc = complex(scalartype(t))
94+
return D = ROCDiagonalTensorMap{Tc}(undef, V_D)
95+
end
96+
97+
98+
# TODO
99+
# add VectorInterface extensions for proper AMDGPU promotion
100+
function TensorKit.VectorInterface.promote_add(TA::Type{<:AMDGPU.StridedROCMatrix{Tx}}, TB::Type{<:AMDGPU.StridedROCMatrix{Ty}}, α::Tα = TensorKit.VectorInterface.One(), β::Tβ = TensorKit.VectorInterface.One()) where {Tx, Ty, Tα, Tβ}
101+
return Base.promote_op(add, Tx, Ty, Tα, Tβ)
102+
end
103+
104+
end
Lines changed: 222 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,222 @@
1+
const ROCTensorMap{T,S,N₁,N₂} = TensorMap{T,S,N₁,N₂, ROCVector{T,AMDGPU.DeviceMemory}}
2+
const ROCTensor{T, S, N} = ROCTensorMap{T, S, N, 0}
3+
4+
const AdjointROCTensorMap{T,S,N₁,N₂} = AdjointTensorMap{T,S,N₁,N₂,ROCTensorMap{T,S,N₁,N₂}}
5+
6+
function TensorKit.tensormaptype(S::Type{<:IndexSpace}, N₁, N₂, TorA::Type{<:StridedROCArray})
7+
if TorA <: ROCArray
8+
return TensorMap{eltype(TorA),S,N₁,N₂,ROCVector{eltype(TorA), AMDGPU.DeviceMemory}}
9+
else
10+
throw(ArgumentError("argument $TorA should specify a scalar type (`<:Number`) or a storage type `<:ROCVector{<:Number}`"))
11+
end
12+
end
13+
14+
function ROCTensorMap{T}(::UndefInitializer, V::TensorMapSpace{S, N₁, N₂}) where {T, S, N₁, N₂}
15+
return ROCTensorMap{T,S,N₁,N₂}(undef, V)
16+
end
17+
18+
function ROCTensorMap{T}(::UndefInitializer, codomain::TensorSpace{S},
19+
domain::TensorSpace{S}) where {T,S}
20+
return ROCTensorMap{T}(undef, codomain domain)
21+
end
22+
function ROCTensor{T}(::UndefInitializer, V::TensorSpace{S}) where {T,S}
23+
return ROCTensorMap{T}(undef, V one(V))
24+
end
25+
# constructor starting from block data
26+
"""
27+
ROCTensorMap(data::AbstractDict{<:Sector,<:ROCMatrix}, codomain::ProductSpace{S,N₁},
28+
domain::ProductSpace{S,N₂}) where {S<:ElementarySpace,N₁,N₂}
29+
ROCTensorMap(data, codomain ← domain)
30+
ROCTensorMap(data, domain → codomain)
31+
32+
Construct a `ROCTensorMap` by explicitly specifying its block data.
33+
34+
## Arguments
35+
- `data::AbstractDict{<:Sector,<:ROCMatrix}`: dictionary containing the block data for
36+
each coupled sector `c` as a matrix of size `(blockdim(codomain, c), blockdim(domain, c))`.
37+
- `codomain::ProductSpace{S,N₁}`: the codomain as a `ProductSpace` of `N₁` spaces of type
38+
`S<:ElementarySpace`.
39+
- `domain::ProductSpace{S,N₂}`: the domain as a `ProductSpace` of `N₂` spaces of type
40+
`S<:ElementarySpace`.
41+
42+
Alternatively, the domain and codomain can be specified by passing a [`HomSpace`](@ref)
43+
using the syntax `codomain ← domain` or `domain → codomain`.
44+
"""
45+
function ROCTensorMap(data::AbstractDict{<:Sector,<:ROCArray},
46+
V::TensorMapSpace{S,N₁,N₂}) where {S,N₁,N₂}
47+
T = eltype(valtype(data))
48+
t = ROCTensorMap{T}(undef, V)
49+
for (c, b) in blocks(t)
50+
haskey(data, c) || throw(SectorMismatch("no data for block sector $c"))
51+
datac = data[c]
52+
size(datac) == size(b) ||
53+
throw(DimensionMismatch("wrong size of block for sector $c"))
54+
copy!(b, datac)
55+
end
56+
for (c, b) in data
57+
c blocksectors(t) || isempty(b) ||
58+
throw(SectorMismatch("data for block sector $c not expected"))
59+
end
60+
return t
61+
end
62+
function ROCTensorMap{T}(data::DenseVector{T}, codomain::TensorSpace{S},
63+
domain::TensorSpace{S}) where {T,S}
64+
return ROCTensorMap(data, codomain domain)
65+
end
66+
function ROCTensorMap(data::AbstractDict{<:Sector,<:ROCMatrix}, codom::TensorSpace{S},
67+
dom::TensorSpace{S}) where {S}
68+
return ROCTensorMap(data, codom dom)
69+
end
70+
71+
for (fname, felt) in ((:zeros, :zero), (:ones, :one))
72+
@eval begin
73+
function AMDGPU.$fname(codomain::TensorSpace{S},
74+
domain::TensorSpace{S}=one(codomain)) where {S<:IndexSpace}
75+
return AMDGPU.$fname(codomain domain)
76+
end
77+
function AMDGPU.$fname(::Type{T}, codomain::TensorSpace{S},
78+
domain::TensorSpace{S}=one(codomain)) where {T,S<:IndexSpace}
79+
return AMDGPU.$fname(T, codomain domain)
80+
end
81+
AMDGPU.$fname(V::TensorMapSpace) = AMDGPU.$fname(Float64, V)
82+
function AMDGPU.$fname(::Type{T}, V::TensorMapSpace) where {T}
83+
t = ROCTensorMap{T}(undef, V)
84+
fill!(t, $felt(T))
85+
return t
86+
end
87+
end
88+
end
89+
90+
for randfun in (:curand, :curandn)
91+
randfun! = Symbol(randfun, :!)
92+
@eval begin
93+
# converting `codomain` and `domain` into `HomSpace`
94+
function $randfun(codomain::TensorSpace{S},
95+
domain::TensorSpace{S}) where {S<:IndexSpace}
96+
return $randfun(codomain domain)
97+
end
98+
function $randfun(::Type{T}, codomain::TensorSpace{S},
99+
domain::TensorSpace{S}) where {T,S<:IndexSpace}
100+
return $randfun(T, codomain domain)
101+
end
102+
function $randfun(rng::Random.AbstractRNG, ::Type{T},
103+
codomain::TensorSpace{S},
104+
domain::TensorSpace{S}) where {T,S<:IndexSpace}
105+
return $randfun(rng, T, codomain domain)
106+
end
107+
108+
# accepting single `TensorSpace`
109+
$randfun(codomain::TensorSpace) = $randfun(codomain one(codomain))
110+
function $randfun(::Type{T}, codomain::TensorSpace) where {T}
111+
return $randfun(T, codomain one(codomain))
112+
end
113+
function $randfun(rng::Random.AbstractRNG, ::Type{T},
114+
codomain::TensorSpace) where {T}
115+
return $randfun(rng, T, codomain one(domain))
116+
end
117+
118+
# filling in default eltype
119+
$randfun(V::TensorMapSpace) = $randfun(Float64, V)
120+
function $randfun(rng::Random.AbstractRNG, V::TensorMapSpace)
121+
return $randfun(rng, Float64, V)
122+
end
123+
124+
# filling in default rng
125+
function $randfun(::Type{T}, V::TensorMapSpace) where {T}
126+
return $randfun(Random.default_rng(), T, V)
127+
end
128+
129+
# implementation
130+
function $randfun(rng::Random.AbstractRNG, ::Type{T},
131+
V::TensorMapSpace) where {T}
132+
t = ROCTensorMap{T}(undef, V)
133+
$randfun!(rng, t)
134+
return t
135+
end
136+
end
137+
end
138+
139+
# converters
140+
# ----------
141+
function Base.convert(::Type{ROCTensorMap}, d::Dict{Symbol,Any})
142+
try
143+
codomain = eval(Meta.parse(d[:codomain]))
144+
domain = eval(Meta.parse(d[:domain]))
145+
data = SectorDict(eval(Meta.parse(c)) => ROCArray(b) for (c, b) in d[:data])
146+
return ROCTensorMap(data, codomain, domain)
147+
catch e # sector unknown in TensorKit.jl; user-defined, hopefully accessible in Main
148+
codomain = Base.eval(Main, Meta.parse(d[:codomain]))
149+
domain = Base.eval(Main, Meta.parse(d[:domain]))
150+
data = SectorDict(Base.eval(Main, Meta.parse(c)) => ROCArray(b)
151+
for (c, b) in d[:data])
152+
return ROCTensorMap(data, codomain, domain)
153+
end
154+
end
155+
156+
function Base.convert(::Type{ROCTensorMap}, t::AbstractTensorMap)
157+
return copy!(ROCTensorMap{scalartype(t)}(undef, space(t)), t)
158+
end
159+
160+
# Scalar implementation
161+
#-----------------------
162+
function TensorKit.scalar(t::ROCTensorMap)
163+
164+
# TODO: should scalar only work if N₁ == N₂ == 0?
165+
return @allowscalar dim(codomain(t)) == dim(domain(t)) == 1 ?
166+
first(blocks(t))[2][1, 1] : throw(DimensionMismatch())
167+
end
168+
169+
TensorKit.scalartype(A::StridedROCArray{T}) where {T} = T
170+
vi_scalartype(::Type{<:ROCTensorMap{T}}) where {T} = T
171+
vi_scalartype(::Type{<:ROCArray{T}}) where {T} = T
172+
173+
function TensorKit.similarstoragetype(TT::Type{<:ROCTensorMap{TTT,S,N₁,N₂}}, ::Type{T}) where {TTT,T,S,N₁,N₂}
174+
return ROCVector{T, AMDGPU.DeviceMemory}
175+
end
176+
177+
function Base.convert(TT::Type{ROCTensorMap{T,S,N₁,N₂}},
178+
t::AbstractTensorMap{<:Any,S,N₁,N₂}) where {T,S,N₁,N₂}
179+
if typeof(t) === TT
180+
return t
181+
else
182+
tnew = TT(undef, space(t))
183+
return copy!(tnew, t)
184+
end
185+
end
186+
187+
function Base.copy!(tdst::ROCTensorMap{T, S, N₁, N₂}, tsrc::ROCTensorMap{T, S, N₁, N₂}) where {T, S, N₁, N₂}
188+
space(tdst) == space(tsrc) || throw(SpaceMismatch("$(space(tdst))$(space(tsrc))"))
189+
for ((c, bdst), (_, bsrc)) in zip(blocks(tdst), blocks(tsrc))
190+
copy!(bdst, bsrc)
191+
end
192+
return tdst
193+
end
194+
195+
function Base.copy!(tdst::ROCTensorMap, tsrc::TensorKit.AdjointTensorMap)
196+
space(tdst) == space(tsrc) || throw(SpaceMismatch("$(space(tdst))$(space(tsrc))"))
197+
for ((c, bdst), (_, bsrc)) in zip(blocks(tdst), blocks(tsrc))
198+
copy!(bdst, bsrc)
199+
end
200+
return tdst
201+
end
202+
203+
function Base.promote_rule(::Type{<:TT₁},
204+
::Type{<:TT₂}) where {S,N₁,N₂, TTT₁, TTT₂,
205+
TT₁<:ROCTensorMap{TTT₁,S,N₁,N₂},
206+
TT₂<:ROCTensorMap{TTT₂,S,N₁,N₂}}
207+
T = TensorKit.VectorInterface.promote_add(TTT₁, TTT₂)
208+
return ROCTensorMap{T,S,N₁,N₂}
209+
end
210+
211+
function LinearAlgebra.isposdef(t::ROCTensorMap)
212+
domain(t) == codomain(t) ||
213+
throw(SpaceMismatch("`isposdef` requires domain and codomain to be the same"))
214+
InnerProductStyle(spacetype(t)) === EuclideanInnerProduct() || return false
215+
for (c, b) in blocks(t)
216+
# do our own hermitian check
217+
isherm = TensorKit.MatrixAlgebraKit.ishermitian(b; atol=eps(real(eltype(b))), rtol=eps(real(eltype(b))))
218+
isherm || return false
219+
isposdef(Hermitian(b)) || return false
220+
end
221+
return true
222+
end

0 commit comments

Comments
 (0)