Skip to content

Commit 0d8a6d0

Browse files
committed
Start on GPU extensions
1 parent bbeb8e5 commit 0d8a6d0

File tree

21 files changed

+2976
-80
lines changed

21 files changed

+2976
-80
lines changed

Project.toml

Lines changed: 23 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -19,20 +19,35 @@ TupleTools = "9d95972d-f1c8-5527-a6e0-b4b365fa01f6"
1919
VectorInterface = "409d34a3-91d5-4945-b6ec-7529ddf182d8"
2020

2121
[weakdeps]
22+
AMDGPU = "21141c5a-9bdb-4563-92ae-f87d6854732e"
23+
CUDA = "052768ef-5323-5732-b1bb-66c8b64840ba"
2224
ChainRulesCore = "d360d2e6-b24c-11e9-a2a3-2a2ae2dbcce4"
2325
FiniteDifferences = "26cc04aa-876d-5657-8c51-4c34ba976000"
26+
cuTENSOR = "011b41b2-24ef-40a8-b3eb-fa098493e9e1"
27+
28+
[sources]
29+
GPUArrays = {rev = "master", url = "https://github.com/JuliaGPU/GPUArrays.jl"}
30+
MatrixAlgebraKit = {rev = "ksh/tk", url = "https://github.com/QuantumKitHub/MatrixAlgebraKit.jl"}
31+
AMDGPU = {rev = "master", url = "https://github.com/JuliaGPU/AMDGPU.jl"}
32+
cuTENSOR = {subdir = "lib/cutensor", url = "https://github.com/JuliaGPU/CUDA.jl", rev="master"}
2433

2534
[extensions]
35+
TensorKitAMDGPUExt = "AMDGPU"
36+
TensorKitCUDAExt = ["CUDA", "cuTENSOR"]
2637
TensorKitChainRulesCoreExt = "ChainRulesCore"
2738
TensorKitFiniteDifferencesExt = "FiniteDifferences"
2839

2940
[compat]
41+
AMDGPU = "2"
42+
Adapt = "4"
3043
Aqua = "0.6, 0.7, 0.8"
3144
ArgParse = "1.2.0"
45+
CUDA = "5.9"
3246
ChainRulesCore = "1"
3347
ChainRulesTestUtils = "1"
3448
Combinatorics = "1"
3549
FiniteDifferences = "0.12"
50+
GPUArrays = "11.2.6"
3651
LRUCache = "1.0.2"
3752
LinearAlgebra = "1"
3853
MatrixAlgebraKit = "0.5.0"
@@ -50,21 +65,27 @@ TestExtras = "0.2,0.3"
5065
TupleTools = "1.1"
5166
VectorInterface = "0.4.8, 0.5"
5267
Zygote = "0.7"
68+
cuTENSOR = "2"
5369
julia = "1.10"
5470

5571
[extras]
56-
ArgParse = "c7e460c6-2fb9-53a9-8c5b-16f535851c63"
72+
Adapt = "79e6a3ab-5dfb-504d-930d-738a2a938a0e"
73+
AMDGPU = "21141c5a-9bdb-4563-92ae-f87d6854732e"
5774
Aqua = "4c88cf16-eb10-579e-8560-4a9242c79595"
75+
ArgParse = "c7e460c6-2fb9-53a9-8c5b-16f535851c63"
76+
CUDA = "052768ef-5323-5732-b1bb-66c8b64840ba"
5877
ChainRulesCore = "d360d2e6-b24c-11e9-a2a3-2a2ae2dbcce4"
5978
ChainRulesTestUtils = "cdddcdb0-9152-4a09-a978-84456f9df70a"
6079
Combinatorics = "861a8166-3701-5b0c-9a16-15d98fcdc6aa"
6180
FiniteDifferences = "26cc04aa-876d-5657-8c51-4c34ba976000"
81+
GPUArrays = "0c68f7d7-f131-5f86-a1c3-88cf8149b2d7"
6282
LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
6383
SafeTestsets = "1bc83da4-3b8d-516f-aca4-4fe02f6d838f"
6484
TensorOperations = "6aa20fa7-93e2-5fca-9bc0-fbd0db3c71a2"
6585
Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
6686
TestExtras = "5ed8adda-3752-4e41-b88a-e8b09835ee3a"
6787
Zygote = "e88e6eb3-aa80-5325-afca-941959d7151f"
88+
cuTENSOR = "011b41b2-24ef-40a8-b3eb-fa098493e9e1"
6889

6990
[targets]
70-
test = ["ArgParse", "Aqua", "Combinatorics", "LinearAlgebra", "TensorOperations", "Test", "TestExtras", "SafeTestsets", "ChainRulesCore", "ChainRulesTestUtils", "FiniteDifferences", "Zygote"]
91+
test = ["ArgParse", "Adapt", "AMDGPU", "Aqua", "Combinatorics", "CUDA", "cuTENSOR", "GPUArrays", "LinearAlgebra", "SafeTestsets", "TensorOperations", "Test", "TestExtras", "ChainRulesCore", "ChainRulesTestUtils", "FiniteDifferences", "Zygote"]
Lines changed: 109 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,109 @@
1+
module TensorKitAMDGPUExt
2+
3+
using AMDGPU, AMDGPU.rocBLAS, LinearAlgebra
4+
using AMDGPU: @allowscalar
5+
import AMDGPU: rand as rocrand, rand! as rocrand!, randn as rocrandn, randn! as rocrandn!
6+
7+
using TensorKit
8+
import TensorKit.VectorInterface: scalartype as vi_scalartype
9+
using TensorKit.Factorizations
10+
using TensorKit.Strided
11+
using TensorKit.Factorizations: AbstractAlgorithm
12+
using TensorKit: SectorDict, tensormaptype, scalar, similarstoragetype, AdjointTensorMap
13+
14+
using TensorKit.MatrixAlgebraKit
15+
16+
using Random
17+
18+
include("roctensormap.jl")
19+
20+
# `DiagonalTensorMap` whose storage type is fixed to `ROCVector{T, AMDGPU.Mem.HIPBuffer}`.
const ROCDiagonalTensorMap{T, S} = DiagonalTensorMap{T, S, ROCVector{T, AMDGPU.Mem.HIPBuffer}}

"""
    ROCDiagonalTensorMap{T}(undef, V::TensorMapSpace)

Construct a `ROCDiagonalTensorMap` with scalar type `T` and uninitialized data.

`V` must have exactly one index in its domain and one in its codomain, and the two
must be equal; otherwise an `ArgumentError` is thrown. The storage type is fixed by
the `ROCDiagonalTensorMap` alias; call `DiagonalTensorMap{T,S,A}(undef, domain)`
directly to select a different storage type `A`.
"""
function ROCDiagonalTensorMap{T}(::UndefInitializer, V::TensorMapSpace) where {T}
    is_square = numin(V) == numout(V) == 1 && domain(V) == codomain(V)
    if !is_square
        throw(ArgumentError("DiagonalTensorMap requires a space with equal domain and codomain and 2 indices"))
    end
    # Forward the domain of `V` to the constructor taking a single space argument.
    return ROCDiagonalTensorMap{T}(undef, domain(V))
end
34+
# Accept a `ProductSpace` of length one by unwrapping its only factor;
# any other length throws an `ArgumentError`.
function ROCDiagonalTensorMap{T}(::UndefInitializer, V::ProductSpace) where {T}
    if length(V) != 1
        throw(ArgumentError("DiagonalTensorMap requires `numin(d) == numout(d) == 1`"))
    end
    return ROCDiagonalTensorMap{T}(undef, only(V))
end
39+
# Fill in the space type parameter `S` from the type of the index space `V`.
ROCDiagonalTensorMap{T}(::UndefInitializer, V::S) where {T, S <: IndexSpace} =
    ROCDiagonalTensorMap{T, S}(undef, V)
42+
# When no scalar type is given, default to `Float64`.
function ROCDiagonalTensorMap(::UndefInitializer, V::IndexSpace)
    return ROCDiagonalTensorMap{Float64}(undef, V)
end
43+
44+
# Wrap an existing `ROCVector` as the data of a diagonal tensor map over `V`;
# the scalar type and space type are taken from the arguments.
ROCDiagonalTensorMap(data::ROCVector{T}, V::S) where {T, S} =
    ROCDiagonalTensorMap{T, S}(data, V)
47+
48+
# Build a diagonal tensor map over `V` from a host `Vector`, which is first
# converted to a `ROCVector{T}`.
function ROCDiagonalTensorMap(data::Vector{T}, V::S) where {T, S}
    device_data = ROCVector{T}(data)
    return ROCDiagonalTensorMap{T, S}(device_data, V)
end
51+
52+
# Allocate the outputs `(U, S, Vᴴ)` for `svd_full!` applied to a `ROCDiagonalTensorMap`.
#
# `U` maps the fused codomain into `codomain(t)`, `Vᴴ` maps `domain(t)` into the fused
# domain, and `S` is an uninitialized real-valued `ROCDiagonalTensorMap` between the two
# fused spaces. The `←` operator builds the map space `codomain ← domain`.
function TensorKit.Factorizations.MAK.initialize_output(::typeof(svd_full!), t::ROCDiagonalTensorMap, alg::DiagonalAlgorithm)
    V_cod = fuse(codomain(t))
    V_dom = fuse(domain(t))
    U = similar(t, codomain(t) ← V_cod)
    # Singular values are real even when `t` has a complex scalar type.
    S = ROCDiagonalTensorMap{real(scalartype(t))}(undef, V_cod ← V_dom)
    Vᴴ = similar(t, V_dom ← domain(t))
    return U, S, Vᴴ
end
60+
61+
# Allocate the output of `svd_vals!`: an uninitialized real-valued
# `ROCDiagonalTensorMap` over the infimum of the fused codomain and fused domain of `t`.
function TensorKit.Factorizations.MAK.initialize_output(::typeof(svd_vals!), t::ROCTensorMap, ::AbstractAlgorithm)
    fused_cod = fuse(codomain(t))
    fused_dom = fuse(domain(t))
    Tr = real(scalartype(t))
    return ROCDiagonalTensorMap{Tr}(undef, infimum(fused_cod, fused_dom))
end
65+
66+
# Allocate the outputs `(U, S, Vᴴ)` for `svd_compact!` applied to a `ROCTensorMap`.
#
# The shared inner space is the infimum of the fused codomain and fused domain of `t`.
# `S` is an uninitialized real-valued `ROCDiagonalTensorMap` over that space. The `←`
# operator builds the map space `codomain ← domain`.
function TensorKit.Factorizations.MAK.initialize_output(::typeof(svd_compact!), t::ROCTensorMap, ::AbstractAlgorithm)
    V_cod = V_dom = infimum(fuse(codomain(t)), fuse(domain(t)))
    U = similar(t, codomain(t) ← V_cod)
    # Singular values are real even when `t` has a complex scalar type.
    S = ROCDiagonalTensorMap{real(scalartype(t))}(undef, V_cod)
    Vᴴ = similar(t, V_dom ← domain(t))
    return U, S, Vᴴ
end
73+
74+
# Allocate the outputs `(D, V)` for `eigh_full!` applied to a `ROCTensorMap`.
#
# `D` is an uninitialized `ROCDiagonalTensorMap` over the fused domain with the real
# scalar type of `t`; `V` maps the fused domain into `codomain(t)` and keeps the scalar
# type of `t`. The `←` operator builds the map space `codomain ← domain`.
function TensorKit.Factorizations.MAK.initialize_output(::typeof(eigh_full!), t::ROCTensorMap, ::AbstractAlgorithm)
    V_D = fuse(domain(t))
    T = real(scalartype(t))
    D = ROCDiagonalTensorMap{T}(undef, V_D)
    V = similar(t, codomain(t) ← V_D)
    return D, V
end
81+
82+
# Allocate the outputs `(D, V)` for `eig_full!` applied to a `ROCTensorMap`.
#
# Both outputs use the complexified scalar type of `t`. `D` is an uninitialized
# `ROCDiagonalTensorMap` over the fused domain and `V` maps the fused domain into
# `codomain(t)`. The `←` operator builds the map space `codomain ← domain`.
function TensorKit.Factorizations.MAK.initialize_output(::typeof(eig_full!), t::ROCTensorMap, ::AbstractAlgorithm)
    V_D = fuse(domain(t))
    Tc = complex(scalartype(t))
    D = ROCDiagonalTensorMap{Tc}(undef, V_D)
    V = similar(t, Tc, codomain(t) ← V_D)
    return D, V
end
89+
90+
# Allocate the output of `eigh_vals!` applied to a `ROCTensorMap`: an uninitialized
# `ROCDiagonalTensorMap` over the fused domain of `t`, using the real scalar type of `t`
# (the same element type that the `eigh_full!` method uses for `D`).
function TensorKit.Factorizations.MAK.initialize_output(::typeof(eigh_vals!), t::ROCTensorMap, alg::AbstractAlgorithm)
    V_D = fuse(domain(t))
    T = real(scalartype(t))
    # Use the locally defined real type `T`; no `Tc` exists in this method.
    return ROCDiagonalTensorMap{T}(undef, V_D)
end
95+
96+
# Allocate the output of `eig_vals!` applied to a `ROCTensorMap`: an uninitialized
# `ROCDiagonalTensorMap` over the fused domain of `t` with the complexified scalar type.
function TensorKit.Factorizations.MAK.initialize_output(::typeof(eig_vals!), t::ROCTensorMap, ::AbstractAlgorithm)
    eigval_type = complex(scalartype(t))
    eigval_space = fuse(domain(t))
    return ROCDiagonalTensorMap{eigval_type}(undef, eigval_space)
end
101+
102+
103+
# TODO: add VectorInterface extensions for proper AMDGPU promotion.
#
# Promotion rule for adding two strided ROC matrices. The result is computed with
# `Base.promote_op` from the element types `Tx` and `Ty` and the types of the scaling
# factors `α` and `β`, both of which default to `One()`.
function TensorKit.VectorInterface.promote_add(
        TA::Type{<:AMDGPU.StridedROCMatrix{Tx}},
        TB::Type{<:AMDGPU.StridedROCMatrix{Ty}},
        α::Tα = TensorKit.VectorInterface.One(),
        β::Tβ = TensorKit.VectorInterface.One()
    ) where {Tx, Ty, Tα, Tβ}
    promoted = Base.promote_op(add, Tx, Ty, Tα, Tβ)
    return promoted
end
108+
109+
end

0 commit comments

Comments
 (0)