diff --git a/src/implementations/svd.jl b/src/implementations/svd.jl
index c8b27f3f..222c7aee 100644
--- a/src/implementations/svd.jl
+++ b/src/implementations/svd.jl
@@ -218,6 +218,11 @@ function svd_full!(A::AbstractMatrix, USVᴴ, alg::DiagonalAlgorithm)
     check_input(svd_full!, A, USVᴴ, alg)
     Ad = diagview(A)
     U, S, Vᴴ = USVᴴ
+    if isempty(Ad)
+        one!(U)
+        one!(Vᴴ)
+        return USVᴴ
+    end
     p = sortperm(Ad; by = abs, rev = true)
     zero!(U)
     zero!(Vᴴ)
diff --git a/test/amd/eigh.jl b/test/amd/eigh.jl
index a9bd4c0c..4d23f128 100644
--- a/test/amd/eigh.jl
+++ b/test/amd/eigh.jl
@@ -6,7 +6,9 @@ using LinearAlgebra: LinearAlgebra, Diagonal, I
 using MatrixAlgebraKit: TruncatedAlgorithm, diagview
 using AMDGPU
 
-@testset "eigh_full! for T = $T" for T in (Float32, Float64, ComplexF32, ComplexF64)
+BLASFloats = (Float32, Float64, ComplexF32, ComplexF64)
+
+@testset "eigh_full! for T = $T" for T in BLASFloats
     rng = StableRNG(123)
     m = 54
     for alg in (
@@ -32,11 +34,11 @@ using AMDGPU
     end
 end
 
-#=@testset "eigh_trunc! for T = $T" for T in (Float32, Float64, ComplexF32, ComplexF64)
+#=@testset "eigh_trunc! for T = $T" for T in BLASFloats 
     rng = StableRNG(123)
     m = 54
-    for alg in (CUSOLVER_QRIteration(),
-                CUSOLVER_DivideAndConquer(),
+    for alg in (ROCSOLVER_QRIteration(),
+                ROCSOLVER_DivideAndConquer(),
                 )
         A = ROCArray(randn(rng, T, m, m))
         A = A * A'
@@ -64,18 +66,40 @@ end
     end
 end
 
-@testset "eigh_trunc! specify truncation algorithm T = $T" for T in
-                                                               (Float32, Float64,
-                                                                ComplexF32,
-                                                                ComplexF64)
+@testset "eigh_trunc! specify truncation algorithm T = $T" for T in BLASFloats
     rng = StableRNG(123)
     m = 4
     V = qr_compact(ROCArray(randn(rng, T, m, m)))[1]
     D = Diagonal([0.9, 0.3, 0.1, 0.01])
     A = V * D * V'
     A = (A + A') / 2
-    alg = TruncatedAlgorithm(CUSOLVER_QRIteration(), truncrank(2))
+    alg = TruncatedAlgorithm(ROCSOLVER_QRIteration(), truncrank(2))
     D2, V2, ϵ2 = @constinferred eigh_trunc(A; alg)
     @test diagview(D2) ≈ diagview(D)[1:2] rtol = sqrt(eps(real(T)))
     @test_throws ArgumentError eigh_trunc(A; alg, trunc=(; maxrank=2))
 end=#
+
+@testset "eigh for Diagonal{$T}" for T in BLASFloats
+    rng = StableRNG(123)
+    m = 54
+    Ad = randn(rng, T, m)
+    Ad .+= conj.(Ad)
+    A = Diagonal(ROCArray(Ad))
+    atol = sqrt(eps(real(T)))
+
+    D, V = @constinferred eigh_full(A)
+    @test D isa Diagonal{real(T)} && size(D) == size(A)
+    @test V isa Diagonal{T} && size(V) == size(A)
+    @test A * V ≈ V * D
+
+    D2 = @constinferred eigh_vals(A)
+    @test D2 isa AbstractVector{real(T)} && length(D2) == m
+    @test diagview(D) ≈ D2
+
+    # TODO partialsortperm
+    #=A2 = Diagonal(ROCArray(T[0.9, 0.3, 0.1, 0.01]))
+    alg = TruncatedAlgorithm(DiagonalAlgorithm(), truncrank(2))
+    D2, V2, ϵ2 = @constinferred eigh_trunc(A2; alg)
+    @test diagview(D2) ≈ diagview(A2)[1:2]
+    @test ϵ2 ≈ norm(diagview(A2)[3:4]) atol = atol=#
+end
diff --git a/test/amd/lq.jl b/test/amd/lq.jl
index a3dc8e53..ef46bbe9 100644
--- a/test/amd/lq.jl
+++ b/test/amd/lq.jl
@@ -4,10 +4,13 @@ using Test
 using TestExtras
 using StableRNGs
 using AMDGPU
+using LinearAlgebra
 
 include(joinpath("..", "utilities.jl"))
 
-@testset "lq_compact! for T = $T" for T in (Float32, Float64, ComplexF32, ComplexF64)
+BLASFloats = (Float32, Float64, ComplexF32, ComplexF64)
+
+@testset "lq_compact! for T = $T" for T in BLASFloats
     rng = StableRNG(123)
     m = 54
     for n in (37, m, 63)
@@ -65,7 +68,7 @@ include(joinpath("..", "utilities.jl"))
     end
 end
 
-@testset "lq_full! for T = $T" for T in (Float32, Float64, ComplexF32, ComplexF64)
+@testset "lq_full! for T = $T" for T in BLASFloats
     rng = StableRNG(123)
     m = 54
     for n in (37, m, 63)
@@ -115,3 +118,46 @@ end
         @test_throws ArgumentError lq_full!(copy!(Ac, A), (L, Q); blocksize = 8)
     end
 end
+
+@testset "lq_compact, lq_full and lq_null for Diagonal{$T}" for T in BLASFloats
+    rng = StableRNG(123)
+    atol = eps(real(T))^(3 / 4)
+    for m in (54, 0)
+        Ad = ROCArray(randn(rng, T, m))
+        A = Diagonal(Ad)
+
+        # compact
+        L, Q = @constinferred lq_compact(A)
+        @test Q isa Diagonal{T} && size(Q) == (m, m)
+        @test L isa Diagonal{T} && size(L) == (m, m)
+        @test L * Q ≈ A
+        @test isunitary(Q)
+
+        # compact and positive
+        Lp, Qp = @constinferred lq_compact(A; positive = true)
+        @test Qp isa Diagonal{T} && size(Qp) == (m, m)
+        @test Lp isa Diagonal{T} && size(Lp) == (m, m)
+        @test Lp * Qp ≈ A
+        @test isunitary(Qp)
+        @test all(isposdef.(diagview(Lp)))
+
+        # full
+        L, Q = @constinferred lq_full(A)
+        @test Q isa Diagonal{T} && size(Q) == (m, m)
+        @test L isa Diagonal{T} && size(L) == (m, m)
+        @test L * Q ≈ A
+        @test isunitary(Q)
+
+        # full and positive
+        Lp, Qp = @constinferred lq_full(A; positive = true)
+        @test Qp isa Diagonal{T} && size(Qp) == (m, m)
+        @test Lp isa Diagonal{T} && size(Lp) == (m, m)
+        @test Lp * Qp ≈ A
+        @test isunitary(Qp)
+        @test all(isposdef.(diagview(Lp)))
+
+        # null
+        N = @constinferred lq_null(A)
+        @test N isa AbstractMatrix{T} && size(N) == (0, m)
+    end
+end
diff --git a/test/amd/qr.jl b/test/amd/qr.jl
index 542f5858..b0708ae1 100644
--- a/test/amd/qr.jl
+++ b/test/amd/qr.jl
@@ -4,12 +4,13 @@ using Test
 using TestExtras
 using StableRNGs
 using AMDGPU
+using LinearAlgebra
 
 include(joinpath("..", "utilities.jl"))
 
-eltypes = (Float32, Float64, ComplexF32, ComplexF64)
+BLASFloats = (Float32, Float64, ComplexF32, ComplexF64)
 
-@testset "qr_compact! and qr_null! for T = $T" for T in eltypes
+@testset "qr_compact! and qr_null! for T = $T" for T in BLASFloats
     rng = StableRNG(123)
     m = 54
     for n in (37, m, 63)
@@ -68,7 +69,7 @@ eltypes = (Float32, Float64, ComplexF32, ComplexF64)
     end
 end
 
-@testset "qr_full! for T = $T" for T in eltypes
+@testset "qr_full! for T = $T" for T in BLASFloats
     rng = StableRNG(123)
     m = 63
     for n in (37, m, 63)
@@ -121,3 +122,46 @@ end
         @test_throws ArgumentError qr_full!(copy!(Ac, A), (Q, R); blocksize = 8)
     end
 end
+
+@testset "qr_compact, qr_full and qr_null for Diagonal{$T}" for T in BLASFloats
+    rng = StableRNG(123)
+    atol = eps(real(T))^(3 / 4)
+    for m in (54, 0)
+        Ad = ROCArray(randn(rng, T, m))
+        A = Diagonal(Ad)
+
+        # compact
+        Q, R = @constinferred qr_compact(A)
+        @test Q isa Diagonal{T} && size(Q) == (m, m)
+        @test R isa Diagonal{T} && size(R) == (m, m)
+        @test Q * R ≈ A
+        @test isunitary(Q)
+
+        # compact and positive
+        Qp, Rp = @constinferred qr_compact(A; positive = true)
+        @test Qp isa Diagonal{T} && size(Qp) == (m, m)
+        @test Rp isa Diagonal{T} && size(Rp) == (m, m)
+        @test Qp * Rp ≈ A
+        @test isunitary(Qp)
+        @test all(isposdef.(diagview(Rp)))
+
+        # full
+        Q, R = @constinferred qr_full(A)
+        @test Q isa Diagonal{T} && size(Q) == (m, m)
+        @test R isa Diagonal{T} && size(R) == (m, m)
+        @test Q * R ≈ A
+        @test isunitary(Q)
+
+        # full and positive
+        Qp, Rp = @constinferred qr_full(A; positive = true)
+        @test Qp isa Diagonal{T} && size(Qp) == (m, m)
+        @test Rp isa Diagonal{T} && size(Rp) == (m, m)
+        @test Qp * Rp ≈ A
+        @test isunitary(Qp)
+        @test all(isposdef.(diagview(Rp)))
+
+        # null
+        N = @constinferred qr_null(A)
+        @test N isa AbstractMatrix{T} && size(N) == (m, 0)
+    end
+end
diff --git a/test/amd/svd.jl b/test/amd/svd.jl
index f8a48045..fcd5b490 100644
--- a/test/amd/svd.jl
+++ b/test/amd/svd.jl
@@ -8,100 +8,116 @@ using AMDGPU
 
 include(joinpath("..", "utilities.jl"))
 
-@testset "svd_compact! for T = $T" for T in (Float32, Float64, ComplexF32, ComplexF64)
+BLASFloats = (Float32, Float64, ComplexF32, ComplexF64)
+
+@testset "svd_compact! for T = $T" for T in BLASFloats
     rng = StableRNG(123)
     m = 54
     @testset "size ($m, $n)" for n in (37, m, 63)
         k = min(m, n)
-        algs = (ROCSOLVER_QRIteration(), ROCSOLVER_Jacobi())
-        @testset "algorithm $alg" for alg in algs
-            minmn = min(m, n)
-            A = ROCArray(randn(rng, T, m, n))
+        algs(::ROCArray) = (ROCSOLVER_QRIteration(), ROCSOLVER_Jacobi())
+        algs(::Diagonal) = (DiagonalAlgorithm(),)
+        As = m == n ? (ROCArray(randn(rng, T, m, n)), Diagonal(ROCArray(randn(rng, T, m)))) : (ROCArray(randn(rng, T, m, n)),)
+        for A in As
+            @testset "algorithm $alg" for alg in algs(A)
+                minmn = min(m, n)
 
-            U, S, Vᴴ = svd_compact(A; alg)
-            @test U isa ROCMatrix{T} && size(U) == (m, minmn)
-            @test S isa Diagonal{real(T), <:ROCVector} && size(S) == (minmn, minmn)
-            @test Vᴴ isa ROCMatrix{T} && size(Vᴴ) == (minmn, n)
-            @test U * S * Vᴴ ≈ A
-            @test isapproxone(U' * U)
-            @test isapproxone(Vᴴ * Vᴴ')
-            @test isposdef(S)
+                U, S, Vᴴ = svd_compact(A; alg)
+                @test U isa ROCMatrix{T} && size(U) == (m, minmn)
+                @test S isa Diagonal{real(T), <:ROCVector} && size(S) == (minmn, minmn)
+                @test Vᴴ isa ROCMatrix{T} && size(Vᴴ) == (minmn, n)
+                @test U * S * Vᴴ ≈ A
+                @test isapproxone(U' * U)
+                @test isapproxone(Vᴴ * Vᴴ')
+                @test isposdef(S)
 
-            Ac = similar(A)
-            U2, S2, V2ᴴ = @constinferred svd_compact!(copy!(Ac, A), (U, S, Vᴴ), alg)
-            @test U2 === U
-            @test S2 === S
-            @test V2ᴴ === Vᴴ
-            @test U * S * Vᴴ ≈ A
-            @test isapproxone(U' * U)
-            @test isapproxone(Vᴴ * Vᴴ')
-            @test isposdef(S)
+                Ac = similar(A)
+                U2, S2, V2ᴴ = @constinferred svd_compact!(copy!(Ac, A), (U, S, Vᴴ), alg)
+                @test U2 === U
+                @test S2 === S
+                @test V2ᴴ === Vᴴ
+                @test U * S * Vᴴ ≈ A
+                @test isapproxone(U' * U)
+                @test isapproxone(Vᴴ * Vᴴ')
+                @test isposdef(S)
 
-            Sd = svd_vals(A, alg)
-            @test ROCArray(diagview(S)) ≈ Sd
-            # ROCArray is necessary because norm of ROCArray view with non-unit step is broken
-            if alg isa ROCSOLVER_QRIteration
-                @test_warn "invalid keyword arguments for GPU_QRIteration" svd_compact!(copy!(Ac, A), (U, S, Vᴴ), ROCSOLVER_QRIteration(; bad = "bad"))
+                Sd = svd_vals(A, alg)
+                @test ROCArray(diagview(S)) ≈ Sd
+                # ROCArray is necessary because norm of ROCArray view with non-unit step is broken
+                if alg isa ROCSOLVER_QRIteration
+                    @test_warn "invalid keyword arguments for GPU_QRIteration" svd_compact!(copy!(Ac, A), (U, S, Vᴴ), ROCSOLVER_QRIteration(; bad = "bad"))
+                end
             end
         end
     end
 end
 
-@testset "svd_full! for T = $T" for T in (Float32, Float64, ComplexF32, ComplexF64)
+@testset "svd_full! for T = $T" for T in BLASFloats
     rng = StableRNG(123)
     m = 54
+    algs(::ROCArray) = (ROCSOLVER_QRIteration(), ROCSOLVER_Jacobi())
+    algs(::Diagonal) = (DiagonalAlgorithm(),)
     @testset "size ($m, $n)" for n in (37, m, 63)
-        algs = (ROCSOLVER_QRIteration(), ROCSOLVER_Jacobi())
-        @testset "algorithm $alg" for alg in algs
-            A = ROCArray(randn(rng, T, m, n))
-            U, S, Vᴴ = svd_full(A; alg)
-            @test U isa ROCMatrix{T} && size(U) == (m, m)
-            @test S isa ROCMatrix{real(T)} && size(S) == (m, n)
-            @test Vᴴ isa ROCMatrix{T} && size(Vᴴ) == (n, n)
-            @test U * S * Vᴴ ≈ A
-            @test isapproxone(U' * U)
-            @test isapproxone(U * U')
-            @test isapproxone(Vᴴ * Vᴴ')
-            @test isapproxone(Vᴴ' * Vᴴ)
-            @test all(isposdef, diagview(S))
+        As = m == n ? (ROCArray(randn(rng, T, m, n)), Diagonal(ROCArray(randn(rng, T, m)))) : (ROCArray(randn(rng, T, m, n)),)
+        for A in As
+            @testset "algorithm $alg" for alg in algs(A)
+                U, S, Vᴴ = svd_full(A; alg)
+                @test U isa ROCMatrix{T} && size(U) == (m, m)
+                if A isa Diagonal
+                    @test S isa Diagonal{real(T), <:ROCVector{real(T)}} && size(S) == (m, n)
+                else
+                    @test S isa ROCMatrix{real(T)} && size(S) == (m, n)
+                end
+                @test Vᴴ isa ROCMatrix{T} && size(Vᴴ) == (n, n)
+                @test U * S * Vᴴ ≈ A
+                @test isapproxone(U' * U)
+                @test isapproxone(U * U')
+                @test isapproxone(Vᴴ * Vᴴ')
+                @test isapproxone(Vᴴ' * Vᴴ)
+                @test all(isposdef, diagview(S))
 
-            Ac = similar(A)
-            U2, S2, V2ᴴ = @constinferred svd_full!(copy!(Ac, A), (U, S, Vᴴ), alg)
-            @test U2 === U
-            @test S2 === S
-            @test V2ᴴ === Vᴴ
-            @test U * S * Vᴴ ≈ A
-            @test isapproxone(U' * U)
-            @test isapproxone(U * U')
-            @test isapproxone(Vᴴ * Vᴴ')
-            @test isapproxone(Vᴴ' * Vᴴ)
-            @test all(isposdef, diagview(S))
+                Ac = similar(A)
+                U2, S2, V2ᴴ = @constinferred svd_full!(copy!(Ac, A), (U, S, Vᴴ), alg)
+                @test U2 === U
+                @test S2 === S
+                @test V2ᴴ === Vᴴ
+                @test U * S * Vᴴ ≈ A
+                @test isapproxone(U' * U)
+                @test isapproxone(U * U')
+                @test isapproxone(Vᴴ * Vᴴ')
+                @test isapproxone(Vᴴ' * Vᴴ)
+                @test all(isposdef, diagview(S))
 
-            Sc = similar(A, real(T), min(m, n))
-            Sc2 = svd_vals!(copy!(Ac, A), Sc, alg)
-            @test Sc === Sc2
-            @test ROCArray(diagview(S)) ≈ Sc
-            # ROCArray is necessary because norm of ROCArray view with non-unit step is broken
-            if alg isa ROCSOLVER_QRIteration
-                @test_warn "invalid keyword arguments for GPU_QRIteration" svd_full!(copy!(Ac, A), (U, S, Vᴴ), ROCSOLVER_QRIteration(; bad = "bad"))
-                @test_warn "invalid keyword arguments for GPU_QRIteration" svd_vals!(copy!(Ac, A), Sc, ROCSOLVER_QRIteration(; bad = "bad"))
+                Sc = similar(A, real(T), min(m, n))
+                Sc2 = svd_vals!(copy!(Ac, A), Sc, alg)
+                @test Sc === Sc2
+                @test ROCArray(diagview(S)) ≈ Sc
+                # ROCArray is necessary because norm of ROCArray view with non-unit step is broken
+                if alg isa ROCSOLVER_QRIteration
+                    @test_warn "invalid keyword arguments for GPU_QRIteration" svd_full!(copy!(Ac, A), (U, S, Vᴴ), ROCSOLVER_QRIteration(; bad = "bad"))
+                    @test_warn "invalid keyword arguments for GPU_QRIteration" svd_vals!(copy!(Ac, A), Sc, ROCSOLVER_QRIteration(; bad = "bad"))
+                end
             end
         end
     end
     @testset "size (0, 0)" begin
-        algs = (ROCSOLVER_QRIteration(), ROCSOLVER_Jacobi())
-        @testset "algorithm $alg" for alg in algs
-            A = ROCArray(randn(rng, T, 0, 0))
-            U, S, Vᴴ = svd_full(A; alg)
-            @test U isa ROCMatrix{T} && size(U) == (0, 0)
-            @test S isa ROCMatrix{real(T)} && size(S) == (0, 0)
-            @test Vᴴ isa ROCMatrix{T} && size(Vᴴ) == (0, 0)
-            @test U * S * Vᴴ ≈ A
-            @test isapproxone(U' * U)
-            @test isapproxone(U * U')
-            @test isapproxone(Vᴴ * Vᴴ')
-            @test isapproxone(Vᴴ' * Vᴴ)
-            @test all(isposdef, diagview(S))
+        for A in (ROCArray(randn(rng, T, 0, 0)), Diagonal(ROCArray(randn(rng, T, 0))))
+            @testset "algorithm $alg" for alg in algs(A)
+                U, S, Vᴴ = svd_full(A; alg)
+                @test U isa ROCMatrix{T} && size(U) == (0, 0)
+                if isa(A, Diagonal)
+                    @test S isa Diagonal{real(T), <:ROCVector{real(T)}}
+                else
+                    @test S isa ROCMatrix{real(T)}
+                end
+                @test Vᴴ isa ROCMatrix{T} && size(Vᴴ) == (0, 0)
+                @test U * S * Vᴴ ≈ A
+                @test isapproxone(U' * U)
+                @test isapproxone(U * U')
+                @test isapproxone(Vᴴ * Vᴴ')
+                @test isapproxone(Vᴴ' * Vᴴ)
+                @test all(isposdef, diagview(S))
+            end
         end
     end
 end
diff --git a/test/cuda/eig.jl b/test/cuda/eig.jl
index 6a44ea73..e40ee9b8 100644
--- a/test/cuda/eig.jl
+++ b/test/cuda/eig.jl
@@ -4,10 +4,13 @@ using Test
 using TestExtras
 using StableRNGs
 using CUDA
+using MatrixAlgebraKit: TruncatedAlgorithm, diagview, norm
 
 include(joinpath("..", "utilities.jl"))
 
-@testset "eig_full! for T = $T" for T in (Float32, Float64, ComplexF32, ComplexF64)
+BLASFloats = (Float32, Float64, ComplexF32, ComplexF64)
+
+@testset "eig_full! for T = $T" for T in BLASFloats
     rng = StableRNG(123)
     m = 54
     for alg in (CUSOLVER_Simple(), :CUSOLVER_Simple, CUSOLVER_Simple)
@@ -60,4 +63,46 @@ end
         @test V2 * ((V2' * V2) \ (V2' * V1)) ≈ V1
     end
 end
+
+@testset "eig_trunc! specify truncation algorithm T = $T" for T in BLASFloats
+    rng = StableRNG(123)
+    m = 4
+    atol = sqrt(eps(real(T)))
+    V = randn(rng, T, m, m)
+    D = Diagonal(real(T)[0.9, 0.3, 0.1, 0.01])
+    A = V * D * inv(V)
+    alg = TruncatedAlgorithm(LAPACK_Simple(), truncrank(2))
+    D2, V2, ϵ2 = @constinferred eig_trunc(A; alg)
+    @test diagview(D2) ≈ diagview(D)[1:2]
+    @test ϵ2 ≈ norm(diagview(D)[3:4]) atol = atol
+    @test_throws ArgumentError eig_trunc(A; alg, trunc = (; maxrank = 2))
+
+    alg = TruncatedAlgorithm(LAPACK_Simple(), truncerror(; atol = 0.2, p = 1))
+    D3, V3, ϵ3 = @constinferred eig_trunc(A; alg)
+    @test diagview(D3) ≈ diagview(D)[1:2]
+    @test ϵ3 ≈ norm(diagview(D)[3:4]) atol = atol
+end
 =#
+
+@testset "eig for Diagonal{$T}" for T in BLASFloats
+    rng = StableRNG(123)
+    m = 54
+    Ad = CuArray(randn(rng, T, m))
+    A = Diagonal(Ad)
+    atol = sqrt(eps(real(T)))
+
+    D, V = @constinferred eig_full(A)
+    @test D isa Diagonal{T} && size(D) == size(A)
+    @test V isa Diagonal{T} && size(V) == size(A)
+    @test A * V ≈ V * D
+
+    D2 = @constinferred eig_vals(A)
+    @test D2 isa AbstractVector{T} && length(D2) == m
+    @test diagview(D) ≈ D2
+
+    #=A2 = Diagonal(T[0.9, 0.3, 0.1, 0.01])
+    alg = TruncatedAlgorithm(DiagonalAlgorithm(), truncrank(2))
+    D2, V2, ϵ2 = @constinferred eig_trunc(A2; alg)
+    @test diagview(D2) ≈ diagview(A2)[1:2]
+    @test ϵ2 ≈ norm(diagview(A2)[3:4]) atol = atol=#
+end
diff --git a/test/cuda/eigh.jl b/test/cuda/eigh.jl
index d37722e1..a8171615 100644
--- a/test/cuda/eigh.jl
+++ b/test/cuda/eigh.jl
@@ -6,7 +6,9 @@ using LinearAlgebra: LinearAlgebra, Diagonal, I
 using MatrixAlgebraKit: TruncatedAlgorithm, diagview
 using CUDA
 
-@testset "eigh_full! for T = $T" for T in (Float32, Float64, ComplexF32, ComplexF64)
+BLASFloats = (Float32, Float64, ComplexF32, ComplexF64)
+
+@testset "eigh_full! for T = $T" for T in BLASFloats
     rng = StableRNG(123)
     m = 54
     for alg in (CUSOLVER_DivideAndConquer(), CUSOLVER_Jacobi())
@@ -26,8 +28,8 @@ using CUDA
         @test parent(D) ≈ D3
     end
 end
-
-#=@testset "eigh_trunc! for T = $T" for T in (Float32, Float64, ComplexF32, ComplexF64)
+#= #TODO mul!
+@testset "eigh_trunc! for T = $T" for T in BLASFloats 
     rng = StableRNG(123)
     m = 54
     for alg in (CUSOLVER_QRIteration(),
@@ -40,29 +42,38 @@ end
         D₀ = reverse(eigh_vals(A))
         r = m - 2
         s = 1 + sqrt(eps(real(T)))
+        atol = sqrt(eps(real(T)))
 
         D1, V1, ϵ1 = @constinferred eigh_trunc(A; alg, trunc=truncrank(r))
         @test length(diagview(D1)) == r
         @test isisometric(V1)
         @test A * V1 ≈ V1 * D1
         @test LinearAlgebra.opnorm(A - V1 * D1 * V1') ≈ D₀[r + 1]
+        @test ϵ1 ≈ norm(view(D₀, (r + 1):m)) atol = atol
 
         trunc = trunctol(; atol = s * D₀[r + 1])
         D2, V2, ϵ2 = @constinferred eigh_trunc(A; alg, trunc)
         @test length(diagview(D2)) == r
         @test isisometric(V2)
         @test A * V2 ≈ V2 * D2
+        @test ϵ2 ≈ norm(view(D₀, (r + 1):m)) atol = atol
+
+        s = 1 - sqrt(eps(real(T)))
+        trunc = truncerror(; atol = s * norm(@view(D₀[r:end]), 1), p = 1)
+        D3, V3, ϵ3 = @constinferred eigh_trunc(A; alg, trunc)
+        @test length(diagview(D3)) == r
+        @test A * V3 ≈ V3 * D3
+        @test ϵ3 ≈ norm(view(D₀, (r + 1):m)) atol = atol
 
         # test for same subspace
         @test V1 * (V1' * V2) ≈ V2
         @test V2 * (V2' * V1) ≈ V1
+        @test V1 * (V1' * V3) ≈ V3
+        @test V3 * (V3' * V1) ≈ V1
     end
 end
 
-@testset "eigh_trunc! specify truncation algorithm T = $T" for T in
-                                                               (Float32, Float64,
-                                                                ComplexF32,
-                                                                ComplexF64)
+@testset "eigh_trunc! specify truncation algorithm T = $T" for T in BLASFloats
     rng = StableRNG(123)
     m = 4
     V = qr_compact(CuArray(randn(rng, T, m, m)))[1]
@@ -73,4 +84,35 @@ end
     D2, V2, ϵ2 = @constinferred eigh_trunc(A; alg)
     @test diagview(D2) ≈ diagview(D)[1:2] rtol = sqrt(eps(real(T)))
     @test_throws ArgumentError eigh_trunc(A; alg, trunc=(; maxrank=2))
-end=#
+    @test ϵ2 ≈ norm(diagview(D)[3:4]) atol = atol
+
+    alg = TruncatedAlgorithm(CUSOLVER_QRIteration(), truncerror(; atol = 0.2))
+    D3, V3, ϵ3 = @constinferred eigh_trunc(A; alg)
+    @test diagview(D3) ≈ diagview(D)[1:2]
+    @test ϵ3 ≈ norm(diagview(D)[3:4]) atol = atol
+end
+=#
+@testset "eigh for Diagonal{$T}" for T in BLASFloats
+    rng = StableRNG(123)
+    m = 54
+    Ad = randn(rng, T, m)
+    Ad .+= conj.(Ad)
+    A = Diagonal(CuArray(Ad))
+    atol = sqrt(eps(real(T)))
+
+    D, V = @constinferred eigh_full(A)
+    @test D isa Diagonal{real(T)} && size(D) == size(A)
+    @test V isa Diagonal{T} && size(V) == size(A)
+    @test A * V ≈ V * D
+
+    D2 = @constinferred eigh_vals(A)
+    @test D2 isa AbstractVector{real(T)} && length(D2) == m
+    @test diagview(D) ≈ D2
+
+    # TODO partialsortperm
+    #=A2 = Diagonal(CuArray(T[0.9, 0.3, 0.1, 0.01]))
+    alg = TruncatedAlgorithm(DiagonalAlgorithm(), truncrank(2))
+    D2, V2, ϵ2 = @constinferred eigh_trunc(A2; alg)
+    @test diagview(D2) ≈ diagview(A2)[1:2]
+    @test ϵ2 ≈ norm(diagview(A2)[3:4]) atol = atol=#
+end
diff --git a/test/cuda/lq.jl b/test/cuda/lq.jl
index 9a162c6b..de904c88 100644
--- a/test/cuda/lq.jl
+++ b/test/cuda/lq.jl
@@ -4,10 +4,13 @@ using Test
 using TestExtras
 using StableRNGs
 using CUDA
+using LinearAlgebra
 
 include(joinpath("..", "utilities.jl"))
 
-@testset "lq_compact! for T = $T" for T in (Float32, Float64, ComplexF32, ComplexF64)
+BLASFloats = (Float32, Float64, ComplexF32, ComplexF64)
+
+@testset "lq_compact! for T = $T" for T in BLASFloats
     rng = StableRNG(123)
     m = 54
     for n in (37, m, 63)
@@ -65,7 +68,7 @@ include(joinpath("..", "utilities.jl"))
     end
 end
 
-@testset "lq_full! for T = $T" for T in (Float32, Float64, ComplexF32, ComplexF64)
+@testset "lq_full! for T = $T" for T in BLASFloats
     rng = StableRNG(123)
     m = 54
     for n in (37, m, 63)
@@ -115,3 +118,46 @@ end
         @test_throws ArgumentError lq_full!(copy!(Ac, A), (L, Q); blocksize = 8)
     end
 end
+
+@testset "lq_compact, lq_full and lq_null for Diagonal{$T}" for T in BLASFloats
+    rng = StableRNG(123)
+    atol = eps(real(T))^(3 / 4)
+    for m in (54, 0)
+        Ad = CuArray(randn(rng, T, m))
+        A = Diagonal(Ad)
+
+        # compact
+        L, Q = @constinferred lq_compact(A)
+        @test Q isa Diagonal{T} && size(Q) == (m, m)
+        @test L isa Diagonal{T} && size(L) == (m, m)
+        @test L * Q ≈ A
+        @test isunitary(Q)
+
+        # compact and positive
+        Lp, Qp = @constinferred lq_compact(A; positive = true)
+        @test Qp isa Diagonal{T} && size(Qp) == (m, m)
+        @test Lp isa Diagonal{T} && size(Lp) == (m, m)
+        @test Lp * Qp ≈ A
+        @test isunitary(Qp)
+        @test all(isposdef.(diagview(Lp)))
+
+        # full
+        L, Q = @constinferred lq_full(A)
+        @test Q isa Diagonal{T} && size(Q) == (m, m)
+        @test L isa Diagonal{T} && size(L) == (m, m)
+        @test L * Q ≈ A
+        @test isunitary(Q)
+
+        # full and positive
+        Lp, Qp = @constinferred lq_full(A; positive = true)
+        @test Qp isa Diagonal{T} && size(Qp) == (m, m)
+        @test Lp isa Diagonal{T} && size(Lp) == (m, m)
+        @test Lp * Qp ≈ A
+        @test isunitary(Qp)
+        @test all(isposdef.(diagview(Lp)))
+
+        # null
+        N = @constinferred lq_null(A)
+        @test N isa AbstractMatrix{T} && size(N) == (0, m)
+    end
+end
diff --git a/test/cuda/qr.jl b/test/cuda/qr.jl
index 93e4b748..73fa3985 100644
--- a/test/cuda/qr.jl
+++ b/test/cuda/qr.jl
@@ -4,12 +4,14 @@ using Test
 using TestExtras
 using StableRNGs
 using CUDA
+using LinearAlgebra
+using LinearAlgebra: isposdef
 
 include(joinpath("..", "utilities.jl"))
 
-eltypes = (Float32, Float64, ComplexF32, ComplexF64)
+BLASFloats = (Float32, Float64, ComplexF32, ComplexF64)
 
-@testset "qr_compact! and qr_null! for T = $T" for T in eltypes
+@testset "qr_compact! and qr_null! for T = $T" for T in BLASFloats
     rng = StableRNG(123)
     m = 54
     for n in (37, m, 63)
@@ -68,7 +70,7 @@ eltypes = (Float32, Float64, ComplexF32, ComplexF64)
     end
 end
 
-@testset "qr_full! for T = $T" for T in eltypes
+@testset "qr_full! for T = $T" for T in BLASFloats
     rng = StableRNG(123)
     m = 63
     for n in (37, m, 63)
@@ -121,3 +123,46 @@ end
         @test_throws ArgumentError qr_full!(copy!(Ac, A), (Q, R); blocksize = 8)
     end
 end
+
+@testset "qr_compact, qr_full and qr_null for Diagonal{$T}" for T in BLASFloats
+    rng = StableRNG(123)
+    atol = eps(real(T))^(3 / 4)
+    for m in (54, 0)
+        Ad = CuArray(randn(rng, T, m))
+        A = Diagonal(Ad)
+
+        # compact
+        Q, R = @constinferred qr_compact(A)
+        @test Q isa Diagonal{T} && size(Q) == (m, m)
+        @test R isa Diagonal{T} && size(R) == (m, m)
+        @test Q * R ≈ A
+        @test isunitary(Q)
+
+        # compact and positive
+        Qp, Rp = @constinferred qr_compact(A; positive = true)
+        @test Qp isa Diagonal{T} && size(Qp) == (m, m)
+        @test Rp isa Diagonal{T} && size(Rp) == (m, m)
+        @test Qp * Rp ≈ A
+        @test isunitary(Qp)
+        @test all(isposdef.(diagview(Rp)))
+
+        # full
+        Q, R = @constinferred qr_full(A)
+        @test Q isa Diagonal{T} && size(Q) == (m, m)
+        @test R isa Diagonal{T} && size(R) == (m, m)
+        @test Q * R ≈ A
+        @test isunitary(Q)
+
+        # full and positive
+        Qp, Rp = @constinferred qr_full(A; positive = true)
+        @test Qp isa Diagonal{T} && size(Qp) == (m, m)
+        @test Rp isa Diagonal{T} && size(Rp) == (m, m)
+        @test Qp * Rp ≈ A
+        @test isunitary(Qp)
+        @test all(isposdef.(diagview(Rp)))
+
+        # null
+        N = @constinferred qr_null(A)
+        @test N isa AbstractMatrix{T} && size(N) == (m, 0)
+    end
+end
diff --git a/test/cuda/svd.jl b/test/cuda/svd.jl
index e2173d03..fc564fec 100644
--- a/test/cuda/svd.jl
+++ b/test/cuda/svd.jl
@@ -8,101 +8,118 @@ using CUDA
 
 include(joinpath("..", "utilities.jl"))
 
-@testset "svd_compact! for T = $T" for T in (Float32, Float64, ComplexF32, ComplexF64)
+BLASFloats = (Float32, Float64, ComplexF32, ComplexF64)
+
+@testset "svd_compact! for T = $T" for T in BLASFloats
     rng = StableRNG(123)
     m = 54
     @testset "size ($m, $n)" for n in (37, m, 63)
         k = min(m, n)
-        algs = (CUSOLVER_QRIteration(), CUSOLVER_SVDPolar(), CUSOLVER_Jacobi())
-        @testset "algorithm $alg" for alg in algs
-            minmn = min(m, n)
-            A = CuArray(randn(rng, T, m, n))
-
-            U, S, Vᴴ = svd_compact(A; alg)
-            @test U isa CuMatrix{T} && size(U) == (m, minmn)
-            @test S isa Diagonal{real(T), <:CuVector} && size(S) == (minmn, minmn)
-            @test Vᴴ isa CuMatrix{T} && size(Vᴴ) == (minmn, n)
-            @test U * S * Vᴴ ≈ A
-            @test isapproxone(U' * U)
-            @test isapproxone(Vᴴ * Vᴴ')
-            @test isposdef(S)
+        algs(::CuArray) = (CUSOLVER_QRIteration(), CUSOLVER_SVDPolar(), CUSOLVER_Jacobi())
+        algs(::Diagonal) = (DiagonalAlgorithm(),)
+        As = m == n ? (CuArray(randn(rng, T, m, n)), Diagonal(CuArray(randn(rng, T, m)))) : (CuArray(randn(rng, T, m, n)),)
+        for A in As
+            @testset "algorithm $alg" for alg in algs(A)
+                minmn = min(m, n)
+                U, S, Vᴴ = svd_compact(A; alg)
+                @test U isa CuMatrix{T} && size(U) == (m, minmn)
+                @test S isa Diagonal{real(T), <:CuVector} && size(S) == (minmn, minmn)
+                @test Vᴴ isa CuMatrix{T} && size(Vᴴ) == (minmn, n)
+                @test U * S * Vᴴ ≈ A
+                @test isapproxone(U' * U)
+                @test isapproxone(Vᴴ * Vᴴ')
+                @test isposdef(S)
 
-            Ac = similar(A)
-            U2, S2, V2ᴴ = @constinferred svd_compact!(copy!(Ac, A), (U, S, Vᴴ), alg)
-            @test U2 === U
-            @test S2 === S
-            @test V2ᴴ === Vᴴ
-            @test U * S * Vᴴ ≈ A
-            @test isapproxone(U' * U)
-            @test isapproxone(Vᴴ * Vᴴ')
-            @test isposdef(S)
+                Ac = similar(A)
+                U2, S2, V2ᴴ = @constinferred svd_compact!(copy!(Ac, A), (U, S, Vᴴ), alg)
+                @test U2 === U
+                @test S2 === S
+                @test V2ᴴ === Vᴴ
+                @test U * S * Vᴴ ≈ A
+                @test isapproxone(U' * U)
+                @test isapproxone(Vᴴ * Vᴴ')
+                @test isposdef(S)
 
-            Sd = svd_vals(A, alg)
-            @test CuArray(diagview(S)) ≈ Sd
-            # CuArray is necessary because norm of CuArray view with non-unit step is broken
-            if alg isa CUSOLVER_QRIteration
-                @test_warn "invalid keyword arguments for GPU_QRIteration" svd_compact!(copy!(Ac, A), (U, S, Vᴴ), CUSOLVER_QRIteration(; bad = "bad"))
+                Sd = svd_vals(A, alg)
+                @test CuArray(diagview(S)) ≈ Sd
+                # CuArray is necessary because norm of CuArray view with non-unit step is broken
+                if alg isa CUSOLVER_QRIteration
+                    @test_warn "invalid keyword arguments for GPU_QRIteration" svd_compact!(copy!(Ac, A), (U, S, Vᴴ), CUSOLVER_QRIteration(; bad = "bad"))
+                end
             end
         end
     end
 end
 
-@testset "svd_full! for T = $T" for T in (Float32, Float64, ComplexF32, ComplexF64)
+@testset "svd_full! for T = $T" for T in BLASFloats
     rng = StableRNG(123)
     m = 54
+    algs(::CuArray) = (CUSOLVER_QRIteration(), CUSOLVER_SVDPolar(), CUSOLVER_Jacobi())
+    algs(::Diagonal) = (DiagonalAlgorithm(),)
     @testset "size ($m, $n)" for n in (37, m, 63)
-        algs = (CUSOLVER_QRIteration(), CUSOLVER_SVDPolar(), CUSOLVER_Jacobi())
-        @testset "algorithm $alg" for alg in algs
-            A = CuArray(randn(rng, T, m, n))
-            U, S, Vᴴ = svd_full(A; alg)
-            @test U isa CuMatrix{T} && size(U) == (m, m)
-            @test S isa CuMatrix{real(T)} && size(S) == (m, n)
-            @test Vᴴ isa CuMatrix{T} && size(Vᴴ) == (n, n)
-            @test U * S * Vᴴ ≈ A
-            @test isapproxone(U' * U)
-            @test isapproxone(U * U')
-            @test isapproxone(Vᴴ * Vᴴ')
-            @test isapproxone(Vᴴ' * Vᴴ)
-            @test all(isposdef, diagview(S))
+        As = m == n ? (CuArray(randn(rng, T, m, n)), Diagonal(CuArray(randn(rng, T, m)))) : (CuArray(randn(rng, T, m, n)),)
+        for A in As
+            @testset "algorithm $alg" for alg in algs(A)
+                minmn = min(m, n)
+                U, S, Vᴴ = svd_full(A; alg)
+                @test U isa CuMatrix{T} && size(U) == (m, m)
+                if A isa Diagonal
+                    @test S isa Diagonal{real(T), <:CuVector{real(T)}} && size(S) == (m, n)
+                else
+                    @test S isa CuMatrix{real(T)} && size(S) == (m, n)
+                end
+                @test Vᴴ isa CuMatrix{T} && size(Vᴴ) == (n, n)
+                @test U * S * Vᴴ ≈ A
+                @test isapproxone(U' * U)
+                @test isapproxone(U * U')
+                @test isapproxone(Vᴴ * Vᴴ')
+                @test isapproxone(Vᴴ' * Vᴴ)
+                @test all(isposdef, diagview(S))
 
-            Ac = similar(A)
-            U2, S2, V2ᴴ = @constinferred svd_full!(copy!(Ac, A), (U, S, Vᴴ), alg)
-            @test U2 === U
-            @test S2 === S
-            @test V2ᴴ === Vᴴ
-            @test U * S * Vᴴ ≈ A
-            @test isapproxone(U' * U)
-            @test isapproxone(U * U')
-            @test isapproxone(Vᴴ * Vᴴ')
-            @test isapproxone(Vᴴ' * Vᴴ)
-            @test all(isposdef, diagview(S))
+                Ac = similar(A)
+                U2, S2, V2ᴴ = @constinferred svd_full!(copy!(Ac, A), (U, S, Vᴴ), alg)
+                @test U2 === U
+                @test S2 === S
+                @test V2ᴴ === Vᴴ
+                @test U * S * Vᴴ ≈ A
+                @test isapproxone(U' * U)
+                @test isapproxone(U * U')
+                @test isapproxone(Vᴴ * Vᴴ')
+                @test isapproxone(Vᴴ' * Vᴴ)
+                @test all(isposdef, diagview(S))
 
-            minmn = min(m, n)
-            Sc = similar(A, real(T), minmn)
-            Sc2 = svd_vals!(copy!(Ac, A), Sc, alg)
-            @test Sc === Sc2
-            @test CuArray(diagview(S)) ≈ Sc
-            # CuArray is necessary because norm of CuArray view with non-unit step is broken
-            if alg isa CUSOLVER_QRIteration
-                @test_warn "invalid keyword arguments for GPU_QRIteration" svd_full!(copy!(Ac, A), (U, S, Vᴴ), CUSOLVER_QRIteration(; bad = "bad"))
-                @test_warn "invalid keyword arguments for GPU_QRIteration" svd_vals!(copy!(Ac, A), Sc, CUSOLVER_QRIteration(; bad = "bad"))
+                minmn = min(m, n)
+                Sc = similar(A, real(T), minmn)
+                Sc2 = svd_vals!(copy!(Ac, A), Sc, alg)
+                @test Sc === Sc2
+                @test CuArray(diagview(S)) ≈ Sc
+                # CuArray is necessary because norm of CuArray view with non-unit step is broken
+                if alg isa CUSOLVER_QRIteration
+                    @test_warn "invalid keyword arguments for GPU_QRIteration" svd_full!(copy!(Ac, A), (U, S, Vᴴ), CUSOLVER_QRIteration(; bad = "bad"))
+                    @test_warn "invalid keyword arguments for GPU_QRIteration" svd_vals!(copy!(Ac, A), Sc, CUSOLVER_QRIteration(; bad = "bad"))
+                end
             end
         end
     end
     @testset "size (0, 0)" begin
-        algs = (CUSOLVER_QRIteration(), CUSOLVER_SVDPolar(), CUSOLVER_Jacobi())
-        @testset "algorithm $alg" for alg in algs
-            A = CuArray(randn(rng, T, 0, 0))
-            U, S, Vᴴ = svd_full(A; alg)
-            @test U isa CuMatrix{T} && size(U) == (0, 0)
-            @test S isa CuMatrix{real(T)} && size(S) == (0, 0)
-            @test Vᴴ isa CuMatrix{T} && size(Vᴴ) == (0, 0)
-            @test U * S * Vᴴ ≈ A
-            @test isapproxone(U' * U)
-            @test isapproxone(U * U')
-            @test isapproxone(Vᴴ * Vᴴ')
-            @test isapproxone(Vᴴ' * Vᴴ)
-            @test all(isposdef, diagview(S))
+        for A in (CuArray(randn(rng, T, 0, 0)), Diagonal(CuArray(randn(rng, T, 0))))
+            @testset "algorithm $alg" for alg in algs(A)
+                U, S, Vᴴ = svd_full(A; alg)
+                @test U isa CuMatrix{T} && size(U) == (0, 0)
+                @test size(S) == (0, 0)
+                if isa(A, Diagonal)
+                    @test S isa Diagonal{real(T), <:CuVector{real(T)}}
+                else
+                    @test S isa CuMatrix{real(T)}
+                end
+                @test Vᴴ isa CuMatrix{T} && size(Vᴴ) == (0, 0)
+                @test U * S * Vᴴ ≈ A
+                @test isapproxone(U' * U)
+                @test isapproxone(U * U')
+                @test isapproxone(Vᴴ * Vᴴ')
+                @test isapproxone(Vᴴ' * Vᴴ)
+                @test all(isposdef, diagview(S))
+            end
         end
     end
 end
@@ -113,28 +130,31 @@ end
     @testset "size ($m, $n)" for n in (37, m, 63)
         k = min(m, n) - 20
         p = min(m, n) - k - 1
-        algs = (CUSOLVER_QRIteration(), CUSOLVER_SVDPolar(), CUSOLVER_Jacobi(), CUSOLVER_Randomized(; k = k, p = p, niters = 100))
-        @testset "algorithm $alg" for alg in algs
-            hA = randn(rng, T, m, n)
-            S₀ = svd_vals(hA)
+        algs(::CuArray) = (CUSOLVER_QRIteration(), CUSOLVER_SVDPolar(), CUSOLVER_Jacobi(), CUSOLVER_Randomized(; k = k, p = p, niters = 100))
+        algs(::Diagonal) = (DiagonalAlgorithm(),)
+        hAs = m == n ? (randn(rng, T, m, n), Diagonal(randn(rng, T, m))) : (randn(rng, T, m, n),)
+        minmn = min(m, n)
+        for hA in hAs
             A = CuArray(hA)
-            minmn = min(m, n)
-            r = k
+            @testset "algorithm $alg" for alg in algs(A)
+                S₀ = svd_vals(hA)
+                r = k
 
-            U1, S1, V1ᴴ, ϵ1 = @constinferred svd_trunc(A; alg, trunc = truncrank(r))
-            @test length(S1.diag) == r
-            @test opnorm(A - U1 * S1 * V1ᴴ) ≈ S₀[r + 1]
-            @test norm(A - U1 * S1 * V1ᴴ) ≈ ϵ1
+                U1, S1, V1ᴴ, ϵ1 = @constinferred svd_trunc(A; alg, trunc = truncrank(r))
+                @test length(S1.diag) == r
+                @test opnorm(A - U1 * S1 * V1ᴴ) ≈ S₀[r + 1]
+                @test norm(A - U1 * S1 * V1ᴴ) ≈ ϵ1
 
-            if !(alg isa CUSOLVER_Randomized)
-                s = 1 + sqrt(eps(real(T)))
-                trunc2 = trunctol(; atol = s * S₀[r + 1])
+                if !(alg isa CUSOLVER_Randomized)
+                    s = 1 + sqrt(eps(real(T)))
+                    trunc2 = trunctol(; atol = s * S₀[r + 1])
 
-                U2, S2, V2ᴴ, ϵ2 = @constinferred svd_trunc(A; alg, trunc = trunctol(; atol = s * S₀[r + 1]))
-                @test length(S2.diag) == r
-                @test U1 ≈ U2
-                @test parent(S1) ≈ parent(S2)
-                @test V1ᴴ ≈ V2ᴴ
+                    U2, S2, V2ᴴ, ϵ2 = @constinferred svd_trunc(A; alg, trunc = trunctol(; atol = s * S₀[r + 1]))
+                    @test length(S2.diag) == r
+                    @test U1 ≈ U2
+                    @test parent(S1) ≈ parent(S2)
+                    @test V1ᴴ ≈ V2ᴴ
+                end
             end
         end
     end