From efcb33ed6785e69b7f978051a2dcd4a1b92a2a05 Mon Sep 17 00:00:00 2001 From: David Widmann Date: Fri, 25 Jul 2025 01:10:31 +0200 Subject: [PATCH] Fix implementations of `eigen` and `eigvals` --- ext/ForwardDiffStaticArraysExt.jl | 23 ++++++-- src/dual.jl | 96 ++++++++++++++++++------------- test/JacobianTest.jl | 84 +++++++++++++++++++++------ test/MiscTest.jl | 14 ++--- 4 files changed, 150 insertions(+), 67 deletions(-) diff --git a/ext/ForwardDiffStaticArraysExt.jl b/ext/ForwardDiffStaticArraysExt.jl index 63f841db..ca0ce1c9 100644 --- a/ext/ForwardDiffStaticArraysExt.jl +++ b/ext/ForwardDiffStaticArraysExt.jl @@ -24,15 +24,28 @@ end @inline static_dual_eval(::Type{T}, f::F, x::StaticArray) where {T,F} = f(dualize(T, x)) # To fix method ambiguity issues: -function LinearAlgebra.eigvals(A::Symmetric{<:Dual{Tg,T,N}, <:StaticArrays.StaticMatrix}) where {Tg,T<:Real,N} - return ForwardDiff._eigvals(A) +function LinearAlgebra.eigvals(A::Symmetric{Dual{T,V,N}, <:StaticArrays.StaticMatrix}) where {T,V<:Real,N} + return ForwardDiff._eigvals_hermitian(A) end -function LinearAlgebra.eigen(A::Symmetric{<:Dual{Tg,T,N}, <:StaticArrays.StaticMatrix}) where {Tg,T<:Real,N} - return ForwardDiff._eigen(A) +function LinearAlgebra.eigvals(A::Hermitian{Dual{T,V,N}, <:StaticArrays.StaticMatrix}) where {T,V<:Real,N} + return ForwardDiff._eigvals_hermitian(A) +end +function LinearAlgebra.eigvals(A::Hermitian{Complex{Dual{T,V,N}}, <:StaticArrays.StaticMatrix}) where {T,V<:Real,N} + return ForwardDiff._eigvals_hermitian(A) +end + +function LinearAlgebra.eigen(A::Symmetric{Dual{T,V,N}, <:StaticArrays.StaticMatrix}) where {T,V<:Real,N} + return ForwardDiff._eigen_hermitian(A) +end +function LinearAlgebra.eigen(A::Hermitian{Dual{T,V,N}, <:StaticArrays.StaticMatrix}) where {T,V<:Real,N} + return ForwardDiff._eigen_hermitian(A) +end +function LinearAlgebra.eigen(A::Hermitian{Complex{Dual{T,V,N}}, <:StaticArrays.StaticMatrix}) where {T,V<:Real,N} + return ForwardDiff._eigen_hermitian(A) end # For `MMatrix` we can use the in-place method -ForwardDiff._lyap_div!!(A::StaticArrays.MMatrix, λ::AbstractVector) = ForwardDiff._lyap_div!(A, λ) +ForwardDiff._lyap_div_zero_diag!!(A::StaticArrays.MMatrix, λ::AbstractVector) = ForwardDiff._lyap_div_zero_diag!(A, λ) # Gradient @inline ForwardDiff.gradient(f::F, x::StaticArray) where {F} = vector_mode_gradient(f, x) diff --git a/src/dual.jl b/src/dual.jl index 7e8ec110..0e722544 100644 --- a/src/dual.jl +++ b/src/dual.jl @@ -735,68 +735,86 @@ end return (Dual{T}(sd, cd * π * partials(d)), Dual{T}(cd, -sd * π * partials(d))) end -# Symmetric eigvals # +# eigen values and vectors of Hermitian matrices # #-------------------# -# To be able to reuse this default definition in the StaticArrays extension -# (has to be re-defined to avoid method ambiguity issues) -# we forward the call to an internal method that can be shared and reused -LinearAlgebra.eigvals(A::Symmetric{<:Dual{Tg,T,N}}) where {Tg,T<:Real,N} = _eigvals(A) -function _eigvals(A::Symmetric{<:Dual{Tg,T,N}}) where {Tg,T<:Real,N} - λ,Q = eigen(Symmetric(value.(parent(A)))) - parts = ntuple(j -> diag(Q' * getindex.(partials.(A), j) * Q), N) - Dual{Tg}.(λ, tuple.(parts...)) +# Extract structured matrices of primal values and partials +_structured_value(A::Symmetric{Dual{T,V,N}}) where {T,V,N} = Symmetric(map(value, parent(A)), A.uplo === 'U' ? :U : :L) +_structured_value(A::Hermitian{Dual{T,V,N}}) where {T,V,N} = Hermitian(map(value, parent(A)), A.uplo === 'U' ? :U : :L) +_structured_value(A::Hermitian{Complex{Dual{T,V,N}}}) where {T,V,N} = Hermitian(map(z -> splat(complex)(map(value, reim(z))), parent(A)), A.uplo === 'U' ? :U : :L) +_structured_value(A::SymTridiagonal{Dual{T,V,N}}) where {T,V,N} = SymTridiagonal(map(value, A.dv), map(value, A.ev)) + +_structured_partials(A::Symmetric{Dual{T,V,N}}, j::Int) where {T,V,N} = Symmetric(partials.(parent(A), j), A.uplo === 'U' ? :U : :L) +_structured_partials(A::Hermitian{Dual{T,V,N}}, j::Int) where {T,V,N} = Hermitian(partials.(parent(A), j), A.uplo === 'U' ? :U : :L) +function _structured_partials(A::Hermitian{Complex{Dual{T,V,N}}}, j::Int) where {T,V,N} + return Hermitian(complex.(partials.(real.(parent(A)), j), partials.(imag.(parent(A)), j)), A.uplo === 'U' ? :U : :L) end +_structured_partials(A::SymTridiagonal{Dual{T,V,N}}, j::Int) where {T,V,N} = SymTridiagonal(partials.(A.dv, j), partials.(A.ev, j)) -function LinearAlgebra.eigvals(A::Hermitian{<:Complex{<:Dual{Tg,T,N}}}) where {Tg,T<:Real,N} - λ,Q = eigen(Hermitian(value.(real.(parent(A))) .+ im .* value.(imag.(parent(A))))) - parts = ntuple(j -> diag(real.(Q' * (getindex.(partials.(real.(A)) .+ im .* partials.(imag.(A)), j)) * Q)), N) - Dual{Tg}.(λ, tuple.(parts...)) +# Convert arrays of primal values and partials to arrays of Duals +function _to_duals(::Val{T}, values::AbstractArray{<:Real}, partials::Tuple{Vararg{AbstractArray{<:Real}}}) where {T} + return Dual{T}.(values, tuple.(partials...)) +end +function _to_duals(::Val{T}, values::AbstractArray{<:Complex}, partials::Tuple{Vararg{AbstractArray{<:Complex}}}) where {T} + return complex.( + Dual{T}.(real.(values), Base.Fix1(map, real).(tuple.(partials...))), + Dual{T}.(imag.(values), Base.Fix1(map, imag).(tuple.(partials...))), + ) end -function LinearAlgebra.eigvals(A::SymTridiagonal{<:Dual{Tg,T,N}}) where {Tg,T<:Real,N} - λ,Q = eigen(SymTridiagonal(value.(parent(A).dv),value.(parent(A).ev))) - parts = ntuple(j -> diag(Q' * getindex.(partials.(A), j) * Q), N) - Dual{Tg}.(λ, tuple.(parts...)) +# We forward the call to an internal method that can be shared and reused +LinearAlgebra.eigvals(A::Symmetric{Dual{T,V,N}}) where {T,V<:Real,N} = _eigvals_hermitian(A) +LinearAlgebra.eigvals(A::Hermitian{Dual{T,V,N}}) where {T,V<:Real,N} = _eigvals_hermitian(A) +LinearAlgebra.eigvals(A::Hermitian{Complex{Dual{T,V,N}}}) where {T,V<:Real,N} = _eigvals_hermitian(A) +LinearAlgebra.eigvals(A::SymTridiagonal{Dual{T,V,N}}) where {T,V<:Real,N} = _eigvals_hermitian(A) + +# Eigenvalues of Hermitian-structured matrices +const DualMatrixRealComplex{T,V<:Real,N} = Union{AbstractMatrix{Dual{T,V,N}}, AbstractMatrix{Complex{Dual{T,V,N}}}} +function _eigvals_hermitian(A::DualMatrixRealComplex{T,<:Real,N}) where {T,N} + F = eigen(_structured_value(A)) + λ = F.values + Q = F.vectors + parts = ntuple(j -> real(diag(Q' * _structured_partials(A, j) * Q)), N) + return _to_duals(Val(T), λ, parts) end -# A ./ (λ' .- λ) but with diag special cased +# A ./ (λ' .- λ) but with diagonal elements zeroed out # Default out-of-place method -function _lyap_div!!(A::AbstractMatrix, λ::AbstractVector) +function _lyap_div_zero_diag!!(A::AbstractMatrix, λ::AbstractVector) return map( - (a, b, idx) -> a / (idx[1] == idx[2] ? oneunit(b) : b), + (a, b, idx) -> idx[1] == idx[2] ? zero(a) / oneunit(b) : a / b, A, λ' .- λ, CartesianIndices(A), ) end # For `Matrix` (and e.g. `StaticArrays.MMatrix`) we can use an in-place method -_lyap_div!!(A::Matrix, λ::AbstractVector) = _lyap_div!(A, λ) -function _lyap_div!(A::AbstractMatrix, λ::AbstractVector) +_lyap_div_zero_diag!!(A::Matrix, λ::AbstractVector) = _lyap_div_zero_diag!(A, λ) +function _lyap_div_zero_diag!(A::AbstractMatrix, λ::AbstractVector) for (j,μ) in enumerate(λ), (k,λ) in enumerate(λ) - if k ≠ j + if k == j + A[k, j] = zero(A[k, j]) + else A[k,j] /= μ - λ end end A end -# To be able to reuse this default definition in the StaticArrays extension -# (has to be re-defined to avoid method ambiguity issues) -# we forward the call to an internal method that can be shared and reused -LinearAlgebra.eigen(A::Symmetric{<:Dual{Tg,T,N}}) where {Tg,T<:Real,N} = _eigen(A) -function _eigen(A::Symmetric{<:Dual{Tg,T,N}}) where {Tg,T<:Real,N} - λ = eigvals(A) - _,Q = eigen(Symmetric(value.(parent(A)))) - parts = ntuple(j -> Q*_lyap_div!!(Q' * getindex.(partials.(A), j) * Q - Diagonal(getindex.(partials.(λ), j)), value.(λ)), N) - Eigen(λ,Dual{Tg}.(Q, tuple.(parts...))) -end - -function LinearAlgebra.eigen(A::SymTridiagonal{<:Dual{Tg,T,N}}) where {Tg,T<:Real,N} - λ = eigvals(A) - _,Q = eigen(SymTridiagonal(value.(parent(A)))) - parts = ntuple(j -> Q*_lyap_div!!(Q' * getindex.(partials.(A), j) * Q - Diagonal(getindex.(partials.(λ), j)), value.(λ)), N) - Eigen(λ,Dual{Tg}.(Q, tuple.(parts...))) +# We forward the call to an internal method that can be shared and reused +LinearAlgebra.eigen(A::Symmetric{Dual{T,V,N}}) where {T,V<:Real,N} = _eigen_hermitian(A) +LinearAlgebra.eigen(A::Hermitian{Dual{T,V,N}}) where {T,V<:Real,N} = _eigen_hermitian(A) +LinearAlgebra.eigen(A::Hermitian{Complex{Dual{T,V,N}}}) where {T,V<:Real,N} = _eigen_hermitian(A) +LinearAlgebra.eigen(A::SymTridiagonal{Dual{T,V,N}}) where {T,V<:Real,N} = _eigen_hermitian(A) + +function _eigen_hermitian(A::DualMatrixRealComplex{T,<:Real,N}) where {T,N} + F = eigen(_structured_value(A)) + λ = F.values + Q = F.vectors + Qt_∂A_Q = ntuple(j -> Q' * _structured_partials(A, j) * Q, N) + λ_partials = map(real ∘ diag, Qt_∂A_Q) + Q_partials = map(Qt_∂Aj_Q -> Q*_lyap_div_zero_diag!!(Qt_∂Aj_Q, λ), Qt_∂A_Q) + return Eigen(_to_duals(Val(T), λ, λ_partials), _to_duals(Val(T), Q, Q_partials)) end # Functions in SpecialFunctions which return tuples # diff --git a/test/JacobianTest.jl b/test/JacobianTest.jl index 1e52f7fa..fdd16dea 100644 --- a/test/JacobianTest.jl +++ b/test/JacobianTest.jl @@ -238,22 +238,74 @@ end end @testset "eigen" begin - @test ForwardDiff.jacobian(x -> eigvals(SymTridiagonal(x, x[1:end-1])), [1.,2.]) ≈ [(1 - 3/sqrt(5))/2 (1 - 1/sqrt(5))/2 ; (1 + 3/sqrt(5))/2 (1 + 1/sqrt(5))/2] - @test ForwardDiff.jacobian(x -> eigvals(Symmetric(x*x')), [1.,2.]) ≈ [0 0; 2 4] - - x0 = [1.0, 2.0]; - ev1(x) = eigen(Symmetric(x*x')).vectors[:,1] - @test ForwardDiff.jacobian(ev1, x0) ≈ Calculus.finite_difference_jacobian(ev1, x0) - ev2(x) = eigen(SymTridiagonal(x, x[1:end-1])).vectors[:,1] - @test ForwardDiff.jacobian(ev2, x0) ≈ Calculus.finite_difference_jacobian(ev2, x0) - - x0_svector = SVector{2}(x0) - @test ForwardDiff.jacobian(ev1, x0_svector) isa SMatrix{2, 2} - @test ForwardDiff.jacobian(ev1, x0_svector) ≈ Calculus.finite_difference_jacobian(ev1, x0) - - x0_mvector = MVector{2}(x0) - @test ForwardDiff.jacobian(ev1, x0_mvector) isa MMatrix{2, 2} - @test ForwardDiff.jacobian(ev1, x0_mvector) ≈ Calculus.finite_difference_jacobian(ev1, x0) + eigvals_symreal(x) = eigvals(Symmetric(x*x')) + eigvals_hermreal(x) = eigvals(Hermitian(x*x')) + eigvals_hermcomplex(x) = map(abs2, eigvals(Hermitian(complex.(x*x', x'*x)))) + eigvals_symtridiag(x) = eigvals(SymTridiagonal(x, x[begin:(end - 1)])) + + eigen_vals_symreal(x) = eigen(Symmetric(x*x')).values + eigen_vals_hermreal(x) = eigen(Hermitian(x*x')).values + eigen_vals_hermcomplex(x) = map(abs2, eigen(Hermitian(complex.(x*x', x'*x))).values) + eigen_vals_symtridiag(x) = eigen(SymTridiagonal(x, x[begin:(end - 1)])).values + + eigen_vec1_symreal(x) = eigen(Symmetric(x*x')).vectors[:,1] + eigen_vec1_hermreal(x) = eigen(Hermitian(x*x')).vectors[:,1] + eigen_vec1_hermcomplex(x) = map(abs2, eigen(Hermitian(complex.(x*x', x'*x))).vectors[:,1]) + eigen_vec1_symtridiag(x) = eigen(SymTridiagonal(x, x[begin:(end - 1)])).vectors[:,1] + + # Note: SymTridiagonal is not supported for StaticArrays + for T in (Int, Float32, Float64), A in (Array, SArray, MArray) + if A <: StaticArrays.StaticArray + x = A{Tuple{2},T}(x0) + JT = A{Tuple{2,2},float(T)} + else + x = A{T,1}(x0) + JT = A{float(T),2} + end + + # analytic solutions + @test ForwardDiff.jacobian(eigvals_symreal, x) ≈ [0 0; 2 4] + @test ForwardDiff.jacobian(eigvals_hermreal, x) ≈ [0 0; 2 4] + if !(x isa StaticArrays.StaticArray) + @test ForwardDiff.jacobian(eigvals_symtridiag, x) ≈ [(1 - 3/sqrt(5))/2 (1 - 1/sqrt(5))/2 ; (1 + 3/sqrt(5))/2 (1 + 1/sqrt(5))/2] + end + + # eigen + eigvals + for ev in ( + eigvals_symreal, eigvals_hermreal, eigvals_hermcomplex, eigvals_symtridiag, + eigen_vals_symreal, eigen_vals_hermreal, eigen_vals_hermcomplex, eigen_vals_symtridiag, + eigen_vec1_symreal, eigen_vec1_hermreal, eigen_vec1_hermcomplex, eigen_vec1_symtridiag, + ) + if x isa StaticArrays.StaticArray && + (ev === eigvals_symtridiag || ev === eigen_vals_symtridiag || ev === eigen_vec1_symtridiag) + continue + end + + # Chunk size can only be inferred for static arrays + if x isa StaticArrays.StaticArray + @test @inferred(ForwardDiff.jacobian(ev, x)) isa JT + else + @test ForwardDiff.jacobian(ev, x) isa JT + end + cfg = ForwardDiff.JacobianConfig(ev, x) + @test @inferred(ForwardDiff.jacobian(ev, x, cfg)) isa JT + + @test ForwardDiff.jacobian(ev, x) ≈ Calculus.finite_difference_jacobian(ev, float.(x0)) + end + + # consistency of eigen and eigvals + for (eigvals, eigen_vals) in ( + (eigvals_symreal, eigen_vals_symreal), + (eigvals_hermreal, eigen_vals_hermreal), + (eigvals_hermcomplex, eigen_vals_hermcomplex), + (eigvals_symtridiag, eigen_vals_symtridiag), + ) + if x isa StaticArrays.StaticArray && eigvals === eigvals_symtridiag + continue + end + @test ForwardDiff.jacobian(eigvals, x) ≈ ForwardDiff.jacobian(eigen_vals, x) + end + end end @testset "type stability" begin diff --git a/test/MiscTest.jl b/test/MiscTest.jl index e7e0d91a..0535b552 100644 --- a/test/MiscTest.jl +++ b/test/MiscTest.jl @@ -182,26 +182,26 @@ end # example from https://github.com/JuliaDiff/DiffRules.jl/pull/98#issuecomment-1574420052 @test only(ForwardDiff.hessian(t -> abs(t[1])^2, [0.0])) == 2 -@testset "_lyap_div!!" begin +@testset "_lyap_div_zero_diag!!" begin # In-place version for `Matrix` A = rand(3, 3) Acopy = copy(A) λ = rand(3) - B = @inferred(ForwardDiff._lyap_div!!(A, λ)) + B = @inferred(ForwardDiff._lyap_div_zero_diag!!(A, λ)) @test B === A - @test B[diagind(B)] == Acopy[diagind(Acopy)] + @test iszero(B[diagind(B)]) no_diag(X) = [X[i] for i in eachindex(X) if !(i in diagind(X))] @test no_diag(B) == no_diag(Acopy ./ (λ' .- λ)) # Immutable static arrays A_smatrix = SMatrix{3,3}(Acopy) λ_svector = SVector{3}(λ) - B_smatrix = @inferred(ForwardDiff._lyap_div!!(A_smatrix, λ_svector)) + B_smatrix = @inferred(ForwardDiff._lyap_div_zero_diag!!(A_smatrix, λ_svector)) @test B_smatrix !== A_smatrix @test B_smatrix isa SMatrix{3,3} @test B_smatrix == B λ_mvector = MVector{3}(λ) - B_smatrix = @inferred(ForwardDiff._lyap_div!!(A_smatrix, λ_mvector)) + B_smatrix = @inferred(ForwardDiff._lyap_div_zero_diag!!(A_smatrix, λ_mvector)) @test B_smatrix !== A_smatrix @test B_smatrix isa SMatrix{3,3} @test B_smatrix == B @@ -209,12 +209,12 @@ end # Mutable static arrays A_mmatrix = MMatrix{3,3}(Acopy) λ_svector = SVector{3}(λ) - B_mmatrix = @inferred(ForwardDiff._lyap_div!!(A_mmatrix, λ_svector)) + B_mmatrix = @inferred(ForwardDiff._lyap_div_zero_diag!!(A_mmatrix, λ_svector)) @test B_mmatrix === A_mmatrix @test B_mmatrix == B A_mmatrix = MMatrix{3,3}(Acopy) λ_mvector = MVector{3}(λ) - B_mmatrix = @inferred(ForwardDiff._lyap_div!!(A_mmatrix, λ_mvector)) + B_mmatrix = @inferred(ForwardDiff._lyap_div_zero_diag!!(A_mmatrix, λ_mvector)) @test B_mmatrix === A_mmatrix @test B_mmatrix == B end