From 786bc0d9a5f5bbe9ea936d96ad0dbe41b9d130c2 Mon Sep 17 00:00:00 2001 From: farhadrclass <31899325+farhadrclass@users.noreply.github.com> Date: Mon, 23 Sep 2024 12:47:34 -0400 Subject: [PATCH 01/34] solve_shifted_system! Computes the regularized L-BFGS step by solving the linear system --- README.md | 1 + src/utilities.jl | 95 ++++++++++++++++++++- test/runtests.jl | 1 + test/test_solve_shifted_system.jl | 132 ++++++++++++++++++++++++++++++ 4 files changed, 228 insertions(+), 1 deletion(-) create mode 100644 test/test_solve_shifted_system.jl diff --git a/README.md b/README.md index 7a0d2686..ccc4f201 100644 --- a/README.md +++ b/README.md @@ -78,6 +78,7 @@ Function | Description `size` | Return the size of a linear operator `symmetric` | Determine whether the operator is symmetric `normest` | Estimate the 2-norm +`solve_shifted_system!` | Computes the regularized L-BFGS step ## Other Operations on Operators diff --git a/src/utilities.jl b/src/utilities.jl index 81f953f7..48550f76 100644 --- a/src/utilities.jl +++ b/src/utilities.jl @@ -1,4 +1,4 @@ -export check_ctranspose, check_hermitian, check_positive_definite, normest +export check_ctranspose, check_hermitian, check_positive_definite, normest, solve_shifted_system! """ normest(S) estimates the matrix 2-norm of S. @@ -145,3 +145,96 @@ end check_positive_definite(M::AbstractMatrix; kwargs...) = check_positive_definite(LinearOperator(M); kwargs...) + + + """ + solve_shifted_system!(op::LBFGSOperator{T,I,F1,F2,F3}, + z::AbstractVector{T}, + σ::T, + γ_inv::T, + inv_Cz::AbstractVector{T}, + p::AbstractVector{T}, + v::AbstractVector{T}, + u::AbstractVector{T}) where {T,I,F1,F2,F3} + +Computes the regularized L-BFGS step by solving the linear system: + + (B_k + σ_k * I) s = -∇f(x_k) + +where `B_k` is the L-BFGS approximation of the Hessian, `σ_k` is a regularization parameter, and `∇f(x_k)` is the gradient at `x_k`. + +### Parameters +- `op::LBFGSOperator{T,I,F1,F2,F3}`: The L-BFGS operator `B_k`. Encodes the curvature information used to approximate the Hessian. +- `z::AbstractVector{T}`: The vector representing `-∇f(x_k)` (negative gradient at the current iterate). +- `σ::T`: The regularization parameter, used to shift the Hessian approximation `B_k` by adding a multiple of the identity matrix. +- `γ_inv::T`: The inverse of the initial curvature `γ_0`, used to initialize the L-BFGS matrix. +- `inv_Cz::AbstractVector{T}`: A preallocated vector used to store the result of the solution. It will be overwritten in the function to hold the computed `s`. +- `p::AbstractVector{T}`: A temporary matrix used in the computation to avoid allocating new memory during the solve process. +- `v::AbstractVector{T}`: A temporary vector for intermediate values during the solution process. +- `u::AbstractVector{T}`: A temporary vector that stores elements of the L-BFGS vectors `a_k` or `b_k` used in the algorithm. + +### Returns +- `inv_Cz::AbstractVector{T}`: The solution vector `s` such that `(B_k + σ * I) s = -∇f(x_k)`. + +### Method +The function solves the system efficiently without allocating new memory, by reusing preallocated arrays `inv_Cz`, `p`, `v`, and `u`. It computes the solution iteratively, making use of the structure of the L-BFGS approximation to the Hessian. + +The method uses a two-loop recursion-like approach with modifications to handle the regularization term `σ`. The initial inverse Hessian approximation `B_0` is initialized using `γ_inv`. + +The solution is computed on the CPU, but the function can be extended for GPU compatibility. + +### Notes +- `data.a[k]` and `data.b[k]` represent stored L-BFGS vectors used in the computation of `B_k`. They are loaded into `u` and updated in each iteration. +- The computation involves vector dot products and matrix updates which are efficiently performed in place. + +### To-Do +- Implement GPU support for further acceleration. + +### References +@misc{erway2013shiftedlbfgssystems, + title={Shifted L-BFGS Systems}, + author={Jennifer B. Erway and Vibhor Jain and Roummel F. Marcia}, + year={2013}, + eprint={1209.5141}, + archivePrefix={arXiv}, + primaryClass={math.NA}, + url={https://arxiv.org/abs/1209.5141}, +} +""" + +function solve_shifted_system!( + op::LBFGSOperator{T, I, F1, F2, F3}, + z::AbstractVector{T}, + σ::T, + γ_inv::T, + inv_Cz::AbstractVector{T}, + p::AbstractArray{T}, + v::AbstractVector{T}, + u::AbstractVector{T}, + ) where {T, I, F1, F2, F3} + data = op.data + insert = data.insert + + inv_c0 = 1 / (γ_inv + σ) + @. inv_Cz = inv_c0 * z + + max_i = 2 * data.mem + for i = 1:max_i + j = (i + 1) ÷ 2 + k = mod(insert + j - 1, data.mem) + 1 + u .= ((i % 2) == 0 ? data.b[k] : data.a[k]) + + @. p[:, i] = inv_c0 * u + + for t = 1:(i - 1) + c0 = dot(view(p, :, t), u) + c1 = (-1)^(t + 1) .* v[t] + c2 = c1 * c0 + view(p, :, i) .+= c2 .* view(p, :, t) + end + + v[i] = 1 / (1 + (-1)^i * dot(u, view(p, :, i))) + inv_Cz .+= (-1)^(i + 1) * v[i] * (view(p, :, i)' * z) .* view(p, :, i) + end + return inv_Cz +end diff --git a/test/runtests.jl b/test/runtests.jl index 57a4851c..eea61206 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -15,3 +15,4 @@ include("test_deprecated.jl") include("test_normest.jl") include("test_diag.jl") include("test_chainrules.jl") +include("test_solve_shifted_system.jl") \ No newline at end of file diff --git a/test/test_solve_shifted_system.jl b/test/test_solve_shifted_system.jl new file mode 100644 index 00000000..9654bdd2 --- /dev/null +++ b/test/test_solve_shifted_system.jl @@ -0,0 +1,132 @@ +using Test +using LinearOperators +using LinearAlgebra + +function setup_test_val(; M = 5, n = 100, scaling = false) + σ = 0.1 + B = LBFGSOperator(n, mem = 5, scaling = scaling) + s = ones(n) + y = ones(n) + + S = randn(n, M) + Y = randn(n, M) + + # make sure it is positive + for i = 1:M + if dot(S[:, i], Y[:, i]) < 0 + S[:, i] = -S[:, i] + end + end + for i = 1:M + s = S[:, i] + y = Y[:, i] + if dot(s, y) > 1.0e-20 + push!(B, s, y) + end + end + + γ_inv = 1 / B.data.scaling_factor + + x = randn(n) + z = B * x + σ .* x # so we know the true answer is x + + data = B.data + # Preallocate vectors for efficiency + p = zeros(size(data.a[1], 1), 2 * (data.mem)) + v = zeros(2 * (data.mem)) + u = zeros(size(data.a[1], 1)) + + return B, z, σ, γ_inv, zeros(n), p, v, u, x +end + +function test_solve_shifted_system() + @testset "solve_shifted_system! Default setup test" begin + # Setup Test Case 1: Default setup from setup_test_val + B, z, σ, γ_inv, inv_Cz, p, v, u, x = setup_test_val(n = 100, M = 5) + + result = solve_shifted_system!(B, z, σ, γ_inv, inv_Cz, p, v, u) + + # Test 1: Check if result is a vector of the same size as z + @test length(result) == length(z) + + # Test 2: Verify that inv_Cz (result) is modified in place + @test result === inv_Cz + + # Test 3: Check if the function produces finite values + @test all(isfinite, result) + + # Test 4: Check if inv_Cz is close to the known solution x + # x = B \ (z ./ (1 + σ)) # Known true solution + @test isapprox(inv_Cz, x, atol = 1e-6, rtol = 1e-6) + end + @testset "solve_shifted_system! Larger dimensional system tests" begin + + # Test Case 2: Larger dimensional system + dim = 10 + mem_size = 5 + B, z, σ, γ_inv, inv_Cz, p, v, u, x = setup_test_val(n = dim, M = mem_size) + + result = solve_shifted_system!(B, z, σ, γ_inv, inv_Cz, p, v, u) + + # Test 5: Check if result is a vector of the same size as z (larger case) + @test length(result) == length(z) + + # Test 6: Verify that inv_Cz is modified in place (larger case) + @test result === inv_Cz + + # Test 7: Check if the function produces finite values (larger case) + @test all(isfinite, result) + + # Test 8: Check if inv_Cz is close to the known solution x (larger case) + # x = B \ (z ./ (1 + σ)) + @test isapprox(inv_Cz, x, atol = 1e-6, rtol = 1e-6) + end + + @testset "solve_shifted_system! Minimal memory size test" begin + # Test Case 3: Minimal memory size case (memory size = 1) + dim = 4 + mem_size = 1 + B, z, σ, γ_inv, inv_Cz, p, v, u, x = setup_test_val(n = dim, M = mem_size) + + result = solve_shifted_system!(B, z, σ, γ_inv, inv_Cz, p, v, u) + + # Test 9: Check if result is a vector of the same size as z (minimal memory) + @test length(result) == length(z) + + # Test 10: Verify that inv_Cz is modified in place (minimal case) + @test result === inv_Cz + + # Test 11: Check if the function produces finite values (minimal case) + @test all(isfinite, result) + + # Test 12: Check if inv_Cz is close to the known solution x (minimal memory) + # x = B \ (z ./ (1 + σ)) + @test isapprox(inv_Cz, x, atol = 1e-6, rtol = 1e-6) + end + + @testset "solve_shifted_system! Extra large memory size test" begin + + # Test Case 4: Even larger system with more memory (case 4) + dim = 50 + mem_size = 10 + B, z, σ, γ_inv, inv_Cz, p, v, u, x = setup_test_val(n = dim, M = mem_size) + + # Call the function + result = solve_shifted_system!(B, z, σ, γ_inv, inv_Cz, p, v, u) + + # Test 13: Check if result is a vector of the same size as z (case 4) + @test length(result) == length(z) + + # Test 14: Verify that inv_Cz is modified in place (case 4) + @test result === inv_Cz + + # Test 15: Check if the function produces finite values (case 4) + @test all(isfinite, result) + + # Test 16: Check if inv_Cz is close to the known solution x (case 4) + # x = B \ (z ./ (1 + σ)) + @test isapprox(inv_Cz, x, atol = 1e-6, rtol = 1e-6) + end +end + +test_solve_shifted_system() From f95d210957ad819d04e9b29d693854273d9fc1a4 Mon Sep 17 00:00:00 2001 From: Farhad Rahbarnia <31899325+farhadrclass@users.noreply.github.com> Date: Mon, 23 Sep 2024 15:11:54 -0400 Subject: [PATCH 02/34] Update src/utilities.jl Co-authored-by: Dominique --- src/utilities.jl | 4 ---- 1 file changed, 4 deletions(-) diff --git a/src/utilities.jl b/src/utilities.jl index 48550f76..2a362059 100644 --- a/src/utilities.jl +++ b/src/utilities.jl @@ -183,10 +183,6 @@ The method uses a two-loop recursion-like approach with modifications to handle The solution is computed on the CPU, but the function can be extended for GPU compatibility. -### Notes -- `data.a[k]` and `data.b[k]` represent stored L-BFGS vectors used in the computation of `B_k`. They are loaded into `u` and updated in each iteration. -- The computation involves vector dot products and matrix updates which are efficiently performed in place. - ### To-Do - Implement GPU support for further acceleration. From bac9f6f6a24c2bae1da4823862ccaa9199a1a863 Mon Sep 17 00:00:00 2001 From: Farhad Rahbarnia <31899325+farhadrclass@users.noreply.github.com> Date: Mon, 23 Sep 2024 15:12:14 -0400 Subject: [PATCH 03/34] Update src/utilities.jl Co-authored-by: Dominique --- src/utilities.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/utilities.jl b/src/utilities.jl index 2a362059..bbec6ed6 100644 --- a/src/utilities.jl +++ b/src/utilities.jl @@ -166,7 +166,7 @@ where `B_k` is the L-BFGS approximation of the Hessian, `σ_k` is a regularizati ### Parameters - `op::LBFGSOperator{T,I,F1,F2,F3}`: The L-BFGS operator `B_k`. Encodes the curvature information used to approximate the Hessian. - `z::AbstractVector{T}`: The vector representing `-∇f(x_k)` (negative gradient at the current iterate). -- `σ::T`: The regularization parameter, used to shift the Hessian approximation `B_k` by adding a multiple of the identity matrix. +- `σ::T`: Nonnegative shift. - `γ_inv::T`: The inverse of the initial curvature `γ_0`, used to initialize the L-BFGS matrix. - `inv_Cz::AbstractVector{T}`: A preallocated vector used to store the result of the solution. It will be overwritten in the function to hold the computed `s`. - `p::AbstractVector{T}`: A temporary matrix used in the computation to avoid allocating new memory during the solve process. From 62d35f6a0fd8f8e872474460d46464cc5e3650d2 Mon Sep 17 00:00:00 2001 From: Farhad Rahbarnia <31899325+farhadrclass@users.noreply.github.com> Date: Mon, 23 Sep 2024 15:12:59 -0400 Subject: [PATCH 04/34] Update src/utilities.jl Co-authored-by: Dominique --- src/utilities.jl | 1 + 1 file changed, 1 insertion(+) diff --git a/src/utilities.jl b/src/utilities.jl index bbec6ed6..fcd2d88e 100644 --- a/src/utilities.jl +++ b/src/utilities.jl @@ -164,6 +164,7 @@ Computes the regularized L-BFGS step by solving the linear system: where `B_k` is the L-BFGS approximation of the Hessian, `σ_k` is a regularization parameter, and `∇f(x_k)` is the gradient at `x_k`. ### Parameters + - `op::LBFGSOperator{T,I,F1,F2,F3}`: The L-BFGS operator `B_k`. Encodes the curvature information used to approximate the Hessian. - `z::AbstractVector{T}`: The vector representing `-∇f(x_k)` (negative gradient at the current iterate). - `σ::T`: Nonnegative shift. From 186b6c50c60b96f1dff495ccb526b0cd032e0b2d Mon Sep 17 00:00:00 2001 From: Farhad Rahbarnia <31899325+farhadrclass@users.noreply.github.com> Date: Mon, 23 Sep 2024 15:13:12 -0400 Subject: [PATCH 05/34] Update src/utilities.jl Co-authored-by: Dominique --- src/utilities.jl | 1 + 1 file changed, 1 insertion(+) diff --git a/src/utilities.jl b/src/utilities.jl index fcd2d88e..6f505811 100644 --- a/src/utilities.jl +++ b/src/utilities.jl @@ -175,6 +175,7 @@ where `B_k` is the L-BFGS approximation of the Hessian, `σ_k` is a regularizati - `u::AbstractVector{T}`: A temporary vector that stores elements of the L-BFGS vectors `a_k` or `b_k` used in the algorithm. ### Returns + - `inv_Cz::AbstractVector{T}`: The solution vector `s` such that `(B_k + σ * I) s = -∇f(x_k)`. ### Method From 0d4f9526dbf2adad3ec5ee02feeacaf234f9e86c Mon Sep 17 00:00:00 2001 From: Farhad Rahbarnia <31899325+farhadrclass@users.noreply.github.com> Date: Mon, 23 Sep 2024 15:13:30 -0400 Subject: [PATCH 06/34] Update src/utilities.jl Co-authored-by: Dominique --- src/utilities.jl | 1 + 1 file changed, 1 insertion(+) diff --git a/src/utilities.jl b/src/utilities.jl index 6f505811..21e2253c 100644 --- a/src/utilities.jl +++ b/src/utilities.jl @@ -179,6 +179,7 @@ where `B_k` is the L-BFGS approximation of the Hessian, `σ_k` is a regularizati - `inv_Cz::AbstractVector{T}`: The solution vector `s` such that `(B_k + σ * I) s = -∇f(x_k)`. ### Method + The function solves the system efficiently without allocating new memory, by reusing preallocated arrays `inv_Cz`, `p`, `v`, and `u`. It computes the solution iteratively, making use of the structure of the L-BFGS approximation to the Hessian. The method uses a two-loop recursion-like approach with modifications to handle the regularization term `σ`. The initial inverse Hessian approximation `B_0` is initialized using `γ_inv`. From 4672bd32ab908420efe218793113fc6d8edeba6a Mon Sep 17 00:00:00 2001 From: Farhad Rahbarnia <31899325+farhadrclass@users.noreply.github.com> Date: Mon, 23 Sep 2024 15:14:58 -0400 Subject: [PATCH 07/34] Update src/utilities.jl Co-authored-by: Dominique --- src/utilities.jl | 3 --- 1 file changed, 3 deletions(-) diff --git a/src/utilities.jl b/src/utilities.jl index 21e2253c..fe8707d9 100644 --- a/src/utilities.jl +++ b/src/utilities.jl @@ -186,9 +186,6 @@ The method uses a two-loop recursion-like approach with modifications to handle The solution is computed on the CPU, but the function can be extended for GPU compatibility. -### To-Do -- Implement GPU support for further acceleration. - ### References @misc{erway2013shiftedlbfgssystems, title={Shifted L-BFGS Systems}, From f3ea7387bd79441739ff78d5668ce24d2b79053c Mon Sep 17 00:00:00 2001 From: Farhad Rahbarnia <31899325+farhadrclass@users.noreply.github.com> Date: Mon, 23 Sep 2024 15:15:21 -0400 Subject: [PATCH 08/34] Update src/utilities.jl Co-authored-by: Dominique --- src/utilities.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/utilities.jl b/src/utilities.jl index fe8707d9..ee85b2b7 100644 --- a/src/utilities.jl +++ b/src/utilities.jl @@ -182,7 +182,7 @@ where `B_k` is the L-BFGS approximation of the Hessian, `σ_k` is a regularizati The function solves the system efficiently without allocating new memory, by reusing preallocated arrays `inv_Cz`, `p`, `v`, and `u`. It computes the solution iteratively, making use of the structure of the L-BFGS approximation to the Hessian. -The method uses a two-loop recursion-like approach with modifications to handle the regularization term `σ`. The initial inverse Hessian approximation `B_0` is initialized using `γ_inv`. +The method uses a two-loop recursion-like approach with modifications to handle the shift `σ`. The solution is computed on the CPU, but the function can be extended for GPU compatibility. From 4bf8f55fb96d1e713d7808f43b9dfec5ddec40ec Mon Sep 17 00:00:00 2001 From: Farhad Rahbarnia <31899325+farhadrclass@users.noreply.github.com> Date: Mon, 23 Sep 2024 15:15:32 -0400 Subject: [PATCH 09/34] Update src/utilities.jl Co-authored-by: Dominique --- src/utilities.jl | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/utilities.jl b/src/utilities.jl index ee85b2b7..bf373274 100644 --- a/src/utilities.jl +++ b/src/utilities.jl @@ -180,8 +180,6 @@ where `B_k` is the L-BFGS approximation of the Hessian, `σ_k` is a regularizati ### Method -The function solves the system efficiently without allocating new memory, by reusing preallocated arrays `inv_Cz`, `p`, `v`, and `u`. It computes the solution iteratively, making use of the structure of the L-BFGS approximation to the Hessian. - The method uses a two-loop recursion-like approach with modifications to handle the shift `σ`. The solution is computed on the CPU, but the function can be extended for GPU compatibility. From 025645edf6ccda204b235611934d1ac97eda158f Mon Sep 17 00:00:00 2001 From: Farhad Rahbarnia <31899325+farhadrclass@users.noreply.github.com> Date: Mon, 23 Sep 2024 15:16:13 -0400 Subject: [PATCH 10/34] Update src/utilities.jl Co-authored-by: Dominique --- src/utilities.jl | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/utilities.jl b/src/utilities.jl index bf373274..8d87d494 100644 --- a/src/utilities.jl +++ b/src/utilities.jl @@ -182,8 +182,6 @@ where `B_k` is the L-BFGS approximation of the Hessian, `σ_k` is a regularizati The method uses a two-loop recursion-like approach with modifications to handle the shift `σ`. -The solution is computed on the CPU, but the function can be extended for GPU compatibility. - ### References @misc{erway2013shiftedlbfgssystems, title={Shifted L-BFGS Systems}, From 94fb35f45b442a289f83f33405a5d77d8da18962 Mon Sep 17 00:00:00 2001 From: farhadrclass <31899325+farhadrclass@users.noreply.github.com> Date: Mon, 23 Sep 2024 16:32:25 -0400 Subject: [PATCH 11/34] changes after review --- README.md | 2 +- src/lbfgs.jl | 6 ++ src/utilities.jl | 43 ++++--------- test/test_solve_shifted_system.jl | 102 +++--------------------------- 4 files changed, 27 insertions(+), 126 deletions(-) diff --git a/README.md b/README.md index ccc4f201..1bfe2189 100644 --- a/README.md +++ b/README.md @@ -78,7 +78,7 @@ Function | Description `size` | Return the size of a linear operator `symmetric` | Determine whether the operator is symmetric `normest` | Estimate the 2-norm -`solve_shifted_system!` | Computes the regularized L-BFGS step +`solve_shifted_system!` | Solves linear system $(B + \sigma I) x = b$, where $B$ is a forward L-BFGS operator and $\sigma \geq 0$. ## Other Operations on Operators diff --git a/src/lbfgs.jl b/src/lbfgs.jl index f03cad23..185840d3 100644 --- a/src/lbfgs.jl +++ b/src/lbfgs.jl @@ -16,6 +16,9 @@ mutable struct LBFGSData{T, I <: Integer} b::Vector{Vector{T}} insert::I Ax::Vector{T} + shifted_p::AbstractArray{T} # Temporary matrix used in the computation solve_shifted_system! + shifted_v::Vector{T} + shifted_u::Vector{T} end function LBFGSData( @@ -43,6 +46,9 @@ function LBFGSData( inverse ? Vector{T}(undef, 0) : [zeros(T, n) for _ = 1:mem], 1, Vector{T}(undef, n), + zeros(T, n, 2*mem), + zeros(T,2*mem), + zeros(T,n) ) end diff --git a/src/utilities.jl b/src/utilities.jl index 8d87d494..efcfef10 100644 --- a/src/utilities.jl +++ b/src/utilities.jl @@ -157,22 +157,14 @@ check_positive_definite(M::AbstractMatrix; kwargs...) = v::AbstractVector{T}, u::AbstractVector{T}) where {T,I,F1,F2,F3} -Computes the regularized L-BFGS step by solving the linear system: - - (B_k + σ_k * I) s = -∇f(x_k) - -where `B_k` is the L-BFGS approximation of the Hessian, `σ_k` is a regularization parameter, and `∇f(x_k)` is the gradient at `x_k`. +Solves linear system (B + σI) x = b, where B is a forward L-BFGS operator and σ <= 0. ### Parameters - `op::LBFGSOperator{T,I,F1,F2,F3}`: The L-BFGS operator `B_k`. Encodes the curvature information used to approximate the Hessian. -- `z::AbstractVector{T}`: The vector representing `-∇f(x_k)` (negative gradient at the current iterate). +- `z::AbstractVector{T}`: The vector representing `-b`. - `σ::T`: Nonnegative shift. -- `γ_inv::T`: The inverse of the initial curvature `γ_0`, used to initialize the L-BFGS matrix. - `inv_Cz::AbstractVector{T}`: A preallocated vector used to store the result of the solution. It will be overwritten in the function to hold the computed `s`. -- `p::AbstractVector{T}`: A temporary matrix used in the computation to avoid allocating new memory during the solve process. -- `v::AbstractVector{T}`: A temporary vector for intermediate values during the solution process. -- `u::AbstractVector{T}`: A temporary vector that stores elements of the L-BFGS vectors `a_k` or `b_k` used in the algorithm. ### Returns @@ -183,30 +175,19 @@ where `B_k` is the L-BFGS approximation of the Hessian, `σ_k` is a regularizati The method uses a two-loop recursion-like approach with modifications to handle the shift `σ`. ### References -@misc{erway2013shiftedlbfgssystems, - title={Shifted L-BFGS Systems}, - author={Jennifer B. Erway and Vibhor Jain and Roummel F. Marcia}, - year={2013}, - eprint={1209.5141}, - archivePrefix={arXiv}, - primaryClass={math.NA}, - url={https://arxiv.org/abs/1209.5141}, -} +Erway, J. B., Jain, V., & Marcia, R. F. (2013). Shifted L-BFGS Systems. """ function solve_shifted_system!( op::LBFGSOperator{T, I, F1, F2, F3}, z::AbstractVector{T}, σ::T, - γ_inv::T, inv_Cz::AbstractVector{T}, - p::AbstractArray{T}, - v::AbstractVector{T}, - u::AbstractVector{T}, ) where {T, I, F1, F2, F3} data = op.data insert = data.insert - + + γ_inv = 1 / data.scaling_factor inv_c0 = 1 / (γ_inv + σ) @. inv_Cz = inv_c0 * z @@ -214,19 +195,19 @@ function solve_shifted_system!( for i = 1:max_i j = (i + 1) ÷ 2 k = mod(insert + j - 1, data.mem) + 1 - u .= ((i % 2) == 0 ? data.b[k] : data.a[k]) + data.shifted_u .= ((i % 2) == 0 ? data.b[k] : data.a[k]) - @. p[:, i] = inv_c0 * u + @. data.shifted_p[:, i] = inv_c0 * data.shifted_u for t = 1:(i - 1) - c0 = dot(view(p, :, t), u) - c1 = (-1)^(t + 1) .* v[t] + c0 = dot(view(data.shifted_p, :, t), data.shifted_u) + c1= ((t % 2) == 0 ? -1 : 1) .*data.shifted_v[t] c2 = c1 * c0 - view(p, :, i) .+= c2 .* view(p, :, t) + view(data.shifted_p, :, i) .+= c2 .* view(data.shifted_p, :, t) end - v[i] = 1 / (1 + (-1)^i * dot(u, view(p, :, i))) - inv_Cz .+= (-1)^(i + 1) * v[i] * (view(p, :, i)' * z) .* view(p, :, i) + data.shifted_v[i] = 1 / (1 + ((i % 2) == 0 ? 1 : -1) * dot(data.shifted_u, view(data.shifted_p, :, i))) + inv_Cz .+= ((i % 2) == 0 ? -1 : 1) *data.shifted_v[i] * (view(data.shifted_p, :, i)' * z) .* view(data.shifted_p, :, i) end return inv_Cz end diff --git a/test/test_solve_shifted_system.jl b/test/test_solve_shifted_system.jl index 9654bdd2..679ee2df 100644 --- a/test/test_solve_shifted_system.jl +++ b/test/test_solve_shifted_system.jl @@ -5,46 +5,27 @@ using LinearAlgebra function setup_test_val(; M = 5, n = 100, scaling = false) σ = 0.1 B = LBFGSOperator(n, mem = 5, scaling = scaling) - s = ones(n) - y = ones(n) - S = randn(n, M) - Y = randn(n, M) - - # make sure it is positive - for i = 1:M - if dot(S[:, i], Y[:, i]) < 0 - S[:, i] = -S[:, i] - end - end - for i = 1:M - s = S[:, i] - y = Y[:, i] - if dot(s, y) > 1.0e-20 - push!(B, s, y) - end + for _ = 1:10 + s = rand(n) + y = rand(n) + push!(B, s, y) end - γ_inv = 1 / B.data.scaling_factor + # γ_inv = 1 / B.data.scaling_factor x = randn(n) z = B * x + σ .* x # so we know the true answer is x - data = B.data - # Preallocate vectors for efficiency - p = zeros(size(data.a[1], 1), 2 * (data.mem)) - v = zeros(2 * (data.mem)) - u = zeros(size(data.a[1], 1)) - - return B, z, σ, γ_inv, zeros(n), p, v, u, x + return B, z, σ, zeros(n), x end function test_solve_shifted_system() @testset "solve_shifted_system! Default setup test" begin # Setup Test Case 1: Default setup from setup_test_val - B, z, σ, γ_inv, inv_Cz, p, v, u, x = setup_test_val(n = 100, M = 5) + B, z, σ, inv_Cz, x = setup_test_val(n = 100, M = 5) - result = solve_shifted_system!(B, z, σ, γ_inv, inv_Cz, p, v, u) + result = solve_shifted_system!(B, z, σ, inv_Cz) # Test 1: Check if result is a vector of the same size as z @test length(result) == length(z) @@ -59,74 +40,7 @@ function test_solve_shifted_system() # x = B \ (z ./ (1 + σ)) # Known true solution @test isapprox(inv_Cz, x, atol = 1e-6, rtol = 1e-6) end - @testset "solve_shifted_system! Larger dimensional system tests" begin - - # Test Case 2: Larger dimensional system - dim = 10 - mem_size = 5 - B, z, σ, γ_inv, inv_Cz, p, v, u, x = setup_test_val(n = dim, M = mem_size) - - result = solve_shifted_system!(B, z, σ, γ_inv, inv_Cz, p, v, u) - - # Test 5: Check if result is a vector of the same size as z (larger case) - @test length(result) == length(z) - - # Test 6: Verify that inv_Cz is modified in place (larger case) - @test result === inv_Cz - - # Test 7: Check if the function produces finite values (larger case) - @test all(isfinite, result) - - # Test 8: Check if inv_Cz is close to the known solution x (larger case) - # x = B \ (z ./ (1 + σ)) - @test isapprox(inv_Cz, x, atol = 1e-6, rtol = 1e-6) - end - - @testset "solve_shifted_system! Minimal memory size test" begin - # Test Case 3: Minimal memory size case (memory size = 1) - dim = 4 - mem_size = 1 - B, z, σ, γ_inv, inv_Cz, p, v, u, x = setup_test_val(n = dim, M = mem_size) - - result = solve_shifted_system!(B, z, σ, γ_inv, inv_Cz, p, v, u) - - # Test 9: Check if result is a vector of the same size as z (minimal memory) - @test length(result) == length(z) - - # Test 10: Verify that inv_Cz is modified in place (minimal case) - @test result === inv_Cz - - # Test 11: Check if the function produces finite values (minimal case) - @test all(isfinite, result) - - # Test 12: Check if inv_Cz is close to the known solution x (minimal memory) - # x = B \ (z ./ (1 + σ)) - @test isapprox(inv_Cz, x, atol = 1e-6, rtol = 1e-6) - end - - @testset "solve_shifted_system! Extra large memory size test" begin - - # Test Case 4: Even larger system with more memory (case 4) - dim = 50 - mem_size = 10 - B, z, σ, γ_inv, inv_Cz, p, v, u, x = setup_test_val(n = dim, M = mem_size) - # Call the function - result = solve_shifted_system!(B, z, σ, γ_inv, inv_Cz, p, v, u) - - # Test 13: Check if result is a vector of the same size as z (case 4) - @test length(result) == length(z) - - # Test 14: Verify that inv_Cz is modified in place (case 4) - @test result === inv_Cz - - # Test 15: Check if the function produces finite values (case 4) - @test all(isfinite, result) - - # Test 16: Check if inv_Cz is close to the known solution x (case 4) - # x = B \ (z ./ (1 + σ)) - @test isapprox(inv_Cz, x, atol = 1e-6, rtol = 1e-6) - end end test_solve_shifted_system() From ded60c19a19649b29f80e4b11dc791e4b088ed02 Mon Sep 17 00:00:00 2001 From: Farhad Rahbarnia <31899325+farhadrclass@users.noreply.github.com> Date: Sat, 28 Sep 2024 19:21:00 -0400 Subject: [PATCH 12/34] Update src/lbfgs.jl Co-authored-by: Dominique --- src/lbfgs.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/lbfgs.jl b/src/lbfgs.jl index 185840d3..4d985bab 100644 --- a/src/lbfgs.jl +++ b/src/lbfgs.jl @@ -16,7 +16,7 @@ mutable struct LBFGSData{T, I <: Integer} b::Vector{Vector{T}} insert::I Ax::Vector{T} - shifted_p::AbstractArray{T} # Temporary matrix used in the computation solve_shifted_system! + shifted_p::Matrix{T} # Temporary matrix used in the computation solve_shifted_system! shifted_v::Vector{T} shifted_u::Vector{T} end From e5b054addbcf5451d08e80cfe2f8db192da3cc1b Mon Sep 17 00:00:00 2001 From: Farhad Rahbarnia <31899325+farhadrclass@users.noreply.github.com> Date: Sat, 28 Sep 2024 19:21:11 -0400 Subject: [PATCH 13/34] Update src/lbfgs.jl Co-authored-by: Dominique --- src/lbfgs.jl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/lbfgs.jl b/src/lbfgs.jl index 4d985bab..319b486b 100644 --- a/src/lbfgs.jl +++ b/src/lbfgs.jl @@ -47,8 +47,8 @@ function LBFGSData( 1, Vector{T}(undef, n), zeros(T, n, 2*mem), - zeros(T,2*mem), - zeros(T,n) + zeros(T, 2*mem), + zeros(T, n) ) end From 6c40d0a1d3b1f91f31a85435d094ac70f6ae945f Mon Sep 17 00:00:00 2001 From: Farhad Rahbarnia <31899325+farhadrclass@users.noreply.github.com> Date: Sat, 28 Sep 2024 19:21:34 -0400 Subject: [PATCH 14/34] Update src/utilities.jl Co-authored-by: Dominique --- src/utilities.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/utilities.jl b/src/utilities.jl index efcfef10..b9d2ee87 100644 --- a/src/utilities.jl +++ b/src/utilities.jl @@ -161,7 +161,7 @@ Solves linear system (B + σI) x = b, where B is a forward L-BFGS operator and ### Parameters -- `op::LBFGSOperator{T,I,F1,F2,F3}`: The L-BFGS operator `B_k`. Encodes the curvature information used to approximate the Hessian. +- `B::LBFGSOperator{T,I,F1,F2,F3}`: forward L-BFGS operator. - `z::AbstractVector{T}`: The vector representing `-b`. - `σ::T`: Nonnegative shift. - `inv_Cz::AbstractVector{T}`: A preallocated vector used to store the result of the solution. It will be overwritten in the function to hold the computed `s`. From 773f2fd014c5ca922352a7ef155165682c831453 Mon Sep 17 00:00:00 2001 From: Farhad Rahbarnia <31899325+farhadrclass@users.noreply.github.com> Date: Sat, 28 Sep 2024 19:21:47 -0400 Subject: [PATCH 15/34] Update src/utilities.jl Co-authored-by: Dominique --- src/utilities.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/utilities.jl b/src/utilities.jl index b9d2ee87..1890720b 100644 --- a/src/utilities.jl +++ b/src/utilities.jl @@ -157,7 +157,7 @@ check_positive_definite(M::AbstractMatrix; kwargs...) = v::AbstractVector{T}, u::AbstractVector{T}) where {T,I,F1,F2,F3} -Solves linear system (B + σI) x = b, where B is a forward L-BFGS operator and σ <= 0. +Solves linear system (B + σI) x = b, where B is a forward L-BFGS operator and σ ≥ 0. ### Parameters From 13162dcdea9ef8b6b57672cb79ccf3ffff22b98a Mon Sep 17 00:00:00 2001 From: Farhad Rahbarnia <31899325+farhadrclass@users.noreply.github.com> Date: Sat, 28 Sep 2024 19:22:21 -0400 Subject: [PATCH 16/34] Update src/utilities.jl Co-authored-by: Dominique --- src/utilities.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/utilities.jl b/src/utilities.jl index 1890720b..cda08cc0 100644 --- a/src/utilities.jl +++ b/src/utilities.jl @@ -164,7 +164,7 @@ Solves linear system (B + σI) x = b, where B is a forward L-BFGS operator and - `B::LBFGSOperator{T,I,F1,F2,F3}`: forward L-BFGS operator. - `z::AbstractVector{T}`: The vector representing `-b`. - `σ::T`: Nonnegative shift. -- `inv_Cz::AbstractVector{T}`: A preallocated vector used to store the result of the solution. It will be overwritten in the function to hold the computed `s`. +- `inv_Cz::AbstractVector{T}`: A preallocated vector used to store the solution. ### Returns From 2cf42466f1dd95d2a3241e34d9ea08306c876834 Mon Sep 17 00:00:00 2001 From: farhadrclass <31899325+farhadrclass@users.noreply.github.com> Date: Wed, 2 Oct 2024 09:06:04 -0400 Subject: [PATCH 17/34] updated according to the PR review --- src/utilities.jl | 46 +++++++++++++++++-------------- test/test_solve_shifted_system.jl | 3 -- 2 files changed, 25 insertions(+), 24 deletions(-) diff --git a/src/utilities.jl b/src/utilities.jl index cda08cc0..25a759b2 100644 --- a/src/utilities.jl +++ b/src/utilities.jl @@ -148,50 +148,51 @@ check_positive_definite(M::AbstractMatrix; kwargs...) = """ - solve_shifted_system!(op::LBFGSOperator{T,I,F1,F2,F3}, - z::AbstractVector{T}, - σ::T, - γ_inv::T, - inv_Cz::AbstractVector{T}, - p::AbstractVector{T}, - v::AbstractVector{T}, - u::AbstractVector{T}) where {T,I,F1,F2,F3} + solve_shifted_system!(B, + x, + σ, + γ_inv, + inv_Cx, + p, + v, + u) Solves linear system (B + σI) x = b, where B is a forward L-BFGS operator and σ ≥ 0. ### Parameters - `B::LBFGSOperator{T,I,F1,F2,F3}`: forward L-BFGS operator. -- `z::AbstractVector{T}`: The vector representing `-b`. +- `x::AbstractVector{T}`: The vector representing `-b`. - `σ::T`: Nonnegative shift. -- `inv_Cz::AbstractVector{T}`: A preallocated vector used to store the solution. +- `inv_Cx::AbstractVector{T}`: A preallocated vector used to store the solution. ### Returns -- `inv_Cz::AbstractVector{T}`: The solution vector `s` such that `(B_k + σ * I) s = -∇f(x_k)`. +- `inv_Cx::AbstractVector{T}`: The solution vector `x`, the solution to `(B + σI) x = b`. ### Method The method uses a two-loop recursion-like approach with modifications to handle the shift `σ`. ### References -Erway, J. B., Jain, V., & Marcia, R. F. (2013). Shifted L-BFGS Systems. +Erway, J. B., Jain, V., & Marcia, R. F. (2014). Shifted L-BFGS Systems. Optimization Methods and Software, 29(5), 992-1004. """ function solve_shifted_system!( - op::LBFGSOperator{T, I, F1, F2, F3}, - z::AbstractVector{T}, + B::LBFGSOperator{T, I, F1, F2, F3}, + x::AbstractVector{T}, σ::T, - inv_Cz::AbstractVector{T}, + inv_Cx::AbstractVector{T}, ) where {T, I, F1, F2, F3} - data = op.data + data = B.data insert = data.insert γ_inv = 1 / data.scaling_factor inv_c0 = 1 / (γ_inv + σ) - @. inv_Cz = inv_c0 * z + @. inv_Cx = inv_c0 * x max_i = 2 * data.mem + sign_i = 1 for i = 1:max_i j = (i + 1) ÷ 2 k = mod(insert + j - 1, data.mem) + 1 @@ -199,15 +200,18 @@ function solve_shifted_system!( @. data.shifted_p[:, i] = inv_c0 * data.shifted_u + sign_t = 1 for t = 1:(i - 1) c0 = dot(view(data.shifted_p, :, t), data.shifted_u) - c1= ((t % 2) == 0 ? -1 : 1) .*data.shifted_v[t] + c1= sign_t .*data.shifted_v[t] c2 = c1 * c0 view(data.shifted_p, :, i) .+= c2 .* view(data.shifted_p, :, t) + sign_t = -sign_t end - data.shifted_v[i] = 1 / (1 + ((i % 2) == 0 ? 1 : -1) * dot(data.shifted_u, view(data.shifted_p, :, i))) - inv_Cz .+= ((i % 2) == 0 ? -1 : 1) *data.shifted_v[i] * (view(data.shifted_p, :, i)' * z) .* view(data.shifted_p, :, i) + data.shifted_v[i] = 1 / (1 + ((i % 2) == 0 ? 1 : -1) * dot(data.shifted_u, view(data.shifted_p, :, i))) + inv_Cx .+= sign_i *data.shifted_v[i] * (view(data.shifted_p, :, i)' * x) .* view(data.shifted_p, :, i) + sign_i = -sign_i end - return inv_Cz + return inv_Cx end diff --git a/test/test_solve_shifted_system.jl b/test/test_solve_shifted_system.jl index 679ee2df..656d94bc 100644 --- a/test/test_solve_shifted_system.jl +++ b/test/test_solve_shifted_system.jl @@ -12,8 +12,6 @@ function setup_test_val(; M = 5, n = 100, scaling = false) push!(B, s, y) end - # γ_inv = 1 / B.data.scaling_factor - x = randn(n) z = B * x + σ .* x # so we know the true answer is x @@ -37,7 +35,6 @@ function test_solve_shifted_system() @test all(isfinite, result) # Test 4: Check if inv_Cz is close to the known solution x - # x = B \ (z ./ (1 + σ)) # Known true solution @test isapprox(inv_Cz, x, atol = 1e-6, rtol = 1e-6) end From 5c1eeaed8c37cd4a45194ec49f38b93677bb3bde Mon Sep 17 00:00:00 2001 From: farhadrclass <31899325+farhadrclass@users.noreply.github.com> Date: Wed, 2 Oct 2024 10:49:05 -0400 Subject: [PATCH 18/34] updated according to PR, renamed variable --- src/lbfgs.jl | 6 ++--- src/utilities.jl | 40 +++++++++++++++---------------- test/test_solve_shifted_system.jl | 12 +++++----- 3 files changed, 28 insertions(+), 30 deletions(-) diff --git a/src/lbfgs.jl b/src/lbfgs.jl index 319b486b..067f76e0 100644 --- a/src/lbfgs.jl +++ b/src/lbfgs.jl @@ -46,9 +46,9 @@ function LBFGSData( inverse ? Vector{T}(undef, 0) : [zeros(T, n) for _ = 1:mem], 1, Vector{T}(undef, n), - zeros(T, n, 2*mem), - zeros(T, 2*mem), - zeros(T, n) + Array{T}(undef, (n, 2*mem)), + Vector{T}(undef, 2*mem), + Vector{T}(undef, n) ) end diff --git a/src/utilities.jl b/src/utilities.jl index 25a759b2..c0a8c8c4 100644 --- a/src/utilities.jl +++ b/src/utilities.jl @@ -149,26 +149,23 @@ check_positive_definite(M::AbstractMatrix; kwargs...) = """ solve_shifted_system!(B, + z, + σ, x, - σ, - γ_inv, - inv_Cx, - p, - v, - u) + ) -Solves linear system (B + σI) x = b, where B is a forward L-BFGS operator and σ ≥ 0. +Solves linear system (B + σI) x = z, where B is a forward L-BFGS operator and σ ≥ 0. ### Parameters -- `B::LBFGSOperator{T,I,F1,F2,F3}`: forward L-BFGS operator. -- `x::AbstractVector{T}`: The vector representing `-b`. -- `σ::T`: Nonnegative shift. -- `inv_Cx::AbstractVector{T}`: A preallocated vector used to store the solution. +- `B::LBFGSOperator{T,I,F1,F2,F3}`: forward L-BFGS operator, a LinearOperator that models a matrix of dimension nxn; +- `z::AbstractVector{T}`: The vector of length n. +- `σ::T`: Nonnegative shift scalar. +- `x::AbstractVector{T}`: A preallocated vector a vector of length n that is used to store the solution x. ### Returns -- `inv_Cx::AbstractVector{T}`: The solution vector `x`, the solution to `(B + σI) x = b`. +- `x::AbstractVector{T}`: The solution vector `x` size n, the solution to `(B + σI) x = b`. ### Method @@ -180,25 +177,26 @@ Erway, J. B., Jain, V., & Marcia, R. F. (2014). Shifted L-BFGS Systems. Optimiza function solve_shifted_system!( B::LBFGSOperator{T, I, F1, F2, F3}, - x::AbstractVector{T}, + z::AbstractVector{T}, σ::T, - inv_Cx::AbstractVector{T}, + x::AbstractVector{T}, ) where {T, I, F1, F2, F3} data = B.data insert = data.insert γ_inv = 1 / data.scaling_factor - inv_c0 = 1 / (γ_inv + σ) - @. inv_Cx = inv_c0 * x + x_0 = 1 / (γ_inv + σ) + @. x = x_0 * z max_i = 2 * data.mem sign_i = 1 + for i = 1:max_i j = (i + 1) ÷ 2 k = mod(insert + j - 1, data.mem) + 1 - data.shifted_u .= ((i % 2) == 0 ? data.b[k] : data.a[k]) + data.shifted_u .= ((sign_i == -1) ? data.b[k] : data.a[k]) - @. data.shifted_p[:, i] = inv_c0 * data.shifted_u + @. data.shifted_p[:, i] = x_0 * data.shifted_u sign_t = 1 for t = 1:(i - 1) @@ -209,9 +207,9 @@ function solve_shifted_system!( sign_t = -sign_t end - data.shifted_v[i] = 1 / (1 + ((i % 2) == 0 ? 1 : -1) * dot(data.shifted_u, view(data.shifted_p, :, i))) - inv_Cx .+= sign_i *data.shifted_v[i] * (view(data.shifted_p, :, i)' * x) .* view(data.shifted_p, :, i) + data.shifted_v[i] = 1 / (1 - sign_i * dot(data.shifted_u, view(data.shifted_p, :, i))) + x .+= sign_i *data.shifted_v[i] * (view(data.shifted_p, :, i)' * z) .* view(data.shifted_p, :, i) sign_i = -sign_i end - return inv_Cx + return x end diff --git a/test/test_solve_shifted_system.jl b/test/test_solve_shifted_system.jl index 656d94bc..26929219 100644 --- a/test/test_solve_shifted_system.jl +++ b/test/test_solve_shifted_system.jl @@ -21,21 +21,21 @@ end function test_solve_shifted_system() @testset "solve_shifted_system! Default setup test" begin # Setup Test Case 1: Default setup from setup_test_val - B, z, σ, inv_Cz, x = setup_test_val(n = 100, M = 5) + B, z, σ, x_sol, x_true = setup_test_val(n = 100, M = 5) - result = solve_shifted_system!(B, z, σ, inv_Cz) + result = solve_shifted_system!(B, z, σ, x_sol) # Test 1: Check if result is a vector of the same size as z @test length(result) == length(z) - # Test 2: Verify that inv_Cz (result) is modified in place - @test result === inv_Cz + # Test 2: Verify that x_sol (result) is modified in place + @test result === x_sol # Test 3: Check if the function produces finite values @test all(isfinite, result) - # Test 4: Check if inv_Cz is close to the known solution x - @test isapprox(inv_Cz, x, atol = 1e-6, rtol = 1e-6) + # Test 4: Check if x_sol is close to the known solution x + @test isapprox(x_sol, x_true, atol = 1e-6, rtol = 1e-6) end end From 1323a983afb9e9d9ea35e1abdc2a8201e42d2f3d Mon Sep 17 00:00:00 2001 From: farhadrclass <31899325+farhadrclass@users.noreply.github.com> Date: Wed, 2 Oct 2024 20:02:05 -0400 Subject: [PATCH 19/34] added ldiv! --- src/utilities.jl | 68 +++++++++++++++++++++++++------ test/test_solve_shifted_system.jl | 40 ++++++++++++++---- 2 files changed, 88 insertions(+), 20 deletions(-) diff --git a/src/utilities.jl b/src/utilities.jl index c0a8c8c4..46eae9e8 100644 --- a/src/utilities.jl +++ b/src/utilities.jl @@ -1,4 +1,5 @@ -export check_ctranspose, check_hermitian, check_positive_definite, normest, solve_shifted_system! +export check_ctranspose, check_hermitian, check_positive_definite, normest, solve_shifted_system!, ldiv! +import LinearAlgebra.ldiv! """ normest(S) estimates the matrix 2-norm of S. @@ -148,20 +149,20 @@ check_positive_definite(M::AbstractMatrix; kwargs...) = """ - solve_shifted_system!(B, - z, + solve_shifted_system!(x, + B, + b, σ, - x, ) -Solves linear system (B + σI) x = z, where B is a forward L-BFGS operator and σ ≥ 0. +Solves linear system (B + σI) x = b, where B is a forward L-BFGS operator and σ ≥ 0. ### Parameters +- `x::AbstractVector{T}`: A preallocated vector a vector of length n that is used to store the solution x. - `B::LBFGSOperator{T,I,F1,F2,F3}`: forward L-BFGS operator, a LinearOperator that models a matrix of dimension nxn; -- `z::AbstractVector{T}`: The vector of length n. +- `b::AbstractVector{T}`: The vector of length n. - `σ::T`: Nonnegative shift scalar. -- `x::AbstractVector{T}`: A preallocated vector a vector of length n that is used to store the solution x. ### Returns @@ -174,19 +175,23 @@ The method uses a two-loop recursion-like approach with modifications to handle ### References Erway, J. B., Jain, V., & Marcia, R. F. (2014). Shifted L-BFGS Systems. Optimization Methods and Software, 29(5), 992-1004. """ - function solve_shifted_system!( + x::AbstractVector{T}, B::LBFGSOperator{T, I, F1, F2, F3}, - z::AbstractVector{T}, + b::AbstractVector{T}, σ::T, - x::AbstractVector{T}, ) where {T, I, F1, F2, F3} + + # check if σ < 0 + if σ < 0 + throw(ArgumentError("σ must be nonnegative")) + end data = B.data insert = data.insert γ_inv = 1 / data.scaling_factor x_0 = 1 / (γ_inv + σ) - @. x = x_0 * z + @. x = x_0 * b max_i = 2 * data.mem sign_i = 1 @@ -208,8 +213,47 @@ function solve_shifted_system!( end data.shifted_v[i] = 1 / (1 - sign_i * dot(data.shifted_u, view(data.shifted_p, :, i))) - x .+= sign_i *data.shifted_v[i] * (view(data.shifted_p, :, i)' * z) .* view(data.shifted_p, :, i) + x .+= sign_i *data.shifted_v[i] * (view(data.shifted_p, :, i)' * b) .* view(data.shifted_p, :, i) sign_i = -sign_i end return x end + + +""" + ldiv!(x::AbstractVector{T}, B::LBFGSOperator{T, I, F1, F2, F3}, b::AbstractVector{T}) where {T, I, F1, F2, F3} + +Solves the linear system defined by the L-BFGS operator `B` and the right-hand side vector `b`, storing the solution in the vector `x`. + +### Arguments: + +- `x::AbstractVector{T}`: The solution vector, which will be modified in-place. +- `B::LBFGSOperator{T, I, F1, F2, F3}`: The L-BFGS operator that defines the linear system. +- `b::AbstractVector{T}`: The right-hand side vector of the linear system. + +### Returns: + +- `x::AbstractVector{T}`: The modified solution vector containing the solution to the linear system. + +### Examples: + +```julia + +# Create an L-BFGS operator +B = LBFGSOperator(10) + +# Generate random vectors +x = rand(10) +b = rand(10) + +# Solve the linear system +ldiv!(x, B, b) + +# The vector `x` now contains the solution +""" + +function ldiv!(x::AbstractVector{T}, B::LBFGSOperator{T, I, F1, F2, F3}, b::AbstractVector{T}) where {T, I, F1, F2, F3} + # Call solve_shifted_system! with σ = 0 + solve_shifted_system!(x, B, b, T(0.0)) + return x +end \ No newline at end of file diff --git a/test/test_solve_shifted_system.jl b/test/test_solve_shifted_system.jl index 26929219..d40dbd68 100644 --- a/test/test_solve_shifted_system.jl +++ b/test/test_solve_shifted_system.jl @@ -2,31 +2,32 @@ using Test using LinearOperators using LinearAlgebra -function setup_test_val(; M = 5, n = 100, scaling = false) - σ = 0.1 - B = LBFGSOperator(n, mem = 5, scaling = scaling) +function setup_test_val(; M = 5, n = 100, scaling = false, σ = 0.1) + B = LBFGSOperator(n, mem = M, scaling = scaling) + H = InverseLBFGSOperator(n, mem = M, scaling = false) for _ = 1:10 s = rand(n) y = rand(n) push!(B, s, y) + push!(H, s, y) end x = randn(n) - z = B * x + σ .* x # so we know the true answer is x + b = B * x + σ .* x # so we know the true answer is x - return B, z, σ, zeros(n), x + return B, H , b, σ, zeros(n), x end function test_solve_shifted_system() @testset "solve_shifted_system! Default setup test" begin # Setup Test Case 1: Default setup from setup_test_val - B, z, σ, x_sol, x_true = setup_test_val(n = 100, M = 5) + B,_, b, σ, x_sol, x_true = setup_test_val(n = 100, M = 5) - result = solve_shifted_system!(B, z, σ, x_sol) + result = solve_shifted_system!(x_sol, B, b, σ) # Test 1: Check if result is a vector of the same size as z - @test length(result) == length(z) + @test length(result) == length(b) # Test 2: Verify that x_sol (result) is modified in place @test result === x_sol @@ -37,6 +38,29 @@ function test_solve_shifted_system() # Test 4: Check if x_sol is close to the known solution x @test isapprox(x_sol, x_true, atol = 1e-6, rtol = 1e-6) end + @testset "solve_shifted_system! Negative σ test" begin + # Setup Test Case 2: Negative σ + B,_, b, _, x_sol, _ = setup_test_val(n = 100, M = 5) + σ = -0.1 + + # Expect an ArgumentError to be thrown + @test_throws ArgumentError solve_shifted_system!(x_sol, B, b, σ) + end + + @testset "ldiv! test" begin + # Setup Test Case 1: Default setup from setup_test_val + B, H, b, _, x_sol, x_true = setup_test_val(n = 100, M = 5, σ = 0.0) + + + # Solve the system using solve_shifted_system! + result = ldiv!(x_sol, B, b) + + # Check consistency with operator-vector product using H + x_H = H * b + @test isapprox(x_sol, x_H, atol = 1e-6, rtol = 1e-6) + @test isapprox(x_sol, x_true, atol = 1e-6, rtol = 1e-6) + end + end From d1da6ef5b2e2b5bbaa2bffdf6681cf598afc9cda Mon Sep 17 00:00:00 2001 From: Farhad Rahbarnia <31899325+farhadrclass@users.noreply.github.com> Date: Fri, 4 Oct 2024 09:10:18 -0400 Subject: [PATCH 20/34] Update src/utilities.jl Co-authored-by: Dominique --- src/utilities.jl | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/src/utilities.jl b/src/utilities.jl index 46eae9e8..d5ba60cc 100644 --- a/src/utilities.jl +++ b/src/utilities.jl @@ -149,11 +149,7 @@ check_positive_definite(M::AbstractMatrix; kwargs...) = """ - solve_shifted_system!(x, - B, - b, - σ, - ) + solve_shifted_system!(x, B, b, σ) Solves linear system (B + σI) x = b, where B is a forward L-BFGS operator and σ ≥ 0. From ad9567ffa3cbc7f8d3bfa4c670912a2fe2469eec Mon Sep 17 00:00:00 2001 From: Farhad Rahbarnia <31899325+farhadrclass@users.noreply.github.com> Date: Fri, 4 Oct 2024 09:10:26 -0400 Subject: [PATCH 21/34] Update src/utilities.jl Co-authored-by: Dominique --- src/utilities.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/utilities.jl b/src/utilities.jl index d5ba60cc..14da18db 100644 --- a/src/utilities.jl +++ b/src/utilities.jl @@ -151,7 +151,7 @@ check_positive_definite(M::AbstractMatrix; kwargs...) = """ solve_shifted_system!(x, B, b, σ) -Solves linear system (B + σI) x = b, where B is a forward L-BFGS operator and σ ≥ 0. +Solve linear system (B + σI) x = b, where B is a forward L-BFGS operator and σ ≥ 0. ### Parameters From d5c1cfd4f508632fc20bad7d019e415909d7c0d6 Mon Sep 17 00:00:00 2001 From: Farhad Rahbarnia <31899325+farhadrclass@users.noreply.github.com> Date: Fri, 4 Oct 2024 09:10:40 -0400 Subject: [PATCH 22/34] Update src/utilities.jl Co-authored-by: Dominique --- src/utilities.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/utilities.jl b/src/utilities.jl index 14da18db..17090a44 100644 --- a/src/utilities.jl +++ b/src/utilities.jl @@ -155,7 +155,7 @@ Solve linear system (B + σI) x = b, where B is a forward L-BFGS operator and σ ### Parameters -- `x::AbstractVector{T}`: A preallocated vector a vector of length n that is used to store the solution x. +- `x::AbstractVector{T}`: preallocated vector of length n that is used to store the solution x. - `B::LBFGSOperator{T,I,F1,F2,F3}`: forward L-BFGS operator, a LinearOperator that models a matrix of dimension nxn; - `b::AbstractVector{T}`: The vector of length n. - `σ::T`: Nonnegative shift scalar. From 759ab023b153fc4b91b4557ffaff9ea33a9de5e7 Mon Sep 17 00:00:00 2001 From: Farhad Rahbarnia <31899325+farhadrclass@users.noreply.github.com> Date: Fri, 4 Oct 2024 09:10:59 -0400 Subject: [PATCH 23/34] Update src/utilities.jl Co-authored-by: Dominique --- src/utilities.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/utilities.jl b/src/utilities.jl index 17090a44..ea3811ff 100644 --- a/src/utilities.jl +++ b/src/utilities.jl @@ -156,7 +156,7 @@ Solve linear system (B + σI) x = b, where B is a forward L-BFGS operator and σ ### Parameters - `x::AbstractVector{T}`: preallocated vector of length n that is used to store the solution x. -- `B::LBFGSOperator{T,I,F1,F2,F3}`: forward L-BFGS operator, a LinearOperator that models a matrix of dimension nxn; +- `B::LBFGSOperator`: forward L-BFGS operatorthat models a matrix of size n x n. - `b::AbstractVector{T}`: The vector of length n. - `σ::T`: Nonnegative shift scalar. From 3094644dc569ed269982e7b02673e4dd6326d582 Mon Sep 17 00:00:00 2001 From: Farhad Rahbarnia <31899325+farhadrclass@users.noreply.github.com> Date: Fri, 4 Oct 2024 09:11:08 -0400 Subject: [PATCH 24/34] Update src/utilities.jl Co-authored-by: Dominique --- src/utilities.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/utilities.jl b/src/utilities.jl index ea3811ff..d0d25f99 100644 --- a/src/utilities.jl +++ b/src/utilities.jl @@ -157,7 +157,7 @@ Solve linear system (B + σI) x = b, where B is a forward L-BFGS operator and σ - `x::AbstractVector{T}`: preallocated vector of length n that is used to store the solution x. - `B::LBFGSOperator`: forward L-BFGS operatorthat models a matrix of size n x n. -- `b::AbstractVector{T}`: The vector of length n. +- `b::AbstractVector{T}`: right-hand side vector of length n. - `σ::T`: Nonnegative shift scalar. ### Returns From 88479e52192cddfcc529c6a4ba56b4d88e113e72 Mon Sep 17 00:00:00 2001 From: Farhad Rahbarnia <31899325+farhadrclass@users.noreply.github.com> Date: Fri, 4 Oct 2024 09:11:16 -0400 Subject: [PATCH 25/34] Update src/utilities.jl Co-authored-by: Dominique --- src/utilities.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/utilities.jl b/src/utilities.jl index d0d25f99..fc9eb31b 100644 --- a/src/utilities.jl +++ b/src/utilities.jl @@ -158,7 +158,7 @@ Solve linear system (B + σI) x = b, where B is a forward L-BFGS operator and σ - `x::AbstractVector{T}`: preallocated vector of length n that is used to store the solution x. - `B::LBFGSOperator`: forward L-BFGS operatorthat models a matrix of size n x n. - `b::AbstractVector{T}`: right-hand side vector of length n. -- `σ::T`: Nonnegative shift scalar. +- `σ::T`: nonnegative shift. ### Returns From bc6ba12b4dbdb26c974f2a58131458eaa6fd38c9 Mon Sep 17 00:00:00 2001 From: Farhad Rahbarnia <31899325+farhadrclass@users.noreply.github.com> Date: Fri, 4 Oct 2024 09:11:27 -0400 Subject: [PATCH 26/34] Update src/utilities.jl Co-authored-by: Dominique --- src/utilities.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/utilities.jl b/src/utilities.jl index fc9eb31b..7dde5133 100644 --- a/src/utilities.jl +++ b/src/utilities.jl @@ -162,7 +162,7 @@ Solve linear system (B + σI) x = b, where B is a forward L-BFGS operator and σ ### Returns -- `x::AbstractVector{T}`: The solution vector `x` size n, the solution to `(B + σI) x = b`. +- `x::AbstractVector{T}`: solution vector `x` of length n. ### Method From 5a173ef29e172d12ed1a87b19ff350e350257d5f Mon Sep 17 00:00:00 2001 From: Farhad Rahbarnia <31899325+farhadrclass@users.noreply.github.com> Date: Fri, 4 Oct 2024 09:11:37 -0400 Subject: [PATCH 27/34] Update src/utilities.jl Co-authored-by: Dominique --- src/utilities.jl | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/utilities.jl b/src/utilities.jl index 7dde5133..0c7c944d 100644 --- a/src/utilities.jl +++ b/src/utilities.jl @@ -169,7 +169,8 @@ Solve linear system (B + σI) x = b, where B is a forward L-BFGS operator and σ The method uses a two-loop recursion-like approach with modifications to handle the shift `σ`. ### References -Erway, J. B., Jain, V., & Marcia, R. F. (2014). Shifted L-BFGS Systems. Optimization Methods and Software, 29(5), 992-1004. + +Erway, J. B., Jain, V., & Marcia, R. F. Shifted L-BFGS Systems. Optimization Methods and Software, 29(5), pp. 992-1004, 2014. """ function solve_shifted_system!( x::AbstractVector{T}, From 4ad62e6490e13bf8685ae0481e01bcac82dac2bc Mon Sep 17 00:00:00 2001 From: Farhad Rahbarnia <31899325+farhadrclass@users.noreply.github.com> Date: Fri, 4 Oct 2024 09:11:46 -0400 Subject: [PATCH 28/34] Update src/utilities.jl Co-authored-by: Dominique --- src/utilities.jl | 1 - 1 file changed, 1 deletion(-) diff --git a/src/utilities.jl b/src/utilities.jl index 0c7c944d..8ed319eb 100644 --- a/src/utilities.jl +++ b/src/utilities.jl @@ -179,7 +179,6 @@ function solve_shifted_system!( σ::T, ) where {T, I, F1, F2, F3} - # check if σ < 0 if σ < 0 throw(ArgumentError("σ must be nonnegative")) end From 17ffb34d4b4c8c60a00156f9e500e469e7998eaa Mon Sep 17 00:00:00 2001 From: Farhad Rahbarnia <31899325+farhadrclass@users.noreply.github.com> Date: Fri, 4 Oct 2024 09:12:03 -0400 Subject: [PATCH 29/34] Update src/utilities.jl Co-authored-by: Dominique --- src/utilities.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/utilities.jl b/src/utilities.jl index 8ed319eb..26431d38 100644 --- a/src/utilities.jl +++ b/src/utilities.jl @@ -217,7 +217,7 @@ end """ - ldiv!(x::AbstractVector{T}, B::LBFGSOperator{T, I, F1, F2, F3}, b::AbstractVector{T}) where {T, I, F1, F2, F3} + ldiv!(x, B, b) Solves the linear system defined by the L-BFGS operator `B` and the right-hand side vector `b`, storing the solution in the vector `x`. From 87bd7b6cb1b8a34277ecb8c6b0b92d1e8ab9bfe9 Mon Sep 17 00:00:00 2001 From: Farhad Rahbarnia <31899325+farhadrclass@users.noreply.github.com> Date: Fri, 4 Oct 2024 09:12:39 -0400 Subject: [PATCH 30/34] Update src/utilities.jl Co-authored-by: Dominique --- src/utilities.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/utilities.jl b/src/utilities.jl index 26431d38..8cc190f2 100644 --- a/src/utilities.jl +++ b/src/utilities.jl @@ -219,7 +219,7 @@ end """ ldiv!(x, B, b) -Solves the linear system defined by the L-BFGS operator `B` and the right-hand side vector `b`, storing the solution in the vector `x`. +Solves the linear system Bx = b. ### Arguments: From e039299212ca491fba9276e04bb78b5c02ee752e Mon Sep 17 00:00:00 2001 From: Farhad Rahbarnia <31899325+farhadrclass@users.noreply.github.com> Date: Fri, 4 Oct 2024 09:13:25 -0400 Subject: [PATCH 31/34] Update test/test_solve_shifted_system.jl Co-authored-by: Dominique --- test/test_solve_shifted_system.jl | 1 - 1 file changed, 1 deletion(-) diff --git a/test/test_solve_shifted_system.jl b/test/test_solve_shifted_system.jl index d40dbd68..670a433f 100644 --- a/test/test_solve_shifted_system.jl +++ b/test/test_solve_shifted_system.jl @@ -51,7 +51,6 @@ function test_solve_shifted_system() # Setup Test Case 1: Default setup from setup_test_val B, H, b, _, x_sol, x_true = setup_test_val(n = 100, M = 5, σ = 0.0) - # Solve the system using solve_shifted_system! result = ldiv!(x_sol, B, b) From 89d28d6d9a6924e994d16042276b26418c74d12c Mon Sep 17 00:00:00 2001 From: Farhad Rahbarnia <31899325+farhadrclass@users.noreply.github.com> Date: Fri, 4 Oct 2024 09:13:34 -0400 Subject: [PATCH 32/34] Update test/test_solve_shifted_system.jl Co-authored-by: Dominique --- test/test_solve_shifted_system.jl | 2 -- 1 file changed, 2 deletions(-) diff --git a/test/test_solve_shifted_system.jl b/test/test_solve_shifted_system.jl index 670a433f..caafea53 100644 --- a/test/test_solve_shifted_system.jl +++ b/test/test_solve_shifted_system.jl @@ -59,8 +59,6 @@ function test_solve_shifted_system() @test isapprox(x_sol, x_H, atol = 1e-6, rtol = 1e-6) @test isapprox(x_sol, x_true, atol = 1e-6, rtol = 1e-6) end - - end test_solve_shifted_system() From aa4c8a37037ba3288a3c2e4e797eee7fea6eb892 Mon Sep 17 00:00:00 2001 From: farhadrclass <31899325+farhadrclass@users.noreply.github.com> Date: Fri, 4 Oct 2024 09:23:59 -0400 Subject: [PATCH 33/34] added the example --- src/utilities.jl | 41 ++++++++++++++++++++++++++++++++++++----- 1 file changed, 36 insertions(+), 5 deletions(-) diff --git a/src/utilities.jl b/src/utilities.jl index 8cc190f2..07aeff55 100644 --- a/src/utilities.jl +++ b/src/utilities.jl @@ -156,7 +156,7 @@ Solve linear system (B + σI) x = b, where B is a forward L-BFGS operator and σ ### Parameters - `x::AbstractVector{T}`: preallocated vector of length n that is used to store the solution x. -- `B::LBFGSOperator`: forward L-BFGS operatorthat models a matrix of size n x n. +- `B::LBFGSOperator`: forward L-BFGS operator that models a matrix of size n x n. - `b::AbstractVector{T}`: right-hand side vector of length n. - `σ::T`: nonnegative shift. @@ -168,6 +168,38 @@ Solve linear system (B + σI) x = b, where B is a forward L-BFGS operator and σ The method uses a two-loop recursion-like approach with modifications to handle the shift `σ`. +### Example + +```julia +using Random + +# Problem setup +n = 100 # size of the problem +mem = 10 # L-BFGS memory size +scaling = true # enable scaling + +# Create an L-BFGS operator +B = LBFGSOperator(n, mem = mem, scaling = scaling) + +# Add random {s, y} pairs to the L-BFGS operator +for _ = 1:10 + s = rand(n) + y = rand(n) + push!(B, s, y) # Add the {s, y} pair to B +end + +# Prepare vectors for the system +x_sol = zeros(n) # Preallocated solution vector +b = rand(n) # Right-hand side vector +σ = 0.1 # Small shift value + +# Solve the shifted system +result = solve_shifted_system!(x_sol, B, b, σ) + +# Check that the solution is close enough (residual test) +@assert norm(B * x_sol + σ * x_sol - b) / norm(b) < 1e-8 +``` + ### References Erway, J. B., Jain, V., & Marcia, R. F. Shifted L-BFGS Systems. Optimization Methods and Software, 29(5), pp. 992-1004, 2014. @@ -223,10 +255,9 @@ Solves the linear system Bx = b. ### Arguments: -- `x::AbstractVector{T}`: The solution vector, which will be modified in-place. -- `B::LBFGSOperator{T, I, F1, F2, F3}`: The L-BFGS operator that defines the linear system. -- `b::AbstractVector{T}`: The right-hand side vector of the linear system. - +- `x::AbstractVector{T}`: preallocated vector of length n that is used to store the solution x. +- `B::LBFGSOperator`: forward L-BFGS operator that models a matrix of size n x n. +- `b::AbstractVector{T}`: right-hand side vector of length n. ### Returns: - `x::AbstractVector{T}`: The modified solution vector containing the solution to the linear system. From 90084b9823c4fa72cd59643ad8ac519eb4791999 Mon Sep 17 00:00:00 2001 From: farhadrclass <31899325+farhadrclass@users.noreply.github.com> Date: Fri, 4 Oct 2024 09:24:29 -0400 Subject: [PATCH 34/34] Update utilities.jl --- src/utilities.jl | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/utilities.jl b/src/utilities.jl index 07aeff55..0f53ea64 100644 --- a/src/utilities.jl +++ b/src/utilities.jl @@ -189,15 +189,15 @@ for _ = 1:10 end # Prepare vectors for the system -x_sol = zeros(n) # Preallocated solution vector +x = zeros(n) # Preallocated solution vector b = rand(n) # Right-hand side vector σ = 0.1 # Small shift value # Solve the shifted system -result = solve_shifted_system!(x_sol, B, b, σ) +result = solve_shifted_system!(x, B, b, σ) # Check that the solution is close enough (residual test) -@assert norm(B * x_sol + σ * x_sol - b) / norm(b) < 1e-8 +@assert norm(B * x + σ * x - b) / norm(b) < 1e-8 ``` ### References