
Commit eed13f9

Implement preconditioning of gradient in L-BFGS method
1 parent 4ef210b commit eed13f9

File tree

3 files changed: +218 -7 lines changed


varipeps/config.py

Lines changed: 14 additions & 2 deletions

@@ -166,6 +166,15 @@ class VariPEPS_Config:
     optimizer_reuse_env_eps (:obj:`float`):
       Reuse CTMRG environment of previous step if norm of gradient is below
       this threshold.
+    optimizer_use_preconditioning (:obj:`bool`):
+      Use (local) preconditioning method as described in
+      https://arxiv.org/abs/2511.09546.
+    optimizer_precond_gmres_krylov_subspace_size (:obj:`int`):
+      Size of Krylov subspace built up during GMRES method for the inversion
+      of the preconditioner.
+    optimizer_precond_gmres_maxiter (:obj:`int`):
+      Maximal number of outer iterations inside the GMRES method for the
+      inversion of the preconditioner.
     line_search_method (:obj:`Line_Search_Methods`):
       Method used for the line search routine.
     line_search_initial_step_size (:obj:`float`):
@@ -263,19 +272,22 @@ class VariPEPS_Config:
     svd_ad_lorentz_broadening_eps: float = 1e-13
 
     # Optimizer
-    optimizer_method: Optimizing_Methods = Optimizing_Methods.BFGS
+    optimizer_method: Optimizing_Methods = Optimizing_Methods.L_BFGS
     optimizer_max_steps: int = 300
     optimizer_convergence_eps: float = 1e-5
     optimizer_ctmrg_preconverged_eps: float = 1e-5
     optimizer_fail_if_no_step_size_found: bool = False
     optimizer_l_bfgs_maxlen: int = 15
-    optimizer_preconverge_with_half_projectors: bool = True
+    optimizer_preconverge_with_half_projectors: bool = False
     optimizer_preconverge_with_half_projectors_eps: float = 1e-3
     optimizer_autosave_step_count: int = 2
     optimizer_random_noise_eps: float = 1e-4
     optimizer_random_noise_max_retries: int = 5
     optimizer_random_noise_relative_amplitude: float = 1e-1
     optimizer_reuse_env_eps: float = 1e-3
+    optimizer_use_preconditioning: bool = True
+    optimizer_precond_gmres_krylov_subspace_size: int = 30
+    optimizer_precond_gmres_maxiter: int = 3
 
     # Line search
     line_search_method: Line_Search_Methods = Line_Search_Methods.HAGERZHANG
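
The three new options ship with preconditioning enabled by default. A minimal usage sketch, assuming the global varipeps_config object (the same instance optimizer.py imports below) is the intended way to override them at runtime:

from varipeps import varipeps_config

# Defaults from this commit, set explicitly here for illustration.
varipeps_config.optimizer_use_preconditioning = True
varipeps_config.optimizer_precond_gmres_krylov_subspace_size = 30  # GMRES restart length
varipeps_config.optimizer_precond_gmres_maxiter = 3  # outer GMRES restarts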

varipeps/contractions/definitions.py

Lines changed: 69 additions & 0 deletions

@@ -4860,5 +4860,74 @@ def _prepare_defs(cls):
         ],
     }
 
+    precondition_operator: Definition = {
+        "tensors": [["C1", "T1", "C2", "T2", "C3", "T3", "C4", "T4"], "ket_tensor"],
+        "network": [
+            [
+                (2, 12),  # C1
+                (12, 9, -5, 3),  # T1
+                (3, 8),  # C2
+                (10, -4, 4, 8),  # T2
+                (11, 4),  # C3
+                (1, 11, -2, 7),  # T3
+                (1, 5),  # C4
+                (5, -1, 6, 2),  # T4
+            ],
+            (6, 7, -3, 10, 9),  # ket_tensor
+        ],
+    }
+
+    precondition_operator_triangular: Definition = {
+        "tensors": [["C1", "C2", "C3", "C4", "C5", "C6"], "ket_tensor"],
+        "network": [
+            [
+                (12, 5, -1, 1),  # C1
+                (1, 6, -2, 2),  # C2
+                (2, 7, -3, 8),  # C3
+                (8, 9, -4, 3),  # C4
+                (3, 10, -5, 4),  # C5
+                (4, 11, -6, 12),  # C6
+            ],
+            (5, 6, 7, 9, 10, 11, -7),  # ket_tensor
+        ],
+    }
+
+    precondition_operator_split_transfer: Definition = {
+        "tensors": [
+            [
+                "C1",
+                "T1_ket",
+                "T1_bra",
+                "C2",
+                "T2_ket",
+                "T2_bra",
+                "C3",
+                "T3_bra",
+                "T3_ket",
+                "C4",
+                "T4_bra",
+                "T4_ket",
+            ],
+            "ket_tensor",
+        ],
+        "network": [
+            [
+                (1, 2),  # C1
+                (2, 13, 3),  # T1_ket
+                (3, -5, 4),  # T1_bra
+                (4, 5),  # C2
+                (6, 14, 5),  # T2_ket
+                (7, -4, 6),  # T2_bra
+                (8, 7),  # C3
+                (9, 15, 8),  # T3_ket
+                (10, -2, 9),  # T3_bra
+                (10, 11),  # C4
+                (11, 16, 12),  # T4_ket
+                (12, -1, 1),  # T4_bra
+            ],
+            (16, 15, -3, 14, 13),  # ket_tensor
+        ],
+    }
+
 
 Definitions._prepare_defs()
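
These Definition dicts appear to follow the usual ncon-style labeling: positive integers mark pairs of legs contracted against each other, negative integers mark open legs of the result (the PEPS-tensor-shaped output the preconditioner acts on). A toy sketch of how one such pair from precondition_operator would contract, with hypothetical bond dimensions chi and D chosen only for illustration:

import jax.numpy as jnp

chi, D = 4, 3
C1 = jnp.ones((chi, chi))        # legs (2, 12)
T1 = jnp.ones((chi, D, D, chi))  # legs (12, 9, -5, 3)

# Label 12 is shared, so C1's second leg contracts with T1's first leg;
# labels 2, 9, 3 remain to be contracted later and -5 stays open.
C1T1 = jnp.einsum("ab,bcde->acde", C1, T1)
print(C1T1.shape)  # (4, 3, 3, 4)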

varipeps/optimization/optimizer.py

Lines changed: 135 additions & 5 deletions

@@ -20,7 +20,7 @@
 import jax
 from jax import jit
 import jax.numpy as jnp
-from jax.lax import scan
+from jax.lax import scan, cond
 from jax.flatten_util import ravel_pytree
 
 from varipeps import varipeps_config, varipeps_global_state
@@ -32,6 +32,8 @@
 from varipeps.ctmrg import CTMRGNotConvergedError, CTMRGGradientNotConvergedError
 from varipeps.utils.random import PEPS_Random_Number_Generator
 from varipeps.utils.slurm import SlurmUtils
+from varipeps.contractions import apply_contraction_jitted
+from varipeps.utils.debug_print import debug_print
 
 from .inner_function import (
     calc_ctmrg_expectation,
@@ -143,7 +145,7 @@ def _bfgs_workhorse(
 
 
 @jit
-def _l_bfgs_workhorse(value_tuple, gradient_tuple):
+def _l_bfgs_workhorse(value_tuple, gradient_tuple, t_objs, config):
     gradient_elem_0, gradient_unravel = ravel_pytree(gradient_tuple[0])
     gradient_len = gradient_elem_0.size
 
@@ -155,6 +157,9 @@ def _make_1d(x):
             return jnp.concatenate((jnp.real(x_1d), jnp.imag(x_1d)))
         return x_1d
 
+    gradient_elem_0_1d = _make_1d(gradient_elem_0)
+    norm_grad_square = jnp.sum(gradient_elem_0_1d * gradient_elem_0_1d)
+
     value_arr = jnp.asarray([_make_1d(e) for e in value_tuple])
     gradient_arr = jnp.asarray([_make_1d(e) for e in gradient_tuple])
 
@@ -173,9 +178,69 @@ def first_loop(q, x):
         (pho_arr[:, jnp.newaxis] * s_arr, y_arr),
     )
 
-    gamma = jnp.sum(s_arr[-1] * y_arr[-1]) / jnp.sum(y_arr[-1] * y_arr[-1])
+    def apply_precond(x):
+        if hasattr(t_objs[0], "is_triangular_peps") and t_objs[0].is_triangular_peps:
+            contraction = "precondition_operator_triangular"
+        elif hasattr(t_objs[0], "is_split_transfer") and t_objs[0].is_split_transfer:
+            contraction = "precondition_operator_split_transfer"
+        else:
+            contraction = "precondition_operator"
+
+        if iscomplex:
+            x = x[:gradient_len] + 1j * x[gradient_len:]
+        x = gradient_unravel(x)
+        x = [
+            apply_contraction_jitted(contraction, (te.tensor,), (te,), (xe,))
+            + norm_grad_square * xe
+            for te, xe in zip(t_objs, x, strict=True)
+        ]
+
+        return _make_1d(x)
+
+    if config.optimizer_use_preconditioning:
+        y_precond, _ = jax.scipy.sparse.linalg.gmres(
+            apply_precond,
+            y_arr[0],
+            y_arr[0],
+            restart=config.optimizer_precond_gmres_krylov_subspace_size,
+            maxiter=config.optimizer_precond_gmres_maxiter,
+            solve_method="incremental",
+        )
+
+        def calc_q_precond(y, y_precond, q):
+            q_precond, _ = jax.scipy.sparse.linalg.gmres(
+                apply_precond,
+                q,
+                q,
+                restart=config.optimizer_precond_gmres_krylov_subspace_size,
+                maxiter=config.optimizer_precond_gmres_maxiter,
+                solve_method="incremental",
+            )
+
+            return cond(
+                jnp.sum(q_precond * q) >= 0,
+                lambda y, y_precond, q, q_precond: (y_precond, q_precond),
+                lambda y, y_precond, q, q_precond: (y, q),
+                y,
+                y_precond,
+                q,
+                q_precond,
+            )
+
+        y_precond, q_precond = cond(
+            jnp.sum(y_precond * y_arr[0]) >= 0,
+            calc_q_precond,
+            lambda y, y_precond, q: (y, q),
+            y_arr[0],
+            y_precond,
+            q,
+        )
+    else:
+        y_precond = y_arr[0]
+        q_precond = q
 
-    z_result = gamma * q
+    gamma = jnp.sum(s_arr[0] * y_arr[0]) / jnp.sum(y_arr[0] * y_precond)
+    z_result = gamma * q_precond
 
     def second_loop(z, x):
         pho_y, s, alpha_i = x
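
Between the two L-BFGS loops, this hunk replaces the plain initial Hessian scaling gamma = (s.y)/(y.y) by a preconditioned variant: both the first-loop result q and the gradient difference y_arr[0] are pushed through the inverse of the shifted environment operator via GMRES, and gamma is recomputed against the preconditioned y (the hunk also switches the s_arr[-1]/y_arr[-1] indexing to s_arr[0]/y_arr[0]). A self-contained sketch of that scaling step, with a toy diagonal operator standing in for apply_precond; the operator, sizes, and vectors below are assumptions for illustration only:

import jax.numpy as jnp
from jax.scipy.sparse.linalg import gmres

diag = jnp.linspace(1.0, 4.0, 8)

def precond_op(x):
    # stand-in for apply_precond: (E + ||g||^2 * Id) x with a diagonal toy E
    return diag * x + 1e-2 * x

q = jnp.ones(8)        # direction after the first L-BFGS loop
y = jnp.full(8, 0.5)   # gradient difference y_arr[0]
s = jnp.full(8, 0.25)  # corresponding step s_arr[0]

y_precond, _ = gmres(precond_op, y, y, restart=30, maxiter=3, solve_method="incremental")
q_precond, _ = gmres(precond_op, q, q, restart=30, maxiter=3, solve_method="incremental")

gamma = jnp.sum(s * y) / jnp.sum(y * y_precond)
z_result = gamma * q_precond  # seeds the second L-BFGS loop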
@@ -753,9 +818,72 @@ def random_noise(a):
 
             if count == 0 or signal_reset_descent_dir:
                 descent_dir = [-elem for elem in working_gradient]
+
+                if varipeps_config.optimizer_use_preconditioning:
+                    if (
+                        hasattr(
+                            working_unitcell.get_unique_tensors()[0],
+                            "is_triangular_peps",
+                        )
+                        and working_unitcell.get_unique_tensors()[
+                            0
+                        ].is_triangular_peps
+                    ):
+                        contraction = "precondition_operator_triangular"
+                    elif (
+                        hasattr(
+                            working_unitcell.get_unique_tensors()[0],
+                            "is_split_transfer",
+                        )
+                        and working_unitcell.get_unique_tensors()[
+                            0
+                        ].is_split_transfer
+                    ):
+                        contraction = "precondition_operator_split_transfer"
+                    else:
+                        contraction = "precondition_operator"
+
+                    grad_norm_squared = 1e-2 * (
+                        jnp.linalg.norm(jnp.asarray(working_gradient)) ** 2
+                    )
+
+                    tmp_descent_dir = [
+                        jax.scipy.sparse.linalg.gmres(
+                            lambda x: (
+                                apply_contraction_jitted(
+                                    contraction, (te.tensor,), (te,), (x,)
+                                )
+                                + grad_norm_squared * x
+                            ),
+                            xe,
+                            xe,
+                            restart=varipeps_config.optimizer_precond_gmres_krylov_subspace_size,
+                            maxiter=varipeps_config.optimizer_precond_gmres_maxiter,
+                            solve_method="incremental",
+                        )[0]
+                        for te, xe in zip(
+                            working_unitcell.get_unique_tensors(),
+                            descent_dir,
+                            strict=True,
+                        )
+                    ]
+                    if all(
+                        jnp.sum(xe * x2e.conj()) >= 0
+                        for xe, x2e in zip(
+                            descent_dir, tmp_descent_dir, strict=True
+                        )
+                    ):
+                        descent_dir = tmp_descent_dir
+                    else:
+                        tqdm.write("Warning: Non-positive preconditioner")
+                    del contraction
+                    del grad_norm_squared
             else:
                 descent_dir = _l_bfgs_workhorse(
-                    tuple(l_bfgs_x_cache), tuple(l_bfgs_grad_cache)
+                    tuple(l_bfgs_x_cache),
+                    tuple(l_bfgs_grad_cache),
+                    working_unitcell.get_unique_tensors(),
+                    varipeps_config,
                 )
         else:
             raise ValueError("Unknown optimization method.")
@@ -767,6 +895,8 @@ def random_noise(a):
             descent_dir = [-elem for elem in working_gradient]
 
         conv = jnp.linalg.norm(ravel_pytree(working_gradient)[0])
+        if jnp.isinf(conv) or jnp.isnan(conv):
+            conv = 0
        step_conv[random_noise_retries].append(conv)
 
        try:
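
In both code paths above, the preconditioned vector is accepted only if its overlap with the unpreconditioned one stays non-negative, so a non-positive preconditioner degrades gracefully to the plain direction instead of corrupting it. The jitted workhorse performs this check with jax.lax.cond; a minimal sketch of the pattern, with toy vectors and a hypothetical helper name:

import jax.numpy as jnp
from jax.lax import cond

def accept_if_positive(original, preconditioned):
    # keep the preconditioned vector only if the overlap is non-negative
    return cond(
        jnp.sum(preconditioned * original) >= 0,
        lambda o, p: p,  # accept preconditioned result
        lambda o, p: o,  # fall back to the unpreconditioned vector
        original,
        preconditioned,
    )

print(accept_if_positive(jnp.ones(3), -jnp.ones(3)))  # [1. 1. 1.], i.e. fallback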
