From e8aa25b9636c36a03975439b3d83661f62067fb8 Mon Sep 17 00:00:00 2001
From: Shao-Ting Chiu
Date: Tue, 17 Jun 2025 13:23:26 -0500
Subject: [PATCH 01/12] explicitly return loss terms

---
 pinn/pinn_1d.py | 35 +++++++++++++++++++----------------
 1 file changed, 19 insertions(+), 16 deletions(-)

diff --git a/pinn/pinn_1d.py b/pinn/pinn_1d.py
index d81bdab..67d8201 100644
--- a/pinn/pinn_1d.py
+++ b/pinn/pinn_1d.py
@@ -56,7 +56,9 @@
 import numpy as np
 from enum import Enum
 from utils import parse_args, get_activation, print_args, save_frame, make_video_from_frames, is_notebook, cleanfiles
-from SOAP.soap import SOAP
+#from SOAP.soap import SOAP
+from torchjd import mtl_backward
+from torchjd import aggregation as agg
 
 # torch.set_default_dtype(torch.float64)
 device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
@@ -291,7 +293,7 @@ def super_loss(self, model, mesh, loss_func):
         x = mesh.x_train
         u = model.get_solution(x)
         loss = loss_func(u, mesh.u_ex)
-        return loss
+        return loss, (loss,)
 
     # "PINN" loss
     def pinn_loss(self, model, mesh, loss_func):
@@ -303,15 +305,15 @@
 
         # Internal loss
         pde = mesh.pde
-        loss = loss_func(d2u_dx2[1:-1] + mesh.f[1:-1], pde.r * u[1:-1])
+        loss_pinn = loss_func(d2u_dx2[1:-1] + mesh.f[1:-1], pde.r * u[1:-1])
         # Boundary loss
         if not model.enforce_bc:
             u_bc = u[[0, -1]]
             u_ex_bc = mesh.u_ex[[0, -1]]
             loss_b = loss_func(u_bc, u_ex_bc)
-            loss += self.bc_weight * loss_b
-
-        return loss
+            loss = loss_pinn + self.bc_weight * loss_b
+            return loss, (loss_pinn, loss_b)
+        return loss_pinn, (loss_pinn,)
 
     def drm_loss(self, model, mesh: Mesh):
         """Deep Ritz Method loss"""
@@ -329,17 +331,18 @@ def drm_loss(self, model, mesh: Mesh):
             fu_prod = f_val * u
 
         integrand_values = 0.5 * grad_u_pred_sq[1:-1] + 0.5 * mesh.pde.r * u_pred_sq[1:-1] - fu_prod[1:-1]
-        loss = torch.mean(integrand_values)
-
-        # Boundary loss
-        u_bc = u[[0,-1]]
-        u_ex_bc = mesh.u_ex[[0,-1]]
-        loss_b = self.loss_func(u_bc, u_ex_bc)
-        loss += self.bc_weight * loss_b
+        loss_drm = torch.mean(integrand_values)
+        if not model.enforce_bc:
+            # Boundary loss
+            u_bc = u[[0,-1]]
+            u_ex_bc = mesh.u_ex[[0,-1]]
+            loss_b = self.loss_func(u_bc, u_ex_bc)
+            loss = loss_drm + self.bc_weight * loss_b
+            return loss, (loss_drm, loss_b)
 
         xs.requires_grad_(False)  # Disable gradient tracking for x
-        return loss
+        return loss, (loss_drm,)
 
     def loss(self, model, mesh):
         if self.type == -1:
@@ -377,7 +380,7 @@ def to_np(t): return t.detach().cpu().numpy()
 
     def closure():
         optimizer.zero_grad()
-        loss = criterion.loss(model=model, mesh=mesh)
+        loss, _ = criterion.loss(model=model, mesh=mesh)
         loss.backward()
         return loss
 
@@ -387,7 +390,7 @@ def closure():
         # we need to set to zero the gradients of all model parameters (PyTorch accumulates grad by default)
         optimizer.zero_grad()
         # compute the loss value for the current batch of data
-        loss = criterion.loss(model=model, mesh=mesh)
+        loss, _ = criterion.loss(model=model, mesh=mesh)
         # backpropagation to compute gradients of model param respect to the loss. computes dloss/dx
         # for every parameter x which has requires_grad=True.
         loss.backward()
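Patch 01 establishes the convention the rest of the series builds on: every loss method now returns the scalar total together with a tuple of its constituent terms, so the individual terms survive as separate autograd nodes for the multi-objective backward pass added later. A minimal standalone sketch of the convention, using toy tensors and a hypothetical pinn_loss_sketch helper rather than the repo's Mesh and model objects:

    import torch
    import torch.nn as nn

    mse = nn.MSELoss()

    def pinn_loss_sketch(residual, u_bc, u_bc_exact, bc_weight=1.0):
        """Return (total, per-term) losses, mirroring the new convention."""
        loss_pinn = mse(residual, torch.zeros_like(residual))  # interior PDE residual
        loss_b = mse(u_bc, u_bc_exact)                         # boundary mismatch
        return loss_pinn + bc_weight * loss_b, (loss_pinn, loss_b)

    residual = torch.randn(8, requires_grad=True)
    total, terms = pinn_loss_sketch(residual, torch.zeros(2), torch.zeros(2))
    total.backward()  # the single-objective path is unchanged

Callers that only need the total simply unpack it as loss, _ = criterion.loss(...), as the hunks above show.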
From 69fe618024e34de9668ea5cb743a41e48b0617ee Mon Sep 17 00:00:00 2001
From: Shao-Ting Chiu
Date: Tue, 17 Jun 2025 13:42:27 -0500
Subject: [PATCH 02/12] add torchjd for multi-task learning

---
 pinn/pinn_1d.py | 29 +++++++++++++++++++++++------
 pinn/utils.py   | 17 +++++++++++++++++
 2 files changed, 40 insertions(+), 6 deletions(-)

diff --git a/pinn/pinn_1d.py b/pinn/pinn_1d.py
index 67d8201..88f6943 100644
--- a/pinn/pinn_1d.py
+++ b/pinn/pinn_1d.py
@@ -57,7 +57,7 @@
 from enum import Enum
 from utils import parse_args, get_activation, print_args, save_frame, make_video_from_frames, is_notebook, cleanfiles
 #from SOAP.soap import SOAP
-from torchjd import mtl_backward
+import torchjd
 from torchjd import aggregation as agg
 
 # torch.set_default_dtype(torch.float64)
 device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
@@ -284,6 +284,8 @@ def __init__(self, loss_type, loss_func=nn.MSELoss(), bc_weight=1.0):
             self.name = "PINN Loss"
         elif self.type == 1:
             self.name = "DRM Loss"
+        elif self.type == 2:
+            self.name = "PINN+DRM Loss"
         else:
             raise ValueError(f"Unknown loss type: {self.type}")
         self.bc_weight = bc_weight
@@ -343,7 +345,14 @@ def drm_loss(self, model, mesh: Mesh):
 
         xs.requires_grad_(False)  # Disable gradient tracking for x
         return loss, (loss_drm,)
-
+
+    def drmpinn_loss(self, model, mesh):
+        """Combined Deep Ritz Method and PINN loss"""
+        loss_p, loss_ps = self.pinn_loss(model=model, mesh=mesh, loss_func=self.loss_func)
+        loss_d, loss_ds = self.drm_loss(model=model, mesh=mesh)
+        # Combine losses
+        loss_value = (loss_p + loss_d, [*loss_ps, loss_ds[0]])
+        return loss_value
     def loss(self, model, mesh):
         if self.type == -1:
             loss_value = self.super_loss(model=model, mesh=mesh, loss_func=self.loss_func)
         elif self.type == 0:
             loss_value = self.pinn_loss(model=model, mesh=mesh, loss_func=self.loss_func)
         elif self.type == 1:
             loss_value = self.drm_loss(model=model, mesh=mesh)
+        elif self.type == 2:
+            loss_value = self.drmpinn_loss(model=model, mesh=mesh)
         else:
             raise ValueError(f"Unknown loss type: {self.type}")
         return loss_value
 
+def get_aggregator(name: str):
+    if
 
 # %%
 # Define the training loop
 def train(model, mesh, criterion, iterations, adam_iterations, learning_rate,
-          num_check, num_plots, sweep_idx, level_idx, frame_dir):
+          num_check, num_plots, sweep_idx, level_idx, frame_dir, aggregator:str=None):
     optimizer = optim.Adam(model.parameters(), lr=learning_rate)
+    aggragator = None if aggregator is None else agg.get_aggregator(aggregator)
     # optimizer = SOAP(model.parameters(), lr = 3e-3, betas=(.95, .95), weight_decay=.01,
     #                  precondition_frequency=10)
     scheduler = StepLR(optimizer, step_size=1000, gamma=0.9)
@@ -390,10 +404,13 @@ def closure():
         # we need to set to zero the gradients of all model parameters (PyTorch accumulates grad by default)
         optimizer.zero_grad()
         # compute the loss value for the current batch of data
-        loss, _ = criterion.loss(model=model, mesh=mesh)
+        loss, multiloss = criterion.loss(model=model, mesh=mesh)
         # backpropagation to compute gradients of model param respect to the loss. computes dloss/dx
         # for every parameter x which has requires_grad=True.
-        loss.backward()
+        if aggragator is None:
+            loss.backward()
+        else:
+            torchjd.backward(multiloss, aggregator=aggragator,)
         # update the model param doing an optim step using the computed gradients and learning rate
         optimizer.step()
         #
@@ -478,7 +495,7 @@ def main(args=None):
         train(model=model, mesh=mesh, criterion=loss, iterations=args.epochs,
               adam_iterations=args.adam_epochs, learning_rate=args.lr,
               num_check=args.num_checks, num_plots=num_plots,
-              sweep_idx=i, level_idx=l, frame_dir=frame_dir)
+              sweep_idx=i, level_idx=l, frame_dir=frame_dir, aggragator=args.aggregator)
     # Turn PNGs into a video using OpenCV
     if args.plot:
         make_video_from_frames(frame_dir=frame_dir, name_prefix="Model_Outputs",
diff --git a/pinn/utils.py b/pinn/utils.py
index 41dd625..1b0c726 100644
--- a/pinn/utils.py
+++ b/pinn/utils.py
@@ -5,6 +5,8 @@
 import cv2
 from pathlib import Path
 import shutil
+import torch
+from torch.nn.functional import cosine_similarity
 
 def cleanfiles(dir_name):
@@ -71,6 +73,7 @@
         help="If set, enforce the BC in solution.")
     parser.add_argument('--bc_weight', type=float, default=1.0,
         help="Weight for the loss of BC.")
+    parser.add_argument('--aggregator', type=str, default='None', help="Aggregator for the loss function. See https://torchjd.org/stable/docs/aggregation/ for options")
 
     args = parser.parse_args(args)
 
@@ -146,3 +149,17 @@ def make_video_from_frames(frame_dir, name_prefix, output_file, fps=10):
         video.write(img)
     video.release()
     print(f" Video saved as {output_file_path}")
+
+def monitor_aggregator(aggregator):
+    def print_weights(_, __, weights: torch.Tensor) -> None:
+        """Prints the extracted weights."""
+        print(f"Weights: {weights}")
+
+    def print_gd_similarity(_, inputs: tuple[torch.Tensor, ...], aggregation: torch.Tensor) -> None:
+        """Prints the cosine similarity between the aggregation and the average gradient."""
+        matrix = inputs[0]
+        gd_output = matrix.mean(dim=0)
+        similarity = cosine_similarity(aggregation, gd_output, dim=0)
+        print(f"Cosine similarity: {similarity.item():.4f}")
+        aggregator.weighting.register_forward_hook(print_weights)
+        aggregator.register_forward_hook(print_gd_similarity)
\ No newline at end of file
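With the per-term losses available, patch 02 swaps loss.backward() for torchjd.backward whenever an aggregator is configured: torchjd stacks the per-term gradients into a Jacobian and combines its rows with the chosen aggregation rule instead of implicitly summing them. A self-contained sketch of that call, assuming UPGrad is among the aggregators exported by torchjd.aggregation (see the URL in the --aggregator help text):

    import torch
    import torchjd
    from torchjd import aggregation as agg

    params = torch.randn(3, requires_grad=True)  # stand-in for model parameters
    loss_a = (params ** 2).sum()                 # objective 1
    loss_b = (params - 1.0).pow(2).sum()         # objective 2

    aggregator = agg.UPGrad()
    torchjd.backward([loss_a, loss_b], aggregator=aggregator)
    print(params.grad)  # aggregated gradient, not the plain sum

Note that this patch still carries a naming bug: train() defines the parameter aggregator, but main() passes aggragator=args.aggregator; patch 03 below renames everything consistently.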
From 86a71ac2203227d6eeaa639ec3a9bd0d55c569ff Mon Sep 17 00:00:00 2001
From: Shao-Ting Chiu
Date: Tue, 17 Jun 2025 14:18:22 -0500
Subject: [PATCH 03/12] add aggregator

---
 pinn/pinn_1d.py | 35 +++++++++++++++++------------------
 pinn/utils.py   | 11 ++++++++---
 2 files changed, 25 insertions(+), 21 deletions(-)

diff --git a/pinn/pinn_1d.py b/pinn/pinn_1d.py
index 88f6943..5a53d21 100644
--- a/pinn/pinn_1d.py
+++ b/pinn/pinn_1d.py
@@ -55,7 +55,7 @@
 from torch.optim.lr_scheduler import StepLR
 import numpy as np
 from enum import Enum
-from utils import parse_args, get_activation, print_args, save_frame, make_video_from_frames, is_notebook, cleanfiles
+from utils import parse_args, get_activation, print_args, save_frame, make_video_from_frames, is_notebook, cleanfiles, get_aggregator, monitor_aggregator
 #from SOAP.soap import SOAP
 import torchjd
 from torchjd import aggregation as agg
@@ -295,7 +295,7 @@ def super_loss(self, model, mesh, loss_func):
         x = mesh.x_train
         u = model.get_solution(x)
         loss = loss_func(u, mesh.u_ex)
-        return loss, (loss,)
+        return loss, [loss,]
 
     # "PINN" loss
     def pinn_loss(self, model, mesh, loss_func):
@@ -314,8 +314,8 @@
             u_ex_bc = mesh.u_ex[[0, -1]]
             loss_b = loss_func(u_bc, u_ex_bc)
             loss = loss_pinn + self.bc_weight * loss_b
-            return loss, (loss_pinn, loss_b)
-        return loss_pinn, (loss_pinn,)
+            return loss, [loss_pinn, loss_b]
+        return loss_pinn, [loss_pinn,]
 
     def drm_loss(self, model, mesh: Mesh):
         """Deep Ritz Method loss"""
@@ -341,18 +341,18 @@
             u_ex_bc = mesh.u_ex[[0,-1]]
             loss_b = self.loss_func(u_bc, u_ex_bc)
             loss = loss_drm + self.bc_weight * loss_b
-            return loss, (loss_drm, loss_b)
+            return loss, [loss_drm, loss_b]
 
-        xs.requires_grad_(False)  # Disable gradient tracking for x
-        return loss, (loss_drm,)
-
+        #xs.requires_grad_(False)  # Disable gradient tracking for x
+        return loss_drm, [loss_drm,]
     def drmpinn_loss(self, model, mesh):
         """Combined Deep Ritz Method and PINN loss"""
         loss_p, loss_ps = self.pinn_loss(model=model, mesh=mesh, loss_func=self.loss_func)
         loss_d, loss_ds = self.drm_loss(model=model, mesh=mesh)
         # Combine losses
-        loss_value = (loss_p + loss_d, [*loss_ps, loss_ds[0]])
-        return loss_value
+        loss = loss_p + loss_d
+        multi_loss = torch.tensor([*loss_ps, loss_ds[0]]).requires_grad_(True)
+        return loss, multi_loss
     def loss(self, model, mesh):
         if self.type == -1:
             loss_value = self.super_loss(model=model, mesh=mesh, loss_func=self.loss_func)
@@ -366,15 +366,14 @@
             raise ValueError(f"Unknown loss type: {self.type}")
         return loss_value
 
-def get_aggregator(name: str):
-    if
-
 # %%
 # Define the training loop
 def train(model, mesh, criterion, iterations, adam_iterations, learning_rate,
-          num_check, num_plots, sweep_idx, level_idx, frame_dir, aggregator:str=None):
+          num_check, num_plots, sweep_idx, level_idx, frame_dir, aggregator:str='None', monitor_aggregator:bool=False):
     optimizer = optim.Adam(model.parameters(), lr=learning_rate)
-    aggragator = None if aggregator is None else agg.get_aggregator(aggregator)
+    aggregator = None if aggregator == 'None' else get_aggregator(aggregator)
+    if (aggregator is not None) and (monitor_aggregator):
+        monitor_aggregator(aggregator)
     # optimizer = SOAP(model.parameters(), lr = 3e-3, betas=(.95, .95), weight_decay=.01,
     #                  precondition_frequency=10)
     scheduler = StepLR(optimizer, step_size=1000, gamma=0.9)
@@ -407,10 +406,10 @@ def closure():
         loss, multiloss = criterion.loss(model=model, mesh=mesh)
         # backpropagation to compute gradients of model param respect to the loss. computes dloss/dx
         # for every parameter x which has requires_grad=True.
-        if aggragator is None:
+        if aggregator is None:
             loss.backward()
         else:
-            torchjd.backward(multiloss, aggregator=aggragator,)
+            torchjd.backward(multiloss, aggregator=aggregator,)
         # update the model param doing an optim step using the computed gradients and learning rate
         optimizer.step()
         #
@@ -495,7 +494,7 @@ def main(args=None):
         train(model=model, mesh=mesh, criterion=loss, iterations=args.epochs,
               adam_iterations=args.adam_epochs, learning_rate=args.lr,
               num_check=args.num_checks, num_plots=num_plots,
-              sweep_idx=i, level_idx=l, frame_dir=frame_dir, aggragator=args.aggregator)
+              sweep_idx=i, level_idx=l, frame_dir=frame_dir, aggregator=args.aggregator, monitor_aggregator=args.monitor_aggregator)
     # Turn PNGs into a video using OpenCV
     if args.plot:
         make_video_from_frames(frame_dir=frame_dir, name_prefix="Model_Outputs",
diff --git a/pinn/utils.py b/pinn/utils.py
index 1b0c726..70b8d26 100644
--- a/pinn/utils.py
+++ b/pinn/utils.py
@@ -7,7 +7,7 @@
 import shutil
 import torch
 from torch.nn.functional import cosine_similarity
-
+from torchjd import aggregation as agg
 
 def cleanfiles(dir_name):
     dir_path = Path(dir_name)
@@ -58,8 +58,8 @@
         help="Learning rate for the optimizer.")
     parser.add_argument('--levels', type=int, default=4,
         help="Number of levels in multilevel training.")
-    parser.add_argument('--loss_type', type=int, default=0, choices=[-1, 0, 1],
-        help="Loss type: -1 for supervised (true solution), 0 for PINN loss.")
+    parser.add_argument('--loss_type', type=int, default=0, choices=[-1, 0, 1, 2],
+        help="Loss type: -1 for supervised (true solution), 0 for PINN loss, 1 for DRM, 2 for DRM+PINN.")
     parser.add_argument('--activation', type=str, default='tanh',
         choices=['tanh', 'silu', 'relu', 'gelu', 'softmax'],
         help="Activation function to use.")
@@ -75,6 +75,8 @@
         help="If set, enforce the BC in solution.")
     parser.add_argument('--bc_weight', type=float, default=1.0,
         help="Weight for the loss of BC.")
     parser.add_argument('--aggregator', type=str, default='None', help="Aggregator for the loss function. See https://torchjd.org/stable/docs/aggregation/ for options")
+    parser.add_argument('--monitor_aggregator', action='store_true', help="If set, monitor gradients. Requires --aggregator to be set.")
+
     args = parser.parse_args(args)
 
@@ -150,6 +152,9 @@ def make_video_from_frames(frame_dir, name_prefix, output_file, fps=10):
         video.write(img)
     video.release()
     print(f" Video saved as {output_file_path}")
+
+def get_aggregator(name: str):
+    return getattr(agg, name)
 
 def monitor_aggregator(aggregator):
     def print_weights(_, __, weights: torch.Tensor) -> None:
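One subtlety in the get_aggregator helper added above: getattr(agg, name) yields the aggregator class, not an instance, so handing its result to torchjd.backward would fail. The one-line follow-up below adds the missing call. Illustrated, again assuming UPGrad as the example name:

    from torchjd import aggregation as agg

    aggregator_cls = getattr(agg, "UPGrad")  # the class object itself
    aggregator = aggregator_cls()            # instantiated, ready for torchjd.backward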
From 6eb1b92cc783485047ef8cb74ea9f4fe44eb0912 Mon Sep 17 00:00:00 2001
From: Shao-Ting Chiu
Date: Tue, 17 Jun 2025 14:20:18 -0500
Subject: [PATCH 04/12] fix agg API call

---
 pinn/utils.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pinn/utils.py b/pinn/utils.py
index 70b8d26..0b70591 100644
--- a/pinn/utils.py
+++ b/pinn/utils.py
@@ -153,7 +153,7 @@ def make_video_from_frames(frame_dir, name_prefix, output_file, fps=10):
     print(f" Video saved as {output_file_path}")
 
 def get_aggregator(name: str):
-    return getattr(agg, name)
+    return getattr(agg, name)()
 
 def monitor_aggregator(aggregator):
     def print_weights(_, __, weights: torch.Tensor) -> None:

From 18d89a983e6aaa2d2e2d227025dddff518963e83 Mon Sep 17 00:00:00 2001
From: Shao-Ting Chiu
Date: Tue, 17 Jun 2025 17:42:10 -0500
Subject: [PATCH 05/12] add monitor_aggregator

---
 pinn/pinn_1d.py | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/pinn/pinn_1d.py b/pinn/pinn_1d.py
index 5a53d21..f585832 100644
--- a/pinn/pinn_1d.py
+++ b/pinn/pinn_1d.py
@@ -351,7 +351,7 @@ def drmpinn_loss(self, model, mesh):
         loss_d, loss_ds = self.drm_loss(model=model, mesh=mesh)
         # Combine losses
         loss = loss_p + loss_d
-        multi_loss = torch.tensor([*loss_ps, loss_ds[0]]).requires_grad_(True)
+        multi_loss = [*loss_ps, loss_ds[0]]
         return loss, multi_loss
     def loss(self, model, mesh):
         if self.type == -1:
@@ -369,10 +369,10 @@
 # %%
 # Define the training loop
 def train(model, mesh, criterion, iterations, adam_iterations, learning_rate,
-          num_check, num_plots, sweep_idx, level_idx, frame_dir, aggregator:str='None', monitor_aggregator:bool=False):
+          num_check, num_plots, sweep_idx, level_idx, frame_dir, aggregator:str='None', do_monitor_aggregator:bool=False):
     optimizer = optim.Adam(model.parameters(), lr=learning_rate)
     aggregator = None if aggregator == 'None' else get_aggregator(aggregator)
-    if (aggregator is not None) and (monitor_aggregator):
+    if (aggregator is not None) and (do_monitor_aggregator):
         monitor_aggregator(aggregator)
     # optimizer = SOAP(model.parameters(), lr = 3e-3, betas=(.95, .95), weight_decay=.01,
     #                  precondition_frequency=10)
     scheduler = StepLR(optimizer, step_size=1000, gamma=0.9)
@@ -494,7 +494,7 @@ def main(args=None):
         train(model=model, mesh=mesh, criterion=loss, iterations=args.epochs,
               adam_iterations=args.adam_epochs, learning_rate=args.lr,
               num_check=args.num_checks, num_plots=num_plots,
-              sweep_idx=i, level_idx=l, frame_dir=frame_dir, aggregator=args.aggregator, monitor_aggregator=args.monitor_aggregator)
+              sweep_idx=i, level_idx=l, frame_dir=frame_dir, aggregator=args.aggregator, do_monitor_aggregator=args.monitor_aggregator)
     # Turn PNGs into a video using OpenCV
     if args.plot:
         make_video_from_frames(frame_dir=frame_dir, name_prefix="Model_Outputs",

From 3eb02eb1f444fef45caa063aab15fb6ebaf56e8c Mon Sep 17 00:00:00 2001
From: Shao-Ting Chiu
Date: Tue, 17 Jun 2025 18:28:24 -0500
Subject: [PATCH 06/12] allow string of int or float

---
 pinn/utils.py | 16 ++++++++++++----
 1 file changed, 12 insertions(+), 4 deletions(-)

diff --git a/pinn/utils.py b/pinn/utils.py
index 0b70591..046571b 100644
--- a/pinn/utils.py
+++ b/pinn/utils.py
@@ -6,9 +6,12 @@
 from pathlib import Path
 import shutil
 import torch
+import ast
 from torch.nn.functional import cosine_similarity
 from torchjd import aggregation as agg
 
+device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+
 def cleanfiles(dir_name):
     dir_path = Path(dir_name)
     if dir_path.exists() and dir_path.is_dir():
@@ -73,7 +76,7 @@
         help="If set, enforce the BC in solution.")
     parser.add_argument('--bc_weight', type=float, default=1.0,
         help="Weight for the loss of BC.")
-    parser.add_argument('--aggregator', type=str, default='None', help="Aggregator for the loss function. See https://torchjd.org/stable/docs/aggregation/ for options")
+    parser.add_argument('--aggregator', type=str, nargs='+', default='None', help="Aggregator for the loss function. See https://torchjd.org/stable/docs/aggregation/ for options")
     parser.add_argument('--monitor_aggregator', action='store_true', help="If set, monitor gradients. Requires --aggregator to be set.")
 
@@ -153,7 +156,12 @@ def make_video_from_frames(frame_dir, name_prefix, output_file, fps=10):
     print(f" Video saved as {output_file_path}")
 
 def get_aggregator(name: str):
-    return getattr(agg, name)()
+    if isinstance(name, str):
+        return getattr(agg, name)()
+    elif name[0]=="Constant":
+        return getattr(agg, name[0])(torch.tensor([ast.literal_eval(i) for i in name[1:]]).to(device))
+    else:
+        return getattr(agg, name[0])(*[ast.literal_eval(i) for i in name[1:]])
 
 def monitor_aggregator(aggregator):
     def print_weights(_, __, weights: torch.Tensor) -> None:
         """Prints the extracted weights."""
@@ -166,5 +174,5 @@ def print_gd_similarity(_, inputs: tuple[torch.Tensor, ...], aggregation: torch.Tensor) -> None:
         gd_output = matrix.mean(dim=0)
         similarity = cosine_similarity(aggregation, gd_output, dim=0)
         print(f"Cosine similarity: {similarity.item():.4f}")
-        aggregator.weighting.register_forward_hook(print_weights)
-        aggregator.register_forward_hook(print_gd_similarity)
\ No newline at end of file
+    aggregator.weighting.register_forward_hook(print_weights)
+    aggregator.register_forward_hook(print_gd_similarity)
\ No newline at end of file

From 839071ac7ca7a36c0d3cd010a1b87a54cf09c908 Mon Sep 17 00:00:00 2001
From: Shao-Ting Chiu
Date: Tue, 17 Jun 2025 18:29:37 -0500
Subject: [PATCH 07/12] ignore launch.json

---
 .gitignore | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.gitignore b/.gitignore
index e6cba78..3e8a43c 100644
--- a/.gitignore
+++ b/.gitignore
@@ -2,7 +2,7 @@
 __pycache__/
 *.py[cod]
 *$py.class
-
+launch.json
 # C extensions
 *.so
 pinn/frames/

From f63861cf636d41df779d2becd41b8d684e9e1306 Mon Sep 17 00:00:00 2001
From: Shao-Ting Chiu
Date: Fri, 20 Jun 2025 13:41:44 -0500
Subject: [PATCH 08/12] add agg package

---
 pinn/utils.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/pinn/utils.py b/pinn/utils.py
index 4ee3929..da4d0e7 100644
--- a/pinn/utils.py
+++ b/pinn/utils.py
@@ -22,6 +22,8 @@
 import numpy as np
 import torch
 import ast
+from torch.nn.functional import cosine_similarity
+from torchjd import aggregation as agg
 
 device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
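After --aggregator gained nargs='+' in patch 06, the flag accepts a bare aggregator name or a name followed by constructor arguments, each parsed with ast.literal_eval. A sketch of how the parsed list flows through get_aggregator, with the .to(device) move dropped for brevity:

    import ast
    import torch
    from torchjd import aggregation as agg

    def get_aggregator(name):
        # Mirrors pinn/utils.py: a plain string, Constant with a weight tensor,
        # or any other aggregator with literal positional arguments.
        if isinstance(name, str):
            return getattr(agg, name)()
        elif name[0] == "Constant":
            return getattr(agg, name[0])(torch.tensor([ast.literal_eval(i) for i in name[1:]]))
        else:
            return getattr(agg, name[0])(*[ast.literal_eval(i) for i in name[1:]])

    # `--aggregator Constant 0.5 0.5` arrives from argparse as:
    aggregator = get_aggregator(["Constant", "0.5", "0.5"])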
From ce08e6096f10e2380cfdced0909d41bec498fd7c Mon Sep 17 00:00:00 2001
From: Shao-Ting Chiu
Date: Fri, 20 Jun 2025 13:42:32 -0500
Subject: [PATCH 09/12] update torchjd

---
 .github/workflows/main.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml
index 24391e1..acfa2b9 100644
--- a/.github/workflows/main.yml
+++ b/.github/workflows/main.yml
@@ -27,7 +27,7 @@ jobs:
     - name: Install dependencies
       run: |
         python -m pip install --upgrade pip
-        python -m pip install flake8 pytest numpy torch matplotlib
+        python -m pip install flake8 pytest numpy torch matplotlib torchjd
         if [ -f requirements.txt ]; then pip install -r requirements.txt; fi
     - name: Lint with flake8
       run: |

From 4f4ca9601f0787388390244ee8d634850ae30fc1 Mon Sep 17 00:00:00 2001
From: Shao-Ting Chiu
Date: Fri, 20 Jun 2025 13:45:16 -0500
Subject: [PATCH 10/12] fix indent issue

---
 pinn/pinn_1d.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/pinn/pinn_1d.py b/pinn/pinn_1d.py
index c50bf53..a647bee 100644
--- a/pinn/pinn_1d.py
+++ b/pinn/pinn_1d.py
@@ -498,7 +498,6 @@ def main(args=None):
               adam_iterations=args.adam_epochs, learning_rate=args.lr,
               num_check=args.num_checks, num_plots=num_plots,
               sweep_idx=i, level_idx=l, frame_dir=frame_dir, aggregator=args.aggregator, do_monitor_aggregator=args.monitor_aggregator)
-              sweep_idx=i, level_idx=lev, frame_dir=frame_dir, scheduler_gen=scheduler_gen)
     # Turn PNGs into a video using OpenCV
     if args.plot:
         make_video_from_frames(frame_dir=frame_dir, name_prefix="Model_Outputs",

From 7988125e3fee00eaa4ce333d36ab88c3c5139593 Mon Sep 17 00:00:00 2001
From: Shao-Ting Chiu
Date: Fri, 20 Jun 2025 13:48:00 -0500
Subject: [PATCH 11/12] add monitor_aggregator import

---
 pinn/pinn_1d.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pinn/pinn_1d.py b/pinn/pinn_1d.py
index a647bee..2461c42 100644
--- a/pinn/pinn_1d.py
+++ b/pinn/pinn_1d.py
@@ -48,7 +48,7 @@
 import numpy as np
 from enum import Enum
 from utils import parse_args, get_activation, print_args, save_frame, make_video_from_frames
-from utils import is_notebook, cleanfiles, fourier_analysis, get_scheduler_generator, scheduler_step
+from utils import is_notebook, cleanfiles, fourier_analysis, get_scheduler_generator, scheduler_step, monitor_aggregator
 # from SOAP.soap import SOAP
 # torch.set_default_dtype(torch.float64)
 device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

From 80041039c6a8488aa48944fddd484d8d21cf6c44 Mon Sep 17 00:00:00 2001
From: Shao-Ting Chiu
Date: Fri, 20 Jun 2025 13:56:01 -0500
Subject: [PATCH 12/12] fix merge bug: argument error

---
 pinn/pinn_1d.py | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/pinn/pinn_1d.py b/pinn/pinn_1d.py
index 2461c42..fd9f2c9 100644
--- a/pinn/pinn_1d.py
+++ b/pinn/pinn_1d.py
@@ -45,10 +45,12 @@
 import torch
 import torch.nn as nn
 import torch.optim as optim
+import torchjd
+from torchjd import aggregation as agg
 import numpy as np
 from enum import Enum
 from utils import parse_args, get_activation, print_args, save_frame, make_video_from_frames
-from utils import is_notebook, cleanfiles, fourier_analysis, get_scheduler_generator, scheduler_step, monitor_aggregator
+from utils import is_notebook, cleanfiles, fourier_analysis, get_scheduler_generator, scheduler_step, monitor_aggregator, get_aggregator
 # from SOAP.soap import SOAP
 # torch.set_default_dtype(torch.float64)
 device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
@@ -366,7 +368,7 @@ def loss(self, model, mesh):
 # %%
 # Define the training loop
 def train(model, mesh, criterion, iterations, adam_iterations, learning_rate, num_check, num_plots, sweep_idx,
-          level_idx, frame_dir, scheduler_gen):
+          level_idx, frame_dir, scheduler_gen, aggregator:str='None', do_monitor_aggregator:bool=False):
     optimizer = optim.Adam(model.parameters(), lr=learning_rate)
     aggregator = None if aggregator == 'None' else get_aggregator(aggregator)
     if (aggregator is not None) and (do_monitor_aggregator):
@@ -497,7 +499,7 @@ def main(args=None):
         train(model=model, mesh=mesh, criterion=loss, iterations=args.epochs,
               adam_iterations=args.adam_epochs, learning_rate=args.lr,
               num_check=args.num_checks, num_plots=num_plots,
-              sweep_idx=i, level_idx=l, frame_dir=frame_dir, aggregator=args.aggregator, do_monitor_aggregator=args.monitor_aggregator)
+              sweep_idx=i, level_idx=lev, frame_dir=frame_dir, scheduler_gen=scheduler_gen, aggregator=args.aggregator, do_monitor_aggregator=args.monitor_aggregator)
     # Turn PNGs into a video using OpenCV
     if args.plot:
         make_video_from_frames(frame_dir=frame_dir, name_prefix="Model_Outputs",
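Taken together, the series leaves train() choosing between plain backprop and Jacobian descent on every step. A condensed sketch of that final control flow, with a toy linear model standing in for the PINN and UPGrad again assumed as the example aggregator:

    import torch
    import torch.nn as nn
    import torch.optim as optim
    import torchjd
    from torchjd import aggregation as agg

    model = nn.Linear(1, 1)
    optimizer = optim.Adam(model.parameters(), lr=1e-3)
    aggregator = agg.UPGrad()  # None would select the plain-backprop branch

    x = torch.linspace(0.0, 1.0, 16).unsqueeze(1)
    u = model(x)
    loss_pinn = (u ** 2).mean()        # stand-in for the PDE residual term
    loss_b = u[[0, -1]].pow(2).mean()  # stand-in for the boundary term

    optimizer.zero_grad()
    if aggregator is None:
        (loss_pinn + loss_b).backward()
    else:
        torchjd.backward([loss_pinn, loss_b], aggregator=aggregator)
    optimizer.step()

With --monitor_aggregator set, the forward hooks installed by monitor_aggregator additionally print the aggregation weights and the cosine similarity between the aggregated direction and the mean gradient at each step.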