differentiable s-matrix calculation

tylerflex · tylerflex · commit a4d17370709f · 2025-07-17T14:51:33.000-04:00
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -7,6 +7,9 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 
 ## [Unreleased]
 
+### Added
+- Objective functions that involve running `tidy3d.plugins.smatrix.ComponentModeler` can be differentiated with autograd.
+
 ## [2.9.0rc2] - 2025-07-17
 
 ### Added
diff --git a/docs/faq b/docs/faq
@@ -1 +1 @@
-Subproject commit 807ec8c174c575d339a4a847c64aeaa2ab8af59d
+Subproject commit e85f53460b0fd870dbb8d5ae78e9f137b848d7b9
diff --git a/tests/test_data/test_data_arrays.py b/tests/test_data/test_data_arrays.py
@@ -4,9 +4,11 @@
 
 from typing import Optional
 
-import numpy as np
+import autograd as ag
+import autograd.numpy as np
 import pytest
 import xarray.testing as xrt
+from autograd.test_util import check_grads
 
 import tidy3d as td
 from tidy3d.exceptions import DataError
@@ -468,3 +470,32 @@ def test_interp(method, scalar_index):
     xr_interp = data.interp(f=f)
     ag_interp = data._ag_interp(f=f)
     xrt.assert_allclose(xr_interp, ag_interp)
+
+
+def test_with_updated_data():
+    """Check the ``DataArray._with_updated_data()`` method."""
+
+    arr = td.SpatialDataArray(
+        np.ones((2, 3, 4, 5), dtype=np.complex128),
+        coords={"x": [0, 1], "y": [1, 2, 3], "z": [2, 3, 4, 5], "w": [0, 1, 2, 3, 4]},
+    )
+
+    data = np.zeros((1, 1, 1, 5))  # size 1 along each selected dim, full length along "w"
+
+    coords = {"x": 0, "y": 2, "z": 3}  # single label per dim; "w" is left unselected
+
+    arr2 = arr._with_updated_data(data=data, coords=coords)
+
+    data_expected = np.ones(arr.shape) + 0j
+    data_expected[0, 1, 1, :] = 0.0 + 0j  # positions of x=0, y=2, z=3 in the coords above
+    assert np.all(arr2.data == data_expected), "DataArray._with_updated_data() failed"
+
+    def f(x):
+        arr2 = arr._with_updated_data(data=x, coords=coords)
+        return np.abs(np.sum(arr2.data))
+
+    # grad should just be all 1s because of sum, so check that this is true
+    g = ag.grad(f)(data)
+    assert np.all(g == np.ones_like(data))
+
+    check_grads(f)(data)
diff --git a/tidy3d/components/data/data_array.py b/tidy3d/components/data/data_array.py
@@ -489,6 +489,33 @@ def _ag_interp_func(var, indexes_coords, method, **kwargs):
                 result = result.transpose(*out_dims)
         return result
 
+    def _with_updated_data(self, data: np.ndarray, coords: dict[str, Any]) -> DataArray:
+        """Return a copy with ``data`` written at ``coords``; compatible with autograd.
+
+        Constraints / Edge cases:
+            - each value in ``coords`` must be a single label, e.g. ``{"x": 1}``, not an array
+            - ``data`` is reshaped to ``self.shape`` with size 1 along each dimension in ``coords``
+        """
+
+        # boolean mask that is True only at the entries selected by ``coords``
+        mask = xr.zeros_like(self, dtype=bool)
+        mask.loc[coords] = True
+
+        # reshape ``data`` to line up with ``self.dims``, with size 1 along each selected dimension
+        old_data = self.data
+        new_shape = list(old_data.shape)
+        for i, dim in enumerate(self.dims):
+            if dim in coords:
+                new_shape[i] = 1
+        new_data = data.reshape(new_shape)
+
+        # add zeros of the full shape so ``new_data`` broadcasts to the same shape as ``mask``
+        new_data = new_data + np.zeros_like(old_data)
+
+        new_data = np.where(mask, new_data, old_data)
+
+        return self.copy(deep=True, data=new_data)
+
 
 class FreqDataArray(DataArray):
     """Frequency-domain array.
diff --git a/tidy3d/plugins/autograd/README.md b/tidy3d/plugins/autograd/README.md
@@ -217,6 +217,7 @@ We also support the following high-level features:
 - We automatically determine the number of adjoint simulations to run from a given forward simulation to maintain gradient accuracy.
   Adjoint sources are automatically grouped by either frequency or spatial port (whichever yields fewer adjoint simulations), and all adjoint simulations are run in a single batch (applies to both `run` and `run_async`).
   The parameter `max_num_adjoint_per_fwd` (default `10`) prevents launching unexpectedly large numbers of adjoint simulations automatically.
+- Differentiation of objective functions involving the scattering matrix produced by `tidy3d.plugins.smatrix.ComponentModeler`.
 
 We currently have the following restrictions:
 
diff --git a/tidy3d/plugins/smatrix/component_modelers/base.py b/tidy3d/plugins/smatrix/component_modelers/base.py
@@ -6,7 +6,7 @@
 from abc import ABC, abstractmethod
 from typing import Optional, Union, get_args
 
-import numpy as np
+import autograd.numpy as np
 import pydantic.v1 as pd
 
 from tidy3d.components.base import Tidy3dBaseModel, cached_property
@@ -22,6 +22,7 @@
 from tidy3d.plugins.smatrix.ports.modal import Port
 from tidy3d.plugins.smatrix.ports.rectangular_lumped import LumpedPort
 from tidy3d.plugins.smatrix.ports.wave import WavePort
+from tidy3d.web import run_async
 from tidy3d.web.api.container import Batch, BatchData
 
 # fwidth of gaussian pulse in units of central frequency
@@ -196,7 +197,21 @@ def batch_path(self) -> str:
     @cached_property
     def batch_data(self) -> BatchData:
         """The :class:`.BatchData` associated with the simulations run for this component modeler."""
-        return self.batch.run(path_dir=self.path_dir)
+
+        # NOTE: forwards the batch fields to ``run_async`` because ``Batch`` is not differentiable.
+        batch = self.batch
+        run_async_kwargs = batch.dict(
+            exclude={
+                "type",
+                "path_dir",
+                "attrs",
+                "solver_version",
+                "jobs_cached",
+                "num_workers",
+                "simulations",
+            }
+        )
+        return run_async(batch.simulations, **run_async_kwargs, path_dir=self.path_dir)
 
     def get_path_dir(self, path_dir: str) -> None:
         """Check whether the supplied 'path_dir' matches the internal field value."""
diff --git a/tidy3d/plugins/smatrix/component_modelers/modal.py b/tidy3d/plugins/smatrix/component_modelers/modal.py
@@ -6,7 +6,7 @@
 
 from typing import Optional
 
-import numpy as np
+import autograd.numpy as np
 import pydantic.v1 as pd
 
 from tidy3d.components.base import cached_property
@@ -317,14 +317,15 @@ def _internal_construct_smatrix(self, batch_data: BatchData) -> ModalPortDataArr
                 )
                 source_norm = self._normalization_factor(port_in, sim_data)
                 s_matrix_elements = np.array(amp.data) / np.array(source_norm)
-                s_matrix.loc[
-                    {
-                        "port_in": port_name_in,
-                        "mode_index_in": mode_index_in,
-                        "port_out": port_name_out,
-                        "mode_index_out": mode_index_out,
-                    }
-                ] = s_matrix_elements
+
+                coords_set = {
+                    "port_in": port_name_in,
+                    "mode_index_in": mode_index_in,
+                    "port_out": port_name_out,
+                    "mode_index_out": mode_index_out,
+                }
+
+                s_matrix = s_matrix._with_updated_data(data=s_matrix_elements, coords=coords_set)
 
         # element can be determined by user-defined mapping
         for (row_in, col_in), (row_out, col_out), mult_by in self.element_mappings:
@@ -339,12 +340,14 @@ def _internal_construct_smatrix(self, batch_data: BatchData) -> ModalPortDataArr
 
             port_out_to, mode_index_out_to = row_out
             port_in_to, mode_index_in_to = col_out
+
+            elements_from = mult_by * s_matrix.loc[coords_from].values
             coords_to = {
                 "port_in": port_in_to,
                 "mode_index_in": mode_index_in_to,
                 "port_out": port_out_to,
                 "mode_index_out": mode_index_out_to,
             }
-            s_matrix.loc[coords_to] = mult_by * s_matrix.loc[coords_from].values
+            s_matrix = s_matrix._with_updated_data(data=elements_from, coords=coords_to)
 
         return s_matrix
diff --git a/tidy3d/plugins/smatrix/component_modelers/terminal.py b/tidy3d/plugins/smatrix/component_modelers/terminal.py
@@ -221,9 +221,9 @@ def _internal_construct_smatrix(self, batch_data: BatchData) -> TerminalPortData
         for port_in in self.ports:
             sim_data = batch_data[self._task_name(port=port_in)]
             a, b = self.compute_power_wave_amplitudes_at_each_port(port_impedances, sim_data)
-            indexer = {"f": a.f, "port_in": port_in.name, "port_out": a.port}
-            a_matrix.loc[indexer] = a
-            b_matrix.loc[indexer] = b
+            indexer = {"port_in": port_in.name}
+            a_matrix = a_matrix._with_updated_data(data=a.data, coords=indexer)
+            b_matrix = b_matrix._with_updated_data(data=b.data, coords=indexer)
 
         s_matrix = self.ab_to_s(a_matrix, b_matrix)
         return s_matrix
@@ -481,10 +481,14 @@ def _port_reference_impedances(self, batch_data: BatchData) -> PortDataArray:
                 # WavePorts have a port impedance calculated from its associated modal field distribution
                 # and is frequency dependent.
                 impedances = port.compute_port_impedance(sim_data_port).values
-                port_impedances.loc[{"port": port.name}] = impedances.squeeze()
+                port_impedances = port_impedances._with_updated_data(
+                    data=impedances, coords={"port": port.name}
+                )
             else:
                 # LumpedPorts have a constant reference impedance
-                port_impedances.loc[{"port": port.name}] = np.full(len(self.freqs), port.impedance)
+                port_impedances = port_impedances._with_updated_data(
+                    data=np.full(len(self.freqs), port.impedance), coords={"port": port.name}
+                )
 
         port_impedances = TerminalComponentModeler._set_port_data_array_attributes(port_impedances)
         return port_impedances