support PolySlab.slab_bounds differentiation

flexcompute · Dec 9, 2024 · 144ab3e · 144ab3e
1 parent 5d2a8d9
commit 144ab3e
Show file tree

Hide file tree

Showing 9 changed files with 260 additions and 101 deletions.
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -28,6 +28,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 - Added mode solver option `precision='auto'` to automatically select single or double precision for optimizing performance and accuracy.
 - Added `LinearLumpedElement` as a new supported `LumpedElement` type. This enhancement allows for the modeling of two-terminal passive networks, which can include any configuration of resistors, inductors, and capacitors, within the `Simulation`. Simple RLC networks are described using `RLCNetwork`, while more complex networks can be represented by their admittance transfer function through `AdmittanceNetwork`.
 - Option for the `ModeSolver` and `ModeSolverMonitor` to store only a subset of field components.
+- Support for differentiation with respect to `PolySlab.slab_bounds`.
 
 ### Changed
 - Priority is given to `snapping_points` in `GridSpec` when close to structure boundaries, which reduces the chance of them being skipped.
@@ -44,6 +45,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 - Default choice of frequency in `Simulation.plot_eps` and `Simulation.plot_structures_eps` is now the central frequency of all sources in the simulation. If the central frequencies differ, the permittivity is evaluated at infinite frequency, and a warning is emitted. 
 - Mode solver fields are more consistently normalized with respect to grid-dependent sign inversions in high order modes.
 - `MeshOverrideStructure` accepts `Box` as geometry. Other geometry types will raise a warning and convert to bounding box.
+- Internal refactor of adjoint shape gradients using `GradientSurfaceMesh`.
 
 ### Fixed
 - Significant speedup for field projection computations.
@@ -70,8 +72,6 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 - Autograd support for simulations without adjoint sources in `run` as well as `run_async`, which will not attempt to run the simulation but instead return zero gradients. This can sometimes occur if the objective function gradient does not depend on some simulations, for example when using `min` or `max` in the objective.
 - `bend_angle_rotation` in `ModeSpec` to improve accuracy in some cases when both `bend_radius` and `angle_theta` are defined. This option is disabled by default but it can be tried when very high accuracy is required in `ModeSource` and `ModeMonitor` objects that are placed in bend and angled waveguides.
 
-- Support for differentiation with respect to `PolySlab.slab_bounds` and `PolySlab.dilation`.
-
 ### Changed
 - `CustomMedium` design regions require far less data when performing inverse design by reducing adjoint field monitor size for dims with one pixel.
 - Calling `.values` on `DataArray` no longer raises a `DeprecationWarning` during automatic differentiation.

diff --git a/tests/test_components/test_autograd.py b/tests/test_components/test_autograd.py
@@ -41,7 +41,7 @@
 
 # whether to run numerical gradient tests, off by default because it runs real simulations
 RUN_NUMERICAL = False
-_NUMERICAL_COMBINATION = ("size_element", "mode")
+_NUMERICAL_COMBINATION = ("polyslab", "mode")
 
 TEST_MODES = ("pipeline", "adjoint", "speed")
 TEST_MODE = "speed" if TEST_POLYSLAB_SPEED else "pipeline"
@@ -71,10 +71,11 @@
 LZ = 7.0 * WVL
 
 IS_3D = False
+POLYSLAB_AXIS = 2
 
 # TODO: test 2D and 3D parameterized
 
-LX = 0.5 * WVL if IS_3D else 0.0
+LX = 3.5 * WVL if IS_3D else 0.0
 PML_X = True if IS_3D else False
 
 
@@ -83,7 +84,7 @@
 DA_SHAPE = (DA_SHAPE_X, 1_000, 1_000) if TEST_CUSTOM_MEDIUM_SPEED else (DA_SHAPE_X, 12, 12)
 
 # number of vertices in the polyslab
-NUM_VERTICES = 100_000 if TEST_POLYSLAB_SPEED else 110
+NUM_VERTICES = 100_000 if TEST_POLYSLAB_SPEED else 25
 
 PNT_DIPOLE = td.PointDipole(
     center=(0, 0, -LZ / 2 + WVL),
@@ -104,6 +105,7 @@
         fwidth=FWIDTH,
         amplitude=1.0,
     ),
+    pol_angle=0,
 )
 
 # sim that we add traced structures and monitors to
@@ -128,7 +130,7 @@
             name="extraneous",
         )
     ],
-    boundary_spec=td.BoundarySpec.pml(x=False, y=True, z=True),
+    boundary_spec=td.BoundarySpec.pml(x=PML_X, y=True, z=True),
     grid_spec=td.GridSpec.uniform(dl=0.01 * td.C_0 / FREQ0),
 )
 
@@ -332,19 +334,30 @@ def make_structures(params: anp.ndarray) -> dict[str, td.Structure]:
     matrix = np.random.random((N_PARAMS,)) - 0.5
     params_01 = 0.5 * (anp.tanh(matrix @ params / 3) + 1)
 
-    radii = 1.0 + 0.5 * params_01
+    free_param = "vertices" if POLYSLAB_AXIS == 0 else "slab_bounds"
+
+    if free_param == "vertices":
+        radii = 0.5 + 0.5 * params_01
+        slab_bounds = (-0.5, 0.5)
+    elif free_param == "slab_bounds":
+        radii = 1.0
+        shift = 0.1 * params_01
+        slab_bounds = (-0.5 + shift, 0.5 + shift)
+        # slab_bounds = (-0.5 + shift, 0.5)
+        # slab_bounds = (-0.5, 0.5 + shift)
 
     phis = 2 * anp.pi * anp.linspace(0, 1, NUM_VERTICES + 1)[:NUM_VERTICES]
     xs = radii * anp.cos(phis)
     ys = radii * anp.sin(phis)
     vertices = anp.stack((xs, ys), axis=-1)
+
     polyslab = td.Structure(
         geometry=td.PolySlab(
             vertices=vertices,
-            slab_bounds=(-0.5, 0.5),
-            axis=0,
-            sidewall_angle=0.01,
-            dilation=0.01,
+            slab_bounds=slab_bounds,
+            axis=POLYSLAB_AXIS,
+            sidewall_angle=0.00,
+            dilation=0.00,
         ),
         medium=med,
     )
@@ -558,7 +571,7 @@ def plot_sim(sim: td.Simulation, plot_eps: bool = True) -> None:
     args = [("polyslab", "mode")]
 
 
-args = [("polyslab", "mode")]
+# args = [("polyslab", "mode")]
 
 
 def get_functions(structure_key: str, monitor_key: str) -> typing.Callable:
@@ -591,7 +604,7 @@ def make_sim(*args) -> td.Simulation:
             structures.append(structures_traced_dict[structure_key])
 
         sim = SIM_BASE
-        if "diff" in monitor_dict:
+        if "diff" in monitor_keys:
             sim = sim.updated_copy(boundary_spec=td.BoundarySpec.pml(x=False, y=False, z=True))
         sim = sim.updated_copy(structures=structures, monitors=monitors)
 
@@ -643,7 +656,8 @@ def objective(*args):
         sim = make_sim(*args)
         if PLOT_SIM:
             plot_sim(sim, plot_eps=True)
-        data = web.run(sim, task_name="autograd_test_numerical", verbose=False)
+
+        data = web.run(sim, task_name="autograd_test_numerical", verbose=False, local_gradient=True)
         value = postprocess(data)
         return value
 
@@ -652,7 +666,7 @@ def objective(*args):
     assert anp.all(grad != 0.0), "some gradients are 0"
 
     # numerical gradients
-    delta = 1e-3
+    delta = 1e-1
     sims_numerical = {}
 
     params_num = np.zeros((N_PARAMS, N_PARAMS))
@@ -864,22 +878,22 @@ def objective(*args):
 def test_autograd_polyslab_cylinder(use_emulated_run, monitor_key):
     """Test an objective function through tidy3d autograd."""
 
-    t = 1.0
+    t0 = 1.0
     axis = 0
 
-    num_pts = 89
+    num_pts = 819
 
     monitor, postprocess = make_monitors()[monitor_key]
 
-    def make_cylinder(radius, x0, y0):
+    def make_cylinder(radius, x0, y0, t):
         return td.Cylinder(
             center=td.Cylinder.unpop_axis(0.0, (x0, y0), axis=axis),
             radius=radius,
             length=t,
             axis=axis,
-        )  # .to_polyslab(num_pts)
+        ).to_polyslab(num_pts)
 
-    def make_polyslab(radius, x0, y0):
+    def make_polyslab(radius, x0, y0, t):
         phis = anp.linspace(0, 2 * np.pi, num_pts + 1)[:-1]
 
         xs = radius * anp.cos(phis) + x0
@@ -899,7 +913,7 @@ def make_sim(params, geo_maker):
 
         return SIM_BASE.updated_copy(structures=[structure], monitors=[monitor])
 
-    p0 = [1.0, 0.0, 0.0]
+    p0 = [1.0, 0.0, 0.0, t0]
 
     def objective_polyslab(params):
         """Objective function."""
@@ -1311,6 +1325,7 @@ def J(eps):
         bounds=((-1, -1, -1), (1, 1, 1)),
         eps_no_structure=td.SpatialDataArray([[[1.0]]], coords=dict(x=[0], y=[0], z=[0])),
         eps_inf_structure=td.SpatialDataArray([[[2.0]]], coords=dict(x=[0], y=[0], z=[0])),
+        bounds_intersect=((-1, -1, -1), (1, 1, 1)),
     )
 
     grads_computed = pr.compute_derivatives(derivative_info=info)
@@ -1392,6 +1407,7 @@ def J(eps):
         bounds=((-1, -1, -1), (1, 1, 1)),
         eps_no_structure=td.SpatialDataArray([[[1.0]]], coords=dict(x=[0], y=[0], z=[0])),
         eps_inf_structure=td.SpatialDataArray([[[2.0]]], coords=dict(x=[0], y=[0], z=[0])),
+        bounds_intersect=((-1, -1, -1), (1, 1, 1)),
     )
 
     grads_computed = pr.compute_derivatives(derivative_info=info)

diff --git a/tidy3d/components/autograd/derivative_utils.py b/tidy3d/components/autograd/derivative_utils.py
@@ -5,6 +5,7 @@
 import pydantic.v1 as pd
 import xarray as xr
 
+from ...constants import LARGE_NUMBER
 from ..base import Tidy3dBaseModel
 from ..data.data_array import ScalarFieldDataArray, SpatialDataArray
 from ..types import ArrayLike, Bound, tidycomplex
@@ -17,7 +18,29 @@
 
 
 class DerivativeSurfaceMesh(Tidy3dBaseModel):
-    """Stores information about the surfaces of an object to be used for derivative calculation."""
+    """Stores information about the surfaces of an object to be used for derivative calculation.
+
+    user guide: this class is used to construct derivatives with respect to surface elements
+    given some forward and adjoint fields stored in a ``DerivativeInfo`` class. To use it,
+    you must specify the central locations of all the surface elements in ``centers``,
+    along with the area of each element in ``areas``. Then you need to specify three orthogonal
+    vectors (``normals``, ``perps1`` and ``perps2``). The derivative will be computed with
+    respect to a change in material along the ``normals`` direction. It is important to note
+    that the sign of these basis vectors is irrelevant since we end up multiplying the forward
+    and adjoint fields together in these bases.
+
+    After this ``DerivativeSurfaceMesh`` is constructed, given some ``DerivativeInfo`` in the
+    gradient calculation, a call to ``DerivativeInfo.grad_surfaces(x: DerivativeSurfaceMesh)``
+    will return the gradient with respect to a change in all of the provided surfaces.
+
+    The gradient is with respect to a change from the surface element permittivity from the
+    ``eps_in`` to ``eps_out`` field. So in principle it is always computing gradients with
+    respect to an infinitesimal outward shift in the normal direction. For example, for
+    ``PolySlab.vertices``, this means shifting each edge to the background.
+    For ``PolySlab.slab_bounds``, this means shifting the bound in either + or - direction if it
+    is index ``[1]`` or ``[0]`` respectively. This renders the sign of the normal vector
+    irrelevant.
+    """
 
     centers: ArrayLike = pd.Field(
         ...,
@@ -139,6 +162,13 @@ class DerivativeInfo(Tidy3dBaseModel):
         description="Bounds corresponding to the structure, used in ``Medium`` calculations.",
     )
 
+    bounds_intersect: Bound = pd.Field(
+        ...,
+        title="Geometry and Simulation Intersections Bounds",
+        description="Bounds corresponding to the minimum intersection between the "
+        "structure and the simulation it is contained in.",
+    )
+
     frequency: float = pd.Field(
         ...,
         title="Frequency of adjoint simulation",
@@ -191,16 +221,15 @@ def _eps_out(self, spatial_coords: np.ndarray) -> complex | np.ndarray:
 
         if self.eps_no_structure is not None:
             return self.evaluate_eps(spatial_coords, is_inside=False)
-
-        return self.eps_out
 
+        return self.eps_out
 
-    @property
     def delta_eps(self, spatial_coords: np.ndarray) -> complex | np.ndarray:
+        """Change in the permittivity across interface (for E field grads)."""
         return self._eps_in(spatial_coords) - self._eps_out(spatial_coords)
 
-    @property
     def delta_eps_inv(self, spatial_coords: np.ndarray) -> complex | np.ndarray:
+        """Change in 1 / permittivity across interface (for D field grads)."""
         return 1.0 / self._eps_in(spatial_coords) - 1.0 / self._eps_out(spatial_coords)
 
     def grad_surfaces(self, surface_mesh: DerivativeSurfaceMesh) -> dict:
@@ -240,8 +269,8 @@ def grad_surfaces(self, surface_mesh: DerivativeSurfaceMesh) -> dict:
         E_der_perp2 = E_fwd_perp2 * E_adj_perp2
 
         # approximate permittivity in and out
-        delta_eps_inv = self.delta_eps_inv
-        delta_eps = self.delta_eps
+        delta_eps_inv = self.delta_eps_inv(spatial_coords=spatial_coords)
+        delta_eps = self.delta_eps(spatial_coords=spatial_coords)
 
         # put together VJP using D_normal and E_perp integration
         vjps = 0.0
@@ -288,19 +317,20 @@ def evaluate_flds_at(
 
         interp_kwargs = {}
         for dim, locations_dim in zip("xyz", (xs, ys, zs)):
-            # only include dims where the data has more than 1 coord, to avoid warnings and errors
-            if True or all(np.array(fld.coords).size > 1 for fld in fld_dataset.values()):
-                interp_kwargs[dim] = xr.DataArray(locations_dim, dims=edge_index_dim)
+            # filter out any infinity values to use LARGE_NUMBER and get 0 in the interp()
+            locations_dim = np.nan_to_num(locations_dim, posinf=LARGE_NUMBER, neginf=-LARGE_NUMBER)
+            interp_kwargs[dim] = xr.DataArray(locations_dim, dims=edge_index_dim)
 
         components = {}
         for fld_name, arr in fld_dataset.items():
             arr_interp = arr.interp(
                 **interp_kwargs,
                 assume_sorted=True,
-                kwargs={"bounds_error": False, "fill_value": None},
+                kwargs=dict(fill_value=None, bounds_error=False),
             )
             if "f" in arr_interp.coords:
                 arr_interp = arr_interp.sum("f")
+
             components[fld_name] = arr_interp
 
         return components