feat: adjoint equations (#99)

This is a big set of features, most of which was required to support solving the adjoint equations: * Generalises the sensitivitity and adjoint equations into `AugmentedOdeEquations`, a set of n equations with the same jacobian as the main equations * Splits the `OdeEquations` into multiple traits `OdeEquations` (for explicit solvers), `OdeEquationsImplicit` (for implicit solvers, currently all of them), `OdeEquationsSens` (if you want to calculate forward sensitivitites), `OdeEquationsAdjoint` (if you want to calculate adjoint sensitivities) * Solvers can now work in reverse time (many equations will be unstable, but this is used to solve the adjoint equations) * (internal) nonlinear and linear solvers no longer have an `Op` as a generic parameter, so they can be reused for different operators (as long as the number of rows/cols are the same) * Solvers can be checkpointed so they can be re-started from a checkpoint * Hermite interpolation struct added for saving an interpolating along a solution trajectory * Solvers can integrate an output function along the solution trajectory * output function integration, adjoint and forward sensitivity integration can be added or removed from error control for all the solvers. Tolerances can be set via the builder or `OdeSolverEquation` structs.
martinjrobins · Oct 20, 2024 · 29a29ae · 29a29ae
1 parent cfacee1
commit 29a29ae
Show file tree

Hide file tree

Showing 72 changed files with 6,057 additions and 2,212 deletions.
diff --git a/.github/workflows/rust.yml b/.github/workflows/rust.yml
@@ -37,8 +37,6 @@ jobs:
           include:
             - toolchain: beta
               os: ubuntu-latest
-            - toolchain: nightly
-              os: ubuntu-latest
 
     steps:
     - uses: actions/checkout@v4

diff --git a/benches/ode_solvers.rs b/benches/ode_solvers.rs
@@ -546,62 +546,60 @@ criterion_group!(benches, criterion_benchmark);
 criterion_main!(benches);
 
 mod benchmarks {
-    use diffsol::linear_solver::LinearSolver;
     use diffsol::matrix::MatrixRef;
-    use diffsol::op::bdf::BdfCallable;
-    use diffsol::op::sdirk::SdirkCallable;
     use diffsol::vector::VectorRef;
+    use diffsol::LinearSolver;
     use diffsol::{
-        Bdf, DefaultDenseMatrix, DefaultSolver, Matrix, NewtonNonlinearSolver, OdeEquations,
-        OdeSolverMethod, OdeSolverProblem, Sdirk, Tableau,
+        Bdf, DefaultDenseMatrix, DefaultSolver, Matrix, NewtonNonlinearSolver,
+        OdeEquationsImplicit, OdeSolverMethod, OdeSolverProblem, OdeSolverState, Sdirk, Tableau,
     };
 
     // bdf
-    pub fn bdf<Eqn>(
-        problem: &OdeSolverProblem<Eqn>,
-        t: Eqn::T,
-        ls: impl LinearSolver<BdfCallable<Eqn>>,
-    ) where
-        Eqn: OdeEquations,
+    pub fn bdf<Eqn>(problem: &OdeSolverProblem<Eqn>, t: Eqn::T, ls: impl LinearSolver<Eqn::M>)
+    where
+        Eqn: OdeEquationsImplicit,
         Eqn::M: Matrix + DefaultSolver,
         Eqn::V: DefaultDenseMatrix,
         for<'a> &'a Eqn::V: VectorRef<Eqn::V>,
         for<'a> &'a Eqn::M: MatrixRef<Eqn::M>,
     {
         let nls = NewtonNonlinearSolver::new(ls);
         let mut s = Bdf::<<Eqn::V as DefaultDenseMatrix>::M, _, _>::new(nls);
-        let _y = s.solve(problem, t);
+        let state = OdeSolverState::new(problem, &s).unwrap();
+        let _y = s.solve(problem, state, t);
     }
 
     pub fn esdirk34<Eqn>(
         problem: &OdeSolverProblem<Eqn>,
         t: Eqn::T,
-        linear_solver: impl LinearSolver<SdirkCallable<Eqn>>,
+        linear_solver: impl LinearSolver<Eqn::M>,
     ) where
-        Eqn: OdeEquations,
+        Eqn: OdeEquationsImplicit,
         Eqn::M: Matrix + DefaultSolver,
         Eqn::V: DefaultDenseMatrix,
         for<'a> &'a Eqn::V: VectorRef<Eqn::V>,
         for<'a> &'a Eqn::M: MatrixRef<Eqn::M>,
     {
         let tableau = Tableau::<<Eqn::V as DefaultDenseMatrix>::M>::esdirk34();
         let mut s = Sdirk::new(tableau, linear_solver);
-        let _y = s.solve(problem, t);
+        let state = OdeSolverState::new(problem, &s).unwrap();
+        let _y = s.solve(problem, state, t);
     }
 
     pub fn tr_bdf2<Eqn>(
         problem: &OdeSolverProblem<Eqn>,
         t: Eqn::T,
-        linear_solver: impl LinearSolver<SdirkCallable<Eqn>>,
+        linear_solver: impl LinearSolver<Eqn::M>,
     ) where
-        Eqn: OdeEquations,
+        Eqn: OdeEquationsImplicit,
         Eqn::M: Matrix + DefaultSolver,
         Eqn::V: DefaultDenseMatrix,
         for<'a> &'a Eqn::V: VectorRef<Eqn::V>,
         for<'a> &'a Eqn::M: MatrixRef<Eqn::M>,
     {
         let tableau = Tableau::<<Eqn::V as DefaultDenseMatrix>::M>::tr_bdf2();
         let mut s = Sdirk::new(tableau, linear_solver);
-        let _y = s.solve(problem, t);
+        let state = OdeSolverState::new(problem, &s).unwrap();
+        let _y = s.solve(problem, state, t);
     }
 }
diff --git a/benches/plot.py b/benches/plot.py
@@ -17,19 +17,22 @@
         "name": "robertson_ode",
         "reference_name": "robertson_ode_klu",
         "arg": [25, 100, 400, 900],
-        "solvers": ["faer_sparse_bdf_klu"],
+        #"solvers": ["faer_sparse_bdf_klu"],
+        "solvers": ["faer_sparse_bdf"],
     },
     {
         "name": "heat2d",
         "reference_name": "heat2d_klu",
         "arg": [5, 10, 20, 30],
-        "solvers": ["faer_sparse_esdirk_klu", "faer_sparse_tr_bdf2_klu", "faer_sparse_bdf_klu", "faer_sparse_bdf_klu_diffsl"]
+        "solvers": ["faer_sparse_esdirk", "faer_sparse_tr_bdf2", "faer_sparse_bdf", "faer_sparse_bdf", "faer_sparse_bdf_diffsl"]
+        #"solvers": ["faer_sparse_esdirk_klu", "faer_sparse_tr_bdf2_klu", "faer_sparse_bdf_klu", "faer_sparse_bdf_klu_diffsl"]
     },
     {
         "name": "foodweb",
         "reference_name": "foodweb_bnd",
         "arg": [5, 10, 20, 30],
-        "solvers": ["faer_sparse_esdirk_klu", "faer_sparse_tr_bdf2_klu", "faer_sparse_bdf_klu", "faer_sparse_bdf_klu_diffsl"]
+        "solvers": ["faer_sparse_esdirk", "faer_sparse_tr_bdf2", "faer_sparse_bdf", "faer_sparse_bdf_diffsl"]
+        #"solvers": ["faer_sparse_esdirk_klu", "faer_sparse_tr_bdf2_klu", "faer_sparse_bdf_klu", "faer_sparse_bdf_klu_diffsl"]
     },
 ]
 estimates = {}
@@ -116,5 +119,9 @@
 fig1.savefig(f"{basedir}/bench_tr_bdf2_esdirk.svg")
 fig2.savefig(f"{basedir}/bench_bdf.svg")
 fig3.savefig(f"{basedir}/bench_bdf_diffsl.svg")
+basedir = "."
+fig1.savefig(f"{basedir}/bench_tr_bdf2_esdirk.png")
+fig2.savefig(f"{basedir}/bench_bdf.png")
+fig3.savefig(f"{basedir}/bench_bdf_diffsl.png")
 
 
diff --git a/src/error.rs b/src/error.rs
@@ -71,8 +71,8 @@ pub enum OdeSolverError {
     SensitivityNotSupported,
     #[error("Failed to get mutable reference to equations, is there a solver created with this problem?")]
     FailedToGetMutableReference,
-    #[error("atol must have length 1 or equal to the number of states")]
-    AtolLengthMismatch,
+    #[error("Builder error: {0}")]
+    BuilderError(String),
     #[error("t_eval must be increasing and all values must be greater than or equal to the current time")]
     StateProblemMismatch,
     #[error("State is not consistent with the problem equations")]

diff --git a/src/jacobian/mod.rs b/src/jacobian/mod.rs
@@ -1,9 +1,9 @@
 use std::collections::HashSet;
 
-use crate::op::{LinearOp, Op};
-use crate::vector::Vector;
-use crate::Scalar;
-use crate::{op::NonLinearOp, Matrix, MatrixSparsityRef, VectorIndex};
+use crate::{
+    LinearOp, LinearOpTranspose, Matrix, MatrixSparsityRef, NonLinearOp, NonLinearOpAdjoint,
+    NonLinearOpJacobian, NonLinearOpSens, NonLinearOpSensAdjoint, Op, Scalar, Vector, VectorIndex,
+};
 use num_traits::{One, Zero};
 
 use self::{coloring::nonzeros2graph, greedy_coloring::color_graph_greedy};
@@ -12,48 +12,80 @@ pub mod coloring;
 pub mod graph;
 pub mod greedy_coloring;
 
-/// Find the non-zero entries of the Jacobian matrix of a non-linear operator.
-pub fn find_non_zeros_nonlinear<F: NonLinearOp + ?Sized>(
-    op: &F,
-    x: &F::V,
-    t: F::T,
-) -> Vec<(usize, usize)> {
-    let mut v = F::V::zeros(op.nstates());
-    let mut col = F::V::zeros(op.nout());
-    let mut triplets = Vec::with_capacity(op.nstates());
-    for j in 0..op.nstates() {
-        v[j] = F::T::NAN;
-        op.jac_mul_inplace(x, t, &v, &mut col);
-        for i in 0..op.nout() {
-            if col[i].is_nan() {
-                triplets.push((i, j));
+macro_rules! gen_find_non_zeros_nonlinear {
+    ($name:ident, $op_fn:ident, $op_trait:ident) => {
+        /// Find the non-zero entries of the $name matrix of a non-linear operator.
+        pub fn $name<F: NonLinearOp + $op_trait + ?Sized>(
+            op: &F,
+            x: &F::V,
+            t: F::T,
+        ) -> Vec<(usize, usize)> {
+            let mut v = F::V::zeros(op.nstates());
+            let mut col = F::V::zeros(op.nout());
+            let mut triplets = Vec::with_capacity(op.nstates());
+            for j in 0..op.nstates() {
+                v[j] = F::T::NAN;
+                op.$op_fn(x, t, &v, &mut col);
+                for i in 0..op.nout() {
+                    if col[i].is_nan() {
+                        triplets.push((i, j));
+                    }
+                    col[i] = F::T::zero();
+                }
+                v[j] = F::T::zero();
             }
-            col[i] = F::T::zero();
+            triplets
         }
-        v[j] = F::T::zero();
-    }
-    triplets
+    };
 }
 
-/// Find the non-zero entries of the matrix of a linear operator.
-pub fn find_non_zeros_linear<F: LinearOp + ?Sized>(op: &F, t: F::T) -> Vec<(usize, usize)> {
-    let mut v = F::V::zeros(op.nstates());
-    let mut col = F::V::zeros(op.nout());
-    let mut triplets = Vec::with_capacity(op.nstates());
-    for j in 0..op.nstates() {
-        v[j] = F::T::NAN;
-        op.call_inplace(&v, t, &mut col);
-        for i in 0..op.nout() {
-            if col[i].is_nan() {
-                triplets.push((i, j));
+gen_find_non_zeros_nonlinear!(
+    find_jacobian_non_zeros,
+    jac_mul_inplace,
+    NonLinearOpJacobian
+);
+gen_find_non_zeros_nonlinear!(
+    find_adjoint_non_zeros,
+    jac_transpose_mul_inplace,
+    NonLinearOpAdjoint
+);
+gen_find_non_zeros_nonlinear!(find_sens_non_zeros, sens_mul_inplace, NonLinearOpSens);
+gen_find_non_zeros_nonlinear!(
+    find_sens_adjoint_non_zeros,
+    sens_transpose_mul_inplace,
+    NonLinearOpSensAdjoint
+);
+
+macro_rules! gen_find_non_zeros_linear {
+    ($name:ident, $op_fn:ident $(, $op_trait:tt )?) => {
+        /// Find the non-zero entries of the $name matrix of a non-linear operator.
+        pub fn $name<F: LinearOp + ?Sized $(+ $op_trait)?>(op: &F, t: F::T) -> Vec<(usize, usize)> {
+            let mut v = F::V::zeros(op.nstates());
+            let mut col = F::V::zeros(op.nout());
+            let mut triplets = Vec::with_capacity(op.nstates());
+            for j in 0..op.nstates() {
+                v[j] = F::T::NAN;
+                op.$op_fn(&v, t, &mut col);
+                for i in 0..op.nout() {
+                    if col[i].is_nan() {
+                        triplets.push((i, j));
+                    }
+                    col[i] = F::T::zero();
+                }
+                v[j] = F::T::zero();
             }
-            col[i] = F::T::zero();
+            triplets
         }
-        v[j] = F::T::zero();
-    }
-    triplets
+    };
 }
 
+gen_find_non_zeros_linear!(find_matrix_non_zeros, call_inplace);
+gen_find_non_zeros_linear!(
+    find_transpose_non_zeros,
+    call_transpose_inplace,
+    LinearOpTranspose
+);
+
 pub struct JacobianColoring<M: Matrix> {
     dst_indices_per_color: Vec<<M::V as Vector>::Index>,
     src_indices_per_color: Vec<<M::V as Vector>::Index>,
@@ -107,7 +139,7 @@ impl<M: Matrix> JacobianColoring<M> {
     //    Self::new_from_non_zeros(op, non_zeros)
     //}
 
-    pub fn jacobian_inplace<F: NonLinearOp<M = M, V = M::V, T = M::T>>(
+    pub fn jacobian_inplace<F: NonLinearOpJacobian<M = M, V = M::V, T = M::T>>(
         &self,
         op: &F,
         x: &F::V,
@@ -127,6 +159,46 @@ impl<M: Matrix> JacobianColoring<M> {
         }
     }
 
+    pub fn adjoint_inplace<F: NonLinearOpAdjoint<M = M, V = M::V, T = M::T>>(
+        &self,
+        op: &F,
+        x: &F::V,
+        t: F::T,
+        y: &mut F::M,
+    ) {
+        let mut v = F::V::zeros(op.nstates());
+        let mut col = F::V::zeros(op.nout());
+        for c in 0..self.dst_indices_per_color.len() {
+            let input = &self.input_indices_per_color[c];
+            let dst_indices = &self.dst_indices_per_color[c];
+            let src_indices = &self.src_indices_per_color[c];
+            v.assign_at_indices(input, F::T::one());
+            op.jac_transpose_mul_inplace(x, t, &v, &mut col);
+            y.set_data_with_indices(dst_indices, src_indices, &col);
+            v.assign_at_indices(input, F::T::zero());
+        }
+    }
+
+    pub fn sens_adjoint_inplace<F: NonLinearOpSensAdjoint<M = M, V = M::V, T = M::T>>(
+        &self,
+        op: &F,
+        x: &F::V,
+        t: F::T,
+        y: &mut F::M,
+    ) {
+        let mut v = F::V::zeros(op.nstates());
+        let mut col = F::V::zeros(op.nout());
+        for c in 0..self.dst_indices_per_color.len() {
+            let input = &self.input_indices_per_color[c];
+            let dst_indices = &self.dst_indices_per_color[c];
+            let src_indices = &self.src_indices_per_color[c];
+            v.assign_at_indices(input, F::T::one());
+            op.sens_transpose_mul_inplace(x, t, &v, &mut col);
+            y.set_data_with_indices(dst_indices, src_indices, &col);
+            v.assign_at_indices(input, F::T::zero());
+        }
+    }
+
     pub fn matrix_inplace<F: LinearOp<M = M, V = M::V, T = M::T>>(
         &self,
         op: &F,
@@ -151,26 +223,28 @@ impl<M: Matrix> JacobianColoring<M> {
 mod tests {
     use std::rc::Rc;
 
-    use crate::jacobian::{find_non_zeros_linear, find_non_zeros_nonlinear, JacobianColoring};
+    use crate::jacobian::{find_jacobian_non_zeros, JacobianColoring};
     use crate::matrix::sparsity::MatrixSparsityRef;
     use crate::matrix::Matrix;
     use crate::op::linear_closure::LinearClosure;
-    use crate::op::{LinearOp, Op};
     use crate::vector::Vector;
     use crate::{
         jacobian::{coloring::nonzeros2graph, greedy_coloring::color_graph_greedy},
         op::closure::Closure,
+        LinearOp, Op,
     };
-    use crate::{scale, NonLinearOp, SparseColMat};
+    use crate::{scale, NonLinearOpJacobian, SparseColMat};
     use nalgebra::DMatrix;
     use num_traits::{One, Zero};
     use std::ops::MulAssign;
 
+    use super::find_matrix_non_zeros;
+
     fn helper_triplets2op_nonlinear<'a, M: Matrix + 'a>(
         triplets: &'a [(usize, usize, M::T)],
         nrows: usize,
         ncols: usize,
-    ) -> impl NonLinearOp<M = M, V = M::V, T = M::T> + 'a {
+    ) -> impl NonLinearOpJacobian<M = M, V = M::V, T = M::T> + 'a {
         let nstates = ncols;
         let nout = nrows;
         let f = move |x: &M::V, y: &mut M::V| {
@@ -241,7 +315,7 @@ mod tests {
         ];
         for triplets in test_triplets {
             let op = helper_triplets2op_nonlinear::<M>(triplets.as_slice(), 2, 2);
-            let non_zeros = find_non_zeros_nonlinear(&op, &M::V::zeros(2), M::T::zero());
+            let non_zeros = find_jacobian_non_zeros(&op, &M::V::zeros(2), M::T::zero());
             let expect = triplets
                 .iter()
                 .map(|(i, j, _v)| (*i, *j))
@@ -279,7 +353,7 @@ mod tests {
         let expect = vec![vec![1, 1], vec![1, 2], vec![1, 1], vec![1, 2]];
         for (triplets, expect) in test_triplets.iter().zip(expect) {
             let op = helper_triplets2op_nonlinear::<M>(triplets.as_slice(), 2, 2);
-            let non_zeros = find_non_zeros_nonlinear(&op, &M::V::zeros(2), M::T::zero());
+            let non_zeros = find_jacobian_non_zeros(&op, &M::V::zeros(2), M::T::zero());
             let ncols = op.nstates();
             let graph = nonzeros2graph(non_zeros.as_slice(), ncols);
             let coloring = color_graph_greedy(&graph);
@@ -320,7 +394,7 @@ mod tests {
             let op = helper_triplets2op_nonlinear::<M>(triplets.as_slice(), n, n);
             let y0 = M::V::zeros(n);
             let t0 = M::T::zero();
-            let non_zeros = find_non_zeros_nonlinear(&op, &y0, t0);
+            let non_zeros = find_jacobian_non_zeros(&op, &y0, t0);
             let coloring = JacobianColoring::new_from_non_zeros(&op, non_zeros);
             let mut jac = M::new_from_sparsity(3, 3, op.sparsity().map(|s| s.to_owned()));
             coloring.jacobian_inplace(&op, &y0, t0, &mut jac);
@@ -336,7 +410,7 @@ mod tests {
         for triplets in test_triplets {
             let op = helper_triplets2op_linear::<M>(triplets.as_slice(), n, n);
             let t0 = M::T::zero();
-            let non_zeros = find_non_zeros_linear(&op, t0);
+            let non_zeros = find_matrix_non_zeros(&op, t0);
             let coloring = JacobianColoring::new_from_non_zeros(&op, non_zeros);
             let mut jac = M::new_from_sparsity(3, 3, op.sparsity().map(|s| s.to_owned()));
             coloring.matrix_inplace(&op, t0, &mut jac);