Add a get_bias function for fitting experimental/reference kinetic da…

…ta (#79) * Make flake8 be more strict on function complexity * Make CLI receive only biases in kcal/mol * Optimize biases from experiments using get_bias
geem-lab · Oct 13, 2021 · 97cce47 · 97cce47
1 parent e8ae783
commit 97cce47
Show file tree

Hide file tree

Showing 5 changed files with 181 additions and 35 deletions.
diff --git a/.flake8 b/.flake8
@@ -3,7 +3,7 @@ max-line-length = 88
 max-doc-length = 88
 enable-extensions = H106,H203,H204,H205,H210,H904
 extend-ignore = E203
-max-complexity = 20
+max-complexity = 10
 doctests = True
 docstring-convention = numpy
 include-in-doctest = "*.rst"
diff --git a/data b/data
diff --git a/overreact/__init__.py b/overreact/__init__.py
@@ -22,6 +22,7 @@
 from overreact.core import unparse_reactions  # noqa: F401
 from overreact.io import parse_compounds  # noqa: F401
 from overreact.io import parse_model  # noqa: F401
+from overreact.simulate import get_bias  # noqa: F401
 from overreact.simulate import get_dydt  # noqa: F401
 from overreact.simulate import get_fixed_scheme  # noqa: F401
 from overreact.simulate import get_y  # noqa: F401

diff --git a/overreact/cli.py b/overreact/cli.py
@@ -410,6 +410,43 @@ def _yield_kinetics(self):
         ------
         renderable
         """
+        if isinstance(self.bias, str):
+            data = np.genfromtxt(
+                self.bias,
+                names=True,
+                delimiter=",",
+            )
+            data = {name: data[name] for name in data.dtype.names}
+
+            scheme, _, y0 = _prepare_simulation(
+                self.model.scheme,
+                rx.get_k(
+                    self.model.scheme,
+                    self.model.compounds,
+                    bias=0.0,
+                    tunneling=self.tunneling,
+                    qrrho=self.qrrho,
+                    scale="l mol-1 s-1",
+                    temperature=self.temperature,
+                    pressure=self.pressure,
+                ),
+                self.concentrations,
+            )
+            # TODO(schneiderfelipe): support schemes with fixed concentrations
+            self.bias = rx.get_bias(
+                scheme,
+                self.model.compounds,
+                data,
+                y0,
+                tunneling=self.tunneling,
+                qrrho=self.qrrho,
+                temperature=self.temperature,
+                pressure=self.pressure,
+                method=self.method,
+                rtol=self.rtol,
+                atol=self.atol,
+            )
+
         # TODO(schneiderfelipe): apply other corrections to k (such as
         # diffusion control).
         # TODO(schneiderfelipe): use pressure.
@@ -485,36 +522,9 @@ def _yield_kinetics(self):
         yield Markdown("For **half-equilibria**, only ratios make sense.")
 
         if self.concentrations is not None and self.concentrations:
-            k = k["M⁻ⁿ⁺¹·s⁻¹"]
-
-            free_y0 = {}
-            fixed_y0 = {}
-            for spec in self.concentrations:
-                fields = spec.split(":", 1)
-                name, quantity = fields[0].strip(), fields[1].strip()
-
-                if quantity.startswith("!"):
-                    d = fixed_y0
-                    quantity = quantity[1:]
-                else:
-                    d = free_y0
-
-                try:
-                    quantity = float(quantity)
-                except (IndexError, ValueError):
-                    raise ValueError(
-                        "badly formatted concentrations: "
-                        f"'{' '.join(self.concentrations)}'"
-                    )
-
-                d[name] = quantity
-
-            # TODO(schneiderfelipe): log stuff related to get_fixed_scheme
-            scheme, k = rx.get_fixed_scheme(self.model.scheme, k, fixed_y0)
-
-            y0 = np.zeros(len(scheme.compounds))
-            for compound in free_y0:
-                y0[scheme.compounds.index(compound)] = free_y0[compound]
+            scheme, k, y0 = _prepare_simulation(
+                self.model.scheme, k["M⁻ⁿ⁺¹·s⁻¹"], self.concentrations
+            )
 
             # TODO(schneiderfelipe): encapsulate everything in a function that depends
             # on the freeenergies as first parameter
@@ -602,6 +612,39 @@ def _yield_kinetics(self):
                 yield Markdown(f"Simulation data was saved to **{self.savepath}**")
 
 
+def _prepare_simulation(scheme, k, concentrations):
+    """Helper for preparing some data before simulation."""
+    free_y0 = {}
+    fixed_y0 = {}
+    for spec in concentrations:
+        fields = spec.split(":", 1)
+        name, quantity = fields[0].strip(), fields[1].strip()
+
+        if quantity.startswith("!"):
+            d = fixed_y0
+            quantity = quantity[1:]
+        else:
+            d = free_y0
+
+        try:
+            quantity = float(quantity)
+        except (IndexError, ValueError):
+            raise ValueError(
+                "badly formatted concentrations: " f"'{' '.join(concentrations)}'"
+            )
+
+        d[name] = quantity
+
+    # TODO(schneiderfelipe): log stuff related to get_fixed_scheme
+    scheme, k = rx.get_fixed_scheme(scheme, k, fixed_y0)
+
+    y0 = np.zeros(len(scheme.compounds))
+    for compound in free_y0:
+        y0[scheme.compounds.index(compound)] = free_y0[compound]
+
+    return scheme, k, y0
+
+
 def main():
     """Command-line interface."""
     console = Console(width=max(105, shutil.get_terminal_size()[0]))
@@ -659,9 +702,8 @@ def main():
     parser.add_argument(
         "-b",
         "--bias",
-        help="an energy value (in joules per mole) to be added to each "
+        help="an energy value (in kilocalories per mole) to be added to each "
         "indiviual compound in order to mitigate eventual systematic errors",
-        type=float,
         default=0.0,
     )
     parser.add_argument(
@@ -723,6 +765,12 @@ def main():
     )
     args = parser.parse_args()
 
+    try:
+        args.bias = float(args.bias) * constants.kcal
+        bias_message = f"{args.bias} kcal/mol"
+    except ValueError:
+        bias_message = f"fitting from {args.bias}"
+
     console.print(
         Markdown(
             f"""
@@ -742,7 +790,7 @@ def main():
 - Max. Time      = {args.max_time}
 - Rel. Tol.      = {args.rtol}
 - Abs. Tol.      = {args.atol}
-- Bias           = {args.bias / constants.kcal} kcal/mol
+- Bias           = {bias_message}
 - Tunneling      = {args.tunneling}
 
 Parsing and calculating…

diff --git a/overreact/simulate.py b/overreact/simulate.py
@@ -10,8 +10,10 @@
 
 import numpy as np
 from scipy.integrate import solve_ivp
+from scipy.optimize import minimize_scalar
 
 import overreact as rx
+from overreact import constants
 from overreact.misc import _found_jax
 
 if _found_jax:
@@ -501,3 +503,98 @@ def get_fixed_scheme(scheme, k, fixed_y0):
         ),
         new_k,
     )
+
+
+def get_bias(
+    scheme,
+    compounds,
+    data,
+    y0,
+    tunneling="eckart",
+    qrrho=True,
+    temperature=298.15,
+    pressure=constants.atm,
+    method="Radau",
+    rtol=1e-5,
+    atol=1e-11,
+):
+    r"""Estimate a energy bias for a given set of reference data points.
+
+    Parameters
+    ----------
+    scheme : Scheme
+    data : dict-like of array-like
+    compounds : dict-like, optional
+    tunneling : str or None, optional
+        Choose between "eckart", "wigner" or None (or "none").
+    qrrho : bool or tuple-like, optional
+        Apply both the quasi-rigid rotor harmonic oscilator (QRRHO)
+        approximations of M. Head-Gordon (enthalpy correction, see
+        doi:10.1021/jp509921r) and S. Grimme (entropy correction, see
+        doi:10.1002/chem.201200497) on top of the classical RRHO.
+    temperature : array-like, optional
+        Absolute temperature in Kelvin.
+    pressure : array-like, optional
+        Reference gas pressure.
+    delta_freeenergies : array-like, optional
+    molecularity : array-like, optional
+        Reaction order, i.e., number of molecules that come together to react.
+        If set, this is used to calculate `delta_moles` for
+        `equilibrium_constant`, which effectively calculates a solution
+        equilibrium constant between reactants and the transition state for
+        gas phase data. You should set this to `None` if your free energies
+        were already adjusted for solution Gibbs free energies.
+    volume : float, optional
+        Molar volume.
+
+    Returns
+    -------
+    array-like
+
+    Examples
+    --------
+    >>> model = rx.parse_model("data/tanaka1996/UMP2/cc-pVTZ/model.jk")
+
+    The following are some estimates on actual atmospheric concentrations:
+
+    >>> y0 = [4.8120675684099e-05, 2.8206357713028517e-05, 0.0, 0.0, 2.7426565371218556e-05]
+    >>> data = {"t": [1.276472128376942246e-06, 1.446535794555581743e-04, 1.717069678525567564e-02],
+    ...         "CH3·": [9.694916853338366211e-09, 1.066033349343709026e-06, 2.632179124780495175e-05]}
+    >>> get_bias(model.scheme, data, y0, model.compounds) / constants.kcal
+    -1.364171
+    """
+    max_time = np.max(data["t"])
+
+    def f(bias):
+        k = rx.get_k(
+            scheme,
+            compounds,
+            bias=bias,
+            tunneling=tunneling,
+            qrrho=qrrho,
+            temperature=temperature,
+            pressure=pressure,
+        )
+
+        # TODO(schneiderfelipe): support schemes with fixed concentrations
+        dydt = rx.get_dydt(scheme, k)
+        y, _ = rx.get_y(
+            dydt,
+            y0=y0,
+            method=method,
+            rtol=rtol,
+            atol=atol,
+            max_time=max_time,
+        )
+
+        yhat = y(data["t"])
+        return np.sum(
+            [
+                (yhat[i] - data[name]) ** 2
+                for (i, name) in enumerate(compounds)
+                if name in data
+            ]
+        )
+
+    res = minimize_scalar(f)
+    return res.x