🩹 Add number_of_clps to result and correct degrees_of_freedom calculation (#1249)

This PR adds the number of conditionally linear parameters and the correct degrees of freedom to the optimization results.

Also, Result.number_of_data_points previously reported the number of residuals rather than the number of data points, and there is a (small) difference between the two. Now we simply report the (correctly named) number of residuals.
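For clarity, here is a minimal, self-contained sketch of the corrected bookkeeping (the numbers and variable names are illustrative only, not the actual glotaran API):

```python
# Illustrative sketch of the corrected statistics; all numbers are made up.
import numpy as np

residuals = np.random.default_rng(0).normal(size=600)  # stand-in residual vector
number_of_residuals = residuals.size       # previously misreported as number_of_data_points
number_of_parameters = 4                   # N_vars: free (nonlinear) parameters
number_of_clps = 100                       # N_clps: conditionally linear parameters (new)

chi_square = float(np.sum(residuals**2))
# Degrees of freedom now also subtract the conditionally linear parameters.
degrees_of_freedom = number_of_residuals - number_of_parameters - number_of_clps
reduced_chi_square = chi_square / degrees_of_freedom
print(degrees_of_freedom, reduced_chi_square)
```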

* 🗑️ Deprecated Result.number_of_data_points in favor of Result.number_of_residuals
* ✨ Add number_of_clps property to MatrixProvider's and OptimizationGroup
* 👌 Add number_of_clps to result and adjust degrees_of_freedom calculation
* 🧪 Add number_of_clps unit tests for relation, constraints and penalties
* 👌🧪 Changed relation and constraints unit tests to use intervals
* 🧪 Added number_of_clps unit test for full model
* 🧹 Fix typos in error docstrings
* 🚧📚 Added change to changelog
s-weigand authored Feb 20, 2023
1 parent be140b8 commit 356b934
Showing 12 changed files with 179 additions and 16 deletions.
2 changes: 2 additions & 0 deletions changelog.md
@@ -22,6 +22,7 @@
- 👌 Make yaml the default plugin when passing a folder to save_result and load_result (#1230)
- ✨ Allow usage of subfolders in project API for parameters, models and data (#1232)
- ✨ Allow import of xarray objects in project API import_data (#1235)
- 🩹 Add number_of_clps to result and correct degrees_of_freedom calculation (#1249)

### 🩹 Bug fixes

@@ -51,6 +52,7 @@
- Command Line Interface (removed without replacement) (#1228)
- `Project.generate_model` (removed without replacement)
- `Project.generate_parameters` (removed without replacement)
- `glotaran.project.Result.number_of_data_points` -> `glotaran.project.Result.number_of_residuals`

### 🗑️❌ Deprecated functionality removed in this release

@@ -222,3 +222,15 @@ def test_kinetic_model(suite, nnls):
assert dataset.data.shape == resultdata.data.shape
assert dataset.data.shape == resultdata.fitted_data.shape
assert np.allclose(dataset.data, resultdata.fitted_data, rtol=1e-2)

# 3 compartments * 3 shapes (dataset1)
# + 3 shapes * 3 compartments (dataset2)
expected_number_of_clps_common = 18
if suite is ThreeComponentParallel:
# + 3 compartments * 15 items in global axis (dataset3)
# + 3 shapes * 74 items in global axis (dataset4)
assert result.number_of_clps == expected_number_of_clps_common + 45 + 222
else:
# + 3 compartments * 30 items in global axis (dataset3)
# + 3 shapes * 60 items in global axis (dataset4)
assert result.number_of_clps == expected_number_of_clps_common + 90 + 180
24 changes: 24 additions & 0 deletions glotaran/deprecation/modules/test/test_project_result.py
@@ -0,0 +1,24 @@
"""Test deprecations for ``glotaran.project.project``."""

from __future__ import annotations

from pathlib import Path

import pytest

from glotaran.deprecation import GlotaranApiDeprecationWarning
from glotaran.optimization.optimize import optimize
from glotaran.testing.simulated_data.sequential_spectral_decay import SCHEME


def test_project_generate_model():
"""Trow deprecation warning on accessing ``Result.number_of_data_points``."""
print(SCHEME.data["dataset_1"])
result = optimize(SCHEME, raise_exception=True)
with pytest.warns(GlotaranApiDeprecationWarning) as records:
result.number_of_data_points

assert len(records) == 1
assert Path(records[0].filename) == Path(
__file__
), f"{Path(records[0].filename)=}, {Path(__file__)=}"
55 changes: 55 additions & 0 deletions glotaran/optimization/matrix_provider.py
@@ -461,6 +461,22 @@ def calculate(self):
"""
raise NotImplementedError

@property
def number_of_clps(self) -> int:
"""Return number of conditionally linear parameters.
Raises
------
NotImplementedError
This property needs to be implemented by subclasses.
See Also
--------
MatrixProviderUnlinked
MatrixProviderLinked
"""
raise NotImplementedError


class MatrixProviderUnlinked(MatrixProvider):
"""A class to provide matrix calculations for optimization of unlinked dataset groups."""
@@ -589,6 +605,31 @@ def calculate_full_matrices(self):

self._full_matrices[label] = full_matrix

@property
def number_of_clps(self) -> int:
"""Return number of conditionally linear parameters.
Returns
-------
int
"""
nr_of_clps = 0
for dataset_label, dataset_model in self.group.dataset_models.items():
if has_dataset_model_global_model(dataset_model):
model_clp_labels = self.get_matrix_container(dataset_label).clp_labels
global_clp_labels = self.get_global_matrix_container(dataset_label).clp_labels
nr_of_clps += len(model_clp_labels) * len(global_clp_labels)
else:
global_axis_indexes = range(
len(self._data_provider.get_global_axis(dataset_label))
)
nr_of_clps += sum(
len(self.get_prepared_matrix_container(dataset_label, index).clp_labels)
for index in global_axis_indexes
)

return nr_of_clps


class MatrixProviderLinked(MatrixProvider):
"""A class to provide matrix calculations for optimization of linked dataset groups."""
@@ -755,3 +796,17 @@ def align_matrices(matrices: list[MatrixContainer], scales: list[float]) -> MatrixContainer:
start = end

return MatrixContainer(full_clp_labels, full_matrix)

@property
def number_of_clps(self) -> int:
"""Return number of conditionally linear parameters.
Returns
-------
int
"""
global_axis_indexes = range(len(self._data_provider.aligned_global_axis))
return sum(
len(self.get_aligned_matrix_container(index).clp_labels)
for index in global_axis_indexes
)
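For intuition, the counting rule these number_of_clps properties implement can be sketched stand-alone (all labels and axis sizes below are made-up example data, not the real MatrixProvider API):

```python
# Unlinked, index-independent case: each dataset contributes
# (number of clp labels) * (number of global-axis points).
clp_labels = {"dataset1": ["s1", "s2"], "dataset2": ["s1", "s2"]}
global_axis_size = {"dataset1": 3, "dataset2": 4}

number_of_clps_unlinked = sum(
    len(labels) * global_axis_size[name] for name, labels in clp_labels.items()
)
print(number_of_clps_unlinked)  # (2 * 3) + (2 * 4) = 14

# Linked case: datasets share an aligned global axis, so the clp labels of the
# aligned matrix are counted once per aligned global-axis point.
aligned_clp_labels = [["s1", "s2"]] * 5  # 5 aligned global-axis points
number_of_clps_linked = sum(len(labels) for labels in aligned_clp_labels)
print(number_of_clps_linked)  # 2 * 5 = 10
```

These two totals match the expectations asserted in test_matrix_provider.py further down.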
10 changes: 10 additions & 0 deletions glotaran/optimization/optimization_group.py
@@ -212,3 +212,13 @@ def add_svd_data(name: str, dataset: xr.Dataset, lsv_dim: str, rsv_dim: str):
add_svd_to_dataset(
dataset, name=name, lsv_dim=lsv_dim, rsv_dim=rsv_dim, data_array=dataset[name]
)

@property
def number_of_clps(self) -> int:
"""Return number of conditionally linear parameters.
Returns
-------
int
"""
return self._matrix_provider.number_of_clps
18 changes: 12 additions & 6 deletions glotaran/optimization/optimizer.py
@@ -19,6 +19,7 @@

if TYPE_CHECKING:
from glotaran.typing.types import ArrayLike

SUPPORTED_METHODS = {
"TrustRegionReflection": "trf",
"Dogbox": "dogbox",
@@ -27,23 +28,23 @@


class InitialParameterError(ValueError):
"""Inidcates that initial parameters can not be evaluated."""
"""Indicates that initial parameters can not be evaluated."""

def __init__(self):
"""Initialize a InitialParameterError."""
super().__init__("Initial parameters can not be evaluated.")


class ParameterNotInitializedError(ValueError):
"""Inidcates that scheme parameters are not initialized."""
"""Indicates that scheme parameters are not initialized."""

def __init__(self):
"""Initialize a ParameterNotInitializedError."""
super().__init__("Parameter not initialized")


class MissingDatasetsError(ValueError):
"""Inidcates that datasets are missing in the scheme."""
"""Indicates that datasets are missing in the scheme."""

def __init__(self, missing_datasets: list[str]):
"""Initialize a MissingDatasetsError.
@@ -57,7 +58,7 @@ def __init__(self, missing_datasets: list[str]):


class UnsupportedMethodError(ValueError):
"""Inidcates that the optimization method is unsupported."""
"""Indicates that the optimization method is unsupported."""

def __init__(self, method: str):
"""Initialize an UnsupportedMethodError.
@@ -230,10 +231,15 @@ def create_result(self) -> Result:
if success:
result_args["number_of_jacobian_evaluations"] = self._optimization_result.njev
result_args["optimality"] = float(self._optimization_result.optimality)
result_args["number_of_data_points"] = self._optimization_result.fun.size
result_args["number_of_residuals"] = self._optimization_result.fun.size
result_args["number_of_clps"] = sum(
group.number_of_clps for group in self._optimization_groups
)
result_args["number_of_parameters"] = self._optimization_result.x.size
result_args["degrees_of_freedom"] = (
result_args["number_of_data_points"] - result_args["number_of_parameters"]
result_args["number_of_residuals"]
- result_args["number_of_parameters"]
- result_args["number_of_clps"]
)
result_args["chi_square"] = float(np.sum(self._optimization_result.fun**2))
result_args["reduced_chi_square"] = (
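As a toy illustration of how create_result now rolls the per-group counts into the fit statistics (the class and numbers below are stand-ins, not the real optimizer internals):

```python
# Toy stand-ins for optimization groups exposing number_of_clps.
from dataclasses import dataclass


@dataclass
class FakeOptimizationGroup:
    number_of_clps: int


optimization_groups = [FakeOptimizationGroup(18), FakeOptimizationGroup(285)]
number_of_clps = sum(group.number_of_clps for group in optimization_groups)

number_of_residuals = 10_000  # size of the residual vector
number_of_parameters = 12     # free nonlinear parameters
degrees_of_freedom = number_of_residuals - number_of_parameters - number_of_clps
print(number_of_clps, degrees_of_freedom)  # 303, 9685
```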
2 changes: 1 addition & 1 deletion glotaran/optimization/test/suites.py
@@ -46,7 +46,7 @@ class TwoCompartmentDecay:
wanted_parameters = Parameters.from_list([11e-4, 22e-5])
initial_parameters = Parameters.from_list([10e-4, 20e-5])

global_axis = np.asarray([1.0])
global_axis = np.asarray([1.0, 2])
model_axis = np.arange(0, 150, 1.5)

sim_model = DecayModel(
9 changes: 7 additions & 2 deletions glotaran/optimization/test/test_constraints.py
@@ -15,7 +15,7 @@ def test_constraint(index_dependent, link_clp):
model = deepcopy(suite.model)
model.dataset_groups["default"].link_clp = link_clp
model.megacomplex["m1"].is_index_dependent = index_dependent
model.clp_constraints.append(ZeroConstraint(**{"target": "s2"}))
model.clp_constraints.append(ZeroConstraint(**{"target": "s2", "interval": (1, 1)}))

print("link_clp", link_clp, "index_dependent", index_dependent)
dataset = simulate(
@@ -41,5 +41,10 @@

assert "s2" not in reduced_matrix.clp_labels
assert "s2" in clps.coords["clp_label"]
assert clps.sel(clp_label="s2") == 0
assert clps.sel(clp_label="s2")[0] == 0
assert clps.sel(clp_label="s2")[1] != 0
assert "s2" in matrix.clp_labels

# 1 compartment * 2 items in global axis
# + 1 compartment * 1 item in global axis
assert optimization_group.number_of_clps == 3
16 changes: 16 additions & 0 deletions glotaran/optimization/test/test_matrix_provider.py
@@ -71,6 +71,10 @@ def test_matrix_provider_unlinked_index_independent(scheme: Scheme):
assert matrices["dataset2"].shape == (scheme.data["dataset2"].model.size, 2)
assert all(matrices["dataset2"].clp_label == ["s1", "s2"])

# 2 compartments * 3 items in global axis of dataset1
# + 2 compartments * 4 items in global axis of dataset2
assert matrix_provider.number_of_clps == (2 * 3) + (2 * 4)


def test_matrix_provider_linked_index_independent(scheme: Scheme):
dataset_group = scheme.model.get_dataset_groups()["default"]
@@ -109,6 +113,10 @@ def test_matrix_provider_linked_index_independent(scheme: Scheme):
)
assert matrix_provider.get_aligned_matrix_container(4).matrix.shape == (dataset2_size, 2)

# 2 compartments * 5 items in aligned global axis
# See also: test_data_provider_linking_methods
assert matrix_provider.number_of_clps == 2 * 5


def test_matrix_provider_unlinked_index_dependent(scheme: Scheme):
scheme.model.megacomplex["m1"].is_index_dependent = True # type:ignore[attr-defined]
@@ -132,6 +140,10 @@ def test_matrix_provider_unlinked_index_dependent(scheme: Scheme):
assert matrices["dataset2"].shape == (dataset2_global_size, dataset2_model_size, 2)
assert all(matrices["dataset2"].clp_label == ["s1", "s2"])

# 2 compartments * 3 items in global axis of dataset1
# + 2 compartments * 4 items in global axis of dataset2
assert matrix_provider.number_of_clps == (2 * 3) + (2 * 4)


def test_matrix_provider_linked_index_dependent(scheme: Scheme):
scheme.model.megacomplex["m1"].is_index_dependent = True # type:ignore[attr-defined]
@@ -172,3 +184,7 @@ def test_matrix_provider_linked_index_dependent(scheme: Scheme):
2,
)
assert matrix_provider.get_aligned_matrix_container(4).matrix.shape == (dataset2_model_size, 2)

# 2 compartments * 5 items in aligned global axis
# See also: test_data_provider_linking_methods
assert matrix_provider.number_of_clps == 2 * 5
3 changes: 3 additions & 0 deletions glotaran/optimization/test/test_penalties.py
@@ -53,3 +53,6 @@ def test_penalties(index_dependent, link_clp):
assert full_penalty.size == (suite.model_axis.size * global_axis.size) + len(
additional_penalty
)

# 2 compartments * 50 items in global axis
assert optimization_group.number_of_clps == 100
11 changes: 9 additions & 2 deletions glotaran/optimization/test/test_relations.py
@@ -16,7 +16,7 @@ def test_relations(index_dependent, link_clp):
model = deepcopy(suite.model)
model.dataset_groups["default"].link_clp = link_clp
model.megacomplex["m1"].is_index_dependent = index_dependent
model.clp_relations.append(ClpRelation(**{"source": "s1", "target": "s2", "parameter": "3"}))
model.clp_relations.append(
ClpRelation(**{"source": "s1", "target": "s2", "parameter": "3", "interval": (1, 1)})
)
parameters = Parameters.from_list([11e-4, 22e-5, 2])

print("link_clp", link_clp, "index_dependent", index_dependent) # T201
@@ -43,5 +45,10 @@

assert "s2" not in reduced_matrix.clp_labels
assert "s2" in clps.coords["clp_label"]
assert clps.sel(clp_label="s2") == clps.sel(clp_label="s1") * 2
assert clps.sel(clp_label="s2")[0] == clps.sel(clp_label="s1")[0] * 2
assert clps.sel(clp_label="s2")[1] != clps.sel(clp_label="s1")[1] * 2
assert "s2" in matrix.clp_labels

# 1 compartment (s1 due to relation) * 2 items in global axis
# + 1 compartment (s2 outside of interval) * 1 item in global axis
assert optimization_group.number_of_clps == 3
33 changes: 28 additions & 5 deletions glotaran/project/result.py
@@ -13,6 +13,7 @@
from numpy.typing import ArrayLike
from tabulate import tabulate

from glotaran.deprecation import deprecate
from glotaran.io import SAVING_OPTIONS_DEFAULT
from glotaran.io import SavingOptions
from glotaran.io import load_result
@@ -105,15 +106,17 @@ class Result:
The rows and columns are corresponding to :attr:`free_parameter_labels`."""

degrees_of_freedom: int | None = None
"""Degrees of freedom in optimization :math:`N - N_{vars}`."""
"""Degrees of freedom in optimization :math:`N - N_{vars} - N_{clps}`."""
number_of_clps: int | None = None
"""Number of conditionally linear parameters :math:`N_{clps}`."""

jacobian: ArrayLike | list | None = exclude_from_dict_field(None)
"""Modified Jacobian matrix at the solution
See also: :func:`scipy.optimize.least_squares`
"""
number_of_data_points: int | None = None
"""Number of data points :math:`N`."""
number_of_residuals: int | None = None
"""Number of values in the residual vector :math:`N`."""
number_of_jacobian_evaluations: int | None = None
"""The number of jacobian evaluations."""
number_of_parameters: int | None = None
@@ -122,7 +125,7 @@
reduced_chi_square: float | None = None
r"""The reduced chi-square of the optimization.
:math:`\chi^2_{red}= {\chi^2} / {(N - N_{vars})}`.
:math:`\chi^2_{red}= {\chi^2} / {(N - N_{vars} - N_{clps})}`.
"""
root_mean_square_error: float | None = None
r"""
@@ -144,6 +147,25 @@ def __post_init__(self):
self.jacobian = np.array(self.jacobian)
self.covariance_matrix = np.array(self.covariance_matrix)

@property
@deprecate(
deprecated_qual_name_usage="glotaran.project.Result.number_of_data_points",
new_qual_name_usage="glotaran.project.Result.number_of_residuals",
to_be_removed_in_version="0.8.0",
importable_indices=(2, 2),
)
def number_of_data_points(self) -> int | None:
"""Return the number of values in the residual vector :math:`N`.
Deprecated since it returned the wrong value.
Returns
-------
int | None
Number of values in the residual vector :math:`N`.
"""
return self.number_of_residuals

@property
def model(self) -> Model:
"""Return the model used to fit result.
@@ -198,8 +220,9 @@ def markdown(
"""
general_table_rows: list[list[Any]] = [
["Number of residual evaluation", self.number_of_function_evaluations],
["Number of residuals", self.number_of_residuals],
["Number of parameters", self.number_of_parameters],
["Number of datapoints", self.number_of_data_points],
["Number of conditionally linear parameters", self.number_of_clps],
["Degrees of freedom", self.degrees_of_freedom],
["Chi Square", f"{self.chi_square or np.nan:.2e}"],
["Reduced Chi Square", f"{self.reduced_chi_square or np.nan:.2e}"],
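Finally, a short usage sketch of the renamed attribute, reusing the fixtures from the deprecation test above (the printed values depend on the simulated data, so they are not reproduced here):

```python
# Usage sketch: the deprecated attribute forwards to the new one and emits a warning.
import warnings

from glotaran.optimization.optimize import optimize
from glotaran.testing.simulated_data.sequential_spectral_decay import SCHEME

result = optimize(SCHEME, raise_exception=True)

with warnings.catch_warnings(record=True) as caught:
    warnings.simplefilter("always")
    n_old = result.number_of_data_points  # deprecated alias for number_of_residuals

assert n_old == result.number_of_residuals
print(f"number_of_residuals={result.number_of_residuals}, number_of_clps={result.number_of_clps}")
print(f"degrees_of_freedom={result.degrees_of_freedom}, warnings emitted: {len(caught)}")
```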
