🩹 Add number_of_clps to result and correct degrees_of_freedom calculation (#1249)

This PR adds the number of conditionally linear parameters and the correct degrees of freedom to the optimization results.

Also, Result.number_of_data_points previously reported the number of residuals rather than the number of data points, and there is a (small) difference between the two. Now we simply report the (correctly named) number of residuals.
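For clarity, here is a minimal, self-contained sketch of the corrected bookkeeping (the numbers and variable names are illustrative only, not the actual glotaran API):

```python
# Illustrative sketch of the corrected statistics; all numbers are made up.
import numpy as np

residuals = np.random.default_rng(0).normal(size=600)  # stand-in residual vector
number_of_residuals = residuals.size       # previously misreported as number_of_data_points
number_of_parameters = 4                   # N_vars: free (nonlinear) parameters
number_of_clps = 100                       # N_clps: conditionally linear parameters (new)

chi_square = float(np.sum(residuals**2))
# Degrees of freedom now also subtract the conditionally linear parameters.
degrees_of_freedom = number_of_residuals - number_of_parameters - number_of_clps
reduced_chi_square = chi_square / degrees_of_freedom
print(degrees_of_freedom, reduced_chi_square)
```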

* 🗑️ Deprecated Result.number_of_data_points in favor of Result.number_of_residuals
* ✨ Add number_of_clps property to MatrixProvider's and OptimizationGroup
* 👌 Add number_of_clps to result and adjust degrees_of_freedom calculation
* 🧪 Add number_of_clps unit tests for relation, constraints and penalties
* 👌🧪 Changed relation and constraints unit tests to use intervals
* 🧪 Added number_of_clps unit test for full model
* 🧹 Fix typos in error docstrings
* 🚧📚 Added change to changelog
s-weigand authored Feb 20, 2023
1 parent be140b8 commit 356b934
Showing 12 changed files with 179 additions and 16 deletions.
2 changes: 2 additions & 0 deletions changelog.md
@@ -22,6 +22,7 @@
- 👌 Make yaml the default plugin when passing a folder to save_result and load_result (#1230)
- ✨ Allow usage of subfolders in project API for parameters, models and data (#1232)
- ✨ Allow import of xarray objects in project API import_data (#1235)
- 🩹 Add number_of_clps to result and correct degrees_of_freedom calculation (#1249)

### 🩹 Bug fixes

@@ -51,6 +52,7 @@
- Command Line Interface (removed without replacement) (#1228)
- `Project.generate_model` (removed without replacement)
- `Project.generate_parameters` (removed without replacement)
- `glotaran.project.Result.number_of_data_points` -> `glotaran.project.Result.number_of_residuals`

### 🗑️❌ Deprecated functionality removed in this release

@@ -222,3 +222,15 @@ def test_kinetic_model(suite, nnls):
assert dataset.data.shape == resultdata.data.shape
assert dataset.data.shape == resultdata.fitted_data.shape
assert np.allclose(dataset.data, resultdata.fitted_data, rtol=1e-2)

# 3 compartments * 3 shapes (dataset1)
# + 3 shapes * 3 compartments (dataset2)
expected_number_of_clps_common = 18
if suite is ThreeComponentParallel:
# + 3 compartments * 15 items in global axis (dataset3)
# + 3 shapes * 74 items in global axis (dataset4)
assert result.number_of_clps == expected_number_of_clps_common + 45 + 222
else:
# + 3 compartments * 30 items in global axis (dataset3)
# + 3 shapes * 60 items in global axis (dataset4)
assert result.number_of_clps == expected_number_of_clps_common + 90 + 180
24 changes: 24 additions & 0 deletions glotaran/deprecation/modules/test/test_project_result.py
@@ -0,0 +1,24 @@
"""Test deprecations for ``glotaran.project.project``."""

from __future__ import annotations

from pathlib import Path

import pytest

from glotaran.deprecation import GlotaranApiDeprecationWarning
from glotaran.optimization.optimize import optimize
from glotaran.testing.simulated_data.sequential_spectral_decay import SCHEME


def test_project_generate_model():
"""Trow deprecation warning on accessing ``Result.number_of_data_points``."""
print(SCHEME.data["dataset_1"])
result = optimize(SCHEME, raise_exception=True)
with pytest.warns(GlotaranApiDeprecationWarning) as records:
result.number_of_data_points

assert len(records) == 1
assert Path(records[0].filename) == Path(
__file__
), f"{Path(records[0].filename)=}, {Path(__file__)=}"
55 changes: 55 additions & 0 deletions glotaran/optimization/matrix_provider.py
@@ -461,6 +461,22 @@ def calculate(self):
"""
raise NotImplementedError

@property
def number_of_clps(self) -> int:
"""Return number of conditionally linear parameters.
Raises
------
NotImplementedError
This property needs to be implemented by subclasses.
See Also
--------
MatrixProviderUnlinked
MatrixProviderLinked
"""
raise NotImplementedError


class MatrixProviderUnlinked(MatrixProvider):
"""A class to provide matrix calculations for optimization of unlinked dataset groups."""
@@ -589,6 +605,31 @@ def calculate_full_matrices(self):

self._full_matrices[label] = full_matrix

@property
def number_of_clps(self) -> int:
"""Return number of conditionally linear parameters.
Returns
-------
int
"""
nr_of_clps = 0
for dataset_label, dataset_model in self.group.dataset_models.items():
if has_dataset_model_global_model(dataset_model):
model_clp_labels = self.get_matrix_container(dataset_label).clp_labels
global_clp_labels = self.get_global_matrix_container(dataset_label).clp_labels
nr_of_clps += len(model_clp_labels) * len(global_clp_labels)
else:
global_axis_indexes = range(
len(self._data_provider.get_global_axis(dataset_label))
)
nr_of_clps += sum(
len(self.get_prepared_matrix_container(dataset_label, index).clp_labels)
for index in global_axis_indexes
)

return nr_of_clps


class MatrixProviderLinked(MatrixProvider):
"""A class to provide matrix calculations for optimization of linked dataset groups."""
@@ -755,3 +796,17 @@ def align_matrices(matrices: list[MatrixContainer], scales: list[float]) -> MatrixContainer:
start = end

return MatrixContainer(full_clp_labels, full_matrix)

@property
def number_of_clps(self) -> int:
"""Return number of conditionally linear parameters.
Returns
-------
int
"""
global_axis_indexes = range(len(self._data_provider.aligned_global_axis))
return sum(
len(self.get_aligned_matrix_container(index).clp_labels)
for index in global_axis_indexes
)
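For intuition, the counting rule these number_of_clps properties implement can be sketched stand-alone (all labels and axis sizes below are made-up example data, not the real MatrixProvider API):

```python
# Unlinked, index-independent case: each dataset contributes
# (number of clp labels) * (number of global-axis points).
clp_labels = {"dataset1": ["s1", "s2"], "dataset2": ["s1", "s2"]}
global_axis_size = {"dataset1": 3, "dataset2": 4}

number_of_clps_unlinked = sum(
    len(labels) * global_axis_size[name] for name, labels in clp_labels.items()
)
print(number_of_clps_unlinked)  # (2 * 3) + (2 * 4) = 14

# Linked case: datasets share an aligned global axis, so the clp labels of the
# aligned matrix are counted once per aligned global-axis point.
aligned_clp_labels = [["s1", "s2"]] * 5  # 5 aligned global-axis points
number_of_clps_linked = sum(len(labels) for labels in aligned_clp_labels)
print(number_of_clps_linked)  # 2 * 5 = 10
```

These two totals match the expectations asserted in test_matrix_provider.py further down.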
10 changes: 10 additions & 0 deletions glotaran/optimization/optimization_group.py
@@ -212,3 +212,13 @@ def add_svd_data(name: str, dataset: xr.Dataset, lsv_dim: str, rsv_dim: str):
add_svd_to_dataset(
dataset, name=name, lsv_dim=lsv_dim, rsv_dim=rsv_dim, data_array=dataset[name]
)

@property
def number_of_clps(self) -> int:
"""Return number of conditionally linear parameters.
Returns
-------
int
"""
return self._matrix_provider.number_of_clps
18 changes: 12 additions & 6 deletions glotaran/optimization/optimizer.py
@@ -19,6 +19,7 @@

if TYPE_CHECKING:
from glotaran.typing.types import ArrayLike

SUPPORTED_METHODS = {
"TrustRegionReflection": "trf",
"Dogbox": "dogbox",
@@ -27,23 +28,23 @@


class InitialParameterError(ValueError):
"""Inidcates that initial parameters can not be evaluated."""
"""Indicates that initial parameters can not be evaluated."""

def __init__(self):
"""Initialize a InitialParameterError."""
super().__init__("Initial parameters can not be evaluated.")


class ParameterNotInitializedError(ValueError):
"""Inidcates that scheme parameters are not initialized."""
"""Indicates that scheme parameters are not initialized."""

def __init__(self):
"""Initialize a ParameterNotInitializedError."""
super().__init__("Parameter not initialized")


class MissingDatasetsError(ValueError):
"""Inidcates that datasets are missing in the scheme."""
"""Indicates that datasets are missing in the scheme."""

def __init__(self, missing_datasets: list[str]):
"""Initialize a MissingDatasetsError.
@@ -57,7 +58,7 @@ def __init__(self, missing_datasets: list[str]):


class UnsupportedMethodError(ValueError):
"""Inidcates that the optimization method is unsupported."""
"""Indicates that the optimization method is unsupported."""

def __init__(self, method: str):
"""Initialize an UnsupportedMethodError.
@@ -230,10 +231,15 @@ def create_result(self) -> Result:
if success:
result_args["number_of_jacobian_evaluations"] = self._optimization_result.njev
result_args["optimality"] = float(self._optimization_result.optimality)
result_args["number_of_data_points"] = self._optimization_result.fun.size
result_args["number_of_residuals"] = self._optimization_result.fun.size
result_args["number_of_clps"] = sum(
group.number_of_clps for group in self._optimization_groups
)
result_args["number_of_parameters"] = self._optimization_result.x.size
result_args["degrees_of_freedom"] = (
result_args["number_of_data_points"] - result_args["number_of_parameters"]
result_args["number_of_residuals"]
- result_args["number_of_parameters"]
- result_args["number_of_clps"]
)
result_args["chi_square"] = float(np.sum(self._optimization_result.fun**2))
result_args["reduced_chi_square"] = (
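As a toy illustration of how create_result now rolls the per-group counts into the fit statistics (the class and numbers below are stand-ins, not the real optimizer internals):

```python
# Toy stand-ins for optimization groups exposing number_of_clps.
from dataclasses import dataclass


@dataclass
class FakeOptimizationGroup:
    number_of_clps: int


optimization_groups = [FakeOptimizationGroup(18), FakeOptimizationGroup(285)]
number_of_clps = sum(group.number_of_clps for group in optimization_groups)

number_of_residuals = 10_000  # size of the residual vector
number_of_parameters = 12     # free nonlinear parameters
degrees_of_freedom = number_of_residuals - number_of_parameters - number_of_clps
print(number_of_clps, degrees_of_freedom)  # 303, 9685
```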
2 changes: 1 addition & 1 deletion glotaran/optimization/test/suites.py
@@ -46,7 +46,7 @@ class TwoCompartmentDecay:
wanted_parameters = Parameters.from_list([11e-4, 22e-5])
initial_parameters = Parameters.from_list([10e-4, 20e-5])

global_axis = np.asarray([1.0])
global_axis = np.asarray([1.0, 2])
model_axis = np.arange(0, 150, 1.5)

sim_model = DecayModel(
9 changes: 7 additions & 2 deletions glotaran/optimization/test/test_constraints.py
@@ -15,7 +15,7 @@ def test_constraint(index_dependent, link_clp):
model = deepcopy(suite.model)
model.dataset_groups["default"].link_clp = link_clp
model.megacomplex["m1"].is_index_dependent = index_dependent
model.clp_constraints.append(ZeroConstraint(**{"target": "s2"}))
model.clp_constraints.append(ZeroConstraint(**{"target": "s2", "interval": (1, 1)}))

print("link_clp", link_clp, "index_dependent", index_dependent)
dataset = simulate(
@@ -41,5 +41,10 @@

assert "s2" not in reduced_matrix.clp_labels
assert "s2" in clps.coords["clp_label"]
assert clps.sel(clp_label="s2") == 0
assert clps.sel(clp_label="s2")[0] == 0
assert clps.sel(clp_label="s2")[1] != 0
assert "s2" in matrix.clp_labels

# 1 compartment * 2 items in global axis
# + 1 compartment * 1 item in global axis
assert optimization_group.number_of_clps == 3
16 changes: 16 additions & 0 deletions glotaran/optimization/test/test_matrix_provider.py
@@ -71,6 +71,10 @@ def test_matrix_provider_unlinked_index_independent(scheme: Scheme):
assert matrices["dataset2"].shape == (scheme.data["dataset2"].model.size, 2)
assert all(matrices["dataset2"].clp_label == ["s1", "s2"])

# 2 compartments * 3 items in global axis of dataset1
# + 2 compartments * 4 items in global axis of dataset2
assert matrix_provider.number_of_clps == (2 * 3) + (2 * 4)


def test_matrix_provider_linked_index_independent(scheme: Scheme):
dataset_group = scheme.model.get_dataset_groups()["default"]
@@ -109,6 +113,10 @@ def test_matrix_provider_linked_index_independent(scheme: Scheme):
)
assert matrix_provider.get_aligned_matrix_container(4).matrix.shape == (dataset2_size, 2)

# 2 compartments * 5 items in aligned global axis
# See also: test_data_provider_linking_methods
assert matrix_provider.number_of_clps == 2 * 5


def test_matrix_provider_unlinked_index_dependent(scheme: Scheme):
scheme.model.megacomplex["m1"].is_index_dependent = True # type:ignore[attr-defined]
@@ -132,6 +140,10 @@ def test_matrix_provider_unlinked_index_dependent(scheme: Scheme):
assert matrices["dataset2"].shape == (dataset2_global_size, dataset2_model_size, 2)
assert all(matrices["dataset2"].clp_label == ["s1", "s2"])

# 2 compartments * 3 items in global axis of dataset1
# + 2 compartments * 4 items in global axis of dataset2
assert matrix_provider.number_of_clps == (2 * 3) + (2 * 4)


def test_matrix_provider_linked_index_dependent(scheme: Scheme):
scheme.model.megacomplex["m1"].is_index_dependent = True # type:ignore[attr-defined]
@@ -172,3 +184,7 @@ def test_matrix_provider_linked_index_dependent(scheme: Scheme):
2,
)
assert matrix_provider.get_aligned_matrix_container(4).matrix.shape == (dataset2_model_size, 2)

# 2 compartments * 5 items in aligned global axis
# See also: test_data_provider_linking_methods
assert matrix_provider.number_of_clps == 2 * 5
3 changes: 3 additions & 0 deletions glotaran/optimization/test/test_penalties.py
@@ -53,3 +53,6 @@ def test_penalties(index_dependent, link_clp):
assert full_penalty.size == (suite.model_axis.size * global_axis.size) + len(
additional_penalty
)

# 2 compartments * 50 items in global axis
assert optimization_group.number_of_clps == 100
11 changes: 9 additions & 2 deletions glotaran/optimization/test/test_relations.py
@@ -16,7 +16,7 @@ def test_relations(index_dependent, link_clp):
model = deepcopy(suite.model)
model.dataset_groups["default"].link_clp = link_clp
model.megacomplex["m1"].is_index_dependent = index_dependent
model.clp_relations.append(ClpRelation(**{"source": "s1", "target": "s2", "parameter": "3"}))
model.clp_relations.append(
ClpRelation(**{"source": "s1", "target": "s2", "parameter": "3", "interval": (1, 1)})
)
parameters = Parameters.from_list([11e-4, 22e-5, 2])

print("link_clp", link_clp, "index_dependent", index_dependent) # T201
@@ -43,5 +45,10 @@

assert "s2" not in reduced_matrix.clp_labels
assert "s2" in clps.coords["clp_label"]
assert clps.sel(clp_label="s2") == clps.sel(clp_label="s1") * 2
assert clps.sel(clp_label="s2")[0] == clps.sel(clp_label="s1")[0] * 2
assert clps.sel(clp_label="s2")[1] != clps.sel(clp_label="s1")[1] * 2
assert "s2" in matrix.clp_labels

# 1 compartment (s1 due to relation) * 2 items in global axis
# + 1 compartment (s2 outside of interval) * 1 item in global axis
assert optimization_group.number_of_clps == 3
33 changes: 28 additions & 5 deletions glotaran/project/result.py
@@ -13,6 +13,7 @@
from numpy.typing import ArrayLike
from tabulate import tabulate

from glotaran.deprecation import deprecate
from glotaran.io import SAVING_OPTIONS_DEFAULT
from glotaran.io import SavingOptions
from glotaran.io import load_result
@@ -105,15 +106,17 @@ class Result:
The rows and columns are corresponding to :attr:`free_parameter_labels`."""

degrees_of_freedom: int | None = None
"""Degrees of freedom in optimization :math:`N - N_{vars}`."""
"""Degrees of freedom in optimization :math:`N - N_{vars} - N_{clps}`."""
number_of_clps: int | None = None
"""Number of conditionally linear parameters :math:`N_{clps}`."""

jacobian: ArrayLike | list | None = exclude_from_dict_field(None)
"""Modified Jacobian matrix at the solution
See also: :func:`scipy.optimize.least_squares`
"""
number_of_data_points: int | None = None
"""Number of data points :math:`N`."""
number_of_residuals: int | None = None
"""Number of values in the residual vector :math:`N`."""
number_of_jacobian_evaluations: int | None = None
"""The number of jacobian evaluations."""
number_of_parameters: int | None = None
@@ -122,7 +125,7 @@
reduced_chi_square: float | None = None
r"""The reduced chi-square of the optimization.
:math:`\chi^2_{red}= {\chi^2} / {(N - N_{vars})}`.
:math:`\chi^2_{red}= {\chi^2} / {(N - N_{vars} - N_{clps})}`.
"""
root_mean_square_error: float | None = None
r"""
@@ -144,6 +147,25 @@ def __post_init__(self):
self.jacobian = np.array(self.jacobian)
self.covariance_matrix = np.array(self.covariance_matrix)

@property
@deprecate(
deprecated_qual_name_usage="glotaran.project.Result.number_of_data_points",
new_qual_name_usage="glotaran.project.Result.number_of_residuals",
to_be_removed_in_version="0.8.0",
importable_indices=(2, 2),
)
def number_of_data_points(self) -> int | None:
"""Return the number of values in the residual vector :math:`N`.
Deprecated since it returned the wrong value.
Returns
-------
int | None
Number of values in the residual vector :math:`N`.
"""
return self.number_of_residuals

@property
def model(self) -> Model:
"""Return the model used to fit result.
@@ -198,8 +220,9 @@ def markdown(
"""
general_table_rows: list[list[Any]] = [
["Number of residual evaluation", self.number_of_function_evaluations],
["Number of residuals", self.number_of_residuals],
["Number of parameters", self.number_of_parameters],
["Number of datapoints", self.number_of_data_points],
["Number of conditionally linear parameters", self.number_of_clps],
["Degrees of freedom", self.degrees_of_freedom],
["Chi Square", f"{self.chi_square or np.nan:.2e}"],
["Reduced Chi Square", f"{self.reduced_chi_square or np.nan:.2e}"],
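Finally, a short usage sketch of the renamed attribute, reusing the fixtures from the deprecation test above (the printed values depend on the simulated data, so they are not reproduced here):

```python
# Usage sketch: the deprecated attribute forwards to the new one and emits a warning.
import warnings

from glotaran.optimization.optimize import optimize
from glotaran.testing.simulated_data.sequential_spectral_decay import SCHEME

result = optimize(SCHEME, raise_exception=True)

with warnings.catch_warnings(record=True) as caught:
    warnings.simplefilter("always")
    n_old = result.number_of_data_points  # deprecated alias for number_of_residuals

assert n_old == result.number_of_residuals
print(f"number_of_residuals={result.number_of_residuals}, number_of_clps={result.number_of_clps}")
print(f"degrees_of_freedom={result.degrees_of_freedom}, warnings emitted: {len(caught)}")
```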
