✨ Add optimization history to result and iteration column to parameter history #1134

Merged · 18 commits · Oct 7, 2022
5 changes: 4 additions & 1 deletion .pre-commit-config.yaml
@@ -58,7 +58,10 @@ repos:
rev: 0.6.1
hooks:
- id: nbstripout
args: [--drop-empty-cells]
args:
- "--drop-empty-cells"
- "--extra-keys"
- "metadata.vscode metadata.language_info.version"

- repo: https://github.com/nbQA-dev/nbQA
rev: 1.5.2
2 changes: 2 additions & 0 deletions changelog.md
@@ -6,6 +6,8 @@

### ✨ Features

- ✨ Add optimization history to result and iteration column to parameter history (#1134)

### 👌 Minor Improvements:

- 👌🎨 Wrap model section in result markdown in details tag for notebooks (#1098)
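In user code the two additions surface on the ``Result`` object. A minimal sketch, assuming ``result`` is a ``Result`` returned by ``optimize(scheme)`` (how the iteration shows up in the parameter history DataFrame is an assumption here):

opt_history = result.optimization_history.data  # pandas DataFrame indexed by iteration
print(opt_history[["nfev", "cost", "cost_reduction", "step_norm", "optimality"]].head())
param_history = result.parameter_history.to_dataframe()
print(param_history.head())  # now records the optimization iteration each entry belongs to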
@@ -338,8 +338,7 @@
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8.6"
"pygments_lexer": "ipython3"
}
},
"nbformat": 4,
12 changes: 10 additions & 2 deletions docs/source/notebooks/quickstart/quickstart.ipynb
@@ -290,6 +290,15 @@
"result"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"result.optimization_history.data.plot(logy=True)"
]
},
{
"cell_type": "code",
"execution_count": null,
@@ -371,8 +380,7 @@
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8.6"
"pygments_lexer": "ipython3"
}
},
"nbformat": 4,
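The new quickstart cell uses the pandas plotting API on the history DataFrame. A rough script-level equivalent, assuming matplotlib is installed as the pandas plotting backend:

import matplotlib.pyplot as plt

ax = result.optimization_history.data.plot(logy=True)  # cost and the other columns per iteration, log y-axis
ax.set_xlabel("iteration")
plt.show()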
4 changes: 4 additions & 0 deletions glotaran/builtin/io/folder/folder_plugin.py
@@ -181,6 +181,10 @@ def save_result(
result.parameter_history.to_csv(parameter_history_path)
paths.append(parameter_history_path.as_posix())

optimization_history_path = result_folder / "optimization_history.csv"
result.optimization_history.to_csv(optimization_history_path)
paths.append(optimization_history_path.as_posix())

for label, dataset in result.data.items():
data_path = result_folder / f"{label}.{saving_options.data_format}"
if saving_options.data_filter is not None:
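The CSV written here round-trips through the loader added in this PR; a small sketch with an illustrative path:

from glotaran.optimization.optimization_history import OptimizationHistory

history = OptimizationHistory.from_csv("testresult/optimization_history.csv")
print(history.data.tail())  # last iterations: nfev, cost, cost_reduction, step_norm, optimality
print(history.source_path)  # set to the posix form of the path it was read from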
40 changes: 37 additions & 3 deletions glotaran/builtin/io/yml/test/test_save_result.py
@@ -5,8 +5,10 @@
from textwrap import dedent

import pytest
from pandas.testing import assert_frame_equal

from glotaran import __version__
from glotaran.io import load_result
from glotaran.io import save_result
from glotaran.optimization.optimize import optimize
from glotaran.project.result import Result
@@ -42,22 +44,54 @@ def test_save_result_yml(
initial_parameters: initial_parameters.csv
optimized_parameters: optimized_parameters.csv
parameter_history: parameter_history.csv
optimization_history: optimization_history.csv
data:
dataset_1: dataset_1.nc
"""
)

result_dir = tmp_path / "testresult"
save_result(result_path=result_dir / "result.yml", result=dummy_result)
result_path = result_dir / "result.yml"
save_result(result_path=result_path, result=dummy_result)

assert dummy_result.source_path == result_path.as_posix()

assert (result_dir / "result.md").exists()
assert (result_dir / "scheme.yml").exists()
assert (result_dir / "result.yml").exists()
assert result_path.exists()
assert (result_dir / "initial_parameters.csv").exists()
assert (result_dir / "optimized_parameters.csv").exists()
assert (result_dir / "optimization_history.csv").exists()
assert (result_dir / "dataset_1.nc").exists()

# We can't check equality due to numerical fluctuations
got = (result_dir / "result.yml").read_text()
got = result_path.read_text()
print(got)
assert expected in got


def test_save_result_yml_roundtrip(tmp_path: Path, dummy_result: Result):
"""Save and reloaded Result should be the same."""
result_dir = tmp_path / "testresult"
result_path = result_dir / "result.yml"
save_result(result_path=result_path, result=dummy_result)
result_round_tripped = load_result(result_path)

assert dummy_result.source_path == result_path.as_posix()
assert result_round_tripped.source_path == result_path.as_posix()

assert_frame_equal(
dummy_result.initial_parameters.to_dataframe(),
result_round_tripped.initial_parameters.to_dataframe(),
)
assert_frame_equal(
dummy_result.optimized_parameters.to_dataframe(),
result_round_tripped.optimized_parameters.to_dataframe(),
)
assert_frame_equal(
dummy_result.parameter_history.to_dataframe(),
result_round_tripped.parameter_history.to_dataframe(),
)
assert_frame_equal(
dummy_result.optimization_history.data, result_round_tripped.optimization_history.data
)
1 change: 1 addition & 0 deletions glotaran/builtin/io/yml/yml.py
@@ -146,6 +146,7 @@ def save_result(
* ``initial_parameters.csv``: Initially used parameters.
* ``optimized_parameters.csv``: The optimized parameter as csv file.
* ``parameter_history.csv``: Parameter changes over the optimization.
* ``optimization_history.csv``: Parsed table printed by the SciPy optimizer.
* ``{dataset_label}.nc``: The result data for each dataset as NetCDF file.

Parameters
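Putting the docstring together with the folder plugin above, a call along these lines (paths illustrative, ``result`` an optimized ``Result``) produces the listed files, now including ``optimization_history.csv``:

from glotaran.io import save_result

save_result(result_path="my_run/result.yml", result=result)
# my_run/ then holds result.md, scheme.yml, initial_parameters.csv,
# optimized_parameters.csv, parameter_history.csv, optimization_history.csv
# and one <dataset_label>.nc per dataset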
131 changes: 131 additions & 0 deletions glotaran/optimization/optimization_history.py
@@ -0,0 +1,131 @@
"""Module containing the ``OptimizationHistory`` class."""
from __future__ import annotations

from pathlib import Path
from typing import TYPE_CHECKING
from typing import Any

import pandas as pd

from glotaran.utils.regex import RegexPattern

if TYPE_CHECKING:

from glotaran.typing import StrOrPath


class OptimizationHistory:
"""Wrapped DataFrame to hold information of the optimization and behaves like a ``DataFrame``.

Ref.:
https://stackoverflow.com/a/65375904/3990615
"""

def __init__(self, data=None, source_path: StrOrPath | None = None) -> None:
"""Ensure DataFrame has the correct columns, is numeric and has iteration as index."""
self._df = (
pd.DataFrame(
data,
columns=["iteration", "nfev", "cost", "cost_reduction", "step_norm", "optimality"],
)
.apply(pd.to_numeric)
.set_index("iteration")
)
if source_path is not None:
self.source_path = Path(source_path).as_posix()
else:
self.source_path = "optimization_history.csv"

def __getattr__(self, attr: str) -> Any:
"""Access class attribute and fallback to DataFrame attribute if not present.

Parameters
----------
attr: str
Name of the attribute to access.

Returns
-------
Any
Attribute of ``OptimizationHistory`` or the DataFrame
"""
if attr in self.__dict__:
return getattr(self, attr)
return getattr(self.data, attr)

def __getitem__(self, column: str) -> pd.Series:
"""Access DataFrame instead of class items.

Parameters
----------
column: str
Name of the column to access.

Returns
-------
pd.Series
Column of the DataFrame.
"""
return self.data[column]

@property
def data(self) -> pd.DataFrame:
"""Underlying ``DataFrame`` which allows for autocomplete with static analyzers.

Returns
-------
pd.DataFrame
``DataFrame`` containing ``OptimizationHistory`` data.
"""
return self._df

@classmethod
def from_stdout_str(
cls: type[OptimizationHistory], optimize_stdout: str
) -> OptimizationHistory:
"""Create ``OptimizationHistory`` instance from ``optimize_stdout``.

Parameters
----------
optimize_stdout: str
SciPy optimization stdout string, read out via ``TeeContext.read()``.

Returns
-------
OptimizationHistory
``OptimizationHistory`` instance created by parsing ``optimize_stdout``.
"""
return cls(
[m.groupdict() for m in RegexPattern.optimization_stdout.finditer(optimize_stdout)]
)

@classmethod
def from_csv(cls: type[OptimizationHistory], path: StrOrPath) -> OptimizationHistory:
"""Read ``OptimizationHistory`` from file.

Parameters
----------
path : StrOrPath
The path to the csv file.

Returns
-------
OptimizationHistory
``OptimizationHistory`` read from file.
"""
return cls(pd.read_csv(path), source_path=Path(path).as_posix())

loader = from_csv # type:ignore[assignment]

def to_csv(self, path: StrOrPath, delimiter: str = ","):
"""Write a ``OptimizationHistory`` to a CSV file and set ``source_path``.

Parameters
----------
path : StrOrPath
The path to the CSV file.
delimiter : str
The delimiter of the CSV file.
"""
self.source_path = Path(path).as_posix()
self.data.to_csv(path, sep=delimiter)
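For context, ``from_stdout_str`` parses the per-iteration table that ``scipy.optimize.least_squares`` prints when called with ``verbose=2``. A sketch with a hand-written snippet shaped like that table; whether this exact spacing matches ``RegexPattern.optimization_stdout`` is an assumption:

from glotaran.optimization.optimization_history import OptimizationHistory

fake_stdout = (
    "   Iteration     Total nfev        Cost      Cost reduction    Step norm     Optimality  \n"
    "       0              1         7.5000e+00                                    1.20e+01    \n"
    "       1              2         1.2500e+00      6.25e+00       4.00e-01       2.30e+00    \n"
)
history = OptimizationHistory.from_stdout_str(fake_stdout)
print(history.data)  # numeric columns, indexed by iteration (empty if the pattern did not match)
history.to_csv("optimization_history.csv")  # writes the CSV and updates history.source_path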
65 changes: 45 additions & 20 deletions glotaran/optimization/optimizer.py
@@ -9,9 +9,12 @@

from glotaran import __version__ as glotaran_version
from glotaran.optimization.optimization_group import OptimizationGroup
from glotaran.optimization.optimization_history import OptimizationHistory
from glotaran.parameter import ParameterHistory
from glotaran.project import Result
from glotaran.project import Scheme
from glotaran.utils.regex import RegexPattern
from glotaran.utils.tee import TeeContext

SUPPORTED_METHODS = {
"TrustRegionReflection": "trf",
@@ -103,6 +106,7 @@ def __init__(self, scheme: Scheme, verbose: bool = True, raise_exception: bool =
self._method = SUPPORTED_METHODS[scheme.optimization_method]

self._scheme = scheme
self._tee = TeeContext()
self._verbose = verbose
self._raise = raise_exception

@@ -131,25 +135,26 @@ def optimize(self):
lower_bounds,
upper_bounds,
) = self._scheme.parameters.get_label_value_and_bounds_arrays(exclude_non_vary=True)
try:
verbose = 2 if self._verbose else 0
self._optimization_result = least_squares(
self.objective_function,
initial_parameter,
bounds=(lower_bounds, upper_bounds),
method=self._method,
max_nfev=self._scheme.maximum_number_function_evaluations,
verbose=verbose,
ftol=self._scheme.ftol,
gtol=self._scheme.gtol,
xtol=self._scheme.xtol,
)
self._termination_reason = self._optimization_result.message
except Exception as e:
if self._raise:
raise e
warn(f"Optimization failed:\n\n{e}")
self._termination_reason = str(e)
with self._tee:
try:
verbose = 2 if self._verbose else 0
self._optimization_result = least_squares(
self.objective_function,
initial_parameter,
bounds=(lower_bounds, upper_bounds),
method=self._method,
max_nfev=self._scheme.maximum_number_function_evaluations,
verbose=verbose,
ftol=self._scheme.ftol,
gtol=self._scheme.gtol,
xtol=self._scheme.xtol,
)
self._termination_reason = self._optimization_result.message
except Exception as e:
if self._raise:
raise e
warn(f"Optimization failed:\n\n{e}")
self._termination_reason = str(e)

def objective_function(self, parameters: np.typing.ArrayLike) -> np.typing.ArrayLike:
"""Calculate the objective for the optimization.
@@ -177,7 +182,9 @@ def calculate_penalty(self) -> np.typing.ArrayLike:
"""
for group in self._optimization_groups:
group.calculate(self._parameters)
self._parameter_history.append(self._parameters)
self._parameter_history.append(
self._parameters, self.get_current_optimization_iteration(self._tee.read())
)

penalties = [group.get_full_penalty() for group in self._optimization_groups]

@@ -211,6 +218,7 @@ def create_result(self) -> Result:
"initial_parameters": self._scheme.parameters,
"parameter_history": self._parameter_history,
"termination_reason": self._termination_reason,
"optimization_history": OptimizationHistory.from_stdout_str(self._tee.read()),
"number_of_function_evaluations": self._optimization_result.nfev
if success
else self._parameter_history.number_of_records,
@@ -284,3 +292,20 @@
for label, error in zip(self._free_parameter_labels, standard_errors):
self._parameters.get(label).standard_error = error
return covariance_matrix

@staticmethod
def get_current_optimization_iteration(optimize_stdout: str) -> int:
"""Extract current iteration from ``optimize_stdout``.

Parameters
----------
optimize_stdout: str
SciPy optimization stdout string, read out via ``TeeContext.read()``.

Returns
-------
int
Current iteration (``0`` if pattern did not match).
"""
matches = RegexPattern.optimization_stdout.findall(optimize_stdout)
return 0 if len(matches) == 0 else int(matches[-1][0])
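A quick behavioral note on the fallback: with nothing captured yet the helper reports iteration ``0``; in the matched case the iteration of the last table row is returned, which depends on the shape of ``RegexPattern.optimization_stdout``. The enclosing class name ``Optimizer`` is assumed from the module:

# before SciPy prints any iteration rows, the captured stdout is empty
assert Optimizer.get_current_optimization_iteration("") == 0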