Skip to content

Commit

Permalink
✨ Allow usage of subfolders in project API for parameters, models and…
Browse files Browse the repository at this point in the history
… data (#1232)

* ♻️ Factor out item name to be an init parameter

* ✨ Allow usage of subfolders in project parameters, models and data

👌 Gracefully handle and warn the user about ambiguity due to files with the same name but different extensions next to each other

* ♻️ Sort file paths when globbing to make errors deterministic

* 🚧📚 Added change to changelog
  • Loading branch information
s-weigand authored Feb 10, 2023
1 parent bb4ef93 commit bcf7cc2
Show file tree
Hide file tree
Showing 8 changed files with 195 additions and 25 deletions.
1 change: 1 addition & 0 deletions changelog.md
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
- 👌 Allow more natural column names in pandas parameters file reading (#1174)
- ✨ Integrate plugin system into Project (#1229)
- 👌 Make yaml the default plugin when passing a folder to save_result and load_result (#1230)
- ✨ Allow usage of subfolders in project API for parameters, models and data (#1232)

### 🩹 Bug fixes

Expand Down
32 changes: 11 additions & 21 deletions glotaran/project/project.py
Original file line number Diff line number Diff line change
Expand Up @@ -166,14 +166,11 @@ def load_data(self, dataset_name: str) -> xr.Dataset | xr.DataArray:
------
ValueError
Raised if the dataset does not exist.
.. # noqa: DAR402
"""
try:
return self._data_registry.load_item(dataset_name)
except ValueError as err:
raise ValueError(
f"Dataset {dataset_name!r} does not exist. "
f"Known Datasets are: {list(self._data_registry.items.keys())}"
) from err
return self._data_registry.load_item(dataset_name)

def import_data(
self,
Expand Down Expand Up @@ -238,14 +235,11 @@ def load_model(self, name: str) -> Model:
------
ValueError
Raised if the model does not exist.
.. # noqa: DAR402
"""
try:
return self._model_registry.load_item(name)
except ValueError as err:
raise ValueError(
f"Model {name!r} does not exist. "
f"Known Models are: {list(self._model_registry.items.keys())}"
) from err
return self._model_registry.load_item(name)

def generate_model(
self,
Expand Down Expand Up @@ -329,15 +323,11 @@ def load_parameters(self, parameters_name: str) -> Parameters:
Parameters
The loaded parameters.
.. # noqa: D414
.. # noqa: DAR402
"""
try:
return self._parameter_registry.load_item(parameters_name)
except ValueError as err:
raise ValueError(
f"Parameters '{parameters_name}' does not exist. "
f"Known Parameters are: {list(self._parameter_registry.items.keys())}"
) from err
return self._parameter_registry.load_item(parameters_name)

def generate_parameters(
self,
Expand Down
1 change: 1 addition & 0 deletions glotaran/project/project_data_registry.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ def __init__(self, directory: Path):
directory / "data",
supported_file_extensions_data_io("load_dataset"),
load_dataset,
item_name="Dataset",
)

def import_data(
Expand Down
5 changes: 4 additions & 1 deletion glotaran/project/project_model_registry.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,10 @@ def __init__(self, directory: Path):
The registry directory.
"""
super().__init__(
directory / "models", supported_file_extensions_project_io("load_model"), load_model
directory / "models",
supported_file_extensions_project_io("load_model"),
load_model,
item_name="Model",
)

def generate_model(
Expand Down
1 change: 1 addition & 0 deletions glotaran/project/project_parameter_registry.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ def __init__(self, directory: Path):
directory / "parameters",
supported_file_extensions_project_io("load_parameters"),
load_parameters,
item_name="Parameters",
)

def generate_parameters(
Expand Down
79 changes: 76 additions & 3 deletions glotaran/project/project_registry.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,14 +5,64 @@
from collections.abc import Iterable
from pathlib import Path
from typing import Any
from warnings import warn

from glotaran.utils.ipython import MarkdownStr


class AmbiguousNameWarning(UserWarning):
"""Warning thrown when an item with the same name already exists.
This is the case if two files with the same name but different extensions exist next to
each other.
"""

def __init__(
self,
*,
item_name: str,
items: dict[str, Path],
item_key: str,
unique_item_key: str,
project_root_path: Path,
):
"""Initialize ``AmbiguousNameWarning`` with a formatted message.
Parameters
----------
item_name: str
Name of items in the registry (e.g. 'Parameters').
items: dict[str, Path]
Known items at this iteration point.
item_key: str
Key that would have been used if the file names weren't ambiguous.
unique_item_key: str
Unique key for the item with ambiguous file name.
project_root_path: Path
Root path of the project.
"""
ambiguous_file_names = [
value.relative_to(project_root_path).as_posix()
for key, value in items.items()
if key.startswith(item_key)
]
super().__init__(
f"The {item_name} name {item_key!r} is ambiguous since it could "
f"refer to the following files: {ambiguous_file_names}\n"
f"The file {items[unique_item_key].relative_to(project_root_path).as_posix()!r} "
f"will be accessible by the name {unique_item_key!r}. \n"
f"While {item_key!r} refers to the file "
f"{items[item_key].relative_to(project_root_path).as_posix()!r}.\n"
"Rename the files with unambiguous names to silence this warning."
)


class ProjectRegistry:
"""A registry base class."""

def __init__(self, directory: Path, file_suffix: str | Iterable[str], loader: Callable):
def __init__(
self, directory: Path, file_suffix: str | Iterable[str], loader: Callable, item_name: str
):
"""Initialize a registry.
Parameters
Expand All @@ -23,12 +73,15 @@ def __init__(self, directory: Path, file_suffix: str | Iterable[str], loader: Ca
The suffixes of item files.
loader : Callable
A loader for the registry items.
item_name: str
Name of items in the registry used to format warning and exception (e.g. 'Parameters').
"""
self._directory: Path = directory
self._file_suffix: tuple[str, ...] = (
(file_suffix,) if isinstance(file_suffix, str) else tuple(file_suffix)
)
self._loader: Callable = loader
self._item_name = item_name

self._create_directory_if_not_exist()

Expand Down Expand Up @@ -63,7 +116,26 @@ def items(self) -> dict[str, Path]:
dict[str, Path]
The items of the registry.
"""
items = {path.stem: path for path in self._directory.iterdir() if self.is_item(path)}
items = {}
for path in sorted(self._directory.rglob("*")):
if self.is_item(path) is True:
rel_parent_path = path.parent.relative_to(self._directory)
item_key = (rel_parent_path / path.stem).as_posix()
if item_key not in items:
items[item_key] = path
else:
unique_item_key = (rel_parent_path / path.name).as_posix()
items[unique_item_key] = path
warn(
AmbiguousNameWarning(
item_name=self._item_name,
items=items,
item_key=item_key,
unique_item_key=unique_item_key,
project_root_path=self._directory.parent,
),
stacklevel=3,
)
return dict(sorted(items.items()))

def is_item(self, path: Path) -> bool:
Expand Down Expand Up @@ -102,7 +174,8 @@ def load_item(self, name: str) -> Any:
if name in self.items:
return self._loader(self.items[name])
raise ValueError(
f"No Item with name '{name}' exists. Known items are: {list(self.items.keys())}"
f"{self._item_name} '{name}' does not exist. "
f"Known {self._item_name.rstrip('s')}s are: {list(self.items.keys())}"
)

def markdown(self, join_indentation: int = 0) -> MarkdownStr:
Expand Down
1 change: 1 addition & 0 deletions glotaran/project/project_result_registry.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ def __init__(self, directory: Path):
directory / "results",
[],
lambda path: load_result(path / "result.yml", format_name="yml"),
item_name="Result",
)

def is_item(self, path: Path) -> bool:
Expand Down
100 changes: 100 additions & 0 deletions glotaran/project/test/test_project.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
from glotaran.io import save_dataset
from glotaran.io import save_parameters
from glotaran.project.project import Project
from glotaran.project.project_registry import AmbiguousNameWarning
from glotaran.project.result import Result
from glotaran.testing.simulated_data.sequential_spectral_decay import DATASET as example_dataset
from glotaran.testing.simulated_data.sequential_spectral_decay import (
Expand Down Expand Up @@ -415,6 +416,105 @@ def test_parameters_plugin_system_integration(tmp_path: Path, file_extension: st
assert project.parameters["test_parameter"].samefile(parameter_file)


def test_data_subfolder_folders(tmp_path: Path):
"""Models in sub folders are found."""
data_file = tmp_path / "data/subfolder/test_data.ascii"
data_file.parent.mkdir(parents=True, exist_ok=True)
data_file.touch()
project = Project.open(tmp_path)

assert len(project.data) == 1
assert project.data["subfolder/test_data"].samefile(data_file)

data_file2 = tmp_path / "data/subfolder/test_data.nc"
data_file2.touch()

with pytest.warns(AmbiguousNameWarning) as records:
assert len(project.data) == 2
assert project.data["subfolder/test_data"].samefile(data_file)
assert project.data["subfolder/test_data.nc"].samefile(data_file2)
# One warning pre accessing project.models
assert len(records) == 3, [r.message for r in records]
for record in records:
assert Path(record.filename) == Path(__file__)
assert str(record.message) == (
"The Dataset name 'subfolder/test_data' is ambiguous since it could "
"refer to the following files: ['data/subfolder/test_data.ascii', "
"'data/subfolder/test_data.nc']\n"
"The file 'data/subfolder/test_data.nc' will be accessible by the name "
"'subfolder/test_data.nc'. \n"
"While 'subfolder/test_data' refers to the file "
"'data/subfolder/test_data.ascii'.\n"
"Rename the files with unambiguous names to silence this warning."
)


def test_model_subfolder_folders(tmp_path: Path):
"""Models in sub folders are found."""
model_file = tmp_path / "models/subfolder/test_model.yaml"
model_file.parent.mkdir(parents=True, exist_ok=True)
model_file.touch()
project = Project.open(tmp_path)

assert len(project.models) == 1
assert project.models["subfolder/test_model"].samefile(model_file)

model_file2 = tmp_path / "models/subfolder/test_model.yml"
model_file2.touch()

with pytest.warns(AmbiguousNameWarning) as records:
assert len(project.models) == 2
assert project.models["subfolder/test_model"].samefile(model_file)
assert project.models["subfolder/test_model.yml"].samefile(model_file2)
# One warning pre accessing project.models
assert len(records) == 3, [r.message for r in records]
for record in records:
assert Path(record.filename) == Path(__file__)
assert str(record.message) == (
"The Model name 'subfolder/test_model' is ambiguous since it could "
"refer to the following files: ['models/subfolder/test_model.yaml', "
"'models/subfolder/test_model.yml']\n"
"The file 'models/subfolder/test_model.yml' will be accessible by the name "
"'subfolder/test_model.yml'. \n"
"While 'subfolder/test_model' refers to the file "
"'models/subfolder/test_model.yaml'.\n"
"Rename the files with unambiguous names to silence this warning."
)


def test_parameters_subfolder_folders(tmp_path: Path):
"""Parameters in sub folders are found."""
parameter_file = tmp_path / "parameters/subfolder/test_parameter.yaml"
parameter_file.parent.mkdir(parents=True, exist_ok=True)
parameter_file.touch()
project = Project.open(tmp_path)

assert len(project.parameters) == 1
assert project.parameters["subfolder/test_parameter"].samefile(parameter_file)

parameter_file2 = tmp_path / "parameters/subfolder/test_parameter.yml"
parameter_file2.touch()

with pytest.warns(AmbiguousNameWarning) as records:
assert len(project.parameters) == 2
assert project.parameters["subfolder/test_parameter"].samefile(parameter_file)
assert project.parameters["subfolder/test_parameter.yml"].samefile(parameter_file2)
# One warning pre accessing project.parameters
assert len(records) == 3, [r.message for r in records]
for record in records:
assert Path(record.filename) == Path(__file__)
assert str(record.message) == (
"The Parameters name 'subfolder/test_parameter' is ambiguous since it could "
"refer to the following files: ['parameters/subfolder/test_parameter.yaml', "
"'parameters/subfolder/test_parameter.yml']\n"
"The file 'parameters/subfolder/test_parameter.yml' will be accessible by the "
"name 'subfolder/test_parameter.yml'. \n"
"While 'subfolder/test_parameter' refers to the file "
"'parameters/subfolder/test_parameter.yaml'.\n"
"Rename the files with unambiguous names to silence this warning."
)


def test_missing_file_errors(tmp_path: Path, project_folder: Path):
"""Error when accessing non existing files."""
with pytest.raises(FileNotFoundError) as exc_info:
Expand Down

0 comments on commit bcf7cc2

Please sign in to comment.