diff --git a/changelog.md b/changelog.md index 1ce3772b0..853717122 100644 --- a/changelog.md +++ b/changelog.md @@ -20,6 +20,7 @@ - 👌 Allow more natural column names in pandas parameters file reading (#1174) - ✨ Integrate plugin system into Project (#1229) - 👌 Make yaml the default plugin when passing a folder to save_result and load_result (#1230) +- ✨ Allow usage of subfolders in project API for parameters, models and data (#1232) ### 🩹 Bug fixes diff --git a/glotaran/project/project.py b/glotaran/project/project.py index 00fd0d33f..cc098e0f5 100644 --- a/glotaran/project/project.py +++ b/glotaran/project/project.py @@ -166,14 +166,11 @@ def load_data(self, dataset_name: str) -> xr.Dataset | xr.DataArray: ------ ValueError Raised if the dataset does not exist. + + + .. # noqa: DAR402 """ - try: - return self._data_registry.load_item(dataset_name) - except ValueError as err: - raise ValueError( - f"Dataset {dataset_name!r} does not exist. " - f"Known Datasets are: {list(self._data_registry.items.keys())}" - ) from err + return self._data_registry.load_item(dataset_name) def import_data( self, @@ -238,14 +235,11 @@ def load_model(self, name: str) -> Model: ------ ValueError Raised if the model does not exist. + + + .. # noqa: DAR402 """ - try: - return self._model_registry.load_item(name) - except ValueError as err: - raise ValueError( - f"Model {name!r} does not exist. " - f"Known Models are: {list(self._model_registry.items.keys())}" - ) from err + return self._model_registry.load_item(name) def generate_model( self, @@ -329,15 +323,11 @@ def load_parameters(self, parameters_name: str) -> Parameters: Parameters The loaded parameters. + .. # noqa: D414 + .. # noqa: DAR402 """ - try: - return self._parameter_registry.load_item(parameters_name) - except ValueError as err: - raise ValueError( - f"Parameters '{parameters_name}' does not exist. " - f"Known Parameters are: {list(self._parameter_registry.items.keys())}" - ) from err + return self._parameter_registry.load_item(parameters_name) def generate_parameters( self, diff --git a/glotaran/project/project_data_registry.py b/glotaran/project/project_data_registry.py index 08285a91c..c0414e3f4 100644 --- a/glotaran/project/project_data_registry.py +++ b/glotaran/project/project_data_registry.py @@ -24,6 +24,7 @@ def __init__(self, directory: Path): directory / "data", supported_file_extensions_data_io("load_dataset"), load_dataset, + item_name="Dataset", ) def import_data( diff --git a/glotaran/project/project_model_registry.py b/glotaran/project/project_model_registry.py index 5e62dc504..57aec9482 100644 --- a/glotaran/project/project_model_registry.py +++ b/glotaran/project/project_model_registry.py @@ -22,7 +22,10 @@ def __init__(self, directory: Path): The registry directory. """ super().__init__( - directory / "models", supported_file_extensions_project_io("load_model"), load_model + directory / "models", + supported_file_extensions_project_io("load_model"), + load_model, + item_name="Model", ) def generate_model( diff --git a/glotaran/project/project_parameter_registry.py b/glotaran/project/project_parameter_registry.py index c27873f26..afefe358c 100644 --- a/glotaran/project/project_parameter_registry.py +++ b/glotaran/project/project_parameter_registry.py @@ -28,6 +28,7 @@ def __init__(self, directory: Path): directory / "parameters", supported_file_extensions_project_io("load_parameters"), load_parameters, + item_name="Parameters", ) def generate_parameters( diff --git a/glotaran/project/project_registry.py b/glotaran/project/project_registry.py index 956f94773..b47305e0c 100644 --- a/glotaran/project/project_registry.py +++ b/glotaran/project/project_registry.py @@ -5,14 +5,64 @@ from collections.abc import Iterable from pathlib import Path from typing import Any +from warnings import warn from glotaran.utils.ipython import MarkdownStr +class AmbiguousNameWarning(UserWarning): + """Warning thrown when an item with the same name already exists. + + This is the case if two files with the same name but different extensions exist next to + each other. + """ + + def __init__( + self, + *, + item_name: str, + items: dict[str, Path], + item_key: str, + unique_item_key: str, + project_root_path: Path, + ): + """Initialize ``AmbiguousNameWarning`` with a formatted message. + + Parameters + ---------- + item_name: str + Name of items in the registry (e.g. 'Parameters'). + items: dict[str, Path] + Known items at this iteration point. + item_key: str + Key that would have been used if the file names weren't ambiguous. + unique_item_key: str + Unique key for the item with ambiguous file name. + project_root_path: Path + Root path of the project. + """ + ambiguous_file_names = [ + value.relative_to(project_root_path).as_posix() + for key, value in items.items() + if key.startswith(item_key) + ] + super().__init__( + f"The {item_name} name {item_key!r} is ambiguous since it could " + f"refer to the following files: {ambiguous_file_names}\n" + f"The file {items[unique_item_key].relative_to(project_root_path).as_posix()!r} " + f"will be accessible by the name {unique_item_key!r}. \n" + f"While {item_key!r} refers to the file " + f"{items[item_key].relative_to(project_root_path).as_posix()!r}.\n" + "Rename the files with unambiguous names to silence this warning." + ) + + class ProjectRegistry: """A registry base class.""" - def __init__(self, directory: Path, file_suffix: str | Iterable[str], loader: Callable): + def __init__( + self, directory: Path, file_suffix: str | Iterable[str], loader: Callable, item_name: str + ): """Initialize a registry. Parameters @@ -23,12 +73,15 @@ def __init__(self, directory: Path, file_suffix: str | Iterable[str], loader: Ca The suffixes of item files. loader : Callable A loader for the registry items. + item_name: str + Name of items in the registry used to format warning and exception (e.g. 'Parameters'). """ self._directory: Path = directory self._file_suffix: tuple[str, ...] = ( (file_suffix,) if isinstance(file_suffix, str) else tuple(file_suffix) ) self._loader: Callable = loader + self._item_name = item_name self._create_directory_if_not_exist() @@ -63,7 +116,26 @@ def items(self) -> dict[str, Path]: dict[str, Path] The items of the registry. """ - items = {path.stem: path for path in self._directory.iterdir() if self.is_item(path)} + items = {} + for path in sorted(self._directory.rglob("*")): + if self.is_item(path) is True: + rel_parent_path = path.parent.relative_to(self._directory) + item_key = (rel_parent_path / path.stem).as_posix() + if item_key not in items: + items[item_key] = path + else: + unique_item_key = (rel_parent_path / path.name).as_posix() + items[unique_item_key] = path + warn( + AmbiguousNameWarning( + item_name=self._item_name, + items=items, + item_key=item_key, + unique_item_key=unique_item_key, + project_root_path=self._directory.parent, + ), + stacklevel=3, + ) return dict(sorted(items.items())) def is_item(self, path: Path) -> bool: @@ -102,7 +174,8 @@ def load_item(self, name: str) -> Any: if name in self.items: return self._loader(self.items[name]) raise ValueError( - f"No Item with name '{name}' exists. Known items are: {list(self.items.keys())}" + f"{self._item_name} '{name}' does not exist. " + f"Known {self._item_name.rstrip('s')}s are: {list(self.items.keys())}" ) def markdown(self, join_indentation: int = 0) -> MarkdownStr: diff --git a/glotaran/project/project_result_registry.py b/glotaran/project/project_result_registry.py index 2c963fb5c..87edc8abb 100644 --- a/glotaran/project/project_result_registry.py +++ b/glotaran/project/project_result_registry.py @@ -28,6 +28,7 @@ def __init__(self, directory: Path): directory / "results", [], lambda path: load_result(path / "result.yml", format_name="yml"), + item_name="Result", ) def is_item(self, path: Path) -> bool: diff --git a/glotaran/project/test/test_project.py b/glotaran/project/test/test_project.py index 52f404312..6f97fdf17 100644 --- a/glotaran/project/test/test_project.py +++ b/glotaran/project/test/test_project.py @@ -18,6 +18,7 @@ from glotaran.io import save_dataset from glotaran.io import save_parameters from glotaran.project.project import Project +from glotaran.project.project_registry import AmbiguousNameWarning from glotaran.project.result import Result from glotaran.testing.simulated_data.sequential_spectral_decay import DATASET as example_dataset from glotaran.testing.simulated_data.sequential_spectral_decay import ( @@ -415,6 +416,105 @@ def test_parameters_plugin_system_integration(tmp_path: Path, file_extension: st assert project.parameters["test_parameter"].samefile(parameter_file) +def test_data_subfolder_folders(tmp_path: Path): + """Models in sub folders are found.""" + data_file = tmp_path / "data/subfolder/test_data.ascii" + data_file.parent.mkdir(parents=True, exist_ok=True) + data_file.touch() + project = Project.open(tmp_path) + + assert len(project.data) == 1 + assert project.data["subfolder/test_data"].samefile(data_file) + + data_file2 = tmp_path / "data/subfolder/test_data.nc" + data_file2.touch() + + with pytest.warns(AmbiguousNameWarning) as records: + assert len(project.data) == 2 + assert project.data["subfolder/test_data"].samefile(data_file) + assert project.data["subfolder/test_data.nc"].samefile(data_file2) + # One warning pre accessing project.models + assert len(records) == 3, [r.message for r in records] + for record in records: + assert Path(record.filename) == Path(__file__) + assert str(record.message) == ( + "The Dataset name 'subfolder/test_data' is ambiguous since it could " + "refer to the following files: ['data/subfolder/test_data.ascii', " + "'data/subfolder/test_data.nc']\n" + "The file 'data/subfolder/test_data.nc' will be accessible by the name " + "'subfolder/test_data.nc'. \n" + "While 'subfolder/test_data' refers to the file " + "'data/subfolder/test_data.ascii'.\n" + "Rename the files with unambiguous names to silence this warning." + ) + + +def test_model_subfolder_folders(tmp_path: Path): + """Models in sub folders are found.""" + model_file = tmp_path / "models/subfolder/test_model.yaml" + model_file.parent.mkdir(parents=True, exist_ok=True) + model_file.touch() + project = Project.open(tmp_path) + + assert len(project.models) == 1 + assert project.models["subfolder/test_model"].samefile(model_file) + + model_file2 = tmp_path / "models/subfolder/test_model.yml" + model_file2.touch() + + with pytest.warns(AmbiguousNameWarning) as records: + assert len(project.models) == 2 + assert project.models["subfolder/test_model"].samefile(model_file) + assert project.models["subfolder/test_model.yml"].samefile(model_file2) + # One warning pre accessing project.models + assert len(records) == 3, [r.message for r in records] + for record in records: + assert Path(record.filename) == Path(__file__) + assert str(record.message) == ( + "The Model name 'subfolder/test_model' is ambiguous since it could " + "refer to the following files: ['models/subfolder/test_model.yaml', " + "'models/subfolder/test_model.yml']\n" + "The file 'models/subfolder/test_model.yml' will be accessible by the name " + "'subfolder/test_model.yml'. \n" + "While 'subfolder/test_model' refers to the file " + "'models/subfolder/test_model.yaml'.\n" + "Rename the files with unambiguous names to silence this warning." + ) + + +def test_parameters_subfolder_folders(tmp_path: Path): + """Parameters in sub folders are found.""" + parameter_file = tmp_path / "parameters/subfolder/test_parameter.yaml" + parameter_file.parent.mkdir(parents=True, exist_ok=True) + parameter_file.touch() + project = Project.open(tmp_path) + + assert len(project.parameters) == 1 + assert project.parameters["subfolder/test_parameter"].samefile(parameter_file) + + parameter_file2 = tmp_path / "parameters/subfolder/test_parameter.yml" + parameter_file2.touch() + + with pytest.warns(AmbiguousNameWarning) as records: + assert len(project.parameters) == 2 + assert project.parameters["subfolder/test_parameter"].samefile(parameter_file) + assert project.parameters["subfolder/test_parameter.yml"].samefile(parameter_file2) + # One warning pre accessing project.parameters + assert len(records) == 3, [r.message for r in records] + for record in records: + assert Path(record.filename) == Path(__file__) + assert str(record.message) == ( + "The Parameters name 'subfolder/test_parameter' is ambiguous since it could " + "refer to the following files: ['parameters/subfolder/test_parameter.yaml', " + "'parameters/subfolder/test_parameter.yml']\n" + "The file 'parameters/subfolder/test_parameter.yml' will be accessible by the " + "name 'subfolder/test_parameter.yml'. \n" + "While 'subfolder/test_parameter' refers to the file " + "'parameters/subfolder/test_parameter.yaml'.\n" + "Rename the files with unambiguous names to silence this warning." + ) + + def test_missing_file_errors(tmp_path: Path, project_folder: Path): """Error when accessing non existing files.""" with pytest.raises(FileNotFoundError) as exc_info: