Skip to content

Commit

Permalink
✨ Allow usage of subfolders in project parameters, models and data
Browse files Browse the repository at this point in the history
👌 Gracefully handle and warn the user about ambiguity due to files with the same name but different extensions next to each other
  • Loading branch information
s-weigand committed Feb 10, 2023
1 parent 0b5e545 commit 2ca3ddc
Show file tree
Hide file tree
Showing 2 changed files with 170 additions and 1 deletion.
71 changes: 70 additions & 1 deletion glotaran/project/project_registry.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,10 +5,58 @@
from collections.abc import Iterable
from pathlib import Path
from typing import Any
from warnings import warn

from glotaran.utils.ipython import MarkdownStr


class AmbiguousNameWarning(UserWarning):
"""Warning thrown when an item with the same name already exists.
This is the case if two files with the same name but different extensions exist next to
each other.
"""

def __init__(
self,
*,
item_name: str,
items: dict[str, Path],
item_key: str,
unique_item_key: str,
project_root_path: Path,
):
"""Initialize ``AmbiguousNameWarning`` with a formatted message.
Parameters
----------
item_name: str
Name of items in the registry (e.g. 'Parameters').
items: dict[str, Path]
Known items at this iteration point.
item_key: str
Key that would have been used if the file names weren't ambiguous.
unique_item_key: str
Unique key for the item with ambiguous file name.
project_root_path: Path
Root path of the project.
"""
ambiguous_file_names = [
value.relative_to(project_root_path).as_posix()
for key, value in items.items()
if key.startswith(item_key)
]
super().__init__(
f"The {item_name} name {item_key!r} is ambiguous since it could "
f"refer to the following files: {ambiguous_file_names}\n"
f"The file {items[unique_item_key].relative_to(project_root_path).as_posix()!r} "
f"will be accessible by the name {unique_item_key!r}. \n"
f"While {item_key!r} refers to the file "
f"{items[item_key].relative_to(project_root_path).as_posix()!r}.\n"
"Rename the files with unambiguous names to silence this warning."
)


class ProjectRegistry:
"""A registry base class."""

Expand All @@ -25,6 +73,8 @@ def __init__(
The suffixes of item files.
loader : Callable
A loader for the registry items.
item_name: str
Name of items in the registry used to format warning and exception (e.g. 'Parameters').
"""
self._directory: Path = directory
self._file_suffix: tuple[str, ...] = (
Expand Down Expand Up @@ -66,7 +116,26 @@ def items(self) -> dict[str, Path]:
dict[str, Path]
The items of the registry.
"""
items = {path.stem: path for path in self._directory.iterdir() if self.is_item(path)}
items = {}
for path in self._directory.rglob("*"):
if self.is_item(path) is True:
rel_parent_path = path.parent.relative_to(self._directory)
item_key = (rel_parent_path / path.stem).as_posix()
if item_key not in items:
items[item_key] = path
else:
unique_item_key = (rel_parent_path / path.name).as_posix()
items[unique_item_key] = path
warn(
AmbiguousNameWarning(
item_name=self._item_name,
items=items,
item_key=item_key,
unique_item_key=unique_item_key,
project_root_path=self._directory.parent,
),
stacklevel=3,
)
return dict(sorted(items.items()))

def is_item(self, path: Path) -> bool:
Expand Down
100 changes: 100 additions & 0 deletions glotaran/project/test/test_project.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
from glotaran.io import save_dataset
from glotaran.io import save_parameters
from glotaran.project.project import Project
from glotaran.project.project_registry import AmbiguousNameWarning
from glotaran.project.result import Result
from glotaran.testing.simulated_data.sequential_spectral_decay import DATASET as example_dataset
from glotaran.testing.simulated_data.sequential_spectral_decay import (
Expand Down Expand Up @@ -415,6 +416,105 @@ def test_parameters_plugin_system_integration(tmp_path: Path, file_extension: st
assert project.parameters["test_parameter"].samefile(parameter_file)


def test_data_subfolder_folders(tmp_path: Path):
"""Models in sub folders are found."""
data_file = tmp_path / "data/subfolder/test_data.ascii"
data_file.parent.mkdir(parents=True, exist_ok=True)
data_file.touch()
project = Project.open(tmp_path)

assert len(project.data) == 1
assert project.data["subfolder/test_data"].samefile(data_file)

data_file2 = tmp_path / "data/subfolder/test_data.nc"
data_file2.touch()

with pytest.warns(AmbiguousNameWarning) as records:
assert len(project.data) == 2
assert project.data["subfolder/test_data"].samefile(data_file)
assert project.data["subfolder/test_data.nc"].samefile(data_file2)
# One warning pre accessing project.models
assert len(records) == 3, [r.message for r in records]
for record in records:
assert Path(record.filename) == Path(__file__)
assert str(record.message) == (
"The Dataset name 'subfolder/test_data' is ambiguous since it could "
"refer to the following files: ['data/subfolder/test_data.ascii', "
"'data/subfolder/test_data.nc']\n"
"The file 'data/subfolder/test_data.nc' will be accessible by the name "
"'subfolder/test_data.nc'. \n"
"While 'subfolder/test_data' refers to the file "
"'data/subfolder/test_data.ascii'.\n"
"Rename the files with unambiguous names to silence this warning."
)


def test_model_subfolder_folders(tmp_path: Path):
"""Models in sub folders are found."""
model_file = tmp_path / "models/subfolder/test_model.yaml"
model_file.parent.mkdir(parents=True, exist_ok=True)
model_file.touch()
project = Project.open(tmp_path)

assert len(project.models) == 1
assert project.models["subfolder/test_model"].samefile(model_file)

model_file2 = tmp_path / "models/subfolder/test_model.yml"
model_file2.touch()

with pytest.warns(AmbiguousNameWarning) as records:
assert len(project.models) == 2
assert project.models["subfolder/test_model"].samefile(model_file)
assert project.models["subfolder/test_model.yml"].samefile(model_file2)
# One warning pre accessing project.models
assert len(records) == 3, [r.message for r in records]
for record in records:
assert Path(record.filename) == Path(__file__)
assert str(record.message) == (
"The Model name 'subfolder/test_model' is ambiguous since it could "
"refer to the following files: ['models/subfolder/test_model.yaml', "
"'models/subfolder/test_model.yml']\n"
"The file 'models/subfolder/test_model.yml' will be accessible by the name "
"'subfolder/test_model.yml'. \n"
"While 'subfolder/test_model' refers to the file "
"'models/subfolder/test_model.yaml'.\n"
"Rename the files with unambiguous names to silence this warning."
)


def test_parameters_subfolder_folders(tmp_path: Path):
"""Parameters in sub folders are found."""
parameter_file = tmp_path / "parameters/subfolder/test_parameter.yaml"
parameter_file.parent.mkdir(parents=True, exist_ok=True)
parameter_file.touch()
project = Project.open(tmp_path)

assert len(project.parameters) == 1
assert project.parameters["subfolder/test_parameter"].samefile(parameter_file)

parameter_file2 = tmp_path / "parameters/subfolder/test_parameter.yml"
parameter_file2.touch()

with pytest.warns(AmbiguousNameWarning) as records:
assert len(project.parameters) == 2
assert project.parameters["subfolder/test_parameter"].samefile(parameter_file)
assert project.parameters["subfolder/test_parameter.yml"].samefile(parameter_file2)
# One warning pre accessing project.parameters
assert len(records) == 3, [r.message for r in records]
for record in records:
assert Path(record.filename) == Path(__file__)
assert str(record.message) == (
"The Parameters name 'subfolder/test_parameter' is ambiguous since it could "
"refer to the following files: ['parameters/subfolder/test_parameter.yaml', "
"'parameters/subfolder/test_parameter.yml']\n"
"The file 'parameters/subfolder/test_parameter.yml' will be accessible by the "
"name 'subfolder/test_parameter.yml'. \n"
"While 'subfolder/test_parameter' refers to the file "
"'parameters/subfolder/test_parameter.yaml'.\n"
"Rename the files with unambiguous names to silence this warning."
)


def test_missing_file_errors(tmp_path: Path, project_folder: Path):
"""Error when accessing non existing files."""
with pytest.raises(FileNotFoundError) as exc_info:
Expand Down

0 comments on commit 2ca3ddc

Please sign in to comment.