Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Pydantic model #487

Merged
merged 33 commits into from
Oct 22, 2024
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
Show all changes
33 commits
Select commit Hold shift + click to select a range
64848b9
adding this flag due to https://github.com/astral-sh/ruff/issues/5434
zain-sohail Jul 31, 2024
eb5ee3f
first pydantic model for config
zain-sohail Jul 31, 2024
600ef55
without typeddict
zain-sohail Sep 14, 2024
24998df
remove defaults
zain-sohail Sep 14, 2024
bea6079
update lock file with pydantic
zain-sohail Sep 14, 2024
c7aac14
nest the models
zain-sohail Sep 15, 2024
c179d9a
add copytool in configmodel and other attrs
zain-sohail Sep 15, 2024
d7cab5f
update config files to conform to model
zain-sohail Sep 15, 2024
75f1200
use configmodel in processor class
zain-sohail Sep 16, 2024
8ef5807
update modules to new config
zain-sohail Sep 16, 2024
02aea0a
fix some config problems
zain-sohail Sep 16, 2024
d84f6e4
update lockfile
rettigl Oct 7, 2024
5c0f75e
Merge remote-tracking branch 'origin/v1_feature_branch' into pydantic…
rettigl Oct 12, 2024
1146b4f
fix tests for calibrators
rettigl Oct 12, 2024
a1a9b27
make model fail on extra parameters
rettigl Oct 12, 2024
e3577bb
fix flash loader
rettigl Oct 12, 2024
8054bfa
fix calibrator tests again
rettigl Oct 12, 2024
35dcd11
fix processor tests
rettigl Oct 12, 2024
738cd85
fix sxp loader
rettigl Oct 12, 2024
bc6f457
update notebooks
rettigl Oct 12, 2024
bc40fea
fix remaining tests
rettigl Oct 12, 2024
b6db85f
add config model for copy tool
rettigl Oct 13, 2024
bdc5bac
Add further type refinements to config model
rettigl Oct 13, 2024
022dc69
add tests for config model
zain-sohail Oct 14, 2024
52a11dc
fix remaining tutorials
rettigl Oct 14, 2024
9edbea2
fix config model tests
rettigl Oct 15, 2024
ad8705d
add review suggestions
rettigl Oct 15, 2024
7aa7231
fix reporting of energy/delay offsets
rettigl Oct 15, 2024
ec20e03
fix handling of nexus input files and tests
rettigl Oct 15, 2024
4e535cf
fix error reporting
rettigl Oct 15, 2024
7a7441c
fix sxp notebook
rettigl Oct 17, 2024
fb04ce6
changes from review
rettigl Oct 21, 2024
fea015a
Move static (#511)
zain-sohail Oct 22, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .cspell/custom-dictionary.txt
Original file line number Diff line number Diff line change
Expand Up @@ -126,6 +126,7 @@ ftype
fwhm
genindex
getgid
getgrgid
getmtime
gpfs
griddata
Expand Down
39 changes: 32 additions & 7 deletions sed/config/config_model.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
"""Pydantic model to validate the config for SED package."""
import grp
from collections.abc import Sequence
from typing import Literal
from typing import Optional
Expand All @@ -18,29 +19,45 @@
## https://github.com/astral-sh/ruff/issues/5434


class Paths(BaseModel):
# Pydantic model validating the `paths` entry of the core config section
# (referenced by CoreModel.paths).
class PathsModel(BaseModel):
# Unknown keys are rejected rather than silently ignored.
model_config = ConfigDict(extra="forbid")

# Required. Pydantic's DirectoryPath only accepts a path to an existing directory.
# Presumably the location of the raw data -- confirm against the loaders.
raw: DirectoryPath
# Optional. Either an existing directory or a path that does not exist yet (NewPath).
# Presumably the output location for processed data -- confirm against the loaders.
processed: Optional[Union[DirectoryPath, NewPath]] = None


class CopyToolModel(BaseModel):
    """Validate the ``core.copy_tool`` section of the config.

    The validated entries are forwarded as keyword arguments to the copy tool
    by the processor, so unknown keys are rejected here (``extra="forbid"``)
    rather than surfacing later as an unexpected-keyword error.
    """

    model_config = ConfigDict(extra="forbid")

    # Path to the root of the source data directory (must exist).
    source: DirectoryPath
    # Path to the root of the local data storage (must exist).
    dest: DirectoryPath
    # NOTE(review): semantics of safety_margin are defined by the copy tool
    # itself and are not visible here -- confirm before documenting further.
    safety_margin: Optional[float] = None
    # Group id to set for copied files and folders.
    gid: Optional[int] = None
    # NOTE(review): presumably a scheduler name passed through to the copy
    # tool -- confirm against the copy tool implementation.
    scheduler: Optional[str] = None

    @field_validator("gid")
    @classmethod
    def validate_gid(cls, v: Optional[int]) -> Optional[int]:
        """Check that the gid, if given, refers to an existing group.

        Args:
            v: Group id from the config, or ``None`` if explicitly unset.

        Returns:
            The unchanged value.

        Raises:
            ValueError: If no group with this id exists on the system.
        """
        # The validator also runs for an explicit ``gid: null``; there is
        # nothing to look up in that case.
        if v is None:
            return v
        try:
            grp.getgrgid(v)
        except (KeyError, OverflowError) as exc:
            # KeyError: unknown group. OverflowError: value outside the
            # platform's gid range. Both mean the group cannot exist.
            raise ValueError(f"Invalid value {v} for gid. Group not found.") from exc
        return v


class CoreModel(BaseModel):
model_config = ConfigDict(extra="forbid")

loader: str
verbose: Optional[bool] = None
paths: Optional[Paths] = None
paths: Optional[PathsModel] = None
num_cores: Optional[int] = None
year: Optional[int] = None
beamtime_id: Optional[Union[int, str]] = None
instrument: Optional[str] = None
beamline: Optional[str] = None
# TODO: move copy tool to a separate model
use_copy_tool: Optional[bool] = None
copy_tool_source: Optional[DirectoryPath] = None
copy_tool_dest: Optional[DirectoryPath] = None
copy_tool_kwds: Optional[dict] = None
copy_tool: Optional[CopyToolModel] = None

@field_validator("loader")
@classmethod
Expand All @@ -51,6 +68,14 @@ def validate_loader(cls, v: str) -> str:
raise ValueError(f"Invalid loader {v}. Available loaders are: {names}")
return v

@field_validator("num_cores")
rettigl marked this conversation as resolved.
Show resolved Hide resolved
@classmethod
def validate_num_cores(cls, v: Optional[int]) -> Optional[int]:
    """Check that num_cores, if given, is a positive integer.

    Args:
        v: Number of cores from the config, or ``None`` if explicitly unset.

    Returns:
        The unchanged value.

    Raises:
        ValueError: If the value is smaller than 1.
    """
    # An explicit ``num_cores: null`` reaches this validator as None;
    # comparing None with an int would raise a bare TypeError.
    if v is not None and v < 1:
        raise ValueError(f"Invalid value {v} for num_cores. Needs to be > 0.")
    return v


class ColumnsModel(BaseModel):
model_config = ConfigDict(extra="forbid")
Expand Down
16 changes: 7 additions & 9 deletions sed/config/mpes_example_config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -4,15 +4,13 @@ core:
# Number of parallel threads to use for parallelized jobs (e.g. binning, data conversion, copy, ...)
num_cores: 20
# Option to use the copy tool to mirror data to a local storage location before processing.
use_copy_tool: False
# path to the root of the source data directory
copy_tool_source: null # "/path/to/data/"
# path to the root or the local data storage
copy_tool_dest: null # "/path/to/localDataStore/"
# optional keywords for the copy tool:
copy_tool_kwds:
# group id to set for copied files and folders
gid: 1001
# copy_tool:
# # path to the root of the source data directory
# source: "/path/to/data/"
# # path to the root of the local data storage
# dest: "/path/to/localDataStore/"
# # group id to set for copied files and folders
# gid: 1000

dataframe:
# hdf5 group name containing eventIDs occurring at every millisecond (used to calculate timestamps)
Expand Down
15 changes: 5 additions & 10 deletions sed/core/processor.py
rettigl marked this conversation as resolved.
Show resolved Hide resolved
Original file line number Diff line number Diff line change
Expand Up @@ -161,22 +161,17 @@ def __init__(
verbose=self._verbose,
)

self.use_copy_tool = self._config.get("core", {}).get(
"use_copy_tool",
False,
)
self.use_copy_tool = "copy_tool" in self._config["core"]
if self.use_copy_tool:
try:
self.ct = CopyTool(
source=self._config["core"]["copy_tool_source"],
dest=self._config["core"]["copy_tool_dest"],
num_cores=self._config["core"]["num_cores"],
**self._config["core"].get("copy_tool_kwds", {}),
**self._config["core"]["copy_tool"],
)
logger.debug(
f"Initialized copy tool: Copy file from "
f"'{self._config['core']['copy_tool_source']}' "
f"to '{self._config['core']['copy_tool_dest']}'.",
f"Initialized copy tool: Copy files from "
f"'{self._config['core']['copy_tool']['source']}' "
f"to '{self._config['core']['copy_tool']['dest']}'.",
)
except KeyError:
self.use_copy_tool = False
Expand Down
1 change: 0 additions & 1 deletion sed/loader/mpes/loader.py
Original file line number Diff line number Diff line change
Expand Up @@ -72,7 +72,6 @@ def hdf5_to_dataframe(

electron_channels = []
column_names = []
print("Print values: ", channels)
for name, channel in channels.items():
if channel["format"] == "per_electron":
if channel["dataset_key"] in test_proc:
Expand Down
18 changes: 2 additions & 16 deletions tests/test_processor.py
Original file line number Diff line number Diff line change
Expand Up @@ -219,7 +219,7 @@ def test_attributes_setters() -> None:

def test_copy_tool() -> None:
"""Test the copy tool functionality in the processor"""
config = {"core": {"loader": "mpes", "use_copy_tool": True}}
config: dict[str, dict[str, Any]] = {"core": {"loader": "mpes"}}
processor = SedProcessor(
config=config,
folder_config={},
Expand All @@ -231,10 +231,7 @@ def test_copy_tool() -> None:
config = {
"core": {
"loader": "mpes",
"use_copy_tool": True,
"copy_tool_source": source_folder,
"copy_tool_dest": dest_folder,
"copy_tool_kwds": {"gid": os.getgid()},
"copy_tool": {"source": source_folder, "dest": dest_folder, "gid": os.getgid()},
},
}
processor = SedProcessor(
Expand All @@ -248,17 +245,6 @@ def test_copy_tool() -> None:
processor.load(files=files)
assert processor.files[0].find(dest_folder) > -1

# test illegal keywords:
config["core"]["copy_tool_kwds"] = {"gid": os.getgid(), "illegal_keyword": True}
with pytest.raises(TypeError):
processor = SedProcessor(
config=config,
folder_config={},
user_config={},
system_config={},
verbose=True,
)


feature4 = np.array([[203.2, 341.96], [299.16, 345.32], [304.38, 149.88], [199.52, 152.48]])
feature5 = np.array(
Expand Down