diff --git a/poetry.lock b/poetry.lock index c94a7319..049af2da 100644 --- a/poetry.lock +++ b/poetry.lock @@ -265,13 +265,13 @@ test-all = ["astropy[test]", "coverage[toml]", "ipython (>=4.2)", "objgraph", "s [[package]] name = "astropy-iers-data" -version = "0.2024.9.12.13.29.57" +version = "0.2024.9.16.0.32.21" description = "IERS Earth Rotation and Leap Second tables for the astropy core package" optional = false python-versions = ">=3.8" files = [ - {file = "astropy_iers_data-0.2024.9.12.13.29.57-py3-none-any.whl", hash = "sha256:ca580347f084a9d9a2e30cbd8cd44665cef52c69e5a1c1d129abb645eb970fea"}, - {file = "astropy_iers_data-0.2024.9.12.13.29.57.tar.gz", hash = "sha256:9ae1d147f47f18c335984781c7119e1c12aca47b89653d609980f75c505e7708"}, + {file = "astropy_iers_data-0.2024.9.16.0.32.21-py3-none-any.whl", hash = "sha256:adf111e1b596470c4437fa44cf767e56f6d4bc2e93068871fd0b30c73476d430"}, + {file = "astropy_iers_data-0.2024.9.16.0.32.21.tar.gz", hash = "sha256:2ff6fe868a623e616953a432698b05dd6adac9683d21ac780bfbb94e78f7c344"}, ] [package.extras] @@ -1260,13 +1260,13 @@ test = ["blosc2 (>=2.5.1)", "blosc2-grok (>=0.2.2)"] [[package]] name = "identify" -version = "2.6.0" +version = "2.6.1" description = "File identification library for Python" optional = false python-versions = ">=3.8" files = [ - {file = "identify-2.6.0-py2.py3-none-any.whl", hash = "sha256:e79ae4406387a9d300332b5fd366d8994f1525e8414984e1a59e058b2eda2dd0"}, - {file = "identify-2.6.0.tar.gz", hash = "sha256:cb171c685bdc31bcc4c1734698736a7d5b6c8bf2e0c15117f4d469c8640ae5cf"}, + {file = "identify-2.6.1-py2.py3-none-any.whl", hash = "sha256:53863bcac7caf8d2ed85bd20312ea5dcfc22226800f6d6881f232d861db5a8f0"}, + {file = "identify-2.6.1.tar.gz", hash = "sha256:91478c5fb7c3aac5ff7bf9b4344f803843dc586832d5f110d672b19aa1984c98"}, ] [package.extras] @@ -1274,13 +1274,13 @@ license = ["ukkonen"] [[package]] name = "idna" -version = "3.9" +version = "3.10" description = "Internationalized Domain Names in 
Applications (IDNA)" optional = false python-versions = ">=3.6" files = [ - {file = "idna-3.9-py3-none-any.whl", hash = "sha256:69297d5da0cc9281c77efffb4e730254dd45943f45bbfb461de5991713989b1e"}, - {file = "idna-3.9.tar.gz", hash = "sha256:e5c5dafde284f26e9e0f28f6ea2d6400abd5ca099864a67f576f3981c6476124"}, + {file = "idna-3.10-py3-none-any.whl", hash = "sha256:946d195a0d259cbba61165e88e65941f16e9b36ea6ddb97f00452bae8b1287d3"}, + {file = "idna-3.10.tar.gz", hash = "sha256:12f65c9b470abda6dc35cf8e63cc574b1c52b11df2c86030af0ac09b01b13ea9"}, ] [package.extras] @@ -5180,4 +5180,4 @@ notebook = ["ipykernel", "jupyter", "jupyterlab", "jupyterlab-h5web"] [metadata] lock-version = "2.0" python-versions = ">=3.9, <3.12.3, !=3.11.9" -content-hash = "0010412c146fd67325fff1d8af1a2185e2b970ee128d265bac45862604f08463" +content-hash = "2961767a4ec02006cc97b4c48a7e993c5f488e5b8009c8264ca883da497fe489" diff --git a/sed/calibrator/delay.py b/sed/calibrator/delay.py index c4d8f797..9eaea99c 100644 --- a/sed/calibrator/delay.py +++ b/sed/calibrator/delay.py @@ -55,10 +55,10 @@ def __init__( else: logger.handlers[0].setLevel(WARNING) - self.adc_column: str = self._config["dataframe"].get("adc_column", None) - self.delay_column: str = self._config["dataframe"]["delay_column"] - self.corrected_delay_column = self._config["dataframe"].get( - "corrected_delay_column", + self.adc_column: str = config["dataframe"]["columns"]["adc"] + self.delay_column: str = config["dataframe"]["columns"]["delay"] + self.corrected_delay_column = self._config["dataframe"]["columns"].get( + "corrected_delay", self.delay_column, ) self.calibration: dict[str, Any] = self._config["delay"].get("calibration", {}) @@ -87,9 +87,9 @@ def append_delay_axis( df (pd.DataFrame | dask.dataframe.DataFrame): The dataframe where to apply the delay calibration to. adc_column (str, optional): Source column for delay calibration. - Defaults to config["dataframe"]["adc_column"]. + Defaults to config["dataframe"]["columns"]["adc"]. 
delay_column (str, optional): Destination column for delay calibration. - Defaults to config["dataframe"]["delay_column"]. + Defaults to config["dataframe"]["columns"]["delay"]. calibration (dict, optional): Calibration dictionary with parameters for delay calibration. adc_range (tuple | list | np.ndarray, optional): The range of used diff --git a/sed/calibrator/energy.py b/sed/calibrator/energy.py index 2b6bf946..82b17a81 100644 --- a/sed/calibrator/energy.py +++ b/sed/calibrator/energy.py @@ -111,12 +111,12 @@ def __init__( self.peaks: np.ndarray = np.asarray([]) self.calibration: dict[str, Any] = self._config["energy"].get("calibration", {}) - self.tof_column = self._config["dataframe"]["tof_column"] - self.tof_ns_column = self._config["dataframe"].get("tof_ns_column", None) - self.corrected_tof_column = self._config["dataframe"]["corrected_tof_column"] - self.energy_column = self._config["dataframe"]["energy_column"] - self.x_column = self._config["dataframe"]["x_column"] - self.y_column = self._config["dataframe"]["y_column"] + self.tof_column = self._config["dataframe"]["columns"]["tof"] + self.tof_ns_column = self._config["dataframe"]["columns"].get("tof_ns", None) + self.corrected_tof_column = self._config["dataframe"]["columns"]["corrected_tof"] + self.energy_column = self._config["dataframe"]["columns"]["energy"] + self.x_column = self._config["dataframe"]["columns"]["x"] + self.y_column = self._config["dataframe"]["columns"]["y"] self.binwidth: float = self._config["dataframe"]["tof_binwidth"] self.binning: int = self._config["dataframe"]["tof_binning"] self.x_width = self._config["energy"]["x_width"] @@ -125,7 +125,7 @@ def __init__( self.tof_fermi = self._config["energy"]["tof_fermi"] / self.binning self.color_clip = self._config["energy"]["color_clip"] self.sector_delays = self._config["dataframe"].get("sector_delays", None) - self.sector_id_column = self._config["dataframe"].get("sector_id_column", None) + self.sector_id_column = 
self._config["dataframe"]["columns"].get("sector_id", None) self.offsets: dict[str, Any] = self._config["energy"].get("offsets", {}) self.correction: dict[str, Any] = self._config["energy"].get("correction", {}) @@ -202,7 +202,7 @@ def bin_data( Args: data_files (list[str]): list of file names to bin axes (list[str], optional): bin axes. Defaults to - config["dataframe"]["tof_column"]. + config["dataframe"]["columns"]["tof"]. bins (list[int], optional): number of bins. Defaults to config["energy"]["bins"]. ranges (Sequence[tuple[float, float]], optional): bin ranges. @@ -787,9 +787,9 @@ def append_energy_axis( df (pd.DataFrame | dask.dataframe.DataFrame): Dataframe to apply the energy axis calibration to. tof_column (str, optional): Label of the source column. - Defaults to config["dataframe"]["tof_column"]. + Defaults to config["dataframe"]["columns"]["tof"]. energy_column (str, optional): Label of the destination column. - Defaults to config["dataframe"]["energy_column"]. + Defaults to config["dataframe"]["columns"]["energy"]. calibration (dict, optional): Calibration dictionary. If provided, overrides calibration from class or config. Defaults to self.calibration or config["energy"]["calibration"]. @@ -933,9 +933,9 @@ def append_tof_ns_axis( Args: df (pd.DataFrame | dask.dataframe.DataFrame): Dataframe to convert. tof_column (str, optional): Name of the column containing the - time-of-flight steps. Defaults to config["dataframe"]["tof_column"]. + time-of-flight steps. Defaults to config["dataframe"]["columns"]["tof"]. tof_ns_column (str, optional): Name of the column to store the - time-of-flight in nanoseconds. Defaults to config["dataframe"]["tof_ns_column"]. + time-of-flight in nanoseconds. Defaults to config["dataframe"]["columns"]["tof_ns"]. binwidth (float, optional): Time-of-flight binwidth in ns. Defaults to config["energy"]["tof_binwidth"]. binning (int, optional): Time-of-flight binning factor. 
@@ -1364,9 +1364,9 @@ def apply_energy_correction( df (pd.DataFrame | dask.dataframe.DataFrame): The dataframe where to apply the energy correction to. tof_column (str, optional): Name of the source column to convert. - Defaults to config["dataframe"]["tof_column"]. + Defaults to config["dataframe"]["columns"]["tof"]. new_tof_column (str, optional): Name of the destination column to convert. - Defaults to config["dataframe"]["corrected_tof_column"]. + Defaults to config["dataframe"]["columns"]["corrected_tof"]. correction_type (str, optional): Type of correction to apply to the TOF axis. Valid values are: @@ -1477,9 +1477,9 @@ def align_dld_sectors( Args: df (dask.dataframe.DataFrame): Dataframe to use. tof_column (str, optional): Name of the column containing the time-of-flight values. - Defaults to config["dataframe"]["tof_column"]. + Defaults to config["dataframe"]["columns"]["tof"]. sector_id_column (str, optional): Name of the column containing the sector id values. - Defaults to config["dataframe"]["sector_id_column"]. + Defaults to config["dataframe"]["columns"]["sector_id"]. sector_delays (np.ndarray, optional): Array containing the sector delays. Defaults to config["dataframe"]["sector_delays"]. 
diff --git a/sed/calibrator/momentum.py b/sed/calibrator/momentum.py index bae3bf65..1b527407 100644 --- a/sed/calibrator/momentum.py +++ b/sed/calibrator/momentum.py @@ -127,12 +127,12 @@ def __init__( self.adjust_params: dict[str, Any] = {} self.calibration: dict[str, Any] = self._config["momentum"].get("calibration", {}) - self.x_column = self._config["dataframe"]["x_column"] - self.y_column = self._config["dataframe"]["y_column"] - self.corrected_x_column = self._config["dataframe"]["corrected_x_column"] - self.corrected_y_column = self._config["dataframe"]["corrected_y_column"] - self.kx_column = self._config["dataframe"]["kx_column"] - self.ky_column = self._config["dataframe"]["ky_column"] + self.x_column = self._config["dataframe"]["columns"]["x"] + self.y_column = self._config["dataframe"]["columns"]["y"] + self.corrected_x_column = self._config["dataframe"]["columns"]["corrected_x"] + self.corrected_y_column = self._config["dataframe"]["columns"]["corrected_y"] + self.kx_column = self._config["dataframe"]["columns"]["kx"] + self.ky_column = self._config["dataframe"]["columns"]["ky"] self._state: int = 0 @@ -1734,15 +1734,15 @@ def apply_corrections( df (pd.DataFrame | dask.dataframe.DataFrame): Dataframe to apply the distortion correction to. x_column (str, optional): Label of the 'X' column before momentum - distortion correction. Defaults to config["momentum"]["x_column"]. + distortion correction. Defaults to config["dataframe"]["columns"]["x"]. y_column (str, optional): Label of the 'Y' column before momentum - distortion correction. Defaults to config["momentum"]["y_column"]. + distortion correction. Defaults to config["dataframe"]["columns"]["y"]. new_x_column (str, optional): Label of the 'X' column after momentum distortion correction. - Defaults to config["momentum"]["corrected_x_column"]. + Defaults to config["dataframe"]["columns"]["corrected_x"]. new_y_column (str, optional): Label of the 'Y' column after momentum distortion correction. 
- Defaults to config["momentum"]["corrected_y_column"]. + Defaults to config["dataframe"]["columns"]["corrected_y"]. Returns: tuple[pd.DataFrame | dask.dataframe.DataFrame, dict]: Dataframe with @@ -1883,15 +1883,15 @@ def append_k_axis( df (pd.DataFrame | dask.dataframe.DataFrame): Dataframe to apply the distortion correction to. x_column (str, optional): Label of the source 'X' column. - Defaults to config["momentum"]["corrected_x_column"] or - config["momentum"]["x_column"] (whichever is present). + Defaults to config["dataframe"]["columns"]["corrected_x"] or + config["dataframe"]["columns"]["x"] (whichever is present). y_column (str, optional): Label of the source 'Y' column. - Defaults to config["momentum"]["corrected_y_column"] or - config["momentum"]["y_column"] (whichever is present). + Defaults to config["dataframe"]["columns"]["corrected_y"] or + config["dataframe"]["columns"]["y"] (whichever is present). new_x_column (str, optional): Label of the destination 'X' column after - momentum calibration. Defaults to config["momentum"]["kx_column"]. + momentum calibration. Defaults to config["dataframe"]["columns"]["kx"]. new_y_column (str, optional): Label of the destination 'Y' column after - momentum calibration. Defaults to config["momentum"]["ky_column"]. + momentum calibration. Defaults to config["dataframe"]["columns"]["ky"]. calibration (dict, optional): Dictionary containing calibration parameters. Defaults to 'self.calibration' or config["momentum"]["calibration"]. suppress_output (bool, optional): Option to suppress output of diagnostic information. 
diff --git a/sed/config/config_model.py b/sed/config/config_model.py index 71840198..ae2ef64b 100644 --- a/sed/config/config_model.py +++ b/sed/config/config_model.py @@ -6,6 +6,7 @@ from typing import Union from pydantic import BaseModel +from pydantic import ConfigDict from pydantic import DirectoryPath from pydantic import Field from pydantic import field_validator @@ -101,6 +102,8 @@ class DataframeModel(BaseModel): sector_id_reserved_bits: Optional[int] = None sector_delays: Optional[Sequence[int]] = None + # write validator for model so that x_column gets converted to columns: x + class BinningModel(BaseModel): hist_mode: str @@ -113,7 +116,7 @@ class BinningModel(BaseModel): class HistogramModel(BaseModel): bins: Sequence[int] axes: Sequence[str] - ranges: Sequence[Sequence[int]] + ranges: Sequence[tuple[float, float]] class StaticModel(BaseModel): @@ -250,3 +253,5 @@ class ConfigModel(BaseModel): metadata: Optional[MetadataModel] = None nexus: Optional[NexusModel] = None static: Optional[StaticModel] = None + + model_config = ConfigDict(extra="forbid") diff --git a/sed/config/flash_example_config.yaml b/sed/config/flash_example_config.yaml index 03867513..185fac78 100644 --- a/sed/config/flash_example_config.yaml +++ b/sed/config/flash_example_config.yaml @@ -1,13 +1,4 @@ # This file contains the default configuration for the flash loader. - -# The paths to the raw and parquet data directories. If these are not -# provided, the loader will try to find the data based on year beamtimeID etc -paths: - # location of the raw data. - raw: "" - # location of the intermediate parquet files. - processed: "" - core: # defines the loader loader: 'flash' @@ -21,6 +12,13 @@ core: year: 2023 # the instrument used instrument: hextof # hextof, wespe, etc + # The paths to the raw and parquet data directories. If these are not + # provided, the loader will try to find the data based on year beamtimeID etc + # paths: + # # location of the raw data. 
+ # raw: "" + # # location of the intermediate parquet files. + # processed: "" binning: # Histogram computation mode to use. diff --git a/sed/config/mpes_example_config.yaml b/sed/config/mpes_example_config.yaml index f191c97e..3d1d3e39 100644 --- a/sed/config/mpes_example_config.yaml +++ b/sed/config/mpes_example_config.yaml @@ -6,9 +6,9 @@ core: # Option to use the copy tool to mirror data to a local storage location before processing. use_copy_tool: False # path to the root of the source data directory - copy_tool_source: "/path/to/data/" + copy_tool_source: null # "/path/to/data/" # path to the root or the local data storage - copy_tool_dest: "/path/to/localDataStore/" + copy_tool_dest: null # "/path/to/localDataStore/" # optional keywords for the copy tool: copy_tool_kwds: # group id to set for copied files and folders diff --git a/sed/core/config.py b/sed/core/config.py index bd41fc24..df44cfd1 100644 --- a/sed/core/config.py +++ b/sed/core/config.py @@ -30,8 +30,8 @@ def parse_config( system_config: dict | str = None, default_config: (dict | str) = f"{package_dir}/config/default.yaml", verbose: bool = True, - model: bool = False, -) -> dict | ConfigModel: + verify_config: bool = True, +) -> dict: """Load the config dictionary from a file, or pass the provided config dictionary. The content of the loaded config dictionary is then completed from a set of pre-configured config files in hierarchical order, by adding missing items. These additional config files @@ -57,13 +57,13 @@ def parse_config( or file path. The loaded dictionary is completed with the default values. Defaults to *package_dir*/config/default.yaml". verbose (bool, optional): Option to report loaded config files. Defaults to True. - model (bool, optional): Option to return the config model instead of the dictionary. + verify_config (bool, optional): Option to verify config file. Defaults to True. Raises: TypeError: Raised if the provided file is neither *json* nor *yaml*. 
FileNotFoundError: Raised if the provided file is not found. Returns: - dict: Loaded and possibly completed config dictionary. + dict: Loaded, completed and (if verify_config is True) validated config dictionary. """ if config is None: config = {} @@ -144,10 +144,10 @@ def parse_config( base_dictionary=default_dict, ) + if not verify_config: + return config_dict # Run the config through the ConfigModel to ensure it is valid config_model = ConfigModel(**config_dict) - if model: - return config_model return config_model.model_dump() diff --git a/sed/dataset/dataset.py b/sed/dataset/dataset.py index 15bf8b4f..fca7fc83 100644 --- a/sed/dataset/dataset.py +++ b/sed/dataset/dataset.py @@ -55,6 +55,7 @@ def load_datasets_dict() -> dict: system_config={}, default_config=DatasetsManager.json_path["module"], verbose=False, + verify_config=False, ) @staticmethod diff --git a/tests/data/loader/flash/config.yaml b/tests/data/loader/flash/config.yaml index 19e01a2a..d4209636 100644 --- a/tests/data/loader/flash/config.yaml +++ b/tests/data/loader/flash/config.yaml @@ -61,7 +61,21 @@ dataframe: sector_delays: [0., 0., 0., 0., 0., 0., 0., 0.] 
jitter_cols: ["dldPosX", "dldPosY", "dldTimeSteps"] - + columns: + x: dldPosX + corrected_x: X + kx: kx + y: dldPosY + corrected_y: Y + ky: ky + tof: dldTimeSteps + tof_ns: dldTime + corrected_tof: tm + timestamp: timeStamp + auxiliary: dldAux + sector_id: dldSectorID + delay: delayStage + corrected_delay: pumpProbeTime units: dldPosX: 'step' dldPosY: 'step' diff --git a/tests/test_config.py b/tests/test_config.py index df875b45..0ab54cbb 100644 --- a/tests/test_config.py +++ b/tests/test_config.py @@ -36,7 +36,7 @@ def test_default_config() -> None: """Test the config loader for the default config.""" - config = parse_config() + config = parse_config(verify_config=False) assert isinstance(config, dict) for key in default_config_keys: assert key in config.keys() @@ -49,7 +49,7 @@ def test_default_config() -> None: def test_load_dict() -> None: """Test the config loader for a dict.""" config_dict = {"test_entry": True} - config = parse_config(config_dict) + config = parse_config(config_dict, verify_config=False) assert isinstance(config, dict) for key in default_config_keys: assert key in config.keys() @@ -69,7 +69,14 @@ def test_load_does_not_modify() -> None: default_dict = {"a": 1, "b": {"c": 13}, "c": {"e": 11}} default_copy = copy.deepcopy(default_dict) - parse_config(config_dict, folder_dict, user_dict, system_dict, default_dict) + parse_config( + config_dict, + folder_dict, + user_dict, + system_dict, + default_dict, + verify_config=False, + ) assert config_dict == config_copy assert folder_dict == folder_copy assert user_dict == user_copy