diff --git a/sed/core/processor.py b/sed/core/processor.py index a012aebf..706dee0b 100644 --- a/sed/core/processor.py +++ b/sed/core/processor.py @@ -285,8 +285,7 @@ def load( if metadata is None: metadata = {} if dataframe is not None: - self._dataframe = dataframe - self._timed_dataframe = None + timed_dataframe = kwds.pop("timed_dataframe", None) elif runs is not None: # If runs are provided, we only use the copy tool if also folder is provided. # In that case, we copy the whole provided base folder tree, and pass the copied @@ -314,7 +313,6 @@ def load( collect_metadata=collect_metadata, **kwds, ) - elif files is not None: dataframe, timed_dataframe, metadata = self.loader.read_dataframe( files=cast(List[str], self.cpy(files)), @@ -322,7 +320,6 @@ def load( collect_metadata=collect_metadata, **kwds, ) - else: raise ValueError( "Either 'dataframe', 'files', 'folder', or 'runs' needs to be provided!", @@ -1417,11 +1414,9 @@ def compute( print( f"Calculate normalization histogram for axis '{axis}'...", ) - self._normalization_histogram = ( - self.get_normalization_histogram( - axis=axis, - df_partitions=df_partitions, - ) + self._normalization_histogram = self.get_normalization_histogram( + axis=axis, + df_partitions=df_partitions, ) # if the axes are named correctly, xarray figures out the normalization correctly self._normalized = self._binned / self._normalization_histogram @@ -1443,9 +1438,7 @@ def compute( ) self._normalized.attrs["units"] = "counts/second" - self._normalized.attrs[ - "long_name" - ] = "photoelectron counts per second" + self._normalized.attrs["long_name"] = "photoelectron counts per second" self._normalized.attrs["metadata"] = self._attributes.metadata return self._normalized @@ -1496,41 +1489,33 @@ def get_normalization_histogram( if use_time_stamps or self._timed_dataframe is None: if df_partitions is not None: - self._normalization_histogram = ( - normalization_histogram_from_timestamps( - self._dataframe.partitions[df_partitions], - axis, - self._binned.coords[axis].values, - self._config["dataframe"]["time_stamp_alias"], - ) + self._normalization_histogram = normalization_histogram_from_timestamps( + self._dataframe.partitions[df_partitions], + axis, + self._binned.coords[axis].values, + self._config["dataframe"]["time_stamp_alias"], ) else: - self._normalization_histogram = ( - normalization_histogram_from_timestamps( - self._dataframe, - axis, - self._binned.coords[axis].values, - self._config["dataframe"]["time_stamp_alias"], - ) + self._normalization_histogram = normalization_histogram_from_timestamps( + self._dataframe, + axis, + self._binned.coords[axis].values, + self._config["dataframe"]["time_stamp_alias"], ) else: if df_partitions is not None: - self._normalization_histogram = ( - normalization_histogram_from_timed_dataframe( - self._timed_dataframe.partitions[df_partitions], - axis, - self._binned.coords[axis].values, - self._config["dataframe"]["timed_dataframe_unit_time"], - ) + self._normalization_histogram = normalization_histogram_from_timed_dataframe( + self._timed_dataframe.partitions[df_partitions], + axis, + self._binned.coords[axis].values, + self._config["dataframe"]["timed_dataframe_unit_time"], ) else: - self._normalization_histogram = ( - normalization_histogram_from_timed_dataframe( - self._timed_dataframe, - axis, - self._binned.coords[axis].values, - self._config["dataframe"]["timed_dataframe_unit_time"], - ) + self._normalization_histogram = normalization_histogram_from_timed_dataframe( + self._timed_dataframe, + axis, + self._binned.coords[axis].values, + self._config["dataframe"]["timed_dataframe_unit_time"], ) return self._normalization_histogram diff --git a/sed/loader/base/loader.py b/sed/loader/base/loader.py index 1f905d82..880be88d 100644 --- a/sed/loader/base/loader.py +++ b/sed/loader/base/loader.py @@ -147,28 +147,6 @@ def get_files_from_run_id( """ raise NotImplementedError - @abstractmethod - def get_files_from_run_id( - self, - run_id: str, - folders: Union[str, Sequence[str]] = None, - extension: str = None, - **kwds, - ) -> List[str]: - """Locate the files for a given run identifier. - - Args: - run_id (str): The run identifier to locate. - folders (Union[str, Sequence[str]], optional): The directory(ies) where the raw - data is located. Defaults to None. - extension (str, optional): The file extension. Defaults to None. - kwds: Keyword arguments - - Return: - List[str]: List of files for the given run. - """ - raise NotImplementedError - @abstractmethod def get_count_rate( self, diff --git a/sed/loader/flash/loader.py b/sed/loader/flash/loader.py index 8d4f5cf2..b0a4c124 100644 --- a/sed/loader/flash/loader.py +++ b/sed/loader/flash/loader.py @@ -595,7 +595,7 @@ def read_dataframe( metadata: dict = None, collect_metadata: bool = False, **kwds, - ) -> Tuple[dd.DataFrame, dict]: + ) -> Tuple[dd.DataFrame, dd.DataFrame, dict]: """ Read express data from the DAQ, generating a parquet in between. @@ -712,7 +712,7 @@ def read_dataframe( metadata = self.parse_metadata() if collect_metadata else {} - return dataframe, metadata + return dataframe, None, metadata def get_files_from_run_id( self, diff --git a/tests/calibrator/test_energy.py b/tests/calibrator/test_energy.py index 5793d42e..86c48e57 100644 --- a/tests/calibrator/test_energy.py +++ b/tests/calibrator/test_energy.py @@ -258,7 +258,7 @@ def test_append_energy_axis_raises(): """Test if apply_correction raises the correct errors""" config = parse_config(config={}, folder_config={}, user_config={}, system_config={}) loader = get_loader(loader_name="mpes", config=config) - df, _ = loader.read_dataframe(folders=df_folder, collect_metadata=False) + df, _, _ = loader.read_dataframe(folders=df_folder, collect_metadata=False) ec = EnergyCalibrator(config=config, loader=loader) with pytest.raises(ValueError): df, _ = ec.append_energy_axis(df, calibration={"d": 1, "t0": 0}) diff --git a/tests/test_diagnostics.py b/tests/test_diagnostics.py index ab80bde7..21b20093 100644 --- a/tests/test_diagnostics.py +++ b/tests/test_diagnostics.py @@ -31,7 +31,7 @@ def test_plot_histogram(ncols: int, backend: str): ncols (int): number of columns backend (str): plotting backend to use """ - dataframe, _ = loader.read_dataframe(files=files) + dataframe, _, _ = loader.read_dataframe(files=files) axes = config["histogram"]["axes"] ranges = config["histogram"]["ranges"] bins = config["histogram"]["bins"] diff --git a/tests/test_processor.py b/tests/test_processor.py index d7609895..9f6b27bf 100644 --- a/tests/test_processor.py +++ b/tests/test_processor.py @@ -48,7 +48,7 @@ def test_processor_from_dataframe(): """Test generation of the processor from a dataframe object""" config = {"core": {"loader": "generic"}} - dataframe, _ = loader.read_dataframe(files=files) + dataframe, _, _ = loader.read_dataframe(files=files) processor = SedProcessor( dataframe=dataframe, config=config, @@ -63,7 +63,7 @@ def test_processor_from_dataframe(): def test_processor_from_files(): """Test generation of the processor from a list of files""" config = {"core": {"loader": "generic"}} - dataframe, _ = loader.read_dataframe(files=files) + dataframe, _, _ = loader.read_dataframe(files=files) processor = SedProcessor( files=files, config=config, @@ -78,7 +78,7 @@ def test_processor_from_files(): def test_processor_from_folders(): """Test generation of the processor from a folder""" config = {"core": {"loader": "generic"}} - dataframe, _ = loader.read_dataframe(files=files) + dataframe, _, _ = loader.read_dataframe(files=files) processor = SedProcessor( folder=df_folder, config=config,