From c3676371d6050be89e585f39855bfb635181ee39 Mon Sep 17 00:00:00 2001 From: Steinn Ymir Agustsson Date: Tue, 10 Oct 2023 23:15:25 +0200 Subject: [PATCH] linting and bugfix --- sed/calibrator/hextof.py | 23 +++++++++----- sed/core/processor.py | 65 ++++++++++++++++++++++++++++++++++++++ sed/loader/flash/loader.py | 6 +--- 3 files changed, 81 insertions(+), 13 deletions(-) diff --git a/sed/calibrator/hextof.py b/sed/calibrator/hextof.py index 83672735..884028f7 100644 --- a/sed/calibrator/hextof.py +++ b/sed/calibrator/hextof.py @@ -31,6 +31,8 @@ def unravel_8s_detector_time_channel( if config is None: raise ValueError("Either time_sector_column or config must be given.") time_sector_column = config["dataframe"]["time_sector_column"] + if time_sector_column not in df.columns: + raise ValueError(f"Column {time_sector_column} not in dataframe.") if tof_step_column is None: if config is None: raise ValueError("Either tof_step_column or config must be given.") @@ -40,15 +42,16 @@ def unravel_8s_detector_time_channel( raise ValueError("Either sector_id_column or config must be given.") sector_id_column = config["dataframe"]["sector_id_column"] - # extract dld sector id information df[sector_id_column] = (df[time_sector_column] % 8).astype(np.int8) df[tof_step_column] = (df[time_sector_column] // 8).astype(np.int32) return df def align_8s_sectors( - dataframe: dask.dataframe.DataFrame, + df: dask.dataframe.DataFrame, sector_delays: Sequence[float] = None, + sector_id_column: str = "dldSectorID", + tof_step_column: str = "dldTimeSteps", config: dict = None, ) -> Tuple[Union[pd.DataFrame, dask.dataframe.DataFrame], dict]: """Aligns the 8s sectors to the first sector. @@ -63,19 +66,19 @@ def align_8s_sectors( raise ValueError("Either sector_delays or config must be given.") sector_delays = config["dataframe"]["sector_delays"] # align the 8s sectors + sector_delays = dask.array.from_array(sector_delays) def align_sector(x): - return x - sector_delays[x['dldSectorID']] - - dataframe['dldTimeSteps'] = dataframe.map_partitions( - align_sector, meta=('dldTimeSteps', np.int32) + return x[tof_step_column] - sector_delays[x[sector_id_column].values.astype(int)] + df[tof_step_column] = df.map_partitions( + align_sector, meta=(tof_step_column, np.float64) ) metadata = {} metadata["applied"] = True metadata["sector_delays"] = sector_delays - return dataframe, metadata + return df, metadata def convert_8s_time_to_ns( @@ -88,8 +91,12 @@ def convert_8s_time_to_ns( """Converts the 8s time in steps to time in ns. Args: - time_step_size (float, optional): Size of one time step in ns. + time_step_size (float, optional): Time step size in nanoseconds. Defaults to config["dataframe"]["time_step_size"]. + tof_step_column (str, optional): Name of the column containing the + time-of-flight steps. Defaults to config["dataframe"]["tof_step_column"]. + tof_column (str, optional): Name of the column containing the + time-of-flight. Defaults to config["dataframe"]["tof_column"]. """ if time_step_size is None: if config is None: diff --git a/sed/core/processor.py b/sed/core/processor.py index 9069215c..d73ad7d8 100644 --- a/sed/core/processor.py +++ b/sed/core/processor.py @@ -21,6 +21,7 @@ from sed.calibrator import DelayCalibrator from sed.calibrator import EnergyCalibrator from sed.calibrator import MomentumCorrector +from sed.calibrator import hextof from sed.core.config import parse_config from sed.core.config import save_config from sed.core.dfops import apply_jitter @@ -1203,6 +1204,70 @@ def add_jitter(self, cols: Sequence[str] = None): metadata.append(col) self._attributes.add(metadata, "jittering", duplicate_policy="append") + def hextof_step_to_ns( + self, + time_step_size: float = None, + tof_step_column: str = None, + tof_column: str = None, + ): + """Convert time-of-flight channel steps to nanoseconds. + + Intended for use with HEXTOF endstation + + Args: + time_step_size (float, optional): Time step size in nanoseconds. + Defaults to config["dataframe"]["time_step_size"]. + tof_step_column (str, optional): Name of the column containing the + time-of-flight steps. Defaults to config["dataframe"]["tof_step_column"]. + tof_column (str, optional): Name of the column containing the + time-of-flight. Defaults to config["dataframe"]["tof_column"]. + """ + if self._dataframe is not None: + print("Adding energy column to dataframe:") + # TODO assert order of execution through metadata + + self._dataframe, metadata = hextof.convert_8s_time_to_ns( + df=self._dataframe, + time_step_size=time_step_size or self._config["dataframe"]["time_step_size"], + tof_step_column=tof_step_column or self._config["dataframe"]["tof_step_column"], + tof_column=tof_column or self._config["dataframe"]["tof_column"], + ) + self._attributes.add( + metadata, + "energy_calibration", + duplicate_policy="merge", + ) + + def hextof_align_8s_sectors( + self, + sector_delays: Sequence[float] = None, + ): + """ Align the 8s sectors of the HEXTOF endstation. + + Intended for use with HEXTOF endstation + + Args: + sector_delays (Sequence[float], optional): Delays of the 8s sectors in + picoseconds. Defaults to config["dataframe"]["sector_delays"]. + """ + if self._dataframe is not None: + print("Aligning 8s sectors of dataframe") + # TODO assert order of execution through metadata + sector_delays = sector_delays or self._config["dataframe"].get("sector_delays", [0.0] * 8) + if len(sector_delays) != 8: + raise ValueError("sector_delays must be a list of 8 floats") + if all(sector_delays == 0): + print("All sector delays are 0, skipping alignment") + self._dataframe, metadata = hextof.align_8s_sectors( + df=self._dataframe, + sector_delays=sector_delays, + ) + self._attributes.add( + metadata, + "energy_calibration", + duplicate_policy="merge", + ) + def pre_binning( self, df_partitions: int = 100, diff --git a/sed/loader/flash/loader.py b/sed/loader/flash/loader.py index d89c3f02..31d73e8b 100644 --- a/sed/loader/flash/loader.py +++ b/sed/loader/flash/loader.py @@ -619,11 +619,7 @@ def create_buffer_file(self, h5_path: Path, parquet_path: Path) -> None: .reset_index(level=self.multi_index) .to_parquet(parquet_path, index=False) ) - # except ValueError as failed_string_error: - # print(f"Conversion failed for {parquet_path}:\nValueError: {failed_string_error}") - # error = f"{parquet_path}: {failed_string_error}" - # self.failed_files_error.append(error) - except Exception as exc: # pylint: disable=broad-except + except Exception as exc: # pylint: disable=broad-except self.failed_files_error.append(f"{parquet_path}: {type(exc)} {exc}") return exc return False