diff --git a/sed/calibrator/hextof.py b/sed/calibrator/hextof.py index 942a0bca..d2549985 100644 --- a/sed/calibrator/hextof.py +++ b/sed/calibrator/hextof.py @@ -12,9 +12,8 @@ def unravel_8s_detector_time_channel( df: dask.dataframe.DataFrame, - time_sector_column: str = "dldTimeAndSector", - tof_step_column: str = "dldTimeSteps", - sector_id_column: str = "dldSectorID", + tof_column: str = None, + sector_id_column: str = None, config: dict = None, ) -> dask.dataframe.DataFrame: """Converts the 8s time in steps to time in steps and sectorID. @@ -26,32 +25,28 @@ def unravel_8s_detector_time_channel( sector_delays (Sequece[float], optional): Sector delays for the 8s time. Defaults to config["dataframe"]["sector_delays"]. """ - df = df.dropna(subset=[time_sector_column]) - if time_sector_column is None: - if config is None: - raise ValueError("Either time_sector_column or config must be given.") - time_sector_column = config["dataframe"]["time_sector_column"] - if time_sector_column not in df.columns: - raise ValueError(f"Column {time_sector_column} not in dataframe.") - if tof_step_column is None: + if tof_column is None: if config is None: - raise ValueError("Either tof_step_column or config must be given.") - tof_step_column = config["dataframe"]["tof_step_column"] + raise ValueError("Either tof_column or config must be given.") + tof_column = config["dataframe"]["tof_column"] if sector_id_column is None: if config is None: raise ValueError("Either sector_id_column or config must be given.") sector_id_column = config["dataframe"]["sector_id_column"] - df[sector_id_column] = (df[time_sector_column] % 8).astype(np.int8) - df[tof_step_column] = (df[time_sector_column] // 8).astype(np.int32) + if sector_id_column in df.columns: + raise ValueError(f"Column {sector_id_column} already in dataframe. 
" + "This function is not idempotent.") + df[sector_id_column] = (df[tof_column] % 8).astype(np.int8) + df[tof_column] = (df[tof_column] // 8).astype(np.int32) return df -def align_8s_sectors( +def align_dld_sectors( df: dask.dataframe.DataFrame, sector_delays: Sequence[float] = None, - sector_id_column: str = "dldSectorID", - tof_step_column: str = "dldTimeSteps", + sector_id_column: str = None, + tof_column: str = None, config: dict = None, ) -> Tuple[Union[pd.DataFrame, dask.dataframe.DataFrame], dict]: """Aligns the 8s sectors to the first sector. @@ -65,13 +60,22 @@ def align_8s_sectors( if config is None: raise ValueError("Either sector_delays or config must be given.") sector_delays = config["dataframe"]["sector_delays"] + if sector_id_column is None: + if config is None: + raise ValueError("Either sector_id_column or config must be given.") + sector_id_column = config["dataframe"]["sector_id_column"] + if tof_column is None: + if config is None: + raise ValueError("Either tof_column or config must be given.") + tof_column = config["dataframe"]["tof_column"] # align the 8s sectors sector_delays_arr = dask.array.from_array(sector_delays) def align_sector(x): - return x[tof_step_column] - sector_delays_arr[x[sector_id_column].values.astype(int)] - df[tof_step_column] = df.map_partitions( - align_sector, meta=(tof_step_column, np.float64) + val = x[tof_column] - sector_delays_arr[x[sector_id_column].values.astype(int)] + return val.astype(np.float32) + df[tof_column] = df.map_partitions( + align_sector, meta=(tof_column, np.float32) ) metadata = {} @@ -81,43 +85,50 @@ def align_sector(x): return df, metadata -def convert_8s_time_to_ns( +def dld_time_to_ns( df: Union[pd.DataFrame, dask.dataframe.DataFrame], - time_step_size: float = None, - tof_step_column: str = "dldTimeSteps", - tof_column: str = "dldTime", + tof_ns_column: str = None, + tof_binwidth: float = None, + tof_column: str = None, + tof_binning: int = None, config: dict = None, ) -> 
Tuple[Union[pd.DataFrame, dask.dataframe.DataFrame], dict]: """Converts the 8s time in steps to time in ns. Args: - time_step_size (float, optional): Time step size in nanoseconds. - Defaults to config["dataframe"]["time_step_size"]. - tof_step_column (str, optional): Name of the column containing the - time-of-flight steps. Defaults to config["dataframe"]["tof_step_column"]. + tof_binwidth (float, optional): Time step size in nanoseconds. + Defaults to config["dataframe"]["tof_binwidth"]. + tof_column (str, optional): Name of the column containing the + time-of-flight steps. Defaults to config["dataframe"]["tof_column"]. tof_column (str, optional): Name of the column containing the time-of-flight. Defaults to config["dataframe"]["tof_column"]. + tof_binning (int, optional): Binning of the time-of-flight steps. """ - if time_step_size is None: - if config is None: - raise ValueError("Either time_step_size or config must be given.") - time_step_size: float = config["dataframe"]["time_step_size"] - if tof_step_column is None: + if tof_binwidth is None: if config is None: - raise ValueError("Either tof_step_column or config must be given.") - tof_step_column: str = config["dataframe"]["tof_step_column"] + raise ValueError("Either tof_binwidth or config must be given.") + tof_binwidth: float = config["dataframe"]["tof_binwidth"] if tof_column is None: if config is None: - raise ValueError("Either tof_time_column or config must be given.") + raise ValueError("Either tof_column or config must be given.") tof_column: str = config["dataframe"]["tof_column"] + if tof_binning is None: + if config is None: + raise ValueError("Either tof_binning or config must be given.") + tof_binning: int = config["dataframe"]["tof_binning"] + if tof_ns_column is None: + if config is None: + raise ValueError("Either tof_ns_column or config must be given.") + tof_ns_column: str = config["dataframe"]["tof_ns_column"] def convert_to_ns(x): - return x[tof_step_column] * time_step_size - 
df[tof_column] = df.map_partitions( - convert_to_ns, meta=(tof_column, np.float64) + val = x[tof_column] * tof_binwidth * 2**tof_binning + return val.astype(np.float32) + df[tof_ns_column] = df.map_partitions( + convert_to_ns, meta=(tof_column, np.float32) ) metadata = {} metadata["applied"] = True - metadata["time_step_size"] = time_step_size + metadata["tof_binwidth"] = tof_binwidth return df, metadata diff --git a/sed/config/flash_example_config.yaml b/sed/config/flash_example_config.yaml index 1a52468e..4cf1d425 100644 --- a/sed/config/flash_example_config.yaml +++ b/sed/config/flash_example_config.yaml @@ -23,19 +23,60 @@ dataframe: daq: fl1user3 # The offset correction to the pulseId ubid_offset: 5 + # the number of iterations to fill the pulseId forward. forward_fill_iterations: 2 - # if true, removes the 3 bits reserved for dldSectorID from the dldTimeandSector column + # if true, removes the 3 bits reserved for dldSectorID from the dldTimeSteps column unravel_8s_detector_time_channel: True - time_step_size: 0.16460905596613884 # 0.020576131995767355 - raw_time_column: dldTimeAndSector - time_step_column: dldTimeSteps - tof_step_column: dldTimeSteps + + # dataframe column containing x coordinates + x_column: dldPosX + # dataframe column containing corrected x coordinates + corrected_x_column: "X" + # dataframe column containing kx coordinates + kx_column: "kx" + # dataframe column containing y coordinates + + y_column: dldPosY + # dataframe column containing corrected y coordinates + corrected_y_column: "Y" + # dataframe column containing ky coordinates + ky_column: "ky" + # dataframe column containing time-of-flight data + + tof_column: dldTimeSteps + # dataframe column containing time-of-flight data in ns + tof_ns_column: dldTime + # dataframe column containing corrected time-of-flight data + corrected_tof_column: "tm" + + # time length of a base time-of-flight bin in ns + tof_binwidth: 0.020576131995767355 # 0.16460905596613884 + # binning parameter for 
time-of-flight data. 2**tof_binning bins per base bin + tof_binning: 3 # power of 2, 3 means 8 bins per step + # dataframe column containing sector ID. obtained from dldTimeSteps column sector_id_column: dldSectorID + sector_delays: [0., 0., 0., 0., 0., 0., 0., 0.] - tof_column: dldTime + jitter_cols: ["dldPosX", "dldPosY", "dldTimeSteps"] + units: + dldPosX: 'step' + dldPosY: 'step' + dldTimeSteps: 'step' + tof_voltage: 'V' + extractorVoltage: 'V' + extractorCurrent: 'A' + cryoTemperature: 'K' + sampleTemperature: 'K' + dldTime: 'ns' + # delay: 'ps' + timeStamp: 's' + # energy: 'eV' + # E: 'eV' + kx: '1/A' + ky: '1/A' # The channels to load. # channels have the following structure: @@ -62,7 +103,7 @@ dataframe: slice: 0 # This channel will actually create dldTimeSteps and dldSectorID, # if unravel_8s_detector_time_channel is set to True - dldTimeAndSector: + dldTimeSteps: format: per_electron group_name: "/uncategorised/FLASH.EXP/HEXTOF.DAQ/DLD1/" slice: 3 diff --git a/sed/core/processor.py b/sed/core/processor.py index e8d71fad..15ed267c 100644 --- a/sed/core/processor.py +++ b/sed/core/processor.py @@ -1204,33 +1204,36 @@ def add_jitter(self, cols: Sequence[str] = None): metadata.append(col) self._attributes.add(metadata, "jittering", duplicate_policy="append") - def hextof_step_to_ns( + def dld_time_to_ns( self, - time_step_size: float = None, - tof_step_column: str = None, + tof_ns_column: str = None, + tof_binwidth: float = None, tof_column: str = None, + tof_binning: int = None, ): """Convert time-of-flight channel steps to nanoseconds. - Intended for use with HEXTOF endstation - Args: - time_step_size (float, optional): Time step size in nanoseconds. - Defaults to config["dataframe"]["time_step_size"]. - tof_step_column (str, optional): Name of the column containing the - time-of-flight steps. Defaults to config["dataframe"]["tof_step_column"]. + tof_binwidth (float, optional): Time step size in nanoseconds. 
+ Defaults to config["dataframe"]["tof_binwidth"]. + tof_column (str, optional): Name of the column containing the + time-of-flight steps. Defaults to config["dataframe"]["tof_column"]. tof_column (str, optional): Name of the column containing the time-of-flight. Defaults to config["dataframe"]["tof_column"]. + tof_binning (int, optional): Binning of the time-of-flight steps. + """ if self._dataframe is not None: print("Adding energy column to dataframe:") # TODO assert order of execution through metadata - self._dataframe, metadata = hextof.convert_8s_time_to_ns( + self._dataframe, metadata = hextof.dld_time_to_ns( df=self._dataframe, - time_step_size=time_step_size or self._config["dataframe"]["time_step_size"], - tof_step_column=tof_step_column or self._config["dataframe"]["tof_step_column"], - tof_column=tof_column or self._config["dataframe"]["tof_column"], + tof_ns_column=tof_ns_column, + tof_binwidth=tof_binwidth, + tof_column=tof_column, + tof_binning=tof_binning, + config=self._config, ) self._attributes.add( metadata, @@ -1238,9 +1241,11 @@ def hextof_step_to_ns( duplicate_policy="merge", ) - def hextof_align_8s_sectors( + def align_dld_sectors( self, sector_delays: Sequence[float] = None, + sector_id_column: str = None, + tof_column: str = None, ): """ Align the 8s sectors of the HEXTOF endstation. 
@@ -1253,19 +1258,16 @@ def hextof_align_8s_sectors( if self._dataframe is not None: print("Aligning 8s sectors of dataframe") # TODO assert order of execution through metadata - if sector_delays is None: - sector_delays = self._config["dataframe"].get("sector_delays", [0.0] * 8) - if len(sector_delays) != 8: - raise ValueError("sector_delays must be a list of 8 floats") - if all(delay == 0.0 for delay in sector_delays): - print("All sector delays are 0, skipping alignment") - self._dataframe, metadata = hextof.align_8s_sectors( + self._dataframe, metadata = hextof.align_dld_sectors( df=self._dataframe, sector_delays=sector_delays, + sector_id_column=sector_id_column, + tof_column=tof_column, + config=self._config, ) self._attributes.add( metadata, - "energy_calibration", + "sector_alignment", duplicate_policy="merge", ) diff --git a/sed/loader/flash/loader.py b/sed/loader/flash/loader.py index 47d523be..f8e3cc0b 100644 --- a/sed/loader/flash/loader.py +++ b/sed/loader/flash/loader.py @@ -593,9 +593,10 @@ def create_dataframe_per_file( with h5py.File(file_path, "r") as h5_file: self.reset_multi_index() # Reset MultiIndexes for next file df = self.concatenate_channels(h5_file) - df = df.dropna(subset=['dldTimeAndSector']) + df = df.dropna(subset=self._config['dataframe'].get('tof_column', 'dldTimeSteps')) # correct the 3 bit shift which encodes the detector ID in the 8s time - df = unravel_8s_detector_time_channel(df) + if self._config['dataframe'].get('unravel_8s_detector_time_channel', False): + df = unravel_8s_detector_time_channel(df, config=self._config) return df def create_buffer_file(self, h5_path: Path, parquet_path: Path) -> Union[bool, Exception]: diff --git a/tests/data/loader/flash/config.yaml b/tests/data/loader/flash/config.yaml index 0a9e9277..fa5d38f6 100644 --- a/tests/data/loader/flash/config.yaml +++ b/tests/data/loader/flash/config.yaml @@ -19,30 +19,64 @@ core: # year: 20xx dataframe: - # The offset correction to the pulseId - ubid_offset: 5 - 
# The name of the DAQ system to use. Necessary to resolve the filenames/paths. daq: fl1user3 + # The offset correction to the pulseId + ubid_offset: 5 + # the number of iterations to fill the pulseId forward. + forward_fill_iterations: 2 + # if true, removes the 3 bits reserved for dldSectorID from the dldTimeSteps column unravel_8s_detector_time_channel: True - # the tof step size is now 8 times larger as we remove the 3 bits of the sectorID - time_step_size: 0.16460905596613884 # 0.020576131995767355 - raw_time_column: dldTimeAndSector - time_step_column: dldTimeSteps - tof_step_column: dldTimeSteps + + # dataframe column containing x coordinates + x_column: dldPosX + # dataframe column containing corrected x coordinates + corrected_x_column: "X" + # dataframe column containing kx coordinates + kx_column: "kx" + # dataframe column containing y coordinates + + y_column: dldPosY + # dataframe column containing corrected y coordinates + corrected_y_column: "Y" + # dataframe column containing ky coordinates + ky_column: "ky" + # dataframe column containing time-of-flight data + + tof_column: dldTimeSteps + # dataframe column containing time-of-flight data in ns + tof_ns_column: dldTime + # dataframe column containing corrected time-of-flight data + corrected_tof_column: "tm" + + # time length of a base time-of-flight bin in ns + tof_binwidth: 0.020576131995767355 # 0.16460905596613884 + # binning parameter for time-of-flight data. 2**tof_binning bins per base bin + tof_binning: 3 # power of 2, 3 means 8 bins per step + # dataframe column containing sector ID. obtained from dldTimeSteps column sector_id_column: dldSectorID + sector_delays: [0., 0., 0., 0., 0., 0., 0., 0.] - tof_column: dldTime + jitter_cols: ["dldPosX", "dldPosY", "dldTimeSteps"] - # The channels to load. 
- # channels have the following structure: - # channelAlias: - # format: per_pulse/per_electron/per_train - # group_name: the hdf5 group path - # slice: if the group contains multidim data, where to slice - + units: + dldPosX: 'step' + dldPosY: 'step' + dldTimeSteps: 'step' + tof_voltage: 'V' + extractorVoltage: 'V' + extractorCurrent: 'A' + cryoTemperature: 'K' + sampleTemperature: 'K' + dldTime: 'ns' + # delay: 'ps' + timeStamp: 's' + # energy: 'eV' + # E: 'eV' + kx: '1/A' + ky: '1/A' channels: # pulse ID is a necessary channel for using the loader. pulseId: @@ -60,7 +94,7 @@ dataframe: group_name: "/uncategorised/FLASH.EXP/HEXTOF.DAQ/DLD1/" slice: 0 - dldTimeAndSector: + dldTimeSteps: format: per_electron group_name: "/uncategorised/FLASH.EXP/HEXTOF.DAQ/DLD1/" slice: 3