Commit

fixes from suggestions and linting
steinnymir committed Oct 11, 2023
1 parent 9cd0d67 commit 9dbb379
Showing 5 changed files with 178 additions and 89 deletions.
93 changes: 52 additions & 41 deletions sed/calibrator/hextof.py
@@ -12,9 +12,8 @@

def unravel_8s_detector_time_channel(
df: dask.dataframe.DataFrame,
-    time_sector_column: str = "dldTimeAndSector",
-    tof_step_column: str = "dldTimeSteps",
-    sector_id_column: str = "dldSectorID",
+    tof_column: str = None,
+    sector_id_column: str = None,
config: dict = None,
) -> dask.dataframe.DataFrame:
"""Converts the 8s time in steps to time in steps and sectorID.
@@ -26,32 +25,28 @@ def unravel_8s_detector_time_channel(
        sector_delays (Sequence[float], optional): Sector delays for the 8s time.
Defaults to config["dataframe"]["sector_delays"].
"""
-    df = df.dropna(subset=[time_sector_column])
-    if time_sector_column is None:
-        if config is None:
-            raise ValueError("Either time_sector_column or config must be given.")
-        time_sector_column = config["dataframe"]["time_sector_column"]
-    if time_sector_column not in df.columns:
-        raise ValueError(f"Column {time_sector_column} not in dataframe.")
-    if tof_step_column is None:
+    if tof_column is None:
        if config is None:
-            raise ValueError("Either tof_step_column or config must be given.")
-        tof_step_column = config["dataframe"]["tof_step_column"]
+            raise ValueError("Either tof_column or config must be given.")
+        tof_column = config["dataframe"]["tof_column"]
    if sector_id_column is None:
        if config is None:
            raise ValueError("Either sector_id_column or config must be given.")
        sector_id_column = config["dataframe"]["sector_id_column"]

-    df[sector_id_column] = (df[time_sector_column] % 8).astype(np.int8)
-    df[tof_step_column] = (df[time_sector_column] // 8).astype(np.int32)
+    if sector_id_column in df.columns:
+        raise ValueError(f"Column {sector_id_column} already in dataframe. "
+                         "This function is not idempotent.")
+    df[sector_id_column] = (df[tof_column] % 8).astype(np.int8)
+    df[tof_column] = (df[tof_column] // 8).astype(np.int32)
return df
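The unraveling above splits one packed channel into two quantities: the lowest 3 bits of the combined value are the detector sector, the remaining bits are the time-of-flight step count. A minimal sketch of that decomposition (editor's illustration, not part of the commit; plain pandas instead of dask, column names taken from the example config further below):

```python
import numpy as np
import pandas as pd

# combined channel: tof_steps * 8 + sector_id
df = pd.DataFrame({"dldTimeSteps": [8 * 1000 + 3, 8 * 1000 + 7, 8 * 2500 + 0]})

df["dldSectorID"] = (df["dldTimeSteps"] % 8).astype(np.int8)    # lowest 3 bits -> sector 0..7
df["dldTimeSteps"] = (df["dldTimeSteps"] // 8).astype(np.int32)  # remaining bits -> ToF steps

print(df)
#    dldTimeSteps  dldSectorID
# 0          1000            3
# 1          1000            7
# 2          2500            0
```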


-def align_8s_sectors(
+def align_dld_sectors(
    df: dask.dataframe.DataFrame,
    sector_delays: Sequence[float] = None,
-    sector_id_column: str = "dldSectorID",
-    tof_step_column: str = "dldTimeSteps",
+    sector_id_column: str = None,
+    tof_column: str = None,
config: dict = None,
) -> Tuple[Union[pd.DataFrame, dask.dataframe.DataFrame], dict]:
"""Aligns the 8s sectors to the first sector.
@@ -65,13 +60,22 @@ def align_8s_sectors(
if config is None:
raise ValueError("Either sector_delays or config must be given.")
sector_delays = config["dataframe"]["sector_delays"]
+    if sector_id_column is None:
+        if config is None:
+            raise ValueError("Either sector_id_column or config must be given.")
+        sector_id_column = config["dataframe"]["sector_id_column"]
+    if tof_column is None:
+        if config is None:
+            raise ValueError("Either tof_column or config must be given.")
+        tof_column = config["dataframe"]["tof_column"]
    # align the 8s sectors
    sector_delays_arr = dask.array.from_array(sector_delays)

    def align_sector(x):
-        return x[tof_step_column] - sector_delays_arr[x[sector_id_column].values.astype(int)]
-    df[tof_step_column] = df.map_partitions(
-        align_sector, meta=(tof_step_column, np.float64)
+        val = x[tof_column] - sector_delays_arr[x[sector_id_column].values.astype(int)]
+        return val.astype(np.float32)
+    df[tof_column] = df.map_partitions(
+        align_sector, meta=(tof_column, np.float32)
)

metadata = {}
@@ -81,43 +85,50 @@ def align_sector(x):
return df, metadata
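For context, the alignment subtracts a per-sector delay from the time-of-flight column, looked up by each event's sector ID. A self-contained sketch of the same map_partitions pattern (editor's illustration; a plain numpy array stands in for dask.array.from_array, and the delay values are made up):

```python
import dask.dataframe as dd
import numpy as np
import pandas as pd

pdf = pd.DataFrame({"dldTimeSteps": [1000.0, 1000.0, 1000.0], "dldSectorID": [0, 1, 2]})
df = dd.from_pandas(pdf, npartitions=1)

sector_delays = np.array([0.0, 0.25, 0.5, 0.0, 0.0, 0.0, 0.0, 0.0])  # illustrative values

def align_sector(part: pd.DataFrame) -> pd.Series:
    # subtract the delay of the sector each event was detected in
    val = part["dldTimeSteps"] - sector_delays[part["dldSectorID"].to_numpy().astype(int)]
    return val.astype(np.float32)

df["dldTimeSteps"] = df.map_partitions(align_sector, meta=("dldTimeSteps", np.float32))
print(df.compute())  # 1000.0, 999.75, 999.5
```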


-def convert_8s_time_to_ns(
+def dld_time_to_ns(
    df: Union[pd.DataFrame, dask.dataframe.DataFrame],
-    time_step_size: float = None,
-    tof_step_column: str = "dldTimeSteps",
-    tof_column: str = "dldTime",
+    tof_ns_column: str = None,
+    tof_binwidth: float = None,
+    tof_column: str = None,
+    tof_binning: int = None,
config: dict = None,
) -> Tuple[Union[pd.DataFrame, dask.dataframe.DataFrame], dict]:
"""Converts the 8s time in steps to time in ns.
Args:
-        time_step_size (float, optional): Time step size in nanoseconds.
-            Defaults to config["dataframe"]["time_step_size"].
-        tof_step_column (str, optional): Name of the column containing the
-            time-of-flight steps. Defaults to config["dataframe"]["tof_step_column"].
+        tof_binwidth (float, optional): Time step size in nanoseconds.
+            Defaults to config["dataframe"]["tof_binwidth"].
+        tof_column (str, optional): Name of the column containing the
+            time-of-flight steps. Defaults to config["dataframe"]["tof_column"].
        tof_column (str, optional): Name of the column containing the
            time-of-flight. Defaults to config["dataframe"]["tof_column"].
+        tof_binning (int, optional): Binning of the time-of-flight steps.
"""
-    if time_step_size is None:
-        if config is None:
-            raise ValueError("Either time_step_size or config must be given.")
-        time_step_size: float = config["dataframe"]["time_step_size"]
-    if tof_step_column is None:
-        if config is None:
-            raise ValueError("Either tof_step_column or config must be given.")
-        tof_step_column: str = config["dataframe"]["tof_step_column"]
+    if tof_binwidth is None:
+        if config is None:
+            raise ValueError("Either tof_binwidth or config must be given.")
+        tof_binwidth: float = config["dataframe"]["tof_binwidth"]
    if tof_column is None:
        if config is None:
-            raise ValueError("Either tof_time_column or config must be given.")
+            raise ValueError("Either tof_column or config must be given.")
        tof_column: str = config["dataframe"]["tof_column"]
+    if tof_binning is None:
+        if config is None:
+            raise ValueError("Either tof_binning or config must be given.")
+        tof_binning: int = config["dataframe"]["tof_binning"]
+    if tof_ns_column is None:
+        if config is None:
+            raise ValueError("Either tof_ns_column or config must be given.")
+        tof_ns_column: str = config["dataframe"]["tof_ns_column"]

def convert_to_ns(x):
-        return x[tof_step_column] * time_step_size
-    df[tof_column] = df.map_partitions(
-        convert_to_ns, meta=(tof_column, np.float64)
+        val = x[tof_column] * tof_binwidth * 2**tof_binning
+        return val.astype(np.float32)
+    df[tof_ns_column] = df.map_partitions(
+        convert_to_ns, meta=(tof_column, np.float32)
)
metadata = {}
metadata["applied"] = True
metadata["time_step_size"] = time_step_size
metadata["tof_binwidth"] = tof_binwidth

return df, metadata
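The renamed dld_time_to_ns now derives the step length from two config values instead of a single time_step_size: one step corresponds to tof_binwidth * 2**tof_binning nanoseconds. A quick numeric check using the values from the example config in the next file (editor's illustration, not part of the commit):

```python
tof_binwidth = 0.020576131995767355  # ns per base time-of-flight bin (from the example config)
tof_binning = 3                      # 2**3 = 8 base bins per step

step_to_ns = tof_binwidth * 2**tof_binning
print(step_to_ns)          # 0.16460905596613884 ns per step (the old time_step_size value)
print(1000 * step_to_ns)   # 1000 steps ~ 164.6 ns
```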
55 changes: 48 additions & 7 deletions sed/config/flash_example_config.yaml
@@ -23,19 +23,60 @@ dataframe:
daq: fl1user3
# The offset correction to the pulseId
ubid_offset: 5

# the number of iterations to fill the pulseId forward.
forward_fill_iterations: 2
-  # if true, removes the 3 bits reserved for dldSectorID from the dldTimeandSector column
+  # if true, removes the 3 bits reserved for dldSectorID from the dldTimeSteps column
  unravel_8s_detector_time_channel: True
-  time_step_size: 0.16460905596613884 # 0.020576131995767355
-  raw_time_column: dldTimeAndSector
-  time_step_column: dldTimeSteps
-  tof_step_column: dldTimeSteps

# dataframe column containing x coordinates
x_column: dldPosX
# dataframe column containing corrected x coordinates
corrected_x_column: "X"
# dataframe column containing kx coordinates
kx_column: "kx"
# dataframe column containing y coordinates

y_column: dldPosY
# dataframe column containing corrected y coordinates
corrected_y_column: "Y"
# dataframe column containing kx coordinates
ky_column: "ky"
# dataframe column containing time-of-flight data

tof_column: dldTimeSteps
# dataframe column containing time-of-flight data in ns
tof_ns_column: dldTime
# dataframe column containing corrected time-of-flight data
corrected_tof_column: "tm"

# time length of a base time-of-flight bin in ns
tof_binwidth: 0.020576131995767355 # 0.16460905596613884
# binning parameter for time-of-flight data. 2**tof_binning bins per base bin
tof_binning: 3 # power of 2, 4 means 8 bins per step
# dataframe column containing sector ID. obtained from dldTimeSteps column
sector_id_column: dldSectorID

sector_delays: [0., 0., 0., 0., 0., 0., 0., 0.]
-  tof_column: dldTime

jitter_cols: ["dldPosX", "dldPosY", "dldTimeSteps"]

units:
dldPosX: 'step'
dldPosY: 'step'
dldTimeSteps: 'step'
tof_voltage: 'V'
extractorVoltage: 'V'
extractorCurrent: 'A'
cryoTemperature: 'K'
sampleTemperature: 'K'
dldTime: 'ns'
# delay: 'ps'
timeStamp: 's'
# energy: 'eV'
# E: 'eV'
kx: '1/A'
ky: '1/A'

# The channels to load.
# channels have the following structure:
@@ -62,7 +103,7 @@ dataframe:
slice: 0
# This channel will actually create dldTimeSteps and dldSectorID,
# if unravel_8s_detector_time_channel is set to True
-    dldTimeAndSector:
+    dldTimeSteps:
format: per_electron
group_name: "/uncategorised/FLASH.EXP/HEXTOF.DAQ/DLD1/"
slice: 3
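The functions in sed/calibrator/hextof.py resolve every unset argument from config["dataframe"], using the keys defined above. A stripped-down sketch of that resolution pattern (editor's illustration; the resolve helper is hypothetical, only the key names and values come from the YAML above):

```python
config = {
    "dataframe": {
        "tof_column": "dldTimeSteps",
        "tof_ns_column": "dldTime",
        "sector_id_column": "dldSectorID",
        "tof_binwidth": 0.020576131995767355,
        "tof_binning": 3,
        "sector_delays": [0.0] * 8,
    },
}

def resolve(value, key, config=None):
    # mirrors the "either argument or config must be given" checks in hextof.py
    if value is not None:
        return value
    if config is None:
        raise ValueError(f"Either {key} or config must be given.")
    return config["dataframe"][key]

print(resolve(None, "tof_column", config))     # 'dldTimeSteps' (taken from config)
print(resolve("myTof", "tof_column", config))  # 'myTof' (explicit argument wins)
```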
46 changes: 24 additions & 22 deletions sed/core/processor.py
@@ -1204,43 +1204,48 @@ def add_jitter(self, cols: Sequence[str] = None):
metadata.append(col)
self._attributes.add(metadata, "jittering", duplicate_policy="append")

-    def hextof_step_to_ns(
+    def dld_time_to_ns(
        self,
-        time_step_size: float = None,
-        tof_step_column: str = None,
+        tof_ns_column: str = None,
+        tof_binwidth: float = None,
+        tof_column: str = None,
+        tof_binning: int = None,
):
"""Convert time-of-flight channel steps to nanoseconds.
Intended for use with HEXTOF endstation
Args:
-            time_step_size (float, optional): Time step size in nanoseconds.
-                Defaults to config["dataframe"]["time_step_size"].
-            tof_step_column (str, optional): Name of the column containing the
-                time-of-flight steps. Defaults to config["dataframe"]["tof_step_column"].
+            tof_binwidth (float, optional): Time step size in nanoseconds.
+                Defaults to config["dataframe"]["tof_binwidth"].
+            tof_column (str, optional): Name of the column containing the
+                time-of-flight steps. Defaults to config["dataframe"]["tof_column"].
            tof_column (str, optional): Name of the column containing the
                time-of-flight. Defaults to config["dataframe"]["tof_column"].
+            tof_binning (int, optional): Binning of the time-of-flight steps.
"""
if self._dataframe is not None:
print("Adding energy column to dataframe:")
# TODO assert order of execution through metadata

-            self._dataframe, metadata = hextof.convert_8s_time_to_ns(
+            self._dataframe, metadata = hextof.dld_time_to_ns(
                df=self._dataframe,
-                time_step_size=time_step_size or self._config["dataframe"]["time_step_size"],
-                tof_step_column=tof_step_column or self._config["dataframe"]["tof_step_column"],
-                tof_column=tof_column or self._config["dataframe"]["tof_column"],
+                tof_ns_column=tof_ns_column,
+                tof_binwidth=tof_binwidth,
+                tof_column=tof_column,
+                tof_binning=tof_binning,
                config=self._config,
)
self._attributes.add(
metadata,
"energy_calibration",
duplicate_policy="merge",
)

-    def hextof_align_8s_sectors(
+    def align_dld_sectors(
        self,
        sector_delays: Sequence[float] = None,
+        sector_id_column: str = None,
+        tof_column: str = None,
):
""" Align the 8s sectors of the HEXTOF endstation.
@@ -1253,19 +1258,16 @@ def hextof_align_8s_sectors(
if self._dataframe is not None:
print("Aligning 8s sectors of dataframe")
# TODO assert order of execution through metadata
-            if sector_delays is None:
-                sector_delays = self._config["dataframe"].get("sector_delays", [0.0] * 8)
-            if len(sector_delays) != 8:
-                raise ValueError("sector_delays must be a list of 8 floats")
-            if all(delay == 0.0 for delay in sector_delays):
-                print("All sector delays are 0, skipping alignment")
-            self._dataframe, metadata = hextof.align_8s_sectors(
+            self._dataframe, metadata = hextof.align_dld_sectors(
                df=self._dataframe,
                sector_delays=sector_delays,
+                sector_id_column=sector_id_column,
+                tof_column=tof_column,
+                config=self._config,
)
self._attributes.add(
metadata,
"energy_calibration",
"sector_alignment",
duplicate_policy="merge",
)

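Taken together, the processor now exposes the two renamed methods as the user-facing entry points. A possible call sequence after this commit (editor's illustration; the SedProcessor import path and constructor arguments are assumptions, only the two method names come from the diff above):

```python
from sed import SedProcessor  # assumed import path

sp = SedProcessor(
    files=["run_001.h5"],                           # hypothetical raw file
    config="sed/config/flash_example_config.yaml",  # example config shown above
)

# subtract the per-sector delays from the dldTimeSteps column
sp.align_dld_sectors()

# convert time-of-flight steps to nanoseconds (fills the dldTime column)
sp.dld_time_to_ns()
```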
5 changes: 3 additions & 2 deletions sed/loader/flash/loader.py
@@ -593,9 +593,10 @@ def create_dataframe_per_file(
with h5py.File(file_path, "r") as h5_file:
self.reset_multi_index() # Reset MultiIndexes for next file
df = self.concatenate_channels(h5_file)
-            df = df.dropna(subset=['dldTimeAndSector'])
+            df = df.dropna(subset=self._config['dataframe'].get('tof_column', 'dldTimeSteps'))
            # correct the 3 bit shift which encodes the detector ID in the 8s time
-            df = unravel_8s_detector_time_channel(df)
+            if self._config['dataframe'].get('unravel_8s_detector_time_channel', False):
+                df = unravel_8s_detector_time_channel(df, config=self._config)
return df

def create_buffer_file(self, h5_path: Path, parquet_path: Path) -> Union[bool, Exception]: