Skip to content

Commit

Permalink
Merge pull request #116 from OpenCOMPES/histograms_from_timed_dataframe
Browse files Browse the repository at this point in the history
Histograms from timed dataframe
  • Loading branch information
rettigl authored Nov 5, 2023
2 parents 35bd293 + 3dc7c5f commit bb717a7
Show file tree
Hide file tree
Showing 18 changed files with 853 additions and 113 deletions.
71 changes: 71 additions & 0 deletions sed/binning/binning.py
Original file line number Diff line number Diff line change
Expand Up @@ -433,6 +433,77 @@ def bin_dataframe(
return data_array


def normalization_histogram_from_timestamps(
    df: dask.dataframe.DataFrame,
    axis: str,
    bin_centers: np.ndarray,
    time_stamp_column: str,
) -> xr.DataArray:
    """Build a normalization histogram along ``axis`` from a time stamp column.

    Every row is weighted by the difference between its time stamp and the
    time stamp of the preceding row. These weights are then summed within
    each bin of ``axis``.

    Args:
        df (dask.dataframe.DataFrame): Dask dataframe providing both the ``axis``
            column and the time stamp column.
        axis (str): Name of the dataframe column along which the normalization
            histogram is calculated.
        bin_centers (np.ndarray): Centers of the bins used for ``axis``. They are
            converted to bin edges before the column is binned.
        time_stamp_column (str): Name of the dataframe column holding the time
            stamps.

    Returns:
        xr.DataArray: The normalization histogram, with ``bin_centers`` as the
        coordinate named ``axis``.
    """
    # Row-to-row time stamp difference, used as the weight of each row.
    time_deltas = df[time_stamp_column].diff()

    # Assign each row to an interval of the axis, partition by partition.
    axis_bins = df[axis].map_partitions(
        pd.cut,
        bins=bin_centers_to_bin_edges(bin_centers),
    )

    # Accumulate the weights per interval; this is still lazy at this point.
    summed_per_bin = time_deltas.groupby([axis_bins]).sum()

    return xr.DataArray(
        data=summed_per_bin.compute().values,
        coords={axis: bin_centers},
    )


def normalization_histogram_from_timed_dataframe(
    df: dask.dataframe.DataFrame,
    axis: str,
    bin_centers: np.ndarray,
    time_unit: float,
) -> xr.DataArray:
    """Build a normalization histogram along ``axis`` from a timed dataframe.

    The rows falling into each bin of ``axis`` are counted, and the counts are
    multiplied by ``time_unit``.

    Args:
        df (dask.dataframe.DataFrame): Dask dataframe on which the histogram is
            computed. Its entries should be based on an equal time unit.
        axis (str): Name of the dataframe column along which the normalization
            histogram is calculated.
        bin_centers (np.ndarray): Centers of the bins used for ``axis``. They are
            converted to bin edges before the column is binned.
        time_unit (float): Time unit the dataframe entries are based on.

    Returns:
        xr.DataArray: The normalization histogram, with ``bin_centers`` as the
        coordinate named ``axis``.
    """
    axis_column = df[axis]

    # Assign each row to an interval of the axis, partition by partition.
    axis_bins = axis_column.map_partitions(
        pd.cut,
        bins=bin_centers_to_bin_edges(bin_centers),
    )

    # Number of rows per interval, scaled by the time unit of one row.
    counts_per_bin = axis_column.groupby([axis_bins]).count().compute().values
    scaled_counts = counts_per_bin * time_unit

    return xr.DataArray(
        data=scaled_counts,
        coords={axis: bin_centers},
    )


def apply_jitter_on_column(
df: Union[dask.dataframe.core.DataFrame, pd.DataFrame],
amp: float,
Expand Down
8 changes: 5 additions & 3 deletions sed/calibrator/energy.py
Original file line number Diff line number Diff line change
Expand Up @@ -236,7 +236,7 @@ def bin_data(
"Either Bias Values or a valid bias_key has to be present!",
) from exc

dataframe, _ = self.loader.read_dataframe(
dataframe, _, _ = self.loader.read_dataframe(
files=data_files,
collect_metadata=False,
)
Expand Down Expand Up @@ -1442,8 +1442,10 @@ def align_dld_sectors(
dask.dataframe.DataFrame: Dataframe with the new columns.
dict: Metadata dictionary.
"""
sector_delays = sector_delays or self.sector_delays
sector_id_column = sector_id_column or self.sector_id_column
if sector_delays is None:
sector_delays = self.sector_delays
if sector_id_column is None:
sector_id_column = self.sector_id_column

if sector_delays is None or sector_id_column is None:
raise ValueError(
Expand Down
2 changes: 2 additions & 0 deletions sed/config/default.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,8 @@ dataframe:
jitter_cols: ["@x_column", "@y_column", "@tof_column"]
# Jitter amplitude or list of jitter amplitudes. Should equal half the digital step size of each jitter_column
jitter_amps: 0.5
# Time stepping in seconds of the successive events in the timed dataframe
timed_dataframe_unit_time: 0.001

energy:
# Number of bins to use for energy calibration traces
Expand Down
2 changes: 2 additions & 0 deletions sed/config/mpes_example_config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,8 @@ dataframe:
ms_markers_group: "msMarkers"
# hdf5 attribute containing the timestamp of the first event in a file
first_event_time_stamp_key: "FirstEventTimeStamp"
# Time stepping in seconds of the successive events in the timed dataframe
timed_dataframe_unit_time: 0.001
# list of columns to apply jitter to
jitter_cols: ["X", "Y", "t", "ADC"]
# dataframe column containing x coordinates
Expand Down
Loading

0 comments on commit bb717a7

Please sign in to comment.