Skip to content

Commit

Permalink
add dataframe statistics in repr
Browse files Browse the repository at this point in the history
  • Loading branch information
rettigl committed Nov 29, 2024
1 parent 8f4b6bf commit 835f084
Showing 1 changed file with 77 additions and 2 deletions.
79 changes: 77 additions & 2 deletions src/sed/core/processor.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
import pandas as pd
import psutil
import xarray as xr
from dask.diagnostics import ProgressBar

from sed.binning import bin_dataframe
from sed.binning.binning import normalization_histogram_from_timed_dataframe
Expand Down Expand Up @@ -190,11 +191,36 @@ def __init__(
)

def __repr__(self):
info = self.get_run_info()
if self._dataframe is None:
df_str = "Dataframe: No Data loaded"
else:
df_str = self._dataframe.__repr__()
pretty_str = df_str + "\n" + "Metadata: " + "\n" + self._attributes.__repr__()
df_str = f"Data Frame: {len(info['dataframe']['columns'])} columns.\n"
if info["dataframe"]["num_electrons"] is not None:
df_str += f"{' '*11} {info['dataframe']['num_electrons']:,.0f} electrons.\n"
if info["dataframe"]["num_trains"] is not None:
df_str += f"{' '*11} {info['dataframe']['num_trains']:,.0f} trains.\n"
if (
info["dataframe"]["num_electrons"] is not None
and info["dataframe"]["num_trains"] is not None
):
df_str += (
f"{' '*11} {info['dataframe']['electrons_per_train']:,.1f} electrons/train.\n"
)
if "num_pulses" in info["dataframe"]:
df_str += f"{' '*11} {info['dataframe']['num_pulses']:,.0f} pulses.\n"
df_str += (
f"{' '*11} {info['dataframe']['electrons_per_pulse']} " "electrons/pulse.\n"
)
df_str += f"{' '*11} {info['dataframe']['timestamp_duration']:,.0f} seconds.\n"
df_str += f"{' '*11} {info['dataframe']['duration']}.\n"
df_str += (
f"{' '*11} {info['dataframe']['start_time']} to {info['dataframe']['end_time']}.\n"
)

# df_str = self._dataframe.__repr__()
# attributes_str = f"Metadata: {self._attributes.metadata}"
pretty_str = df_str # + "\n" + attributes_str
return pretty_str

def _repr_html_(self):
Expand All @@ -217,6 +243,55 @@ def _repr_html_(self):

return html

def get_run_info(self, compute=False) -> dict:
"""Function to return a dict of information about the loaded data.
TODO: add dtypes from dataframe. add columns per pulse/per electron/per train
Returns:
dict: Dictionary with information about the loaded data.
"""
info: dict[str, Any] = {}
head = self.dataframe.head(1)
tail = self.dataframe.tail(1)
info["dataframe"] = {}
info["dataframe"]["columns"] = self.dataframe.columns
if hasattr(self.loader, "num_electrons"):
n_el: int = self.loader.num_electrons
else:
n_el = None
if n_el is None and compute:
with ProgressBar():
print("computing number of electrons")
n_el = len(self.dataframe)
info["dataframe"]["num_electrons"] = n_el
if hasattr(self.loader, "num_pulses"):
n_pulses: int = self.loader.num_pulses
else:
n_pulses = None
if n_pulses is None and compute:
with ProgressBar():
print("computing number of pulses")
n_pulses = len(self.dataframe[self.dataframe["electronId"] == 0])
train_range: tuple = int(head["trainId"]), int(tail["trainId"])
n_trains = train_range[1] - train_range[0]
info["dataframe"]["trainId_min"] = train_range[0]
info["dataframe"]["trainId_max"] = train_range[1]
info["dataframe"]["num_trains"] = n_trains
if n_el is not None and n_pulses is not None:
info["dataframe"]["electrons_per_pulse"] = n_el / n_pulses
if n_el is not None and n_trains is not None:
info["dataframe"]["electrons_per_train"] = n_el / n_trains
tsr = float(head["timeStamp"]), float(tail["timeStamp"])
info["dataframe"]["timestamp_min"] = tsr[0]
info["dataframe"]["timestamp_max"] = tsr[1]
info["dataframe"]["timestamp_duration"] = tsr[1] - tsr[0]
info["dataframe"]["start_time"] = pd.to_datetime(tsr[0], unit="s")
info["dataframe"]["end_time"] = pd.to_datetime(tsr[1], unit="s")
info["dataframe"]["duration"] = pd.to_timedelta(tsr[1] - tsr[0], unit="s")

info["metadata"] = self._attributes.metadata
info["config"] = self._config
return info

## Suggestion:
# @property
# def overview_panel(self):
Expand Down

0 comments on commit 835f084

Please sign in to comment.