Skip to content

Commit

Permalink
add progress bar to flash parquet forward filling
Browse files (browse the repository at this point in the history)
  • Loading branch information
steinnymir committed Oct 6, 2023
1 parent 4527ec1 commit 1742074
Showing 1 changed file with 5 additions and 2 deletions.
7 changes: 5 additions & 2 deletions sed/loader/flash/loader.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -29,6 +29,9 @@
from sed.loader.flash.metadata import MetadataRetriever
from sed.loader.utils import parse_h5_keys

from tqdm.auto import trange, tqdm
import time


class FlashLoader(BaseLoader):
"""
Expand Down Expand Up @@ -701,13 +704,14 @@ def fill_na(
channels: List[str] = self.get_channels_by_format(["per_pulse", "per_train"])

# Fill NaN values within each dataframe
t0 = time.time()
for i, _ in enumerate(dataframes):
dataframes[i][channels] = dataframes[i][channels].fillna(
method="ffill",
)

# Forward fill between consecutive dataframes
for i in range(1, len(dataframes)):
for i in tqdm(range(1, len(dataframes)), desc='Filling NaNs', leave=True, total=len(dataframes)-1):
# Select pulse channels from current dataframe
subset = dataframes[i][channels]
# Find columns with NaN values in the first row
Expand All @@ -726,7 +730,6 @@ def fill_na(
dataframes[i][channels_to_overwrite] = subset[channels_to_overwrite].fillna(
fill_dict,
)

# Concatenate the filled dataframes
return dd.concat(dataframes)

Expand Down

0 comments on commit 1742074

Please sign in to comment.