
Commit

corrected map_overlap length to minimum partition size
steinnymir committed Oct 9, 2023
1 parent 74df4e5 commit 5b69796
Showing 1 changed file with 8 additions and 1 deletion.
9 changes: 8 additions & 1 deletion sed/loader/flash/loader.py
@@ -15,6 +15,7 @@
 from typing import Union
 
 import dask.dataframe as dd
+from dask.diagnostics import ProgressBar
 import h5py
 import numpy as np
 from joblib import delayed
@@ -742,8 +743,14 @@ def forward_fill_partition(df):
 df[channels] = df[channels].ffill()
 return df
 
+# calculate the number of rows in each partition
+with ProgressBar():
+    print("Computing dataframe shape...")
+    nrows = dataframe.map_partitions(len).compute()
+
@zain-sohail (Member) commented on Oct 9, 2023

These values should generally be in the Parquet metadata, as the num_rows statistics. Those statistics, which Dask also loads, include metadata such as null_count:
https://github.com/dask/dask/blob/928a95aa56f60da33a4e724ea2ca97797c612968/dask/dataframe/io/parquet/core.py#L569
That is what I am trying to figure out: how to access those null_count values, which would make it very easy to know whether a column contains only NaNs.
Your idea is also very good. If we can use the metadata num_rows directly, there is no need for a compute.

+max_part_size = min(nrows)
+
 # Use map_overlap to apply forward_fill_partition
-dataframe = dataframe.map_overlap(forward_fill_partition, before=0, after=1)
+dataframe = dataframe.map_overlap(forward_fill_partition, before=max_part_size+1, after=0)
 
 # Remove the NaNs from per_electron channels
 dataframe = dataframe.dropna(

0 comments on commit 5b69796
