From 328d4a43548efe18a61f92086aecd40f627d9bb4 Mon Sep 17 00:00:00 2001 From: Zain Sohail Date: Sat, 4 Nov 2023 20:26:23 +0100 Subject: [PATCH] load metadata and schema after creating all buffer files --- sed/loader/flash/loader.py | 36 +++++++++++++++++++----------------- 1 file changed, 19 insertions(+), 17 deletions(-) diff --git a/sed/loader/flash/loader.py b/sed/loader/flash/loader.py index a65b511a..0ab3574a 100644 --- a/sed/loader/flash/loader.py +++ b/sed/loader/flash/loader.py @@ -656,22 +656,20 @@ def buffer_file_handler(self, data_parquet_dir: Path, detector: str, force_recre if len(h5_filenames) == 0: raise ValueError("No data available. Probably failed reading all h5 files") - # read parquet metadata and schema - metadatas = [pq.read_metadata(file) for file in existing_parquet_filenames] - schemas = [pq.read_schema(file) for file in existing_parquet_filenames] - - # check if available_channels are same as schema - available_channels_set = set(self.available_channels) - - for i, schema in enumerate(schemas): - schema_set = set(schema.names) - # Check if available_channels are the same as schema including pulseId - if not force_recreate and schema_set != available_channels_set.union({"pulseId"}): - raise ValueError( - "The available channels do not match the schema of file " - f"{existing_parquet_filenames[i]}" - "Please check the configuration file or set force_recreate to True.", - ) + if not force_recreate: + # Check if the available channels match the schema of the existing parquet files + schemas = [pq.read_schema(file) for file in existing_parquet_filenames] + available_channels_set = set(self.available_channels) + + for i, schema in enumerate(schemas): + schema_set = set(schema.names) + # Check if available_channels are the same as schema including pulseId + if schema_set != available_channels_set.union({"pulseId"}): + raise ValueError( + "The available channels do not match the schema of file " + f"{existing_parquet_filenames[i]}" + "Please check the configuration file or set force_recreate to True.", + ) # Choose files to read files_to_read = [ @@ -703,7 +701,11 @@ def buffer_file_handler(self, data_parquet_dir: Path, detector: str, force_recre print("All files converted successfully!") - return parquet_filenames, metadatas, schemas + # read all parquet metadata and schema + metadata = [pq.read_metadata(file) for file in parquet_filenames] + schema = [pq.read_schema(file) for file in parquet_filenames] + + return parquet_filenames, metadata, schema def parquet_handler( self,