Skip to content

Commit

Permalink
Merge pull request #162 from ImperialCollegeLondon/fix-multi-file-reading
Browse files Browse the repository at this point in the history

Fix multi file reading where columns are missing in some files
  • Loading branch information
tomjholland authored Nov 18, 2024
2 parents 82da37c + 42933e0 commit eadbd32
Show file tree
Hide file tree
Showing 2 changed files with 33 additions and 6 deletions.
9 changes: 3 additions & 6 deletions pyprobe/cyclers/basecycler.py
Original file line number Diff line number Diff line change
Expand Up @@ -179,16 +179,13 @@ def _get_dataframe_list(self) -> list[pl.DataFrame | pl.LazyFrame]:
files.sort()
list = [self.read_file(file) for file in files]
all_columns = set([col for df in list for col in df.collect_schema().names()])
indices_to_remove = []
for i in range(len(list)):
if len(list[i].collect_schema().names()) < len(all_columns):
indices_to_remove.append(i)
warnings.warn(
f"File {os.path.basename(files[i])} has missing columns, "
"it has not been read."
"these have been filled with null values."
)
continue
return [df for i, df in enumerate(list) if i not in indices_to_remove]
return list

def get_imported_dataframe(
self, dataframe_list: List[pl.DataFrame]
Expand All @@ -201,7 +198,7 @@ def get_imported_dataframe(
Returns:
DataFrame: A single DataFrame.
"""
return pl.concat(dataframe_list, how="vertical", rechunk=True)
return pl.concat(dataframe_list, how="diagonal", rechunk=True)

@staticmethod
def _match_unit(column_name: str, pattern: str) -> Optional[str]:
Expand Down
30 changes: 30 additions & 0 deletions tests/cyclers/test_basecycler.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
import os
import re

import numpy as np
import polars as pl
import polars.testing as pl_testing
import pytest
Expand Down Expand Up @@ -355,3 +356,32 @@ def test_ch_dis_capacity(sample_dataframe, sample_pyprobe_dataframe, column_dict
base_cycler.pyprobe_dataframe.collect(), sample_pyprobe_dataframe
)
os.remove("tests/sample_data/test_data.csv")


def test_with_missing_columns(sample_dataframe):
    """Test reading multiple files when one of them is missing a column.

    Two CSV files matching the same glob are written: one with every column
    of ``sample_dataframe`` and one with the ``"I [mA]"`` column dropped. The
    resulting ``"Current [A]"`` column is then expected to contain only NaN
    values from row 3 onwards.

    The temporary CSV files are removed in a ``finally`` clause so that a
    failing assertion cannot leave them behind, where the ``test_data*.csv``
    glob would pick them up again on a later run.
    """
    complete_path = "tests/sample_data/test_data.csv"
    incomplete_path = "tests/sample_data/test_data1.csv"
    try:
        sample_dataframe.write_csv(complete_path)
        # DataFrame.drop returns a new frame, so the fixture is not mutated
        # and no defensive copy is required.
        sample_dataframe.drop("I [mA]").write_csv(incomplete_path)
        base_cycler = BaseCycler(
            input_data_path="tests/sample_data/test_data*.csv",
            column_dict={
                "DateTime": "Date",
                "T [*]": "Time [*]",
                "V [*]": "Voltage [*]",
                "I [*]": "Current [*]",
                "Q [*]": "Capacity [*]",
                "Count": "Step",
                "Temp [*]": "Temperature [*]",
                "Q_ch [*]": "Charge Capacity [*]",
                "Q_dis [*]": "Discharge Capacity [*]",
            },
        )
        current = (
            base_cycler.pyprobe_dataframe.collect().select("Current [A]").to_numpy()
        )
        # NOTE(review): the offset 3 presumably equals the number of rows in
        # sample_dataframe (i.e. the rows read from the complete file) --
        # confirm against the fixture definition.
        assert np.all(np.isnan(current[3:]))
    finally:
        # Guard each removal: if an early write failed, a missing file must
        # not mask the original error with FileNotFoundError.
        for path in (complete_path, incomplete_path):
            if os.path.exists(path):
                os.remove(path)

0 comments on commit eadbd32

Please sign in to comment.