diff --git a/pyprobe/cyclers/basecycler.py b/pyprobe/cyclers/basecycler.py index 679118c4..ca0354f8 100644 --- a/pyprobe/cyclers/basecycler.py +++ b/pyprobe/cyclers/basecycler.py @@ -179,16 +179,13 @@ def _get_dataframe_list(self) -> list[pl.DataFrame | pl.LazyFrame]: files.sort() list = [self.read_file(file) for file in files] all_columns = set([col for df in list for col in df.collect_schema().names()]) - indices_to_remove = [] for i in range(len(list)): if len(list[i].collect_schema().names()) < len(all_columns): - indices_to_remove.append(i) warnings.warn( f"File {os.path.basename(files[i])} has missing columns, " - "it has not been read." + "these have been filled with null values." ) - continue - return [df for i, df in enumerate(list) if i not in indices_to_remove] + return list def get_imported_dataframe( self, dataframe_list: List[pl.DataFrame] @@ -201,7 +198,7 @@ def get_imported_dataframe( Returns: DataFrame: A single DataFrame. """ - return pl.concat(dataframe_list, how="vertical", rechunk=True) + return pl.concat(dataframe_list, how="diagonal", rechunk=True) @staticmethod def _match_unit(column_name: str, pattern: str) -> Optional[str]: diff --git a/tests/cyclers/test_basecycler.py b/tests/cyclers/test_basecycler.py index af083556..29bd08dc 100644 --- a/tests/cyclers/test_basecycler.py +++ b/tests/cyclers/test_basecycler.py @@ -3,6 +3,7 @@ import os import re +import numpy as np import polars as pl import polars.testing as pl_testing import pytest @@ -355,3 +356,32 @@ def test_ch_dis_capacity(sample_dataframe, sample_pyprobe_dataframe, column_dict base_cycler.pyprobe_dataframe.collect(), sample_pyprobe_dataframe ) os.remove("tests/sample_data/test_data.csv") + + +def test_with_missing_columns(sample_dataframe): + """Test with a dataframe missing columns.""" + sample_dataframe.write_csv("tests/sample_data/test_data.csv") + df = copy.deepcopy(sample_dataframe) + df = df.drop("I [mA]") + df.write_csv("tests/sample_data/test_data1.csv") + base_cycler = BaseCycler( + input_data_path="tests/sample_data/test_data*.csv", + column_dict={ + "DateTime": "Date", + "T [*]": "Time [*]", + "V [*]": "Voltage [*]", + "I [*]": "Current [*]", + "Q [*]": "Capacity [*]", + "Count": "Step", + "Temp [*]": "Temperature [*]", + "Q_ch [*]": "Charge Capacity [*]", + "Q_dis [*]": "Discharge Capacity [*]", + }, + ) + assert np.all( + np.isnan( + base_cycler.pyprobe_dataframe.collect().select("Current [A]").to_numpy()[3:] + ) + ) + os.remove("tests/sample_data/test_data.csv") + os.remove("tests/sample_data/test_data1.csv")