diff --git a/pyprobe/cyclers/basecycler.py b/pyprobe/cyclers/basecycler.py index 5376b5d6..ca0354f8 100644 --- a/pyprobe/cyclers/basecycler.py +++ b/pyprobe/cyclers/basecycler.py @@ -179,16 +179,13 @@ def _get_dataframe_list(self) -> list[pl.DataFrame | pl.LazyFrame]: files.sort() list = [self.read_file(file) for file in files] all_columns = set([col for df in list for col in df.collect_schema().names()]) - indices_to_remove = [] for i in range(len(list)): if len(list[i].collect_schema().names()) < len(all_columns): - indices_to_remove.append(i) warnings.warn( f"File {os.path.basename(files[i])} has missing columns, " - "it has not been read." + "these have been filled with null values." ) - continue - return [df for i, df in enumerate(list) if i not in indices_to_remove] + return list def get_imported_dataframe( self, dataframe_list: List[pl.DataFrame] diff --git a/pyprobe/cyclers/biologic.py b/pyprobe/cyclers/biologic.py index b75234fd..19d667e5 100644 --- a/pyprobe/cyclers/biologic.py +++ b/pyprobe/cyclers/biologic.py @@ -1,7 +1,6 @@ """A module to load and process Biologic battery cycler data.""" -import glob from datetime import datetime from typing import List @@ -116,17 +115,3 @@ def apply_step_correction( return df_with_max_step.with_columns( pl.col("Ns").cast(pl.Int64) + pl.col("Max_Step") ) - - def _get_dataframe_list(self) -> list[pl.DataFrame | pl.LazyFrame]: - """Return a list of all the imported dataframes. - - Args: - input_data_path (str): The path to the input data. - - Returns: - List[DataFrame]: A list of DataFrames. - """ - files = glob.glob(self.input_data_path) - files.sort() - list = [self.read_file(file) for file in files] - return list diff --git a/tests/cyclers/test_basecycler.py b/tests/cyclers/test_basecycler.py index af083556..29bd08dc 100644 --- a/tests/cyclers/test_basecycler.py +++ b/tests/cyclers/test_basecycler.py @@ -3,6 +3,7 @@ import os import re +import numpy as np import polars as pl import polars.testing as pl_testing import pytest @@ -355,3 +356,32 @@ def test_ch_dis_capacity(sample_dataframe, sample_pyprobe_dataframe, column_dict base_cycler.pyprobe_dataframe.collect(), sample_pyprobe_dataframe ) os.remove("tests/sample_data/test_data.csv") + + +def test_with_missing_columns(sample_dataframe): + """Test with a dataframe missing columns.""" + sample_dataframe.write_csv("tests/sample_data/test_data.csv") + df = copy.deepcopy(sample_dataframe) + df = df.drop("I [mA]") + df.write_csv("tests/sample_data/test_data1.csv") + base_cycler = BaseCycler( + input_data_path="tests/sample_data/test_data*.csv", + column_dict={ + "DateTime": "Date", + "T [*]": "Time [*]", + "V [*]": "Voltage [*]", + "I [*]": "Current [*]", + "Q [*]": "Capacity [*]", + "Count": "Step", + "Temp [*]": "Temperature [*]", + "Q_ch [*]": "Charge Capacity [*]", + "Q_dis [*]": "Discharge Capacity [*]", + }, + ) + assert np.all( + np.isnan( + base_cycler.pyprobe_dataframe.collect().select("Current [A]").to_numpy()[3:] + ) + ) + os.remove("tests/sample_data/test_data.csv") + os.remove("tests/sample_data/test_data1.csv")