Add test to check that missing columns are filled with null values

ImperialCollegeLondon · Nov 18, 2024 · 42933e0
1 parent fad9398
commit 42933e0
Show file tree

Hide file tree

Showing 3 changed files with 32 additions and 20 deletions.
diff --git a/pyprobe/cyclers/basecycler.py b/pyprobe/cyclers/basecycler.py
@@ -179,16 +179,13 @@ def _get_dataframe_list(self) -> list[pl.DataFrame | pl.LazyFrame]:
         files.sort()
         list = [self.read_file(file) for file in files]
         all_columns = set([col for df in list for col in df.collect_schema().names()])
-        indices_to_remove = []
         for i in range(len(list)):
             if len(list[i].collect_schema().names()) < len(all_columns):
-                indices_to_remove.append(i)
                 warnings.warn(
                     f"File {os.path.basename(files[i])} has missing columns, "
-                    "it has not been read."
+                    "these have been filled with null values."
                 )
-                continue
-        return [df for i, df in enumerate(list) if i not in indices_to_remove]
+        return list
 
     def get_imported_dataframe(
         self, dataframe_list: List[pl.DataFrame]

diff --git a/pyprobe/cyclers/biologic.py b/pyprobe/cyclers/biologic.py
@@ -1,7 +1,6 @@
 """A module to load and process Biologic battery cycler data."""
 
 
-import glob
 from datetime import datetime
 from typing import List
 
@@ -116,17 +115,3 @@ def apply_step_correction(
         return df_with_max_step.with_columns(
             pl.col("Ns").cast(pl.Int64) + pl.col("Max_Step")
         )
-
-    def _get_dataframe_list(self) -> list[pl.DataFrame | pl.LazyFrame]:
-        """Return a list of all the imported dataframes.
-
-        Args:
-            input_data_path (str): The path to the input data.
-
-        Returns:
-            List[DataFrame]: A list of DataFrames.
-        """
-        files = glob.glob(self.input_data_path)
-        files.sort()
-        list = [self.read_file(file) for file in files]
-        return list
diff --git a/tests/cyclers/test_basecycler.py b/tests/cyclers/test_basecycler.py
@@ -3,6 +3,7 @@
 import os
 import re
 
+import numpy as np
 import polars as pl
 import polars.testing as pl_testing
 import pytest
@@ -355,3 +356,32 @@ def test_ch_dis_capacity(sample_dataframe, sample_pyprobe_dataframe, column_dict
         base_cycler.pyprobe_dataframe.collect(), sample_pyprobe_dataframe
     )
     os.remove("tests/sample_data/test_data.csv")
+
+
+def test_with_missing_columns(sample_dataframe):
+    """Test with a dataframe missing columns."""
+    sample_dataframe.write_csv("tests/sample_data/test_data.csv")
+    df = copy.deepcopy(sample_dataframe)
+    df = df.drop("I [mA]")
+    df.write_csv("tests/sample_data/test_data1.csv")
+    base_cycler = BaseCycler(
+        input_data_path="tests/sample_data/test_data*.csv",
+        column_dict={
+            "DateTime": "Date",
+            "T [*]": "Time [*]",
+            "V [*]": "Voltage [*]",
+            "I [*]": "Current [*]",
+            "Q [*]": "Capacity [*]",
+            "Count": "Step",
+            "Temp [*]": "Temperature [*]",
+            "Q_ch [*]": "Charge Capacity [*]",
+            "Q_dis [*]": "Discharge Capacity [*]",
+        },
+    )
+    assert np.all(
+        np.isnan(
+            base_cycler.pyprobe_dataframe.collect().select("Current [A]").to_numpy()[3:]
+        )
+    )
+    os.remove("tests/sample_data/test_data.csv")
+    os.remove("tests/sample_data/test_data1.csv")