ONSdigital · giuliag92 · Oct 30, 2024 · Oct 25, 2024 · Oct 29, 2024 · Oct 29, 2024
diff --git a/mbs_results/staging/data_cleaning.py b/mbs_results/staging/data_cleaning.py
@@ -1,6 +1,5 @@
 from typing import List
 
-import numpy as np
 import pandas as pd
 
 from mbs_results.utilities.utils import convert_column_to_datetime
@@ -310,6 +309,8 @@ def run_live_or_frozen(
 
     """
 
+    df = df.copy()
+
     if state not in ["frozen", "live"]:
         raise ValueError(
             """{} is not an accepted state status, use either frozen or live """.format(
@@ -318,8 +319,10 @@ def run_live_or_frozen(
         )
 
     if state == "frozen":
-
-        df.loc[df[error_marker].isin(error_values), target] = np.nan
+        df["frozen_error"] = df.apply(
+            lambda x: x[target] if x[error_marker] in (error_values) else "", axis=1
+        )
+        df = df.fillna("")
 
     return df
 

diff --git a/tests/data/staging/data_cleaning/test_run_live_or_frozen.csv b/tests/data/staging/data_cleaning/test_run_live_or_frozen.csv
@@ -1,8 +1,8 @@
-target,error,live,frozen
-1,C,1,1
-2,E,2,
-3,O,3,3
-4,W,4,
-5,C,5,5
-6,E,6,
-7,W,7,
+target,error,live,frozen,frozen_error
+2,C,2,2,
+7,E,7,,7
+1,O,1,1,
+6,W,6,,6
+3,C,3,3,
+5,E,5,,5
+4,W,4,,4
diff --git a/tests/staging/test_data_cleaning.py b/tests/staging/test_data_cleaning.py
@@ -104,13 +104,16 @@ def test_run_live_or_frozen(filepath):
 
     df = pd.read_csv(filepath / "test_run_live_or_frozen.csv")
 
-    df_in = df.drop(columns=["frozen"])
+    df_in = df.drop(columns=["frozen", "frozen_error"])
 
     live_ouput = run_live_or_frozen(df_in, "target", "error", "live")
+
     frozen_output = run_live_or_frozen(df_in, "target", "error", "frozen")
 
-    expected_output_frozen = df_in.copy()
-    expected_output_frozen["target"] = df["frozen"]
+    expected_output_frozen = df.copy()
+
+    expected_output_frozen.drop(columns=["frozen"], inplace=True)
+    expected_output_frozen = expected_output_frozen.fillna("")
 
     assert_frame_equal(frozen_output, expected_output_frozen)
     assert_frame_equal(live_ouput, df_in)