From cf26f7d11376ea7e76c68c780ea4565c8f9a4f17 Mon Sep 17 00:00:00 2001 From: Giaccaglia Date: Fri, 25 Oct 2024 16:21:57 +0100 Subject: [PATCH 1/3] fixed issue with run live or frozen --- mbs_results/staging/data_cleaning.py | 7 +++++-- .../data_cleaning/test_run_live_or_frozen.csv | 16 ++++++++-------- tests/staging/test_data_cleaning.py | 12 ++++++++---- 3 files changed, 21 insertions(+), 14 deletions(-) diff --git a/mbs_results/staging/data_cleaning.py b/mbs_results/staging/data_cleaning.py index f8563391..58dec684 100644 --- a/mbs_results/staging/data_cleaning.py +++ b/mbs_results/staging/data_cleaning.py @@ -307,6 +307,8 @@ def run_live_or_frozen( Original dataframe. """ + + df = df.copy() if state not in ["frozen", "live"]: raise ValueError( @@ -316,8 +318,9 @@ def run_live_or_frozen( ) if state == "frozen": - - df.loc[df[error_marker].isin(error_values), target] = np.nan + df['frozen_error'] = df.apply(lambda x: x[target] if x[error_marker] in (error_values) else '', axis=1) + df = df.fillna('') + return df diff --git a/tests/data/staging/data_cleaning/test_run_live_or_frozen.csv b/tests/data/staging/data_cleaning/test_run_live_or_frozen.csv index c68ed0e5..02471894 100644 --- a/tests/data/staging/data_cleaning/test_run_live_or_frozen.csv +++ b/tests/data/staging/data_cleaning/test_run_live_or_frozen.csv @@ -1,8 +1,8 @@ -target,error,live,frozen -1,C,1,1 -2,E,2, -3,O,3,3 -4,W,4, -5,C,5,5 -6,E,6, -7,W,7, +target,error,live,frozen,frozen_error +2,C,2,2, +7,E,7,,7 +1,O,1,1, +6,W,6,,6 +3,C,3,3, +5,E,5,,5 +4,W,4,,4 diff --git a/tests/staging/test_data_cleaning.py b/tests/staging/test_data_cleaning.py index cfa77c2c..2da12860 100644 --- a/tests/staging/test_data_cleaning.py +++ b/tests/staging/test_data_cleaning.py @@ -104,14 +104,18 @@ def test_run_live_or_frozen(filepath): df = pd.read_csv(filepath / "test_run_live_or_frozen.csv") - df_in = df.drop(columns=["frozen"]) - + df_in = df.drop(columns=["frozen", "frozen_error"]) + live_ouput = run_live_or_frozen(df_in, "target", "error", "live") + frozen_output = run_live_or_frozen(df_in, "target", "error", "frozen") - expected_output_frozen = df_in.copy() - expected_output_frozen["target"] = df["frozen"] + expected_output_frozen = df.copy() + + expected_output_frozen.drop(columns= ["frozen"], inplace=True) + expected_output_frozen = expected_output_frozen.fillna('') + assert_frame_equal(frozen_output, expected_output_frozen) assert_frame_equal(live_ouput, df_in) From 61c27e6b2047b5bad435169c396ca2d2a10b5b7d Mon Sep 17 00:00:00 2001 From: Giaccaglia Date: Tue, 29 Oct 2024 08:46:12 +0000 Subject: [PATCH 2/3] pre commit hooks --- mbs_results/staging/data_cleaning.py | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/mbs_results/staging/data_cleaning.py b/mbs_results/staging/data_cleaning.py index 58dec684..d64f3510 100644 --- a/mbs_results/staging/data_cleaning.py +++ b/mbs_results/staging/data_cleaning.py @@ -1,6 +1,5 @@ from typing import List -import numpy as np import pandas as pd from mbs_results.utilities.utils import convert_column_to_datetime @@ -307,7 +306,7 @@ def run_live_or_frozen( Original dataframe. """ - + df = df.copy() if state not in ["frozen", "live"]: @@ -318,9 +317,10 @@ def run_live_or_frozen( ) if state == "frozen": - df['frozen_error'] = df.apply(lambda x: x[target] if x[error_marker] in (error_values) else '', axis=1) - df = df.fillna('') - + df["frozen_error"] = df.apply( + lambda x: x[target] if x[error_marker] in (error_values) else "", axis=1 + ) + df = df.fillna("") return df @@ -442,8 +442,8 @@ def correct_values( # Update value only if columns exist if set(check_columns).issubset(df.columns): - df_temp.loc[ - df[condition_column].isin(condition_values), columns_to_correct - ] = replace_with + df_temp.loc[df[condition_column].isin(condition_values), columns_to_correct] = ( + replace_with + ) return df_temp From 94495b58990435079978bd506e3b39d29244ce79 Mon Sep 17 00:00:00 2001 From: Giaccaglia Date: Tue, 29 Oct 2024 09:09:37 +0000 Subject: [PATCH 3/3] fixed pre-commit hooks --- tests/staging/test_data_cleaning.py | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/tests/staging/test_data_cleaning.py b/tests/staging/test_data_cleaning.py index 2da12860..fff60736 100644 --- a/tests/staging/test_data_cleaning.py +++ b/tests/staging/test_data_cleaning.py @@ -105,17 +105,16 @@ def test_run_live_or_frozen(filepath): df = pd.read_csv(filepath / "test_run_live_or_frozen.csv") df_in = df.drop(columns=["frozen", "frozen_error"]) - + live_ouput = run_live_or_frozen(df_in, "target", "error", "live") - + frozen_output = run_live_or_frozen(df_in, "target", "error", "frozen") expected_output_frozen = df.copy() - - expected_output_frozen.drop(columns= ["frozen"], inplace=True) - expected_output_frozen = expected_output_frozen.fillna('') - + expected_output_frozen.drop(columns=["frozen"], inplace=True) + expected_output_frozen = expected_output_frozen.fillna("") + assert_frame_equal(frozen_output, expected_output_frozen) assert_frame_equal(live_ouput, df_in)