Skip to content

Commit

Permalink
Merge pull request #66 from ONSdigital/419-test-outliering-with-anon-data
Browse files (browse the repository at this point in the history)

"419-test-outliering-with-anon-data"
  • Loading branch information
AntonZogk authored Aug 2, 2024
2 parents 5d69cfe + 00ebc36 commit 39788bc
Show file tree
Hide file tree
Showing 5 changed files with 6 additions and 6 deletions.
4 changes: 2 additions & 2 deletions mbs_results/calculate_predicted_unit_value.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ def calculate_predicted_unit_value(
A pandas DataFrame with a new column containing the predicted unit value.
"""

- winsorised = (df[sampled] == 1) & (not df[nw_ag_flag] is True)
+ winsorised = (df[sampled] == 1) & (df[nw_ag_flag] == False) # noqa: E712
filtered_df = df.loc[winsorised]

sum_weighted_target_values = (
Expand All @@ -40,7 +40,7 @@ def calculate_predicted_unit_value(
lambda x: x * (sum_weighted_target_values / sum_weighted_auxiliary_values)
)

- non_winsorised = (df[sampled] == 0) | (df[nw_ag_flag] is True)
+ non_winsorised = (df[sampled] == 0) | (df[nw_ag_flag] == True) # noqa: E712
df["predicted_unit_value"] = df["predicted_unit_value"].mask(non_winsorised, np.nan)

return df
2 changes: 1 addition & 1 deletion mbs_results/calculate_ratio_estimation.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@ def calculate_ratio_estimation(
)
df = df.drop("flag_calculation", axis=1)

- non_winsorised = (df[sampled] == 0) | (df[nw_ag_flag] is True)
+ non_winsorised = (df[sampled] == 0) | (df[nw_ag_flag] == True) # noqa: E712
df["ratio_estimation_treshold"] = df["ratio_estimation_treshold"].mask(
non_winsorised, np.nan
)
Expand Down
2 changes: 1 addition & 1 deletion mbs_results/calculate_winsorised_weight.py
Original file line number Diff line number Diff line change
Expand Up @@ -67,7 +67,7 @@ def calculate_winsorised_weight(

df = df.drop(["w", "new_target"], axis=1)

- non_winsorised = (df[sampled] == 0) | (df[nw_ag_flag] is True)
+ non_winsorised = (df[sampled] == 0) | (df[nw_ag_flag] == True) # noqa: E712
df["outlier_weight"] = df["outlier_weight"].mask(non_winsorised, np.nan)
df["new_target_variable"] = df["new_target_variable"].mask(non_winsorised, np.nan)

Expand Down
2 changes: 1 addition & 1 deletion mbs_results/flag_for_winsorisation.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ def winsorisation_flag(df, a_weight, g_weight):

df["flag_calculation"] = df[a_weight] * df[g_weight]

- df["nw_ag_flag"] = df["flag_calculation"].apply(lambda x: True if x <= 1 else 0)
+ df["nw_ag_flag"] = df["flag_calculation"].apply(lambda x: True if x <= 1 else False)

df = df.drop("flag_calculation", axis=1)

Expand Down
2 changes: 1 addition & 1 deletion tests/test_flag_for_winsorisation.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,4 +28,4 @@ def test_winsorisation_flag(self, winsorisation_flag_test_data):
df=df_input, a_weight="a_weight", g_weight="g_weight"
)

- assert_frame_equal(df_output, df_expected_output, check_dtype=False)
+ assert_frame_equal(df_output, df_expected_output)

0 comments on commit 39788bc

Please sign in to comment.