ONSdigital · AntonZogk · Jun 25, 2024 · Jun 13, 2024 · Jun 13, 2024 · Jun 13, 2024
diff --git a/src/imputation_flags.py b/src/imputation_flags.py
@@ -5,6 +5,7 @@
 def create_impute_flags(
     df: pd.DataFrame,
     target: str,
+    period: str,
     reference: str,
     strata: str,
     auxiliary: str,
@@ -25,9 +26,10 @@ def create_impute_flags(
         DataFrame containing forward, backward predictive period columns (
         These columns are created by calling flag_matched_pair_merge forward
         and backwards)
-
     target : str
         Column name containing target variable.
+    period : str
+        Column name containing date variable.
     reference : str
         Column name containing business reference id.
     strata : str
@@ -59,11 +61,20 @@ def create_impute_flags(
     backward_target_roll = "b_predictive_" + target + "_roll"
     forward_aux_roll = "f_predictive_" + auxiliary + "_roll"
 
-    df[forward_target_roll] = df.groupby([reference, strata])[
+    df.sort_values([reference, strata, period], inplace=True)
+
+    # TODO: apply similar conditions in the cumulative imputation links
+    df["fill_group"] = (
+        (df[period] - pd.DateOffset(months=1) != df.shift(1)[period])
+        | (df[strata].diff(1) != 0)
+        | (df[reference].diff(1) != 0)
+    ).cumsum()
+
+    df[forward_target_roll] = df.groupby([reference, strata, "fill_group"])[
         "f_predictive_" + target
     ].ffill()
 
-    df[backward_target_roll] = df.groupby([reference, strata])[
+    df[backward_target_roll] = df.groupby([reference, strata, "fill_group"])[
         "b_predictive_" + target
     ].bfill()
 
@@ -80,7 +91,9 @@ def create_impute_flags(
     construction_conditions = df[target].isna() & df[auxiliary].notna()
     df["c_flag"] = np.where(construction_conditions, True, False)
 
-    df[forward_aux_roll] = df.groupby([reference, strata])[predictive_auxiliary].ffill()
+    df[forward_aux_roll] = df.groupby([reference, strata, "fill_group"])[
+        predictive_auxiliary
+    ].ffill()
 
     fic_conditions = df[target].isna() & df[forward_aux_roll].notna()
     df["fic_flag"] = np.where(fic_conditions, True, False)
@@ -91,6 +104,7 @@ def create_impute_flags(
             backward_target_roll,
             forward_aux_roll,
             predictive_auxiliary,
+            "fill_group",
         ],
         axis=1,
         inplace=True,

diff --git a/tests/imputation_flag_data.csv b/tests/imputation_flag_data.csv
@@ -26,3 +26,7 @@ reference,strata,period,target_variable,auxiliary,f_predictive_target_variable,b
 7,100,202001,,40.0,,,False,False,False,True,False,,c
 7,100,202002,,,,,False,False,False,False,True,40.0,fic
 7,100,202003,,,,,False,False,False,False,True,,fic
+8,100,202001,789,55,,,TRUE,FALSE,FALSE,FALSE,FALSE,,r
+8,100,202002,,66,789,,FALSE,TRUE,FALSE,TRUE,TRUE,55,fir
+8,100,202004,,77,,987,FALSE,FALSE,TRUE,TRUE,FALSE,,bir
+8,100,202005,987,88,,,TRUE,FALSE,FALSE,FALSE,FALSE,77,r
diff --git a/tests/test_imputation_flags.py b/tests/test_imputation_flags.py
@@ -32,6 +32,7 @@ def test_create_impute_flags(self, imputation_flag_test_data):
         df_output = create_impute_flags(
             df=df_input,
             target="target_variable",
+            period="period",
             reference="reference",
             strata="strata",
             auxiliary="auxiliary",