Skip to content

Commit

Permalink
Run pre commit hooks
Browse files Browse the repository at this point in the history
  • Loading branch information
AntonZogk committed Jun 19, 2024
1 parent 4ce48c8 commit fd53bd7
Show file tree
Hide file tree
Showing 2 changed files with 35 additions and 35 deletions.
45 changes: 21 additions & 24 deletions src/predictive_variable.py
Original file line number Diff line number Diff line change
@@ -1,14 +1,15 @@
import pandas as pd


def shift_by_strata_period(
df: pd.DataFrame,
target: str,
period: str,
strata: str,
reference: str,
time_difference: int,
new_col: str,
**kwargs
df: pd.DataFrame,
target: str,
period: str,
strata: str,
reference: str,
time_difference: int,
new_col: str,
**kwargs
) -> pd.DataFrame:
"""
It will perform the usual shift by desired time_difference for each value
@@ -40,19 +41,15 @@ def shift_by_strata_period(
Pandas dataframe of original data with a new column containing the
shifted values.
"""

df.sort_values([reference,strata, period], inplace=True)

df[new_col] = (
df.groupby((
(
df[period] - pd.DateOffset(months=1)
!= df.shift(1)[period]
)
| (df[strata].diff(1) != 0)
| (df[reference].diff(1) != 0)
)
.cumsum())
.shift(time_difference)[target])

return df

df.sort_values([reference, strata, period], inplace=True)

df[new_col] = df.groupby(
(
(df[period] - pd.DateOffset(months=1) != df.shift(1)[period])
| (df[strata].diff(1) != 0)
| (df[reference].diff(1) != 0)
).cumsum()
).shift(time_difference)[target]

return df
25 changes: 14 additions & 11 deletions tests/test_predictive_variable.py
Original file line number Diff line number Diff line change
@@ -1,34 +1,37 @@
from src.predictive_variable import shift_by_strata_period
from pathlib import Path

import pytest
from helper_functions import load_and_format
from pandas.testing import assert_frame_equal

import pytest
from src.predictive_variable import shift_by_strata_period


@pytest.fixture(scope="class")
def predictive_variable_test_data():
return load_and_format(Path("tests") / "predictive_variable.csv")


class TestPredictiveVariable:
def test_predictive_variable_forward(self, predictive_variable_test_data):
expected_output = predictive_variable_test_data[
['identifier', 'period', 'group', 'question', 'other',"f_predictive"
]]
["identifier", "period", "group", "question", "other", "f_predictive"]
]
input_data = expected_output.drop(columns="f_predictive")
actual_output = shift_by_strata_period(
input_data, "question", "period", "group","identifier",1,"f_predictive"
input_data, "question", "period", "group", "identifier", 1, "f_predictive"
)
actual_output.sort_index(ascending=True,inplace=True)
actual_output.sort_index(ascending=True, inplace=True)
assert_frame_equal(actual_output, expected_output)

def test_predictive_variable_backward(self, predictive_variable_test_data):

expected_output = predictive_variable_test_data[
['identifier', 'period', 'group', 'question', 'other',"b_predictive"
]]
["identifier", "period", "group", "question", "other", "b_predictive"]
]
input_data = expected_output.drop(columns="b_predictive")
actual_output = shift_by_strata_period(
input_data, "question", "period", "group","identifier",-1,"b_predictive"
input_data, "question", "period", "group", "identifier", -1, "b_predictive"
)
actual_output.sort_index(ascending=True,inplace=True)
assert_frame_equal(actual_output, expected_output)
actual_output.sort_index(ascending=True, inplace=True)
assert_frame_equal(actual_output, expected_output)

0 comments on commit fd53bd7

Please sign in to comment.