-
Notifications
You must be signed in to change notification settings - Fork 1
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
* Set up construction matched pairs tests
* Remove pair from names and add skeleton function
* WIP: Add simplest case test
* Add functionality for simplest test case
* Update construction match flag function and test following discussion about test cases
* Remove unused imports

--------- Co-authored-by: giaccg <[email protected]>
- Loading branch information
1 parent
fd2145a
commit b651a54
Showing
3 changed files
with
52 additions
and
1 deletion.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,27 @@ | ||
import pandas as pd | ||
|
||
def flag_construction_matches(dataframe, target, period, auxiliary):
    """
    Flag records that are valid to use when calculating construction links.

    A record is flagged True when its target, period and auxiliary values
    are all non-null, and False as soon as any one of them is null.

    Parameters
    ----------
    dataframe : pandas.DataFrame
        data to flag; the flag column is written onto this object in place
    target : string
        name of column containing the target variable
    period : string
        name of column containing time period
    auxiliary : string
        name of column containing auxiliary information

    Returns
    -------
    pandas.DataFrame
        the same dataframe object that was passed in, with additional
        flag_construction_matches column
    """
    required_columns = [target, period, auxiliary]

    # Row-wise reduction: a single null in any required column makes the
    # whole record unusable, so every column must be non-null.
    dataframe["flag_construction_matches"] = pd.notna(
        dataframe[required_columns]
    ).all(axis="columns")

    return dataframe
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,23 @@ | ||
import pytest | ||
import pandas as pd | ||
import numpy as np | ||
|
||
from pandas.testing import assert_frame_equal | ||
|
||
from src.construction_matches import flag_construction_matches | ||
|
||
def test_construction_matches():
    """
    Check that only records with non-null target, period and auxiliary
    values are flagged as construction matches.
    """
    period = pd.to_datetime("202401", format="%Y%m")

    # One fully populated record, then one record per required column with
    # that column set to null, so each column's contribution is exercised.
    expected_output = pd.DataFrame(
        np.array(
            [
                [42, period, 10, True],
                [pd.NA, period, 10, False],
                [42, pd.NA, 10, False],
                [42, period, pd.NA, False],
            ],
            dtype=object,
        ),
        columns=["target", "period", "auxiliary", "flag_construction_matches"],
    )

    # Building the frame from a mixed-value numpy array leaves every column
    # as object dtype; cast the flag to plain bool so it can be compared
    # against the bool column the function is expected to produce.
    expected_output["flag_construction_matches"] = expected_output[
        "flag_construction_matches"
    ].astype(bool)

    # drop returns a new frame, so flagging the input leaves expected_output
    # untouched.
    input_data = expected_output.drop(labels=["flag_construction_matches"], axis=1)
    actual_output = flag_construction_matches(
        input_data, "target", "period", "auxiliary"
    )

    assert_frame_equal(actual_output, expected_output)