Skip to content

Commit

Permalink
320 construction matched pairs (#4)
Browse files Browse the repository at this point in the history
* set up construction matched pairs tests

* Remove pair from names and add skeleton function

* WIP: Add simplest case test

* Add functionality for simplest test case

* Update construction match flag function and test following discussion about test cases

* Remove unused imports

---------

Co-authored-by: giaccg <[email protected]>
  • Loading branch information
rowanhemsi and giuliag92 authored May 1, 2024
1 parent fd2145a commit b651a54
Show file tree
Hide file tree
Showing 3 changed files with 52 additions and 1 deletion.
3 changes: 2 additions & 1 deletion requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ coverage
detect-secrets==1.0.3
myst-parser
pre-commit
pytest
pytest==7.0.1
Sphinx
toml
black
Expand All @@ -11,3 +11,4 @@ nbstripout
nbqa
pre_commit_hooks
flake8
pandas==1.1.5
27 changes: 27 additions & 0 deletions src/construction_matches.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
import pandas as pd

def flag_construction_matches(dataframe, target, period, auxiliary):
    """
    Mark the records that can be used when calculating construction links.

    A record qualifies when its target, period and auxiliary values are all
    non-null. The result is written to a boolean column named
    ``flag_construction_matches``.

    Parameters
    ----------
    dataframe : pandas.DataFrame
        Data to flag. The new column is assigned onto this object directly,
        so the caller's dataframe is modified.
    target : string
        name of column containing the target variable
    period : string
        name of column containing time period
    auxiliary : string
        name of column containing auxiliary information

    Returns
    -------
    pandas.DataFrame
        The same dataframe object that was passed in, now carrying the
        additional flag_construction_matches column.
    """
    required_columns = [target, period, auxiliary]

    # Element-wise non-null mask restricted to the three columns of interest.
    is_present = dataframe[required_columns].notna()

    # A row is flagged only when every one of the three values is present.
    dataframe["flag_construction_matches"] = is_present.all(axis=1)

    return dataframe
23 changes: 23 additions & 0 deletions tests/test_construction_matches.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
import pytest
import pandas as pd
import numpy as np

from pandas.testing import assert_frame_equal

from src.construction_matches import flag_construction_matches

def test_construction_matches():
    """
    Check that a record is flagged only when target, period and auxiliary
    are all non-null.

    The fixture is built column-wise from a dict so that each column keeps a
    sensible dtype (datetime for period, numeric for auxiliary, bool for the
    flag). Building it from a single mixed-type ``np.array`` would coerce
    every column to ``object``.
    """
    period = pd.to_datetime("202401", format="%Y%m")

    expected_output = pd.DataFrame(
        {
            # One fully populated row, then one row per column that can be
            # missing: target, period, auxiliary.
            "target": [42, pd.NA, 42, 42],
            "period": [period, period, pd.NaT, period],
            "auxiliary": [10, 10, 10, np.nan],
            "flag_construction_matches": [True, False, False, False],
        }
    )

    # drop() returns a new frame, so the function under test cannot alter
    # expected_output when it adds the flag column to its input.
    input_data = expected_output.drop(columns=["flag_construction_matches"])
    actual_output = flag_construction_matches(
        input_data, "target", "period", "auxiliary"
    )

    assert_frame_equal(actual_output, expected_output)

0 comments on commit b651a54

Please sign in to comment.