Skip to content

Commit

Permalink
- amending dq tests to be more
Browse files Browse the repository at this point in the history
 specific to data subsets
  • Loading branch information
annajgibson committed Dec 19, 2024
1 parent 2088b03 commit 7b69de7
Show file tree
Hide file tree
Showing 3 changed files with 32 additions and 2 deletions.
3 changes: 3 additions & 0 deletions scripts/helpers/housing_gx_dq_inputs.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,7 @@
'expect_llpg_column_values_to_match_regex': 'VALIDITY',
'expect_llpg_column_values_to_not_be_null': 'COMPLETENESS',
'expect_member_full_name_column_value_lengths_between': 'VALIDITY',
'expect_member_is_responsible_values_to_be_in_set': 'CONSISTENCY',
'expect_payment_ref_no_column_values_to_match_regex': 'VALIDITY',
'expect_payment_reference_column_not_to_be_null': 'COMPLETENESS',
'expect_person_id_and_payment_reference_column_values_to_be_unique_within_record': 'UNIQUENESS',
Expand All @@ -64,9 +65,11 @@
'expect_property_ref_column_values_to_not_be_null': 'COMPLETENESS',
'expect_select_column_values_to_be_unique_within_record': 'UNIQUENESS',
'expect_start_of_tenure_date_column_not_to_be_null': 'COMPLETENESS',
'expect_end_of_tenure_date_column_to_be_null': 'COMPLETENESS',
'expect_sub_type_column_values_to_be_in_set': 'CONSISTENCY',
'expect_sub_type_column_values_to_not_be_null': 'COMPLETENESS',
'expect_surname_column_value_length': 'VALIDITY',
'expect_firstname_column_value_length': 'VALIDITY',
'expect_target_id_and_value_column_values_to_be_unique_within_record': 'UNIQUENESS',
'expect_target_id_column_values_to_not_be_null': 'COMPLETENESS',
'expect_target_type_column_values_to_be_in_set': 'CONSISTENCY',
Expand Down
17 changes: 15 additions & 2 deletions scripts/jobs/housing/housing_person_reshape_gx_suite.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,16 @@ class ExpectSurnameColumnValueLength(gxe.ExpectColumnValueLengthsToBeBetween):
column: str = "surname"
min_value: int = 1
description: str = "Expect surname to be at least 1 character length"
condition_parser: str = 'pandas'
row_condition: str = 'isorganisation<>True'


class ExpectFirstnameColumnValueLength(gxe.ExpectColumnValueLengthsToBeBetween):
    # Length check on the `firstname` column: at least one character, with no
    # upper bound configured here.
    column: str = "firstname"
    min_value: int = 1
    description: str = "Expect firstname to be at least 1 character length"
    # The check is restricted to rows where `isorganisation` is not True.
    # The 'pandas' condition parser evaluates Python-style expressions, and
    # `<>` is not a valid comparison operator in Python 3, so the inequality
    # is written as `!=`.
    condition_parser: str = 'pandas'
    row_condition: str = 'isorganisation != True'


class ExpectPersonTypeValuesToBeInSet(gxe.ExpectColumnValuesToBeInSet):
Expand Down Expand Up @@ -78,14 +88,16 @@ class ExpectDateOfBirthColumnValuesToNotBeNull(gxe.ExpectColumnValuesToNotBeNull
column: str = 'dateofbirth_parsed'
description: str = "Expect dateofbirth_parsed be complete with no missing values"
condition_parser: str = 'pandas'
row_condition: str = 'isorganisation<>true'
row_condition: str = 'isorganisation<>True'


class ExpectDateOfBirthToBeBetween(gxe.ExpectColumnValuesToBeBetween):
    # Range check on `dateofbirth_parsed`: values must fall between
    # 1900-01-01 and "today", both expressed as ISO-8601 strings.
    column: str = 'dateofbirth_parsed'
    min_value: str = datetime(1900, 1, 1, 0, 0, 0).isoformat()
    # Computed once, when this class body is executed, not on each
    # validation run.
    max_value: str = datetime.today().isoformat()
    # A single description: the earlier duplicate assignment repeated the
    # not-null check's wording and was overridden by this one anyway.
    description: str = "Expect dateofbirth_parsed be between 1900-01-01 and today's date"
    # The check is restricted to rows where `isorganisation` is not True.
    # `!=` is used because `<>` is not a valid operator for the
    # Python-style expressions evaluated by the 'pandas' condition parser.
    condition_parser: str = 'pandas'
    row_condition: str = 'isorganisation != True'


# add to GX context
Expand All @@ -94,6 +106,7 @@ class ExpectDateOfBirthToBeBetween(gxe.ExpectColumnValuesToBeBetween):
suite = gx.ExpectationSuite(name='person_reshape_suite')
# suite.add_expectation(ExpectFirstNameColumnValueLength())
suite.add_expectation(ExpectSurnameColumnValueLength())
suite.add_expectation(ExpectFirstnameColumnValueLength())
suite.add_expectation(ExpectPersonTypeValuesToBeInSet())
suite.add_expectation(ExpectPreferredTitleValuesToBeInSet())
suite.add_expectation(ExpectPersonIDColumnValuesToBeUnique())
Expand Down
14 changes: 14 additions & 0 deletions scripts/jobs/housing/housing_tenure_reshape_gx_suite.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,12 @@ class ExpectMemberIsResponsibleValuesToBeInSet(gxe.ExpectColumnValuesToBeInSet):
description: str = "Expect member_is_responsible field to be boolean value of true or false"


class ExpectIsMutualExchangeValuesToBeInSet(gxe.ExpectColumnValuesToBeInSet):
    # Set-membership check: every value in `ismutualexchange` must be one of
    # the two boolean literals listed in `value_set`.
    column: str = 'ismutualexchange'
    value_set: list = [True, False]
    description: str = "Expect ismutualexchange field to be boolean value of true or false"


class ExpectTenancyIDAndPropertyReferenceColumnValuesToBeUniqueWithinRecord(
gxe.ExpectSelectColumnValuesToBeUniqueWithinRecord):
column_list: list = ['tenancy_id', 'property_reference']
Expand Down Expand Up @@ -61,6 +67,13 @@ class ExpectStartOfTenureDateColumnNotToBeNull(gxe.ExpectColumnValuesToNotBeNull
description: str = "Expect Start of Tenure Date column to be complete with no missing values"


class ExpectEndOfTenureDateColumnToBeNull(gxe.ExpectColumnValuesToBeNull):
    # Null check on `endoftenuredate`, applied only to the rows selected by
    # `row_condition` below.
    column: str = "endoftenuredate"
    description: str = "Expect End of Tenure Date column to be null with no default values"
    condition_parser: str = 'pandas'
    # `!=` replaces `<>`, which is not a valid operator for the Python-style
    # expressions evaluated by the 'pandas' condition parser.
    # NOTE(review): this filter keeps rows where `isterminated` is NOT False,
    # i.e. it appears to require a null end date on terminated tenures.
    # If the intent is "tenures that are still active must have no end date",
    # the condition should probably be `isterminated == False` - confirm
    # against the data definition before changing.
    row_condition: str = 'isterminated != False'


class ExpectTenureCodeColumnNotToBeNull(gxe.ExpectColumnValuesToNotBeNull):
column: str = "tenure_code"
description: str = "Expect Tenure Code column to be complete with no missing values"
Expand All @@ -84,6 +97,7 @@ class ExpectTenureCodeValuesToBeInSet(gxe.ExpectColumnValuesToBeInSet):
suite.add_expectation(ExpectTenancyIDAndPropertyReferenceColumnValuesToBeUniqueWithinRecord())
suite.add_expectation(ExpectTenancyIDColumnNotToBeNull())
suite.add_expectation(ExpectStartOfTenureDateColumnNotToBeNull())
suite.add_expectation(ExpectEndOfTenureDateColumnToBeNull())
suite.add_expectation(ExpectTenureCodeColumnNotToBeNull())
suite.add_expectation(ExpectTagRefColumnNotToBeNull())
suite.add_expectation(ExpectTenureCodeValuesToBeInSet())
Expand Down

0 comments on commit 7b69de7

Please sign in to comment.