From 7b69de7206fef3ad50c937f546a1dca36fccc731 Mon Sep 17 00:00:00 2001 From: AGibson <4319494+annajgibson@users.noreply.github.com> Date: Thu, 19 Dec 2024 16:32:17 +0000 Subject: [PATCH] - amending dq tests to be more specific to data subsets --- scripts/helpers/housing_gx_dq_inputs.py | 3 +++ .../housing/housing_person_reshape_gx_suite.py | 17 +++++++++++++++-- .../housing/housing_tenure_reshape_gx_suite.py | 14 ++++++++++++++ 3 files changed, 32 insertions(+), 2 deletions(-) diff --git a/scripts/helpers/housing_gx_dq_inputs.py b/scripts/helpers/housing_gx_dq_inputs.py index f2545d2e7..21a0f0ff8 100644 --- a/scripts/helpers/housing_gx_dq_inputs.py +++ b/scripts/helpers/housing_gx_dq_inputs.py @@ -49,6 +49,7 @@ 'expect_llpg_column_values_to_match_regex': 'VALIDITY', 'expect_llpg_column_values_to_not_be_null': 'COMPLETENESS', 'expect_member_full_name_column_value_lengths_between': 'VALIDITY', + 'expect_member_is_responsible_values_to_be_in_set': 'CONSISTENCY', 'expect_payment_ref_no_column_values_to_match_regex': 'VALIDITY', 'expect_payment_reference_column_not_to_be_null': 'COMPLETENESS', 'expect_person_id_and_payment_reference_column_values_to_be_unique_within_record': 'UNIQUENESS', @@ -64,9 +65,11 @@ 'expect_property_ref_column_values_to_not_be_null': 'COMPLETENESS', 'expect_select_column_values_to_be_unique_within_record': 'UNIQUENESS', 'expect_start_of_tenure_date_column_not_to_be_null': 'COMPLETENESS', + 'expect_end_of_tenure_date_column_to_be_null': 'COMPLETENESS', 'expect_sub_type_column_values_to_be_in_set': 'CONSISTENCY', 'expect_sub_type_column_values_to_not_be_null': 'COMPLETENESS', 'expect_surname_column_value_length': 'VALIDITY', + 'expect_firstname_column_value_length': 'VALIDITY', 'expect_target_id_and_value_column_values_to_be_unique_within_record': 'UNIQUENESS', 'expect_target_id_column_values_to_not_be_null': 'COMPLETENESS', 'expect_target_type_column_values_to_be_in_set': 'CONSISTENCY', diff --git a/scripts/jobs/housing/housing_person_reshape_gx_suite.py b/scripts/jobs/housing/housing_person_reshape_gx_suite.py index 5145bf7dc..14a9f4236 100644 --- a/scripts/jobs/housing/housing_person_reshape_gx_suite.py +++ b/scripts/jobs/housing/housing_person_reshape_gx_suite.py @@ -21,6 +21,16 @@ class ExpectSurnameColumnValueLength(gxe.ExpectColumnValueLengthsToBeBetween): column: str = "surname" min_value: int = 1 description: str = "Expect surname to be at least 1 character length" + condition_parser: str = 'pandas' + row_condition: str = 'isorganisation<>True' + + +class ExpectFirstnameColumnValueLength(gxe.ExpectColumnValueLengthsToBeBetween): + column: str = "firstname" + min_value: int = 1 + description: str = "Expect firstname to be at least 1 character length" + condition_parser: str = 'pandas' + row_condition: str = 'isorganisation<>True' class ExpectPersonTypeValuesToBeInSet(gxe.ExpectColumnValuesToBeInSet): @@ -78,14 +88,16 @@ class ExpectDateOfBirthColumnValuesToNotBeNull(gxe.ExpectColumnValuesToNotBeNull column: str = 'dateofbirth_parsed' description: str = "Expect dateofbirth_parsed be complete with no missing values" condition_parser: str = 'pandas' - row_condition: str = 'isorganisation<>true' + row_condition: str = 'isorganisation<>True' class ExpectDateOfBirthToBeBetween(gxe.ExpectColumnValuesToBeBetween): column: str = 'dateofbirth_parsed' min_value: str = datetime(1900, 1, 1, 0, 0, 0).isoformat() max_value: str = datetime.today().isoformat() - description: str = "Expect dateofbirth_parsed be complete with no missing values" + description: str = "Expect dateofbirth_parsed be between 1900-01-01 and today's date" + condition_parser: str = 'pandas' + row_condition: str = 'isorganisation<>True' # add to GX context @@ -94,6 +106,7 @@ class ExpectDateOfBirthToBeBetween(gxe.ExpectColumnValuesToBeBetween): suite = gx.ExpectationSuite(name='person_reshape_suite') # suite.add_expectation(ExpectFirstNameColumnValueLength()) suite.add_expectation(ExpectSurnameColumnValueLength()) +suite.add_expectation(ExpectFirstnameColumnValueLength()) suite.add_expectation(ExpectPersonTypeValuesToBeInSet()) suite.add_expectation(ExpectPreferredTitleValuesToBeInSet()) suite.add_expectation(ExpectPersonIDColumnValuesToBeUnique()) diff --git a/scripts/jobs/housing/housing_tenure_reshape_gx_suite.py b/scripts/jobs/housing/housing_tenure_reshape_gx_suite.py index a4e8b162a..e1d398fb9 100644 --- a/scripts/jobs/housing/housing_tenure_reshape_gx_suite.py +++ b/scripts/jobs/housing/housing_tenure_reshape_gx_suite.py @@ -34,6 +34,12 @@ class ExpectMemberIsResponsibleValuesToBeInSet(gxe.ExpectColumnValuesToBeInSet): description: str = "Expect member_is_responsible field to be boolean value of true or false" +class ExpectIsMutualExchangeValuesToBeInSet(gxe.ExpectColumnValuesToBeInSet): + column: str = 'ismutualexchange' + value_set: list = [True, False] + description: str = "Expect ismutualexchange field to be boolean value of true or false" + + class ExpectTenancyIDAndPropertyReferenceColumnValuesToBeUniqueWithinRecord( gxe.ExpectSelectColumnValuesToBeUniqueWithinRecord): column_list: list = ['tenancy_id', 'property_reference'] @@ -61,6 +67,13 @@ class ExpectStartOfTenureDateColumnNotToBeNull(gxe.ExpectColumnValuesToNotBeNull description: str = "Expect Start of Tenure Date column to be complete with no missing values" +class ExpectEndOfTenureDateColumnToBeNull(gxe.ExpectColumnValuesToBeNull): + column: str = "endoftenuredate" + description: str = "Expect End of Tenure Date column to be null with no default values" + condition_parser: str = 'pandas' + row_condition: str = 'isterminated<>False' + + class ExpectTenureCodeColumnNotToBeNull(gxe.ExpectColumnValuesToNotBeNull): column: str = "tenure_code" description: str = "Expect Tenure Code column to be complete with no missing values" @@ -84,6 +97,7 @@ class ExpectTenureCodeValuesToBeInSet(gxe.ExpectColumnValuesToBeInSet): suite.add_expectation(ExpectTenancyIDAndPropertyReferenceColumnValuesToBeUniqueWithinRecord()) suite.add_expectation(ExpectTenancyIDColumnNotToBeNull()) suite.add_expectation(ExpectStartOfTenureDateColumnNotToBeNull()) +suite.add_expectation(ExpectEndOfTenureDateColumnToBeNull()) suite.add_expectation(ExpectTenureCodeColumnNotToBeNull()) suite.add_expectation(ExpectTagRefColumnNotToBeNull()) suite.add_expectation(ExpectTenureCodeValuesToBeInSet())