Skip to content

Commit

Permalink
- add two new tables, table specific tests and GX test suites to GX D…
Browse files Browse the repository at this point in the history
…Q testing: maproperty and matenancyagreement.
  • Loading branch information
annajgibson committed Jan 6, 2025
1 parent 7b69de7 commit d838f9f
Show file tree
Hide file tree
Showing 3 changed files with 133 additions and 4 deletions.
18 changes: 14 additions & 4 deletions scripts/helpers/housing_gx_dq_inputs.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
'sql': """SELECT * FROM "housing-refined-zone"."tenure_reshape" where import_date=(select max(import_date) from "housing-refined-zone"."tenure_reshape") and description in ('Secure', 'Introductory', 'Mesne Profit Ac', 'Non-Secure') and (endoftenuredate is null or substr(endoftenuredate, 1, 11) = '1900-01-01')""",
'id_field': 'tenancy_id'},
'contacts_reshape': {
'sql': """SELECT id, targetid, createdat, contacttype, subtype, value, lastmodified, targettype, isactive, person_id, import_date FROM "housing-refined-zone"."contacts_reshape" where import_date=(select max(import_date) from "housing-refined-zone"."contacts_reshape") and isactive=True""",
'sql': """SELECT id, targetid, substr(createdat, 1, 10) as createdat, contacttype, subtype, value, substr(lastmodified, 1, 10) as lastmodified, targettype, isactive, person_id, import_date FROM "housing-refined-zone"."contacts_reshape" where import_date=(select max(import_date) from "housing-refined-zone"."contacts_reshape") and isactive=True""",
'id_field': 'id'},
'housing_homeowner_record_sheet': {
'sql': """SELECT * FROM "housing-raw-zone"."housing_homeowner_record_sheet" where import_date=(select max(import_date) from "housing-raw-zone"."housing_homeowner_record_sheet")""",
Expand All @@ -16,11 +16,17 @@
'id_field': 'property_dwelling_reference_number'},
'assets_reshape': {
'sql': """SELECT * FROM "housing-refined-zone"."assets_reshape" where import_date=(select max(import_date) from "housing-refined-zone"."assets_reshape") and assettype = 'Dwelling'""",
'id_field': 'asset_id'}
'id_field': 'asset_id'},
'matenancyagreement': {
'sql': """SELECT *, substr(cast(eot as varchar), 1, 10) as eot_parsed, substr(cast(cot as varchar), 1, 10) as cot_parsed FROM "housing-raw-zone"."sow2b_dbo_matenancyagreement" where import_date=(select max(import_date) FROM "housing-raw-zone"."sow2b_dbo_matenancyagreement")""",
'id_field': 'tag_ref'},
'maproperty': {
'sql': """SELECT * FROM "housing-raw-zone"."sow2b_dbo_maproperty" where import_date=(select max(import_date) FROM "housing-raw-zone"."sow2b_dbo_maproperty")""",
'id_field': 'prop_ref'}
}

table_list = ['person_reshape', 'tenure_reshape', 'contacts_reshape', 'housing_homeowner_record_sheet',
'housing_dwellings_list', 'assets_reshape']
'housing_dwellings_list', 'assets_reshape', 'matenancyagreement', 'maproperty']

partition_keys = ['import_year', 'import_month', 'import_day', 'import_date']

Expand All @@ -39,10 +45,12 @@
'expect_contact_value_column_values_to_be_unique': 'UNIQUENESS',
'expect_contact_value_column_values_to_not_be_null': 'COMPLETENESS',
'expect_date_of_birth_column_values_to_not_be_null': 'COMPLETENESS',
'expect_date_of_birth_to_be_between': 'VALIDITY',
'expect_date_of_birth_to_be_between': 'TIMELINESS',
'expect_description_values_to_be_in_set': 'CONSISTENCY',
'expect_estate_ref_no_column_values_to_match_regex': 'VALIDITY',
'expect_first_name_column_value_length': 'VALIDITY',
'expect_is_organisation_column_values_to_not_be_null': 'COMPLETENESS',
'expect_is_organisation_values_to_be_in_set': 'CONSISTENCY',
'expect_llpg_and_prop_ref_column_values_to_be_unique_within_record': 'UNIQUENESS',
'expect_llpg_column_value_lengths_between': 'VALIDITY',
'expect_llpg_column_values_to_be_unique': 'UNIQUENESS',
Expand Down Expand Up @@ -70,6 +78,7 @@
'expect_sub_type_column_values_to_not_be_null': 'COMPLETENESS',
'expect_surname_column_value_length': 'VALIDITY',
'expect_firstname_column_value_length': 'VALIDITY',
'expect_tag_ref_column_not_to_be_null': 'COMPLETENESS',
'expect_target_id_and_value_column_values_to_be_unique_within_record': 'UNIQUENESS',
'expect_target_id_column_values_to_not_be_null': 'COMPLETENESS',
'expect_target_type_column_values_to_be_in_set': 'CONSISTENCY',
Expand All @@ -78,6 +87,7 @@
'expect_tenancy_id_column_not_to_be_null': 'COMPLETENESS',
'expect_tenure_code_column_not_to_be_null': 'COMPLETENESS',
'expect_tenure_type_column_values_to_be_in_set': 'CONSISTENCY',
'expect_tenure_code_values_to_be_in_set': 'CONSISTENCY',
'expect_uprn_column_value_lengths_between': 'VALIDITY',
'expect_uprn_column_values_to_match_regex': 'VALIDITY',
'expect_uprn_column_values_to_not_be_null': 'COMPLETENESS',
Expand Down
37 changes: 37 additions & 0 deletions scripts/jobs/housing/housing_maproperty_gx_suite.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
# flake8: noqa: F821
import sys

from awsglue.utils import getResolvedOptions
import great_expectations as gx
import great_expectations.expectations as gxe

# Resolve the Glue job arguments; the only argument this script requests is
# the location used as the Great Expectations project root.
arg_key = ['s3_target_location']
args = getResolvedOptions(sys.argv, arg_key)
# Bind the argument explicitly so the name is visible to readers and linters
# instead of being injected implicitly into the module namespace.
s3_target_location = args['s3_target_location']


# Uniqueness check: no two rows may share the same prop_ref value.
class ExpectPropRefColumnValuesToBeUnique(gxe.ExpectColumnValuesToBeUnique):
    column: str = "prop_ref"
    description: str = "Expect Prop Ref field to be unique for a property type"


# Completeness check: the arr_patch (Arrears Patch) column must be populated.
class ExpectArrPatchNotToBeNull(gxe.ExpectColumnValuesToNotBeNull):
    column: str = 'arr_patch'
    description: str = 'Expect Arrears Patch column to be complete with no missing values'


# Completeness check: the prop_ref column must be populated on every row.
class ExpectPropRefNotToBeNull(gxe.ExpectColumnValuesToNotBeNull):
    column: str = 'prop_ref'
    description: str = 'Expect Prop Ref column to be complete with no missing values'


# Build the maproperty suite and register it with the file-backed GX context
# rooted at the location passed to the job.
context = gx.get_context(mode="file", project_root_dir=s3_target_location)

suite = gx.ExpectationSuite(name='maproperty_suite')

# Instantiate and attach each expectation in declaration order.
for _expectation_cls in (
    ExpectPropRefColumnValuesToBeUnique,
    ExpectArrPatchNotToBeNull,
    ExpectPropRefNotToBeNull,
):
    suite.add_expectation(_expectation_cls())

# Persist the suite in the context's suite store; keep the returned handle.
suite = context.suites.add(suite)
82 changes: 82 additions & 0 deletions scripts/jobs/housing/housing_matenancyagreement_gx_suite.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,82 @@
# flake8: noqa: F821
from datetime import datetime
import sys

from awsglue.utils import getResolvedOptions
import great_expectations as gx
import great_expectations.expectations as gxe

# Resolve the Glue job arguments; the only argument this script requests is
# the location used as the Great Expectations project root.
arg_key = ['s3_target_location']
args = getResolvedOptions(sys.argv, arg_key)
# Bind the argument explicitly so the name is visible to readers and linters
# instead of being injected implicitly into the module namespace.
s3_target_location = args['s3_target_location']


# Uniqueness check: no two rows may share the same tag_ref value.
class ExpectTagRefColumnValuesToBeUnique(gxe.ExpectColumnValuesToBeUnique):
    column: str = "tag_ref"
    description: str = "Expect Tag Ref field to be unique for a tenancy"


# Completeness check: the tag_ref column must be populated on every row.
class ExpectTagRefNotToBeNull(gxe.ExpectColumnValuesToNotBeNull):
    column: str = 'tag_ref'
    description: str = 'Expect Tag Ref column to be complete with no missing values'


# Completeness check: the prop_ref column must be populated on every row.
class ExpectPropRefNotToBeNull(gxe.ExpectColumnValuesToNotBeNull):
    column: str = 'prop_ref'
    description: str = 'Expect Prop Ref column to be complete with no missing values'


# Completeness check: the cot (tenancy start date) column must be populated.
class ExpectCoTNotToBeNull(gxe.ExpectColumnValuesToNotBeNull):
    column: str = 'cot'
    description: str = 'Expect Tenancy start date column (cot) to be complete with no missing values'


# Completeness check: the tenure column must be populated on every row.
class ExpectTenureNotToBeNull(gxe.ExpectColumnValuesToNotBeNull):
    column: str = 'tenure'
    description: str = 'Expect tenure to be complete with no missing values'


# Completeness check: the u_saff_rentacc (payment ref) column must be populated.
class ExpectSaffRentAccNotToBeNull(gxe.ExpectColumnValuesToNotBeNull):
    column: str = 'u_saff_rentacc'
    description: str = 'Expect Saff rent account (payment ref) to be complete with no missing values'


# Completeness check: the rentgrp_ref column must be populated on every row.
class ExpectRentGroupRefNotToBeNull(gxe.ExpectColumnValuesToNotBeNull):
    column: str = 'rentgrp_ref'
    description: str = 'Expect Rent Group ref column to be complete with no missing values'


# Range check on eot_parsed, applied only to rows where eot_parsed is present.
#
# The bounds are date-only ISO strings (YYYY-MM-DD). NOTE(review): eot_parsed
# is presumably the 10-character date prefix produced by the DQ input query;
# a full datetime bound such as "1920-01-01T00:00:00" would sort after the
# bare date "1920-01-01" in a string comparison and wrongly exclude the lower
# boundary, so both sides use the same format.
#
# max_value is computed once, when this class body is evaluated.
class ExpectEoTToBeBetween(gxe.ExpectColumnValuesToBeBetween):
    column: str = 'eot_parsed'
    min_value: str = datetime(1920, 1, 1).date().isoformat()
    max_value: str = datetime.today().date().isoformat()
    description: str = "Expect eot_parsed be between 1920-01-01 and today's date"
    condition_parser: str = 'great_expectations'
    row_condition: str = 'col("eot_parsed").notNull()'


# Range check on cot_parsed, applied only to rows where cot_parsed is present.
#
# The bounds are date-only ISO strings (YYYY-MM-DD). NOTE(review): cot_parsed
# is presumably the 10-character date prefix produced by the DQ input query;
# a full datetime bound such as "1920-01-01T00:00:00" would sort after the
# bare date "1920-01-01" in a string comparison and wrongly exclude the lower
# boundary, so both sides use the same format.
#
# The row condition filters on the validated column itself (cot_parsed), the
# same pattern the end-of-tenancy check uses for eot_parsed.
#
# max_value is computed once, when this class body is evaluated.
class ExpectCoTToBeBetween(gxe.ExpectColumnValuesToBeBetween):
    column: str = 'cot_parsed'
    min_value: str = datetime(1920, 1, 1).date().isoformat()
    max_value: str = datetime.today().date().isoformat()
    description: str = "Expect cot_parsed be between 1920-01-01 and today's date"
    condition_parser: str = 'great_expectations'
    row_condition: str = 'col("cot_parsed").notNull()'


# Build the matenancyagreement suite and register it with the file-backed GX
# context rooted at the location passed to the job.
context = gx.get_context(mode="file", project_root_dir=s3_target_location)

suite = gx.ExpectationSuite(name='matenancyagreement_suite')

# Instantiate and attach each expectation in declaration order.
for _expectation_cls in (
    ExpectTagRefColumnValuesToBeUnique,
    ExpectTagRefNotToBeNull,
    ExpectPropRefNotToBeNull,
    ExpectCoTNotToBeNull,
    ExpectTenureNotToBeNull,
    ExpectSaffRentAccNotToBeNull,
    ExpectRentGroupRefNotToBeNull,
    ExpectEoTToBeBetween,
    ExpectCoTToBeBetween,
):
    suite.add_expectation(_expectation_cls())

# Persist the suite in the context's suite store; keep the returned handle.
suite = context.suites.add(suite)

0 comments on commit d838f9f

Please sign in to comment.