From 0c0770f1a14fbb731548a5a2c60c237336739141 Mon Sep 17 00:00:00 2001
From: Sandrine Balley <42965897+LBHSBALLEY@users.noreply.github.com>
Date: Thu, 28 Sep 2023 09:37:11 +0100
Subject: [PATCH 1/2] Update parking_cycle_hangars_waiting_list.py

Consider email address from licence_party
---
 scripts/jobs/parking/parking_cycle_hangars_waiting_list.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/scripts/jobs/parking/parking_cycle_hangars_waiting_list.py b/scripts/jobs/parking/parking_cycle_hangars_waiting_list.py
index f43ee9866..1251096cb 100644
--- a/scripts/jobs/parking/parking_cycle_hangars_waiting_list.py
+++ b/scripts/jobs/parking/parking_cycle_hangars_waiting_list.py
@@ -53,7 +53,8 @@ def sparkSqlQuery(glueContext, query, mapping, transformation_ctx) -> DynamicFra
         database="dataplatform-" + environment + "-liberator-raw-zone",
         table_name="liberator_licence_party",
         transformation_ctx="AmazonS3_node1685714303815",
-        push_down_predicate=create_pushdown_predicate_for_max_date_partition_value("dataplatform-" + environment + "-liberator-raw-zone", "liberator_licence_party", 'import_date')
+        push_down_predicate=create_pushdown_predicate_for_max_date_partition_value("dataplatform-" + environment + "-liberator-raw-zone", "liberator_licence_party", 'import_date'),
+        additional_options = {"mergeSchema": "true"}
     )
 
 # Script generated for node SQL
@@ -107,6 +108,7 @@ def sparkSqlQuery(glueContext, query, mapping, transformation_ctx) -> DynamicFra
     A.party_id, first_name, surname,
     B.uprn as user_uprn, B.address1, B.address2, B.address3, B.postcode,
     B.telephone_number,
+    B.email_address,
     D.Address2 as street, B.record_created as Date_Registered,
     C.x, C.y,

From e98fa4f49d9dd35ce2ed3096efa1b62c72af4125 Mon Sep 17 00:00:00 2001
From: Sandrine Balley <42965897+LBHSBALLEY@users.noreply.github.com>
Date: Thu, 28 Sep 2023 14:54:51 +0100
Subject: [PATCH 2/2] Update parking_cycle_hangars_denormalisation.py

new email column in cycle hangars + pushdown predicates
---
 .../parking_cycle_hangars_denormalisation.py | 32 +++++++++++++++----
 1 file changed, 25 insertions(+), 7 deletions(-)

diff --git a/scripts/jobs/parking/parking_cycle_hangars_denormalisation.py b/scripts/jobs/parking/parking_cycle_hangars_denormalisation.py
index 029e1d0b2..35512ba89 100644
--- a/scripts/jobs/parking/parking_cycle_hangars_denormalisation.py
+++ b/scripts/jobs/parking/parking_cycle_hangars_denormalisation.py
@@ -6,7 +6,7 @@
 from awsglue.job import Job
 from awsglue.dynamicframe import DynamicFrame
 
-from scripts.helpers.helpers import get_glue_env_var
+from scripts.helpers.helpers import get_glue_env_var, create_pushdown_predicate_for_max_date_partition_value
 environment = get_glue_env_var("environment")
 
 def sparkSqlQuery(glueContext, query, mapping, transformation_ctx) -> DynamicFrame:
@@ -87,7 +87,8 @@ def sparkSqlQuery(glueContext, query, mapping, transformation_ctx) -> DynamicFra
             B.ADDRESS2 ,
             B.ADDRESS3 ,
             B.POSTCODE ,
-            B.TELEPHONE_NUMBER
+            B.TELEPHONE_NUMBER,
+            B.EMAIL_ADDRESS
 FROM HangarAlloc as A
 LEFT JOIN licence_party as B ON A.party_id = B.business_party_id
 WHERE RW = 1 AND Allocation_Status NOT IN ('cancelled', 'key_returned')
@@ -117,22 +118,39 @@ def sparkSqlQuery(glueContext, query, mapping, transformation_ctx) -> DynamicFra
 ## @args: [database = "dataplatform-" + environment + "-liberator-raw-zone", table_name = "liberator_licence_party", transformation_ctx = "DataSource0"]
 ## @return: DataSource0
 ## @inputs: []
-DataSource0 = glueContext.create_dynamic_frame.from_catalog(database = "dataplatform-" + environment + "-liberator-raw-zone", table_name = "liberator_licence_party", transformation_ctx = "DataSource0")
environment + "-liberator-raw-zone", table_name = "liberator_licence_party", transformation_ctx = "DataSource0") +DataSource0 = glueContext.create_dynamic_frame.from_catalog( + database = "dataplatform-" + environment + "-liberator-raw-zone", + table_name = "liberator_licence_party", + transformation_ctx = "DataSource0", + push_down_predicate = create_pushdown_predicate_for_max_date_partition_value("dataplatform-" + environment + "-liberator-raw-zone", "liberator_licence_party", 'import_date'), + additional_options = {"mergeSchema": "true"}) ## @type: DataSource ## @args: [database = "dataplatform-" + environment + "-liberator-raw-zone", table_name = "liberator_hangar_allocations", transformation_ctx = "DataSource3"] ## @return: DataSource3 ## @inputs: [] -DataSource3 = glueContext.create_dynamic_frame.from_catalog(database = "dataplatform-" + environment + "-liberator-raw-zone", table_name = "liberator_hangar_allocations", transformation_ctx = "DataSource3") +DataSource3 = glueContext.create_dynamic_frame.from_catalog( + database = "dataplatform-" + environment + "-liberator-raw-zone", + table_name = "liberator_hangar_allocations", + push_down_predicate = create_pushdown_predicate_for_max_date_partition_value("dataplatform-" + environment + "-liberator-raw-zone", "liberator_hangar_allocations", 'import_date'), + transformation_ctx = "DataSource3") ## @type: DataSource ## @args: [database = "dataplatform-" + environment + "-liberator-raw-zone", table_name = "liberator_hangar_types", transformation_ctx = "DataSource2"] ## @return: DataSource2 ## @inputs: [] -DataSource2 = glueContext.create_dynamic_frame.from_catalog(database = "dataplatform-" + environment + "-liberator-raw-zone", table_name = "liberator_hangar_types", transformation_ctx = "DataSource2") +DataSource2 = glueContext.create_dynamic_frame.from_catalog( + database = "dataplatform-" + environment + "-liberator-raw-zone", + table_name = "liberator_hangar_types", + push_down_predicate = create_pushdown_predicate_for_max_date_partition_value("dataplatform-" + environment + "-liberator-raw-zone", "liberator_hangar_types", 'import_date'), + transformation_ctx = "DataSource2") ## @type: DataSource ## @args: [database = "dataplatform-" + environment + "-liberator-raw-zone", table_name = "liberator_hangar_details", transformation_ctx = "DataSource1"] ## @return: DataSource1 ## @inputs: [] -DataSource1 = glueContext.create_dynamic_frame.from_catalog(database = "dataplatform-" + environment + "-liberator-raw-zone", table_name = "liberator_hangar_details", transformation_ctx = "DataSource1") +DataSource1 = glueContext.create_dynamic_frame.from_catalog( + database = "dataplatform-" + environment + "-liberator-raw-zone", + table_name = "liberator_hangar_details", + push_down_predicate = create_pushdown_predicate_for_max_date_partition_value("dataplatform-" + environment + "-liberator-raw-zone", "liberator_hangar_details", 'import_date') + transformation_ctx = "DataSource1") ## @type: SqlCode ## @args: [sqlAliases = {"liberator_hangar_details": DataSource1, "liberator_hangar_types": DataSource2, "liberator_hangar_allocations": DataSource3, "liberator_licence_party": DataSource0}, sqlName = SqlQuery0, transformation_ctx = "Transform0"] ## @return: Transform0 @@ -147,4 +165,4 @@ def sparkSqlQuery(glueContext, query, mapping, transformation_ctx) -> DynamicFra DataSink0.setFormat("glueparquet") DataSink0.writeFrame(Transform0) -job.commit() \ No newline at end of file +job.commit()