From 85589e3688e5004de74edf52c8e28e0a3f0a1bbb Mon Sep 17 00:00:00 2001
From: timburke-hackit <61045197+timburke-hackit@users.noreply.github.com>
Date: Mon, 6 Nov 2023 14:07:50 +0000
Subject: [PATCH] academy state machine (#1481)

* max concurrency lambda

* add academy state machine in stg

* shorter lambda name

* add lambda zip output path

* fix subnet id value
---
 lambdas/calculate_max_concurrency/main.py |  13 +++
 terraform/core/13-mssql-ingestion.tf      | 132 ++++++++++++++++++++++
 2 files changed, 145 insertions(+)
 create mode 100644 lambdas/calculate_max_concurrency/main.py

diff --git a/lambdas/calculate_max_concurrency/main.py b/lambdas/calculate_max_concurrency/main.py
new file mode 100644
index 000000000..be53a81e3
--- /dev/null
+++ b/lambdas/calculate_max_concurrency/main.py
@@ -0,0 +1,13 @@
+def calculate_max_concurrency(available_ips: int, ips_per_job: int) -> int:
+    return int(available_ips / ips_per_job)
+
+
+def lambda_handler(event, context):
+    available_ips = event["available_ips"]
+    ips_per_job = event["ips_per_job"]
+    max_concurrency = calculate_max_concurrency(available_ips, ips_per_job)
+    return {"max_concurrency": max_concurrency}
+
+
+if __name__ == "__main__":
+    print(lambda_handler({"available_ips": 10, "ips_per_job": 2}, None))
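A quick sanity check for the lambda above, shown as a minimal sketch. It is not part of the patch; the import path assumes the repository root is on sys.path, and the input values are illustrative only:

# test_calculate_max_concurrency.py (hypothetical, not included in this PR)
from lambdas.calculate_max_concurrency.main import calculate_max_concurrency, lambda_handler


def test_calculate_max_concurrency_rounds_down():
    # Partial jobs are dropped: 7 IPs at 2 IPs per job leaves room for 3 jobs.
    assert calculate_max_concurrency(10, 2) == 5
    assert calculate_max_concurrency(7, 2) == 3


def test_lambda_handler_shapes_the_response():
    event = {"available_ips": 20, "ips_per_job": 4}
    assert lambda_handler(event, None) == {"max_concurrency": 5}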
diff --git a/terraform/core/13-mssql-ingestion.tf b/terraform/core/13-mssql-ingestion.tf
index 218178c7e..12492a33f 100644
--- a/terraform/core/13-mssql-ingestion.tf
+++ b/terraform/core/13-mssql-ingestion.tf
@@ -195,3 +195,135 @@ module "copy_academy_revenues_to_raw_zone" {
     "--conf"                       = "spark.sql.legacy.timeParserPolicy=LEGACY --conf spark.sql.legacy.parquet.int96RebaseModeInRead=LEGACY --conf spark.sql.legacy.parquet.int96RebaseModeInWrite=LEGACY --conf spark.sql.legacy.parquet.datetimeRebaseModeInRead=LEGACY --conf spark.sql.legacy.parquet.datetimeRebaseModeInWrite=LEGACY"
   }
 }
+
+## Academy State Machine
+
+locals {
+  academy_state_machine_count = local.is_live_environment && !local.is_production_environment ? 1 : 0
+}
+
+module "academy_glue_job" {
+  count                     = local.academy_state_machine_count
+  source                    = "../modules/aws-glue-job"
+  tags                      = module.tags.values
+  is_live_environment       = local.is_live_environment
+  is_production_environment = local.is_production_environment
+
+  job_name                       = "${local.short_identifier_prefix}Academy Revs & Bens Housing Needs Database Ingestion"
+  script_s3_object_key           = aws_s3_object.ingest_database_tables_via_jdbc_connection.key
+  environment                    = var.environment
+  pydeequ_zip_key                = aws_s3_object.pydeequ.key
+  helper_module_key              = aws_s3_object.helpers.key
+  jdbc_connections               = [module.academy_mssql_database_ingestion[0].jdbc_connection_name]
+  glue_role_arn                  = aws_iam_role.glue_role.arn
+  glue_temp_bucket_id            = module.glue_temp_storage.bucket_id
+  glue_scripts_bucket_id         = module.glue_scripts.bucket_id
+  spark_ui_output_storage_id     = module.spark_ui_output_storage.bucket_id
+  glue_job_timeout               = 420
+  glue_version                   = "4.0"
+  glue_job_worker_type           = "G.1X"
+  number_of_workers_for_glue_job = 2
+  job_parameters = {
+    "--source_data_database"        = module.academy_mssql_database_ingestion[0].ingestion_database_name
+    "--s3_ingestion_bucket_target"  = "s3://${module.landing_zone.bucket_id}/academy/"
+    "--s3_ingestion_details_target" = "s3://${module.landing_zone.bucket_id}/academy/ingestion-details/"
+    "--table_filter_expression"     = ""
+    "--conf"                        = "spark.sql.legacy.timeParserPolicy=LEGACY --conf spark.sql.legacy.parquet.int96RebaseModeInRead=LEGACY --conf spark.sql.legacy.parquet.int96RebaseModeInWrite=LEGACY --conf spark.sql.legacy.parquet.datetimeRebaseModeInRead=LEGACY --conf spark.sql.legacy.parquet.datetimeRebaseModeInWrite=LEGACY"
+  }
+}
+
+
+module "academy_state_machine" {
+  count             = local.academy_state_machine_count
+  tags              = module.tags.values
+  source            = "../modules/aws-step-functions"
+  name              = "academy-revs-and-bens-housing-needs-database-ingestion"
+  identifier_prefix = local.short_identifier_prefix
+  role_arn          = aws_iam_role.academy_step_functions_role[0].arn
+  definition        = <
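The lambda's event takes available_ips and ips_per_job, which suggests the state machine caps Glue job concurrency by the free IP addresses in the ingestion subnet. A sketch of how a caller could assemble that event with boto3; the subnet ID and the IPs-per-job figure are placeholders, not values from this PR:

# build_concurrency_event.py (illustrative sketch, not part of the patch)
import boto3


def build_event(subnet_id: str, ips_per_job: int) -> dict:
    """Read the subnet's free IP count and shape the lambda's input event."""
    ec2 = boto3.client("ec2")
    subnet = ec2.describe_subnets(SubnetIds=[subnet_id])["Subnets"][0]
    return {
        "available_ips": subnet["AvailableIpAddressCount"],
        "ips_per_job": ips_per_job,
    }


if __name__ == "__main__":
    # Placeholder subnet ID; 2 IPs per job is an assumed figure for illustration.
    print(build_event("subnet-0123456789abcdef0", ips_per_job=2))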