Skip to content

Commit

Permalink
academy state machine (#1481)
Browse files Browse the repository at this point in the history
* max concurrency lambda

* add academy state machine in stg

* shorter lambda name

* add lambda zip output path

* fix subnet id value
  • Loading branch information
timburke-hackit authored Nov 6, 2023
1 parent 6a3c215 commit 85589e3
Show file tree
Hide file tree
Showing 2 changed files with 145 additions and 0 deletions.
13 changes: 13 additions & 0 deletions lambdas/calculate_max_concurrency/main.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
def calculate_max_concurrency(available_ips: int, ips_per_job: int) -> int:
return int(available_ips / ips_per_job)


def lambda_handler(event, context):
available_ips = event["available_ips"]
ips_per_job = event["ips_per_job"]
max_concurrency = calculate_max_concurrency(available_ips, ips_per_job)
return {"max_concurrency": max_concurrency}


if __name__ == "__main__":
lambda_handler("event", "lambda_context")
132 changes: 132 additions & 0 deletions terraform/core/13-mssql-ingestion.tf
Original file line number Diff line number Diff line change
Expand Up @@ -195,3 +195,135 @@ module "copy_academy_revenues_to_raw_zone" {
"--conf" = "spark.sql.legacy.timeParserPolicy=LEGACY --conf spark.sql.legacy.parquet.int96RebaseModeInRead=LEGACY --conf spark.sql.legacy.parquet.int96RebaseModeInWrite=LEGACY --conf spark.sql.legacy.parquet.datetimeRebaseModeInRead=LEGACY --conf spark.sql.legacy.parquet.datetimeRebaseModeInWrite=LEGACY"
}
}

## Academy State Machine

locals {
academy_state_machine_count = local.is_live_environment && !local.is_production_environment ? 1 : 0
}

module "academy_glue_job" {
count = local.academy_state_machine_count
source = "../modules/aws-glue-job"
tags = module.tags.values
is_live_environment = local.is_live_environment
is_production_environment = local.is_production_environment

job_name = "${local.short_identifier_prefix}Academy Revs & Bens Housing Needs Database Ingestion"
script_s3_object_key = aws_s3_object.ingest_database_tables_via_jdbc_connection.key
environment = var.environment
pydeequ_zip_key = aws_s3_object.pydeequ.key
helper_module_key = aws_s3_object.helpers.key
jdbc_connections = [module.academy_mssql_database_ingestion[0].jdbc_connection_name]
glue_role_arn = aws_iam_role.glue_role.arn
glue_temp_bucket_id = module.glue_temp_storage.bucket_id
glue_scripts_bucket_id = module.glue_scripts.bucket_id
spark_ui_output_storage_id = module.spark_ui_output_storage.bucket_id
glue_job_timeout = 420
glue_version = "4.0"
glue_job_worker_type = "G.1X"
number_of_workers_for_glue_job = 2
job_parameters = {
"--source_data_database" = module.academy_mssql_database_ingestion[0].ingestion_database_name
"--s3_ingestion_bucket_target" = "s3://${module.landing_zone.bucket_id}/academy/"
"--s3_ingestion_details_target" = "s3://${module.landing_zone.bucket_id}/academy/ingestion-details/"
"--table_filter_expression" = ""
"--conf" = "spark.sql.legacy.timeParserPolicy=LEGACY --conf spark.sql.legacy.parquet.int96RebaseModeInRead=LEGACY --conf spark.sql.legacy.parquet.int96RebaseModeInWrite=LEGACY --conf spark.sql.legacy.parquet.datetimeRebaseModeInRead=LEGACY --conf spark.sql.legacy.parquet.datetimeRebaseModeInWrite=LEGACY"
}
}


module "academy_state_machine" {
count = local.academy_state_machine_count
tags = module.tags.values
source = "../modules/aws-step-functions"
name = "academy-revs-and-bens-housing-needs-database-ingestion"
identifier_prefix = local.short_identifier_prefix
role_arn = aws_iam_role.academy_step_functions_role[0].arn
definition = <<EOF
{
"Comment": "A description of my state machine",
"StartAt": "GetNumberOfAvailableIPs",
"States": {
"GetNumberOfAvailableIPs": {
"Type": "Task",
"Resource": "arn:aws:states:::aws-sdk:ec2:describeSubnets",
"Parameters": {
"SubnetIds": [
"${local.instance_subnet_id}"
]
},
"ResultPath": "$.SubnetResult",
"Next": "InvokeLambdaCalculateMaxConcurrency"
},
"InvokeLambdaCalculateMaxConcurrency": {
"Type": "Task",
"Resource": "${module.max_concurrency_lambda[0].lambda_function_arn}",
"Parameters": {
"AvailableIPs.$": "$.SubnetResult.Subnets[0].AvailableIpAddressCount",
"NumTasks.$": "$.TaskCount"
},
"ResultPath": "$.MaxConcurrencyResult",
"Next": "Map"
},
"Map": {
"Type": "Map",
"ItemProcessor": {
"ProcessorConfig": {
"Mode": "INLINE"
},
"StartAt": "Glue StartJobRun",
"States": {
"Glue StartJobRun": {
"Type": "Task",
"Resource": "arn:aws:states:::glue:startJobRun.sync",
"Parameters": {
"Parameters": {
"Arguments": {
"--table_filter_expression": "$.table_filter_expression"
}
}
},
"End": true
}
}
},
"MaxConcurrencyPath": "$.MaxConcurrencyResult",
"End": true
}
}
}
EOF
}

module "max_concurrency_lambda" {
count = local.academy_state_machine_count
source = "../modules/aws-lambda"
tags = module.tags.values
lambda_name = "academy-max-concurrency"
identifier_prefix = local.short_identifier_prefix
handler = "main.lambda_handler"
lambda_artefact_storage_bucket = module.lambda_artefact_storage.bucket_id
s3_key = "academy-revs-and-bens-housing-needs-database-ingestion-max-concurrency.zip"
lambda_source_dir = "../../lambdas/calculate_max_concurrency"
lambda_output_path = "../../lambdas/calculate_max_concurrency/max-concurrency.zip"
runtime = "python3.8"
}

resource "aws_iam_role" "academy_step_functions_role" {
count = local.academy_state_machine_count
name = "${local.short_identifier_prefix}academy-step-functions-role"
tags = module.tags.values
assume_role_policy = data.aws_iam_policy_document.step_functions_assume_role_policy.json
}

data "aws_iam_policy_document" "step_functions_assume_role_policy" {
statement {
actions = ["sts:AssumeRole"]

principals {
type = "Service"
identifiers = ["states.amazonaws.com"]
}
}
}

0 comments on commit 85589e3

Please sign in to comment.