From d644b92ee3f8cfb8459848cdf8e37cbb4300302f Mon Sep 17 00:00:00 2001 From: koladeadewuyi-moj <136330532+koladeadewuyi-moj@users.noreply.github.com> Date: Wed, 4 Dec 2024 09:31:25 +0000 Subject: [PATCH] DPR2-1492: Reduce polling frequency when checking processed files (#8889) --- .../modules/domains/maintenance-pipeline/pipeline.tf | 3 ++- .../modules/domains/maintenance-pipeline/variables.tf | 10 ++++++++++ .../modules/domains/reload-pipeline/pipeline.tf | 3 ++- .../modules/domains/reload-pipeline/variables.tf | 10 ++++++++++ .../modules/domains/replay-pipeline/pipeline.tf | 6 ++++-- .../modules/domains/replay-pipeline/variables.tf | 10 ++++++++++ .../modules/domains/stop-cdc-pipeline/pipeline.tf | 4 ++-- .../modules/domains/stop-cdc-pipeline/variables.tf | 10 ++++++++++ 8 files changed, 50 insertions(+), 6 deletions(-) diff --git a/terraform/environments/digital-prison-reporting/modules/domains/maintenance-pipeline/pipeline.tf b/terraform/environments/digital-prison-reporting/modules/domains/maintenance-pipeline/pipeline.tf index 84b4f581882..e894ac6482b 100644 --- a/terraform/environments/digital-prison-reporting/modules/domains/maintenance-pipeline/pipeline.tf +++ b/terraform/environments/digital-prison-reporting/modules/domains/maintenance-pipeline/pipeline.tf @@ -30,7 +30,8 @@ module "maintenance_pipeline" { "Parameters" : { "JobName" : var.glue_unprocessed_raw_files_check_job, "Arguments" : { - "--dpr.orchestration.wait.interval.seconds" : "60", + "--dpr.orchestration.wait.interval.seconds" : tostring(var.processed_files_check_wait_interval_seconds), + "--dpr.orchestration.max.attempts" : tostring(var.processed_files_check_max_attempts), "--dpr.datastorage.retry.maxAttempts" : tostring(var.glue_s3_max_attempts), "--dpr.datastorage.retry.minWaitMillis" : tostring(var.glue_s3_retry_min_wait_millis), "--dpr.datastorage.retry.maxWaitMillis" : tostring(var.glue_s3_retry_max_wait_millis) diff --git 
a/terraform/environments/digital-prison-reporting/modules/domains/maintenance-pipeline/variables.tf b/terraform/environments/digital-prison-reporting/modules/domains/maintenance-pipeline/variables.tf index a16130e7ee9..02ac21df6d3 100644 --- a/terraform/environments/digital-prison-reporting/modules/domains/maintenance-pipeline/variables.tf +++ b/terraform/environments/digital-prison-reporting/modules/domains/maintenance-pipeline/variables.tf @@ -157,6 +157,16 @@ variable "retention_curated_num_workers" { } } +variable "processed_files_check_wait_interval_seconds" { + description = "Number of seconds to wait between checks of S3 for whether all files have been processed" + type = number +} + +variable "processed_files_check_max_attempts" { + description = "Maximum number of attempts to check if all files have been processed" + type = number +} + variable "glue_s3_max_attempts" { description = "The maximum number of attempts when making requests to S3" type = number diff --git a/terraform/environments/digital-prison-reporting/modules/domains/reload-pipeline/pipeline.tf b/terraform/environments/digital-prison-reporting/modules/domains/reload-pipeline/pipeline.tf index 40b941765bf..5ca48bbef99 100644 --- a/terraform/environments/digital-prison-reporting/modules/domains/reload-pipeline/pipeline.tf +++ b/terraform/environments/digital-prison-reporting/modules/domains/reload-pipeline/pipeline.tf @@ -53,7 +53,8 @@ module "reload_pipeline" { "Parameters" : { "JobName" : var.glue_unprocessed_raw_files_check_job, "Arguments" : { - "--dpr.orchestration.wait.interval.seconds" : "60" + "--dpr.orchestration.wait.interval.seconds" : tostring(var.processed_files_check_wait_interval_seconds), + "--dpr.orchestration.max.attempts" : tostring(var.processed_files_check_max_attempts) } }, "Next" : "Stop Glue Streaming Job" diff --git a/terraform/environments/digital-prison-reporting/modules/domains/reload-pipeline/variables.tf 
b/terraform/environments/digital-prison-reporting/modules/domains/reload-pipeline/variables.tf index ea22b35cf8c..28a3b7a73ac 100644 --- a/terraform/environments/digital-prison-reporting/modules/domains/reload-pipeline/variables.tf +++ b/terraform/environments/digital-prison-reporting/modules/domains/reload-pipeline/variables.tf @@ -238,6 +238,16 @@ variable "retention_curated_num_workers" { } } +variable "processed_files_check_wait_interval_seconds" { + description = "Number of seconds to wait between checks of S3 for whether all files have been processed" + type = number +} + +variable "processed_files_check_max_attempts" { + description = "Maximum number of attempts to check if all files have been processed" + type = number +} + variable "glue_s3_max_attempts" { description = "The maximum number of attempts when making requests to S3" type = number diff --git a/terraform/environments/digital-prison-reporting/modules/domains/replay-pipeline/pipeline.tf b/terraform/environments/digital-prison-reporting/modules/domains/replay-pipeline/pipeline.tf index e6a20b0c2fc..2042a0502b8 100644 --- a/terraform/environments/digital-prison-reporting/modules/domains/replay-pipeline/pipeline.tf +++ b/terraform/environments/digital-prison-reporting/modules/domains/replay-pipeline/pipeline.tf @@ -52,7 +52,8 @@ module "replay_pipeline" { "Parameters" : { "JobName" : var.glue_unprocessed_raw_files_check_job, "Arguments" : { - "--dpr.orchestration.wait.interval.seconds" : "60" + "--dpr.orchestration.wait.interval.seconds" : tostring(var.processed_files_check_wait_interval_seconds), + "--dpr.orchestration.max.attempts" : tostring(var.processed_files_check_max_attempts) } }, "Next" : "Stop Glue Streaming Job" @@ -250,7 +251,8 @@ module "replay_pipeline" { "Parameters" : { "JobName" : var.glue_unprocessed_raw_files_check_job, "Arguments" : { - "--dpr.orchestration.wait.interval.seconds" : "60" + "--dpr.orchestration.wait.interval.seconds" : tostring(var.processed_files_check_wait_interval_seconds), + 
"--dpr.orchestration.max.attempts" : tostring(var.processed_files_check_max_attempts) } }, "Next" : "Empty Raw Data" diff --git a/terraform/environments/digital-prison-reporting/modules/domains/replay-pipeline/variables.tf b/terraform/environments/digital-prison-reporting/modules/domains/replay-pipeline/variables.tf index f84fccb9b6c..b2d5ad19292 100644 --- a/terraform/environments/digital-prison-reporting/modules/domains/replay-pipeline/variables.tf +++ b/terraform/environments/digital-prison-reporting/modules/domains/replay-pipeline/variables.tf @@ -235,6 +235,16 @@ variable "retention_curated_num_workers" { } } +variable "processed_files_check_wait_interval_seconds" { + description = "Number of seconds to wait between checks of S3 for whether all files have been processed" + type = number +} + +variable "processed_files_check_max_attempts" { + description = "Maximum number of attempts to check if all files have been processed" + type = number +} + variable "glue_s3_max_attempts" { description = "The maximum number of attempts when making requests to S3" type = number diff --git a/terraform/environments/digital-prison-reporting/modules/domains/stop-cdc-pipeline/pipeline.tf b/terraform/environments/digital-prison-reporting/modules/domains/stop-cdc-pipeline/pipeline.tf index 3143ffbbd55..d0ef2ef0519 100644 --- a/terraform/environments/digital-prison-reporting/modules/domains/stop-cdc-pipeline/pipeline.tf +++ b/terraform/environments/digital-prison-reporting/modules/domains/stop-cdc-pipeline/pipeline.tf @@ -29,8 +29,8 @@ module "cdc_stop_pipeline" { "Parameters" : { "JobName" : var.glue_unprocessed_raw_files_check_job, "Arguments" : { - "--dpr.orchestration.wait.interval.seconds" : "60" - "--dpr.orchestration.max.attempts" : "120" + "--dpr.orchestration.wait.interval.seconds" : tostring(var.processed_files_check_wait_interval_seconds), + "--dpr.orchestration.max.attempts" : tostring(var.processed_files_check_max_attempts) } }, "Next" : "Stop Glue Streaming Job" diff --git 
a/terraform/environments/digital-prison-reporting/modules/domains/stop-cdc-pipeline/variables.tf b/terraform/environments/digital-prison-reporting/modules/domains/stop-cdc-pipeline/variables.tf index 62ab1e00c38..d6d60a9ca57 100644 --- a/terraform/environments/digital-prison-reporting/modules/domains/stop-cdc-pipeline/variables.tf +++ b/terraform/environments/digital-prison-reporting/modules/domains/stop-cdc-pipeline/variables.tf @@ -39,6 +39,16 @@ variable "glue_unprocessed_raw_files_check_job" { type = string } +variable "processed_files_check_wait_interval_seconds" { + description = "Number of seconds to wait between checks of S3 for whether all files have been processed" + type = number +} + +variable "processed_files_check_max_attempts" { + description = "Maximum number of attempts to check if all files have been processed" + type = number +} + variable "tags" { type = map(string) default = {}