Skip to content

Commit

Permalink
fix!: disable autoscaling for indexwork service
Browse files Browse the repository at this point in the history
Some jobs are being terminated in-flight during scale-in for this
service. Disabling autoscaling for this service is the safest
route forward until this is resolved.

BREAKING CHANGE: The following variable has been removed

- indexwork_autoscaling_max_count
  • Loading branch information
deeno35 committed Nov 14, 2024
1 parent d551e55 commit 8e53564
Show file tree
Hide file tree
Showing 2 changed files with 7 additions and 99 deletions.
94 changes: 1 addition & 93 deletions modules/bigeye/main.tf
Original file line number Diff line number Diff line change
Expand Up @@ -2582,7 +2582,7 @@ module "indexwork" {

# Task settings
control_desired_count = false
desired_count = 0
desired_count = var.indexwork_desired_count
cpu = var.indexwork_cpu
memory = var.indexwork_memory
execution_role_arn = local.ecs_role_arn
Expand Down Expand Up @@ -2630,98 +2630,6 @@ module "indexwork" {
secret_arns = local.datawatch_secret_arns
}

# Registers the indexwork ECS service's DesiredCount as an Application Auto Scaling target,
# allowing it to be scaled between 0 and var.indexwork_autoscaling_max_count tasks.
resource "aws_appautoscaling_target" "indexwork" {
  service_namespace  = "ecs"
  scalable_dimension = "ecs:service:DesiredCount"
  resource_id        = "service/${local.name}/${local.name}-indexwork"

  min_capacity = 0
  max_capacity = var.indexwork_autoscaling_max_count

  # Explicit ordering on the indexwork module, since resource_id is built from a string
  # and carries no implicit dependency on it.
  depends_on = [module.indexwork]
}

# Step-scaling policy for the indexwork scalable target. It sets an exact task count:
# 0 when the metric is below 1, var.indexwork_autoscaling_max_count otherwise.
resource "aws_appautoscaling_policy" "indexwork" {
  name        = "${local.name}-indexwork-catalog-autoscaling"
  policy_type = "StepScaling"

  # Identify the target by reusing the attributes of the scalable target resource.
  resource_id        = aws_appautoscaling_target.indexwork.resource_id
  scalable_dimension = aws_appautoscaling_target.indexwork.scalable_dimension
  service_namespace  = aws_appautoscaling_target.indexwork.service_namespace

  depends_on = [aws_appautoscaling_target.indexwork]

  step_scaling_policy_configuration {
    adjustment_type         = "ExactCapacity"
    metric_aggregation_type = "Minimum"
    cooldown                = 300

    # Scale to 0 when there is no work on the queue.
    step_adjustment {
      metric_interval_upper_bound = 1
      scaling_adjustment          = 0
    }

    # Scale straight to the maximum when there is at least 1 job in the queue. Finer-grained
    # scaling steps are not practical for MQ-based services: we would lose in-flight jobs
    # during scale-in, because our MQ workers do not respect SIGTERM.
    step_adjustment {
      metric_interval_lower_bound = 1
      scaling_adjustment          = var.indexwork_autoscaling_max_count
    }
  }
}

# CloudWatch alarm whose alarm action is the indexwork step-scaling policy.
# It evaluates the summed MessageCount of two AmazonMQ queues (see metric_query "e1").
resource "aws_cloudwatch_metric_alarm" "indexwork" {
alarm_name = "${local.name}-indexwork autoscaling"
actions_enabled = true
alarm_actions = [aws_appautoscaling_policy.indexwork.arn]
evaluation_periods = 1
datapoints_to_alarm = 1
# NOTE(review): ">= 0" on a message count presumably keeps the alarm in ALARM whenever data
# is present, leaving the capacity decision to the policy's step adjustments — confirm.
threshold = 0
comparison_operator = "GreaterThanOrEqualToThreshold"
treat_missing_data = "missing"
tags = {}
# NOTE(review): the placeholder below looks like it was copied from `terraform plan` output;
# whatever attributes it stands for are not shown in this block — verify before reuse.
# (12 unchanged attributes hidden)

# m1: minimum MessageCount over 300s for the "dataset_index_op_v2" queue on broker local.name.
metric_query {
id = "m1"
period = 0
return_data = false

metric {
dimensions = {
"Broker" = local.name
"Queue" = "dataset_index_op_v2"
"VirtualHost" = "/"
}
metric_name = "MessageCount"
namespace = "AWS/AmazonMQ"
period = 300
stat = "Minimum"
}
}
# m2: same metric as m1, for the "catalog_index_v2" queue.
metric_query {
id = "m2"
period = 0
return_data = false

metric {
dimensions = {
"Broker" = local.name
"Queue" = "catalog_index_v2"
"VirtualHost" = "/"
}
metric_name = "MessageCount"
namespace = "AWS/AmazonMQ"
period = 300
stat = "Minimum"
}
}
# e1: sum of the metrics above; the only query with return_data = true, so this is the
# series the alarm compares against the threshold.
metric_query {
expression = "SUM(METRICS())"
id = "e1"
label = "sum queued messages across queues"
period = 0
return_data = true
}
}

module "lineagework" {
depends_on = [aws_secretsmanager_secret_version.robot_password, aws_secretsmanager_secret_version.robot_agent_api_key]
source = "../simpleservice"
Expand Down
12 changes: 6 additions & 6 deletions modules/bigeye/variables.tf
Original file line number Diff line number Diff line change
Expand Up @@ -2055,6 +2055,12 @@ variable "indexwork_image_tag" {
default = ""
}

# Replica count for the indexwork service; passed as desired_count to the indexwork module.
variable "indexwork_desired_count" {
  type        = number
  default     = 2
  description = "The desired number of replicas"
}

variable "indexwork_cpu" {
description = "Amount of CPU to allocate"
type = number
Expand Down Expand Up @@ -2103,12 +2109,6 @@ variable "indexwork_enable_ecs_exec" {
default = false
}

# Upper bound for indexwork autoscaling; used as both the scalable target's max_capacity
# and the exact capacity applied when the queue is non-empty.
variable "indexwork_autoscaling_max_count" {
  type        = number
  default     = 2
  description = "When there is work in the queue, the indexwork will scale up to this number of instances."
}

#======================================================
# Application Variables - Lineagework
#======================================================
Expand Down

0 comments on commit 8e53564

Please sign in to comment.