From 8e5356422a05fdd67621b7f78886cd624df3b9ff Mon Sep 17 00:00:00 2001
From: David Nguyen <david@bigeye.com>
Date: Wed, 13 Nov 2024 16:33:26 -0800
Subject: [PATCH] fix!: disable autoscaling for indexwork service

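Some jobs are being terminated in flight during scale-in of the
indexwork service.  Disabling autoscaling for this service is the
safest route forward until this is resolved.  The service's
desired_count is now taken from the new indexwork_desired_count
variable (default 2).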

BREAKING CHANGE: The following variable has been removed and must
no longer be set by callers of this module:

- indexwork_autoscaling_max_count
---
 modules/bigeye/main.tf      | 94 +------------------------------------
 modules/bigeye/variables.tf | 12 ++---
 2 files changed, 7 insertions(+), 99 deletions(-)

diff --git a/modules/bigeye/main.tf b/modules/bigeye/main.tf
index 8519d6f..6e29bc2 100644
--- a/modules/bigeye/main.tf
+++ b/modules/bigeye/main.tf
@@ -2582,7 +2582,7 @@ module "indexwork" {
 
   # Task settings
   control_desired_count     = false
-  desired_count             = 0
+  desired_count             = var.indexwork_desired_count
   cpu                       = var.indexwork_cpu
   memory                    = var.indexwork_memory
   execution_role_arn        = local.ecs_role_arn
@@ -2630,98 +2630,6 @@ module "indexwork" {
   secret_arns = local.datawatch_secret_arns
 }
 
-resource "aws_appautoscaling_target" "indexwork" {
-  depends_on         = [module.indexwork]
-  min_capacity       = 0
-  max_capacity       = var.indexwork_autoscaling_max_count
-  resource_id        = format("service/%s/%s-indexwork", local.name, local.name)
-  scalable_dimension = "ecs:service:DesiredCount"
-  service_namespace  = "ecs"
-}
-
-resource "aws_appautoscaling_policy" "indexwork" {
-  depends_on         = [aws_appautoscaling_target.indexwork]
-  name               = format("%s-indexwork-catalog-autoscaling", local.name)
-  policy_type        = "StepScaling"
-  resource_id        = aws_appautoscaling_target.indexwork.resource_id
-  scalable_dimension = aws_appautoscaling_target.indexwork.scalable_dimension
-  service_namespace  = aws_appautoscaling_target.indexwork.service_namespace
-  step_scaling_policy_configuration {
-    adjustment_type         = "ExactCapacity"
-    cooldown                = 300
-    metric_aggregation_type = "Minimum"
-
-    # Scale to 0 when there is no work on the queue
-    step_adjustment {
-      scaling_adjustment          = 0
-      metric_interval_upper_bound = 1
-    }
-
-    # Scale up when there is at least 1 job in the queue.  More fine grained scaling steps is not
-    # practical for MQ based services as we will loose in-flight jobs during scale-in since our MQ
-    # workers do not respect sigterm.
-    step_adjustment {
-      scaling_adjustment          = var.indexwork_autoscaling_max_count
-      metric_interval_lower_bound = 1
-    }
-  }
-}
-
-resource "aws_cloudwatch_metric_alarm" "indexwork" {
-  alarm_name          = "${local.name}-indexwork autoscaling"
-  actions_enabled     = true
-  alarm_actions       = [aws_appautoscaling_policy.indexwork.arn]
-  evaluation_periods  = 1
-  datapoints_to_alarm = 1
-  threshold           = 0
-  comparison_operator = "GreaterThanOrEqualToThreshold"
-  treat_missing_data  = "missing"
-  tags                = {}
-  # (12 unchanged attributes hidden)
-
-  metric_query {
-    id          = "m1"
-    period      = 0
-    return_data = false
-
-    metric {
-      dimensions = {
-        "Broker"      = local.name
-        "Queue"       = "dataset_index_op_v2"
-        "VirtualHost" = "/"
-      }
-      metric_name = "MessageCount"
-      namespace   = "AWS/AmazonMQ"
-      period      = 300
-      stat        = "Minimum"
-    }
-  }
-  metric_query {
-    id          = "m2"
-    period      = 0
-    return_data = false
-
-    metric {
-      dimensions = {
-        "Broker"      = local.name
-        "Queue"       = "catalog_index_v2"
-        "VirtualHost" = "/"
-      }
-      metric_name = "MessageCount"
-      namespace   = "AWS/AmazonMQ"
-      period      = 300
-      stat        = "Minimum"
-    }
-  }
-  metric_query {
-    expression  = "SUM(METRICS())"
-    id          = "e1"
-    label       = "sum queued messages across queues"
-    period      = 0
-    return_data = true
-  }
-}
-
 module "lineagework" {
   depends_on = [aws_secretsmanager_secret_version.robot_password, aws_secretsmanager_secret_version.robot_agent_api_key]
   source     = "../simpleservice"
diff --git a/modules/bigeye/variables.tf b/modules/bigeye/variables.tf
index abf8998..9e36445 100644
--- a/modules/bigeye/variables.tf
+++ b/modules/bigeye/variables.tf
@@ -2055,6 +2055,12 @@ variable "indexwork_image_tag" {
   default     = ""
 }
 
+variable "indexwork_desired_count" {
+  description = "The desired number of replicas"
+  type        = number
+  default     = 2
+}
+
 variable "indexwork_cpu" {
   description = "Amount of CPU to allocate"
   type        = number
@@ -2103,12 +2109,6 @@ variable "indexwork_enable_ecs_exec" {
   default     = false
 }
 
-variable "indexwork_autoscaling_max_count" {
-  description = "When there is work in the queue, the indexwork will scale up to this number of instances."
-  type        = number
-  default     = 2
-}
-
 #======================================================
 # Application Variables - Lineagework
 #======================================================