Skip to content

Commit

Permalink
feat: add ability to mount EFS volume in app containers
Browse files Browse the repository at this point in the history
This is useful for debugging, like in the case where we would want
to persist some generated data on instance stop like a heap dump.

A single EFS volume for the entire stack was chosen instead of
a volume per service, as ECS containers can only allow 1 EFS volume
mount; with a single volume, all of the various containers'
EFS data can be read from the bigeye-admin container.

EFS access point per service gives us logical separation of
data between the different services, so each service can only
read/write its own data (except the admin container which can
mount the root "/" so will see them all).

Also note it is not possible to disable AZ redundancy by using
"one zone" mode to save on costs, as we run into problems with
EFS mount targets not existing in the various AZs since we don't
allow constraining ECS tasks to a single AZ.
  • Loading branch information
deeno35 committed Jun 13, 2024
1 parent 5832c29 commit 569bb5c
Show file tree
Hide file tree
Showing 10 changed files with 296 additions and 13 deletions.
36 changes: 28 additions & 8 deletions modules/admin/main.tf
Original file line number Diff line number Diff line change
Expand Up @@ -81,8 +81,9 @@ locals {
RABBITMQ_PASSWORD = var.rabbitmq_password_secret_arn
}

create_iam_role = var.task_iam_role_arn == ""
ecs_iam_role = local.create_iam_role ? aws_iam_role.this[0].arn : var.task_iam_role_arn
create_iam_role = var.task_iam_role_arn == ""
ecs_iam_role = local.create_iam_role ? aws_iam_role.this[0].arn : var.task_iam_role_arn
efs_volume_enabled = var.efs_mount_point != "" && var.efs_access_point_id != ""
}

data "aws_region" "current" {}
Expand Down Expand Up @@ -211,12 +212,17 @@ resource "aws_vpc_security_group_ingress_rule" "client_from_main" {

locals {
primary_container_definition = {
name = local.name
cpu = 1024 - var.awsfirelens_cpu
memory = 2048 - var.awsfirelens_memory
image = var.image
essential = true
mountPoints = []
name = local.name
cpu = 1024 - var.awsfirelens_cpu
memory = 2048 - var.awsfirelens_memory
image = var.image
essential = true
mountPoints = local.efs_volume_enabled ? [
{
containerPath : var.efs_mount_point,
sourceVolume : local.name,
}
] : []
portMappings = []
volumesFrom = []
systemControls = []
Expand Down Expand Up @@ -258,6 +264,20 @@ resource "aws_ecs_task_definition" "this" {
[local.primary_container_definition],
var.awsfirelens_enabled ? [local.awsfirelens_container_definition] : [],
))
dynamic "volume" {
for_each = local.efs_volume_enabled ? ["this"] : []
content {
name = local.name
efs_volume_configuration {
file_system_id = var.efs_volume_id
transit_encryption = "ENABLED"
authorization_config {
access_point_id = var.efs_access_point_id
iam = "ENABLED"
}
}
}
}
}

#======================================================
Expand Down
18 changes: 18 additions & 0 deletions modules/admin/variables.tf
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,24 @@ variable "execution_role_arn" {
type = string
}

# ID of the EFS file system used as file_system_id for the task's EFS volume.
# Defaults to an empty string.
variable "efs_volume_id" {
  description = "Use in conjunction with var.efs_access_point_id to mount an EFS volume on the app container"
  type        = string
  default     = ""
}

# ID of the EFS access point used in the volume's authorization_config.
# Defaults to an empty string.
variable "efs_access_point_id" {
  type        = string
  default     = ""
  description = "Use in conjunction with var.efs_volume_id to mount an EFS volume on the app container"
}

# Path inside the container used as containerPath for the EFS mount.
# Defaults to an empty string.
variable "efs_mount_point" {
  type        = string
  default     = ""
  description = "Container path where the EFS volume will be mounted."
}

### Info we need for environment variables
variable "haproxy_domain_name" {
description = "haproxy domain name"
Expand Down
112 changes: 112 additions & 0 deletions modules/bigeye/efs.tf
Original file line number Diff line number Diff line change
@@ -0,0 +1,112 @@
# Security group for the EFS mount targets. Created only when this module manages
# security groups and EFS is enabled for the stack.
resource "aws_security_group" "efs" {
  count = local.efs_volume_enabled && var.create_security_groups ? 1 : 0

  vpc_id = local.vpc_id
  name   = "${local.name}-efs"

  tags = merge(local.tags, { Name = local.name })
}

# Ingress rule on the EFS security group: TCP 2049 (NFS) from the VPC CIDR block.
resource "aws_vpc_security_group_ingress_rule" "efs" {
  count = local.efs_volume_enabled && var.create_security_groups ? 1 : 0

  security_group_id = aws_security_group.efs[0].id
  description       = "Allow NFS"

  cidr_ipv4   = var.vpc_cidr_block
  ip_protocol = "TCP"
  from_port   = 2049 # NFS
  to_port     = 2049 # NFS
}

# The single EFS file system for the stack; created only when EFS is enabled.
resource "aws_efs_file_system" "this" {
  count = local.efs_volume_enabled ? 1 : 0

  encrypted       = true
  throughput_mode = "elastic"

  # Transition files to the infrequent-access class per the setting below.
  lifecycle_policy {
    transition_to_ia = "AFTER_7_DAYS"
  }

  tags = merge(local.tags, { Name = local.name })
}

# Resource policy for the stack's EFS file system:
#   1. Allow mount, write and root access to any principal, but only for
#      requests that arrive via a mount target.
#   2. Deny every action when the request does not use secure transport.
resource "aws_efs_file_system_policy" "this" {
  count          = local.efs_volume_enabled ? 1 : 0
  file_system_id = aws_efs_file_system.this[0].id

  policy = jsonencode({
    Version = "2012-10-17"
    Statement = [
      {
        Sid       = "allow-root-access"
        Effect    = "Allow"
        Principal = { AWS = "*" }
        Action = [
          "elasticfilesystem:ClientRootAccess",
          "elasticfilesystem:ClientWrite",
          "elasticfilesystem:ClientMount",
        ]
        Resource = aws_efs_file_system.this[0].arn
        Condition = {
          Bool = {
            "elasticfilesystem:AccessedViaMountTarget" = "true"
          }
        }
      },
      {
        Sid       = "deny-unencrypted-transport"
        Effect    = "Deny"
        Principal = { AWS = "*" }
        Action    = "*"
        Resource  = aws_efs_file_system.this[0].arn
        Condition = {
          Bool = {
            "aws:SecureTransport" = "false"
          }
        }
      },
    ]
  })
}

# Creates one EFS mount target in each application subnet when EFS is enabled.
resource "aws_efs_mount_target" "this" {
  for_each       = local.efs_volume_enabled ? toset(local.application_subnet_ids) : []
  file_system_id = aws_efs_file_system.this[0].id
  subnet_id      = each.value

  # Use the splat directly rather than [one(...)]: when the module-managed
  # security group is not created (var.create_security_groups = false) the splat
  # is an empty list, whereas one(...) returns null and would put a null element
  # into security_groups.
  security_groups = concat(var.efs_volume_extra_security_group_ids, aws_security_group.efs[*].id)
}

# Access point for the bigeye-admin container, rooted at "/" of the file system.
# Created only when both EFS and the bigeye-admin module are enabled.
resource "aws_efs_access_point" "bigeye_admin" {
  count = var.enable_bigeye_admin_module && local.efs_volume_enabled ? 1 : 0

  file_system_id = aws_efs_file_system.this[0].id

  root_directory {
    path = "/"
  }

  tags = merge(local.tags, {
    app  = "bigeye-admin"
    Name = "${local.name}-bigeye-admin"
  })
}

# One access point per EFS-enabled service, each rooted at "/<service name>".
resource "aws_efs_access_point" "this" {
  for_each = local.efs_volume_enabled ? toset(var.efs_volume_enabled_services) : []

  file_system_id = aws_efs_file_system.this[0].id

  root_directory {
    path = "/${each.value}"

    # Ownership and mode applied when the root directory is created.
    creation_info {
      owner_uid   = 0
      owner_gid   = 0
      permissions = "777"
    }
  }

  tags = merge(local.tags, {
    app  = each.value
    Name = "${local.name}-${each.value}"
  })
}
1 change: 1 addition & 0 deletions modules/bigeye/locals.tf
Original file line number Diff line number Diff line change
Expand Up @@ -177,6 +177,7 @@ locals {
},
var.datawatch_additional_secret_arns,
)
efs_volume_enabled = length(var.efs_volume_enabled_services) > 0

temporal_lb_port = 443
temporal_per_namespace_worker_count = coalesce(var.temporal_per_namespace_worker_count, var.temporal_desired_count * 3)
Expand Down
60 changes: 57 additions & 3 deletions modules/bigeye/main.tf
Original file line number Diff line number Diff line change
Expand Up @@ -589,8 +589,7 @@ resource "aws_iam_role_policy" "ecs_execution" {
"ecr:BatchGetImage"
]
Resource = "*"
},
{
}, {
Sid = "AllowCloudWatch"
Effect = "Allow"
Action = [
Expand All @@ -604,7 +603,6 @@ resource "aws_iam_role_policy" "ecs_execution" {
}
]
})

}

resource "aws_iam_role_policy" "ecs_secrets" {
Expand Down Expand Up @@ -659,6 +657,9 @@ module "bigeye_admin" {
execution_role_arn = local.ecs_role_arn
task_iam_role_arn = var.admin_container_ecs_task_role_arn
fargate_version = var.fargate_version
efs_volume_id = local.efs_volume_enabled && var.enable_bigeye_admin_module ? aws_efs_file_system.this[0].id : ""
efs_access_point_id = local.efs_volume_enabled && var.enable_bigeye_admin_module ? aws_efs_access_point.bigeye_admin[0].id : ""
efs_mount_point = var.efs_mount_point

stack_name = local.name

Expand Down Expand Up @@ -921,6 +922,9 @@ module "haproxy" {
image_repository = format("%s%s", "haproxy", var.image_repository_suffix)
image_tag = local.haproxy_image_tag
cloudwatch_log_group_name = aws_cloudwatch_log_group.bigeye.name
efs_volume_id = contains(var.efs_volume_enabled_services, "haproxy") ? aws_efs_file_system.this[0].id : ""
efs_access_point_id = contains(var.efs_volume_enabled_services, "haproxy") ? aws_efs_access_point.this["haproxy"].id : ""
efs_mount_point = var.efs_mount_point

# Datadog
datadog_agent_enabled = var.datadog_agent_enabled
Expand Down Expand Up @@ -1022,6 +1026,9 @@ module "web" {
image_repository = format("%s%s", "web", var.image_repository_suffix)
image_tag = local.web_image_tag
cloudwatch_log_group_name = aws_cloudwatch_log_group.bigeye.name
efs_volume_id = contains(var.efs_volume_enabled_services, "web") ? aws_efs_file_system.this[0].id : ""
efs_access_point_id = contains(var.efs_volume_enabled_services, "web") ? aws_efs_access_point.this["web"].id : ""
efs_mount_point = var.efs_mount_point

# Datadog
datadog_agent_enabled = var.datadog_agent_enabled
Expand Down Expand Up @@ -1247,6 +1254,9 @@ module "monocle" {
image_repository = format("%s%s", "monocle", var.image_repository_suffix)
image_tag = local.monocle_image_tag
cloudwatch_log_group_name = aws_cloudwatch_log_group.bigeye.name
efs_volume_id = contains(var.efs_volume_enabled_services, "monocle") ? aws_efs_file_system.this[0].id : ""
efs_access_point_id = contains(var.efs_volume_enabled_services, "monocle") ? aws_efs_access_point.this["monocle"].id : ""
efs_mount_point = var.efs_mount_point

# Datadog
datadog_agent_enabled = var.datadog_agent_enabled
Expand Down Expand Up @@ -1383,6 +1393,9 @@ module "toretto" {
image_repository = format("%s%s", "toretto", var.image_repository_suffix)
image_tag = local.toretto_image_tag
cloudwatch_log_group_name = aws_cloudwatch_log_group.bigeye.name
efs_volume_id = contains(var.efs_volume_enabled_services, "toretto") ? aws_efs_file_system.this[0].id : ""
efs_access_point_id = contains(var.efs_volume_enabled_services, "toretto") ? aws_efs_access_point.this["toretto"].id : ""
efs_mount_point = var.efs_mount_point

# This can be removed when toretto handles sigterm better
stop_timeout = 10
Expand Down Expand Up @@ -1554,6 +1567,9 @@ module "scheduler" {
image_repository = format("%s%s", "scheduler", var.image_repository_suffix)
image_tag = local.scheduler_image_tag
cloudwatch_log_group_name = aws_cloudwatch_log_group.bigeye.name
efs_volume_id = contains(var.efs_volume_enabled_services, "scheduler") ? aws_efs_file_system.this[0].id : ""
efs_access_point_id = contains(var.efs_volume_enabled_services, "scheduler") ? aws_efs_access_point.this["scheduler"].id : ""
efs_mount_point = var.efs_mount_point

# Datadog
datadog_agent_enabled = var.datadog_agent_enabled
Expand Down Expand Up @@ -1756,6 +1772,32 @@ resource "aws_iam_role_policy" "datawatch_ecs_exec" {
})
}

# Inline policy on the module-managed datawatch role that allows mounting and
# writing to EFS file systems in this account/region whose "stack" resource tag
# equals local.name. Created only when the module creates the datawatch role and
# EFS is enabled.
resource "aws_iam_role_policy" "datawatch_efs" {
  count = local.create_datawatch_role && local.efs_volume_enabled ? 1 : 0
  role  = aws_iam_role.datawatch[0].id
  # Inline policy names are unique per role. The previous value "AllowECSExec"
  # described a different permission set and would collide with any ECS exec
  # policy of the same name on this role.
  name = "AllowEFS"
  policy = jsonencode({
    Version = "2012-10-17"
    Statement = [
      {
        Effect = "Allow"
        Action = [
          "elasticfilesystem:ClientMount",
          "elasticfilesystem:ClientWrite",
        ]
        Resource = "arn:aws:elasticfilesystem:${local.aws_region}:${local.aws_account_id}:file-system/*"
        Condition = {
          StringEquals = {
            "aws:ResourceTag/stack" = local.name
          }
        }
      }
    ]
  })
}



#======================================================
# Datawatch - Redis
#======================================================
Expand Down Expand Up @@ -2065,6 +2107,9 @@ module "datawatch" {
image_repository = format("%s%s", "datawatch", var.image_repository_suffix)
image_tag = local.datawatch_image_tag
cloudwatch_log_group_name = aws_cloudwatch_log_group.bigeye.name
efs_volume_id = contains(var.efs_volume_enabled_services, "datawatch") ? aws_efs_file_system.this[0].id : ""
efs_access_point_id = contains(var.efs_volume_enabled_services, "datawatch") ? aws_efs_access_point.this["datawatch"].id : ""
efs_mount_point = var.efs_mount_point

# Datadog
datadog_agent_enabled = var.datadog_agent_enabled
Expand Down Expand Up @@ -2139,6 +2184,9 @@ module "datawork" {
image_tag = local.datawork_image_tag
cloudwatch_log_group_name = aws_cloudwatch_log_group.bigeye.name
stop_timeout = 120
efs_volume_id = contains(var.efs_volume_enabled_services, "datawork") ? aws_efs_file_system.this[0].id : ""
efs_access_point_id = contains(var.efs_volume_enabled_services, "datawork") ? aws_efs_access_point.this["datawork"].id : ""
efs_mount_point = var.efs_mount_point

# Datadog
datadog_agent_enabled = var.datadog_agent_enabled
Expand Down Expand Up @@ -2215,6 +2263,9 @@ module "lineagework" {
image_tag = local.lineagework_image_tag
cloudwatch_log_group_name = aws_cloudwatch_log_group.bigeye.name
stop_timeout = 120
efs_volume_id = contains(var.efs_volume_enabled_services, "lineagework") ? aws_efs_file_system.this[0].id : ""
efs_access_point_id = contains(var.efs_volume_enabled_services, "lineagework") ? aws_efs_access_point.this["lineagework"].id : ""
efs_mount_point = var.efs_mount_point

# Datadog
datadog_agent_enabled = var.datadog_agent_enabled
Expand Down Expand Up @@ -2293,6 +2344,9 @@ module "metricwork" {
image_tag = local.metricwork_image_tag
cloudwatch_log_group_name = aws_cloudwatch_log_group.bigeye.name
stop_timeout = 120
efs_volume_id = contains(var.efs_volume_enabled_services, "metricwork") ? aws_efs_file_system.this[0].id : ""
efs_access_point_id = contains(var.efs_volume_enabled_services, "metricwork") ? aws_efs_access_point.this["metricwork"].id : ""
efs_mount_point = var.efs_mount_point

# Datadog
datadog_agent_enabled = var.datadog_agent_enabled
Expand Down
21 changes: 21 additions & 0 deletions modules/bigeye/temporal.tf
Original file line number Diff line number Diff line change
Expand Up @@ -131,6 +131,20 @@ resource "aws_ecs_task_definition" "temporal_components" {
var.datadog_agent_enabled ? [local.temporal_component_datadog_container_def[each.key]] : [],
var.awsfirelens_enabled && var.temporal_logging_enabled ? [local.temporal_component_awsfirelens_container_def[each.key]] : [],
))
# Declare an EFS-backed volume for this temporal component only when its service
# name ("temporal-<override name>") is listed in var.efs_volume_enabled_services.
dynamic "volume" {
  for_each = contains(var.efs_volume_enabled_services, "temporal-${local.temporal_svc_override_names[each.key]}") ? ["temporal-${local.temporal_svc_override_names[each.key]}"] : []
  content {
    # Inside a dynamic block the current element is volume.value; each.value
    # would refer to the enclosing resource's for_each instead.
    # The name must equal the sourceVolume used in the container's mountPoints,
    # which is "temporal-<override name>".
    name = volume.value
    efs_volume_configuration {
      # aws_efs_file_system.this is count-based, so it is indexed with [0].
      file_system_id     = aws_efs_file_system.this[0].id
      transit_encryption = "ENABLED"
      authorization_config {
        # Access points are keyed by the service name from var.efs_volume_enabled_services.
        access_point_id = aws_efs_access_point.this[volume.value].id
        iam             = "ENABLED"
      }
    }
  }
}
}

resource "aws_ecs_service" "temporal_components" {
Expand Down Expand Up @@ -516,6 +530,10 @@ locals {
}
) : {}
environment = [for k, v in local.temporal_component_env_vars[svc] : { Name = k, Value = v }]
mountPoints = contains(var.efs_volume_enabled_services, "temporal-${local.temporal_svc_override_names[svc]}") ? [{
containerPath : var.efs_mount_point,
sourceVolume : "temporal-${local.temporal_svc_override_names[svc]}",
}] : []
}
)
}
Expand Down Expand Up @@ -678,6 +696,9 @@ module "temporalui" {
image_repository = format("%s%s", "temporalui", var.image_repository_suffix)
image_tag = local.temporalui_image_tag
cloudwatch_log_group_name = aws_cloudwatch_log_group.temporal.name
efs_volume_id = contains(var.efs_volume_enabled_services, "temporalui") ? aws_efs_file_system.this[0].id : ""
efs_access_point_id = contains(var.efs_volume_enabled_services, "temporalui") ? aws_efs_access_point.this["temporalui"].id : ""
efs_mount_point = var.efs_mount_point

# Datadog
datadog_agent_enabled = var.datadog_agent_enabled
Expand Down
Loading

0 comments on commit 569bb5c

Please sign in to comment.