Skip to content

Commit

Permalink
feat: Update to EKS module v20
Browse files Browse the repository at this point in the history
  • Loading branch information
bryantbiggs committed Feb 3, 2024
1 parent fb52229 commit 2c94910
Show file tree
Hide file tree
Showing 25 changed files with 97 additions and 372 deletions.
6 changes: 4 additions & 2 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
repos:
- repo: https://github.com/antonbabenko/pre-commit-terraform
rev: v1.81.0
rev: v1.86.0
hooks:
- id: terraform_fmt
- id: terraform_validate
Expand All @@ -18,9 +18,11 @@ repos:
- '--args=--only=terraform_naming_convention'
- '--args=--only=terraform_required_version'
- '--args=--only=terraform_required_providers'
- '--args=--only=terraform_unused_required_providers'
- '--args=--only=terraform_workspace_remote'
- repo: https://github.com/pre-commit/pre-commit-hooks
rev: v4.4.0
rev: v4.5.0
hooks:
- id: check-merge-conflict
- id: end-of-file-fixer
- id: trailing-whitespace
4 changes: 2 additions & 2 deletions eks-managed-node-group/eks_al2.tf
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
module "eks_al2" {
source = "terraform-aws-modules/eks/aws"
version = "~> 19.15"
version = "~> 20.0"

cluster_name = "${local.name}-al2"
cluster_version = "1.27"
cluster_version = "1.29"

# EKS Addons
cluster_addons = {
Expand Down
4 changes: 2 additions & 2 deletions eks-managed-node-group/eks_bottlerocket.tf
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
module "eks_bottlerocket" {
source = "terraform-aws-modules/eks/aws"
version = "~> 19.15"
version = "~> 20.0"

cluster_name = "${local.name}-br"
cluster_version = "1.27"
cluster_version = "1.29"

# EKS Addons
cluster_addons = {
Expand Down
4 changes: 2 additions & 2 deletions eks-managed-node-group/eks_default.tf
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
module "eks_default" {
source = "terraform-aws-modules/eks/aws"
version = "~> 19.15"
version = "~> 20.0"

cluster_name = "${local.name}-default"
cluster_version = "1.27"
cluster_version = "1.29"

# EKS Addons
cluster_addons = {
Expand Down
6 changes: 2 additions & 4 deletions ephemeral-vol-test/eks.tf
Original file line number Diff line number Diff line change
@@ -1,11 +1,9 @@
module "eks" {
source = "terraform-aws-modules/eks/aws"
version = "~> 19.15"

# create = false
version = "~> 20.0"

cluster_name = local.name
cluster_version = "1.27"
cluster_version = "1.29"

cluster_endpoint_public_access = true

Expand Down
24 changes: 3 additions & 21 deletions inferentia/eks.tf
Original file line number Diff line number Diff line change
@@ -1,43 +1,25 @@
locals {
sso_path = "/aws-reserved/sso.amazonaws.com/"
plugin_name = "neuron-device-plugin"

inferentia_instance_classes = ["inf1.xlarge", "inf1.2xlarge", "inf1.6xlarge", "inf1.4xlarge"]
}

data "aws_iam_roles" "sso_admin" {
name_regex = "AWSReservedSSO_AWSAdministratorAccess_.*"
path_prefix = local.sso_path
}

################################################################################
# EKS Cluster
################################################################################

module "eks" {
source = "terraform-aws-modules/eks/aws"
version = "~> 19.15"
version = "~> 20.0"

cluster_name = local.name
cluster_version = "1.27"
cluster_version = "1.29"

cluster_endpoint_public_access = true

vpc_id = module.vpc.vpc_id
subnet_ids = module.vpc.private_subnets

enable_irsa = true

manage_aws_auth_configmap = true
aws_auth_roles = [
{
# Need to strip path -> https://github.com/kubernetes-sigs/aws-iam-authenticator/issues/268
rolearn = replace(one(data.aws_iam_roles.sso_admin.arns), "/${local.sso_path}/", "/")
username = "sso_admin"
groups = ["system:masters"]
},
]

eks_managed_node_groups = {
inf1 = {
ami_type = "AL2_x86_64_GPU"
Expand Down Expand Up @@ -183,7 +165,7 @@ resource "kubernetes_daemon_set_v1" "neuron_device" {

container {
# https://gallery.ecr.aws/neuron/neuron-device-plugin
image = "public.ecr.aws/neuron/neuron-device-plugin:2.17.3.0"
image = "public.ecr.aws/neuron/neuron-device-plugin:2.19.16.0"
name = local.plugin_name
image_pull_policy = "Always"

Expand Down
4 changes: 2 additions & 2 deletions inferentia/main.tf
Original file line number Diff line number Diff line change
Expand Up @@ -11,8 +11,8 @@ terraform {
version = ">= 2.20"
}
kubectl = {
source = "gavinbunney/kubectl"
version = ">= 1.14"
source = "alekc/kubectl"
version = ">= 2.0"
}
}

Expand Down
4 changes: 2 additions & 2 deletions ipv4-prefix-delegation/eks.tf
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
module "eks" {
source = "terraform-aws-modules/eks/aws"
version = "~> 19.15"
version = "~> 20.0"

cluster_name = local.name
cluster_version = "1.27"
cluster_version = "1.29"

cluster_endpoint_public_access = true

Expand Down
4 changes: 2 additions & 2 deletions ipvs/eks.tf
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
module "eks" {
source = "terraform-aws-modules/eks/aws"
version = "~> 19.15"
version = "~> 20.0"

cluster_name = local.name
cluster_version = "1.27"
cluster_version = "1.29"

cluster_endpoint_public_access = true

Expand Down
2 changes: 1 addition & 1 deletion karpenter/alb_controller.tf
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ resource "helm_release" "alb_controller" {
name = "aws-load-balancer-controller"
repository = "https://aws.github.io/eks-charts"
chart = "aws-load-balancer-controller"
version = "1.6.1"
version = "1.7.0"

set {
name = "serviceAccount.annotations.eks\\.amazonaws\\.com/role-arn"
Expand Down
17 changes: 2 additions & 15 deletions karpenter/eks.tf
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
module "eks" {
source = "terraform-aws-modules/eks/aws"
version = "~> 19.15"
version = "~> 20.0"

cluster_name = local.name
cluster_version = "1.27"
cluster_version = "1.29"

cluster_endpoint_public_access = true

Expand Down Expand Up @@ -45,19 +45,6 @@ module "eks" {
create_cluster_security_group = false
create_node_security_group = false

manage_aws_auth_configmap = true
aws_auth_roles = [
# We need to add in the Karpenter node IAM role for nodes launched by Karpenter
{
rolearn = module.karpenter.role_arn
username = "system:node:{{EC2PrivateDNSName}}"
groups = [
"system:bootstrappers",
"system:nodes",
]
},
]

fargate_profiles = {
karpenter = {
selectors = [
Expand Down
105 changes: 56 additions & 49 deletions karpenter/karpenter.tf
Original file line number Diff line number Diff line change
Expand Up @@ -4,9 +4,12 @@

module "karpenter" {
source = "terraform-aws-modules/eks/aws//modules/karpenter"
version = "~> 19.15"
version = "~> 20.0"

cluster_name = module.eks.cluster_name
cluster_name = module.eks.cluster_name

# EKS Fargate currently does not support Pod Identity
enable_irsa = true
irsa_oidc_provider_arn = module.eks.oidc_provider_arn

tags = module.tags.tags
Expand All @@ -17,86 +20,90 @@ module "karpenter" {
################################################################################

resource "helm_release" "karpenter" {
namespace = "karpenter"
create_namespace = true

namespace = "karpenter"
create_namespace = true
name = "karpenter"
repository = "oci://public.ecr.aws/karpenter"
repository_username = data.aws_ecrpublic_authorization_token.token.user_name
repository_password = data.aws_ecrpublic_authorization_token.token.password
chart = "karpenter"
version = "v0.30.0"
version = "v0.33.1"
wait = false

# Memory request/limit set to maximize the capacity provisioned by Fargate
# 2GB allocated - 256MB overhead = 1792MB
values = [
<<-EOT
controller:
resources:
requests:
memory: "1792M"
limits:
memory: "1792M"
nodeSelector:
eks.amazonaws.com/compute-type: fargate
settings:
aws:
clusterName: ${module.eks.cluster_name}
clusterEndpoint: ${module.eks.cluster_endpoint}
defaultInstanceProfile: ${module.karpenter.instance_profile_name}
interruptionQueueName: ${module.karpenter.queue_name}
serviceAccount:
annotations:
eks.amazonaws.com/role-arn: ${module.karpenter.irsa_arn}
settings:
clusterName: ${module.eks.cluster_name}
clusterEndpoint: ${module.eks.cluster_endpoint}
interruptionQueue: ${module.karpenter.queue_name}
serviceAccount:
annotations:
eks.amazonaws.com/role-arn: ${module.karpenter.iam_role_arn}
EOT
]
}

################################################################################
# Karpenter Provisioner
# Karpenter - NodeClass & NodePool
################################################################################

resource "kubectl_manifest" "karpenter_provisioner" {
resource "kubectl_manifest" "karpenter_node_class" {
yaml_body = <<-YAML
apiVersion: karpenter.sh/v1alpha5
kind: Provisioner
apiVersion: karpenter.k8s.aws/v1beta1
kind: EC2NodeClass
metadata:
name: default
spec:
requirements:
- key: karpenter.sh/capacity-type
operator: In
values: ["spot"]
limits:
resources:
cpu: 1000
providerRef:
name: default
ttlSecondsAfterEmpty: 30
amiFamily: AL2
role: ${module.karpenter.node_iam_role_name}
subnetSelectorTerms:
- tags:
karpenter.sh/discovery: ${module.eks.cluster_name}
securityGroupSelectorTerms:
- tags:
karpenter.sh/discovery: ${module.eks.cluster_name}
tags:
karpenter.sh/discovery: ${module.eks.cluster_name}
YAML

depends_on = [
helm_release.karpenter
]
}

resource "kubectl_manifest" "karpenter_node_template" {
resource "kubectl_manifest" "karpenter_node_pool" {
yaml_body = <<-YAML
apiVersion: karpenter.k8s.aws/v1alpha1
kind: AWSNodeTemplate
apiVersion: karpenter.sh/v1beta1
kind: NodePool
metadata:
name: default
spec:
subnetSelector:
karpenter.sh/discovery: ${module.eks.cluster_name}
securityGroupSelector:
karpenter.sh/discovery: ${module.eks.cluster_name}
tags:
karpenter.sh/discovery: ${module.eks.cluster_name}
template:
spec:
nodeClassRef:
name: default
requirements:
- key: "karpenter.k8s.aws/instance-category"
operator: In
values: ["c", "m", "r"]
- key: "karpenter.k8s.aws/instance-cpu"
operator: In
values: ["4", "8", "16", "32"]
- key: "karpenter.k8s.aws/instance-hypervisor"
operator: In
values: ["nitro"]
- key: "karpenter.k8s.aws/instance-generation"
operator: Gt
values: ["2"]
limits:
cpu: 1000
disruption:
consolidationPolicy: WhenEmpty
consolidateAfter: 30s
YAML

depends_on = [
helm_release.karpenter
kubectl_manifest.karpenter_node_class
]
}

Expand Down
20 changes: 2 additions & 18 deletions karpenter/main.tf
Original file line number Diff line number Diff line change
Expand Up @@ -11,12 +11,8 @@ terraform {
version = "~> 2.6"
}
kubectl = {
source = "gavinbunney/kubectl"
version = "~> 1.14"
}
kubernetes = {
source = "hashicorp/kubernetes"
version = ">= 2.20"
source = "alekc/kubectl"
version = ">= 2.0"
}
}

Expand All @@ -39,18 +35,6 @@ provider "aws" {
# }
}

provider "kubernetes" {
host = module.eks.cluster_endpoint
cluster_ca_certificate = base64decode(module.eks.cluster_certificate_authority_data)

exec {
api_version = "client.authentication.k8s.io/v1beta1"
command = "aws"
# This requires the awscli to be installed locally where Terraform is executed
args = ["eks", "get-token", "--cluster-name", module.eks.cluster_name]
}
}

provider "helm" {
kubernetes {
host = module.eks.cluster_endpoint
Expand Down
Loading

0 comments on commit 2c94910

Please sign in to comment.