diff --git a/patterns/istio/index.html b/patterns/istio/index.html index 5023ecc02f..f3ad7c31b1 100644 --- a/patterns/istio/index.html +++ b/patterns/istio/index.html @@ -1172,57 +1172,60 @@
See here for the prerequisites and steps to deploy this pattern.
+Once the resources have been provisioned, you will need to replace the istio-ingress
pods due to an istiod
dependency issue. Use the following command to perform a rolling restart of the istio-ingress
pods:
Use the following code snippet to add the Istio Observability Add-ons on the EKS cluster with deployed Istio.
-for ADDON in kiali jaeger prometheus grafana
-do
- ADDON_URL="https://raw.githubusercontent.com/istio/istio/release-1.18/samples/addons/$ADDON.yaml"
- kubectl apply -f $ADDON_URL
-done
+for ADDON in kiali jaeger prometheus grafana
+do
+ ADDON_URL="https://raw.githubusercontent.com/istio/istio/release-1.18/samples/addons/$ADDON.yaml"
+ kubectl apply -f $ADDON_URL
+done
Validate¶
-
List out all pods and services in the istio-system
and istio-ingress
namespaces:
-kubectl get pods,svc -n istio-system
-kubectl get pods,svc -n istio-ingress
+
-NAME READY STATUS RESTARTS AGE
-pod/grafana-7d4f5589fb-4xj9m 1/1 Running 0 4m14s
-pod/istiod-ff577f8b8-c8ssk 1/1 Running 0 4m40s
-pod/jaeger-58c79c85cd-n7bkx 1/1 Running 0 4m14s
-pod/kiali-749d76d7bb-8kjg7 1/1 Running 0 4m14s
-pod/prometheus-5d5d6d6fc-sptxl 2/2 Running 0 4m15s
-
-NAME TYPE CLUSTER-IP EXTERNAL-IP PORT(S) AGE
-service/grafana ClusterIP 172.20.141.12 <none> 3000/TCP 4m14s
-service/istiod ClusterIP 172.20.172.70 <none> 15010/TCP,15012/TCP,443/TCP,15014/TCP 4m40s
-service/jaeger-collector ClusterIP 172.20.223.28 <none> 14268/TCP,14250/TCP,9411/TCP 4m15s
-service/kiali ClusterIP 172.20.182.231 <none> 20001/TCP,9090/TCP 4m15s
-service/prometheus ClusterIP 172.20.89.64 <none> 9090/TCP 4m14s
-service/tracing ClusterIP 172.20.253.201 <none> 80/TCP,16685/TCP 4m14s
-service/zipkin ClusterIP 172.20.221.157 <none> 9411/TCP 4m15s
-
-NAME READY STATUS RESTARTS AGE
-pod/istio-ingress-6f7c5dffd8-glszr 1/1 Running 0 4m28s
-
-NAME TYPE CLUSTER-IP EXTERNAL-IP PORT(S) AGE
-service/istio-ingress LoadBalancer 172.20.104.27 k8s-istioing-istioing-844c89b6c2-875b8c9a4b4e9365.elb.us-west-2.amazonaws.com 15021:32760/TCP,80:31496/TCP,443:32534/TCP 4m28s
+NAME READY STATUS RESTARTS AGE
+pod/grafana-7d4f5589fb-4xj9m 1/1 Running 0 4m14s
+pod/istiod-ff577f8b8-c8ssk 1/1 Running 0 4m40s
+pod/jaeger-58c79c85cd-n7bkx 1/1 Running 0 4m14s
+pod/kiali-749d76d7bb-8kjg7 1/1 Running 0 4m14s
+pod/prometheus-5d5d6d6fc-sptxl 2/2 Running 0 4m15s
+
+NAME TYPE CLUSTER-IP EXTERNAL-IP PORT(S) AGE
+service/grafana ClusterIP 172.20.141.12 <none> 3000/TCP 4m14s
+service/istiod ClusterIP 172.20.172.70 <none> 15010/TCP,15012/TCP,443/TCP,15014/TCP 4m40s
+service/jaeger-collector ClusterIP 172.20.223.28 <none> 14268/TCP,14250/TCP,9411/TCP 4m15s
+service/kiali ClusterIP 172.20.182.231 <none> 20001/TCP,9090/TCP 4m15s
+service/prometheus ClusterIP 172.20.89.64 <none> 9090/TCP 4m14s
+service/tracing ClusterIP 172.20.253.201 <none> 80/TCP,16685/TCP 4m14s
+service/zipkin ClusterIP 172.20.221.157 <none> 9411/TCP 4m15s
+
+NAME READY STATUS RESTARTS AGE
+pod/istio-ingress-6f7c5dffd8-glszr 1/1 Running 0 4m28s
+
+NAME TYPE CLUSTER-IP EXTERNAL-IP PORT(S) AGE
+service/istio-ingress LoadBalancer 172.20.104.27 k8s-istioing-istioing-844c89b6c2-875b8c9a4b4e9365.elb.us-west-2.amazonaws.com 15021:32760/TCP,80:31496/TCP,443:32534/TCP 4m28s
-
Verify all the Helm releases installed in the istio-system
and istio-ingress
namespaces:
-helm list -n istio-system
+
-NAME NAMESPACE REVISION UPDATED STATUS CHART APP VERSION
-istio-base istio-system 1 2023-07-19 11:05:41.599921 -0700 PDT deployed base-1.18.1 1.18.1
-istiod istio-system 1 2023-07-19 11:05:48.087616 -0700 PDT deployed istiod-1.18.1 1.18.1
+NAME NAMESPACE REVISION UPDATED STATUS CHART APP VERSION
+istio-base istio-system 1 2023-07-19 11:05:41.599921 -0700 PDT deployed base-1.18.1 1.18.1
+istiod istio-system 1 2023-07-19 11:05:48.087616 -0700 PDT deployed istiod-1.18.1 1.18.1
-helm list -n istio-ingress
+
-NAME NAMESPACE REVISION UPDATED STATUS CHART APP VERSION
-istio-ingress istio-ingress 1 2023-07-19 11:06:03.41609 -0700 PDT deployed gateway-1.18.1 1.18.1
+NAME NAMESPACE REVISION UPDATED STATUS CHART APP VERSION
+istio-ingress istio-ingress 1 2023-07-19 11:06:03.41609 -0700 PDT deployed gateway-1.18.1 1.18.1
@@ -1231,187 +1234,187 @@ Observability Add-onshttp://localhost:\ where <port>
is one of the
port numbers for the corresponding service.
-# Visualize Istio Mesh console using Kiali
-kubectl port-forward svc/kiali 20001:20001 -n istio-system
-
-# Get to the Prometheus UI
-kubectl port-forward svc/prometheus 9090:9090 -n istio-system
-
-# Visualize metrics in using Grafana
-kubectl port-forward svc/grafana 3000:3000 -n istio-system
-
-# Visualize application traces via Jaeger
-kubectl port-forward svc/jaeger 16686:16686 -n istio-system
+# Visualize Istio Mesh console using Kiali
+kubectl port-forward svc/kiali 20001:20001 -n istio-system
+
+# Get to the Prometheus UI
+kubectl port-forward svc/prometheus 9090:9090 -n istio-system
+
+# Visualize metrics using Grafana
+kubectl port-forward svc/grafana 3000:3000 -n istio-system
+
+# Visualize application traces via Jaeger
+kubectl port-forward svc/jaeger 16686:16686 -n istio-system
Example¶
-
Create the sample
namespace and enable the sidecar injection on it
-kubectl create namespace sample
-kubectl label namespace sample istio-injection=enabled
+
-namespace/sample created
-namespace/sample labeled
+
-
Deploy helloworld
app
-cat <<EOF > helloworld.yaml
-apiVersion: v1
-kind: Service
-metadata:
- name: helloworld
- labels:
- app: helloworld
- service: helloworld
-spec:
- ports:
- - port: 5000
- name: http
- selector:
- app: helloworld
----
-apiVersion: apps/v1
-kind: Deployment
-metadata:
- name: helloworld-v1
- labels:
- app: helloworld
- version: v1
-spec:
- replicas: 1
- selector:
- matchLabels:
- app: helloworld
- version: v1
- template:
- metadata:
- labels:
- app: helloworld
- version: v1
- spec:
- containers:
- - name: helloworld
- image: docker.io/istio/examples-helloworld-v1
- resources:
- requests:
- cpu: "100m"
- imagePullPolicy: IfNotPresent #Always
- ports:
- - containerPort: 5000
-EOF
-
-kubectl apply -f helloworld.yaml -n sample
+cat <<EOF > helloworld.yaml
+apiVersion: v1
+kind: Service
+metadata:
+ name: helloworld
+ labels:
+ app: helloworld
+ service: helloworld
+spec:
+ ports:
+ - port: 5000
+ name: http
+ selector:
+ app: helloworld
+---
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+ name: helloworld-v1
+ labels:
+ app: helloworld
+ version: v1
+spec:
+ replicas: 1
+ selector:
+ matchLabels:
+ app: helloworld
+ version: v1
+ template:
+ metadata:
+ labels:
+ app: helloworld
+ version: v1
+ spec:
+ containers:
+ - name: helloworld
+ image: docker.io/istio/examples-helloworld-v1
+ resources:
+ requests:
+ cpu: "100m"
+ imagePullPolicy: IfNotPresent #Always
+ ports:
+ - containerPort: 5000
+EOF
+
+kubectl apply -f helloworld.yaml -n sample
-service/helloworld created
-deployment.apps/helloworld-v1 created
+
-
Deploy sleep
app that we will use to connect to helloworld
app
-cat <<EOF > sleep.yaml
-apiVersion: v1
-kind: ServiceAccount
-metadata:
- name: sleep
----
-apiVersion: v1
-kind: Service
-metadata:
- name: sleep
- labels:
- app: sleep
- service: sleep
-spec:
- ports:
- - port: 80
- name: http
- selector:
- app: sleep
----
-apiVersion: apps/v1
-kind: Deployment
-metadata:
- name: sleep
-spec:
- replicas: 1
- selector:
- matchLabels:
- app: sleep
- template:
- metadata:
- labels:
- app: sleep
- spec:
- terminationGracePeriodSeconds: 0
- serviceAccountName: sleep
- containers:
- - name: sleep
- image: curlimages/curl
- command: ["/bin/sleep", "infinity"]
- imagePullPolicy: IfNotPresent
- volumeMounts:
- - mountPath: /etc/sleep/tls
- name: secret-volume
- volumes:
- - name: secret-volume
- secret:
- secretName: sleep-secret
- optional: true
-EOF
-
-kubectl apply -f sleep.yaml -n sample
+cat <<EOF > sleep.yaml
+apiVersion: v1
+kind: ServiceAccount
+metadata:
+ name: sleep
+---
+apiVersion: v1
+kind: Service
+metadata:
+ name: sleep
+ labels:
+ app: sleep
+ service: sleep
+spec:
+ ports:
+ - port: 80
+ name: http
+ selector:
+ app: sleep
+---
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+ name: sleep
+spec:
+ replicas: 1
+ selector:
+ matchLabels:
+ app: sleep
+ template:
+ metadata:
+ labels:
+ app: sleep
+ spec:
+ terminationGracePeriodSeconds: 0
+ serviceAccountName: sleep
+ containers:
+ - name: sleep
+ image: curlimages/curl
+ command: ["/bin/sleep", "infinity"]
+ imagePullPolicy: IfNotPresent
+ volumeMounts:
+ - mountPath: /etc/sleep/tls
+ name: secret-volume
+ volumes:
+ - name: secret-volume
+ secret:
+ secretName: sleep-secret
+ optional: true
+EOF
+
+kubectl apply -f sleep.yaml -n sample
-serviceaccount/sleep created
-service/sleep created
-deployment.apps/sleep created
+
-
Check all the pods in the sample
namespace
-kubectl get pods -n sample
+
-NAME READY STATUS RESTARTS AGE
-helloworld-v1-b6c45f55-bx2xk 2/2 Running 0 50s
-sleep-9454cc476-p2zxr 2/2 Running 0 15s
+NAME READY STATUS RESTARTS AGE
+helloworld-v1-b6c45f55-bx2xk 2/2 Running 0 50s
+sleep-9454cc476-p2zxr 2/2 Running 0 15s
-
Connect to helloworld
app from sleep
app and verify if the connection uses envoy proxy
-kubectl exec -n sample -c sleep \
- "$(kubectl get pod -n sample -l \
- app=sleep -o jsonpath='{.items[0].metadata.name}')" \
- -- curl -v helloworld.sample:5000/hello
+kubectl exec -n sample -c sleep \
+ "$(kubectl get pod -n sample -l \
+ app=sleep -o jsonpath='{.items[0].metadata.name}')" \
+ -- curl -v helloworld.sample:5000/hello
-* processing: helloworld.sample:5000/hello
-% Total % Received % Xferd Average Speed Time Time Time Current
- Dload Upload Total Spent Left Speed
-0 0 0 0 0 0 0 0 --:--:-- --:--:-- --:--:-- 0* Trying 172.20.26.38:5000...
-* Connected to helloworld.sample (172.20.26.38) port 5000
-> GET /hello HTTP/1.1
-> Host: helloworld.sample:5000
-> User-Agent: curl/8.2.0
-> Accept: */*
->
-< HTTP/1.1 200 OK
-< server: envoy
-< date: Fri, 21 Jul 2023 18:56:09 GMT
-< content-type: text/html; charset=utf-8
-< content-length: 58
-< x-envoy-upstream-service-time: 142
-<
-{ [58 bytes data]
-100 58 100 58 Hello version: v1, instance: helloworld-v1-b6c45f55-h592c
-0 0 392 0 --:--:-- --:--:-- --:--:-- 394
-* Connection #0 to host helloworld.sample left intact
+* processing: helloworld.sample:5000/hello
+% Total % Received % Xferd Average Speed Time Time Time Current
+ Dload Upload Total Spent Left Speed
+0 0 0 0 0 0 0 0 --:--:-- --:--:-- --:--:-- 0* Trying 172.20.26.38:5000...
+* Connected to helloworld.sample (172.20.26.38) port 5000
+> GET /hello HTTP/1.1
+> Host: helloworld.sample:5000
+> User-Agent: curl/8.2.0
+> Accept: */*
+>
+< HTTP/1.1 200 OK
+< server: envoy
+< date: Fri, 21 Jul 2023 18:56:09 GMT
+< content-type: text/html; charset=utf-8
+< content-length: 58
+< x-envoy-upstream-service-time: 142
+<
+{ [58 bytes data]
+100 58 100 58 Hello version: v1, instance: helloworld-v1-b6c45f55-h592c
+0 0 392 0 --:--:-- --:--:-- --:--:-- 394
+* Connection #0 to host helloworld.sample left intact
Destroy¶
-terraform destroy -target="module.eks_blueprints_addons" -auto-approve
-terraform destroy -target="module.eks" -auto-approve
-terraform destroy -auto-approve
+terraform destroy -target="module.eks_blueprints_addons" -auto-approve
+terraform destroy -target="module.eks" -auto-approve
+terraform destroy -auto-approve
See here for more details on cleaning up the resources created.
diff --git a/search/search_index.json b/search/search_index.json
index be556fa4b8..986835079f 100644
--- a/search/search_index.json
+++ b/search/search_index.json
@@ -1 +1 @@
-{"config":{"lang":["en"],"separator":"[\\s\\-]+","pipeline":["stopWordFilter"]},"docs":[{"location":"","title":"Overview","text":""},{"location":"#amazon-eks-blueprints-for-terraform","title":"Amazon EKS Blueprints for Terraform","text":"Welcome to Amazon EKS Blueprints for Terraform!
This project contains a collection of Amazon EKS cluster patterns implemented in Terraform that demonstrate how fast and easy it is for customers to adopt Amazon EKS. The patterns can be used by AWS customers, partners, and internal AWS teams to configure and manage complete EKS clusters that are fully bootstrapped with the operational software that is needed to deploy and operate workloads.
"},{"location":"#motivation","title":"Motivation","text":"Kubernetes is a powerful and extensible container orchestration technology that allows you to deploy and manage containerized applications at scale. The extensible nature of Kubernetes also allows you to use a wide range of popular open-source tools in Kubernetes clusters. However, with the wide array of tooling and design choices available, configuring an EKS cluster that meets your organization\u2019s specific needs can take a significant amount of time. It involves integrating a wide range of open-source tools and AWS services as well as expertise in AWS and Kubernetes.
AWS customers have asked for patterns that demonstrate how to integrate the landscape of Kubernetes tools and make it easy for them to provision complete, opinionated EKS clusters that meet specific application requirements. Customers can utilize EKS Blueprints to configure and deploy purpose built EKS clusters, and start onboarding workloads in days, rather than months.
"},{"location":"#consumption","title":"Consumption","text":"EKS Blueprints for Terraform has been designed to be consumed in the following manners:
- Reference: Users can refer to the patterns and snippets provided to help guide them to their desired solution. Users will typically view how the pattern or snippet is configured to achieve the desired end result and then replicate that in their environment.
- Copy & Paste: Users can copy and paste the patterns and snippets into their own environment, using EKS Blueprints as the starting point for their implementation. Users can then adapt the initial pattern to customize it to their specific needs.
EKS Blueprints for Terraform are not intended to be consumed as-is directly from this project. In \"Terraform speak\" - the patterns and snippets provided in this repository are not designed to be consumed as a Terraform module. Therefore, the patterns provided only contain variables
when certain information is required to deploy the pattern (i.e. - a Route53 hosted zone ID, or ACM certificate ARN) and generally use local variables. If you wish to deploy the patterns into a different region or with other changes, it is recommended that you make those modifications locally before applying the pattern. EKS Blueprints for Terraform will not expose variables and outputs in the same manner that Terraform modules follow in order to avoid confusion around the consumption model.
However, we do have a number of Terraform modules that were created to support EKS Blueprints in addition to the community hosted modules. Please see the respective projects for more details on the modules constructed to support EKS Blueprints for Terraform; those projects are listed below.
terraform-aws-eks-blueprint-addon
- (Note the singular form) Terraform module which can provision an addon using the Terraform helm_release
resource in addition to an IAM role for service account (IRSA). terraform-aws-eks-blueprint-addons
- (Note the plural form) Terraform module which can provision multiple addons; both EKS addons using the aws_eks_addon
resource as well as Helm chart based addons using the terraform-aws-eks-blueprint-addon
module. terraform-aws-eks-blueprints-teams
- Terraform module that creates Kubernetes multi-tenancy resources and configurations, allowing both administrators and application developers to access only the resources which they are responsible for.
"},{"location":"#related-projects","title":"Related Projects","text":"In addition to the supporting EKS Blueprints Terraform modules listed above, there are a number of related projects that users should be aware of:
-
GitOps
terraform-aws-eks-ack-addons
- Terraform module to deploy ACK controllers onto EKS clusters crossplane-on-eks
- Crossplane Blueprints is an open source repo to bootstrap Amazon EKS clusters and provision AWS resources using a library of Crossplane Compositions (XRs) with Composite Resource Definitions (XRDs).
-
Data on EKS
data-on-eks
- A collection of blueprints intended for data workloads on Amazon EKS. terraform-aws-eks-data-addons
- Terraform module to deploy multiple addons that are specific to data workloads on EKS clusters.
-
Observability Accelerator
terraform-aws-observability-accelerator
- A set of opinionated modules to help you set up observability for your AWS environments with AWS-managed observability services such as Amazon Managed Service for Prometheus, Amazon Managed Grafana, AWS Distro for OpenTelemetry (ADOT) and Amazon CloudWatch
"},{"location":"#terraform-caveats","title":"Terraform Caveats","text":"EKS Blueprints for Terraform does not intend to teach users the recommended practices for Terraform nor does it offer guidance on how users should structure their Terraform projects. The patterns provided are intended to show users how they can achieve a defined architecture or configuration in a way that they can quickly and easily get up and running to start interacting with that pattern. Therefore, there are a few caveats users should be aware of when using EKS Blueprints for Terraform:
-
We recognize that most users will already have an existing VPC in a separate Terraform workspace. However, the patterns provided come complete with a VPC to ensure a stable, deployable example that has been tested and validated.
-
HashiCorp does not recommend providing computed values in provider blocks, which means that the cluster configuration should be defined in a workspace separate from the resources deployed onto the cluster (i.e. - addons). However, to simplify the pattern experience, we have defined everything in one workspace and provided instructions to provision the patterns using a targeted apply approach. Users are encouraged to investigate a Terraform project structure that suits their needs; EKS Blueprints for Terraform does not have an opinion in this matter and will defer to HashiCorp's guidance.
-
Patterns are not intended to be consumed in-place in the same manner that one would consume a module. Therefore, we do not provide variables and outputs to expose various levels of configuration for the examples. Users can modify the pattern locally after cloning to suit their requirements.
-
Please see the FAQ section on authenticating Kubernetes based providers (kubernetes
, helm
, kubectl
) to Amazon EKS clusters regarding the use of static tokens versus dynamic tokens using the awscli
.
"},{"location":"#support-feedback","title":"Support & Feedback","text":"EKS Blueprints for Terraform is maintained by AWS Solution Architects. It is not part of an AWS service and support is provided on a best-effort basis by the EKS Blueprints community. To provide feedback, please use the issue templates provided. If you are interested in contributing to EKS Blueprints, see the Contribution guide.
"},{"location":"#security","title":"Security","text":"See CONTRIBUTING for more information.
"},{"location":"#license","title":"License","text":"Apache-2.0 Licensed. See LICENSE.
"},{"location":"faq/","title":"Frequently Asked Questions","text":""},{"location":"faq/#timeouts-on-destroy","title":"Timeouts on destroy","text":"Customers who are deleting their environments using terraform destroy
may see timeout errors when VPCs are being deleted. This is due to a known issue in the vpc-cni
Customers may face a situation where ENIs that were attached to EKS managed nodes (same may apply to self-managed nodes) are not being deleted by the VPC CNI as expected which leads to IaC tool failures, such as:
- ENIs are left on subnets
- EKS managed security group which is attached to the ENI can\u2019t be deleted by EKS
The current recommendation is to execute cleanup in the following order:
- delete all pods that have been created in the cluster.
- add delay/ wait
- delete VPC CNI
- delete nodes
- delete cluster
"},{"location":"faq/#leaked-cloudwatch-logs-group","title":"Leaked CloudWatch Logs Group","text":"Sometimes, customers may see the CloudWatch Log Group for EKS cluster being created is left behind after their blueprint has been destroyed using terraform destroy
. This happens because even after Terraform deletes the CloudWatch log group, there are still logs being processed behind the scenes by Amazon EKS, and the service continues to write logs after recreating the log group using the EKS service IAM role, which users don't have control over. This results in a Terraform failure when the same blueprint is recreated, due to the existing log group left behind.
There are two options here:
-
During cluster creation set var.create_cloudwatch_log_group
to false
. This will tell the EKS module to not create the log group, but instead let the EKS service create the log group. This means that upon cluster deletion the log group will be left behind but there will not be Terraform failures if you re-create the same cluster as Terraform does not manage the log group creation/deletion anymore.
-
During cluster creation set var.create_cloudwatch_log_group
to true
. This will tell the EKS module to create the log group via Terraform. The EKS service will detect the log group and will start forwarding the logs for the log types enabled. Upon deletion, Terraform will delete the log group, but depending upon any un-forwarded logs, the EKS service may recreate the log group using the service role. This will result in Terraform errors if the same blueprint is recreated. To proceed, manually delete the log group using the console or CLI, then rerun terraform destroy
.
"},{"location":"faq/#provider-authentication","title":"Provider Authentication","text":"The chain of events when provisioning an example is typically in the stages of VPC -> EKS cluster -> addons and manifests. Per Terraform's recommendation, it is not recommended to pass an unknown value into provider configurations. However, for the sake of simplicity and ease of use, Blueprints does specify the AWS provider along with the Kubernetes, Helm, and Kubectl providers in order to show the full configuration required for provisioning example. Note - this is the configuration required to provision the example, not necessarily the shape of how the configuration should be structured; users are encouraged to split up EKS cluster creation from addon and manifest provisioning to align with Terraform's recommendations.
With that said, the examples here are combining the providers and users can sometimes encounter various issues with the provider authentication methods. There are primarily two methods for authenticating the Kubernetes, Helm, and Kubectl providers to the EKS cluster created:
- Using a static token which has a lifetime of 15 minutes per the EKS service documentation.
- Using the
exec()
method which will fetch a token at the time of Terraform invocation.
The Kubernetes and Helm providers recommend the exec()
method, however this has the caveat that it requires the awscli to be installed on the machine running Terraform AND of at least a minimum version to support the API spec used by the provider (i.e. - \"client.authentication.k8s.io/v1alpha1\"
, \"client.authentication.k8s.io/v1beta1\"
, etc.). Selecting the appropriate provider authentication method is left up to users, and the examples used in this project will default to using the static token method for ease of use.
Users of the static token method should be aware that if they receive a 401 Unauthorized
message, they might have a token that has expired and will need to run terraform refresh
to get a new token. Users of the exec()
method should be aware that the exec()
method is reliant on the awscli and the associated authentication API version; the awscli version may need to be updated to support a later API version required by the Kubernetes version in use.
The following examples demonstrate either method that users can utilize - please refer to the associated provider's documentation for further details on configuration.
"},{"location":"faq/#static-token-example","title":"Static Token Example","text":"provider \"kubernetes\" {\nhost = module.eks.cluster_endpoint\ncluster_ca_certificate = base64decode(module.eks.cluster_certificate_authority_data)\ntoken = data.aws_eks_cluster_auth.this.token\n}\nprovider \"helm\" {\nkubernetes {\nhost = module.eks.cluster_endpoint\ncluster_ca_certificate = base64decode(module.eks.cluster_certificate_authority_data)\ntoken = data.aws_eks_cluster_auth.this.token\n}\n}\nprovider \"kubectl\" {\napply_retry_count = 10\nhost = module.eks.cluster_endpoint\ncluster_ca_certificate = base64decode(module.eks.cluster_certificate_authority_data)\nload_config_file = false\ntoken = data.aws_eks_cluster_auth.this.token\n}\ndata \"aws_eks_cluster_auth\" \"this\" {\nname = module.eks.cluster_name\n}\n
"},{"location":"faq/#exec-example","title":"exec()
Example","text":"Usage of exec plugin for AWS credentials
Links to References related to this issue
- https://github.com/hashicorp/terraform/issues/29182
- https://github.com/aws/aws-cli/pull/6476
provider \"kubernetes\" {\nhost = module.eks.cluster_endpoint\ncluster_ca_certificate = base64decode(module.eks.cluster_certificate_authority_data)\nexec {\napi_version = \"client.authentication.k8s.io/v1beta1\"\ncommand = \"aws\"\n # This requires the awscli to be installed locally where Terraform is executed\nargs = [\"eks\", \"get-token\", \"--cluster-name\", module.eks.cluster_name]\n}\n}\nprovider \"helm\" {\nkubernetes {\nhost = module.eks.cluster_endpoint\ncluster_ca_certificate = base64decode(module.eks.cluster_certificate_authority_data)\nexec {\napi_version = \"client.authentication.k8s.io/v1beta1\"\ncommand = \"aws\"\n # This requires the awscli to be installed locally where Terraform is executed\nargs = [\"eks\", \"get-token\", \"--cluster-name\", module.eks.cluster_name]\n}\n}\n}\nprovider \"kubectl\" {\napply_retry_count = 5\nhost = module.eks.cluster_endpoint\ncluster_ca_certificate = base64decode(module.eks.cluster_certificate_authority_data)\nload_config_file = false\nexec {\napi_version = \"client.authentication.k8s.io/v1beta1\"\ncommand = \"aws\"\n # This requires the awscli to be installed locally where Terraform is executed\nargs = [\"eks\", \"get-token\", \"--cluster-name\", module.eks.cluster_name]\n}\n}\n
"},{"location":"faq/#unable-to-destroy-namespace-created-by-terraform","title":"Unable to destroy namespace created by Terraform","text":"In some cases, when you try to run terraform destroy on Kubernetes resources created by Terraform, such as a namespace, you may see failures such as timeouts and context deadline exceeded errors. Namespaces are one of the resources where we have seen this before. The main reason this happens is that orphaned resources created through CRDs of addons (such as ArgoCD, AWS LBC and more) are left behind after the addons are deleted; this varies case by case. For example, with namespaces:
-
Confirm the namespace is hanging in status Terminating
kubectl get namespaces\n
-
Check for any orphaned resources in the namespace, making sure to replace <namespace_name> with your namespace:
kubectl api-resources --verbs=list --namespaced -o name | xargs -n 1 kubectl get \\\n--show-kind --ignore-not-found -n <namespace_name>\n
-
For each resource in the above output, patch the resource's finalizers:
kubectl patch RESOURCE NAME -p '{\"metadata\":{\"finalizers\":[]}}' --type=merge\n
-
Check the status of the namespace; if needed, you may also need to patch the namespace finalizers
kubectl patch ns <ns-name> -p '{\"spec\":{\"finalizers\":null}}'\n
"},{"location":"getting-started/","title":"Getting Started","text":"This getting started guide will help you deploy your first pattern using EKS Blueprints.
"},{"location":"getting-started/#prerequisites","title":"Prerequisites","text":"Ensure that you have installed the following tools locally:
- awscli
- kubectl
- terraform
"},{"location":"getting-started/#deploy","title":"Deploy","text":" -
For consuming EKS Blueprints, please see the Consumption section. For exploring and trying out the patterns provided, please clone the project locally to quickly get up and running with a pattern. After cloning the project locally, cd
into the pattern directory of your choice.
-
To provision the pattern, the typical steps of execution are as follows:
terraform init\nterraform apply -target=\"module.vpc\" -auto-approve\nterraform apply -target=\"module.eks\" -auto-approve\nterraform apply -auto-approve\n
For patterns that deviate from this general flow, see the pattern's respective README.md
for more details.
Terraform targeted apply
Please see the Terraform Caveats section for details on the use of targeted Terraform applies
-
Once all of the resources have successfully been provisioned, the following command can be used to update the kubeconfig
on your local machine and allow you to interact with your EKS Cluster using kubectl
.
aws eks --region <REGION> update-kubeconfig --name <CLUSTER_NAME>\n
Pattern Terraform outputs
Most examples will output the aws eks update-kubeconfig ...
command as part of the Terraform apply output to simplify this process for users
Private clusters
Clusters that do not enable the cluster's public endpoint will require users to access the cluster from within the VPC. For these patterns, a sample EC2 or other means are provided to demonstrate how to access those clusters privately
and without exposing the public endpoint. Please see the respective pattern's README.md
for more details.
-
Once you have updated your kubeconfig
, you can verify that you are able to interact with your cluster by running the following command:
kubectl get nodes\n
This should return a list of the node(s) running in the cluster created. If any errors are encountered, please re-trace the steps above and consult the pattern's README.md
for more details on any additional/specific steps that may be required.
"},{"location":"getting-started/#destroy","title":"Destroy","text":"To teardown and remove the resources created in the pattern, the typical steps of execution are as follows:
terraform destroy -target=\"module.eks_blueprints_addons\" -auto-approve\nterraform destroy -target=\"module.eks\" -auto-approve\nterraform destroy -auto-approve\n
Resources created outside of Terraform
Depending on the pattern, some resources may have been created that Terraform is not aware of that will cause issues when attempting to clean up the pattern. For example, Karpenter is responsible for creating additional EC2 instances to satisfy the pod scheduling requirements. These instances will not be cleaned up by Terraform and will need to be de-provisioned BEFORE attempting to terraform destroy
. This is why it is important that the addons, or any resources provisioned onto the cluster are cleaned up first. Please see the respective pattern's README.md
for more details.
"},{"location":"_partials/destroy/","title":"Destroy","text":"terraform destroy -target=\"module.eks_blueprints_addons\" -auto-approve\nterraform destroy -target=\"module.eks\" -auto-approve\nterraform destroy -auto-approve\n
See here for more details on cleaning up the resources created.
"},{"location":"internal/ci/","title":"E2E tests","text":"We use GitHub Actions to run end-to-end tests to verify all PRs. The GitHub Actions used are a combination of aws-actions/configure-aws-credentials
and hashicorp/setup-terraform@v1
.
"},{"location":"internal/ci/#setup","title":"Setup","text":" - Use the following CloudFormation template to setup a new IAM role.
Parameters:\nGitHubOrg:\nType: String\nRepositoryName:\nType: String\nOIDCProviderArn:\nDescription: Arn for the GitHub OIDC Provider.\nDefault: \"\"\nType: String\nConditions:\nCreateOIDCProvider: !Equals\n- !Ref OIDCProviderArn\n- \"\"\nResources:\nRole:\nType: AWS::IAM::Role\nProperties:\nAssumeRolePolicyDocument:\nStatement:\n- Effect: Allow\nAction: sts:AssumeRoleWithWebIdentity\nPrincipal:\nFederated: !If\n- CreateOIDCProvider\n- !Ref GithubOidc\n- !Ref OIDCProviderArn\nCondition:\nStringLike:\ntoken.actions.githubusercontent.com:sub: !Sub repo:${GitHubOrg}/${RepositoryName}:*\nGithubOidc:\nType: AWS::IAM::OIDCProvider\nCondition: CreateOIDCProvider\nProperties:\nUrl: https://token.actions.githubusercontent.com\nClientIdList:\n- sts.amazonaws.com\nThumbprintList:\n- a031c46782e6e6c662c2c87c76da9aa62ccabd8e\nOutputs:\nRole:\nValue: !GetAtt Role.Arn\n
-
Add a permissible IAM Policy to the role created above. For our purpose AdministratorAccess
works the best.
-
Setup a GitHub repo secret called ROLE_TO_ASSUME
and set it to ARN of the role created in 1.
-
We use an S3 backend for the e2e tests. This allows us to recover from any failures during the apply
stage. If you are setting up your own CI pipeline change the s3 bucket name in backend configuration of the example.
"},{"location":"patterns/agones-game-controller/","title":"Agones Game Controller","text":""},{"location":"patterns/agones-game-controller/#amazon-eks-deployment-with-agones-gaming-kubernetes-controller","title":"Amazon EKS Deployment with Agones Gaming Kubernetes Controller","text":"This pattern shows how to deploy and run gaming applications on Amazon EKS using the Agones Kubernetes Controller
Agones is an open source Kubernetes controller that provisions and manages dedicated game server processes within Kubernetes clusters using standard Kubernetes tooling and APIs. This model also allows any matchmaker to interact directly with Agones via the Kubernetes API to provision a dedicated game server
Amazon GameLift enables developers to deploy, operate, and scale dedicated, low-cost servers in the cloud for session-based, multiplayer games. Built on AWS global computing infrastructure, GameLift helps deliver high-performance, high-reliability, low-cost game servers while dynamically scaling your resource usage to meet worldwide player demand. See below for more information on how GameLift FleetIQ can be integrated with Agones deployed on Amazon EKS.
Amazon GameLift FleetIQ optimizes the use of low-cost Spot Instances for cloud-based game hosting with Amazon EC2. With GameLift FleetIQ, you can work directly with your hosting resources in Amazon EC2 and Auto Scaling while taking advantage of GameLift optimizations to deliver inexpensive, resilient game hosting for your players and makes the use of low-cost Spot Instances viable for game hosting
This blog walks through the details of deploying EKS Cluster using eksctl and deploy Agones with GameLift FleetIQ.
"},{"location":"patterns/agones-game-controller/#deploy","title":"Deploy","text":"See here for the prerequisites and steps to deploy this pattern.
"},{"location":"patterns/agones-game-controller/#validate","title":"Validate","text":" -
Deploy the sample game server
kubectl create -f https://raw.githubusercontent.com/googleforgames/agones/release-1.32.0/examples/simple-game-server/gameserver.yaml\nkubectl get gs\n
NAME STATE ADDRESS PORT NODE AGE\nsimple-game-server-7r6jr Ready 34.243.345.22 7902 ip-10-1-23-233.eu-west-1.compute.internal 11h\n
-
Test the sample game server using netcat
echo -n \"UDP test - Hello EKS Blueprints!\" | nc -u 34.243.345.22 7902\n
Hello EKS Blueprints!\nACK: Hello EKS Blueprints!\nEXIT\nACK: EXIT\n
"},{"location":"patterns/agones-game-controller/#destroy","title":"Destroy","text":"Delete the resources created by the sample game server first:
kubectl -n default delete gs --all || true\n
terraform destroy -target=\"module.eks_blueprints_addons\" -auto-approve\nterraform destroy -target=\"module.eks\" -auto-approve\nterraform destroy -auto-approve\n
See here for more details on cleaning up the resources created.
"},{"location":"patterns/appmesh-mtls/","title":"AWS AppMesh mTLS","text":""},{"location":"patterns/appmesh-mtls/#eks-cluster-w-appmesh-mtls","title":"EKS Cluster w/ AppMesh mTLS","text":"This pattern demonstrates how to deploy and configure AppMesh mTLS on an Amazon EKS cluster.
"},{"location":"patterns/appmesh-mtls/#deploy","title":"Deploy","text":"See here for the prerequisites and steps to deploy this pattern.
"},{"location":"patterns/appmesh-mtls/#validate","title":"Validate","text":" -
List the created Resources.
kubectl get pods -A\n
NAMESPACE NAME READY STATUS RESTARTS AGE\nappmesh-system appmesh-controller-7c98b87bdc-q6226 1/1 Running 0 4h44m\ncert-manager cert-manager-87f5555f-tcxj7 1/1 Running 0 4h43m\ncert-manager cert-manager-cainjector-8448ff8ddb-wwjsc 1/1 Running 0 4h43m\ncert-manager cert-manager-webhook-5468b675b-fvdwk 1/1 Running 0 4h43m\nkube-system aws-node-rf4wg 1/1 Running 0 4h43m\nkube-system aws-node-skkwh 1/1 Running 0 4h43m\nkube-system aws-privateca-issuer-b6fb8c5bd-hh8q4 1/1 Running 0 4h44m\nkube-system coredns-5f9f955df6-qhr6p 1/1 Running 0 4h44m\nkube-system coredns-5f9f955df6-tw8r7 1/1 Running 0 4h44m\nkube-system kube-proxy-q72l9 1/1 Running 0 4h43m\nkube-system kube-proxy-w54pc 1/1 Running 0 4h43m\n
-
List the AWSPCA cluster issuers:
kubectl get awspcaclusterissuers.awspca.cert-manager.io\n
NAME AGE\nappmesh-mtls 4h42m\n
-
List the example certificates; the example certificate should be shown:
kubectl get certificate\n
NAME READY SECRET AGE\nexample True example-clusterissuer 4h12m\n
-
Describe the example certificate:
kubectl describe secret example-clusterissuer\n
Name: example-clusterissuer\nNamespace: default\nLabels: controller.cert-manager.io/fao=true\nAnnotations: cert-manager.io/alt-names:\n cert-manager.io/certificate-name: example\n cert-manager.io/common-name: example.com\n cert-manager.io/ip-sans:\n cert-manager.io/issuer-group: awspca.cert-manager.io\n cert-manager.io/issuer-kind: AWSPCAClusterIssuer\n cert-manager.io/issuer-name: appmesh-mtls\n cert-manager.io/uri-sans:\n\nType: kubernetes.io/tls\n\nData\n====\nca.crt: 1785 bytes\ntls.crt: 1517 bytes\ntls.key: 1675 bytes\n
"},{"location":"patterns/appmesh-mtls/#example","title":"Example","text":"The full documentation for this example can be found here.
-
Label the default
Namespace to allow Side Car Injection:
kubectl label namespaces default appmesh.k8s.aws/sidecarInjectorWebhook=enabled\n
namespace/default labeled\n
-
Create the mesh:
cat <<EOF | kubectl apply -f -\napiVersion: appmesh.k8s.aws/v1beta2\nkind: Mesh\nmetadata:\n name: appmesh-example\nspec:\n namespaceSelector:\n matchLabels:\n kubernetes.io/metadata.name: default\nEOF\nmesh.appmesh.k8s.aws/appmesh-example created\n
-
Create a virtual node:
cat <<EOF | kubectl apply -f -\napiVersion: appmesh.k8s.aws/v1beta2\nkind: VirtualNode\nmetadata:\n name: appmesh-example-vn\n namespace: default\nspec:\n podSelector:\n matchLabels:\n app: appmesh-example\n listeners:\n - portMapping:\n port: 80\n protocol: http\n backendDefaults:\n clientPolicy:\n tls:\n certificate:\n sds:\n secretName: example-clusterissuer\n enforce: true\n ports: []\n validation:\n trust:\n acm:\n certificateAuthorityARNs:\n - arn:aws:acm-pca:us-west-2:978045894046:certificate-authority/4386d166-4d68-4347-b940-4324ac493d65\n serviceDiscovery:\n dns:\n hostname: appmesh-example-svc.default.svc.cluster.local\nEOF\n
-
Create a virtual router:
cat <<EOF | kubectl apply -f -\napiVersion: appmesh.k8s.aws/v1beta2\nkind: VirtualRouter\nmetadata:\n namespace: default\n name: appmesh-example-vr\nspec:\n listeners:\n - portMapping:\n port: 80\n protocol: http\n routes:\n - name: appmesh-example-route\n httpRoute:\n match:\n prefix: /\n action:\n weightedTargets:\n - virtualNodeRef:\n name: appmesh-example-vn\n weight: 1\nEOF\n
-
Create a virtual service:
cat <<EOF | kubectl apply -f -\napiVersion: appmesh.k8s.aws/v1beta2\nkind: VirtualService\nmetadata:\n name: appmesh-example-vs\n namespace: default\nspec:\n awsName: appmesh-example-svc.default.svc.cluster.local\n provider:\n virtualRouter:\n virtualRouterRef:\n name: appmesh-example-vr\nEOF\n
-
Create a deployment and a service in the default
namespace:
cat <<EOF | kubectl apply -f -\napiVersion: v1\nkind: Service\nmetadata:\n name: appmesh-example-svc\n namespace: default\n labels:\n app: appmesh-example\nspec:\n selector:\n app: appmesh-example\n ports:\n - protocol: TCP\n port: 80\n targetPort: 80\n---\napiVersion: apps/v1\nkind: Deployment\nmetadata:\n name: appmesh-example-app\n namespace: default\n labels:\n app: appmesh-example\nspec:\n replicas: 2\n selector:\n matchLabels:\n app: appmesh-example\n template:\n metadata:\n labels:\n app: appmesh-example\n spec:\n serviceAccountName: appmesh-example-sa\n containers:\n - name: nginx\n image: nginx:1.19.0\n ports:\n - containerPort: 80\nEOF\n
-
Validate if the pods are in the Running
state with 2 containers, one of them should include the AppMesh sidecar:
kubectl get pods\n
NAME READY STATUS RESTARTS AGE\nappmesh-example-app-6946cdbdf6-gnxww 2/2 Running 0 54s\nappmesh-example-app-6946cdbdf6-nx9tg 2/2 Running 0 54s\n
"},{"location":"patterns/appmesh-mtls/#destroy","title":"Destroy","text":"Delete the example resources created first:
# delete all example resources created in the default namespace\nkubectl delete all --all\n
terraform destroy -target=\"module.eks_blueprints_addons\" -auto-approve\nterraform destroy -target=\"module.eks\" -auto-approve\nterraform destroy -auto-approve\n
See here for more details on cleaning up the resources created.
"},{"location":"patterns/argocd/","title":"ArgoCD","text":""},{"location":"patterns/argocd/#amazon-eks-cluster-w-argocd","title":"Amazon EKS Cluster w/ ArgoCD","text":"This pattern demonstrates an EKS cluster that uses ArgoCD for application deployments.
- Documentation
- EKS Blueprints Add-ons Repo
- EKS Blueprints Workloads Repo
"},{"location":"patterns/argocd/#deploy","title":"Deploy","text":"See here for the prerequisites and steps to deploy this pattern.
"},{"location":"patterns/argocd/#validate","title":"Validate","text":" -
List out the pods running currently:
kubectl get pods -A\n
NAMESPACE NAME READY STATUS RESTARTS AGE\nargo-rollouts argo-rollouts-5d47ccb8d4-854s6 1/1 Running 0 23h\nargo-rollouts argo-rollouts-5d47ccb8d4-srjk9 1/1 Running 0 23h\nargocd argo-cd-argocd-application-controller-0 1/1 Running 0 24h\nargocd argo-cd-argocd-applicationset-controller-547f9cfd68-kp89p 1/1 Running 0 24h\nargocd argo-cd-argocd-dex-server-55765f7cd7-t8r2f 1/1 Running 0 24h\nargocd argo-cd-argocd-notifications-controller-657df4dbcb-p596r 1/1 Running 0 24h\nargocd argo-cd-argocd-repo-server-7d4dddf886-2vmgt 1/1 Running 0 24h\nargocd argo-cd-argocd-repo-server-7d4dddf886-bm7tz 1/1 Running 0 24h\nargocd argo-cd-argocd-server-775ddf74b8-8jzvc 1/1 Running 0 24h\nargocd argo-cd-argocd-server-775ddf74b8-z6lz6 1/1 Running 0 24h\nargocd argo-cd-redis-ha-haproxy-6d7b7d4656-b8bt8 1/1 Running 0 24h\nargocd argo-cd-redis-ha-haproxy-6d7b7d4656-mgjx5 1/1 Running 0 24h\nargocd argo-cd-redis-ha-haproxy-6d7b7d4656-qsbgw 1/1 Running 0 24h\nargocd argo-cd-redis-ha-server-0 4/4 Running 0 24h\nargocd argo-cd-redis-ha-server-1 4/4 Running 0 24h\nargocd argo-cd-redis-ha-server-2 4/4 Running 0 24h\ncert-manager cert-manager-586ccb6656-2v8mf 1/1 Running 0 23h\ncert-manager cert-manager-cainjector-99d64d795-2gwnj 1/1 Running 0 23h\ncert-manager cert-manager-webhook-8d87786cb-24kww 1/1 Running 0 23h\ngeolocationapi geolocationapi-85599c5c74-rqqqs 2/2 Running 0 25m\ngeolocationapi geolocationapi-85599c5c74-whsp6 2/2 Running 0 25m\ngeordie downstream0-7f6ff946b6-r8sxc 1/1 Running 0 25m\ngeordie downstream1-64c7db6f9-rsbk5 1/1 Running 0 25m\ngeordie frontend-646bfb947c-wshpb 1/1 Running 0 25m\ngeordie redis-server-6bd7885d5d-s7rqw 1/1 Running 0 25m\ngeordie yelb-appserver-5d89946ffd-vkxt9 1/1 Running 0 25m\ngeordie yelb-db-697bd9f9d9-2t4b6 1/1 Running 0 25m\ngeordie yelb-ui-75ff8b96ff-fh6bw 1/1 Running 0 25m\nkarpenter karpenter-7b99fb785d-87k6h 1/1 Running 0 106m\nkarpenter karpenter-7b99fb785d-lkq9l 1/1 Running 0 106m\nkube-system aws-load-balancer-controller-6cf9bdbfdf-h7bzb 1/1 
Running 0 20m\nkube-system aws-load-balancer-controller-6cf9bdbfdf-vfbrj 1/1 Running 0 20m\nkube-system aws-node-cvjmq 1/1 Running 0 24h\nkube-system aws-node-fw7zc 1/1 Running 0 24h\nkube-system aws-node-l7589 1/1 Running 0 24h\nkube-system aws-node-nll82 1/1 Running 0 24h\nkube-system aws-node-zhz8l 1/1 Running 0 24h\nkube-system coredns-7975d6fb9b-5sf7r 1/1 Running 0 24h\nkube-system coredns-7975d6fb9b-k78dz 1/1 Running 0 24h\nkube-system ebs-csi-controller-5cd4944c94-7jwlb 6/6 Running 0 24h\nkube-system ebs-csi-controller-5cd4944c94-8tcsg 6/6 Running 0 24h\nkube-system ebs-csi-node-66jmx 3/3 Running 0 24h\nkube-system ebs-csi-node-b2pw4 3/3 Running 0 24h\nkube-system ebs-csi-node-g4v9z 3/3 Running 0 24h\nkube-system ebs-csi-node-k7nvp 3/3 Running 0 24h\nkube-system ebs-csi-node-tfq9q 3/3 Running 0 24h\nkube-system kube-proxy-4x8vm 1/1 Running 0 24h\nkube-system kube-proxy-gtlpm 1/1 Running 0 24h\nkube-system kube-proxy-vfnbf 1/1 Running 0 24h\nkube-system kube-proxy-z9wdh 1/1 Running 0 24h\nkube-system kube-proxy-zzx9m 1/1 Running 0 24h\nkube-system metrics-server-7f4db5fd87-9n6dv 1/1 Running 0 23h\nkube-system metrics-server-7f4db5fd87-t8wxg 1/1 Running 0 23h\nkube-system metrics-server-7f4db5fd87-xcxlv 1/1 Running 0 23h\nteam-burnham burnham-66fccc4fb5-k4qtm 1/1 Running 0 25m\nteam-burnham burnham-66fccc4fb5-rrf4j 1/1 Running 0 25m\nteam-burnham burnham-66fccc4fb5-s9kbr 1/1 Running 0 25m\nteam-burnham nginx-7d47cfdff7-lzdjb 1/1 Running 0 25m\nteam-riker deployment-2048-6f7c78f959-h76rx 1/1 Running 0 25m\nteam-riker deployment-2048-6f7c78f959-skmrr 1/1 Running 0 25m\nteam-riker deployment-2048-6f7c78f959-tn9dw 1/1 Running 0 25m\nteam-riker guestbook-ui-c86c478bd-zg2z4 1/1 Running 0 25m\n
-
Access the ArgoCD UI by running the following command:
kubectl port-forward svc/argo-cd-argocd-server 8080:443 -n argocd\n
Then, open your browser and navigate to https://localhost:8080/
Username should be admin
.
The password will be the generated password by random_password
resource, stored in AWS Secrets Manager. You can easily retrieve the password by running the following command:
aws secretsmanager get-secret-value --secret-id <SECRET_NAME> --region <REGION>\n
Replace <SECRET_NAME>
with the name of the secret; if you haven't changed it, then it should be argocd
, also, make sure to replace <REGION>
with the region you are using.
Pick up the secret from the SecretString
.
"},{"location":"patterns/argocd/#destroy","title":"Destroy","text":"First, we need to ensure that the ArgoCD applications are properly cleaned up from the cluster, this can be achieved in multiple ways:
- Disabling the
argocd_applications
configuration and running terraform apply
again - Deleting the apps using
argocd
cli - Deleting the apps using
kubectl
following ArgoCD guidance
terraform destroy -target=\"module.eks_blueprints_addons\" -auto-approve\nterraform destroy -target=\"module.eks\" -auto-approve\nterraform destroy -auto-approve\n
See here for more details on cleaning up the resources created.
"},{"location":"patterns/blue-green-upgrade/","title":"Blue/Green Upgrade","text":""},{"location":"patterns/blue-green-upgrade/#bluegreen-migration","title":"Blue/Green Migration","text":"This directory provides a solution based on EKS Blueprint for Terraform that shows how to leverage blue/green or canary application workload migration between EKS clusters, using Amazon Route 53 weighted routing feature. The workloads will be dynamically exposed using AWS LoadBalancer Controller and External DNS add-on.
We are leveraging the existing EKS Blueprints Workloads GitHub repository sample to deploy our GitOps ArgoCD applications, which are defined as helm charts. We are leveraging ArgoCD Apps of apps pattern where an ArgoCD Application can also reference other Helm charts to deploy.
You can also find more information in the associated blog post
"},{"location":"patterns/blue-green-upgrade/#table-of-content","title":"Table of content","text":" - Blue/Green or Canary Amazon EKS clusters migration for stateless ArgoCD workloads
- Table of content
- Project structure
- Prerequisites
- Quick Start
- Configure the Stacks
- Create the environment stack
- Create the Blue cluster
- Create the Green cluster
- How this work
- Watch our Workload: we focus on team-burnham namespace.
- Using AWS Route53 and External DNS
- Configure Ingress resources with weighted records
- Automate the migration from Terraform
- Delete the Stack
- Delete the EKS Cluster(s)
- TL;DR
- Manual
- Delete the environment stack
- Troubleshoot
- External DNS Ownership
- Check Route 53 Record status
- Check current resolution and TTL value
- Get ArgoCD UI Password
"},{"location":"patterns/blue-green-upgrade/#project-structure","title":"Project structure","text":"See the Architecture of what we are building
Our sample is composed of four main directories:
- environment \u2192 this stack will create the common VPC and its dependencies used by our EKS clusters: create a Route53 sub domain hosted zone for our sample, a wildcard certificate on Certificate Manager for our applications TLS endpoints, and a SecretManager password for the ArgoCD UIs.
- modules/eks_cluster \u2192 local module defining the EKS blueprint cluster with ArgoCD add-on which will automatically deploy additional add-ons and our demo workloads
- eks-blue \u2192 an instance of the eks_cluster module to create blue cluster
- eks-green \u2192 an instance of the eks_cluster module to create green cluster
So we are going to create 2 EKS clusters, sharing the same VPC, and each one of them will install locally our workloads from the central GitOps repository leveraging ArgoCD add-on. In the GitOps workload repository, we have configured our applications deployments to leverage AWS Load Balancers Controllers annotations, so that applications will be exposed on AWS Load Balancers, created from our Kubernetes manifests. We will have 1 load balancer per cluster for each of our applications.
We have configured ExternalDNS add-ons in our two clusters to share the same Route53 Hosted Zone. The workloads in both clusters also share the same Route 53 DNS records, we rely on AWS Route53 weighted records to allow us to configure canary workload migration between our two EKS clusters.
Here we use the same GitOps workload configuration repository and adapt parameters with the values.yaml
. We could also use different ArgoCD repository for each cluster, or use a new directory if we want to validate or test new deployment manifests with maybe additional features, configurations or to use with different Kubernetes add-ons (like changing ingress controller).
Our objective here is to show you how Application teams and Platform teams can configure their infrastructure and workloads so that application teams are able to deploy their workloads autonomously to the EKS clusters thanks to ArgoCD, and the platform team can keep control of migrating production workloads from one cluster to another without having to synchronize operations with application teams, or asking them to build a complicated CD pipeline.
In this example we show how you can seamlessly migrate your stateless workloads between the 2 clusters for a blue/green or Canary migration, but you can also leverage the same architecture to have your workloads for example separated in different accounts or regions, for either High Availability or Lower latency Access from your customers.
"},{"location":"patterns/blue-green-upgrade/#prerequisites","title":"Prerequisites","text":" - Terraform (tested version v1.3.5 on linux)
- Git
- AWS CLI
- AWS test account with administrator role access
- For working with this repository, you will need an existing Amazon Route 53 Hosted Zone that will be used to create our project hosted zone. It will be provided via the Terraform variable
hosted_zone_name
defined in terraform.tfvars.example. - Before moving to the next step, you will need to register a parent domain with AWS Route 53 (https://docs.aws.amazon.com/Route53/latest/DeveloperGuide/domain-register.html) in case you don\u2019t have one created yet.
- Accessing GitOps git repositories with SSH access requiring an SSH key for authentication. In this example our workloads repositories are stored in GitHub, you can see in GitHub documentation on how to connect with SSH.
- Your GitHub private ssh key value is supposed to be stored in plain text in AWS Secret Manager in a secret named
github-blueprint-ssh-key
, but you can change it using the terraform variable workload_repo_secret
in terraform.tfvars.example.
"},{"location":"patterns/blue-green-upgrade/#quick-start","title":"Quick Start","text":""},{"location":"patterns/blue-green-upgrade/#configure-the-stacks","title":"Configure the Stacks","text":" - Clone the repository
git clone https://github.com/aws-ia/terraform-aws-eks-blueprints.git\ncd patterns/blue-green-upgrade/\n
- Copy the
terraform.tfvars.example
to terraform.tfvars
on each environment
, eks-blue
and eks-green
folders, and change region, hosted_zone_name, eks_admin_role_name according to your needs.
cp terraform.tfvars.example environment/terraform.tfvars\ncp terraform.tfvars.example eks-blue/terraform.tfvars\ncp terraform.tfvars.example eks-green/terraform.tfvars\n
- You will need to provide the
hosted_zone_name
for example my-example.com
. Terraform will create a new hosted zone for the project with name: ${environment}.${hosted_zone_name}
so in our example eks-blueprint.my-example.com
. - You need to provide a valid IAM role in
eks_admin_role_name
to have EKS cluster admin rights, generally the one used in the EKS console.
"},{"location":"patterns/blue-green-upgrade/#create-the-environment-stack","title":"Create the environment stack","text":"More info in the environment Readme
cd environment\nterraform init\nterraform apply\n
There can be some warnings due to undeclared variables. This is normal and you can ignore them as we share the same terraform.tfvars
for the 3 projects by using symlinks for a unique file, and we declare some variables used for the eks-blue and eks-green directory
"},{"location":"patterns/blue-green-upgrade/#create-the-blue-cluster","title":"Create the Blue cluster","text":"More info in the eks-blue Readme, you can also see the detailed step in the local module Readme
cd eks-blue\nterraform init\nterraform apply\n
This can take 8mn for EKS cluster, 15mn
"},{"location":"patterns/blue-green-upgrade/#create-the-green-cluster","title":"Create the Green cluster","text":"cd eks-green\nterraform init\nterraform apply\n
By default the only differences in the 2 clusters are the values defined in main.tf. We will change those values to upgrade Kubernetes version of new cluster, and to migrate our stateless workloads between clusters.
"},{"location":"patterns/blue-green-upgrade/#how-this-work","title":"How this work","text":""},{"location":"patterns/blue-green-upgrade/#watch-our-workload-we-focus-on-team-burnham-namespace","title":"Watch our Workload: we focus on team-burnham namespace.","text":"Our clusters are configured with existing ArgoCD Github repository that is synchronized into each of the clusters:
- EKS Blueprints Add-ons repository
- Workloads repository
We are going to look after one of the application deployed from the workload repository as example to demonstrate our migration automation: the Burnham
workload in the team-burnham namespace. We have set up a simple Go application that returns, in its response body, the name of the cluster it is running on. With this it will be easy to see the current migration on our workload.
<head>\n <title>Hello EKS Blueprint</title>\n</head>\n<div class=\"info\">\n <h>Hello EKS Blueprint Version 1.4</h>\n <p><span>Server address:</span> <span>10.0.2.201:34120</span></p>\n <p><span>Server name:</span> <span>burnham-9d686dc7b-dw45m</span></p>\n <p class=\"smaller\"><span>Date:</span> <span>2022.10.13 07:27:28</span></p>\n <p class=\"smaller\"><span>URI:</span> <span>/</span></p>\n <p class=\"smaller\"><span>HOST:</span> <span>burnham.eks-blueprint.mon-domain.com</span></p>\n <p class=\"smaller\"><span>CLUSTER_NAME:</span> <span>eks-blueprint-blue</span></p>\n</div>\n
The application is deployed from our workload repository manifest
Connect to the cluster: Execute one of the EKS cluster login commands from the terraform output
command, depending on the IAM role you can assume to access to the cluster. If you want EKS Admin cluster, you can execute the command associated to the eks_blueprints_admin_team_configure_kubectl output. It should be something similar to:
aws eks --region eu-west-3 update-kubeconfig --name eks-blueprint-blue --role-arn arn:aws:iam::0123456789:role/admin-team-20230505075455219300000002\n
Note it will allow the role associated to the parameter eks_admin_role_name to assume the role.
You can also connect with the user who created the EKS cluster without specifying the --role-arn
parameter
Next, you can interact with the cluster and see the deployment
$ kubectl get deployment -n team-burnham -l app=burnham\nNAME READY UP-TO-DATE AVAILABLE AGE\nburnham 3/3 3 3 3d18h\n
See the pods
$ kubectl get pods -n team-burnham -l app=burnham\nNAME READY STATUS RESTARTS AGE\nburnham-7db4c6fdbb-82hxn 1/1 Running 0 3d18h\nburnham-7db4c6fdbb-dl59v 1/1 Running 0 3d18h\nburnham-7db4c6fdbb-hpq6h 1/1 Running 0 3d18h\n
See the logs:
$ kubectl logs -n team-burnham -l app=burnham\n2022/10/10 12:35:40 {url: / }, cluster: eks-blueprint-blue }\n2022/10/10 12:35:49 {url: / }, cluster: eks-blueprint-blue }\n
You can make a request to the service, and filter the output to know on which cluster it runs:
$ URL=$(echo -n \"https://\" ; kubectl get ing -n team-burnham burnham-ingress -o json | jq \".spec.rules[0].host\" -r)\n$ curl -s $URL | grep CLUSTER_NAME | awk -F \"<span>|</span>\" '{print $4}'\neks-blueprint-blue\n
"},{"location":"patterns/blue-green-upgrade/#using-aws-route53-and-external-dns","title":"Using AWS Route53 and External DNS","text":"We have configured both our clusters to configure the same Amazon Route 53 Hosted Zones. This is done by having the same configuration of ExternalDNS add-on in main.tf
:
This is the Terraform configuration to configure the ExternalDNS Add-on which is deployed by the Blueprint using ArgoCD.
enable_external_dns = true\n\n external_dns_helm_config = {\n txtOwnerId = local.name\n zoneIdFilter = data.aws_route53_zone.sub.zone_id\n policy = \"sync\"\n awszoneType = \"public\"\n zonesCacheDuration = \"1h\"\n logLevel = \"debug\"\n }\n
- We use ExternalDNS in
sync
mode so that the controller can create but also remove DNS records accordingly to service or ingress objects creation. - We also configured the
txtOwnerId
with the name of the cluster, so that each controller will be able to create/update/delete records but only for records which are associated to the proper OwnerId. - Each Route53 record will be also associated with a
txt
record. This record is used to specify the owner of the associated record and is in the form of:
\"heritage=external-dns,external-dns/owner=eks-blueprint-blue,external-dns/resource=ingress/team-burnham/burnham-ingress\"\n
So in this example the owner of the record is the external-dns controller from the eks-blueprint-blue EKS cluster, and it corresponds to the Kubernetes ingress resource named burnham-ingress in the team-burnham namespace.
Using this feature, and relying on weighted records, we will be able to do blue/green or canary migration by changing the weight of ingress resources defined in each cluster.
"},{"location":"patterns/blue-green-upgrade/#configure-ingress-resources-with-weighted-records","title":"Configure Ingress resources with weighted records","text":"Since we have configured ExternalDNS add-on, we can now defined specific annotation in our ingress
object. You may already know that our workloads are synchronized using ArgoCD from our workload repository sample.
We are focusing on the burnham deployment, which is defined here where we configure the burnham-ingress
ingress object with:
external-dns.alpha.kubernetes.io/set-identifier: {{ .Values.spec.clusterName }}\n external-dns.alpha.kubernetes.io/aws-weight: '{{ .Values.spec.ingress.route53_weight }}'\n
We rely on two external-dns annotations to configure how the record will be created. The set-identifier
annotation will contain the name of the cluster we want to create, which must match the one defined in the external-dns txtOwnerId
configuration.
The aws-weight
will be used to configure the value of the weighted record, and it will be deployed from Helm values, that will be injected by Terraform in our example, so that our platform team will be able to control autonomously how and when they want to migrate workloads between the EKS clusters.
Amazon Route 53 weighted records work like this:
- If we specify a value of 100 in eks-blue cluster and 0 in eks-green cluster, then Route 53 will route all requests to eks-blue cluster.
- If we specify a value of 0 in eks-blue cluster and 100 in eks-green cluster, then Route 53 will route all requests to eks-green cluster.
- we can also define any intermediate values like 100 in eks-blue cluster and 100 in eks-green cluster, so we will have 50% on eks-blue and 50% on eks-green.
"},{"location":"patterns/blue-green-upgrade/#automate-the-migration-from-terraform","title":"Automate the migration from Terraform","text":"Now that we have setup our 2 clusters, deployed with ArgoCD and that the weighed records from values.yaml
are injected from Terraform, let's see how our Platform team can trigger the workload migration.
- At first, 100% of burnham traffic is set to the eks-blue cluster, this is controlled from the
eks-blue/main.tf
& eks-green/main.tf
files with the parameter route53_weight = \"100\"
. The same parameter is set to 0 in cluster eks-green.
Which correspond to :
All requests to our endpoint should return eks-blueprint-blue
we can test it with the following command:
URL=$(echo -n \"https://\" ; kubectl get ing -n team-burnham burnham-ingress -o json | jq \".spec.rules[0].host\" -r)\ncurl -s $URL | grep CLUSTER_NAME | awk -F \"<span>|</span>\" '{print $4}'\n
you should see:
eks-blueprint-blue\n
- Let's change traffic to 50% eks-blue and 50% eks-green by activating also value 100 in eks-green locals.tf (
route53_weight = \"100\"
) and let's terraform apply
to let terraform update the configuration
Which correspond to :
All records have a weight of 100, so we will have 50% of requests on each cluster.
We can check the ratio of requests resolution between both clusters
URL=$(echo -n \"https://\" ; kubectl get ing -n team-burnham burnham-ingress -o json | jq \".spec.rules[0].host\" -r)\nrepeat 10 curl -s $URL | grep CLUSTER_NAME | awk -F \"<span>|</span>\" '{print $4}' && sleep 60\n
Result should be similar to:
eks-blueprint-blue\neks-blueprint-blue\neks-blueprint-blue\neks-blueprint-blue\neks-blueprint-green\neks-blueprint-green\neks-blueprint-blue\neks-blueprint-green\neks-blueprint-blue\neks-blueprint-green\n
The default TTL is 60 seconds, and each resolution has a 50% chance of returning the blue or the green cluster, so you may need to replay the previous command several times to get an idea of the distribution, which theoretically is 50%
- Now that we see that our green cluster is taking requests correctly, we can update the eks-blue cluster configuration to have the weight to 0 and apply again. After a few moments, your Route 53 records should look like the below screenshot, and all requests should now reach eks-green cluster.
Which correspond to :
At this step, once all cached DNS records have expired (per their TTL), all the traffic will go to the eks-green cluster. You can either delete the eks-blue cluster, or decide to make upgrades on the blue cluster and send traffic back to eks-blue afterward, or simply keep it as a possibility for rollback if needed.
In this sample, we use a simple Terraform variable to control the weight for all applications. We could also choose to have several parameters, let's say one per application, so you can control your migration strategy more finely, application by application.
"},{"location":"patterns/blue-green-upgrade/#delete-the-stack","title":"Delete the Stack","text":""},{"location":"patterns/blue-green-upgrade/#delete-the-eks-clusters","title":"Delete the EKS Cluster(s)","text":"This section, can be executed in either eks-blue or eks-green folders, or in both if you want to delete both clusters.
In order to properly destroy the Cluster, we need first to remove the ArgoCD workloads, while keeping the ArgoCD addons. We will also need to remove our Karpenter provisioners, and any other objects you created outside of Terraform that needs to be cleaned before destroying the terraform stack.
Why do this? When we remove an ingress object, we want the associated Kubernetes add-ons like AWS Load Balancer Controller and External DNS to correctly free the associated AWS resources. If we directly ask Terraform to destroy everything, it may remove these controllers first, without giving them time to remove the associated AWS resources, which will then still exist in AWS and prevent us from completely deleting our cluster.
"},{"location":"patterns/blue-green-upgrade/#tldr","title":"TL;DR","text":"../tear-down.sh\n
"},{"location":"patterns/blue-green-upgrade/#manual","title":"Manual","text":" - If also deployed, delete your Karpenter provisioners
this is safe to delete if no addons are deployed on Karpenter, which is the case here. If not we should separate the team-platform deployments which installed Karpenter provisioners in a separate ArgoCD Application to avoid any conflicts.
kubectl delete provisioners.karpenter.sh --all\n
- Delete Workloads App of App
kubectl delete application workloads -n argocd\n
- If also deployed, delete ecsdemo App of App
kubectl delete application ecsdemo -n argocd\n
Once every workload application has been freed on the AWS side (this can take some time), we can then destroy our add-ons and Terraform resources
Note: it can take time to deregister all load balancers, verify that you don't have any more AWS resources created by EKS prior to start destroying EKS with terraform.
- Destroy terraform resources
terraform apply -destroy -target=\"module.eks_cluster.module.kubernetes_addons\" -auto-approve\nterraform apply -destroy -target=\"module.eks_cluster.module.eks\" -auto-approve\nterraform apply -destroy -auto-approve\n
"},{"location":"patterns/blue-green-upgrade/#delete-the-environment-stack","title":"Delete the environment stack","text":"If you have finish playing with this solution, and once you have destroyed the 2 EKS clusters, you can now delete the environment stack.
cd environment\nterraform apply -destroy -auto-approve\n
This will destroy the Route53 hosted zone, the Certificate Manager certificate, and the VPC with all its associated resources.
"},{"location":"patterns/blue-green-upgrade/#troubleshoot","title":"Troubleshoot","text":""},{"location":"patterns/blue-green-upgrade/#external-dns-ownership","title":"External DNS Ownership","text":"The Amazon Route 53 records association are controlled by ExternalDNS controller. You can see the logs from the controller to understand what is happening by executing the following command in each cluster:
kubectl logs -n external-dns -l app.kubernetes.io/name=external-dns -f\n
In eks-blue cluster, you can see logs like the following, which showcase that the eks-blueprint-blue controller won't make any change in records owned by eks-blueprint-green cluster, the reverse is also true.
time=\"2022-10-10T15:46:54Z\" level=debug msg=\"Skipping endpoint skiapp.eks-blueprint.sallaman.people.aws.dev 300 IN CNAME eks-blueprint-green k8s-riker-68438cd99f-893407990.eu-west-1.elb.amazonaws.com [{aws/evaluate-target-health true} {alias true} {aws/weight 100}] because owner id does not match, found: \\\"eks-blueprint-green\\\", required: \\\"eks-blueprint-blue\\\"\"\ntime=\"2022-10-10T15:46:54Z\" level=debug msg=\"Refreshing zones list cache\"\n
"},{"location":"patterns/blue-green-upgrade/#check-route-53-record-status","title":"Check Route 53 Record status","text":"We can also use the CLI to see our current Route 53 configuration:
export ROOT_DOMAIN=<your-domain-name> # the value you put in hosted_zone_name\nZONE_ID=$(aws route53 list-hosted-zones-by-name --output json --dns-name \"eks-blueprint.${ROOT_DOMAIN}.\" --query \"HostedZones[0].Id\" --out text)\necho $ZONE_ID\naws route53 list-resource-record-sets \\\n--output json \\\n--hosted-zone-id $ZONE_ID \\\n--query \"ResourceRecordSets[?Name == 'burnham.eks-blueprint.$ROOT_DOMAIN.']|[?Type == 'A']\"\naws route53 list-resource-record-sets \\\n--output json \\\n--hosted-zone-id $ZONE_ID \\\n--query \"ResourceRecordSets[?Name == 'burnham.eks-blueprint.$ROOT_DOMAIN.']|[?Type == 'TXT']\"\n
"},{"location":"patterns/blue-green-upgrade/#check-current-resolution-and-ttl-value","title":"Check current resolution and TTL value","text":"As DNS migration is dependent on DNS caching, normally relying on the TTL, you can use dig to see the current value of the TTL used locally
export ROOT_DOMAIN=<your-domain-name> # the value you put for hosted_zone_name\ndig +noauthority +noquestion +noadditional +nostats +ttlunits +ttlid A burnham.eks-blueprint.$ROOT_DOMAIN\n
"},{"location":"patterns/blue-green-upgrade/#get-argocd-ui-password","title":"Get ArgoCD UI Password","text":"You can connect to the ArgoCD UI using the service :
kubectl get svc -n argocd argo-cd-argocd-server -o json | jq '.status.loadBalancer.ingress[0].hostname' -r\n
Then log in with admin and get the password from AWS Secrets Manager:
aws secretsmanager get-secret-value \\\n--secret-id argocd-admin-secret.eks-blueprint \\\n--query SecretString \\\n--output text --region $AWS_REGION\n
"},{"location":"patterns/elastic-fabric-adapter/","title":"Elastic Fabric Adapter","text":""},{"location":"patterns/elastic-fabric-adapter/#eks-cluster-w-elastic-fabric-adapter","title":"EKS Cluster w/ Elastic Fabric Adapter","text":"This pattern demonstrates an Amazon EKS Cluster with an EFA-enabled nodegroup.
"},{"location":"patterns/elastic-fabric-adapter/#deploy","title":"Deploy","text":"See here for the prerequisites and steps to deploy this pattern.
"},{"location":"patterns/elastic-fabric-adapter/#validate","title":"Validate","text":" -
List the nodes by instance type:
kubectl get nodes -o yaml | grep instance-type | grep node | grep -v f:\n
node.kubernetes.io/instance-type: g5.8xlarge\nnode.kubernetes.io/instance-type: m5.large\nnode.kubernetes.io/instance-type: m5.large\nnode.kubernetes.io/instance-type: g5.8xlarge\n
You should see two EFA-enabled (in this example g5.8xlarge
) nodes in the list.
-
Deploy Kubeflow MPI Operator
Kubeflow MPI Operator is required for running MPIJobs on EKS. We will use an MPIJob to test EFA. To deploy the MPI operator execute the following:
kubectl apply -f https://raw.githubusercontent.com/kubeflow/mpi-operator/v0.3.0/deploy/v2beta1/mpi-operator.yaml\n
namespace/mpi-operator created\ncustomresourcedefinition.apiextensions.k8s.io/mpijobs.kubeflow.org created\nserviceaccount/mpi-operator created\nclusterrole.rbac.authorization.k8s.io/kubeflow-mpijobs-admin created\nclusterrole.rbac.authorization.k8s.io/kubeflow-mpijobs-edit created\nclusterrole.rbac.authorization.k8s.io/kubeflow-mpijobs-view created\nclusterrole.rbac.authorization.k8s.io/mpi-operator created\nclusterrolebinding.rbac.authorization.k8s.io/mpi-operator created\ndeployment.apps/mpi-operator created\n
In addition to deploying the operator, please apply a patch to the mpi-operator clusterrole to allow the mpi-operator service account access to leases
resources in the coordination.k8s.io
apiGroup.
kubectl apply -f https://raw.githubusercontent.com/aws-samples/aws-do-eks/main/Container-Root/eks/deployment/kubeflow/mpi-operator/clusterrole-mpi-operator.yaml\n
clusterrole.rbac.authorization.k8s.io/mpi-operator configured\n
-
EFA test
The results should show that two EFA adapters are available (one for each worker pod)
kubectl apply -f https://raw.githubusercontent.com/aws-samples/aws-do-eks/main/Container-Root/eks/deployment/efa-device-plugin/test-efa.yaml\n
mpijob.kubeflow.org/efa-info-test created\n
Once the test launcher pod enters status Running
or Completed
, see the test logs using the command below:
kubectl logs -f $(kubectl get pods | grep launcher | cut -d ' ' -f 1)\n
Warning: Permanently added 'efa-info-test-worker-1.efa-info-test-worker.default.svc,10.11.13.224' (ECDSA) to the list of known hosts.\nWarning: Permanently added 'efa-info-test-worker-0.efa-info-test-worker.default.svc,10.11.4.63' (ECDSA) to the list of known hosts.\n[1,1]<stdout>:provider: efa\n[1,1]<stdout>: fabric: efa\n[1,1]<stdout>: domain: rdmap197s0-rdm\n[1,1]<stdout>: version: 116.10\n[1,1]<stdout>: type: FI_EP_RDM\n[1,1]<stdout>: protocol: FI_PROTO_EFA\n[1,0]<stdout>:provider: efa\n[1,0]<stdout>: fabric: efa\n[1,0]<stdout>: domain: rdmap197s0-rdm\n[1,0]<stdout>: version: 116.10\n[1,0]<stdout>: type: FI_EP_RDM\n[1,0]<stdout>: protocol: FI_PROTO_EFA\n
-
EFA NCCL test
To run the EFA NCCL test please execute the following kubectl command:
kubectl apply -f https://raw.githubusercontent.com/aws-samples/aws-do-eks/main/Container-Root/eks/deployment/efa-device-plugin/test-nccl-efa.yaml\n
mpijob.kubeflow.org/test-nccl-efa created\n
Once the launcher pod enters Running
or Completed
state, execute the following to see the test logs:
kubectl logs -f $(kubectl get pods | grep launcher | cut -d ' ' -f 1)\n
[1,0]<stdout>:test-nccl-efa-worker-0:21:21 [0] NCCL INFO NET/OFI Selected Provider is efa (found 1 nics)\n[1,0]<stdout>:test-nccl-efa-worker-0:21:21 [0] NCCL INFO Using network AWS Libfabric\n[1,0]<stdout>:NCCL version 2.12.7+cuda11.4\n
Columns 8 and 12 in the output table show the in-place and out-of-place bus bandwidth calculated for the data size listed in column 1. In this case it is 3.13 and 3.12 GB/s respectively. Your actual results may be slightly different. The calculated average bus bandwidth is displayed at the bottom of the log when the test finishes after it reaches the max data size, specified in the mpijob manifest. In this result the average bus bandwidth is 1.15 GB/s.
[1,0]<stdout>:# size count type redop root time algbw busbw #wrong time algbw busbw #wrong\n[1,0]<stdout>:# (B) (elements) (us) (GB/s) (GB/s) (us) (GB/s) (GB/s)\n...\n[1,0]<stdout>: 262144 65536 float sum -1 195.0 1.34 1.34 0 194.0 1.35 1.35 0\n[1,0]<stdout>: 524288 131072 float sum -1 296.9 1.77 1.77 0 291.1 1.80 1.80 0\n[1,0]<stdout>: 1048576 262144 float sum -1 583.4 1.80 1.80 0 579.6 1.81 1.81 0\n[1,0]<stdout>: 2097152 524288 float sum -1 983.3 2.13 2.13 0 973.9 2.15 2.15 0\n[1,0]<stdout>: 4194304 1048576 float sum -1 1745.4 2.40 2.40 0 1673.2 2.51 2.51 0\n...\n[1,0]<stdout>:# Avg bus bandwidth : 1.15327\n
"},{"location":"patterns/elastic-fabric-adapter/#destroy","title":"Destroy","text":"terraform destroy -target=\"module.eks_blueprints_addons\" -auto-approve\nterraform destroy -target=\"module.eks\" -auto-approve\nterraform destroy -auto-approve\n
See here for more details on cleaning up the resources created.
"},{"location":"patterns/external-secrets/","title":"External Secrets","text":""},{"location":"patterns/external-secrets/#amazon-eks-cluster-w-external-secrets-operator","title":"Amazon EKS Cluster w/ External Secrets Operator","text":"This pattern deploys an EKS Cluster with the External Secrets Operator. The cluster is populated with a ClusterSecretStore and SecretStore example using SecretManager and Parameter Store respectively. A secret for each store is also created. Both stores use IRSA to retrieve the secret values from AWS.
"},{"location":"patterns/external-secrets/#deploy","title":"Deploy","text":"See here for the prerequisites and steps to deploy this pattern.
"},{"location":"patterns/external-secrets/#validate","title":"Validate","text":" -
List the secret resources in the external-secrets
namespace
kubectl get externalsecrets -n external-secrets\nkubectl get secrets -n external-secrets\n
"},{"location":"patterns/external-secrets/#destroy","title":"Destroy","text":"terraform destroy -target=\"module.eks_blueprints_addons\" -auto-approve\nterraform destroy -target=\"module.eks\" -auto-approve\nterraform destroy -auto-approve\n
See here for more details on cleaning up the resources created.
"},{"location":"patterns/fargate-serverless/","title":"Fargate Serverless","text":""},{"location":"patterns/fargate-serverless/#serverless-amazon-eks-cluster","title":"Serverless Amazon EKS Cluster","text":"This pattern demonstrates an Amazon EKS Cluster that utilizes Fargate profiles for a serverless data plane.
"},{"location":"patterns/fargate-serverless/#deploy","title":"Deploy","text":"See here for the prerequisites and steps to deploy this pattern.
"},{"location":"patterns/fargate-serverless/#validate","title":"Validate","text":" -
List the nodes in the cluster; you should see Fargate instances:
kubectl get nodes\n
NAME STATUS ROLES AGE VERSION\nfargate-ip-10-0-17-17.us-west-2.compute.internal Ready <none> 25m v1.26.3-eks-f4dc2c0\nfargate-ip-10-0-20-244.us-west-2.compute.internal Ready <none> 71s v1.26.3-eks-f4dc2c0\nfargate-ip-10-0-41-143.us-west-2.compute.internal Ready <none> 25m v1.26.3-eks-f4dc2c0\nfargate-ip-10-0-44-95.us-west-2.compute.internal Ready <none> 25m v1.26.3-eks-f4dc2c0\nfargate-ip-10-0-45-153.us-west-2.compute.internal Ready <none> 77s v1.26.3-eks-f4dc2c0\nfargate-ip-10-0-47-31.us-west-2.compute.internal Ready <none> 75s v1.26.3-eks-f4dc2c0\nfargate-ip-10-0-6-175.us-west-2.compute.internal Ready <none> 25m v1.26.3-eks-f4dc2c0\n
-
List the pods. All the pods should reach a status of Running
after approximately 60 seconds:
kubectl get pods -A\n
NAMESPACE NAME READY STATUS RESTARTS AGE\napp-2048 app-2048-65bd744dfb-7g9rx 1/1 Running 0 2m34s\napp-2048 app-2048-65bd744dfb-nxcbm 1/1 Running 0 2m34s\napp-2048 app-2048-65bd744dfb-z4b6z 1/1 Running 0 2m34s\nkube-system aws-load-balancer-controller-6cbdb58654-fvskt 1/1 Running 0 26m\nkube-system aws-load-balancer-controller-6cbdb58654-sc7dk 1/1 Running 0 26m\nkube-system coredns-7b7bddbc85-jmbv6 1/1 Running 0 26m\nkube-system coredns-7b7bddbc85-rgmzq 1/1 Running 0 26m\n
-
Validate the aws-logging
configMap for Fargate Fluentbit was created:
kubectl -n aws-observability get configmap aws-logging -o yaml\n
apiVersion: v1\ndata:\nfilters.conf: |\n[FILTER]\nName parser\nMatch *\nKey_Name log\nParser regex\nPreserve_Key True\nReserve_Data True\nflb_log_cw: \"true\"\noutput.conf: |\n[OUTPUT]\nName cloudwatch_logs\nMatch *\nregion us-west-2\nlog_group_name /fargate-serverless/fargate-fluentbit-logs20230509014113352200000006\nlog_stream_prefix fargate-logs-\nauto_create_group true\nparsers.conf: |\n[PARSER]\nName regex\nFormat regex\nRegex ^(?<time>[^ ]+) (?<stream>[^ ]+) (?<logtag>[^ ]+) (?<message>.+)$\nTime_Key time\nTime_Format %Y-%m-%dT%H:%M:%S.%L%z\nTime_Keep On\nDecode_Field_As json message\nimmutable: false\nkind: ConfigMap\nmetadata:\ncreationTimestamp: \"2023-05-08T21:14:52Z\"\nname: aws-logging\nnamespace: aws-observability\nresourceVersion: \"1795\"\nuid: d822bcf5-a441-4996-857e-7fb1357bc07e\n
You can also validate if the CloudWatch LogGroup was created accordingly, and LogStreams were populated:
aws logs describe-log-groups \\\n--log-group-name-prefix \"/fargate-serverless/fargate-fluentbit\"\n
{\n\"logGroups\": [\n{\n\"logGroupName\": \"/fargate-serverless/fargate-fluentbit-logs20230509014113352200000006\",\n\"creationTime\": 1683580491652,\n\"retentionInDays\": 90,\n\"metricFilterCount\": 0,\n\"arn\": \"arn:aws:logs:us-west-2:111222333444:log-group:/fargate-serverless/fargate-fluentbit-logs20230509014113352200000006:*\",\n\"storedBytes\": 0\n}\n]\n}\n
aws logs describe-log-streams \\\n--log-group-name \"/fargate-serverless/fargate-fluentbit-logs20230509014113352200000006\" \\\n--log-stream-name-prefix fargate-logs --query 'logStreams[].logStreamName'\n
[\n\"fargate-logs-flblogs.var.log.fluent-bit.log\",\n\"fargate-logs-kube.var.log.containers.aws-load-balancer-controller-7f989fc6c-grjsq_kube-system_aws-load-balancer-controller-feaa22b4cdaa71ecfc8355feb81d4b61ea85598a7bb57aef07667c767c6b98e4.log\",\n\"fargate-logs-kube.var.log.containers.aws-load-balancer-controller-7f989fc6c-wzr46_kube-system_aws-load-balancer-controller-69075ea9ab3c7474eac2a1696d3a84a848a151420cd783d79aeef960b181567f.log\",\n\"fargate-logs-kube.var.log.containers.coredns-7b7bddbc85-8cxvq_kube-system_coredns-9e4f3ab435269a566bcbaa606c02c146ad58508e67cef09fa87d5c09e4ac0088.log\",\n\"fargate-logs-kube.var.log.containers.coredns-7b7bddbc85-gcjwp_kube-system_coredns-11016818361cd68c32bf8f0b1328f3d92a6d7b8cf5879bfe8b301f393cb011cc.log\"\n]\n
"},{"location":"patterns/fargate-serverless/#example","title":"Example","text":" -
Create an ingress resource using the AWS load balancer controller deployed, pointing to our application service:
kubectl get svc -n app-2048\n
NAME TYPE CLUSTER-IP EXTERNAL-IP PORT(S) AGE\napp-2048 NodePort 172.20.33.217 <none> 80:32568/TCP 2m48s\n
kubectl -n app-2048 create ingress app-2048 --class alb --rule=\"/*=app-2048:80\" \\\n--annotation alb.ingress.kubernetes.io/scheme=internet-facing \\\n--annotation alb.ingress.kubernetes.io/target-type=ip\n
kubectl -n app-2048 get ingress\n
NAME CLASS HOSTS ADDRESS PORTS AGE\napp-2048 alb * k8s-app2048-app2048-6d9c5e92d6-1234567890.us-west-2.elb.amazonaws.com 80 4m9s\n
-
Open the browser to access the application via the URL address shown in the last output in the ADDRESS column.
In our example: k8s-app2048-app2048-6d9c5e92d6-1234567890.us-west-2.elb.amazonaws.com
Info
You might need to wait a few minutes, and then refresh your browser. If your Ingress isn't created after several minutes, then run this command to view the AWS Load Balancer Controller logs:
kubectl logs -n kube-system deployment.apps/aws-load-balancer-controller\n
"},{"location":"patterns/fargate-serverless/#destroy","title":"Destroy","text":"terraform destroy -target=\"module.eks_blueprints_addons\" -auto-approve\nterraform destroy -target=\"module.eks\" -auto-approve\nterraform destroy -auto-approve\n
See here for more details on cleaning up the resources created.
"},{"location":"patterns/fully-private-cluster/","title":"Fully Private Cluster","text":""},{"location":"patterns/fully-private-cluster/#fully-private-amazon-eks-cluster","title":"Fully Private Amazon EKS Cluster","text":"This pattern demonstrates an Amazon EKS cluster that does not have internet access. The private cluster must pull images from a container registry that is within your VPC, and also must have endpoint private access enabled. This is required for nodes to register with the cluster endpoint.
Please see this document for more details on configuring fully private EKS Clusters.
Fully private EKS clusters require the following VPC endpoints to be created to communicate with AWS services. This example solution will provide these endpoints if you choose to create a VPC. If you are using an existing VPC, then you may need to ensure these endpoints are created.
com.amazonaws.region.aps-workspaces - If using AWS Managed Prometheus Workspace\ncom.amazonaws.region.ssm - Secrets Management\ncom.amazonaws.region.ec2\ncom.amazonaws.region.ecr.api\ncom.amazonaws.region.ecr.dkr\ncom.amazonaws.region.logs \u2013 For CloudWatch Logs\ncom.amazonaws.region.sts \u2013 If using AWS Fargate or IAM roles for service accounts\ncom.amazonaws.region.elasticloadbalancing \u2013 If using Application Load Balancers\ncom.amazonaws.region.autoscaling \u2013 If using Cluster Autoscaler\ncom.amazonaws.region.s3\n
"},{"location":"patterns/fully-private-cluster/#deploy","title":"Deploy","text":"See here for the prerequisites and steps to deploy this pattern.
"},{"location":"patterns/fully-private-cluster/#validate","title":"Validate","text":" -
Test by listing the nodes in the cluster:
kubectl get nodes\n
NAME STATUS ROLES AGE VERSION\nip-10-0-19-90.us-west-2.compute.internal Ready <none> 8m34s v1.26.2-eks-a59e1f0\nip-10-0-44-110.us-west-2.compute.internal Ready <none> 8m36s v1.26.2-eks-a59e1f0\nip-10-0-9-147.us-west-2.compute.internal Ready <none> 8m35s v1.26.2-eks-a59e1f0\n
-
Test by listing all the Pods running currently. All the Pods should reach a status of Running
after approximately 60 seconds:
kubectl get pods -A\n
NAMESPACE NAME READY STATUS RESTARTS AGE\nkube-system aws-node-jvn9x 1/1 Running 0 7m42s\nkube-system aws-node-mnjlf 1/1 Running 0 7m45s\nkube-system aws-node-q458h 1/1 Running 0 7m49s\nkube-system coredns-6c45d94f67-495rr 1/1 Running 0 14m\nkube-system coredns-6c45d94f67-5c8tc 1/1 Running 0 14m\nkube-system kube-proxy-47wfh 1/1 Running 0 8m32s\nkube-system kube-proxy-f6chz 1/1 Running 0 8m30s\nkube-system kube-proxy-xcfkc 1/1 Running 0 8m31s\n
"},{"location":"patterns/fully-private-cluster/#destroy","title":"Destroy","text":"terraform destroy -target=\"module.eks_blueprints_addons\" -auto-approve\nterraform destroy -target=\"module.eks\" -auto-approve\nterraform destroy -auto-approve\n
See here for more details on cleaning up the resources created.
"},{"location":"patterns/ipv6-eks-cluster/","title":"IPv6 Networking","text":""},{"location":"patterns/ipv6-eks-cluster/#amazon-eks-cluster-w-ipv6-networking","title":"Amazon EKS Cluster w/ IPv6 Networking","text":"This pattern demonstrates an EKS cluster that utilizes IPv6 networking.
"},{"location":"patterns/ipv6-eks-cluster/#deploy","title":"Deploy","text":"See here for the prerequisites and steps to deploy this pattern.
"},{"location":"patterns/ipv6-eks-cluster/#validate","title":"Validate","text":" -
Test by listing all the pods running currently; the IP
should be an IPv6 address.
kubectl get pods -A -o wide\n
# Output should look like below\nNAMESPACE NAME READY STATUS RESTARTS AGE IP NODE NOMINATED NODE READINESS GATES\nkube-system aws-node-bhd2s 1/1 Running 0 3m5s 2600:1f13:6c4:a703:ecf8:3ac1:76b0:9303 ip-10-0-10-183.us-west-2.compute.internal <none> <none>\nkube-system aws-node-nmdgq 1/1 Running 0 3m21s 2600:1f13:6c4:a705:a929:f8d4:9350:1b20 ip-10-0-12-188.us-west-2.compute.internal <none> <none>\nkube-system coredns-799c5565b4-6wxrc 1/1 Running 0 10m 2600:1f13:6c4:a705:bbda:: ip-10-0-12-188.us-west-2.compute.internal <none> <none>\nkube-system coredns-799c5565b4-fjq4q 1/1 Running 0 10m 2600:1f13:6c4:a705:bbda::1 ip-10-0-12-188.us-west-2.compute.internal <none> <none>\nkube-system kube-proxy-58tp7 1/1 Running 0 4m25s 2600:1f13:6c4:a703:ecf8:3ac1:76b0:9303 ip-10-0-10-183.us-west-2.compute.internal <none> <none>\nkube-system kube-proxy-hqkgw 1/1 Running 0 4m25s 2600:1f13:6c4:a705:a929:f8d4:9350:1b20 ip-10-0-12-188.us-west-2.compute.internal <none> <none>\n
-
Test by listing all the nodes running currently; the INTERNAL-IP
should be an IPv6 address.
kubectl get nodes -A -o wide\n
# Output should look like below\nNAME STATUS ROLES AGE VERSION INTERNAL-IP EXTERNAL-IP OS-IMAGE KERNEL-VERSION CONTAINER-RUNTIME\nip-10-0-10-183.us-west-2.compute.internal Ready <none> 4m57s v1.24.7-eks-fb459a0 2600:1f13:6c4:a703:ecf8:3ac1:76b0:9303 <none> Amazon Linux 2 5.4.226-129.415.amzn2.x86_64 containerd://1.6.6\nip-10-0-12-188.us-west-2.compute.internal Ready <none> 4m57s v1.24.7-eks-fb459a0 2600:1f13:6c4:a705:a929:f8d4:9350:1b20 <none> Amazon Linux 2 5.4.226-129.415.amzn2.x86_64 containerd://1.6.6\n
"},{"location":"patterns/ipv6-eks-cluster/#destroy","title":"Destroy","text":"terraform destroy -target=\"module.eks_blueprints_addons\" -auto-approve\nterraform destroy -target=\"module.eks\" -auto-approve\nterraform destroy -auto-approve\n
See here for more details on cleaning up the resources created.
"},{"location":"patterns/istio-multi-cluster/","title":"Istio - Multi-Cluster","text":""},{"location":"patterns/istio-multi-cluster/#amazon-eks-multi-cluster-w-istio","title":"Amazon EKS Multi-Cluster w/ Istio","text":"This pattern demonstrates 2 Amazon EKS clusters configured with Istio. Istio will be set-up to operate in a Multi-Primary configuration, where services are shared across clusters.
Refer to the documentation for Istio
concepts.
"},{"location":"patterns/istio-multi-cluster/#notable-configuration","title":"Notable configuration","text":" - This sample relies on reading data from Terraform Remote State in the different folders. In a production setup, Terraform Remote State is stored in a persistent backend such as Terraform Cloud or S3. For more information, please refer to the Terraform Backends documentation
- The process for connecting clusters is separated from the cluster creation as it requires all clusters to be created first, and to exchange configuration from one to the other
"},{"location":"patterns/istio-multi-cluster/#folder-structure","title":"Folder structure","text":""},{"location":"patterns/istio-multi-cluster/#0certs-tool","title":"0.certs-tool
","text":"This folder contains the Makefiles from the Istio project to generate 1 root CA with 2 intermediate CAs for each cluster. Please refer to the \"Certificate Management\" section in the Istio documentation. For a production setup, the Istio project highly recommends having a production-ready CA solution.
NOTE: The 0.certs-tool/create-certs.sh script needs to run before the cluster creation so the code will pick up the relevant certificates
"},{"location":"patterns/istio-multi-cluster/#0vpc","title":"0.vpc
","text":"This folder creates the VPC for both clusters. The VPC creation is not part of the cluster provisioning and therefore lives in a separate folder. To support the multi-cluster/Multi-Primary setup, this folder also creates an additional security group to be used by each cluster's worker nodes to allow cross-cluster communication (resources cluster1_additional_sg
and cluster2_additional_sg
). These security groups allow communication from one to the other and each will be added to the worker nodes of the relevant cluster
"},{"location":"patterns/istio-multi-cluster/#1cluster1","title":"1.cluster1
","text":"This folder creates an Amazon EKS Cluster, named by default cluster-1
(see variables.tf
), with AWS Load Balancer Controller, and Istio installation. Configurations in this folder to be aware of:
- The cluster is configured to use the security groups created in the
0.vpc
folder (cluster1_additional_sg
in this case). - Kubernetes Secret named
cacerts
is created with the certificates created by the 0.certs-tool/create-certs.sh script - Kubernetes Secret named
cacerts
named istio-reader-service-account-istio-remote-secret-token
of type Service-Account
is being created. This is to replicate the istioctl experimental create-remote-secret command. This secret will be used in folder 3.istio-multi-primary
to apply a kubeconfig secret with tokens from the other cluster to be able to communicate with the other cluster's API Server
"},{"location":"patterns/istio-multi-cluster/#2cluster2","title":"2.cluster2
","text":"Same configuration as in 1.cluster1
except the name of the cluster which is cluster-2
.
"},{"location":"patterns/istio-multi-cluster/#3istio-multi-primary","title":"3.istio-multi-primary
","text":"This folder deploys a reader secret on each cluster. It replicates the istioctl experimental create-remote-secret
by applying a kubeconfig secret prefixed istio-remote-secret-
with the cluster name at the end.
"},{"location":"patterns/istio-multi-cluster/#4test-connectivity","title":"4.test-connectivity
","text":"This folder tests the installation connectivity. It follows the Istio guide Verify the installation by deploying services on each cluster, and curl
-ing from one to the other
"},{"location":"patterns/istio-multi-cluster/#prerequisites","title":"Prerequisites","text":"Ensure that you have the following tools installed locally:
- aws cli
- kubectl
- terraform
"},{"location":"patterns/istio-multi-cluster/#deploy","title":"Deploy","text":""},{"location":"patterns/istio-multi-cluster/#prereq-provision-certificates","title":"Prereq - Provision Certificates","text":"cd 0.certs-tool\n./create-certs.sh\ncd ..\n
"},{"location":"patterns/istio-multi-cluster/#step-0-create-the-vpc","title":"Step 0 - Create the VPC","text":"cd 0.vpc\nterraform init\nterraform apply -auto-approve\ncd ..\n
"},{"location":"patterns/istio-multi-cluster/#step-1-deploy-cluster-1","title":"Step 1 - Deploy cluster-1","text":"cd 1.cluster1\nterraform init\nterraform apply -auto-approve\ncd ..\n
"},{"location":"patterns/istio-multi-cluster/#step-2-deploy-cluster-2","title":"Step 2 - Deploy cluster-2","text":"cd 2.cluster2\nterraform init\nterraform apply -auto-approve\ncd ..\n
"},{"location":"patterns/istio-multi-cluster/#step-3-configure-istio-multi-primary","title":"Step 3 - Configure Istio Multi-Primary","text":"cd 3.istio-multi-primary\nterraform init\nterraform apply -auto-approve\ncd ..\n
"},{"location":"patterns/istio-multi-cluster/#step-4-test-installation-and-connectivity","title":"Step 4 - test installation and connectivity","text":"cd 4.test-connectivity\n./test_connectivity.sh\ncd ..\n
This script deploys the sample application to both clusters and runs curl from a pod in one cluster to a service that is deployed in both clusters. You should expect to see responses from both V1
and V2
of the sample application. The script runs 4 curl
commands from cluster-1 to cluster-2 and vice versa
"},{"location":"patterns/istio-multi-cluster/#destroy","title":"Destroy","text":"To teardown and remove the resources created in this example:
cd 3.istio-multi-primary\nterraform apply -destroy -auto-approve\ncd ../2.cluster2\nterraform apply -destroy -auto-approve\ncd ../1.cluster1\nterraform apply -destroy -auto-approve\ncd ../0.vpc\nterraform apply -destroy -auto-approve\n
"},{"location":"patterns/istio/","title":"Istio","text":""},{"location":"patterns/istio/#amazon-eks-cluster-w-istio","title":"Amazon EKS Cluster w/ Istio","text":"This example shows how to provision an EKS cluster with Istio.
- Deploy EKS Cluster with one managed node group in a VPC
- Add node_security_group rules for port access required for Istio communication
- Install Istio using Helm resources in Terraform
- Install Istio Ingress Gateway using Helm resources in Terraform
- This step deploys a Service of type
LoadBalancer
that creates an AWS Network Load Balancer. - Deploy/Validate Istio communication using sample application
Refer to the documentation on Istio concepts.
"},{"location":"patterns/istio/#deploy","title":"Deploy","text":"See here for the prerequisites and steps to deploy this pattern.
"},{"location":"patterns/istio/#observability-add-ons","title":"Observability Add-ons","text":"Use the following code snippet to add the Istio Observability Add-ons on the EKS cluster with deployed Istio.
for ADDON in kiali jaeger prometheus grafana\ndo\nADDON_URL=\"https://raw.githubusercontent.com/istio/istio/release-1.18/samples/addons/$ADDON.yaml\"\nkubectl apply -f $ADDON_URL\ndone\n
"},{"location":"patterns/istio/#validate","title":"Validate","text":" -
List out all pods and services in the istio-system
namespace:
kubectl get pods,svc -n istio-system\nkubectl get pods,svc -n istio-ingress\n
NAME READY STATUS RESTARTS AGE\npod/grafana-7d4f5589fb-4xj9m 1/1 Running 0 4m14s\npod/istiod-ff577f8b8-c8ssk 1/1 Running 0 4m40s\npod/jaeger-58c79c85cd-n7bkx 1/1 Running 0 4m14s\npod/kiali-749d76d7bb-8kjg7 1/1 Running 0 4m14s\npod/prometheus-5d5d6d6fc-sptxl 2/2 Running 0 4m15s\n\nNAME TYPE CLUSTER-IP EXTERNAL-IP PORT(S) AGE\nservice/grafana ClusterIP 172.20.141.12 <none> 3000/TCP 4m14s\nservice/istiod ClusterIP 172.20.172.70 <none> 15010/TCP,15012/TCP,443/TCP,15014/TCP 4m40s\nservice/jaeger-collector ClusterIP 172.20.223.28 <none> 14268/TCP,14250/TCP,9411/TCP 4m15s\nservice/kiali ClusterIP 172.20.182.231 <none> 20001/TCP,9090/TCP 4m15s\nservice/prometheus ClusterIP 172.20.89.64 <none> 9090/TCP 4m14s\nservice/tracing ClusterIP 172.20.253.201 <none> 80/TCP,16685/TCP 4m14s\nservice/zipkin ClusterIP 172.20.221.157 <none> 9411/TCP 4m15s\n\nNAME READY STATUS RESTARTS AGE\npod/istio-ingress-6f7c5dffd8-glszr 1/1 Running 0 4m28s\n\nNAME TYPE CLUSTER-IP EXTERNAL-IP PORT(S) AGE\nservice/istio-ingress LoadBalancer 172.20.104.27 k8s-istioing-istioing-844c89b6c2-875b8c9a4b4e9365.elb.us-west-2.amazonaws.com 15021:32760/TCP,80:31496/TCP,443:32534/TCP 4m28s\n
-
Verify all the Helm releases installed in the istio-system
and istio-ingress
namespaces:
helm list -n istio-system\n
NAME NAMESPACE REVISION UPDATED STATUS CHART APP VERSION\nistio-base istio-system 1 2023-07-19 11:05:41.599921 -0700 PDT deployed base-1.18.1 1.18.1\nistiod istio-system 1 2023-07-19 11:05:48.087616 -0700 PDT deployed istiod-1.18.1 1.18.1\n
helm list -n istio-ingress\n
NAME NAMESPACE REVISION UPDATED STATUS CHART APP VERSION\nistio-ingress istio-ingress 1 2023-07-19 11:06:03.41609 -0700 PDT deployed gateway-1.18.1 1.18.1\n
"},{"location":"patterns/istio/#observability-add-ons_1","title":"Observability Add-ons","text":"Validate the setup of the observability add-ons by running the following commands and accessing each of the service endpoints using a URL of the form http://localhost:<port> where <port>
is one of the port numbers for the corresponding service.
# Visualize Istio Mesh console using Kiali\nkubectl port-forward svc/kiali 20001:20001 -n istio-system\n\n# Get to the Prometheus UI\nkubectl port-forward svc/prometheus 9090:9090 -n istio-system\n\n# Visualize metrics using Grafana\nkubectl port-forward svc/grafana 3000:3000 -n istio-system\n\n# Visualize application traces via Jaeger\nkubectl port-forward svc/jaeger 16686:16686 -n istio-system\n
"},{"location":"patterns/istio/#example","title":"Example","text":" -
Create the sample
namespace and enable the sidecar injection on it
kubectl create namespace sample\nkubectl label namespace sample istio-injection=enabled\n
namespace/sample created\nnamespace/sample labeled\n
-
Deploy helloworld
app
cat <<EOF > helloworld.yaml\napiVersion: v1\nkind: Service\nmetadata:\n name: helloworld\n labels:\n app: helloworld\n service: helloworld\nspec:\n ports:\n - port: 5000\n name: http\n selector:\n app: helloworld\n---\napiVersion: apps/v1\nkind: Deployment\nmetadata:\n name: helloworld-v1\n labels:\n app: helloworld\n version: v1\nspec:\n replicas: 1\n selector:\n matchLabels:\n app: helloworld\n version: v1\n template:\n metadata:\n labels:\n app: helloworld\n version: v1\n spec:\n containers:\n - name: helloworld\n image: docker.io/istio/examples-helloworld-v1\n resources:\n requests:\n cpu: \"100m\"\n imagePullPolicy: IfNotPresent #Always\n ports:\n - containerPort: 5000\nEOF\nkubectl apply -f helloworld.yaml -n sample\n
service/helloworld created\ndeployment.apps/helloworld-v1 created\n
-
Deploy sleep
app that we will use to connect to helloworld
app
cat <<EOF > sleep.yaml\napiVersion: v1\nkind: ServiceAccount\nmetadata:\n name: sleep\n---\napiVersion: v1\nkind: Service\nmetadata:\n name: sleep\n labels:\n app: sleep\n service: sleep\nspec:\n ports:\n - port: 80\n name: http\n selector:\n app: sleep\n---\napiVersion: apps/v1\nkind: Deployment\nmetadata:\n name: sleep\nspec:\n replicas: 1\n selector:\n matchLabels:\n app: sleep\n template:\n metadata:\n labels:\n app: sleep\n spec:\n terminationGracePeriodSeconds: 0\n serviceAccountName: sleep\n containers:\n - name: sleep\n image: curlimages/curl\n command: [\"/bin/sleep\", \"infinity\"]\n imagePullPolicy: IfNotPresent\n volumeMounts:\n - mountPath: /etc/sleep/tls\n name: secret-volume\n volumes:\n - name: secret-volume\n secret:\n secretName: sleep-secret\n optional: true\nEOF\nkubectl apply -f sleep.yaml -n sample\n
serviceaccount/sleep created\nservice/sleep created\ndeployment.apps/sleep created\n
-
Check all the pods in the sample
namespace
kubectl get pods -n sample\n
NAME READY STATUS RESTARTS AGE\nhelloworld-v1-b6c45f55-bx2xk 2/2 Running 0 50s\nsleep-9454cc476-p2zxr 2/2 Running 0 15s\n
-
Connect to helloworld
app from sleep
app and verify if the connection uses envoy proxy
kubectl exec -n sample -c sleep \\\n\"$(kubectl get pod -n sample -l \\\napp=sleep -o jsonpath='{.items[0].metadata.name}')\" \\\n-- curl -v helloworld.sample:5000/hello\n
* processing: helloworld.sample:5000/hello\n% Total % Received % Xferd Average Speed Time Time Time Current\n Dload Upload Total Spent Left Speed\n0 0 0 0 0 0 0 0 --:--:-- --:--:-- --:--:-- 0* Trying 172.20.26.38:5000...\n* Connected to helloworld.sample (172.20.26.38) port 5000\n> GET /hello HTTP/1.1\n> Host: helloworld.sample:5000\n> User-Agent: curl/8.2.0\n> Accept: */*\n>\n< HTTP/1.1 200 OK\n< server: envoy\n< date: Fri, 21 Jul 2023 18:56:09 GMT\n< content-type: text/html; charset=utf-8\n< content-length: 58\n< x-envoy-upstream-service-time: 142\n<\n{ [58 bytes data]\n100 58 100 58 Hello version: v1, instance: helloworld-v1-b6c45f55-h592c\n0 0 392 0 --:--:-- --:--:-- --:--:-- 394\n* Connection #0 to host helloworld.sample left intact\n
"},{"location":"patterns/istio/#destroy","title":"Destroy","text":"terraform destroy -target=\"module.eks_blueprints_addons\" -auto-approve\nterraform destroy -target=\"module.eks\" -auto-approve\nterraform destroy -auto-approve\n
See here for more details on cleaning up the resources created.
"},{"location":"patterns/karpenter/","title":"Karpenter","text":""},{"location":"patterns/karpenter/#karpenter","title":"Karpenter","text":"This pattern demonstrates how to provision Karpenter on a serverless cluster (serverless data plane) using Fargate Profiles.
"},{"location":"patterns/karpenter/#deploy","title":"Deploy","text":"See here for the prerequisites and steps to deploy this pattern.
"},{"location":"patterns/karpenter/#validate","title":"Validate","text":"TODO
Add in validation steps
"},{"location":"patterns/karpenter/#destroy","title":"Destroy","text":"Scale down the deployment to de-provision Karpenter created resources first:
kubectl delete deployment inflate\n
terraform destroy -target=\"module.eks_blueprints_addons\" -auto-approve\nterraform destroy -target=\"module.eks\" -auto-approve\nterraform destroy -auto-approve\n
See here for more details on cleaning up the resources created.
"},{"location":"patterns/multi-tenancy-with-teams/","title":"Multi-Tenancy w/ Teams","text":""},{"location":"patterns/multi-tenancy-with-teams/#multi-tenancy-w-teams","title":"Multi-Tenancy w/ Teams","text":"This pattern demonstrates how to provision and configure a multi-tenancy Amazon EKS cluster with safeguards for resource consumption and namespace isolation.
This example solution provides:
- Two development teams -
team-red
and team-blue
- isolated to their respective namespaces - An admin team with privileged access to the cluster (
team-admin
)
"},{"location":"patterns/multi-tenancy-with-teams/#deploy","title":"Deploy","text":"See here for the prerequisites and steps to deploy this pattern.
"},{"location":"patterns/multi-tenancy-with-teams/#validate","title":"Validate","text":"TODO
Add in validation steps
"},{"location":"patterns/multi-tenancy-with-teams/#destroy","title":"Destroy","text":"terraform destroy -target=\"module.eks_blueprints_addons\" -auto-approve\nterraform destroy -target=\"module.eks\" -auto-approve\nterraform destroy -auto-approve\n
See here for more details on cleaning up the resources created.
"},{"location":"patterns/private-public-ingress/","title":"Private and Public Ingress","text":""},{"location":"patterns/private-public-ingress/#amazon-eks-private-and-public-ingress-example","title":"Amazon EKS Private and Public Ingress example","text":"This example demonstrates how to provision an Amazon EKS cluster with two ingress-nginx controllers; one to expose applications publicly and the other to expose applications internally. It also assigns security groups to the Network Load Balancers used to expose the internal and external ingress controllers.
This solution:
- Installs an ingress-nginx controller for public traffic
- Installs an ingress-nginx controller for internal traffic
To expose your application services via an Ingress
resource with this solution you can set the respective ingressClassName
as either ingress-nginx-external
or ingress-nginx-internal
.
Refer to the documentation for AWS Load Balancer controller
configuration options.
"},{"location":"patterns/private-public-ingress/#deploy","title":"Deploy","text":"See here for the prerequisites and steps to deploy this pattern.
"},{"location":"patterns/private-public-ingress/#validate","title":"Validate","text":"TODO
Add in validation steps
"},{"location":"patterns/private-public-ingress/#destroy","title":"Destroy","text":"terraform destroy -target=\"module.eks_blueprints_addons\" -auto-approve\nterraform destroy -target=\"module.eks\" -auto-approve\nterraform destroy -auto-approve\n
See here for more details on cleaning up the resources created.
"},{"location":"patterns/privatelink-access/","title":"PrivateLink Access","text":""},{"location":"patterns/privatelink-access/#private-eks-cluster-access-via-aws-privatelink","title":"Private EKS cluster access via AWS PrivateLink","text":"This pattern demonstrates how to access a private EKS cluster using AWS PrivateLink.
Refer to the documentation for further details on AWS PrivateLink
.
"},{"location":"patterns/privatelink-access/#deploy","title":"Deploy","text":"See here for the prerequisites and steps to deploy this pattern.
"},{"location":"patterns/privatelink-access/#validate","title":"Validate","text":""},{"location":"patterns/privatelink-access/#network-connectivity","title":"Network Connectivity","text":"An output ssm_test
has been provided to aid in quickly testing the connectivity from the client EC2 instance to the private EKS cluster via AWS PrivateLink. Copy the output value and paste it into your terminal to execute and check the connectivity. If configured correctly, the value returned should be ok
.
COMMAND=\"curl -ks https://9A85B21811733524E3ABCDFEA8714642.gr7.us-west-2.eks.amazonaws.com/readyz\"\nCOMMAND_ID=$(aws ssm send-command --region us-west-2 \\\n--document-name \"AWS-RunShellScript\" \\\n--parameters \"commands=[$COMMAND]\" \\\n--targets \"Key=instanceids,Values=i-0a45eff73ba408575\" \\\n--query 'Command.CommandId' \\\n--output text)\naws ssm get-command-invocation --region us-west-2 \\\n--command-id $COMMAND_ID \\\n--instance-id i-0a45eff73ba408575 \\\n--query 'StandardOutputContent' \\\n--output text\n
"},{"location":"patterns/privatelink-access/#cluster-access","title":"Cluster Access","text":"To test access to the cluster, you will need to execute Kubernetes API calls from within the private network to access the cluster. An EC2 instance has been deployed into a \"client\" VPC to simulate this scenario. However, since the EKS cluster was created with your local IAM identity, the aws-auth
ConfigMap will only have your local identity that is permitted to access the cluster. Since the cluster's API endpoint is private, we cannot use Terraform to reach it to add additional entries to the ConfigMap; we can only access the cluster from within the private network of the cluster's VPC or from the client VPC using AWS PrivateLink access.
Info
The \"client\" EC2 instance provided and the copying of AWS credentials to that instance are for demonstration purposes only. Please consider alternate methods of network access such as AWS Client VPN to provide more secure access.
Perform the following steps to access the cluster with kubectl
from the provided \"client\" EC2 instance.
- Execute the command below on your local machine to get temporary credentials that will be used on the \"client\" EC2 instance:
aws sts get-session-token --duration-seconds 3600 --output yaml\n
- Start a new SSM session on the \"client\" EC2 instance using the provided
ssm_start_session
output value. Copy the output value and paste it into your terminal to execute. Your terminal will now be connected to the \"client\" EC2 instance.
aws ssm start-session --region us-west-2 --target i-0280cf604085f4a44\n
-
Once logged in, export the following environment variables from the output of step #1:
Warning
The session credentials are only valid for 1 hour; you can adjust the session duration in the command provided in step #1
export AWS_ACCESS_KEY_ID=XXXX\nexport AWS_SECRET_ACCESS_KEY=YYYY\nexport AWS_SESSION_TOKEN=ZZZZ\n
- Run the following command to update the local
~/.kube/config
file to enable access to the cluster:
aws eks update-kubeconfig --region us-west-2 --name privatelink-access\n
- Test access by listing the pods running on the cluster:
kubectl get pods -A\n
NAMESPACE NAME READY STATUS RESTARTS AGE\nkube-system aws-node-4f8g8 1/1 Running 0 1m\nkube-system coredns-6ff9c46cd8-59sqp 1/1 Running 0 1m\nkube-system coredns-6ff9c46cd8-svnpb 1/1 Running 0 2m\nkube-system kube-proxy-mm2zc 1/1 Running 0 1m\n
"},{"location":"patterns/privatelink-access/#destroy","title":"Destroy","text":"terraform destroy -target=\"module.eks_blueprints_addons\" -auto-approve\nterraform destroy -target=\"module.eks\" -auto-approve\nterraform destroy -auto-approve\n
See here for more details on cleaning up the resources created.
"},{"location":"patterns/sso-iam-identity-center/","title":"SSO - IAM Identity Center","text":""},{"location":"patterns/sso-iam-identity-center/#iam-identity-center-single-sign-on-for-amazon-eks-cluster","title":"IAM Identity Center Single Sign-On for Amazon EKS Cluster","text":"This example demonstrates how to deploy an Amazon EKS cluster on the AWS Cloud, integrated with IAM Identity Center (formerly AWS SSO) as the Identity Provider (IdP) for Single Sign-On (SSO) authentication. The configuration for authorization is done using Kubernetes Role-based access control (RBAC).
"},{"location":"patterns/sso-iam-identity-center/#deploy","title":"Deploy","text":"See here for the prerequisites and steps to deploy this pattern.
"},{"location":"patterns/sso-iam-identity-center/#validate","title":"Validate","text":"After the terraform
commands are executed successfully, check if the newly created users are active.
To do that use the link provided in the email invite - if you added a valid email address for your users either in your Terraform code or IAM Identity Center Console - or go to the IAM Identity Center Console, in the Users dashboard on the left hand side menu, then select the user, and click on Reset password button on the upper right corner. Choose the option to Generate a one-time password and share the password with the user.
With the active users, use one of the terraform output
examples to configure your AWS credentials for SSO, as shown in the examples below. After you choose the SSO registration scopes, a browser window will open and ask you to log in using your IAM Identity Center username and password.
Admin user example
configure_sso_admin = <<EOT\n # aws configure sso\n SSO session name (Recommended): <SESSION_NAME>\n SSO start URL [None]: https://d-1234567890.awsapps.com/start\n SSO region [None]: us-west-2\n SSO registration scopes [sso:account:access]:\n Attempting to automatically open the SSO authorization page in your default browser.\n If the browser does not open or you wish to use a different device to authorize this request, open the following URL:\n\n https://device.sso.us-west-2.amazonaws.com/\n\n Then enter the code:\n\n The only AWS account available to you is: 123456789012\n Using the account ID 123456789012\n The only role available to you is: EKSClusterAdmin\n Using the role name EKSClusterAdmin\n CLI default client Region [us-west-2]: us-west-2\n CLI default output format [json]: json\n CLI profile name [EKSClusterAdmin-123456789012]:\n\n To use this profile, specify the profile name using --profile, as shown:\n\n aws eks --region us-west-2 update-kubeconfig --name iam-identity-center --profile EKSClusterAdmin-123456789012\n\nEOT\n
Read-only user example
configure_sso_user = <<EOT\n # aws configure sso\n SSO session name (Recommended): <SESSION_NAME>\n SSO start URL [None]: https://d-1234567890.awsapps.com/start\n SSO region [None]: us-west-2\n SSO registration scopes [sso:account:access]:\n Attempting to automatically open the SSO authorization page in your default browser.\n If the browser does not open or you wish to use a different device to authorize this request, open the following URL:\n\n https://device.sso.us-west-2.amazonaws.com/\n\n Then enter the code:\n\n The only AWS account available to you is: 123456789012\n Using the account ID 123456789012\n The only role available to you is: EKSClusterUser\n Using the role name EKSClusterUser\n CLI default client Region [us-west-2]: us-west-2\n CLI default output format [json]: json\n CLI profile name [EKSClusterUser-123456789012]:\n\n To use this profile, specify the profile name using --profile, as shown:\n\n aws eks --region us-west-2 update-kubeconfig --name iam-identity-center --profile EKSClusterUser-123456789012\n\nEOT\n
With the kubeconfig
configured, you'll be able to run kubectl
commands in your Amazon EKS Cluster with the impersonated user. The read-only user has a cluster-viewer
Kubernetes role bound to its group, whereas the admin user has the admin
Kubernetes role bound to its group.
kubectl get pods -A\nNAMESPACE NAME READY STATUS RESTARTS AGE\namazon-guardduty aws-guardduty-agent-bl2v2 1/1 Running 0 3h54m\namazon-guardduty aws-guardduty-agent-sqvcx 1/1 Running 0 3h54m\namazon-guardduty aws-guardduty-agent-w8gfc 1/1 Running 0 3h54m\nkube-system aws-node-m9hmd 1/1 Running 0 3h53m\nkube-system aws-node-w42b8 1/1 Running 0 3h53m\nkube-system aws-node-wm6rm 1/1 Running 0 3h53m\nkube-system coredns-6ff9c46cd8-94jlr 1/1 Running 0 3h59m\nkube-system coredns-6ff9c46cd8-nwmrb 1/1 Running 0 3h59m\nkube-system kube-proxy-7fb86 1/1 Running 0 3h54m\nkube-system kube-proxy-p4f5g 1/1 Running 0 3h54m\nkube-system kube-proxy-qkfmc 1/1 Running 0 3h54m\n
You can also use the configure_kubectl
output to assume the Cluster creator role with cluster-admin
access.
configure_kubectl = \"aws eks --region us-west-2 update-kubeconfig --name iam-identity-center\"\n
"},{"location":"patterns/sso-iam-identity-center/#destroy","title":"Destroy","text":"terraform destroy -target=\"module.eks_blueprints_addons\" -auto-approve\nterraform destroy -target=\"module.eks\" -auto-approve\nterraform destroy -auto-approve\n
See here for more details on cleaning up the resources created.
"},{"location":"patterns/sso-okta/","title":"SSO - Okta","text":""},{"location":"patterns/sso-okta/#okta-single-sign-on-for-amazon-eks-cluster","title":"Okta Single Sign-On for Amazon EKS Cluster","text":"This example demonstrates how to deploy an Amazon EKS cluster on the AWS Cloud, integrated with Okta as the Identity Provider (IdP) for Single Sign-On (SSO) authentication. The configuration for authorization is done using Kubernetes Role-based access control (RBAC).
"},{"location":"patterns/sso-okta/#deploy","title":"Deploy","text":"See here for the prerequisites and steps to deploy this pattern.
"},{"location":"patterns/sso-okta/#validate","title":"Validate","text":"After the terraform
commands are executed successfully, check if the newly created users are active.
To do that use the link provided in the email invite if you added a valid email address for your users, or go to the Okta Admin Dashboard, select the user, and click on Set Password and Activate button.
With the active users, use the terraform output
example to setup your kubeconfig
profile to authenticate through Okta.
configure_kubeconfig = <<EOT\n kubectl config set-credentials oidc \\\n --exec-api-version=client.authentication.k8s.io/v1beta1 \\\n --exec-command=kubectl \\\n --exec-arg=oidc-login \\\n --exec-arg=get-token \\\n --exec-arg=--oidc-issuer-url=https://dev-ORGID.okta.com/oauth2/1234567890abcdefghij \\\n --exec-arg=--oidc-client-id=1234567890abcdefghij \\\n --exec-arg=--oidc-extra-scope=\"email offline_access profile openid\"\n
With the kubeconfig
configured, you'll be able to run kubectl
commands in your Amazon EKS Cluster using the --user
cli option to impersonate the Okta authenticated user. When kubectl
command is issued with the --user
option for the first time, your browser window will open and require you to authenticate.
The read-only user has a cluster-viewer
Kubernetes role bound to its group, whereas the admin user has the admin
Kubernetes role bound to its group.
kubectl get pods -A\nNAMESPACE NAME READY STATUS RESTARTS AGE\namazon-guardduty aws-guardduty-agent-bl2v2 1/1 Running 0 3h54m\namazon-guardduty aws-guardduty-agent-sqvcx 1/1 Running 0 3h54m\namazon-guardduty aws-guardduty-agent-w8gfc 1/1 Running 0 3h54m\nkube-system aws-node-m9hmd 1/1 Running 0 3h53m\nkube-system aws-node-w42b8 1/1 Running 0 3h53m\nkube-system aws-node-wm6rm 1/1 Running 0 3h53m\nkube-system coredns-6ff9c46cd8-94jlr 1/1 Running 0 3h59m\nkube-system coredns-6ff9c46cd8-nwmrb 1/1 Running 0 3h59m\nkube-system kube-proxy-7fb86 1/1 Running 0 3h54m\nkube-system kube-proxy-p4f5g 1/1 Running 0 3h54m\nkube-system kube-proxy-qkfmc 1/1 Running 0 3h54m\n
You can also use the configure_kubectl
output to assume the Cluster creator role with cluster-admin
access.
configure_kubectl = \"aws eks --region us-west-2 update-kubeconfig --name okta\"\n
It's also possible to preconfigure your kubeconfig
using the okta_login
output. This will also require you to authenticate in a browser window.
okta_login = \"kubectl oidc-login setup --oidc-issuer-url=https://dev-ORGID.okta.com/oauth2/1234567890abcdefghij --oidc-client-id=1234567890abcdefghij\"\n
"},{"location":"patterns/sso-okta/#destroy","title":"Destroy","text":"terraform destroy -target=\"module.eks_blueprints_addons\" -auto-approve\nterraform destroy -target=\"module.eks\" -auto-approve\nterraform destroy -auto-approve\n
See here for more details on cleaning up the resources created.
"},{"location":"patterns/stateful/","title":"Stateful","text":""},{"location":"patterns/stateful/#amazon-eks-cluster-for-stateful-workloads","title":"Amazon EKS Cluster for Stateful Workloads","text":""},{"location":"patterns/stateful/#features","title":"Features","text":"Please note: not all of the features listed below are required for stateful workloads on EKS. We are simply grouping together a set of features that are commonly encountered when managing stateful workloads. Users are encouraged to only enable the features that are required for their workload(s) and use case(s).
"},{"location":"patterns/stateful/#velero","title":"velero","text":"(From the project documentation) velero
(formerly Heptio Ark) gives you tools to back up and restore your Kubernetes cluster resources and persistent volumes. You can run Velero with a public cloud platform or on-premises. Velero lets you:
- Take backups of your cluster and restore in case of loss.
- Migrate cluster resources to other clusters.
- Replicate your production cluster to development and testing clusters.
"},{"location":"patterns/stateful/#ebs-efs-csi-drivers","title":"EBS & EFS CSI Drivers","text":" - A second storage class for
gp3
backed volumes has been added and made the default over the EKS default gp2
storage class (gp2
storage class remains in the cluster for use, but it is no longer the default storage class) - A standard implementation of the EFS CSI driver
"},{"location":"patterns/stateful/#eks-managed-nodegroup-w-multiple-volumes","title":"EKS Managed Nodegroup w/ Multiple Volumes","text":"An EKS managed nodegroup that utilizes multiple EBS volumes. The primary use case demonstrated in this example is a second volume that is dedicated to the containerd
runtime to ensure the root volume is not filled up nor has its I/O exhausted to ensure the instance does not reach a degraded state. The containerd
directories are mapped to this volume. You can read more about this recommendation in our EKS best practices guide and refer to the containerd
documentation for more information. The update for containerd
to use the second volume is managed through the provided user data.
In addition, the following properties are configured on the nodegroup volumes:
- EBS encryption using a customer managed key (CMK)
- Configuring the volumes to use GP3 storage
"},{"location":"patterns/stateful/#eks-managed-nodegroup-w-instance-store-volumes","title":"EKS Managed Nodegroup w/ Instance Store Volume(s)","text":"An EKS managed nodegroup that utilizes EC2 instances with ephemeral instance store(s). Instance stores are ideal for temporary storage of information that changes frequently, such as buffers, caches, scratch data, and other temporary content, or for data that is replicated across a fleet of instances. You can read more about instance stores in the AWS documentation; and be sure to check out the Block device mapping instance store caveats
section as well which covers why the example has provided user data for mounting the instance store(s). The size and number of instance stores will vary based on the EC2 instance type and class.
In addition, the following properties are configured on the nodegroup volumes:
- EBS encryption using a customer managed key (CMK)
- Configuring the volumes to use GP3 storage
"},{"location":"patterns/stateful/#deploy","title":"Deploy","text":"See here for the prerequisites and steps to deploy this pattern.
"},{"location":"patterns/stateful/#validate","title":"Validate","text":"For validating velero
see here
The following command will update the kubeconfig
on your local machine and allow you to interact with your EKS Cluster using kubectl
to validate the deployment.
-
List the storage classes to view that efs
, gp2
, and gp3
classes are present and gp3
is the default storage class
kubectl get storageclasses\n
NAME PROVISIONER RECLAIMPOLICY VOLUMEBINDINGMODE ALLOWVOLUMEEXPANSION AGE\nefs efs.csi.aws.com Delete Immediate true 2m19s\ngp2 kubernetes.io/aws-ebs Delete WaitForFirstConsumer false 15m\ngp3 (default) ebs.csi.aws.com Delete WaitForFirstConsumer true 2m19s\n
-
From an instance launched with instance store(s), check that the instance store has been mounted correctly. To verify, first install the nvme-cli
tool and then use it to list the attached NVMe volumes. You can access the instance using SSM Session Manager:
# Install the nvme-cli tool\nsudo yum install nvme-cli -y\n\n# Show NVMe volumes attached\nsudo nvme list\n
# Notice the model is `EC2 NVMe Instance Storage` for the instance store\nNode SN Model Namespace Usage Format FW Rev\n---------------- -------------------- ---------------------------------------- --------- -------------------------- ---------------- --------\n/dev/nvme0n1 vol0546d3c3b0af0bf6d Amazon Elastic Block Store 1 25.77 GB / 25.77 GB 512 B + 0 B 1.0\n/dev/nvme1n1 AWS24BBF51AF55097008 Amazon EC2 NVMe Instance Storage 1 75.00 GB / 75.00 GB 512 B + 0 B 0\n\n# Show disks, their partitions and mounts\nsudo lsblk\n\n# Output should look like below\nNAME MAJ:MIN RM SIZE RO TYPE MOUNTPOINT\nnvme0n1 259:0 0 24G 0 disk\n\u251c\u2500nvme0n1p1 259:2 0 24G 0 part /\n\u2514\u2500nvme0n1p128 259:3 0 1M 0 part\nnvme1n1 259:1 0 69.9G 0 disk /local1 # <--- this is the instance store\n
-
From an instance launched with multiple volume(s), check that the second, additional volume has been attached correctly. To verify, first install the nvme-cli
tool and then use it to verify. To verify, you can access the instance using SSM Session Manager:
# Install the nvme-cli tool\nsudo yum install nvme-cli -y\n\n# Show NVMe volumes attached\nsudo nvme list\n
# /dev/nvme0n1 is the root volume and /dev/nvme1n1 is the second, additional volume\nNode SN Model Namespace Usage Format FW Rev\n---------------- -------------------- ---------------------------------------- --------- -------------------------- ---------------- --------\n/dev/nvme0n1 vol0cd37dab9e4a5c184 Amazon Elastic Block Store 1 68.72 GB / 68.72 GB 512 B + 0 B 1.0\n/dev/nvme1n1 vol0ad3629c159ee869c Amazon Elastic Block Store 1 25.77 GB / 25.77 GB 512 B + 0 B 1.0\n
-
From the same instance used in step 3, check that the containerd directories are using the second /dev/nvme1n1
volume:
df /var/lib/containerd/\n
# Output should look like below, which shows the directory on the\n# /dev/nvme1n1 volume and NOT on /dev/nvme0n1 (root volume)\nFilesystem 1K-blocks Used Available Use% Mounted on\n/dev/nvme1n1 24594768 2886716 20433380 13% /var/lib/containerd\n
df /run/containerd/\n
# Output should look like below, which shows the directory on the\n# /dev/nvme1n1 volume and NOT on /dev/nvme0n1 (root volume)\nFilesystem 1K-blocks Used Available Use% Mounted on\n/dev/nvme1n1 24594768 2886716 20433380 13% /run/containerd\n
"},{"location":"patterns/stateful/#destroy","title":"Destroy","text":"terraform destroy -target=\"module.eks_blueprints_addons\" -auto-approve\nterraform destroy -target=\"module.eks\" -auto-approve\nterraform destroy -auto-approve\n
See here for more details on cleaning up the resources created.
"},{"location":"patterns/tls-with-aws-pca-issuer/","title":"TLS w/ AWS PCA Issuer","text":""},{"location":"patterns/tls-with-aws-pca-issuer/#tls-with-aws-pca-issuer","title":"TLS with AWS PCA Issuer","text":"This pattern demonstrates how to enable TLS with AWS PCA issuer on an Amazon EKS cluster.
"},{"location":"patterns/tls-with-aws-pca-issuer/#deploy","title":"Deploy","text":"See here for the prerequisites and steps to deploy this pattern.
"},{"location":"patterns/tls-with-aws-pca-issuer/#validate","title":"Validate","text":" -
List all the pods running in aws-privateca-issuer
and cert-manager
Namespace.
kubectl get pods -n aws-privateca-issuer\nkubectl get pods -n cert-manager\n
-
View the certificate
status in the default
Namespace. It should be in Ready
state, and be pointing to a secret
created in the same Namespace.
kubectl get certificate -o wide\n
NAME READY SECRET ISSUER STATUS AGE\nexample True example-clusterissuer tls-with-aws-pca-issuer Certificate is up to date and has not expired 41m\n
kubectl get secret example-clusterissuer\n
NAME TYPE DATA AGE\nexample-clusterissuer kubernetes.io/tls 3 43m\n
"},{"location":"patterns/tls-with-aws-pca-issuer/#destroy","title":"Destroy","text":"terraform destroy -target=\"module.eks_blueprints_addons\" -auto-approve\nterraform destroy -target=\"module.eks\" -auto-approve\nterraform destroy -auto-approve\n
See here for more details on cleaning up the resources created.
"},{"location":"patterns/wireguard-with-cilium/","title":"Wireguard /w Cilium","text":""},{"location":"patterns/wireguard-with-cilium/#transparent-encryption-with-cilium-and-wireguard","title":"Transparent Encryption with Cilium and Wireguard","text":"This pattern demonstrates Cilium configured in CNI chaining mode with VPC CNI and with Wireguard transparent encryption enabled on an Amazon EKS cluster.
- Cilium CNI Chaining Documentation
- Cilium Wireguard Encryption Documentation
"},{"location":"patterns/wireguard-with-cilium/#deploy","title":"Deploy","text":"See here for the prerequisites and steps to deploy this pattern.
"},{"location":"patterns/wireguard-with-cilium/#validate","title":"Validate","text":" -
List the daemonsets
kubectl get ds -n kube-system\n
NAME DESIRED CURRENT READY UP-TO-DATE AVAILABLE NODE SELECTOR AGE\naws-node 2 2 2 2 2 <none> 156m\ncilium 2 2 2 2 2 kubernetes.io/os=linux 152m\nkube-proxy 2 2 2 2 2 <none> 156m\n
-
Open a shell inside the cilium container
kubectl -n kube-system exec -ti ds/cilium -- bash\n
-
Verify Encryption is enabled
cilium status | grep Encryption\n
Encryption: Wireguard [cilium_wg0 (Pubkey: b2krgbHgaCsVWALMnFLiS/RekhhcE36PXEjQ7T8+mW0=, Port: 51871, Peers: 1)]\n
-
Install tcpdump
apt-get update\napt-get install -y tcpdump\n
-
Start a packet capture on cilium_wg0
and verify that you see the payload in clear text, which means the traffic is encrypted with Wireguard
tcpdump -A -c 40 -i cilium_wg0 | grep \"Welcome to nginx!\"\n
<title>Welcome to nginx!</title>\n<h1>Welcome to nginx!</h1>\n...\n\n40 packets captured\n40 packets received by filter\n0 packets dropped by kernel\n
"},{"location":"patterns/wireguard-with-cilium/#destroy","title":"Destroy","text":"terraform destroy -target=\"module.eks_blueprints_addons\" -auto-approve\nterraform destroy -target=\"module.eks\" -auto-approve\nterraform destroy -auto-approve\n
See here for more details on cleaning up the resources created.
"},{"location":"snippets/ipv4-prefix-delegation/","title":"IPv4 Prefix Delegation","text":"The configuration snippet below shows how to enable prefix delegation to increase the number of available IP addresses on the provisioned EC2 nodes.
- Documentation
- Blog post
"},{"location":"snippets/ipv4-prefix-delegation/#vpc-cni-configuration","title":"VPC CNI Configuration","text":"In this example, the vpc-cni
addon is configured using before_compute = true
. This is done to ensure the vpc-cni
is created and updated before any EC2 instances are created so that the desired settings have applied before they will be referenced. With this configuration, you will now see that nodes created will have --max-pods 110
configured due to prefix delegation being enabled on the vpc-cni
.
If you find that your nodes are not being created with the correct number of max pods (i.e. - for m5.large
, if you are seeing a max pods of 29 instead of 110), most likely the vpc-cni
was not configured before the EC2 instances.
module \"eks\" {\nsource = \"terraform-aws-modules/eks/aws\"\n# Truncated for brevity\n...\ncluster_addons = {\nvpc-cni = {\nbefore_compute = true\nmost_recent = true # To ensure access to the latest settings provided\nconfiguration_values = jsonencode({\nenv = {\nENABLE_PREFIX_DELEGATION = \"true\"\nWARM_PREFIX_TARGET = \"1\"\n}\n})\n}\n}\n...\n}\n
When enabled, inspect one of the aws-node-*
(AWS VPC CNI) pods to ensure prefix delegation is enabled and warm prefix target is 1:
kubectl describe ds -n kube-system aws-node | grep ENABLE_PREFIX_DELEGATION: -A 3\n
Output should look similar to below (truncated for brevity):
ENABLE_PREFIX_DELEGATION: true # <- this should be set to true\nWARM_ENI_TARGET: 1\nWARM_PREFIX_TARGET: 1 # <- this should be set to 1\n...\n
"},{"location":"snippets/vpc-cni-custom-networking/","title":"VPC CNI Custom Networking","text":"Custom networking addresses the IP exhaustion issue by assigning the node and Pod IPs from secondary VPC address spaces (CIDR). Custom networking support supports ENIConfig custom resource. The ENIConfig includes an alternate subnet CIDR range (carved from a secondary VPC CIDR), along with the security group(s) that the Pods will belong to. When custom networking is enabled, the VPC CNI creates secondary ENIs in the subnet defined under ENIConfig. The CNI assigns Pods an IP addresses from a CIDR range defined in a ENIConfig CRD.
Since the primary ENI is not used by custom networking, the maximum number of Pods you can run on a node is lower. The host network Pods continue to use the IP address assigned to the primary ENI. Additionally, the primary ENI is used to handle source network translation and route Pod traffic outside the node.
- Documentation
- Best Practices Guide
"},{"location":"snippets/vpc-cni-custom-networking/#vpc-cni-configuration","title":"VPC CNI Configuration","text":"In this example, the vpc-cni
addon is configured using before_compute = true
. This is done to ensure the vpc-cni
is created and updated before any EC2 instances are created so that the desired settings have been applied before they are referenced. With this configuration, you will now see that nodes created will have --max-pods 110
configured due to the use of prefix delegation being enabled on the vpc-cni
.
If you find that your nodes are not being created with the correct number of max pods (i.e. - for m5.large
, if you are seeing a max pods of 29 instead of 110), most likely the vpc-cni
was not configured before the EC2 instances.
"},{"location":"snippets/vpc-cni-custom-networking/#components","title":"Components","text":"To enable VPC CNI custom networking, you must configuring the following components:
-
Create a VPC with additional CIDR block associations. These additional CIDR blocks will be used to create subnets for the VPC CNI custom networking:
module \"vpc\" {\nsource = \"terraform-aws-modules/vpc/aws\"\n# Truncated for brevity\n...\nsecondary_cidr_blocks = [local.secondary_vpc_cidr] # can add up to 5 total CIDR blocks\nazs = local.azs\nprivate_subnets = concat(\n[for k, v in local.azs : cidrsubnet(local.vpc_cidr, 4, k)],\n[for k, v in local.azs : cidrsubnet(local.secondary_vpc_cidr, 2, k)]\n)\n...\n}\n
-
Specify the VPC CNI custom networking configuration in the vpc-cni
addon configuration:
module \"eks\" {\nsource = \"terraform-aws-modules/eks/aws\"\n# Truncated for brevity\n...\ncluster_addons = {\nvpc-cni = {\nbefore_compute = true\nmost_recent = true # To ensure access to the latest settings provided\nconfiguration_values = jsonencode({\nenv = {\nAWS_VPC_K8S_CNI_CUSTOM_NETWORK_CFG = \"true\"\nENI_CONFIG_LABEL_DEF = \"topology.kubernetes.io/zone\"\n})\n}\n}\n...\n}\n
-
Create the ENIConfig
custom resource for each subnet that you want to deploy pods into:
resource \"kubectl_manifest\" \"eni_config\" {\nfor_each = zipmap(local.azs, slice(module.vpc.private_subnets, 3, 6))\nyaml_body = yamlencode({\napiVersion = \"crd.k8s.amazonaws.com/v1alpha1\"\nkind = \"ENIConfig\"\nmetadata = {\nname = each.key\n}\nspec = {\nsecurityGroups = [\nmodule.eks.node_security_group_id,\n]\nsubnet = each.value\n}\n})\n}\n
Once those settings have been successfully applied, you can verify if custom networking is enabled correctly by inspecting one of the aws-node-*
(AWS VPC CNI) pods:
kubectl describe pod aws-node-ttg4h -n kube-system\n\n# Output should look similar below (truncated for brevity)\nEnvironment:\n ADDITIONAL_ENI_TAGS: {}\nAWS_VPC_CNI_NODE_PORT_SUPPORT: true\nAWS_VPC_ENI_MTU: 9001\nAWS_VPC_K8S_CNI_CONFIGURE_RPFILTER: false\nAWS_VPC_K8S_CNI_CUSTOM_NETWORK_CFG: true # <- this should be set to true\nAWS_VPC_K8S_CNI_EXTERNALSNAT: false\nAWS_VPC_K8S_CNI_LOGLEVEL: DEBUG\n ...\n
"},{"location":"v4-to-v5/addons/","title":"Migrate to EKS Blueprints Addons Module","text":"Please consult the docs/v4-to-v5/example directory for reference configurations. If you find a bug, please open an issue with supporting configuration to reproduce.
"},{"location":"v4-to-v5/addons/#this-guide-is-under-active-development","title":"\u26a0\ufe0f This guide is under active development.","text":""},{"location":"v4-to-v5/addons/#list-of-backwards-incompatible-changes","title":"List of backwards incompatible changes","text":"-
"},{"location":"v4-to-v5/addons/#additional-changes","title":"Additional changes","text":""},{"location":"v4-to-v5/addons/#added","title":"Added","text":"-
"},{"location":"v4-to-v5/addons/#modified","title":"Modified","text":"-
"},{"location":"v4-to-v5/addons/#removed","title":"Removed","text":"-
"},{"location":"v4-to-v5/addons/#variable-and-output-changes","title":"Variable and output changes","text":" -
Removed variables:
-
-
Renamed variables:
-
-
Added variables:
-
-
Removed outputs:
-
-
Renamed outputs:
-
-
Added outputs:
-
"},{"location":"v4-to-v5/addons/#upgrade-migrations","title":"Upgrade Migrations","text":""},{"location":"v4-to-v5/addons/#before-v4x-example","title":"Before - v4.x Example","text":"module \"eks_blueprints_addons\" {\nsource = \"github.com/aws-ia/terraform-aws-eks-blueprints//modules/kubernetes-addons?ref=v4.32.1\"\neks_cluster_id = module.eks.cluster_name\neks_cluster_endpoint = module.eks.cluster_endpoint\neks_oidc_provider = module.eks.oidc_provider\neks_cluster_version = module.eks.cluster_version\n # TODO\n}\n
"},{"location":"v4-to-v5/addons/#after-v5x-example","title":"After - v5.x Example","text":"module \"eks_blueprints_addons\" {\nsource = \"aws-ia/eks-blueprints-addons/aws\"\nversion = \"~> 1.0\"\ncluster_name = module.eks.cluster_name\ncluster_endpoint = module.eks.cluster_endpoint\ncluster_version = module.eks.cluster_version\noidc_provider_arn = module.eks.oidc_provider_arn\n # TODO\n}\n
"},{"location":"v4-to-v5/addons/#diff-of-before-vs-after","title":"Diff of Before vs After","text":"module \"eks_blueprints_addons\" {\n- source = \"github.com/aws-ia/terraform-aws-eks-blueprints//modules/kubernetes-addons?ref=v4.32.1\"\n+ source = \"aws-ia/eks-blueprints-addons/aws\"\n+ version = \"~> 1.0\"\n # TODO\n}\n
"},{"location":"v4-to-v5/addons/#state-move-commands","title":"State Move Commands","text":"In conjunction with the changes above, users can elect to move their external capacity provider(s) under this module using the following move command. Command is shown using the values from the example shown above, please update to suit your configuration names:
terraform state mv 'xxx' 'yyy'\n
"},{"location":"v4-to-v5/cluster/","title":"Migrate to EKS Module v19.x","text":"Please consult the docs/v4-to-v5/example directory for reference configurations. If you find a bug, please open an issue with supporting configuration to reproduce.
"},{"location":"v4-to-v5/cluster/#backwards-incompatible-changes","title":"Backwards incompatible changes","text":" - The cluster module provided in EKS Blueprints is being removed entirely from the project. Instead, users are encouraged to use the
terraform-aws-eks
module for creating and managing their EKS cluster in Terraform. - The KMS module provided in EKS Blueprints has been removed. Users can leverage the KMS creation/management functionality provided by the
terraform-aws-eks
module or utilize the standalone terraform-aws-kms
module. - The EMR on EKS module provided in EKS Blueprints has been removed. Instead, users are encouraged to use the
terraform-aws-emr
virtual cluster sub-module for creating and managing their EMR on EKS virtual cluster in Terraform. - The teams multi-tenancy module provided in EKS Blueprints has been removed. Instead, users are encouraged to use the
terraform-aws-eks-blueprints-teams
module for creating and managing their multi-tenancy constructs within their EKS clusters in Terraform.
"},{"location":"v4-to-v5/cluster/#additional-changes","title":"Additional changes","text":""},{"location":"v4-to-v5/cluster/#added","title":"Added","text":" - N/A
"},{"location":"v4-to-v5/cluster/#modified","title":"Modified","text":" - N/A
"},{"location":"v4-to-v5/cluster/#removed","title":"Removed","text":" - All noted above under
Backwards incompatible changes
"},{"location":"v4-to-v5/cluster/#variable-and-output-changes","title":"Variable and output changes","text":"Since the change is to replace the EKS Blueprints cluster module with the terraform-aws-eks
module, there aren't technically any variable or output changes other than their removal. Please consult the terraform-aws-eks
module for its respective variables/outputs.
-
Removed variables:
- All
-
Renamed variables:
- None
-
Added variables:
- None
-
Removed outputs:
- All
-
Renamed outputs:
- None
-
Added outputs:
- None
"},{"location":"v4-to-v5/cluster/#upgrade-migrations","title":"Upgrade Migrations","text":""},{"location":"v4-to-v5/cluster/#before-v432-example","title":"Before - v4.32 Example","text":"module \"eks\" {\nsource = \"github.com/aws-ia/terraform-aws-eks-blueprints?ref=v4.32.1\"\ncluster_name = local.name\ncluster_version = \"1.26\"\nvpc_id = module.vpc.vpc_id\nprivate_subnet_ids = module.vpc.private_subnets\ncluster_endpoint_private_access = true\nmap_roles = [\n{\nrolearn = data.aws_caller_identity.current.arn\nusername = \"me\"\ngroups = [\"system:masters\"]\n},\n]\nmanaged_node_groups = {\nmanaged = {\nnode_group_name = \"managed\"\ninstance_types = [\"m5.large\"]\nmin_size = 1\nmax_size = 2\ndesired_size = 1\nk8s_labels = {\nWhich = \"managed\"\n}\n}\n}\nfargate_profiles = {\nfargate = {\nfargate_profile_name = \"fargate\"\nfargate_profile_namespaces = [{\nnamespace = \"default\"\nk8s_labels = {\nWhich = \"fargate\"\n}\n}]\nsubnet_ids = module.vpc.private_subnets\n}\n}\nself_managed_node_groups = {\nself_managed = {\nnode_group_name = \"self_managed\"\ninstance_type = \"m5.large\"\nlaunch_template_os = \"amazonlinux2eks\"\nmin_size = 1\nmax_size = 2\ndesired_size = 1\nk8s_labels = {\nWhich = \"self-managed\"\n}\n}\n}\ntags = {\nBlueprint = local.name\nGithubRepo = \"github.com/aws-ia/terraform-aws-eks-blueprints\"\n}\n}\n
"},{"location":"v4-to-v5/cluster/#after-v50-example","title":"After - v5.0 Example","text":"Any of the values that are marked with # Backwards compat
are provided to demonstrate configuration level changes to reduce the number of Terraform changes when migrating to the EKS module.
module \"eks\" {\nsource = \"terraform-aws-modules/eks/aws\"\nversion = \"~> 19.13\"\ncluster_name = local.name\ncluster_version = \"1.26\"\ncluster_endpoint_public_access = true # Backwards compat\ncluster_enabled_log_types = [\"api\", \"audit\", \"authenticator\", \"controllerManager\", \"scheduler\"] # Backwards compat\niam_role_name = \"${local.name}-cluster-role\" # Backwards compat\niam_role_use_name_prefix = false # Backwards compat\nkms_key_aliases = [local.name] # Backwards compat\nvpc_id = module.vpc.vpc_id\nsubnet_ids = module.vpc.private_subnets\nmanage_aws_auth_configmap = true\naws_auth_roles = [\n{\nrolearn = data.aws_caller_identity.current.arn\nusername = \"me\"\ngroups = [\"system:masters\"]\n},\n]\neks_managed_node_groups = {\nmanaged = {\niam_role_name = \"${local.name}-managed\" # Backwards compat\niam_role_use_name_prefix = false # Backwards compat\nuse_custom_launch_template = false # Backwards compat\ninstance_types = [\"m5.large\"]\nmin_size = 1\nmax_size = 2\ndesired_size = 1\nlabels = {\nWhich = \"managed\"\n}\n}\n}\nfargate_profiles = {\nfargate = {\niam_role_name = \"${local.name}-fargate\" # Backwards compat\niam_role_use_name_prefix = false # Backwards compat\nselectors = [{\nnamespace = \"default\"\nlabels = {\nWhich = \"fargate\"\n}\n}]\n}\n}\nself_managed_node_groups = {\nself_managed = {\nname = \"${local.name}-self_managed\" # Backwards compat\nuse_name_prefix = false # Backwards compat\niam_role_name = \"${local.name}-self_managed\" # Backwards compat\niam_role_use_name_prefix = false # Backwards compat\nlaunch_template_name = \"self_managed-${local.name}\" # Backwards compat\nlaunch_template_use_name_prefix = false # Backwards compat\ninstance_type = \"m5.large\"\nmin_size = 1\nmax_size = 2\ndesired_size = 1\nlabels = {\nWhich = \"self-managed\"\n}\n}\n}\ntags = {\nBlueprint = local.name\nGithubRepo = \"github.com/aws-ia/terraform-aws-eks-blueprints\"\n}\n}\n
"},{"location":"v4-to-v5/cluster/#diff-of-before-vs-after","title":"Diff of Before vs After","text":"module \"eks\" {\n- source = \"github.com/aws-ia/terraform-aws-eks-blueprints?ref=v4.32.1\"\n+ source = \"terraform-aws-modules/eks/aws\"\n+ version = \"~> 19.13\"\n cluster_name = local.name\n cluster_version = \"1.26\"\n\n vpc_id = module.vpc.vpc_id\n private_subnet_ids = module.vpc.private_subnets\n+ cluster_endpoint_public_access = true\n- cluster_endpoint_private_access = true\n- map_roles = [\n+ aws_auth_roles = [\n {\n rolearn = data.aws_caller_identity.current.arn\n username = \"me\"\n groups = [\"system:masters\"]\n },\n ]\n\n- managed_node_groups = {\n+ eks_managed_node_groups = {\n managed = {\n- node_group_name = \"managed\"\n instance_types = [\"m5.large\"]\n\n min_size = 1\n max_size = 2\n desired_size = 1\n\n- k8s_labels = {\n+ labels = {\n Which = \"managed\"\n }\n }\n }\n\n fargate_profiles = {\n fargate = {\n- fargate_profile_name = \"fargate\"\n- fargate_profile_namespaces = [{\n+ selectors = [{\n namespace = \"default\"\n\n- k8s_labels = {\n+ labels = {\n Which = \"fargate\"\n }\n }]\n- subnet_ids = module.vpc.private_subnets\n }\n }\n\n self_managed_node_groups = {\n self_managed = {\n- node_group_name = \"self_managed\"\n instance_type = \"m5.large\"\n- launch_template_os = \"amazonlinux2eks\"\n min_size = 1\n max_size = 2\n desired_size = 1\n\n- k8s_labels = {\n+ labels = {\n Which = \"self-managed\"\n }\n }\n }\n\n tags = {\n Blueprint = local.name\n GithubRepo = \"github.com/aws-ia/terraform-aws-eks-blueprints\"\n }\n}\n
"},{"location":"v4-to-v5/cluster/#state-move-commands","title":"State Move Commands","text":"The following Terraform state move commands are provided to aid in migrating the control plane and data plane components.
# This is not removing the configmap from the cluster -\n# it will be adopted by the new module\nterraform state rm 'module.eks.kubernetes_config_map.aws_auth[0]'\n# Cluster\nterraform state mv 'module.eks.module.aws_eks.aws_eks_cluster.this[0]' 'module.eks.aws_eks_cluster.this[0]'\n# Cluster IAM role\nterraform state mv 'module.eks.module.aws_eks.aws_iam_role.this[0]' 'module.eks.aws_iam_role.this[0]'\nterraform state mv 'module.eks.module.aws_eks.aws_iam_role_policy_attachment.this[\"arn:aws:iam::aws:policy/AmazonEKSClusterPolicy\"]' 'module.eks.aws_iam_role_policy_attachment.this[\"AmazonEKSClusterPolicy\"]'\nterraform state mv 'module.eks.module.aws_eks.aws_iam_role_policy_attachment.this[\"arn:aws:iam::aws:policy/AmazonEKSVPCResourceController\"]' 'module.eks.aws_iam_role_policy_attachment.this[\"AmazonEKSVPCResourceController\"]'\n# Cluster primary security group tags\n# Note: This will depend on the tags applied to the module - here we\n# are demonstrating the two tags used in the configuration above\nterraform state mv 'module.eks.module.aws_eks.aws_ec2_tag.cluster_primary_security_group[\"Blueprint\"]' 'module.eks.aws_ec2_tag.cluster_primary_security_group[\"Blueprint\"]'\nterraform state mv 'module.eks.module.aws_eks.aws_ec2_tag.cluster_primary_security_group[\"GithubRepo\"]' 'module.eks.aws_ec2_tag.cluster_primary_security_group[\"GithubRepo\"]'\n# Cluster security group\nterraform state mv 'module.eks.module.aws_eks.aws_security_group.cluster[0]' 'module.eks.aws_security_group.cluster[0]'\n# Cluster security group rules\nterraform state mv 'module.eks.module.aws_eks.aws_security_group_rule.cluster[\"ingress_nodes_443\"]' 'module.eks.aws_security_group_rule.cluster[\"ingress_nodes_443\"]'\n# Node security group\nterraform state mv 'module.eks.module.aws_eks.aws_security_group.node[0]' 'module.eks.aws_security_group.node[0]'\n# Node security group rules\nterraform state mv 'module.eks.module.aws_eks.aws_security_group_rule.node[\"ingress_cluster_443\"]' 
'module.eks.aws_security_group_rule.node[\"ingress_cluster_443\"]'\nterraform state mv 'module.eks.module.aws_eks.aws_security_group_rule.node[\"ingress_cluster_kubelet\"]' 'module.eks.aws_security_group_rule.node[\"ingress_cluster_kubelet\"]'\nterraform state mv 'module.eks.module.aws_eks.aws_security_group_rule.node[\"ingress_self_coredns_tcp\"]' 'module.eks.aws_security_group_rule.node[\"ingress_self_coredns_tcp\"]'\nterraform state mv 'module.eks.module.aws_eks.aws_security_group_rule.node[\"ingress_self_coredns_udp\"]' 'module.eks.aws_security_group_rule.node[\"ingress_self_coredns_udp\"]'\n# OIDC provider\nterraform state mv 'module.eks.module.aws_eks.aws_iam_openid_connect_provider.oidc_provider[0]' 'module.eks.aws_iam_openid_connect_provider.oidc_provider[0]'\n# Fargate profile(s)\n# Note: This demonstrates migrating one profile that is stored under the\n# key `fargate` in the module definition. The same set of steps would\n# need to be performed for each profile, changing only the key name\nterraform state mv 'module.eks.module.aws_eks_fargate_profiles[\"fargate\"].aws_eks_fargate_profile.eks_fargate' 'module.eks.module.fargate_profile[\"fargate\"].aws_eks_fargate_profile.this[0]'\nterraform state mv 'module.eks.module.aws_eks_fargate_profiles[\"fargate\"].aws_iam_role.fargate[0]' 'module.eks.module.fargate_profile[\"fargate\"].aws_iam_role.this[0]'\nterraform state mv 'module.eks.module.aws_eks_fargate_profiles[\"fargate\"].aws_iam_role_policy_attachment.fargate_pod_execution_role_policy[\"arn:aws:iam::aws:policy/AmazonEKSFargatePodExecutionRolePolicy\"]' 'module.eks.module.fargate_profile[\"fargate\"].aws_iam_role_policy_attachment.this[\"arn:aws:iam::aws:policy/AmazonEKSFargatePodExecutionRolePolicy\"]'\n# Managed nodegroup(s)\n# Note: This demonstrates migrating one nodegroup that is stored under the\n# key `managed` in the module definition. 
The same set of steps would\n# need to be performed for each nodegroup, changing only the key name\nterraform state mv 'module.eks.module.aws_eks_managed_node_groups[\"managed\"].aws_eks_node_group.managed_ng' 'module.eks.module.eks_managed_node_group[\"managed\"].aws_eks_node_group.this[0]'\nterraform state mv 'module.eks.module.aws_eks_managed_node_groups[\"managed\"].aws_iam_role.managed_ng[0]' 'module.eks.module.eks_managed_node_group[\"managed\"].aws_iam_role.this[0]'\nterraform state mv 'module.eks.module.aws_eks_managed_node_groups[\"managed\"].aws_iam_role_policy_attachment.managed_ng[\"arn:aws:iam::aws:policy/AmazonEC2ContainerRegistryReadOnly\"]' 'module.eks.module.eks_managed_node_group[\"managed\"].aws_iam_role_policy_attachment.this[\"arn:aws:iam::aws:policy/AmazonEC2ContainerRegistryReadOnly\"]'\nterraform state mv 'module.eks.module.aws_eks_managed_node_groups[\"managed\"].aws_iam_role_policy_attachment.managed_ng[\"arn:aws:iam::aws:policy/AmazonEKSWorkerNodePolicy\"]' 'module.eks.module.eks_managed_node_group[\"managed\"].aws_iam_role_policy_attachment.this[\"arn:aws:iam::aws:policy/AmazonEKSWorkerNodePolicy\"]'\nterraform state mv 'module.eks.module.aws_eks_managed_node_groups[\"managed\"].aws_iam_role_policy_attachment.managed_ng[\"arn:aws:iam::aws:policy/AmazonEKS_CNI_Policy\"]' 'module.eks.module.eks_managed_node_group[\"managed\"].aws_iam_role_policy_attachment.this[\"arn:aws:iam::aws:policy/AmazonEKS_CNI_Policy\"]'\n# Self-managed nodegroup(s)\n# Note: This demonstrates migrating one nodegroup that is stored under the\n# key `self_managed` in the module definition. 
The same set of steps would\n# need to be performed for each nodegroup, changing only the key name\nterraform state mv 'module.eks.module.aws_eks_self_managed_node_groups[\"self_managed\"].aws_autoscaling_group.self_managed_ng' 'module.eks.module.self_managed_node_group[\"self_managed\"].aws_autoscaling_group.this[0]'\nterraform state mv 'module.eks.module.aws_eks_self_managed_node_groups[\"self_managed\"].aws_iam_instance_profile.self_managed_ng[0]' 'module.eks.module.self_managed_node_group[\"self_managed\"].aws_iam_instance_profile.this[0]'\nterraform state mv 'module.eks.module.aws_eks_self_managed_node_groups[\"self_managed\"].aws_iam_role.self_managed_ng[0]' 'module.eks.module.self_managed_node_group[\"self_managed\"].aws_iam_role.this[0]'\nterraform state mv 'module.eks.module.aws_eks_self_managed_node_groups[\"self_managed\"].aws_iam_role_policy_attachment.self_managed_ng[\"arn:aws:iam::aws:policy/AmazonEC2ContainerRegistryReadOnly\"]' 'module.eks.module.self_managed_node_group[\"self_managed\"].aws_iam_role_policy_attachment.this[\"arn:aws:iam::aws:policy/AmazonEC2ContainerRegistryReadOnly\"]'\nterraform state mv 'module.eks.module.aws_eks_self_managed_node_groups[\"self_managed\"].aws_iam_role_policy_attachment.self_managed_ng[\"arn:aws:iam::aws:policy/AmazonEKSWorkerNodePolicy\"]' 'module.eks.module.self_managed_node_group[\"self_managed\"].aws_iam_role_policy_attachment.this[\"arn:aws:iam::aws:policy/AmazonEKSWorkerNodePolicy\"]'\nterraform state mv 'module.eks.module.aws_eks_self_managed_node_groups[\"self_managed\"].aws_iam_role_policy_attachment.self_managed_ng[\"arn:aws:iam::aws:policy/AmazonEKS_CNI_Policy\"]' 'module.eks.module.self_managed_node_group[\"self_managed\"].aws_iam_role_policy_attachment.this[\"arn:aws:iam::aws:policy/AmazonEKS_CNI_Policy\"]'\nterraform state mv 'module.eks.module.aws_eks_self_managed_node_groups[\"self_managed\"].module.launch_template_self_managed_ng.aws_launch_template.this[\"self-managed-node-group\"]' 
'module.eks.module.self_managed_node_group[\"self_managed\"].aws_launch_template.this[0]'\n# Secrets KMS key\nterraform state mv ' module.eks.module.kms[0].aws_kms_key.this' 'module.eks.module.kms.aws_kms_key.this[0]'\nterraform state mv 'module.eks.module.kms[0].aws_kms_alias.this' 'module.eks.module.kms.aws_kms_alias.this[\"migration\"]'\n# Cloudwatch Log Group\nterraform import 'module.eks.aws_cloudwatch_log_group.this[0]' /aws/eks/migration/cluster\n
"},{"location":"v4-to-v5/cluster/#removed-resources","title":"Removed Resources","text":"The following resources will be destroyed when migrating from EKS Blueprints v4.32.1 cluster to the v19.x EKS cluster:
module.eks.module.aws_eks_managed_node_groups[\"managed\"].aws_iam_instance_profile.managed_ng[0]\n
- It is not directly used and was intended to be used by Karpenter. The https://github.com/aws-ia/terraform-aws-eks-blueprints-addons module provides its own resource for creating an IAM instance profile for Karpenter
module.eks.module.aws_eks_managed_node_groups[\"managed\"].aws_iam_role_policy_attachment.managed_ng[\"arn:aws:iam::aws:policy/AmazonSSMManagedInstanceCore\"]\n
- IAM policy is not required by EKS - users can re-add this policy at their discretion
module.eks.module.aws_eks_fargate_profiles[\"fargate\"].aws_iam_policy.cwlogs[0]\nmodule.eks.module.aws_eks_fargate_profiles[\"fargate\"].aws_iam_role_policy_attachment.cwlogs[0]\n
- Policy is not required by EKS
module.eks.module.aws_eks_self_managed_node_groups[\"self_managed\"].aws_iam_role_policy_attachment.self_managed_ng[\"arn:aws:iam::aws:policy/AmazonSSMManagedInstanceCore\"]\n
- IAM policy is not required by EKS - users can re-add this policy at their discretion
"},{"location":"v4-to-v5/motivation/","title":"Direction for v5 of Terraform EKS Blueprints","text":""},{"location":"v4-to-v5/motivation/#what-has-worked","title":"What Has Worked","text":" -
EKS Blueprints was started to make it easier for customers to adopt Amazon Elastic Kubernetes Service (EKS) in a shorter period of time. The project has been quite successful in this regard - hearing from customers stating that EKS Blueprints has helped them get from zero to one or more clusters running with applications in less than 1-2 weeks.
-
EKS Blueprints has also been successful in providing working examples to users that demonstrate common architectural patterns and workload solutions. Some popular examples include:
- Spark on EKS
- Karpenter on EKS Fargate
- Transparent encryption with Wireguard and Cilium
- Fully serverless cluster with EKS Fargate
"},{"location":"v4-to-v5/motivation/#what-has-not","title":"What Has Not","text":" -
Scaling and managing addons that are created through EKS Blueprints. With almost 1,200 projects on the CNCF roadmap, the number of various ways and methods that a project allows for deploying onto a cluster (i.e. - Datadog offers 5 different Helm charts for its service, Prometheus hosts over 30 Helm charts for its services), as well as the number of different tools used to provision addons (i.e. - Terraform, ArgoCD, FluxCD, etc.), supporting both the number of addons and their different forms has been extremely challenging for the team. In addition to managing just the sheer number of addons, supporting the different configurations that users wish to have exposed in conjunction with testing and validating those various configurations is only compounded by the number of addons and their methods of creation.
-
Managing resources provisioned on the cluster using Terraform. Terraform is a fantastic tool for provisioning infrastructure and it is the tool of choice for many customers when it comes to creating resources in AWS. However, there are a number of downsides with Terraform when it comes to provisioning resources on a Kubernetes cluster. These include:
-
Ordering of dependencies when relationships live outside of Terraform's HCL syntax. Terraform wants to evaluate the current state of what it controls and be able to plan a series of actions to align the current state with the desired state in one action. It does this once for each terraform plan
or terraform apply
, and if any issues are encountered, it simply fails and halts execution. When Terraform cannot infer the ordering of dependencies across resources (i.e. - through passing outputs of parent resources to arguments of child resources using the Terraform <resource>.<name>.<attribute>
syntax), it will view this as no relationship between the resources and attempt to execute their provisioning in parallel and asynchronously. Any resources that are left waiting for a dependency will eventually timeout and fail, causing Terraform itself to timeout and fail the apply. This is where the reconciliation loop of a Kubernetes controller or operator on the cluster is better suited - continuously trying to reconcile the state over and over again as dependencies are eventually resolved. (To be clear - the issue of dependency ordering still exists, but the controller/operator will keep retrying and on each retry, some resources will succeed which will move the execution along with each cycle until everything is fully deployed. Terraform could do this if it kept re-trying, but it does not do this today)
-
Publicly exposing access to the EKS endpoints in order to provision resources defined outside of the VPC onto the cluster. When using Terraform, the resource provisioning operation is a \"push\" model where Terraform will send requests to the EKS API Server to create resources. Coupled with the fact that the Terraform operation typically resides outside of the VPC where the cluster is running, this results in users enabling public access to the EKS endpoints to provision resources. However, the more widely accepted approach by the Kubernetes community has been the adoption of GitOps which uses a \"pull\" based model, where an operator or controller running on the cluster will pull the resource definitions from a Git repository and reconcile state from within the cluster itself. This approach is more secure as it does not require public access to the EKS endpoints and instead relies on the cluster's internal network to communicate with the EKS API Server.
-
The nesting of multiple sub-modules in conjunction with the necessity to even require a module to be able to support an addon. When we compare and contrast the Terraform approach to addons versus the GitOps approach, the Terraform approach has a glaring disadvantage - the need to create a module that wraps the addon's Helm chart in order to provision the addon via Terraform. As opposed to the GitOps approach, where users simply consume the charts from where they are stored as needed. This creates a bottleneck on the team to review, test, and validate each new addon as well as the overhead then added for maintaining and updating those addons going forward. This also opens up more areas where breaking changes are encountered which is compounded by the fact that Terraform addons are grouped under an \"umbrella\" module which obfuscates versioning.
-
Being able to support a combination of various tools, modules, frameworks, etc., to meet the needs of customers. The terraform-aws-eks
module was created long before EKS Blueprints, and many customers had already adopted this module for creating their clusters. In addition, Amazon has since adopted eksctl
as the official CLI for Amazon EKS. When EKS Blueprints was first announced, many customers raised questions asking if they needed to abandon their current clusters created through those other tools in order to adopt EKS Blueprints. The answer is no - users can and should be able to use their existing clusters while EKS Blueprints can help augment that process through its supporting modules (addons, teams, etc.). This left the team with the question - why create a Terraform module for creating an EKS cluster when the terraform-aws-eks
module already exists and the EKS Blueprints implementation already uses that module for creating the control plane and security groups?
"},{"location":"v4-to-v5/motivation/#what-is-changing","title":"What Is Changing","text":"The direction for EKS Blueprints in v5 will shift from providing an all-encompassing, monolithic \"framework\" and instead focus more on how users can organize a set of modular components to create the desired solution on Amazon EKS. This will allow customers to use the components of their choosing in a way that is more familiar to them and their organization instead of having to adopt and conform to a framework.
With this shift in direction, the cluster definition will be removed from the project and instead examples will reference the terraform-aws-eks
module for cluster creation. The remaining modules will be moved out to their own respective repositories as standalone projects. This leaves the EKS Blueprint project as the canonical place where users can receive guidance on how to configure their clusters to meet a desired architecture, how best to setup their clusters following well-architected practices, as well as references on the various ways that different workloads can be deployed on Amazon EKS.
"},{"location":"v4-to-v5/motivation/#notable-changes","title":"Notable Changes","text":" - EKS Blueprints will remove its Amazon EKS cluster Terraform module components (control plane, EKS managed node group, self-managed node group, and Fargate profile modules) from the project. In its place, users are encouraged to utilize the
terraform-aws-eks
module which meets or exceeds nearly all of the functionality of the EKS Blueprints v4.x cluster module. This includes the Terraform code contained at the root of the project as well as the aws-eks-fargate-profiles
, aws-eks-managed-node-groups
, aws-eks-self-managed-node-groups
, and launch-templates
modules which will all be removed from the project. - The
aws-kms
module will be removed entirely. This was consumed in the root project module for cluster secret encryption. In its place, users can utilize the KMS key creation functionality of the terraform-aws-eks
module or the terraform-aws-kms
module if they wish to control the key separately from the cluster itself. - The
emr-on-eks
module will be removed entirely; its replacement can be found in the new external module terraform-aws-emr
. - The
irsa
and helm-addon
modules will be removed entirely; we have released a new external module terraform-aws-eks-blueprints-addon
that is available on the Terraform registry that replicates/replaces the functionality of these two modules. This will now allow users, as well as partners, to create their own addons that are not natively supported by EKS Blueprints more easily and following the same process as EKS Blueprints. - The
aws-eks-teams
module will be removed entirely; its replacement will be the new external module terraform-aws-eks-blueprints-teams
that incorporates the changes customers have been asking for in https://github.com/aws-ia/terraform-aws-eks-blueprints/issues/842 - The integration between Terraform and ArgoCD has been removed in the initial release of v5. The team is currently investigating better patterns and solutions in conjunction with the ArgoCD and FluxCD teams that will provide a better, more integrated experience when using a GitOps based approach for cluster management. This will be released in a future version of EKS Blueprints v5 and is tracked here
"},{"location":"v4-to-v5/motivation/#resulting-project-structure","title":"Resulting Project Structure","text":"Previously under the v4.x structure, the EKS Blueprint project was comprised of various repositories across multiple AWS organizations that looked roughly like the following:
"},{"location":"v4-to-v5/motivation/#v4x-structure","title":"v4.x Structure","text":"\u251c\u2500\u2500 aws-ia/\n| \u251c\u2500\u2500 terraform-aws-eks-ack-addons/\n| \u2514\u2500\u2500 terraform-aws-eks-blueprints/\n| \u251c\u2500\u2500 aws-auth-configmap.tf\n| \u251c\u2500\u2500 data.tf\n| \u251c\u2500\u2500 eks-worker.tf\n| \u251c\u2500\u2500 locals.tf\n| \u251c\u2500\u2500 main.tf\n| \u251c\u2500\u2500 outputs.tf\n| \u251c\u2500\u2500 variables.tf\n| \u251c\u2500\u2500 versions.tf\n| \u251c\u2500\u2500 examples/\n| \u2514\u2500\u2500 modules\n| \u251c\u2500\u2500 aws-eks-fargate-profiles/\n| \u251c\u2500\u2500 aws-eks-managed-node-groups/\n| \u251c\u2500\u2500 aws-eks-self-managed-node-groups/\n| \u251c\u2500\u2500 aws-eks-teams/\n| \u251c\u2500\u2500 aws-kms/\n| \u251c\u2500\u2500 emr-on-eks/\n| \u251c\u2500\u2500 irsa/\n| \u251c\u2500\u2500 kubernetes-addons/\n| \u2514\u2500\u2500 launch-templates/\n\u251c\u2500\u2500 awslabs/\n| \u251c\u2500\u2500 crossplane-on-eks/\n| \u2514\u2500\u2500 data-on-eks/\n\u2514\u2500\u2500 aws-samples/\n \u251c\u2500\u2500 eks-blueprints-add-ons/ # Previously shared with the CDK based EKS Blueprints project\n\u2514\u2500\u2500 eks-blueprints-workloads/ # Previously shared with the CDK based EKS Blueprints project\n
Under the new v5.x structure, the Terraform based EKS Blueprints project will be comprised of the following repositories:
"},{"location":"v4-to-v5/motivation/#v5x-structure","title":"v5.x Structure","text":"\u251c\u2500\u2500 aws-ia/\n| \u251c\u2500\u2500 terraform-aws-eks-ack-addons/\n| \u251c\u2500\u2500 terraform-aws-eks-blueprints/ # Will contain only example/blueprint implementations; no modules\n| \u251c\u2500\u2500 terraform-aws-eks-blueprints-addon # Module for creating Terraform based addon (IRSA + Helm chart)\n| \u251c\u2500\u2500 terraform-aws-eks-blueprints-addons # Will contain a select set of addons supported by the EKS Blueprints\n| \u2514\u2500\u2500 terraform-aws-eks-blueprints-teams # Was previously `aws-eks-teams/` EKS Blueprint sub-module; updated based on customer feedback\n\u2514\u2500\u2500 awslabs/\n \u251c\u2500\u2500 crossplane-on-eks/\n \u2514\u2500\u2500 data-on-eks/ # Data related patterns that used to be located in `terraform-aws-eks-blueprints/` are now located here\n
"},{"location":"v4-to-v5/motivation/#what-can-users-expect","title":"What Can Users Expect","text":"With these changes, the team intends to provide a better experience for users of the Terraform EKS Blueprints project as well as new and improved reference architectures. Following the v5 changes, the team intends to:
- Improved quality of the examples provided - more information on the intent of the example, why it might be useful for users, which scenarios the pattern is applicable to, etc. Where applicable, architectural diagrams and supporting material will be provided to highlight the intent of the example and how it's constructed.
- A more clear distinction between a blueprint and a usage reference. For example - the Karpenter on EKS Fargate blueprint should demonstrate all of the various aspects that users should be aware of and consider in order to take full advantage of this pattern (recommended practices, observability, logging, monitoring, security, day 2 operations, etc.); this is what makes it a blueprint. In contrast, a usage reference would be an example that shows how users can pass configuration values to the Karpenter provisioner. This example is less focused on the holistic architecture and more focused on how one might configure Karpenter using the implementation. The EKS Blueprints repository will focus mostly on holistic architecture and patterns, and any usage references should be saved for the repository that contains that implementation definition (i.e. - the
terraform-aws-eks-blueprints-addons
repository where the addon implementation is defined). - Faster, and more responsive feedback. The first part of this is going to be improved documentation on how to contribute which should help clarify whether a contribution is worthy and willing to be accepted by the team before any effort is spent by the contributor. However, the goal of v5 is to focus more on the value added benefits that EKS Blueprints was created to provide as opposed to simply mass producing Helm chart wrappers (addons) and trying to keep up with that operationally intensive process.
- Lastly, more examples and blueprints that demonstrate various architectures and workloads that run on top of Amazon EKS as well as integrations into other AWS services.
"},{"location":"v4-to-v5/teams/","title":"Migrate to EKS Blueprints Teams Module","text":"Please consult the docs/v4-to-v5/example directory for reference configurations. If you find a bug, please open an issue with supporting configuration to reproduce.
"},{"location":"v4-to-v5/teams/#this-guide-is-under-active-development","title":"\u26a0\ufe0f This guide is under active development.","text":""},{"location":"v4-to-v5/teams/#list-of-backwards-incompatible-changes","title":"List of backwards incompatible changes","text":"-
"},{"location":"v4-to-v5/teams/#additional-changes","title":"Additional changes","text":""},{"location":"v4-to-v5/teams/#added","title":"Added","text":"-
"},{"location":"v4-to-v5/teams/#modified","title":"Modified","text":"-
"},{"location":"v4-to-v5/teams/#removed","title":"Removed","text":"-
"},{"location":"v4-to-v5/teams/#variable-and-output-changes","title":"Variable and output changes","text":" -
Removed variables:
-
-
Renamed variables:
-
-
Added variables:
-
-
Removed outputs:
-
-
Renamed outputs:
-
-
Added outputs:
-
"},{"location":"v4-to-v5/teams/#upgrade-migrations","title":"Upgrade Migrations","text":""},{"location":"v4-to-v5/teams/#before-v4x-example","title":"Before - v4.x Example","text":"module \"eks_blueprints\" {\nsource = \"github.com/aws-ia/terraform-aws-eks-blueprints?ref=v4.32.1\"\n # TODO\n}\n
"},{"location":"v4-to-v5/teams/#after-v5x-example","title":"After - v5.x Example","text":"module \"eks_blueprints_teams\" {\nsource = \"aws-ia/eks-blueprints-teams/aws\"\nversion = \"~> 1.0\"\n # TODO\n}\n
"},{"location":"v4-to-v5/teams/#diff-of-before-vs-after","title":"Diff of Before vs After","text":"module \"eks_blueprints_teams\" {\n- source = \"github.com/aws-ia/terraform-aws-eks-blueprints?ref=v4.32.1\"\n+ source = \"aws-ia/eks-blueprints-teams/aws\"\n+ version = \"~> 1.0\"\n # TODO\n}\n
"},{"location":"v4-to-v5/teams/#state-move-commands","title":"State Move Commands","text":"In conjunction with the changes above, users can elect to move their external capacity provider(s) under this module using the following move command. Command is shown using the values from the example shown above, please update to suit your configuration names:
terraform state mv 'xxx' 'yyy'\n
"},{"location":"v4-to-v5/example/","title":"Migration - v4 to v5","text":""}]}
\ No newline at end of file
+{"config":{"lang":["en"],"separator":"[\\s\\-]+","pipeline":["stopWordFilter"]},"docs":[{"location":"","title":"Overview","text":""},{"location":"#amazon-eks-blueprints-for-terraform","title":"Amazon EKS Blueprints for Terraform","text":"Welcome to Amazon EKS Blueprints for Terraform!
This project contains a collection of Amazon EKS cluster patterns implemented in Terraform that demonstrate how fast and easy it is for customers to adopt Amazon EKS. The patterns can be used by AWS customers, partners, and internal AWS teams to configure and manage complete EKS clusters that are fully bootstrapped with the operational software that is needed to deploy and operate workloads.
"},{"location":"#motivation","title":"Motivation","text":"Kubernetes is a powerful and extensible container orchestration technology that allows you to deploy and manage containerized applications at scale. The extensible nature of Kubernetes also allows you to use a wide range of popular open-source tools in Kubernetes clusters. However, With the wide array of tooling and design choices available, configuring an EKS cluster that meets your organization\u2019s specific needs can take a significant amount of time. It involves integrating a wide range of open-source tools and AWS services as well as expertise in AWS and Kubernetes.
AWS customers have asked for patterns that demonstrate how to integrate the landscape of Kubernetes tools and make it easy for them to provision complete, opinionated EKS clusters that meet specific application requirements. Customers can utilize EKS Blueprints to configure and deploy purpose built EKS clusters, and start onboarding workloads in days, rather than months.
"},{"location":"#consumption","title":"Consumption","text":"EKS Blueprints for Terraform has been designed to be consumed in the following manners:
- Reference: Users can refer to the patterns and snippets provided to help guide them to their desired solution. Users will typically view how the pattern or snippet is configured to achieve the desired end result and then replicate that in their environment.
- Copy & Paste: Users can copy and paste the patterns and snippets into their own environment, using EKS Blueprints as the starting point for their implementation. Users can then adapt the initial pattern to customize it to their specific needs.
EKS Blueprints for Terraform are not intended to be consumed as-is directly from this project. In \"Terraform speak\" - the patterns and snippets provided in this repository are not designed to be consumed as a Terraform module. Therefore, the patterns provided only contain variables
when certain information is required to deploy the pattern (i.e. - a Route53 hosted zone ID, or ACM certificate ARN) and generally use local variables. If you wish to deploy the patterns into a different region or with other changes, it is recommended that you make those modifications locally before applying the pattern. EKS Blueprints for Terraform will not expose variables and outputs in the same manner that Terraform modules follow in order to avoid confusion around the consumption model.
However, we do have a number of Terraform modules that were created to support EKS Blueprints in addition to the community hosted modules. Please see the respective projects for more details on the modules constructed to support EKS Blueprints for Terraform; those projects are listed below.
terraform-aws-eks-blueprints-addon
- (Note the singular form) Terraform module which can provision an addon using the Terraform helm_release
resource in addition to an IAM role for service account (IRSA). terraform-aws-eks-blueprints-addons
- (Note the plural form) Terraform module which can provision multiple addons; both EKS addons using the aws_eks_addon
resource as well as Helm chart based addons using the terraform-aws-eks-blueprints-addon
module. terraform-aws-eks-blueprints-teams
- Terraform module that creates Kubernetes multi-tenancy resources and configurations, allowing both administrators and application developers to access only the resources which they are responsible for.
"},{"location":"#related-projects","title":"Related Projects","text":"In addition to the supporting EKS Blueprints Terraform modules listed above, there are a number of related projects that users should be aware of:
-
GitOps
terraform-aws-eks-ack-addons
- Terraform module to deploy ACK controllers onto EKS clusters crossplane-on-eks
- Crossplane Blueprints is an open source repo to bootstrap Amazon EKS clusters and provision AWS resources using a library of Crossplane Compositions (XRs) with Composite Resource Definitions (XRDs).
-
Data on EKS
data-on-eks
- A collection of blueprints intended for data workloads on Amazon EKS. terraform-aws-eks-data-addons
- Terraform module to deploy multiple addons that are specific to data workloads on EKS clusters.
-
Observability Accelerator
terraform-aws-observability-accelerator
- A set of opinionated modules to help you set up observability for your AWS environments with AWS-managed observability services such as Amazon Managed Service for Prometheus, Amazon Managed Grafana, AWS Distro for OpenTelemetry (ADOT) and Amazon CloudWatch
"},{"location":"#terraform-caveats","title":"Terraform Caveats","text":"EKS Blueprints for Terraform does not intend to teach users the recommended practices for Terraform nor does it offer guidance on how users should structure their Terraform projects. The patterns provided are intended to show users how they can achieve a defined architecture or configuration in a way that they can quickly and easily get up and running to start interacting with that pattern. Therefore, there are a few caveats users should be aware of when using EKS Blueprints for Terraform:
-
We recognize that most users will already have an existing VPC in a separate Terraform workspace. However, the patterns provided come complete with a VPC to ensure a stable, deployable example that has been tested and validated.
-
Hashicorp does not recommend providing computed values in provider blocks, which means that the cluster configuration should be defined in a workspace separate from the resources deployed onto the cluster (i.e. - addons). However, to simplify the pattern experience, we have defined everything in one workspace and provided instructions to provision the patterns using a targeted apply approach. Users are encouraged to investigate a Terraform project structure that suits their needs; EKS Blueprints for Terraform does not have an opinion in this matter and will defer to Hashicorp's guidance.
-
Patterns are not intended to be consumed in-place in the same manner that one would consume a module. Therefore, we do not provide variables and outputs to expose various levels of configuration for the examples. Users can modify the pattern locally after cloning to suit their requirements.
-
Please see the FAQ section on authenticating Kubernetes based providers (kubernetes
, helm
, kubectl
) to Amazon EKS clusters regarding the use of static tokens versus dynamic tokens using the awscli
.
"},{"location":"#support-feedback","title":"Support & Feedback","text":"EKS Blueprints for Terraform is maintained by AWS Solution Architects. It is not part of an AWS service and support is provided as a best-effort by the EKS Blueprints community. To provide feedback, please use the issues templates provided. If you are interested in contributing to EKS Blueprints, see the Contribution guide.
"},{"location":"#security","title":"Security","text":"See CONTRIBUTING for more information.
"},{"location":"#license","title":"License","text":"Apache-2.0 Licensed. See LICENSE.
"},{"location":"faq/","title":"Frequently Asked Questions","text":""},{"location":"faq/#timeouts-on-destroy","title":"Timeouts on destroy","text":"Customers who are deleting their environments using terraform destroy
may see timeout errors when VPCs are being deleted. This is due to a known issue in the vpc-cni
Customers may face a situation where ENIs that were attached to EKS managed nodes (same may apply to self-managed nodes) are not being deleted by the VPC CNI as expected which leads to IaC tool failures, such as:
- ENIs are left on subnets
- EKS managed security group which is attached to the ENI can\u2019t be deleted by EKS
The current recommendation is to execute cleanup in the following order:
- delete all pods that have been created in the cluster.
- add delay/ wait
- delete VPC CNI
- delete nodes
- delete cluster
"},{"location":"faq/#leaked-cloudwatch-logs-group","title":"Leaked CloudWatch Logs Group","text":"Sometimes, customers may see the CloudWatch Log Group for EKS cluster being created is left behind after their blueprint has been destroyed using terraform destroy
. This happens because even after terraform deletes the CW log group, there\u2019s still logs being processed behind the scene by AWS EKS and service continues to write logs after recreating the log group using the EKS service IAM role which users don't have control over. This results in a terraform failure when the same blueprint is being recreated due to the existing log group left behind.
There are two options here:
-
During cluster creation set var.create_cloudwatch_log_group
to false
. This will tell the EKS module to not create the log group, but instead let the EKS service create the log group. This means that upon cluster deletion the log group will be left behind but there will not be Terraform failures if you re-create the same cluster as Terraform does not manage the log group creation/deletion anymore.
-
During cluster creation set var.create_cloudwatch_log_group
to true
. This will tell the EKS module to create the log group via Terraform. The EKS service will detect the log group and will start forwarding the logs for the log types enabled. Upon deletion terraform will delete the log group but depending upon any un-forwarded logs, the EKS service may recreate the log group using the service role. This will result in terraform errors if the same blueprint is recreated. To proceed, manually delete the log group using the console or CLI and then rerun terraform destroy
.
"},{"location":"faq/#provider-authentication","title":"Provider Authentication","text":"The chain of events when provisioning an example is typically in the stages of VPC -> EKS cluster -> addons and manifests. Per Terraform's recommendation, it is not recommended to pass an unknown value into provider configurations. However, for the sake of simplicity and ease of use, Blueprints does specify the AWS provider along with the Kubernetes, Helm, and Kubectl providers in order to show the full configuration required for provisioning example. Note - this is the configuration required to provision the example, not necessarily the shape of how the configuration should be structured; users are encouraged to split up EKS cluster creation from addon and manifest provisioning to align with Terraform's recommendations.
With that said, the examples here are combining the providers and users can sometimes encounter various issues with the provider authentication methods. There are primarily two methods for authenticating the Kubernetes, Helm, and Kubectl providers to the EKS cluster created:
- Using a static token which has a lifetime of 15 minutes per the EKS service documentation.
- Using the
exec()
method which will fetch a token at the time of Terraform invocation.
The Kubernetes and Helm providers recommend the exec()
method, however this has the caveat that it requires the awscli to be installed on the machine running Terraform AND of at least a minimum version to support the API spec used by the provider (i.e. - \"client.authentication.k8s.io/v1alpha1\"
, \"client.authentication.k8s.io/v1beta1\"
, etc.). Selecting the appropriate provider authentication method is left up to users, and the examples used in this project will default to using the static token method for ease of use.
Users of the static token method should be aware that if they receive a 401 Unauthorized
message, they might have a token that has expired and will need to run terraform refresh
to get a new token. Users of the exec()
method should be aware that the exec()
method is reliant on the awscli and the associated authentication API version; the awscli version may need to be updated to support a later API version required by the Kubernetes version in use.
The following examples demonstrate either method that users can utilize - please refer to the associated provider's documentation for further details on configuration.
"},{"location":"faq/#static-token-example","title":"Static Token Example","text":"provider \"kubernetes\" {\nhost = module.eks.cluster_endpoint\ncluster_ca_certificate = base64decode(module.eks.cluster_certificate_authority_data)\ntoken = data.aws_eks_cluster_auth.this.token\n}\nprovider \"helm\" {\nkubernetes {\nhost = module.eks.cluster_endpoint\ncluster_ca_certificate = base64decode(module.eks.cluster_certificate_authority_data)\ntoken = data.aws_eks_cluster_auth.this.token\n}\n}\nprovider \"kubectl\" {\napply_retry_count = 10\nhost = module.eks.cluster_endpoint\ncluster_ca_certificate = base64decode(module.eks.cluster_certificate_authority_data)\nload_config_file = false\ntoken = data.aws_eks_cluster_auth.this.token\n}\ndata \"aws_eks_cluster_auth\" \"this\" {\nname = module.eks.cluster_name\n}\n
"},{"location":"faq/#exec-example","title":"exec()
Example","text":"Usage of exec plugin for AWS credentials
Links to References related to this issue
- https://github.com/hashicorp/terraform/issues/29182
- https://github.com/aws/aws-cli/pull/6476
provider \"kubernetes\" {\nhost = module.eks.cluster_endpoint\ncluster_ca_certificate = base64decode(module.eks.cluster_certificate_authority_data)\nexec {\napi_version = \"client.authentication.k8s.io/v1beta1\"\ncommand = \"aws\"\n # This requires the awscli to be installed locally where Terraform is executed\nargs = [\"eks\", \"get-token\", \"--cluster-name\", module.eks.cluster_name]\n}\n}\nprovider \"helm\" {\nkubernetes {\nhost = module.eks.cluster_endpoint\ncluster_ca_certificate = base64decode(module.eks.cluster_certificate_authority_data)\nexec {\napi_version = \"client.authentication.k8s.io/v1beta1\"\ncommand = \"aws\"\n # This requires the awscli to be installed locally where Terraform is executed\nargs = [\"eks\", \"get-token\", \"--cluster-name\", module.eks.cluster_name]\n}\n}\n}\nprovider \"kubectl\" {\napply_retry_count = 5\nhost = module.eks.cluster_endpoint\ncluster_ca_certificate = base64decode(module.eks.cluster_certificate_authority_data)\nload_config_file = false\nexec {\napi_version = \"client.authentication.k8s.io/v1beta1\"\ncommand = \"aws\"\n # This requires the awscli to be installed locally where Terraform is executed\nargs = [\"eks\", \"get-token\", \"--cluster-name\", module.eks.cluster_name]\n}\n}\n
"},{"location":"faq/#unable-to-destroy-namespace-created-by-terraform","title":"Unable to destroy namespace created by Terraform","text":"In some cases, when you try to run terraform destroy on kubernetes resources created by Terraform such as namespace, you may end up seeing failures such as timeout and context deadline exceeded failures. Namespace one of those resources we've seen before, the main reason this happens is because orphaned resources created through CRDs of addons (such as ArgoCD, AWS LBC and more) are left behind after the addons are being deleted, this is case by case scenario. For example, with namespaces:
-
Confirm the namespace is hanging in status Terminating
kubectl get namespaces\n
-
Check for any orphaned resources in the namespace, make sure to replace <namespace_name> with your namespace:
kubectl api-resources --verbs=list --namespaced -o name | xargs -n 1 kubectl get \\\n--show-kind --ignore-not-found -n <namespace_name>\n
-
For any of the above output, patch the resource finalizers:
kubectl patch RESOURCE NAME -p '{\"metadata\":{\"finalizers\":[]}}' --type=merge\n
-
Check the status of the namespace; if needed, you may need to patch the namespace finalizers as well
kubectl patch ns <ns-name> -p '{\"spec\":{\"finalizers\":null}}'\n
"},{"location":"getting-started/","title":"Getting Started","text":"This getting started guide will help you deploy your first pattern using EKS Blueprints.
"},{"location":"getting-started/#prerequisites","title":"Prerequisites","text":"Ensure that you have installed the following tools locally:
- awscli
- kubectl
- terraform
"},{"location":"getting-started/#deploy","title":"Deploy","text":" -
For consuming EKS Blueprints, please see the Consumption section. For exploring and trying out the patterns provided, please clone the project locally to quickly get up and running with a pattern. After cloning the project locally, cd
into the pattern directory of your choice.
-
To provision the pattern, the typical steps of execution are as follows:
terraform init\nterraform apply -target=\"module.vpc\" -auto-approve\nterraform apply -target=\"module.eks\" -auto-approve\nterraform apply -auto-approve\n
For patterns that deviate from this general flow, see the pattern's respective README.md
for more details.
Terraform targeted apply
Please see the Terraform Caveats section for details on the use of targeted Terraform applies
-
Once all of the resources have successfully been provisioned, the following command can be used to update the kubeconfig
on your local machine and allow you to interact with your EKS Cluster using kubectl
.
aws eks --region <REGION> update-kubeconfig --name <CLUSTER_NAME>\n
Pattern Terraform outputs
Most examples will output the aws eks update-kubeconfig ...
command as part of the Terraform apply output to simplify this process for users
Private clusters
Clusters that do not enable the cluster's public endpoint will require users to access the cluster from within the VPC. For these patterns, a sample EC2 or other means are provided to demonstrate how to access those clusters privately
and without exposing the public endpoint. Please see the respective pattern's README.md
for more details.
-
Once you have updated your kubeconfig
, you can verify that you are able to interact with your cluster by running the following command:
kubectl get nodes\n
This should return a list of the node(s) running in the cluster created. If any errors are encountered, please re-trace the steps above and consult the pattern's README.md
for more details on any additional/specific steps that may be required.
"},{"location":"getting-started/#destroy","title":"Destroy","text":"To teardown and remove the resources created in the pattern, the typical steps of execution are as follows:
terraform destroy -target=\"module.eks_blueprints_addons\" -auto-approve\nterraform destroy -target=\"module.eks\" -auto-approve\nterraform destroy -auto-approve\n
Resources created outside of Terraform
Depending on the pattern, some resources may have been created that Terraform is not aware of that will cause issues when attempting to clean up the pattern. For example, Karpenter is responsible for creating additional EC2 instances to satisfy the pod scheduling requirements. These instances will not be cleaned up by Terraform and will need to be de-provisioned BEFORE attempting to terraform destroy
. This is why it is important that the addons, or any resources provisioned onto the cluster are cleaned up first. Please see the respective pattern's README.md
for more details.
"},{"location":"_partials/destroy/","title":"Destroy","text":"terraform destroy -target=\"module.eks_blueprints_addons\" -auto-approve\nterraform destroy -target=\"module.eks\" -auto-approve\nterraform destroy -auto-approve\n
See here for more details on cleaning up the resources created.
"},{"location":"internal/ci/","title":"E2E tests","text":"We use GitHub Actions to run an end-to-end tests to verify all PRs. The GitHub Actions used are a combination of aws-actions/configure-aws-credentials
and hashicorp/setup-terraform@v1
.
"},{"location":"internal/ci/#setup","title":"Setup","text":" - Use the following CloudFormation template to setup a new IAM role.
Parameters:\nGitHubOrg:\nType: String\nRepositoryName:\nType: String\nOIDCProviderArn:\nDescription: Arn for the GitHub OIDC Provider.\nDefault: \"\"\nType: String\nConditions:\nCreateOIDCProvider: !Equals\n- !Ref OIDCProviderArn\n- \"\"\nResources:\nRole:\nType: AWS::IAM::Role\nProperties:\nAssumeRolePolicyDocument:\nStatement:\n- Effect: Allow\nAction: sts:AssumeRoleWithWebIdentity\nPrincipal:\nFederated: !If\n- CreateOIDCProvider\n- !Ref GithubOidc\n- !Ref OIDCProviderArn\nCondition:\nStringLike:\ntoken.actions.githubusercontent.com:sub: !Sub repo:${GitHubOrg}/${RepositoryName}:*\nGithubOidc:\nType: AWS::IAM::OIDCProvider\nCondition: CreateOIDCProvider\nProperties:\nUrl: https://token.actions.githubusercontent.com\nClientIdList:\n- sts.amazonaws.com\nThumbprintList:\n- a031c46782e6e6c662c2c87c76da9aa62ccabd8e\nOutputs:\nRole:\nValue: !GetAtt Role.Arn\n
-
Add a permissive IAM Policy to the role created above. For our purpose AdministratorAccess
works the best.
-
Setup a GitHub repo secret called ROLE_TO_ASSUME
and set it to ARN of the role created in 1.
-
We use an S3 backend for the e2e tests. This allows us to recover from any failures during the apply
stage. If you are setting up your own CI pipeline change the s3 bucket name in backend configuration of the example.
"},{"location":"patterns/agones-game-controller/","title":"Agones Game Controller","text":""},{"location":"patterns/agones-game-controller/#amazon-eks-deployment-with-agones-gaming-kubernetes-controller","title":"Amazon EKS Deployment with Agones Gaming Kubernetes Controller","text":"This pattern shows how to deploy and run gaming applications on Amazon EKS using the Agones Kubernetes Controller
Agones is an open source Kubernetes controller that provisions and manages dedicated game server processes within Kubernetes clusters using standard Kubernetes tooling and APIs. This model also allows any matchmaker to interact directly with Agones via the Kubernetes API to provision a dedicated game server
Amazon GameLift enables developers to deploy, operate, and scale dedicated, low-cost servers in the cloud for session-based, multiplayer games. Built on AWS global computing infrastructure, GameLift helps deliver high-performance, high-reliability, low-cost game servers while dynamically scaling your resource usage to meet worldwide player demand. See below for more information on how GameLift FleetIQ can be integrated with Agones deployed on Amazon EKS.
Amazon GameLift FleetIQ optimizes the use of low-cost Spot Instances for cloud-based game hosting with Amazon EC2. With GameLift FleetIQ, you can work directly with your hosting resources in Amazon EC2 and Auto Scaling while taking advantage of GameLift optimizations to deliver inexpensive, resilient game hosting for your players and makes the use of low-cost Spot Instances viable for game hosting
This blog walks through the details of deploying EKS Cluster using eksctl and deploy Agones with GameLift FleetIQ.
"},{"location":"patterns/agones-game-controller/#deploy","title":"Deploy","text":"See here for the prerequisites and steps to deploy this pattern.
"},{"location":"patterns/agones-game-controller/#validate","title":"Validate","text":" -
Deploy the sample game server
kubectl create -f https://raw.githubusercontent.com/googleforgames/agones/release-1.32.0/examples/simple-game-server/gameserver.yaml\nkubectl get gs\n
NAME STATE ADDRESS PORT NODE AGE\nsimple-game-server-7r6jr Ready 34.243.345.22 7902 ip-10-1-23-233.eu-west-1.compute.internal 11h\n
-
Test the sample game server using netcat
echo -n \"UDP test - Hello EKS Blueprints!\" | nc -u 34.243.345.22 7902\n
Hello EKS Blueprints!\nACK: Hello EKS Blueprints!\nEXIT\nACK: EXIT\n
"},{"location":"patterns/agones-game-controller/#destroy","title":"Destroy","text":"Delete the resources created by the sample game server first:
kubectl -n default delete gs --all || true\n
terraform destroy -target=\"module.eks_blueprints_addons\" -auto-approve\nterraform destroy -target=\"module.eks\" -auto-approve\nterraform destroy -auto-approve\n
See here for more details on cleaning up the resources created.
"},{"location":"patterns/appmesh-mtls/","title":"AWS AppMesh mTLS","text":""},{"location":"patterns/appmesh-mtls/#eks-cluster-w-appmesh-mtls","title":"EKS Cluster w/ AppMesh mTLS","text":"This pattern demonstrates how to deploy and configure AppMesh mTLS on an Amazon EKS cluster.
"},{"location":"patterns/appmesh-mtls/#deploy","title":"Deploy","text":"See here for the prerequisites and steps to deploy this pattern.
"},{"location":"patterns/appmesh-mtls/#validate","title":"Validate","text":" -
List the created Resources.
kubectl get pods -A\n
NAMESPACE NAME READY STATUS RESTARTS AGE\nappmesh-system appmesh-controller-7c98b87bdc-q6226 1/1 Running 0 4h44m\ncert-manager cert-manager-87f5555f-tcxj7 1/1 Running 0 4h43m\ncert-manager cert-manager-cainjector-8448ff8ddb-wwjsc 1/1 Running 0 4h43m\ncert-manager cert-manager-webhook-5468b675b-fvdwk 1/1 Running 0 4h43m\nkube-system aws-node-rf4wg 1/1 Running 0 4h43m\nkube-system aws-node-skkwh 1/1 Running 0 4h43m\nkube-system aws-privateca-issuer-b6fb8c5bd-hh8q4 1/1 Running 0 4h44m\nkube-system coredns-5f9f955df6-qhr6p 1/1 Running 0 4h44m\nkube-system coredns-5f9f955df6-tw8r7 1/1 Running 0 4h44m\nkube-system kube-proxy-q72l9 1/1 Running 0 4h43m\nkube-system kube-proxy-w54pc 1/1 Running 0 4h43m\n
-
List the AWSPCA cluster issuers:
kubectl get awspcaclusterissuers.awspca.cert-manager.io\n
NAME AGE\nappmesh-mtls 4h42m\n
-
List the example certificates; the example certificate should be shown:
kubectl get certificate\n
NAME READY SECRET AGE\nexample True example-clusterissuer 4h12m\n
-
Describe the example certificate:
kubectl describe secret example-clusterissuer\n
Name: example-clusterissuer\nNamespace: default\nLabels: controller.cert-manager.io/fao=true\nAnnotations: cert-manager.io/alt-names:\n cert-manager.io/certificate-name: example\n cert-manager.io/common-name: example.com\n cert-manager.io/ip-sans:\n cert-manager.io/issuer-group: awspca.cert-manager.io\n cert-manager.io/issuer-kind: AWSPCAClusterIssuer\n cert-manager.io/issuer-name: appmesh-mtls\n cert-manager.io/uri-sans:\n\nType: kubernetes.io/tls\n\nData\n====\nca.crt: 1785 bytes\ntls.crt: 1517 bytes\ntls.key: 1675 bytes\n
"},{"location":"patterns/appmesh-mtls/#example","title":"Example","text":"The full documentation for this example can be found here.
-
Label the default
Namespace to allow Side Car Injection:
kubectl label namespaces default appmesh.k8s.aws/sidecarInjectorWebhook=enabled\n
namespace/default labeled\n
-
Create the mesh:
cat <<EOF | kubectl apply -f -\napiVersion: appmesh.k8s.aws/v1beta2\nkind: Mesh\nmetadata:\n name: appmesh-example\nspec:\n namespaceSelector:\n matchLabels:\n kubernetes.io/metadata.name: default\nEOF\nmesh.appmesh.k8s.aws/appmesh-example created\n
-
Create a virtual node:
cat <<EOF | kubectl apply -f -\napiVersion: appmesh.k8s.aws/v1beta2\nkind: VirtualNode\nmetadata:\n name: appmesh-example-vn\n namespace: default\nspec:\n podSelector:\n matchLabels:\n app: appmesh-example\n listeners:\n - portMapping:\n port: 80\n protocol: http\n backendDefaults:\n clientPolicy:\n tls:\n certificate:\n sds:\n secretName: example-clusterissuer\n enforce: true\n ports: []\n validation:\n trust:\n acm:\n certificateAuthorityARNs:\n - arn:aws:acm-pca:us-west-2:978045894046:certificate-authority/4386d166-4d68-4347-b940-4324ac493d65\n serviceDiscovery:\n dns:\n hostname: appmesh-example-svc.default.svc.cluster.local\nEOF\n
-
Create a virtual router:
cat <<EOF | kubectl apply -f -\napiVersion: appmesh.k8s.aws/v1beta2\nkind: VirtualRouter\nmetadata:\n namespace: default\n name: appmesh-example-vr\nspec:\n listeners:\n - portMapping:\n port: 80\n protocol: http\n routes:\n - name: appmesh-example-route\n httpRoute:\n match:\n prefix: /\n action:\n weightedTargets:\n - virtualNodeRef:\n name: appmesh-example-vn\n weight: 1\nEOF\n
-
Create a virtual service:
cat <<EOF | kubectl apply -f -\napiVersion: appmesh.k8s.aws/v1beta2\nkind: VirtualService\nmetadata:\n name: appmesh-example-vs\n namespace: default\nspec:\n awsName: appmesh-example-svc.default.svc.cluster.local\n provider:\n virtualRouter:\n virtualRouterRef:\n name: appmesh-example-vr\nEOF\n
-
Create a deployment and a service in the default
namespace:
cat <<EOF | kubectl apply -f -\napiVersion: v1\nkind: Service\nmetadata:\n name: appmesh-example-svc\n namespace: default\n labels:\n app: appmesh-example\nspec:\n selector:\n app: appmesh-example\n ports:\n - protocol: TCP\n port: 80\n targetPort: 80\n---\napiVersion: apps/v1\nkind: Deployment\nmetadata:\n name: appmesh-example-app\n namespace: default\n labels:\n app: appmesh-example\nspec:\n replicas: 2\n selector:\n matchLabels:\n app: appmesh-example\n template:\n metadata:\n labels:\n app: appmesh-example\n spec:\n serviceAccountName: appmesh-example-sa\n containers:\n - name: nginx\n image: nginx:1.19.0\n ports:\n - containerPort: 80\nEOF\n
-
Validate if the pods are in the Running
state with 2 containers, one of them should include the AppMesh sidecar:
kubectl get pods\n
NAME READY STATUS RESTARTS AGE\nappmesh-example-app-6946cdbdf6-gnxww 2/2 Running 0 54s\nappmesh-example-app-6946cdbdf6-nx9tg 2/2 Running 0 54s\n
"},{"location":"patterns/appmesh-mtls/#destroy","title":"Destroy","text":"Delete the example resources created first:
# delete all example resources created in the default namespace\nkubectl delete all --all\n
terraform destroy -target=\"module.eks_blueprints_addons\" -auto-approve\nterraform destroy -target=\"module.eks\" -auto-approve\nterraform destroy -auto-approve\n
See here for more details on cleaning up the resources created.
"},{"location":"patterns/argocd/","title":"ArgoCD","text":""},{"location":"patterns/argocd/#amazon-eks-cluster-w-argocd","title":"Amazon EKS Cluster w/ ArgoCD","text":"This pattern demonstrates an EKS cluster that uses ArgoCD for application deployments.
- Documentation
- EKS Blueprints Add-ons Repo
- EKS Blueprints Workloads Repo
"},{"location":"patterns/argocd/#deploy","title":"Deploy","text":"See here for the prerequisites and steps to deploy this pattern.
"},{"location":"patterns/argocd/#validate","title":"Validate","text":" -
List out the pods running currently:
kubectl get pods -A\n
NAMESPACE NAME READY STATUS RESTARTS AGE\nargo-rollouts argo-rollouts-5d47ccb8d4-854s6 1/1 Running 0 23h\nargo-rollouts argo-rollouts-5d47ccb8d4-srjk9 1/1 Running 0 23h\nargocd argo-cd-argocd-application-controller-0 1/1 Running 0 24h\nargocd argo-cd-argocd-applicationset-controller-547f9cfd68-kp89p 1/1 Running 0 24h\nargocd argo-cd-argocd-dex-server-55765f7cd7-t8r2f 1/1 Running 0 24h\nargocd argo-cd-argocd-notifications-controller-657df4dbcb-p596r 1/1 Running 0 24h\nargocd argo-cd-argocd-repo-server-7d4dddf886-2vmgt 1/1 Running 0 24h\nargocd argo-cd-argocd-repo-server-7d4dddf886-bm7tz 1/1 Running 0 24h\nargocd argo-cd-argocd-server-775ddf74b8-8jzvc 1/1 Running 0 24h\nargocd argo-cd-argocd-server-775ddf74b8-z6lz6 1/1 Running 0 24h\nargocd argo-cd-redis-ha-haproxy-6d7b7d4656-b8bt8 1/1 Running 0 24h\nargocd argo-cd-redis-ha-haproxy-6d7b7d4656-mgjx5 1/1 Running 0 24h\nargocd argo-cd-redis-ha-haproxy-6d7b7d4656-qsbgw 1/1 Running 0 24h\nargocd argo-cd-redis-ha-server-0 4/4 Running 0 24h\nargocd argo-cd-redis-ha-server-1 4/4 Running 0 24h\nargocd argo-cd-redis-ha-server-2 4/4 Running 0 24h\ncert-manager cert-manager-586ccb6656-2v8mf 1/1 Running 0 23h\ncert-manager cert-manager-cainjector-99d64d795-2gwnj 1/1 Running 0 23h\ncert-manager cert-manager-webhook-8d87786cb-24kww 1/1 Running 0 23h\ngeolocationapi geolocationapi-85599c5c74-rqqqs 2/2 Running 0 25m\ngeolocationapi geolocationapi-85599c5c74-whsp6 2/2 Running 0 25m\ngeordie downstream0-7f6ff946b6-r8sxc 1/1 Running 0 25m\ngeordie downstream1-64c7db6f9-rsbk5 1/1 Running 0 25m\ngeordie frontend-646bfb947c-wshpb 1/1 Running 0 25m\ngeordie redis-server-6bd7885d5d-s7rqw 1/1 Running 0 25m\ngeordie yelb-appserver-5d89946ffd-vkxt9 1/1 Running 0 25m\ngeordie yelb-db-697bd9f9d9-2t4b6 1/1 Running 0 25m\ngeordie yelb-ui-75ff8b96ff-fh6bw 1/1 Running 0 25m\nkarpenter karpenter-7b99fb785d-87k6h 1/1 Running 0 106m\nkarpenter karpenter-7b99fb785d-lkq9l 1/1 Running 0 106m\nkube-system aws-load-balancer-controller-6cf9bdbfdf-h7bzb 1/1 
Running 0 20m\nkube-system aws-load-balancer-controller-6cf9bdbfdf-vfbrj 1/1 Running 0 20m\nkube-system aws-node-cvjmq 1/1 Running 0 24h\nkube-system aws-node-fw7zc 1/1 Running 0 24h\nkube-system aws-node-l7589 1/1 Running 0 24h\nkube-system aws-node-nll82 1/1 Running 0 24h\nkube-system aws-node-zhz8l 1/1 Running 0 24h\nkube-system coredns-7975d6fb9b-5sf7r 1/1 Running 0 24h\nkube-system coredns-7975d6fb9b-k78dz 1/1 Running 0 24h\nkube-system ebs-csi-controller-5cd4944c94-7jwlb 6/6 Running 0 24h\nkube-system ebs-csi-controller-5cd4944c94-8tcsg 6/6 Running 0 24h\nkube-system ebs-csi-node-66jmx 3/3 Running 0 24h\nkube-system ebs-csi-node-b2pw4 3/3 Running 0 24h\nkube-system ebs-csi-node-g4v9z 3/3 Running 0 24h\nkube-system ebs-csi-node-k7nvp 3/3 Running 0 24h\nkube-system ebs-csi-node-tfq9q 3/3 Running 0 24h\nkube-system kube-proxy-4x8vm 1/1 Running 0 24h\nkube-system kube-proxy-gtlpm 1/1 Running 0 24h\nkube-system kube-proxy-vfnbf 1/1 Running 0 24h\nkube-system kube-proxy-z9wdh 1/1 Running 0 24h\nkube-system kube-proxy-zzx9m 1/1 Running 0 24h\nkube-system metrics-server-7f4db5fd87-9n6dv 1/1 Running 0 23h\nkube-system metrics-server-7f4db5fd87-t8wxg 1/1 Running 0 23h\nkube-system metrics-server-7f4db5fd87-xcxlv 1/1 Running 0 23h\nteam-burnham burnham-66fccc4fb5-k4qtm 1/1 Running 0 25m\nteam-burnham burnham-66fccc4fb5-rrf4j 1/1 Running 0 25m\nteam-burnham burnham-66fccc4fb5-s9kbr 1/1 Running 0 25m\nteam-burnham nginx-7d47cfdff7-lzdjb 1/1 Running 0 25m\nteam-riker deployment-2048-6f7c78f959-h76rx 1/1 Running 0 25m\nteam-riker deployment-2048-6f7c78f959-skmrr 1/1 Running 0 25m\nteam-riker deployment-2048-6f7c78f959-tn9dw 1/1 Running 0 25m\nteam-riker guestbook-ui-c86c478bd-zg2z4 1/1 Running 0 25m\n
-
Access the ArgoCD UI by running the following command:
kubectl port-forward svc/argo-cd-argocd-server 8080:443 -n argocd\n
Then, open your browser and navigate to https://localhost:8080/
Username should be admin
.
The password will be the generated password by random_password
resource, stored in AWS Secrets Manager. You can easily retrieve the password by running the following command:
aws secretsmanager get-secret-value --secret-id <SECRET_NAME> --region <REGION>\n
Replace <SECRET_NAME>
with the name of the secret; if you haven't changed it, then it should be argocd
, also, make sure to replace <REGION>
with the region you are using.
Pick up the secret from the SecretString
.
"},{"location":"patterns/argocd/#destroy","title":"Destroy","text":"First, we need to ensure that the ArgoCD applications are properly cleaned up from the cluster, this can be achieved in multiple ways:
- Disabling the
argocd_applications
configuration and running terraform apply
again - Deleting the apps using
argocd
cli - Deleting the apps using
kubectl
following ArgoCD guidance
terraform destroy -target=\"module.eks_blueprints_addons\" -auto-approve\nterraform destroy -target=\"module.eks\" -auto-approve\nterraform destroy -auto-approve\n
See here for more details on cleaning up the resources created.
"},{"location":"patterns/blue-green-upgrade/","title":"Blue/Green Upgrade","text":""},{"location":"patterns/blue-green-upgrade/#bluegreen-migration","title":"Blue/Green Migration","text":"This directory provides a solution based on EKS Blueprint for Terraform that shows how to leverage blue/green or canary application workload migration between EKS clusters, using Amazon Route 53 weighted routing feature. The workloads will be dynamically exposed using AWS LoadBalancer Controller and External DNS add-on.
We are leveraging the existing EKS Blueprints Workloads GitHub repository sample to deploy our GitOps ArgoCD applications, which are defined as helm charts. We are leveraging ArgoCD Apps of apps pattern where an ArgoCD Application can also reference other Helm charts to deploy.
You can also find more information in the associated blog post
"},{"location":"patterns/blue-green-upgrade/#table-of-content","title":"Table of content","text":" - Blue/Green or Canary Amazon EKS clusters migration for stateless ArgoCD workloads
- Table of content
- Project structure
- Prerequisites
- Quick Start
- Configure the Stacks
- Create the environment stack
- Create the Blue cluster
- Create the Green cluster
- How this work
- Watch our Workload: we focus on team-burnham namespace.
- Using AWS Route53 and External DNS
- Configure Ingress resources with weighted records
- Automate the migration from Terraform
- Delete the Stack
- Delete the EKS Cluster(s)
- TL;DR
- Manual
- Delete the environment stack
- Troubleshoot
- External DNS Ownership
- Check Route 53 Record status
- Check current resolution and TTL value
- Get ArgoCD UI Password
"},{"location":"patterns/blue-green-upgrade/#project-structure","title":"Project structure","text":"See the Architecture of what we are building
Our sample is composed of four main directories:
- environment \u2192 this stack will create the common VPC and its dependencies used by our EKS clusters: create a Route53 sub domain hosted zone for our sample, a wildcard certificate on Certificate Manager for our applications TLS endpoints, and a SecretManager password for the ArgoCD UIs.
- modules/eks_cluster \u2192 local module defining the EKS blueprint cluster with ArgoCD add-on which will automatically deploy additional add-ons and our demo workloads
- eks-blue \u2192 an instance of the eks_cluster module to create blue cluster
- eks-green \u2192 an instance of the eks_cluster module to create green cluster
So we are going to create 2 EKS clusters, sharing the same VPC, and each one of them will install locally our workloads from the central GitOps repository leveraging ArgoCD add-on. In the GitOps workload repository, we have configured our applications deployments to leverage AWS Load Balancers Controllers annotations, so that applications will be exposed on AWS Load Balancers, created from our Kubernetes manifests. We will have 1 load balancer per cluster for each of our applications.
We have configured ExternalDNS add-ons in our two clusters to share the same Route53 Hosted Zone. The workloads in both clusters also share the same Route 53 DNS records, we rely on AWS Route53 weighted records to allow us to configure canary workload migration between our two EKS clusters.
Here we use the same GitOps workload configuration repository and adapt parameters with the values.yaml
. We could also use different ArgoCD repository for each cluster, or use a new directory if we want to validate or test new deployment manifests with maybe additional features, configurations or to use with different Kubernetes add-ons (like changing ingress controller).
Our objective here is to show you how Application teams and Platform teams can configure their infrastructure and workloads so that application teams are able to deploy autonomously their workloads to the EKS clusters thanks to ArgoCD, and platform team can keep the control of migrating production workloads from one cluster to another without having to synchronized operations with applications teams, or asking them to build a complicated CD pipeline.
In this example we show how you can seamlessly migrate your stateless workloads between the 2 clusters for a blue/green or Canary migration, but you can also leverage the same architecture to have your workloads for example separated in different accounts or regions, for either High Availability or Lower latency Access from your customers.
"},{"location":"patterns/blue-green-upgrade/#prerequisites","title":"Prerequisites","text":" - Terraform (tested version v1.3.5 on linux)
- Git
- AWS CLI
- AWS test account with administrator role access
- For working with this repository, you will need an existing Amazon Route 53 Hosted Zone that will be used to create our project hosted zone. It will be provided via the Terraform variable
hosted_zone_name
defined in terraform.tfvars.example. - Before moving to the next step, you will need to register a parent domain with AWS Route 53 (https://docs.aws.amazon.com/Route53/latest/DeveloperGuide/domain-register.html) in case you don\u2019t have one created yet.
- Accessing GitOps git repositories with SSH access requiring an SSH key for authentication. In this example our workloads repositories are stored in GitHub, you can see in GitHub documentation on how to connect with SSH.
- Your GitHub private ssh key value is supposed to be stored in plain text in AWS Secret Manager in a secret named
github-blueprint-ssh-key
, but you can change it using the terraform variable workload_repo_secret
in terraform.tfvars.example.
"},{"location":"patterns/blue-green-upgrade/#quick-start","title":"Quick Start","text":""},{"location":"patterns/blue-green-upgrade/#configure-the-stacks","title":"Configure the Stacks","text":" - Clone the repository
git clone https://github.com/aws-ia/terraform-aws-eks-blueprints.git\ncd patterns/blue-green-upgrade/\n
- Copy the
terraform.tfvars.example
to terraform.tfvars
on each environment
, eks-blue
and eks-green
folders, and change region, hosted_zone_name, eks_admin_role_name according to your needs.
cp terraform.tfvars.example environment/terraform.tfvars\ncp terraform.tfvars.example eks-blue/terraform.tfvars\ncp terraform.tfvars.example eks-green/terraform.tfvars\n
- You will need to provide the
hosted_zone_name
for example my-example.com
. Terraform will create a new hosted zone for the project with name: ${environment}.${hosted_zone_name}
so in our example eks-blueprint.my-example.com
. - You need to provide a valid IAM role in
eks_admin_role_name
to have EKS cluster admin rights, generally the one used in the EKS console.
"},{"location":"patterns/blue-green-upgrade/#create-the-environment-stack","title":"Create the environment stack","text":"More info in the environment Readme
cd environment\nterraform init\nterraform apply\n
There can be some warnings due to undeclared variables. This is normal and you can ignore them as we share the same terraform.tfvars
for the 3 projects by using symlinks for a unique file, and we declare some variables used for the eks-blue and eks-green directory
"},{"location":"patterns/blue-green-upgrade/#create-the-blue-cluster","title":"Create the Blue cluster","text":"More info in the eks-blue Readme, you can also see the detailed step in the local module Readme
cd eks-blue\nterraform init\nterraform apply\n
This can take 8mn for EKS cluster, 15mn
"},{"location":"patterns/blue-green-upgrade/#create-the-green-cluster","title":"Create the Green cluster","text":"cd eks-green\nterraform init\nterraform apply\n
By default the only differences in the 2 clusters are the values defined in main.tf. We will change those values to upgrade Kubernetes version of new cluster, and to migrate our stateless workloads between clusters.
"},{"location":"patterns/blue-green-upgrade/#how-this-work","title":"How this work","text":""},{"location":"patterns/blue-green-upgrade/#watch-our-workload-we-focus-on-team-burnham-namespace","title":"Watch our Workload: we focus on team-burnham namespace.","text":"Our clusters are configured with existing ArgoCD Github repository that is synchronized into each of the clusters:
- EKS Blueprints Add-ons repository
- Workloads repository
We are going to look at one of the applications deployed from the workload repository as an example to demonstrate our migration automation: the Burnham
workload in the team-burnham namespace. We have set up a simple Go application that can respond with the name of the cluster it is running on in its body. With this it will be easy to see the current migration on our workload.
<head>\n <title>Hello EKS Blueprint</title>\n</head>\n<div class=\"info\">\n <h>Hello EKS Blueprint Version 1.4</h>\n <p><span>Server address:</span> <span>10.0.2.201:34120</span></p>\n <p><span>Server name:</span> <span>burnham-9d686dc7b-dw45m</span></p>\n <p class=\"smaller\"><span>Date:</span> <span>2022.10.13 07:27:28</span></p>\n <p class=\"smaller\"><span>URI:</span> <span>/</span></p>\n <p class=\"smaller\"><span>HOST:</span> <span>burnham.eks-blueprint.mon-domain.com</span></p>\n <p class=\"smaller\"><span>CLUSTER_NAME:</span> <span>eks-blueprint-blue</span></p>\n</div>\n
The application is deployed from our workload repository manifest
Connect to the cluster: Execute one of the EKS cluster login commands from the terraform output
command, depending on the IAM role you can assume to access to the cluster. If you want EKS Admin cluster, you can execute the command associated to the eks_blueprints_admin_team_configure_kubectl output. It should be something similar to:
aws eks --region eu-west-3 update-kubeconfig --name eks-blueprint-blue --role-arn arn:aws:iam::0123456789:role/admin-team-20230505075455219300000002\n
Note it will allow the role associated to the parameter eks_admin_role_name to assume the role.
You can also connect with the user who created the EKS cluster without specifying the --role-arn
parameter
Next, you can interact with the cluster and see the deployment
$ kubectl get deployment -n team-burnham -l app=burnham\nNAME READY UP-TO-DATE AVAILABLE AGE\nburnham 3/3 3 3 3d18h\n
See the pods
$ kubectl get pods -n team-burnham -l app=burnham\nNAME READY STATUS RESTARTS AGE\nburnham-7db4c6fdbb-82hxn 1/1 Running 0 3d18h\nburnham-7db4c6fdbb-dl59v 1/1 Running 0 3d18h\nburnham-7db4c6fdbb-hpq6h 1/1 Running 0 3d18h\n
See the logs:
$ kubectl logs -n team-burnham -l app=burnham\n2022/10/10 12:35:40 {url: / }, cluster: eks-blueprint-blue }\n2022/10/10 12:35:49 {url: / }, cluster: eks-blueprint-blue }\n
You can make a request to the service, and filter the output to know on which cluster it runs:
$ URL=$(echo -n \"https://\" ; kubectl get ing -n team-burnham burnham-ingress -o json | jq \".spec.rules[0].host\" -r)\n$ curl -s $URL | grep CLUSTER_NAME | awk -F \"<span>|</span>\" '{print $4}'\neks-blueprint-blue\n
"},{"location":"patterns/blue-green-upgrade/#using-aws-route53-and-external-dns","title":"Using AWS Route53 and External DNS","text":"We have configured both our clusters to configure the same Amazon Route 53 Hosted Zones. This is done by having the same configuration of ExternalDNS add-on in main.tf
:
This is the Terraform configuration to configure the ExternalDNS Add-on which is deployed by the Blueprint using ArgoCD.
enable_external_dns = true\n\n external_dns_helm_config = {\n txtOwnerId = local.name\n zoneIdFilter = data.aws_route53_zone.sub.zone_id\n policy = \"sync\"\n awszoneType = \"public\"\n zonesCacheDuration = \"1h\"\n logLevel = \"debug\"\n }\n
- We use ExternalDNS in
sync
mode so that the controller can create but also remove DNS records accordingly to service or ingress objects creation. - We also configured the
txtOwnerId
with the name of the cluster, so that each controller will be able to create/update/delete records but only for records which are associated to the proper OwnerId. - Each Route53 record will be also associated with a
txt
record. This record is used to specify the owner of the associated record and is in the form of:
\"heritage=external-dns,external-dns/owner=eks-blueprint-blue,external-dns/resource=ingress/team-burnham/burnham-ingress\"\n
So in this example the owner of the record is the external-dns controller from the eks-blueprint-blue EKS cluster, and it corresponds to the Kubernetes ingress resource named burnham-ingress in the team-burnham namespace.
Using this feature, and relying on weighted records, we will be able to do blue/green or canary migration by changing the weight of ingress resources defined in each cluster.
"},{"location":"patterns/blue-green-upgrade/#configure-ingress-resources-with-weighted-records","title":"Configure Ingress resources with weighted records","text":"Since we have configured ExternalDNS add-on, we can now defined specific annotation in our ingress
object. You may already know that our workloads are synchronized using ArgoCD from our workload repository sample.
We are focusing on the burnham deployment, which is defined here where we configure the burnham-ingress
ingress object with:
external-dns.alpha.kubernetes.io/set-identifier: {{ .Values.spec.clusterName }}\n external-dns.alpha.kubernetes.io/aws-weight: '{{ .Values.spec.ingress.route53_weight }}'\n
We rely on two external-dns annotations to configure how the record will be created. The set-identifier
annotation will contain the name of the cluster we want to create, which must match the one defined in the external-dns txtOwnerId
configuration.
The aws-weight
will be used to configure the value of the weighted record, and it will be deployed from Helm values, that will be injected by Terraform in our example, so that our platform team will be able to control autonomously how and when they want to migrate workloads between the EKS clusters.
Amazon Route 53 weighted records works like this:
- If we specify a value of 100 in eks-blue cluster and 0 in eks-green cluster, then Route 53 will route all requests to eks-blue cluster.
- If we specify a value of 0 in eks-blue cluster and 100 in eks-green cluster, then Route 53 will route all requests to eks-green cluster.
- we can also define any intermediate values like 100 in eks-blue cluster and 100 in eks-green cluster, so we will have 50% on eks-blue and 50% on eks-green.
"},{"location":"patterns/blue-green-upgrade/#automate-the-migration-from-terraform","title":"Automate the migration from Terraform","text":"Now that we have setup our 2 clusters, deployed with ArgoCD and that the weighed records from values.yaml
are injected from Terraform, let's see how our Platform team can trigger the workload migration.
- At first, 100% of burnham traffic is set to the eks-blue cluster, this is controlled from the
eks-blue/main.tf
& eks-green/main.tf
files with the parameter route53_weight = \"100\"
. The same parameter is set to 0 in cluster eks-green.
Which correspond to :
All requests to our endpoint should respond with eks-blueprint-blue
we can test it with the following command:
URL=$(echo -n \"https://\" ; kubectl get ing -n team-burnham burnham-ingress -o json | jq \".spec.rules[0].host\" -r)\ncurl -s $URL | grep CLUSTER_NAME | awk -F \"<span>|</span>\" '{print $4}'\n
you should see:
eks-blueprint-blue\n
- Let's change traffic to 50% eks-blue and 50% eks-green by activating also value 100 in eks-green locals.tf (
route53_weight = \"100\"
) and let's terraform apply
to let terraform update the configuration
Which correspond to :
All records have weight of 100, so we will have 50% requests on each clusters.
We can check the ratio of requests resolution between both clusters
URL=$(echo -n \"https://\" ; kubectl get ing -n team-burnham burnham-ingress -o json | jq \".spec.rules[0].host\" -r)\nrepeat 10 curl -s $URL | grep CLUSTER_NAME | awk -F \"<span>|</span>\" '{print $4}' && sleep 60\n
Result should be similar to:
eks-blueprint-blue\neks-blueprint-blue\neks-blueprint-blue\neks-blueprint-blue\neks-blueprint-green\neks-blueprint-green\neks-blueprint-blue\neks-blueprint-green\neks-blueprint-blue\neks-blueprint-green\n
The default TTL is for 60 seconds, and you have 50% chance to have blue or green cluster, then you may need to replay the previous command several times to have an idea of the repartition, which theoretically is 50%
- Now that we see that our green cluster is taking requests correctly, we can update the eks-blue cluster configuration to have the weight to 0 and apply again. After a few moments, your Route 53 records should look like the below screenshot, and all requests should now reach the eks-green cluster.
Which correspond to :
At this step, once all DNS TTLs have expired, all the traffic will go to the eks-green cluster. You can either delete the eks-blue cluster, or decide to make upgrades on the blue cluster and send back traffic on eks-blue afterward, or simply keep it as a possibility for rollback if needed.
In this sample, we use a simple Terraform variable to control the weight for all applications; we can also choose to have several parameters, let's say one per application, so you can more finely control your migration strategy application by application.
"},{"location":"patterns/blue-green-upgrade/#delete-the-stack","title":"Delete the Stack","text":""},{"location":"patterns/blue-green-upgrade/#delete-the-eks-clusters","title":"Delete the EKS Cluster(s)","text":"This section, can be executed in either eks-blue or eks-green folders, or in both if you want to delete both clusters.
In order to properly destroy the Cluster, we need first to remove the ArgoCD workloads, while keeping the ArgoCD addons. We will also need to remove our Karpenter provisioners, and any other objects you created outside of Terraform that needs to be cleaned before destroying the terraform stack.
Why do this? When we remove an ingress object, we want the associated Kubernetes add-ons like aws load balancer controller and External DNS to correctly free the associated AWS resources. If we directly ask terraform to destroy everything, it can remove these controllers first, without allowing them the time to remove the associated AWS resources, which will still exist in AWS, preventing us from completely deleting our cluster.
"},{"location":"patterns/blue-green-upgrade/#tldr","title":"TL;DR","text":"../tear-down.sh\n
"},{"location":"patterns/blue-green-upgrade/#manual","title":"Manual","text":" - If also deployed, delete your Karpenter provisioners
this is safe to delete if no addons are deployed on Karpenter, which is the case here. If not we should separate the team-platform deployments which installed Karpenter provisioners in a separate ArgoCD Application to avoid any conflicts.
kubectl delete provisioners.karpenter.sh --all\n
- Delete Workloads App of App
kubectl delete application workloads -n argocd\n
- If also deployed, delete ecsdemo App of App
kubectl delete application ecsdemo -n argocd\n
Once all workload applications have been freed on the AWS side (this can take some time), we can then destroy our add-ons and Terraform resources
Note: it can take time to deregister all load balancers, verify that you don't have any more AWS resources created by EKS prior to start destroying EKS with terraform.
- Destroy terraform resources
terraform apply -destroy -target=\"module.eks_cluster.module.kubernetes_addons\" -auto-approve\nterraform apply -destroy -target=\"module.eks_cluster.module.eks\" -auto-approve\nterraform apply -destroy -auto-approve\n
"},{"location":"patterns/blue-green-upgrade/#delete-the-environment-stack","title":"Delete the environment stack","text":"If you have finish playing with this solution, and once you have destroyed the 2 EKS clusters, you can now delete the environment stack.
cd environment\nterraform apply -destroy -auto-approve\n
This will destroy the Route53 hosted zone, the Certificate Manager certificate, and the VPC with all its associated resources.
"},{"location":"patterns/blue-green-upgrade/#troubleshoot","title":"Troubleshoot","text":""},{"location":"patterns/blue-green-upgrade/#external-dns-ownership","title":"External DNS Ownership","text":"The Amazon Route 53 records association are controlled by ExternalDNS controller. You can see the logs from the controller to understand what is happening by executing the following command in each cluster:
kubectl logs -n external-dns -l app.kubernetes.io/name=external-dns -f\n
In eks-blue cluster, you can see logs like the following, which showcase that the eks-blueprint-blue controller won't make any change in records owned by eks-blueprint-green cluster, the reverse is also true.
time=\"2022-10-10T15:46:54Z\" level=debug msg=\"Skipping endpoint skiapp.eks-blueprint.sallaman.people.aws.dev 300 IN CNAME eks-blueprint-green k8s-riker-68438cd99f-893407990.eu-west-1.elb.amazonaws.com [{aws/evaluate-target-health true} {alias true} {aws/weight 100}] because owner id does not match, found: \\\"eks-blueprint-green\\\", required: \\\"eks-blueprint-blue\\\"\"\ntime=\"2022-10-10T15:46:54Z\" level=debug msg=\"Refreshing zones list cache\"\n
"},{"location":"patterns/blue-green-upgrade/#check-route-53-record-status","title":"Check Route 53 Record status","text":"We can also use the CLI to see our current Route 53 configuration:
export ROOT_DOMAIN=<your-domain-name> # the value you put in hosted_zone_name\nZONE_ID=$(aws route53 list-hosted-zones-by-name --output json --dns-name \"eks-blueprint.${ROOT_DOMAIN}.\" --query \"HostedZones[0].Id\" --out text)\necho $ZONE_ID\naws route53 list-resource-record-sets \\\n--output json \\\n--hosted-zone-id $ZONE_ID \\\n--query \"ResourceRecordSets[?Name == 'burnham.eks-blueprint.$ROOT_DOMAIN.']|[?Type == 'A']\"\naws route53 list-resource-record-sets \\\n--output json \\\n--hosted-zone-id $ZONE_ID \\\n--query \"ResourceRecordSets[?Name == 'burnham.eks-blueprint.$ROOT_DOMAIN.']|[?Type == 'TXT']\"\n
"},{"location":"patterns/blue-green-upgrade/#check-current-resolution-and-ttl-value","title":"Check current resolution and TTL value","text":"As DNS migration is dependent of DNS caching, normally relying on the TTL, you can use dig to see the current value of the TTL used locally
export ROOT_DOMAIN=<your-domain-name> # the value you put for hosted_zone_name\ndig +noauthority +noquestion +noadditional +nostats +ttlunits +ttlid A burnham.eks-blueprint.$ROOT_DOMAIN\n
"},{"location":"patterns/blue-green-upgrade/#get-argocd-ui-password","title":"Get ArgoCD UI Password","text":"You can connect to the ArgoCD UI using the service :
kubectl get svc -n argocd argo-cd-argocd-server -o json | jq '.status.loadBalancer.ingress[0].hostname' -r\n
Then log in with admin and get the password from AWS Secrets Manager:
aws secretsmanager get-secret-value \\\n--secret-id argocd-admin-secret.eks-blueprint \\\n--query SecretString \\\n--output text --region $AWS_REGION\n
"},{"location":"patterns/elastic-fabric-adapter/","title":"Elastic Fabric Adapter","text":""},{"location":"patterns/elastic-fabric-adapter/#eks-cluster-w-elastic-fabric-adapter","title":"EKS Cluster w/ Elastic Fabric Adapter","text":"This pattern demonstrates an Amazon EKS Cluster with an EFA-enabled nodegroup.
"},{"location":"patterns/elastic-fabric-adapter/#deploy","title":"Deploy","text":"See here for the prerequisites and steps to deploy this pattern.
"},{"location":"patterns/elastic-fabric-adapter/#validate","title":"Validate","text":" -
List the nodes by instance type:
kubectl get nodes -o yaml | grep instance-type | grep node | grep -v f:\n
node.kubernetes.io/instance-type: g5.8xlarge\nnode.kubernetes.io/instance-type: m5.large\nnode.kubernetes.io/instance-type: m5.large\nnode.kubernetes.io/instance-type: g5.8xlarge\n
You should see two EFA-enabled (in this example g5.8xlarge
) nodes in the list.
-
Deploy Kubeflow MPI Operator
Kubeflow MPI Operator is required for running MPIJobs on EKS. We will use an MPIJob to test EFA. To deploy the MPI operator execute the following:
kubectl apply -f https://raw.githubusercontent.com/kubeflow/mpi-operator/v0.3.0/deploy/v2beta1/mpi-operator.yaml\n
namespace/mpi-operator created\ncustomresourcedefinition.apiextensions.k8s.io/mpijobs.kubeflow.org created\nserviceaccount/mpi-operator created\nclusterrole.rbac.authorization.k8s.io/kubeflow-mpijobs-admin created\nclusterrole.rbac.authorization.k8s.io/kubeflow-mpijobs-edit created\nclusterrole.rbac.authorization.k8s.io/kubeflow-mpijobs-view created\nclusterrole.rbac.authorization.k8s.io/mpi-operator created\nclusterrolebinding.rbac.authorization.k8s.io/mpi-operator created\ndeployment.apps/mpi-operator created\n
In addition to deploying the operator, please apply a patch to the mpi-operator clusterrole to allow the mpi-operator service account access to leases
resources in the coordination.k8s.io
apiGroup.
kubectl apply -f https://raw.githubusercontent.com/aws-samples/aws-do-eks/main/Container-Root/eks/deployment/kubeflow/mpi-operator/clusterrole-mpi-operator.yaml\n
clusterrole.rbac.authorization.k8s.io/mpi-operator configured\n
-
EFA test
The results should show that two EFA adapters are available (one for each worker pod)
kubectl apply -f https://raw.githubusercontent.com/aws-samples/aws-do-eks/main/Container-Root/eks/deployment/efa-device-plugin/test-efa.yaml\n
mpijob.kubeflow.org/efa-info-test created\n
Once the test launcher pod enters status Running
or Completed
, see the test logs using the command below:
kubectl logs -f $(kubectl get pods | grep launcher | cut -d ' ' -f 1)\n
Warning: Permanently added 'efa-info-test-worker-1.efa-info-test-worker.default.svc,10.11.13.224' (ECDSA) to the list of known hosts.\nWarning: Permanently added 'efa-info-test-worker-0.efa-info-test-worker.default.svc,10.11.4.63' (ECDSA) to the list of known hosts.\n[1,1]<stdout>:provider: efa\n[1,1]<stdout>: fabric: efa\n[1,1]<stdout>: domain: rdmap197s0-rdm\n[1,1]<stdout>: version: 116.10\n[1,1]<stdout>: type: FI_EP_RDM\n[1,1]<stdout>: protocol: FI_PROTO_EFA\n[1,0]<stdout>:provider: efa\n[1,0]<stdout>: fabric: efa\n[1,0]<stdout>: domain: rdmap197s0-rdm\n[1,0]<stdout>: version: 116.10\n[1,0]<stdout>: type: FI_EP_RDM\n[1,0]<stdout>: protocol: FI_PROTO_EFA\n
-
EFA NCCL test
To run the EFA NCCL test please execute the following kubectl command:
kubectl apply -f https://raw.githubusercontent.com/aws-samples/aws-do-eks/main/Container-Root/eks/deployment/efa-device-plugin/test-nccl-efa.yaml\n
mpijob.kubeflow.org/test-nccl-efa created\n
Once the launcher pod enters Running
or Completed
state, execute the following to see the test logs:
kubectl logs -f $(kubectl get pods | grep launcher | cut -d ' ' -f 1)\n
[1,0]<stdout>:test-nccl-efa-worker-0:21:21 [0] NCCL INFO NET/OFI Selected Provider is efa (found 1 nics)\n[1,0]<stdout>:test-nccl-efa-worker-0:21:21 [0] NCCL INFO Using network AWS Libfabric\n[1,0]<stdout>:NCCL version 2.12.7+cuda11.4\n
Columns 8 and 12 in the output table show the in-place and out-of-place bus bandwidth calculated for the data size listed in column 1. In this case it is 3.13 and 3.12 GB/s respectively. Your actual results may be slightly different. The calculated average bus bandwidth is displayed at the bottom of the log when the test finishes after it reaches the max data size, specified in the mpijob manifest. In this result the average bus bandwidth is 1.15 GB/s.
[1,0]<stdout>:# size count type redop root time algbw busbw #wrong time algbw busbw #wrong\n[1,0]<stdout>:# (B) (elements) (us) (GB/s) (GB/s) (us) (GB/s) (GB/s)\n...\n[1,0]<stdout>: 262144 65536 float sum -1 195.0 1.34 1.34 0 194.0 1.35 1.35 0\n[1,0]<stdout>: 524288 131072 float sum -1 296.9 1.77 1.77 0 291.1 1.80 1.80 0\n[1,0]<stdout>: 1048576 262144 float sum -1 583.4 1.80 1.80 0 579.6 1.81 1.81 0\n[1,0]<stdout>: 2097152 524288 float sum -1 983.3 2.13 2.13 0 973.9 2.15 2.15 0\n[1,0]<stdout>: 4194304 1048576 float sum -1 1745.4 2.40 2.40 0 1673.2 2.51 2.51 0\n...\n[1,0]<stdout>:# Avg bus bandwidth : 1.15327\n
"},{"location":"patterns/elastic-fabric-adapter/#destroy","title":"Destroy","text":"terraform destroy -target=\"module.eks_blueprints_addons\" -auto-approve\nterraform destroy -target=\"module.eks\" -auto-approve\nterraform destroy -auto-approve\n
See here for more details on cleaning up the resources created.
"},{"location":"patterns/external-secrets/","title":"External Secrets","text":""},{"location":"patterns/external-secrets/#amazon-eks-cluster-w-external-secrets-operator","title":"Amazon EKS Cluster w/ External Secrets Operator","text":"This pattern deploys an EKS Cluster with the External Secrets Operator. The cluster is populated with a ClusterSecretStore and SecretStore example using AWS Secrets Manager and Parameter Store respectively. A secret for each store is also created. Both stores use IRSA to retrieve the secret values from AWS.
"},{"location":"patterns/external-secrets/#deploy","title":"Deploy","text":"See here for the prerequisites and steps to deploy this pattern.
"},{"location":"patterns/external-secrets/#validate","title":"Validate","text":" -
List the secret resources in the external-secrets
namespace
kubectl get externalsecrets -n external-secrets\nkubectl get secrets -n external-secrets\n
"},{"location":"patterns/external-secrets/#destroy","title":"Destroy","text":"terraform destroy -target=\"module.eks_blueprints_addons\" -auto-approve\nterraform destroy -target=\"module.eks\" -auto-approve\nterraform destroy -auto-approve\n
See here for more details on cleaning up the resources created.
"},{"location":"patterns/fargate-serverless/","title":"Fargate Serverless","text":""},{"location":"patterns/fargate-serverless/#serverless-amazon-eks-cluster","title":"Serverless Amazon EKS Cluster","text":"This pattern demonstrates an Amazon EKS Cluster that utilizes Fargate profiles for a serverless data plane.
"},{"location":"patterns/fargate-serverless/#deploy","title":"Deploy","text":"See here for the prerequisites and steps to deploy this pattern.
"},{"location":"patterns/fargate-serverless/#validate","title":"Validate","text":" -
List the nodes in the cluster; you should see Fargate instances:
kubectl get nodes\n
NAME STATUS ROLES AGE VERSION\nfargate-ip-10-0-17-17.us-west-2.compute.internal Ready <none> 25m v1.26.3-eks-f4dc2c0\nfargate-ip-10-0-20-244.us-west-2.compute.internal Ready <none> 71s v1.26.3-eks-f4dc2c0\nfargate-ip-10-0-41-143.us-west-2.compute.internal Ready <none> 25m v1.26.3-eks-f4dc2c0\nfargate-ip-10-0-44-95.us-west-2.compute.internal Ready <none> 25m v1.26.3-eks-f4dc2c0\nfargate-ip-10-0-45-153.us-west-2.compute.internal Ready <none> 77s v1.26.3-eks-f4dc2c0\nfargate-ip-10-0-47-31.us-west-2.compute.internal Ready <none> 75s v1.26.3-eks-f4dc2c0\nfargate-ip-10-0-6-175.us-west-2.compute.internal Ready <none> 25m v1.26.3-eks-f4dc2c0\n
-
List the pods. All the pods should reach a status of Running
after approximately 60 seconds:
kubectl get pods -A\n
NAMESPACE NAME READY STATUS RESTARTS AGE\napp-2048 app-2048-65bd744dfb-7g9rx 1/1 Running 0 2m34s\napp-2048 app-2048-65bd744dfb-nxcbm 1/1 Running 0 2m34s\napp-2048 app-2048-65bd744dfb-z4b6z 1/1 Running 0 2m34s\nkube-system aws-load-balancer-controller-6cbdb58654-fvskt 1/1 Running 0 26m\nkube-system aws-load-balancer-controller-6cbdb58654-sc7dk 1/1 Running 0 26m\nkube-system coredns-7b7bddbc85-jmbv6 1/1 Running 0 26m\nkube-system coredns-7b7bddbc85-rgmzq 1/1 Running 0 26m\n
-
Validate the aws-logging
configMap for Fargate Fluentbit was created:
kubectl -n aws-observability get configmap aws-logging -o yaml\n
apiVersion: v1\ndata:\nfilters.conf: |\n[FILTER]\nName parser\nMatch *\nKey_Name log\nParser regex\nPreserve_Key True\nReserve_Data True\nflb_log_cw: \"true\"\noutput.conf: |\n[OUTPUT]\nName cloudwatch_logs\nMatch *\nregion us-west-2\nlog_group_name /fargate-serverless/fargate-fluentbit-logs20230509014113352200000006\nlog_stream_prefix fargate-logs-\nauto_create_group true\nparsers.conf: |\n[PARSER]\nName regex\nFormat regex\nRegex ^(?<time>[^ ]+) (?<stream>[^ ]+) (?<logtag>[^ ]+) (?<message>.+)$\nTime_Key time\nTime_Format %Y-%m-%dT%H:%M:%S.%L%z\nTime_Keep On\nDecode_Field_As json message\nimmutable: false\nkind: ConfigMap\nmetadata:\ncreationTimestamp: \"2023-05-08T21:14:52Z\"\nname: aws-logging\nnamespace: aws-observability\nresourceVersion: \"1795\"\nuid: d822bcf5-a441-4996-857e-7fb1357bc07e\n
You can also validate if the CloudWatch LogGroup was created accordingly, and LogStreams were populated:
aws logs describe-log-groups \\\n--log-group-name-prefix \"/fargate-serverless/fargate-fluentbit\"\n
{\n\"logGroups\": [\n{\n\"logGroupName\": \"/fargate-serverless/fargate-fluentbit-logs20230509014113352200000006\",\n\"creationTime\": 1683580491652,\n\"retentionInDays\": 90,\n\"metricFilterCount\": 0,\n\"arn\": \"arn:aws:logs:us-west-2:111222333444:log-group:/fargate-serverless/fargate-fluentbit-logs20230509014113352200000006:*\",\n\"storedBytes\": 0\n}\n]\n}\n
aws logs describe-log-streams \\\n--log-group-name \"/fargate-serverless/fargate-fluentbit-logs20230509014113352200000006\" \\\n--log-stream-name-prefix fargate-logs --query 'logStreams[].logStreamName'\n
[\n\"fargate-logs-flblogs.var.log.fluent-bit.log\",\n\"fargate-logs-kube.var.log.containers.aws-load-balancer-controller-7f989fc6c-grjsq_kube-system_aws-load-balancer-controller-feaa22b4cdaa71ecfc8355feb81d4b61ea85598a7bb57aef07667c767c6b98e4.log\",\n\"fargate-logs-kube.var.log.containers.aws-load-balancer-controller-7f989fc6c-wzr46_kube-system_aws-load-balancer-controller-69075ea9ab3c7474eac2a1696d3a84a848a151420cd783d79aeef960b181567f.log\",\n\"fargate-logs-kube.var.log.containers.coredns-7b7bddbc85-8cxvq_kube-system_coredns-9e4f3ab435269a566bcbaa606c02c146ad58508e67cef09fa87d5c09e4ac0088.log\",\n\"fargate-logs-kube.var.log.containers.coredns-7b7bddbc85-gcjwp_kube-system_coredns-11016818361cd68c32bf8f0b1328f3d92a6d7b8cf5879bfe8b301f393cb011cc.log\"\n]\n
"},{"location":"patterns/fargate-serverless/#example","title":"Example","text":" -
Create an ingress resource using the AWS load balancer controller deployed, pointing to our application service:
kubectl get svc -n app-2048\n
NAME TYPE CLUSTER-IP EXTERNAL-IP PORT(S) AGE\napp-2048 NodePort 172.20.33.217 <none> 80:32568/TCP 2m48s\n
kubectl -n app-2048 create ingress app-2048 --class alb --rule=\"/*=app-2048:80\" \\\n--annotation alb.ingress.kubernetes.io/scheme=internet-facing \\\n--annotation alb.ingress.kubernetes.io/target-type=ip\n
kubectl -n app-2048 get ingress\n
NAME CLASS HOSTS ADDRESS PORTS AGE\napp-2048 alb * k8s-app2048-app2048-6d9c5e92d6-1234567890.us-west-2.elb.amazonaws.com 80 4m9s\n
-
Open the browser to access the application via the URL address shown in the last output in the ADDRESS column.
In our example: k8s-app2048-app2048-6d9c5e92d6-1234567890.us-west-2.elb.amazonaws.com
Info
You might need to wait a few minutes, and then refresh your browser. If your Ingress isn't created after several minutes, then run this command to view the AWS Load Balancer Controller logs:
kubectl logs -n kube-system deployment.apps/aws-load-balancer-controller\n
"},{"location":"patterns/fargate-serverless/#destroy","title":"Destroy","text":"terraform destroy -target=\"module.eks_blueprints_addons\" -auto-approve\nterraform destroy -target=\"module.eks\" -auto-approve\nterraform destroy -auto-approve\n
See here for more details on cleaning up the resources created.
"},{"location":"patterns/fully-private-cluster/","title":"Fully Private Cluster","text":""},{"location":"patterns/fully-private-cluster/#fully-private-amazon-eks-cluster","title":"Fully Private Amazon EKS Cluster","text":"This pattern demonstrates an Amazon EKS cluster that does not have internet access. The private cluster must pull images from a container registry that is within your VPC, and also must have endpoint private access enabled. This is required for nodes to register with the cluster endpoint.
Please see this document for more details on configuring fully private EKS Clusters.
Fully private EKS clusters require the following VPC endpoints to be created in order to communicate with AWS services. This example solution will provide these endpoints if you choose to create a VPC. If you are using an existing VPC, then you may need to ensure these endpoints are created.
com.amazonaws.region.aps-workspaces - If using AWS Managed Prometheus Workspace\ncom.amazonaws.region.ssm - Secrets Management\ncom.amazonaws.region.ec2\ncom.amazonaws.region.ecr.api\ncom.amazonaws.region.ecr.dkr\ncom.amazonaws.region.logs \u2013 For CloudWatch Logs\ncom.amazonaws.region.sts \u2013 If using AWS Fargate or IAM roles for service accounts\ncom.amazonaws.region.elasticloadbalancing \u2013 If using Application Load Balancers\ncom.amazonaws.region.autoscaling \u2013 If using Cluster Autoscaler\ncom.amazonaws.region.s3\n
"},{"location":"patterns/fully-private-cluster/#deploy","title":"Deploy","text":"See here for the prerequisites and steps to deploy this pattern.
"},{"location":"patterns/fully-private-cluster/#validate","title":"Validate","text":" -
Test by listing Nodes in the cluster:
kubectl get nodes\n
NAME STATUS ROLES AGE VERSION\nip-10-0-19-90.us-west-2.compute.internal Ready <none> 8m34s v1.26.2-eks-a59e1f0\nip-10-0-44-110.us-west-2.compute.internal Ready <none> 8m36s v1.26.2-eks-a59e1f0\nip-10-0-9-147.us-west-2.compute.internal Ready <none> 8m35s v1.26.2-eks-a59e1f0\n
-
Test by listing all the Pods running currently. All the Pods should reach a status of Running
after approximately 60 seconds:
kubectl get pods -A\n
NAMESPACE NAME READY STATUS RESTARTS AGE\nkube-system aws-node-jvn9x 1/1 Running 0 7m42s\nkube-system aws-node-mnjlf 1/1 Running 0 7m45s\nkube-system aws-node-q458h 1/1 Running 0 7m49s\nkube-system coredns-6c45d94f67-495rr 1/1 Running 0 14m\nkube-system coredns-6c45d94f67-5c8tc 1/1 Running 0 14m\nkube-system kube-proxy-47wfh 1/1 Running 0 8m32s\nkube-system kube-proxy-f6chz 1/1 Running 0 8m30s\nkube-system kube-proxy-xcfkc 1/1 Running 0 8m31s\n
"},{"location":"patterns/fully-private-cluster/#destroy","title":"Destroy","text":"terraform destroy -target=\"module.eks_blueprints_addons\" -auto-approve\nterraform destroy -target=\"module.eks\" -auto-approve\nterraform destroy -auto-approve\n
See here for more details on cleaning up the resources created.
"},{"location":"patterns/ipv6-eks-cluster/","title":"IPv6 Networking","text":""},{"location":"patterns/ipv6-eks-cluster/#amazon-eks-cluster-w-ipv6-networking","title":"Amazon EKS Cluster w/ IPv6 Networking","text":"This pattern demonstrates an EKS cluster that utilizes IPv6 networking.
"},{"location":"patterns/ipv6-eks-cluster/#deploy","title":"Deploy","text":"See here for the prerequisites and steps to deploy this pattern.
"},{"location":"patterns/ipv6-eks-cluster/#validate","title":"Validate","text":" -
Test by listing all the pods running currently; the IP
should be an IPv6 address.
kubectl get pods -A -o wide\n
# Output should look like below\nNAMESPACE NAME READY STATUS RESTARTS AGE IP NODE NOMINATED NODE READINESS GATES\nkube-system aws-node-bhd2s 1/1 Running 0 3m5s 2600:1f13:6c4:a703:ecf8:3ac1:76b0:9303 ip-10-0-10-183.us-west-2.compute.internal <none> <none>\nkube-system aws-node-nmdgq 1/1 Running 0 3m21s 2600:1f13:6c4:a705:a929:f8d4:9350:1b20 ip-10-0-12-188.us-west-2.compute.internal <none> <none>\nkube-system coredns-799c5565b4-6wxrc 1/1 Running 0 10m 2600:1f13:6c4:a705:bbda:: ip-10-0-12-188.us-west-2.compute.internal <none> <none>\nkube-system coredns-799c5565b4-fjq4q 1/1 Running 0 10m 2600:1f13:6c4:a705:bbda::1 ip-10-0-12-188.us-west-2.compute.internal <none> <none>\nkube-system kube-proxy-58tp7 1/1 Running 0 4m25s 2600:1f13:6c4:a703:ecf8:3ac1:76b0:9303 ip-10-0-10-183.us-west-2.compute.internal <none> <none>\nkube-system kube-proxy-hqkgw 1/1 Running 0 4m25s 2600:1f13:6c4:a705:a929:f8d4:9350:1b20 ip-10-0-12-188.us-west-2.compute.internal <none> <none>\n
-
Test by listing all the nodes running currently; the INTERNAL-IP
should be an IPv6 address.
kubectl get nodes -A -o wide\n
# Output should look like below\nNAME STATUS ROLES AGE VERSION INTERNAL-IP EXTERNAL-IP OS-IMAGE KERNEL-VERSION CONTAINER-RUNTIME\nip-10-0-10-183.us-west-2.compute.internal Ready <none> 4m57s v1.24.7-eks-fb459a0 2600:1f13:6c4:a703:ecf8:3ac1:76b0:9303 <none> Amazon Linux 2 5.4.226-129.415.amzn2.x86_64 containerd://1.6.6\nip-10-0-12-188.us-west-2.compute.internal Ready <none> 4m57s v1.24.7-eks-fb459a0 2600:1f13:6c4:a705:a929:f8d4:9350:1b20 <none> Amazon Linux 2 5.4.226-129.415.amzn2.x86_64 containerd://1.6.6\n
"},{"location":"patterns/ipv6-eks-cluster/#destroy","title":"Destroy","text":"terraform destroy -target=\"module.eks_blueprints_addons\" -auto-approve\nterraform destroy -target=\"module.eks\" -auto-approve\nterraform destroy -auto-approve\n
See here for more details on cleaning up the resources created.
"},{"location":"patterns/istio-multi-cluster/","title":"Istio - Multi-Cluster","text":""},{"location":"patterns/istio-multi-cluster/#amazon-eks-multi-cluster-w-istio","title":"Amazon EKS Multi-Cluster w/ Istio","text":"This pattern demonstrates 2 Amazon EKS clusters configured with Istio. Istio will be set-up to operate in a Multi-Primary configuration, where services are shared across clusters.
Refer to the documentation for Istio
concepts.
"},{"location":"patterns/istio-multi-cluster/#notable-configuration","title":"Notable configuration","text":" - This sample relies on reading data from Terraform Remote State in the different folders. In a production setup, Terraform Remote State is stored in a persistent backend such as Terraform Cloud or S3. For more information, please refer to the Terraform Backends documentation
- The process for connecting clusters is separated from the cluster creation as it requires all clusters to be created first, and then exchanges configuration between them
"},{"location":"patterns/istio-multi-cluster/#folder-structure","title":"Folder structure","text":""},{"location":"patterns/istio-multi-cluster/#0certs-tool","title":"0.certs-tool
","text":"This folder contains the Makefiles from the Istio project to generate 1 root CA with 2 intermediate CAs, one for each cluster. Please refer to the \"Certificate Management\" section in the Istio documentation. For production setup it's highly recommended by the Istio project to have a production-ready CA solution.
NOTE: The 0.certs-tool/create-certs.sh script needs to run before the cluster creation so the code will pick up the relevant certificates
"},{"location":"patterns/istio-multi-cluster/#0vpc","title":"0.vpc
","text":"This folder creates the VPC for both clusters. The VPC creation is not part of the cluster provisioning and therefore lives in a separate folder. To support the multi-cluster/Multi-Primary setup, this folder also creates additional security groups to be used by each cluster's worker nodes to allow cross-cluster communication (resources cluster1_additional_sg
and cluster2_additional_sg
). These security groups allow communication from one to the other and each will be added to the worker nodes of the relevant cluster
"},{"location":"patterns/istio-multi-cluster/#1cluster1","title":"1.cluster1
","text":"This folder creates an Amazon EKS Cluster, named by default cluster-1
(see variables.tf
), with AWS Load Balancer Controller, and Istio installation. Configurations in this folder to be aware of:
- The cluster is configured to use the security groups created in the
0.vpc
folder (cluster1_additional_sg
in this case). - Kubernetes Secret named
cacerts
is created with the certificates created by the 0.certs-tool/create-certs.sh script - Kubernetes Secret named
istio-reader-service-account-istio-remote-secret-token
of type Service-Account
is being created. This is to replicate the istioctl experimental create-remote-secret command. This secret will be used in folder 3.istio-multi-primary
to apply a kubeconfig secret with tokens from the other cluster, to be able to communicate with the other cluster's API Server
"},{"location":"patterns/istio-multi-cluster/#2cluster2","title":"2.cluster2
","text":"Same configuration as in 1.cluster1
except the name of the cluster which is cluster-2
.
"},{"location":"patterns/istio-multi-cluster/#3istio-multi-primary","title":"3.istio-multi-primary
","text":"This folder deploys a reader secret on each cluster. It replicates the istioctl experimental create-remote-secret
by applying a kubeconfig secret prefixed istio-remote-secret-
with the cluster name at the end.
"},{"location":"patterns/istio-multi-cluster/#4test-connectivity","title":"4.test-connectivity
","text":"This folder tests the installation connectivity. It follows the Istio guide Verify the installation by deploying services on each cluster, and curl
-ing from one to the other
"},{"location":"patterns/istio-multi-cluster/#prerequisites","title":"Prerequisites","text":"Ensure that you have the following tools installed locally:
- aws cli
- kubectl
- terraform
"},{"location":"patterns/istio-multi-cluster/#deploy","title":"Deploy","text":""},{"location":"patterns/istio-multi-cluster/#prereq-provision-certificates","title":"Prereq - Provision Certificates","text":"cd 0.certs-tool\n./create-certs.sh\ncd ..\n
"},{"location":"patterns/istio-multi-cluster/#step-0-create-the-vpc","title":"Step 0 - Create the VPC","text":"cd 0.vpc\nterraform init\nterraform apply -auto-approve\ncd ..\n
"},{"location":"patterns/istio-multi-cluster/#step-1-deploy-cluster-1","title":"Step 1 - Deploy cluster-1","text":"cd 1.cluster1\nterraform init\nterraform apply -auto-approve\ncd ..\n
"},{"location":"patterns/istio-multi-cluster/#step-2-deploy-cluster-2","title":"Step 2 - Deploy cluster-2","text":"cd 2.cluster2\nterraform init\nterraform apply -auto-approve\ncd ..\n
"},{"location":"patterns/istio-multi-cluster/#step-3-configure-istio-multi-primary","title":"Step 3 - Configure Istio Multi-Primary","text":"cd 3.istio-multi-primary\nterraform init\nterraform apply -auto-approve\ncd ..\n
"},{"location":"patterns/istio-multi-cluster/#step-4-test-installation-and-connectivity","title":"Step 4 - test installation and connectivity","text":"cd 4.test-connectivity\n./test_connectivity.sh\ncd ..\n
This script deploys the sample application to both clusters and runs curl from a pod in one cluster to a service that is deployed in both clusters. You should expect to see responses from both V1
and V2
of the sample application. The script runs 4 curl
commands from cluster-1 to cluster-2 and vice versa
"},{"location":"patterns/istio-multi-cluster/#destroy","title":"Destroy","text":"To teardown and remove the resources created in this example:
cd 3.istio-multi-primary\nterraform apply -destroy -auto-approve\ncd ../2.cluster2\nterraform apply -destroy -auto-approve\ncd ../1.cluster1\nterraform apply -destroy -auto-approve\ncd ../0.vpc\nterraform apply -destroy -auto-approve\n
"},{"location":"patterns/istio/","title":"Istio","text":""},{"location":"patterns/istio/#amazon-eks-cluster-w-istio","title":"Amazon EKS Cluster w/ Istio","text":"This example shows how to provision an EKS cluster with Istio.
- Deploy EKS Cluster with one managed node group in a VPC
- Add node_security_group rules for port access required for Istio communication
- Install Istio using Helm resources in Terraform
- Install Istio Ingress Gateway using Helm resources in Terraform
- This step deploys a Service of type
LoadBalancer
that creates an AWS Network Load Balancer. - Deploy/Validate Istio communication using sample application
Refer to the documentation on Istio concepts.
"},{"location":"patterns/istio/#deploy","title":"Deploy","text":"See here for the prerequisites and steps to deploy this pattern.
Once the resources have been provisioned, you will need to replace the istio-ingress
pods due to an istiod
dependency issue. Use the following command to perform a rolling restart of the istio-ingress
pods:
kubectl rollout restart deployment istio-ingress -n istio-ingress\n
"},{"location":"patterns/istio/#observability-add-ons","title":"Observability Add-ons","text":"Use the following code snippet to add the Istio Observability Add-ons on the EKS cluster with deployed Istio.
for ADDON in kiali jaeger prometheus grafana\ndo\nADDON_URL=\"https://raw.githubusercontent.com/istio/istio/release-1.18/samples/addons/$ADDON.yaml\"\nkubectl apply -f $ADDON_URL\ndone\n
"},{"location":"patterns/istio/#validate","title":"Validate","text":" -
List out all pods and services in the istio-system
and istio-ingress namespaces:
kubectl get pods,svc -n istio-system\nkubectl get pods,svc -n istio-ingress\n
NAME READY STATUS RESTARTS AGE\npod/grafana-7d4f5589fb-4xj9m 1/1 Running 0 4m14s\npod/istiod-ff577f8b8-c8ssk 1/1 Running 0 4m40s\npod/jaeger-58c79c85cd-n7bkx 1/1 Running 0 4m14s\npod/kiali-749d76d7bb-8kjg7 1/1 Running 0 4m14s\npod/prometheus-5d5d6d6fc-sptxl 2/2 Running 0 4m15s\n\nNAME TYPE CLUSTER-IP EXTERNAL-IP PORT(S) AGE\nservice/grafana ClusterIP 172.20.141.12 <none> 3000/TCP 4m14s\nservice/istiod ClusterIP 172.20.172.70 <none> 15010/TCP,15012/TCP,443/TCP,15014/TCP 4m40s\nservice/jaeger-collector ClusterIP 172.20.223.28 <none> 14268/TCP,14250/TCP,9411/TCP 4m15s\nservice/kiali ClusterIP 172.20.182.231 <none> 20001/TCP,9090/TCP 4m15s\nservice/prometheus ClusterIP 172.20.89.64 <none> 9090/TCP 4m14s\nservice/tracing ClusterIP 172.20.253.201 <none> 80/TCP,16685/TCP 4m14s\nservice/zipkin ClusterIP 172.20.221.157 <none> 9411/TCP 4m15s\n\nNAME READY STATUS RESTARTS AGE\npod/istio-ingress-6f7c5dffd8-glszr 1/1 Running 0 4m28s\n\nNAME TYPE CLUSTER-IP EXTERNAL-IP PORT(S) AGE\nservice/istio-ingress LoadBalancer 172.20.104.27 k8s-istioing-istioing-844c89b6c2-875b8c9a4b4e9365.elb.us-west-2.amazonaws.com 15021:32760/TCP,80:31496/TCP,443:32534/TCP 4m28s\n
-
Verify all the Helm releases installed in the istio-system
and istio-ingress
namespaces:
helm list -n istio-system\n
NAME NAMESPACE REVISION UPDATED STATUS CHART APP VERSION\nistio-base istio-system 1 2023-07-19 11:05:41.599921 -0700 PDT deployed base-1.18.1 1.18.1\nistiod istio-system 1 2023-07-19 11:05:48.087616 -0700 PDT deployed istiod-1.18.1 1.18.1\n
helm list -n istio-ingress\n
NAME NAMESPACE REVISION UPDATED STATUS CHART APP VERSION\nistio-ingress istio-ingress 1 2023-07-19 11:06:03.41609 -0700 PDT deployed gateway-1.18.1 1.18.1\n
"},{"location":"patterns/istio/#observability-add-ons_1","title":"Observability Add-ons","text":"Validate the setup of the observability add-ons by running the following commands and accessing each of the service endpoints using a URL of the form http://localhost:<port> where <port>
is the port number of the corresponding service.
# Visualize Istio Mesh console using Kiali\nkubectl port-forward svc/kiali 20001:20001 -n istio-system\n\n# Get to the Prometheus UI\nkubectl port-forward svc/prometheus 9090:9090 -n istio-system\n\n# Visualize metrics using Grafana\nkubectl port-forward svc/grafana 3000:3000 -n istio-system\n\n# Visualize application traces via Jaeger\nkubectl port-forward svc/jaeger 16686:16686 -n istio-system\n
"},{"location":"patterns/istio/#example","title":"Example","text":" -
Create the sample
namespace and enable the sidecar injection on it
kubectl create namespace sample\nkubectl label namespace sample istio-injection=enabled\n
namespace/sample created\nnamespace/sample labeled\n
-
Deploy helloworld
app
cat <<EOF > helloworld.yaml\napiVersion: v1\nkind: Service\nmetadata:\n name: helloworld\n labels:\n app: helloworld\n service: helloworld\nspec:\n ports:\n - port: 5000\n name: http\n selector:\n app: helloworld\n---\napiVersion: apps/v1\nkind: Deployment\nmetadata:\n name: helloworld-v1\n labels:\n app: helloworld\n version: v1\nspec:\n replicas: 1\n selector:\n matchLabels:\n app: helloworld\n version: v1\n template:\n metadata:\n labels:\n app: helloworld\n version: v1\n spec:\n containers:\n - name: helloworld\n image: docker.io/istio/examples-helloworld-v1\n resources:\n requests:\n cpu: \"100m\"\n imagePullPolicy: IfNotPresent #Always\n ports:\n - containerPort: 5000\nEOF\nkubectl apply -f helloworld.yaml -n sample\n
service/helloworld created\ndeployment.apps/helloworld-v1 created\n
-
Deploy sleep
app that we will use to connect to helloworld
app
cat <<EOF > sleep.yaml\napiVersion: v1\nkind: ServiceAccount\nmetadata:\n name: sleep\n---\napiVersion: v1\nkind: Service\nmetadata:\n name: sleep\n labels:\n app: sleep\n service: sleep\nspec:\n ports:\n - port: 80\n name: http\n selector:\n app: sleep\n---\napiVersion: apps/v1\nkind: Deployment\nmetadata:\n name: sleep\nspec:\n replicas: 1\n selector:\n matchLabels:\n app: sleep\n template:\n metadata:\n labels:\n app: sleep\n spec:\n terminationGracePeriodSeconds: 0\n serviceAccountName: sleep\n containers:\n - name: sleep\n image: curlimages/curl\n command: [\"/bin/sleep\", \"infinity\"]\n imagePullPolicy: IfNotPresent\n volumeMounts:\n - mountPath: /etc/sleep/tls\n name: secret-volume\n volumes:\n - name: secret-volume\n secret:\n secretName: sleep-secret\n optional: true\nEOF\nkubectl apply -f sleep.yaml -n sample\n
serviceaccount/sleep created\nservice/sleep created\ndeployment.apps/sleep created\n
-
Check all the pods in the sample
namespace
kubectl get pods -n sample\n
NAME READY STATUS RESTARTS AGE\nhelloworld-v1-b6c45f55-bx2xk 2/2 Running 0 50s\nsleep-9454cc476-p2zxr 2/2 Running 0 15s\n
-
Connect to helloworld
app from sleep
app and verify if the connection uses envoy proxy
kubectl exec -n sample -c sleep \\\n\"$(kubectl get pod -n sample -l \\\napp=sleep -o jsonpath='{.items[0].metadata.name}')\" \\\n-- curl -v helloworld.sample:5000/hello\n
* processing: helloworld.sample:5000/hello\n% Total % Received % Xferd Average Speed Time Time Time Current\n Dload Upload Total Spent Left Speed\n0 0 0 0 0 0 0 0 --:--:-- --:--:-- --:--:-- 0* Trying 172.20.26.38:5000...\n* Connected to helloworld.sample (172.20.26.38) port 5000\n> GET /hello HTTP/1.1\n> Host: helloworld.sample:5000\n> User-Agent: curl/8.2.0\n> Accept: */*\n>\n< HTTP/1.1 200 OK\n< server: envoy\n< date: Fri, 21 Jul 2023 18:56:09 GMT\n< content-type: text/html; charset=utf-8\n< content-length: 58\n< x-envoy-upstream-service-time: 142\n<\n{ [58 bytes data]\n100 58 100 58 Hello version: v1, instance: helloworld-v1-b6c45f55-h592c\n0 0 392 0 --:--:-- --:--:-- --:--:-- 394\n* Connection #0 to host helloworld.sample left intact\n
"},{"location":"patterns/istio/#destroy","title":"Destroy","text":"terraform destroy -target=\"module.eks_blueprints_addons\" -auto-approve\nterraform destroy -target=\"module.eks\" -auto-approve\nterraform destroy -auto-approve\n
See here for more details on cleaning up the resources created.
"},{"location":"patterns/karpenter/","title":"Karpenter","text":""},{"location":"patterns/karpenter/#karpenter","title":"Karpenter","text":"This pattern demonstrates how to provision Karpenter on a serverless cluster (serverless data plane) using Fargate Profiles.
"},{"location":"patterns/karpenter/#deploy","title":"Deploy","text":"See here for the prerequisites and steps to deploy this pattern.
"},{"location":"patterns/karpenter/#validate","title":"Validate","text":"TODO
Add in validation steps
"},{"location":"patterns/karpenter/#destroy","title":"Destroy","text":"Scale down the deployment to de-provision Karpenter created resources first:
kubectl delete deployment inflate\n
terraform destroy -target=\"module.eks_blueprints_addons\" -auto-approve\nterraform destroy -target=\"module.eks\" -auto-approve\nterraform destroy -auto-approve\n
See here for more details on cleaning up the resources created.
"},{"location":"patterns/multi-tenancy-with-teams/","title":"Multi-Tenancy w/ Teams","text":""},{"location":"patterns/multi-tenancy-with-teams/#multi-tenancy-w-teams","title":"Multi-Tenancy w/ Teams","text":"This pattern demonstrates how to provision and configure a multi-tenancy Amazon EKS cluster with safeguards for resource consumption and namespace isolation.
This example solution provides:
- Two development teams -
team-red
and team-blue
- isolated to their respective namespaces - An admin team with privileged access to the cluster (
team-admin
)
"},{"location":"patterns/multi-tenancy-with-teams/#deploy","title":"Deploy","text":"See here for the prerequisites and steps to deploy this pattern.
"},{"location":"patterns/multi-tenancy-with-teams/#validate","title":"Validate","text":"TODO
Add in validation steps
"},{"location":"patterns/multi-tenancy-with-teams/#destroy","title":"Destroy","text":"terraform destroy -target=\"module.eks_blueprints_addons\" -auto-approve\nterraform destroy -target=\"module.eks\" -auto-approve\nterraform destroy -auto-approve\n
See here for more details on cleaning up the resources created.
"},{"location":"patterns/private-public-ingress/","title":"Private and Public Ingress","text":""},{"location":"patterns/private-public-ingress/#amazon-eks-private-and-public-ingress-example","title":"Amazon EKS Private and Public Ingress example","text":"This example demonstrates how to provision an Amazon EKS cluster with two ingress-nginx controllers; one to expose applications publicly and the other to expose applications internally. It also assigns security groups to the Network Load Balancers used to expose the internal and external ingress controllers.
This solution:
- Installs an ingress-nginx controller for public traffic
- Installs an ingress-nginx controller for internal traffic
To expose your application services via an Ingress
resource with this solution you can set the respective ingressClassName
as either ingress-nginx-external
or ingress-nginx-internal
.
Refer to the documentation for AWS Load Balancer controller
configuration options.
"},{"location":"patterns/private-public-ingress/#deploy","title":"Deploy","text":"See here for the prerequisites and steps to deploy this pattern.
"},{"location":"patterns/private-public-ingress/#validate","title":"Validate","text":"TODO
Add in validation steps
"},{"location":"patterns/private-public-ingress/#destroy","title":"Destroy","text":"terraform destroy -target=\"module.eks_blueprints_addons\" -auto-approve\nterraform destroy -target=\"module.eks\" -auto-approve\nterraform destroy -auto-approve\n
See here for more details on cleaning up the resources created.
"},{"location":"patterns/privatelink-access/","title":"PrivateLink Access","text":""},{"location":"patterns/privatelink-access/#private-eks-cluster-access-via-aws-privatelink","title":"Private EKS cluster access via AWS PrivateLink","text":"This pattern demonstrates how to access a private EKS cluster using AWS PrivateLink.
Refer to the documentation for further details on AWS PrivateLink
.
"},{"location":"patterns/privatelink-access/#deploy","title":"Deploy","text":"See here for the prerequisites and steps to deploy this pattern.
"},{"location":"patterns/privatelink-access/#validate","title":"Validate","text":""},{"location":"patterns/privatelink-access/#network-connectivity","title":"Network Connectivity","text":"An output ssm_test
has been provided to aid in quickly testing the connectivity from the client EC2 instance to the private EKS cluster via AWS PrivateLink. Copy the output value and paste it into your terminal to execute and check the connectivity. If configured correctly, the value returned should be ok
.
COMMAND=\"curl -ks https://9A85B21811733524E3ABCDFEA8714642.gr7.us-west-2.eks.amazonaws.com/readyz\"\nCOMMAND_ID=$(aws ssm send-command --region us-west-2 \\\n--document-name \"AWS-RunShellScript\" \\\n--parameters \"commands=[$COMMAND]\" \\\n--targets \"Key=instanceids,Values=i-0a45eff73ba408575\" \\\n--query 'Command.CommandId' \\\n--output text)\naws ssm get-command-invocation --region us-west-2 \\\n--command-id $COMMAND_ID \\\n--instance-id i-0a45eff73ba408575 \\\n--query 'StandardOutputContent' \\\n--output text\n
"},{"location":"patterns/privatelink-access/#cluster-access","title":"Cluster Access","text":"To test access to the cluster, you will need to execute Kubernetes API calls from within the private network to access the cluster. An EC2 instance has been deployed into a \"client\" VPC to simulate this scenario. However, since the EKS cluster was created with your local IAM identity, the aws-auth
ConfigMap will only have your local identity that is permitted to access the cluster. Since the cluster's API endpoint is private, we cannot use Terraform to reach it to add additional entries to the ConfigMap; we can only access the cluster from within the private network of the cluster's VPC or from the client VPC using AWS PrivateLink access.
Info
The \"client\" EC2 instance provided and the copying of AWS credentials to that instance are for demonstration purposes only. Please consider alternate methods of network access such as AWS Client VPN to provide more secure access.
Perform the following steps to access the cluster with kubectl
from the provided \"client\" EC2 instance.
- Execute the command below on your local machine to get temporary credentials that will be used on the \"client\" EC2 instance:
aws sts get-session-token --duration-seconds 3600 --output yaml\n
- Start a new SSM session on the \"client\" EC2 instance using the provided
ssm_start_session
output value. Copy the output value and paste it into your terminal to execute. Your terminal will now be connected to the \"client\" EC2 instance.
aws ssm start-session --region us-west-2 --target i-0280cf604085f4a44\n
-
Once logged in, export the following environment variables from the output of step #1:
Warning
The session credentials are only valid for 1 hour; you can adjust the session duration in the command provided in step #1
export AWS_ACCESS_KEY_ID=XXXX\nexport AWS_SECRET_ACCESS_KEY=YYYY\nexport AWS_SESSION_TOKEN=ZZZZ\n
- Run the following command to update the local
~/.kube/config
file to enable access to the cluster:
aws eks update-kubeconfig --region us-west-2 --name privatelink-access\n
- Test access by listing the pods running on the cluster:
kubectl get pods -A\n
NAMESPACE NAME READY STATUS RESTARTS AGE\nkube-system aws-node-4f8g8 1/1 Running 0 1m\nkube-system coredns-6ff9c46cd8-59sqp 1/1 Running 0 1m\nkube-system coredns-6ff9c46cd8-svnpb 1/1 Running 0 2m\nkube-system kube-proxy-mm2zc 1/1 Running 0 1m\n
"},{"location":"patterns/privatelink-access/#destroy","title":"Destroy","text":"terraform destroy -target=\"module.eks_blueprints_addons\" -auto-approve\nterraform destroy -target=\"module.eks\" -auto-approve\nterraform destroy -auto-approve\n
See here for more details on cleaning up the resources created.
"},{"location":"patterns/sso-iam-identity-center/","title":"SSO - IAM Identity Center","text":""},{"location":"patterns/sso-iam-identity-center/#iam-identity-center-single-sign-on-for-amazon-eks-cluster","title":"IAM Identity Center Single Sign-On for Amazon EKS Cluster","text":"This example demonstrates how to deploy an Amazon EKS cluster that is deployed on the AWS Cloud, integrated with IAM Identity Center (former AWS SSO) as an the Identity Provider (IdP) for Single Sign-On (SSO) authentication. The configuration for authorization is done using Kubernetes Role-based access control (RBAC).
"},{"location":"patterns/sso-iam-identity-center/#deploy","title":"Deploy","text":"See here for the prerequisites and steps to deploy this pattern.
"},{"location":"patterns/sso-iam-identity-center/#validate","title":"Validate","text":"After the terraform
commands are executed successfully, check if the newly created users are active.
To do that use the link provided in the email invite - if you added a valid email address for your users either in your Terraform code or IAM Identity Center Console - or go to the IAM Identity Center Console, in the Users dashboard on the left hand side menu, then select the user, and click on Reset password button on the upper right corner. Choose the option to Generate a one-time password and share the password with the user.
With the active users, use one of the terraform output
examples to configure your AWS credentials for SSO, as shown in the examples below. After you choose the SSO registration scopes, a browser window will open and prompt you to log in using your IAM Identity Center username and password.
Admin user example
configure_sso_admin = <<EOT\n # aws configure sso\n SSO session name (Recommended): <SESSION_NAME>\n SSO start URL [None]: https://d-1234567890.awsapps.com/start\n SSO region [None]: us-west-2\n SSO registration scopes [sso:account:access]:\n Attempting to automatically open the SSO authorization page in your default browser.\n If the browser does not open or you wish to use a different device to authorize this request, open the following URL:\n\n https://device.sso.us-west-2.amazonaws.com/\n\n Then enter the code:\n\n The only AWS account available to you is: 123456789012\n Using the account ID 123456789012\n The only role available to you is: EKSClusterAdmin\n Using the role name EKSClusterAdmin\n CLI default client Region [us-west-2]: us-west-2\n CLI default output format [json]: json\n CLI profile name [EKSClusterAdmin-123456789012]:\n\n To use this profile, specify the profile name using --profile, as shown:\n\n aws eks --region us-west-2 update-kubeconfig --name iam-identity-center --profile EKSClusterAdmin-123456789012\n\nEOT\n
Read-only user example
configure_sso_user = <<EOT\n # aws configure sso\n SSO session name (Recommended): <SESSION_NAME>\n SSO start URL [None]: https://d-1234567890.awsapps.com/start\n SSO region [None]: us-west-2\n SSO registration scopes [sso:account:access]:\n Attempting to automatically open the SSO authorization page in your default browser.\n If the browser does not open or you wish to use a different device to authorize this request, open the following URL:\n\n https://device.sso.us-west-2.amazonaws.com/\n\n Then enter the code:\n\n The only AWS account available to you is: 123456789012\n Using the account ID 123456789012\n The only role available to you is: EKSClusterUser\n Using the role name EKSClusterUser\n CLI default client Region [us-west-2]: us-west-2\n CLI default output format [json]: json\n CLI profile name [EKSClusterUser-123456789012]:\n\n To use this profile, specify the profile name using --profile, as shown:\n\n aws eks --region us-west-2 update-kubeconfig --name iam-identity-center --profile EKSClusterUser-123456789012\n\nEOT\n
With the kubeconfig
configured, you'll be able to run kubectl
commands in your Amazon EKS Cluster with the impersonated user. The read-only user has a cluster-viewer
Kubernetes role bound to its group, whereas the admin user has the admin
Kubernetes role bound to its group.
kubectl get pods -A\nNAMESPACE NAME READY STATUS RESTARTS AGE\namazon-guardduty aws-guardduty-agent-bl2v2 1/1 Running 0 3h54m\namazon-guardduty aws-guardduty-agent-sqvcx 1/1 Running 0 3h54m\namazon-guardduty aws-guardduty-agent-w8gfc 1/1 Running 0 3h54m\nkube-system aws-node-m9hmd 1/1 Running 0 3h53m\nkube-system aws-node-w42b8 1/1 Running 0 3h53m\nkube-system aws-node-wm6rm 1/1 Running 0 3h53m\nkube-system coredns-6ff9c46cd8-94jlr 1/1 Running 0 3h59m\nkube-system coredns-6ff9c46cd8-nwmrb 1/1 Running 0 3h59m\nkube-system kube-proxy-7fb86 1/1 Running 0 3h54m\nkube-system kube-proxy-p4f5g 1/1 Running 0 3h54m\nkube-system kube-proxy-qkfmc 1/1 Running 0 3h54m\n
You can also use the configure_kubectl
output to assume the Cluster creator role with cluster-admin
access.
configure_kubectl = \"aws eks --region us-west-2 update-kubeconfig --name iam-identity-center\"\n
"},{"location":"patterns/sso-iam-identity-center/#destroy","title":"Destroy","text":"terraform destroy -target=\"module.eks_blueprints_addons\" -auto-approve\nterraform destroy -target=\"module.eks\" -auto-approve\nterraform destroy -auto-approve\n
See here for more details on cleaning up the resources created.
"},{"location":"patterns/sso-okta/","title":"SSO - Okta","text":""},{"location":"patterns/sso-okta/#okta-single-sign-on-for-amazon-eks-cluster","title":"Okta Single Sign-On for Amazon EKS Cluster","text":"This example demonstrates how to deploy an Amazon EKS cluster that is deployed on the AWS Cloud, integrated with Okta as an the Identity Provider (IdP) for Single Sign-On (SSO) authentication. The configuration for authorization is done using Kubernetes Role-based access control (RBAC).
"},{"location":"patterns/sso-okta/#deploy","title":"Deploy","text":"See here for the prerequisites and steps to deploy this pattern.
"},{"location":"patterns/sso-okta/#validate","title":"Validate","text":"After the terraform
commands are executed successfully, check if the newly created users are active.
To do that use the link provided in the email invite if you added a valid email address for your users, or go to the Okta Admin Dashboard, select the user, and click on Set Password and Activate button.
With the active users, use the terraform output
example to setup your kubeconfig
profile to authenticate through Okta.
configure_kubeconfig = <<EOT\n kubectl config set-credentials oidc \\\n --exec-api-version=client.authentication.k8s.io/v1beta1 \\\n --exec-command=kubectl \\\n --exec-arg=oidc-login \\\n --exec-arg=get-token \\\n --exec-arg=--oidc-issuer-url=https://dev-ORGID.okta.com/oauth2/1234567890abcdefghij \\\n --exec-arg=--oidc-client-id=1234567890abcdefghij\n --exec-arg=--oidc-extra-scope=\"email offline_access profile openid\"\n
With the kubeconfig
configured, you'll be able to run kubectl
commands in your Amazon EKS Cluster using the --user
cli option to impersonate the Okta authenticated user. When kubectl
command is issued with the --user
option for the first time, your browser window will open and require you to authenticate.
The read-only user has a cluster-viewer
Kubernetes role bound to its group, whereas the admin user has the admin
Kubernetes role bound to its group.
kubectl get pods -A\nNAMESPACE NAME READY STATUS RESTARTS AGE\namazon-guardduty aws-guardduty-agent-bl2v2 1/1 Running 0 3h54m\namazon-guardduty aws-guardduty-agent-sqvcx 1/1 Running 0 3h54m\namazon-guardduty aws-guardduty-agent-w8gfc 1/1 Running 0 3h54m\nkube-system aws-node-m9hmd 1/1 Running 0 3h53m\nkube-system aws-node-w42b8 1/1 Running 0 3h53m\nkube-system aws-node-wm6rm 1/1 Running 0 3h53m\nkube-system coredns-6ff9c46cd8-94jlr 1/1 Running 0 3h59m\nkube-system coredns-6ff9c46cd8-nwmrb 1/1 Running 0 3h59m\nkube-system kube-proxy-7fb86 1/1 Running 0 3h54m\nkube-system kube-proxy-p4f5g 1/1 Running 0 3h54m\nkube-system kube-proxy-qkfmc 1/1 Running 0 3h54m\n
You can also use the configure_kubectl
output to assume the Cluster creator role with cluster-admin
access.
configure_kubectl = \"aws eks --region us-west-2 update-kubeconfig --name okta\"\n
It's also possible to preconfigure your kubeconfig
using the okta_login
output. This will also require you to authenticate in a browser window.
okta_login = \"kubectl oidc-login setup --oidc-issuer-url=https://dev-ORGID.okta.com/oauth2/1234567890abcdefghij--oidc-client-id=1234567890abcdefghij\"\n
"},{"location":"patterns/sso-okta/#destroy","title":"Destroy","text":"terraform destroy -target=\"module.eks_blueprints_addons\" -auto-approve\nterraform destroy -target=\"module.eks\" -auto-approve\nterraform destroy -auto-approve\n
See here for more details on cleaning up the resources created.
"},{"location":"patterns/stateful/","title":"Stateful","text":""},{"location":"patterns/stateful/#amazon-eks-cluster-for-stateful-workloads","title":"Amazon EKS Cluster for Stateful Workloads","text":""},{"location":"patterns/stateful/#features","title":"Features","text":"Please note: not all of the features listed below are required for stateful workloads on EKS. We are simply grouping together a set of features that are commonly encountered when managing stateful workloads. Users are encouraged to only enable the features that are required for their workload(s) and use case(s).
"},{"location":"patterns/stateful/#velero","title":"velero","text":"(From the project documentation) velero
(formerly Heptio Ark) gives you tools to back up and restore your Kubernetes cluster resources and persistent volumes. You can run Velero with a public cloud platform or on-premises. Velero lets you:
- Take backups of your cluster and restore in case of loss.
- Migrate cluster resources to other clusters.
- Replicate your production cluster to development and testing clusters.
"},{"location":"patterns/stateful/#ebs-efs-csi-drivers","title":"EBS & EFS CSI Drivers","text":" - A second storage class for
gp3
backed volumes has been added and made the default over the EKS default gp2
storage class (gp2
storage class remains in the cluster for use, but it is no longer the default storage class) - A standard implementation of the EFS CSI driver
"},{"location":"patterns/stateful/#eks-managed-nodegroup-w-multiple-volumes","title":"EKS Managed Nodegroup w/ Multiple Volumes","text":"An EKS managed nodegroup that utilizes multiple EBS volumes. The primary use case demonstrated in this example is a second volume that is dedicated to the containerd
runtime so that the root volume is neither filled up nor has its I/O exhausted, which keeps the instance from reaching a degraded state. The containerd
directories are mapped to this volume. You can read more about this recommendation in our EKS best practices guide and refer to the containerd
documentation for more information. The update for containerd
to use the second volume is managed through the provided user data.
In addition, the following properties are configured on the nodegroup volumes:
- EBS encryption using a customer managed key (CMK)
- Configuring the volumes to use GP3 storage
"},{"location":"patterns/stateful/#eks-managed-nodegroup-w-instance-store-volumes","title":"EKS Managed Nodegroup w/ Instance Store Volume(s)","text":"An EKS managed nodegroup that utilizes EC2 instances with ephemeral instance store(s). Instance stores are ideal for temporary storage of information that changes frequently, such as buffers, caches, scratch data, and other temporary content, or for data that is replicated across a fleet of instances. You can read more about instance stores in the AWS documentation; and be sure to check out the Block device mapping instance store caveats
section as well which covers why the example has provided user data for mounting the instance store(s). The size and number of instance stores will vary based on the EC2 instance type and class.
In addition, the following properties are configured on the nodegroup volumes:
- EBS encryption using a customer managed key (CMK)
- Configuring the volumes to use GP3 storage
"},{"location":"patterns/stateful/#deploy","title":"Deploy","text":"See here for the prerequisites and steps to deploy this pattern.
"},{"location":"patterns/stateful/#validate","title":"Validate","text":"For validating velero
see here
The following command will update the kubeconfig
on your local machine and allow you to interact with your EKS Cluster using kubectl
to validate the deployment.
-
List the storage classes to view that efs
, gp2
, and gp3
classes are present and gp3
is the default storage class
kubectl get storageclasses\n
NAME PROVISIONER RECLAIMPOLICY VOLUMEBINDINGMODE ALLOWVOLUMEEXPANSION AGE\nefs efs.csi.aws.com Delete Immediate true 2m19s\ngp2 kubernetes.io/aws-ebs Delete WaitForFirstConsumer false 15m\ngp3 (default) ebs.csi.aws.com Delete WaitForFirstConsumer true 2m19s\n
-
From an instance launched with instance store(s), check that the instance store has been mounted correctly. To verify, first install the nvme-cli
tool and then use it to list the attached NVMe volumes. You can access the instance using SSM Session Manager:
# Install the nvme-cli tool\nsudo yum install nvme-cli -y\n\n# Show NVMe volumes attached\nsudo nvme list\n
# Notice the model is `EC2 NVMe Instance Storage` for the instance store\nNode SN Model Namespace Usage Format FW Rev\n---------------- -------------------- ---------------------------------------- --------- -------------------------- ---------------- --------\n/dev/nvme0n1 vol0546d3c3b0af0bf6d Amazon Elastic Block Store 1 25.77 GB / 25.77 GB 512 B + 0 B 1.0\n/dev/nvme1n1 AWS24BBF51AF55097008 Amazon EC2 NVMe Instance Storage 1 75.00 GB / 75.00 GB 512 B + 0 B 0\n\n# Show disks, their partitions and mounts\nsudo lsblk\n\n# Output should look like below\nNAME MAJ:MIN RM SIZE RO TYPE MOUNTPOINT\nnvme0n1 259:0 0 24G 0 disk\n\u251c\u2500nvme0n1p1 259:2 0 24G 0 part /\n\u2514\u2500nvme0n1p128 259:3 0 1M 0 part\nnvme1n1 259:1 0 69.9G 0 disk /local1 # <--- this is the instance store\n
-
From an instance launched with multiple volume(s), check that the additional volume has been attached correctly. To verify, first install the nvme-cli
tool and then use it to list the attached NVMe volumes. You can access the instance using SSM Session Manager:
# Install the nvme-cli tool\nsudo yum install nvme-cli -y\n\n# Show NVMe volumes attached\nsudo nvme list\n
# /dev/nvme0n1 is the root volume and /dev/nvme1n1 is the second, additional volume\nNode SN Model Namespace Usage Format FW Rev\n---------------- -------------------- ---------------------------------------- --------- -------------------------- ---------------- --------\n/dev/nvme0n1 vol0cd37dab9e4a5c184 Amazon Elastic Block Store 1 68.72 GB / 68.72 GB 512 B + 0 B 1.0\n/dev/nvme1n1 vol0ad3629c159ee869c Amazon Elastic Block Store 1 25.77 GB / 25.77 GB 512 B + 0 B 1.0\n
-
From the same instance used in step 3, check that the containerd directories are using the second /dev/nvme1n1
volume:
df /var/lib/containerd/\n
# Output should look like below, which shows the directory on the\n# /dev/nvme1n1 volume and NOT on /dev/nvme0n1 (root volume)\nFilesystem 1K-blocks Used Available Use% Mounted on\n/dev/nvme1n1 24594768 2886716 20433380 13% /var/lib/containerd\n
df /run/containerd/\n
# Output should look like below, which shows the directory on the\n# /dev/nvme1n1 volume and NOT on /dev/nvme0n1 (root volume)\nFilesystem 1K-blocks Used Available Use% Mounted on\n/dev/nvme1n1 24594768 2886716 20433380 13% /run/containerd\n
"},{"location":"patterns/stateful/#destroy","title":"Destroy","text":"terraform destroy -target=\"module.eks_blueprints_addons\" -auto-approve\nterraform destroy -target=\"module.eks\" -auto-approve\nterraform destroy -auto-approve\n
See here for more details on cleaning up the resources created.
"},{"location":"patterns/tls-with-aws-pca-issuer/","title":"TLS w/ AWS PCA Issuer","text":""},{"location":"patterns/tls-with-aws-pca-issuer/#tls-with-aws-pca-issuer","title":"TLS with AWS PCA Issuer","text":"This pattern demonstrates how to enable TLS with AWS PCA issuer on an Amazon EKS cluster.
"},{"location":"patterns/tls-with-aws-pca-issuer/#deploy","title":"Deploy","text":"See here for the prerequisites and steps to deploy this pattern.
"},{"location":"patterns/tls-with-aws-pca-issuer/#validate","title":"Validate","text":" -
List all the pods running in aws-privateca-issuer
and cert-manager
Namespace.
kubectl get pods -n aws-privateca-issuer\nkubectl get pods -n cert-manager\n
-
View the certificate
status in the default
Namespace. It should be in Ready
state, and be pointing to a secret
created in the same Namespace.
kubectl get certificate -o wide\n
NAME READY SECRET ISSUER STATUS AGE\nexample True example-clusterissuer tls-with-aws-pca-issuer Certificate is up to date and has not expired 41m\n
kubectl get secret example-clusterissuer\n
NAME TYPE DATA AGE\nexample-clusterissuer kubernetes.io/tls 3 43m\n
"},{"location":"patterns/tls-with-aws-pca-issuer/#destroy","title":"Destroy","text":"terraform destroy -target=\"module.eks_blueprints_addons\" -auto-approve\nterraform destroy -target=\"module.eks\" -auto-approve\nterraform destroy -auto-approve\n
See here for more details on cleaning up the resources created.
"},{"location":"patterns/wireguard-with-cilium/","title":"Wireguard /w Cilium","text":""},{"location":"patterns/wireguard-with-cilium/#transparent-encryption-with-cilium-and-wireguard","title":"Transparent Encryption with Cilium and Wireguard","text":"This pattern demonstrates Cilium configured in CNI chaining mode with VPC CNI and with Wireguard transparent encryption enabled on an Amazon EKS cluster.
- Cilium CNI Chaining Documentation
- Cilium Wireguard Encryption Documentation
"},{"location":"patterns/wireguard-with-cilium/#deploy","title":"Deploy","text":"See here for the prerequisites and steps to deploy this pattern.
"},{"location":"patterns/wireguard-with-cilium/#validate","title":"Validate","text":" -
List the daemonsets
kubectl get ds -n kube-system\n
NAME DESIRED CURRENT READY UP-TO-DATE AVAILABLE NODE SELECTOR AGE\naws-node 2 2 2 2 2 <none> 156m\ncilium 2 2 2 2 2 kubernetes.io/os=linux 152m\nkube-proxy 2 2 2 2 2 <none> 156m\n
-
Open a shell inside the cilium container
kubectl -n kube-system exec -ti ds/cilium -- bash\n
-
Verify Encryption is enabled
cilium status | grep Encryption\n
Encryption: Wireguard [cilium_wg0 (Pubkey: b2krgbHgaCsVWALMnFLiS/RekhhcE36PXEjQ7T8+mW0=, Port: 51871, Peers: 1)]\n
-
Install tcpdump
apt-get update\napt-get install -y tcpdump\n
-
Start a packet capture on cilium_wg0
and verify that you see the payload in clear text; this means the traffic is passing through the WireGuard interface and is being encrypted with WireGuard
tcpdump -A -c 40 -i cilium_wg0 | grep \"Welcome to nginx!\"\n
<title>Welcome to nginx!</title>\n<h1>Welcome to nginx!</h1>\n...\n\n40 packets captured\n40 packets received by filter\n0 packets dropped by kernel\n
"},{"location":"patterns/wireguard-with-cilium/#destroy","title":"Destroy","text":"terraform destroy -target=\"module.eks_blueprints_addons\" -auto-approve\nterraform destroy -target=\"module.eks\" -auto-approve\nterraform destroy -auto-approve\n
See here for more details on cleaning up the resources created.
"},{"location":"snippets/ipv4-prefix-delegation/","title":"IPv4 Prefix Delegation","text":"The configuration snippet below shows how to enable prefix delegation to increase the number of available IP addresses on the provisioned EC2 nodes.
- Documentation
- Blog post
"},{"location":"snippets/ipv4-prefix-delegation/#vpc-cni-configuration","title":"VPC CNI Configuration","text":"In this example, the vpc-cni
addon is configured using before_compute = true
. This is done to ensure the vpc-cni
is created and updated before any EC2 instances are created so that the desired settings are applied before they are referenced. With this configuration, you will now see that nodes created will have --max-pods 110
configured due to prefix delegation being enabled on the vpc-cni
.
If you find that your nodes are not being created with the correct number of max pods (i.e. - for m5.large
, if you are seeing a max pods of 29 instead of 110), most likely the vpc-cni
was not configured before the EC2 instances.
module \"eks\" {\nsource = \"terraform-aws-modules/eks/aws\"\n# Truncated for brevity\n...\ncluster_addons = {\nvpc-cni = {\nbefore_compute = true\nmost_recent = true # To ensure access to the latest settings provided\nconfiguration_values = jsonencode({\nenv = {\nENABLE_PREFIX_DELEGATION = \"true\"\nWARM_PREFIX_TARGET = \"1\"\n}\n})\n}\n}\n...\n}\n
When enabled, inspect one of the aws-node-*
(AWS VPC CNI) pods to ensure prefix delegation is enabled and warm prefix target is 1:
kubectl describe ds -n kube-system aws-node | grep ENABLE_PREFIX_DELEGATION: -A 3\n
Output should look similar to below (truncated for brevity):
ENABLE_PREFIX_DELEGATION: true # <- this should be set to true\nWARM_ENI_TARGET: 1\nWARM_PREFIX_TARGET: 1 # <- this should be set to 1\n...\n
"},{"location":"snippets/vpc-cni-custom-networking/","title":"VPC CNI Custom Networking","text":"Custom networking addresses the IP exhaustion issue by assigning the node and Pod IPs from secondary VPC address spaces (CIDR). Custom networking supports the ENIConfig custom resource. The ENIConfig includes an alternate subnet CIDR range (carved from a secondary VPC CIDR), along with the security group(s) that the Pods will belong to. When custom networking is enabled, the VPC CNI creates secondary ENIs in the subnet defined under ENIConfig. The CNI assigns Pods IP addresses from a CIDR range defined in an ENIConfig CRD.
Since the primary ENI is not used by custom networking, the maximum number of Pods you can run on a node is lower. The host network Pods continue to use IP address assigned to the primary ENI. Additionally, the primary ENI is used to handle source network translation and route Pods traffic outside the node.
- Documentation
- Best Practices Guide
"},{"location":"snippets/vpc-cni-custom-networking/#vpc-cni-configuration","title":"VPC CNI Configuration","text":"In this example, the vpc-cni
addon is configured using before_compute = true
. This is done to ensure the vpc-cni
is created and updated before any EC2 instances are created so that the desired settings have been applied before they are referenced. With this configuration, you will now see that nodes created will have --max-pods 110
configured due to the use of prefix delegation being enabled on the vpc-cni
.
If you find that your nodes are not being created with the correct number of max pods (i.e. - for m5.large
, if you are seeing a max pods of 29 instead of 110), most likely the vpc-cni
was not configured before the EC2 instances.
"},{"location":"snippets/vpc-cni-custom-networking/#components","title":"Components","text":"To enable VPC CNI custom networking, you must configure the following components:
-
Create a VPC with additional CIDR block associations. These additional CIDR blocks will be used to create subnets for the VPC CNI custom networking:
module \"vpc\" {\nsource = \"terraform-aws-modules/vpc/aws\"\n# Truncated for brevity\n...\nsecondary_cidr_blocks = [local.secondary_vpc_cidr] # can add up to 5 total CIDR blocks\nazs = local.azs\nprivate_subnets = concat(\n[for k, v in local.azs : cidrsubnet(local.vpc_cidr, 4, k)],\n[for k, v in local.azs : cidrsubnet(local.secondary_vpc_cidr, 2, k)]\n)\n...\n}\n
-
Specify the VPC CNI custom networking configuration in the vpc-cni
addon configuration:
module \"eks\" {\nsource = \"terraform-aws-modules/eks/aws\"\n# Truncated for brevity\n...\ncluster_addons = {\nvpc-cni = {\nbefore_compute = true\nmost_recent = true # To ensure access to the latest settings provided\nconfiguration_values = jsonencode({\nenv = {\nAWS_VPC_K8S_CNI_CUSTOM_NETWORK_CFG = \"true\"\nENI_CONFIG_LABEL_DEF = \"topology.kubernetes.io/zone\"\n}\n})\n}\n}\n...\n}\n
-
Create the ENIConfig
custom resource for each subnet that you want to deploy pods into:
resource \"kubectl_manifest\" \"eni_config\" {\nfor_each = zipmap(local.azs, slice(module.vpc.private_subnets, 3, 6))\nyaml_body = yamlencode({\napiVersion = \"crd.k8s.amazonaws.com/v1alpha1\"\nkind = \"ENIConfig\"\nmetadata = {\nname = each.key\n}\nspec = {\nsecurityGroups = [\nmodule.eks.node_security_group_id,\n]\nsubnet = each.value\n}\n})\n}\n
Once those settings have been successfully applied, you can verify if custom networking is enabled correctly by inspecting one of the aws-node-*
(AWS VPC CNI) pods:
kubectl describe pod aws-node-ttg4h -n kube-system\n\n# Output should look similar to below (truncated for brevity)\nEnvironment:\n ADDITIONAL_ENI_TAGS: {}\nAWS_VPC_CNI_NODE_PORT_SUPPORT: true\nAWS_VPC_ENI_MTU: 9001\nAWS_VPC_K8S_CNI_CONFIGURE_RPFILTER: false\nAWS_VPC_K8S_CNI_CUSTOM_NETWORK_CFG: true # <- this should be set to true\nAWS_VPC_K8S_CNI_EXTERNALSNAT: false\nAWS_VPC_K8S_CNI_LOGLEVEL: DEBUG\n ...\n
"},{"location":"v4-to-v5/addons/","title":"Migrate to EKS Blueprints Addons Module","text":"Please consult the docs/v4-to-v5/example directory for reference configurations. If you find a bug, please open an issue with supporting configuration to reproduce.
"},{"location":"v4-to-v5/addons/#this-guide-is-under-active-development","title":"\u26a0\ufe0f This guide is under active development.","text":""},{"location":"v4-to-v5/addons/#list-of-backwards-incompatible-changes","title":"List of backwards incompatible changes","text":"-
"},{"location":"v4-to-v5/addons/#additional-changes","title":"Additional changes","text":""},{"location":"v4-to-v5/addons/#added","title":"Added","text":"-
"},{"location":"v4-to-v5/addons/#modified","title":"Modified","text":"-
"},{"location":"v4-to-v5/addons/#removed","title":"Removed","text":"-
"},{"location":"v4-to-v5/addons/#variable-and-output-changes","title":"Variable and output changes","text":" -
Removed variables:
-
-
Renamed variables:
-
-
Added variables:
-
-
Removed outputs:
-
-
Renamed outputs:
-
-
Added outputs:
-
"},{"location":"v4-to-v5/addons/#upgrade-migrations","title":"Upgrade Migrations","text":""},{"location":"v4-to-v5/addons/#before-v4x-example","title":"Before - v4.x Example","text":"module \"eks_blueprints_addons\" {\nsource = \"github.com/aws-ia/terraform-aws-eks-blueprints//modules/kubernetes-addons?ref=v4.32.1\"\neks_cluster_id = module.eks.cluster_name\neks_cluster_endpoint = module.eks.cluster_endpoint\neks_oidc_provider = module.eks.oidc_provider\neks_cluster_version = module.eks.cluster_version\n # TODO\n}\n
"},{"location":"v4-to-v5/addons/#after-v5x-example","title":"After - v5.x Example","text":"module \"eks_blueprints_addons\" {\nsource = \"aws-ia/eks-blueprints-addons/aws\"\nversion = \"~> 1.0\"\ncluster_name = module.eks.cluster_name\ncluster_endpoint = module.eks.cluster_endpoint\ncluster_version = module.eks.cluster_version\noidc_provider_arn = module.eks.oidc_provider_arn\n # TODO\n}\n
"},{"location":"v4-to-v5/addons/#diff-of-before-vs-after","title":"Diff of Before vs After","text":"module \"eks_blueprints_addons\" {\n- source = \"github.com/aws-ia/terraform-aws-eks-blueprints//modules/kubernetes-addons?ref=v4.32.1\"\n+ source = \"aws-ia/eks-blueprints-addons/aws\"\n+ version = \"~> 1.0\"\n # TODO\n}\n
"},{"location":"v4-to-v5/addons/#state-move-commands","title":"State Move Commands","text":"In conjunction with the changes above, users can elect to move their external capacity provider(s) under this module using the following move command. Command is shown using the values from the example shown above, please update to suit your configuration names:
terraform state mv 'xxx' 'yyy'\n
"},{"location":"v4-to-v5/cluster/","title":"Migrate to EKS Module v19.x","text":"Please consult the docs/v4-to-v5/example directory for reference configurations. If you find a bug, please open an issue with supporting configuration to reproduce.
"},{"location":"v4-to-v5/cluster/#backwards-incompatible-changes","title":"Backwards incompatible changes","text":" - The cluster module provided in EKS Blueprints is being removed entirely from the project. Instead, users are encouraged to use the
terraform-aws-eks
module for creating and managing their EKS cluster in Terraform. - The KMS module provided in EKS Blueprints has been removed. Users can leverage the KMS creation/management functionality provided by the
terraform-aws-eks
module or utilize the standalone terraform-aws-kms
module. - The EMR on EKS module provided in EKS Blueprints has been removed. Instead, users are encouraged to use the
terraform-aws-emr
virtual cluster sub-module for creating and managing their EMR on EKS virtual cluster in Terraform. - The teams multi-tenancy module provided in EKS Blueprints has been removed. Instead, users are encouraged to use the
terraform-aws-eks-blueprints-teams
module for creating and managing their multi-tenancy constructions within their EKS clusters in Terraform.
"},{"location":"v4-to-v5/cluster/#additional-changes","title":"Additional changes","text":""},{"location":"v4-to-v5/cluster/#added","title":"Added","text":" - N/A
"},{"location":"v4-to-v5/cluster/#modified","title":"Modified","text":" - N/A
"},{"location":"v4-to-v5/cluster/#removed","title":"Removed","text":" - All noted above under
Backwards incompatible changes
"},{"location":"v4-to-v5/cluster/#variable-and-output-changes","title":"Variable and output changes","text":"Since the change is to replace the EKS Blueprints cluster module with the terraform-aws-eks
module, there aren't technically any variable or output changes other than their removal. Please consult the terraform-aws-eks
module for its respective variables/outputs.
-
Removed variables:
- All
-
Renamed variables:
- None
-
Added variables:
- None
-
Removed outputs:
- All
-
Renamed outputs:
- None
-
Added outputs:
- None
"},{"location":"v4-to-v5/cluster/#upgrade-migrations","title":"Upgrade Migrations","text":""},{"location":"v4-to-v5/cluster/#before-v432-example","title":"Before - v4.32 Example","text":"module \"eks\" {\nsource = \"github.com/aws-ia/terraform-aws-eks-blueprints?ref=v4.32.1\"\ncluster_name = local.name\ncluster_version = \"1.26\"\nvpc_id = module.vpc.vpc_id\nprivate_subnet_ids = module.vpc.private_subnets\ncluster_endpoint_private_access = true\nmap_roles = [\n{\nrolearn = data.aws_caller_identity.current.arn\nusername = \"me\"\ngroups = [\"system:masters\"]\n},\n]\nmanaged_node_groups = {\nmanaged = {\nnode_group_name = \"managed\"\ninstance_types = [\"m5.large\"]\nmin_size = 1\nmax_size = 2\ndesired_size = 1\nk8s_labels = {\nWhich = \"managed\"\n}\n}\n}\nfargate_profiles = {\nfargate = {\nfargate_profile_name = \"fargate\"\nfargate_profile_namespaces = [{\nnamespace = \"default\"\nk8s_labels = {\nWhich = \"fargate\"\n}\n}]\nsubnet_ids = module.vpc.private_subnets\n}\n}\nself_managed_node_groups = {\nself_managed = {\nnode_group_name = \"self_managed\"\ninstance_type = \"m5.large\"\nlaunch_template_os = \"amazonlinux2eks\"\nmin_size = 1\nmax_size = 2\ndesired_size = 1\nk8s_labels = {\nWhich = \"self-managed\"\n}\n}\n}\ntags = {\nBlueprint = local.name\nGithubRepo = \"github.com/aws-ia/terraform-aws-eks-blueprints\"\n}\n}\n
"},{"location":"v4-to-v5/cluster/#after-v50-example","title":"After - v5.0 Example","text":"Any of the values that are marked with # Backwards compat
are provided to demonstrate configuration level changes to reduce the number of Terraform changes when migrating to the EKS module.
module \"eks\" {\nsource = \"terraform-aws-modules/eks/aws\"\nversion = \"~> 19.13\"\ncluster_name = local.name\ncluster_version = \"1.26\"\ncluster_endpoint_public_access = true # Backwards compat\ncluster_enabled_log_types = [\"api\", \"audit\", \"authenticator\", \"controllerManager\", \"scheduler\"] # Backwards compat\niam_role_name = \"${local.name}-cluster-role\" # Backwards compat\niam_role_use_name_prefix = false # Backwards compat\nkms_key_aliases = [local.name] # Backwards compat\nvpc_id = module.vpc.vpc_id\nsubnet_ids = module.vpc.private_subnets\nmanage_aws_auth_configmap = true\naws_auth_roles = [\n{\nrolearn = data.aws_caller_identity.current.arn\nusername = \"me\"\ngroups = [\"system:masters\"]\n},\n]\neks_managed_node_groups = {\nmanaged = {\niam_role_name = \"${local.name}-managed\" # Backwards compat\niam_role_use_name_prefix = false # Backwards compat\nuse_custom_launch_template = false # Backwards compat\ninstance_types = [\"m5.large\"]\nmin_size = 1\nmax_size = 2\ndesired_size = 1\nlabels = {\nWhich = \"managed\"\n}\n}\n}\nfargate_profiles = {\nfargate = {\niam_role_name = \"${local.name}-fargate\" # Backwards compat\niam_role_use_name_prefix = false # Backwards compat\nselectors = [{\nnamespace = \"default\"\nlabels = {\nWhich = \"fargate\"\n}\n}]\n}\n}\nself_managed_node_groups = {\nself_managed = {\nname = \"${local.name}-self_managed\" # Backwards compat\nuse_name_prefix = false # Backwards compat\niam_role_name = \"${local.name}-self_managed\" # Backwards compat\niam_role_use_name_prefix = false # Backwards compat\nlaunch_template_name = \"self_managed-${local.name}\" # Backwards compat\nlaunch_template_use_name_prefix = false # Backwards compat\ninstance_type = \"m5.large\"\nmin_size = 1\nmax_size = 2\ndesired_size = 1\nlabels = {\nWhich = \"self-managed\"\n}\n}\n}\ntags = {\nBlueprint = local.name\nGithubRepo = \"github.com/aws-ia/terraform-aws-eks-blueprints\"\n}\n}\n
"},{"location":"v4-to-v5/cluster/#diff-of-before-vs-after","title":"Diff of Before vs After","text":"module \"eks\" {\n- source = \"github.com/aws-ia/terraform-aws-eks-blueprints?ref=v4.32.1\"\n+ source = \"terraform-aws-modules/eks/aws\"\n+ version = \"~> 19.13\"\n cluster_name = local.name\n cluster_version = \"1.26\"\n\n vpc_id = module.vpc.vpc_id\n private_subnet_ids = module.vpc.private_subnets\n+ cluster_endpoint_public_access = true\n- cluster_endpoint_private_access = true\n- map_roles = [\n+ aws_auth_roles = [\n {\n rolearn = data.aws_caller_identity.current.arn\n username = \"me\"\n groups = [\"system:masters\"]\n },\n ]\n\n- managed_node_groups = {\n+ eks_managed_node_groups = {\n managed = {\n- node_group_name = \"managed\"\n instance_types = [\"m5.large\"]\n\n min_size = 1\n max_size = 2\n desired_size = 1\n\n- k8s_labels = {\n+ labels = {\n Which = \"managed\"\n }\n }\n }\n\n fargate_profiles = {\n fargate = {\n- fargate_profile_name = \"fargate\"\n- fargate_profile_namespaces = [{\n+ selectors = [{\n namespace = \"default\"\n\n- k8s_labels = {\n+ labels = {\n Which = \"fargate\"\n }\n }]\n- subnet_ids = module.vpc.private_subnets\n }\n }\n\n self_managed_node_groups = {\n self_managed = {\n- node_group_name = \"self_managed\"\n instance_type = \"m5.large\"\n- launch_template_os = \"amazonlinux2eks\"\n min_size = 1\n max_size = 2\n desired_size = 1\n\n- k8s_labels = {\n+ labels = {\n Which = \"self-managed\"\n }\n }\n }\n\n tags = {\n Blueprint = local.name\n GithubRepo = \"github.com/aws-ia/terraform-aws-eks-blueprints\"\n }\n}\n
"},{"location":"v4-to-v5/cluster/#state-move-commands","title":"State Move Commands","text":"The following Terraform state move commands are provided to aid in migrating the control plane and data plane components.
# This is not removing the configmap from the cluster -\n# it will be adopted by the new module\nterraform state rm 'module.eks.kubernetes_config_map.aws_auth[0]'\n# Cluster\nterraform state mv 'module.eks.module.aws_eks.aws_eks_cluster.this[0]' 'module.eks.aws_eks_cluster.this[0]'\n# Cluster IAM role\nterraform state mv 'module.eks.module.aws_eks.aws_iam_role.this[0]' 'module.eks.aws_iam_role.this[0]'\nterraform state mv 'module.eks.module.aws_eks.aws_iam_role_policy_attachment.this[\"arn:aws:iam::aws:policy/AmazonEKSClusterPolicy\"]' 'module.eks.aws_iam_role_policy_attachment.this[\"AmazonEKSClusterPolicy\"]'\nterraform state mv 'module.eks.module.aws_eks.aws_iam_role_policy_attachment.this[\"arn:aws:iam::aws:policy/AmazonEKSVPCResourceController\"]' 'module.eks.aws_iam_role_policy_attachment.this[\"AmazonEKSVPCResourceController\"]'\n# Cluster primary security group tags\n# Note: This will depend on the tags applied to the module - here we\n# are demonstrating the two tags used in the configuration above\nterraform state mv 'module.eks.module.aws_eks.aws_ec2_tag.cluster_primary_security_group[\"Blueprint\"]' 'module.eks.aws_ec2_tag.cluster_primary_security_group[\"Blueprint\"]'\nterraform state mv 'module.eks.module.aws_eks.aws_ec2_tag.cluster_primary_security_group[\"GithubRepo\"]' 'module.eks.aws_ec2_tag.cluster_primary_security_group[\"GithubRepo\"]'\n# Cluster security group\nterraform state mv 'module.eks.module.aws_eks.aws_security_group.cluster[0]' 'module.eks.aws_security_group.cluster[0]'\n# Cluster security group rules\nterraform state mv 'module.eks.module.aws_eks.aws_security_group_rule.cluster[\"ingress_nodes_443\"]' 'module.eks.aws_security_group_rule.cluster[\"ingress_nodes_443\"]'\n# Node security group\nterraform state mv 'module.eks.module.aws_eks.aws_security_group.node[0]' 'module.eks.aws_security_group.node[0]'\n# Node security group rules\nterraform state mv 'module.eks.module.aws_eks.aws_security_group_rule.node[\"ingress_cluster_443\"]' 
'module.eks.aws_security_group_rule.node[\"ingress_cluster_443\"]'\nterraform state mv 'module.eks.module.aws_eks.aws_security_group_rule.node[\"ingress_cluster_kubelet\"]' 'module.eks.aws_security_group_rule.node[\"ingress_cluster_kubelet\"]'\nterraform state mv 'module.eks.module.aws_eks.aws_security_group_rule.node[\"ingress_self_coredns_tcp\"]' 'module.eks.aws_security_group_rule.node[\"ingress_self_coredns_tcp\"]'\nterraform state mv 'module.eks.module.aws_eks.aws_security_group_rule.node[\"ingress_self_coredns_udp\"]' 'module.eks.aws_security_group_rule.node[\"ingress_self_coredns_udp\"]'\n# OIDC provider\nterraform state mv 'module.eks.module.aws_eks.aws_iam_openid_connect_provider.oidc_provider[0]' 'module.eks.aws_iam_openid_connect_provider.oidc_provider[0]'\n# Fargate profile(s)\n# Note: This demonstrates migrating one profile that is stored under the\n# key `fargate` in the module definition. The same set of steps would\n# need to be performed for each profile, changing only the key name\nterraform state mv 'module.eks.module.aws_eks_fargate_profiles[\"fargate\"].aws_eks_fargate_profile.eks_fargate' 'module.eks.module.fargate_profile[\"fargate\"].aws_eks_fargate_profile.this[0]'\nterraform state mv 'module.eks.module.aws_eks_fargate_profiles[\"fargate\"].aws_iam_role.fargate[0]' 'module.eks.module.fargate_profile[\"fargate\"].aws_iam_role.this[0]'\nterraform state mv 'module.eks.module.aws_eks_fargate_profiles[\"fargate\"].aws_iam_role_policy_attachment.fargate_pod_execution_role_policy[\"arn:aws:iam::aws:policy/AmazonEKSFargatePodExecutionRolePolicy\"]' 'module.eks.module.fargate_profile[\"fargate\"].aws_iam_role_policy_attachment.this[\"arn:aws:iam::aws:policy/AmazonEKSFargatePodExecutionRolePolicy\"]'\n# Managed nodegroup(s)\n# Note: This demonstrates migrating one nodegroup that is stored under the\n# key `managed` in the module definition. 
The same set of steps would\n# need to be performed for each nodegroup, changing only the key name\nterraform state mv 'module.eks.module.aws_eks_managed_node_groups[\"managed\"].aws_eks_node_group.managed_ng' 'module.eks.module.eks_managed_node_group[\"managed\"].aws_eks_node_group.this[0]'\nterraform state mv 'module.eks.module.aws_eks_managed_node_groups[\"managed\"].aws_iam_role.managed_ng[0]' 'module.eks.module.eks_managed_node_group[\"managed\"].aws_iam_role.this[0]'\nterraform state mv 'module.eks.module.aws_eks_managed_node_groups[\"managed\"].aws_iam_role_policy_attachment.managed_ng[\"arn:aws:iam::aws:policy/AmazonEC2ContainerRegistryReadOnly\"]' 'module.eks.module.eks_managed_node_group[\"managed\"].aws_iam_role_policy_attachment.this[\"arn:aws:iam::aws:policy/AmazonEC2ContainerRegistryReadOnly\"]'\nterraform state mv 'module.eks.module.aws_eks_managed_node_groups[\"managed\"].aws_iam_role_policy_attachment.managed_ng[\"arn:aws:iam::aws:policy/AmazonEKSWorkerNodePolicy\"]' 'module.eks.module.eks_managed_node_group[\"managed\"].aws_iam_role_policy_attachment.this[\"arn:aws:iam::aws:policy/AmazonEKSWorkerNodePolicy\"]'\nterraform state mv 'module.eks.module.aws_eks_managed_node_groups[\"managed\"].aws_iam_role_policy_attachment.managed_ng[\"arn:aws:iam::aws:policy/AmazonEKS_CNI_Policy\"]' 'module.eks.module.eks_managed_node_group[\"managed\"].aws_iam_role_policy_attachment.this[\"arn:aws:iam::aws:policy/AmazonEKS_CNI_Policy\"]'\n# Self-managed nodegroup(s)\n# Note: This demonstrates migrating one nodegroup that is stored under the\n# key `self_managed` in the module definition. 
The same set of steps would\n# need to be performed for each nodegroup, changing only the key name\nterraform state mv 'module.eks.module.aws_eks_self_managed_node_groups[\"self_managed\"].aws_autoscaling_group.self_managed_ng' 'module.eks.module.self_managed_node_group[\"self_managed\"].aws_autoscaling_group.this[0]'\nterraform state mv 'module.eks.module.aws_eks_self_managed_node_groups[\"self_managed\"].aws_iam_instance_profile.self_managed_ng[0]' 'module.eks.module.self_managed_node_group[\"self_managed\"].aws_iam_instance_profile.this[0]'\nterraform state mv 'module.eks.module.aws_eks_self_managed_node_groups[\"self_managed\"].aws_iam_role.self_managed_ng[0]' 'module.eks.module.self_managed_node_group[\"self_managed\"].aws_iam_role.this[0]'\nterraform state mv 'module.eks.module.aws_eks_self_managed_node_groups[\"self_managed\"].aws_iam_role_policy_attachment.self_managed_ng[\"arn:aws:iam::aws:policy/AmazonEC2ContainerRegistryReadOnly\"]' 'module.eks.module.self_managed_node_group[\"self_managed\"].aws_iam_role_policy_attachment.this[\"arn:aws:iam::aws:policy/AmazonEC2ContainerRegistryReadOnly\"]'\nterraform state mv 'module.eks.module.aws_eks_self_managed_node_groups[\"self_managed\"].aws_iam_role_policy_attachment.self_managed_ng[\"arn:aws:iam::aws:policy/AmazonEKSWorkerNodePolicy\"]' 'module.eks.module.self_managed_node_group[\"self_managed\"].aws_iam_role_policy_attachment.this[\"arn:aws:iam::aws:policy/AmazonEKSWorkerNodePolicy\"]'\nterraform state mv 'module.eks.module.aws_eks_self_managed_node_groups[\"self_managed\"].aws_iam_role_policy_attachment.self_managed_ng[\"arn:aws:iam::aws:policy/AmazonEKS_CNI_Policy\"]' 'module.eks.module.self_managed_node_group[\"self_managed\"].aws_iam_role_policy_attachment.this[\"arn:aws:iam::aws:policy/AmazonEKS_CNI_Policy\"]'\nterraform state mv 'module.eks.module.aws_eks_self_managed_node_groups[\"self_managed\"].module.launch_template_self_managed_ng.aws_launch_template.this[\"self-managed-node-group\"]' 
'module.eks.module.self_managed_node_group[\"self_managed\"].aws_launch_template.this[0]'\n# Secrets KMS key\nterraform state mv 'module.eks.module.kms[0].aws_kms_key.this' 'module.eks.module.kms.aws_kms_key.this[0]'\nterraform state mv 'module.eks.module.kms[0].aws_kms_alias.this' 'module.eks.module.kms.aws_kms_alias.this[\"migration\"]'\n# Cloudwatch Log Group\nterraform import 'module.eks.aws_cloudwatch_log_group.this[0]' /aws/eks/migration/cluster\n
"},{"location":"v4-to-v5/cluster/#removed-resources","title":"Removed Resources","text":"The following resources will be destroyed when migrating from EKS Blueprints v4.32.1 cluster to the v19.x EKS cluster:
module.eks.module.aws_eks_managed_node_groups[\"managed\"].aws_iam_instance_profile.managed_ng[0]\n
- It is not directly used and was intended to be used by Karpenter. The https://github.com/aws-ia/terraform-aws-eks-blueprints-addons module provides its own resource for creating an IAM instance profile for Karpenter
module.eks.module.aws_eks_managed_node_groups[\"managed\"].aws_iam_role_policy_attachment.managed_ng[\"arn:aws:iam::aws:policy/AmazonSSMManagedInstanceCore\"]\n
- IAM policy is not required by EKS - users can re-add this policy at their discretion
module.eks.module.aws_eks_fargate_profiles[\"fargate\"].aws_iam_policy.cwlogs[0]\nmodule.eks.module.aws_eks_fargate_profiles[\"fargate\"].aws_iam_role_policy_attachment.cwlogs[0]\n
- Policy is not required by EKS
module.eks.module.aws_eks_self_managed_node_groups[\"self_managed\"].aws_iam_role_policy_attachment.self_managed_ng[\"arn:aws:iam::aws:policy/AmazonSSMManagedInstanceCore\"]\n
- IAM policy is not required by EKS - users can re-add this policy at their discretion
"},{"location":"v4-to-v5/motivation/","title":"Direction for v5 of Terraform EKS Blueprints","text":""},{"location":"v4-to-v5/motivation/#what-has-worked","title":"What Has Worked","text":" -
EKS Blueprints was started to make it easier for customers to adopt Amazon Elastic Kubernetes Service (EKS) in a shorter period of time. The project has been quite successful in this regard - hearing from customers stating that EKS Blueprints has helped them get from zero to one or more clusters running with applications in less than 1-2 weeks.
-
EKS Blueprints has also been successful in providing working examples to users that demonstrate common architectural patterns and workload solutions. Some popular examples include:
- Spark on EKS
- Karpenter on EKS Fargate
- Transparent encryption with Wireguard and Cilium
- Fully serverless cluster with EKS Fargate
"},{"location":"v4-to-v5/motivation/#what-has-not","title":"What Has Not","text":" -
Scaling and managing addons that are created through EKS Blueprints. With almost 1,200 projects on the CNCF roadmap, the number of various ways and methods that a project allows for deploying onto a cluster (i.e. - Datadog offers 5 different Helm charts for its service, Prometheus hosts over 30 Helm charts for its services), as well as the number of different tools used to provision addons (i.e. - Terraform, ArgoCD, FluxCD, etc.), supporting both the number of addons and their different forms has been extremely challenging for the team. In addition to managing just the sheer number of addons, supporting the different configurations that users wish to have exposed in conjunction with testing and validating those various configurations is only compounded by the number of addons and their methods of creation.
-
Managing resources provisioned on the cluster using Terraform. Terraform is a fantastic tool for provisioning infrastructure and it is the tool of choice for many customers when it comes to creating resources in AWS. However, there are a number of downsides with Terraform when it comes to provisioning resources on a Kubernetes cluster. These include:
-
Ordering of dependencies when relationships live outside of Terraform's HCL syntax. Terraform wants to evaluate the current state of what it controls and be able to plan a series of actions to align the current state with the desired state in one action. It does this once for each terraform plan
or terraform apply
, and if any issues are encountered, it simply fails and halts execution. When Terraform cannot infer the ordering of dependencies across resources (i.e. - through passing outputs of parent resources to arguments of child resources using the Terraform <resource>.<name>.<attribute>
syntax), it will view this as no relationship between the resources and attempt to execute their provisioning in parallel and asynchronously. Any resources that are left waiting for a dependency will eventually timeout and fail, causing Terraform itself to timeout and fail the apply. This is where the reconciliation loop of a Kubernetes controller or operator on the cluster is better suited - continuously trying to reconcile the state over and over again as dependencies are eventually resolved. (To be clear - the issue of dependency ordering still exists, but the controller/operator will keep retrying and on each retry, some resources will succeed which will move the execution along with each cycle until everything is fully deployed. Terraform could do this if it kept re-trying, but it does not do this today)
-
Publicly exposing access to the EKS endpoints in order to provision resources defined outside of the VPC onto the cluster. When using Terraform, the resource provisioning operation is a \"push\" model where Terraform will send requests to the EKS API Server to create resources. Coupled with the fact that the Terraform operation typically resides outside of the VPC where the cluster is running, this results in users enabling public access to the EKS endpoints to provision resources. However, the more widely accepted approach by the Kubernetes community has been the adoption of GitOps which uses a \"pull\" based model, where an operator or controller running on the cluster will pull the resource definitions from a Git repository and reconcile state from within the cluster itself. This approach is more secure as it does not require public access to the EKS endpoints and instead relies on the cluster's internal network to communicate with the EKS API Server.
-
The nesting of multiple sub-modules in conjunction with the necessity to even require a module to be able to support an addon. When we compare and contrast the Terraform approach to addons versus the GitOps approach, the Terraform approach has a glaring disadvantage - the need to create a module that wraps the addon's Helm chart in order to provision the addon via Terraform. As opposed to the GitOps approach, where users simply consume the charts from where they are stored as needed. This creates a bottleneck on the team to review, test, and validate each new addon as well as the overhead then added for maintaining and updating those addons going forward. This also opens up more areas where breaking changes are encountered which is compounded by the fact that Terraform addons are grouped under an \"umbrella\" module which obfuscates versioning.
-
Being able to support a combination of various tools, modules, frameworks, etc., to meet the needs of customers. The terraform-aws-eks
module was created long before EKS Blueprints, and many customers had already adopted this module for creating their clusters. In addition, Amazon has since adopted eksctl
as the official CLI for Amazon EKS. When EKS Blueprints was first announced, many customers raised questions asking if they needed to abandon their current clusters created through those other tools in order to adopt EKS Blueprints. The answer is no - users can and should be able to use their existing clusters while EKS Blueprints can help augment that process through its supporting modules (addons, teams, etc.). This left the team with the question - why create a Terraform module for creating an EKS cluster when the terraform-aws-eks
already exists and the EKS Blueprints implementation already uses that module for creating the control plane and security groups?
"},{"location":"v4-to-v5/motivation/#what-is-changing","title":"What Is Changing","text":"The direction for EKS Blueprints in v5 will shift from providing an all-encompassing, monolithic \"framework\" and instead focus more on how users can organize a set of modular components to create the desired solution on Amazon EKS. This will allow customers to use the components of their choosing in a way that is more familiar to them and their organization instead of having to adopt and conform to a framework.
With this shift in direction, the cluster definition will be removed from the project and instead examples will reference the terraform-aws-eks
module for cluster creation. The remaining modules will be moved out to their own respective repositories as standalone projects. This leaves the EKS Blueprints project as the canonical place where users can receive guidance on how to configure their clusters to meet a desired architecture, how best to set up their clusters following well-architected practices, as well as references on the various ways that different workloads can be deployed on Amazon EKS.
"},{"location":"v4-to-v5/motivation/#notable-changes","title":"Notable Changes","text":" - EKS Blueprints will remove its Amazon EKS cluster Terraform module components (control plane, EKS managed node group, self-managed node group, and Fargate profile modules) from the project. In its place, users are encouraged to utilize the
terraform-aws-eks
module which meets or exceeds nearly all of the functionality of the EKS Blueprints v4.x cluster module. This includes the Terraform code contained at the root of the project as well as the aws-eks-fargate-profiles
, aws-eks-managed-node-groups
, aws-eks-self-managed-node-groups
, and launch-templates
modules which will all be removed from the project. - The
aws-kms
module will be removed entirely. This was consumed in the root project module for cluster secret encryption. In its place, users can utilize the KMS key creation functionality of the terraform-aws-eks
module or the terraform-aws-kms
module if they wish to control the key separately from the cluster itself. - The
emr-on-eks
module will be removed entirely; its replacement can be found in the new external module terraform-aws-emr
. - The
irsa
and helm-addon
modules will be removed entirely; we have released a new external module terraform-aws-eks-blueprints-addon
that is available on the Terraform registry that replicates/replaces the functionality of these two modules. This will now allow users, as well as partners, to create their own addons that are not natively supported by EKS Blueprints more easily and following the same process as EKS Blueprints. - The
aws-eks-teams
module will be removed entirely; its replacement will be the new external module terraform-aws-eks-blueprints-teams
that incorporates the changes customers have been asking for in https://github.com/aws-ia/terraform-aws-eks-blueprints/issues/842 - The integration between Terraform and ArgoCD has been removed in the initial release of v5. The team is currently investigating better patterns and solutions in conjunction with the ArgoCD and FluxCD teams that will provide a better, more integrated experience when using a GitOps based approach for cluster management. This will be released in a future version of EKS Blueprints v5 and is tracked here
"},{"location":"v4-to-v5/motivation/#resulting-project-structure","title":"Resulting Project Structure","text":"Previously under the v4.x structure, the EKS Blueprint project was comprised of various repositories across multiple AWS organizations that looked roughly like the following:
"},{"location":"v4-to-v5/motivation/#v4x-structure","title":"v4.x Structure","text":"\u251c\u2500\u2500 aws-ia/\n| \u251c\u2500\u2500 terraform-aws-eks-ack-addons/\n| \u2514\u2500\u2500 terraform-aws-eks-blueprints/\n| \u251c\u2500\u2500 aws-auth-configmap.tf\n| \u251c\u2500\u2500 data.tf\n| \u251c\u2500\u2500 eks-worker.tf\n| \u251c\u2500\u2500 locals.tf\n| \u251c\u2500\u2500 main.tf\n| \u251c\u2500\u2500 outputs.tf\n| \u251c\u2500\u2500 variables.tf\n| \u251c\u2500\u2500 versions.tf\n| \u251c\u2500\u2500 examples/\n| \u2514\u2500\u2500 modules\n| \u251c\u2500\u2500 aws-eks-fargate-profiles/\n| \u251c\u2500\u2500 aws-eks-managed-node-groups/\n| \u251c\u2500\u2500 aws-eks-self-managed-node-groups/\n| \u251c\u2500\u2500 aws-eks-teams/\n| \u251c\u2500\u2500 aws-kms/\n| \u251c\u2500\u2500 emr-on-eks/\n| \u251c\u2500\u2500 irsa/\n| \u251c\u2500\u2500 kubernetes-addons/\n| \u2514\u2500\u2500 launch-templates/\n\u251c\u2500\u2500 awslabs/\n| \u251c\u2500\u2500 crossplane-on-eks/\n| \u2514\u2500\u2500 data-on-eks/\n\u2514\u2500\u2500 aws-samples/\n \u251c\u2500\u2500 eks-blueprints-add-ons/ # Previously shared with the CDK based EKS Blueprints project\n\u2514\u2500\u2500 eks-blueprints-workloads/ # Previously shared with the CDK based EKS Blueprints project\n
Under the new v5.x structure, the Terraform based EKS Blueprints project will be comprised of the following repositories:
"},{"location":"v4-to-v5/motivation/#v5x-structure","title":"v5.x Structure","text":"\u251c\u2500\u2500 aws-ia/\n| \u251c\u2500\u2500 terraform-aws-eks-ack-addons/\n| \u251c\u2500\u2500 terraform-aws-eks-blueprints/ # Will contain only example/blueprint implementations; no modules\n| \u251c\u2500\u2500 terraform-aws-eks-blueprints-addon # Module for creating Terraform based addon (IRSA + Helm chart)\n| \u251c\u2500\u2500 terraform-aws-eks-blueprints-addons # Will contain a select set of addons supported by the EKS Blueprints\n| \u2514\u2500\u2500 terraform-aws-eks-blueprints-teams # Was previously `aws-eks-teams/` EKS Blueprint sub-module; updated based on customer feedback\n\u2514\u2500\u2500 awslabs/\n \u251c\u2500\u2500 crossplane-on-eks/\n \u2514\u2500\u2500 data-on-eks/ # Data related patterns that used to be located in `terraform-aws-eks-blueprints/` are now located here\n
"},{"location":"v4-to-v5/motivation/#what-can-users-expect","title":"What Can Users Expect","text":"With these changes, the team intends to provide a better experience for users of the Terraform EKS Blueprints project as well as new and improved reference architectures. Following the v5 changes, the team intends to:
- Improved quality of the examples provided - more information on the intent of the example, why it might be useful for users, which scenarios the pattern is applicable to, etc. Where applicable, architectural diagrams and supporting material will be provided to highlight the intent of the example and how it's constructed.
- A more clear distinction between a blueprint and a usage reference. For example - the Karpenter on EKS Fargate blueprint should demonstrate all of the various aspects that users should be aware of and consider in order to take full advantage of this pattern (recommended practices, observability, logging, monitoring, security, day 2 operations, etc.); this is what makes it a blueprint. In contrast, a usage reference would be an example that shows how users can pass configuration values to the Karpenter provisioner. This example is less focused on the holistic architecture and more focused on how one might configure Karpenter using the implementation. The EKS Blueprints repository will focus mostly on holistic architecture and patterns, and any usage references should be saved for the repository that contains that implementation definition (i.e. - the
terraform-aws-eks-blueprints-addons
repository where the addon implementation is defined). - Faster and more responsive feedback. The first part of this is going to be improved documentation on how to contribute, which should help clarify whether a contribution is suitable and likely to be accepted by the team before any effort is spent by the contributor. However, the goal of v5 is to focus more on the value-added benefits that EKS Blueprints was created to provide as opposed to simply mass-producing Helm chart wrappers (addons) and trying to keep up with that operationally intensive process.
- Lastly, more examples and blueprints that demonstrate various architectures and workloads that run on top of Amazon EKS as well as integrations into other AWS services.
"},{"location":"v4-to-v5/teams/","title":"Migrate to EKS Blueprints Teams Module","text":"Please consult the docs/v4-to-v5/example directory for reference configurations. If you find a bug, please open an issue with supporting configuration to reproduce.
"},{"location":"v4-to-v5/teams/#this-guide-is-under-active-development","title":"\u26a0\ufe0f This guide is under active development.","text":""},{"location":"v4-to-v5/teams/#list-of-backwards-incompatible-changes","title":"List of backwards incompatible changes","text":"-
"},{"location":"v4-to-v5/teams/#additional-changes","title":"Additional changes","text":""},{"location":"v4-to-v5/teams/#added","title":"Added","text":"-
"},{"location":"v4-to-v5/teams/#modified","title":"Modified","text":"-
"},{"location":"v4-to-v5/teams/#removed","title":"Removed","text":"-
"},{"location":"v4-to-v5/teams/#variable-and-output-changes","title":"Variable and output changes","text":" -
Removed variables:
-
-
Renamed variables:
-
-
Added variables:
-
-
Removed outputs:
-
-
Renamed outputs:
-
-
Added outputs:
-
"},{"location":"v4-to-v5/teams/#upgrade-migrations","title":"Upgrade Migrations","text":""},{"location":"v4-to-v5/teams/#before-v4x-example","title":"Before - v4.x Example","text":"module \"eks_blueprints\" {\nsource = \"github.com/aws-ia/terraform-aws-eks-blueprints?ref=v4.32.1\"\n # TODO\n}\n
"},{"location":"v4-to-v5/teams/#after-v5x-example","title":"After - v5.x Example","text":"module \"eks_blueprints_teams\" {\nsource = \"aws-ia/eks-blueprints-teams/aws\"\nversion = \"~> 1.0\"\n # TODO\n}\n
"},{"location":"v4-to-v5/teams/#diff-of-before-vs-after","title":"Diff of Before vs After","text":"module \"eks_blueprints_teams\" {\n- source = \"github.com/aws-ia/terraform-aws-eks-blueprints?ref=v4.32.1\"\n+ source = \"aws-ia/eks-blueprints-teams/aws\"\n+ version = \"~> 1.0\"\n # TODO\n}\n
"},{"location":"v4-to-v5/teams/#state-move-commands","title":"State Move Commands","text":"In conjunction with the changes above, users can elect to move their external capacity provider(s) under this module using the following move command. Command is shown using the values from the example shown above, please update to suit your configuration names:
terraform state mv 'xxx' 'yyy'\n
"},{"location":"v4-to-v5/example/","title":"Migration - v4 to v5","text":""}]}
\ No newline at end of file
diff --git a/sitemap.xml.gz b/sitemap.xml.gz
index 476db4f6a8..56b577b072 100644
Binary files a/sitemap.xml.gz and b/sitemap.xml.gz differ