From 516f914568dca0696be7066c07d2b8dfd2bb4e6e Mon Sep 17 00:00:00 2001 From: Patryk Przekwas Date: Wed, 29 Mar 2023 15:34:23 +0200 Subject: [PATCH 01/12] prow-build-canary-cluster: add scripts for provisioning canary eks cluster --- .../prow-build-canary-cluster/.gitignore | 27 +++ .../.terraform.lock.hcl | 125 ++++++++++++++ .../prow-build-canary-cluster/OWNERS | 14 ++ .../prow-build-canary-cluster/README.md | 8 + .../prow-build-canary-cluster/eks.tf | 154 ++++++++++++++++++ .../prow-build-canary-cluster/iam.tf | 54 ++++++ .../prow-build-canary-cluster/irsa.tf | 94 +++++++++++ .../prow-build-canary-cluster/kubernetes.tf | 101 ++++++++++++ .../prow-build-canary-cluster/main.tf | 74 +++++++++ .../prow-build-canary-cluster/outputs.tf | 20 +++ .../prow-build-canary-cluster/providers.tf | 40 +++++ .../resources/rbac/cluster-admin-crb.yaml | 12 ++ .../resources/rbac/prow-admin-crb.yaml | 12 ++ .../secrets_manager.tf | 34 ++++ .../prow-build-canary-cluster/variables.tf | 95 +++++++++++ .../prow-build-canary-cluster/vpc.tf | 69 ++++++++ 16 files changed, 933 insertions(+) create mode 100644 infra/aws/terraform/prow-build-canary-cluster/.gitignore create mode 100644 infra/aws/terraform/prow-build-canary-cluster/.terraform.lock.hcl create mode 100644 infra/aws/terraform/prow-build-canary-cluster/OWNERS create mode 100644 infra/aws/terraform/prow-build-canary-cluster/README.md create mode 100644 infra/aws/terraform/prow-build-canary-cluster/eks.tf create mode 100644 infra/aws/terraform/prow-build-canary-cluster/iam.tf create mode 100644 infra/aws/terraform/prow-build-canary-cluster/irsa.tf create mode 100644 infra/aws/terraform/prow-build-canary-cluster/kubernetes.tf create mode 100644 infra/aws/terraform/prow-build-canary-cluster/main.tf create mode 100644 infra/aws/terraform/prow-build-canary-cluster/outputs.tf create mode 100644 infra/aws/terraform/prow-build-canary-cluster/providers.tf create mode 100644 
infra/aws/terraform/prow-build-canary-cluster/resources/rbac/cluster-admin-crb.yaml create mode 100644 infra/aws/terraform/prow-build-canary-cluster/resources/rbac/prow-admin-crb.yaml create mode 100644 infra/aws/terraform/prow-build-canary-cluster/secrets_manager.tf create mode 100644 infra/aws/terraform/prow-build-canary-cluster/variables.tf create mode 100644 infra/aws/terraform/prow-build-canary-cluster/vpc.tf diff --git a/infra/aws/terraform/prow-build-canary-cluster/.gitignore b/infra/aws/terraform/prow-build-canary-cluster/.gitignore new file mode 100644 index 00000000000..6665869f80f --- /dev/null +++ b/infra/aws/terraform/prow-build-canary-cluster/.gitignore @@ -0,0 +1,27 @@ +# Local .terraform directories +**/.terraform/* + +# .tfstate files +*.tfstate +*.tfstate.* +*.tfplan + +# Crash log files +crash.log + +# Exclude all .tfvars files, which are likely to contain sensitive data, such as +# passwords, private keys, and other secrets. These should not be part of version +# control as they are data points which are potentially sensitive and subject +# to change depending on the environment. +*.tfvars + +# Ignore override files as they are usually used to override resources locally and so +# are not checked in +override.tf +override.tf.json +*_override.tf +*_override.tf.json + +# Ignore CLI configuration files +.terraformrc +terraform.rc diff --git a/infra/aws/terraform/prow-build-canary-cluster/.terraform.lock.hcl b/infra/aws/terraform/prow-build-canary-cluster/.terraform.lock.hcl new file mode 100644 index 00000000000..9377953c4b0 --- /dev/null +++ b/infra/aws/terraform/prow-build-canary-cluster/.terraform.lock.hcl @@ -0,0 +1,125 @@ +# This file is maintained automatically by "terraform init". +# Manual edits may be lost in future updates.
+ +provider "registry.terraform.io/hashicorp/aws" { + version = "4.60.0" + constraints = ">= 3.72.0, >= 3.73.0, >= 4.0.0, >= 4.47.0" + hashes = [ + "h1:XxVhnhtrRW3YueabP668hVZ3qL4th7pcWbx+ot/l864=", + "zh:1853d6bc89e289ac36c13485e8ff877c1be8485e22f545bb32c7a30f1d1856e8", + "zh:4321d145969e3b7ede62fe51bee248a15fe398643f21df9541eef85526bf3641", + "zh:4c01189cc6963abfe724e6b289a7c06d2de9c395011d8d54efa8fe1aac444e2e", + "zh:5934db7baa2eec0f9acb9c7f1c3dd3b3fe1e67e23dd4a49e9fe327832967b32b", + "zh:5fbedf5d55c6e04e34c32b744151e514a80308e7dec633a56b852829b41e4b5a", + "zh:651558e1446cc05061b75e6f5cc6e2959feb17615cd0ace6ec7a2bcc846321c0", + "zh:76875eb697916475e554af080f9d4d3cd1f7d5d58ecdd3317a844a30980f4eec", + "zh:9b12af85486a96aedd8d7984b0ff811a4b42e3d88dad1a3fb4c0b580d04fa425", + "zh:a52528e6d6c945a6ac45b89e9a70a5435148e4c151241e04c231dd2acc4a8c80", + "zh:af5f94c69025f1c2466a3cf970d1e9bed72938ec33b976c8c067468b6707bb57", + "zh:b6692fad956c9d4ef4266519d9ac2ee9f699f8f2c21627625c9ed63814d41590", + "zh:b74311af5fa5ac6e4eb159c12cfb380dfe2f5cd8685da2eac8073475f398ae60", + "zh:cc5aa6f738baa42edacba5ef1ca0969e5a959422e4491607255f3f6142ba90ed", + "zh:dd1a7ff1b22f0036a76bc905a8229ce7ed0a7eb5a783d3a2586fb1bd920515c3", + "zh:e5ab40c4ad0f1c7bd4d5d834d1aa144e690d1a93329d73b3d37512715a638de9", + ] +} + +provider "registry.terraform.io/hashicorp/cloudinit" { + version = "2.3.2" + constraints = ">= 2.0.0" + hashes = [ + "h1:ocyv0lvfyvzW4krenxV5CL4Jq5DiA3EUfoy8DR6zFMw=", + "zh:2487e498736ed90f53de8f66fe2b8c05665b9f8ff1506f751c5ee227c7f457d1", + "zh:3d8627d142942336cf65eea6eb6403692f47e9072ff3fa11c3f774a3b93130b3", + "zh:434b643054aeafb5df28d5529b72acc20c6f5ded24decad73b98657af2b53f4f", + "zh:436aa6c2b07d82aa6a9dd746a3e3a627f72787c27c80552ceda6dc52d01f4b6f", + "zh:458274c5aabe65ef4dbd61d43ce759287788e35a2da004e796373f88edcaa422", + "zh:54bc70fa6fb7da33292ae4d9ceef5398d637c7373e729ed4fce59bd7b8d67372", + "zh:78d5eefdd9e494defcb3c68d282b8f96630502cac21d1ea161f53cfe9bb483b3", + 
"zh:893ba267e18749c1a956b69be569f0d7bc043a49c3a0eb4d0d09a8e8b2ca3136", + "zh:95493b7517bce116f75cdd4c63b7c82a9d0d48ec2ef2f5eb836d262ef96d0aa7", + "zh:9ae21ab393be52e3e84e5cce0ef20e690d21f6c10ade7d9d9d22b39851bfeddc", + "zh:cc3b01ac2472e6d59358d54d5e4945032efbc8008739a6d4946ca1b621a16040", + "zh:f23bfe9758f06a1ec10ea3a81c9deedf3a7b42963568997d84a5153f35c5839a", + ] +} + +provider "registry.terraform.io/hashicorp/helm" { + version = "2.9.0" + constraints = "2.9.0" + hashes = [ + "h1:fEDID5J/9ret/sLpOSNAu98F/ZBEZhOmL0Leut7m5JU=", + "zh:1471cb45908b426104687c962007b2980cfde294fa3530fabc4798ce9fb6c20c", + "zh:1572e9cec20591ec08ece797b3630802be816a5adde36ca91a93359f2430b130", + "zh:1b10ae03cf5ab1ae21ffaac2251de99797294ae4242b156b3b0beebbdbcb7e0f", + "zh:3bd043b68de967d8d0b549d3f71485193d81167d5656f5507d743dedfe60e352", + "zh:538911921c729185900176cc22eb8edcb822bc8d22b9ebb48103a1d9bb53cc38", + "zh:69a6a2d40c0463662c3fb1621e37a3ee65024ea4479adf4d5f7f19fb0dea48c2", + "zh:94b58daa0c351a49d01f6d8f1caae46c95c2d6c3f29753e2b9ea3e3c0e7c9ab4", + "zh:9d0543331a4a32241e1ab5457f30b41df745acb235a0391205c725a5311e4809", + "zh:a6789306524ca121512a95e873e3949b4175114a6c5db32bed2df2551a79368f", + "zh:d146b94cd9502cca7f2044797a328d71c7ec2a98e2d138270d8a28c872f04289", + "zh:d14ccd14511f0446eacf43a9243f22de7c1427ceb059cf67d7bf9803be2cb15d", + "zh:f569b65999264a9416862bca5cd2a6177d94ccb0424f3a4ef424428912b9cb3c", + ] +} + +provider "registry.terraform.io/hashicorp/kubernetes" { + version = "2.19.0" + constraints = ">= 2.10.0" + hashes = [ + "h1:ID/u9YOv00w+Z8iG+592oyuV7HcqRmPiZpEC9hnyTMY=", + "zh:028d346460de2d1d19b4c863dfc36be51c7bcd97d372b54a3a946bcb19f3f613", + "zh:391d0b38c455437d0a2ab1beb6ce6e1230aa4160bbae11c58b2810b258b44280", + "zh:40ea742f91b67f66e71d7091cfd40cc604528c4947651924bd6d8bd8d9793708", + "zh:48a99d341c8ba3cadaafa7cb99c0f11999f5e23f5cfb0f8469b4e352d9116e74", + "zh:4a5ade940eff267cbf7dcd52c1a7ac3999e7cc24996a409bd8b37bdb48a97f02", + 
"zh:5063742016a8249a4be057b9cc0ef24a684ec76d0ae5463d4b07e9b2d21e047e", + "zh:5d36b3a5662f840a6788f5e2a19d02139e87318feb3c5d82c7d076be1366fec4", + "zh:75edd9960cb30e54ef7de1b7df2761a274f17d4d41f54e72f86b43f41af3eb6d", + "zh:b85cadef3e6f25f1a10a617472bf5e8449decd61626733a1bc723de5edc08f64", + "zh:dc565b17b4ea6dde6bd1b92bc37e5e850fcbf9400540eec00ad3d9552a76ac2e", + "zh:deb665cc2123f2701aa3d653987b2ca35fb035a08a76a2382efb215c209f19a5", + "zh:f569b65999264a9416862bca5cd2a6177d94ccb0424f3a4ef424428912b9cb3c", + ] +} + +provider "registry.terraform.io/hashicorp/time" { + version = "0.9.1" + constraints = ">= 0.9.0" + hashes = [ + "h1:VxyoYYOCaJGDmLz4TruZQTSfQhvwEcMxvcKclWdnpbs=", + "zh:00a1476ecf18c735cc08e27bfa835c33f8ac8fa6fa746b01cd3bcbad8ca84f7f", + "zh:3007f8fc4a4f8614c43e8ef1d4b0c773a5de1dcac50e701d8abc9fdc8fcb6bf5", + "zh:5f79d0730fdec8cb148b277de3f00485eff3e9cf1ff47fb715b1c969e5bbd9d4", + "zh:78d5eefdd9e494defcb3c68d282b8f96630502cac21d1ea161f53cfe9bb483b3", + "zh:8c8094689a2bed4bb597d24a418bbbf846e15507f08be447d0a5acea67c2265a", + "zh:a6d9206e95d5681229429b406bc7a9ba4b2d9b67470bda7df88fa161508ace57", + "zh:aa299ec058f23ebe68976c7581017de50da6204883950de228ed9246f309e7f1", + "zh:b129f00f45fba1991db0aa954a6ba48d90f64a738629119bfb8e9a844b66e80b", + "zh:ef6cecf5f50cda971c1b215847938ced4cb4a30a18095509c068643b14030b00", + "zh:f1f46a4f6c65886d2dd27b66d92632232adc64f92145bf8403fe64d5ffa5caea", + "zh:f79d6155cda7d559c60d74883a24879a01c4d5f6fd7e8d1e3250f3cd215fb904", + "zh:fd59fa73074805c3575f08cd627eef7acda14ab6dac2c135a66e7a38d262201c", + ] +} + +provider "registry.terraform.io/hashicorp/tls" { + version = "4.0.4" + constraints = ">= 3.0.0" + hashes = [ + "h1:GZcFizg5ZT2VrpwvxGBHQ/hO9r6g0vYdQqx3bFD3anY=", + "zh:23671ed83e1fcf79745534841e10291bbf34046b27d6e68a5d0aab77206f4a55", + "zh:45292421211ffd9e8e3eb3655677700e3c5047f71d8f7650d2ce30242335f848", + "zh:59fedb519f4433c0fdb1d58b27c210b27415fddd0cd73c5312530b4309c088be", + 
"zh:5a8eec2409a9ff7cd0758a9d818c74bcba92a240e6c5e54b99df68fff312bbd5", + "zh:5e6a4b39f3171f53292ab88058a59e64825f2b842760a4869e64dc1dc093d1fe", + "zh:810547d0bf9311d21c81cc306126d3547e7bd3f194fc295836acf164b9f8424e", + "zh:824a5f3617624243bed0259d7dd37d76017097dc3193dac669be342b90b2ab48", + "zh:9361ccc7048be5dcbc2fafe2d8216939765b3160bd52734f7a9fd917a39ecbd8", + "zh:aa02ea625aaf672e649296bce7580f62d724268189fe9ad7c1b36bb0fa12fa60", + "zh:c71b4cd40d6ec7815dfeefd57d88bc592c0c42f5e5858dcc88245d371b4b8b1e", + "zh:dabcd52f36b43d250a3d71ad7abfa07b5622c69068d989e60b79b2bb4f220316", + "zh:f569b65999264a9416862bca5cd2a6177d94ccb0424f3a4ef424428912b9cb3c", + ] +} diff --git a/infra/aws/terraform/prow-build-canary-cluster/OWNERS b/infra/aws/terraform/prow-build-canary-cluster/OWNERS new file mode 100644 index 00000000000..43c1ca39668 --- /dev/null +++ b/infra/aws/terraform/prow-build-canary-cluster/OWNERS @@ -0,0 +1,14 @@ +# See the OWNERS docs at https://go.k8s.io/owners + +filters: + ".*": + approvers: + - pkprzekwas + - xmudrii + labels: + - sig/k8s-infra + - area/infra + - area/infra/aws + "\\.sh$": + labels: + - area/bash diff --git a/infra/aws/terraform/prow-build-canary-cluster/README.md b/infra/aws/terraform/prow-build-canary-cluster/README.md new file mode 100644 index 00000000000..645d656ee39 --- /dev/null +++ b/infra/aws/terraform/prow-build-canary-cluster/README.md @@ -0,0 +1,8 @@ +## Differences + +cluster name changed +cluster admin role name changed +secrets-manager iam policy name changed +missing `prow.tf` file (no OIDC provider and iam role for prow) +different subnet setup +used smaller instance \ No newline at end of file diff --git a/infra/aws/terraform/prow-build-canary-cluster/eks.tf b/infra/aws/terraform/prow-build-canary-cluster/eks.tf new file mode 100644 index 00000000000..c0ab5aa0da9 --- /dev/null +++ b/infra/aws/terraform/prow-build-canary-cluster/eks.tf @@ -0,0 +1,154 @@ +/* +Copyright 2023 The Kubernetes Authors. 
+ +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +############################################### +# EKS Cluster +############################################### + +module "eks" { + source = "terraform-aws-modules/eks/aws" + version = "19.10.0" + + # General cluster properties. + cluster_name = var.cluster_name + cluster_version = var.cluster_version + cluster_endpoint_public_access = true + + # Manage aws-auth ConfigMap. + manage_aws_auth_configmap = true + + # Configure aws-auth + aws_auth_roles = [ + # Allow access to the Prow-Canary-Cluster-Admin IAM role (assumed by the IAM users listed in its trust policy). + { + "rolearn" = aws_iam_role.iam_cluster_admin.arn + "username" = "eks-cluster-admin" + "groups" = [ + "eks-cluster-admin" + ] + }, + ] + # Allow EKS access to the root account. + aws_auth_users = [ + { + "userarn" = local.root_account_arn + "username" = "root" + "groups" = [ + "eks-cluster-admin" + ] + }, + ] + + # Allow access to the KMS key used for secrets encryption to the root account. + kms_key_administrators = [ + local.root_account_arn + ] + # Allow service access to the KMS key to the Prow-Canary-Cluster-Admin role. + kms_key_service_users = [ + aws_iam_role.iam_cluster_admin.arn + ] + + # We use IPv4 for the best compatibility with the existing setup. + # Additionally, Ubuntu EKS optimized AMI doesn't support IPv6 well.
+ cluster_ip_family = "ipv4" + + vpc_id = module.vpc.vpc_id + subnet_ids = module.vpc.private_subnets + control_plane_subnet_ids = module.vpc.intra_subnets + + cluster_addons = { + coredns = { + most_recent = true + } + kube-proxy = { + most_recent = true + } + vpc-cni = { + most_recent = true + service_account_role_arn = module.vpc_cni_irsa.iam_role_arn + } + aws-ebs-csi-driver = { + most_recent = true + service_account_role_arn = module.ebs_csi_irsa.iam_role_arn + } + } + + eks_managed_node_group_defaults = { + ami_id = var.node_ami + enable_bootstrap_user_data = true + instance_types = var.node_instance_types + + # We are using the IRSA role defined in irsa.tf (module.vpc_cni_irsa) for the VPC CNI permissions. + # However, we have to deploy with the policy attached FIRST (set this to true when creating a fresh cluster) + # and then turn this off after the cluster/node group is created. Without this initial policy, + # the VPC CNI fails to assign IPs and nodes cannot join the cluster. + # See https://github.com/aws/containers-roadmap/issues/1666 for more context. + iam_role_attach_cni_policy = false + } + + eks_managed_node_groups = { + # Build cluster node group. + build = { + name = "build-managed" + description = "EKS managed node group used for build nodes" + use_name_prefix = true + + subnet_ids = module.vpc.private_subnets + + min_size = var.node_min_size + max_size = var.node_max_size + desired_size = var.node_desired_size + + ami_id = var.node_ami + enable_bootstrap_user_data = true + + force_update_version = false + update_config = { + max_unavailable_percentage = var.node_max_unavailable_percentage + } + + pre_bootstrap_user_data = file("${path.module}/../prow-build-cluster/bootstrap/node_bootstrap.sh") + + capacity_type = "ON_DEMAND" + instance_types = var.node_instance_types + + ebs_optimized = true + enable_monitoring = true + + block_device_mappings = { + # This must be sda1 in order to match the root volume, + # otherwise a new volume is created.
+ sda1 = { + device_name = "/dev/sda1" + ebs = { + volume_size = var.node_volume_size + volume_type = "gp3" + iops = 16000 # Maximum for gp3 volume. + throughput = 1000 # Maximum for gp3 volume. + encrypted = false # NOTE(review): root volume is left unencrypted; confirm this is intentional. + delete_on_termination = true + } + } + } + + enclave_options = { + enabled = true + } + + tags = local.node_group_tags + } + } +} diff --git a/infra/aws/terraform/prow-build-canary-cluster/iam.tf b/infra/aws/terraform/prow-build-canary-cluster/iam.tf new file mode 100644 index 00000000000..4896404b920 --- /dev/null +++ b/infra/aws/terraform/prow-build-canary-cluster/iam.tf @@ -0,0 +1,54 @@ +/* +Copyright 2023 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +############################################### +# IAM access +############################################### + +data "aws_iam_user" "user_xmudrii" { + user_name = "xmudrii" +} +data "aws_iam_user" "user_pprzekwa" { + user_name = "pprzekwa" +} + +resource "aws_iam_role" "iam_cluster_admin" { + name = "Prow-Canary-Cluster-Admin" + description = "IAM role used to delegate access to prow-build-canary-cluster" + + assume_role_policy = jsonencode({ + Version = "2012-10-17" + Statement = [ + { + "Effect" : "Allow", + "Principal" : { + "AWS" : [ + data.aws_iam_user.user_xmudrii.arn, + data.aws_iam_user.user_pprzekwa.arn, + ] + }, + "Action" : "sts:AssumeRole", + "Condition" : {} + } + ] + }) +} + +# Give administrator access to the admin IAM role so it can be used with Terraform.
+resource "aws_iam_role_policy_attachment" "iam_policy_cluster_admin" { + role = aws_iam_role.iam_cluster_admin.name + policy_arn = "arn:aws:iam::aws:policy/AdministratorAccess" +} diff --git a/infra/aws/terraform/prow-build-canary-cluster/irsa.tf b/infra/aws/terraform/prow-build-canary-cluster/irsa.tf new file mode 100644 index 00000000000..fb81fc9ba1c --- /dev/null +++ b/infra/aws/terraform/prow-build-canary-cluster/irsa.tf @@ -0,0 +1,94 @@ +/* +Copyright 2023 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +############################################### +# IAM +############################################### + +# IAM role for service accounts (IRSA) used by the AWS VPC CNI plugin. +module "vpc_cni_irsa" { + source = "terraform-aws-modules/iam/aws//modules/iam-role-for-service-accounts-eks" + version = "~> 5.11" + + role_name_prefix = "VPC-CNI-IRSA" + attach_vpc_cni_policy = true + vpc_cni_enable_ipv4 = true + vpc_cni_enable_ipv6 = true # NOTE(review): cluster_ip_family is "ipv4" in eks.tf; confirm the IPv6 permissions are needed. + + oidc_providers = { + main = { + provider_arn = module.eks.oidc_provider_arn + namespace_service_accounts = ["kube-system:aws-node"] + } + } + + tags = local.tags +} + +# IAM role for service accounts (IRSA) used by the AWS EBS CSI driver plugin.
+module "ebs_csi_irsa" { + source = "terraform-aws-modules/iam/aws//modules/iam-role-for-service-accounts-eks" + version = "~> 5.11" + + role_name_prefix = "EBS-CSI-IRSA" + attach_ebs_csi_policy = true + + oidc_providers = { + main = { + provider_arn = module.eks.oidc_provider_arn + namespace_service_accounts = ["kube-system:ebs-csi-controller-sa"] + } + } + + tags = local.tags +} + +# IAM role for service accounts (IRSA) used by the AWS Load Balancer Controller. +module "aws_load_balancer_controller_irsa" { + source = "terraform-aws-modules/iam/aws//modules/iam-role-for-service-accounts-eks" + version = "~> 5.11" + + role_name_prefix = "LBCONTROLLER-IRSA" + attach_load_balancer_controller_policy = true + + oidc_providers = { + main = { + provider_arn = module.eks.oidc_provider_arn + namespace_service_accounts = ["kube-system:aws-load-balancer-controller"] + } + } + + tags = local.tags +} + +# IAM role for service accounts (IRSA) used by the Cluster Autoscaler. +module "cluster_autoscaler_irsa" { + source = "terraform-aws-modules/iam/aws//modules/iam-role-for-service-accounts-eks" + version = "~> 5.11" + + role_name_prefix = "AUTOSCALER-IRSA" + attach_cluster_autoscaler_policy = true + cluster_autoscaler_cluster_ids = [module.eks.cluster_name] + + oidc_providers = { + main = { + provider_arn = module.eks.oidc_provider_arn + namespace_service_accounts = ["kube-system:cluster-autoscaler"] + } + } + + tags = local.tags +} diff --git a/infra/aws/terraform/prow-build-canary-cluster/kubernetes.tf b/infra/aws/terraform/prow-build-canary-cluster/kubernetes.tf new file mode 100644 index 00000000000..e8fc1dff892 --- /dev/null +++ b/infra/aws/terraform/prow-build-canary-cluster/kubernetes.tf @@ -0,0 +1,101 @@ +/* +Copyright 2023 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License.
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +# This file contains resources that require EKS to be running before terraform plan/apply. + +# module "cluster_autoscaler" { +# source = "../prow-build-cluster/modules/cluster-autoscaler" +# providers = { +# kubernetes = kubernetes +# } + +# cluster_name = module.eks.cluster_name +# cluster_autoscaler_iam_role_arn = module.cluster_autoscaler_irsa.iam_role_arn +# cluster_autoscaler_version = var.cluster_autoscaler_version + +# depends_on = [ +# module.eks +# ] +# } + +# module "metrics_server" { +# source = "../prow-build-cluster/modules/metrics-server" +# providers = { +# kubernetes = kubernetes +# } + +# depends_on = [ +# module.eks +# ] +# } + +# # AWS Load Balancer Controller (ALB/NLB integration).
+# resource "helm_release" "aws_lb_controller" { +# name = "aws-load-balancer-controller" +# namespace = "kube-system" +# repository = "https://aws.github.io/eks-charts" +# chart = "aws-load-balancer-controller" +# version = "1.4.8" + +# set { +# name = "clusterName" +# value = module.eks.cluster_name +# } + +# set { +# name = "serviceAccount.create" +# value = "true" +# } + +# set { +# name = "serviceAccount.name" +# value = "aws-load-balancer-controller" +# } + +# set { +# name = "serviceAccount.annotations.eks\\.amazonaws\\.com/role-arn" +# value = module.aws_load_balancer_controller_irsa.iam_role_arn +# } + +# depends_on = [ +# module.eks +# ] +# } + +# # AWS Secrets Manager integration +# resource "helm_release" "secrets_store_csi_driver" { +# name = "secrets-store-csi-driver" +# namespace = "kube-system" +# repository = "https://kubernetes-sigs.github.io/secrets-store-csi-driver/charts" +# chart = "secrets-store-csi-driver" +# version = "1.3.1" + +# depends_on = [ +# module.eks +# ] +# } + +# resource "helm_release" "secrets_store_csi_driver_provider_aws" { +# name = "aws-secrets-manager" +# namespace = "kube-system" +# repository = "https://aws.github.io/secrets-store-csi-driver-provider-aws" +# chart = "secrets-store-csi-driver-provider-aws" +# version = "0.3.0" + +# depends_on = [ +# module.eks +# ] +# } diff --git a/infra/aws/terraform/prow-build-canary-cluster/main.tf b/infra/aws/terraform/prow-build-canary-cluster/main.tf new file mode 100644 index 00000000000..842b334619b --- /dev/null +++ b/infra/aws/terraform/prow-build-canary-cluster/main.tf @@ -0,0 +1,74 @@ +/* +Copyright 2023 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +############################################### +# INITIALIZATION +############################################### + +provider "aws" { + region = var.cluster_region + + # We have a chicken-egg problem here. This role is not going to exist + # when creating the cluster for the first time. In that case, this must + # be commented, than uncommented afterwards. + # assume_role { + # role_arn = "arn:aws:iam::468814281478:role/Prow-Canary-Cluster-Admin" + # session_name = "prow-build-cluster-terraform" + # } +} + +provider "kubernetes" { + host = module.eks.cluster_endpoint + cluster_ca_certificate = base64decode(module.eks.cluster_certificate_authority_data) + + # This requires the awscli to be installed locally where Terraform is executed. + exec { + api_version = "client.authentication.k8s.io/v1beta1" + command = "aws" + args = ["eks", "get-token", "--cluster-name", module.eks.cluster_name, "--role-arn", aws_iam_role.iam_cluster_admin.arn] + } +} + +provider "helm" { + kubernetes { + host = module.eks.cluster_endpoint + cluster_ca_certificate = base64decode(module.eks.cluster_certificate_authority_data) + + # This requires the awscli to be installed locally where Terraform is executed. 
+ exec { + api_version = "client.authentication.k8s.io/v1beta1" + command = "aws" + args = ["eks", "get-token", "--cluster-name", module.eks.cluster_name, "--role-arn", aws_iam_role.iam_cluster_admin.arn] + } + } +} + +data "aws_caller_identity" "current" {} +data "aws_availability_zones" "available" {} + +locals { + root_account_arn = "arn:aws:iam::${data.aws_caller_identity.current.account_id}:root" + + tags = { + Cluster = var.cluster_name + } + auto_scaling_tags = { + "k8s.io/cluster-autoscaler/${var.cluster_name}" = "owned" + "k8s.io/cluster-autoscaler/enabled" = true + } + node_group_tags = merge(local.tags, local.auto_scaling_tags) + azs = slice(data.aws_availability_zones.available.names, 0, 3) +} diff --git a/infra/aws/terraform/prow-build-canary-cluster/outputs.tf b/infra/aws/terraform/prow-build-canary-cluster/outputs.tf new file mode 100644 index 00000000000..fca4505f6c7 --- /dev/null +++ b/infra/aws/terraform/prow-build-canary-cluster/outputs.tf @@ -0,0 +1,20 @@ +/* +Copyright 2023 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+*/ + +output "cluster_endpoint" { + description = "Endpoint for EKS control plane" + value = module.eks.cluster_endpoint +} diff --git a/infra/aws/terraform/prow-build-canary-cluster/providers.tf b/infra/aws/terraform/prow-build-canary-cluster/providers.tf new file mode 100644 index 00000000000..8536f3864c6 --- /dev/null +++ b/infra/aws/terraform/prow-build-canary-cluster/providers.tf @@ -0,0 +1,40 @@ +/* +Copyright 2023 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +terraform { + backend "s3" { + bucket = "prow-build-cluster-tfstate" + key = "prow-build-canary-cluster/terraform.tfstate" + region = "us-east-2" + } + + required_version = "~> 1.3.0" + + required_providers { + aws = { + source = "hashicorp/aws" + version = ">= 4.47" + } + kubernetes = { + source = "hashicorp/kubernetes" + version = ">= 2.10" + } + helm = { + source = "hashicorp/helm" + version = "2.9.0" + } + } +} diff --git a/infra/aws/terraform/prow-build-canary-cluster/resources/rbac/cluster-admin-crb.yaml b/infra/aws/terraform/prow-build-canary-cluster/resources/rbac/cluster-admin-crb.yaml new file mode 100644 index 00000000000..e1caaf97e73 --- /dev/null +++ b/infra/aws/terraform/prow-build-canary-cluster/resources/rbac/cluster-admin-crb.yaml @@ -0,0 +1,12 @@ +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRoleBinding +metadata: + name: eks-cluster-admin +subjects: +- kind: Group + name: eks-cluster-admin + apiGroup: rbac.authorization.k8s.io +roleRef: + kind: ClusterRole + name: 
cluster-admin + apiGroup: rbac.authorization.k8s.io diff --git a/infra/aws/terraform/prow-build-canary-cluster/resources/rbac/prow-admin-crb.yaml b/infra/aws/terraform/prow-build-canary-cluster/resources/rbac/prow-admin-crb.yaml new file mode 100644 index 00000000000..3d5232a8e93 --- /dev/null +++ b/infra/aws/terraform/prow-build-canary-cluster/resources/rbac/prow-admin-crb.yaml @@ -0,0 +1,12 @@ +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRoleBinding +metadata: + name: eks-prow-cluster-admin +subjects: +- kind: Group + name: eks-prow-cluster-admin + apiGroup: rbac.authorization.k8s.io +roleRef: + kind: ClusterRole + name: cluster-admin + apiGroup: rbac.authorization.k8s.io diff --git a/infra/aws/terraform/prow-build-canary-cluster/secrets_manager.tf b/infra/aws/terraform/prow-build-canary-cluster/secrets_manager.tf new file mode 100644 index 00000000000..9a3b2176eb2 --- /dev/null +++ b/infra/aws/terraform/prow-build-canary-cluster/secrets_manager.tf @@ -0,0 +1,34 @@ +/* +Copyright 2023 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+*/ + +data "aws_iam_policy_document" "secretsmanager_read" { + statement { + sid = "" + effect = "Allow" + resources = ["*"] + + actions = [ + "secretsmanager:GetSecretValue", + "secretsmanager:DescribeSecret", + ] + } +} + +resource "aws_iam_policy" "secretsmanager_read" { + name = "canary-secretsmanager_read" + path = "/" + policy = data.aws_iam_policy_document.secretsmanager_read.json +} diff --git a/infra/aws/terraform/prow-build-canary-cluster/variables.tf b/infra/aws/terraform/prow-build-canary-cluster/variables.tf new file mode 100644 index 00000000000..0ce8beb8ef2 --- /dev/null +++ b/infra/aws/terraform/prow-build-canary-cluster/variables.tf @@ -0,0 +1,95 @@ +/* +Copyright 2023 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+*/ + +variable "vpc_cidr" { + type = string + description = "CIDR of the VPC" +} + +variable "vpc_secondary_cidr_blocks" { + type = list(string) + description = "Additional CIDRs to attach to the VPC" +} + +variable "vpc_public_subnet" { + type = list(string) + description = "Public subnets (one per AZ)" +} + +variable "vpc_private_subnet" { + type = list(string) + description = "Private subnets (one per AZ)" +} + +variable "vpc_intra_subnet" { + type = list(string) + description = "Intra subnets (one per AZ, subnet without access to external services)" +} + +variable "cluster_name" { + type = string + description = "Name of the EKS cluster" +} + +variable "cluster_region" { + type = string + description = "AWS region of the EKS cluster" +} + +variable "cluster_version" { + type = string + description = "Kubernetes version of the EKS cluster" +} + +variable "node_ami" { + type = string + description = "EKS optimized AMI to be used for Node groups" +} + +variable "node_instance_types" { + type = list(string) + description = "Instance sizes to use for EKS node group" +} + +variable "node_volume_size" { + type = number + description = "Volume size per node to use for EKS node group" +} + +variable "node_min_size" { + type = number + description = "Minimum number of nodes in the EKS node group" +} + +variable "node_max_size" { + type = number + description = "Maximum number of nodes in the EKS node group" +} + +variable "node_desired_size" { + type = number + description = "Desired number of nodes in the EKS node group" +} + +variable "node_max_unavailable_percentage" { + type = number + description = "Maximum unavailable nodes in a node group" +} + +variable "cluster_autoscaler_version" { + type = string + description = "Cluster Autoscaler version to use (must match the EKS version)" +} diff --git a/infra/aws/terraform/prow-build-canary-cluster/vpc.tf b/infra/aws/terraform/prow-build-canary-cluster/vpc.tf new file mode 100644 index 00000000000..97be7cd6ac4 --- 
/dev/null +++ b/infra/aws/terraform/prow-build-canary-cluster/vpc.tf @@ -0,0 +1,69 @@ +/* +Copyright 2023 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +############################################### +# VPC +############################################### + +# VPC is IPv4/IPv6 Dual-Stack, but our cluster is IPv4 because EKS doesn't +# support dual-stack yet. + +module "vpc" { + source = "terraform-aws-modules/vpc/aws" + version = "~> 3.0" + + name = "${var.cluster_name}-vpc" + + cidr = var.vpc_cidr + secondary_cidr_blocks = var.vpc_secondary_cidr_blocks + + azs = local.azs + private_subnets = var.vpc_private_subnet + public_subnets = var.vpc_public_subnet + + # intra_subnets are private subnets without the internet access + # (https://registry.terraform.io/modules/terraform-aws-modules/vpc/aws/latest#private-versus-intra-subnets) + intra_subnets = var.vpc_intra_subnet + + # Enable IPv6 for this subnet. + enable_ipv6 = true + assign_ipv6_address_on_creation = true + create_egress_only_igw = true + + # Used for calculating IPv6 CIDR based on the following formula: + # cidrsubnet(aws_vpc.this[0].ipv6_cidr_block, 8, var.private_subnet_ipv6_prefixes[count.index]) + private_subnet_ipv6_prefixes = [0, 1, 2] + public_subnet_ipv6_prefixes = [3, 4, 5] + intra_subnet_ipv6_prefixes = [6, 7, 8] + + # NAT Gateway allows connection to external services (e.g. Internet). 
+ enable_nat_gateway = true + single_nat_gateway = true + enable_dns_hostnames = true + + # Tags to allow ELB (Elastic Load Balancing). + public_subnet_tags = { + "kubernetes.io/role/elb" = 1 + "kubernetes.io/cluster/${var.cluster_name}" = "owned" + } + + private_subnet_tags = { + "kubernetes.io/role/internal-elb" = 1 + "kubernetes.io/cluster/${var.cluster_name}" = "owned" + } + + tags = local.tags +} From 33b97253b3057306ee9e72df53e550afd8f35886 Mon Sep 17 00:00:00 2001 From: Patryk Przekwas Date: Wed, 29 Mar 2023 15:56:27 +0200 Subject: [PATCH 02/12] TODO: fix prow cluster IAM --- infra/aws/terraform/prow-build-canary-cluster/main.tf | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/infra/aws/terraform/prow-build-canary-cluster/main.tf b/infra/aws/terraform/prow-build-canary-cluster/main.tf index 842b334619b..058db65e8df 100644 --- a/infra/aws/terraform/prow-build-canary-cluster/main.tf +++ b/infra/aws/terraform/prow-build-canary-cluster/main.tf @@ -24,10 +24,10 @@ provider "aws" { # We have a chicken-egg problem here. This role is not going to exist # when creating the cluster for the first time. In that case, this must # be commented, than uncommented afterwards. 
- # assume_role { - # role_arn = "arn:aws:iam::468814281478:role/Prow-Canary-Cluster-Admin" - # session_name = "prow-build-cluster-terraform" - # } + assume_role { + role_arn = "arn:aws:iam::468814281478:role/Prow-Canary-Cluster-Admin" + session_name = "prow-build-cluster-terraform" + } } provider "kubernetes" { From f3b5e1030ea4f42c76f1678dc7775207d4603e55 Mon Sep 17 00:00:00 2001 From: Patryk Przekwas Date: Fri, 31 Mar 2023 17:50:50 +0200 Subject: [PATCH 03/12] add condition for assuming role, create manual --- .../prow-build-canary-cluster/README.md | 61 +++++- .../prow-build-canary-cluster/eks.tf | 2 +- .../prow-build-canary-cluster/kubernetes.tf | 178 ++++++++++-------- .../prow-build-canary-cluster/main.tf | 55 +++--- .../prow-build-canary-cluster/outputs.tf | 5 + .../prow-build-canary-cluster/variables.tf | 5 + 6 files changed, 191 insertions(+), 115 deletions(-) diff --git a/infra/aws/terraform/prow-build-canary-cluster/README.md b/infra/aws/terraform/prow-build-canary-cluster/README.md index 645d656ee39..f7a2e45da15 100644 --- a/infra/aws/terraform/prow-build-canary-cluster/README.md +++ b/infra/aws/terraform/prow-build-canary-cluster/README.md @@ -1,8 +1,53 @@ -## Differences - -cluster name changed -cluster admin role name changed -secrets-manager iam policy name changed -missing `prow.tf` file (no OIDC provider and iam role for prow) -different subnet setup -used smaller instance \ No newline at end of file +# Prow Build Canary Cluster + +This directory contains a mirror of scripts used for provisioning EKS prow-build-cluster. It is meant testing infrastructure/configuration changes before applying on production Prow cluster. 
+ +Here are some differences compared to the production setup: +* cluster name, +* cluster admin IAM role, +* secrets-manager IAM policy, +* missing `prow.tf` (originally used for configuring prow permissions), +* subnet setup, +* instance type and autoscaling paramethers (mainly for saving), +* cluster contains only basic components without monitoring stack. + +## Provisioning Cluster + +Running installation from scratch is different than consecutive invocations of Terraform. First run creates a role that can be later assumed by other users. Becasue of that additional variable has to be set: + +```bash +terraform init +terraform plan -var="assume_role=false" +terraform apply -var="assume_role=false" +``` + +Once the infrastructure is provisioned, next step is RBAC setup: + +```bash +# fetch & update kubeconfig +aws eks update-kubeconfig --region us-east-2 --name prow-build-canary-cluster + +# create cluster role bindings +kubectl apply -f ./resources/rbac +``` + +Lastly, run Terraform script again without additinal variable. This time, it will implicitly assume previously created role and provision resources on top of EKS cluster. + +```bash +terraform apply +``` + +From here, all consecutive runs should be possible with command from above. + +## Removing cluster + +Same as for installation, cluster removal requires running Terraform twice. **IMPORTANT**: It's possible only for users with assigned `AdministratorAccess` policy. + +```bash +# First remove resources running on the cluster and IAM role. This fails once assumed role gets deleted. +terraform destroy + +# Clean up the rest. 
+terraform destroy -var="assume_role=false" +``` + diff --git a/infra/aws/terraform/prow-build-canary-cluster/eks.tf b/infra/aws/terraform/prow-build-canary-cluster/eks.tf index c0ab5aa0da9..97bbe05db00 100644 --- a/infra/aws/terraform/prow-build-canary-cluster/eks.tf +++ b/infra/aws/terraform/prow-build-canary-cluster/eks.tf @@ -32,7 +32,7 @@ module "eks" { # Configure aws-auth aws_auth_roles = [ - # Allow access to the Prow-Cluster-Admin IAM role (used with assume role with other IAM accounts). + # Allow access to the Prow-Canary-Cluster-Admin IAM role (used with assume role with other IAM accounts). { "rolearn" = aws_iam_role.iam_cluster_admin.arn "username" = "eks-cluster-admin" diff --git a/infra/aws/terraform/prow-build-canary-cluster/kubernetes.tf b/infra/aws/terraform/prow-build-canary-cluster/kubernetes.tf index e8fc1dff892..6b5e779612a 100644 --- a/infra/aws/terraform/prow-build-canary-cluster/kubernetes.tf +++ b/infra/aws/terraform/prow-build-canary-cluster/kubernetes.tf @@ -16,86 +16,98 @@ limitations under the License. # This file contains resources that require EKS to be running before terrafrom plan/apply. -# module "cluster_autoscaler" { -# source = "../prow-build-cluster/modules/cluster-autoscaler" -# providers = { -# kubernetes = kubernetes -# } - -# cluster_name = module.eks.cluster_name -# cluster_autoscaler_iam_role_arn = module.cluster_autoscaler_irsa.iam_role_arn -# cluster_autoscaler_version = var.cluster_autoscaler_version - -# depends_on = [ -# module.eks -# ] -# } - -# module "metrics_server" { -# source = "../prow-build-cluster/modules/metrics-server" -# providers = { -# kubernetes = kubernetes -# } - -# depends_on = [ -# module.eks -# ] -# } - -# # AWS Load Balancer Controller (ALB/NLB integration). 
-# resource "helm_release" "aws_lb_controller" { -# name = "aws-load-balancer-controller" -# namespace = "kube-system" -# repository = "https://aws.github.io/eks-charts" -# chart = "aws-load-balancer-controller" -# version = "1.4.8" - -# set { -# name = "clusterName" -# value = module.eks.cluster_name -# } - -# set { -# name = "serviceAccount.create" -# value = "true" -# } - -# set { -# name = "serviceAccount.name" -# value = "aws-load-balancer-controller" -# } - -# set { -# name = "serviceAccount.annotations.eks\\.amazonaws\\.com/role-arn" -# value = module.aws_load_balancer_controller_irsa.iam_role_arn -# } - -# depends_on = [ -# module.eks -# ] -# } - -# # AWS Secrets Manager integration -# resource "helm_release" "secrets_store_csi_driver" { -# name = "secrets-store-csi-driver" -# namespace = "kube-system" -# repository = "https://kubernetes-sigs.github.io/secrets-store-csi-driver/charts" -# chart = "secrets-store-csi-driver" -# version = "1.3.1" - -# depends_on = [ -# module.eks -# ] -# } - -# resource "helm_release" "secrets_store_csi_driver_provider_aws" { -# name = "aws-secrets-manager" -# namespace = "kube-system" -# repository = "https://aws.github.io/secrets-store-csi-driver-provider-aws" -# chart = "secrets-store-csi-driver-provider-aws" -# version = "0.3.0" - -# depends_on = [ -# module.eks -# ] -# } +module "cluster_autoscaler" { + count = var.assume_role ? 1 : 0 + + source = "../prow-build-cluster/modules/cluster-autoscaler" + + providers = { + kubernetes = kubernetes + } + + cluster_name = module.eks.cluster_name + cluster_autoscaler_iam_role_arn = module.cluster_autoscaler_irsa.iam_role_arn + cluster_autoscaler_version = var.cluster_autoscaler_version + + depends_on = [ + module.eks + ] +} + +module "metrics_server" { + count = var.assume_role ? 
1 : 0 + + source = "../prow-build-cluster/modules/metrics-server" + + providers = { + kubernetes = kubernetes + } + + depends_on = [ + module.eks + ] +} + +# AWS Load Balancer Controller (ALB/NLB integration). +resource "helm_release" "aws_lb_controller" { + count = var.assume_role ? 1 : 0 + + name = "aws-load-balancer-controller" + namespace = "kube-system" + repository = "https://aws.github.io/eks-charts" + chart = "aws-load-balancer-controller" + version = "1.4.8" + + set { + name = "clusterName" + value = module.eks.cluster_name + } + + set { + name = "serviceAccount.create" + value = "true" + } + + set { + name = "serviceAccount.name" + value = "aws-load-balancer-controller" + } + + set { + name = "serviceAccount.annotations.eks\\.amazonaws\\.com/role-arn" + value = module.aws_load_balancer_controller_irsa.iam_role_arn + } + + depends_on = [ + module.eks + ] +} + +# AWS Secrets Manager integration +resource "helm_release" "secrets_store_csi_driver" { + count = var.assume_role ? 1 : 0 + + name = "secrets-store-csi-driver" + namespace = "kube-system" + repository = "https://kubernetes-sigs.github.io/secrets-store-csi-driver/charts" + chart = "secrets-store-csi-driver" + version = "1.3.1" + + depends_on = [ + module.eks + ] +} + +resource "helm_release" "secrets_store_csi_driver_provider_aws" { + count = var.assume_role ? 1 : 0 + + name = "aws-secrets-manager" + namespace = "kube-system" + repository = "https://aws.github.io/secrets-store-csi-driver-provider-aws" + chart = "secrets-store-csi-driver-provider-aws" + version = "0.3.0" + + depends_on = [ + module.eks + ] +} diff --git a/infra/aws/terraform/prow-build-canary-cluster/main.tf b/infra/aws/terraform/prow-build-canary-cluster/main.tf index 058db65e8df..e4ea4a611f5 100644 --- a/infra/aws/terraform/prow-build-canary-cluster/main.tf +++ b/infra/aws/terraform/prow-build-canary-cluster/main.tf @@ -18,15 +18,40 @@ limitations under the License. 
# INITIALIZATION ############################################### +locals { + root_account_arn = "arn:aws:iam::${data.aws_caller_identity.current.account_id}:root" + aws_cli_base_args = ["eks", "get-token", "--cluster-name", module.eks.cluster_name] + aws_cli_args = var.assume_role != true ? local.aws_cli_base_args : concat( + local.aws_cli_base_args, ["--role-arn", aws_iam_role.iam_cluster_admin.arn] + ) + + tags = { + Cluster = var.cluster_name + } + auto_scaling_tags = { + "k8s.io/cluster-autoscaler/${var.cluster_name}" = "owned" + "k8s.io/cluster-autoscaler/enabled" = true + } + node_group_tags = merge(local.tags, local.auto_scaling_tags) + azs = slice(data.aws_availability_zones.available.names, 0, 3) +} + +data "aws_caller_identity" "current" {} +data "aws_availability_zones" "available" {} + provider "aws" { region = var.cluster_region # We have a chicken-egg problem here. This role is not going to exist - # when creating the cluster for the first time. In that case, this must - # be commented, than uncommented afterwards. - assume_role { - role_arn = "arn:aws:iam::468814281478:role/Prow-Canary-Cluster-Admin" - session_name = "prow-build-cluster-terraform" + # when creating the cluster for the first time. In that case, `assume_role` var + # has to be set to false. + dynamic "assume_role" { + for_each = var.assume_role ? 
[null] : [] + + content { + role_arn = "arn:aws:iam::468814281478:role/Prow-Canary-Cluster-Admin" + session_name = "prow-build-cluster-terraform" + } } } @@ -38,7 +63,7 @@ provider "kubernetes" { exec { api_version = "client.authentication.k8s.io/v1beta1" command = "aws" - args = ["eks", "get-token", "--cluster-name", module.eks.cluster_name, "--role-arn", aws_iam_role.iam_cluster_admin.arn] + args = local.aws_cli_args } } @@ -51,24 +76,8 @@ provider "helm" { exec { api_version = "client.authentication.k8s.io/v1beta1" command = "aws" - args = ["eks", "get-token", "--cluster-name", module.eks.cluster_name, "--role-arn", aws_iam_role.iam_cluster_admin.arn] + args = local.aws_cli_args } } } -data "aws_caller_identity" "current" {} -data "aws_availability_zones" "available" {} - -locals { - root_account_arn = "arn:aws:iam::${data.aws_caller_identity.current.account_id}:root" - - tags = { - Cluster = var.cluster_name - } - auto_scaling_tags = { - "k8s.io/cluster-autoscaler/${var.cluster_name}" = "owned" - "k8s.io/cluster-autoscaler/enabled" = true - } - node_group_tags = merge(local.tags, local.auto_scaling_tags) - azs = slice(data.aws_availability_zones.available.names, 0, 3) -} diff --git a/infra/aws/terraform/prow-build-canary-cluster/outputs.tf b/infra/aws/terraform/prow-build-canary-cluster/outputs.tf index fca4505f6c7..3adf010422d 100644 --- a/infra/aws/terraform/prow-build-canary-cluster/outputs.tf +++ b/infra/aws/terraform/prow-build-canary-cluster/outputs.tf @@ -18,3 +18,8 @@ output "cluster_endpoint" { description = "Endpoint for EKS control plane" value = module.eks.cluster_endpoint } + +output "eks_admin_role_arn" { + description = "ARN of EKS cluster admin role." 
+ value = aws_iam_role.iam_cluster_admin.arn +} diff --git a/infra/aws/terraform/prow-build-canary-cluster/variables.tf b/infra/aws/terraform/prow-build-canary-cluster/variables.tf index 0ce8beb8ef2..d2a1ab4d55d 100644 --- a/infra/aws/terraform/prow-build-canary-cluster/variables.tf +++ b/infra/aws/terraform/prow-build-canary-cluster/variables.tf @@ -14,6 +14,11 @@ See the License for the specific language governing permissions and limitations under the License. */ +variable "assume_role" { + type = bool + default = true +} + variable "vpc_cidr" { type = string description = "CIDR of the VPC" From f8d56fe7422f45988e0279abcf9c7d49436c2864 Mon Sep 17 00:00:00 2001 From: Patryk Przekwas Date: Mon, 3 Apr 2023 12:55:07 +0200 Subject: [PATCH 04/12] Merge canary and prod eks scripts into one --- .../prow-build-canary-cluster/.gitignore | 27 --- .../prow-build-canary-cluster/OWNERS | 14 -- .../prow-build-canary-cluster/README.md | 53 ------ .../prow-build-canary-cluster/eks.tf | 154 ------------------ .../prow-build-canary-cluster/iam.tf | 54 ------ .../prow-build-canary-cluster/irsa.tf | 94 ----------- .../prow-build-canary-cluster/kubernetes.tf | 113 ------------- .../prow-build-canary-cluster/main.tf | 83 ---------- .../prow-build-canary-cluster/outputs.tf | 25 --- .../prow-build-canary-cluster/providers.tf | 40 ----- .../resources/rbac/cluster-admin-crb.yaml | 12 -- .../resources/rbac/prow-admin-crb.yaml | 12 -- .../secrets_manager.tf | 34 ---- .../prow-build-canary-cluster/variables.tf | 100 ------------ .../prow-build-canary-cluster/vpc.tf | 69 -------- .../.terraform.lock.hcl | 32 ++-- .../aws/terraform/prow-build-cluster/Makefile | 32 ++++ .../terraform/prow-build-cluster/README.md | 69 ++++++++ infra/aws/terraform/prow-build-cluster/eks.tf | 2 +- infra/aws/terraform/prow-build-cluster/iam.tf | 4 +- .../prow-build-cluster/kubernetes.tf | 10 ++ .../aws/terraform/prow-build-cluster/main.tf | 58 ++++--- .../terraform/prow-build-cluster/providers.tf | 7 +- 
.../aws/terraform/prow-build-cluster/prow.tf | 11 +- .../prow-build-cluster/secrets_manager.tf | 2 +- .../prow-build-cluster/terraform.tfvars | 38 ----- .../tfbackends/canary.tfbackend | 3 + .../tfbackends/prod.tfbackend | 3 + .../terraform/prow-build-cluster/variables.tf | 14 ++ 29 files changed, 195 insertions(+), 974 deletions(-) delete mode 100644 infra/aws/terraform/prow-build-canary-cluster/.gitignore delete mode 100644 infra/aws/terraform/prow-build-canary-cluster/OWNERS delete mode 100644 infra/aws/terraform/prow-build-canary-cluster/README.md delete mode 100644 infra/aws/terraform/prow-build-canary-cluster/eks.tf delete mode 100644 infra/aws/terraform/prow-build-canary-cluster/iam.tf delete mode 100644 infra/aws/terraform/prow-build-canary-cluster/irsa.tf delete mode 100644 infra/aws/terraform/prow-build-canary-cluster/kubernetes.tf delete mode 100644 infra/aws/terraform/prow-build-canary-cluster/main.tf delete mode 100644 infra/aws/terraform/prow-build-canary-cluster/outputs.tf delete mode 100644 infra/aws/terraform/prow-build-canary-cluster/providers.tf delete mode 100644 infra/aws/terraform/prow-build-canary-cluster/resources/rbac/cluster-admin-crb.yaml delete mode 100644 infra/aws/terraform/prow-build-canary-cluster/resources/rbac/prow-admin-crb.yaml delete mode 100644 infra/aws/terraform/prow-build-canary-cluster/secrets_manager.tf delete mode 100644 infra/aws/terraform/prow-build-canary-cluster/variables.tf delete mode 100644 infra/aws/terraform/prow-build-canary-cluster/vpc.tf rename infra/aws/terraform/{prow-build-canary-cluster => prow-build-cluster}/.terraform.lock.hcl (83%) create mode 100644 infra/aws/terraform/prow-build-cluster/Makefile create mode 100644 infra/aws/terraform/prow-build-cluster/README.md delete mode 100644 infra/aws/terraform/prow-build-cluster/terraform.tfvars create mode 100644 infra/aws/terraform/prow-build-cluster/tfbackends/canary.tfbackend create mode 100644 
infra/aws/terraform/prow-build-cluster/tfbackends/prod.tfbackend diff --git a/infra/aws/terraform/prow-build-canary-cluster/.gitignore b/infra/aws/terraform/prow-build-canary-cluster/.gitignore deleted file mode 100644 index 6665869f80f..00000000000 --- a/infra/aws/terraform/prow-build-canary-cluster/.gitignore +++ /dev/null @@ -1,27 +0,0 @@ -# Local .terraform directories -**/.terraform/* - -# .tfstate files -*.tfstate -*.tfstate.* -*.tfplan - -# Crash log files -crash.log - -# Exclude all .tfvars files, which are likely to contain sentitive data, such as -# password, private keys, and other secrets. These should not be part of version -# control as they are data points which are potentially sensitive and subject -# to change depending on the environment. -*.tfvars - -# Ignore override files as they are usually used to override resources locally and so -# are not checked in -override.tf -override.tf.json -*_override.tf -*_override.tf.json - -# Ignore CLI configuration files -.terraformrc -terraform.rc diff --git a/infra/aws/terraform/prow-build-canary-cluster/OWNERS b/infra/aws/terraform/prow-build-canary-cluster/OWNERS deleted file mode 100644 index 43c1ca39668..00000000000 --- a/infra/aws/terraform/prow-build-canary-cluster/OWNERS +++ /dev/null @@ -1,14 +0,0 @@ -# See the OWNERS docs at https://go.k8s.io/owners - -filters: - ".*": - approvers: - - pkprzekwas - - xmudrii - labels: - - sig/k8s-infra - - area/infra - - area/infra/aws - "\\.sh$": - labels: - - area/bash diff --git a/infra/aws/terraform/prow-build-canary-cluster/README.md b/infra/aws/terraform/prow-build-canary-cluster/README.md deleted file mode 100644 index f7a2e45da15..00000000000 --- a/infra/aws/terraform/prow-build-canary-cluster/README.md +++ /dev/null @@ -1,53 +0,0 @@ -# Prow Build Canary Cluster - -This directory contains a mirror of scripts used for provisioning EKS prow-build-cluster. It is meant testing infrastructure/configuration changes before applying on production Prow cluster. 
- -Here are some differences compared to the production setup: -* cluster name, -* cluster admin IAM role, -* secrets-manager IAM policy, -* missing `prow.tf` (originally used for configuring prow permissions), -* subnet setup, -* instance type and autoscaling paramethers (mainly for saving), -* cluster contains only basic components without monitoring stack. - -## Provisioning Cluster - -Running installation from scratch is different than consecutive invocations of Terraform. First run creates a role that can be later assumed by other users. Becasue of that additional variable has to be set: - -```bash -terraform init -terraform plan -var="assume_role=false" -terraform apply -var="assume_role=false" -``` - -Once the infrastructure is provisioned, next step is RBAC setup: - -```bash -# fetch & update kubeconfig -aws eks update-kubeconfig --region us-east-2 --name prow-build-canary-cluster - -# create cluster role bindings -kubectl apply -f ./resources/rbac -``` - -Lastly, run Terraform script again without additinal variable. This time, it will implicitly assume previously created role and provision resources on top of EKS cluster. - -```bash -terraform apply -``` - -From here, all consecutive runs should be possible with command from above. - -## Removing cluster - -Same as for installation, cluster removal requires running Terraform twice. **IMPORTANT**: It's possible only for users with assigned `AdministratorAccess` policy. - -```bash -# First remove resources running on the cluster and IAM role. This fails once assumed role gets deleted. -terraform destroy - -# Clean up the rest. -terraform destroy -var="assume_role=false" -``` - diff --git a/infra/aws/terraform/prow-build-canary-cluster/eks.tf b/infra/aws/terraform/prow-build-canary-cluster/eks.tf deleted file mode 100644 index 97bbe05db00..00000000000 --- a/infra/aws/terraform/prow-build-canary-cluster/eks.tf +++ /dev/null @@ -1,154 +0,0 @@ -/* -Copyright 2023 The Kubernetes Authors. 
- -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -*/ - -############################################### -# EKS Cluster -############################################### - -module "eks" { - source = "terraform-aws-modules/eks/aws" - version = "19.10.0" - - # General cluster properties. - cluster_name = var.cluster_name - cluster_version = var.cluster_version - cluster_endpoint_public_access = true - - # Manage aws-auth ConfigMap. - manage_aws_auth_configmap = true - - # Configure aws-auth - aws_auth_roles = [ - # Allow access to the Prow-Canary-Cluster-Admin IAM role (used with assume role with other IAM accounts). - { - "rolearn" = aws_iam_role.iam_cluster_admin.arn - "username" = "eks-cluster-admin" - "groups" = [ - "eks-cluster-admin" - ] - }, - ] - # Allow EKS access to the root account. - aws_auth_users = [ - { - "userarn" = local.root_account_arn - "username" = "root" - "groups" = [ - "eks-cluster-admin" - ] - }, - ] - - # Allow access to the KMS key used for secrets encryption to the root account. - kms_key_administrators = [ - local.root_account_arn - ] - # Allow service access to the KMS key to the Prow-Cluster-Admin role. - kms_key_service_users = [ - aws_iam_role.iam_cluster_admin.arn - ] - - # We use IPv4 for the best compatibility with the existing setup. - # Additionally, Ubuntu EKS optimized AMI doesn't support IPv6 well. 
- cluster_ip_family = "ipv4" - - vpc_id = module.vpc.vpc_id - subnet_ids = module.vpc.private_subnets - control_plane_subnet_ids = module.vpc.intra_subnets - - cluster_addons = { - coredns = { - most_recent = true - } - kube-proxy = { - most_recent = true - } - vpc-cni = { - most_recent = true - service_account_role_arn = module.vpc_cni_irsa.iam_role_arn - } - aws-ebs-csi-driver = { - most_recent = true - service_account_role_arn = module.ebs_csi_irsa.iam_role_arn - } - } - - eks_managed_node_group_defaults = { - ami_id = var.node_ami - enable_bootstrap_user_data = true - instance_types = var.node_instance_types - - # We are using the IRSA created below for permissions - # However, we have to deploy with the policy attached FIRST (when creating a fresh cluster) - # and then turn this off after the cluster/node group is created. Without this initial policy, - # the VPC CNI fails to assign IPs and nodes cannot join the cluster - # See https://github.com/aws/containers-roadmap/issues/1666 for more context - iam_role_attach_cni_policy = false - } - - eks_managed_node_groups = { - # Build cluster node group. - build = { - name = "build-managed" - description = "EKS managed node group used for build nodes" - use_name_prefix = true - - subnet_ids = module.vpc.private_subnets - - min_size = var.node_min_size - max_size = var.node_max_size - desired_size = var.node_desired_size - - ami_id = var.node_ami - enable_bootstrap_user_data = true - - force_update_version = false - update_config = { - max_unavailable_percentage = var.node_max_unavailable_percentage - } - - pre_bootstrap_user_data = file("${path.module}/../prow-build-cluster/bootstrap/node_bootstrap.sh") - - capacity_type = "ON_DEMAND" - instance_types = var.node_instance_types - - ebs_optimized = true - enable_monitoring = true - - block_device_mappings = { - # This must be sda1 in order to match the root volume, - # otherwise a new volume is created. 
- sda1 = { - device_name = "/dev/sda1" - ebs = { - volume_size = var.node_volume_size - volume_type = "gp3" - iops = 16000 # Maximum for gp3 volume. - throughput = 1000 # Maximum for gp3 volume. - encrypted = false - delete_on_termination = true - } - } - } - - enclave_options = { - enabled = true - } - - tags = local.node_group_tags - } - } -} diff --git a/infra/aws/terraform/prow-build-canary-cluster/iam.tf b/infra/aws/terraform/prow-build-canary-cluster/iam.tf deleted file mode 100644 index 4896404b920..00000000000 --- a/infra/aws/terraform/prow-build-canary-cluster/iam.tf +++ /dev/null @@ -1,54 +0,0 @@ -/* -Copyright 2023 The Kubernetes Authors. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -*/ - -############################################### -# IAM access -############################################### - -data "aws_iam_user" "user_xmudrii" { - user_name = "xmudrii" -} -data "aws_iam_user" "user_pprzekwa" { - user_name = "pprzekwa" -} - -resource "aws_iam_role" "iam_cluster_admin" { - name = "Prow-Canary-Cluster-Admin" - description = "IAM role used to delegate access to prow-build-canary-cluster" - - assume_role_policy = jsonencode({ - Version = "2012-10-17" - Statement = [ - { - "Effect" : "Allow", - "Principal" : { - "AWS" : [ - data.aws_iam_user.user_xmudrii.arn, - data.aws_iam_user.user_pprzekwa.arn, - ] - }, - "Action" : "sts:AssumeRole", - "Condition" : {} - } - ] - }) -} - -# Give administrator access to the admin IAM role so it can be used with Terraform. 
-resource "aws_iam_role_policy_attachment" "iam_policy_cluster_admin" { - role = aws_iam_role.iam_cluster_admin.name - policy_arn = "arn:aws:iam::aws:policy/AdministratorAccess" -} diff --git a/infra/aws/terraform/prow-build-canary-cluster/irsa.tf b/infra/aws/terraform/prow-build-canary-cluster/irsa.tf deleted file mode 100644 index fb81fc9ba1c..00000000000 --- a/infra/aws/terraform/prow-build-canary-cluster/irsa.tf +++ /dev/null @@ -1,94 +0,0 @@ -/* -Copyright 2023 The Kubernetes Authors. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -*/ - -############################################### -# IAM -############################################### - -# IAM policy used for the AWS VPC CNI plugin. -module "vpc_cni_irsa" { - source = "terraform-aws-modules/iam/aws//modules/iam-role-for-service-accounts-eks" - version = "~> 5.11" - - role_name_prefix = "VPC-CNI-IRSA" - attach_vpc_cni_policy = true - vpc_cni_enable_ipv4 = true - vpc_cni_enable_ipv6 = true - - oidc_providers = { - main = { - provider_arn = module.eks.oidc_provider_arn - namespace_service_accounts = ["kube-system:aws-node"] - } - } - - tags = local.tags -} - -# IAM policy used for the AWS EBS CSI driver plugin. 
-module "ebs_csi_irsa" { - source = "terraform-aws-modules/iam/aws//modules/iam-role-for-service-accounts-eks" - version = "~> 5.11" - - role_name_prefix = "EBS-CSI-IRSA" - attach_ebs_csi_policy = true - - oidc_providers = { - main = { - provider_arn = module.eks.oidc_provider_arn - namespace_service_accounts = ["kube-system:ebs-csi-controller-sa"] - } - } - - tags = local.tags -} - -# IAM policy used for AWS Load Balancer Controller. -module "aws_load_balancer_controller_irsa" { - source = "terraform-aws-modules/iam/aws//modules/iam-role-for-service-accounts-eks" - version = "~> 5.11" - - role_name_prefix = "LBCONTROLLER-IRSA" - attach_load_balancer_controller_policy = true - - oidc_providers = { - main = { - provider_arn = module.eks.oidc_provider_arn - namespace_service_accounts = ["kube-system:aws-load-balancer-controller"] - } - } - - tags = local.tags -} - -# IAM policy used for Cluster Autoscaler. -module "cluster_autoscaler_irsa" { - source = "terraform-aws-modules/iam/aws//modules/iam-role-for-service-accounts-eks" - version = "~> 5.11" - - role_name_prefix = "AUTOSCALER-IRSA" - attach_cluster_autoscaler_policy = true - cluster_autoscaler_cluster_ids = [module.eks.cluster_name] - - oidc_providers = { - main = { - provider_arn = module.eks.oidc_provider_arn - namespace_service_accounts = ["kube-system:cluster-autoscaler"] - } - } - - tags = local.tags -} diff --git a/infra/aws/terraform/prow-build-canary-cluster/kubernetes.tf b/infra/aws/terraform/prow-build-canary-cluster/kubernetes.tf deleted file mode 100644 index 6b5e779612a..00000000000 --- a/infra/aws/terraform/prow-build-canary-cluster/kubernetes.tf +++ /dev/null @@ -1,113 +0,0 @@ -/* -Copyright 2023 The Kubernetes Authors. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. 
-You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -*/ - -# This file contains resources that require EKS to be running before terrafrom plan/apply. - -module "cluster_autoscaler" { - count = var.assume_role ? 1 : 0 - - source = "../prow-build-cluster/modules/cluster-autoscaler" - - providers = { - kubernetes = kubernetes - } - - cluster_name = module.eks.cluster_name - cluster_autoscaler_iam_role_arn = module.cluster_autoscaler_irsa.iam_role_arn - cluster_autoscaler_version = var.cluster_autoscaler_version - - depends_on = [ - module.eks - ] -} - -module "metrics_server" { - count = var.assume_role ? 1 : 0 - - source = "../prow-build-cluster/modules/metrics-server" - - providers = { - kubernetes = kubernetes - } - - depends_on = [ - module.eks - ] -} - -# AWS Load Balancer Controller (ALB/NLB integration). -resource "helm_release" "aws_lb_controller" { - count = var.assume_role ? 1 : 0 - - name = "aws-load-balancer-controller" - namespace = "kube-system" - repository = "https://aws.github.io/eks-charts" - chart = "aws-load-balancer-controller" - version = "1.4.8" - - set { - name = "clusterName" - value = module.eks.cluster_name - } - - set { - name = "serviceAccount.create" - value = "true" - } - - set { - name = "serviceAccount.name" - value = "aws-load-balancer-controller" - } - - set { - name = "serviceAccount.annotations.eks\\.amazonaws\\.com/role-arn" - value = module.aws_load_balancer_controller_irsa.iam_role_arn - } - - depends_on = [ - module.eks - ] -} - -# AWS Secrets Manager integration -resource "helm_release" "secrets_store_csi_driver" { - count = var.assume_role ? 
1 : 0 - - name = "secrets-store-csi-driver" - namespace = "kube-system" - repository = "https://kubernetes-sigs.github.io/secrets-store-csi-driver/charts" - chart = "secrets-store-csi-driver" - version = "1.3.1" - - depends_on = [ - module.eks - ] -} - -resource "helm_release" "secrets_store_csi_driver_provider_aws" { - count = var.assume_role ? 1 : 0 - - name = "aws-secrets-manager" - namespace = "kube-system" - repository = "https://aws.github.io/secrets-store-csi-driver-provider-aws" - chart = "secrets-store-csi-driver-provider-aws" - version = "0.3.0" - - depends_on = [ - module.eks - ] -} diff --git a/infra/aws/terraform/prow-build-canary-cluster/main.tf b/infra/aws/terraform/prow-build-canary-cluster/main.tf deleted file mode 100644 index e4ea4a611f5..00000000000 --- a/infra/aws/terraform/prow-build-canary-cluster/main.tf +++ /dev/null @@ -1,83 +0,0 @@ -/* -Copyright 2023 The Kubernetes Authors. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -*/ - -############################################### -# INITIALIZATION -############################################### - -locals { - root_account_arn = "arn:aws:iam::${data.aws_caller_identity.current.account_id}:root" - aws_cli_base_args = ["eks", "get-token", "--cluster-name", module.eks.cluster_name] - aws_cli_args = var.assume_role != true ? 
local.aws_cli_base_args : concat( - local.aws_cli_base_args, ["--role-arn", aws_iam_role.iam_cluster_admin.arn] - ) - - tags = { - Cluster = var.cluster_name - } - auto_scaling_tags = { - "k8s.io/cluster-autoscaler/${var.cluster_name}" = "owned" - "k8s.io/cluster-autoscaler/enabled" = true - } - node_group_tags = merge(local.tags, local.auto_scaling_tags) - azs = slice(data.aws_availability_zones.available.names, 0, 3) -} - -data "aws_caller_identity" "current" {} -data "aws_availability_zones" "available" {} - -provider "aws" { - region = var.cluster_region - - # We have a chicken-egg problem here. This role is not going to exist - # when creating the cluster for the first time. In that case, `assume_role` var - # has to be set to false. - dynamic "assume_role" { - for_each = var.assume_role ? [null] : [] - - content { - role_arn = "arn:aws:iam::468814281478:role/Prow-Canary-Cluster-Admin" - session_name = "prow-build-cluster-terraform" - } - } -} - -provider "kubernetes" { - host = module.eks.cluster_endpoint - cluster_ca_certificate = base64decode(module.eks.cluster_certificate_authority_data) - - # This requires the awscli to be installed locally where Terraform is executed. - exec { - api_version = "client.authentication.k8s.io/v1beta1" - command = "aws" - args = local.aws_cli_args - } -} - -provider "helm" { - kubernetes { - host = module.eks.cluster_endpoint - cluster_ca_certificate = base64decode(module.eks.cluster_certificate_authority_data) - - # This requires the awscli to be installed locally where Terraform is executed. 
- exec { - api_version = "client.authentication.k8s.io/v1beta1" - command = "aws" - args = local.aws_cli_args - } - } -} - diff --git a/infra/aws/terraform/prow-build-canary-cluster/outputs.tf b/infra/aws/terraform/prow-build-canary-cluster/outputs.tf deleted file mode 100644 index 3adf010422d..00000000000 --- a/infra/aws/terraform/prow-build-canary-cluster/outputs.tf +++ /dev/null @@ -1,25 +0,0 @@ -/* -Copyright 2023 The Kubernetes Authors. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -*/ - -output "cluster_endpoint" { - description = "Endpoint for EKS control plane" - value = module.eks.cluster_endpoint -} - -output "eks_admin_role_arn" { - description = "ARN of EKS cluster admin role." - value = aws_iam_role.iam_cluster_admin.arn -} diff --git a/infra/aws/terraform/prow-build-canary-cluster/providers.tf b/infra/aws/terraform/prow-build-canary-cluster/providers.tf deleted file mode 100644 index 8536f3864c6..00000000000 --- a/infra/aws/terraform/prow-build-canary-cluster/providers.tf +++ /dev/null @@ -1,40 +0,0 @@ -/* -Copyright 2023 The Kubernetes Authors. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-See the License for the specific language governing permissions and -limitations under the License. -*/ - -terraform { - backend "s3" { - bucket = "prow-build-cluster-tfstate" - key = "prow-build-canary-cluster/terraform.tfstate" - region = "us-east-2" - } - - required_version = "~> 1.3.0" - - required_providers { - aws = { - source = "hashicorp/aws" - version = ">= 4.47" - } - kubernetes = { - source = "hashicorp/kubernetes" - version = ">= 2.10" - } - helm = { - source = "hashicorp/helm" - version = "2.9.0" - } - } -} diff --git a/infra/aws/terraform/prow-build-canary-cluster/resources/rbac/cluster-admin-crb.yaml b/infra/aws/terraform/prow-build-canary-cluster/resources/rbac/cluster-admin-crb.yaml deleted file mode 100644 index e1caaf97e73..00000000000 --- a/infra/aws/terraform/prow-build-canary-cluster/resources/rbac/cluster-admin-crb.yaml +++ /dev/null @@ -1,12 +0,0 @@ -apiVersion: rbac.authorization.k8s.io/v1 -kind: ClusterRoleBinding -metadata: - name: eks-cluster-admin -subjects: -- kind: Group - name: eks-cluster-admin - apiGroup: rbac.authorization.k8s.io -roleRef: - kind: ClusterRole - name: cluster-admin - apiGroup: rbac.authorization.k8s.io diff --git a/infra/aws/terraform/prow-build-canary-cluster/resources/rbac/prow-admin-crb.yaml b/infra/aws/terraform/prow-build-canary-cluster/resources/rbac/prow-admin-crb.yaml deleted file mode 100644 index 3d5232a8e93..00000000000 --- a/infra/aws/terraform/prow-build-canary-cluster/resources/rbac/prow-admin-crb.yaml +++ /dev/null @@ -1,12 +0,0 @@ -apiVersion: rbac.authorization.k8s.io/v1 -kind: ClusterRoleBinding -metadata: - name: eks-prow-cluster-admin -subjects: -- kind: Group - name: eks-prow-cluster-admin - apiGroup: rbac.authorization.k8s.io -roleRef: - kind: ClusterRole - name: cluster-admin - apiGroup: rbac.authorization.k8s.io diff --git a/infra/aws/terraform/prow-build-canary-cluster/secrets_manager.tf b/infra/aws/terraform/prow-build-canary-cluster/secrets_manager.tf deleted file mode 100644 index 
9a3b2176eb2..00000000000 --- a/infra/aws/terraform/prow-build-canary-cluster/secrets_manager.tf +++ /dev/null @@ -1,34 +0,0 @@ -/* -Copyright 2023 The Kubernetes Authors. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -*/ - -data "aws_iam_policy_document" "secretsmanager_read" { - statement { - sid = "" - effect = "Allow" - resources = ["*"] - - actions = [ - "secretsmanager:GetSecretValue", - "secretsmanager:DescribeSecret", - ] - } -} - -resource "aws_iam_policy" "secretsmanager_read" { - name = "canary-secretsmanager_read" - path = "/" - policy = data.aws_iam_policy_document.secretsmanager_read.json -} diff --git a/infra/aws/terraform/prow-build-canary-cluster/variables.tf b/infra/aws/terraform/prow-build-canary-cluster/variables.tf deleted file mode 100644 index d2a1ab4d55d..00000000000 --- a/infra/aws/terraform/prow-build-canary-cluster/variables.tf +++ /dev/null @@ -1,100 +0,0 @@ -/* -Copyright 2023 The Kubernetes Authors. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
-*/ - -variable "assume_role" { - type = bool - default = true -} - -variable "vpc_cidr" { - type = string - description = "CIDR of the VPC" -} - -variable "vpc_secondary_cidr_blocks" { - type = list(string) - description = "Additional CIDRs to attach to the VPC" -} - -variable "vpc_public_subnet" { - type = list(string) - description = "Public subnets (one per AZ)" -} - -variable "vpc_private_subnet" { - type = list(string) - description = "Private subnets (one per AZ)" -} - -variable "vpc_intra_subnet" { - type = list(string) - description = "Intra subnets (one per AZ, subnet without access to external services)" -} - -variable "cluster_name" { - type = string - description = "Name of the EKS cluster" -} - -variable "cluster_region" { - type = string - description = "AWS region of the EKS cluster" -} - -variable "cluster_version" { - type = string - description = "Kubernetes version of the EKS cluster" -} - -variable "node_ami" { - type = string - description = "EKS optimized AMI to be used for Node groups" -} - -variable "node_instance_types" { - type = list(string) - description = "Instance sizes to use for EKS node group" -} - -variable "node_volume_size" { - type = number - description = "Volume size per node to use for EKS node group" -} - -variable "node_min_size" { - type = number - description = "Minimum number of nodes in the EKS node group" -} - -variable "node_max_size" { - type = number - description = "Maximum number of nodes in the EKS node group" -} - -variable "node_desired_size" { - type = number - description = "Desired number of nodes in the EKS node group" -} - -variable "node_max_unavailable_percentage" { - type = number - description = "Maximum unavailable nodes in a node group" -} - -variable "cluster_autoscaler_version" { - type = string - description = "Cluster Autoscaler version to use (must match the EKS version)" -} diff --git a/infra/aws/terraform/prow-build-canary-cluster/vpc.tf b/infra/aws/terraform/prow-build-canary-cluster/vpc.tf 
deleted file mode 100644 index 97be7cd6ac4..00000000000 --- a/infra/aws/terraform/prow-build-canary-cluster/vpc.tf +++ /dev/null @@ -1,69 +0,0 @@ -/* -Copyright 2023 The Kubernetes Authors. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -*/ - -############################################### -# VPC -############################################### - -# VPC is IPv4/IPv6 Dual-Stack, but our cluster is IPv4 because EKS doesn't -# support dual-stack yet. - -module "vpc" { - source = "terraform-aws-modules/vpc/aws" - version = "~> 3.0" - - name = "${var.cluster_name}-vpc" - - cidr = var.vpc_cidr - secondary_cidr_blocks = var.vpc_secondary_cidr_blocks - - azs = local.azs - private_subnets = var.vpc_private_subnet - public_subnets = var.vpc_public_subnet - - # intra_subnets are private subnets without the internet access - # (https://registry.terraform.io/modules/terraform-aws-modules/vpc/aws/latest#private-versus-intra-subnets) - intra_subnets = var.vpc_intra_subnet - - # Enable IPv6 for this subnet. - enable_ipv6 = true - assign_ipv6_address_on_creation = true - create_egress_only_igw = true - - # Used for calculating IPv6 CIDR based on the following formula: - # cidrsubnet(aws_vpc.this[0].ipv6_cidr_block, 8, var.private_subnet_ipv6_prefixes[count.index]) - private_subnet_ipv6_prefixes = [0, 1, 2] - public_subnet_ipv6_prefixes = [3, 4, 5] - intra_subnet_ipv6_prefixes = [6, 7, 8] - - # NAT Gateway allows connection to external services (e.g. Internet). 
- enable_nat_gateway = true - single_nat_gateway = true - enable_dns_hostnames = true - - # Tags to allow ELB (Elastic Load Balancing). - public_subnet_tags = { - "kubernetes.io/role/elb" = 1 - "kubernetes.io/cluster/${var.cluster_name}" = "owned" - } - - private_subnet_tags = { - "kubernetes.io/role/internal-elb" = 1 - "kubernetes.io/cluster/${var.cluster_name}" = "owned" - } - - tags = local.tags -} diff --git a/infra/aws/terraform/prow-build-canary-cluster/.terraform.lock.hcl b/infra/aws/terraform/prow-build-cluster/.terraform.lock.hcl similarity index 83% rename from infra/aws/terraform/prow-build-canary-cluster/.terraform.lock.hcl rename to infra/aws/terraform/prow-build-cluster/.terraform.lock.hcl index 9377953c4b0..e0ae040c28e 100644 --- a/infra/aws/terraform/prow-build-canary-cluster/.terraform.lock.hcl +++ b/infra/aws/terraform/prow-build-cluster/.terraform.lock.hcl @@ -2,25 +2,25 @@ # Manual edits may be lost in future updates. provider "registry.terraform.io/hashicorp/aws" { - version = "4.60.0" + version = "4.61.0" constraints = ">= 3.72.0, >= 3.73.0, >= 4.0.0, >= 4.47.0" hashes = [ - "h1:XxVhnhtrRW3YueabP668hVZ3qL4th7pcWbx+ot/l864=", - "zh:1853d6bc89e289ac36c13485e8ff877c1be8485e22f545bb32c7a30f1d1856e8", - "zh:4321d145969e3b7ede62fe51bee248a15fe398643f21df9541eef85526bf3641", - "zh:4c01189cc6963abfe724e6b289a7c06d2de9c395011d8d54efa8fe1aac444e2e", - "zh:5934db7baa2eec0f9acb9c7f1c3dd3b3fe1e67e23dd4a49e9fe327832967b32b", - "zh:5fbedf5d55c6e04e34c32b744151e514a80308e7dec633a56b852829b41e4b5a", - "zh:651558e1446cc05061b75e6f5cc6e2959feb17615cd0ace6ec7a2bcc846321c0", - "zh:76875eb697916475e554af080f9d4d3cd1f7d5d58ecdd3317a844a30980f4eec", + "h1:qyBawxoNN6EpiiX5h5ZG5P2dHsBeA5Z67xESl2c1HRk=", + "zh:051e2588410b7448a5c4c30d668948dd6fdfa8037700bfc00fb228986ccbf3a5", + "zh:082fbcf9706b48d0880ba552a11c29527e228dadd6d83668d0789abda24e5922", + "zh:0e0e72f214fb24f4f9c601cab088a2d8e00ec3327c451bc753911951d773214a", + 
"zh:3af6d38ca733ca66cce15c6a5735ded7c18348ad26040ebd9a59778b2cd9cf6c", + "zh:404898bc2258bbb9527fa06c72cb927ca011fd9bc3f4b90931c0912652c3f9e9", + "zh:4f617653b0f17a7708bc896f029c4ab0b677a1a1c987bd77166acad1d82db469", + "zh:5dbe393355ac137aa3fd329e3d24871f27012d3ba93d714485b55820df240349", + "zh:6067c2127eb5c879227aca671f101de6dcba909d0d8d15d5711480351962a248", "zh:9b12af85486a96aedd8d7984b0ff811a4b42e3d88dad1a3fb4c0b580d04fa425", - "zh:a52528e6d6c945a6ac45b89e9a70a5435148e4c151241e04c231dd2acc4a8c80", - "zh:af5f94c69025f1c2466a3cf970d1e9bed72938ec33b976c8c067468b6707bb57", - "zh:b6692fad956c9d4ef4266519d9ac2ee9f699f8f2c21627625c9ed63814d41590", - "zh:b74311af5fa5ac6e4eb159c12cfb380dfe2f5cd8685da2eac8073475f398ae60", - "zh:cc5aa6f738baa42edacba5ef1ca0969e5a959422e4491607255f3f6142ba90ed", - "zh:dd1a7ff1b22f0036a76bc905a8229ce7ed0a7eb5a783d3a2586fb1bd920515c3", - "zh:e5ab40c4ad0f1c7bd4d5d834d1aa144e690d1a93329d73b3d37512715a638de9", + "zh:a939f94461f91aa3b7ec7096271e2714309bd917fe9a03e02f68afb556d65e0f", + "zh:b21227b9082e5fafe8b7c415dc6a99c0d82da05492457377a5fe7d4acaed80e2", + "zh:b8d9f09ed5fc8c654b768b7bee1237eaf1e2287c898249e740695055fb0fe072", + "zh:d360e1e185b148ff6b1d0ed4f7d574e08f2391697ab43df62085b04a1a5b1284", + "zh:da962da17ddda744911cb1e92b983fa3874d73a28f3ee72faa9ddb6680a63774", + "zh:e2f1c4f5ebeb4fd7ef690178168a4c529025b54a91bb7a087dcea48e0b82737a", ] } diff --git a/infra/aws/terraform/prow-build-cluster/Makefile b/infra/aws/terraform/prow-build-cluster/Makefile new file mode 100644 index 00000000000..391e37fb54a --- /dev/null +++ b/infra/aws/terraform/prow-build-cluster/Makefile @@ -0,0 +1,32 @@ +TF ?= terraform +ASSUME_ROLE ?= true + +# Valid values are: canary, prod +PROW_CLUSTER ?= canary + +.PHONY: init +init: + $(TF) init \ + -backend-config=./tfbackends/$(PROW_CLUSTER).tfbackend + +.PHONY: plan +plan: + $(TF) plan \ + -var-file=./terraform.$(PROW_CLUSTER).tfvars \ + -var="assume_role=$(ASSUME_ROLE)" + +.PHONY: apply +apply: + $(TF) apply \ + 
-var-file=./terraform.$(PROW_CLUSTER).tfvars \ + -var="assume_role=$(ASSUME_ROLE)" + +.PHONY: destroy +destroy: + $(TF) destroy \ + -var-file=./terraform.$(PROW_CLUSTER).tfvars \ + -var="assume_role=$(ASSUME_ROLE)" + +.PHONY: clean +clean: + rm -rf ./.terraform diff --git a/infra/aws/terraform/prow-build-cluster/README.md b/infra/aws/terraform/prow-build-cluster/README.md new file mode 100644 index 00000000000..69bef7f12dc --- /dev/null +++ b/infra/aws/terraform/prow-build-cluster/README.md @@ -0,0 +1,69 @@ +# Provisioning EKS clusters + +## Prod vs Canary + +These scripts support provisioning two types of EKS clusters. One is meant for hosting prow jobs +on production and the other one is for testing infrastructure changes before promoting them to +production. + +Here are some differences between canary and production setups: +* cluster name, +* cluster admin IAM role name, +* secrets-manager IAM policy name, +* canary is missing k8s prow OIDC provider and corresponding role, +* subnet setup is different, +* instance type and autoscaling parameters (mainly for saving), + +## Provisioning Cluster + +Running installation from scratch is different than consecutive invocations of Terraform. +First run creates a role that can be later assumed by other users. Because of that additional +variable has to be set: + +```bash +# For provisioning Prod: +export PROW_CLUSTER=prod +# For provisioning Canary: +export PROW_CLUSTER=canary + +# Just making sure we don't have state cached locally. +make clean + +ASSUME_ROLE=false make init +ASSUME_ROLE=false make apply +``` + +Once the infrastructure is provisioned, next step is RBAC setup: + +```bash +# Fetch & update kubeconfig.
+# For Prod: +aws eks update-kubeconfig --region us-east-2 --name prow-build-cluster +# For Canary: +aws eks update-kubeconfig --region us-east-2 --name prow-build-canary-cluster + +# create cluster role bindings +kubectl apply -f ./resources/rbac +``` + +Lastly, run Terraform script again without additional variable. This time, it will implicitly assume +previously created role and provision resources on top of EKS cluster. + +```bash +make apply +``` + +From here, all consecutive runs should be possible with command from above. + +## Removing cluster + +Same as for installation, cluster removal requires running Terraform twice. +**IMPORTANT**: It's possible only for users with assigned `AdministratorAccess` policy. + +```bash +# First remove resources running on the cluster and IAM role. This fails once assumed role gets deleted. +make destroy + +# Clean up the rest. +ASSUME_ROLE=false make destroy +``` diff --git a/infra/aws/terraform/prow-build-cluster/eks.tf b/infra/aws/terraform/prow-build-cluster/eks.tf index 7da650d1dd7..b82358c154a 100644 --- a/infra/aws/terraform/prow-build-cluster/eks.tf +++ b/infra/aws/terraform/prow-build-cluster/eks.tf @@ -34,7 +34,7 @@ module "eks" { aws_auth_roles = [ # Allow access to the Prow-EKS-Admin IAM role (used by Prow directly).
{ - "rolearn" = aws_iam_role.eks_admin.arn + "rolearn" = aws_iam_role.eks_admin[0].arn "username" = "eks-admin" "groups" = [ "eks-prow-cluster-admin" diff --git a/infra/aws/terraform/prow-build-cluster/iam.tf b/infra/aws/terraform/prow-build-cluster/iam.tf index 0d8d9737120..005237d9cdc 100644 --- a/infra/aws/terraform/prow-build-cluster/iam.tf +++ b/infra/aws/terraform/prow-build-cluster/iam.tf @@ -26,8 +26,8 @@ data "aws_iam_user" "user_pprzekwa" { } resource "aws_iam_role" "iam_cluster_admin" { - name = "Prow-Cluster-Admin" - description = "IAM role used to delegate access to prow-build-cluster" + name = "${local.canary_prefix}Prow-Cluster-Admin" + description = "IAM role used to delegate access to ${local.canary_prefix}prow-build-cluster" assume_role_policy = jsonencode({ Version = "2012-10-17" diff --git a/infra/aws/terraform/prow-build-cluster/kubernetes.tf b/infra/aws/terraform/prow-build-cluster/kubernetes.tf index c406e03bb78..02ffd589e69 100644 --- a/infra/aws/terraform/prow-build-cluster/kubernetes.tf +++ b/infra/aws/terraform/prow-build-cluster/kubernetes.tf @@ -15,6 +15,8 @@ limitations under the License. */ module "cluster_autoscaler" { + count = var.assume_role ? 1 : 0 + source = "./modules/cluster-autoscaler" providers = { kubernetes = kubernetes @@ -30,6 +32,8 @@ module "cluster_autoscaler" { } module "metrics_server" { + count = var.assume_role ? 1 : 0 + source = "./modules/metrics-server" providers = { kubernetes = kubernetes @@ -42,6 +46,8 @@ module "metrics_server" { # AWS Load Balancer Controller (ALB/NLB integration). resource "helm_release" "aws_lb_controller" { + count = var.assume_role ? 1 : 0 + name = "aws-load-balancer-controller" namespace = "kube-system" repository = "https://aws.github.io/eks-charts" @@ -75,6 +81,8 @@ resource "helm_release" "aws_lb_controller" { # AWS Secrets Manager integration resource "helm_release" "secrets_store_csi_driver" { + count = var.assume_role ? 
1 : 0 + name = "secrets-store-csi-driver" namespace = "kube-system" repository = "https://kubernetes-sigs.github.io/secrets-store-csi-driver/charts" @@ -87,6 +95,8 @@ resource "helm_release" "secrets_store_csi_driver" { } resource "helm_release" "secrets_store_csi_driver_provider_aws" { + count = var.assume_role ? 1 : 0 + name = "aws-secrets-manager" namespace = "kube-system" repository = "https://aws.github.io/secrets-store-csi-driver-provider-aws" diff --git a/infra/aws/terraform/prow-build-cluster/main.tf b/infra/aws/terraform/prow-build-cluster/main.tf index e53da7d19ec..b7aaf9e6940 100644 --- a/infra/aws/terraform/prow-build-cluster/main.tf +++ b/infra/aws/terraform/prow-build-cluster/main.tf @@ -18,15 +18,42 @@ limitations under the License. # INITIALIZATION ############################################### +data "aws_caller_identity" "current" {} +data "aws_availability_zones" "available" {} + +locals { + canary_prefix = var.is_canary_installation ? "canary-" : "" + + root_account_arn = "arn:aws:iam::${data.aws_caller_identity.current.account_id}:root" + aws_cli_base_args = ["eks", "get-token", "--cluster-name", module.eks.cluster_name] + aws_cli_args = var.assume_role != true ? local.aws_cli_base_args : concat( + local.aws_cli_base_args, ["--role-arn", aws_iam_role.iam_cluster_admin.arn] + ) + + tags = { + Cluster = var.cluster_name + } + auto_scaling_tags = { + "k8s.io/cluster-autoscaler/${var.cluster_name}" = "owned" + "k8s.io/cluster-autoscaler/enabled" = true + } + node_group_tags = merge(local.tags, local.auto_scaling_tags) + azs = slice(data.aws_availability_zones.available.names, 0, 3) +} + provider "aws" { region = var.cluster_region # We have a chicken-egg problem here. This role is not going to exist - # when creating the cluster for the first time. In that case, this must - # be commented, than uncommented afterwards. 
- assume_role { - role_arn = "arn:aws:iam::468814281478:role/Prow-Cluster-Admin" - session_name = "prow-build-cluster-terraform" + # when creating the cluster for the first time. In that case, `assume_role` var + # has to be set to false. + dynamic "assume_role" { + for_each = var.assume_role ? [null] : [] + + content { + role_arn = "arn:aws:iam::468814281478:role/${local.canary_prefix}Prow-Cluster-Admin" + session_name = "prow-build-cluster-terraform" + } } } @@ -38,7 +65,7 @@ provider "kubernetes" { exec { api_version = "client.authentication.k8s.io/v1beta1" command = "aws" - args = ["eks", "get-token", "--cluster-name", module.eks.cluster_name, "--role-arn", aws_iam_role.iam_cluster_admin.arn] + args = local.aws_cli_args } } @@ -51,24 +78,7 @@ provider "helm" { exec { api_version = "client.authentication.k8s.io/v1beta1" command = "aws" - args = ["eks", "get-token", "--cluster-name", module.eks.cluster_name, "--role-arn", aws_iam_role.iam_cluster_admin.arn] + args = local.aws_cli_args } } } - -data "aws_caller_identity" "current" {} -data "aws_availability_zones" "available" {} - -locals { - root_account_arn = "arn:aws:iam::${data.aws_caller_identity.current.account_id}:root" - - tags = { - Cluster = var.cluster_name - } - auto_scaling_tags = { - "k8s.io/cluster-autoscaler/${var.cluster_name}" = "owned" - "k8s.io/cluster-autoscaler/enabled" = true - } - node_group_tags = merge(local.tags, local.auto_scaling_tags) - azs = slice(data.aws_availability_zones.available.names, 0, 3) -} diff --git a/infra/aws/terraform/prow-build-cluster/providers.tf b/infra/aws/terraform/prow-build-cluster/providers.tf index 2655ad7561f..226bf41f8f0 100644 --- a/infra/aws/terraform/prow-build-cluster/providers.tf +++ b/infra/aws/terraform/prow-build-cluster/providers.tf @@ -15,11 +15,8 @@ limitations under the License. 
*/ terraform { - backend "s3" { - bucket = "prow-build-cluster-tfstate" - key = "terraform.tfstate" - region = "us-east-2" - } + # Backend configuration lives inside *.tfbackend files. + backend "s3" {} required_version = "~> 1.3.0" diff --git a/infra/aws/terraform/prow-build-cluster/prow.tf b/infra/aws/terraform/prow-build-cluster/prow.tf index 6b78d9c9474..6b897bd5e7e 100644 --- a/infra/aws/terraform/prow-build-cluster/prow.tf +++ b/infra/aws/terraform/prow-build-cluster/prow.tf @@ -14,11 +14,13 @@ See the License for the specific language governing permissions and limitations under the License. */ -# This IAM configuration allows Prow GKE Clusters to assume a role on AWS - +# This IAM configuration allows Prow GKE Clusters to assume a role on AWS. +# Provisioning those resources for canary installation is skipped. # Recognize federated identities from the prow trusted cluster resource "aws_iam_openid_connect_provider" "k8s_prow" { + count = var.is_canary_installation ? 0 : 1 + url = "https://container.googleapis.com/v1/projects/k8s-prow/locations/us-central1-f/clusters/prow" client_id_list = ["sts.amazonaws.com"] thumbprint_list = ["08745487e891c19e3078c1f2a07e452950ef36f6"] @@ -26,14 +28,17 @@ resource "aws_iam_openid_connect_provider" "k8s_prow" { # We allow Prow Pods with specific service acccounts on the a particular cluster to assume this role resource "aws_iam_role" "eks_admin" { + count = var.is_canary_installation ?
0 : 1 + name = "Prow-EKS-Admin" + assume_role_policy = jsonencode({ Version = "2012-10-17" Statement = [ { "Effect" : "Allow", "Principal" : { - "Federated" : aws_iam_openid_connect_provider.k8s_prow.arn + "Federated" : aws_iam_openid_connect_provider.k8s_prow[0].arn }, "Action" : "sts:AssumeRoleWithWebIdentity", "Condition" : { diff --git a/infra/aws/terraform/prow-build-cluster/secrets_manager.tf b/infra/aws/terraform/prow-build-cluster/secrets_manager.tf index 1772b7ef6c7..cdc482acf19 100644 --- a/infra/aws/terraform/prow-build-cluster/secrets_manager.tf +++ b/infra/aws/terraform/prow-build-cluster/secrets_manager.tf @@ -28,7 +28,7 @@ data "aws_iam_policy_document" "secretsmanager_read" { } resource "aws_iam_policy" "secretsmanager_read" { - name = "secretsmanager_read" + name = "${local.canary_prefix}secretsmanager_read" path = "/" policy = data.aws_iam_policy_document.secretsmanager_read.json } diff --git a/infra/aws/terraform/prow-build-cluster/terraform.tfvars b/infra/aws/terraform/prow-build-cluster/terraform.tfvars deleted file mode 100644 index 20c5879c0f4..00000000000 --- a/infra/aws/terraform/prow-build-cluster/terraform.tfvars +++ /dev/null @@ -1,38 +0,0 @@ -/* -Copyright 2023 The Kubernetes Authors. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
-*/ - -cluster_name = "prow-build-cluster" -cluster_region = "us-east-2" -cluster_version = "1.25" - -vpc_cidr = "10.0.0.0/16" -vpc_secondary_cidr_blocks = ["10.1.0.0/16", "10.2.0.0/16"] -vpc_public_subnet = ["10.0.0.0/18", "10.0.64.0/18", "10.0.128.0/18"] -vpc_private_subnet = ["10.1.0.0/18", "10.1.64.0/18", "10.1.128.0/18"] -vpc_intra_subnet = ["10.2.0.0/18", "10.2.64.0/18", "10.2.128.0/18"] - -# Ubuntu EKS optimized AMI: https://cloud-images.ubuntu.com/aws-eks/ -node_ami = "ami-03de35fda144b3672" -node_instance_types = ["r5d.4xlarge"] -node_volume_size = 100 - -# TODO(xmudrii): Increase this later. -node_min_size = 20 -node_max_size = 40 -node_desired_size = 20 -node_max_unavailable_percentage = 100 # To ease testing - -cluster_autoscaler_version = "v1.25.0" diff --git a/infra/aws/terraform/prow-build-cluster/tfbackends/canary.tfbackend b/infra/aws/terraform/prow-build-cluster/tfbackends/canary.tfbackend new file mode 100644 index 00000000000..b83e94f0afa --- /dev/null +++ b/infra/aws/terraform/prow-build-cluster/tfbackends/canary.tfbackend @@ -0,0 +1,3 @@ +bucket = "prow-build-cluster-tfstate" +key = "prow-build-canary-cluster/terraform.tfstate" +region = "us-east-2" \ No newline at end of file diff --git a/infra/aws/terraform/prow-build-cluster/tfbackends/prod.tfbackend b/infra/aws/terraform/prow-build-cluster/tfbackends/prod.tfbackend new file mode 100644 index 00000000000..45d4c0e0cbb --- /dev/null +++ b/infra/aws/terraform/prow-build-cluster/tfbackends/prod.tfbackend @@ -0,0 +1,3 @@ +bucket = "prow-build-cluster-tfstate" +key = "terraform.tfstate" +region = "us-east-2" \ No newline at end of file diff --git a/infra/aws/terraform/prow-build-cluster/variables.tf b/infra/aws/terraform/prow-build-cluster/variables.tf index 0ce8beb8ef2..431fe09f6e7 100644 --- a/infra/aws/terraform/prow-build-cluster/variables.tf +++ b/infra/aws/terraform/prow-build-cluster/variables.tf @@ -14,6 +14,20 @@ See the License for the specific language governing permissions and 
limitations under the License. */ +# This variable is required in the installation process as we cannot +# assume a role that is yet to be created. +variable "assume_role" { + type = bool + description = "Assumes role to get access to EKS cluster after provisioning." + default = true +} + +variable "is_canary_installation" { + type = bool + description = "If set, scripts provision canary cluster instead of production." + default = false +} + variable "vpc_cidr" { type = string description = "CIDR of the VPC" From 8de48421c3d5b84b554b8664e5d5139f4fdc3653 Mon Sep 17 00:00:00 2001 From: Patryk Przekwas Date: Mon, 3 Apr 2023 13:05:23 +0200 Subject: [PATCH 05/12] Update license --- infra/aws/terraform/prow-build-cluster/Makefile | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/infra/aws/terraform/prow-build-cluster/Makefile b/infra/aws/terraform/prow-build-cluster/Makefile index 391e37fb54a..19be6a72458 100644 --- a/infra/aws/terraform/prow-build-cluster/Makefile +++ b/infra/aws/terraform/prow-build-cluster/Makefile @@ -1,3 +1,17 @@ +# Copyright 2023 The Kubernetes Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ TF ?= terraform ASSUME_ROLE ?= true From 3d3847a92d5f79f4bf7934dd3a6026fbe09f9ee8 Mon Sep 17 00:00:00 2001 From: Patryk Przekwas Date: Mon, 3 Apr 2023 13:17:55 +0200 Subject: [PATCH 06/12] Push tfvars --- .../terraform.canary.tfvars | 40 +++++++++++++++++++ .../prow-build-cluster/terraform.prod.tfvars | 38 ++++++++++++++++++ 2 files changed, 78 insertions(+) create mode 100644 infra/aws/terraform/prow-build-cluster/terraform.canary.tfvars create mode 100644 infra/aws/terraform/prow-build-cluster/terraform.prod.tfvars diff --git a/infra/aws/terraform/prow-build-cluster/terraform.canary.tfvars b/infra/aws/terraform/prow-build-cluster/terraform.canary.tfvars new file mode 100644 index 00000000000..6399e995eb0 --- /dev/null +++ b/infra/aws/terraform/prow-build-cluster/terraform.canary.tfvars @@ -0,0 +1,40 @@ +/* +Copyright 2023 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+*/ + +assume_role = true +is_canary_installation = true + +cluster_name = "prow-build-canary-cluster" +cluster_region = "us-east-2" +cluster_version = "1.25" + +vpc_cidr = "10.3.0.0/16" +vpc_secondary_cidr_blocks = ["10.4.0.0/16", "10.5.0.0/16"] +vpc_public_subnet = ["10.3.0.0/18", "10.3.64.0/18", "10.3.128.0/18"] +vpc_private_subnet = ["10.4.0.0/18", "10.4.64.0/18", "10.4.128.0/18"] +vpc_intra_subnet = ["10.5.0.0/18", "10.5.64.0/18", "10.5.128.0/18"] + +# Ubuntu EKS optimized AMI: https://cloud-images.ubuntu.com/aws-eks/ +node_ami = "ami-03de35fda144b3672" +node_instance_types = ["r5d.xlarge"] +node_volume_size = 100 + +node_min_size = 1 +node_max_size = 10 +node_desired_size = 1 +node_max_unavailable_percentage = 100 # To ease testing + +cluster_autoscaler_version = "v1.25.0" diff --git a/infra/aws/terraform/prow-build-cluster/terraform.prod.tfvars b/infra/aws/terraform/prow-build-cluster/terraform.prod.tfvars new file mode 100644 index 00000000000..20c5879c0f4 --- /dev/null +++ b/infra/aws/terraform/prow-build-cluster/terraform.prod.tfvars @@ -0,0 +1,38 @@ +/* +Copyright 2023 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+*/ + +cluster_name = "prow-build-cluster" +cluster_region = "us-east-2" +cluster_version = "1.25" + +vpc_cidr = "10.0.0.0/16" +vpc_secondary_cidr_blocks = ["10.1.0.0/16", "10.2.0.0/16"] +vpc_public_subnet = ["10.0.0.0/18", "10.0.64.0/18", "10.0.128.0/18"] +vpc_private_subnet = ["10.1.0.0/18", "10.1.64.0/18", "10.1.128.0/18"] +vpc_intra_subnet = ["10.2.0.0/18", "10.2.64.0/18", "10.2.128.0/18"] + +# Ubuntu EKS optimized AMI: https://cloud-images.ubuntu.com/aws-eks/ +node_ami = "ami-03de35fda144b3672" +node_instance_types = ["r5d.4xlarge"] +node_volume_size = 100 + +# TODO(xmudrii): Increase this later. +node_min_size = 20 +node_max_size = 40 +node_desired_size = 20 +node_max_unavailable_percentage = 100 # To ease testing + +cluster_autoscaler_version = "v1.25.0" From 7dcddaa7f9a54c54b9715dd8817afd77559fc1e4 Mon Sep 17 00:00:00 2001 From: Patryk Przekwas Date: Mon, 3 Apr 2023 13:36:16 +0200 Subject: [PATCH 07/12] Fix prow role listing issue --- infra/aws/terraform/prow-build-cluster/eks.tf | 54 +++++++++---------- 1 file changed, 26 insertions(+), 28 deletions(-) diff --git a/infra/aws/terraform/prow-build-cluster/eks.tf b/infra/aws/terraform/prow-build-cluster/eks.tf index b82358c154a..10ebc0054a7 100644 --- a/infra/aws/terraform/prow-build-cluster/eks.tf +++ b/infra/aws/terraform/prow-build-cluster/eks.tf @@ -18,6 +18,31 @@ limitations under the License. # EKS Cluster ############################################### +locals { + aws_auth_roles_base = [ + # Allow access to the Prow-Cluster-Admin IAM role (used with assume role with other IAM accounts). + { + "rolearn" = aws_iam_role.iam_cluster_admin.arn + "username" = "eks-cluster-admin" + "groups" = [ + "eks-cluster-admin" + ] + }, + ] + + aws_auth_roles = var.is_canary_installation ? local.aws_auth_roles_base : concat( + local.aws_auth_roles_base, [ + # Allow access to the Prow-EKS-Admin IAM role (used by Prow directly). 
+ { + "rolearn" = aws_iam_role.eks_admin[0].arn + "username" = "eks-admin" + "groups" = [ + "eks-prow-cluster-admin" + ] + } + ]) +} + module "eks" { source = "terraform-aws-modules/eks/aws" version = "19.10.0" @@ -31,34 +56,7 @@ module "eks" { manage_aws_auth_configmap = true # Configure aws-auth - aws_auth_roles = [ - # Allow access to the Prow-EKS-Admin IAM role (used by Prow directly). - { - "rolearn" = aws_iam_role.eks_admin[0].arn - "username" = "eks-admin" - "groups" = [ - "eks-prow-cluster-admin" - ] - }, - # Allow access to the Prow-Cluster-Admin IAM role (used with assume role with other IAM accounts). - { - "rolearn" = aws_iam_role.iam_cluster_admin.arn - "username" = "eks-cluster-admin" - "groups" = [ - "eks-cluster-admin" - ] - }, - ] - # Allow EKS access to the root account. - aws_auth_users = [ - { - "userarn" = local.root_account_arn - "username" = "root" - "groups" = [ - "eks-cluster-admin" - ] - }, - ] + aws_auth_roles = local.aws_auth_roles # Allow access to the KMS key used for secrets encryption to the root account. 
kms_key_administrators = [ From ad63575f901dbdd4403878d8a2a8aef06d9de726 Mon Sep 17 00:00:00 2001 From: Patryk Przekwas Date: Mon, 3 Apr 2023 14:16:16 +0200 Subject: [PATCH 08/12] Address review comments --- .../aws/terraform/prow-build-cluster/Makefile | 17 +++++++--- infra/aws/terraform/prow-build-cluster/eks.tf | 32 ++++++++++++------- .../terraform.canary.tfvars | 3 +- .../prow-build-cluster/terraform.prod.tfvars | 4 +++ .../tfbackends/canary.tfbackend | 2 +- .../tfbackends/prod.tfbackend | 2 +- 6 files changed, 42 insertions(+), 18 deletions(-) diff --git a/infra/aws/terraform/prow-build-cluster/Makefile b/infra/aws/terraform/prow-build-cluster/Makefile index 19be6a72458..b57eb214f05 100644 --- a/infra/aws/terraform/prow-build-cluster/Makefile +++ b/infra/aws/terraform/prow-build-cluster/Makefile @@ -20,27 +20,36 @@ PROW_CLUSTER ?= canary .PHONY: init init: - $(TF) init \ + $(TF) $@ \ -backend-config=./tfbackends/$(PROW_CLUSTER).tfbackend .PHONY: plan plan: - $(TF) plan \ + $(TF) $@ \ -var-file=./terraform.$(PROW_CLUSTER).tfvars \ -var="assume_role=$(ASSUME_ROLE)" .PHONY: apply apply: - $(TF) apply \ + $(TF) $@ \ -var-file=./terraform.$(PROW_CLUSTER).tfvars \ -var="assume_role=$(ASSUME_ROLE)" .PHONY: destroy destory: - $(TF) destroy \ + $(TF) $@ \ -var-file=./terraform.$(PROW_CLUSTER).tfvars \ -var="assume_role=$(ASSUME_ROLE)" +.PHONY: fmt +fmt: + $(TF) $@ + +.PHONY: output +output: + $(TF) $@ + .PHONY: clean clean: rm -rf ./.terraform + diff --git a/infra/aws/terraform/prow-build-cluster/eks.tf b/infra/aws/terraform/prow-build-cluster/eks.tf index 10ebc0054a7..b98c0e7564c 100644 --- a/infra/aws/terraform/prow-build-cluster/eks.tf +++ b/infra/aws/terraform/prow-build-cluster/eks.tf @@ -30,17 +30,16 @@ locals { }, ] - aws_auth_roles = var.is_canary_installation ? local.aws_auth_roles_base : concat( - local.aws_auth_roles_base, [ - # Allow access to the Prow-EKS-Admin IAM role (used by Prow directly). 
- { - "rolearn" = aws_iam_role.eks_admin[0].arn - "username" = "eks-admin" - "groups" = [ - "eks-prow-cluster-admin" - ] - } - ]) + aws_auth_roles = var.is_canary_installation ? local.aws_auth_roles_base : concat([ + # Allow access to the Prow-EKS-Admin IAM role (used by Prow directly). + { + "rolearn" = aws_iam_role.eks_admin[0].arn + "username" = "eks-admin" + "groups" = [ + "eks-prow-cluster-admin" + ] + } + ], local.aws_auth_roles_base) } module "eks" { @@ -58,6 +57,17 @@ module "eks" { # Configure aws-auth aws_auth_roles = local.aws_auth_roles + # Allow EKS access to the root account. + aws_auth_users = [ + { + "userarn" = local.root_account_arn + "username" = "root" + "groups" = [ + "eks-cluster-admin" + ] + }, + ] + # Allow access to the KMS key used for secrets encryption to the root account. kms_key_administrators = [ local.root_account_arn diff --git a/infra/aws/terraform/prow-build-cluster/terraform.canary.tfvars b/infra/aws/terraform/prow-build-cluster/terraform.canary.tfvars index 6399e995eb0..aff75748b63 100644 --- a/infra/aws/terraform/prow-build-cluster/terraform.canary.tfvars +++ b/infra/aws/terraform/prow-build-cluster/terraform.canary.tfvars @@ -14,7 +14,7 @@ See the License for the specific language governing permissions and limitations under the License. */ -assume_role = true +assume_role = true is_canary_installation = true cluster_name = "prow-build-canary-cluster" @@ -38,3 +38,4 @@ node_desired_size = 1 node_max_unavailable_percentage = 100 # To ease testing cluster_autoscaler_version = "v1.25.0" + diff --git a/infra/aws/terraform/prow-build-cluster/terraform.prod.tfvars b/infra/aws/terraform/prow-build-cluster/terraform.prod.tfvars index 20c5879c0f4..caf2cb2e549 100644 --- a/infra/aws/terraform/prow-build-cluster/terraform.prod.tfvars +++ b/infra/aws/terraform/prow-build-cluster/terraform.prod.tfvars @@ -14,6 +14,9 @@ See the License for the specific language governing permissions and limitations under the License. 
*/ +assume_role = true +is_canary_installation = false + cluster_name = "prow-build-cluster" cluster_region = "us-east-2" cluster_version = "1.25" @@ -36,3 +39,4 @@ node_desired_size = 20 node_max_unavailable_percentage = 100 # To ease testing cluster_autoscaler_version = "v1.25.0" + diff --git a/infra/aws/terraform/prow-build-cluster/tfbackends/canary.tfbackend b/infra/aws/terraform/prow-build-cluster/tfbackends/canary.tfbackend index b83e94f0afa..102cd99f04b 100644 --- a/infra/aws/terraform/prow-build-cluster/tfbackends/canary.tfbackend +++ b/infra/aws/terraform/prow-build-cluster/tfbackends/canary.tfbackend @@ -1,3 +1,3 @@ bucket = "prow-build-cluster-tfstate" key = "prow-build-canary-cluster/terraform.tfstate" -region = "us-east-2" \ No newline at end of file +region = "us-east-2" diff --git a/infra/aws/terraform/prow-build-cluster/tfbackends/prod.tfbackend b/infra/aws/terraform/prow-build-cluster/tfbackends/prod.tfbackend index 45d4c0e0cbb..17f07b239c2 100644 --- a/infra/aws/terraform/prow-build-cluster/tfbackends/prod.tfbackend +++ b/infra/aws/terraform/prow-build-cluster/tfbackends/prod.tfbackend @@ -1,3 +1,3 @@ bucket = "prow-build-cluster-tfstate" key = "terraform.tfstate" -region = "us-east-2" \ No newline at end of file +region = "us-east-2" From 8ceaf98e9195784f54fbb9e0e885bbc730881dc6 Mon Sep 17 00:00:00 2001 From: Patryk Przekwas Date: Mon, 3 Apr 2023 14:22:38 +0200 Subject: [PATCH 09/12] Update readme with cluster usage manual --- .../terraform/prow-build-cluster/README.md | 41 +++++++++++++++++++ 1 file changed, 41 insertions(+) diff --git a/infra/aws/terraform/prow-build-cluster/README.md b/infra/aws/terraform/prow-build-cluster/README.md index 69bef7f12dc..c490478e301 100644 --- a/infra/aws/terraform/prow-build-cluster/README.md +++ b/infra/aws/terraform/prow-build-cluster/README.md @@ -55,6 +55,47 @@ make apply From here, all consecutive runs should be possible with command from above. 
+## Using cluster + +### Fetch kubeconfig + +```bash +# Prod: +aws eks update-kubeconfig --region us-east-2 --name prow-build-cluster +# Canary: +aws eks update-kubeconfig --region us-east-2 --name prow-build-canary-cluster +``` + +### Open kubeconfig and add assume role argument + +For Prod: +```yaml +args: + - --region + - us-east-2 + - eks + - get-token + - --cluster-name + - prow-build-cluster + - --role-arn + - arn:aws:iam::468814281478:role/Prow-Cluster-Admin +``` + +For Canary: +```yaml +args: + - --region + - us-east-2 + - eks + - get-token + - --cluster-name + - prow-build-canary-cluster + - --role-arn + - arn:aws:iam::468814281478:role/canary-Prow-Cluster-Admin +``` + + + ## Removing cluster Same as for installation, cluster removal requires running Terraform twice. From 1949510b0146da761d66f6a9af46f79800ee97a9 Mon Sep 17 00:00:00 2001 From: Patryk Przekwas Date: Mon, 3 Apr 2023 15:58:25 +0200 Subject: [PATCH 10/12] Replace tf backends with TF workspaces --- .../aws/terraform/prow-build-cluster/Makefile | 27 ++++++------ .../terraform/prow-build-cluster/README.md | 11 +++-- infra/aws/terraform/prow-build-cluster/eks.tf | 43 ++++++++++--------- .../aws/terraform/prow-build-cluster/main.tf | 2 +- .../terraform/prow-build-cluster/providers.tf | 7 ++- .../aws/terraform/prow-build-cluster/prow.tf | 4 +- .../terraform.canary.tfvars | 1 - .../prow-build-cluster/terraform.prod.tfvars | 1 - .../tfbackends/canary.tfbackend | 3 -- .../tfbackends/prod.tfbackend | 3 -- .../terraform/prow-build-cluster/variables.tf | 6 --- 11 files changed, 50 insertions(+), 58 deletions(-) delete mode 100644 infra/aws/terraform/prow-build-cluster/tfbackends/canary.tfbackend delete mode 100644 infra/aws/terraform/prow-build-cluster/tfbackends/prod.tfbackend diff --git a/infra/aws/terraform/prow-build-cluster/Makefile b/infra/aws/terraform/prow-build-cluster/Makefile index b57eb214f05..bbceb40916e 100644 --- a/infra/aws/terraform/prow-build-cluster/Makefile +++ 
b/infra/aws/terraform/prow-build-cluster/Makefile @@ -16,30 +16,33 @@ TF ?= terraform ASSUME_ROLE ?= true # Valid values are: canary, prod -PROW_CLUSTER ?= canary +WORKSPACE_NAME ?= canary + +.PHONY: workspace-select +workspace-select: + $(TF) workspace select $(WORKSPACE_NAME) .PHONY: init init: - $(TF) $@ \ - -backend-config=./tfbackends/$(PROW_CLUSTER).tfbackend + $(TF) $@ .PHONY: plan -plan: +plan: workspace-select $(TF) $@ \ - -var-file=./terraform.$(PROW_CLUSTER).tfvars \ - -var="assume_role=$(ASSUME_ROLE)" + -var="assume_role=$(ASSUME_ROLE)" \ + -var-file=./terraform.$(WORKSPACE_NAME).tfvars .PHONY: apply -apply: +apply: workspace-select $(TF) $@ \ - -var-file=./terraform.$(PROW_CLUSTER).tfvars \ - -var="assume_role=$(ASSUME_ROLE)" + -var="assume_role=$(ASSUME_ROLE)" \ + -var-file=./terraform.$(WORKSPACE_NAME).tfvars .PHONY: destroy -destory: +destory: workspace-select $(TF) $@ \ - -var-file=./terraform.$(PROW_CLUSTER).tfvars \ - -var="assume_role=$(ASSUME_ROLE)" + -var="assume_role=$(ASSUME_ROLE)" \ + -var-file=./terraform.$(WORKSPACE_NAME).tfvars .PHONY: fmt fmt: diff --git a/infra/aws/terraform/prow-build-cluster/README.md b/infra/aws/terraform/prow-build-cluster/README.md index c490478e301..8c9bbd580bc 100644 --- a/infra/aws/terraform/prow-build-cluster/README.md +++ b/infra/aws/terraform/prow-build-cluster/README.md @@ -22,13 +22,11 @@ variable has to be set: ```bash # For provisioning Prod: -export PROW_CLUSTER=prod +export WORKSPACE_NAME=prod # For provisioning Canary: -export PROW_CLUSTER=canary +export WORKSPACE_NAME=canary # Just making sure we don't have state cached locally. -make clean - ASSUME_ROLE=false make init ASSUME_ROLE=false make apply ``` @@ -94,17 +92,18 @@ args: - arn:aws:iam::468814281478:role/canary-Prow-Cluster-Admin ``` - - ## Removing cluster Same as for installation, cluster removal requires running Terraform twice. **IMPORTANT**: It's possible only for users with assigned `AdministratorAccess` policy. 
```bash +export WORKSPACE_NAME= # choose between canary/prod + # First remove resources running on the cluster and IAM role. This fails once assumed role gets deleted. make destroy # Clean up the rest. ASSUME_ROLE=false make destroy ``` + diff --git a/infra/aws/terraform/prow-build-cluster/eks.tf b/infra/aws/terraform/prow-build-cluster/eks.tf index b98c0e7564c..50956180ea0 100644 --- a/infra/aws/terraform/prow-build-cluster/eks.tf +++ b/infra/aws/terraform/prow-build-cluster/eks.tf @@ -19,27 +19,28 @@ limitations under the License. ############################################### locals { - aws_auth_roles_base = [ - # Allow access to the Prow-Cluster-Admin IAM role (used with assume role with other IAM accounts). - { - "rolearn" = aws_iam_role.iam_cluster_admin.arn - "username" = "eks-cluster-admin" - "groups" = [ - "eks-cluster-admin" - ] - }, - ] - - aws_auth_roles = var.is_canary_installation ? local.aws_auth_roles_base : concat([ - # Allow access to the Prow-EKS-Admin IAM role (used by Prow directly). - { - "rolearn" = aws_iam_role.eks_admin[0].arn - "username" = "eks-admin" - "groups" = [ - "eks-prow-cluster-admin" - ] - } - ], local.aws_auth_roles_base) + aws_auth_roles = concat( + terraform.workspace == "prod" ? [ + # Allow access to the Prow-EKS-Admin IAM role (used by Prow directly). + { + "rolearn" = aws_iam_role.eks_admin[0].arn + "username" = "eks-admin" + "groups" = [ + "eks-prow-cluster-admin" + ] + } + ] : [], + [ + # Allow access to the Prow-Cluster-Admin IAM role (used with assume role with other IAM accounts). 
+ { + "rolearn" = aws_iam_role.iam_cluster_admin.arn + "username" = "eks-cluster-admin" + "groups" = [ + "eks-cluster-admin" + ] + } + ] + ) } module "eks" { diff --git a/infra/aws/terraform/prow-build-cluster/main.tf b/infra/aws/terraform/prow-build-cluster/main.tf index b7aaf9e6940..f23fd615047 100644 --- a/infra/aws/terraform/prow-build-cluster/main.tf +++ b/infra/aws/terraform/prow-build-cluster/main.tf @@ -22,7 +22,7 @@ data "aws_caller_identity" "current" {} data "aws_availability_zones" "available" {} locals { - canary_prefix = var.is_canary_installation ? "canary-" : "" + canary_prefix = terraform.workspace != "prod" ? "canary-" : "" root_account_arn = "arn:aws:iam::${data.aws_caller_identity.current.account_id}:root" aws_cli_base_args = ["eks", "get-token", "--cluster-name", module.eks.cluster_name] diff --git a/infra/aws/terraform/prow-build-cluster/providers.tf b/infra/aws/terraform/prow-build-cluster/providers.tf index 226bf41f8f0..2655ad7561f 100644 --- a/infra/aws/terraform/prow-build-cluster/providers.tf +++ b/infra/aws/terraform/prow-build-cluster/providers.tf @@ -15,8 +15,11 @@ limitations under the License. */ terraform { - # Backend conifguration lives inside *.tfbackend files. - backend "s3" {} + backend "s3" { + bucket = "prow-build-cluster-tfstate" + key = "terraform.tfstate" + region = "us-east-2" + } required_version = "~> 1.3.0" diff --git a/infra/aws/terraform/prow-build-cluster/prow.tf b/infra/aws/terraform/prow-build-cluster/prow.tf index 6b897bd5e7e..9cad9120c9d 100644 --- a/infra/aws/terraform/prow-build-cluster/prow.tf +++ b/infra/aws/terraform/prow-build-cluster/prow.tf @@ -19,7 +19,7 @@ limitations under the License. # Recognize federated identities from the prow trusted cluster resource "aws_iam_openid_connect_provider" "k8s_prow" { - count = var.is_canary_installation ? 0 : 1 + count = terraform.workspace == "prod" ? 
1 : 0 url = "https://container.googleapis.com/v1/projects/k8s-prow/locations/us-central1-f/clusters/prow" client_id_list = ["sts.amazonaws.com"] @@ -28,7 +28,7 @@ resource "aws_iam_openid_connect_provider" "k8s_prow" { # We allow Prow Pods with specific service acccounts on the a particular cluster to assume this role resource "aws_iam_role" "eks_admin" { - count = var.is_canary_installation ? 0 : 1 + count = terraform.workspace == "prod" ? 1 : 0 name = "Prow-EKS-Admin" diff --git a/infra/aws/terraform/prow-build-cluster/terraform.canary.tfvars b/infra/aws/terraform/prow-build-cluster/terraform.canary.tfvars index aff75748b63..adc96d9595a 100644 --- a/infra/aws/terraform/prow-build-cluster/terraform.canary.tfvars +++ b/infra/aws/terraform/prow-build-cluster/terraform.canary.tfvars @@ -15,7 +15,6 @@ limitations under the License. */ assume_role = true -is_canary_installation = true cluster_name = "prow-build-canary-cluster" cluster_region = "us-east-2" diff --git a/infra/aws/terraform/prow-build-cluster/terraform.prod.tfvars b/infra/aws/terraform/prow-build-cluster/terraform.prod.tfvars index caf2cb2e549..fff515ddd40 100644 --- a/infra/aws/terraform/prow-build-cluster/terraform.prod.tfvars +++ b/infra/aws/terraform/prow-build-cluster/terraform.prod.tfvars @@ -15,7 +15,6 @@ limitations under the License. 
*/ assume_role = true -is_canary_installation = false cluster_name = "prow-build-cluster" cluster_region = "us-east-2" diff --git a/infra/aws/terraform/prow-build-cluster/tfbackends/canary.tfbackend b/infra/aws/terraform/prow-build-cluster/tfbackends/canary.tfbackend deleted file mode 100644 index 102cd99f04b..00000000000 --- a/infra/aws/terraform/prow-build-cluster/tfbackends/canary.tfbackend +++ /dev/null @@ -1,3 +0,0 @@ -bucket = "prow-build-cluster-tfstate" -key = "prow-build-canary-cluster/terraform.tfstate" -region = "us-east-2" diff --git a/infra/aws/terraform/prow-build-cluster/tfbackends/prod.tfbackend b/infra/aws/terraform/prow-build-cluster/tfbackends/prod.tfbackend deleted file mode 100644 index 17f07b239c2..00000000000 --- a/infra/aws/terraform/prow-build-cluster/tfbackends/prod.tfbackend +++ /dev/null @@ -1,3 +0,0 @@ -bucket = "prow-build-cluster-tfstate" -key = "terraform.tfstate" -region = "us-east-2" diff --git a/infra/aws/terraform/prow-build-cluster/variables.tf b/infra/aws/terraform/prow-build-cluster/variables.tf index 431fe09f6e7..6f1dba6d0ce 100644 --- a/infra/aws/terraform/prow-build-cluster/variables.tf +++ b/infra/aws/terraform/prow-build-cluster/variables.tf @@ -22,12 +22,6 @@ variable "assume_role" { default = true } -variable "is_canary_installation" { - type = bool - description = "If set, scripts provision canary cluster instead of production." 
- default = false -} - variable "vpc_cidr" { type = string description = "CIDR of the VPC" From 0088b5d40c3238c651341c6baa649f004c29324a Mon Sep 17 00:00:00 2001 From: Patryk Przekwas Date: Tue, 4 Apr 2023 10:53:27 +0200 Subject: [PATCH 11/12] Change argument order in Makefile to fix assume_role & typo fix --- infra/aws/terraform/prow-build-cluster/Makefile | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/infra/aws/terraform/prow-build-cluster/Makefile b/infra/aws/terraform/prow-build-cluster/Makefile index bbceb40916e..42ee318a57f 100644 --- a/infra/aws/terraform/prow-build-cluster/Makefile +++ b/infra/aws/terraform/prow-build-cluster/Makefile @@ -29,20 +29,20 @@ init: .PHONY: plan plan: workspace-select $(TF) $@ \ - -var="assume_role=$(ASSUME_ROLE)" \ - -var-file=./terraform.$(WORKSPACE_NAME).tfvars + -var-file=./terraform.$(WORKSPACE_NAME).tfvars \ + -var="assume_role=$(ASSUME_ROLE)" .PHONY: apply apply: workspace-select $(TF) $@ \ - -var="assume_role=$(ASSUME_ROLE)" \ - -var-file=./terraform.$(WORKSPACE_NAME).tfvars + -var-file=./terraform.$(WORKSPACE_NAME).tfvars \ + -var="assume_role=$(ASSUME_ROLE)" .PHONY: destroy -destory: workspace-select +destroy: workspace-select $(TF) $@ \ - -var="assume_role=$(ASSUME_ROLE)" \ - -var-file=./terraform.$(WORKSPACE_NAME).tfvars + -var-file=./terraform.$(WORKSPACE_NAME).tfvars \ + -var="assume_role=$(ASSUME_ROLE)" .PHONY: fmt fmt: From a42cf0e83e9e1fa9c90af1c199be4546db647656 Mon Sep 17 00:00:00 2001 From: Patryk Przekwas Date: Tue, 4 Apr 2023 11:31:42 +0200 Subject: [PATCH 12/12] Run tf fmt --- infra/aws/terraform/prow-build-cluster/terraform.canary.tfvars | 2 +- infra/aws/terraform/prow-build-cluster/terraform.prod.tfvars | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/infra/aws/terraform/prow-build-cluster/terraform.canary.tfvars b/infra/aws/terraform/prow-build-cluster/terraform.canary.tfvars index adc96d9595a..3c3a322f5e6 100644 --- 
a/infra/aws/terraform/prow-build-cluster/terraform.canary.tfvars +++ b/infra/aws/terraform/prow-build-cluster/terraform.canary.tfvars @@ -14,7 +14,7 @@ See the License for the specific language governing permissions and limitations under the License. */ -assume_role = true +assume_role = true cluster_name = "prow-build-canary-cluster" cluster_region = "us-east-2" diff --git a/infra/aws/terraform/prow-build-cluster/terraform.prod.tfvars b/infra/aws/terraform/prow-build-cluster/terraform.prod.tfvars index fff515ddd40..e0dfba31a85 100644 --- a/infra/aws/terraform/prow-build-cluster/terraform.prod.tfvars +++ b/infra/aws/terraform/prow-build-cluster/terraform.prod.tfvars @@ -14,7 +14,7 @@ See the License for the specific language governing permissions and limitations under the License. */ -assume_role = true +assume_role = true cluster_name = "prow-build-cluster" cluster_region = "us-east-2"