diff --git a/.github/bundles/uds-bundle.yaml b/.github/bundles/uds-bundle.yaml
new file mode 100644
index 000000000..e284335b8
--- /dev/null
+++ b/.github/bundles/uds-bundle.yaml
@@ -0,0 +1,50 @@
+kind: UDSBundle
+metadata:
+  name: uds-core-eks-nightly
+  description: A UDS bundle for deploying EKS and UDS Core
+  # x-release-please-start-version
+  version: "0.18.0"
+  # x-release-please-end
+
+packages:
+  - name: init
+    repository: ghcr.io/defenseunicorns/packages/init
+    # renovate: datasource=github-tags depName=defenseunicorns/zarf versioning=semver
+    ref: v0.32.4
+
+  - name: core
+    path: ../../build/
+    # x-release-please-start-version
+    ref: 0.18.0
+    # x-release-please-end
+    overrides:  # each variable below maps a bundle variable name onto a chart value path
+      velero:
+        velero:
+          variables:
+            - name: VELERO_USE_SECRET
+              description: "Toggle use secret off to use IRSA."
+              path: credentials.useSecret
+            - name: VELERO_IRSA_ANNOTATION
+              description: "IRSA ARN annotation to use for Velero"
+              path: serviceAccount.server.annotations
+      loki:
+        loki:
+          variables:
+            - name: LOKI_CHUNKS_BUCKET
+              description: "The object storage bucket for Loki chunks"
+              path: loki.storage.bucketNames.chunks
+            - name: LOKI_RULER_BUCKET
+              description: "The object storage bucket for Loki ruler"
+              path: loki.storage.bucketNames.ruler
+            - name: LOKI_ADMIN_BUCKET
+              description: "The object storage bucket for Loki admin"
+              path: loki.storage.bucketNames.admin
+            - name: LOKI_S3_ENDPOINT
+              description: "The S3 endpoint"
+              path: loki.storage.s3.endpoint
+            - name: LOKI_S3_REGION
+              description: "The S3 region"
+              path: loki.storage.s3.region
+            - name: LOKI_IRSA_ANNOTATION
+              description: "The irsa role annotation"
+              path: loki.serviceAccount.annotations
diff --git a/.github/bundles/uds-config.yaml b/.github/bundles/uds-config.yaml
new file mode 100644
index 000000000..eb2db3a58
--- /dev/null
+++ b/.github/bundles/uds-config.yaml
@@ -0,0 +1,21 @@
+# Overwritten by ci-iac-aws package
+options:
+  architecture: amd64
+
+variables:
+  core:  # keys are the lower-case forms of the variable names consumed by the core package
+    loki_chunks_bucket: ${ZARF_VAR_LOKI_S3_BUCKET}
+    loki_ruler_bucket: ${ZARF_VAR_LOKI_S3_BUCKET}
+    loki_admin_bucket: ${ZARF_VAR_LOKI_S3_BUCKET}
+    loki_s3_region: ${ZARF_VAR_LOKI_S3_AWS_REGION}
+    loki_s3_endpoint: ""  # underscores throughout, to match the LOKI_S3_ENDPOINT override in uds-bundle.yaml
+    loki_irsa_annotation:
+      eks.amazonaws.com/role-arn: ${ZARF_VAR_LOKI_S3_ROLE_ARN}
+    velero_use_secret: false
+    velero_irsa_annotation:
+      eks.amazonaws.com/role-arn: "${ZARF_VAR_VELERO_S3_ROLE_ARN}"
+    velero_bucket: ${ZARF_VAR_VELERO_S3_BUCKET}
+    velero_bucket_region: ${ZARF_VAR_VELERO_S3_AWS_REGION}
+    velero_bucket_provider_url: ""
+    velero_bucket_credential_name: ""
+    velero_bucket_credential_key: ""
diff --git a/.github/test-infra/buckets-iac/loki.tf b/.github/test-infra/buckets-iac/loki.tf
new file mode 100644
index 000000000..3defd0ca9
--- /dev/null
+++ b/.github/test-infra/buckets-iac/loki.tf
@@ -0,0 +1,28 @@
+resource "aws_iam_policy" "loki_policy" {
+  name        = "${local.bucket_configurations.loki.name}-irsa-${random_id.unique_id.hex}"
+  path        = "/"
+  description = "IAM policy for Loki to have necessary permissions to use S3 for storing logs."
+  policy = jsonencode({
+    Version = "2012-10-17"
+    Statement = [
+      {
+        Effect   = "Allow"
+        Action   = ["s3:ListBucket"]
+        Resource = ["arn:${data.aws_partition.current.partition}:s3:::${module.S3["loki"].bucket_name}"] # bucket ARN
+      },
+      {
+        Effect   = "Allow"
+        Action   = ["s3:*Object"]
+        Resource = ["arn:${data.aws_partition.current.partition}:s3:::${module.S3["loki"].bucket_name}/*"] # every object in the bucket
+      },
+      {
+        Effect = "Allow"
+        Action = [
+          "kms:GenerateDataKey",
+          "kms:Decrypt"
+        ]
+        Resource = [local.kms_key_arns["loki"].kms_key_arn]
+      }
+    ]
+  })
+}
diff --git a/.github/test-infra/buckets-iac/main.tf b/.github/test-infra/buckets-iac/main.tf
new file mode 100644
index 000000000..539fde1fc
--- /dev/null
+++ b/.github/test-infra/buckets-iac/main.tf
@@ -0,0 +1,130 @@
+provider "aws" {
+  region = var.region
+
+  default_tags {
+    tags = {
+      PermissionsBoundary = var.permissions_boundary_name
+    }
+  }
+}
+
+terraform {
+  required_version = "1.5.7"
+  backend "s3" {
+  }
+  required_providers {
+    aws = {
+      source  = "hashicorp/aws"
+      version = ">= 4.0, != 5.17.0"
+    }
+
+    random = {
+      source  = "hashicorp/random"
+      version = "3.5.1"
+    }
+  }
+}
+
+resource "random_id" "default" {
+  byte_length = 2
+}
+
+data "aws_eks_cluster" "existing" {
+  name = var.name
+}
+
+data "aws_caller_identity" "current" {}
+
+data "aws_partition" "current" {}
+
+data "aws_region" "current" {}
+
+locals {
+  oidc_url_without_protocol     = substr(data.aws_eks_cluster.existing.identity[0].oidc[0].issuer, 8, -1) # drops the first 8 characters of the issuer URL
+  oidc_arn                      = "arn:${data.aws_partition.current.partition}:iam::${data.aws_caller_identity.current.account_id}:oidc-provider/${local.oidc_url_without_protocol}"
+  iam_role_permissions_boundary = var.use_permissions_boundary ? "arn:${data.aws_partition.current.partition}:iam::${data.aws_caller_identity.current.account_id}:policy/${var.permissions_boundary_name}" : null
+
+  bucket_configurations = { # re-keyed by instance.name, with the cluster name prefixed onto each bucket name
+    for instance in var.bucket_configurations :
+    instance.name => {
+      name            = "${var.name}-${instance.name}"
+      service_account = instance.service_account
+      namespace       = instance.namespace
+    }
+  }
+
+  kms_key_arns = module.generate_kms
+
+  iam_policies = {
+    "loki"   = resource.aws_iam_policy.loki_policy.arn
+    "velero" = resource.aws_iam_policy.velero_policy.arn
+  }
+}
+
+resource "random_id" "unique_id" {
+  byte_length = 4
+}
+
+module "generate_kms" {
+  for_each = local.bucket_configurations
+  source   = "github.com/defenseunicorns/terraform-aws-uds-kms?ref=v0.0.2"
+
+  key_owners = var.key_owner_arns
+  # A list of IAM ARNs for those who will have full key permissions (`kms:*`)
+  kms_key_alias_name_prefix = "${each.value.name}-" # Prefix for KMS key alias.
+  kms_key_deletion_window   = var.kms_key_deletion_window
+  # Waiting period for scheduled KMS Key deletion. Can be 7-30 days.
+  kms_key_description = "${var.name} UDS Core deployment ${title(each.key)} Key" # Description for the KMS key, named after the bucket it protects.
+  tags = {
+    Deployment = "UDS Core ${each.value.name}"
+  }
+}
+
+module "S3" {
+  for_each                = local.bucket_configurations
+  source                  = "github.com/defenseunicorns/terraform-aws-uds-s3?ref=v0.0.6"
+  name_prefix             = "${each.value.name}-"
+  kms_key_arn             = local.kms_key_arns[each.key].kms_key_arn # key created for the same bucket entry
+  force_destroy           = "true"
+  create_bucket_lifecycle = true
+}
+
+module "irsa" {
+  for_each                      = local.bucket_configurations
+  source                        = "github.com/defenseunicorns/terraform-aws-uds-irsa?ref=v0.0.2"
+  name                          = each.value.name
+  kubernetes_service_account    = each.value.service_account
+  kubernetes_namespace          = each.value.namespace
+  oidc_provider_arn             = local.oidc_arn
+  role_permissions_boundary_arn = local.iam_role_permissions_boundary
+
+  role_policy_arns = tomap({
+    "${each.key}" = local.iam_policies[each.key]
+  })
+}
+
+resource "aws_s3_bucket_policy" "bucket_policy" {
+  for_each = local.bucket_configurations
+  bucket   = module.S3[each.key].bucket_name
+
+  policy = jsonencode({
+    Version = "2012-10-17"
+    Statement = [
+      {
+        Action = [
+          "s3:ListBucket",
+          "s3:GetObject",
+          "s3:PutObject"
+        ]
+        Effect = "Allow"
+        Principal = {
+          AWS = module.irsa[each.key].role_arn
+        }
+        Resource = [
+          module.S3[each.key].bucket_arn,
+          "${module.S3[each.key].bucket_arn}/*"
+        ]
+      }
+    ]
+  })
+}
diff --git a/.github/test-infra/buckets-iac/output.tf b/.github/test-infra/buckets-iac/output.tf
new file mode 100644
index 000000000..1228df95a
--- /dev/null
+++ b/.github/test-infra/buckets-iac/output.tf
@@ -0,0 +1,27 @@
+output "aws_region" {
+  value = data.aws_region.current.name
+}
+
+output "loki_irsa_role_arn" {
+  value = module.irsa["loki"].role_arn
+}
+
+output "loki_s3" {
+  value = module.S3["loki"]
+}
+
+output "loki_s3_bucket" {
+  value = module.S3["loki"].bucket_name
+}
+
+output "velero_irsa_role_arn" {
+  value = module.irsa["velero"].role_arn
+}
+
+output "velero_s3" {
+  value = module.S3["velero"]
+}
+
+output "velero_s3_bucket" {
+  value = module.S3["velero"].bucket_name
+}
diff --git a/.github/test-infra/buckets-iac/variables.tf b/.github/test-infra/buckets-iac/variables.tf
new file mode 100644
index 000000000..22bb12dad
--- /dev/null
+++ b/.github/test-infra/buckets-iac/variables.tf
@@ -0,0 +1,55 @@
+variable "region" {
+  description = "AWS region"
+  type        = string
+}
+
+variable "name" {
+  description = "Name for cluster"
+  type        = string
+}
+
+variable "permissions_boundary_name" {
+  description = "The name of the permissions boundary for IAM resources. This will be used for tagging and to build out the ARN."
+  type        = string
+  default     = null
+}
+
+variable "use_permissions_boundary" {
+  description = "Whether to use IAM permissions boundary for resources."
+  type        = bool
+  default     = true
+}
+
+variable "key_owner_arns" {
+  description = "ARNS of KMS key owners, needed for use of key"
+  type        = list(string)
+  default     = []
+}
+
+# taken from zarf bb repo
+variable "kms_key_deletion_window" {
+  description = "Waiting period for scheduled KMS Key deletion. Can be 7-30 days."
+  type        = number
+  default     = 7
+}
+
+variable "bucket_configurations" {
+  type = map(object({
+    name            = string
+    service_account = string
+    namespace       = string
+  }))
+  default = {
+    loki = {
+      name            = "loki"
+      service_account = "logging-loki"
+      namespace       = "logging"
+    }
+    velero = {
+      name            = "velero"
+      service_account = "velero-server"
+      namespace       = "velero"
+    }
+  }
+}
+
diff --git a/.github/test-infra/buckets-iac/velero.tf b/.github/test-infra/buckets-iac/velero.tf
new file mode 100644
index 000000000..96e6ed627
--- /dev/null
+++ b/.github/test-infra/buckets-iac/velero.tf
@@ -0,0 +1,59 @@
+resource "aws_iam_policy" "velero_policy" {
+  name        = "${local.bucket_configurations.velero.name}-irsa-${random_id.unique_id.hex}"
+  path        = "/"
+  description = "Policy to give Velero necessary permissions for cluster backups."
+
+  # jsonencode renders the Terraform expression below as valid JSON syntax.
+  policy = jsonencode(
+    {
+      Version = "2012-10-17",
+      Statement = [
+        {
+          Effect = "Allow",
+          Action = [
+            "ec2:DescribeVolumes",
+            "ec2:DescribeSnapshots",
+            "ec2:CreateTags",
+            "ec2:CreateVolume",
+            "ec2:CreateSnapshot",
+            "ec2:DeleteSnapshot"
+          ]
+          Resource = [
+            "*"
+          ]
+        },
+        {
+          Effect = "Allow"
+          Action = [
+            "s3:GetObject",
+            "s3:DeleteObject",
+            "s3:PutObject",
+            "s3:AbortMultipartUpload",
+            "s3:ListMultipartUploadParts"
+          ]
+          Resource = [
+            "arn:${data.aws_partition.current.partition}:s3:::${module.S3["velero"].bucket_name}/*"
+          ]
+        },
+        {
+          Effect = "Allow",
+          Action = [
+            "s3:ListBucket"
+          ],
+          Resource = [
+            "arn:${data.aws_partition.current.partition}:s3:::${module.S3["velero"].bucket_name}" # s3:ListBucket is a bucket-level action: bucket ARN, no "/*"
+          ]
+        },
+        {
+          Effect = "Allow"
+          Action = [
+            "kms:GenerateDataKey",
+            "kms:Decrypt"
+          ]
+          Resource = [local.kms_key_arns["velero"].kms_key_arn]
+        }
+
+      ]
+  })
+}
+
diff --git a/.github/workflows/nightly-testing.yaml b/.github/workflows/nightly-testing.yaml
new file mode 100644
index 000000000..4b994e128
--- /dev/null
+++ b/.github/workflows/nightly-testing.yaml
@@ -0,0 +1,11 @@
+name: Nightly Testing
+
+on:
+  schedule:
+    - cron: '0 6 * * *' # 06:00 UTC every day (midnight Mountain Daylight Time)
+
+jobs:
+  nightly-testing:
+    name: Test Core on EKS
+    uses: ./.github/workflows/test-eks.yaml
+    secrets: inherit
diff --git a/.github/workflows/test-eks.yaml b/.github/workflows/test-eks.yaml
new file mode 100644
index 000000000..a7bba6669
--- /dev/null
+++ b/.github/workflows/test-eks.yaml
@@ -0,0 +1,87 @@
+name: Test Core On EKS
+
+on:
+  workflow_call:
+
+permissions:
+  id-token: write
+  contents: read
+
+jobs:
+  test-eks-install:
+    runs-on: ubuntu-latest
+    env:
+      SHA: ${{ github.sha }}
+      UDS_REGION: us-west-2
+      UDS_PERMISSIONS_BOUNDARY_ARN: ${{ secrets.PERMISSIONS_BOUNDARY_ARN }}
+      UDS_PERMISSIONS_BOUNDARY_NAME: ${{ secrets.PERMISSIONS_BOUNDARY_NAME }}
+      UDS_STATE_BUCKET_NAME: uds-aws-ci-commercial-us-west-2-5246-tfstate
+      UDS_STATE_DYNAMODB_TABLE_NAME: uds-aws-ci-commercial-org-us-west-2-5246-tfstate-lock
+    steps:
+      - name: Set ENV  # per-commit names: every value embeds the first 7 characters of SHA
+        run: |
+          echo "UDS_CLUSTER_NAME=uds-core-aws-${SHA:0:7}" >> $GITHUB_ENV
+          echo "UDS_STATE_KEY=tfstate/ci/install/${SHA:0:7}-core-aws.tfstate" >> $GITHUB_ENV
+          echo "TF_VAR_region=${UDS_REGION}" >> $GITHUB_ENV
+          echo "TF_VAR_name=uds-core-aws-${SHA:0:7}" >> $GITHUB_ENV
+          echo "TF_VAR_use_permissions_boundary=true" >> $GITHUB_ENV
+          echo "TF_VAR_permissions_boundary_name=${UDS_PERMISSIONS_BOUNDARY_NAME}" >> $GITHUB_ENV
+
+      - name: Checkout repository
+        uses: actions/checkout@v4
+
+      - name: Configure AWS Credentials
+        uses: aws-actions/configure-aws-credentials@v4
+        with:
+          role-to-assume: ${{ secrets.AWS_COMMERCIAL_ROLE_TO_ASSUME }}
+          role-session-name: ${{ github.job || github.event.client_payload.pull_request.head.sha || github.sha }}
+          aws-region: ${{ env.UDS_REGION }}
+          role-duration-seconds: 21600
+      - name: Environment setup
+        uses: ./.github/actions/setup
+
+      - name: Install eksctl
+        run: uds run -f tasks/iac.yaml install-eksctl
+
+      - name: Setup Terraform
+        uses: hashicorp/setup-terraform@v3
+        with:
+          terraform_version: "1.5.7"
+
+      - name: Create UDS Core Package
+        run: ZARF_ARCHITECTURE=amd64 uds run -f tasks/create.yaml standard-package
+
+      - name: Create Core Bundle
+        run: uds create .github/bundles --confirm
+
+      - name: Create Cluster
+        run: uds run -f tasks/iac.yaml create-cluster
+        timeout-minutes: 60
+
+      - name: Create IAC
+        run: uds run -f tasks/iac.yaml create-iac
+        timeout-minutes: 20
+
+      - name: Deploy Core Bundle
+        env:
+          UDS_CONFIG: .github/bundles/uds-config.yaml
+        run: uds deploy .github/bundles/uds-bundle-uds-core-eks-nightly-*.tar.zst --confirm
+        timeout-minutes: 20
+
+      - name: Remove UDS Core  # cleanup steps run even after a failure and never fail the job themselves
+        if: always()
+        run: uds remove .github/bundles/uds-bundle-uds-core-eks-nightly-*.tar.zst --confirm
+        timeout-minutes: 10
+        continue-on-error: true
+
+      - name: Remove IAC
+        if: always()
+        run: uds run -f tasks/iac.yaml destroy-iac
+        timeout-minutes: 10
+        continue-on-error: true
+
+      - name: Teardown EKS cluster
+        if: always()
+        run: uds run -f tasks/iac.yaml destroy-cluster
+        timeout-minutes: 30
+        continue-on-error: true
diff --git a/.gitignore b/.gitignore
index 80f001078..218f55004 100644
--- a/.gitignore
+++ b/.gitignore
@@ -13,3 +13,5 @@ insecure*
 zarf
 tmp-tasks.yaml
 cacert.b64
+run/
+extract-terraform.sh
diff --git a/src/velero/common/zarf.yaml b/src/velero/common/zarf.yaml
index 261c7daae..17c9ec9fc 100644
--- a/src/velero/common/zarf.yaml
+++ b/src/velero/common/zarf.yaml
@@ -21,6 +21,10 @@ variables:
     sensitive: true
     description: "Key secret to use when connecting to the Velero bucket"
     default: "uds-secret"
+  - name: VELERO_BUCKET_CREDENTIAL_NAME
+    default: "velero-bucket-credentials"
+  - name: VELERO_BUCKET_CREDENTIAL_KEY
+    default: "cloud"
 
 components:
   - name: velero
diff --git a/src/velero/values/values.yaml b/src/velero/values/values.yaml
index d55bfce80..53f80976a 100644
--- a/src/velero/values/values.yaml
+++ b/src/velero/values/values.yaml
@@ -16,8 +16,8 @@ configuration:
         s3ForcePathStyle: true
         s3Url: "###ZARF_VAR_VELERO_BUCKET_PROVIDER_URL###"
       credential:
-        name: "velero-bucket-credentials"
-        key: "cloud"
+        name: "###ZARF_VAR_VELERO_BUCKET_CREDENTIAL_NAME###"
+        key: "###ZARF_VAR_VELERO_BUCKET_CREDENTIAL_KEY###"
   # volumeSnapshotLocation:
   #   - name: default
   #     provider: aws
diff --git a/tasks/create.yaml b/tasks/create.yaml
index b073c9e8c..29e2764b9 100644
--- a/tasks/create.yaml
+++ b/tasks/create.yaml
@@ -1,3 +1,6 @@
+includes:
+  - common: https://raw.githubusercontent.com/defenseunicorns/uds-common/v0.3.2/tasks/create.yaml
+
 variables:
   - name: FLAVOR
     default: upstream
diff --git a/tasks/iac.yaml b/tasks/iac.yaml
new file mode 100644
index 000000000..031c0d7b4
--- /dev/null
+++ b/tasks/iac.yaml
@@ -0,0 +1,155 @@
+variables:
+  - name: CLUSTER_NAME
+  - name: REGION
+  - name: PERMISSIONS_BOUNDARY_NAME
+  - name: PERMISSIONS_BOUNDARY_ARN
+  - name: STATE_BUCKET_NAME
+  - name: STATE_DYNAMODB_TABLE_NAME
+  - name: STATE_KEY
+  - name: AMI_ID
+    default: ami-068ab6ac1cec494e0
+
+tasks:
+  - name: install-eksctl  # downloads the pinned eksctl v0.165.0 release and moves it to /usr/local/bin
+    actions:
+      - cmd: |
+          curl --silent --location "https://github.com/weaveworks/eksctl/releases/download/v0.165.0/eksctl_Linux_amd64.tar.gz" | tar xz -C /tmp
+          sudo mv /tmp/eksctl /usr/local/bin
+
+  - name: create-cluster  # writes cluster-config.yaml from a heredoc, dry-runs it, then creates the cluster
+    actions:
+      - cmd: |
+          cat <<EOF > cluster-config.yaml
+          apiVersion: eksctl.io/v1alpha5
+          kind: ClusterConfig
+
+          metadata:
+            name: ${CLUSTER_NAME}
+            region: us-west-2
+            version: "1.27"
+            tags:
+              PermissionsBoundary: ${PERMISSIONS_BOUNDARY_NAME}
+
+          iam:
+            withOIDC: true
+            serviceRolePermissionsBoundary: ${PERMISSIONS_BOUNDARY_ARN}
+
+          addons:
+            - name: aws-ebs-csi-driver
+              version: v1.25.0-eksbuild.1
+
+              attachPolicyARNs:
+                - arn:aws:iam::aws:policy/service-role/AmazonEBSCSIDriverPolicy
+              permissionsBoundary: ${PERMISSIONS_BOUNDARY_ARN}
+              tags:
+                PermissionsBoundary: ${PERMISSIONS_BOUNDARY_NAME}
+
+            - name: vpc-cni
+              permissionsBoundary: ${PERMISSIONS_BOUNDARY_ARN}
+              tags:
+                PermissionsBoundary: ${PERMISSIONS_BOUNDARY_NAME}
+
+          managedNodeGroups:
+            - name: ng-1
+              instanceType: m5.2xlarge
+              desiredCapacity: 3
+              volumeSize: 150
+              tags:
+                PermissionsBoundary: ${PERMISSIONS_BOUNDARY_NAME}
+              iam:
+                instanceRolePermissionsBoundary: ${PERMISSIONS_BOUNDARY_ARN}
+              ami: ${AMI_ID}
+              amiFamily: AmazonLinux2
+              overrideBootstrapCommand: |
+                #!/bin/bash
+                /etc/eks/bootstrap.sh ${CLUSTER_NAME} --container-runtime containerd
+          EOF
+
+      - cmd: eksctl create cluster --dry-run -f cluster-config.yaml
+      - cmd: sleep 5
+      - cmd: eksctl create cluster -f cluster-config.yaml
+      - cmd: eksctl utils write-kubeconfig -c ${CLUSTER_NAME}
+
+  - name: destroy-cluster  # reads the cluster-config.yaml written by create-cluster
+    actions:
+      - cmd: eksctl delete cluster -f cluster-config.yaml --disable-nodegroup-eviction --wait
+
+  - name: create-iac  # apply the bucket Terraform, capture its outputs, then write the bundle config
+    actions:
+      - task: apply-terraform
+      - task: terraform-outputs
+      - task: create-uds-config
+
+  - name: destroy-iac
+    actions:
+      - cmd: terraform destroy -auto-approve
+        dir: .github/test-infra/buckets-iac
+
+  - name: apply-terraform
+    actions:
+      - cmd: echo ${STATE_KEY} | sed 's/\.tfstate/-buckets1.tfstate/g'  # state key for the buckets stack, derived from STATE_KEY
+        setVariables:
+          - name: BUCKETS_STATE_KEY
+        dir: .github/test-infra/buckets-iac
+      - cmd: echo ${BUCKETS_STATE_KEY}
+      - cmd: |
+          terraform init -force-copy \
+            -backend-config="bucket=${STATE_BUCKET_NAME}" \
+            -backend-config="key=${BUCKETS_STATE_KEY}" \
+            -backend-config="region=${REGION}" \
+            -backend-config="dynamodb_table=${STATE_DYNAMODB_TABLE_NAME}"
+        dir: .github/test-infra/buckets-iac
+      - cmd: terraform apply -auto-approve
+        dir: .github/test-infra/buckets-iac
+
+  - name: terraform-outputs  # stores each Terraform output in a variable read by create-uds-config
+    actions:
+      - cmd: terraform output -raw loki_s3_bucket
+        setVariables:
+          - name: "LOKI_S3_BUCKET"
+        dir: .github/test-infra/buckets-iac
+      - cmd: terraform output -raw aws_region
+        setVariables:
+          - name: LOKI_S3_AWS_REGION
+        dir: .github/test-infra/buckets-iac
+      - cmd: terraform output -raw loki_irsa_role_arn
+        setVariables:
+          - name: LOKI_S3_ROLE_ARN
+        dir: .github/test-infra/buckets-iac
+      - cmd: terraform output -raw velero_s3_bucket
+        setVariables:
+          - name: VELERO_S3_BUCKET
+        dir: .github/test-infra/buckets-iac
+      - cmd: terraform output -raw aws_region
+        setVariables:
+          - name: VELERO_S3_AWS_REGION
+        dir: .github/test-infra/buckets-iac
+      - cmd: terraform output -raw velero_irsa_role_arn
+        setVariables:
+          - name: VELERO_S3_ROLE_ARN
+        dir: .github/test-infra/buckets-iac
+
+  - name: create-uds-config  # overwrites .github/bundles/uds-config.yaml with the real bucket and role values
+    actions:
+      - cmd: |
+          cat <<EOF > .github/bundles/uds-config.yaml
+          options:
+            architecture: amd64
+          variables:
+            core:
+              loki_chunks_bucket: ${LOKI_S3_BUCKET}
+              loki_ruler_bucket: ${LOKI_S3_BUCKET}
+              loki_admin_bucket: ${LOKI_S3_BUCKET}
+              loki_s3_region: ${LOKI_S3_AWS_REGION}
+              loki_s3_endpoint: ""
+              loki_irsa_annotation:
+                eks.amazonaws.com/role-arn: "${LOKI_S3_ROLE_ARN}"
+              velero_use_secret: false
+              velero_irsa_annotation:
+                eks.amazonaws.com/role-arn: "${VELERO_S3_ROLE_ARN}"
+              velero_bucket: ${VELERO_S3_BUCKET}
+              velero_bucket_region: ${VELERO_S3_AWS_REGION}
+              velero_bucket_provider_url: ""
+              velero_bucket_credential_name: ""
+              velero_bucket_credential_key: ""
+          EOF