From 725f72a8a53eaf23863ebe97b4fa2eca4f2c2f3a Mon Sep 17 00:00:00 2001 From: Jason Deal Date: Thu, 24 Oct 2024 11:02:05 -0700 Subject: [PATCH] review updates --- .../docs/upgrading/get-controller-policy.sh | 16 ++ .../en/docs/upgrading/upgrade-guide.md | 12 +- .../docs/upgrading/v1-controller-policy.json | 259 ------------------ .../content/en/docs/upgrading/v1-migration.md | 174 +++++++----- 4 files changed, 131 insertions(+), 330 deletions(-) create mode 100755 website/content/en/docs/upgrading/get-controller-policy.sh delete mode 100644 website/content/en/docs/upgrading/v1-controller-policy.json diff --git a/website/content/en/docs/upgrading/get-controller-policy.sh b/website/content/en/docs/upgrading/get-controller-policy.sh new file mode 100755 index 000000000000..efe8a6755ffd --- /dev/null +++ b/website/content/en/docs/upgrading/get-controller-policy.sh @@ -0,0 +1,16 @@ +#!/usr/bin/env bash + +sourceVersionCfn=$(mktemp) +versionTag=$([[ ${KARPENTER_VERSION} == v* ]] && echo "${KARPENTER_VERSION}" || echo "v${KARPENTER_VERSION}") +curl -fsSL https://raw.githubusercontent.com/aws/karpenter-provider-aws/${versionTag}/website/content/en/preview/getting-started/getting-started-with-karpenter/cloudformation.yaml > ${sourceVersionCfn} + +# Substitute the cloudformation templating strings for our environment variables +sed -e 's/!Sub//g' -i "" "${sourceVersionCfn}" +sed -e 's/${AWS::Partition}/${AWS_PARTITION}/g' -i "" "${sourceVersionCfn}" +sed -e 's/${AWS::Region}/${AWS_REGION}/g' -i "" "${sourceVersionCfn}" +sed -e 's/${AWS::AccountId}/${AWS_ACCOUNT_ID}/g' -i "" "${sourceVersionCfn}" +sed -e 's/${ClusterName}/${CLUSTER_NAME}/g' -i "" "${sourceVersionCfn}" +sed -e 's/${KarpenterInterruptionQueue.Arn}/arn:${AWS_PARTITION}:sqs:${AWS_REGION}:${AWS_ACCOUNT_ID}:${CLUSTER_NAME}/g' -i "" "${sourceVersionCfn}" +sed -e 's/${KarpenterNodeRole.Arn}/arn:${AWS_PARTITION}:iam::${AWS_ACCOUNT_ID}:role\/KarpenterNodeRole-${CLUSTER_NAME}/g' -i "" "${sourceVersionCfn}" + +yq '.Resources.KarpenterControllerPolicy.Properties.PolicyDocument' ${sourceVersionCfn} | envsubst diff --git a/website/content/en/docs/upgrading/upgrade-guide.md b/website/content/en/docs/upgrading/upgrade-guide.md index cf04da9b5a59..ca6ef0bd16d4 100644 --- a/website/content/en/docs/upgrading/upgrade-guide.md +++ b/website/content/en/docs/upgrading/upgrade-guide.md @@ -50,7 +50,7 @@ Refer to the `v1` Migration Guide for the [full changelog]({{= 1.25, no further action is required. If you are using a Kubernetes version below 1.25, you now need to set `DISABLE_WEBHOOK=false` in your container environment variables or `--set webhook.enabled=true` if using Helm. View the [Webhook Support Deprecated in Favor of CEL Section of the v1beta1 Migration Guide]({{}}). +* ~~`0.33.0` disables mutating and validating webhooks by default in favor of using [Common Expression Language for CRD validation](https://kubernetes.io/docs/tasks/extend-kubernetes/custom-resources/custom-resource-definitions/#validation). The Common Expression Language Validation Feature [is enabled by default on EKS 1.25](https://kubernetes.io/docs/tasks/extend-kubernetes/custom-resources/custom-resource-definitions/#validation-rules). If you are using Kubernetes version >= 1.25, no further action is required. If you are using a Kubernetes version below 1.25, you now need to set `DISABLE_WEBHOOK=false` in your container environment variables or `--set webhook.enabled=true` if using Helm. 
View the [Webhook Support Deprecated in Favor of CEL Section of the v1beta1 Migration Guide]({{}}).~~ * `0.33.0` drops support for passing settings through the `karpenter-global-settings` ConfigMap. You should pass settings through the container environment variables in the Karpenter deployment manifest. View the [Global Settings Section of the v1beta1 Migration Guide]({{}}) for more details. * `0.33.0` enables `Drift=true` by default in the `FEATURE_GATES`. If you previously didn't enable the feature gate, Karpenter will now check if there is a difference between the desired state of your nodes declared in your NodePool and the actual state of your nodes. View the [Drift Section of Disruption Conceptual Docs]({{}}) for more details. * `0.33.0` drops looking up the `zap-logger-config` through ConfigMap discovery. Instead, Karpenter now expects the logging config to be mounted on the filesystem if you are using this to configure Zap logging. This is not enabled by default, but can be enabled through `--set logConfig.enabled=true` in the Helm values. If you are setting any values in the `logConfig` from the `0.32.x` upgrade, such as `logConfig.logEncoding`, note that you will have to explicitly set `logConfig.enabled=true` alongside it. Also, note that setting the Zap logging config is a deprecated feature in beta and is planned to be dropped at v1. View the [Logging Configuration Section of the v1beta1 Migration Guide]({{}}) for more details. diff --git a/website/content/en/docs/upgrading/v1-controller-policy.json b/website/content/en/docs/upgrading/v1-controller-policy.json deleted file mode 100644 index 33109567611b..000000000000 --- a/website/content/en/docs/upgrading/v1-controller-policy.json +++ /dev/null @@ -1,259 +0,0 @@ -{ - "Version": "2012-10-17", - "Statement": [ - { - "Sid": "AllowScopedEC2InstanceAccessActions", - "Effect": "Allow", - "Resource": [ - "arn:${AWS_PARTITION}:ec2:${AWS_REGION}::image/*", - "arn:${AWS_PARTITION}:ec2:${AWS_REGION}::snapshot/*", - "arn:${AWS_PARTITION}:ec2:${AWS_REGION}:*:security-group/*", - "arn:${AWS_PARTITION}:ec2:${AWS_REGION}:*:subnet/*" - ], - "Action": [ - "ec2:RunInstances", - "ec2:CreateFleet" - ] - }, - { - "Sid": "AllowScopedEC2LaunchTemplateAccessActions", - "Effect": "Allow", - "Resource": "arn:${AWS_PARTITION}:ec2:${AWS_REGION}:*:launch-template/*", - "Action": [ - "ec2:RunInstances", - "ec2:CreateFleet" - ], - "Condition": { - "StringEquals": { - "aws:ResourceTag/kubernetes.io/cluster/${CLUSTER_NAME}": "owned" - }, - "StringLike": { - "aws:ResourceTag/karpenter.sh/nodepool": "*" - } - } - }, - { - "Sid": "AllowScopedEC2InstanceActionsWithTags", - "Effect": "Allow", - "Resource": [ - "arn:${AWS_PARTITION}:ec2:${AWS_REGION}:*:fleet/*", - "arn:${AWS_PARTITION}:ec2:${AWS_REGION}:*:instance/*", - "arn:${AWS_PARTITION}:ec2:${AWS_REGION}:*:volume/*", - "arn:${AWS_PARTITION}:ec2:${AWS_REGION}:*:network-interface/*", - "arn:${AWS_PARTITION}:ec2:${AWS_REGION}:*:launch-template/*", - "arn:${AWS_PARTITION}:ec2:${AWS_REGION}:*:spot-instances-request/*" - ], - "Action": [ - "ec2:RunInstances", - "ec2:CreateFleet", - "ec2:CreateLaunchTemplate" - ], - "Condition": { - "StringEquals": { - "aws:RequestTag/kubernetes.io/cluster/${CLUSTER_NAME}": "owned", - "aws:RequestTag/eks:eks-cluster-name": "${CLUSTER_NAME}" - }, - "StringLike": { - "aws:RequestTag/karpenter.sh/nodepool": "*" - } - } - }, - { - "Sid": "AllowScopedResourceCreationTagging", - "Effect": "Allow", - "Resource": [ - "arn:${AWS_PARTITION}:ec2:${AWS_REGION}:*:fleet/*", - 
"arn:${AWS_PARTITION}:ec2:${AWS_REGION}:*:instance/*", - "arn:${AWS_PARTITION}:ec2:${AWS_REGION}:*:volume/*", - "arn:${AWS_PARTITION}:ec2:${AWS_REGION}:*:network-interface/*", - "arn:${AWS_PARTITION}:ec2:${AWS_REGION}:*:launch-template/*", - "arn:${AWS_PARTITION}:ec2:${AWS_REGION}:*:spot-instances-request/*" - ], - "Action": "ec2:CreateTags", - "Condition": { - "StringEquals": { - "aws:RequestTag/kubernetes.io/cluster/${CLUSTER_NAME}": "owned", - "aws:RequestTag/eks:eks-cluster-name": "${CLUSTER_NAME}", - "ec2:CreateAction": [ - "RunInstances", - "CreateFleet", - "CreateLaunchTemplate" - ] - }, - "StringLike": { - "aws:RequestTag/karpenter.sh/nodepool": "*" - } - } - }, - { - "Sid": "AllowScopedResourceTagging", - "Effect": "Allow", - "Resource": "arn:${AWS_PARTITION}:ec2:${AWS_REGION}:*:instance/*", - "Action": "ec2:CreateTags", - "Condition": { - "StringEquals": { - "aws:ResourceTag/kubernetes.io/cluster/${CLUSTER_NAME}": "owned" - }, - "StringLike": { - "aws:ResourceTag/karpenter.sh/nodepool": "*" - }, - "StringEqualsIfExists": { - "aws:RequestTag/eks:eks-cluster-name": "${CLUSTER_NAME}" - }, - "ForAllValues:StringEquals": { - "aws:TagKeys": [ - "eks:eks-cluster-name", - "karpenter.sh/nodeclaim", - "Name" - ] - } - } - }, - { - "Sid": "AllowScopedDeletion", - "Effect": "Allow", - "Resource": [ - "arn:${AWS_PARTITION}:ec2:${AWS_REGION}:*:instance/*", - "arn:${AWS_PARTITION}:ec2:${AWS_REGION}:*:launch-template/*" - ], - "Action": [ - "ec2:TerminateInstances", - "ec2:DeleteLaunchTemplate" - ], - "Condition": { - "StringEquals": { - "aws:ResourceTag/kubernetes.io/cluster/${CLUSTER_NAME}": "owned" - }, - "StringLike": { - "aws:ResourceTag/karpenter.sh/nodepool": "*" - } - } - }, - { - "Sid": "AllowRegionalReadActions", - "Effect": "Allow", - "Resource": "*", - "Action": [ - "ec2:DescribeImages", - "ec2:DescribeInstances", - "ec2:DescribeInstanceTypeOfferings", - "ec2:DescribeInstanceTypes", - "ec2:DescribeLaunchTemplates", - "ec2:DescribeSecurityGroups", - "ec2:DescribeSpotPriceHistory", - "ec2:DescribeSubnets" - ], - "Condition": { - "StringEquals": { - "aws:RequestedRegion": "${AWS_REGION}" - } - } - }, - { - "Sid": "AllowSSMReadActions", - "Effect": "Allow", - "Resource": "arn:${AWS_PARTITION}:ssm:${AWS_REGION}::parameter/aws/service/*", - "Action": "ssm:GetParameter" - }, - { - "Sid": "AllowPricingReadActions", - "Effect": "Allow", - "Resource": "*", - "Action": "pricing:GetProducts" - }, - { - "Sid": "AllowInterruptionQueueActions", - "Effect": "Allow", - "Resource": "arn:${AWS_PARTITION}:sqs:${AWS_REGION}:${AWS_ACCOUNT_ID}:${CLUSTER_NAME}", - "Action": [ - "sqs:DeleteMessage", - "sqs:GetQueueUrl", - "sqs:ReceiveMessage" - ] - }, - { - "Sid": "AllowPassingInstanceRole", - "Effect": "Allow", - "Resource": "arn:${AWS_PARTITION}:iam::${AWS_ACCOUNT_ID}:role/KarpenterNodeRole-${CLUSTER_NAME}", - "Action": "iam:PassRole", - "Condition": { - "StringEquals": { - "iam:PassedToService": [ - "ec2.amazonaws.com", - "ec2.amazonaws.com.cn" - ] - } - } - }, - { - "Sid": "AllowScopedInstanceProfileCreationActions", - "Effect": "Allow", - "Resource": "arn:${AWS_PARTITION}:iam::${AWS_ACCOUNT_ID}:instance-profile/*", - "Action": [ - "iam:CreateInstanceProfile" - ], - "Condition": { - "StringEquals": { - "aws:RequestTag/kubernetes.io/cluster/${CLUSTER_NAME}": "owned", - "aws:RequestTag/eks:eks-cluster-name": "${CLUSTER_NAME}", - "aws:RequestTag/topology.kubernetes.io/region": "${AWS_REGION}" - }, - "StringLike": { - "aws:RequestTag/karpenter.k8s.aws/ec2nodeclass": "*" - } - } - }, - { - "Sid": 
"AllowScopedInstanceProfileTagActions", - "Effect": "Allow", - "Resource": "arn:${AWS_PARTITION}:iam::${AWS_ACCOUNT_ID}:instance-profile/*", - "Action": [ - "iam:TagInstanceProfile" - ], - "Condition": { - "StringEquals": { - "aws:ResourceTag/kubernetes.io/cluster/${CLUSTER_NAME}": "owned", - "aws:ResourceTag/topology.kubernetes.io/region": "${AWS_REGION}", - "aws:RequestTag/kubernetes.io/cluster/${CLUSTER_NAME}": "owned", - "aws:RequestTag/eks:eks-cluster-name": "${CLUSTER_NAME}", - "aws:RequestTag/topology.kubernetes.io/region": "${AWS_REGION}" - }, - "StringLike": { - "aws:ResourceTag/karpenter.k8s.aws/ec2nodeclass": "*", - "aws:RequestTag/karpenter.k8s.aws/ec2nodeclass": "*" - } - } - }, - { - "Sid": "AllowScopedInstanceProfileActions", - "Effect": "Allow", - "Resource": "arn:${AWS_PARTITION}:iam::${AWS_ACCOUNT_ID}:instance-profile/*", - "Action": [ - "iam:AddRoleToInstanceProfile", - "iam:RemoveRoleFromInstanceProfile", - "iam:DeleteInstanceProfile" - ], - "Condition": { - "StringEquals": { - "aws:ResourceTag/kubernetes.io/cluster/${CLUSTER_NAME}": "owned", - "aws:ResourceTag/topology.kubernetes.io/region": "${AWS_REGION}" - }, - "StringLike": { - "aws:ResourceTag/karpenter.k8s.aws/ec2nodeclass": "*" - } - } - }, - { - "Sid": "AllowInstanceProfileReadActions", - "Effect": "Allow", - "Resource": "arn:${AWS_PARTITION}:iam::${AWS_ACCOUNT_ID}:instance-profile/*", - "Action": "iam:GetInstanceProfile" - }, - { - "Sid": "AllowAPIServerEndpointDiscovery", - "Effect": "Allow", - "Resource": "arn:${AWS_PARTITION}:eks:${AWS_REGION}:${AWS_ACCOUNT_ID}:cluster/${CLUSTER_NAME}", - "Action": "eks:DescribeCluster" - } - ] -} diff --git a/website/content/en/docs/upgrading/v1-migration.md b/website/content/en/docs/upgrading/v1-migration.md index af58300f8e7f..16e3644674d3 100644 --- a/website/content/en/docs/upgrading/v1-migration.md +++ b/website/content/en/docs/upgrading/v1-migration.md @@ -121,6 +121,8 @@ spec: effect: NoExecute ``` +If you are using one of Karpenter's managed AMI families, this will be handled for you by Karpenter's [generated UserData]({{}}). + ## Upgrading Before proceeding with the upgrade, be sure to review the [changelog]({{}}) and review the [upgrade procedure]({{}}) in its entirety. @@ -134,7 +136,7 @@ Upgrading directly may leave you unable to rollback. For more information on the rollback procedure, refer to the [downgrading section]({{}}). {{% alert title="Note" color="primary" %}} -The examples provided in the [upgrade procedure]({{}}) demonstrate how to perform the migration by manually updating the helm charts and IAM roles. +The examples provided in the [upgrade procedure]({{}}) assume you've installed Karpenter following the [getting started guide]({{}}). If you are using IaC / GitOps, you may need to adapt the procedure to fit your specific infrastructure solution. You should still review the upgrade procedure; the sequence of operations remains the same regardless of the solution used to roll out the changes. {{% /alert %}} @@ -155,7 +157,7 @@ You should still review the upgrade procedure; the sequence of operations remain 2. Determine your current Karpenter version: ```bash - kubectl get pod -A -l app.kubernetes.io/name=karpenter -ojsonpath='{.items[0].spec.containers[0].image}' | sed -e 's/^[^:]*:\([^@]*\).*$/\1/' + kubectl get deployment -A -l app.kubernetes.io/name=karpenter -ojsonpath="{.items[0].metadata.labels['app\.kubernetes\.io/version']}{'\n'}" ``` To upgrade to v1, you must be running a Karpenter version between `v0.33` and `v0.37`. 
@@ -196,7 +198,7 @@ You should still review the upgrade procedure; the sequence of operations remain 5. Apply the latest patch version of your current minor version's Custom Resource Definitions (CRDs). Applying this version of the CRDs will enable the use of both the `v1` and `v1beta1` APIs on this version via the conversion webhooks. - Note that this is only for rollback purposes, and new features available with the `v1` APIs may not work on your minor version. + Note that this is only for rollback purposes, and new features available with the `v1` APIs will not work on your minor version. ```bash helm upgrade --install karpenter-crd oci://public.ecr.aws/karpenter/karpenter-crd --version "${KARPENTER_VERSION}" --namespace "${KARPENTER_NAMESPACE}" --create-namespace \ @@ -205,7 +207,7 @@ You should still review the upgrade procedure; the sequence of operations remain --set webhook.port=8443 ``` {{% alert title="Note" color="primary" %}} - To properly template the `conversion` stanza in the CRD, the `karpenter-crd` chart must be used. + To properly template the `conversion` field in the CRD, the `karpenter-crd` chart must be used. If you're using a GitOps solution to manage your Karpenter installation, you should use this chart to manage your CRDs. You should set `skipCrds` to true for the main `karpenter` chart (e.g. [Argo CD](https://argo-cd.readthedocs.io/en/latest/user-guide/helm/#helm-skip-crds)). @@ -238,7 +240,7 @@ You should still review the upgrade procedure; the sequence of operations remain {{% /alert %}} -6. Validate that Karpenter is operating as expected on this latest patch release. +6. Validate that Karpenter is operating as expected on this patch release. If you need to rollback after upgrading to `v1`, this is the version you will need to rollback to. {{% alert title="Note" color="primary" %}} @@ -254,17 +256,17 @@ You should still review the upgrade procedure; the sequence of operations remain 8. Attach the v1 policy to your existing NodeRole. Notable Changes to the IAM Policy include additional tag-scoping for the `eks:eks-cluster-name` tag for instances and instance profiles. + We will remove this additional policy later once the controller has been migrated to v1 and we've updated the Karpenter cloudformation stack. - ```bash - TEMPOUT=$(mktemp) - curl -fsSL https://raw.githubusercontent.com/aws/karpenter-provider-aws/website/content/en/docs/upgrading/v1-controller-policy.json > ${TEMPOUT} - POLICY_DOCUMENT=$(envsubst < ${TEMPOUT}) - POLICY_NAME="KarpenterControllerPolicy-${CLUSTER_NAME}-v1" - ROLE_NAME="${CLUSTER_NAME}-karpenter" - POLICY_ARN="$(aws iam create-policy --policy-name "${POLICY_NAME}" --policy-document "${POLICY_DOCUMENT}" | jq -r .Policy.Arn)" - aws iam attach-role-policy --role-name "${ROLE_NAME}" --policy-arn "${POLICY_ARN}" - ``` + ```bash + POLICY_DOCUMENT=$(mktemp) + curl -fsSL https://raw.githubusercontent.com/aws/karpenter-provider-aws/website/content/en/docs/v1.0/upgrading/get-controller-policy.sh | sh | envsubst > ${POLICY_DOCUMENT} + POLICY_NAME="KarpenterControllerPolicy-${CLUSTER_NAME}-v1" + ROLE_NAME="${CLUSTER_NAME}-karpenter" + POLICY_ARN="$(aws iam create-policy --policy-name "${POLICY_NAME}" --policy-document "file://${POLICY_DOCUMENT}" | jq -r .Policy.Arn)" + aws iam attach-role-policy --role-name "${ROLE_NAME}" --policy-arn "${POLICY_ARN}" + ``` 9. 
Apply the `v1` Custom Resource Definitions (CRDs): @@ -294,13 +296,22 @@ You should still review the upgrade procedure; the sequence of operations remain Karpenter has deprecated and moved a number of Helm values as part of the v1 release. Ensure that you upgrade to the newer version of these helm values during your migration to v1. You can find detail for all the settings that were moved in the [v1 Upgrade Reference]({{}}). {{% /alert %}} -11. Remove the `v1beta1` IAM policy from your NodeRole. +11. Upgrade your cloudformation stack and remove the temporary `v1` controller policy. ```bash + TEMPOUT=$(mktemp) + curl -fsSL https://raw.githubusercontent.com/aws/karpenter-provider-aws/v"${KARPENTER_VERSION}"/website/content/en/preview/getting-started/getting-started-with-karpenter/cloudformation.yaml > "${TEMPOUT}" + aws cloudformation deploy \ + --stack-name "Karpenter-${CLUSTER_NAME}" \ + --template-file "${TEMPOUT}" \ + --capabilities CAPABILITY_NAMED_IAM \ + --parameter-overrides "ClusterName=${CLUSTER_NAME}" + ROLE_NAME="${CLUSTER_NAME}-karpenter" - POLICY_NAME="KarpenterControllerPolicy-${CLUSTER_NAME}" + POLICY_NAME="KarpenterControllerPolicy-${CLUSTER_NAME}-v1" POLICY_ARN=$(aws iam list-policies --query "Policies[?PolicyName=='${POLICY_NAME}'].Arn" --output text) aws iam detach-role-policy --role-name "${ROLE_NAME}" --policy-arn "${POLICY_ARN}" + aws iam delete-policy --policy-arn "${POLICY_ARN}" ``` ## Downgrading @@ -354,51 +365,21 @@ For example: `kubectl get nodepoll.v1beta1.karpenter.sh`. export KARPENTER_VERSION="0.37.5" # Replace with your minor version ``` - {{% alert title="Warning" color="warning" %}} - If you open a new shell to run steps in this procedure, you need to set some or all of the environment variables again. - To remind yourself of these values, type: - - ```bash - echo "${KARPENTER_NAMESPACE}" "${KARPENTER_VERSION}" "${CLUSTER_NAME}" "${TEMPOUT}" - ``` - - {{% /alert %}} - 3. Attach the `v1beta1` policy from your target version to your existing NodeRole. - If you didn't remove the `v1beta1` policy after upgrading to v1, you may skip this step. 
```bash - TEMPOUT=$(mktemp) - VERSION_TAG=$([[ ${KARPENTER_VERSION} == v* ]] && echo "${KARPENTER_VERSION}" || echo "v${KARPENTER_VERSION}") - curl -fsSL https://raw.githubusercontent.com/aws/karpenter-provider-aws/${VERSION_TAG}/website/content/en/preview/getting-started/getting-started-with-karpenter/cloudformation.yaml > ${TEMPOUT} - sed -e 's/!Sub//g' -i "" "${TEMPOUT}" - sed -e 's/${AWS::Partition}/${AWS_PARTITION}/g' -i "" "${TEMPOUT}" - sed -e 's/${AWS::Region}/${AWS_REGION}/g' -i "" "${TEMPOUT}" - sed -e 's/${AWS::AccountId}/${AWS_ACCOUNT_ID}/g' -i "" "${TEMPOUT}" - sed -e 's/${ClusterName}/${CLUSTER_NAME}/g' -i "" "${TEMPOUT}" - sed -e 's/${KarpenterInterruptionQueue.Arn}/arn:${AWS_PARTITION}:sqs:${AWS_REGION}:${AWS_ACCOUNT_ID}:${CLUSTER_NAME}/g' -i "" "${TEMPOUT}" - sed -e 's/${KarpenterNodeRole.Arn}/arn:${AWS_PARTITION}:iam::${AWS_ACCOUNT_ID}:role\/KarpenterNodeRole-${CLUSTER_NAME}/g' -i "" "${TEMPOUT}" - - POLICY_DOCUMENT=$(yq '.Resources.KarpenterControllerPolicy.Properties.PolicyDocument' ${TEMPOUT} | envsubst) - POLICY_NAME="KarpenterControllerPolicy-${CLUSTER_NAME}-${VERSION_TAG}" + POLICY_DOCUMENT=$(mktemp) + curl -fsSL https://raw.githubusercontent.com/aws/karpenter-provider-aws/website/docs/v1.0/upgrading/get-controller-policy.sh | sh | envsubst > ${POLICY_DOCUMENT} + POLICY_NAME="KarpenterControllerPolicy-${CLUSTER_NAME}-${KARPENTER_VERSION}" ROLE_NAME="${CLUSTER_NAME}-karpenter" - POLICY_ARN="$(aws iam create-policy --policy-name "${POLICY_NAME}" --policy-document "${POLICY_DOCUMENT}" | jq -r .Policy.Arn)" + POLICY_ARN="$(aws iam create-policy --policy-name "${POLICY_NAME}" --policy-document "file://${POLICY_DOCUMENT}" | jq -r .Policy.Arn)" aws iam attach-role-policy --role-name "${ROLE_NAME}" --policy-arn "${POLICY_ARN}" ``` -4. Rollback the CRDs. +4. Rollback the Karpenter Controller: Note that webhooks must be **enabled** to rollback. Without enabling the webhooks, Karpenter will be unable to correctly operate on `v1` versions of the resources already stored in ETCD. - ```bash - helm upgrade --install karpenter-crd oci://public.ecr.aws/karpenter/karpenter-crd --version "${KARPENTER_VERSION}" --namespace "${KARPENTER_NAMESPACE}" --create-namespace \ - --set webhook.enabled=true \ - --set webhook.serviceName=karpenter \ - --set webhook.port=8443 - ``` - -5. Rollback the Karpenter Controller: - ```bash # Service account annotation can be dropped when using pod identity helm upgrade --install karpenter oci://public.ecr.aws/karpenter/karpenter --version ${KARPENTER_VERSION} --namespace "${KARPENTER_NAMESPACE}" --create-namespace \ @@ -414,6 +395,34 @@ For example: `kubectl get nodepoll.v1beta1.karpenter.sh`. --wait ``` +5. Rollback the CRDs. + + ```bash + helm upgrade --install karpenter-crd oci://public.ecr.aws/karpenter/karpenter-crd --version "${KARPENTER_VERSION}" --namespace "${KARPENTER_NAMESPACE}" --create-namespace \ + --set webhook.enabled=true \ + --set webhook.serviceName=karpenter \ + --set webhook.port=8443 + ``` + +6. Rollback your cloudformation stack and remove the temporary `v1beta1` controller policy. 
+
+   ```bash
+   TEMPOUT=$(mktemp)
+   VERSION_TAG=$([[ ${KARPENTER_VERSION} == v* ]] && echo "${KARPENTER_VERSION}" || echo "v${KARPENTER_VERSION}")
+   curl -fsSL https://raw.githubusercontent.com/aws/karpenter-provider-aws/${VERSION_TAG}/website/content/en/preview/getting-started/getting-started-with-karpenter/cloudformation.yaml > "${TEMPOUT}"
+   aws cloudformation deploy \
+     --stack-name "Karpenter-${CLUSTER_NAME}" \
+     --template-file "${TEMPOUT}" \
+     --capabilities CAPABILITY_NAMED_IAM \
+     --parameter-overrides "ClusterName=${CLUSTER_NAME}"
+
+   ROLE_NAME="${CLUSTER_NAME}-karpenter"
+   POLICY_NAME="KarpenterControllerPolicy-${CLUSTER_NAME}-${KARPENTER_VERSION}"
+   POLICY_ARN=$(aws iam list-policies --query "Policies[?PolicyName=='${POLICY_NAME}'].Arn" --output text)
+   aws iam detach-role-policy --role-name "${ROLE_NAME}" --policy-arn "${POLICY_ARN}"
+   aws iam delete-policy --policy-arn "${POLICY_ARN}"
+   ```
+
 ## Before Upgrading to `v1.1.0`
 
 You've successfully upgraded to `v1.0`, but more than likely your manifests are still `v1beta1`.
@@ -466,24 +475,25 @@ kubectl get nodepools default -o yaml > v1-nodepool.yaml
 ```
 
 {{% alert title="Note" color="primary" %}}
-Due to the many-to-one relation between `NodePools` and `EC2NodeClasses`, the `kubelet` stanza is **not** automtatically migrated by the conversion webhooks.
-When updating your manifests, make sure you are migrating the `kubelet` stanza from your `NodePools` to your `EC2NodeClasses`.
+Due to the many-to-one relation between `NodePools` and `EC2NodeClasses`, the `kubelet` field is **not** automatically migrated by the conversion webhooks.
+When updating your manifests, make sure you are migrating the `kubelet` field from your `NodePools` to your `EC2NodeClasses`.
 For more information, refer to [kubelet configuration migration]({{}}).
 {{% /alert %}}
 
 #### Kubelet Configuration Migration
 
-One of the changes made to the `NodePool` and `EC2NodeClass` schemas for `v1` was the migration of the `kubelet` stanza from the `NodePool` to the `EC2NodeClass`.
+One of the changes made to the `NodePool` and `EC2NodeClass` schemas for `v1` was the migration of the `kubelet` field from the `NodePool` to the `EC2NodeClass`.
 This change is difficult to properly handle with conversion webhooks due to the many-to-one relation between `NodePools` and `EC2NodeClasses`.
 To facilitate this, Karpenter adds the `compatibility.karpenter.sh/v1beta1-kubelet-conversion` annotation to converted `NodePools`.
-If this annotation is present, it will take precedence over the `kubelet` stanza in the `EC2NodeClass`.
+If this annotation is present, it will take precedence over the `kubelet` field in the `EC2NodeClass`.
 This annotation is only meant to support migration, and support will be dropped in `v1.1`.
 Before upgrading to `v1.1+`, you must migrate your kubelet configuration to your `EC2NodeClasses`, and remove the compatibility annotation from your `NodePools`.
 
 {{% alert title="Warning" color="warning" %}}
-Do not remove the compatibility annotation until you have updated your `EC2NodeClass` with the matching `kubelet` stanza.
-Prematurely removing the compatibility annotation will result in Node drift.
+Do not remove the compatibility annotation until you have updated your `EC2NodeClass` with the matching `kubelet` field.
+Once the annotation is removed, the `EC2NodeClass` will be used as the source of truth for your kubelet configuration.
+If the field doesn't match, this will result in Node drift.
If you need to rollback to a pre-`v1.0` version after removing the compatibility annotation, you must re-add it before rolling back. {{% /alert %}} @@ -527,8 +537,46 @@ metadata: ``` In this example, we have two `NodePools` with different `kubelet` values, but they refer to the same `EC2NodeClass`. -When handling conversion, Karpenter will annotate the `NodePools` with the `compatibility.karpenter.sh/v1beta1-kubelet-conversion` annotation. +The conversion webhook will annotate the `NodePools` with the `compatibility.karpenter.sh/v1beta1-kubelet-conversion` annotation. +This is the result of that conversion: + +```yaml +apiVersion: karpenter.sh/v1 +kind: NodePool +metadata: + name: nodepool-a + annotations: + compatibility.karpenter.sh/v1beta1-kubelet-conversion: "{\"maxPods\": 10}" +spec: + template: + spec: + nodeClassRef: + group: karpenter.k8s.aws + kind: EC2NodeClass + name: nodeclass +--- +apiVersion: karpenter.sh/v1 +kind: NodePool +metadata: + name: nodepool-b + annotations: + compatibility.karpenter.sh/v1beta1-kubelet-conversion: "{\"maxPods\": 20}" +spec: + template: + spec: + nodeClassRef: + group: karpenter.k8s.aws + kind: EC2NodeClass + name: nodeclass +--- +apiVersion: karpenter.k8s.aws/v1 +kind: EC2NodeClass +metadata: + name: nodeclass +``` + Before upgrading to `v1.1`, you must update your `NodePools` to refer to separate `EC2NodeClasses` to retain this behavior. +Note that this will drift the Nodes associated with these NodePools due to the updated `nodeClassRef`. ```yaml apiVersion: karpenter.sh/v1 @@ -543,7 +591,7 @@ spec: kind: EC2NodeClass name: nodeclass-a --- -apiVersion: karpenter.sh/v1beta1 +apiVersion: karpenter.sh/v1 kind: NodePool metadata: name: nodepool-b @@ -555,7 +603,7 @@ spec: kind: EC2NodeClass name: nodeclass-b --- -apiVersion: karpenter.k8s.aws/v1beta1 +apiVersion: karpenter.k8s.aws/v1 kind: EC2NodeClass metadata: name: nodeclass-a @@ -563,7 +611,7 @@ spec: kubelet: maxPods: 10 --- -apiVersion: karpenter.k8s.aws/v1beta1 +apiVersion: karpenter.k8s.aws/v1 kind: EC2NodeClass metadata: name: nodeclass-b @@ -572,10 +620,6 @@ spec: maxPods: 20 ``` -{{% alert title="Note" color="primary" %}} -Updating the `nodeClassRef` for your `NodePools` will cause those `NodePools`' nodes to drift. -{{% /alert %}} - ## Changelog