From 9e853fbf37bbb555d66bb516597f6c9d11ea7737 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 22 Apr 2024 08:47:31 -0700 Subject: [PATCH 01/17] chore(deps): bump the go-deps group with 2 updates (#6075) Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- go.mod | 4 ++-- go.sum | 8 ++++---- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/go.mod b/go.mod index a6b0f98b9f6c..c518c5aa466a 100644 --- a/go.mod +++ b/go.mod @@ -6,14 +6,14 @@ require ( github.com/Pallinder/go-randomdata v1.2.0 github.com/PuerkitoBio/goquery v1.9.1 github.com/avast/retry-go v3.0.0+incompatible - github.com/aws/aws-sdk-go v1.51.21 + github.com/aws/aws-sdk-go v1.51.25 github.com/aws/karpenter-provider-aws/tools/kompat v0.0.0-20240410220356-6b868db24881 github.com/awslabs/amazon-eks-ami/nodeadm v0.0.0-20240229193347-cfab22a10647 github.com/go-logr/zapr v1.3.0 github.com/imdario/mergo v0.3.16 github.com/mitchellh/hashstructure/v2 v2.0.2 github.com/onsi/ginkgo/v2 v2.17.1 - github.com/onsi/gomega v1.32.0 + github.com/onsi/gomega v1.33.0 github.com/patrickmn/go-cache v2.1.0+incompatible github.com/pelletier/go-toml/v2 v2.2.1 github.com/prometheus/client_golang v1.19.0 diff --git a/go.sum b/go.sum index 42e7c2be239e..b9854fd02f42 100644 --- a/go.sum +++ b/go.sum @@ -54,8 +54,8 @@ github.com/andybalholm/cascadia v1.3.2/go.mod h1:7gtRlve5FxPPgIgX36uWBX58OdBsSS6 github.com/antihax/optional v1.0.0/go.mod h1:uupD/76wgC+ih3iEmQUL+0Ugr19nfwCT1kdvxnR2qWY= github.com/avast/retry-go v3.0.0+incompatible h1:4SOWQ7Qs+oroOTQOYnAHqelpCO0biHSxpiH9JdtuBj0= github.com/avast/retry-go v3.0.0+incompatible/go.mod h1:XtSnn+n/sHqQIpZ10K1qAevBhOOCWBLXXy3hyiqqBrY= -github.com/aws/aws-sdk-go v1.51.21 h1:UrT6JC9R9PkYYXDZBV0qDKTualMr+bfK2eboTknMgbs= -github.com/aws/aws-sdk-go v1.51.21/go.mod h1:LF8svs817+Nz+DmiMQKTO3ubZ/6IaTpq3TjupRn3Eqk= +github.com/aws/aws-sdk-go v1.51.25 h1:DjTT8mtmsachhV6yrXR8+yhnG6120dazr720nopRsls= +github.com/aws/aws-sdk-go v1.51.25/go.mod h1:LF8svs817+Nz+DmiMQKTO3ubZ/6IaTpq3TjupRn3Eqk= github.com/aws/karpenter-provider-aws/tools/kompat v0.0.0-20240410220356-6b868db24881 h1:m9rhsGhdepdQV96tZgfy68oU75AWAjOH8u65OefTjwA= github.com/aws/karpenter-provider-aws/tools/kompat v0.0.0-20240410220356-6b868db24881/go.mod h1:+Mk5k0b6HpKobxNq+B56DOhZ+I/NiPhd5MIBhQMSTSs= github.com/awslabs/amazon-eks-ami/nodeadm v0.0.0-20240229193347-cfab22a10647 h1:8yRBVsjGmI7qQsPWtIrbWP+XfwHO9Wq7gdLVzjqiZFs= @@ -272,8 +272,8 @@ github.com/olekukonko/tablewriter v0.0.5 h1:P2Ga83D34wi1o9J6Wh1mRuqd4mF/x/lgBS7N github.com/olekukonko/tablewriter v0.0.5/go.mod h1:hPp6KlRPjbx+hW8ykQs1w3UBbZlj6HuIJcUGPhkA7kY= github.com/onsi/ginkgo/v2 v2.17.1 h1:V++EzdbhI4ZV4ev0UTIj0PzhzOcReJFyJaLjtSF55M8= github.com/onsi/ginkgo/v2 v2.17.1/go.mod h1:llBI3WDLL9Z6taip6f33H76YcWtJv+7R3HigUjbIBOs= -github.com/onsi/gomega v1.32.0 h1:JRYU78fJ1LPxlckP6Txi/EYqJvjtMrDC04/MM5XRHPk= -github.com/onsi/gomega v1.32.0/go.mod h1:a4x4gW6Pz2yK1MAmvluYme5lvYTn61afQ2ETw/8n4Lg= +github.com/onsi/gomega v1.33.0 h1:snPCflnZrpMsy94p4lXVEkHo12lmPnc3vY5XBbreexE= +github.com/onsi/gomega v1.33.0/go.mod h1:+925n5YtiFsLzzafLUHzVMBpvvRAzrydIBiSIxjX3wY= github.com/patrickmn/go-cache v2.1.0+incompatible h1:HRMgzkcYKYpi3C8ajMPV8OFXaaRUnok+kx1WdO15EQc= github.com/patrickmn/go-cache v2.1.0+incompatible/go.mod h1:3Qf8kWWT7OJRJbdiICTKqZju1ZixQ/KpMGzzAfe6+WQ= github.com/pelletier/go-toml/v2 v2.2.1 h1:9TA9+T8+8CUCO2+WYnDLCgrYi9+omqKXyjDtosvtEhg= From 2056eb60d18dbc9c8967ee98ed3887183cf9f3f9 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 22 Apr 2024 08:47:49 -0700 Subject: [PATCH 02/17] chore(deps): bump actions/checkout from 4.1.2 to 4.1.3 in /.github/actions/e2e/install-prometheus in the action-deps group (#6076) Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- .github/actions/e2e/install-prometheus/action.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/actions/e2e/install-prometheus/action.yaml b/.github/actions/e2e/install-prometheus/action.yaml index f80dd138c8a6..b52721b75ca4 100644 --- a/.github/actions/e2e/install-prometheus/action.yaml +++ b/.github/actions/e2e/install-prometheus/action.yaml @@ -27,7 +27,7 @@ inputs: runs: using: "composite" steps: - - uses: actions/checkout@9bb56186c3b09b4f86b1c65136769dd318469633 # v4.1.2 + - uses: actions/checkout@1d96c772d19495a3b5c517cd2bc0cb401ea0529f # v4.1.3 with: ref: ${{ inputs.git_ref }} - uses: ./.github/actions/e2e/install-helm From 8aaeb7a9ea8c404e199987970dc4de3324736365 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 22 Apr 2024 08:48:03 -0700 Subject: [PATCH 03/17] chore(deps): bump actions/checkout from 4.1.2 to 4.1.3 in /.github/actions/e2e/slack/notify in the action-deps group (#6077) Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- .github/actions/e2e/slack/notify/action.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/actions/e2e/slack/notify/action.yaml b/.github/actions/e2e/slack/notify/action.yaml index 3fadcd90954f..91160181ef54 100644 --- a/.github/actions/e2e/slack/notify/action.yaml +++ b/.github/actions/e2e/slack/notify/action.yaml @@ -17,7 +17,7 @@ inputs: runs: using: "composite" steps: - - uses: actions/checkout@9bb56186c3b09b4f86b1c65136769dd318469633 # v4.1.2 + - uses: actions/checkout@1d96c772d19495a3b5c517cd2bc0cb401ea0529f # v4.1.3 with: ref: ${{ inputs.git_ref }} - id: get-run-name From 0c281a8f80985b822cac9fe1340de7061b8df211 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 22 Apr 2024 08:48:16 -0700 Subject: [PATCH 04/17] chore(deps): bump actions/checkout from 4.1.2 to 4.1.3 in /.github/actions/e2e/install-karpenter in the action-deps group (#6080) Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- .github/actions/e2e/install-karpenter/action.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/actions/e2e/install-karpenter/action.yaml b/.github/actions/e2e/install-karpenter/action.yaml index 137f6237bb26..67392858d70f 100644 --- a/.github/actions/e2e/install-karpenter/action.yaml +++ b/.github/actions/e2e/install-karpenter/action.yaml @@ -30,7 +30,7 @@ inputs: runs: using: "composite" steps: - - uses: actions/checkout@9bb56186c3b09b4f86b1c65136769dd318469633 # v4.1.2 + - uses: actions/checkout@1d96c772d19495a3b5c517cd2bc0cb401ea0529f # v4.1.3 with: ref: ${{ inputs.git_ref }} - uses: ./.github/actions/e2e/install-helm From 859f61ba4879bb7ead27c9aa2d2514765a76f31c Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 22 Apr 2024 08:48:29 -0700 Subject: [PATCH 05/17] chore(deps): bump actions/checkout from 4.1.2 to 4.1.3 in /.github/actions/e2e/upgrade-crds in the action-deps group (#6081) Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- .github/actions/e2e/upgrade-crds/action.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/actions/e2e/upgrade-crds/action.yaml b/.github/actions/e2e/upgrade-crds/action.yaml index 23c28d7fb1c5..8b9242097642 100644 --- a/.github/actions/e2e/upgrade-crds/action.yaml +++ b/.github/actions/e2e/upgrade-crds/action.yaml @@ -24,7 +24,7 @@ runs: role-to-assume: arn:aws:iam::${{ inputs.account_id }}:role/${{ inputs.role }} aws-region: ${{ inputs.region }} role-duration-seconds: 21600 - - uses: actions/checkout@9bb56186c3b09b4f86b1c65136769dd318469633 # v4.1.2 + - uses: actions/checkout@1d96c772d19495a3b5c517cd2bc0cb401ea0529f # v4.1.3 with: ref: ${{ inputs.git_ref }} - name: install-karpenter From 09572f4e6bae19eba21416a93e9067e93ec56d0d Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 22 Apr 2024 08:48:43 -0700 Subject: [PATCH 06/17] chore(deps): bump actions/checkout from 4.1.2 to 4.1.3 in /.github/actions/e2e/cleanup in the action-deps group (#6082) Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- .github/actions/e2e/cleanup/action.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/actions/e2e/cleanup/action.yaml b/.github/actions/e2e/cleanup/action.yaml index 7237012066c6..3f053d8449a0 100644 --- a/.github/actions/e2e/cleanup/action.yaml +++ b/.github/actions/e2e/cleanup/action.yaml @@ -24,7 +24,7 @@ inputs: runs: using: "composite" steps: - - uses: actions/checkout@9bb56186c3b09b4f86b1c65136769dd318469633 # v4.1.2 + - uses: actions/checkout@1d96c772d19495a3b5c517cd2bc0cb401ea0529f # v4.1.3 with: ref: ${{ inputs.git_ref }} - uses: ./.github/actions/e2e/install-eksctl From a280891e6ff1607922e631aa9428b6defe370ca7 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 22 Apr 2024 08:48:58 -0700 Subject: [PATCH 07/17] chore(deps): bump actions/upload-artifact from 4.3.1 to 4.3.2 in the actions-deps group (#6083) Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- .github/workflows/approval-comment.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/approval-comment.yaml b/.github/workflows/approval-comment.yaml index 34dfcafe9c05..ddd05e1e2a6a 100644 --- a/.github/workflows/approval-comment.yaml +++ b/.github/workflows/approval-comment.yaml @@ -19,7 +19,7 @@ jobs: mkdir -p /tmp/artifacts { echo "$REVIEW_BODY"; echo "$PULL_REQUEST_NUMBER"; echo "$COMMIT_ID"; } >> /tmp/artifacts/metadata.txt cat /tmp/artifacts/metadata.txt - - uses: actions/upload-artifact@5d5d22a31266ced268874388b861e4b58bb5c2f3 # v4.3.1 + - uses: actions/upload-artifact@1746f4ab65b179e0ea60a494b83293b640dd5bba # v4.3.2 with: name: artifacts path: /tmp/artifacts From bec5cb9f122e71d8e4d9b2bf08502c03d0ac47fa Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 22 Apr 2024 08:50:44 -0700 Subject: [PATCH 08/17] chore(deps): bump actions/checkout from 4.1.2 to 4.1.3 in /.github/actions/e2e/setup-cluster in the action-deps group (#6078) Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- .github/actions/e2e/setup-cluster/action.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/actions/e2e/setup-cluster/action.yaml b/.github/actions/e2e/setup-cluster/action.yaml index c829662294f0..ffda793d2fc0 100644 --- a/.github/actions/e2e/setup-cluster/action.yaml +++ b/.github/actions/e2e/setup-cluster/action.yaml @@ -50,7 +50,7 @@ inputs: runs: using: "composite" steps: - - uses: actions/checkout@9bb56186c3b09b4f86b1c65136769dd318469633 # v4.1.2 + - uses: actions/checkout@1d96c772d19495a3b5c517cd2bc0cb401ea0529f # v4.1.3 with: ref: ${{ inputs.git_ref }} - uses: ./.github/actions/e2e/install-eksctl From 79fc60157c88e98cb7b5ab11e0c34e632b0eee99 Mon Sep 17 00:00:00 2001 From: Jonathan Innis Date: Mon, 22 Apr 2024 09:07:43 -0700 Subject: [PATCH 09/17] docs: Clarify conceptual docs around `Exists` operator (#6070) --- .../content/en/docs/concepts/scheduling.md | 68 +++++++++++++++++-- .../content/en/preview/concepts/scheduling.md | 68 +++++++++++++++++-- .../content/en/v0.32/concepts/scheduling.md | 68 +++++++++++++++++-- .../content/en/v0.34/concepts/scheduling.md | 68 +++++++++++++++++-- .../content/en/v0.35/concepts/scheduling.md | 68 +++++++++++++++++-- .../content/en/v0.36/concepts/scheduling.md | 68 +++++++++++++++++-- 6 files changed, 366 insertions(+), 42 deletions(-) diff --git a/website/content/en/docs/concepts/scheduling.md b/website/content/en/docs/concepts/scheduling.md index 76c09cbc5124..49e43cf45f9d 100755 --- a/website/content/en/docs/concepts/scheduling.md +++ b/website/content/en/docs/concepts/scheduling.md @@ -626,19 +626,73 @@ If using Gt/Lt operators, make sure to use values under the actual label values The `Exists` operator can be used on a NodePool to provide workload segregation across nodes. ```yaml -... -requirements: -- key: company.com/team - operator: Exists +apiVersion: karpenter.sh/v1beta1 +kind: NodePool +spec: + template: + spec: + requirements: + - key: company.com/team + operator: Exists ... ``` -With the requirement on the NodePool, workloads can optionally specify a custom value as a required node affinity or node selector. Karpenter will then label the nodes it launches for these pods which prevents `kube-scheduler` from scheduling conflicting pods to those nodes. This provides a way to more dynamically isolate workloads without requiring a unique NodePool for each workload subset. +With this requirement on the NodePool, workloads can specify the same key (e.g. `company.com/team`) with custom values (e.g. `team-a`, `team-b`, etc.) as a required `nodeAffinity` or `nodeSelector`. Karpenter will then apply the key/value pair to nodes it launches dynamically based on the pod's node requirements. + +If each set of pods that can schedule with this NodePool specifies this key in its `nodeAffinity` or `nodeSelector`, you can isolate pods onto different nodes based on their values. This provides a way to more dynamically isolate workloads without requiring a unique NodePool for each workload subset. + +For example, providing the following `nodeSelectors` would isolate the pods for each of these deployments on different nodes. + +#### Team A Deployment ```yaml -nodeSelector: - company.com/team: team-a +apiVersion: v1 +kind: Deployment +metadata: + name: team-a-deployment +spec: + replicas: 5 + template: + spec: + nodeSelector: + company.com/team: team-a +``` + +#### Team A Node + +```yaml +apiVersion: v1 +kind: Node +metadata: + labels: + company.com/team: team-a ``` + +#### Team B Deployment + +```yaml +apiVersion: apps/v1 +kind: Deployment +metadata: + name: team-b-deployment +spec: + replicas: 5 + template: + spec: + nodeSelector: + company.com/team: team-b +``` + +#### Team B Node + +```yaml +apiVersion: v1 +kind: Node +metadata: + labels: + company.com/team: team-b +``` + {{% alert title="Note" color="primary" %}} If a workload matches the NodePool but doesn't specify a label, Karpenter will generate a random label for the node. {{% /alert %}} diff --git a/website/content/en/preview/concepts/scheduling.md b/website/content/en/preview/concepts/scheduling.md index 76c09cbc5124..49e43cf45f9d 100755 --- a/website/content/en/preview/concepts/scheduling.md +++ b/website/content/en/preview/concepts/scheduling.md @@ -626,19 +626,73 @@ If using Gt/Lt operators, make sure to use values under the actual label values The `Exists` operator can be used on a NodePool to provide workload segregation across nodes. ```yaml -... -requirements: -- key: company.com/team - operator: Exists +apiVersion: karpenter.sh/v1beta1 +kind: NodePool +spec: + template: + spec: + requirements: + - key: company.com/team + operator: Exists ... ``` -With the requirement on the NodePool, workloads can optionally specify a custom value as a required node affinity or node selector. Karpenter will then label the nodes it launches for these pods which prevents `kube-scheduler` from scheduling conflicting pods to those nodes. This provides a way to more dynamically isolate workloads without requiring a unique NodePool for each workload subset. +With this requirement on the NodePool, workloads can specify the same key (e.g. `company.com/team`) with custom values (e.g. `team-a`, `team-b`, etc.) as a required `nodeAffinity` or `nodeSelector`. Karpenter will then apply the key/value pair to nodes it launches dynamically based on the pod's node requirements. + +If each set of pods that can schedule with this NodePool specifies this key in its `nodeAffinity` or `nodeSelector`, you can isolate pods onto different nodes based on their values. This provides a way to more dynamically isolate workloads without requiring a unique NodePool for each workload subset. + +For example, providing the following `nodeSelectors` would isolate the pods for each of these deployments on different nodes. + +#### Team A Deployment ```yaml -nodeSelector: - company.com/team: team-a +apiVersion: v1 +kind: Deployment +metadata: + name: team-a-deployment +spec: + replicas: 5 + template: + spec: + nodeSelector: + company.com/team: team-a +``` + +#### Team A Node + +```yaml +apiVersion: v1 +kind: Node +metadata: + labels: + company.com/team: team-a ``` + +#### Team B Deployment + +```yaml +apiVersion: apps/v1 +kind: Deployment +metadata: + name: team-b-deployment +spec: + replicas: 5 + template: + spec: + nodeSelector: + company.com/team: team-b +``` + +#### Team B Node + +```yaml +apiVersion: v1 +kind: Node +metadata: + labels: + company.com/team: team-b +``` + {{% alert title="Note" color="primary" %}} If a workload matches the NodePool but doesn't specify a label, Karpenter will generate a random label for the node. {{% /alert %}} diff --git a/website/content/en/v0.32/concepts/scheduling.md b/website/content/en/v0.32/concepts/scheduling.md index 466b54910454..9617591d7868 100755 --- a/website/content/en/v0.32/concepts/scheduling.md +++ b/website/content/en/v0.32/concepts/scheduling.md @@ -625,19 +625,73 @@ If using Gt/Lt operators, make sure to use values under the actual label values The `Exists` operator can be used on a NodePool to provide workload segregation across nodes. ```yaml -... -requirements: -- key: company.com/team - operator: Exists +apiVersion: karpenter.sh/v1beta1 +kind: NodePool +spec: + template: + spec: + requirements: + - key: company.com/team + operator: Exists ... ``` -With the requirement on the NodePool, workloads can optionally specify a custom value as a required node affinity or node selector. Karpenter will then label the nodes it launches for these pods which prevents `kube-scheduler` from scheduling conflicting pods to those nodes. This provides a way to more dynamically isolate workloads without requiring a unique NodePool for each workload subset. +With this requirement on the NodePool, workloads can specify the same key (e.g. `company.com/team`) with custom values (e.g. `team-a`, `team-b`, etc.) as a required `nodeAffinity` or `nodeSelector`. Karpenter will then apply the key/value pair to nodes it launches dynamically based on the pod's node requirements. + +If each set of pods that can schedule with this NodePool specifies this key in its `nodeAffinity` or `nodeSelector`, you can isolate pods onto different nodes based on their values. This provides a way to more dynamically isolate workloads without requiring a unique NodePool for each workload subset. + +For example, providing the following `nodeSelectors` would isolate the pods for each of these deployments on different nodes. + +#### Team A Deployment ```yaml -nodeSelector: - company.com/team: team-a +apiVersion: v1 +kind: Deployment +metadata: + name: team-a-deployment +spec: + replicas: 5 + template: + spec: + nodeSelector: + company.com/team: team-a +``` + +#### Team A Node + +```yaml +apiVersion: v1 +kind: Node +metadata: + labels: + company.com/team: team-a ``` + +#### Team B Deployment + +```yaml +apiVersion: apps/v1 +kind: Deployment +metadata: + name: team-b-deployment +spec: + replicas: 5 + template: + spec: + nodeSelector: + company.com/team: team-b +``` + +#### Team B Node + +```yaml +apiVersion: v1 +kind: Node +metadata: + labels: + company.com/team: team-b +``` + {{% alert title="Note" color="primary" %}} If a workload matches the NodePool but doesn't specify a label, Karpenter will generate a random label for the node. {{% /alert %}} diff --git a/website/content/en/v0.34/concepts/scheduling.md b/website/content/en/v0.34/concepts/scheduling.md index ef12a29cc347..4eebe154c855 100755 --- a/website/content/en/v0.34/concepts/scheduling.md +++ b/website/content/en/v0.34/concepts/scheduling.md @@ -625,19 +625,73 @@ If using Gt/Lt operators, make sure to use values under the actual label values The `Exists` operator can be used on a NodePool to provide workload segregation across nodes. ```yaml -... -requirements: -- key: company.com/team - operator: Exists +apiVersion: karpenter.sh/v1beta1 +kind: NodePool +spec: + template: + spec: + requirements: + - key: company.com/team + operator: Exists ... ``` -With the requirement on the NodePool, workloads can optionally specify a custom value as a required node affinity or node selector. Karpenter will then label the nodes it launches for these pods which prevents `kube-scheduler` from scheduling conflicting pods to those nodes. This provides a way to more dynamically isolate workloads without requiring a unique NodePool for each workload subset. +With this requirement on the NodePool, workloads can specify the same key (e.g. `company.com/team`) with custom values (e.g. `team-a`, `team-b`, etc.) as a required `nodeAffinity` or `nodeSelector`. Karpenter will then apply the key/value pair to nodes it launches dynamically based on the pod's node requirements. + +If each set of pods that can schedule with this NodePool specifies this key in its `nodeAffinity` or `nodeSelector`, you can isolate pods onto different nodes based on their values. This provides a way to more dynamically isolate workloads without requiring a unique NodePool for each workload subset. + +For example, providing the following `nodeSelectors` would isolate the pods for each of these deployments on different nodes. + +#### Team A Deployment ```yaml -nodeSelector: - company.com/team: team-a +apiVersion: v1 +kind: Deployment +metadata: + name: team-a-deployment +spec: + replicas: 5 + template: + spec: + nodeSelector: + company.com/team: team-a +``` + +#### Team A Node + +```yaml +apiVersion: v1 +kind: Node +metadata: + labels: + company.com/team: team-a ``` + +#### Team B Deployment + +```yaml +apiVersion: apps/v1 +kind: Deployment +metadata: + name: team-b-deployment +spec: + replicas: 5 + template: + spec: + nodeSelector: + company.com/team: team-b +``` + +#### Team B Node + +```yaml +apiVersion: v1 +kind: Node +metadata: + labels: + company.com/team: team-b +``` + {{% alert title="Note" color="primary" %}} If a workload matches the NodePool but doesn't specify a label, Karpenter will generate a random label for the node. {{% /alert %}} diff --git a/website/content/en/v0.35/concepts/scheduling.md b/website/content/en/v0.35/concepts/scheduling.md index ef12a29cc347..4eebe154c855 100755 --- a/website/content/en/v0.35/concepts/scheduling.md +++ b/website/content/en/v0.35/concepts/scheduling.md @@ -625,19 +625,73 @@ If using Gt/Lt operators, make sure to use values under the actual label values The `Exists` operator can be used on a NodePool to provide workload segregation across nodes. ```yaml -... -requirements: -- key: company.com/team - operator: Exists +apiVersion: karpenter.sh/v1beta1 +kind: NodePool +spec: + template: + spec: + requirements: + - key: company.com/team + operator: Exists ... ``` -With the requirement on the NodePool, workloads can optionally specify a custom value as a required node affinity or node selector. Karpenter will then label the nodes it launches for these pods which prevents `kube-scheduler` from scheduling conflicting pods to those nodes. This provides a way to more dynamically isolate workloads without requiring a unique NodePool for each workload subset. +With this requirement on the NodePool, workloads can specify the same key (e.g. `company.com/team`) with custom values (e.g. `team-a`, `team-b`, etc.) as a required `nodeAffinity` or `nodeSelector`. Karpenter will then apply the key/value pair to nodes it launches dynamically based on the pod's node requirements. + +If each set of pods that can schedule with this NodePool specifies this key in its `nodeAffinity` or `nodeSelector`, you can isolate pods onto different nodes based on their values. This provides a way to more dynamically isolate workloads without requiring a unique NodePool for each workload subset. + +For example, providing the following `nodeSelectors` would isolate the pods for each of these deployments on different nodes. + +#### Team A Deployment ```yaml -nodeSelector: - company.com/team: team-a +apiVersion: v1 +kind: Deployment +metadata: + name: team-a-deployment +spec: + replicas: 5 + template: + spec: + nodeSelector: + company.com/team: team-a +``` + +#### Team A Node + +```yaml +apiVersion: v1 +kind: Node +metadata: + labels: + company.com/team: team-a ``` + +#### Team B Deployment + +```yaml +apiVersion: apps/v1 +kind: Deployment +metadata: + name: team-b-deployment +spec: + replicas: 5 + template: + spec: + nodeSelector: + company.com/team: team-b +``` + +#### Team B Node + +```yaml +apiVersion: v1 +kind: Node +metadata: + labels: + company.com/team: team-b +``` + {{% alert title="Note" color="primary" %}} If a workload matches the NodePool but doesn't specify a label, Karpenter will generate a random label for the node. {{% /alert %}} diff --git a/website/content/en/v0.36/concepts/scheduling.md b/website/content/en/v0.36/concepts/scheduling.md index 76c09cbc5124..49e43cf45f9d 100755 --- a/website/content/en/v0.36/concepts/scheduling.md +++ b/website/content/en/v0.36/concepts/scheduling.md @@ -626,19 +626,73 @@ If using Gt/Lt operators, make sure to use values under the actual label values The `Exists` operator can be used on a NodePool to provide workload segregation across nodes. ```yaml -... -requirements: -- key: company.com/team - operator: Exists +apiVersion: karpenter.sh/v1beta1 +kind: NodePool +spec: + template: + spec: + requirements: + - key: company.com/team + operator: Exists ... ``` -With the requirement on the NodePool, workloads can optionally specify a custom value as a required node affinity or node selector. Karpenter will then label the nodes it launches for these pods which prevents `kube-scheduler` from scheduling conflicting pods to those nodes. This provides a way to more dynamically isolate workloads without requiring a unique NodePool for each workload subset. +With this requirement on the NodePool, workloads can specify the same key (e.g. `company.com/team`) with custom values (e.g. `team-a`, `team-b`, etc.) as a required `nodeAffinity` or `nodeSelector`. Karpenter will then apply the key/value pair to nodes it launches dynamically based on the pod's node requirements. + +If each set of pods that can schedule with this NodePool specifies this key in its `nodeAffinity` or `nodeSelector`, you can isolate pods onto different nodes based on their values. This provides a way to more dynamically isolate workloads without requiring a unique NodePool for each workload subset. + +For example, providing the following `nodeSelectors` would isolate the pods for each of these deployments on different nodes. + +#### Team A Deployment ```yaml -nodeSelector: - company.com/team: team-a +apiVersion: v1 +kind: Deployment +metadata: + name: team-a-deployment +spec: + replicas: 5 + template: + spec: + nodeSelector: + company.com/team: team-a +``` + +#### Team A Node + +```yaml +apiVersion: v1 +kind: Node +metadata: + labels: + company.com/team: team-a ``` + +#### Team B Deployment + +```yaml +apiVersion: apps/v1 +kind: Deployment +metadata: + name: team-b-deployment +spec: + replicas: 5 + template: + spec: + nodeSelector: + company.com/team: team-b +``` + +#### Team B Node + +```yaml +apiVersion: v1 +kind: Node +metadata: + labels: + company.com/team: team-b +``` + {{% alert title="Note" color="primary" %}} If a workload matches the NodePool but doesn't specify a label, Karpenter will generate a random label for the node. {{% /alert %}} From 283edd464e25331a9f3b2182863902ed91626657 Mon Sep 17 00:00:00 2001 From: Jeff Harris Date: Tue, 23 Apr 2024 09:07:29 +1200 Subject: [PATCH 10/17] docs: Fix reference to total cluster size constraint (#6071) --- website/content/en/docs/concepts/nodepools.md | 2 +- website/content/en/preview/concepts/nodepools.md | 2 +- website/content/en/v0.32/concepts/nodepools.md | 2 +- website/content/en/v0.34/concepts/nodepools.md | 2 +- website/content/en/v0.35/concepts/nodepools.md | 2 +- website/content/en/v0.36/concepts/nodepools.md | 2 +- 6 files changed, 6 insertions(+), 6 deletions(-) diff --git a/website/content/en/docs/concepts/nodepools.md b/website/content/en/docs/concepts/nodepools.md index d679bb5f4373..9a9b9a06d57a 100644 --- a/website/content/en/docs/concepts/nodepools.md +++ b/website/content/en/docs/concepts/nodepools.md @@ -157,7 +157,7 @@ spec: duration: 8h nodes: "0" - # Resource limits constrain the total size of the cluster. + # Resource limits constrain the total size of the pool. # Limits prevent Karpenter from creating new instances once the limit is exceeded. limits: cpu: "1000" diff --git a/website/content/en/preview/concepts/nodepools.md b/website/content/en/preview/concepts/nodepools.md index d679bb5f4373..9a9b9a06d57a 100644 --- a/website/content/en/preview/concepts/nodepools.md +++ b/website/content/en/preview/concepts/nodepools.md @@ -157,7 +157,7 @@ spec: duration: 8h nodes: "0" - # Resource limits constrain the total size of the cluster. + # Resource limits constrain the total size of the pool. # Limits prevent Karpenter from creating new instances once the limit is exceeded. limits: cpu: "1000" diff --git a/website/content/en/v0.32/concepts/nodepools.md b/website/content/en/v0.32/concepts/nodepools.md index 607311c5ef6e..7a369b13f326 100644 --- a/website/content/en/v0.32/concepts/nodepools.md +++ b/website/content/en/v0.32/concepts/nodepools.md @@ -140,7 +140,7 @@ spec: # You can choose to disable expiration entirely by setting the string value 'Never' here expireAfter: 720h - # Resource limits constrain the total size of the cluster. + # Resource limits constrain the total size of the pool. # Limits prevent Karpenter from creating new instances once the limit is exceeded. limits: cpu: "1000" diff --git a/website/content/en/v0.34/concepts/nodepools.md b/website/content/en/v0.34/concepts/nodepools.md index cb85d5988475..c7f6a6072f86 100644 --- a/website/content/en/v0.34/concepts/nodepools.md +++ b/website/content/en/v0.34/concepts/nodepools.md @@ -150,7 +150,7 @@ spec: duration: 8h nodes: "0" - # Resource limits constrain the total size of the cluster. + # Resource limits constrain the total size of the pool. # Limits prevent Karpenter from creating new instances once the limit is exceeded. limits: cpu: "1000" diff --git a/website/content/en/v0.35/concepts/nodepools.md b/website/content/en/v0.35/concepts/nodepools.md index d679bb5f4373..9a9b9a06d57a 100644 --- a/website/content/en/v0.35/concepts/nodepools.md +++ b/website/content/en/v0.35/concepts/nodepools.md @@ -157,7 +157,7 @@ spec: duration: 8h nodes: "0" - # Resource limits constrain the total size of the cluster. + # Resource limits constrain the total size of the pool. # Limits prevent Karpenter from creating new instances once the limit is exceeded. limits: cpu: "1000" diff --git a/website/content/en/v0.36/concepts/nodepools.md b/website/content/en/v0.36/concepts/nodepools.md index d679bb5f4373..9a9b9a06d57a 100644 --- a/website/content/en/v0.36/concepts/nodepools.md +++ b/website/content/en/v0.36/concepts/nodepools.md @@ -157,7 +157,7 @@ spec: duration: 8h nodes: "0" - # Resource limits constrain the total size of the cluster. + # Resource limits constrain the total size of the pool. # Limits prevent Karpenter from creating new instances once the limit is exceeded. limits: cpu: "1000" From fe6c6394af9cecbba79f8f634806633a1aa9bec1 Mon Sep 17 00:00:00 2001 From: Max Forasteiro <18661016+maxforasteiro@users.noreply.github.com> Date: Tue, 23 Apr 2024 09:20:33 +0200 Subject: [PATCH 11/17] fix: Update docs about queue name change (#6062) --- website/content/en/docs/concepts/disruption.md | 2 +- website/content/en/docs/faq.md | 4 ++-- website/content/en/docs/upgrading/upgrade-guide.md | 5 +++-- website/content/en/preview/concepts/disruption.md | 2 +- website/content/en/preview/faq.md | 4 ++-- website/content/en/preview/upgrading/upgrade-guide.md | 5 +++-- website/content/en/v0.32/concepts/disruption.md | 2 +- website/content/en/v0.32/faq.md | 2 +- website/content/en/v0.32/upgrading/upgrade-guide.md | 1 + website/content/en/v0.34/concepts/disruption.md | 2 +- website/content/en/v0.34/faq.md | 4 ++-- website/content/en/v0.34/upgrading/upgrade-guide.md | 3 ++- website/content/en/v0.35/concepts/disruption.md | 2 +- website/content/en/v0.35/faq.md | 4 ++-- website/content/en/v0.35/upgrading/upgrade-guide.md | 3 ++- website/content/en/v0.36/concepts/disruption.md | 2 +- website/content/en/v0.36/faq.md | 4 ++-- website/content/en/v0.36/upgrading/upgrade-guide.md | 5 +++-- 18 files changed, 31 insertions(+), 25 deletions(-) diff --git a/website/content/en/docs/concepts/disruption.md b/website/content/en/docs/concepts/disruption.md index d92d1c68fc2c..aa10df2124f3 100644 --- a/website/content/en/docs/concepts/disruption.md +++ b/website/content/en/docs/concepts/disruption.md @@ -190,7 +190,7 @@ If you require handling for Spot Rebalance Recommendations, you can use the [AWS Karpenter enables this feature by watching an SQS queue which receives critical events from AWS services which may affect your nodes. Karpenter requires that an SQS queue be provisioned and EventBridge rules and targets be added that forward interruption events from AWS services to the SQS queue. Karpenter provides details for provisioning this infrastructure in the [CloudFormation template in the Getting Started Guide](../../getting-started/getting-started-with-karpenter/#create-the-karpenter-infrastructure-and-iam-roles). -To enable interruption handling, configure the `--interruption-queue-name` CLI argument with the name of the interruption queue provisioned to handle interruption events. +To enable interruption handling, configure the `--interruption-queue` CLI argument with the name of the interruption queue provisioned to handle interruption events. ## Controls diff --git a/website/content/en/docs/faq.md b/website/content/en/docs/faq.md index c765ef174e2d..cfbf61a302d7 100644 --- a/website/content/en/docs/faq.md +++ b/website/content/en/docs/faq.md @@ -17,7 +17,7 @@ AWS is the first cloud provider supported by Karpenter, although it is designed Yes, but there is no documentation yet for it. Start with Karpenter's GitHub [cloudprovider](https://github.com/aws/karpenter-core/tree/v0.36.0/pkg/cloudprovider) documentation to see how the AWS provider is built, but there are other sections of the code that will require changes too. ### What operating system nodes does Karpenter deploy? -Karpenter uses the OS defined by the [AMI Family in your EC2NodeClass]({{< ref "./concepts/nodeclasses#specamifamily" >}}). +Karpenter uses the OS defined by the [AMI Family in your EC2NodeClass]({{< ref "./concepts/nodeclasses#specamifamily" >}}). ### Can I provide my own custom operating system images? Karpenter has multiple mechanisms for configuring the [operating system]({{< ref "./concepts/nodeclasses/#specamiselectorterms" >}}) for your nodes. @@ -231,7 +231,7 @@ Karpenter's native interruption handling offers two main benefits over the stand 1. You don't have to manage and maintain a separate component to exclusively handle interruption events. 2. Karpenter's native interruption handling coordinates with other deprovisioning so that consolidation, expiration, etc. can be aware of interruption events and vice-versa. -### Why am I receiving QueueNotFound errors when I set `--interruption-queue-name`? +### Why am I receiving QueueNotFound errors when I set `--interruption-queue`? Karpenter requires a queue to exist that receives event messages from EC2 and health services in order to handle interruption messages properly for nodes. Details on the types of events that Karpenter handles can be found in the [Interruption Handling Docs]({{< ref "./concepts/disruption/#interruption" >}}). diff --git a/website/content/en/docs/upgrading/upgrade-guide.md b/website/content/en/docs/upgrading/upgrade-guide.md index c7e84b894521..1687bfa3f5a5 100644 --- a/website/content/en/docs/upgrading/upgrade-guide.md +++ b/website/content/en/docs/upgrading/upgrade-guide.md @@ -44,7 +44,7 @@ WHEN CREATING A NEW SECTION OF THE UPGRADE GUIDANCE FOR NEWER VERSIONS, ENSURE T {{% /alert %}} {{% alert title="Warning" color="warning" %}} - v0.36.x introduces update to drift that restricts rollback. When rolling back from >=v0.36.0, note that v0.32.9+, v0.33.4+, v0.34.5+, v0.35.4+ are the patch versions that support rollback. If Karpenter is rolled back to an older patch version, Karpenter can potentially drift all the nodes in the cluster. + v0.36.x introduces update to drift that restricts rollback. When rolling back from >=v0.36.0, note that v0.32.9+, v0.33.4+, v0.34.5+, v0.35.4+ are the patch versions that support rollback. If Karpenter is rolled back to an older patch version, Karpenter can potentially drift all the nodes in the cluster. {{% /alert %}} * Karpenter changed the name of the `karpenter_cloudprovider_instance_type_price_estimate` metric to `karpenter_cloudprovider_instance_type_offering_price_estimate` to align with the new `karpenter_cloudprovider_instance_type_offering_available` metric. The `region` label was also dropped from the metric, since this can be inferred from the environment that Karpenter is running in. @@ -72,7 +72,7 @@ The Ubuntu EKS optimized AMI has moved from 20.04 to 22.04 for Kubernetes 1.29+. * `Empty Expiration / Empty Drift / Empty Consolidation`: infinite parallelism * `Non-Empty Expiration / Non-Empty Drift / Single-Node Consolidation`: one node at a time * `Multi-Node Consolidation`: max 100 nodes -* To support Disruption Budgets, `0.34.0`+ includes critical changes to Karpenter's core controllers, which allows Karpenter to consider multiple batches of disrupting nodes simultaneously. This increases Karpenter's performance with the potential downside of higher CPU and memory utilization from the Karpenter pod. While the magnitude of this difference varies on a case-by-case basis, when upgrading to Karpenter `0.34.0`+, please note that you may need to increase the resources allocated to the Karpenter controller pods. +* To support Disruption Budgets, `0.34.0`+ includes critical changes to Karpenter's core controllers, which allows Karpenter to consider multiple batches of disrupting nodes simultaneously. This increases Karpenter's performance with the potential downside of higher CPU and memory utilization from the Karpenter pod. While the magnitude of this difference varies on a case-by-case basis, when upgrading to Karpenter `0.34.0`+, please note that you may need to increase the resources allocated to the Karpenter controller pods. * Karpenter now adds a default `podSecurityContext` that configures the `fsgroup: 65536` of volumes in the pod. If you are using sidecar containers, you should review if this configuration is compatible for them. You can disable this default `podSecurityContext` through helm by performing `--set podSecurityContext=null` when installing/upgrading the chart. * The `dnsPolicy` for the Karpenter controller pod has been changed back to the Kubernetes cluster default of `ClusterFirst`. Setting our `dnsPolicy` to `Default` (confusingly, this is not the Kubernetes cluster default) caused more confusion for any users running IPv6 clusters with dual-stack nodes or anyone running Karpenter with dependencies on cluster services (like clusters running service meshes). This change may be breaking for any users on Fargate or MNG who were allowing Karpenter to manage their in-cluster DNS service (`core-dns` on most clusters). If you still want the old behavior here, you can change the `dnsPolicy` to point to use `Default` by setting the helm value on install/upgrade with `--set dnsPolicy=Default`. More details on this issue can be found in the following Github issues: [#2186](https://github.com/aws/karpenter-provider-aws/issues/2186) and [#4947](https://github.com/aws/karpenter-provider-aws/issues/4947). * Karpenter now disallows `nodepool.spec.template.spec.resources` to be set. The webhook validation never allowed `nodepool.spec.template.spec.resources`. We are now ensuring that CEL validation also disallows `nodepool.spec.template.spec.resources` to be set. If you were previously setting the resources field on your NodePool, ensure that you remove this field before upgrading to the newest version of Karpenter or else updates to the resource may fail on the new version. @@ -103,6 +103,7 @@ This version includes **dual support** for both alpha and beta APIs to ensure th Note that if you are rolling back after upgrading to `0.32.0`, note that `0.31.4` is the only version that supports handling rollback after you have deployed the v1beta1 APIs to your cluster. {{% /alert %}} +* Karpenter now uses `settings.InterruptionQueue` instead of `settings.aws.InterruptionQueueName` in its helm chart. The CLI argument also changed to `--interruption-queue`. * Karpenter now serves the webhook prometheus metrics server on port `8001`. If this port is already in-use on the pod or you are running in `hostNetworking` mode, you may need to change this port value. You can configure this port value through the `WEBHOOK_METRICS_PORT` environment variable or the `webhook.metrics.port` value if installing via Helm. * Karpenter now exposes the ability to disable webhooks through the `webhook.enabled=false` value. This value will disable the webhook server and will prevent any permissions, mutating or validating webhook configurations from being deployed to the cluster. * Karpenter now moves all logging configuration for the Zap logger into the `logConfig` values block. Configuring Karpenter logging with this mechanism _is_ deprecated and will be dropped at v1. Karpenter now only surfaces logLevel through the `logLevel` helm value. If you need more advanced configuration due to log parsing constraints, we recommend configuring your log parser to handle Karpenter's Zap JSON logging. diff --git a/website/content/en/preview/concepts/disruption.md b/website/content/en/preview/concepts/disruption.md index d92d1c68fc2c..aa10df2124f3 100644 --- a/website/content/en/preview/concepts/disruption.md +++ b/website/content/en/preview/concepts/disruption.md @@ -190,7 +190,7 @@ If you require handling for Spot Rebalance Recommendations, you can use the [AWS Karpenter enables this feature by watching an SQS queue which receives critical events from AWS services which may affect your nodes. Karpenter requires that an SQS queue be provisioned and EventBridge rules and targets be added that forward interruption events from AWS services to the SQS queue. Karpenter provides details for provisioning this infrastructure in the [CloudFormation template in the Getting Started Guide](../../getting-started/getting-started-with-karpenter/#create-the-karpenter-infrastructure-and-iam-roles). -To enable interruption handling, configure the `--interruption-queue-name` CLI argument with the name of the interruption queue provisioned to handle interruption events. +To enable interruption handling, configure the `--interruption-queue` CLI argument with the name of the interruption queue provisioned to handle interruption events. ## Controls diff --git a/website/content/en/preview/faq.md b/website/content/en/preview/faq.md index 4549a26bef94..d9b9118de4cb 100644 --- a/website/content/en/preview/faq.md +++ b/website/content/en/preview/faq.md @@ -17,7 +17,7 @@ AWS is the first cloud provider supported by Karpenter, although it is designed Yes, but there is no documentation yet for it. Start with Karpenter's GitHub [cloudprovider](https://github.com/aws/karpenter-core/tree{{< githubRelRef >}}pkg/cloudprovider) documentation to see how the AWS provider is built, but there are other sections of the code that will require changes too. ### What operating system nodes does Karpenter deploy? -Karpenter uses the OS defined by the [AMI Family in your EC2NodeClass]({{< ref "./concepts/nodeclasses#specamifamily" >}}). +Karpenter uses the OS defined by the [AMI Family in your EC2NodeClass]({{< ref "./concepts/nodeclasses#specamifamily" >}}). ### Can I provide my own custom operating system images? Karpenter has multiple mechanisms for configuring the [operating system]({{< ref "./concepts/nodeclasses/#specamiselectorterms" >}}) for your nodes. @@ -231,7 +231,7 @@ Karpenter's native interruption handling offers two main benefits over the stand 1. You don't have to manage and maintain a separate component to exclusively handle interruption events. 2. Karpenter's native interruption handling coordinates with other deprovisioning so that consolidation, expiration, etc. can be aware of interruption events and vice-versa. -### Why am I receiving QueueNotFound errors when I set `--interruption-queue-name`? +### Why am I receiving QueueNotFound errors when I set `--interruption-queue`? Karpenter requires a queue to exist that receives event messages from EC2 and health services in order to handle interruption messages properly for nodes. Details on the types of events that Karpenter handles can be found in the [Interruption Handling Docs]({{< ref "./concepts/disruption/#interruption" >}}). diff --git a/website/content/en/preview/upgrading/upgrade-guide.md b/website/content/en/preview/upgrading/upgrade-guide.md index b991ad2dd856..66f1d2b8c1e5 100644 --- a/website/content/en/preview/upgrading/upgrade-guide.md +++ b/website/content/en/preview/upgrading/upgrade-guide.md @@ -48,7 +48,7 @@ WHEN CREATING A NEW SECTION OF THE UPGRADE GUIDANCE FOR NEWER VERSIONS, ENSURE T {{% /alert %}} {{% alert title="Warning" color="warning" %}} - v0.36.x introduces update to drift that restricts rollback. When rolling back from >=v0.36.0, note that v0.32.9+, v0.33.4+, v0.34.5+, v0.35.4+ are the patch versions that support rollback. If Karpenter is rolled back to an older patch version, Karpenter can potentially drift all the nodes in the cluster. + v0.36.x introduces update to drift that restricts rollback. When rolling back from >=v0.36.0, note that v0.32.9+, v0.33.4+, v0.34.5+, v0.35.4+ are the patch versions that support rollback. If Karpenter is rolled back to an older patch version, Karpenter can potentially drift all the nodes in the cluster. {{% /alert %}} * Karpenter changed the name of the `karpenter_cloudprovider_instance_type_price_estimate` metric to `karpenter_cloudprovider_instance_type_offering_price_estimate` to align with the new `karpenter_cloudprovider_instance_type_offering_available` metric. The `region` label was also dropped from the metric, since this can be inferred from the environment that Karpenter is running in. @@ -76,7 +76,7 @@ The Ubuntu EKS optimized AMI has moved from 20.04 to 22.04 for Kubernetes 1.29+. * `Empty Expiration / Empty Drift / Empty Consolidation`: infinite parallelism * `Non-Empty Expiration / Non-Empty Drift / Single-Node Consolidation`: one node at a time * `Multi-Node Consolidation`: max 100 nodes -* To support Disruption Budgets, `0.34.0`+ includes critical changes to Karpenter's core controllers, which allows Karpenter to consider multiple batches of disrupting nodes simultaneously. This increases Karpenter's performance with the potential downside of higher CPU and memory utilization from the Karpenter pod. While the magnitude of this difference varies on a case-by-case basis, when upgrading to Karpenter `0.34.0`+, please note that you may need to increase the resources allocated to the Karpenter controller pods. +* To support Disruption Budgets, `0.34.0`+ includes critical changes to Karpenter's core controllers, which allows Karpenter to consider multiple batches of disrupting nodes simultaneously. This increases Karpenter's performance with the potential downside of higher CPU and memory utilization from the Karpenter pod. While the magnitude of this difference varies on a case-by-case basis, when upgrading to Karpenter `0.34.0`+, please note that you may need to increase the resources allocated to the Karpenter controller pods. * Karpenter now adds a default `podSecurityContext` that configures the `fsgroup: 65536` of volumes in the pod. If you are using sidecar containers, you should review if this configuration is compatible for them. You can disable this default `podSecurityContext` through helm by performing `--set podSecurityContext=null` when installing/upgrading the chart. * The `dnsPolicy` for the Karpenter controller pod has been changed back to the Kubernetes cluster default of `ClusterFirst`. Setting our `dnsPolicy` to `Default` (confusingly, this is not the Kubernetes cluster default) caused more confusion for any users running IPv6 clusters with dual-stack nodes or anyone running Karpenter with dependencies on cluster services (like clusters running service meshes). This change may be breaking for any users on Fargate or MNG who were allowing Karpenter to manage their in-cluster DNS service (`core-dns` on most clusters). If you still want the old behavior here, you can change the `dnsPolicy` to point to use `Default` by setting the helm value on install/upgrade with `--set dnsPolicy=Default`. More details on this issue can be found in the following Github issues: [#2186](https://github.com/aws/karpenter-provider-aws/issues/2186) and [#4947](https://github.com/aws/karpenter-provider-aws/issues/4947). * Karpenter now disallows `nodepool.spec.template.spec.resources` to be set. The webhook validation never allowed `nodepool.spec.template.spec.resources`. We are now ensuring that CEL validation also disallows `nodepool.spec.template.spec.resources` to be set. If you were previously setting the resources field on your NodePool, ensure that you remove this field before upgrading to the newest version of Karpenter or else updates to the resource may fail on the new version. @@ -107,6 +107,7 @@ This version includes **dual support** for both alpha and beta APIs to ensure th Note that if you are rolling back after upgrading to `0.32.0`, note that `0.31.4` is the only version that supports handling rollback after you have deployed the v1beta1 APIs to your cluster. {{% /alert %}} +* Karpenter now uses `settings.InterruptionQueue` instead of `settings.aws.InterruptionQueueName` in its helm chart. The CLI argument also changed to `--interruption-queue`. * Karpenter now serves the webhook prometheus metrics server on port `8001`. If this port is already in-use on the pod or you are running in `hostNetworking` mode, you may need to change this port value. You can configure this port value through the `WEBHOOK_METRICS_PORT` environment variable or the `webhook.metrics.port` value if installing via Helm. * Karpenter now exposes the ability to disable webhooks through the `webhook.enabled=false` value. This value will disable the webhook server and will prevent any permissions, mutating or validating webhook configurations from being deployed to the cluster. * Karpenter now moves all logging configuration for the Zap logger into the `logConfig` values block. Configuring Karpenter logging with this mechanism _is_ deprecated and will be dropped at v1. Karpenter now only surfaces logLevel through the `logLevel` helm value. If you need more advanced configuration due to log parsing constraints, we recommend configuring your log parser to handle Karpenter's Zap JSON logging. diff --git a/website/content/en/v0.32/concepts/disruption.md b/website/content/en/v0.32/concepts/disruption.md index 408e591147a0..b7ebec2b83df 100644 --- a/website/content/en/v0.32/concepts/disruption.md +++ b/website/content/en/v0.32/concepts/disruption.md @@ -174,7 +174,7 @@ If you require handling for Spot Rebalance Recommendations, you can use the [AWS Karpenter enables this feature by watching an SQS queue which receives critical events from AWS services which may affect your nodes. Karpenter requires that an SQS queue be provisioned and EventBridge rules and targets be added that forward interruption events from AWS services to the SQS queue. Karpenter provides details for provisioning this infrastructure in the [CloudFormation template in the Getting Started Guide](../../getting-started/getting-started-with-karpenter/#create-the-karpenter-infrastructure-and-iam-roles). -To enable interruption handling, configure the `--interruption-queue-name` CLI argument with the name of the interruption queue provisioned to handle interruption events. +To enable interruption handling, configure the `--interruption-queue` CLI argument with the name of the interruption queue provisioned to handle interruption events. ## Controls diff --git a/website/content/en/v0.32/faq.md b/website/content/en/v0.32/faq.md index eb3a46e16351..cca4304b2d07 100644 --- a/website/content/en/v0.32/faq.md +++ b/website/content/en/v0.32/faq.md @@ -227,7 +227,7 @@ Karpenter's native interruption handling offers two main benefits over the stand 1. You don't have to manage and maintain a separate component to exclusively handle interruption events. 2. Karpenter's native interruption handling coordinates with other deprovisioning so that consolidation, expiration, etc. can be aware of interruption events and vice-versa. -### Why am I receiving QueueNotFound errors when I set `--interruption-queue-name`? +### Why am I receiving QueueNotFound errors when I set `--interruption-queue`? Karpenter requires a queue to exist that receives event messages from EC2 and health services in order to handle interruption messages properly for nodes. Details on the types of events that Karpenter handles can be found in the [Interruption Handling Docs]({{< ref "./concepts/disruption/#interruption" >}}). diff --git a/website/content/en/v0.32/upgrading/upgrade-guide.md b/website/content/en/v0.32/upgrading/upgrade-guide.md index 5deb0741f0b2..91bf44070f22 100644 --- a/website/content/en/v0.32/upgrading/upgrade-guide.md +++ b/website/content/en/v0.32/upgrading/upgrade-guide.md @@ -46,6 +46,7 @@ This version includes **dual support** for both alpha and beta APIs to ensure th Note that if you are rolling back after upgrading to v0.32.0, note that v0.31.4 is the only version that supports handling rollback after you have deployed the v1beta1 APIs to your cluster. {{% /alert %}} +* Karpenter now uses `settings.InterruptionQueue` instead of `settings.aws.InterruptionQueueName` in its helm chart. The CLI argument also changed to `--interruption-queue`. * Karpenter now serves the webhook prometheus metrics server on port `8001`. If this port is already in-use on the pod or you are running in `hostNetworking` mode, you may need to change this port value. You can configure this port value through the `WEBHOOK_METRICS_PORT` environment variable or the `webhook.metrics.port` value if installing via Helm. * Karpenter now exposes the ability to disable webhooks through the `webhook.enabled=false` value. This value will disable the webhook server and will prevent any permissions, mutating or validating webhook configurations from being deployed to the cluster. * Karpenter now moves all logging configuration for the Zap logger into the `logConfig` values block. Configuring Karpenter logging with this mechanism _is_ deprecated and will be dropped at v1. Karpenter now only surfaces logLevel through the `logLevel` helm value. If you need more advanced configuration due to log parsing constraints, we recommend configuring your log parser to handle Karpenter's Zap JSON logging. diff --git a/website/content/en/v0.34/concepts/disruption.md b/website/content/en/v0.34/concepts/disruption.md index 48e0b479c5f3..aa5adede7dc6 100644 --- a/website/content/en/v0.34/concepts/disruption.md +++ b/website/content/en/v0.34/concepts/disruption.md @@ -186,7 +186,7 @@ If you require handling for Spot Rebalance Recommendations, you can use the [AWS Karpenter enables this feature by watching an SQS queue which receives critical events from AWS services which may affect your nodes. Karpenter requires that an SQS queue be provisioned and EventBridge rules and targets be added that forward interruption events from AWS services to the SQS queue. Karpenter provides details for provisioning this infrastructure in the [CloudFormation template in the Getting Started Guide](../../getting-started/getting-started-with-karpenter/#create-the-karpenter-infrastructure-and-iam-roles). -To enable interruption handling, configure the `--interruption-queue-name` CLI argument with the name of the interruption queue provisioned to handle interruption events. +To enable interruption handling, configure the `--interruption-queue` CLI argument with the name of the interruption queue provisioned to handle interruption events. ## Controls diff --git a/website/content/en/v0.34/faq.md b/website/content/en/v0.34/faq.md index 5096c34f69db..b7e3c91adcaa 100644 --- a/website/content/en/v0.34/faq.md +++ b/website/content/en/v0.34/faq.md @@ -17,7 +17,7 @@ AWS is the first cloud provider supported by Karpenter, although it is designed Yes, but there is no documentation yet for it. Start with Karpenter's GitHub [cloudprovider](https://github.com/aws/karpenter-core/tree/v0.34.5/pkg/cloudprovider) documentation to see how the AWS provider is built, but there are other sections of the code that will require changes too. ### What operating system nodes does Karpenter deploy? -Karpenter uses the OS defined by the [AMI Family in your EC2NodeClass]({{< ref "./concepts/nodeclasses#specamifamily" >}}). +Karpenter uses the OS defined by the [AMI Family in your EC2NodeClass]({{< ref "./concepts/nodeclasses#specamifamily" >}}). ### Can I provide my own custom operating system images? Karpenter has multiple mechanisms for configuring the [operating system]({{< ref "./concepts/nodeclasses/#specamiselectorterms" >}}) for your nodes. @@ -231,7 +231,7 @@ Karpenter's native interruption handling offers two main benefits over the stand 1. You don't have to manage and maintain a separate component to exclusively handle interruption events. 2. Karpenter's native interruption handling coordinates with other deprovisioning so that consolidation, expiration, etc. can be aware of interruption events and vice-versa. -### Why am I receiving QueueNotFound errors when I set `--interruption-queue-name`? +### Why am I receiving QueueNotFound errors when I set `--interruption-queue`? Karpenter requires a queue to exist that receives event messages from EC2 and health services in order to handle interruption messages properly for nodes. Details on the types of events that Karpenter handles can be found in the [Interruption Handling Docs]({{< ref "./concepts/disruption/#interruption" >}}). diff --git a/website/content/en/v0.34/upgrading/upgrade-guide.md b/website/content/en/v0.34/upgrading/upgrade-guide.md index fbe1e7ba16df..18073f128e94 100644 --- a/website/content/en/v0.34/upgrading/upgrade-guide.md +++ b/website/content/en/v0.34/upgrading/upgrade-guide.md @@ -50,7 +50,7 @@ The Ubuntu EKS optimized AMI has moved from 20.04 to 22.04 for Kubernetes 1.29+. * `Empty Expiration / Empty Drift / Empty Consolidation`: infinite parallelism * `Non-Empty Expiration / Non-Empty Drift / Single-Node Consolidation`: one node at a time * `Multi-Node Consolidation`: max 100 nodes -* To support Disruption Budgets, v0.34+ includes critical changes to Karpenter's core controllers, which allows Karpenter to consider multiple batches of disrupting nodes simultaneously. This increases Karpenter's performance with the potential downside of higher CPU and memory utilization from the Karpenter pod. While the magnitude of this difference varies on a case-by-case basis, when upgrading to Karpenter v0.34+, please note that you may need to increase the resources allocated to the Karpenter controller pods. +* To support Disruption Budgets, v0.34+ includes critical changes to Karpenter's core controllers, which allows Karpenter to consider multiple batches of disrupting nodes simultaneously. This increases Karpenter's performance with the potential downside of higher CPU and memory utilization from the Karpenter pod. While the magnitude of this difference varies on a case-by-case basis, when upgrading to Karpenter v0.34+, please note that you may need to increase the resources allocated to the Karpenter controller pods. * Karpenter now adds a default `podSecurityContext` that configures the `fsgroup: 65536` of volumes in the pod. If you are using sidecar containers, you should review if this configuration is compatible for them. You can disable this default `podSecurityContext` through helm by performing `--set podSecurityContext=null` when installing/upgrading the chart. * The `dnsPolicy` for the Karpenter controller pod has been changed back to the Kubernetes cluster default of `ClusterFirst`. Setting our `dnsPolicy` to `Default` (confusingly, this is not the Kubernetes cluster default) caused more confusion for any users running IPv6 clusters with dual-stack nodes or anyone running Karpenter with dependencies on cluster services (like clusters running service meshes). This change may be breaking for any users on Fargate or MNG who were allowing Karpenter to manage their in-cluster DNS service (`core-dns` on most clusters). If you still want the old behavior here, you can change the `dnsPolicy` to point to use `Default` by setting the helm value on install/upgrade with `--set dnsPolicy=Default`. More details on this issue can be found in the following Github issues: [#2186](https://github.com/aws/karpenter-provider-aws/issues/2186) and [#4947](https://github.com/aws/karpenter-provider-aws/issues/4947). * Karpenter now disallows `nodepool.spec.template.spec.resources` to be set. The webhook validation never allowed `nodepool.spec.template.spec.resources`. We are now ensuring that CEL validation also disallows `nodepool.spec.template.spec.resources` to be set. If you were previously setting the resources field on your NodePool, ensure that you remove this field before upgrading to the newest version of Karpenter or else updates to the resource may fail on the new version. @@ -83,6 +83,7 @@ This version includes **dual support** for both alpha and beta APIs to ensure th Note that if you are rolling back after upgrading to v0.32.0, note that v0.31.4 is the only version that supports handling rollback after you have deployed the v1beta1 APIs to your cluster. {{% /alert %}} +* Karpenter now uses `settings.InterruptionQueue` instead of `settings.aws.InterruptionQueueName` in its helm chart. The CLI argument also changed to `--interruption-queue`. * Karpenter now serves the webhook prometheus metrics server on port `8001`. If this port is already in-use on the pod or you are running in `hostNetworking` mode, you may need to change this port value. You can configure this port value through the `WEBHOOK_METRICS_PORT` environment variable or the `webhook.metrics.port` value if installing via Helm. * Karpenter now exposes the ability to disable webhooks through the `webhook.enabled=false` value. This value will disable the webhook server and will prevent any permissions, mutating or validating webhook configurations from being deployed to the cluster. * Karpenter now moves all logging configuration for the Zap logger into the `logConfig` values block. Configuring Karpenter logging with this mechanism _is_ deprecated and will be dropped at v1. Karpenter now only surfaces logLevel through the `logLevel` helm value. If you need more advanced configuration due to log parsing constraints, we recommend configuring your log parser to handle Karpenter's Zap JSON logging. diff --git a/website/content/en/v0.35/concepts/disruption.md b/website/content/en/v0.35/concepts/disruption.md index 2799846b1e8f..44c746da121e 100644 --- a/website/content/en/v0.35/concepts/disruption.md +++ b/website/content/en/v0.35/concepts/disruption.md @@ -186,7 +186,7 @@ If you require handling for Spot Rebalance Recommendations, you can use the [AWS Karpenter enables this feature by watching an SQS queue which receives critical events from AWS services which may affect your nodes. Karpenter requires that an SQS queue be provisioned and EventBridge rules and targets be added that forward interruption events from AWS services to the SQS queue. Karpenter provides details for provisioning this infrastructure in the [CloudFormation template in the Getting Started Guide](../../getting-started/getting-started-with-karpenter/#create-the-karpenter-infrastructure-and-iam-roles). -To enable interruption handling, configure the `--interruption-queue-name` CLI argument with the name of the interruption queue provisioned to handle interruption events. +To enable interruption handling, configure the `--interruption-queue` CLI argument with the name of the interruption queue provisioned to handle interruption events. ## Controls diff --git a/website/content/en/v0.35/faq.md b/website/content/en/v0.35/faq.md index a825e34056f6..e25969f94526 100644 --- a/website/content/en/v0.35/faq.md +++ b/website/content/en/v0.35/faq.md @@ -17,7 +17,7 @@ AWS is the first cloud provider supported by Karpenter, although it is designed Yes, but there is no documentation yet for it. Start with Karpenter's GitHub [cloudprovider](https://github.com/aws/karpenter-core/tree/v0.35.4/pkg/cloudprovider) documentation to see how the AWS provider is built, but there are other sections of the code that will require changes too. ### What operating system nodes does Karpenter deploy? -Karpenter uses the OS defined by the [AMI Family in your EC2NodeClass]({{< ref "./concepts/nodeclasses#specamifamily" >}}). +Karpenter uses the OS defined by the [AMI Family in your EC2NodeClass]({{< ref "./concepts/nodeclasses#specamifamily" >}}). ### Can I provide my own custom operating system images? Karpenter has multiple mechanisms for configuring the [operating system]({{< ref "./concepts/nodeclasses/#specamiselectorterms" >}}) for your nodes. @@ -231,7 +231,7 @@ Karpenter's native interruption handling offers two main benefits over the stand 1. You don't have to manage and maintain a separate component to exclusively handle interruption events. 2. Karpenter's native interruption handling coordinates with other deprovisioning so that consolidation, expiration, etc. can be aware of interruption events and vice-versa. -### Why am I receiving QueueNotFound errors when I set `--interruption-queue-name`? +### Why am I receiving QueueNotFound errors when I set `--interruption-queue`? Karpenter requires a queue to exist that receives event messages from EC2 and health services in order to handle interruption messages properly for nodes. Details on the types of events that Karpenter handles can be found in the [Interruption Handling Docs]({{< ref "./concepts/disruption/#interruption" >}}). diff --git a/website/content/en/v0.35/upgrading/upgrade-guide.md b/website/content/en/v0.35/upgrading/upgrade-guide.md index 42acba982977..46739a103c1c 100644 --- a/website/content/en/v0.35/upgrading/upgrade-guide.md +++ b/website/content/en/v0.35/upgrading/upgrade-guide.md @@ -60,7 +60,7 @@ The Ubuntu EKS optimized AMI has moved from 20.04 to 22.04 for Kubernetes 1.29+. * `Empty Expiration / Empty Drift / Empty Consolidation`: infinite parallelism * `Non-Empty Expiration / Non-Empty Drift / Single-Node Consolidation`: one node at a time * `Multi-Node Consolidation`: max 100 nodes -* To support Disruption Budgets, `0.34.0`+ includes critical changes to Karpenter's core controllers, which allows Karpenter to consider multiple batches of disrupting nodes simultaneously. This increases Karpenter's performance with the potential downside of higher CPU and memory utilization from the Karpenter pod. While the magnitude of this difference varies on a case-by-case basis, when upgrading to Karpenter `0.34.0`+, please note that you may need to increase the resources allocated to the Karpenter controller pods. +* To support Disruption Budgets, `0.34.0`+ includes critical changes to Karpenter's core controllers, which allows Karpenter to consider multiple batches of disrupting nodes simultaneously. This increases Karpenter's performance with the potential downside of higher CPU and memory utilization from the Karpenter pod. While the magnitude of this difference varies on a case-by-case basis, when upgrading to Karpenter `0.34.0`+, please note that you may need to increase the resources allocated to the Karpenter controller pods. * Karpenter now adds a default `podSecurityContext` that configures the `fsgroup: 65536` of volumes in the pod. If you are using sidecar containers, you should review if this configuration is compatible for them. You can disable this default `podSecurityContext` through helm by performing `--set podSecurityContext=null` when installing/upgrading the chart. * The `dnsPolicy` for the Karpenter controller pod has been changed back to the Kubernetes cluster default of `ClusterFirst`. Setting our `dnsPolicy` to `Default` (confusingly, this is not the Kubernetes cluster default) caused more confusion for any users running IPv6 clusters with dual-stack nodes or anyone running Karpenter with dependencies on cluster services (like clusters running service meshes). This change may be breaking for any users on Fargate or MNG who were allowing Karpenter to manage their in-cluster DNS service (`core-dns` on most clusters). If you still want the old behavior here, you can change the `dnsPolicy` to point to use `Default` by setting the helm value on install/upgrade with `--set dnsPolicy=Default`. More details on this issue can be found in the following Github issues: [#2186](https://github.com/aws/karpenter-provider-aws/issues/2186) and [#4947](https://github.com/aws/karpenter-provider-aws/issues/4947). * Karpenter now disallows `nodepool.spec.template.spec.resources` to be set. The webhook validation never allowed `nodepool.spec.template.spec.resources`. We are now ensuring that CEL validation also disallows `nodepool.spec.template.spec.resources` to be set. If you were previously setting the resources field on your NodePool, ensure that you remove this field before upgrading to the newest version of Karpenter or else updates to the resource may fail on the new version. @@ -91,6 +91,7 @@ This version includes **dual support** for both alpha and beta APIs to ensure th Note that if you are rolling back after upgrading to `0.32.0`, note that `0.31.4` is the only version that supports handling rollback after you have deployed the v1beta1 APIs to your cluster. {{% /alert %}} +* Karpenter now uses `settings.InterruptionQueue` instead of `settings.aws.InterruptionQueueName` in its helm chart. The CLI argument also changed to `--interruption-queue`. * Karpenter now serves the webhook prometheus metrics server on port `8001`. If this port is already in-use on the pod or you are running in `hostNetworking` mode, you may need to change this port value. You can configure this port value through the `WEBHOOK_METRICS_PORT` environment variable or the `webhook.metrics.port` value if installing via Helm. * Karpenter now exposes the ability to disable webhooks through the `webhook.enabled=false` value. This value will disable the webhook server and will prevent any permissions, mutating or validating webhook configurations from being deployed to the cluster. * Karpenter now moves all logging configuration for the Zap logger into the `logConfig` values block. Configuring Karpenter logging with this mechanism _is_ deprecated and will be dropped at v1. Karpenter now only surfaces logLevel through the `logLevel` helm value. If you need more advanced configuration due to log parsing constraints, we recommend configuring your log parser to handle Karpenter's Zap JSON logging. diff --git a/website/content/en/v0.36/concepts/disruption.md b/website/content/en/v0.36/concepts/disruption.md index d92d1c68fc2c..aa10df2124f3 100644 --- a/website/content/en/v0.36/concepts/disruption.md +++ b/website/content/en/v0.36/concepts/disruption.md @@ -190,7 +190,7 @@ If you require handling for Spot Rebalance Recommendations, you can use the [AWS Karpenter enables this feature by watching an SQS queue which receives critical events from AWS services which may affect your nodes. Karpenter requires that an SQS queue be provisioned and EventBridge rules and targets be added that forward interruption events from AWS services to the SQS queue. Karpenter provides details for provisioning this infrastructure in the [CloudFormation template in the Getting Started Guide](../../getting-started/getting-started-with-karpenter/#create-the-karpenter-infrastructure-and-iam-roles). -To enable interruption handling, configure the `--interruption-queue-name` CLI argument with the name of the interruption queue provisioned to handle interruption events. +To enable interruption handling, configure the `--interruption-queue` CLI argument with the name of the interruption queue provisioned to handle interruption events. ## Controls diff --git a/website/content/en/v0.36/faq.md b/website/content/en/v0.36/faq.md index c765ef174e2d..cfbf61a302d7 100644 --- a/website/content/en/v0.36/faq.md +++ b/website/content/en/v0.36/faq.md @@ -17,7 +17,7 @@ AWS is the first cloud provider supported by Karpenter, although it is designed Yes, but there is no documentation yet for it. Start with Karpenter's GitHub [cloudprovider](https://github.com/aws/karpenter-core/tree/v0.36.0/pkg/cloudprovider) documentation to see how the AWS provider is built, but there are other sections of the code that will require changes too. ### What operating system nodes does Karpenter deploy? -Karpenter uses the OS defined by the [AMI Family in your EC2NodeClass]({{< ref "./concepts/nodeclasses#specamifamily" >}}). +Karpenter uses the OS defined by the [AMI Family in your EC2NodeClass]({{< ref "./concepts/nodeclasses#specamifamily" >}}). ### Can I provide my own custom operating system images? Karpenter has multiple mechanisms for configuring the [operating system]({{< ref "./concepts/nodeclasses/#specamiselectorterms" >}}) for your nodes. @@ -231,7 +231,7 @@ Karpenter's native interruption handling offers two main benefits over the stand 1. You don't have to manage and maintain a separate component to exclusively handle interruption events. 2. Karpenter's native interruption handling coordinates with other deprovisioning so that consolidation, expiration, etc. can be aware of interruption events and vice-versa. -### Why am I receiving QueueNotFound errors when I set `--interruption-queue-name`? +### Why am I receiving QueueNotFound errors when I set `--interruption-queue`? Karpenter requires a queue to exist that receives event messages from EC2 and health services in order to handle interruption messages properly for nodes. Details on the types of events that Karpenter handles can be found in the [Interruption Handling Docs]({{< ref "./concepts/disruption/#interruption" >}}). diff --git a/website/content/en/v0.36/upgrading/upgrade-guide.md b/website/content/en/v0.36/upgrading/upgrade-guide.md index c7e84b894521..1687bfa3f5a5 100644 --- a/website/content/en/v0.36/upgrading/upgrade-guide.md +++ b/website/content/en/v0.36/upgrading/upgrade-guide.md @@ -44,7 +44,7 @@ WHEN CREATING A NEW SECTION OF THE UPGRADE GUIDANCE FOR NEWER VERSIONS, ENSURE T {{% /alert %}} {{% alert title="Warning" color="warning" %}} - v0.36.x introduces update to drift that restricts rollback. When rolling back from >=v0.36.0, note that v0.32.9+, v0.33.4+, v0.34.5+, v0.35.4+ are the patch versions that support rollback. If Karpenter is rolled back to an older patch version, Karpenter can potentially drift all the nodes in the cluster. + v0.36.x introduces update to drift that restricts rollback. When rolling back from >=v0.36.0, note that v0.32.9+, v0.33.4+, v0.34.5+, v0.35.4+ are the patch versions that support rollback. If Karpenter is rolled back to an older patch version, Karpenter can potentially drift all the nodes in the cluster. {{% /alert %}} * Karpenter changed the name of the `karpenter_cloudprovider_instance_type_price_estimate` metric to `karpenter_cloudprovider_instance_type_offering_price_estimate` to align with the new `karpenter_cloudprovider_instance_type_offering_available` metric. The `region` label was also dropped from the metric, since this can be inferred from the environment that Karpenter is running in. @@ -72,7 +72,7 @@ The Ubuntu EKS optimized AMI has moved from 20.04 to 22.04 for Kubernetes 1.29+. * `Empty Expiration / Empty Drift / Empty Consolidation`: infinite parallelism * `Non-Empty Expiration / Non-Empty Drift / Single-Node Consolidation`: one node at a time * `Multi-Node Consolidation`: max 100 nodes -* To support Disruption Budgets, `0.34.0`+ includes critical changes to Karpenter's core controllers, which allows Karpenter to consider multiple batches of disrupting nodes simultaneously. This increases Karpenter's performance with the potential downside of higher CPU and memory utilization from the Karpenter pod. While the magnitude of this difference varies on a case-by-case basis, when upgrading to Karpenter `0.34.0`+, please note that you may need to increase the resources allocated to the Karpenter controller pods. +* To support Disruption Budgets, `0.34.0`+ includes critical changes to Karpenter's core controllers, which allows Karpenter to consider multiple batches of disrupting nodes simultaneously. This increases Karpenter's performance with the potential downside of higher CPU and memory utilization from the Karpenter pod. While the magnitude of this difference varies on a case-by-case basis, when upgrading to Karpenter `0.34.0`+, please note that you may need to increase the resources allocated to the Karpenter controller pods. * Karpenter now adds a default `podSecurityContext` that configures the `fsgroup: 65536` of volumes in the pod. If you are using sidecar containers, you should review if this configuration is compatible for them. You can disable this default `podSecurityContext` through helm by performing `--set podSecurityContext=null` when installing/upgrading the chart. * The `dnsPolicy` for the Karpenter controller pod has been changed back to the Kubernetes cluster default of `ClusterFirst`. Setting our `dnsPolicy` to `Default` (confusingly, this is not the Kubernetes cluster default) caused more confusion for any users running IPv6 clusters with dual-stack nodes or anyone running Karpenter with dependencies on cluster services (like clusters running service meshes). This change may be breaking for any users on Fargate or MNG who were allowing Karpenter to manage their in-cluster DNS service (`core-dns` on most clusters). If you still want the old behavior here, you can change the `dnsPolicy` to point to use `Default` by setting the helm value on install/upgrade with `--set dnsPolicy=Default`. More details on this issue can be found in the following Github issues: [#2186](https://github.com/aws/karpenter-provider-aws/issues/2186) and [#4947](https://github.com/aws/karpenter-provider-aws/issues/4947). * Karpenter now disallows `nodepool.spec.template.spec.resources` to be set. The webhook validation never allowed `nodepool.spec.template.spec.resources`. We are now ensuring that CEL validation also disallows `nodepool.spec.template.spec.resources` to be set. If you were previously setting the resources field on your NodePool, ensure that you remove this field before upgrading to the newest version of Karpenter or else updates to the resource may fail on the new version. @@ -103,6 +103,7 @@ This version includes **dual support** for both alpha and beta APIs to ensure th Note that if you are rolling back after upgrading to `0.32.0`, note that `0.31.4` is the only version that supports handling rollback after you have deployed the v1beta1 APIs to your cluster. {{% /alert %}} +* Karpenter now uses `settings.InterruptionQueue` instead of `settings.aws.InterruptionQueueName` in its helm chart. The CLI argument also changed to `--interruption-queue`. * Karpenter now serves the webhook prometheus metrics server on port `8001`. If this port is already in-use on the pod or you are running in `hostNetworking` mode, you may need to change this port value. You can configure this port value through the `WEBHOOK_METRICS_PORT` environment variable or the `webhook.metrics.port` value if installing via Helm. * Karpenter now exposes the ability to disable webhooks through the `webhook.enabled=false` value. This value will disable the webhook server and will prevent any permissions, mutating or validating webhook configurations from being deployed to the cluster. * Karpenter now moves all logging configuration for the Zap logger into the `logConfig` values block. Configuring Karpenter logging with this mechanism _is_ deprecated and will be dropped at v1. Karpenter now only surfaces logLevel through the `logLevel` helm value. If you need more advanced configuration due to log parsing constraints, we recommend configuring your log parser to handle Karpenter's Zap JSON logging. From 3ab2b06c55ba49ff7e7b8a86943b06799cf84637 Mon Sep 17 00:00:00 2001 From: Jonathan Innis Date: Tue, 23 Apr 2024 09:11:50 -0700 Subject: [PATCH 12/17] fix: Fix volume size validation (#6072) --- pkg/apis/crds/karpenter.k8s.aws_ec2nodeclasses.yaml | 9 ++------- pkg/apis/v1beta1/ec2nodeclass.go | 3 ++- 2 files changed, 4 insertions(+), 8 deletions(-) diff --git a/pkg/apis/crds/karpenter.k8s.aws_ec2nodeclasses.yaml b/pkg/apis/crds/karpenter.k8s.aws_ec2nodeclasses.yaml index fe04920f8dad..15cd0e2fc34e 100644 --- a/pkg/apis/crds/karpenter.k8s.aws_ec2nodeclasses.yaml +++ b/pkg/apis/crds/karpenter.k8s.aws_ec2nodeclasses.yaml @@ -168,12 +168,6 @@ spec: format: int64 type: integer volumeSize: - allOf: - - pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ - - pattern: ^((?:[1-9][0-9]{0,3}|[1-4][0-9]{4}|[5][0-8][0-9]{3}|59000)Gi|(?:[1-9][0-9]{0,3}|[1-5][0-9]{4}|[6][0-3][0-9]{3}|64000)G|([1-9]||[1-5][0-7]|58)Ti|([1-9]||[1-5][0-9]|6[0-3]|64)T)$ - anyOf: - - type: integer - - type: string description: |- VolumeSize in `Gi`, `G`, `Ti`, or `T`. You must specify either a snapshot ID or a volume size. The following are the supported volumes sizes for each volume @@ -190,7 +184,8 @@ spec: * standard: 1-1,024 - x-kubernetes-int-or-string: true + pattern: ^((?:[1-9][0-9]{0,3}|[1-4][0-9]{4}|[5][0-8][0-9]{3}|59000)Gi|(?:[1-9][0-9]{0,3}|[1-5][0-9]{4}|[6][0-3][0-9]{3}|64000)G|([1-9]||[1-5][0-7]|58)Ti|([1-9]||[1-5][0-9]|6[0-3]|64)T)$ + type: string volumeType: description: |- VolumeType of the block device. diff --git a/pkg/apis/v1beta1/ec2nodeclass.go b/pkg/apis/v1beta1/ec2nodeclass.go index f9515b5e4d2f..66a2926b2ced 100644 --- a/pkg/apis/v1beta1/ec2nodeclass.go +++ b/pkg/apis/v1beta1/ec2nodeclass.go @@ -292,7 +292,8 @@ type BlockDevice struct { // + TODO: Add the CEL resources.quantity type after k8s 1.29 // + https://github.com/kubernetes/apiserver/commit/b137c256373aec1c5d5810afbabb8932a19ecd2a#diff-838176caa5882465c9d6061febd456397a3e2b40fb423ed36f0cabb1847ecb4dR190 // +kubebuilder:validation:Pattern:="^((?:[1-9][0-9]{0,3}|[1-4][0-9]{4}|[5][0-8][0-9]{3}|59000)Gi|(?:[1-9][0-9]{0,3}|[1-5][0-9]{4}|[6][0-3][0-9]{3}|64000)G|([1-9]||[1-5][0-7]|58)Ti|([1-9]||[1-5][0-9]|6[0-3]|64)T)$" - // +kubebuilder:validation:XIntOrString + // +kubebuilder:validation:Schemaless + // +kubebuilder:validation:Type:=string // +optional VolumeSize *resource.Quantity `json:"volumeSize,omitempty" hash:"string"` // VolumeType of the block device. From 59b5846708cefd45b9f9da8bd8ab08d9cad91ba9 Mon Sep 17 00:00:00 2001 From: WTTAT Date: Wed, 24 Apr 2024 02:22:14 +0800 Subject: [PATCH 13/17] fix: create tag script error (InvalidID) when multiple sunbets for eks nodegroup (#6073) --- .../migrating-from-cas/scripts/step05-tag-subnets.sh | 6 +++--- .../migrating-from-cas/scripts/step05-tag-subnets.sh | 6 +++--- .../migrating-from-cas/scripts/step05-tag-subnets.sh | 10 +++++----- .../migrating-from-cas/scripts/step05-tag-subnets.sh | 6 +++--- .../migrating-from-cas/scripts/step05-tag-subnets.sh | 6 +++--- 5 files changed, 17 insertions(+), 17 deletions(-) diff --git a/website/content/en/docs/getting-started/migrating-from-cas/scripts/step05-tag-subnets.sh b/website/content/en/docs/getting-started/migrating-from-cas/scripts/step05-tag-subnets.sh index 47df188dc87d..139bbbd1cd02 100644 --- a/website/content/en/docs/getting-started/migrating-from-cas/scripts/step05-tag-subnets.sh +++ b/website/content/en/docs/getting-started/migrating-from-cas/scripts/step05-tag-subnets.sh @@ -1,6 +1,6 @@ for NODEGROUP in $(aws eks list-nodegroups --cluster-name "${CLUSTER_NAME}" --query 'nodegroups' --output text); do aws ec2 create-tags \ --tags "Key=karpenter.sh/discovery,Value=${CLUSTER_NAME}" \ - --resources "$(aws eks describe-nodegroup --cluster-name "${CLUSTER_NAME}" \ - --nodegroup-name "${NODEGROUP}" --query 'nodegroup.subnets' --output text )" -done + --resources $(aws eks describe-nodegroup --cluster-name "${CLUSTER_NAME}" \ + --nodegroup-name "${NODEGROUP}" --query 'nodegroup.subnets' --output text ) +done \ No newline at end of file diff --git a/website/content/en/preview/getting-started/migrating-from-cas/scripts/step05-tag-subnets.sh b/website/content/en/preview/getting-started/migrating-from-cas/scripts/step05-tag-subnets.sh index 47df188dc87d..139bbbd1cd02 100644 --- a/website/content/en/preview/getting-started/migrating-from-cas/scripts/step05-tag-subnets.sh +++ b/website/content/en/preview/getting-started/migrating-from-cas/scripts/step05-tag-subnets.sh @@ -1,6 +1,6 @@ for NODEGROUP in $(aws eks list-nodegroups --cluster-name "${CLUSTER_NAME}" --query 'nodegroups' --output text); do aws ec2 create-tags \ --tags "Key=karpenter.sh/discovery,Value=${CLUSTER_NAME}" \ - --resources "$(aws eks describe-nodegroup --cluster-name "${CLUSTER_NAME}" \ - --nodegroup-name "${NODEGROUP}" --query 'nodegroup.subnets' --output text )" -done + --resources $(aws eks describe-nodegroup --cluster-name "${CLUSTER_NAME}" \ + --nodegroup-name "${NODEGROUP}" --query 'nodegroup.subnets' --output text ) +done \ No newline at end of file diff --git a/website/content/en/v0.34/getting-started/migrating-from-cas/scripts/step05-tag-subnets.sh b/website/content/en/v0.34/getting-started/migrating-from-cas/scripts/step05-tag-subnets.sh index de972ea2bddd..139bbbd1cd02 100644 --- a/website/content/en/v0.34/getting-started/migrating-from-cas/scripts/step05-tag-subnets.sh +++ b/website/content/en/v0.34/getting-started/migrating-from-cas/scripts/step05-tag-subnets.sh @@ -1,6 +1,6 @@ -for NODEGROUP in $(aws eks list-nodegroups --cluster-name ${CLUSTER_NAME} \ - --query 'nodegroups' --output text); do aws ec2 create-tags \ +for NODEGROUP in $(aws eks list-nodegroups --cluster-name "${CLUSTER_NAME}" --query 'nodegroups' --output text); do + aws ec2 create-tags \ --tags "Key=karpenter.sh/discovery,Value=${CLUSTER_NAME}" \ - --resources $(aws eks describe-nodegroup --cluster-name ${CLUSTER_NAME} \ - --nodegroup-name $NODEGROUP --query 'nodegroup.subnets' --output text ) -done + --resources $(aws eks describe-nodegroup --cluster-name "${CLUSTER_NAME}" \ + --nodegroup-name "${NODEGROUP}" --query 'nodegroup.subnets' --output text ) +done \ No newline at end of file diff --git a/website/content/en/v0.35/getting-started/migrating-from-cas/scripts/step05-tag-subnets.sh b/website/content/en/v0.35/getting-started/migrating-from-cas/scripts/step05-tag-subnets.sh index 47df188dc87d..139bbbd1cd02 100644 --- a/website/content/en/v0.35/getting-started/migrating-from-cas/scripts/step05-tag-subnets.sh +++ b/website/content/en/v0.35/getting-started/migrating-from-cas/scripts/step05-tag-subnets.sh @@ -1,6 +1,6 @@ for NODEGROUP in $(aws eks list-nodegroups --cluster-name "${CLUSTER_NAME}" --query 'nodegroups' --output text); do aws ec2 create-tags \ --tags "Key=karpenter.sh/discovery,Value=${CLUSTER_NAME}" \ - --resources "$(aws eks describe-nodegroup --cluster-name "${CLUSTER_NAME}" \ - --nodegroup-name "${NODEGROUP}" --query 'nodegroup.subnets' --output text )" -done + --resources $(aws eks describe-nodegroup --cluster-name "${CLUSTER_NAME}" \ + --nodegroup-name "${NODEGROUP}" --query 'nodegroup.subnets' --output text ) +done \ No newline at end of file diff --git a/website/content/en/v0.36/getting-started/migrating-from-cas/scripts/step05-tag-subnets.sh b/website/content/en/v0.36/getting-started/migrating-from-cas/scripts/step05-tag-subnets.sh index 47df188dc87d..139bbbd1cd02 100644 --- a/website/content/en/v0.36/getting-started/migrating-from-cas/scripts/step05-tag-subnets.sh +++ b/website/content/en/v0.36/getting-started/migrating-from-cas/scripts/step05-tag-subnets.sh @@ -1,6 +1,6 @@ for NODEGROUP in $(aws eks list-nodegroups --cluster-name "${CLUSTER_NAME}" --query 'nodegroups' --output text); do aws ec2 create-tags \ --tags "Key=karpenter.sh/discovery,Value=${CLUSTER_NAME}" \ - --resources "$(aws eks describe-nodegroup --cluster-name "${CLUSTER_NAME}" \ - --nodegroup-name "${NODEGROUP}" --query 'nodegroup.subnets' --output text )" -done + --resources $(aws eks describe-nodegroup --cluster-name "${CLUSTER_NAME}" \ + --nodegroup-name "${NODEGROUP}" --query 'nodegroup.subnets' --output text ) +done \ No newline at end of file From b7745bfb200221bf09310b0f732435c72957ba01 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Maciej=20J=C3=B3zefczyk?= Date: Wed, 24 Apr 2024 08:11:41 +0200 Subject: [PATCH 14/17] docs: Remove settings.aws.enablePodENI from docs (#6088) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Maciej Józefczyk Co-authored-by: Jonathan Innis --- website/content/en/docs/concepts/scheduling.md | 2 -- website/content/en/docs/troubleshooting.md | 2 +- website/content/en/preview/concepts/scheduling.md | 2 -- website/content/en/preview/troubleshooting.md | 2 +- website/content/en/v0.32/concepts/scheduling.md | 2 -- website/content/en/v0.32/troubleshooting.md | 2 +- website/content/en/v0.34/concepts/scheduling.md | 2 -- website/content/en/v0.34/troubleshooting.md | 2 +- website/content/en/v0.35/concepts/scheduling.md | 2 -- website/content/en/v0.35/troubleshooting.md | 2 +- website/content/en/v0.36/concepts/scheduling.md | 2 -- website/content/en/v0.36/troubleshooting.md | 2 +- 12 files changed, 6 insertions(+), 18 deletions(-) diff --git a/website/content/en/docs/concepts/scheduling.md b/website/content/en/docs/concepts/scheduling.md index 49e43cf45f9d..696b2bb4afc6 100755 --- a/website/content/en/docs/concepts/scheduling.md +++ b/website/content/en/docs/concepts/scheduling.md @@ -104,8 +104,6 @@ Refer to general [Kubernetes GPU](https://kubernetes.io/docs/tasks/manage-gpus/s You must enable Pod ENI support in the AWS VPC CNI Plugin before enabling Pod ENI support in Karpenter. Please refer to the [Security Groups for Pods documentation](https://docs.aws.amazon.com/eks/latest/userguide/security-groups-for-pods.html) for instructions. {{% /alert %}} -Now that Pod ENI support is enabled in the AWS VPC CNI Plugin, you can enable Pod ENI support in Karpenter by setting the `settings.aws.enablePodENI` Helm chart value to `true`. - Here is an example of a pod-eni resource defined in a deployment manifest: ``` spec: diff --git a/website/content/en/docs/troubleshooting.md b/website/content/en/docs/troubleshooting.md index 6dc784007b75..0e6c0d3114b9 100644 --- a/website/content/en/docs/troubleshooting.md +++ b/website/content/en/docs/troubleshooting.md @@ -663,7 +663,7 @@ This typically occurs when the node has not been considered fully initialized fo ### Log message of `inflight check failed for node, Expected resource "vpc.amazonaws.com/pod-eni" didn't register on the node` is reported -This error indicates that the `vpc.amazonaws.com/pod-eni` resource was never reported on the node. If you've enabled Pod ENI for Karpenter nodes via the `aws.enablePodENI` setting, you will need to make the corresponding change to the VPC CNI to enable [security groups for pods](https://docs.aws.amazon.com/eks/latest/userguide/security-groups-for-pods.html) which will cause the resource to be registered. +This error indicates that the `vpc.amazonaws.com/pod-eni` resource was never reported on the node. You will need to make the corresponding change to the VPC CNI to enable [security groups for pods](https://docs.aws.amazon.com/eks/latest/userguide/security-groups-for-pods.html) which will cause the resource to be registered. ### AWS Node Termination Handler (NTH) interactions Karpenter [doesn't currently support draining and terminating on spot rebalance recommendations]({{< ref "concepts/disruption#interruption" >}}). Users who want support for both drain and terminate on spot interruption as well as drain and termination on spot rebalance recommendations may install Node Termination Handler (NTH) on their clusters to support this behavior. diff --git a/website/content/en/preview/concepts/scheduling.md b/website/content/en/preview/concepts/scheduling.md index 49e43cf45f9d..696b2bb4afc6 100755 --- a/website/content/en/preview/concepts/scheduling.md +++ b/website/content/en/preview/concepts/scheduling.md @@ -104,8 +104,6 @@ Refer to general [Kubernetes GPU](https://kubernetes.io/docs/tasks/manage-gpus/s You must enable Pod ENI support in the AWS VPC CNI Plugin before enabling Pod ENI support in Karpenter. Please refer to the [Security Groups for Pods documentation](https://docs.aws.amazon.com/eks/latest/userguide/security-groups-for-pods.html) for instructions. {{% /alert %}} -Now that Pod ENI support is enabled in the AWS VPC CNI Plugin, you can enable Pod ENI support in Karpenter by setting the `settings.aws.enablePodENI` Helm chart value to `true`. - Here is an example of a pod-eni resource defined in a deployment manifest: ``` spec: diff --git a/website/content/en/preview/troubleshooting.md b/website/content/en/preview/troubleshooting.md index 6dc784007b75..0e6c0d3114b9 100644 --- a/website/content/en/preview/troubleshooting.md +++ b/website/content/en/preview/troubleshooting.md @@ -663,7 +663,7 @@ This typically occurs when the node has not been considered fully initialized fo ### Log message of `inflight check failed for node, Expected resource "vpc.amazonaws.com/pod-eni" didn't register on the node` is reported -This error indicates that the `vpc.amazonaws.com/pod-eni` resource was never reported on the node. If you've enabled Pod ENI for Karpenter nodes via the `aws.enablePodENI` setting, you will need to make the corresponding change to the VPC CNI to enable [security groups for pods](https://docs.aws.amazon.com/eks/latest/userguide/security-groups-for-pods.html) which will cause the resource to be registered. +This error indicates that the `vpc.amazonaws.com/pod-eni` resource was never reported on the node. You will need to make the corresponding change to the VPC CNI to enable [security groups for pods](https://docs.aws.amazon.com/eks/latest/userguide/security-groups-for-pods.html) which will cause the resource to be registered. ### AWS Node Termination Handler (NTH) interactions Karpenter [doesn't currently support draining and terminating on spot rebalance recommendations]({{< ref "concepts/disruption#interruption" >}}). Users who want support for both drain and terminate on spot interruption as well as drain and termination on spot rebalance recommendations may install Node Termination Handler (NTH) on their clusters to support this behavior. diff --git a/website/content/en/v0.32/concepts/scheduling.md b/website/content/en/v0.32/concepts/scheduling.md index 9617591d7868..a52f1c0aa3e1 100755 --- a/website/content/en/v0.32/concepts/scheduling.md +++ b/website/content/en/v0.32/concepts/scheduling.md @@ -104,8 +104,6 @@ Refer to general [Kubernetes GPU](https://kubernetes.io/docs/tasks/manage-gpus/s You must enable Pod ENI support in the AWS VPC CNI Plugin before enabling Pod ENI support in Karpenter. Please refer to the [Security Groups for Pods documentation](https://docs.aws.amazon.com/eks/latest/userguide/security-groups-for-pods.html) for instructions. {{% /alert %}} -Now that Pod ENI support is enabled in the AWS VPC CNI Plugin, you can enable Pod ENI support in Karpenter by setting the `settings.aws.enablePodENI` Helm chart value to `true`. - Here is an example of a pod-eni resource defined in a deployment manifest: ``` spec: diff --git a/website/content/en/v0.32/troubleshooting.md b/website/content/en/v0.32/troubleshooting.md index cb51859789d9..a5db4f33597e 100644 --- a/website/content/en/v0.32/troubleshooting.md +++ b/website/content/en/v0.32/troubleshooting.md @@ -651,7 +651,7 @@ This typically occurs when the node has not been considered fully initialized fo ### Log message of `inflight check failed for node, Expected resource "vpc.amazonaws.com/pod-eni" didn't register on the node` is reported -This error indicates that the `vpc.amazonaws.com/pod-eni` resource was never reported on the node. If you've enabled Pod ENI for Karpenter nodes via the `aws.enablePodENI` setting, you will need to make the corresponding change to the VPC CNI to enable [security groups for pods](https://docs.aws.amazon.com/eks/latest/userguide/security-groups-for-pods.html) which will cause the resource to be registered. +This error indicates that the `vpc.amazonaws.com/pod-eni` resource was never reported on the node. You will need to make the corresponding change to the VPC CNI to enable [security groups for pods](https://docs.aws.amazon.com/eks/latest/userguide/security-groups-for-pods.html) which will cause the resource to be registered. ### AWS Node Termination Handler (NTH) interactions Karpenter [doesn't currently support draining and terminating on spot rebalance recommendations]({{< ref "concepts/disruption#interruption" >}}). Users who want support for both drain and terminate on spot interruption as well as drain and termination on spot rebalance recommendations may install Node Termination Handler (NTH) on their clusters to support this behavior. diff --git a/website/content/en/v0.34/concepts/scheduling.md b/website/content/en/v0.34/concepts/scheduling.md index 4eebe154c855..a121b1e7c9c8 100755 --- a/website/content/en/v0.34/concepts/scheduling.md +++ b/website/content/en/v0.34/concepts/scheduling.md @@ -104,8 +104,6 @@ Refer to general [Kubernetes GPU](https://kubernetes.io/docs/tasks/manage-gpus/s You must enable Pod ENI support in the AWS VPC CNI Plugin before enabling Pod ENI support in Karpenter. Please refer to the [Security Groups for Pods documentation](https://docs.aws.amazon.com/eks/latest/userguide/security-groups-for-pods.html) for instructions. {{% /alert %}} -Now that Pod ENI support is enabled in the AWS VPC CNI Plugin, you can enable Pod ENI support in Karpenter by setting the `settings.aws.enablePodENI` Helm chart value to `true`. - Here is an example of a pod-eni resource defined in a deployment manifest: ``` spec: diff --git a/website/content/en/v0.34/troubleshooting.md b/website/content/en/v0.34/troubleshooting.md index ee912f7fe180..5cef2b9a0eff 100644 --- a/website/content/en/v0.34/troubleshooting.md +++ b/website/content/en/v0.34/troubleshooting.md @@ -663,7 +663,7 @@ This typically occurs when the node has not been considered fully initialized fo ### Log message of `inflight check failed for node, Expected resource "vpc.amazonaws.com/pod-eni" didn't register on the node` is reported -This error indicates that the `vpc.amazonaws.com/pod-eni` resource was never reported on the node. If you've enabled Pod ENI for Karpenter nodes via the `aws.enablePodENI` setting, you will need to make the corresponding change to the VPC CNI to enable [security groups for pods](https://docs.aws.amazon.com/eks/latest/userguide/security-groups-for-pods.html) which will cause the resource to be registered. +This error indicates that the `vpc.amazonaws.com/pod-eni` resource was never reported on the node. You will need to make the corresponding change to the VPC CNI to enable [security groups for pods](https://docs.aws.amazon.com/eks/latest/userguide/security-groups-for-pods.html) which will cause the resource to be registered. ### AWS Node Termination Handler (NTH) interactions Karpenter [doesn't currently support draining and terminating on spot rebalance recommendations]({{< ref "concepts/disruption#interruption" >}}). Users who want support for both drain and terminate on spot interruption as well as drain and termination on spot rebalance recommendations may install Node Termination Handler (NTH) on their clusters to support this behavior. diff --git a/website/content/en/v0.35/concepts/scheduling.md b/website/content/en/v0.35/concepts/scheduling.md index 4eebe154c855..a121b1e7c9c8 100755 --- a/website/content/en/v0.35/concepts/scheduling.md +++ b/website/content/en/v0.35/concepts/scheduling.md @@ -104,8 +104,6 @@ Refer to general [Kubernetes GPU](https://kubernetes.io/docs/tasks/manage-gpus/s You must enable Pod ENI support in the AWS VPC CNI Plugin before enabling Pod ENI support in Karpenter. Please refer to the [Security Groups for Pods documentation](https://docs.aws.amazon.com/eks/latest/userguide/security-groups-for-pods.html) for instructions. {{% /alert %}} -Now that Pod ENI support is enabled in the AWS VPC CNI Plugin, you can enable Pod ENI support in Karpenter by setting the `settings.aws.enablePodENI` Helm chart value to `true`. - Here is an example of a pod-eni resource defined in a deployment manifest: ``` spec: diff --git a/website/content/en/v0.35/troubleshooting.md b/website/content/en/v0.35/troubleshooting.md index 6dc784007b75..0e6c0d3114b9 100644 --- a/website/content/en/v0.35/troubleshooting.md +++ b/website/content/en/v0.35/troubleshooting.md @@ -663,7 +663,7 @@ This typically occurs when the node has not been considered fully initialized fo ### Log message of `inflight check failed for node, Expected resource "vpc.amazonaws.com/pod-eni" didn't register on the node` is reported -This error indicates that the `vpc.amazonaws.com/pod-eni` resource was never reported on the node. If you've enabled Pod ENI for Karpenter nodes via the `aws.enablePodENI` setting, you will need to make the corresponding change to the VPC CNI to enable [security groups for pods](https://docs.aws.amazon.com/eks/latest/userguide/security-groups-for-pods.html) which will cause the resource to be registered. +This error indicates that the `vpc.amazonaws.com/pod-eni` resource was never reported on the node. You will need to make the corresponding change to the VPC CNI to enable [security groups for pods](https://docs.aws.amazon.com/eks/latest/userguide/security-groups-for-pods.html) which will cause the resource to be registered. ### AWS Node Termination Handler (NTH) interactions Karpenter [doesn't currently support draining and terminating on spot rebalance recommendations]({{< ref "concepts/disruption#interruption" >}}). Users who want support for both drain and terminate on spot interruption as well as drain and termination on spot rebalance recommendations may install Node Termination Handler (NTH) on their clusters to support this behavior. diff --git a/website/content/en/v0.36/concepts/scheduling.md b/website/content/en/v0.36/concepts/scheduling.md index 49e43cf45f9d..696b2bb4afc6 100755 --- a/website/content/en/v0.36/concepts/scheduling.md +++ b/website/content/en/v0.36/concepts/scheduling.md @@ -104,8 +104,6 @@ Refer to general [Kubernetes GPU](https://kubernetes.io/docs/tasks/manage-gpus/s You must enable Pod ENI support in the AWS VPC CNI Plugin before enabling Pod ENI support in Karpenter. Please refer to the [Security Groups for Pods documentation](https://docs.aws.amazon.com/eks/latest/userguide/security-groups-for-pods.html) for instructions. {{% /alert %}} -Now that Pod ENI support is enabled in the AWS VPC CNI Plugin, you can enable Pod ENI support in Karpenter by setting the `settings.aws.enablePodENI` Helm chart value to `true`. - Here is an example of a pod-eni resource defined in a deployment manifest: ``` spec: diff --git a/website/content/en/v0.36/troubleshooting.md b/website/content/en/v0.36/troubleshooting.md index 6dc784007b75..0e6c0d3114b9 100644 --- a/website/content/en/v0.36/troubleshooting.md +++ b/website/content/en/v0.36/troubleshooting.md @@ -663,7 +663,7 @@ This typically occurs when the node has not been considered fully initialized fo ### Log message of `inflight check failed for node, Expected resource "vpc.amazonaws.com/pod-eni" didn't register on the node` is reported -This error indicates that the `vpc.amazonaws.com/pod-eni` resource was never reported on the node. If you've enabled Pod ENI for Karpenter nodes via the `aws.enablePodENI` setting, you will need to make the corresponding change to the VPC CNI to enable [security groups for pods](https://docs.aws.amazon.com/eks/latest/userguide/security-groups-for-pods.html) which will cause the resource to be registered. +This error indicates that the `vpc.amazonaws.com/pod-eni` resource was never reported on the node. You will need to make the corresponding change to the VPC CNI to enable [security groups for pods](https://docs.aws.amazon.com/eks/latest/userguide/security-groups-for-pods.html) which will cause the resource to be registered. ### AWS Node Termination Handler (NTH) interactions Karpenter [doesn't currently support draining and terminating on spot rebalance recommendations]({{< ref "concepts/disruption#interruption" >}}). Users who want support for both drain and terminate on spot interruption as well as drain and termination on spot rebalance recommendations may install Node Termination Handler (NTH) on their clusters to support this behavior. From b103ff00d0f83608037733079afdce70db8bb9f7 Mon Sep 17 00:00:00 2001 From: Amanuel Engeda <74629455+engedaam@users.noreply.github.com> Date: Wed, 24 Apr 2024 14:23:26 -0700 Subject: [PATCH 15/17] chore: Forward Container logs for E2E tests (#5982) --- .../e2e/run-tests-private-cluster/action.yaml | 4 ++++ .github/actions/e2e/setup-cluster/action.yaml | 15 ++++++++++----- .github/workflows/e2e-upgrade.yaml | 11 ++++++++++- .github/workflows/e2e.yaml | 11 ++++++++++- 4 files changed, 34 insertions(+), 7 deletions(-) diff --git a/.github/actions/e2e/run-tests-private-cluster/action.yaml b/.github/actions/e2e/run-tests-private-cluster/action.yaml index 64504b9b6d58..8203553ebea4 100644 --- a/.github/actions/e2e/run-tests-private-cluster/action.yaml +++ b/.github/actions/e2e/run-tests-private-cluster/action.yaml @@ -125,6 +125,10 @@ runs: - kubectl delete ec2nodeclass --all - kubectl delete deployment --all - PRIVATE_CLUSTER=$CLUSTER_NAME TEST_SUITE=$SUITE ENABLE_METRICS=$ENABLE_METRICS METRICS_REGION=$METRICS_REGION GIT_REF="$(git rev-parse HEAD)" CLUSTER_NAME=$CLUSTER_NAME CLUSTER_ENDPOINT="$(aws eks describe-cluster --name $CLUSTER_NAME --query "cluster.endpoint" --output text)" INTERRUPTION_QUEUE=$CLUSTER_NAME make e2etests + - aws logs put-retention-policy --log-group-name /aws/containerinsights/$CLUSTER_NAME/application --retention-in-days 30 + - aws logs put-retention-policy --log-group-name /aws/containerinsights/$CLUSTER_NAME/dataplane --retention-in-days 30 + - aws logs put-retention-policy --log-group-name /aws/containerinsights/$CLUSTER_NAME/host --retention-in-days 30 + - aws logs put-retention-policy --log-group-name /aws/containerinsights/$CLUSTER_NAME/performance --retention-in-days 30 post_build: commands: # Describe karpenter pods diff --git a/.github/actions/e2e/setup-cluster/action.yaml b/.github/actions/e2e/setup-cluster/action.yaml index ffda793d2fc0..aec619949048 100644 --- a/.github/actions/e2e/setup-cluster/action.yaml +++ b/.github/actions/e2e/setup-cluster/action.yaml @@ -30,7 +30,7 @@ inputs: default: "1.29" eksctl_version: description: "Version of eksctl to install" - default: v0.169.0 + default: v0.175.0 ip_family: description: "IP Family of the cluster. Valid values are IPv4 or IPv6" default: "IPv4" @@ -152,11 +152,9 @@ runs: minSize: 2 maxSize: 2 iam: + withAddonPolicies: + cloudWatch: true instanceRolePermissionsBoundary: "arn:aws:iam::$ACCOUNT_ID:policy/GithubActionsPermissionsBoundary" - taints: - - key: CriticalAddonsOnly - value: "true" - effect: NoSchedule cloudWatch: clusterLogging: enableTypes: ["*"] @@ -175,6 +173,8 @@ runs: $KARPENTER_IAM withOIDC: true addons: + - name: amazon-cloudwatch-observability + permissionsBoundary: "arn:aws:iam::$ACCOUNT_ID:policy/GithubActionsPermissionsBoundary" - name: vpc-cni permissionsBoundary: "arn:aws:iam::$ACCOUNT_ID:policy/GithubActionsPermissionsBoundary" - name: coredns @@ -211,6 +211,11 @@ runs: else eksctl ${cmd} cluster -f clusterconfig.yaml fi + + # Adding taints after all necessary pods have scheduled to the manged node group nodes + # amazon-cloudwatch-observability pods do no not tolerate CriticalAddonsOnly=true:NoSchedule and + # amazon-cloudwatch-observability addons does not allow to add tolerations to the addon pods as part of the advanced configuration + kubectl taint nodes CriticalAddonsOnly=true:NoSchedule --all - name: tag oidc provider of the cluster if: always() shell: bash diff --git a/.github/workflows/e2e-upgrade.yaml b/.github/workflows/e2e-upgrade.yaml index 0ad5a6e3f3e0..2c6b8a8c0f28 100644 --- a/.github/workflows/e2e-upgrade.yaml +++ b/.github/workflows/e2e-upgrade.yaml @@ -90,7 +90,7 @@ jobs: region: ${{ inputs.region }} cluster_name: ${{ steps.generate-cluster-name.outputs.CLUSTER_NAME }} k8s_version: ${{ inputs.k8s_version }} - eksctl_version: v0.169.0 + eksctl_version: v0.175.0 ip_family: IPv4 # Set the value to IPv6 if IPv6 suite, else IPv4 git_ref: ${{ inputs.from_git_ref }} ecr_account_id: ${{ vars.SNAPSHOT_ACCOUNT_ID }} @@ -135,6 +135,15 @@ jobs: url: ${{ secrets.SLACK_WEBHOOK_URL }} suite: Upgrade git_ref: ${{ inputs.to_git_ref }} + - name: add log retention policy + if: ${{ inputs.workflow_trigger != 'private_cluster' }} + env: + CLUSTER_NAME: ${{ steps.generate-cluster-name.outputs.CLUSTER_NAME }} + run: | + aws logs put-retention-policy --log-group-name /aws/containerinsights/"$CLUSTER_NAME"/application --retention-in-days 30 + aws logs put-retention-policy --log-group-name /aws/containerinsights/"$CLUSTER_NAME"/dataplane --retention-in-days 30 + aws logs put-retention-policy --log-group-name /aws/containerinsights/"$CLUSTER_NAME"/host --retention-in-days 30 + aws logs put-retention-policy --log-group-name /aws/containerinsights/"$CLUSTER_NAME"/performance --retention-in-days 30 - name: dump logs on failure uses: ./.github/actions/e2e/dump-logs if: failure() || cancelled() diff --git a/.github/workflows/e2e.yaml b/.github/workflows/e2e.yaml index dc120754be4a..636c19c77b9c 100644 --- a/.github/workflows/e2e.yaml +++ b/.github/workflows/e2e.yaml @@ -132,7 +132,7 @@ jobs: region: ${{ inputs.region }} cluster_name: ${{ steps.generate-cluster-name.outputs.CLUSTER_NAME }} k8s_version: ${{ inputs.k8s_version }} - eksctl_version: v0.169.0 + eksctl_version: v0.175.0 ip_family: ${{ contains(inputs.suite, 'IPv6') && 'IPv6' || 'IPv4' }} # Set the value to IPv6 if IPv6 suite, else IPv4 private_cluster: ${{ inputs.workflow_trigger == 'private_cluster' }} git_ref: ${{ inputs.git_ref }} @@ -187,6 +187,15 @@ jobs: suite: ${{ inputs.suite }} git_ref: ${{ inputs.git_ref }} workflow_trigger: ${{ inputs.workflow_trigger }} + - name: add log retention policy + if: ${{ inputs.workflow_trigger != 'private_cluster' }} + env: + CLUSTER_NAME: ${{ steps.generate-cluster-name.outputs.CLUSTER_NAME }} + run: | + aws logs put-retention-policy --log-group-name /aws/containerinsights/"$CLUSTER_NAME"/application --retention-in-days 30 + aws logs put-retention-policy --log-group-name /aws/containerinsights/"$CLUSTER_NAME"/dataplane --retention-in-days 30 + aws logs put-retention-policy --log-group-name /aws/containerinsights/"$CLUSTER_NAME"/host --retention-in-days 30 + aws logs put-retention-policy --log-group-name /aws/containerinsights/"$CLUSTER_NAME"/performance --retention-in-days 30 - name: dump logs on failure uses: ./.github/actions/e2e/dump-logs if: (failure() || cancelled()) && inputs.workflow_trigger != 'private_cluster' From 81175f245c66ea80afc1c8bd0edfef4cf57e8d6f Mon Sep 17 00:00:00 2001 From: Jason Deal Date: Thu, 25 Apr 2024 14:45:43 -0700 Subject: [PATCH 16/17] docs: release v0.36.1 (#6101) --- website/content/en/docs/faq.md | 4 ++-- .../getting-started-with-karpenter/_index.md | 8 ++++---- .../getting-started/migrating-from-cas/_index.md | 4 ++-- website/content/en/docs/reference/cloudformation.md | 2 +- website/content/en/docs/reference/threat-model.md | 10 +++++----- website/content/en/docs/upgrading/upgrade-guide.md | 12 ++++++------ .../content/en/preview/upgrading/upgrade-guide.md | 6 +++--- website/content/en/v0.36/faq.md | 4 ++-- .../getting-started-with-karpenter/_index.md | 8 ++++---- .../getting-started/migrating-from-cas/_index.md | 4 ++-- website/content/en/v0.36/reference/cloudformation.md | 2 +- website/content/en/v0.36/reference/threat-model.md | 10 +++++----- website/content/en/v0.36/upgrading/upgrade-guide.md | 12 ++++++------ website/hugo.yaml | 2 +- 14 files changed, 44 insertions(+), 44 deletions(-) diff --git a/website/content/en/docs/faq.md b/website/content/en/docs/faq.md index cfbf61a302d7..3c1b1df14dc0 100644 --- a/website/content/en/docs/faq.md +++ b/website/content/en/docs/faq.md @@ -14,7 +14,7 @@ See [Configuring NodePools]({{< ref "./concepts/#configuring-nodepools" >}}) for AWS is the first cloud provider supported by Karpenter, although it is designed to be used with other cloud providers as well. ### Can I write my own cloud provider for Karpenter? -Yes, but there is no documentation yet for it. Start with Karpenter's GitHub [cloudprovider](https://github.com/aws/karpenter-core/tree/v0.36.0/pkg/cloudprovider) documentation to see how the AWS provider is built, but there are other sections of the code that will require changes too. +Yes, but there is no documentation yet for it. Start with Karpenter's GitHub [cloudprovider](https://github.com/aws/karpenter-core/tree/v0.36.1/pkg/cloudprovider) documentation to see how the AWS provider is built, but there are other sections of the code that will require changes too. ### What operating system nodes does Karpenter deploy? Karpenter uses the OS defined by the [AMI Family in your EC2NodeClass]({{< ref "./concepts/nodeclasses#specamifamily" >}}). @@ -26,7 +26,7 @@ Karpenter has multiple mechanisms for configuring the [operating system]({{< ref Karpenter is flexible to multi-architecture configurations using [well known labels]({{< ref "./concepts/scheduling/#supported-labels">}}). ### What RBAC access is required? -All the required RBAC rules can be found in the Helm chart template. See [clusterrole-core.yaml](https://github.com/aws/karpenter/blob/v0.36.0/charts/karpenter/templates/clusterrole-core.yaml), [clusterrole.yaml](https://github.com/aws/karpenter/blob/v0.36.0/charts/karpenter/templates/clusterrole.yaml), [rolebinding.yaml](https://github.com/aws/karpenter/blob/v0.36.0/charts/karpenter/templates/rolebinding.yaml), and [role.yaml](https://github.com/aws/karpenter/blob/v0.36.0/charts/karpenter/templates/role.yaml) files for details. +All the required RBAC rules can be found in the Helm chart template. See [clusterrole-core.yaml](https://github.com/aws/karpenter/blob/v0.36.1/charts/karpenter/templates/clusterrole-core.yaml), [clusterrole.yaml](https://github.com/aws/karpenter/blob/v0.36.1/charts/karpenter/templates/clusterrole.yaml), [rolebinding.yaml](https://github.com/aws/karpenter/blob/v0.36.1/charts/karpenter/templates/rolebinding.yaml), and [role.yaml](https://github.com/aws/karpenter/blob/v0.36.1/charts/karpenter/templates/role.yaml) files for details. ### Can I run Karpenter outside of a Kubernetes cluster? Yes, as long as the controller has network and IAM/RBAC access to the Kubernetes API and your provider API. diff --git a/website/content/en/docs/getting-started/getting-started-with-karpenter/_index.md b/website/content/en/docs/getting-started/getting-started-with-karpenter/_index.md index 89242a872341..9b5891b1658b 100644 --- a/website/content/en/docs/getting-started/getting-started-with-karpenter/_index.md +++ b/website/content/en/docs/getting-started/getting-started-with-karpenter/_index.md @@ -45,7 +45,7 @@ After setting up the tools, set the Karpenter and Kubernetes version: ```bash export KARPENTER_NAMESPACE="kube-system" -export KARPENTER_VERSION="0.36.0" +export KARPENTER_VERSION="0.36.1" export K8S_VERSION="1.29" ``` @@ -109,13 +109,13 @@ See [Enabling Windows support](https://docs.aws.amazon.com/eks/latest/userguide/ As the OCI Helm chart is signed by [Cosign](https://github.com/sigstore/cosign) as part of the release process you can verify the chart before installing it by running the following command. ```bash -cosign verify public.ecr.aws/karpenter/karpenter:0.36.0 \ +cosign verify public.ecr.aws/karpenter/karpenter:0.36.1 \ --certificate-oidc-issuer=https://token.actions.githubusercontent.com \ --certificate-identity-regexp='https://github\.com/aws/karpenter-provider-aws/\.github/workflows/release\.yaml@.+' \ --certificate-github-workflow-repository=aws/karpenter-provider-aws \ --certificate-github-workflow-name=Release \ - --certificate-github-workflow-ref=refs/tags/v0.36.0 \ - --annotations version=0.36.0 + --certificate-github-workflow-ref=refs/tags/v0.36.1 \ + --annotations version=0.36.1 ``` {{% alert title="DNS Policy Notice" color="warning" %}} diff --git a/website/content/en/docs/getting-started/migrating-from-cas/_index.md b/website/content/en/docs/getting-started/migrating-from-cas/_index.md index f71116389411..8a053ecb51aa 100644 --- a/website/content/en/docs/getting-started/migrating-from-cas/_index.md +++ b/website/content/en/docs/getting-started/migrating-from-cas/_index.md @@ -92,7 +92,7 @@ One for your Karpenter node role and one for your existing node group. First set the Karpenter release you want to deploy. ```bash -export KARPENTER_VERSION="0.36.0" +export KARPENTER_VERSION="0.36.1" ``` We can now generate a full Karpenter deployment yaml from the Helm chart. @@ -133,7 +133,7 @@ Now that our deployment is ready we can create the karpenter namespace, create t ## Create default NodePool -We need to create a default NodePool so Karpenter knows what types of nodes we want for unscheduled workloads. You can refer to some of the [example NodePool](https://github.com/aws/karpenter/tree/v0.36.0/examples/v1beta1) for specific needs. +We need to create a default NodePool so Karpenter knows what types of nodes we want for unscheduled workloads. You can refer to some of the [example NodePool](https://github.com/aws/karpenter/tree/v0.36.1/examples/v1beta1) for specific needs. {{% script file="./content/en/{VERSION}/getting-started/migrating-from-cas/scripts/step10-create-nodepool.sh" language="bash" %}} diff --git a/website/content/en/docs/reference/cloudformation.md b/website/content/en/docs/reference/cloudformation.md index 1e3993b0b1bf..cdd34f44f47f 100644 --- a/website/content/en/docs/reference/cloudformation.md +++ b/website/content/en/docs/reference/cloudformation.md @@ -17,7 +17,7 @@ These descriptions should allow you to understand: To download a particular version of `cloudformation.yaml`, set the version and use `curl` to pull the file to your local system: ```bash -export KARPENTER_VERSION="0.36.0" +export KARPENTER_VERSION="0.36.1" curl https://raw.githubusercontent.com/aws/karpenter-provider-aws/v"${KARPENTER_VERSION}"/website/content/en/preview/getting-started/getting-started-with-karpenter/cloudformation.yaml > cloudformation.yaml ``` diff --git a/website/content/en/docs/reference/threat-model.md b/website/content/en/docs/reference/threat-model.md index 71f8beaf3532..9f6cf6fe9c23 100644 --- a/website/content/en/docs/reference/threat-model.md +++ b/website/content/en/docs/reference/threat-model.md @@ -31,11 +31,11 @@ A Cluster Developer has the ability to create pods via `Deployments`, `ReplicaSe Karpenter has permissions to create and manage cloud instances. Karpenter has Kubernetes API permissions to create, update, and remove nodes, as well as evict pods. For a full list of the permissions, see the RBAC rules in the helm chart template. Karpenter also has AWS IAM permissions to create instances with IAM roles. -* [aggregate-clusterrole.yaml](https://github.com/aws/karpenter/blob/v0.36.0/charts/karpenter/templates/aggregate-clusterrole.yaml) -* [clusterrole-core.yaml](https://github.com/aws/karpenter/blob/v0.36.0/charts/karpenter/templates/clusterrole-core.yaml) -* [clusterrole.yaml](https://github.com/aws/karpenter/blob/v0.36.0/charts/karpenter/templates/clusterrole.yaml) -* [rolebinding.yaml](https://github.com/aws/karpenter/blob/v0.36.0/charts/karpenter/templates/rolebinding.yaml) -* [role.yaml](https://github.com/aws/karpenter/blob/v0.36.0/charts/karpenter/templates/role.yaml) +* [aggregate-clusterrole.yaml](https://github.com/aws/karpenter/blob/v0.36.1/charts/karpenter/templates/aggregate-clusterrole.yaml) +* [clusterrole-core.yaml](https://github.com/aws/karpenter/blob/v0.36.1/charts/karpenter/templates/clusterrole-core.yaml) +* [clusterrole.yaml](https://github.com/aws/karpenter/blob/v0.36.1/charts/karpenter/templates/clusterrole.yaml) +* [rolebinding.yaml](https://github.com/aws/karpenter/blob/v0.36.1/charts/karpenter/templates/rolebinding.yaml) +* [role.yaml](https://github.com/aws/karpenter/blob/v0.36.1/charts/karpenter/templates/role.yaml) ## Assumptions diff --git a/website/content/en/docs/upgrading/upgrade-guide.md b/website/content/en/docs/upgrading/upgrade-guide.md index 1687bfa3f5a5..2ffd380031c8 100644 --- a/website/content/en/docs/upgrading/upgrade-guide.md +++ b/website/content/en/docs/upgrading/upgrade-guide.md @@ -28,23 +28,23 @@ If you get the error `invalid ownership metadata; label validation error:` while In general, you can reapply the CRDs in the `crds` directory of the Karpenter Helm chart: ```shell -kubectl apply -f https://raw.githubusercontent.com/aws/karpenter/v0.36.0/pkg/apis/crds/karpenter.sh_nodepools.yaml -kubectl apply -f https://raw.githubusercontent.com/aws/karpenter/v0.36.0/pkg/apis/crds/karpenter.sh_nodeclaims.yaml -kubectl apply -f https://raw.githubusercontent.com/aws/karpenter/v0.36.0/pkg/apis/crds/karpenter.k8s.aws_ec2nodeclasses.yaml +kubectl apply -f https://raw.githubusercontent.com/aws/karpenter/v0.36.1/pkg/apis/crds/karpenter.sh_nodepools.yaml +kubectl apply -f https://raw.githubusercontent.com/aws/karpenter/v0.36.1/pkg/apis/crds/karpenter.sh_nodeclaims.yaml +kubectl apply -f https://raw.githubusercontent.com/aws/karpenter/v0.36.1/pkg/apis/crds/karpenter.k8s.aws_ec2nodeclasses.yaml ``` -### Upgrading to `0.36.0`+ +### Upgrading to `0.36.1`+ {{% alert title="Warning" color="warning" %}} -`0.33.0`+ _only_ supports Karpenter v1beta1 APIs and will not work with existing Provisioner, AWSNodeTemplate or Machine alpha APIs. Do not upgrade to `0.36.0`+ without first [upgrading to `0.32.x`]({{}}). This version supports both the alpha and beta APIs, allowing you to migrate all of your existing APIs to beta APIs without experiencing downtime. +`0.33.0`+ _only_ supports Karpenter v1beta1 APIs and will not work with existing Provisioner, AWSNodeTemplate or Machine alpha APIs. Do not upgrade to `0.36.1`+ without first [upgrading to `0.32.x`]({{}}). This version supports both the alpha and beta APIs, allowing you to migrate all of your existing APIs to beta APIs without experiencing downtime. {{% /alert %}} {{% alert title="Warning" color="warning" %}} - v0.36.x introduces update to drift that restricts rollback. When rolling back from >=v0.36.0, note that v0.32.9+, v0.33.4+, v0.34.5+, v0.35.4+ are the patch versions that support rollback. If Karpenter is rolled back to an older patch version, Karpenter can potentially drift all the nodes in the cluster. + v0.36.x introduces update to drift that restricts rollback. When rolling back from >=v0.36.1, note that v0.32.9+, v0.33.4+, v0.34.5+, v0.35.4+ are the patch versions that support rollback. If Karpenter is rolled back to an older patch version, Karpenter can potentially drift all the nodes in the cluster. {{% /alert %}} * Karpenter changed the name of the `karpenter_cloudprovider_instance_type_price_estimate` metric to `karpenter_cloudprovider_instance_type_offering_price_estimate` to align with the new `karpenter_cloudprovider_instance_type_offering_available` metric. The `region` label was also dropped from the metric, since this can be inferred from the environment that Karpenter is running in. diff --git a/website/content/en/preview/upgrading/upgrade-guide.md b/website/content/en/preview/upgrading/upgrade-guide.md index 66f1d2b8c1e5..36ee9af27627 100644 --- a/website/content/en/preview/upgrading/upgrade-guide.md +++ b/website/content/en/preview/upgrading/upgrade-guide.md @@ -41,14 +41,14 @@ WHEN CREATING A NEW SECTION OF THE UPGRADE GUIDANCE FOR NEWER VERSIONS, ENSURE T * Karpenter updated the NodeClass controller naming in the following way: `nodeclass` -> `nodeclass.status`, `nodeclass.hash`, `nodeclass.termination` -### Upgrading to `0.36.0`+ +### Upgrading to `0.36.1`+ {{% alert title="Warning" color="warning" %}} -`0.33.0`+ _only_ supports Karpenter v1beta1 APIs and will not work with existing Provisioner, AWSNodeTemplate or Machine alpha APIs. Do not upgrade to `0.36.0`+ without first [upgrading to `0.32.x`]({{}}). This version supports both the alpha and beta APIs, allowing you to migrate all of your existing APIs to beta APIs without experiencing downtime. +`0.33.0`+ _only_ supports Karpenter v1beta1 APIs and will not work with existing Provisioner, AWSNodeTemplate or Machine alpha APIs. Do not upgrade to `0.36.1`+ without first [upgrading to `0.32.x`]({{}}). This version supports both the alpha and beta APIs, allowing you to migrate all of your existing APIs to beta APIs without experiencing downtime. {{% /alert %}} {{% alert title="Warning" color="warning" %}} - v0.36.x introduces update to drift that restricts rollback. When rolling back from >=v0.36.0, note that v0.32.9+, v0.33.4+, v0.34.5+, v0.35.4+ are the patch versions that support rollback. If Karpenter is rolled back to an older patch version, Karpenter can potentially drift all the nodes in the cluster. + v0.36.x introduces update to drift that restricts rollback. When rolling back from >=v0.36.1, note that v0.32.9+, v0.33.4+, v0.34.5+, v0.35.4+ are the patch versions that support rollback. If Karpenter is rolled back to an older patch version, Karpenter can potentially drift all the nodes in the cluster. {{% /alert %}} * Karpenter changed the name of the `karpenter_cloudprovider_instance_type_price_estimate` metric to `karpenter_cloudprovider_instance_type_offering_price_estimate` to align with the new `karpenter_cloudprovider_instance_type_offering_available` metric. The `region` label was also dropped from the metric, since this can be inferred from the environment that Karpenter is running in. diff --git a/website/content/en/v0.36/faq.md b/website/content/en/v0.36/faq.md index cfbf61a302d7..3c1b1df14dc0 100644 --- a/website/content/en/v0.36/faq.md +++ b/website/content/en/v0.36/faq.md @@ -14,7 +14,7 @@ See [Configuring NodePools]({{< ref "./concepts/#configuring-nodepools" >}}) for AWS is the first cloud provider supported by Karpenter, although it is designed to be used with other cloud providers as well. ### Can I write my own cloud provider for Karpenter? -Yes, but there is no documentation yet for it. Start with Karpenter's GitHub [cloudprovider](https://github.com/aws/karpenter-core/tree/v0.36.0/pkg/cloudprovider) documentation to see how the AWS provider is built, but there are other sections of the code that will require changes too. +Yes, but there is no documentation yet for it. Start with Karpenter's GitHub [cloudprovider](https://github.com/aws/karpenter-core/tree/v0.36.1/pkg/cloudprovider) documentation to see how the AWS provider is built, but there are other sections of the code that will require changes too. ### What operating system nodes does Karpenter deploy? Karpenter uses the OS defined by the [AMI Family in your EC2NodeClass]({{< ref "./concepts/nodeclasses#specamifamily" >}}). @@ -26,7 +26,7 @@ Karpenter has multiple mechanisms for configuring the [operating system]({{< ref Karpenter is flexible to multi-architecture configurations using [well known labels]({{< ref "./concepts/scheduling/#supported-labels">}}). ### What RBAC access is required? -All the required RBAC rules can be found in the Helm chart template. See [clusterrole-core.yaml](https://github.com/aws/karpenter/blob/v0.36.0/charts/karpenter/templates/clusterrole-core.yaml), [clusterrole.yaml](https://github.com/aws/karpenter/blob/v0.36.0/charts/karpenter/templates/clusterrole.yaml), [rolebinding.yaml](https://github.com/aws/karpenter/blob/v0.36.0/charts/karpenter/templates/rolebinding.yaml), and [role.yaml](https://github.com/aws/karpenter/blob/v0.36.0/charts/karpenter/templates/role.yaml) files for details. +All the required RBAC rules can be found in the Helm chart template. See [clusterrole-core.yaml](https://github.com/aws/karpenter/blob/v0.36.1/charts/karpenter/templates/clusterrole-core.yaml), [clusterrole.yaml](https://github.com/aws/karpenter/blob/v0.36.1/charts/karpenter/templates/clusterrole.yaml), [rolebinding.yaml](https://github.com/aws/karpenter/blob/v0.36.1/charts/karpenter/templates/rolebinding.yaml), and [role.yaml](https://github.com/aws/karpenter/blob/v0.36.1/charts/karpenter/templates/role.yaml) files for details. ### Can I run Karpenter outside of a Kubernetes cluster? Yes, as long as the controller has network and IAM/RBAC access to the Kubernetes API and your provider API. diff --git a/website/content/en/v0.36/getting-started/getting-started-with-karpenter/_index.md b/website/content/en/v0.36/getting-started/getting-started-with-karpenter/_index.md index 89242a872341..9b5891b1658b 100644 --- a/website/content/en/v0.36/getting-started/getting-started-with-karpenter/_index.md +++ b/website/content/en/v0.36/getting-started/getting-started-with-karpenter/_index.md @@ -45,7 +45,7 @@ After setting up the tools, set the Karpenter and Kubernetes version: ```bash export KARPENTER_NAMESPACE="kube-system" -export KARPENTER_VERSION="0.36.0" +export KARPENTER_VERSION="0.36.1" export K8S_VERSION="1.29" ``` @@ -109,13 +109,13 @@ See [Enabling Windows support](https://docs.aws.amazon.com/eks/latest/userguide/ As the OCI Helm chart is signed by [Cosign](https://github.com/sigstore/cosign) as part of the release process you can verify the chart before installing it by running the following command. ```bash -cosign verify public.ecr.aws/karpenter/karpenter:0.36.0 \ +cosign verify public.ecr.aws/karpenter/karpenter:0.36.1 \ --certificate-oidc-issuer=https://token.actions.githubusercontent.com \ --certificate-identity-regexp='https://github\.com/aws/karpenter-provider-aws/\.github/workflows/release\.yaml@.+' \ --certificate-github-workflow-repository=aws/karpenter-provider-aws \ --certificate-github-workflow-name=Release \ - --certificate-github-workflow-ref=refs/tags/v0.36.0 \ - --annotations version=0.36.0 + --certificate-github-workflow-ref=refs/tags/v0.36.1 \ + --annotations version=0.36.1 ``` {{% alert title="DNS Policy Notice" color="warning" %}} diff --git a/website/content/en/v0.36/getting-started/migrating-from-cas/_index.md b/website/content/en/v0.36/getting-started/migrating-from-cas/_index.md index f71116389411..8a053ecb51aa 100644 --- a/website/content/en/v0.36/getting-started/migrating-from-cas/_index.md +++ b/website/content/en/v0.36/getting-started/migrating-from-cas/_index.md @@ -92,7 +92,7 @@ One for your Karpenter node role and one for your existing node group. First set the Karpenter release you want to deploy. ```bash -export KARPENTER_VERSION="0.36.0" +export KARPENTER_VERSION="0.36.1" ``` We can now generate a full Karpenter deployment yaml from the Helm chart. @@ -133,7 +133,7 @@ Now that our deployment is ready we can create the karpenter namespace, create t ## Create default NodePool -We need to create a default NodePool so Karpenter knows what types of nodes we want for unscheduled workloads. You can refer to some of the [example NodePool](https://github.com/aws/karpenter/tree/v0.36.0/examples/v1beta1) for specific needs. +We need to create a default NodePool so Karpenter knows what types of nodes we want for unscheduled workloads. You can refer to some of the [example NodePool](https://github.com/aws/karpenter/tree/v0.36.1/examples/v1beta1) for specific needs. {{% script file="./content/en/{VERSION}/getting-started/migrating-from-cas/scripts/step10-create-nodepool.sh" language="bash" %}} diff --git a/website/content/en/v0.36/reference/cloudformation.md b/website/content/en/v0.36/reference/cloudformation.md index 1e3993b0b1bf..cdd34f44f47f 100644 --- a/website/content/en/v0.36/reference/cloudformation.md +++ b/website/content/en/v0.36/reference/cloudformation.md @@ -17,7 +17,7 @@ These descriptions should allow you to understand: To download a particular version of `cloudformation.yaml`, set the version and use `curl` to pull the file to your local system: ```bash -export KARPENTER_VERSION="0.36.0" +export KARPENTER_VERSION="0.36.1" curl https://raw.githubusercontent.com/aws/karpenter-provider-aws/v"${KARPENTER_VERSION}"/website/content/en/preview/getting-started/getting-started-with-karpenter/cloudformation.yaml > cloudformation.yaml ``` diff --git a/website/content/en/v0.36/reference/threat-model.md b/website/content/en/v0.36/reference/threat-model.md index 71f8beaf3532..9f6cf6fe9c23 100644 --- a/website/content/en/v0.36/reference/threat-model.md +++ b/website/content/en/v0.36/reference/threat-model.md @@ -31,11 +31,11 @@ A Cluster Developer has the ability to create pods via `Deployments`, `ReplicaSe Karpenter has permissions to create and manage cloud instances. Karpenter has Kubernetes API permissions to create, update, and remove nodes, as well as evict pods. For a full list of the permissions, see the RBAC rules in the helm chart template. Karpenter also has AWS IAM permissions to create instances with IAM roles. -* [aggregate-clusterrole.yaml](https://github.com/aws/karpenter/blob/v0.36.0/charts/karpenter/templates/aggregate-clusterrole.yaml) -* [clusterrole-core.yaml](https://github.com/aws/karpenter/blob/v0.36.0/charts/karpenter/templates/clusterrole-core.yaml) -* [clusterrole.yaml](https://github.com/aws/karpenter/blob/v0.36.0/charts/karpenter/templates/clusterrole.yaml) -* [rolebinding.yaml](https://github.com/aws/karpenter/blob/v0.36.0/charts/karpenter/templates/rolebinding.yaml) -* [role.yaml](https://github.com/aws/karpenter/blob/v0.36.0/charts/karpenter/templates/role.yaml) +* [aggregate-clusterrole.yaml](https://github.com/aws/karpenter/blob/v0.36.1/charts/karpenter/templates/aggregate-clusterrole.yaml) +* [clusterrole-core.yaml](https://github.com/aws/karpenter/blob/v0.36.1/charts/karpenter/templates/clusterrole-core.yaml) +* [clusterrole.yaml](https://github.com/aws/karpenter/blob/v0.36.1/charts/karpenter/templates/clusterrole.yaml) +* [rolebinding.yaml](https://github.com/aws/karpenter/blob/v0.36.1/charts/karpenter/templates/rolebinding.yaml) +* [role.yaml](https://github.com/aws/karpenter/blob/v0.36.1/charts/karpenter/templates/role.yaml) ## Assumptions diff --git a/website/content/en/v0.36/upgrading/upgrade-guide.md b/website/content/en/v0.36/upgrading/upgrade-guide.md index 1687bfa3f5a5..2ffd380031c8 100644 --- a/website/content/en/v0.36/upgrading/upgrade-guide.md +++ b/website/content/en/v0.36/upgrading/upgrade-guide.md @@ -28,23 +28,23 @@ If you get the error `invalid ownership metadata; label validation error:` while In general, you can reapply the CRDs in the `crds` directory of the Karpenter Helm chart: ```shell -kubectl apply -f https://raw.githubusercontent.com/aws/karpenter/v0.36.0/pkg/apis/crds/karpenter.sh_nodepools.yaml -kubectl apply -f https://raw.githubusercontent.com/aws/karpenter/v0.36.0/pkg/apis/crds/karpenter.sh_nodeclaims.yaml -kubectl apply -f https://raw.githubusercontent.com/aws/karpenter/v0.36.0/pkg/apis/crds/karpenter.k8s.aws_ec2nodeclasses.yaml +kubectl apply -f https://raw.githubusercontent.com/aws/karpenter/v0.36.1/pkg/apis/crds/karpenter.sh_nodepools.yaml +kubectl apply -f https://raw.githubusercontent.com/aws/karpenter/v0.36.1/pkg/apis/crds/karpenter.sh_nodeclaims.yaml +kubectl apply -f https://raw.githubusercontent.com/aws/karpenter/v0.36.1/pkg/apis/crds/karpenter.k8s.aws_ec2nodeclasses.yaml ``` -### Upgrading to `0.36.0`+ +### Upgrading to `0.36.1`+ {{% alert title="Warning" color="warning" %}} -`0.33.0`+ _only_ supports Karpenter v1beta1 APIs and will not work with existing Provisioner, AWSNodeTemplate or Machine alpha APIs. Do not upgrade to `0.36.0`+ without first [upgrading to `0.32.x`]({{}}). This version supports both the alpha and beta APIs, allowing you to migrate all of your existing APIs to beta APIs without experiencing downtime. +`0.33.0`+ _only_ supports Karpenter v1beta1 APIs and will not work with existing Provisioner, AWSNodeTemplate or Machine alpha APIs. Do not upgrade to `0.36.1`+ without first [upgrading to `0.32.x`]({{}}). This version supports both the alpha and beta APIs, allowing you to migrate all of your existing APIs to beta APIs without experiencing downtime. {{% /alert %}} {{% alert title="Warning" color="warning" %}} - v0.36.x introduces update to drift that restricts rollback. When rolling back from >=v0.36.0, note that v0.32.9+, v0.33.4+, v0.34.5+, v0.35.4+ are the patch versions that support rollback. If Karpenter is rolled back to an older patch version, Karpenter can potentially drift all the nodes in the cluster. + v0.36.x introduces update to drift that restricts rollback. When rolling back from >=v0.36.1, note that v0.32.9+, v0.33.4+, v0.34.5+, v0.35.4+ are the patch versions that support rollback. If Karpenter is rolled back to an older patch version, Karpenter can potentially drift all the nodes in the cluster. {{% /alert %}} * Karpenter changed the name of the `karpenter_cloudprovider_instance_type_price_estimate` metric to `karpenter_cloudprovider_instance_type_offering_price_estimate` to align with the new `karpenter_cloudprovider_instance_type_offering_available` metric. The `region` label was also dropped from the metric, since this can be inferred from the environment that Karpenter is running in. diff --git a/website/hugo.yaml b/website/hugo.yaml index a58124bf6053..93457276c925 100644 --- a/website/hugo.yaml +++ b/website/hugo.yaml @@ -76,7 +76,7 @@ params: url: "https://slack.k8s.io/" icon: fab fa-slack desc: "Chat with us on Slack in the #aws-provider channel" - latest_release_version: 0.36.0 + latest_release_version: 0.36.1 latest_k8s_version: 1.29 versions: - v0.36 From e6fa44245c881619e71102909f7f9e091ff0e761 Mon Sep 17 00:00:00 2001 From: Amanuel Engeda <74629455+engedaam@users.noreply.github.com> Date: Thu, 25 Apr 2024 15:25:53 -0700 Subject: [PATCH 17/17] chore: Use Security Group Status Controller to Asynchronously Hydrate Security Group Data (#6069) --- pkg/cloudprovider/drift.go | 12 ++-- pkg/cloudprovider/suite_test.go | 57 +++++++++++++++---- pkg/providers/instance/suite_test.go | 12 ++++ pkg/providers/instancetype/suite_test.go | 22 +++++++ .../launchtemplate/launchtemplate.go | 28 ++++----- pkg/providers/launchtemplate/suite_test.go | 22 +++++++ 6 files changed, 119 insertions(+), 34 deletions(-) diff --git a/pkg/cloudprovider/drift.go b/pkg/cloudprovider/drift.go index 69dc5aad1783..318af54ac768 100644 --- a/pkg/cloudprovider/drift.go +++ b/pkg/cloudprovider/drift.go @@ -54,7 +54,7 @@ func (c *CloudProvider) isNodeClassDrifted(ctx context.Context, nodeClaim *corev if err != nil { return "", fmt.Errorf("calculating ami drift, %w", err) } - securitygroupDrifted, err := c.areSecurityGroupsDrifted(ctx, instance, nodeClass) + securitygroupDrifted, err := c.areSecurityGroupsDrifted(instance, nodeClass) if err != nil { return "", fmt.Errorf("calculating securitygroup drift, %w", err) } @@ -118,14 +118,10 @@ func (c *CloudProvider) isSubnetDrifted(ctx context.Context, instance *instance. // Checks if the security groups are drifted, by comparing the security groups returned from the SecurityGroupProvider // to the ec2 instance security groups -func (c *CloudProvider) areSecurityGroupsDrifted(ctx context.Context, ec2Instance *instance.Instance, nodeClass *v1beta1.EC2NodeClass) (cloudprovider.DriftReason, error) { - securitygroup, err := c.securityGroupProvider.List(ctx, nodeClass) - if err != nil { - return "", err - } - securityGroupIds := sets.New(lo.Map(securitygroup, func(sg *ec2.SecurityGroup, _ int) string { return aws.StringValue(sg.GroupId) })...) +func (c *CloudProvider) areSecurityGroupsDrifted(ec2Instance *instance.Instance, nodeClass *v1beta1.EC2NodeClass) (cloudprovider.DriftReason, error) { + securityGroupIds := sets.New(lo.Map(nodeClass.Status.SecurityGroups, func(sg v1beta1.SecurityGroup, _ int) string { return sg.ID })...) if len(securityGroupIds) == 0 { - return "", fmt.Errorf("no security groups are discovered") + return "", fmt.Errorf("no security groups are present in the status") } if !securityGroupIds.Equal(sets.New(ec2Instance.SecurityGroupIDs...)) { diff --git a/pkg/cloudprovider/suite_test.go b/pkg/cloudprovider/suite_test.go index b9d255ed3808..3617355f1730 100644 --- a/pkg/cloudprovider/suite_test.go +++ b/pkg/cloudprovider/suite_test.go @@ -138,6 +138,20 @@ var _ = Describe("CloudProvider", func() { }, }, }) + nodeClass.Status.SecurityGroups = []v1beta1.SecurityGroup{ + { + ID: "sg-test1", + Name: "securityGroup-test1", + }, + { + ID: "sg-test2", + Name: "securityGroup-test2", + }, + { + ID: "sg-test3", + Name: "securityGroup-test3", + }, + } Expect(awsEnv.InstanceTypesProvider.UpdateInstanceTypes(ctx)).To(Succeed()) Expect(awsEnv.InstanceTypesProvider.UpdateInstanceTypeOfferings(ctx)).To(Succeed()) }) @@ -586,6 +600,11 @@ var _ = Describe("CloudProvider", func() { }, }, }) + nodeClass.Status.SecurityGroups = []v1beta1.SecurityGroup{ + { + ID: validSecurityGroup, + }, + } ExpectApplied(ctx, env.Client, nodePool, nodeClass) instanceTypes, err := cloudProvider.GetInstanceTypes(ctx, nodePool) Expect(err).ToNot(HaveOccurred()) @@ -671,7 +690,8 @@ var _ = Describe("CloudProvider", func() { Expect(isDrifted).To(BeEmpty()) }) It("should return an error if the security groups are empty", func() { - awsEnv.EC2API.DescribeSecurityGroupsOutput.Set(&ec2.DescribeSecurityGroupsOutput{SecurityGroups: []*ec2.SecurityGroup{}}) + nodeClass.Status.SecurityGroups = []v1beta1.SecurityGroup{} + ExpectApplied(ctx, env.Client, nodeClass) // Instance is a reference to what we return in the GetInstances call instance.SecurityGroups = []*ec2.GroupIdentifier{{GroupId: aws.String(fake.SecurityGroupID())}} _, err := cloudProvider.IsDrifted(ctx, nodeClaim) @@ -692,18 +712,17 @@ var _ = Describe("CloudProvider", func() { Expect(isDrifted).To(Equal(cloudprovider.SecurityGroupDrift)) }) It("should return drifted if more security groups are present than instance security groups then discovered from nodeclass", func() { - awsEnv.EC2API.DescribeSecurityGroupsOutput.Set(&ec2.DescribeSecurityGroupsOutput{ - SecurityGroups: []*ec2.SecurityGroup{ - { - GroupId: aws.String(validSecurityGroup), - GroupName: aws.String("test-securitygroup"), - }, - { - GroupId: aws.String(fake.SecurityGroupID()), - GroupName: aws.String("test-securitygroup"), - }, + nodeClass.Status.SecurityGroups = []v1beta1.SecurityGroup{ + { + ID: validSecurityGroup, + Name: "test-securitygroup", }, - }) + { + ID: fake.SecurityGroupID(), + Name: "test-securitygroup", + }, + } + ExpectApplied(ctx, env.Client, nodeClass) isDrifted, err := cloudProvider.IsDrifted(ctx, nodeClaim) Expect(err).ToNot(HaveOccurred()) Expect(isDrifted).To(Equal(cloudprovider.SecurityGroupDrift)) @@ -777,6 +796,13 @@ var _ = Describe("CloudProvider", func() { }, }, }, + Status: v1beta1.EC2NodeClassStatus{ + SecurityGroups: []v1beta1.SecurityGroup{ + { + ID: validSecurityGroup, + }, + }, + }, } nodeClass.Annotations = lo.Assign(nodeClass.Annotations, map[string]string{v1beta1.AnnotationEC2NodeClassHash: nodeClass.Hash()}) nodeClaim.Annotations = lo.Assign(nodeClaim.Annotations, map[string]string{v1beta1.AnnotationEC2NodeClassHash: nodeClass.Hash()}) @@ -1013,6 +1039,13 @@ var _ = Describe("CloudProvider", func() { }, }, }, + Status: v1beta1.EC2NodeClassStatus{ + SecurityGroups: []v1beta1.SecurityGroup{ + { + ID: "sg-test1", + }, + }, + }, }) nodePool2 := coretest.NodePool(corev1beta1.NodePool{ Spec: corev1beta1.NodePoolSpec{ diff --git a/pkg/providers/instance/suite_test.go b/pkg/providers/instance/suite_test.go index 1693e2eadf0d..8dc051795d10 100644 --- a/pkg/providers/instance/suite_test.go +++ b/pkg/providers/instance/suite_test.go @@ -106,11 +106,23 @@ var _ = Describe("InstanceProvider", func() { }, }, }) + nodeClass.Status.SecurityGroups = []v1beta1.SecurityGroup{ + { + ID: "sg-test1", + }, + { + ID: "sg-test2", + }, + { + ID: "sg-test3", + }, + } Expect(awsEnv.InstanceTypesProvider.UpdateInstanceTypes(ctx)).To(Succeed()) Expect(awsEnv.InstanceTypesProvider.UpdateInstanceTypeOfferings(ctx)).To(Succeed()) }) It("should return an ICE error when all attempted instance types return an ICE error", func() { ExpectApplied(ctx, env.Client, nodeClaim, nodePool, nodeClass) + nodeClass = ExpectExists(ctx, env.Client, nodeClass) awsEnv.EC2API.InsufficientCapacityPools.Set([]fake.CapacityPool{ {CapacityType: corev1beta1.CapacityTypeOnDemand, InstanceType: "m5.xlarge", Zone: "test-zone-1a"}, {CapacityType: corev1beta1.CapacityTypeOnDemand, InstanceType: "m5.xlarge", Zone: "test-zone-1b"}, diff --git a/pkg/providers/instancetype/suite_test.go b/pkg/providers/instancetype/suite_test.go index 0e270e24a70b..a21937e83e1e 100644 --- a/pkg/providers/instancetype/suite_test.go +++ b/pkg/providers/instancetype/suite_test.go @@ -155,6 +155,28 @@ var _ = Describe("InstanceTypeProvider", func() { }, }, }) + nodeClass.Status.SecurityGroups = []v1beta1.SecurityGroup{ + { + ID: "sg-test1", + }, + { + ID: "sg-test2", + }, + { + ID: "sg-test3", + }, + } + windowsNodeClass.Status.SecurityGroups = []v1beta1.SecurityGroup{ + { + ID: "sg-test1", + }, + { + ID: "sg-test2", + }, + { + ID: "sg-test3", + }, + } Expect(awsEnv.InstanceTypesProvider.UpdateInstanceTypes(ctx)).To(Succeed()) Expect(awsEnv.InstanceTypesProvider.UpdateInstanceTypeOfferings(ctx)).To(Succeed()) }) diff --git a/pkg/providers/launchtemplate/launchtemplate.go b/pkg/providers/launchtemplate/launchtemplate.go index f7d692c6f0de..b90949530f6f 100644 --- a/pkg/providers/launchtemplate/launchtemplate.go +++ b/pkg/providers/launchtemplate/launchtemplate.go @@ -163,13 +163,15 @@ func (p *DefaultProvider) createAMIOptions(ctx context.Context, nodeClass *v1bet if err != nil { return nil, err } + // Relying on the status rather than an API call means that Karpenter is subject to a race + // condition where EC2NodeClass spec changes haven't propagated to the status once a node + // has launched. + // If a user changes their EC2NodeClass and shortly after Karpenter launches a node, + // in the worst case, the node could be drifted and re-created. + // TODO @aengeda: add status generation fields to gate node creation until the status is updated from a spec change // Get constrained security groups - securityGroups, err := p.securityGroupProvider.List(ctx, nodeClass) - if err != nil { - return nil, err - } - if len(securityGroups) == 0 { - return nil, fmt.Errorf("no security groups exist given constraints") + if len(nodeClass.Status.SecurityGroups) == 0 { + return nil, fmt.Errorf("no security groups are present in the status") } options := &amifamily.Options{ ClusterName: options.FromContext(ctx).ClusterName, @@ -177,14 +179,12 @@ func (p *DefaultProvider) createAMIOptions(ctx context.Context, nodeClass *v1bet ClusterCIDR: p.ClusterCIDR.Load(), InstanceProfile: instanceProfile, InstanceStorePolicy: nodeClass.Spec.InstanceStorePolicy, - SecurityGroups: lo.Map(securityGroups, func(s *ec2.SecurityGroup, _ int) v1beta1.SecurityGroup { - return v1beta1.SecurityGroup{ID: aws.StringValue(s.GroupId), Name: aws.StringValue(s.GroupName)} - }), - Tags: tags, - Labels: labels, - CABundle: p.CABundle, - KubeDNSIP: p.KubeDNSIP, - NodeClassName: nodeClass.Name, + SecurityGroups: nodeClass.Status.SecurityGroups, + Tags: tags, + Labels: labels, + CABundle: p.CABundle, + KubeDNSIP: p.KubeDNSIP, + NodeClassName: nodeClass.Name, } if nodeClass.Spec.AssociatePublicIPAddress != nil { options.AssociatePublicIPAddress = nodeClass.Spec.AssociatePublicIPAddress diff --git a/pkg/providers/launchtemplate/suite_test.go b/pkg/providers/launchtemplate/suite_test.go index 6799b9ed3ab8..5ae89e604bf5 100644 --- a/pkg/providers/launchtemplate/suite_test.go +++ b/pkg/providers/launchtemplate/suite_test.go @@ -146,6 +146,17 @@ var _ = Describe("LaunchTemplate Provider", func() { }, }, }) + nodeClass.Status.SecurityGroups = []v1beta1.SecurityGroup{ + { + ID: "sg-test1", + }, + { + ID: "sg-test2", + }, + { + ID: "sg-test3", + }, + } Expect(awsEnv.InstanceTypesProvider.UpdateInstanceTypes(ctx)).To(Succeed()) Expect(awsEnv.InstanceTypesProvider.UpdateInstanceTypeOfferings(ctx)).To(Succeed()) }) @@ -171,6 +182,17 @@ var _ = Describe("LaunchTemplate Provider", func() { }, }, }) + nodeClass2.Status.SecurityGroups = []v1beta1.SecurityGroup{ + { + ID: "sg-test1", + }, + { + ID: "sg-test2", + }, + { + ID: "sg-test3", + }, + } pods := []*v1.Pod{ coretest.UnschedulablePod(coretest.PodOptions{NodeRequirements: []v1.NodeSelectorRequirement{ {