From 4e884a9d9755d31511d24505baa1c014d994d411 Mon Sep 17 00:00:00 2001 From: nikmohan Date: Thu, 11 Jan 2024 21:28:45 -0600 Subject: [PATCH] Run docgen and address doc comments. --- Makefile | 2 +- charts/karpenter/README.md | 7 ++++--- website/content/en/preview/concepts/disruption.md | 7 ++++--- website/content/en/preview/reference/settings.md | 4 ++-- 4 files changed, 11 insertions(+), 9 deletions(-) diff --git a/Makefile b/Makefile index bae18cca5b9a..ad33b12e5422 100644 --- a/Makefile +++ b/Makefile @@ -18,7 +18,7 @@ HELM_OPTS ?= --set serviceAccount.annotations.eks\\.amazonaws\\.com/role-arn=${K --set controller.resources.requests.memory=1Gi \ --set controller.resources.limits.cpu=1 \ --set controller.resources.limits.memory=1Gi \ - --set settings.featureGates.spotToSpotConsolidation=false \ + --set settings.featureGates.spotToSpotConsolidation=true \ --create-namespace # CR for local builds of Karpenter diff --git a/charts/karpenter/README.md b/charts/karpenter/README.md index c377479af27e..29db446c547f 100644 --- a/charts/karpenter/README.md +++ b/charts/karpenter/README.md @@ -63,8 +63,8 @@ helm upgrade --install --namespace karpenter --create-namespace \ | podAnnotations | object | `{}` | Additional annotations for the pod. | | podDisruptionBudget.maxUnavailable | int | `1` | | | podDisruptionBudget.name | string | `"karpenter"` | | -| podSecurityContext | object | `{"fsGroup":65536}` | SecurityContext for the pod. | | podLabels | object | `{}` | Additional labels for the pod. | +| podSecurityContext | object | `{"fsGroup":65536}` | SecurityContext for the pod. | | priorityClassName | string | `"system-cluster-critical"` | PriorityClass name for the pod. | | replicas | int | `2` | Number of replicas. | | revisionHistoryLimit | int | `10` | The number of old ReplicaSets to retain to allow rollback. 
| @@ -74,7 +74,7 @@ helm upgrade --install --namespace karpenter --create-namespace \ | serviceMonitor.additionalLabels | object | `{}` | Additional labels for the ServiceMonitor. | | serviceMonitor.enabled | bool | `false` | Specifies whether a ServiceMonitor should be created. | | serviceMonitor.endpointConfig | object | `{}` | Endpoint configuration for the ServiceMonitor. | -| settings | object | `{"assumeRoleARN":"","assumeRoleDuration":"15m","batchIdleDuration":"1s","batchMaxDuration":"10s","clusterCABundle":"","clusterEndpoint":"","clusterName":"","featureGates":{"drift":true},"interruptionQueue":"","isolatedVPC":false,"reservedENIs":"0","vmMemoryOverheadPercent":0.075}` | Global Settings to configure Karpenter | +| settings | object | `{"assumeRoleARN":"","assumeRoleDuration":"15m","batchIdleDuration":"1s","batchMaxDuration":"10s","clusterCABundle":"","clusterEndpoint":"","clusterName":"","featureGates":{"drift":true,"spotToSpotConsolidation":false},"interruptionQueue":"","isolatedVPC":false,"reservedENIs":"0","vmMemoryOverheadPercent":0.075}` | Global Settings to configure Karpenter | | settings.assumeRoleARN | string | `""` | Role to assume for calling AWS services. | | settings.assumeRoleDuration | string | `"15m"` | Duration of assumed credentials in minutes. Default value is 15 minutes. Not used unless assumeRoleARN set. | | settings.batchIdleDuration | string | `"1s"` | The maximum amount of time with no new ending pods that if exceeded ends the current batching window. If pods arrive faster than this time, the batching window will be extended up to the maxDuration. If they arrive slower, the pods will be batched separately. | @@ -82,8 +82,9 @@ helm upgrade --install --namespace karpenter --create-namespace \ | settings.clusterCABundle | string | `""` | Cluster CA bundle for TLS configuration of provisioned nodes. If not set, this is taken from the controller's TLS configuration for the API server. 
| | settings.clusterEndpoint | string | `""` | Cluster endpoint. If not set, will be discovered during startup (EKS only) | | settings.clusterName | string | `""` | Cluster name. | -| settings.featureGates | object | `{"drift":true}` | Feature Gate configuration values. Feature Gates will follow the same graduation process and requirements as feature gates in Kubernetes. More information here https://kubernetes.io/docs/reference/command-line-tools-reference/feature-gates/#feature-gates-for-alpha-or-beta-features | +| settings.featureGates | object | `{"drift":true,"spotToSpotConsolidation":false}` | Feature Gate configuration values. Feature Gates will follow the same graduation process and requirements as feature gates in Kubernetes. More information here https://kubernetes.io/docs/reference/command-line-tools-reference/feature-gates/#feature-gates-for-alpha-or-beta-features | | settings.featureGates.drift | bool | `true` | drift is in BETA and is enabled by default. Setting drift to false disables the drift disruption method to watch for drift between currently deployed nodes and the desired state of nodes set in nodepools and nodeclasses | +| settings.featureGates.spotToSpotConsolidation | bool | `false` | spotToSpotConsolidation is disabled by default. Setting this to true will enable spot replacement consolidation for both single and multi-node consolidation. | | settings.interruptionQueue | string | `""` | interruptionQueue is disabled if not specified. Enabling interruption handling may require additional permissions on the controller service account. Additional permissions are outlined in the docs. 
| | settings.isolatedVPC | bool | `false` | If true then assume we can't reach AWS services which don't have a VPC endpoint This also has the effect of disabling look-ups to the AWS pricing endpoint | | settings.reservedENIs | string | `"0"` | Reserved ENIs are not included in the calculations for max-pods or kube-reserved This is most often used in the VPC CNI custom networking setup https://docs.aws.amazon.com/eks/latest/userguide/cni-custom-network.html | diff --git a/website/content/en/preview/concepts/disruption.md b/website/content/en/preview/concepts/disruption.md index 7f6791d2f7c8..36ea6a793a5d 100644 --- a/website/content/en/preview/concepts/disruption.md +++ b/website/content/en/preview/concepts/disruption.md @@ -118,9 +118,10 @@ Using preferred anti-affinity and topology spreads can reduce the effectiveness {{% /alert %}} {{% alert title="Note" color="primary" %}} -For spot nodes, Karpenter has deletion consolidation enabled by default. It will replace a spot node with a cheaper spot node for both single and multi-node consolidation only if the feature flag `SpotToSpotConsolidation` is enabled. Cheaper spot instance types are selected with the `price-capacity-optimized` strategy and often the cheapest spot instance type is not launched due to the likelihood of interruption. Karpenter would replace consolidation for single-node consolidation only if there are more than 15 potential replacement spot instance types. This threshold ensures that we have enough instance diversity that it's very unlikely that all instances that we select will have high interruption. - -To enable the spotToSpotConsolidation feature flag, refer to the [Feature Gates]({{}}). +For spot nodes, Karpenter has deletion consolidation enabled by default. If you would like to enable spot-to-spot replacement consolidation, you need to enable the feature through the [`SpotToSpotConsolidation` feature flag]({{<ref "../reference/settings#feature-gates" >}}). 
+Cheaper spot instance types are selected with the [`price-capacity-optimized` strategy](https://aws.amazon.com/blogs/compute/introducing-price-capacity-optimized-allocation-strategy-for-ec2-spot-instances/), and often the cheapest spot instance type is not launched due to its likelihood of interruption; therefore, Karpenter uses the number of available instance type options cheaper than the currently launched spot instance as a heuristic for evaluating whether it should launch a replacement for the current spot node. +We refer to the number of instance types that Karpenter has within its launch decision as a launch's "instance type flexibility." When Karpenter is considering a spot-to-spot consolidation replacement from a single node to another node, it will check whether replacing the instance type will lead to enough instance type flexibility in the subsequent launch request. That is, can Karpenter find enough options cheaper than the current instance type that it can be assured that (1) it won't continually consolidate down to the cheapest spot instance, which might have very poor availability, and (2) the launch with the new instance types will have enough flexibility that an instance type with availability comparable to the current instance type will be chosen? +Karpenter requires a minimum instance type flexibility of 15 instance types when performing single-node spot-to-spot consolidations (1 node to 1 node). It does not have the same instance type flexibility requirement for multi-node spot-to-spot consolidations (many nodes to 1 node), since consolidating many nodes into one without requiring flexibility won't lead to "race to the bottom" scenarios. 
{{% /alert %}} ### Drift diff --git a/website/content/en/preview/reference/settings.md b/website/content/en/preview/reference/settings.md index 443e13836cd6..4150586483ea 100644 --- a/website/content/en/preview/reference/settings.md +++ b/website/content/en/preview/reference/settings.md @@ -21,7 +21,7 @@ Karpenter surfaces environment variables and CLI parameters to allow you to conf | CLUSTER_NAME | \-\-cluster-name | [REQUIRED] The kubernetes cluster name for resource discovery.| | DISABLE_WEBHOOK | \-\-disable-webhook | Disable the admission and validation webhooks| | ENABLE_PROFILING | \-\-enable-profiling | Enable the profiling on the metric endpoint| -| FEATURE_GATES | \-\-feature-gates | Optional features can be enabled / disabled using feature gates. Current options are: Drift (default = Drift=true), SpotToSpotConsolidation (default = SpotToSpotConsolidation=false)| +| FEATURE_GATES | \-\-feature-gates | Optional features can be enabled / disabled using feature gates. Current options are: Drift,SpotToSpotConsolidation (default = Drift=true,SpotToSpotConsolidation=false)| | HEALTH_PROBE_PORT | \-\-health-probe-port | The port the health probe endpoint binds to for reporting controller health (default = 8081)| | INTERRUPTION_QUEUE | \-\-interruption-queue | Interruption queue is disabled if not specified. Enabling interruption handling may require additional permissions on the controller service account. Additional permissions are outlined in the docs.| | ISOLATED_VPC | \-\-isolated-vpc | If true, then assume we can't reach AWS services which don't have a VPC endpoint. 
This also has the effect of disabling look-ups to the AWS pricing endpoint.| @@ -47,7 +47,7 @@ Karpenter uses [feature gates](https://kubernetes.io/docs/reference/command-line |-------------------------|---------|-------|---------|---------| | Drift | false | Alpha | v0.21.x | v0.32.x | | Drift | true | Beta | v0.33.x | | -| SpotToSpotConsolidation | false | Beta | v0.33.x | | +| SpotToSpotConsolidation | false | Beta | v0.34.x | | ### Batching Parameters