diff --git a/go.mod b/go.mod index b9c56ea2a201..93dc6cbc80fa 100644 --- a/go.mod +++ b/go.mod @@ -19,7 +19,7 @@ require ( github.com/patrickmn/go-cache v2.1.0+incompatible github.com/pelletier/go-toml/v2 v2.2.2 github.com/prometheus/client_golang v1.19.1 - github.com/samber/lo v1.45.0 + github.com/samber/lo v1.46.0 go.uber.org/multierr v1.11.0 go.uber.org/zap v1.27.0 golang.org/x/sync v0.7.0 @@ -31,7 +31,7 @@ require ( k8s.io/utils v0.0.0-20240102154912-e7106e64919e knative.dev/pkg v0.0.0-20231010144348-ca8c009405dd sigs.k8s.io/controller-runtime v0.18.4 - sigs.k8s.io/karpenter v0.37.1-0.20240711192037-195f8961cae4 + sigs.k8s.io/karpenter v0.37.1-0.20240715214311-a122e25df3de sigs.k8s.io/yaml v1.4.0 ) diff --git a/go.sum b/go.sum index c72e815149ff..512cb4ffbc64 100644 --- a/go.sum +++ b/go.sum @@ -332,8 +332,8 @@ github.com/rogpeppe/go-internal v1.3.0/go.mod h1:M8bDsm7K2OlrFYOpmOWEs/qY81heoFR github.com/rogpeppe/go-internal v1.10.0 h1:TMyTOH3F/DB16zRVcYyreMH6GnZZrwQVAoYjRBZyWFQ= github.com/rogpeppe/go-internal v1.10.0/go.mod h1:UQnix2H7Ngw/k4C5ijL5+65zddjncjaFoBhdsK/akog= github.com/russross/blackfriday/v2 v2.1.0/go.mod h1:+Rmxgy9KzJVeS9/2gXHxylqXiyQDYRxCVz55jmeOWTM= -github.com/samber/lo v1.45.0 h1:TPK85Y30Lv9Jh8s3TrJeA94u1hwcbFA9JObx/vT6lYU= -github.com/samber/lo v1.45.0/go.mod h1:RmDH9Ct32Qy3gduHQuKJ3gW1fMHAnE/fAzQuf6He5cU= +github.com/samber/lo v1.46.0 h1:w8G+oaCPgz1PoCJztqymCFaKwXt+5cCXn51uPxExFfQ= +github.com/samber/lo v1.46.0/go.mod h1:RmDH9Ct32Qy3gduHQuKJ3gW1fMHAnE/fAzQuf6He5cU= github.com/sirupsen/logrus v1.2.0/go.mod h1:LxeOpSwHxABJmUn/MG1IvRgCAasNZTLOkJPxbbu5VWo= github.com/sirupsen/logrus v1.4.2/go.mod h1:tLMulIdttU9McNUspp0xgXVQah82FyeX6MwdIuYE2rE= github.com/sirupsen/logrus v1.6.0/go.mod h1:7uNnSEd1DgxDLC74fIahvMZmmYsHGZGEOFrfsX/uA88= @@ -761,8 +761,8 @@ sigs.k8s.io/controller-runtime v0.18.4 h1:87+guW1zhvuPLh1PHybKdYFLU0YJp4FhJRmiHv sigs.k8s.io/controller-runtime v0.18.4/go.mod h1:TVoGrfdpbA9VRFaRnKgk9P5/atA0pMwq+f+msb9M8Sg= sigs.k8s.io/json v0.0.0-20221116044647-bc3834ca7abd h1:EDPBXCAspyGV4jQlpZSudPeMmr1bNJefnuqLsRAsHZo= sigs.k8s.io/json v0.0.0-20221116044647-bc3834ca7abd/go.mod h1:B8JuhiUyNFVKdsE8h686QcCxMaH6HrOAZj4vswFpcB0= -sigs.k8s.io/karpenter v0.37.1-0.20240711192037-195f8961cae4 h1:PfXPyel4nqHFf/3fMQ0TSP+gUZTyyrjSjBDjhp7IRjQ= -sigs.k8s.io/karpenter v0.37.1-0.20240711192037-195f8961cae4/go.mod h1:jwEZ2Efxsc0yyNkrDEFN2RduAwlm/s7reIVNblZ8vyM= +sigs.k8s.io/karpenter v0.37.1-0.20240715214311-a122e25df3de h1:S7/EVJbRkaTj0OIGB9pe1Rx2TFBnpcQOioBmuQNE9B8= +sigs.k8s.io/karpenter v0.37.1-0.20240715214311-a122e25df3de/go.mod h1:qJQHhUDQhzKa9ui7WApu0Fd2GXu70qexuBC24npDXJY= sigs.k8s.io/structured-merge-diff/v4 v4.4.1 h1:150L+0vs/8DA78h1u02ooW1/fFq/Lwr+sGiqlzvrtq4= sigs.k8s.io/structured-merge-diff/v4 v4.4.1/go.mod h1:N8hJocpFajUSSeSJ9bOZ77VzejKZaXsTtZo4/u7Io08= sigs.k8s.io/yaml v1.4.0 h1:Mk1wCc2gy/F0THH0TAp1QYyJNzRm2KCLy3o5ASXVI5E= diff --git a/pkg/apis/crds/karpenter.sh_nodeclaims.yaml b/pkg/apis/crds/karpenter.sh_nodeclaims.yaml index 9be03992d05a..fea305f1c840 100644 --- a/pkg/apis/crds/karpenter.sh_nodeclaims.yaml +++ b/pkg/apis/crds/karpenter.sh_nodeclaims.yaml @@ -248,6 +248,26 @@ spec: - key type: object type: array + terminationGracePeriod: + description: |- + TerminationGracePeriod is the maximum duration the controller will wait before forcefully deleting the pods on a node, measured from when deletion is first initiated. + + + Warning: this feature takes precedence over a Pod's terminationGracePeriodSeconds value, and bypasses any blocked PDBs or the karpenter.sh/do-not-disrupt annotation. + + + This field is intended to be used by cluster administrators to enforce that nodes can be cycled within a given time period. + When set, drifted nodes will begin draining even if there are pods blocking eviction. Draining will respect PDBs and the do-not-disrupt annotation until the TGP is reached. + + + Karpenter will preemptively delete pods so their terminationGracePeriodSeconds align with the node's terminationGracePeriod. + If a pod would be terminated without being granted its full terminationGracePeriodSeconds prior to the node timeout, + that pod will be deleted at T = node timeout - pod terminationGracePeriodSeconds. + + + The feature can also be used to allow maximum time limits for long-running jobs which can delay node termination with preStop hooks. + If left undefined, the controller will wait indefinitely for pods to be drained. + type: string required: - nodeClassRef - requirements @@ -690,6 +710,26 @@ spec: - key type: object type: array + terminationGracePeriod: + description: |- + TerminationGracePeriod is the maximum duration the controller will wait before forcefully deleting the pods on a node, measured from when deletion is first initiated. + + + Warning: this feature takes precedence over a Pod's terminationGracePeriodSeconds value, and bypasses any blocked PDBs or the karpenter.sh/do-not-disrupt annotation. + + + This field is intended to be used by cluster administrators to enforce that nodes can be cycled within a given time period. + When set, drifted nodes will begin draining even if there are pods blocking eviction. Draining will respect PDBs and the do-not-disrupt annotation until the TGP is reached. + + + Karpenter will preemptively delete pods so their terminationGracePeriodSeconds align with the node's terminationGracePeriod. + If a pod would be terminated without being granted its full terminationGracePeriodSeconds prior to the node timeout, + that pod will be deleted at T = node timeout - pod terminationGracePeriodSeconds. + + + The feature can also be used to allow maximum time limits for long-running jobs which can delay node termination with preStop hooks. + If left undefined, the controller will wait indefinitely for pods to be drained. + type: string required: - nodeClassRef - requirements diff --git a/pkg/apis/crds/karpenter.sh_nodepools.yaml b/pkg/apis/crds/karpenter.sh_nodepools.yaml index f14bdd8883a3..2fa0d4fa48f1 100644 --- a/pkg/apis/crds/karpenter.sh_nodepools.yaml +++ b/pkg/apis/crds/karpenter.sh_nodepools.yaml @@ -115,13 +115,13 @@ spec: description: |- Reasons is a list of disruption methods that this budget applies to. If Reasons is not set, this budget applies to all methods. Otherwise, this will apply to each reason defined. - allowed reasons are underutilized, empty, and drifted. + allowed reasons are Underutilized, Empty, and Drifted. items: description: DisruptionReason defines valid reasons for disruption budgets. enum: - - underutilized - - empty - - drifted + - Underutilized + - Empty + - Drifted type: string type: array schedule: @@ -390,6 +390,26 @@ spec: - key type: object type: array + terminationGracePeriod: + description: |- + TerminationGracePeriod is the maximum duration the controller will wait before forcefully deleting the pods on a node, measured from when deletion is first initiated. + + + Warning: this feature takes precedence over a Pod's terminationGracePeriodSeconds value, and bypasses any blocked PDBs or the karpenter.sh/do-not-disrupt annotation. + + + This field is intended to be used by cluster administrators to enforce that nodes can be cycled within a given time period. + When set, drifted nodes will begin draining even if there are pods blocking eviction. Draining will respect PDBs and the do-not-disrupt annotation until the TGP is reached. + + + Karpenter will preemptively delete pods so their terminationGracePeriodSeconds align with the node's terminationGracePeriod. + If a pod would be terminated without being granted its full terminationGracePeriodSeconds prior to the node timeout, + that pod will be deleted at T = node timeout - pod terminationGracePeriodSeconds. + + + The feature can also be used to allow maximum time limits for long-running jobs which can delay node termination with preStop hooks. + If left undefined, the controller will wait indefinitely for pods to be drained. + type: string required: - nodeClassRef - requirements @@ -573,13 +593,13 @@ spec: description: |- Reasons is a list of disruption methods that this budget applies to. If Reasons is not set, this budget applies to all methods. Otherwise, this will apply to each reason defined. - allowed reasons are underutilized, empty, and drifted. + allowed reasons are Underutilized, Empty, and Drifted. items: description: DisruptionReason defines valid reasons for disruption budgets. enum: - - underutilized - - empty - - drifted + - Underutilized + - Empty + - Drifted type: string type: array schedule: @@ -970,6 +990,26 @@ spec: - key type: object type: array + terminationGracePeriod: + description: |- + TerminationGracePeriod is the maximum duration the controller will wait before forcefully deleting the pods on a node, measured from when deletion is first initiated. + + + Warning: this feature takes precedence over a Pod's terminationGracePeriodSeconds value, and bypasses any blocked PDBs or the karpenter.sh/do-not-disrupt annotation. + + + This field is intended to be used by cluster administrators to enforce that nodes can be cycled within a given time period. + When set, drifted nodes will begin draining even if there are pods blocking eviction. Draining will respect PDBs and the do-not-disrupt annotation until the TGP is reached. + + + Karpenter will preemptively delete pods so their terminationGracePeriodSeconds align with the node's terminationGracePeriod. + If a pod would be terminated without being granted its full terminationGracePeriodSeconds prior to the node timeout, + that pod will be deleted at T = node timeout - pod terminationGracePeriodSeconds. + + + The feature can also be used to allow maximum time limits for long-running jobs which can delay node termination with preStop hooks. + If left undefined, the controller will wait indefinitely for pods to be drained. + type: string required: - nodeClassRef - requirements diff --git a/pkg/operator/options/options.go b/pkg/operator/options/options.go index e9684de8fa76..47121c75e9da 100644 --- a/pkg/operator/options/options.go +++ b/pkg/operator/options/options.go @@ -24,6 +24,8 @@ import ( coreoptions "sigs.k8s.io/karpenter/pkg/operator/options" "sigs.k8s.io/karpenter/pkg/utils/env" + + "github.com/aws/karpenter-provider-aws/pkg/utils" ) func init() { @@ -51,7 +53,7 @@ func (o *Options) AddFlags(fs *coreoptions.FlagSet) { fs.StringVar(&o.ClusterName, "cluster-name", env.WithDefaultString("CLUSTER_NAME", ""), "[REQUIRED] The kubernetes cluster name for resource discovery.") fs.StringVar(&o.ClusterEndpoint, "cluster-endpoint", env.WithDefaultString("CLUSTER_ENDPOINT", ""), "The external kubernetes cluster endpoint for new nodes to connect with. If not specified, will discover the cluster endpoint using DescribeCluster API.") fs.BoolVarWithEnv(&o.IsolatedVPC, "isolated-vpc", "ISOLATED_VPC", false, "If true, then assume we can't reach AWS services which don't have a VPC endpoint. This also has the effect of disabling look-ups to the AWS on-demand pricing endpoint.") - fs.Float64Var(&o.VMMemoryOverheadPercent, "vm-memory-overhead-percent", env.WithDefaultFloat64("VM_MEMORY_OVERHEAD_PERCENT", 0.075), "The VM memory overhead as a percent that will be subtracted from the total memory for all instance types.") + fs.Float64Var(&o.VMMemoryOverheadPercent, "vm-memory-overhead-percent", utils.WithDefaultFloat64("VM_MEMORY_OVERHEAD_PERCENT", 0.075), "The VM memory overhead as a percent that will be subtracted from the total memory for all instance types.") fs.StringVar(&o.InterruptionQueue, "interruption-queue", env.WithDefaultString("INTERRUPTION_QUEUE", ""), "Interruption queue is the name of the SQS queue used for processing interruption events from EC2. Interruption handling is disabled if not specified. Enabling interruption handling may require additional permissions on the controller service account. Additional permissions are outlined in the docs.") fs.IntVar(&o.ReservedENIs, "reserved-enis", env.WithDefaultInt("RESERVED_ENIS", 0), "Reserved ENIs are not included in the calculations for max-pods or kube-reserved. This is most often used in the VPC CNI custom networking setup https://docs.aws.amazon.com/eks/latest/userguide/cni-custom-network.html.") } diff --git a/pkg/utils/utils.go b/pkg/utils/utils.go index 835d513eb527..86d5c40762f1 100644 --- a/pkg/utils/utils.go +++ b/pkg/utils/utils.go @@ -18,7 +18,9 @@ import ( "context" "encoding/json" "fmt" + "os" "regexp" + "strconv" "strings" "github.com/aws/aws-sdk-go/aws" @@ -76,6 +78,20 @@ func PrettySlice[T any](s []T, maxItems int) string { return sb.String() } +// WithDefaultFloat64 returns the float64 value of the supplied environment variable or, if not present, +// the supplied default value. If the float64 conversion fails, returns the default +func WithDefaultFloat64(key string, def float64) float64 { + val, ok := os.LookupEnv(key) + if !ok { + return def + } + f, err := strconv.ParseFloat(val, 64) + if err != nil { + return def + } + return f +} + // GetKubletConfigurationWithNodePool use the most recent version of the kubelet configuration. // The priority of fields is listed below: // 1.) v1 NodePool kubelet annotation (Showing a user configured using v1beta1 NodePool at some point)