From 026b26e2f225a87af64fb10ce55ea5e8c54d4146 Mon Sep 17 00:00:00 2001 From: Jonathan Innis Date: Thu, 16 May 2024 11:22:36 -0500 Subject: [PATCH] chore: Factor minValues requirements checking into the InstanceTypes methods (#1246) --- pkg/cloudprovider/types.go | 78 +++++++++++++++++ pkg/controllers/disruption/consolidation.go | 49 +++++------ pkg/controllers/disruption/helpers.go | 25 +++--- .../disruption/multinodeconsolidation.go | 13 +-- .../scheduling/instance_selection_test.go | 2 +- .../provisioning/scheduling/nodeclaim.go | 86 +++---------------- .../provisioning/scheduling/scheduler.go | 27 ++---- .../provisioning/scheduling/suite_test.go | 2 +- 8 files changed, 137 insertions(+), 145 deletions(-) diff --git a/pkg/cloudprovider/types.go b/pkg/cloudprovider/types.go index 2fc8b4720b..a7bbab49d0 100644 --- a/pkg/cloudprovider/types.go +++ b/pkg/cloudprovider/types.go @@ -27,6 +27,7 @@ import ( "github.com/samber/lo" v1 "k8s.io/api/core/v1" "k8s.io/apimachinery/pkg/runtime/schema" + "k8s.io/apimachinery/pkg/util/sets" "sigs.k8s.io/karpenter/pkg/apis/v1beta1" "sigs.k8s.io/karpenter/pkg/scheduling" @@ -123,6 +124,83 @@ func (its InstanceTypes) Compatible(requirements scheduling.Requirements) Instan return filteredInstanceTypes } +// SatisfiesMinValues validates whether the InstanceTypes satisfies the minValues requirements +// It returns the minimum number of needed instance types to satisfy the minValues requirement and an error +// that indicates whether the InstanceTypes satisfy the passed-in requirements +// This minNeededInstanceTypes value is dependent on the ordering of instance types, so relying on this value in a +// deterministic way implies that the instance types are sorted ahead of using this method +// For example: +// Requirements: +// - key: node.kubernetes.io/instance-type +// operator: In +// values: ["c4.large","c4.xlarge","c5.large","c5.xlarge","m4.large","m4.xlarge"] +// minValues: 3 +// - key: karpenter.kwok.sh/instance-family +// operator: In +// values: ["c4","c5","m4"] +// minValues: 3 +// +// InstanceTypes: ["c4.large","c5.xlarge","m4.2xlarge"], it PASSES the requirements +// +// we get the map as : { +// node.kubernetes.io/instance-type: ["c4.large","c5.xlarge","m4.2xlarge"], +// karpenter.k8s.aws/instance-family: ["c4","c5","m4"] +// } +// so it returns 3 and a nil error to indicate a minimum of 3 instance types were required to fulfill the minValues requirements +// +// And if InstanceTypes: ["c4.large","c4.xlarge","c5.2xlarge"], it FAILS the requirements +// +// we get the map as : { +// node.kubernetes.io/instance-type: ["c4.large","c4.xlarge","c5.2xlarge"], +// karpenter.k8s.aws/instance-family: ["c4","c5"] // minimum requirement failed for this. +// } +// so it returns 3 and a non-nil error to indicate that the instance types weren't able to fulfill the minValues requirements +func (its InstanceTypes) SatisfiesMinValues(requirements scheduling.Requirements) (minNeededInstanceTypes int, err error) { + valuesForKey := map[string]sets.Set[string]{} + // We validate if sorting by price and truncating the number of instance types to minItems breaks the minValue requirement. + // If minValue requirement fails, we return an error that indicates the first requirement key that couldn't be satisfied. + var incompatibleKey string + for i, it := range its { + for _, req := range requirements { + if req.MinValues != nil { + if _, ok := valuesForKey[req.Key]; !ok { + valuesForKey[req.Key] = sets.New[string]() + } + valuesForKey[req.Key] = valuesForKey[req.Key].Insert(it.Requirements.Get(req.Key).Values()...) + } + } + incompatibleKey = func() string { + for k, v := range valuesForKey { + // Break if any of the MinValues of requirement is not honored + if len(v) < lo.FromPtr(requirements.Get(k).MinValues) { + return k + } + } + return "" + }() + if incompatibleKey == "" { + return i + 1, nil + } + } + if incompatibleKey != "" { + return len(its), fmt.Errorf("minValues requirement is not met for %q", incompatibleKey) + } + return len(its), nil +} + +// Truncate truncates the InstanceTypes based on the passed-in requirements +// It returns an error if it isn't possible to truncate the instance types on maxItems without violating minValues +func (its InstanceTypes) Truncate(requirements scheduling.Requirements, maxItems int) (InstanceTypes, error) { + truncatedInstanceTypes := InstanceTypes(lo.Slice(its.OrderByPrice(requirements), 0, maxItems)) + // Only check for a validity of NodeClaim if its requirement has minValues in it. + if requirements.HasMinValues() { + if _, err := truncatedInstanceTypes.SatisfiesMinValues(requirements); err != nil { + return its, fmt.Errorf("validating minValues, %w", err) + } + } + return truncatedInstanceTypes, nil +} + type InstanceTypeOverhead struct { // KubeReserved returns the default resources allocated to kubernetes system daemons by default KubeReserved v1.ResourceList diff --git a/pkg/controllers/disruption/consolidation.go b/pkg/controllers/disruption/consolidation.go index 501a35e401..70ffef1a93 100644 --- a/pkg/controllers/disruption/consolidation.go +++ b/pkg/controllers/disruption/consolidation.go @@ -174,21 +174,20 @@ func (c *consolidation) computeConsolidation(ctx context.Context, candidates ... return c.computeSpotToSpotConsolidation(ctx, candidates, results, candidatePrice) } - var incompatibleMinReqKey string - // filterByPriceWithMinValues returns the instanceTypes that are lower priced than the current candidate and the requirement for the NodeClaim that does not meet minValues. + // filterByPrice returns the instanceTypes that are lower priced than the current candidate and any error that indicates the input couldn't be filtered. // If we use this directly for spot-to-spot consolidation, we are bound to get repeated consolidations because the strategy that chooses to launch the spot instance from the list does // it based on availability and price which could result in selection/launch of non-lowest priced instance in the list. So, we would keep repeating this loop till we get to lowest priced instance // causing churns and landing onto lower available spot instance ultimately resulting in higher interruptions. - results.NewNodeClaims[0].NodeClaimTemplate.InstanceTypeOptions, incompatibleMinReqKey, _ = - filterByPriceWithMinValues(results.NewNodeClaims[0].InstanceTypeOptions, results.NewNodeClaims[0].Requirements, candidatePrice) - + results.NewNodeClaims[0].NodeClaimTemplate.InstanceTypeOptions, err = filterByPrice(results.NewNodeClaims[0].InstanceTypeOptions, results.NewNodeClaims[0].Requirements, candidatePrice) + if err != nil { + if len(candidates) == 1 { + c.recorder.Publish(disruptionevents.Unconsolidatable(candidates[0].Node, candidates[0].NodeClaim, fmt.Sprintf("Filtering by price: %v", err))...) + } + return Command{}, pscheduling.Results{}, nil + } if len(results.NewNodeClaims[0].NodeClaimTemplate.InstanceTypeOptions) == 0 { if len(candidates) == 1 { - if len(incompatibleMinReqKey) > 0 { - c.recorder.Publish(disruptionevents.Unconsolidatable(candidates[0].Node, candidates[0].NodeClaim, fmt.Sprintf("minValues requirement is not met for %s", incompatibleMinReqKey))...) - } else { - c.recorder.Publish(disruptionevents.Unconsolidatable(candidates[0].Node, candidates[0].NodeClaim, "Can't replace with a cheaper node")...) - } + c.recorder.Publish(disruptionevents.Unconsolidatable(candidates[0].Node, candidates[0].NodeClaim, "Can't replace with a cheaper node")...) } return Command{}, pscheduling.Results{}, nil } @@ -227,24 +226,21 @@ func (c *consolidation) computeSpotToSpotConsolidation(ctx context.Context, cand // Since we are sure that the replacement nodeclaim considered for the spot candidates are spot, we will enforce it through the requirements. results.NewNodeClaims[0].Requirements.Add(scheduling.NewRequirement(v1beta1.CapacityTypeLabelKey, v1.NodeSelectorOpIn, v1beta1.CapacityTypeSpot)) // All possible replacements for the current candidate compatible with spot offerings - instanceTypeOptionsWithSpotOfferings := - results.NewNodeClaims[0].NodeClaimTemplate.InstanceTypeOptions.Compatible(results.NewNodeClaims[0].Requirements) - - var incompatibleMinReqKey string - var numInstanceTypes int - // Possible replacements that are lower priced than the current candidate and the requirement that is not compatible with minValues - results.NewNodeClaims[0].NodeClaimTemplate.InstanceTypeOptions, incompatibleMinReqKey, numInstanceTypes = - filterByPriceWithMinValues(instanceTypeOptionsWithSpotOfferings, results.NewNodeClaims[0].Requirements, candidatePrice) + instanceTypeOptionsWithSpotOfferings := results.NewNodeClaims[0].NodeClaimTemplate.InstanceTypeOptions.Compatible(results.NewNodeClaims[0].Requirements) + // filterByPrice returns the instanceTypes that are lower priced than the current candidate and any error that indicates the input couldn't be filtered. + var err error + results.NewNodeClaims[0].NodeClaimTemplate.InstanceTypeOptions, err = filterByPrice(instanceTypeOptionsWithSpotOfferings, results.NewNodeClaims[0].Requirements, candidatePrice) + if err != nil { + if len(candidates) == 1 { + c.recorder.Publish(disruptionevents.Unconsolidatable(candidates[0].Node, candidates[0].NodeClaim, fmt.Sprintf("Filtering by price: %v", err))...) + } + return Command{}, pscheduling.Results{}, nil + } if len(results.NewNodeClaims[0].NodeClaimTemplate.InstanceTypeOptions) == 0 { if len(candidates) == 1 { - if len(incompatibleMinReqKey) > 0 { - c.recorder.Publish(disruptionevents.Unconsolidatable(candidates[0].Node, candidates[0].NodeClaim, fmt.Sprintf("minValues requirement is not met for %s", incompatibleMinReqKey))...) - } else { - c.recorder.Publish(disruptionevents.Unconsolidatable(candidates[0].Node, candidates[0].NodeClaim, "Can't replace spot node with a cheaper spot node")...) - } + c.recorder.Publish(disruptionevents.Unconsolidatable(candidates[0].Node, candidates[0].NodeClaim, "Can't replace with a cheaper node")...) } - // no instance types remain after filtering by price return Command{}, pscheduling.Results{}, nil } @@ -278,8 +274,9 @@ func (c *consolidation) computeSpotToSpotConsolidation(ctx context.Context, cand // If we had restricted instance types to min flexibility at launch at step (1) i.e CreateInstanceFromTypes(A,B,C), we would have received the instance type part of the list preventing immediate consolidation. // Taking this to 15 types, we need to only send the 15 cheapest types in the CreateInstanceFromTypes call so that the resulting instance is always in that set of 15 and we won’t immediately consolidate. if results.NewNodeClaims[0].Requirements.HasMinValues() { - // Here we are trying to get the max of the minimum instances required to satify the minimum requirement and the default 15 to cap the instances for spot-to-spot consolidation. - results.NewNodeClaims[0].NodeClaimTemplate.InstanceTypeOptions = lo.Slice(results.NewNodeClaims[0].NodeClaimTemplate.InstanceTypeOptions, 0, lo.Max([]int{MinInstanceTypesForSpotToSpotConsolidation, numInstanceTypes})) + // Here we are trying to get the max of the minimum instances required to satisfy the minimum requirement and the default 15 to cap the instances for spot-to-spot consolidation. + minInstanceTypes, _ := results.NewNodeClaims[0].NodeClaimTemplate.InstanceTypeOptions.SatisfiesMinValues(results.NewNodeClaims[0].Requirements) + results.NewNodeClaims[0].NodeClaimTemplate.InstanceTypeOptions = lo.Slice(results.NewNodeClaims[0].NodeClaimTemplate.InstanceTypeOptions, 0, lo.Max([]int{MinInstanceTypesForSpotToSpotConsolidation, minInstanceTypes})) } else { results.NewNodeClaims[0].NodeClaimTemplate.InstanceTypeOptions = lo.Slice(results.NewNodeClaims[0].NodeClaimTemplate.InstanceTypeOptions, 0, MinInstanceTypesForSpotToSpotConsolidation) } diff --git a/pkg/controllers/disruption/helpers.go b/pkg/controllers/disruption/helpers.go index 873253ea6c..a3701f6e28 100644 --- a/pkg/controllers/disruption/helpers.go +++ b/pkg/controllers/disruption/helpers.go @@ -167,28 +167,25 @@ func GetPodEvictionCost(ctx context.Context, p *v1.Pod) float64 { return clamp(-10.0, cost, 10.0) } -// filterByPriceWithMinValues returns the instanceTypes that are lower priced than the current candidate and iterates over the cumulative minimum requirement of the InstanceTypeOptions to see if it meets the minValues of requirements. -// The minValues requirement is checked again after filterByPrice as it may result in more constrained InstanceTypeOptions for a NodeClaim -func filterByPriceWithMinValues(options []*cloudprovider.InstanceType, reqs scheduling.Requirements, price float64) ([]*cloudprovider.InstanceType, string, int) { - var result []*cloudprovider.InstanceType - +// filterByPrice returns the instanceTypes that are lower priced than the current candidate. +// The minValues requirement is checked again after filterByPrice as it may result in more constrained InstanceTypeOptions for a NodeClaim. +// If the result of the filtering means that minValues can't be satisfied, we return an error. +func filterByPrice(options []*cloudprovider.InstanceType, reqs scheduling.Requirements, price float64) ([]*cloudprovider.InstanceType, error) { + var res cloudprovider.InstanceTypes for _, it := range options { launchPrice := worstLaunchPrice(it.Offerings.Available(), reqs) if launchPrice < price { - result = append(result, it) + res = append(res, it) } } - var incompatibleReqKey string - var numInstanceTypes int - // Only try to find the incompatible minValue requirement key if requirements have minValues. if reqs.HasMinValues() { - // We would have already filtered the invalid nodeclaim not meeting the minimum requirements in simulated scheduling results. + // We would have already filtered the invalid NodeClaim not meeting the minimum requirements in simulated scheduling results. // Here the instanceTypeOptions changed again based on the price and requires re-validation. - incompatibleReqKey, numInstanceTypes = pscheduling.IncompatibleReqAcrossInstanceTypes(reqs, lo.Slice(result, 0, pscheduling.MaxInstanceTypes)) + if _, err := res.SatisfiesMinValues(reqs); err != nil { + return nil, fmt.Errorf("validating minValues, %w", err) + } } - // If minValues is NOT met for any of the requirement across InstanceTypes, then return empty InstanceTypeOptions as we cannot launch with the remaining InstanceTypes. - result = lo.Ternary(len(incompatibleReqKey) > 0, []*cloudprovider.InstanceType{}, result) - return result, incompatibleReqKey, numInstanceTypes + return res, nil } func disruptionCost(ctx context.Context, pods []*v1.Pod) float64 { diff --git a/pkg/controllers/disruption/multinodeconsolidation.go b/pkg/controllers/disruption/multinodeconsolidation.go index 89dc95f63e..11fed44e79 100644 --- a/pkg/controllers/disruption/multinodeconsolidation.go +++ b/pkg/controllers/disruption/multinodeconsolidation.go @@ -136,8 +136,8 @@ func (m *MultiNodeConsolidation) firstNConsolidationOption(ctx context.Context, // required replacementHasValidInstanceTypes := false if cmd.Action() == ReplaceAction { - cmd.replacements[0].InstanceTypeOptions = filterOutSameType(cmd.replacements[0], candidatesToConsolidate) - replacementHasValidInstanceTypes = len(cmd.replacements[0].InstanceTypeOptions) > 0 + cmd.replacements[0].InstanceTypeOptions, err = filterOutSameType(cmd.replacements[0], candidatesToConsolidate) + replacementHasValidInstanceTypes = len(cmd.replacements[0].InstanceTypeOptions) > 0 && err == nil } // replacementHasValidInstanceTypes will be false if the replacement action has valid instance types remaining after filtering. @@ -169,7 +169,7 @@ func (m *MultiNodeConsolidation) firstNConsolidationOption(ctx context.Context, // This code sees that t3a.small is the cheapest type in both lists and filters it and anything more expensive out // leaving the valid consolidation: // NodeClaims=[t3a.2xlarge, t3a.2xlarge, t3a.small] -> 1 of t3a.nano -func filterOutSameType(newNodeClaim *scheduling.NodeClaim, consolidate []*Candidate) []*cloudprovider.InstanceType { +func filterOutSameType(newNodeClaim *scheduling.NodeClaim, consolidate []*Candidate) ([]*cloudprovider.InstanceType, error) { existingInstanceTypes := sets.New[string]() pricesByInstanceType := map[string]float64{} @@ -200,12 +200,7 @@ func filterOutSameType(newNodeClaim *scheduling.NodeClaim, consolidate []*Candid } } } - // computeConsolidation would have thrown error related to `incompatibleRequirementKey` if minValues from requirements is not satisfied by the InstanceTypeOptions before we get to `filterOutSameType`. - // And we re-run the check here. The instanceTypeOptions we pass here should already have been sorted by price during computeConsolidation and if the minValues is not satisfied after `filterOutSameType`, we return empty InstanceTypeOptions thereby preventing consolidation. - // We do not use the incompatiblekey, numInstanceTypes from here as part of the messaging since this runs multiple times depending on the number of nodes in the cluster and we - // already have one messaging from the computeConsolidation if minValues is not met from the requirements. - filterByPrice, _, _ := filterByPriceWithMinValues(newNodeClaim.InstanceTypeOptions, newNodeClaim.Requirements, maxPrice) - return filterByPrice + return filterByPrice(newNodeClaim.InstanceTypeOptions, newNodeClaim.Requirements, maxPrice) } func (m *MultiNodeConsolidation) Type() string { diff --git a/pkg/controllers/provisioning/scheduling/instance_selection_test.go b/pkg/controllers/provisioning/scheduling/instance_selection_test.go index 06901dec7b..d086d90aaf 100644 --- a/pkg/controllers/provisioning/scheduling/instance_selection_test.go +++ b/pkg/controllers/provisioning/scheduling/instance_selection_test.go @@ -895,7 +895,7 @@ var _ = Describe("Instance Type Selection", func() { ExpectApplied(ctx, env.Client, pod2) results, _ := prov.Schedule(ctx) for _, v := range results.PodErrors { - Expect(v.Error()).To(ContainSubstring("minValues requirement is not met for karpenter/numerical-value")) + Expect(v.Error()).To(ContainSubstring(`minValues requirement is not met for "karpenter/numerical-value"`)) } ExpectNotScheduled(ctx, env.Client, pod1) ExpectNotScheduled(ctx, env.Client, pod2) diff --git a/pkg/controllers/provisioning/scheduling/nodeclaim.go b/pkg/controllers/provisioning/scheduling/nodeclaim.go index 5495393fe0..f73351f998 100644 --- a/pkg/controllers/provisioning/scheduling/nodeclaim.go +++ b/pkg/controllers/provisioning/scheduling/nodeclaim.go @@ -21,10 +21,8 @@ import ( "strings" "sync/atomic" - "github.com/samber/lo" v1 "k8s.io/api/core/v1" "k8s.io/apimachinery/pkg/api/resource" - "k8s.io/apimachinery/pkg/util/sets" "sigs.k8s.io/karpenter/pkg/apis/v1beta1" "sigs.k8s.io/karpenter/pkg/cloudprovider" @@ -145,7 +143,7 @@ func InstanceTypeList(instanceTypeOptions []*cloudprovider.InstanceType) string } type filterResults struct { - remaining []*cloudprovider.InstanceType + remaining cloudprovider.InstanceTypes // Each of these three flags indicates if that particular criteria was met by at least one instance type requirementsMet bool fits bool @@ -156,9 +154,9 @@ type filterResults struct { // requirementsAndOffering indicates if a single instance type met the scheduling requirements and was a required offering requirementsAndOffering bool // fitsAndOffering indicates if a single instance type had enough resources and was a required offering - fitsAndOffering bool - requirementIncompatibleWithMinValues string - requests v1.ResourceList + fitsAndOffering bool + minValuesIncompatibleErr error + requests v1.ResourceList } // FailureReason returns a presentable string explaining why all instance types were filtered out @@ -170,8 +168,8 @@ func (r filterResults) FailureReason() string { } // minValues is specified in the requirements and is not met - if len(r.requirementIncompatibleWithMinValues) > 0 { - return "minValues requirement is not met for " + r.requirementIncompatibleWithMinValues + if r.minValuesIncompatibleErr != nil { + return r.minValuesIncompatibleErr.Error() } // no instance type met any of the three criteria, meaning each criteria was enough to completely prevent @@ -267,10 +265,12 @@ func filterInstanceTypesByRequirements(instanceTypes []*cloudprovider.InstanceTy } if requirements.HasMinValues() { // We don't care about the minimum number of instance types that meet our requirements here, we only care if they meet our requirements. - results.requirementIncompatibleWithMinValues, _ = IncompatibleReqAcrossInstanceTypes(requirements, results.remaining) + _, results.minValuesIncompatibleErr = results.remaining.SatisfiesMinValues(requirements) + if results.minValuesIncompatibleErr != nil { + // If minValues is NOT met for any of the requirement across InstanceTypes, then return empty InstanceTypeOptions as we cannot launch with the remaining InstanceTypes. + results.remaining = nil + } } - // If minValues is NOT met for any of the requirement across InstanceTypes, then return empty InstanceTypeOptions as we cannot launch with the remaining InstanceTypes. - results.remaining = lo.Ternary(len(results.requirementIncompatibleWithMinValues) > 0, []*cloudprovider.InstanceType{}, results.remaining) return results } @@ -291,67 +291,3 @@ func hasOffering(instanceType *cloudprovider.InstanceType, requirements scheduli } return false } - -func IncompatibleReqAcrossInstanceTypes(requirements scheduling.Requirements, instanceTypes cloudprovider.InstanceTypes) (string, int) { - // cumulativeMinRequirementsFromInstanceTypes is a map for the requirement key with the cumulative values that has minValues supported across InstanceTypeOptions - // and later fetch the invalid requirement key from the result map. - // For example: - // NodePool requirement: - // - key: node.kubernetes.io/instance-type - // operator: In - // values: ["c4.large","c4.xlarge","c5.large","c5.xlarge","m4.large","m4.xlarge"] - // minValues: 3 - // - key: karpenter.k8s.aws/instance-family - // operator: In - // values: ["c4","c5","m4"] - // minValues: 3 - // - // And if NodeClaim has InstanceTypeOptions: ["c4.large","c5.xlarge","m4.2xlarge"], it PASSES the requirements - // - // we get the map as : { - // node.kubernetes.io/instance-type: ["c4.large","c5.xlarge","m4.2xlarge"], - // karpenter.k8s.aws/instance-family: ["c4","c5","m4"] - // } - // so, returns empty key. - // - // And if NodeClaim has InstanceTypeOptions: ["c4.large","c4.xlarge","c5.2xlarge"], it FAILS the requirements - // - // we get the map as : { - // node.kubernetes.io/instance-type: ["c4.large","c4.xlarge","c5.2xlarge"], - // karpenter.k8s.aws/instance-family: ["c4","c5"] // minimum requirement failed for this. - // } - // so, returns "karpenter.k8s.aws/instance-family" - // Key -> requirement key supporting MinValues - // value -> cumulative set of values for the key from all the instanceTypes - cumulativeMinRequirementsFromInstanceTypes := make(map[string]sets.Set[string]) - // We validate if sorting by price and truncating the number of instance types to 100 breaks the minValue requirement since we pass the same bounded request to the Launch API. - // If minValue requirement fails, we return the incompatible key. - var incompatibleKey string - for i, it := range instanceTypes { - for _, req := range requirements { - if req.MinValues != nil { - if _, ok := cumulativeMinRequirementsFromInstanceTypes[req.Key]; !ok { - cumulativeMinRequirementsFromInstanceTypes[req.Key] = sets.Set[string]{} - } - cumulativeMinRequirementsFromInstanceTypes[req.Key] = - cumulativeMinRequirementsFromInstanceTypes[req.Key].Insert(it.Requirements.Get(req.Key).Values()...) - } - } - incompatibleKey = RequirementIncompatibleWithMinValues(cumulativeMinRequirementsFromInstanceTypes, requirements) - // Short-circuits the loop once all the minimum requirement is met. - if len(incompatibleKey) == 0 { - return "", i + 1 - } - } - return incompatibleKey, len(instanceTypes) -} - -func RequirementIncompatibleWithMinValues(cumulativeMinRequirementsFromInstanceTypes map[string]sets.Set[string], requirements scheduling.Requirements) string { - for key, value := range cumulativeMinRequirementsFromInstanceTypes { - // Return if any of the minvalues of requirement is not honored - if len(value) < lo.FromPtr(requirements.Get(key).MinValues) { - return key - } - } - return "" -} diff --git a/pkg/controllers/provisioning/scheduling/scheduler.go b/pkg/controllers/provisioning/scheduling/scheduler.go index d1c9324a0b..7651c2ee1f 100644 --- a/pkg/controllers/provisioning/scheduling/scheduler.go +++ b/pkg/controllers/provisioning/scheduling/scheduler.go @@ -170,34 +170,23 @@ func (r Results) NonPendingPodSchedulingErrors() string { } // TruncateInstanceTypes filters the result based on the maximum number of instanceTypes that needs -// to be considered. This could potentially impact if minValues is specified for a requirement key. So, -// this method re-evaluates the NodeClaims in the result returned by the scheduler after truncation -// and removes invalid NodeClaims, shifts the pods to errorPods so that the scheduler can re-consider those in the next iteration. This is a -// corner case where even 100 instanceTypes in the NodeClaim are failing to meet the a particular minimum requirement. +// to be considered. This filters all instance types generated in NewNodeClaims in the Results func (r Results) TruncateInstanceTypes(maxInstanceTypes int) Results { var validNewNodeClaims []*NodeClaim for _, newNodeClaim := range r.NewNodeClaims { - // The InstanceTypeOptions are truncated due to limitations in sending the number of instances to launch API which is capped to 100 today. - newNodeClaim.InstanceTypeOptions = lo.Slice(newNodeClaim.InstanceTypeOptions.OrderByPrice(newNodeClaim.NodeClaimTemplate.Requirements), 0, maxInstanceTypes) - // Only check for a validity of NodeClaim if its requirement has minValues in it. - if newNodeClaim.NodeClaimTemplate.Requirements.HasMinValues() { + // The InstanceTypeOptions are truncated due to limitations in sending the number of instances to launch API. + var err error + newNodeClaim.InstanceTypeOptions, err = newNodeClaim.InstanceTypeOptions.Truncate(newNodeClaim.Requirements, maxInstanceTypes) + if err != nil { // Check if the truncated InstanceTypeOptions in each NewNodeClaim from the results still satisfy the minimum requirements - incompatibleKey, _ := IncompatibleReqAcrossInstanceTypes(newNodeClaim.NodeClaimTemplate.Requirements, newNodeClaim.InstanceTypeOptions) - // If number of instancetypes in the nodeclaim cannot satisfy the minimum requirements, add its Pods to error map with reason. - if len(incompatibleKey) > 0 { - for _, pod := range newNodeClaim.Pods { - r.PodErrors[pod] = fmt.Errorf("pod didn’t schedule because NodePool %q couldn’t meet minValues requirements after truncating to 100 instance types", newNodeClaim.NodeClaimTemplate.NodePoolName) - } - } else { - // Add to valid nodeclaims since it meets minimum requirement. - validNewNodeClaims = append(validNewNodeClaims, newNodeClaim) + // If number of InstanceTypes in the NodeClaim cannot satisfy the minimum requirements, add its Pods to error map with reason. + for _, pod := range newNodeClaim.Pods { + r.PodErrors[pod] = fmt.Errorf("pod didn’t schedule because NodePool %q couldn’t meet minValues requirements, %w", newNodeClaim.NodeClaimTemplate.NodePoolName, err) } } else { - // NodeClaims which do not have minValues in requirement are already valid. validNewNodeClaims = append(validNewNodeClaims, newNodeClaim) } } - // Assign the new valid NodeClaims to result. r.NewNodeClaims = validNewNodeClaims return r } diff --git a/pkg/controllers/provisioning/scheduling/suite_test.go b/pkg/controllers/provisioning/scheduling/suite_test.go index 98c2a96741..2a5862e4a4 100644 --- a/pkg/controllers/provisioning/scheduling/suite_test.go +++ b/pkg/controllers/provisioning/scheduling/suite_test.go @@ -107,7 +107,7 @@ var _ = BeforeEach(func() { newCP := fake.CloudProvider{} cloudProvider.InstanceTypes, _ = newCP.GetInstanceTypes(ctx, nil) cloudProvider.CreateCalls = nil - scheduling.MaxInstanceTypes = 100 + scheduling.MaxInstanceTypes = 60 }) var _ = AfterEach(func() {