Skip to content

Commit

Permalink
Introduce UpdateDeprecatedTemplateLabels to set beta/deprecated labels
Browse files Browse the repository at this point in the history
And at the same time only set stable labels in all buildGenericLabels
implementations.

This fixes issues when a node group has 0 nodes yet and node labels are
built using buildGenericLabels and the node-template labels.

Issues include (anti-)affinity and nodeSelectors for the given labels,
giving false-negative results for candidate nodes, which leads to ASGs
never scaling up.
  • Loading branch information
codablock committed May 3, 2021
1 parent 3c28030 commit 68489c2
Show file tree
Hide file tree
Showing 8 changed files with 51 additions and 49 deletions.
11 changes: 5 additions & 6 deletions cluster-autoscaler/cloudprovider/alicloud/alicloud_manager.go
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,6 @@ import (
"k8s.io/autoscaler/cluster-autoscaler/cloudprovider"
"k8s.io/autoscaler/cluster-autoscaler/cloudprovider/alicloud/alibaba-cloud-sdk-go/services/ess"
klog "k8s.io/klog/v2"
kubeletapis "k8s.io/kubelet/pkg/apis"
"math/rand"
"time"
)
Expand Down Expand Up @@ -237,13 +236,13 @@ func (m *AliCloudManager) buildNodeFromTemplate(sg *Asg, template *sgTemplate) (

func buildGenericLabels(template *sgTemplate, nodeName string) map[string]string {
result := make(map[string]string)
result[kubeletapis.LabelArch] = cloudprovider.DefaultArch
result[kubeletapis.LabelOS] = cloudprovider.DefaultOS
result[apiv1.LabelArchStable] = cloudprovider.DefaultArch
result[apiv1.LabelOSStable] = cloudprovider.DefaultOS

result[apiv1.LabelInstanceType] = template.InstanceType.instanceTypeID
result[apiv1.LabelInstanceTypeStable] = template.InstanceType.instanceTypeID

result[apiv1.LabelZoneRegion] = template.Region
result[apiv1.LabelZoneFailureDomain] = template.Zone
result[apiv1.LabelTopologyRegion] = template.Region
result[apiv1.LabelTopologyZone] = template.Zone
result[apiv1.LabelHostname] = nodeName

// append custom node labels
Expand Down
12 changes: 6 additions & 6 deletions cluster-autoscaler/cloudprovider/aws/aws_manager.go
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,6 @@ import (
"k8s.io/autoscaler/cluster-autoscaler/cloudprovider"
"k8s.io/autoscaler/cluster-autoscaler/utils/gpu"
klog "k8s.io/klog/v2"
kubeletapis "k8s.io/kubelet/pkg/apis"
provider_aws "k8s.io/legacy-cloud-providers/aws"
)

Expand Down Expand Up @@ -389,14 +388,15 @@ func (m *AwsManager) buildNodeFromTemplate(asg *asg, template *asgTemplate) (*ap

func buildGenericLabels(template *asgTemplate, nodeName string) map[string]string {
result := make(map[string]string)

// TODO: extract it somehow
result[kubeletapis.LabelArch] = cloudprovider.DefaultArch
result[kubeletapis.LabelOS] = cloudprovider.DefaultOS
result[apiv1.LabelArchStable] = cloudprovider.DefaultArch
result[apiv1.LabelOSStable] = cloudprovider.DefaultOS

result[apiv1.LabelInstanceType] = template.InstanceType.InstanceType
result[apiv1.LabelInstanceTypeStable] = template.InstanceType.InstanceType

result[apiv1.LabelZoneRegion] = template.Region
result[apiv1.LabelZoneFailureDomain] = template.Zone
result[apiv1.LabelTopologyRegion] = template.Region
result[apiv1.LabelTopologyZone] = template.Zone
result[apiv1.LabelHostname] = nodeName
return result
}
Expand Down
5 changes: 2 additions & 3 deletions cluster-autoscaler/cloudprovider/aws/aws_manager_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,6 @@ import (
apiv1 "k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/api/resource"
"k8s.io/autoscaler/cluster-autoscaler/cloudprovider"
kubeletapis "k8s.io/kubelet/pkg/apis"
provider_aws "k8s.io/legacy-cloud-providers/aws"
)

Expand Down Expand Up @@ -83,8 +82,8 @@ func TestBuildGenericLabels(t *testing.T) {
assert.Equal(t, "us-east-1", labels[apiv1.LabelZoneRegion])
assert.Equal(t, "sillyname", labels[apiv1.LabelHostname])
assert.Equal(t, "c4.large", labels[apiv1.LabelInstanceType])
assert.Equal(t, cloudprovider.DefaultArch, labels[kubeletapis.LabelArch])
assert.Equal(t, cloudprovider.DefaultOS, labels[kubeletapis.LabelOS])
assert.Equal(t, cloudprovider.DefaultArch, labels[apiv1.LabelArchStable])
assert.Equal(t, cloudprovider.DefaultOS, labels[apiv1.LabelOSStable])
}

func TestExtractAllocatableResourcesFromAsg(t *testing.T) {
Expand Down
12 changes: 4 additions & 8 deletions cluster-autoscaler/cloudprovider/azure/azure_template.go
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,6 @@ import (
"k8s.io/autoscaler/cluster-autoscaler/utils/gpu"
cloudvolume "k8s.io/cloud-provider/volume"
"k8s.io/klog/v2"
kubeletapis "k8s.io/kubelet/pkg/apis"
"math/rand"
"regexp"
"strings"
Expand All @@ -44,24 +43,21 @@ func buildInstanceOS(template compute.VirtualMachineScaleSet) string {
func buildGenericLabels(template compute.VirtualMachineScaleSet, nodeName string) map[string]string {
result := make(map[string]string)

result[kubeletapis.LabelArch] = cloudprovider.DefaultArch
result[apiv1.LabelArchStable] = cloudprovider.DefaultArch

result[kubeletapis.LabelOS] = buildInstanceOS(template)
result[apiv1.LabelOSStable] = buildInstanceOS(template)

result[apiv1.LabelInstanceType] = *template.Sku.Name
result[apiv1.LabelZoneRegion] = strings.ToLower(*template.Location)
result[apiv1.LabelInstanceTypeStable] = *template.Sku.Name
result[apiv1.LabelTopologyRegion] = strings.ToLower(*template.Location)

if template.Zones != nil && len(*template.Zones) > 0 {
failureDomains := make([]string, len(*template.Zones))
for k, v := range *template.Zones {
failureDomains[k] = strings.ToLower(*template.Location) + "-" + v
}

result[apiv1.LabelZoneFailureDomain] = strings.Join(failureDomains[:], cloudvolume.LabelMultiZoneDelimiter)
result[apiv1.LabelTopologyZone] = strings.Join(failureDomains[:], cloudvolume.LabelMultiZoneDelimiter)
} else {
result[apiv1.LabelZoneFailureDomain] = "0"
result[apiv1.LabelTopologyZone] = "0"
}

result[apiv1.LabelHostname] = nodeName
Expand Down
13 changes: 3 additions & 10 deletions cluster-autoscaler/cloudprovider/gce/templates.go
Original file line number Diff line number Diff line change
Expand Up @@ -23,16 +23,14 @@ import (
"regexp"
"strings"

"github.com/ghodss/yaml"
gce "google.golang.org/api/compute/v1"
apiv1 "k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/api/resource"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/autoscaler/cluster-autoscaler/cloudprovider"
"k8s.io/autoscaler/cluster-autoscaler/utils/gpu"
"k8s.io/autoscaler/cluster-autoscaler/utils/units"
kubeletapis "k8s.io/kubelet/pkg/apis"

"github.com/ghodss/yaml"
klog "k8s.io/klog/v2"
)

Expand Down Expand Up @@ -265,21 +263,16 @@ func BuildGenericLabels(ref GceRef, machineType string, nodeName string, os Oper
}

// TODO: extract it somehow
result[kubeletapis.LabelArch] = cloudprovider.DefaultArch
result[apiv1.LabelArchStable] = cloudprovider.DefaultArch
result[kubeletapis.LabelOS] = string(os)
result[apiv1.LabelOSStable] = string(os)

result[apiv1.LabelInstanceType] = machineType
result[apiv1.LabelInstanceTypeStable] = machineType
ix := strings.LastIndex(ref.Zone, "-")
if ix == -1 {
return nil, fmt.Errorf("unexpected zone: %s", ref.Zone)
}
result[apiv1.LabelZoneRegion] = ref.Zone[:ix]
result[apiv1.LabelZoneRegionStable] = ref.Zone[:ix]
result[apiv1.LabelZoneFailureDomain] = ref.Zone
result[apiv1.LabelZoneFailureDomainStable] = ref.Zone
result[apiv1.LabelTopologyRegion] = ref.Zone[:ix]
result[apiv1.LabelTopologyZone] = ref.Zone
result[gceCSITopologyKeyZone] = ref.Zone
result[apiv1.LabelHostname] = nodeName
return result, nil
Expand Down
13 changes: 3 additions & 10 deletions cluster-autoscaler/cloudprovider/gce/templates_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -26,13 +26,11 @@ import (
gpuUtils "k8s.io/autoscaler/cluster-autoscaler/utils/gpu"
"k8s.io/autoscaler/cluster-autoscaler/utils/units"

"github.com/stretchr/testify/assert"
gce "google.golang.org/api/compute/v1"
apiv1 "k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/api/resource"
quota "k8s.io/apiserver/pkg/quota/v1"
kubeletapis "k8s.io/kubelet/pkg/apis"

"github.com/stretchr/testify/assert"
)

func TestBuildNodeFromTemplateSetsResources(t *testing.T) {
Expand Down Expand Up @@ -228,16 +226,11 @@ func TestBuildGenericLabels(t *testing.T) {
for _, tc := range testCases {
t.Run(tc.name, func(t *testing.T) {
expectedLabels := map[string]string{
apiv1.LabelZoneRegion: "us-central1",
apiv1.LabelZoneRegionStable: "us-central1",
apiv1.LabelZoneFailureDomain: "us-central1-b",
apiv1.LabelZoneFailureDomainStable: "us-central1-b",
apiv1.LabelTopologyRegion: "us-central1",
apiv1.LabelTopologyZone: "us-central1-b",
gceCSITopologyKeyZone: "us-central1-b",
apiv1.LabelHostname: "sillyname",
apiv1.LabelInstanceType: "n1-standard-8",
apiv1.LabelInstanceTypeStable: "n1-standard-8",
kubeletapis.LabelArch: cloudprovider.DefaultArch,
kubeletapis.LabelOS: tc.expectedOsLabel,
apiv1.LabelArchStable: cloudprovider.DefaultArch,
apiv1.LabelOSStable: tc.expectedOsLabel,
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,6 @@ import (
huaweicloudsdkecsmodel "k8s.io/autoscaler/cluster-autoscaler/cloudprovider/huaweicloud/huaweicloud-sdk-go-v3/services/ecs/v2/model"
"k8s.io/autoscaler/cluster-autoscaler/utils/gpu"
"k8s.io/klog/v2"
kubeletapis "k8s.io/kubelet/pkg/apis"
)

// ElasticCloudServerService represents the elastic cloud server interfaces.
Expand Down Expand Up @@ -569,13 +568,13 @@ func (csm *cloudServiceManager) buildNodeFromTemplate(asgName string, template *

func buildGenericLabels(template *asgTemplate, nodeName string) map[string]string {
result := make(map[string]string)
result[kubeletapis.LabelArch] = cloudprovider.DefaultArch
result[kubeletapis.LabelOS] = cloudprovider.DefaultOS
result[apiv1.LabelArchStable] = cloudprovider.DefaultArch
result[apiv1.LabelOSStable] = cloudprovider.DefaultOS

result[apiv1.LabelInstanceType] = template.name
result[apiv1.LabelInstanceTypeStable] = template.name

result[apiv1.LabelZoneRegion] = template.region
result[apiv1.LabelZoneFailureDomain] = template.zone
result[apiv1.LabelTopologyRegion] = template.region
result[apiv1.LabelTopologyZone] = template.zone
result[apiv1.LabelHostname] = nodeName

// append custom node labels
Expand Down
23 changes: 23 additions & 0 deletions cluster-autoscaler/core/utils/utils.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ package utils

import (
"fmt"
kubeletapis "k8s.io/kubelet/pkg/apis"
"math/rand"
"reflect"
"time"
Expand Down Expand Up @@ -172,6 +173,8 @@ func GetNodeInfoFromTemplate(nodeGroup cloudprovider.NodeGroup, daemonsets []*ap
return nil, errors.ToAutoscalerError(errors.CloudProviderError, err)
}

updateDeprecatedTemplateLabels(baseNodeInfo)

pods, err := daemonset.GetDaemonSetPodsForNode(baseNodeInfo, daemonsets, predicateChecker)
if err != nil {
return nil, errors.ToAutoscalerError(errors.InternalError, err)
Expand All @@ -188,6 +191,26 @@ func GetNodeInfoFromTemplate(nodeGroup cloudprovider.NodeGroup, daemonsets []*ap
return sanitizedNodeInfo, nil
}

// UpdateDeprecatedTemplateLabels updates beta and deprecated labels from stable labels
func updateDeprecatedTemplateLabels(nodeInfo *schedulerframework.NodeInfo) {
node := nodeInfo.Node()
if v, ok := node.ObjectMeta.Labels[apiv1.LabelArchStable]; ok {
node.ObjectMeta.Labels[kubeletapis.LabelArch] = v
}
if v, ok := node.ObjectMeta.Labels[apiv1.LabelOSStable]; ok {
node.ObjectMeta.Labels[kubeletapis.LabelOS] = v
}
if v, ok := node.ObjectMeta.Labels[apiv1.LabelInstanceTypeStable]; ok {
node.ObjectMeta.Labels[apiv1.LabelInstanceType] = v
}
if v, ok := node.ObjectMeta.Labels[apiv1.LabelTopologyRegion]; ok {
node.ObjectMeta.Labels[apiv1.LabelZoneRegion] = v
}
if v, ok := node.ObjectMeta.Labels[apiv1.LabelTopologyZone]; ok {
node.ObjectMeta.Labels[apiv1.LabelZoneFailureDomain] = v
}
}

// isVirtualNode determines if the node is created by virtual kubelet
func isVirtualNode(node *apiv1.Node) bool {
return node.ObjectMeta.Labels["type"] == "virtual-kubelet"
Expand Down

0 comments on commit 68489c2

Please sign in to comment.