Skip to content

Commit

Permalink
Merge pull request #7224 from k8s-infra-cherrypick-robot/cherry-pick-…
Browse files Browse the repository at this point in the history
…7208-to-cluster-autoscaler-release-1.28

[cluster-autoscaler-release-1.28] chore: defork labels and taints
  • Loading branch information
k8s-ci-robot authored Sep 4, 2024
2 parents 9277114 + ff621d5 commit 574e5e2
Show file tree
Hide file tree
Showing 3 changed files with 212 additions and 14 deletions.
4 changes: 3 additions & 1 deletion cluster-autoscaler/cloudprovider/azure/azure_scale_set.go
Original file line number Diff line number Diff line change
Expand Up @@ -628,7 +628,9 @@ func (scaleSet *ScaleSet) TemplateNodeInfo() (*schedulerframework.NodeInfo, erro
return nil, err
}

node, err := buildNodeFromTemplate(scaleSet.Name, template, scaleSet.manager, scaleSet.enableDynamicInstanceList)
inputLabels := map[string]string{}
inputTaints := ""
node, err := buildNodeFromTemplate(scaleSet.Name, inputLabels, inputTaints, template, scaleSet.manager, scaleSet.enableDynamicInstanceList)

if err != nil {
return nil, err
Expand Down
156 changes: 151 additions & 5 deletions cluster-autoscaler/cloudprovider/azure/azure_template.go
Original file line number Diff line number Diff line change
Expand Up @@ -31,17 +31,61 @@ import (
"k8s.io/autoscaler/cluster-autoscaler/cloudprovider"
"k8s.io/autoscaler/cluster-autoscaler/utils/gpu"
"k8s.io/klog/v2"
kubeletapis "k8s.io/kubelet/pkg/apis"
)

const (
azureDiskTopologyKey string = "topology.disk.csi.azure.com/zone"
// AKSLabelPrefixValue represents the constant prefix for AKSLabelKeyPrefixValue
AKSLabelPrefixValue = "kubernetes.azure.com"
// AKSLabelKeyPrefixValue represents prefix for AKS Labels
AKSLabelKeyPrefixValue = AKSLabelPrefixValue + "/"

azureDiskTopologyKey = "topology.disk.csi.azure.com/zone"
// For NP-series SKU, the xilinx device plugin uses that resource name
// https://github.com/Xilinx/FPGA_as_a_Service/tree/master/k8s-fpga-device-plugin
xilinxFpgaResourceName = "xilinx.com/fpga-xilinx_u250_gen3x16_xdma_shell_2_1-0"

// legacyPoolNameTag is the legacy tag that AKS adds to the VMSS with its value
// being the agentpool name
legacyPoolNameTag = "poolName"
// poolNameTag is the new tag that replaces the above one
// Newly created pools and clusters will have this one on the VMSS
// instead of the legacy one. We'll have to live with both tags for a while.
poolNameTag = "aks-managed-poolName"

	// This is the legacy label added by agentbaker (agentpool={poolName}), and we want to predict that
// a node added to this agentpool will have this as a node label. The value is fetched
// from the VMSS tag with key poolNameTag/legacyPoolNameTag
legacyAgentPoolNodeLabelKey = "agentpool"
// New label that replaces the above
agentPoolNodeLabelKey = AKSLabelKeyPrefixValue + "agentpool"

// Storage profile node labels
legacyStorageProfileNodeLabelKey = "storageprofile"
storageProfileNodeLabelKey = AKSLabelKeyPrefixValue + "storageprofile"

// Storage tier node labels
legacyStorageTierNodeLabelKey = "storagetier"
storageTierNodeLabelKey = AKSLabelKeyPrefixValue + "storagetier"

// Fips node label
fipsNodeLabelKey = AKSLabelKeyPrefixValue + "fips_enabled"

// OS Sku node Label
osSkuLabelKey = AKSLabelKeyPrefixValue + "os-sku"

// Security node label
securityTypeLabelKey = AKSLabelKeyPrefixValue + "security-type"

customCATrustEnabledLabelKey = AKSLabelKeyPrefixValue + "custom-ca-trust-enabled"
kataMshvVMIsolationLabelKey = AKSLabelKeyPrefixValue + "kata-mshv-vm-isolation"

// Cluster node label
clusterLabelKey = AKSLabelKeyPrefixValue + "cluster"
)

func buildNodeFromTemplate(nodeGroupName string, template compute.VirtualMachineScaleSet, manager *AzureManager, enableDynamicInstanceList bool) (*apiv1.Node, error) {
func buildNodeFromTemplate(nodeGroupName string, inputLabels map[string]string, inputTaints string,
template compute.VirtualMachineScaleSet, manager *AzureManager, enableDynamicInstanceList bool) (*apiv1.Node, error) {
node := apiv1.Node{}
nodeName := fmt.Sprintf("%s-asg-%d", nodeGroupName, rand.Int63())

Expand Down Expand Up @@ -90,7 +134,9 @@ func buildNodeFromTemplate(nodeGroupName string, template compute.VirtualMachine
node.Status.Capacity[apiv1.ResourceCPU] = *resource.NewQuantity(vcpu, resource.DecimalSI)
// isNPSeries returns if a SKU is an NP-series SKU
// SKU API reports GPUs for NP-series but it's actually FPGAs
if !isNPSeries(*template.Sku.Name) {
if isNPSeries(*template.Sku.Name) {
node.Status.Capacity[xilinxFpgaResourceName] = *resource.NewQuantity(gpuCount, resource.DecimalSI)
} else {
node.Status.Capacity[gpu.ResourceNvidiaGPU] = *resource.NewQuantity(gpuCount, resource.DecimalSI)
}

Expand All @@ -113,16 +159,78 @@ func buildNodeFromTemplate(nodeGroupName string, template compute.VirtualMachine

// GenericLabels
node.Labels = cloudprovider.JoinStringMaps(node.Labels, buildGenericLabels(template, nodeName))

// Labels from the Scale Set's Tags
node.Labels = cloudprovider.JoinStringMaps(node.Labels, extractLabelsFromScaleSet(template.Tags))
labels := make(map[string]string)

// Prefer the explicit labels in spec coming from RP over the VMSS template
if len(inputLabels) > 0 {
labels = inputLabels
} else {
labels = extractLabelsFromScaleSet(template.Tags)
}

// Add the agentpool label, its value should come from the VMSS poolName tag
// NOTE: The plan is for agentpool label to be deprecated in favor of the aks-prefixed one
// We will have to live with both labels for a while
if node.Labels[legacyPoolNameTag] != "" {
labels[legacyAgentPoolNodeLabelKey] = node.Labels[legacyPoolNameTag]
labels[agentPoolNodeLabelKey] = node.Labels[legacyPoolNameTag]
}
if node.Labels[poolNameTag] != "" {
labels[legacyAgentPoolNodeLabelKey] = node.Labels[poolNameTag]
labels[agentPoolNodeLabelKey] = node.Labels[poolNameTag]
}

// Add the storage profile and storage tier labels
if template.VirtualMachineProfile != nil && template.VirtualMachineProfile.StorageProfile != nil && template.VirtualMachineProfile.StorageProfile.OsDisk != nil {
// ephemeral
if template.VirtualMachineProfile.StorageProfile.OsDisk.DiffDiskSettings != nil && template.VirtualMachineProfile.StorageProfile.OsDisk.DiffDiskSettings.Option == compute.Local {
labels[legacyStorageProfileNodeLabelKey] = "ephemeral"
labels[storageProfileNodeLabelKey] = "ephemeral"
} else {
labels[legacyStorageProfileNodeLabelKey] = "managed"
labels[storageProfileNodeLabelKey] = "managed"
}
if template.VirtualMachineProfile.StorageProfile.OsDisk.ManagedDisk != nil {
labels[legacyStorageTierNodeLabelKey] = string(template.VirtualMachineProfile.StorageProfile.OsDisk.ManagedDisk.StorageAccountType)
labels[storageTierNodeLabelKey] = string(template.VirtualMachineProfile.StorageProfile.OsDisk.ManagedDisk.StorageAccountType)
}
// Add ephemeral-storage value
if template.VirtualMachineProfile.StorageProfile.OsDisk.DiskSizeGB != nil {
node.Status.Capacity[apiv1.ResourceEphemeralStorage] = *resource.NewQuantity(int64(int(*template.VirtualMachineProfile.StorageProfile.OsDisk.DiskSizeGB)*1024*1024*1024), resource.DecimalSI)
klog.V(4).Infof("OS Disk Size from template is: %d", *template.VirtualMachineProfile.StorageProfile.OsDisk.DiskSizeGB)
klog.V(4).Infof("Setting ephemeral storage to: %v", node.Status.Capacity[apiv1.ResourceEphemeralStorage])
}
}

// If we are on GPU-enabled SKUs, append the accelerator
// label so that CA makes better decision when scaling from zero for GPU pools
if isNvidiaEnabledSKU(*template.Sku.Name) {
labels[GPULabel] = "nvidia"
labels[legacyGPULabel] = "nvidia"
}

// Extract allocatables from tags
resourcesFromTags := extractAllocatableResourcesFromScaleSet(template.Tags)
for resourceName, val := range resourcesFromTags {
node.Status.Capacity[apiv1.ResourceName(resourceName)] = *val
}

node.Labels = cloudprovider.JoinStringMaps(node.Labels, labels)
klog.V(4).Infof("Setting node %s labels to: %s", nodeName, node.Labels)

var taints []apiv1.Taint
// Prefer the explicit taints in spec over the VMSS template
if inputTaints != "" {
taints = extractTaintsFromSpecString(inputTaints)
} else {
taints = extractTaintsFromScaleSet(template.Tags)
}

// Taints from the Scale Set's Tags
node.Spec.Taints = extractTaintsFromScaleSet(template.Tags)
node.Spec.Taints = taints
klog.V(4).Infof("Setting node %s taints to: %s", nodeName, node.Spec.Taints)

node.Status.Conditions = cloudprovider.BuildReadyConditions()
return &node, nil
Expand All @@ -140,10 +248,15 @@ func buildInstanceOS(template compute.VirtualMachineScaleSet) string {
func buildGenericLabels(template compute.VirtualMachineScaleSet, nodeName string) map[string]string {
result := make(map[string]string)

result[kubeletapis.LabelArch] = cloudprovider.DefaultArch
result[apiv1.LabelArchStable] = cloudprovider.DefaultArch

result[kubeletapis.LabelOS] = buildInstanceOS(template)
result[apiv1.LabelOSStable] = buildInstanceOS(template)

result[apiv1.LabelInstanceType] = *template.Sku.Name
result[apiv1.LabelInstanceTypeStable] = *template.Sku.Name
result[apiv1.LabelZoneRegion] = strings.ToLower(*template.Location)
result[apiv1.LabelTopologyRegion] = strings.ToLower(*template.Location)

if template.Zones != nil && len(*template.Zones) > 0 {
Expand All @@ -157,9 +270,11 @@ func buildGenericLabels(template compute.VirtualMachineScaleSet, nodeName string
			//Note that if the customer has pod affinity picking an exact zone, this logic won't work.
//For now, discourage the customers from using podAffinity to pick the availability zones.
randomZone := failureDomains[rand.Intn(len(failureDomains))]
result[apiv1.LabelZoneFailureDomain] = randomZone
result[apiv1.LabelTopologyZone] = randomZone
result[azureDiskTopologyKey] = randomZone
} else {
result[apiv1.LabelZoneFailureDomain] = "0"
result[apiv1.LabelTopologyZone] = "0"
result[azureDiskTopologyKey] = ""
}
Expand Down Expand Up @@ -212,6 +327,37 @@ func extractTaintsFromScaleSet(tags map[string]*string) []apiv1.Taint {
return taints
}

// extractTaintsFromSpecString parses a comma-separated taints string into a
// slice of taints. The accepted format is the same as kubelet's
// `--register-with-taints` argument, e.g.
// "dedicated=foo:NoSchedule,group=bar:NoExecute,app=fizz:PreferNoSchedule".
// Entries that are malformed (no "=", or a value without a recognized
// ":<effect>" suffix) are silently skipped.
func extractTaintsFromSpecString(taintsString string) []apiv1.Taint {
	taints := make([]apiv1.Taint, 0)
	// Compile the pattern once, outside the loop: the previous code compiled
	// it on every iteration and ignored the returned error. The pattern is a
	// constant, so MustCompile cannot fail at runtime.
	effectRegexp := regexp.MustCompile("(.*):(?:NoSchedule|NoExecute|PreferNoSchedule)")
	// First split the taints at the separator
	splits := strings.Split(taintsString, ",")
	for _, split := range splits {
		taintSplit := strings.Split(split, "=")
		if len(taintSplit) != 2 {
			continue
		}

		taintKey := taintSplit[0]
		taintValue := taintSplit[1]

		// Only accept values of the form "<value>:<effect>" with a known effect.
		if !effectRegexp.MatchString(taintValue) {
			continue
		}

		values := strings.SplitN(taintValue, ":", 2)
		taints = append(taints, apiv1.Taint{
			Key:    taintKey,
			Value:  values[0],
			Effect: apiv1.TaintEffect(values[1]),
		})
	}

	return taints
}

func extractAutoscalingOptionsFromScaleSetTags(tags map[string]*string) map[string]string {
options := make(map[string]string)
for tagName, tagValue := range tags {
Expand Down
66 changes: 58 additions & 8 deletions cluster-autoscaler/cloudprovider/azure/azure_template_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ package azure

import (
"fmt"
"strings"
"testing"

"github.com/Azure/azure-sdk-for-go/services/compute/mgmt/2022-08-01/compute"
Expand Down Expand Up @@ -104,6 +105,45 @@ func TestExtractTaintsFromScaleSet(t *testing.T) {
assert.Equal(t, makeTaintSet(expectedTaints), makeTaintSet(taints))
}

// TestExtractTaintsFromSpecString verifies that a kubelet-style taints string
// is parsed into the expected taints and that malformed entries are dropped.
func TestExtractTaintsFromSpecString(t *testing.T) {
	entries := []string{
		"dedicated=foo:NoSchedule",
		"group=bar:NoExecute",
		"app=fizz:PreferNoSchedule",
		"k8s.io/testing/underscore/to/slash=fizz:PreferNoSchedule",
		"bar=baz",            // invalid: value has no effect suffix
		"blank=",             // invalid: empty value, no effect
		"nosplit=some_value", // invalid: no ":" effect separator
	}

	want := []apiv1.Taint{
		{
			Key:    "dedicated",
			Value:  "foo",
			Effect: apiv1.TaintEffectNoSchedule,
		},
		{
			Key:    "group",
			Value:  "bar",
			Effect: apiv1.TaintEffectNoExecute,
		},
		{
			Key:    "app",
			Value:  "fizz",
			Effect: apiv1.TaintEffectPreferNoSchedule,
		},
		{
			Key:    "k8s.io/testing/underscore/to/slash",
			Value:  "fizz",
			Effect: apiv1.TaintEffectPreferNoSchedule,
		},
	}

	got := extractTaintsFromSpecString(strings.Join(entries, ","))
	assert.Len(t, got, 4)
	assert.Equal(t, makeTaintSet(want), makeTaintSet(got))
}

func TestExtractAllocatableResourcesFromScaleSet(t *testing.T) {
tags := map[string]*string{
fmt.Sprintf("%s%s", nodeResourcesTagName, "cpu"): to.StringPtr("100m"),
Expand All @@ -123,14 +163,6 @@ func TestExtractAllocatableResourcesFromScaleSet(t *testing.T) {
assert.Equal(t, (&exepectedCustomAllocatable).String(), labels["nvidia.com/Tesla-P100-PCIE"].String())
}

func makeTaintSet(taints []apiv1.Taint) map[apiv1.Taint]bool {
set := make(map[apiv1.Taint]bool)
for _, taint := range taints {
set[taint] = true
}
return set
}

func TestTopologyFromScaleSet(t *testing.T) {
testNodeName := "test-node"
testSkuName := "test-sku"
Expand All @@ -144,12 +176,16 @@ func TestTopologyFromScaleSet(t *testing.T) {
Location: to.StringPtr("westus"),
}
expectedZoneValues := []string{"westus-1", "westus-2", "westus-3"}

labels := buildGenericLabels(testVmss, testNodeName)
failureDomain, ok := labels[apiv1.LabelZoneFailureDomain]
assert.True(t, ok)
topologyZone, ok := labels[apiv1.LabelTopologyZone]
assert.True(t, ok)
azureDiskTopology, ok := labels[azureDiskTopologyKey]
assert.True(t, ok)

assert.Contains(t, expectedZoneValues, failureDomain)
assert.Contains(t, expectedZoneValues, topologyZone)
assert.Contains(t, expectedZoneValues, azureDiskTopology)
}
Expand All @@ -165,10 +201,16 @@ func TestEmptyTopologyFromScaleSet(t *testing.T) {
VirtualMachineProfile: &compute.VirtualMachineScaleSetVMProfile{OsProfile: nil}},
Location: to.StringPtr("westus"),
}

expectedFailureDomain := "0"
expectedTopologyZone := "0"
expectedAzureDiskTopology := ""
labels := buildGenericLabels(testVmss, testNodeName)

failureDomain, ok := labels[apiv1.LabelZoneFailureDomain]
assert.True(t, ok)
assert.Equal(t, expectedFailureDomain, failureDomain)

topologyZone, ok := labels[apiv1.LabelTopologyZone]
assert.True(t, ok)
assert.Equal(t, expectedTopologyZone, topologyZone)
Expand All @@ -177,3 +219,11 @@ func TestEmptyTopologyFromScaleSet(t *testing.T) {
assert.True(t, ok)
assert.Equal(t, expectedAzureDiskTopology, azureDiskTopology)
}

// makeTaintSet converts a slice of taints into a set, so that two taint
// lists can be compared regardless of element order.
func makeTaintSet(taints []apiv1.Taint) map[apiv1.Taint]bool {
	result := make(map[apiv1.Taint]bool, len(taints))
	for i := range taints {
		result[taints[i]] = true
	}
	return result
}

0 comments on commit 574e5e2

Please sign in to comment.