Skip to content

Commit

Permalink
Merged PR 2119: add missing nodeLabels to be expected by CA in case o…
Browse files Browse the repository at this point in the history
…f scale-from-zero

add missing nodeLabels to be expected by CA in case of scale-from-zero

Related work items: kubernetes#1768
  • Loading branch information
gandhipr committed Mar 5, 2024
1 parent b5f86a4 commit 44b1bd4
Show file tree
Hide file tree
Showing 3 changed files with 192 additions and 11 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,8 @@ import (

const (
// GPULabel is the label added to nodes with GPU resource.
GPULabel = "accelerator"
GPULabel = AKSLabelKeyPrefixValue + "accelerator"
// legacyGPULabel is the unprefixed GPU label key.
legacyGPULabel = "accelerator"
)

var (
Expand Down
5 changes: 4 additions & 1 deletion cluster-autoscaler/cloudprovider/azure/azure_scale_set.go
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,9 @@ type ScaleSet struct {
minSize int
maxSize int

labels map[string]string
taints string

sizeMutex sync.Mutex
curSize int64

Expand Down Expand Up @@ -513,7 +516,7 @@ func (scaleSet *ScaleSet) TemplateNodeInfo() (*schedulerframework.NodeInfo, erro
return nil, err
}

node, err := buildNodeFromTemplate(scaleSet.Name, template, scaleSet.manager)
node, err := buildNodeFromTemplate(scaleSet.Name, scaleSet.labels, scaleSet.taints, template, scaleSet.manager)
if err != nil {
return nil, err
}
Expand Down
195 changes: 186 additions & 9 deletions cluster-autoscaler/cloudprovider/azure/azure_template.go
Original file line number Diff line number Diff line change
Expand Up @@ -32,10 +32,60 @@ import (
"k8s.io/autoscaler/cluster-autoscaler/utils/gpu"
cloudvolume "k8s.io/cloud-provider/volume"
"k8s.io/klog/v2"
"sigs.k8s.io/cloud-provider-azure/pkg/consts"
)

const (
azureDiskTopologyKey string = "topology.disk.csi.azure.com/zone"
// AKSLabelPrefixValue is the constant domain from which AKSLabelKeyPrefixValue is built.
AKSLabelPrefixValue = "kubernetes.azure.com"
// AKSLabelKeyPrefixValue is the key prefix for AKS labels (AKSLabelPrefixValue followed by "/").
AKSLabelKeyPrefixValue = AKSLabelPrefixValue + "/"

azureDiskTopologyKey = "topology.disk.csi.azure.com/zone"
// xilinxFpgaResourceName is the resource name that the xilinx device plugin
// uses for NP-series SKUs.
// https://github.com/Xilinx/FPGA_as_a_Service/tree/master/k8s-fpga-device-plugin
xilinxFpgaResourceName = "xilinx.com/fpga-xilinx_u250_gen3x16_xdma_shell_2_1-0"

// legacyPoolNameTag is the legacy tag that AKS adds to the VMSS with its value
// being the agentpool name.
legacyPoolNameTag = "poolName"
// poolNameTag is the new tag that replaces legacyPoolNameTag.
// Newly created pools and clusters will have this one on the VMSS
// instead of the legacy one. We'll have to live with both tags for a while.
poolNameTag = "aks-managed-poolName"

// legacyAgentPoolNodeLabelKey is the legacy label added by agentbaker
// (agentpool={poolName}). We want to predict that a node added to this
// agentpool will have it as a node label. The value is fetched from the
// VMSS tag with key poolNameTag/legacyPoolNameTag.
legacyAgentPoolNodeLabelKey = "agentpool"
// agentPoolNodeLabelKey is the new label that replaces legacyAgentPoolNodeLabelKey.
agentPoolNodeLabelKey = "kubernetes.azure.com/agentpool"

// Storage profile node labels (legacy and AKS-prefixed keys).
legacyStorageProfileNodeLabelKey = "storageprofile"
storageProfileNodeLabelKey = "kubernetes.azure.com/storageprofile"

// Storage tier node labels (legacy and AKS-prefixed keys).
legacyStorageTierNodeLabelKey = "storagetier"
storageTierNodeLabelKey = "kubernetes.azure.com/storagetier"

// fipsNodeLabelKey is the FIPS node label key.
fipsNodeLabelKey = AKSLabelKeyPrefixValue + "fips_enabled"

// osSkuLabelKey is the OS SKU node label key.
osSkuLabelKey = AKSLabelKeyPrefixValue + "os-sku"

// securityTypeLabelKey is the security-type node label key.
securityTypeLabelKey = AKSLabelKeyPrefixValue + "security-type"

// Labels defined in RP.
// Since Cluster autoscaler cannot import RP, they are redefined here.
// https://msazure.visualstudio.com/CloudNativeCompute/_git/aks-rp?path=/toolkit/constvalues/k8slabels/labels.go
customCATrustEnabledLabelKey = AKSLabelKeyPrefixValue + "custom-ca-trust-enabled"
kataMshvVMIsolationLabelKey = AKSLabelKeyPrefixValue + "kata-mshv-vm-isolation"

// clusterLabelKey is the cluster node label key.
clusterLabelKey = AKSLabelKeyPrefixValue + "cluster"
)

func buildInstanceOS(template compute.VirtualMachineScaleSet) string {
Expand Down Expand Up @@ -73,7 +123,8 @@ func buildGenericLabels(template compute.VirtualMachineScaleSet, nodeName string
return result
}

func buildNodeFromTemplate(scaleSetName string, template compute.VirtualMachineScaleSet, manager *AzureManager) (*apiv1.Node, error) {
func buildNodeFromTemplate(scaleSetName string, inputLabels map[string]string, inputTaints string,
template compute.VirtualMachineScaleSet, manager *AzureManager) (*apiv1.Node, error) {
node := apiv1.Node{}
nodeName := fmt.Sprintf("%s-asg-%d", scaleSetName, rand.Int63())

Expand Down Expand Up @@ -128,11 +179,6 @@ func buildNodeFromTemplate(scaleSetName string, template compute.VirtualMachineS

node.Status.Capacity[apiv1.ResourceMemory] = *resource.NewQuantity(memoryMb*1024*1024, resource.DecimalSI)

resourcesFromTags := extractAllocatableResourcesFromScaleSet(template.Tags)
for resourceName, val := range resourcesFromTags {
node.Status.Capacity[apiv1.ResourceName(resourceName)] = *val
}

// TODO: set real allocatable.
node.Status.Allocatable = node.Status.Capacity

Expand All @@ -151,10 +197,110 @@ func buildNodeFromTemplate(scaleSetName string, template compute.VirtualMachineS
// GenericLabels
node.Labels = cloudprovider.JoinStringMaps(node.Labels, buildGenericLabels(template, nodeName))
// Labels from the Scale Set's Tags
node.Labels = cloudprovider.JoinStringMaps(node.Labels, extractLabelsFromScaleSet(template.Tags))

labels := make(map[string]string)

// Prefer the explicit labels in spec coming from RP over the VMSS template
if len(inputLabels) > 0 {
labels = inputLabels
} else {
labels = extractLabelsFromScaleSet(template.Tags)
}

// Add the agentpool label, its value should come from the VMSS poolName tag
// NOTE: The plan is for agentpool label to be deprecated in favor of the aks-prefixed one
// We will have to live with both labels for a while
if node.Labels[legacyPoolNameTag] != "" {
labels[legacyAgentPoolNodeLabelKey] = node.Labels[legacyPoolNameTag]
labels[agentPoolNodeLabelKey] = node.Labels[legacyPoolNameTag]
}
if node.Labels[poolNameTag] != "" {
labels[legacyAgentPoolNodeLabelKey] = node.Labels[poolNameTag]
labels[agentPoolNodeLabelKey] = node.Labels[poolNameTag]
}

// Add node-role label
if node.Labels[consts.NodeLabelRole] != "" {
labels[consts.NodeLabelRole] = node.Labels[consts.NodeLabelRole]
}

if node.Labels[fipsNodeLabelKey] != "" {
labels[fipsNodeLabelKey] = node.Labels[fipsNodeLabelKey]
}

if node.Labels[osSkuLabelKey] != "" {
labels[osSkuLabelKey] = node.Labels[osSkuLabelKey]
}

if node.Labels[securityTypeLabelKey] != "" {
labels[securityTypeLabelKey] = node.Labels[securityTypeLabelKey]
}

if node.Labels[customCATrustEnabledLabelKey] != "" {
labels[customCATrustEnabledLabelKey] = node.Labels[customCATrustEnabledLabelKey]
}

if node.Labels[kataMshvVMIsolationLabelKey] != "" {
labels[kataMshvVMIsolationLabelKey] = node.Labels[kataMshvVMIsolationLabelKey]
}

if node.Labels[clusterLabelKey] != "" {
labels[clusterLabelKey] = node.Labels[clusterLabelKey]
}

// Add the storage profile and storage tier labels
if template.VirtualMachineProfile != nil && template.VirtualMachineProfile.StorageProfile != nil &&
template.VirtualMachineProfile.StorageProfile.OsDisk != nil {
// ephemeral
if template.VirtualMachineProfile.StorageProfile.OsDisk.DiffDiskSettings != nil &&
template.VirtualMachineProfile.StorageProfile.OsDisk.DiffDiskSettings.Option == compute.Local {
labels[legacyStorageProfileNodeLabelKey] = "ephemeral"
labels[storageProfileNodeLabelKey] = "ephemeral"
} else {
labels[legacyStorageProfileNodeLabelKey] = "managed"
labels[storageProfileNodeLabelKey] = "managed"
}
if template.VirtualMachineProfile.StorageProfile.OsDisk.ManagedDisk != nil {
labels[legacyStorageTierNodeLabelKey] = string(
template.VirtualMachineProfile.StorageProfile.OsDisk.ManagedDisk.StorageAccountType)
labels[storageTierNodeLabelKey] = string(
template.VirtualMachineProfile.StorageProfile.OsDisk.ManagedDisk.StorageAccountType)
}
// Add ephemeral-storage value
if template.VirtualMachineProfile.StorageProfile.OsDisk.DiskSizeGB != nil {
node.Status.Capacity[apiv1.ResourceEphemeralStorage] = *resource.NewQuantity(int64(int(*template.VirtualMachineProfile.StorageProfile.OsDisk.DiskSizeGB)*1024*1024*1024), resource.DecimalSI)
klog.V(4).Infof("OS Disk Size from template is: %d", *template.VirtualMachineProfile.StorageProfile.OsDisk.DiskSizeGB)
klog.V(4).Infof("Setting ephemeral storage to: %v", node.Status.Capacity[apiv1.ResourceEphemeralStorage])
}
}

// If we are on GPU-enabled SKUs, append the accelerator
// label so that CA makes better decision when scaling from zero for GPU pools
if isNvidiaEnabledSKU(*template.Sku.Name) {
labels[GPULabel] = "nvidia"
labels[legacyGPULabel] = "nvidia"
}

// Extract allocatables from tags
resourcesFromTags := extractAllocatableResourcesFromScaleSet(template.Tags)
for resourceName, val := range resourcesFromTags {
node.Status.Capacity[apiv1.ResourceName(resourceName)] = *val
}

node.Labels = cloudprovider.JoinStringMaps(node.Labels, labels)
klog.V(4).Infof("Setting node %s labels to: %s", nodeName, node.Labels)

var taints []apiv1.Taint
// Prefer the explicit taints in spec over the VMSS template
if len(inputTaints) > 0 {
taints = extractTaintsFromSpecString(inputTaints)
} else {
taints = extractTaintsFromScaleSet(template.Tags)
}

// Taints from the Scale Set's Tags
node.Spec.Taints = extractTaintsFromScaleSet(template.Tags)
node.Spec.Taints = taints
klog.V(4).Infof("Setting node %s taints to: %s", nodeName, node.Spec.Taints)

node.Status.Conditions = cloudprovider.BuildReadyConditions()
return &node, nil
Expand All @@ -177,6 +323,37 @@ func extractLabelsFromScaleSet(tags map[string]*string) map[string]string {
return result
}

// taintSpecValuePattern matches the "<value>:<effect>" half of one taint spec
// entry. It is anchored on both ends so the effect has to be exactly one of
// the listed taint effects, and the value may not itself contain a colon.
// Compiled once at package initialization instead of once per entry.
var taintSpecValuePattern = regexp.MustCompile(`^([^:]*):(NoSchedule|NoExecute|PreferNoSchedule)$`)

// extractTaintsFromSpecString parses a comma-separated taints string in the
// same format as the argument to kubelet's `--register-with-taints`, e.g.
// "dedicated=foo:NoSchedule,group=bar:NoExecute,app=fizz:PreferNoSchedule".
//
// Each entry must have the form key=value:effect. Entries that do not contain
// exactly one "=", or whose part after "=" is not "<value>:<effect>" with an
// effect of exactly NoSchedule, NoExecute or PreferNoSchedule, are skipped.
//
// The returned slice is never nil; it is empty when no entry is valid.
func extractTaintsFromSpecString(taintsString string) []apiv1.Taint {
	taints := make([]apiv1.Taint, 0)
	for _, entry := range strings.Split(taintsString, ",") {
		// Split the entry into the taint key and the "value:effect" remainder.
		keyAndRest := strings.Split(entry, "=")
		if len(keyAndRest) != 2 {
			continue
		}

		// match[1] is the taint value, match[2] the validated effect.
		match := taintSpecValuePattern.FindStringSubmatch(keyAndRest[1])
		if match == nil {
			continue
		}

		taints = append(taints, apiv1.Taint{
			Key:    keyAndRest[0],
			Value:  match[1],
			Effect: apiv1.TaintEffect(match[2]),
		})
	}

	return taints
}

func extractTaintsFromScaleSet(tags map[string]*string) []apiv1.Taint {
taints := make([]apiv1.Taint, 0)

Expand Down

0 comments on commit 44b1bd4

Please sign in to comment.