Skip to content

Commit

Permalink
Merge pull request #6447 from Jont828/edge-zone
Browse files Browse the repository at this point in the history
Azure: add support for edge zones
  • Loading branch information
k8s-ci-robot authored Mar 17, 2024
2 parents 109998d + e8ca5fd commit 4bf83f1
Show file tree
Hide file tree
Showing 8 changed files with 217 additions and 25 deletions.
1 change: 1 addition & 0 deletions charts/cluster-autoscaler/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -382,6 +382,7 @@ vpa:
| awsSecretAccessKey | string | `""` | AWS access secret key ([if AWS user keys used](https://github.com/kubernetes/autoscaler/blob/master/cluster-autoscaler/cloudprovider/aws/README.md#using-aws-credentials)) |
| azureClientID | string | `""` | Service Principal ClientID with contributor permission to Cluster and Node ResourceGroup. Required if `cloudProvider=azure` |
| azureClientSecret | string | `""` | Service Principal ClientSecret with contributor permission to Cluster and Node ResourceGroup. Required if `cloudProvider=azure` |
| azureEnableForceDelete | bool | `false` | Whether to force delete VMs or VMSS instances when scaling down. |
| azureResourceGroup | string | `""` | Azure resource group in which the cluster is located. Required if `cloudProvider=azure` |
| azureSubscriptionID | string | `""` | Azure subscription where the resources are located. Required if `cloudProvider=azure` |
| azureTenantID | string | `""` | Azure tenant where the resources are located. Required if `cloudProvider=azure` |
Expand Down
2 changes: 2 additions & 0 deletions charts/cluster-autoscaler/templates/deployment.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -166,6 +166,8 @@ spec:
secretKeyRef:
key: VMType
name: {{ default (include "cluster-autoscaler.fullname" .) .Values.secretKeyRefNameOverride }}
- name: AZURE_ENABLE_FORCE_DELETE
value: "{{ .Values.azureEnableForceDelete }}"
{{- if .Values.azureUseWorkloadIdentityExtension }}
- name: ARM_USE_WORKLOAD_IDENTITY_EXTENSION
value: "true"
Expand Down
3 changes: 3 additions & 0 deletions charts/cluster-autoscaler/values.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -96,6 +96,9 @@ azureUseWorkloadIdentityExtension: false
# azureVMType -- Azure VM type.
azureVMType: "vmss"

# azureEnableForceDelete -- Whether to force delete VMs or VMSS instances when scaling down.
azureEnableForceDelete: false

# cloudConfigPath -- Configuration file for cloud provider.
cloudConfigPath: ""

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,7 @@ func newTestAzureManager(t *testing.T) *AzureManager {
VMType: vmTypeVMSS,
MaxDeploymentsCount: 2,
Deployment: "deployment",
EnableForceDelete: true,
},
azClient: &azClient{
virtualMachineScaleSetsClient: mockVMSSClient,
Expand Down
10 changes: 10 additions & 0 deletions cluster-autoscaler/cloudprovider/azure/azure_config.go
Original file line number Diff line number Diff line change
Expand Up @@ -130,6 +130,9 @@ type Config struct {
CloudProviderBackoffDuration int `json:"cloudProviderBackoffDuration,omitempty" yaml:"cloudProviderBackoffDuration,omitempty"`
CloudProviderBackoffJitter float64 `json:"cloudProviderBackoffJitter,omitempty" yaml:"cloudProviderBackoffJitter,omitempty"`

// EnableForceDelete defines whether to enable force deletion on the APIs
EnableForceDelete bool `json:"enableForceDelete,omitempty" yaml:"enableForceDelete,omitempty"`

// EnableDynamicInstanceList defines whether to enable dynamic instance workflow for instance information check
EnableDynamicInstanceList bool `json:"enableDynamicInstanceList,omitempty" yaml:"enableDynamicInstanceList,omitempty"`

Expand Down Expand Up @@ -303,6 +306,13 @@ func BuildAzureConfig(configReader io.Reader) (*Config, error) {
}
}

if enableForceDelete := os.Getenv("AZURE_ENABLE_FORCE_DELETE"); enableForceDelete != "" {
cfg.EnableForceDelete, err = strconv.ParseBool(enableForceDelete)
if err != nil {
return nil, fmt.Errorf("failed to parse AZURE_ENABLE_FORCE_DELETE: %q, %v", enableForceDelete, err)
}
}

err = initializeCloudProviderRateLimitConfig(&cfg.CloudProviderRateLimitConfig)
if err != nil {
return nil, err
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -679,6 +679,7 @@ func TestGetFilteredAutoscalingGroupsVmss(t *testing.T) {
minSize: minVal,
maxSize: maxVal,
manager: manager,
enableForceDelete: manager.config.EnableForceDelete,
curSize: 3,
sizeRefreshPeriod: manager.azureCache.refreshInterval,
instancesRefreshPeriod: defaultVmssInstancesRefreshPeriod,
Expand Down
28 changes: 26 additions & 2 deletions cluster-autoscaler/cloudprovider/azure/azure_scale_set.go
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,8 @@ type ScaleSet struct {
minSize int
maxSize int

enableForceDelete bool

sizeMutex sync.Mutex
curSize int64

Expand Down Expand Up @@ -87,6 +89,7 @@ func NewScaleSet(spec *dynamic.NodeGroupSpec, az *AzureManager, curSize int64) (
sizeRefreshPeriod: az.azureCache.refreshInterval,
enableDynamicInstanceList: az.config.EnableDynamicInstanceList,
instancesRefreshJitter: az.config.VmssVmsCacheJitter,
enableForceDelete: az.config.EnableForceDelete,
}

if az.config.VmssVmsCacheTTL != 0 {
Expand Down Expand Up @@ -251,6 +254,16 @@ func (scaleSet *ScaleSet) SetScaleSetSize(size int64) error {
Sku: vmssInfo.Sku,
Location: vmssInfo.Location,
}

if vmssInfo.ExtendedLocation != nil {
op.ExtendedLocation = &compute.ExtendedLocation{
Name: vmssInfo.ExtendedLocation.Name,
Type: vmssInfo.ExtendedLocation.Type,
}

klog.V(3).Infof("Passing ExtendedLocation information if it is not nil, with Edge Zone name:(%s)", *op.ExtendedLocation.Name)
}

ctx, cancel := getContextWithTimeout(vmssContextTimeout)
defer cancel()
klog.V(3).Infof("Waiting for virtualMachineScaleSetsClient.CreateOrUpdateAsync(%s)", scaleSet.Name)
Expand Down Expand Up @@ -437,8 +450,15 @@ func (scaleSet *ScaleSet) DeleteInstances(instances []*azureRef, hasUnregistered
resourceGroup := scaleSet.manager.config.ResourceGroup

scaleSet.instanceMutex.Lock()
klog.V(3).Infof("Calling virtualMachineScaleSetsClient.DeleteInstancesAsync(%v)", requiredIds.InstanceIds)
future, rerr := scaleSet.manager.azClient.virtualMachineScaleSetsClient.DeleteInstancesAsync(ctx, resourceGroup, commonAsg.Id(), *requiredIds, false)
klog.V(3).Infof("Calling virtualMachineScaleSetsClient.DeleteInstancesAsync(%v), force delete set to %v", requiredIds.InstanceIds, scaleSet.enableForceDelete)
future, rerr := scaleSet.manager.azClient.virtualMachineScaleSetsClient.DeleteInstancesAsync(ctx, resourceGroup, commonAsg.Id(), *requiredIds, scaleSet.enableForceDelete)

if scaleSet.enableForceDelete && isOperationNotAllowed(rerr) {
klog.Infof("falling back to normal delete for instances %v for %s", requiredIds.InstanceIds, scaleSet.Name)
future, rerr = scaleSet.manager.azClient.virtualMachineScaleSetsClient.DeleteInstancesAsync(ctx, resourceGroup,
commonAsg.Id(), *requiredIds, false)
}

scaleSet.instanceMutex.Unlock()
if rerr != nil {
klog.Errorf("virtualMachineScaleSetsClient.DeleteInstancesAsync for instances %v failed: %v", requiredIds.InstanceIds, rerr)
Expand Down Expand Up @@ -746,3 +766,7 @@ func (scaleSet *ScaleSet) getOrchestrationMode() (compute.OrchestrationMode, err
}
return vmss.OrchestrationMode, nil
}

// isOperationNotAllowed reports whether rerr is a non-nil error whose
// service error code equals retry.OperationNotAllowed. A nil rerr yields
// false without calling any method on it.
func isOperationNotAllowed(rerr *retry.Error) bool {
	if rerr == nil {
		return false
	}
	code := rerr.ServiceErrorCode()
	return code == retry.OperationNotAllowed
}
Loading

0 comments on commit 4bf83f1

Please sign in to comment.