Skip to content

Commit

Permalink
Merge pull request #4794 from marwanad/azure-scale-set-cherry-picks-1.23
Browse files Browse the repository at this point in the history
Cherry-pick #4685, #47874 - Azure vmss cache improvements
  • Loading branch information
k8s-ci-robot authored Apr 7, 2022
2 parents 009d47f + 62b554c commit 9efb637
Show file tree
Hide file tree
Showing 2 changed files with 26 additions and 13 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -697,6 +697,7 @@ func TestGetFilteredAutoscalingGroupsVmss(t *testing.T) {
maxSize: maxVal,
manager: manager,
curSize: 3,
sizeRefreshPeriod: manager.azureCache.refreshInterval,
instancesRefreshPeriod: defaultVmssInstancesRefreshPeriod,
}}
assert.True(t, assert.ObjectsAreEqualValues(expectedAsgs, asgs), "expected %#v, but found: %#v", expectedAsgs, asgs)
Expand Down
38 changes: 25 additions & 13 deletions cluster-autoscaler/cloudprovider/azure/azure_scale_set.go
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,6 @@ import (

"github.com/Azure/azure-sdk-for-go/services/compute/mgmt/2020-12-01/compute"
"github.com/Azure/go-autorest/autorest/azure"
"github.com/Azure/go-autorest/autorest/to"
)

var (
Expand All @@ -53,6 +52,9 @@ type ScaleSet struct {
sizeMutex sync.Mutex
curSize int64

lastSizeRefresh time.Time
sizeRefreshPeriod time.Duration

instancesRefreshPeriod time.Duration
instancesRefreshJitter int

Expand All @@ -67,11 +69,11 @@ func NewScaleSet(spec *dynamic.NodeGroupSpec, az *AzureManager, curSize int64) (
azureRef: azureRef{
Name: spec.Name,
},
minSize: spec.MinSize,
maxSize: spec.MaxSize,
manager: az,
curSize: curSize,

minSize: spec.MinSize,
maxSize: spec.MaxSize,
manager: az,
curSize: curSize,
sizeRefreshPeriod: az.azureCache.refreshInterval,
instancesRefreshJitter: az.config.VmssVmsCacheJitter,
}

Expand Down Expand Up @@ -140,18 +142,17 @@ func (scaleSet *ScaleSet) getCurSize() (int64, error) {
scaleSet.sizeMutex.Lock()
defer scaleSet.sizeMutex.Unlock()

if scaleSet.lastSizeRefresh.Add(scaleSet.sizeRefreshPeriod).After(time.Now()) {
klog.V(3).Infof("VMSS: %s, returning in-memory size: %d", scaleSet.Name, scaleSet.curSize)
return scaleSet.curSize, nil
}

set, err := scaleSet.getVMSSFromCache()
if err != nil {
klog.Errorf("failed to get information for VMSS: %s, error: %v", scaleSet.Name, err)
return -1, err
}

// If VMSS state is updating, return the currentSize which would've been proactively incremented or decremented by CA
if set.VirtualMachineScaleSetProperties != nil && strings.EqualFold(to.String(set.VirtualMachineScaleSetProperties.ProvisioningState), string(compute.ProvisioningStateUpdating)) {
klog.V(3).Infof("VMSS %q is in updating state, returning cached size: %d", scaleSet.Name, scaleSet.curSize)
return scaleSet.curSize, nil
}

vmssSizeMutex.Lock()
curSize := *set.Sku.Capacity
vmssSizeMutex.Unlock()
Expand All @@ -161,9 +162,10 @@ func (scaleSet *ScaleSet) getCurSize() (int64, error) {
klog.V(5).Infof("VMSS %q size changed from: %d to %d, invalidating instance cache", scaleSet.Name, scaleSet.curSize, curSize)
scaleSet.invalidateInstanceCache()
}
klog.V(3).Infof("VMSS: %s, previous size: %d, new size: %d", scaleSet.Name, scaleSet.curSize, curSize)
klog.V(3).Infof("VMSS: %s, in-memory size: %d, new size: %d", scaleSet.Name, scaleSet.curSize, curSize)

scaleSet.curSize = curSize
scaleSet.lastSizeRefresh = time.Now()
return scaleSet.curSize, nil
}

Expand Down Expand Up @@ -194,6 +196,7 @@ func (scaleSet *ScaleSet) updateVMSSCapacity(future *azure.Future) {
if err != nil {
klog.Errorf("Failed to update the capacity for vmss %s with error %v, invalidate the cache so as to get the real size from API", scaleSet.Name, err)
// Invalidate the VMSS size cache in order to fetch the size from the API.
scaleSet.invalidateLastSizeRefreshWithLock()
scaleSet.manager.invalidateCache()
}
}()
Expand Down Expand Up @@ -247,6 +250,7 @@ func (scaleSet *ScaleSet) SetScaleSetSize(size int64) error {

// Proactively set the VMSS size so autoscaler makes better decisions.
scaleSet.curSize = size
scaleSet.lastSizeRefresh = time.Now()

go scaleSet.updateVMSSCapacity(future)
return nil
Expand Down Expand Up @@ -405,6 +409,7 @@ func (scaleSet *ScaleSet) DeleteInstances(instances []*azureRef, hasUnregistered
if !hasUnregisteredNodes {
scaleSet.sizeMutex.Lock()
scaleSet.curSize -= int64(len(instanceIDs))
scaleSet.lastSizeRefresh = time.Now()
scaleSet.sizeMutex.Unlock()
}

Expand Down Expand Up @@ -567,6 +572,7 @@ func (scaleSet *ScaleSet) setInstanceStatusByProviderID(providerID string, statu
scaleSet.instanceCache[k].Status = &status
}
}
scaleSet.lastInstanceRefresh = time.Now()
}

// instanceStatusFromVM converts the VM provisioning state to cloudprovider.InstanceStatus
Expand Down Expand Up @@ -594,3 +600,9 @@ func (scaleSet *ScaleSet) invalidateInstanceCache() {
scaleSet.lastInstanceRefresh = time.Now().Add(-1 * scaleSet.instancesRefreshPeriod)
scaleSet.instanceMutex.Unlock()
}

// invalidateLastSizeRefreshWithLock marks the in-memory VMSS size as stale.
// While holding sizeMutex it backdates lastSizeRefresh by one full
// sizeRefreshPeriod, so the freshness check in getCurSize
// (lastSizeRefresh + sizeRefreshPeriod is after now) no longer passes and
// the size is read again instead of being served from memory.
//
// The method acquires sizeMutex itself; callers must not already hold it.
func (scaleSet *ScaleSet) invalidateLastSizeRefreshWithLock() {
	scaleSet.sizeMutex.Lock()
	defer scaleSet.sizeMutex.Unlock()

	expired := time.Now().Add(-scaleSet.sizeRefreshPeriod)
	scaleSet.lastSizeRefresh = expired
}

0 comments on commit 9efb637

Please sign in to comment.