Skip to content

Commit

Permalink
Merge pull request #3045 from marwanad/cherry-pick-3036-1.18
Browse files Browse the repository at this point in the history
Cherry-pick #3036: Proactively decrement scale set count during deletion operations
  • Loading branch information
k8s-ci-robot authored Apr 14, 2020
2 parents fa1f10b + 9387856 commit f6489c5
Show file tree
Hide file tree
Showing 2 changed files with 34 additions and 3 deletions.
25 changes: 22 additions & 3 deletions cluster-autoscaler/cloudprovider/azure/azure_scale_set.go
Original file line number Diff line number Diff line change
Expand Up @@ -235,9 +235,7 @@ func (scaleSet *ScaleSet) updateVMSSCapacity(future *azure.Future) {
if err != nil {
klog.Errorf("Failed to update the capacity for vmss %s with error %v, invalidate the cache so as to get the real size from API", scaleSet.Name, err)
// Invalidate the VMSS size cache in order to fetch the size from the API.
scaleSet.sizeMutex.Lock()
defer scaleSet.sizeMutex.Unlock()
scaleSet.lastSizeRefresh = time.Now().Add(-1 * scaleSet.sizeRefreshPeriod)
scaleSet.invalidateStatusCacheWithLock()
}
}()

Expand Down Expand Up @@ -434,7 +432,18 @@ func (scaleSet *ScaleSet) DeleteInstances(instances []*azureRef) error {
ctx, cancel := getContextWithCancel()
defer cancel()
resourceGroup := scaleSet.manager.config.ResourceGroup

// Proactively decrement scale set size so that we don't
// go below minimum node count if cache data is stale
scaleSet.sizeMutex.Lock()
scaleSet.curSize--
scaleSet.sizeMutex.Unlock()

rerr := scaleSet.manager.azClient.virtualMachineScaleSetsClient.DeleteInstances(ctx, resourceGroup, commonAsg.Id(), *requiredIds)
if rerr != nil {
klog.Errorf("Failed to delete instances %v. Invalidating the cache to get the real scale set size", requiredIds)
scaleSet.invalidateStatusCacheWithLock()
}
return rerr.Error()
}

Expand Down Expand Up @@ -691,3 +700,13 @@ func (scaleSet *ScaleSet) invalidateInstanceCache() {
scaleSet.lastInstanceRefresh = time.Now().Add(-1 * vmssInstancesRefreshPeriod)
scaleSet.instanceMutex.Unlock()
}

func (scaleSet *ScaleSet) invalidateStatusCacheWithLock() {
scaleSet.sizeMutex.Lock()
scaleSet.lastSizeRefresh = time.Now().Add(-1 * scaleSet.sizeRefreshPeriod)
scaleSet.sizeMutex.Unlock()

scaleSetStatusCache.mutex.Lock()
scaleSetStatusCache.lastRefresh = time.Now().Add(-1 * scaleSet.sizeRefreshPeriod)
scaleSetStatusCache.mutex.Unlock()
}
12 changes: 12 additions & 0 deletions cluster-autoscaler/cloudprovider/azure/azure_scale_set_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -209,8 +209,20 @@ func TestDeleteNodes(t *testing.T) {
}
scaleSet, ok := provider.NodeGroups()[0].(*ScaleSet)
assert.True(t, ok)

targetSize, err := scaleSet.TargetSize()
assert.NoError(t, err)
assert.Equal(t, 3, targetSize)

// Perform the delete operation
err = scaleSet.DeleteNodes([]*apiv1.Node{node})
assert.NoError(t, err)

// Ensure the the cached size has been proactively decremented
targetSize, err = scaleSet.TargetSize()
assert.NoError(t, err)
assert.Equal(t, 2, targetSize)

scaleSetClient.AssertNumberOfCalls(t, "DeleteInstances", 1)
}

Expand Down

0 comments on commit f6489c5

Please sign in to comment.