Skip to content

Commit

Permalink
Merge pull request #3292 from marwanad/1.18-cherry-pick-3221-and-3284
Browse files Browse the repository at this point in the history
Cherry-pick #3221, #3284 - Async Deletions and 409 Conflicts
  • Loading branch information
k8s-ci-robot authored Jul 6, 2020
2 parents 0470c39 + 8476c5d commit fd1a370
Show file tree
Hide file tree
Showing 5 changed files with 87 additions and 13 deletions.
8 changes: 8 additions & 0 deletions cluster-autoscaler/cloudprovider/azure/azure_fakes.go
Original file line number Diff line number Diff line change
Expand Up @@ -92,6 +92,14 @@ func (client *VirtualMachineScaleSetsClientMock) DeleteInstances(ctx context.Con
return nil
}

// DeleteInstancesAsync deletes a set of instances for specified VirtualMachineScaleSet and returns the future
func (client *VirtualMachineScaleSetsClientMock) DeleteInstancesAsync(ctx context.Context, resourceGroupName string, vmScaleSetName string, vmInstanceIDs compute.VirtualMachineScaleSetVMInstanceRequiredIDs) (*azure.Future, *retry.Error) {
client.mutex.Lock()
defer client.mutex.Unlock()
client.Called(resourceGroupName, vmScaleSetName, vmInstanceIDs)
return nil, nil
}

// List gets a list of VirtualMachineScaleSets.
func (client *VirtualMachineScaleSetsClientMock) List(ctx context.Context, resourceGroupName string) (result []compute.VirtualMachineScaleSet, rerr *retry.Error) {
client.mutex.Lock()
Expand Down
29 changes: 24 additions & 5 deletions cluster-autoscaler/cloudprovider/azure/azure_scale_set.go
Original file line number Diff line number Diff line change
Expand Up @@ -430,18 +430,37 @@ func (scaleSet *ScaleSet) DeleteInstances(instances []*azureRef) error {
defer cancel()
resourceGroup := scaleSet.manager.config.ResourceGroup

scaleSet.instanceMutex.Lock()
klog.V(3).Infof("Calling virtualMachineScaleSetsClient.DeleteInstancesAsync(%v)", requiredIds.InstanceIds)
future, rerr := scaleSet.manager.azClient.virtualMachineScaleSetsClient.DeleteInstancesAsync(ctx, resourceGroup, commonAsg.Id(), *requiredIds)
if rerr != nil {
klog.Errorf("virtualMachineScaleSetsClient.DeleteInstancesAsync for instances %v failed: %v", requiredIds.InstanceIds, err)
return rerr.Error()
}
scaleSet.instanceMutex.Unlock()

// Proactively decrement scale set size so that we don't
// go below minimum node count if cache data is stale
scaleSet.sizeMutex.Lock()
scaleSet.curSize -= int64(len(instanceIDs))
scaleSet.sizeMutex.Unlock()

rerr := scaleSet.manager.azClient.virtualMachineScaleSetsClient.DeleteInstances(ctx, resourceGroup, commonAsg.Id(), *requiredIds)
if rerr != nil {
klog.Errorf("Failed to delete instances %v. Invalidating the cache to get the real scale set size", requiredIds)
scaleSet.invalidateStatusCacheWithLock()
go scaleSet.waitForDeleteInstances(future, requiredIds)

return nil
}

func (scaleSet *ScaleSet) waitForDeleteInstances(future *azure.Future, requiredIds *compute.VirtualMachineScaleSetVMInstanceRequiredIDs) {
ctx, cancel := getContextWithCancel()
defer cancel()

httpResponse, err := scaleSet.manager.azClient.virtualMachineScaleSetsClient.WaitForAsyncOperationResult(ctx, future)
isSuccess, err := isSuccessHTTPResponse(httpResponse, err)
if isSuccess {
klog.V(3).Infof("virtualMachineScaleSetsClient.WaitForAsyncOperationResult - DeleteInstances(%v) success", requiredIds.InstanceIds)
return
}
return rerr.Error()
klog.Errorf("virtualMachineScaleSetsClient.WaitForAsyncOperationResult - DeleteInstances for instances %v failed with error: %v", requiredIds.InstanceIds, err)
}

// DeleteNodes deletes the nodes from the group.
Expand Down
13 changes: 5 additions & 8 deletions cluster-autoscaler/cloudprovider/azure/azure_scale_set_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -68,9 +68,6 @@ func TestTargetSize(t *testing.T) {
assert.True(t, registered)
assert.Equal(t, len(provider.NodeGroups()), 1)

ng := provider.NodeGroups()[0]
size, err := ng.TargetSize()
println(size)
targetSize, err := provider.NodeGroups()[0].TargetSize()
assert.NoError(t, err)
assert.Equal(t, 3, targetSize)
Expand Down Expand Up @@ -183,7 +180,7 @@ func TestDeleteNodes(t *testing.T) {
Status: "OK",
},
}
scaleSetClient.On("DeleteInstances", mock.Anything, "test-asg", mock.Anything, mock.Anything).Return(response, nil)
scaleSetClient.On("DeleteInstancesAsync", mock.Anything, "test-asg", mock.Anything, mock.Anything).Return(response, nil)
manager.azClient.virtualMachineScaleSetsClient = scaleSetClient
// TODO: this should call manager.Refresh() once the fetchAutoASG
// logic is refactored out
Expand Down Expand Up @@ -223,7 +220,7 @@ func TestDeleteNodes(t *testing.T) {
assert.NoError(t, err)
assert.Equal(t, 2, targetSize)

scaleSetClient.AssertNumberOfCalls(t, "DeleteInstances", 1)
scaleSetClient.AssertNumberOfCalls(t, "DeleteInstancesAsync", 1)
}

func TestDeleteNoConflictRequest(t *testing.T) {
Expand Down Expand Up @@ -265,7 +262,7 @@ func TestDeleteNoConflictRequest(t *testing.T) {
},
}

scaleSetClient.On("DeleteInstances", mock.Anything, "test-asg", mock.Anything, mock.Anything).Return(response, nil)
scaleSetClient.On("DeleteInstancesAsync", mock.Anything, "test-asg", mock.Anything, mock.Anything).Return(response, nil)
manager.azClient.virtualMachineScaleSetsClient = scaleSetClient
manager.azClient.virtualMachineScaleSetVMsClient = vmsClient

Expand All @@ -288,8 +285,8 @@ func TestDeleteNoConflictRequest(t *testing.T) {
assert.True(t, ok)

err = scaleSet.DeleteNodes([]*apiv1.Node{node})
// ensure that DeleteInstances isn't called
scaleSetClient.AssertNumberOfCalls(t, "DeleteInstances", 0)
// ensure that DeleteInstancesAsync isn't called
scaleSetClient.AssertNumberOfCalls(t, "DeleteInstancesAsync", 0)
}

func TestId(t *testing.T) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -432,6 +432,53 @@ func (c *Client) DeleteInstances(ctx context.Context, resourceGroupName string,
return nil
}

// DeleteInstancesAsync sends the delete request to ARM client and DOEST NOT wait on the future
func (c *Client) DeleteInstancesAsync(ctx context.Context, resourceGroupName string, vmScaleSetName string, vmInstanceIDs compute.VirtualMachineScaleSetVMInstanceRequiredIDs) (*azure.Future, *retry.Error) {
mc := metrics.NewMetricContext("vmss", "delete_instances_async", resourceGroupName, c.subscriptionID, "")

// Report errors if the client is rate limited.
if !c.rateLimiterWriter.TryAccept() {
mc.RateLimitedCount()
return nil, retry.GetRateLimitError(true, "VMSSDeleteInstancesAsync")
}

// Report errors if the client is throttled.
if c.RetryAfterWriter.After(time.Now()) {
mc.ThrottledCount()
rerr := retry.GetThrottlingError("VMSSDeleteInstancesAsync", "client throttled", c.RetryAfterWriter)
return nil, rerr
}

resourceID := armclient.GetResourceID(
c.subscriptionID,
resourceGroupName,
"Microsoft.Compute/virtualMachineScaleSets",
vmScaleSetName,
)

response, rerr := c.armClient.PostResource(ctx, resourceID, "delete", vmInstanceIDs)
defer c.armClient.CloseResponse(ctx, response)

if rerr != nil {
klog.V(5).Infof("Received error in %s: resourceID: %s, error: %s", "vmss.deletevms.request", resourceID, rerr.Error())
return nil, rerr
}

err := autorest.Respond(response, azure.WithErrorUnlessStatusCode(http.StatusOK, http.StatusAccepted))
if err != nil {
klog.V(5).Infof("Received error in %s: resourceID: %s, error: %s", "vmss.deletevms.respond", resourceID, rerr.Error())
return nil, retry.GetError(response, err)
}

future, err := azure.NewFutureFromResponse(response)
if err != nil {
klog.V(5).Infof("Received error in %s: resourceID: %s, error: %s", "vmss.deletevms.future", resourceID, rerr.Error())
return nil, retry.NewError(false, err)
}

return &future, nil
}

// deleteVMSSInstances deletes the instances for a VirtualMachineScaleSet.
func (c *Client) deleteVMSSInstances(ctx context.Context, resourceGroupName string, vmScaleSetName string, vmInstanceIDs compute.VirtualMachineScaleSetVMInstanceRequiredIDs) *retry.Error {
resourceID := armclient.GetResourceID(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -54,4 +54,7 @@ type Interface interface {

// DeleteInstances deletes the instances for a VirtualMachineScaleSet.
DeleteInstances(ctx context.Context, resourceGroupName string, vmScaleSetName string, vmInstanceIDs compute.VirtualMachineScaleSetVMInstanceRequiredIDs) *retry.Error

// DeleteInstancesAsync sends the delete request to the ARM client and DOEST NOT wait on the future
DeleteInstancesAsync(ctx context.Context, resourceGroupName string, vmScaleSetName string, vmInstanceIDs compute.VirtualMachineScaleSetVMInstanceRequiredIDs) (*azure.Future, *retry.Error)
}

0 comments on commit fd1a370

Please sign in to comment.