Skip to content

Commit

Permalink
Use cache to track vms pools
Browse files Browse the repository at this point in the history
  • Loading branch information
wenxuan0923 committed Apr 4, 2024
1 parent 46f04ae commit d665ce6
Show file tree
Hide file tree
Showing 6 changed files with 243 additions and 35 deletions.
74 changes: 53 additions & 21 deletions cluster-autoscaler/cloudprovider/azure/azure_cache.go
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,7 @@ type azureCache struct {
// Cache content.
resourceGroup string
vmType string
vmsPoolMap map[string]struct{} // track the nodepools that're vms pool
scaleSets map[string]compute.VirtualMachineScaleSet
virtualMachines map[string][]compute.VirtualMachine
registeredNodeGroups []cloudprovider.NodeGroup
Expand All @@ -67,6 +68,7 @@ func newAzureCache(client *azClient, cacheTTL time.Duration, resourceGroup, vmTy
refreshInterval: cacheTTL,
resourceGroup: resourceGroup,
vmType: vmType,
vmsPoolMap: make(map[string]struct{}),
scaleSets: make(map[string]compute.VirtualMachineScaleSet),
virtualMachines: make(map[string][]compute.VirtualMachine),
registeredNodeGroups: make([]cloudprovider.NodeGroup, 0),
Expand All @@ -87,6 +89,13 @@ func newAzureCache(client *azClient, cacheTTL time.Duration, resourceGroup, vmTy
return cache, nil
}

func (m *azureCache) getVMsPoolMap() map[string]struct{} {
m.mutex.Lock()
defer m.mutex.Unlock()

return m.vmsPoolMap
}

func (m *azureCache) getVirtualMachines() map[string][]compute.VirtualMachine {
m.mutex.Lock()
defer m.mutex.Unlock()
Expand Down Expand Up @@ -165,54 +174,77 @@ func (m *azureCache) fetchAzureResources() error {
m.mutex.Lock()
defer m.mutex.Unlock()

switch m.vmType {
case vmTypeVMSS:
// List all VMSS in the RG.
vmssResult, err := m.fetchScaleSets()
if err == nil {
m.scaleSets = vmssResult
} else {
return err
}
case vmTypeStandard:
// List all VMs in the RG.
vmResult, err := m.fetchVirtualMachines()
if err == nil {
m.virtualMachines = vmResult
} else {
return err
}
// fetch all the resources since CAS may be operating on mixed nodepools
// including both VMSS and VMs pools
vmssResult, err := m.fetchScaleSets()
if err == nil {
m.scaleSets = vmssResult
} else {
return err
}

vmResult, vmsPoolMap, err := m.fetchVirtualMachines()
if err == nil {
m.virtualMachines = vmResult
m.vmsPoolMap = vmsPoolMap
} else {
return err
}

return nil
}

const (
agentpoolNameTag = "aks-managed-poolName"
agentpoolTypeTag = "aks-managed-agentpool-type"
vmsPoolType = "VirtualMachines"
)

// fetchVirtualMachines returns the updated list of virtual machines in the config resource group using the Azure API.
func (m *azureCache) fetchVirtualMachines() (map[string][]compute.VirtualMachine, error) {
func (m *azureCache) fetchVirtualMachines() (map[string][]compute.VirtualMachine, map[string]struct{}, error) {
ctx, cancel := getContextWithCancel()
defer cancel()

result, err := m.azClient.virtualMachinesClient.List(ctx, m.resourceGroup)
if err != nil {
klog.Errorf("VirtualMachinesClient.List in resource group %q failed: %v", m.resourceGroup, err)
return nil, err.Error()
return nil, nil, err.Error()
}

instances := make(map[string][]compute.VirtualMachine)
// track the nodepools that're vms pools
vmsPoolMap := make(map[string]struct{})
for _, instance := range result {
if instance.Tags == nil {
continue
}

tags := instance.Tags
vmPoolName := tags["poolName"]
vmPoolName := tags[agentpoolNameTag]
// fall back to legacy tag name if not found
if vmPoolName == nil {
vmPoolName = tags["poolName"]
}

if vmPoolName == nil {
continue
}

instances[to.String(vmPoolName)] = append(instances[to.String(vmPoolName)], instance)

// if the nodepool is already in the map, skip it
if _, ok := vmsPoolMap[to.String(vmPoolName)]; ok {
continue
}

// nodes from vms pool will have tag "aks-managed-agentpool-type" set to "VirtualMachines"
if agnetpoolType := tags[agentpoolTypeTag]; agnetpoolType != nil {
if strings.EqualFold(to.String(agnetpoolType), vmsPoolType) {
vmsPoolMap[to.String(vmPoolName)] = struct{}{}
}
}
}
return instances, nil
return instances, vmsPoolMap, nil
}

// fetchScaleSets returns the updated list of scale sets in the config resource group using the Azure API.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,9 @@ func newTestAzureManager(t *testing.T) *AzureManager {
mockVMSSClient.EXPECT().List(gomock.Any(), "rg").Return(expectedScaleSets, nil).AnyTimes()
mockVMSSVMClient := mockvmssvmclient.NewMockInterface(ctrl)
mockVMSSVMClient.EXPECT().List(gomock.Any(), "rg", "test-vmss", gomock.Any()).Return(expectedVMSSVMs, nil).AnyTimes()
mockVMClient := mockvmclient.NewMockInterface(ctrl)
expectedVMs := newTestVMList(3)
mockVMClient.EXPECT().List(gomock.Any(), "rg").Return(expectedVMs, nil).AnyTimes()

manager := &AzureManager{
env: azure.PublicCloud,
Expand All @@ -57,6 +60,7 @@ func newTestAzureManager(t *testing.T) *AzureManager {
azClient: &azClient{
virtualMachineScaleSetsClient: mockVMSSClient,
virtualMachineScaleSetVMsClient: mockVMSSVMClient,
virtualMachinesClient: mockVMClient,
deploymentsClient: &DeploymentsClientMock{
FakeStore: map[string]resources.DeploymentExtended{
"deployment": {
Expand Down Expand Up @@ -135,6 +139,9 @@ func TestNodeGroupForNode(t *testing.T) {
mockVMSSClient := mockvmssclient.NewMockInterface(ctrl)
mockVMSSClient.EXPECT().List(gomock.Any(), provider.azureManager.config.ResourceGroup).Return(expectedScaleSets, nil)
provider.azureManager.azClient.virtualMachineScaleSetsClient = mockVMSSClient
mockVMClient := mockvmclient.NewMockInterface(ctrl)
provider.azureManager.azClient.virtualMachinesClient = mockVMClient
mockVMClient.EXPECT().List(gomock.Any(), provider.azureManager.config.ResourceGroup).Return(expectedVMs, nil).AnyTimes()

if orchMode == compute.Uniform {

Expand All @@ -143,11 +150,8 @@ func TestNodeGroupForNode(t *testing.T) {
provider.azureManager.azClient.virtualMachineScaleSetVMsClient = mockVMSSVMClient
} else {

mockVMClient := mockvmclient.NewMockInterface(ctrl)
provider.azureManager.config.EnableVmssFlex = true
mockVMClient.EXPECT().ListVmssFlexVMsWithoutInstanceView(gomock.Any(), "test-asg").Return(expectedVMs, nil).AnyTimes()
provider.azureManager.azClient.virtualMachinesClient = mockVMClient

}

registered := provider.azureManager.RegisterNodeGroup(
Expand Down
5 changes: 5 additions & 0 deletions cluster-autoscaler/cloudprovider/azure/azure_manager.go
Original file line number Diff line number Diff line change
Expand Up @@ -147,6 +147,11 @@ func (m *AzureManager) buildNodeGroupFromSpec(spec string) (cloudprovider.NodeGr
return nil, fmt.Errorf("failed to parse node group spec: %v", err)
}

vmsPoolMap := m.azureCache.getVMsPoolMap()
if _, ok := vmsPoolMap[s.Name]; ok {
return NewVMsPool(s, m), nil
}

switch m.config.VMType {
case vmTypeStandard:
return NewAgentPool(s, m)
Expand Down
9 changes: 9 additions & 0 deletions cluster-autoscaler/cloudprovider/azure/azure_manager_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -144,6 +144,7 @@ func TestCreateAzureManagerValidConfig(t *testing.T) {
mockVMClient := mockvmclient.NewMockInterface(ctrl)
mockVMSSClient := mockvmssclient.NewMockInterface(ctrl)
mockVMSSClient.EXPECT().List(gomock.Any(), "fakeId").Return([]compute.VirtualMachineScaleSet{}, nil).Times(2)
mockVMClient.EXPECT().List(gomock.Any(), "fakeId").Return([]compute.VirtualMachine{}, nil).AnyTimes()
mockAzClient := &azClient{
virtualMachinesClient: mockVMClient,
virtualMachineScaleSetsClient: mockVMSSClient,
Expand Down Expand Up @@ -226,6 +227,7 @@ func TestCreateAzureManagerValidConfigForStandardVMType(t *testing.T) {
mockVMClient := mockvmclient.NewMockInterface(ctrl)
mockVMClient.EXPECT().List(gomock.Any(), "fakeId").Return([]compute.VirtualMachine{}, nil).Times(2)
mockVMSSClient := mockvmssclient.NewMockInterface(ctrl)
mockVMSSClient.EXPECT().List(gomock.Any(), "fakeId").Return([]compute.VirtualMachineScaleSet{}, nil).Times(2)
mockAzClient := &azClient{
virtualMachinesClient: mockVMClient,
virtualMachineScaleSetsClient: mockVMSSClient,
Expand Down Expand Up @@ -338,6 +340,7 @@ func TestCreateAzureManagerWithNilConfig(t *testing.T) {
mockVMClient := mockvmclient.NewMockInterface(ctrl)
mockVMSSClient := mockvmssclient.NewMockInterface(ctrl)
mockVMSSClient.EXPECT().List(gomock.Any(), "resourceGroup").Return([]compute.VirtualMachineScaleSet{}, nil).AnyTimes()
mockVMClient.EXPECT().List(gomock.Any(), "resourceGroup").Return([]compute.VirtualMachine{}, nil).AnyTimes()
mockAzClient := &azClient{
virtualMachinesClient: mockVMClient,
virtualMachineScaleSetsClient: mockVMSSClient,
Expand Down Expand Up @@ -663,7 +666,10 @@ func TestGetFilteredAutoscalingGroupsVmss(t *testing.T) {
expectedScaleSets := []compute.VirtualMachineScaleSet{fakeVMSSWithTags(vmssName, map[string]*string{vmssTag: &vmssTagValue, "min": &min, "max": &max})}
mockVMSSClient := mockvmssclient.NewMockInterface(ctrl)
mockVMSSClient.EXPECT().List(gomock.Any(), manager.config.ResourceGroup).Return(expectedScaleSets, nil).AnyTimes()
mockVMClient := mockvmclient.NewMockInterface(ctrl)
mockVMClient.EXPECT().List(gomock.Any(), manager.config.ResourceGroup).Return([]compute.VirtualMachine{}, nil).AnyTimes()
manager.azClient.virtualMachineScaleSetsClient = mockVMSSClient
manager.azClient.virtualMachinesClient = mockVMClient
err := manager.forceRefresh()
assert.NoError(t, err)

Expand Down Expand Up @@ -736,6 +742,9 @@ func TestFetchAutoAsgsVmss(t *testing.T) {
mockVMSSVMClient := mockvmssvmclient.NewMockInterface(ctrl)
mockVMSSVMClient.EXPECT().List(gomock.Any(), manager.config.ResourceGroup, vmssName, gomock.Any()).Return(expectedVMSSVMs, nil).AnyTimes()
manager.azClient.virtualMachineScaleSetVMsClient = mockVMSSVMClient
mockVMClient := mockvmclient.NewMockInterface(ctrl)
manager.azClient.virtualMachinesClient = mockVMClient
mockVMClient.EXPECT().List(gomock.Any(), manager.config.ResourceGroup).Return([]compute.VirtualMachine{}, nil).AnyTimes()
err := manager.forceRefresh()
assert.NoError(t, err)

Expand Down
33 changes: 22 additions & 11 deletions cluster-autoscaler/cloudprovider/azure/azure_scale_set_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -136,6 +136,9 @@ func TestTargetSize(t *testing.T) {
mockVMSSClient := mockvmssclient.NewMockInterface(ctrl)
mockVMSSClient.EXPECT().List(gomock.Any(), provider.azureManager.config.ResourceGroup).Return(expectedScaleSets, nil).AnyTimes()
provider.azureManager.azClient.virtualMachineScaleSetsClient = mockVMSSClient
mockVMClient := mockvmclient.NewMockInterface(ctrl)
mockVMClient.EXPECT().List(gomock.Any(), provider.azureManager.config.ResourceGroup).Return(expectedVMs, nil).AnyTimes()
provider.azureManager.azClient.virtualMachinesClient = mockVMClient

if orchMode == compute.Uniform {

Expand All @@ -145,9 +148,7 @@ func TestTargetSize(t *testing.T) {

} else {
provider.azureManager.config.EnableVmssFlex = true
mockVMClient := mockvmclient.NewMockInterface(ctrl)
mockVMClient.EXPECT().ListVmssFlexVMsWithoutInstanceView(gomock.Any(), "test-asg").Return(expectedVMs, nil).AnyTimes()
provider.azureManager.azClient.virtualMachinesClient = mockVMClient
}

err := provider.azureManager.forceRefresh()
Expand Down Expand Up @@ -183,6 +184,9 @@ func TestIncreaseSize(t *testing.T) {
mockVMSSClient.EXPECT().CreateOrUpdateAsync(gomock.Any(), provider.azureManager.config.ResourceGroup, "test-asg", gomock.Any()).Return(nil, nil)
mockVMSSClient.EXPECT().WaitForCreateOrUpdateResult(gomock.Any(), gomock.Any(), provider.azureManager.config.ResourceGroup).Return(&http.Response{StatusCode: http.StatusOK}, nil).AnyTimes()
provider.azureManager.azClient.virtualMachineScaleSetsClient = mockVMSSClient
mockVMClient := mockvmclient.NewMockInterface(ctrl)
mockVMClient.EXPECT().List(gomock.Any(), provider.azureManager.config.ResourceGroup).Return(expectedVMs, nil).AnyTimes()
provider.azureManager.azClient.virtualMachinesClient = mockVMClient

if orchMode == compute.Uniform {

Expand All @@ -192,9 +196,7 @@ func TestIncreaseSize(t *testing.T) {
} else {

provider.azureManager.config.EnableVmssFlex = true
mockVMClient := mockvmclient.NewMockInterface(ctrl)
mockVMClient.EXPECT().ListVmssFlexVMsWithoutInstanceView(gomock.Any(), "test-asg").Return(expectedVMs, nil).AnyTimes()
provider.azureManager.azClient.virtualMachinesClient = mockVMClient
}
err := provider.azureManager.forceRefresh()
assert.NoError(t, err)
Expand Down Expand Up @@ -257,6 +259,7 @@ func TestIncreaseSizeOnVMProvisioningFailed(t *testing.T) {

expectedScaleSets := newTestVMSSList(3, "vmss-failed-upscale", "eastus", compute.Uniform)
expectedVMSSVMs := newTestVMSSVMList(3)
expectedVMs := newTestVMList(3)
expectedVMSSVMs[2].ProvisioningState = to.StringPtr(provisioningStateFailed)
if !testCase.isMissingInstanceView {
expectedVMSSVMs[2].InstanceView = &compute.VirtualMachineScaleSetVMInstanceView{Statuses: &testCase.statuses}
Expand All @@ -270,6 +273,9 @@ func TestIncreaseSizeOnVMProvisioningFailed(t *testing.T) {
mockVMSSVMClient := mockvmssvmclient.NewMockInterface(ctrl)
mockVMSSVMClient.EXPECT().List(gomock.Any(), manager.config.ResourceGroup, "vmss-failed-upscale", gomock.Any()).Return(expectedVMSSVMs, nil).AnyTimes()
manager.azClient.virtualMachineScaleSetVMsClient = mockVMSSVMClient
mockVMClient := mockvmclient.NewMockInterface(ctrl)
mockVMClient.EXPECT().List(gomock.Any(), manager.config.ResourceGroup).Return(expectedVMs, nil).AnyTimes()
manager.azClient.virtualMachinesClient = mockVMClient
manager.explicitlyConfigured["vmss-failed-upscale"] = true
registered := manager.RegisterNodeGroup(newTestScaleSet(manager, vmssName))
assert.True(t, registered)
Expand Down Expand Up @@ -359,6 +365,9 @@ func TestBelongs(t *testing.T) {
mockVMSSClient := mockvmssclient.NewMockInterface(ctrl)
mockVMSSClient.EXPECT().List(gomock.Any(), provider.azureManager.config.ResourceGroup).Return(expectedScaleSets, nil)
provider.azureManager.azClient.virtualMachineScaleSetsClient = mockVMSSClient
mockVMClient := mockvmclient.NewMockInterface(ctrl)
mockVMClient.EXPECT().List(gomock.Any(), provider.azureManager.config.ResourceGroup).Return(expectedVMs, nil).AnyTimes()
provider.azureManager.azClient.virtualMachinesClient = mockVMClient

if orchMode == compute.Uniform {

Expand All @@ -369,9 +378,7 @@ func TestBelongs(t *testing.T) {
} else {

provider.azureManager.config.EnableVmssFlex = true
mockVMClient := mockvmclient.NewMockInterface(ctrl)
mockVMClient.EXPECT().ListVmssFlexVMsWithoutInstanceView(gomock.Any(), "test-asg").Return(expectedVMs, nil).AnyTimes()
provider.azureManager.azClient.virtualMachinesClient = mockVMClient
}

registered := provider.azureManager.RegisterNodeGroup(
Expand Down Expand Up @@ -422,14 +429,15 @@ func TestDeleteNodes(t *testing.T) {

mockVMSSVMClient := mockvmssvmclient.NewMockInterface(ctrl)
mockVMClient := mockvmclient.NewMockInterface(ctrl)
manager.azClient.virtualMachinesClient = mockVMClient
mockVMClient.EXPECT().List(gomock.Any(), manager.config.ResourceGroup).Return(expectedVMs, nil).AnyTimes()

if orchMode == compute.Uniform {
mockVMSSVMClient.EXPECT().List(gomock.Any(), manager.config.ResourceGroup, "test-asg", gomock.Any()).Return(expectedVMSSVMs, nil).AnyTimes()
manager.azClient.virtualMachineScaleSetVMsClient = mockVMSSVMClient
} else {
manager.config.EnableVmssFlex = true
mockVMClient.EXPECT().ListVmssFlexVMsWithoutInstanceView(gomock.Any(), "test-asg").Return(expectedVMs, nil).AnyTimes()
manager.azClient.virtualMachinesClient = mockVMClient

}

Expand Down Expand Up @@ -521,6 +529,9 @@ func TestDeleteNodeUnregistered(t *testing.T) {
mockVMSSClient.EXPECT().DeleteInstancesAsync(gomock.Any(), manager.config.ResourceGroup, gomock.Any(), gomock.Any(), false).Return(nil, nil)
mockVMSSClient.EXPECT().WaitForDeleteInstancesResult(gomock.Any(), gomock.Any(), manager.config.ResourceGroup).Return(&http.Response{StatusCode: http.StatusOK}, nil).AnyTimes()
manager.azClient.virtualMachineScaleSetsClient = mockVMSSClient
mockVMClient := mockvmclient.NewMockInterface(ctrl)
mockVMClient.EXPECT().List(gomock.Any(), manager.config.ResourceGroup).Return(expectedVMs, nil).AnyTimes()
manager.azClient.virtualMachinesClient = mockVMClient

if orchMode == compute.Uniform {

Expand All @@ -530,9 +541,7 @@ func TestDeleteNodeUnregistered(t *testing.T) {
} else {

manager.config.EnableVmssFlex = true
mockVMClient := mockvmclient.NewMockInterface(ctrl)
mockVMClient.EXPECT().ListVmssFlexVMsWithoutInstanceView(gomock.Any(), "test-asg").Return(expectedVMs, nil).AnyTimes()
manager.azClient.virtualMachinesClient = mockVMClient
}
err := manager.forceRefresh()
assert.NoError(t, err)
Expand Down Expand Up @@ -678,6 +687,9 @@ func TestScaleSetNodes(t *testing.T) {
mockVMSSClient := mockvmssclient.NewMockInterface(ctrl)
mockVMSSClient.EXPECT().List(gomock.Any(), provider.azureManager.config.ResourceGroup).Return(expectedScaleSets, nil).AnyTimes()
provider.azureManager.azClient.virtualMachineScaleSetsClient = mockVMSSClient
mockVMClient := mockvmclient.NewMockInterface(ctrl)
mockVMClient.EXPECT().List(gomock.Any(), provider.azureManager.config.ResourceGroup).Return(expectedVMs, nil).AnyTimes()
provider.azureManager.azClient.virtualMachinesClient = mockVMClient

if orchMode == compute.Uniform {

Expand All @@ -687,9 +699,7 @@ func TestScaleSetNodes(t *testing.T) {

} else {
provider.azureManager.config.EnableVmssFlex = true
mockVMClient := mockvmclient.NewMockInterface(ctrl)
mockVMClient.EXPECT().ListVmssFlexVMsWithoutInstanceView(gomock.Any(), "test-asg").Return(expectedVMs, nil).AnyTimes()
provider.azureManager.azClient.virtualMachinesClient = mockVMClient
}

registered := provider.azureManager.RegisterNodeGroup(
Expand Down Expand Up @@ -744,6 +754,7 @@ func TestEnableVmssFlexFlag(t *testing.T) {
provider.azureManager.config.EnableVmssFlex = false
provider.azureManager.azClient.virtualMachineScaleSetsClient = mockVMSSClient
mockVMClient := mockvmclient.NewMockInterface(ctrl)
mockVMClient.EXPECT().List(gomock.Any(), provider.azureManager.config.ResourceGroup).Return(expectedVMs, nil).AnyTimes()
mockVMClient.EXPECT().ListVmssFlexVMsWithoutInstanceView(gomock.Any(), "test-asg").Return(expectedVMs, nil).AnyTimes()
provider.azureManager.azClient.virtualMachinesClient = mockVMClient

Expand Down
Loading

0 comments on commit d665ce6

Please sign in to comment.