Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[Azure VMs Pool] Support mixed agentpool types in Azure Cache #6689

Merged
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
78 changes: 56 additions & 22 deletions cluster-autoscaler/cloudprovider/azure/azure_cache.go
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,7 @@ type azureCache struct {
// Cache content.
resourceGroup string
vmType string
vmsPoolMap map[string]struct{} // track the nodepools that're vms pool
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

can we have a more defined type as opposed to just using struct? VMpools? I observe that we have defined this inside azure_vm_pool.go

Copy link
Contributor Author

@wenxuan0923 wenxuan0923 Apr 9, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This map[string]struct{} is just being used as set here, since golang doesn't have set data structure. It is a common pattern as you can see many of such usage in rp code base.
The purpose is only to be able to find out if an agentpool is vms pool, given its name, so we don't need to use any specific data type here.

We are setting value like this:

vmsPoolMap[to.String(vmPoolName)] = struct{}{}

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thanks Bryce, the Set in apimachinery is also a map, so I don't think it's necessary:

image

Copy link
Member

@Bryce-Soghigian Bryce-Soghigian Apr 9, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

All go implementations of set will use map[T]EmptyStruct for zero bytes.

https://pkg.go.dev/k8s.io/apimachinery/pkg/util/sets#Set is the location of the set package though that works with all types. Set in pkg/fields isn't one i have heard of before.

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

rename the field to vmsPoolSet to make it much more clearer?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Renamed

scaleSets map[string]compute.VirtualMachineScaleSet
virtualMachines map[string][]compute.VirtualMachine
registeredNodeGroups []cloudprovider.NodeGroup
Expand All @@ -67,6 +68,7 @@ func newAzureCache(client *azClient, cacheTTL time.Duration, resourceGroup, vmTy
refreshInterval: cacheTTL,
resourceGroup: resourceGroup,
vmType: vmType,
vmsPoolMap: make(map[string]struct{}),
scaleSets: make(map[string]compute.VirtualMachineScaleSet),
virtualMachines: make(map[string][]compute.VirtualMachine),
registeredNodeGroups: make([]cloudprovider.NodeGroup, 0),
Expand All @@ -87,6 +89,13 @@ func newAzureCache(client *azClient, cacheTTL time.Duration, resourceGroup, vmTy
return cache, nil
}

func (m *azureCache) getVMsPoolMap() map[string]struct{} {
m.mutex.Lock()
defer m.mutex.Unlock()

return m.vmsPoolMap
}

func (m *azureCache) getVirtualMachines() map[string][]compute.VirtualMachine {
m.mutex.Lock()
defer m.mutex.Unlock()
Expand Down Expand Up @@ -165,54 +174,77 @@ func (m *azureCache) fetchAzureResources() error {
m.mutex.Lock()
defer m.mutex.Unlock()

switch m.vmType {
case vmTypeVMSS:
// List all VMSS in the RG.
vmssResult, err := m.fetchScaleSets()
if err == nil {
m.scaleSets = vmssResult
} else {
return err
}
case vmTypeStandard:
// List all VMs in the RG.
vmResult, err := m.fetchVirtualMachines()
if err == nil {
m.virtualMachines = vmResult
} else {
return err
}
// fetch all the resources since CAS may be operating on mixed nodepools
// including both VMSS and VMs pools
vmssResult, err := m.fetchScaleSets()
if err == nil {
m.scaleSets = vmssResult
} else {
return err
}

vmResult, vmsPoolMap, err := m.fetchVirtualMachines()
if err == nil {
m.virtualMachines = vmResult
m.vmsPoolMap = vmsPoolMap
} else {
return err
}

return nil
}

const (
agentpoolNameTag = "aks-managed-poolName"
agentpoolTypeTag = "aks-managed-agentpool-type"
vmsPoolType = "VirtualMachines"
)

// fetchVirtualMachines returns the updated list of virtual machines in the config resource group using the Azure API.
func (m *azureCache) fetchVirtualMachines() (map[string][]compute.VirtualMachine, error) {
func (m *azureCache) fetchVirtualMachines() (map[string][]compute.VirtualMachine, map[string]struct{}, error) {
ctx, cancel := getContextWithCancel()
defer cancel()

result, err := m.azClient.virtualMachinesClient.List(ctx, m.resourceGroup)
if err != nil {
klog.Errorf("VirtualMachinesClient.List in resource group %q failed: %v", m.resourceGroup, err)
return nil, err.Error()
return nil, nil, err.Error()
}

instances := make(map[string][]compute.VirtualMachine)
// track the nodepools that're vms pools
vmsPoolMap := make(map[string]struct{})
for _, instance := range result {
if instance.Tags == nil {
continue
}

tags := instance.Tags
vmPoolName := tags["poolName"]
vmPoolName := tags[agentpoolNameTag]
// fall back to legacy tag name if not found
if vmPoolName == nil {
vmPoolName = tags["poolName"]
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

can we add this as a const as well?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Done

}

if vmPoolName == nil {
continue
}

instances[to.String(vmPoolName)] = append(instances[to.String(vmPoolName)], instance)

// if the nodepool is already in the map, skip it
if _, ok := vmsPoolMap[to.String(vmPoolName)]; ok {
continue
}

// nodes from vms pool will have tag "aks-managed-agentpool-type" set to "VirtualMachines"
if agnetpoolType := tags[agentpoolTypeTag]; agnetpoolType != nil {
if strings.EqualFold(to.String(agnetpoolType), vmsPoolType) {
vmsPoolMap[to.String(vmPoolName)] = struct{}{}
}
}
}
return instances, nil
return instances, vmsPoolMap, nil
}

// fetchScaleSets returns the updated list of scale sets in the config resource group using the Azure API.
Expand Down Expand Up @@ -323,6 +355,7 @@ func (m *azureCache) getAutoscalingOptions(ref azureRef) map[string]string {

// FindForInstance returns node group of the given Instance
func (m *azureCache) FindForInstance(instance *azureRef, vmType string) (cloudprovider.NodeGroup, error) {
vmsPoolMap := m.getVMsPoolMap()
m.mutex.Lock()
defer m.mutex.Unlock()

Expand All @@ -340,7 +373,8 @@ func (m *azureCache) FindForInstance(instance *azureRef, vmType string) (cloudpr
return nil, nil
}

if vmType == vmTypeVMSS {
// cluster with vmss pool only
if vmType == vmTypeVMSS && len(vmsPoolMap) == 0 {
if m.areAllScaleSetsUniform() {
// Omit virtual machines not managed by vmss only in case of uniform scale set.
if ok := virtualMachineRE.Match([]byte(inst.Name)); ok {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,9 @@ func newTestAzureManager(t *testing.T) *AzureManager {
mockVMSSClient.EXPECT().List(gomock.Any(), "rg").Return(expectedScaleSets, nil).AnyTimes()
mockVMSSVMClient := mockvmssvmclient.NewMockInterface(ctrl)
mockVMSSVMClient.EXPECT().List(gomock.Any(), "rg", "test-vmss", gomock.Any()).Return(expectedVMSSVMs, nil).AnyTimes()
mockVMClient := mockvmclient.NewMockInterface(ctrl)
expectedVMs := newTestVMList(3)
mockVMClient.EXPECT().List(gomock.Any(), "rg").Return(expectedVMs, nil).AnyTimes()

manager := &AzureManager{
env: azure.PublicCloud,
Expand All @@ -57,6 +60,7 @@ func newTestAzureManager(t *testing.T) *AzureManager {
azClient: &azClient{
virtualMachineScaleSetsClient: mockVMSSClient,
virtualMachineScaleSetVMsClient: mockVMSSVMClient,
virtualMachinesClient: mockVMClient,
deploymentsClient: &DeploymentsClientMock{
FakeStore: map[string]resources.DeploymentExtended{
"deployment": {
Expand Down Expand Up @@ -115,9 +119,68 @@ func TestNodeGroups(t *testing.T) {
assert.Equal(t, len(provider.NodeGroups()), 0)

registered := provider.azureManager.RegisterNodeGroup(
newTestScaleSet(provider.azureManager, "test-asg"))
newTestScaleSet(provider.azureManager, "test-asg"),
)
assert.True(t, registered)
assert.Equal(t, len(provider.NodeGroups()), 1)
registered = provider.azureManager.RegisterNodeGroup(
newTestVMsPool(provider.azureManager, "test-vms-pool"),
)
assert.True(t, registered)
assert.Equal(t, len(provider.NodeGroups()), 2)
}

func TestMixedNodeGroups(t *testing.T) {
ctrl := gomock.NewController(t)
provider := newTestProvider(t)
mockVMSSClient := mockvmssclient.NewMockInterface(ctrl)
mockVMClient := mockvmclient.NewMockInterface(ctrl)
mockVMSSVMClient := mockvmssvmclient.NewMockInterface(ctrl)
provider.azureManager.azClient.virtualMachinesClient = mockVMClient
provider.azureManager.azClient.virtualMachineScaleSetsClient = mockVMSSClient
provider.azureManager.azClient.virtualMachineScaleSetVMsClient = mockVMSSVMClient

expectedScaleSets := newTestVMSSList(3, "test-asg", "eastus", compute.Uniform)
expectedVMsPoolVMs := newTestVMsPoolVMList(3)
expectedVMSSVMs := newTestVMSSVMList(3)

mockVMSSClient.EXPECT().List(gomock.Any(), provider.azureManager.config.ResourceGroup).Return(expectedScaleSets, nil).AnyTimes()
mockVMClient.EXPECT().List(gomock.Any(), provider.azureManager.config.ResourceGroup).Return(expectedVMsPoolVMs, nil).AnyTimes()
mockVMSSVMClient.EXPECT().List(gomock.Any(), provider.azureManager.config.ResourceGroup, "test-asg", gomock.Any()).Return(expectedVMSSVMs, nil).AnyTimes()

assert.Equal(t, len(provider.NodeGroups()), 0)
registered := provider.azureManager.RegisterNodeGroup(
newTestScaleSet(provider.azureManager, "test-asg"),
)
provider.azureManager.explicitlyConfigured["test-asg"] = true
assert.True(t, registered)

registered = provider.azureManager.RegisterNodeGroup(
newTestVMsPool(provider.azureManager, "test-vms-pool"),
)
provider.azureManager.explicitlyConfigured["test-vms-pool"] = true
assert.True(t, registered)
assert.Equal(t, len(provider.NodeGroups()), 2)

// refresh cache
provider.azureManager.forceRefresh()

// node from vmss pool
node := newApiNode(compute.Uniform, 0)
group, err := provider.NodeGroupForNode(node)
assert.NoError(t, err)
assert.NotNil(t, group, "Group should not be nil")
assert.Equal(t, group.Id(), "test-asg")
assert.Equal(t, group.MinSize(), 1)
assert.Equal(t, group.MaxSize(), 5)

// node from vms pool
vmsPoolNode := newVMsNode(0)
group, err = provider.NodeGroupForNode(vmsPoolNode)
assert.NoError(t, err)
assert.NotNil(t, group, "Group should not be nil")
assert.Equal(t, group.Id(), "test-vms-pool")
assert.Equal(t, group.MinSize(), 3)
assert.Equal(t, group.MaxSize(), 10)
}

func TestNodeGroupForNode(t *testing.T) {
Expand All @@ -135,6 +198,9 @@ func TestNodeGroupForNode(t *testing.T) {
mockVMSSClient := mockvmssclient.NewMockInterface(ctrl)
mockVMSSClient.EXPECT().List(gomock.Any(), provider.azureManager.config.ResourceGroup).Return(expectedScaleSets, nil)
provider.azureManager.azClient.virtualMachineScaleSetsClient = mockVMSSClient
mockVMClient := mockvmclient.NewMockInterface(ctrl)
provider.azureManager.azClient.virtualMachinesClient = mockVMClient
mockVMClient.EXPECT().List(gomock.Any(), provider.azureManager.config.ResourceGroup).Return(expectedVMs, nil).AnyTimes()

if orchMode == compute.Uniform {

Expand All @@ -143,11 +209,8 @@ func TestNodeGroupForNode(t *testing.T) {
provider.azureManager.azClient.virtualMachineScaleSetVMsClient = mockVMSSVMClient
} else {

mockVMClient := mockvmclient.NewMockInterface(ctrl)
provider.azureManager.config.EnableVmssFlex = true
mockVMClient.EXPECT().ListVmssFlexVMsWithoutInstanceView(gomock.Any(), "test-asg").Return(expectedVMs, nil).AnyTimes()
provider.azureManager.azClient.virtualMachinesClient = mockVMClient

}

registered := provider.azureManager.RegisterNodeGroup(
Expand Down
5 changes: 5 additions & 0 deletions cluster-autoscaler/cloudprovider/azure/azure_manager.go
Original file line number Diff line number Diff line change
Expand Up @@ -147,6 +147,11 @@ func (m *AzureManager) buildNodeGroupFromSpec(spec string) (cloudprovider.NodeGr
return nil, fmt.Errorf("failed to parse node group spec: %v", err)
}

vmsPoolMap := m.azureCache.getVMsPoolMap()
if _, ok := vmsPoolMap[s.Name]; ok {
return NewVMsPool(s, m), nil
}

switch m.config.VMType {
case vmTypeStandard:
return NewAgentPool(s, m)
Expand Down
9 changes: 9 additions & 0 deletions cluster-autoscaler/cloudprovider/azure/azure_manager_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -144,6 +144,7 @@ func TestCreateAzureManagerValidConfig(t *testing.T) {
mockVMClient := mockvmclient.NewMockInterface(ctrl)
mockVMSSClient := mockvmssclient.NewMockInterface(ctrl)
mockVMSSClient.EXPECT().List(gomock.Any(), "fakeId").Return([]compute.VirtualMachineScaleSet{}, nil).Times(2)
mockVMClient.EXPECT().List(gomock.Any(), "fakeId").Return([]compute.VirtualMachine{}, nil).Times(2)
mockAzClient := &azClient{
virtualMachinesClient: mockVMClient,
virtualMachineScaleSetsClient: mockVMSSClient,
Expand Down Expand Up @@ -226,6 +227,7 @@ func TestCreateAzureManagerValidConfigForStandardVMType(t *testing.T) {
mockVMClient := mockvmclient.NewMockInterface(ctrl)
mockVMClient.EXPECT().List(gomock.Any(), "fakeId").Return([]compute.VirtualMachine{}, nil).Times(2)
mockVMSSClient := mockvmssclient.NewMockInterface(ctrl)
mockVMSSClient.EXPECT().List(gomock.Any(), "fakeId").Return([]compute.VirtualMachineScaleSet{}, nil).Times(2)
mockAzClient := &azClient{
virtualMachinesClient: mockVMClient,
virtualMachineScaleSetsClient: mockVMSSClient,
Expand Down Expand Up @@ -338,6 +340,7 @@ func TestCreateAzureManagerWithNilConfig(t *testing.T) {
mockVMClient := mockvmclient.NewMockInterface(ctrl)
mockVMSSClient := mockvmssclient.NewMockInterface(ctrl)
mockVMSSClient.EXPECT().List(gomock.Any(), "resourceGroup").Return([]compute.VirtualMachineScaleSet{}, nil).AnyTimes()
mockVMClient.EXPECT().List(gomock.Any(), "resourceGroup").Return([]compute.VirtualMachine{}, nil).AnyTimes()
mockAzClient := &azClient{
virtualMachinesClient: mockVMClient,
virtualMachineScaleSetsClient: mockVMSSClient,
Expand Down Expand Up @@ -663,7 +666,10 @@ func TestGetFilteredAutoscalingGroupsVmss(t *testing.T) {
expectedScaleSets := []compute.VirtualMachineScaleSet{fakeVMSSWithTags(vmssName, map[string]*string{vmssTag: &vmssTagValue, "min": &min, "max": &max})}
mockVMSSClient := mockvmssclient.NewMockInterface(ctrl)
mockVMSSClient.EXPECT().List(gomock.Any(), manager.config.ResourceGroup).Return(expectedScaleSets, nil).AnyTimes()
mockVMClient := mockvmclient.NewMockInterface(ctrl)
mockVMClient.EXPECT().List(gomock.Any(), manager.config.ResourceGroup).Return([]compute.VirtualMachine{}, nil).AnyTimes()
manager.azClient.virtualMachineScaleSetsClient = mockVMSSClient
manager.azClient.virtualMachinesClient = mockVMClient
err := manager.forceRefresh()
assert.NoError(t, err)

Expand Down Expand Up @@ -736,6 +742,9 @@ func TestFetchAutoAsgsVmss(t *testing.T) {
mockVMSSVMClient := mockvmssvmclient.NewMockInterface(ctrl)
mockVMSSVMClient.EXPECT().List(gomock.Any(), manager.config.ResourceGroup, vmssName, gomock.Any()).Return(expectedVMSSVMs, nil).AnyTimes()
manager.azClient.virtualMachineScaleSetVMsClient = mockVMSSVMClient
mockVMClient := mockvmclient.NewMockInterface(ctrl)
manager.azClient.virtualMachinesClient = mockVMClient
mockVMClient.EXPECT().List(gomock.Any(), manager.config.ResourceGroup).Return([]compute.VirtualMachine{}, nil).AnyTimes()
err := manager.forceRefresh()
assert.NoError(t, err)

Expand Down
Loading
Loading