Skip to content

Commit

Permalink
VMSS Flex support for MachinePools
Browse files Browse the repository at this point in the history
  • Loading branch information
mboersma committed Dec 20, 2022
1 parent 501a908 commit 2a4710c
Show file tree
Hide file tree
Showing 44 changed files with 1,245 additions and 167 deletions.
4 changes: 3 additions & 1 deletion Tiltfile
Original file line number Diff line number Diff line change
Expand Up @@ -359,7 +359,9 @@ def deploy_worker_templates(template, substitutions):
calico_values = "./templates/addons/calico/values.yaml"
flavor_cmd += "; " + helm_cmd + " repo add projectcalico https://projectcalico.docs.tigera.io/charts; " + helm_cmd + " --kubeconfig ./${CLUSTER_NAME}.kubeconfig install calico projectcalico/tigera-operator -f " + calico_values + " --namespace tigera-operator --create-namespace; kubectl --kubeconfig ./${CLUSTER_NAME}.kubeconfig apply -f ./templates/addons/calico/felix-override.yaml"
if "external-cloud-provider" in flavor_name:
flavor_cmd += "; " + helm_cmd + " --kubeconfig ./${CLUSTER_NAME}.kubeconfig install --repo https://raw.githubusercontent.com/kubernetes-sigs/cloud-provider-azure/master/helm/repo cloud-provider-azure --generate-name --set infra.clusterName=${CLUSTER_NAME}"
# TODO: remove extra config once the azure-cloud-provider Helm chart release supports VMSS Flex.
flavor_cmd += "; " + helm_cmd + " --kubeconfig ./${CLUSTER_NAME}.kubeconfig install --repo https://raw.githubusercontent.com/kubernetes-sigs/cloud-provider-azure/master/helm/repo cloud-provider-azure --generate-name --set infra.clusterName=${CLUSTER_NAME}" + \
" --set cloudControllerManager.imageTag=v1.26.0 --set cloudNodeManager.imageTag=v1.26.0"
local_resource(
name = flavor_name,
cmd = flavor_cmd,
Expand Down
2 changes: 1 addition & 1 deletion api/v1beta1/azuremachine_webhook_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ package v1beta1
import (
"testing"

"github.com/Azure/azure-sdk-for-go/services/compute/mgmt/2019-12-01/compute"
"github.com/Azure/azure-sdk-for-go/services/compute/mgmt/2021-07-01/compute"
. "github.com/onsi/gomega"
"k8s.io/apimachinery/pkg/api/resource"
"k8s.io/utils/pointer"
Expand Down
13 changes: 13 additions & 0 deletions api/v1beta1/types.go
Original file line number Diff line number Diff line change
Expand Up @@ -855,3 +855,16 @@ type UserManagedBootDiagnostics struct {
// +kubebuilder:validation:MaxLength=1024
StorageAccountURI string `json:"storageAccountURI"`
}

// OrchestrationModeType is the orchestration mode of the Virtual Machine Scale Set backing the AzureMachinePool.
// +kubebuilder:validation:Enum=Flexible;Uniform
type OrchestrationModeType string

const (
// FlexibleOrchestrationMode is the Flexible Virtual Machine Scale Set mode, which treats VMs individually rather
// than in a uniform manner.
FlexibleOrchestrationMode OrchestrationModeType = "Flexible"
// UniformOrchestrationMode is the Uniform Virtual Machine Scale Set mode, which treats VMs as sub-resources of the
// Virtual Machine Scale Set rather than as individual resources.
UniformOrchestrationMode OrchestrationModeType = "Uniform"
)
40 changes: 40 additions & 0 deletions azure/converters/vmss.go
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,38 @@ func SDKToVMSS(sdkvmss compute.VirtualMachineScaleSet, sdkinstances []compute.Vi
return vmss
}

// SDKVMToVMSSVM builds an azure.VMSSVM from an Azure SDK VirtualMachine.
//
// The ID is always copied. When the VM carries no VirtualMachineProperties,
// nothing else is populated. Otherwise the state defaults to infrav1.Creating
// unless a provisioning state is reported, and the name, image and
// availability zone are filled in only when the SDK object provides them.
func SDKVMToVMSSVM(sdkInstance compute.VirtualMachine) *azure.VMSSVM {
	vm := &azure.VMSSVM{ID: to.String(sdkInstance.ID)}

	props := sdkInstance.VirtualMachineProperties
	if props == nil {
		return vm
	}

	if props.ProvisioningState != nil {
		vm.State = infrav1.ProvisioningState(*props.ProvisioningState)
	} else {
		vm.State = infrav1.Creating
	}

	if osProfile := props.OsProfile; osProfile != nil && osProfile.ComputerName != nil {
		vm.Name = to.String(osProfile.ComputerName)
	}

	if storage := props.StorageProfile; storage != nil && storage.ImageReference != nil {
		// The presence of a purchase plan is passed on as the third-party image flag.
		hasPlan := sdkInstance.Plan != nil
		vm.Image = SDKImageToImage(storage.ImageReference, hasPlan)
	}

	// Only the first listed zone is recorded; an instance is expected to have a single zone.
	if zones := to.StringSlice(sdkInstance.Zones); len(zones) > 0 {
		vm.AvailabilityZone = zones[0]
	}

	return vm
}

// SDKToVMSSVM converts an Azure SDK VirtualMachineScaleSetVM into an azure.VMSSVM.
func SDKToVMSSVM(sdkInstance compute.VirtualMachineScaleSetVM) *azure.VMSSVM {
// Convert resourceGroup Name ID ( ProviderID in capz objects )
Expand Down Expand Up @@ -117,3 +149,11 @@ func SDKImageToImage(sdkImageRef *compute.ImageReference, isThirdPartyImage bool
},
}
}

// GetOrchestrationMode maps an infrav1.OrchestrationModeType onto the matching
// compute.OrchestrationMode. Any value other than Flexible, including an empty
// or unrecognised one, yields the Uniform mode.
func GetOrchestrationMode(modeType infrav1.OrchestrationModeType) compute.OrchestrationMode {
	switch modeType {
	case infrav1.FlexibleOrchestrationMode:
		return compute.OrchestrationModeFlexible
	default:
		return compute.OrchestrationModeUniform
	}
}
100 changes: 100 additions & 0 deletions azure/converters/vmss_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ import (
"github.com/Azure/azure-sdk-for-go/services/compute/mgmt/2021-11-01/compute"
"github.com/Azure/go-autorest/autorest/to"
"github.com/onsi/gomega"
infrav1 "sigs.k8s.io/cluster-api-provider-azure/api/v1beta1"
"sigs.k8s.io/cluster-api-provider-azure/azure"
"sigs.k8s.io/cluster-api-provider-azure/azure/converters"
)
Expand Down Expand Up @@ -122,3 +123,102 @@ func Test_SDKToVMSS(t *testing.T) {
})
}
}

// Test_SDKVMToVMSSVM checks the conversion of SDK virtual machines into
// azure.VMSSVM values for a bare VM and for VMs carrying zones, an image
// reference, or an explicit provisioning state.
func Test_SDKVMToVMSSVM(t *testing.T) {
	// propsNamed returns VM properties whose OS profile carries the given computer name.
	propsNamed := func(computerName string) *compute.VirtualMachineProperties {
		return &compute.VirtualMachineProperties{
			OsProfile: &compute.OSProfile{
				ComputerName: to.StringPtr(computerName),
			},
		}
	}

	zonedProps := propsNamed("vmwithzones")

	storageProps := propsNamed("vmwithstorage")
	storageProps.StorageProfile = &compute.StorageProfile{
		ImageReference: &compute.ImageReference{
			ID: to.StringPtr("imageID"),
		},
	}

	stateProps := propsNamed("vmwithstate")
	stateProps.ProvisioningState = to.StringPtr("Succeeded")

	tests := []struct {
		name  string
		input compute.VirtualMachine
		want  *azure.VMSSVM
	}{
		{
			name:  "minimal VM",
			input: compute.VirtualMachine{ID: to.StringPtr("vmID1")},
			want:  &azure.VMSSVM{ID: "vmID1"},
		},
		{
			name: "VM with zones",
			input: compute.VirtualMachine{
				ID:                       to.StringPtr("vmID2"),
				VirtualMachineProperties: zonedProps,
				Zones:                    to.StringSlicePtr([]string{"zone0", "zone1"}),
			},
			want: &azure.VMSSVM{
				ID:               "vmID2",
				Name:             "vmwithzones",
				State:            "Creating",
				AvailabilityZone: "zone0",
			},
		},
		{
			name: "VM with storage",
			input: compute.VirtualMachine{
				ID:                       to.StringPtr("vmID3"),
				VirtualMachineProperties: storageProps,
			},
			want: &azure.VMSSVM{
				ID: "vmID3",
				Image: infrav1.Image{
					ID:          to.StringPtr("imageID"),
					Marketplace: &infrav1.AzureMarketplaceImage{},
				},
				Name:  "vmwithstorage",
				State: "Creating",
			},
		},
		{
			name: "VM with provisioning state",
			input: compute.VirtualMachine{
				ID:                       to.StringPtr("vmID4"),
				VirtualMachineProperties: stateProps,
			},
			want: &azure.VMSSVM{
				ID:    "vmID4",
				Name:  "vmwithstate",
				State: "Succeeded",
			},
		},
	}

	for _, tc := range tests {
		tc := tc // capture the range variable for the parallel subtest
		t.Run(tc.name, func(t *testing.T) {
			t.Parallel()
			g := gomega.NewGomegaWithT(t)
			g.Expect(converters.SDKVMToVMSSVM(tc.input)).To(gomega.Equal(tc.want))
		})
	}
}

// Test_GetOrchestrationMode checks that Flexible and Uniform map to their SDK
// counterparts and that an unrecognised value falls back to Uniform.
func Test_GetOrchestrationMode(t *testing.T) {
	g := gomega.NewGomegaWithT(t)

	expectations := []struct {
		mode infrav1.OrchestrationModeType
		want compute.OrchestrationMode
	}{
		{mode: infrav1.FlexibleOrchestrationMode, want: compute.OrchestrationModeFlexible},
		{mode: infrav1.UniformOrchestrationMode, want: compute.OrchestrationModeUniform},
		{mode: "invalid", want: compute.OrchestrationModeUniform},
	}

	for _, e := range expectations {
		g.Expect(converters.GetOrchestrationMode(e.mode)).To(gomega.Equal(e.want))
	}
}
1 change: 0 additions & 1 deletion azure/scope/clients.go
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,6 @@ import (
"github.com/Azure/go-autorest/autorest"
"github.com/Azure/go-autorest/autorest/azure"
"github.com/Azure/go-autorest/autorest/azure/auth"

azureutil "sigs.k8s.io/cluster-api-provider-azure/util/azure"
)

Expand Down
17 changes: 11 additions & 6 deletions azure/scope/machinepool.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,8 +19,11 @@ package scope
import (
"context"
"encoding/base64"
"fmt"
"strings"
"time"

autorestazure "github.com/Azure/go-autorest/autorest/azure"
"github.com/Azure/go-autorest/autorest/to"
"github.com/pkg/errors"
corev1 "k8s.io/api/core/v1"
Expand Down Expand Up @@ -128,6 +131,7 @@ func (m *MachinePoolScope) ScaleSetSpec() azure.ScaleSetSpec {
SpotVMOptions: m.AzureMachinePool.Spec.Template.SpotVMOptions,
FailureDomains: m.MachinePool.Spec.FailureDomains,
TerminateNotificationTimeout: m.AzureMachinePool.Spec.Template.TerminateNotificationTimeout,
OrchestrationMode: m.AzureMachinePool.Spec.OrchestrationMode,
}
}

Expand Down Expand Up @@ -331,17 +335,18 @@ func (m *MachinePoolScope) applyAzureMachinePoolMachines(ctx context.Context) er
}

func (m *MachinePoolScope) createMachine(ctx context.Context, machine azure.VMSSVM) error {
if machine.InstanceID == "" {
return errors.New("machine.InstanceID must not be empty")
}
ctx, _, done := tele.StartSpanWithLogger(ctx, "scope.MachinePoolScope.createMachine")
defer done()

if machine.Name == "" {
return errors.New("machine.Name must not be empty")
parsed, err := autorestazure.ParseResourceID(machine.ID)
if err != nil {
return errors.Wrap(err, fmt.Sprintf("failed to parse resource id %q", machine.ID))
}
instanceID := strings.ReplaceAll(parsed.ResourceName, "_", "-")

ampm := infrav1exp.AzureMachinePoolMachine{
ObjectMeta: metav1.ObjectMeta{
Name: m.AzureMachinePool.Name + "-" + machine.InstanceID,
Name: m.AzureMachinePool.Name + "-" + instanceID,
Namespace: m.AzureMachinePool.Namespace,
OwnerReferences: []metav1.OwnerReference{
{
Expand Down
88 changes: 45 additions & 43 deletions azure/services/scalesets/scalesets.go
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,6 @@ import (
"sigs.k8s.io/cluster-api-provider-azure/azure/converters"
"sigs.k8s.io/cluster-api-provider-azure/azure/services/resourceskus"
"sigs.k8s.io/cluster-api-provider-azure/util/generators"
"sigs.k8s.io/cluster-api-provider-azure/util/slice"
"sigs.k8s.io/cluster-api-provider-azure/util/tele"
)

Expand Down Expand Up @@ -271,10 +270,23 @@ func (s *Service) patchVMSSIfNeeded(ctx context.Context, infraVMSS *azure.VMSS)
}

hasModelChanges := hasModelModifyingDifferences(infraVMSS, vmss)
if maxSurge > 0 && (hasModelChanges || !infraVMSS.HasEnoughLatestModelOrNotMixedModel()) {
var isFlex bool
for _, instance := range infraVMSS.Instances {
if instance.InstanceID == "" {
isFlex = true
break
}
}
var hasEnoughLatestModelOrNotMixedModel bool
if isFlex {
hasEnoughLatestModelOrNotMixedModel = true
} else {
hasEnoughLatestModelOrNotMixedModel = infraVMSS.HasEnoughLatestModelOrNotMixedModel()
}
if maxSurge > 0 && (hasModelChanges || !hasEnoughLatestModelOrNotMixedModel) {
// surge capacity with the intention of lowering during instance reconciliation
surge := spec.Capacity + int64(maxSurge)
log.V(4).Info("surging...", "surge", surge)
log.V(4).Info("surging...", "surge", surge, "hasModelChanges", hasModelChanges, "hasEnoughLatestModelOrNotMixedModel", hasEnoughLatestModelOrNotMixedModel)
patch.Sku.Capacity = to.Int64Ptr(surge)
}

Expand Down Expand Up @@ -372,52 +384,34 @@ func (s *Service) validateSpec(ctx context.Context) error {
}
}

// Validate DiagnosticProfile spec
if spec.DiagnosticsProfile.Boot != nil {
if spec.DiagnosticsProfile.Boot.StorageAccountType == infrav1.UserManagedDiagnosticsStorage {
if spec.DiagnosticsProfile.Boot.UserManaged == nil {
return azure.WithTerminalError(fmt.Errorf("userManaged must be specified when storageAccountType is '%s'", infrav1.UserManagedDiagnosticsStorage))
} else if spec.DiagnosticsProfile.Boot.UserManaged.StorageAccountURI == "" {
return azure.WithTerminalError(fmt.Errorf("storageAccountURI cannot be empty when storageAccountType is '%s'", infrav1.UserManagedDiagnosticsStorage))
}
}

possibleStorageAccountTypeValues := []string{
string(infrav1.DisabledDiagnosticsStorage),
string(infrav1.ManagedDiagnosticsStorage),
string(infrav1.UserManagedDiagnosticsStorage),
}

if !slice.Contains(possibleStorageAccountTypeValues, string(spec.DiagnosticsProfile.Boot.StorageAccountType)) {
return azure.WithTerminalError(fmt.Errorf("invalid storageAccountType: %s. Allowed values are %v",
spec.DiagnosticsProfile.Boot.StorageAccountType, possibleStorageAccountTypeValues))
}
}

// Checking if selected availability zones are available selected VM type in location
azsInLocation, err := s.resourceSKUCache.GetZonesWithVMSize(ctx, spec.Size, s.Scope.Location())
if err != nil {
return errors.Wrapf(err, "failed to get zones for VM type %s in location %s", spec.Size, s.Scope.Location())
}

for _, az := range spec.FailureDomains {
if !slice.Contains(azsInLocation, az) {
return azure.WithTerminalError(errors.Errorf("availability zone %s is not available for VM type %s in location %s", az, spec.Size, s.Scope.Location()))
}
}

return nil
}

func (s *Service) buildVMSSFromSpec(ctx context.Context, vmssSpec azure.ScaleSetSpec) (compute.VirtualMachineScaleSet, error) {
ctx, _, done := tele.StartSpanWithLogger(ctx, "scalesets.Service.buildVMSSFromSpec")
ctx, log, done := tele.StartSpanWithLogger(ctx, "scalesets.Service.buildVMSSFromSpec")
defer done()

sku, err := s.resourceSKUCache.Get(ctx, vmssSpec.Size, resourceskus.VirtualMachines)
if err != nil {
return compute.VirtualMachineScaleSet{}, errors.Wrapf(err, "failed to get find SKU %s in compute api", vmssSpec.Size)
}

var platformFaultDomainCount int32
if vmssSpec.FailureDomains == nil || len(vmssSpec.FailureDomains) == 0 {
// Look up the availability zones offered for the selected VM size in this location.
azsInLocation, err := s.resourceSKUCache.GetZonesWithVMSize(ctx, vmssSpec.Size, s.Scope.Location())
if err != nil {
log.Error(err, "failed to get the zones for location", "location", s.Scope.Location())
// From https://learn.microsoft.com/azure/virtual-machine-scale-sets/virtual-machine-scale-sets-manage-fault-domains
// "Virtual machine scale sets are created with five fault domains by default in Azure regions with no zones."
platformFaultDomainCount = 5
} else {
platformFaultDomainCount = int32(len(azsInLocation))
}
} else {
platformFaultDomainCount = int32(len(vmssSpec.FailureDomains))
}

if vmssSpec.AcceleratedNetworking == nil {
// set accelerated networking to the capability of the VMSize
accelNet := sku.HasCapability(resourceskus.AcceleratedNetworking)
Expand Down Expand Up @@ -462,6 +456,7 @@ func (s *Service) buildVMSSFromSpec(ctx context.Context, vmssSpec azure.ScaleSet
return compute.VirtualMachineScaleSet{}, err
}

orchestrationMode := converters.GetOrchestrationMode(s.Scope.ScaleSetSpec().OrchestrationMode)
vmss := compute.VirtualMachineScaleSet{
Location: to.StringPtr(s.Scope.Location()),
Sku: &compute.Sku{
Expand All @@ -472,11 +467,9 @@ func (s *Service) buildVMSSFromSpec(ctx context.Context, vmssSpec azure.ScaleSet
Zones: to.StringSlicePtr(vmssSpec.FailureDomains),
Plan: s.generateImagePlan(ctx),
VirtualMachineScaleSetProperties: &compute.VirtualMachineScaleSetProperties{
SinglePlacementGroup: to.BoolPtr(false),
UpgradePolicy: &compute.UpgradePolicy{
Mode: compute.UpgradeModeManual,
},
Overprovision: to.BoolPtr(false),
OrchestrationMode: orchestrationMode,
PlatformFaultDomainCount: to.Int32Ptr(platformFaultDomainCount),
SinglePlacementGroup: to.BoolPtr(false),
VirtualMachineProfile: &compute.VirtualMachineScaleSetVMProfile{
OsProfile: osProfile,
StorageProfile: storageProfile,
Expand Down Expand Up @@ -517,6 +510,15 @@ func (s *Service) buildVMSSFromSpec(ctx context.Context, vmssSpec azure.ScaleSet
},
}

if orchestrationMode == compute.OrchestrationModeUniform {
vmss.VirtualMachineScaleSetProperties.Overprovision = to.BoolPtr(false)
vmss.VirtualMachineScaleSetProperties.UpgradePolicy = &compute.UpgradePolicy{Mode: compute.UpgradeModeManual}
}
if orchestrationMode == compute.OrchestrationModeFlexible {
vmss.VirtualMachineScaleSetProperties.VirtualMachineProfile.NetworkProfile.NetworkAPIVersion =
compute.NetworkAPIVersionTwoZeroTwoZeroHyphenMinusOneOneHyphenMinusZeroOne
}

// Assign Identity to VMSS
if vmssSpec.Identity == infrav1.VMIdentitySystemAssigned {
vmss.Identity = &compute.VirtualMachineScaleSetIdentity{
Expand Down
Loading

0 comments on commit 2a4710c

Please sign in to comment.