diff --git a/cluster-autoscaler/cloudprovider/aws/README.md b/cluster-autoscaler/cloudprovider/aws/README.md index 5b1a54b30cb8..cc528115d0ba 100644 --- a/cluster-autoscaler/cloudprovider/aws/README.md +++ b/cluster-autoscaler/cloudprovider/aws/README.md @@ -57,7 +57,9 @@ should be updated to restrict the resources/add conditionals: "Action": [ "autoscaling:SetDesiredCapacity", "autoscaling:TerminateInstanceInAutoScalingGroup", + "ec2:DescribeImages", "ec2:DescribeInstanceTypes", + "ec2:GetInstanceTypesFromInstanceRequirements", "eks:DescribeNodegroup" ], "Resource": ["*"] diff --git a/cluster-autoscaler/cloudprovider/aws/auto_scaling_groups.go b/cluster-autoscaler/cloudprovider/aws/auto_scaling_groups.go index 8515fd540223..242d29f69a02 100644 --- a/cluster-autoscaler/cloudprovider/aws/auto_scaling_groups.go +++ b/cluster-autoscaler/cloudprovider/aws/auto_scaling_groups.go @@ -56,8 +56,9 @@ type launchTemplate struct { } type mixedInstancesPolicy struct { - launchTemplate *launchTemplate - instanceTypesOverrides []string + launchTemplate *launchTemplate + instanceTypesOverrides []string + instanceRequirementsOverrides *autoscaling.InstanceRequirements } type asg struct { @@ -515,17 +516,31 @@ func (m *asgCache) buildAsgFromAWS(g *autoscaling.Group) (*asg, error) { } if g.MixedInstancesPolicy != nil { - getInstanceTypes := func(data []*autoscaling.LaunchTemplateOverrides) []string { - res := make([]string, len(data)) - for i := 0; i < len(data); i++ { - res[i] = aws.StringValue(data[i].InstanceType) + getInstanceTypes := func(overrides []*autoscaling.LaunchTemplateOverrides) []string { + res := []string{} + for _, override := range overrides { + if override.InstanceType != nil { + res = append(res, *override.InstanceType) + } } return res } + getInstanceTypeRequirements := func(overrides []*autoscaling.LaunchTemplateOverrides) *autoscaling.InstanceRequirements { + if len(overrides) == 1 && overrides[0].InstanceRequirements != nil { + return overrides[0].InstanceRequirements + } + return nil + } + asg.MixedInstancesPolicy = &mixedInstancesPolicy{ - launchTemplate: buildLaunchTemplateFromSpec(g.MixedInstancesPolicy.LaunchTemplate.LaunchTemplateSpecification), - instanceTypesOverrides: getInstanceTypes(g.MixedInstancesPolicy.LaunchTemplate.Overrides), + launchTemplate: buildLaunchTemplateFromSpec(g.MixedInstancesPolicy.LaunchTemplate.LaunchTemplateSpecification), + instanceTypesOverrides: getInstanceTypes(g.MixedInstancesPolicy.LaunchTemplate.Overrides), + instanceRequirementsOverrides: getInstanceTypeRequirements(g.MixedInstancesPolicy.LaunchTemplate.Overrides), + } + + if len(asg.MixedInstancesPolicy.instanceTypesOverrides) != 0 && asg.MixedInstancesPolicy.instanceRequirementsOverrides != nil { + return nil, fmt.Errorf("invalid setup of both instance type and instance requirements overrides configured") } } diff --git a/cluster-autoscaler/cloudprovider/aws/aws_manager.go b/cluster-autoscaler/cloudprovider/aws/aws_manager.go index 36e03cdd43a1..c8af57f30a55 100644 --- a/cluster-autoscaler/cloudprovider/aws/aws_manager.go +++ b/cluster-autoscaler/cloudprovider/aws/aws_manager.go @@ -335,6 +335,7 @@ func (m *AwsManager) getAsgTemplate(asg *asg) (*asgTemplate, error) { Tags: asg.Tags, }, nil } + return nil, fmt.Errorf("ASG %q uses the unknown EC2 instance type %q", asg.Name, instanceTypeName) } @@ -404,6 +405,10 @@ func (m *AwsManager) buildNodeFromTemplate(asg *asg, template *asgTemplate) (*ap node.Status.Capacity[gpu.ResourceNvidiaGPU] = *resource.NewQuantity(template.InstanceType.GPU, resource.DecimalSI) node.Status.Capacity[apiv1.ResourceMemory] = *resource.NewQuantity(template.InstanceType.MemoryMb*1024*1024, resource.DecimalSI) + if err := m.updateCapacityWithRequirementsOverrides(&node.Status.Capacity, asg.MixedInstancesPolicy); err != nil { + return nil, err + } + resourcesFromTags := extractAllocatableResourcesFromAsg(template.Tags) for resourceName, val := range resourcesFromTags { node.Status.Capacity[apiv1.ResourceName(resourceName)] = *val @@ -464,6 +469,58 @@ func joinNodeLabelsChoosingUserValuesOverAPIValues(extractedLabels map[string]st return result } +func (m *AwsManager) updateCapacityWithRequirementsOverrides(capacity *apiv1.ResourceList, policy *mixedInstancesPolicy) error { + if policy == nil { + return nil + } + + instanceRequirements, err := m.getInstanceRequirementsFromMixedInstancesPolicy(policy) + if err != nil { + return fmt.Errorf("error while building node template using instance requirements: (%s)", err) + } + + if instanceRequirements.VCpuCount != nil && instanceRequirements.VCpuCount.Min != nil { + (*capacity)[apiv1.ResourceCPU] = *resource.NewQuantity(*instanceRequirements.VCpuCount.Min, resource.DecimalSI) + } + + if instanceRequirements.MemoryMiB != nil && instanceRequirements.MemoryMiB.Min != nil { + (*capacity)[apiv1.ResourceMemory] = *resource.NewQuantity(*instanceRequirements.MemoryMiB.Min*1024*1024, resource.DecimalSI) + } + + for _, manufacturer := range instanceRequirements.AcceleratorManufacturers { + if *manufacturer == autoscaling.AcceleratorManufacturerNvidia { + for _, acceleratorType := range instanceRequirements.AcceleratorTypes { + if *acceleratorType == autoscaling.AcceleratorTypeGpu { + (*capacity)[gpu.ResourceNvidiaGPU] = *resource.NewQuantity(*instanceRequirements.AcceleratorCount.Min, resource.DecimalSI) + } + } + } + } + + return nil +} + +func (m *AwsManager) getInstanceRequirementsFromMixedInstancesPolicy(policy *mixedInstancesPolicy) (*ec2.InstanceRequirements, error) { + instanceRequirements := &ec2.InstanceRequirements{} + if policy.instanceRequirementsOverrides != nil { + var err error + instanceRequirements, err = m.awsService.getEC2RequirementsFromAutoscaling(policy.instanceRequirementsOverrides) + if err != nil { + return nil, err + } + } else if policy.launchTemplate != nil { + templateData, err := m.awsService.getLaunchTemplateData(policy.launchTemplate.name, policy.launchTemplate.version) + if err != nil { + return nil, err + } + + if templateData.InstanceRequirements != nil { + instanceRequirements = templateData.InstanceRequirements + } + } + return instanceRequirements, nil +} + func buildGenericLabels(template *asgTemplate, nodeName string) map[string]string { result := make(map[string]string) diff --git a/cluster-autoscaler/cloudprovider/aws/aws_manager_test.go b/cluster-autoscaler/cloudprovider/aws/aws_manager_test.go index a9783b498cd8..cd7689cb39f1 100644 --- a/cluster-autoscaler/cloudprovider/aws/aws_manager_test.go +++ b/cluster-autoscaler/cloudprovider/aws/aws_manager_test.go @@ -41,6 +41,7 @@ import ( "k8s.io/autoscaler/cluster-autoscaler/cloudprovider/aws/aws-sdk-go/service/autoscaling" "k8s.io/autoscaler/cluster-autoscaler/cloudprovider/aws/aws-sdk-go/service/ec2" "k8s.io/autoscaler/cluster-autoscaler/config" + "k8s.io/autoscaler/cluster-autoscaler/utils/gpu" provider_aws "k8s.io/legacy-cloud-providers/aws" ) @@ -468,6 +469,37 @@ func TestBuildNodeFromTemplate(t *testing.T) { observedTaints := observedNode.Spec.Taints assert.Equal(t, 1, len(observedTaints)) assert.Equal(t, gpuTaint, observedTaints[0]) + + // Node with instance requirements + asg.MixedInstancesPolicy = &mixedInstancesPolicy{ + instanceRequirementsOverrides: &autoscaling.InstanceRequirements{ + VCpuCount: &autoscaling.VCpuCountRequest{ + Min: aws.Int64(4), + Max: aws.Int64(8), + }, + MemoryMiB: &autoscaling.MemoryMiBRequest{ + Min: aws.Int64(4), + Max: aws.Int64(8), + }, + AcceleratorTypes: []*string{aws.String(autoscaling.AcceleratorTypeGpu)}, + AcceleratorManufacturers: []*string{aws.String(autoscaling.AcceleratorManufacturerNvidia)}, + AcceleratorCount: &autoscaling.AcceleratorCountRequest{ + Min: aws.Int64(4), + Max: aws.Int64(8), + }, + }, + } + observedNode, observedErr = awsManager.buildNodeFromTemplate(asg, &asgTemplate{ + InstanceType: c5Instance, + }) + + assert.NoError(t, observedErr) + observedMemoryRequirement := observedNode.Status.Capacity[apiv1.ResourceMemory] + assert.Equal(t, int64(4*1024*1024), observedMemoryRequirement.Value()) + observedVCpuRequirement := observedNode.Status.Capacity[apiv1.ResourceCPU] + assert.Equal(t, int64(4), observedVCpuRequirement.Value()) + observedGpuRequirement := observedNode.Status.Capacity[gpu.ResourceNvidiaGPU] + assert.Equal(t, int64(4), observedGpuRequirement.Value()) } func TestExtractLabelsFromAsg(t *testing.T) { diff --git a/cluster-autoscaler/cloudprovider/aws/aws_wrapper.go b/cluster-autoscaler/cloudprovider/aws/aws_wrapper.go index 6af63ef9ea37..e43bda93533f 100644 --- a/cluster-autoscaler/cloudprovider/aws/aws_wrapper.go +++ b/cluster-autoscaler/cloudprovider/aws/aws_wrapper.go @@ -40,7 +40,9 @@ type autoScalingI interface { // ec2I is the interface abstracting specific API calls of the EC2 service provided by AWS SDK for use in CA type ec2I interface { + DescribeImages(input *ec2.DescribeImagesInput) (*ec2.DescribeImagesOutput, error) DescribeLaunchTemplateVersions(input *ec2.DescribeLaunchTemplateVersionsInput) (*ec2.DescribeLaunchTemplateVersionsOutput, error) + GetInstanceTypesFromInstanceRequirementsPages(input *ec2.GetInstanceTypesFromInstanceRequirementsInput, fn func(*ec2.GetInstanceTypesFromInstanceRequirementsOutput, bool) bool) error } // eksI is the interface that represents a specific aspect of EKS (Elastic Kubernetes Service) which is provided by AWS SDK for use in CA @@ -215,36 +217,457 @@ func (m *awsWrapper) getAutoscalingGroupsByTags(tags map[string]string) ([]*auto } func (m *awsWrapper) getInstanceTypeByLaunchTemplate(launchTemplate *launchTemplate) (string, error) { - params := &ec2.DescribeLaunchTemplateVersionsInput{ - LaunchTemplateName: aws.String(launchTemplate.name), - Versions: []*string{aws.String(launchTemplate.version)}, + templateData, err := m.getLaunchTemplateData(launchTemplate.name, launchTemplate.version) + if err != nil { + return "", err + } + + instanceType := "" + if templateData.InstanceType != nil { + instanceType = *templateData.InstanceType + } else if templateData.InstanceRequirements != nil && templateData.ImageId != nil { + requirementsRequest, err := m.getRequirementsRequestFromEC2(templateData.InstanceRequirements) + if err != nil { + return "", fmt.Errorf("unable to get instance requirements request") + } + instanceType, err = m.getInstanceTypeFromInstanceRequirements(*templateData.ImageId, requirementsRequest) + if err != nil { + return "", err + } + } + if len(instanceType) == 0 { + return "", fmt.Errorf("unable to find instance type using launch template") + } + + return instanceType, nil +} + +func (m *awsWrapper) getInstanceTypeFromRequirementsOverrides(policy *mixedInstancesPolicy) (string, error) { + if policy.launchTemplate == nil { + return "", fmt.Errorf("no launch template found for mixed instances policy") + } + + templateData, err := m.getLaunchTemplateData(policy.launchTemplate.name, policy.launchTemplate.version) + if err != nil { + return "", err + } + + requirements, err := m.getRequirementsRequestFromAutoscaling(policy.instanceRequirementsOverrides) + if err != nil { + return "", err + } + instanceType, err := m.getInstanceTypeFromInstanceRequirements(*templateData.ImageId, requirements) + if err != nil { + return "", err + } + + return instanceType, nil +} + +func (m *awsWrapper) getLaunchTemplateData(templateName string, templateVersion string) (*ec2.ResponseLaunchTemplateData, error) { + describeTemplateInput := &ec2.DescribeLaunchTemplateVersionsInput{ + LaunchTemplateName: aws.String(templateName), + Versions: []*string{aws.String(templateVersion)}, } start := time.Now() - describeData, err := m.DescribeLaunchTemplateVersions(params) + describeData, err := m.DescribeLaunchTemplateVersions(describeTemplateInput) observeAWSRequest("DescribeLaunchTemplateVersions", err, start) + if err != nil { + return nil, err + } + if describeData == nil || len(describeData.LaunchTemplateVersions) == 0 { + return nil, fmt.Errorf("unable to find template versions for launch template %s", templateName) + } + if describeData.LaunchTemplateVersions[0].LaunchTemplateData == nil { + return nil, fmt.Errorf("no data found for launch template %s, version %s", templateName, templateVersion) + } + + return describeData.LaunchTemplateVersions[0].LaunchTemplateData, nil +} + +func (m *awsWrapper) getInstanceTypeFromInstanceRequirements(imageId string, requirementsRequest *ec2.InstanceRequirementsRequest) (string, error) { + describeImagesInput := &ec2.DescribeImagesInput{ + ImageIds: []*string{aws.String(imageId)}, + } + + start := time.Now() + describeImagesOutput, err := m.DescribeImages(describeImagesInput) + observeAWSRequest("DescribeImages", err, start) if err != nil { return "", err } - if len(describeData.LaunchTemplateVersions) == 0 { - return "", fmt.Errorf("unable to find template versions") + imageArchitectures := []*string{} + imageVirtualizationTypes := []*string{} + for _, image := range describeImagesOutput.Images { + imageArchitectures = append(imageArchitectures, image.Architecture) + imageVirtualizationTypes = append(imageVirtualizationTypes, image.VirtualizationType) + } + + requirementsInput := &ec2.GetInstanceTypesFromInstanceRequirementsInput{ + ArchitectureTypes: imageArchitectures, + InstanceRequirements: requirementsRequest, + VirtualizationTypes: imageVirtualizationTypes, + } + + start = time.Now() + instanceTypes := []string{} + err = m.GetInstanceTypesFromInstanceRequirementsPages(requirementsInput, func(page *ec2.GetInstanceTypesFromInstanceRequirementsOutput, isLastPage bool) bool { + for _, instanceType := range page.InstanceTypes { + instanceTypes = append(instanceTypes, *instanceType.InstanceType) + } + return !isLastPage + }) + observeAWSRequest("GetInstanceTypesFromInstanceRequirements", err, start) + if err != nil { + return "", fmt.Errorf("unable to get instance types from requirements") + } + + return instanceTypes[0], nil +} + +func (m *awsWrapper) getRequirementsRequestFromAutoscaling(requirements *autoscaling.InstanceRequirements) (*ec2.InstanceRequirementsRequest, error) { + requirementsRequest := ec2.InstanceRequirementsRequest{} + + // required instance requirements + requirementsRequest.MemoryMiB = &ec2.MemoryMiBRequest{ + Min: requirements.MemoryMiB.Min, + Max: requirements.MemoryMiB.Max, + } + + requirementsRequest.VCpuCount = &ec2.VCpuCountRangeRequest{ + Min: requirements.VCpuCount.Min, + Max: requirements.VCpuCount.Max, + } + + // optional instance requirements + if requirements.AcceleratorCount != nil { + requirementsRequest.AcceleratorCount = &ec2.AcceleratorCountRequest{ + Min: requirements.AcceleratorCount.Min, + Max: requirements.AcceleratorCount.Max, + } + } + + if requirements.AcceleratorManufacturers != nil { + requirementsRequest.AcceleratorManufacturers = requirements.AcceleratorManufacturers + } + + if requirements.AcceleratorNames != nil { + requirementsRequest.AcceleratorNames = requirements.AcceleratorNames + } + + if requirements.AcceleratorTotalMemoryMiB != nil { + requirementsRequest.AcceleratorTotalMemoryMiB = &ec2.AcceleratorTotalMemoryMiBRequest{ + Min: requirements.AcceleratorTotalMemoryMiB.Min, + Max: requirements.AcceleratorTotalMemoryMiB.Max, + } + } + + if requirements.AcceleratorTypes != nil { + requirementsRequest.AcceleratorTypes = requirements.AcceleratorTypes + } + + if requirements.BareMetal != nil { + requirementsRequest.BareMetal = requirements.BareMetal + } + + if requirements.BaselineEbsBandwidthMbps != nil { + requirementsRequest.BaselineEbsBandwidthMbps = &ec2.BaselineEbsBandwidthMbpsRequest{ + Min: requirements.BaselineEbsBandwidthMbps.Min, + Max: requirements.BaselineEbsBandwidthMbps.Max, + } + } + + if requirements.BurstablePerformance != nil { + requirementsRequest.BurstablePerformance = requirements.BurstablePerformance + } + + if requirements.CpuManufacturers != nil { + requirementsRequest.CpuManufacturers = requirements.CpuManufacturers + } + + if requirements.ExcludedInstanceTypes != nil { + requirementsRequest.ExcludedInstanceTypes = requirements.ExcludedInstanceTypes + } + + if requirements.InstanceGenerations != nil { + requirementsRequest.InstanceGenerations = requirements.InstanceGenerations + } + + if requirements.LocalStorage != nil { + requirementsRequest.LocalStorage = requirements.LocalStorage + } + + if requirements.LocalStorageTypes != nil { + requirementsRequest.LocalStorageTypes = requirements.LocalStorageTypes + } + + if requirements.MemoryGiBPerVCpu != nil { + requirementsRequest.MemoryGiBPerVCpu = &ec2.MemoryGiBPerVCpuRequest{ + Min: requirements.MemoryGiBPerVCpu.Min, + Max: requirements.MemoryGiBPerVCpu.Max, + } + } + + if requirements.NetworkInterfaceCount != nil { + requirementsRequest.NetworkInterfaceCount = &ec2.NetworkInterfaceCountRequest{ + Min: requirements.NetworkInterfaceCount.Min, + Max: requirements.NetworkInterfaceCount.Max, + } + } + + if requirements.OnDemandMaxPricePercentageOverLowestPrice != nil { + requirementsRequest.OnDemandMaxPricePercentageOverLowestPrice = requirements.OnDemandMaxPricePercentageOverLowestPrice + } + + if requirements.RequireHibernateSupport != nil { + requirementsRequest.RequireHibernateSupport = requirements.RequireHibernateSupport + } + + if requirements.SpotMaxPricePercentageOverLowestPrice != nil { + requirementsRequest.SpotMaxPricePercentageOverLowestPrice = requirements.SpotMaxPricePercentageOverLowestPrice + } + + if requirements.TotalLocalStorageGB != nil { + requirementsRequest.TotalLocalStorageGB = &ec2.TotalLocalStorageGBRequest{ + Min: requirements.TotalLocalStorageGB.Min, + Max: requirements.TotalLocalStorageGB.Max, + } + } + + return &requirementsRequest, nil +} + +func (m *awsWrapper) getRequirementsRequestFromEC2(requirements *ec2.InstanceRequirements) (*ec2.InstanceRequirementsRequest, error) { + requirementsRequest := ec2.InstanceRequirementsRequest{} + + // required instance requirements + requirementsRequest.MemoryMiB = &ec2.MemoryMiBRequest{ + Min: requirements.MemoryMiB.Min, + Max: requirements.MemoryMiB.Max, + } + + requirementsRequest.VCpuCount = &ec2.VCpuCountRangeRequest{ + Min: requirements.VCpuCount.Min, + Max: requirements.VCpuCount.Max, + } + + // optional instance requirements + if requirements.AcceleratorCount != nil { + requirementsRequest.AcceleratorCount = &ec2.AcceleratorCountRequest{ + Min: requirements.AcceleratorCount.Min, + Max: requirements.AcceleratorCount.Max, + } + } + + if requirements.AcceleratorManufacturers != nil { + requirementsRequest.AcceleratorManufacturers = requirements.AcceleratorManufacturers + } + + if requirements.AcceleratorNames != nil { + requirementsRequest.AcceleratorNames = requirements.AcceleratorNames + } + + if requirements.AcceleratorTotalMemoryMiB != nil { + requirementsRequest.AcceleratorTotalMemoryMiB = &ec2.AcceleratorTotalMemoryMiBRequest{ + Min: requirements.AcceleratorTotalMemoryMiB.Min, + Max: requirements.AcceleratorTotalMemoryMiB.Max, + } + } + + if requirements.AcceleratorTypes != nil { + requirementsRequest.AcceleratorTypes = requirements.AcceleratorTypes } - lt := describeData.LaunchTemplateVersions[0] - instanceType := lt.LaunchTemplateData.InstanceType + if requirements.BareMetal != nil { + requirementsRequest.BareMetal = requirements.BareMetal + } - if instanceType == nil { - return "", fmt.Errorf("unable to find instance type within launch template") + if requirements.BaselineEbsBandwidthMbps != nil { + requirementsRequest.BaselineEbsBandwidthMbps = &ec2.BaselineEbsBandwidthMbpsRequest{ + Min: requirements.BaselineEbsBandwidthMbps.Min, + Max: requirements.BaselineEbsBandwidthMbps.Max, + } } - return aws.StringValue(instanceType), nil + if requirements.BurstablePerformance != nil { + requirementsRequest.BurstablePerformance = requirements.BurstablePerformance + } + + if requirements.CpuManufacturers != nil { + requirementsRequest.CpuManufacturers = requirements.CpuManufacturers + } + + if requirements.ExcludedInstanceTypes != nil { + requirementsRequest.ExcludedInstanceTypes = requirements.ExcludedInstanceTypes + } + + if requirements.InstanceGenerations != nil { + requirementsRequest.InstanceGenerations = requirements.InstanceGenerations + } + + if requirements.LocalStorage != nil { + requirementsRequest.LocalStorage = requirements.LocalStorage + } + + if requirements.LocalStorageTypes != nil { + requirementsRequest.LocalStorageTypes = requirements.LocalStorageTypes + } + + if requirements.MemoryGiBPerVCpu != nil { + requirementsRequest.MemoryGiBPerVCpu = &ec2.MemoryGiBPerVCpuRequest{ + Min: requirements.MemoryGiBPerVCpu.Min, + Max: requirements.MemoryGiBPerVCpu.Max, + } + } + + if requirements.NetworkInterfaceCount != nil { + requirementsRequest.NetworkInterfaceCount = &ec2.NetworkInterfaceCountRequest{ + Min: requirements.NetworkInterfaceCount.Min, + Max: requirements.NetworkInterfaceCount.Max, + } + } + + if requirements.OnDemandMaxPricePercentageOverLowestPrice != nil { + requirementsRequest.OnDemandMaxPricePercentageOverLowestPrice = requirements.OnDemandMaxPricePercentageOverLowestPrice + } + + if requirements.RequireHibernateSupport != nil { + requirementsRequest.RequireHibernateSupport = requirements.RequireHibernateSupport + } + + if requirements.SpotMaxPricePercentageOverLowestPrice != nil { + requirementsRequest.SpotMaxPricePercentageOverLowestPrice = requirements.SpotMaxPricePercentageOverLowestPrice + } + + if requirements.TotalLocalStorageGB != nil { + requirementsRequest.TotalLocalStorageGB = &ec2.TotalLocalStorageGBRequest{ + Min: requirements.TotalLocalStorageGB.Min, + Max: requirements.TotalLocalStorageGB.Max, + } + } + + return &requirementsRequest, nil +} + +func (m *awsWrapper) getEC2RequirementsFromAutoscaling(autoscalingRequirements *autoscaling.InstanceRequirements) (*ec2.InstanceRequirements, error) { + ec2Requirements := ec2.InstanceRequirements{} + + // required instance requirements + ec2Requirements.MemoryMiB = &ec2.MemoryMiB{ + Min: autoscalingRequirements.MemoryMiB.Min, + Max: autoscalingRequirements.MemoryMiB.Max, + } + + ec2Requirements.VCpuCount = &ec2.VCpuCountRange{ + Min: autoscalingRequirements.VCpuCount.Min, + Max: autoscalingRequirements.VCpuCount.Max, + } + + // optional instance requirements + if autoscalingRequirements.AcceleratorCount != nil { + ec2Requirements.AcceleratorCount = &ec2.AcceleratorCount{ + Min: autoscalingRequirements.AcceleratorCount.Min, + Max: autoscalingRequirements.AcceleratorCount.Max, + } + } + + if autoscalingRequirements.AcceleratorManufacturers != nil { + ec2Requirements.AcceleratorManufacturers = autoscalingRequirements.AcceleratorManufacturers + } + + if autoscalingRequirements.AcceleratorNames != nil { + ec2Requirements.AcceleratorNames = autoscalingRequirements.AcceleratorNames + } + + if autoscalingRequirements.AcceleratorTotalMemoryMiB != nil { + ec2Requirements.AcceleratorTotalMemoryMiB = &ec2.AcceleratorTotalMemoryMiB{ + Min: autoscalingRequirements.AcceleratorTotalMemoryMiB.Min, + Max: autoscalingRequirements.AcceleratorTotalMemoryMiB.Max, + } + } + + if autoscalingRequirements.AcceleratorTypes != nil { + ec2Requirements.AcceleratorTypes = autoscalingRequirements.AcceleratorTypes + } + + if autoscalingRequirements.BareMetal != nil { + ec2Requirements.BareMetal = autoscalingRequirements.BareMetal + } + + if autoscalingRequirements.BaselineEbsBandwidthMbps != nil { + ec2Requirements.BaselineEbsBandwidthMbps = &ec2.BaselineEbsBandwidthMbps{ + Min: autoscalingRequirements.BaselineEbsBandwidthMbps.Min, + Max: autoscalingRequirements.BaselineEbsBandwidthMbps.Max, + } + } + + if autoscalingRequirements.BurstablePerformance != nil { + ec2Requirements.BurstablePerformance = autoscalingRequirements.BurstablePerformance + } + + if autoscalingRequirements.CpuManufacturers != nil { + ec2Requirements.CpuManufacturers = autoscalingRequirements.CpuManufacturers + } + + if autoscalingRequirements.ExcludedInstanceTypes != nil { + ec2Requirements.ExcludedInstanceTypes = autoscalingRequirements.ExcludedInstanceTypes + } + + if autoscalingRequirements.InstanceGenerations != nil { + ec2Requirements.InstanceGenerations = autoscalingRequirements.InstanceGenerations + } + + if autoscalingRequirements.LocalStorage != nil { + ec2Requirements.LocalStorage = autoscalingRequirements.LocalStorage + } + + if autoscalingRequirements.LocalStorageTypes != nil { + ec2Requirements.LocalStorageTypes = autoscalingRequirements.LocalStorageTypes + } + + if autoscalingRequirements.MemoryGiBPerVCpu != nil { + ec2Requirements.MemoryGiBPerVCpu = &ec2.MemoryGiBPerVCpu{ + Min: autoscalingRequirements.MemoryGiBPerVCpu.Min, + Max: autoscalingRequirements.MemoryGiBPerVCpu.Max, + } + } + + if autoscalingRequirements.NetworkInterfaceCount != nil { + ec2Requirements.NetworkInterfaceCount = &ec2.NetworkInterfaceCount{ + Min: autoscalingRequirements.NetworkInterfaceCount.Min, + Max: autoscalingRequirements.NetworkInterfaceCount.Max, + } + } + + if autoscalingRequirements.OnDemandMaxPricePercentageOverLowestPrice != nil { + ec2Requirements.OnDemandMaxPricePercentageOverLowestPrice = autoscalingRequirements.OnDemandMaxPricePercentageOverLowestPrice + } + + if autoscalingRequirements.RequireHibernateSupport != nil { + ec2Requirements.RequireHibernateSupport = autoscalingRequirements.RequireHibernateSupport + } + + if autoscalingRequirements.SpotMaxPricePercentageOverLowestPrice != nil { + ec2Requirements.SpotMaxPricePercentageOverLowestPrice = autoscalingRequirements.SpotMaxPricePercentageOverLowestPrice + } + + if autoscalingRequirements.TotalLocalStorageGB != nil { + ec2Requirements.TotalLocalStorageGB = &ec2.TotalLocalStorageGB{ + Min: autoscalingRequirements.TotalLocalStorageGB.Min, + Max: autoscalingRequirements.TotalLocalStorageGB.Max, + } + } + + return &ec2Requirements, nil } func (m *awsWrapper) getInstanceTypesForAsgs(asgs []*asg) (map[string]string, error) { results := map[string]string{} launchConfigsToQuery := map[string]string{} launchTemplatesToQuery := map[string]*launchTemplate{} + mixedInstancesPoliciesToQuery := map[string]*mixedInstancesPolicy{} for _, asg := range asgs { name := asg.AwsRef.Name @@ -255,6 +678,8 @@ func (m *awsWrapper) getInstanceTypesForAsgs(asgs []*asg) (map[string]string, er } else if asg.MixedInstancesPolicy != nil { if len(asg.MixedInstancesPolicy.instanceTypesOverrides) > 0 { results[name] = asg.MixedInstancesPolicy.instanceTypesOverrides[0] + } else if asg.MixedInstancesPolicy.instanceRequirementsOverrides != nil { + mixedInstancesPoliciesToQuery[name] = asg.MixedInstancesPolicy } else { launchTemplatesToQuery[name] = asg.MixedInstancesPolicy.launchTemplate } @@ -291,6 +716,17 @@ func (m *awsWrapper) getInstanceTypesForAsgs(asgs []*asg) (map[string]string, er } klog.V(4).Infof("Successfully queried %d launch templates", len(launchTemplatesToQuery)) + // Have to match Instance Requirements one-at-a-time, since they are configured per asg and can't be queried in bulk + for asgName, policy := range mixedInstancesPoliciesToQuery { + instanceType, err := m.getInstanceTypeFromRequirementsOverrides(policy) + if err != nil { + klog.Errorf("Failed to query instance requirements for ASG %s: %v", asgName, err) + continue + } + results[asgName] = instanceType + } + klog.V(4).Infof("Successfully queried instance requirements for %d ASGs", len(mixedInstancesPoliciesToQuery)) + return results, nil } diff --git a/cluster-autoscaler/cloudprovider/aws/aws_wrapper_test.go b/cluster-autoscaler/cloudprovider/aws/aws_wrapper_test.go index 865fb242f1e6..1fd8a764986d 100644 --- a/cluster-autoscaler/cloudprovider/aws/aws_wrapper_test.go +++ b/cluster-autoscaler/cloudprovider/aws/aws_wrapper_test.go @@ -17,6 +17,7 @@ limitations under the License. package aws import ( + "errors" "fmt" "os" "strconv" @@ -64,11 +65,21 @@ type ec2Mock struct { mock.Mock } +func (e *ec2Mock) DescribeImages(input *ec2.DescribeImagesInput) (*ec2.DescribeImagesOutput, error) { + args := e.Called(input) + return args.Get(0).(*ec2.DescribeImagesOutput), nil +} + func (e *ec2Mock) DescribeLaunchTemplateVersions(i *ec2.DescribeLaunchTemplateVersionsInput) (*ec2.DescribeLaunchTemplateVersionsOutput, error) { args := e.Called(i) return args.Get(0).(*ec2.DescribeLaunchTemplateVersionsOutput), nil } +func (e *ec2Mock) GetInstanceTypesFromInstanceRequirementsPages(input *ec2.GetInstanceTypesFromInstanceRequirementsInput, fn func(*ec2.GetInstanceTypesFromInstanceRequirementsOutput, bool) bool) error { + args := e.Called(input, fn) + return args.Error(0) +} + type eksMock struct { mock.Mock } @@ -386,6 +397,232 @@ func TestGetInstanceTypesForAsgs(t *testing.T) { } } +func TestGetInstanceTypesFromInstanceRequirementsOverrides(t *testing.T) { + mixedInstancesPolicy := &mixedInstancesPolicy{ + launchTemplate: &launchTemplate{ + name: "launchTemplateName", + version: "1", + }, + instanceRequirementsOverrides: &autoscaling.InstanceRequirements{ + VCpuCount: &autoscaling.VCpuCountRequest{ + Min: aws.Int64(4), + Max: aws.Int64(8), + }, + MemoryMiB: &autoscaling.MemoryMiBRequest{ + Min: aws.Int64(4), + Max: aws.Int64(8), + }, + AcceleratorTypes: []*string{aws.String(autoscaling.AcceleratorTypeGpu)}, + AcceleratorManufacturers: []*string{aws.String(autoscaling.AcceleratorManufacturerNvidia)}, + AcceleratorCount: &autoscaling.AcceleratorCountRequest{ + Min: aws.Int64(4), + Max: aws.Int64(8), + }, + }, + } + + e := &ec2Mock{} + awsWrapper := &awsWrapper{ + autoScalingI: nil, + ec2I: e, + eksI: nil, + } + + e.On("DescribeLaunchTemplateVersions", &ec2.DescribeLaunchTemplateVersionsInput{ + LaunchTemplateName: aws.String("launchTemplateName"), + Versions: []*string{aws.String("1")}, + }).Return(&ec2.DescribeLaunchTemplateVersionsOutput{ + LaunchTemplateVersions: []*ec2.LaunchTemplateVersion{ + { + LaunchTemplateData: &ec2.ResponseLaunchTemplateData{ + ImageId: aws.String("123"), + }, + }, + }, + }) + + e.On("DescribeImages", &ec2.DescribeImagesInput{ + ImageIds: []*string{aws.String("123")}, + }).Return(&ec2.DescribeImagesOutput{ + Images: []*ec2.Image{ + { + Architecture: aws.String("x86_64"), + VirtualizationType: aws.String("xen"), + }, + }, + }) + + requirements, err := awsWrapper.getRequirementsRequestFromAutoscaling(mixedInstancesPolicy.instanceRequirementsOverrides) + assert.NoError(t, err) + e.On("GetInstanceTypesFromInstanceRequirementsPages", + &ec2.GetInstanceTypesFromInstanceRequirementsInput{ + ArchitectureTypes: []*string{aws.String("x86_64")}, + InstanceRequirements: requirements, + VirtualizationTypes: []*string{aws.String("xen")}, + }, + mock.AnythingOfType("func(*ec2.GetInstanceTypesFromInstanceRequirementsOutput, bool) bool"), + ).Run(func(args mock.Arguments) { + fn := args.Get(1).(func(*ec2.GetInstanceTypesFromInstanceRequirementsOutput, bool) bool) + fn(&ec2.GetInstanceTypesFromInstanceRequirementsOutput{ + InstanceTypes: []*ec2.InstanceTypeInfoFromInstanceRequirements{ + { + InstanceType: aws.String("g4dn.xlarge"), + }, + }, + }, false) + }).Return(nil) + + result, err := awsWrapper.getInstanceTypeFromRequirementsOverrides(mixedInstancesPolicy) + assert.NoError(t, err) + assert.Equal(t, "g4dn.xlarge", result) +} + +func TestGetInstanceTypesFromInstanceRequirementsInLaunchTemplate(t *testing.T) { + launchTemplate := &launchTemplate{ + name: "launchTemplateName", + version: "1", + } + + e := &ec2Mock{} + awsWrapper := &awsWrapper{ + autoScalingI: nil, + ec2I: e, + eksI: nil, + } + + instanceRequirements := &ec2.InstanceRequirements{ + VCpuCount: &ec2.VCpuCountRange{ + Min: aws.Int64(4), + Max: aws.Int64(8), + }, + MemoryMiB: &ec2.MemoryMiB{ + Min: aws.Int64(4), + Max: aws.Int64(8), + }, + AcceleratorTypes: []*string{aws.String(autoscaling.AcceleratorTypeGpu)}, + AcceleratorManufacturers: []*string{aws.String(autoscaling.AcceleratorManufacturerNvidia)}, + AcceleratorCount: &ec2.AcceleratorCount{ + Min: aws.Int64(4), + Max: aws.Int64(8), + }, + } + + e.On("DescribeLaunchTemplateVersions", &ec2.DescribeLaunchTemplateVersionsInput{ + LaunchTemplateName: aws.String("launchTemplateName"), + Versions: []*string{aws.String("1")}, + }).Return(&ec2.DescribeLaunchTemplateVersionsOutput{ + LaunchTemplateVersions: []*ec2.LaunchTemplateVersion{ + { + LaunchTemplateData: &ec2.ResponseLaunchTemplateData{ + ImageId: aws.String("123"), + InstanceRequirements: instanceRequirements, + }, + }, + }, + }) + + e.On("DescribeImages", &ec2.DescribeImagesInput{ + ImageIds: []*string{aws.String("123")}, + }).Return(&ec2.DescribeImagesOutput{ + Images: []*ec2.Image{ + { + Architecture: aws.String("x86_64"), + VirtualizationType: aws.String("xen"), + }, + }, + }) + + requirements, err := awsWrapper.getRequirementsRequestFromEC2(instanceRequirements) + assert.NoError(t, err) + e.On("GetInstanceTypesFromInstanceRequirementsPages", + &ec2.GetInstanceTypesFromInstanceRequirementsInput{ + ArchitectureTypes: []*string{aws.String("x86_64")}, + InstanceRequirements: requirements, + VirtualizationTypes: []*string{aws.String("xen")}, + }, + mock.AnythingOfType("func(*ec2.GetInstanceTypesFromInstanceRequirementsOutput, bool) bool"), + ).Run(func(args mock.Arguments) { + fn := args.Get(1).(func(*ec2.GetInstanceTypesFromInstanceRequirementsOutput, bool) bool) + fn(&ec2.GetInstanceTypesFromInstanceRequirementsOutput{ + InstanceTypes: []*ec2.InstanceTypeInfoFromInstanceRequirements{ + { + InstanceType: aws.String("g4dn.xlarge"), + }, + }, + }, false) + }).Return(nil) + + result, err := awsWrapper.getInstanceTypeByLaunchTemplate(launchTemplate) + assert.NoError(t, err) + assert.Equal(t, "g4dn.xlarge", result) +} + +func TestGetLaunchTemplateData(t *testing.T) { + e := &ec2Mock{} + awsWrapper := &awsWrapper{ + ec2I: e, + } + + testCases := []struct { + testName string + describeTemplateData *ec2.DescribeLaunchTemplateVersionsOutput + expectedData *ec2.ResponseLaunchTemplateData + expectedErr error + }{ + { + "no launch template version found", + &ec2.DescribeLaunchTemplateVersionsOutput{ + LaunchTemplateVersions: []*ec2.LaunchTemplateVersion{}, + }, + nil, + errors.New("unable to find template versions for launch template launchTemplateName"), + }, + { + "no data found for launch template", + &ec2.DescribeLaunchTemplateVersionsOutput{ + LaunchTemplateVersions: []*ec2.LaunchTemplateVersion{ + { + LaunchTemplateName: aws.String("launchTemplateName"), + LaunchTemplateData: nil, + }, + }, + }, + nil, + errors.New("no data found for launch template launchTemplateName, version 1"), + }, + { + "launch template data found successfully", + &ec2.DescribeLaunchTemplateVersionsOutput{ + LaunchTemplateVersions: []*ec2.LaunchTemplateVersion{ + { + LaunchTemplateName: aws.String("launchTemplateName"), + LaunchTemplateData: &ec2.ResponseLaunchTemplateData{ + ImageId: aws.String("123"), + }, + }, + }, + }, + &ec2.ResponseLaunchTemplateData{ + ImageId: aws.String("123"), + }, + nil, + }, + } + + describeTemplateInput := &ec2.DescribeLaunchTemplateVersionsInput{ + LaunchTemplateName: aws.String("launchTemplateName"), + Versions: []*string{aws.String("1")}, + } + + for _, testCase := range testCases { + e.On("DescribeLaunchTemplateVersions", describeTemplateInput).Return(testCase.describeTemplateData).Once() + + describeData, err := awsWrapper.getLaunchTemplateData("launchTemplateName", "1") + assert.Equal(t, testCase.expectedData, describeData) + assert.Equal(t, testCase.expectedErr, err) + } +} + func TestBuildLaunchTemplateFromSpec(t *testing.T) { assert := assert.New(t)