Skip to content

Commit

Permalink
Merge pull request #4588 from AustinSiu/feature-aws-abs
Browse files Browse the repository at this point in the history
Support attribute-based instance selection for AWS
  • Loading branch information
k8s-ci-robot authored Aug 10, 2022
2 parents 1dc25c6 + 833c6fd commit 62c3b26
Show file tree
Hide file tree
Showing 6 changed files with 798 additions and 19 deletions.
2 changes: 2 additions & 0 deletions cluster-autoscaler/cloudprovider/aws/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,9 @@ should be updated to restrict the resources/add conditionals:
"Action": [
"autoscaling:SetDesiredCapacity",
"autoscaling:TerminateInstanceInAutoScalingGroup",
"ec2:DescribeImages",
"ec2:DescribeInstanceTypes",
"ec2:GetInstanceTypesFromInstanceRequirements",
"eks:DescribeNodegroup"
],
"Resource": ["*"]
Expand Down
31 changes: 23 additions & 8 deletions cluster-autoscaler/cloudprovider/aws/auto_scaling_groups.go
Original file line number Diff line number Diff line change
Expand Up @@ -56,8 +56,9 @@ type launchTemplate struct {
}

type mixedInstancesPolicy struct {
launchTemplate *launchTemplate
instanceTypesOverrides []string
launchTemplate *launchTemplate
instanceTypesOverrides []string
instanceRequirementsOverrides *autoscaling.InstanceRequirements
}

type asg struct {
Expand Down Expand Up @@ -515,17 +516,31 @@ func (m *asgCache) buildAsgFromAWS(g *autoscaling.Group) (*asg, error) {
}

if g.MixedInstancesPolicy != nil {
getInstanceTypes := func(data []*autoscaling.LaunchTemplateOverrides) []string {
res := make([]string, len(data))
for i := 0; i < len(data); i++ {
res[i] = aws.StringValue(data[i].InstanceType)
// getInstanceTypes returns the instance type names set on the given launch
// template overrides. Overrides with no InstanceType are skipped, and the
// result is always a non-nil (possibly empty) slice.
getInstanceTypes := func(overrides []*autoscaling.LaunchTemplateOverrides) []string {
	instanceTypes := []string{}
	for i := range overrides {
		instanceType := overrides[i].InstanceType
		if instanceType == nil {
			continue
		}
		instanceTypes = append(instanceTypes, *instanceType)
	}
	return instanceTypes
}

// getInstanceTypeRequirements returns the instance requirements of the
// override list only when that list holds exactly one entry; for any other
// length (or when that single entry carries no requirements) it returns nil.
getInstanceTypeRequirements := func(overrides []*autoscaling.LaunchTemplateOverrides) *autoscaling.InstanceRequirements {
	if len(overrides) != 1 {
		return nil
	}
	// A nil InstanceRequirements on the single override is returned as-is,
	// which is the same nil result the caller would otherwise receive.
	return overrides[0].InstanceRequirements
}

asg.MixedInstancesPolicy = &mixedInstancesPolicy{
launchTemplate: buildLaunchTemplateFromSpec(g.MixedInstancesPolicy.LaunchTemplate.LaunchTemplateSpecification),
instanceTypesOverrides: getInstanceTypes(g.MixedInstancesPolicy.LaunchTemplate.Overrides),
launchTemplate: buildLaunchTemplateFromSpec(g.MixedInstancesPolicy.LaunchTemplate.LaunchTemplateSpecification),
instanceTypesOverrides: getInstanceTypes(g.MixedInstancesPolicy.LaunchTemplate.Overrides),
instanceRequirementsOverrides: getInstanceTypeRequirements(g.MixedInstancesPolicy.LaunchTemplate.Overrides),
}

if len(asg.MixedInstancesPolicy.instanceTypesOverrides) != 0 && asg.MixedInstancesPolicy.instanceRequirementsOverrides != nil {
return nil, fmt.Errorf("invalid setup of both instance type and instance requirements overrides configured")
}
}

Expand Down
57 changes: 57 additions & 0 deletions cluster-autoscaler/cloudprovider/aws/aws_manager.go
Original file line number Diff line number Diff line change
Expand Up @@ -335,6 +335,7 @@ func (m *AwsManager) getAsgTemplate(asg *asg) (*asgTemplate, error) {
Tags: asg.Tags,
}, nil
}

return nil, fmt.Errorf("ASG %q uses the unknown EC2 instance type %q", asg.Name, instanceTypeName)
}

Expand Down Expand Up @@ -404,6 +405,10 @@ func (m *AwsManager) buildNodeFromTemplate(asg *asg, template *asgTemplate) (*ap
node.Status.Capacity[gpu.ResourceNvidiaGPU] = *resource.NewQuantity(template.InstanceType.GPU, resource.DecimalSI)
node.Status.Capacity[apiv1.ResourceMemory] = *resource.NewQuantity(template.InstanceType.MemoryMb*1024*1024, resource.DecimalSI)

if err := m.updateCapacityWithRequirementsOverrides(&node.Status.Capacity, asg.MixedInstancesPolicy); err != nil {
return nil, err
}

resourcesFromTags := extractAllocatableResourcesFromAsg(template.Tags)
for resourceName, val := range resourcesFromTags {
node.Status.Capacity[apiv1.ResourceName(resourceName)] = *val
Expand Down Expand Up @@ -464,6 +469,58 @@ func joinNodeLabelsChoosingUserValuesOverAPIValues(extractedLabels map[string]st
return result
}

// updateCapacityWithRequirementsOverrides overwrites entries of capacity with
// the minimum values found in the instance requirements resolved for policy.
//
// A nil policy is a no-op. For a non-nil policy the requirements are obtained
// from getInstanceRequirementsFromMixedInstancesPolicy, and then:
//   - CPU is set to VCpuCount.Min when present;
//   - memory is set to MemoryMiB.Min converted from MiB to bytes when present;
//   - the NVIDIA GPU resource is set to AcceleratorCount.Min when the
//     requirements list the NVIDIA manufacturer and the GPU accelerator type
//     and a minimum accelerator count is present.
//
// Any requirement field that is absent leaves the corresponding capacity entry
// untouched. An error is returned only when the requirements cannot be
// resolved; it wraps the underlying error.
func (m *AwsManager) updateCapacityWithRequirementsOverrides(capacity *apiv1.ResourceList, policy *mixedInstancesPolicy) error {
	if policy == nil {
		return nil
	}

	instanceRequirements, err := m.getInstanceRequirementsFromMixedInstancesPolicy(policy)
	if err != nil {
		return fmt.Errorf("error while building node template using instance requirements: (%w)", err)
	}
	if instanceRequirements == nil {
		// Nothing to apply; guards against a helper that yields (nil, nil).
		return nil
	}

	if instanceRequirements.VCpuCount != nil && instanceRequirements.VCpuCount.Min != nil {
		(*capacity)[apiv1.ResourceCPU] = *resource.NewQuantity(*instanceRequirements.VCpuCount.Min, resource.DecimalSI)
	}

	if instanceRequirements.MemoryMiB != nil && instanceRequirements.MemoryMiB.Min != nil {
		// MemoryMiB is expressed in MiB; capacity is recorded in bytes.
		(*capacity)[apiv1.ResourceMemory] = *resource.NewQuantity(*instanceRequirements.MemoryMiB.Min*1024*1024, resource.DecimalSI)
	}

	// containsValue reports whether any non-nil entry of values equals want.
	containsValue := func(values []*string, want string) bool {
		for _, value := range values {
			if value != nil && *value == want {
				return true
			}
		}
		return false
	}

	// The accelerator count is optional in the requirements, so it must be
	// checked before dereferencing; without a minimum there is nothing to set.
	acceleratorCount := instanceRequirements.AcceleratorCount
	if acceleratorCount != nil && acceleratorCount.Min != nil &&
		containsValue(instanceRequirements.AcceleratorManufacturers, autoscaling.AcceleratorManufacturerNvidia) &&
		containsValue(instanceRequirements.AcceleratorTypes, autoscaling.AcceleratorTypeGpu) {
		(*capacity)[gpu.ResourceNvidiaGPU] = *resource.NewQuantity(*acceleratorCount.Min, resource.DecimalSI)
	}

	return nil
}

// getInstanceRequirementsFromMixedInstancesPolicy resolves the EC2 instance
// requirements that apply to policy.
//
// Requirements set as overrides on the policy take precedence and are passed
// through awsService.getEC2RequirementsFromAutoscaling. Otherwise, when the
// policy has a launch template, the template data is fetched and its
// InstanceRequirements are used if set. In every remaining case an empty,
// non-nil ec2.InstanceRequirements is returned. Errors from the AWS service
// calls are returned unchanged with a nil result.
func (m *AwsManager) getInstanceRequirementsFromMixedInstancesPolicy(policy *mixedInstancesPolicy) (*ec2.InstanceRequirements, error) {
	switch {
	case policy.instanceRequirementsOverrides != nil:
		fromOverrides, err := m.awsService.getEC2RequirementsFromAutoscaling(policy.instanceRequirementsOverrides)
		if err != nil {
			return nil, err
		}
		return fromOverrides, nil

	case policy.launchTemplate != nil:
		lt := policy.launchTemplate
		data, err := m.awsService.getLaunchTemplateData(lt.name, lt.version)
		if err != nil {
			return nil, err
		}
		if data.InstanceRequirements != nil {
			return data.InstanceRequirements, nil
		}
	}

	// No requirements configured anywhere: hand back an empty value so
	// callers can inspect fields without a nil check.
	return &ec2.InstanceRequirements{}, nil
}

func buildGenericLabels(template *asgTemplate, nodeName string) map[string]string {
result := make(map[string]string)

Expand Down
32 changes: 32 additions & 0 deletions cluster-autoscaler/cloudprovider/aws/aws_manager_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,7 @@ import (
"k8s.io/autoscaler/cluster-autoscaler/cloudprovider/aws/aws-sdk-go/service/autoscaling"
"k8s.io/autoscaler/cluster-autoscaler/cloudprovider/aws/aws-sdk-go/service/ec2"
"k8s.io/autoscaler/cluster-autoscaler/config"
"k8s.io/autoscaler/cluster-autoscaler/utils/gpu"
provider_aws "k8s.io/legacy-cloud-providers/aws"
)

Expand Down Expand Up @@ -468,6 +469,37 @@ func TestBuildNodeFromTemplate(t *testing.T) {
observedTaints := observedNode.Spec.Taints
assert.Equal(t, 1, len(observedTaints))
assert.Equal(t, gpuTaint, observedTaints[0])

// Node with instance requirements
asg.MixedInstancesPolicy = &mixedInstancesPolicy{
instanceRequirementsOverrides: &autoscaling.InstanceRequirements{
VCpuCount: &autoscaling.VCpuCountRequest{
Min: aws.Int64(4),
Max: aws.Int64(8),
},
MemoryMiB: &autoscaling.MemoryMiBRequest{
Min: aws.Int64(4),
Max: aws.Int64(8),
},
AcceleratorTypes: []*string{aws.String(autoscaling.AcceleratorTypeGpu)},
AcceleratorManufacturers: []*string{aws.String(autoscaling.AcceleratorManufacturerNvidia)},
AcceleratorCount: &autoscaling.AcceleratorCountRequest{
Min: aws.Int64(4),
Max: aws.Int64(8),
},
},
}
observedNode, observedErr = awsManager.buildNodeFromTemplate(asg, &asgTemplate{
InstanceType: c5Instance,
})

assert.NoError(t, observedErr)
observedMemoryRequirement := observedNode.Status.Capacity[apiv1.ResourceMemory]
assert.Equal(t, int64(4*1024*1024), observedMemoryRequirement.Value())
observedVCpuRequirement := observedNode.Status.Capacity[apiv1.ResourceCPU]
assert.Equal(t, int64(4), observedVCpuRequirement.Value())
observedGpuRequirement := observedNode.Status.Capacity[gpu.ResourceNvidiaGPU]
assert.Equal(t, int64(4), observedGpuRequirement.Value())
}

func TestExtractLabelsFromAsg(t *testing.T) {
Expand Down
Loading

0 comments on commit 62c3b26

Please sign in to comment.