diff --git a/pkg/cloudprovider/aws/fake/ec2api.go b/pkg/cloudprovider/aws/fake/ec2api.go index b247ea33036f..64fdba6b625d 100644 --- a/pkg/cloudprovider/aws/fake/ec2api.go +++ b/pkg/cloudprovider/aws/fake/ec2api.go @@ -382,6 +382,27 @@ func (e *EC2API) DescribeInstanceTypesPagesWithContext(_ context.Context, _ *ec2 Ipv4AddressesPerInterface: aws.Int64(60), }, }, + { + InstanceType: aws.String("m5.metal"), + SupportedUsageClasses: DefaultSupportedUsageClasses, + SupportedVirtualizationTypes: []*string{aws.String("hvm")}, + BurstablePerformanceSupported: aws.Bool(false), + BareMetal: aws.Bool(true), + Hypervisor: nil, + ProcessorInfo: &ec2.ProcessorInfo{ + SupportedArchitectures: aws.StringSlice([]string{"x86_64"}), + }, + VCpuInfo: &ec2.VCpuInfo{ + DefaultVCpus: aws.Int64(96), + }, + MemoryInfo: &ec2.MemoryInfo{ + SizeInMiB: aws.Int64(393216), + }, + NetworkInfo: &ec2.NetworkInfo{ + MaximumNetworkInterfaces: aws.Int64(15), + Ipv4AddressesPerInterface: aws.Int64(50), + }, + }, }, }, false) return nil @@ -454,6 +475,18 @@ func (e *EC2API) DescribeInstanceTypeOfferingsPagesWithContext(_ context.Context InstanceType: aws.String("c6g.large"), Location: aws.String("test-zone-1a"), }, + { + InstanceType: aws.String("m5.metal"), + Location: aws.String("test-zone-1a"), + }, + { + InstanceType: aws.String("m5.metal"), + Location: aws.String("test-zone-1b"), + }, + { + InstanceType: aws.String("m5.metal"), + Location: aws.String("test-zone-1c"), + }, }, }, false) return nil diff --git a/pkg/cloudprovider/aws/instance.go b/pkg/cloudprovider/aws/instance.go index 42ad9fb234af..eabae2c0495d 100644 --- a/pkg/cloudprovider/aws/instance.go +++ b/pkg/cloudprovider/aws/instance.go @@ -321,20 +321,24 @@ func (p *InstanceProvider) getCapacityType(nodeRequest *cloudprovider.NodeReques return v1alpha1.CapacityTypeOnDemand } -// filterInstanceTypes is used to eliminate GPU instance types from the list of possible instance types when a -// non-GPU instance type will work. 
If the list of instance types consists of both GPU and non-GPU types, then only -// the non-GPU types will be returned. If it has only GPU types, the list will be returned unaltered. +// filterInstanceTypes is used to eliminate less desirable instance types (like GPUs or bare metal) from the list of possible instance types when +// a set of more appropriate instance types would work. If a set of more desirable instance types is not found, then the original slice +// of instance types is returned. func (p *InstanceProvider) filterInstanceTypes(instanceTypes []cloudprovider.InstanceType) []cloudprovider.InstanceType { var genericInstanceTypes []cloudprovider.InstanceType for _, it := range instanceTypes { + if aws.BoolValue(it.(*InstanceType).BareMetal) { + continue + } itRes := it.Resources() - if resources.IsZero(itRes[v1alpha1.ResourceAWSNeuron]) && - resources.IsZero(itRes[v1alpha1.ResourceAMDGPU]) && - resources.IsZero(itRes[v1alpha1.ResourceNVIDIAGPU]) { - genericInstanceTypes = append(genericInstanceTypes, it) + if !resources.IsZero(itRes[v1alpha1.ResourceAWSNeuron]) || + !resources.IsZero(itRes[v1alpha1.ResourceAMDGPU]) || + !resources.IsZero(itRes[v1alpha1.ResourceNVIDIAGPU]) { + continue } + genericInstanceTypes = append(genericInstanceTypes, it) } - // if we got some subset of non-GPU types, then prefer to use those + // if we got some subset of instance types, then prefer to use those if len(genericInstanceTypes) != 0 { return genericInstanceTypes } diff --git a/pkg/cloudprovider/aws/instancetypes.go b/pkg/cloudprovider/aws/instancetypes.go index 18d0074a837d..fb3c9da0dd20 100644 --- a/pkg/cloudprovider/aws/instancetypes.go +++ b/pkg/cloudprovider/aws/instancetypes.go @@ -147,6 +147,10 @@ func (p *InstanceTypeProvider) getInstanceTypes(ctx context.Context) (map[string Name: aws.String("supported-virtualization-type"), Values: []*string{aws.String("hvm")}, }, + { + Name: aws.String("processor-info.supported-architecture"), + Values: aws.StringSlice([]string{"x86_64", 
"arm64"}), + }, }, }, func(page *ec2.DescribeInstanceTypesOutput, lastPage bool) bool { for _, instanceType := range page.InstanceTypes { @@ -168,10 +172,6 @@ func (p *InstanceTypeProvider) filter(instanceType *ec2.InstanceTypeInfo) bool { if instanceType.FpgaInfo != nil { return false } - if aws.BoolValue(instanceType.BareMetal) { - return false - } - // TODO exclude if not available for spot return functional.HasAnyPrefix(aws.StringValue(instanceType.InstanceType), "m", "c", "r", "a", // Standard "i3", // Storage-optimized diff --git a/pkg/cloudprovider/aws/suite_test.go b/pkg/cloudprovider/aws/suite_test.go index 9e4f73aecffa..4ce492ef082f 100644 --- a/pkg/cloudprovider/aws/suite_test.go +++ b/pkg/cloudprovider/aws/suite_test.go @@ -166,6 +166,20 @@ var _ = Describe("Allocation", func() { ExpectNotScheduled(ctx, env.Client, pod) } }) + It("should launch on metal", func() { + for _, pod := range ExpectProvisioned(ctx, env.Client, selectionController, provisioners, provisioner, + test.UnschedulablePod(test.PodOptions{ + NodeSelector: map[string]string{ + v1.LabelInstanceTypeStable: "m5.metal", + }, + ResourceRequirements: v1.ResourceRequirements{ + Requests: v1.ResourceList{v1.ResourceCPU: resource.MustParse("1")}, + Limits: v1.ResourceList{v1.ResourceCPU: resource.MustParse("1")}, + }, + })) { + ExpectScheduled(ctx, env.Client, pod) + } + }) It("should launch AWS Pod ENI on a compatible instance type", func() { for _, pod := range ExpectProvisioned(ctx, env.Client, selectionController, provisioners, provisioner, test.UnschedulablePod(test.PodOptions{ diff --git a/website/content/en/preview/faq.md b/website/content/en/preview/faq.md index a18867c18336..8b17f06989a5 100644 --- a/website/content/en/preview/faq.md +++ b/website/content/en/preview/faq.md @@ -93,6 +93,10 @@ Yes, see [Example Provisioner Resource]({{< ref "./provisioner/#example-provisio * Attribute-based requests are currently not possible. * You can select instances with special hardware, such as gpu. 
+### Can I use Bare Metal instance types? + +Yes, Karpenter supports provisioning bare metal instance types when a Provisioner's `node.kubernetes.io/instance-type` Requirements include only `metal` instance types. If a Provisioner's instance types are not constrained, then Karpenter will not provision bare metal instance types. + ### How does Karpenter dynamically select instance types? Karpenter batches pending pods and then binpacks them based on CPU, memory, and GPUs required, taking into account node overhead, VPC CNI resources required, and daemon sets that will be packed when bringing up a new node.