Skip to content

Commit

Permalink
fix: Remove support for neurondevice
Browse files Browse the repository at this point in the history
  • Loading branch information
bryantbiggs committed Oct 1, 2024
1 parent 877e5d9 commit d2ee76c
Show file tree
Hide file tree
Showing 10 changed files with 2,075 additions and 1,163 deletions.
1 change: 0 additions & 1 deletion designs/limits.md
Original file line number Diff line number Diff line change
Expand Up @@ -68,7 +68,6 @@ The list of supported resource types is -
- `amd.com/gpu`
- `aws.amazon.com/neuron`
- `aws.amazon.com/neuroncore`
- `aws.amazon.com/neurondevice`
- `habana.ai/gaudi`

Limits will be defined per provisioner. We'll rely on the `karpenter.sh/provisioner-name` node label when calculating the resource usage attributable to a specific provisioner. This is useful when multiple teams share a single cluster and use separate provisioners, since each team's resource consumption will then be limited separately.
Expand Down
2 changes: 1 addition & 1 deletion examples/workloads/neuron.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ spec:
name: neuron
resources:
limits:
aws.amazon.com/neurondevice: "1"
aws.amazon.com/neuron: "1"
requests:
cpu: "1"
memory: 256M
Expand Down
1 change: 0 additions & 1 deletion pkg/apis/v1/labels.go
Original file line number Diff line number Diff line change
Expand Up @@ -92,7 +92,6 @@ var (
ResourceAMDGPU corev1.ResourceName = "amd.com/gpu"
ResourceAWSNeuron corev1.ResourceName = "aws.amazon.com/neuron"
ResourceAWSNeuronCore corev1.ResourceName = "aws.amazon.com/neuroncore"
ResourceAWSNeuronDevice corev1.ResourceName = "aws.amazon.com/neurondevice"
ResourceHabanaGaudi corev1.ResourceName = "habana.ai/gaudi"
ResourceAWSPodENI corev1.ResourceName = "vpc.amazonaws.com/pod-eni"
ResourcePrivateIPv4Address corev1.ResourceName = "vpc.amazonaws.com/PrivateIPv4Address"
Expand Down
1 change: 0 additions & 1 deletion pkg/providers/instance/instance.go
Original file line number Diff line number Diff line change
Expand Up @@ -460,7 +460,6 @@ func filterExoticInstanceTypes(instanceTypes []*cloudprovider.InstanceType) []*c
}
if !resources.IsZero(it.Capacity[v1.ResourceAWSNeuron]) ||
!resources.IsZero(it.Capacity[v1.ResourceAWSNeuronCore]) ||
!resources.IsZero(it.Capacity[v1.ResourceAWSNeuronDevice]) ||
!resources.IsZero(it.Capacity[v1.ResourceAMDGPU]) ||
!resources.IsZero(it.Capacity[v1.ResourceNVIDIAGPU]) ||
!resources.IsZero(it.Capacity[v1.ResourceHabanaGaudi]) {
Expand Down
26 changes: 13 additions & 13 deletions pkg/providers/instancetype/suite_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -759,28 +759,28 @@ var _ = Describe("InstanceTypeProvider", func() {
}
Expect(nodeNames.Len()).To(Equal(1))
})
It("should launch instances for aws.amazon.com/neurondevice resource requests", func() {
It("should launch instances for aws.amazon.com/neuron resource requests", func() {
nodeNames := sets.NewString()
ExpectApplied(ctx, env.Client, nodePool, nodeClass)
pods := []*corev1.Pod{
coretest.UnschedulablePod(coretest.PodOptions{
ResourceRequirements: corev1.ResourceRequirements{
Requests: corev1.ResourceList{v1.ResourceAWSNeuronDevice: resource.MustParse("2")},
Limits: corev1.ResourceList{v1.ResourceAWSNeuronDevice: resource.MustParse("2")},
Requests: corev1.ResourceList{v1.ResourceAWSNeuron: resource.MustParse("2")},
Limits: corev1.ResourceList{v1.ResourceAWSNeuron: resource.MustParse("2")},
},
}),
// Should pack onto same instance
coretest.UnschedulablePod(coretest.PodOptions{
ResourceRequirements: corev1.ResourceRequirements{
Requests: corev1.ResourceList{v1.ResourceAWSNeuronDevice: resource.MustParse("2")},
Limits: corev1.ResourceList{v1.ResourceAWSNeuronDevice: resource.MustParse("2")},
Requests: corev1.ResourceList{v1.ResourceAWSNeuron: resource.MustParse("2")},
Limits: corev1.ResourceList{v1.ResourceAWSNeuron: resource.MustParse("2")},
},
}),
// Should pack onto a separate instance
coretest.UnschedulablePod(coretest.PodOptions{
ResourceRequirements: corev1.ResourceRequirements{
Requests: corev1.ResourceList{v1.ResourceAWSNeuronDevice: resource.MustParse("4")},
Limits: corev1.ResourceList{v1.ResourceAWSNeuronDevice: resource.MustParse("4")},
Requests: corev1.ResourceList{v1.ResourceAWSNeuron: resource.MustParse("4")},
Limits: corev1.ResourceList{v1.ResourceAWSNeuron: resource.MustParse("4")},
},
}),
}
Expand Down Expand Up @@ -1909,15 +1909,15 @@ var _ = Describe("InstanceTypeProvider", func() {
coretest.UnschedulablePod(coretest.PodOptions{
NodeSelector: map[string]string{corev1.LabelTopologyZone: "test-zone-1a"},
ResourceRequirements: corev1.ResourceRequirements{
Requests: corev1.ResourceList{v1.ResourceAWSNeuronDevice: resource.MustParse("1")},
Limits: corev1.ResourceList{v1.ResourceAWSNeuronDevice: resource.MustParse("1")},
Requests: corev1.ResourceList{v1.ResourceAWSNeuron: resource.MustParse("1")},
Limits: corev1.ResourceList{v1.ResourceAWSNeuron: resource.MustParse("1")},
},
}),
coretest.UnschedulablePod(coretest.PodOptions{
NodeSelector: map[string]string{corev1.LabelTopologyZone: "test-zone-1a"},
ResourceRequirements: corev1.ResourceRequirements{
Requests: corev1.ResourceList{v1.ResourceAWSNeuronDevice: resource.MustParse("1")},
Limits: corev1.ResourceList{v1.ResourceAWSNeuronDevice: resource.MustParse("1")},
Requests: corev1.ResourceList{v1.ResourceAWSNeuron: resource.MustParse("1")},
Limits: corev1.ResourceList{v1.ResourceAWSNeuron: resource.MustParse("1")},
},
}),
}
Expand Down Expand Up @@ -2002,8 +2002,8 @@ var _ = Describe("InstanceTypeProvider", func() {
pod := coretest.UnschedulablePod(coretest.PodOptions{
NodeSelector: map[string]string{corev1.LabelInstanceTypeStable: "inf2.24xlarge"},
ResourceRequirements: corev1.ResourceRequirements{
Requests: corev1.ResourceList{v1.ResourceAWSNeuronDevice: resource.MustParse("2")},
Limits: corev1.ResourceList{v1.ResourceAWSNeuronDevice: resource.MustParse("2")},
Requests: corev1.ResourceList{v1.ResourceAWSNeuron: resource.MustParse("2")},
Limits: corev1.ResourceList{v1.ResourceAWSNeuron: resource.MustParse("2")},
},
})
ExpectProvisioned(ctx, env.Client, cluster, cloudProvider, prov, pod)
Expand Down
1 change: 0 additions & 1 deletion pkg/providers/instancetype/types.go
Original file line number Diff line number Diff line change
Expand Up @@ -314,7 +314,6 @@ func computeCapacity(ctx context.Context, info *ec2.InstanceTypeInfo, amiFamily
v1.ResourceAMDGPU: *amdGPUs(info),
v1.ResourceAWSNeuron: *awsNeuronDevices(info),
v1.ResourceAWSNeuronCore: *awsNeuronCores(info),
v1.ResourceAWSNeuronDevice: *awsNeuronDevices(info),
v1.ResourceHabanaGaudi: *habanaGaudis(info),
v1.ResourceEFA: *efas(info),
}
Expand Down
6 changes: 3 additions & 3 deletions test/suites/integration/extended_resources_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -106,7 +106,7 @@ var _ = Describe("Extended Resources", func() {
env.ExpectCreatedNodeCount("==", 1)
env.EventuallyExpectInitializedNodeCount("==", 1)
})
It("should provision nodes for a deployment that requests aws.amazon.com/neurondevice", func() {
It("should provision nodes for a deployment that requests aws.amazon.com/neuron", func() {
ExpectNeuronDevicePluginCreated()
// TODO: jmdeal@ remove AL2 pin once AL2023 accelerated AMIs are available
nodeClass.Spec.AMISelectorTerms = []v1.AMISelectorTerm{{Alias: "al2@latest"}}
Expand All @@ -119,10 +119,10 @@ var _ = Describe("Extended Resources", func() {
},
ResourceRequirements: corev1.ResourceRequirements{
Requests: corev1.ResourceList{
"aws.amazon.com/neurondevice": resource.MustParse("1"),
"aws.amazon.com/neuron": resource.MustParse("1"),
},
Limits: corev1.ResourceList{
"aws.amazon.com/neurondevice": resource.MustParse("1"),
"aws.amazon.com/neuron": resource.MustParse("1"),
},
},
},
Expand Down
1 change: 0 additions & 1 deletion website/content/en/preview/concepts/scheduling.md
Original file line number Diff line number Diff line change
Expand Up @@ -71,7 +71,6 @@ Accelerator (e.g., GPU) values include
- `amd.com/gpu`
- `aws.amazon.com/neuron`
- `aws.amazon.com/neuroncore`
- `aws.amazon.com/neurondevice`
- `habana.ai/gaudi`

Karpenter supports accelerators, such as GPUs.
Expand Down
Loading

0 comments on commit d2ee76c

Please sign in to comment.