Implemented volume topology aware scheduling #1015

Merged (3 commits) on Jan 5, 2022

charts/karpenter/templates/controller/rbac.yaml (7 additions, 1 deletion)

@@ -49,6 +49,12 @@ rules:
- apiGroups: ["karpenter.sh"]
resources: ["provisioners", "provisioners/status"]
verbs: ["create", "delete", "patch", "get", "list", "watch"]
- apiGroups: [""]
resources: ["persistentvolumes", "persistentvolumeclaims"]
verbs: ["get", "list", "watch", "update"]
- apiGroups: ["storage.k8s.io"]
resources: ["storageclasses"]
verbs: ["get", "list", "watch"]
- apiGroups: ["coordination.k8s.io"]
resources: ["leases"]
verbs: ["create", "get", "patch", "update", "watch"]
@@ -67,4 +73,4 @@ rules:
- apiGroups: ["apps"]
resources: ["daemonsets"]
verbs: ["list", "watch"]
---
---
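
The rules added above are what make the scheduler volume-topology aware: the controller must read and update PersistentVolumes and PersistentVolumeClaims, and read StorageClasses, to discover zonal constraints before launching capacity. As a hedged illustration (the helper below is invented for this note, not part of the PR), this is the kind of StorageClass lookup the new permissions authorize:

package example

import (
	"context"

	v1 "k8s.io/api/core/v1"
	storagev1 "k8s.io/api/storage/v1"
	"k8s.io/apimachinery/pkg/types"
	"sigs.k8s.io/controller-runtime/pkg/client"
)

// zonesForClaim is a hypothetical helper: given an unbound claim, it fetches
// the claim's StorageClass (readable under the new storage.k8s.io rule) and
// returns any zones pinned through allowedTopologies.
func zonesForClaim(ctx context.Context, kubeClient client.Client, pvc *v1.PersistentVolumeClaim) ([]string, error) {
	if pvc.Spec.StorageClassName == nil {
		return nil, nil // no class; nothing to learn here
	}
	storageClass := &storagev1.StorageClass{}
	if err := kubeClient.Get(ctx, types.NamespacedName{Name: *pvc.Spec.StorageClassName}, storageClass); err != nil {
		return nil, err
	}
	var zones []string
	for _, term := range storageClass.AllowedTopologies {
		for _, expression := range term.MatchLabelExpressions {
			if expression.Key == v1.LabelTopologyZone {
				zones = append(zones, expression.Values...)
			}
		}
	}
	return zones, nil
}
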
cmd/controller/main.go (2 additions, 0 deletions)

@@ -26,6 +26,7 @@ import (
"github.com/aws/karpenter/pkg/controllers/counter"
"github.com/aws/karpenter/pkg/controllers/metrics"
"github.com/aws/karpenter/pkg/controllers/node"
"github.com/aws/karpenter/pkg/controllers/persistentvolumeclaim"
"github.com/aws/karpenter/pkg/controllers/provisioning"
"github.com/aws/karpenter/pkg/controllers/selection"
"github.com/aws/karpenter/pkg/controllers/termination"
@@ -87,6 +88,7 @@ func main() {
if err := manager.RegisterControllers(ctx,
provisioningController,
selection.NewController(manager.GetClient(), provisioningController),
persistentvolumeclaim.NewController(manager.GetClient()),
termination.NewController(ctx, manager.GetClient(), clientSet.CoreV1(), cloudProvider),
node.NewController(manager.GetClient()),
metrics.NewController(manager.GetClient(), cloudProvider),
pkg/apis/provisioning/v1alpha5/constraints.go (2 additions, 2 deletions)

@@ -56,7 +56,7 @@ func (c *Constraints) ValidatePod(pod *v1.Pod) error {
}
}
// The combined requirements are not compatible
combined := c.Requirements.With(podRequirements)
combined := c.Requirements.Add(podRequirements...)
for _, key := range podRequirements.Keys() {
if combined.Requirement(key).Len() == 0 {
return fmt.Errorf("invalid nodeSelector %q, %v not in %v", key, podRequirements.Requirement(key).UnsortedList(), c.Requirements.Requirement(key).UnsortedList())
@@ -68,7 +68,7 @@ func (c *Constraints) ValidatePod(pod *v1.Pod) error {
func (c *Constraints) Tighten(pod *v1.Pod) *Constraints {
return &Constraints{
Labels: c.Labels,
Requirements: c.Requirements.With(PodRequirements(pod)).Consolidate().WellKnown(),
Requirements: c.Requirements.Add(PodRequirements(pod)...).Consolidate().WellKnown(),
Taints: c.Taints,
Provider: c.Provider,
KubeletConfiguration: c.KubeletConfiguration,
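
The old `With(requirements)` simply concatenated slices; the new variadic `Add` also normalizes aliased label keys on the way in (see requirements.go below). The validation logic is otherwise unchanged: a pod is rejected when, for some key, the provisioner's and the pod's allowed value sets intersect to empty. A minimal sketch of that check, with invented values:

package example

import (
	"fmt"

	v1 "k8s.io/api/core/v1"

	"github.com/aws/karpenter/pkg/apis/provisioning/v1alpha5"
)

func incompatiblePodExample() {
	provisionerReqs := v1alpha5.Requirements{}.Add(v1.NodeSelectorRequirement{
		Key: v1.LabelTopologyZone, Operator: v1.NodeSelectorOpIn, Values: []string{"us-west-2a", "us-west-2b"},
	})
	podReqs := v1alpha5.Requirements{}.Add(v1.NodeSelectorRequirement{
		Key: v1.LabelTopologyZone, Operator: v1.NodeSelectorOpIn, Values: []string{"us-east-1a"},
	})
	combined := provisionerReqs.Add(podReqs...)
	// The zone sets do not overlap, so the combined requirement is empty and
	// ValidatePod reports an invalid nodeSelector for this key.
	fmt.Println(combined.Requirement(v1.LabelTopologyZone).Len() == 0) // true
}
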
pkg/apis/provisioning/v1alpha5/register.go (5 additions, 48 deletions)

@@ -17,61 +17,13 @@ package v1alpha5
import (
"context"

v1 "k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/runtime"
"k8s.io/apimachinery/pkg/runtime/schema"
"k8s.io/apimachinery/pkg/types"
"k8s.io/apimachinery/pkg/util/sets"
"knative.dev/pkg/apis"
)

var (
ArchitectureAmd64 = "amd64"
ArchitectureArm64 = "arm64"
OperatingSystemLinux = "linux"

ProvisionerNameLabelKey = SchemeGroupVersion.Group + "/provisioner-name"
NotReadyTaintKey = SchemeGroupVersion.Group + "/not-ready"
DoNotEvictPodAnnotationKey = SchemeGroupVersion.Group + "/do-not-evict"
EmptinessTimestampAnnotationKey = SchemeGroupVersion.Group + "/emptiness-timestamp"
TerminationFinalizer = SchemeGroupVersion.Group + "/termination"
DefaultProvisioner = types.NamespacedName{Name: "default"}
)

var (
// RestrictedLabels are injected by Cloud Providers
RestrictedLabels = sets.NewString(
// Used internally by provisioning logic
EmptinessTimestampAnnotationKey,
v1.LabelHostname,
)

// AllowedLabelDomains are domains that would otherwise be restricted, but are
// allowed because they are not used in a context where they may be passed as
// arguments to the kubelet.
// AllowedLabelDomains are evaluated before RestrictedLabelDomains
AllowedLabelDomains = sets.NewString(
"kops.k8s.io",
)

// These are either prohibited by the kubelet or reserved by karpenter
// They are evaluated after AllowedLabelDomains
KarpenterLabelDomain = "karpenter.sh"
RestrictedLabelDomains = sets.NewString(
"kubernetes.io",
"k8s.io",
KarpenterLabelDomain,
)
LabelCapacityType = KarpenterLabelDomain + "/capacity-type"
// WellKnownLabels supported by karpenter
WellKnownLabels = sets.NewString(
v1.LabelTopologyZone,
v1.LabelInstanceTypeStable,
v1.LabelArchStable,
v1.LabelOSStable,
LabelCapacityType,
v1.LabelHostname, // Used internally for hostname topology spread
)
DefaultHook = func(ctx context.Context, constraints *Constraints) {}
ValidateHook = func(ctx context.Context, constraints *Constraints) *apis.FieldError { return nil }
)
@@ -88,6 +40,11 @@ var (
metav1.AddToGroupVersion(scheme, SchemeGroupVersion)
return nil
})
ProvisionerNameLabelKey = SchemeGroupVersion.Group + "/provisioner-name"
NotReadyTaintKey = SchemeGroupVersion.Group + "/not-ready"
DoNotEvictPodAnnotationKey = SchemeGroupVersion.Group + "/do-not-evict"
EmptinessTimestampAnnotationKey = SchemeGroupVersion.Group + "/emptiness-timestamp"
TerminationFinalizer = SchemeGroupVersion.Group + "/termination"
)

const (
pkg/apis/provisioning/v1alpha5/requirements.go (87 additions, 24 deletions)

@@ -21,6 +21,55 @@ import (
"k8s.io/apimachinery/pkg/util/sets"
)

var (
ArchitectureAmd64 = "amd64"
ArchitectureArm64 = "arm64"
OperatingSystemLinux = "linux"

// RestrictedLabels are injected by Cloud Providers
RestrictedLabels = sets.NewString(
// Used internally by provisioning logic
EmptinessTimestampAnnotationKey,
v1.LabelHostname,
)

// AllowedLabelDomains are domains that would otherwise be restricted, but are
// allowed because they are not used in a context where they may be passed as
// arguments to the kubelet.
// AllowedLabelDomains are evaluated before RestrictedLabelDomains
AllowedLabelDomains = sets.NewString(
"kops.k8s.io",
)

// These are either prohibited by the kubelet or reserved by karpenter
// They are evaluated after AllowedLabelDomains
KarpenterLabelDomain = "karpenter.sh"
RestrictedLabelDomains = sets.NewString(
"kubernetes.io",
"k8s.io",
KarpenterLabelDomain,
)
LabelCapacityType = KarpenterLabelDomain + "/capacity-type"
// WellKnownLabels supported by karpenter
WellKnownLabels = sets.NewString(
v1.LabelTopologyZone,
v1.LabelInstanceTypeStable,
v1.LabelArchStable,
v1.LabelOSStable,
LabelCapacityType,
v1.LabelHostname, // Used internally for hostname topology spread
)
// NormalizedLabels translate aliased concepts into the controller's
// WellKnownLabels. Pod requirements are translated for compatibility;
// however, Provisioner labels are still restricted to WellKnownLabels.
// Additional labels may be injected by cloud providers.
NormalizedLabels = map[string]string{
v1.LabelFailureDomainBetaZone: v1.LabelTopologyZone,
"beta.kubernetes.io/arch": v1.LabelArchStable,
"beta.kubernetes.io/os": v1.LabelOSStable,
v1.LabelInstanceType: v1.LabelInstanceTypeStable,
}
)

// Requirements is a decorated alias type for []v1.NodeSelectorRequirement
type Requirements []v1.NodeSelectorRequirement

@@ -44,58 +93,72 @@ func (r Requirements) CapacityTypes() sets.String {
return r.Requirement(LabelCapacityType)
}

func (r Requirements) With(requirements Requirements) Requirements {
return append(r, requirements...)
func (r Requirements) Add(requirements ...v1.NodeSelectorRequirement) Requirements {
return append(r, Requirements(requirements).Normalize()...)
}

// Normalize the requirements to use WellKnownLabels
func (r Requirements) Normalize() Requirements {
normalized := Requirements{}
for _, requirement := range r {
label := requirement.Key
if normalized, ok := NormalizedLabels[requirement.Key]; ok {
label = normalized
}
normalized = append(normalized, v1.NodeSelectorRequirement{Key: label, Operator: requirement.Operator, Values: requirement.Values})
}
return normalized
}

// Consolidate combines In and NotIn requirements for each unique key, producing
// an equivalent minimal representation of the requirements. This is useful as
// requirements may be appended from a variety of sources and then consolidated.
// Caution: If a key contains a `NotIn` operator without a corresponding
// `In` operator, the requirement will permanently be [] after consolidation. To
// avoid this, include the broadest `In` requirements before consolidating.
func (r Requirements) Consolidate() (requirements Requirements) {
for _, key := range r.Keys() {
requirements = requirements.Add(v1.NodeSelectorRequirement{
Key: key,
Operator: v1.NodeSelectorOpIn,
Values: r.Requirement(key).UnsortedList(),
})
}
return requirements
}

func LabelRequirements(labels map[string]string) (r Requirements) {
for key, value := range labels {
r = append(r, v1.NodeSelectorRequirement{Key: key, Operator: v1.NodeSelectorOpIn, Values: []string{value}})
r = r.Add(v1.NodeSelectorRequirement{Key: key, Operator: v1.NodeSelectorOpIn, Values: []string{value}})
}
return r
}

func PodRequirements(pod *v1.Pod) (r Requirements) {
for key, value := range pod.Spec.NodeSelector {
r = append(r, v1.NodeSelectorRequirement{Key: key, Operator: v1.NodeSelectorOpIn, Values: []string{value}})
r = r.Add(v1.NodeSelectorRequirement{Key: key, Operator: v1.NodeSelectorOpIn, Values: []string{value}})
}
if pod.Spec.Affinity == nil || pod.Spec.Affinity.NodeAffinity == nil {
return r
}
// Select heaviest preference and treat as a requirement. An outer loop will iteratively unconstrain them if unsatisfiable.
if preferred := pod.Spec.Affinity.NodeAffinity.PreferredDuringSchedulingIgnoredDuringExecution; len(preferred) > 0 {
sort.Slice(preferred, func(i int, j int) bool { return preferred[i].Weight > preferred[j].Weight })
r = append(r, preferred[0].Preference.MatchExpressions...)
r = r.Add(preferred[0].Preference.MatchExpressions...)
}
// Select first requirement. An outer loop will iteratively remove OR requirements if unsatisfiable
if pod.Spec.Affinity.NodeAffinity.RequiredDuringSchedulingIgnoredDuringExecution != nil &&
len(pod.Spec.Affinity.NodeAffinity.RequiredDuringSchedulingIgnoredDuringExecution.NodeSelectorTerms) > 0 {
r = append(r, pod.Spec.Affinity.NodeAffinity.RequiredDuringSchedulingIgnoredDuringExecution.NodeSelectorTerms[0].MatchExpressions...)
r = r.Add(pod.Spec.Affinity.NodeAffinity.RequiredDuringSchedulingIgnoredDuringExecution.NodeSelectorTerms[0].MatchExpressions...)
}
return r
}

// Consolidate combines In and NotIn requirements for each unique key, producing
// an equivalent minimal representation of the requirements. This is useful as
// requirements may be appended from a variety of sources and then consolidated.
// Caution: If a key contains a `NotIn` operator without a corresponding
// `In` operator, the requirement will permanently be [] after consolidation. To
// avoid this, include the broadest `In` requirements before consolidating.
func (r Requirements) Consolidate() (requirements Requirements) {
for _, key := range r.Keys() {
requirements = append(requirements, v1.NodeSelectorRequirement{
Key: key,
Operator: v1.NodeSelectorOpIn,
Values: r.Requirement(key).UnsortedList(),
})
}
return requirements
}

func (r Requirements) WellKnown() (requirements Requirements) {
for _, requirement := range r {
if WellKnownLabels.Has(requirement.Key) {
requirements = append(requirements, requirement)
requirements = requirements.Add(requirement)

}
}
return requirements
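
Taken together, `Add`, `Normalize`, and `Consolidate` now funnel every requirement source through a single path: keys are rewritten to their well-known forms on entry, and consolidation collapses each key to a single `In` set. A small round-trip sketch (values are illustrative):

package example

import (
	"fmt"

	v1 "k8s.io/api/core/v1"

	"github.com/aws/karpenter/pkg/apis/provisioning/v1alpha5"
)

func consolidateExample() {
	reqs := v1alpha5.Requirements{}.Add(
		// Deprecated alias: Normalize (invoked by Add) rewrites this key to
		// v1.LabelArchStable via the NormalizedLabels table.
		v1.NodeSelectorRequirement{Key: "beta.kubernetes.io/arch", Operator: v1.NodeSelectorOpIn, Values: []string{"amd64", "arm64"}},
		// Per the Caution above, NotIn is subtracted from the In set for the key.
		v1.NodeSelectorRequirement{Key: v1.LabelArchStable, Operator: v1.NodeSelectorOpNotIn, Values: []string{"arm64"}},
	)
	// One requirement remains after consolidation: kubernetes.io/arch In [amd64].
	fmt.Println(reqs.Consolidate().Requirement(v1.LabelArchStable).UnsortedList())
}
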
pkg/cloudprovider/aws/cloudprovider.go (5 additions, 0 deletions)

@@ -28,6 +28,7 @@ import (
"github.com/aws/karpenter/pkg/apis/provisioning/v1alpha5"
"github.com/aws/karpenter/pkg/cloudprovider"
"github.com/aws/karpenter/pkg/cloudprovider/aws/apis/v1alpha1"
"github.com/aws/karpenter/pkg/utils/functional"
"github.com/aws/karpenter/pkg/utils/project"

"go.uber.org/multierr"
@@ -54,6 +55,10 @@ const (
CacheCleanupInterval = 10 * time.Minute
)

func init() {
v1alpha5.NormalizedLabels = functional.UnionStringMaps(v1alpha5.NormalizedLabels, map[string]string{"topology.ebs.csi.aws.com/zone": v1.LabelTopologyZone})

[Inline review comment from a maintainer: "good idea"]

}

type CloudProvider struct {
instanceTypeProvider *InstanceTypeProvider
subnetProvider *SubnetProvider
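
The `init` hook above merges the EBS CSI driver's zone key into `NormalizedLabels` via `functional.UnionStringMaps`, so volume node affinity stamped by the CSI driver is treated as an ordinary zone constraint. A hedged sketch of the observable effect (assuming a `Zones()` accessor analogous to the `CapacityTypes()` shown in requirements.go):

package example

import (
	"fmt"

	v1 "k8s.io/api/core/v1"

	"github.com/aws/karpenter/pkg/apis/provisioning/v1alpha5"
)

func ebsZoneAliasExample() {
	// After the aws cloud provider's init() runs, this CSI-specific key is
	// normalized to the standard topology.kubernetes.io/zone label.
	reqs := v1alpha5.Requirements{}.Add(v1.NodeSelectorRequirement{
		Key:      "topology.ebs.csi.aws.com/zone", // as written by the EBS CSI driver
		Operator: v1.NodeSelectorOpIn,
		Values:   []string{"us-west-2a"},
	})
	fmt.Println(reqs.Zones().UnsortedList()) // [us-west-2a]
}
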
pkg/cloudprovider/aws/suite_test.go (2 additions, 1 deletion)

@@ -18,6 +18,7 @@ import (
"context"
"encoding/base64"
"encoding/json"
"strings"
"testing"

"github.com/Pallinder/go-randomdata"
@@ -120,7 +121,7 @@ var _ = Describe("Allocation", func() {
provider = &v1alpha1.AWS{
InstanceProfile: "test-instance-profile",
}
provisioner = ProvisionerWithProvider(&v1alpha5.Provisioner{ObjectMeta: metav1.ObjectMeta{Name: v1alpha5.DefaultProvisioner.Name}}, provider)
provisioner = ProvisionerWithProvider(&v1alpha5.Provisioner{ObjectMeta: metav1.ObjectMeta{Name: strings.ToLower(randomdata.SillyName())}}, provider)
provisioner.SetDefaults(ctx)
fakeEC2API.Reset()
launchTemplateCache.Flush()
pkg/controllers/manager.go (3 additions, 9 deletions)

@@ -36,7 +36,9 @@ func NewManagerOrDie(ctx context.Context, config *rest.Config, options controlle
if err != nil {
panic(fmt.Sprintf("Failed to create controller newManager, %s", err.Error()))
}
if err := newManager.GetFieldIndexer().IndexField(ctx, &v1.Pod{}, "spec.nodeName", podSchedulingIndex); err != nil {
if err := newManager.GetFieldIndexer().IndexField(ctx, &v1.Pod{}, "spec.nodeName", func(o client.Object) []string {
return []string{o.(*v1.Pod).Spec.NodeName}
}); err != nil {
panic(fmt.Sprintf("Failed to setup pod indexer, %s", err.Error()))
}
return &GenericControllerManager{Manager: newManager}
@@ -57,11 +59,3 @@ func (m *GenericControllerManager) RegisterControllers(ctx context.Context, cont
}
return m
}

func podSchedulingIndex(object client.Object) []string {
pod, ok := object.(*v1.Pod)
if !ok {
return nil
}
return []string{pod.Spec.NodeName}
}
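
The standalone `podSchedulingIndex` helper is folded into an inline closure with identical behavior. The index is what makes per-node pod lookups cheap: consumers can list only the pods bound to a node straight from the manager's cache. A short sketch of how such an index is typically queried with a controller-runtime client (names assumed for illustration):

package example

import (
	"context"

	v1 "k8s.io/api/core/v1"
	"sigs.k8s.io/controller-runtime/pkg/client"
)

// podsOnNode lists only the pods whose spec.nodeName matches, served from the
// cache through the "spec.nodeName" field index registered above.
func podsOnNode(ctx context.Context, kubeClient client.Client, nodeName string) (*v1.PodList, error) {
	pods := &v1.PodList{}
	if err := kubeClient.List(ctx, pods, client.MatchingFields{"spec.nodeName": nodeName}); err != nil {
		return nil, err
	}
	return pods, nil
}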