Skip to content

Commit

Permalink
fix: Fix handling for multiple Daemonset nodeAffinity selectorTerms (#…
Browse files Browse the repository at this point in the history
  • Loading branch information
jonathan-innis authored Dec 4, 2024
1 parent db3ffd9 commit 8f8e759
Show file tree
Hide file tree
Showing 3 changed files with 203 additions and 51 deletions.
3 changes: 1 addition & 2 deletions pkg/controllers/provisioning/scheduling/preferences.go
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,6 @@ type Preferences struct {
}

func (p *Preferences) Relax(ctx context.Context, pod *v1.Pod) bool {
ctx = log.IntoContext(ctx, log.FromContext(ctx).WithValues("Pod", klog.KRef(pod.Namespace, pod.Name)))
relaxations := []func(*v1.Pod) *string{
p.removeRequiredNodeAffinityTerm,
p.removePreferredPodAffinityTerm,
Expand All @@ -50,7 +49,7 @@ func (p *Preferences) Relax(ctx context.Context, pod *v1.Pod) bool {

for _, relaxFunc := range relaxations {
if reason := relaxFunc(pod); reason != nil {
log.FromContext(ctx).V(1).Info(fmt.Sprintf("relaxing soft constraints for pod since it previously failed to schedule, %s", lo.FromPtr(reason)))
log.FromContext(ctx).WithValues("Pod", klog.KRef(pod.Namespace, pod.Name)).V(1).Info(fmt.Sprintf("relaxing soft constraints for pod since it previously failed to schedule, %s", lo.FromPtr(reason)))
return true
}
}
Expand Down
36 changes: 23 additions & 13 deletions pkg/controllers/provisioning/scheduling/scheduler.go
Original file line number Diff line number Diff line change
Expand Up @@ -352,23 +352,33 @@ func (s *Scheduler) calculateExistingNodeClaims(stateNodes []*state.StateNode, d
})
}

// getDaemonOverhead determines the daemon overhead for each NodeClaimTemplate: the resource requests of the daemon pods that are compatible with the template, and so must be accounted for on any node provisioned by that NodeClaimTemplate
func getDaemonOverhead(nodeClaimTemplates []*NodeClaimTemplate, daemonSetPods []*corev1.Pod) map[*NodeClaimTemplate]corev1.ResourceList {
overhead := map[*NodeClaimTemplate]corev1.ResourceList{}
return lo.SliceToMap(nodeClaimTemplates, func(nct *NodeClaimTemplate) (*NodeClaimTemplate, corev1.ResourceList) {
return nct, resources.RequestsForPods(lo.Filter(daemonSetPods, func(p *corev1.Pod, _ int) bool { return isDaemonPodCompatible(nct, p) })...)
})
}

for _, nodeClaimTemplate := range nodeClaimTemplates {
var daemons []*corev1.Pod
for _, p := range daemonSetPods {
if err := scheduling.Taints(nodeClaimTemplate.Spec.Taints).Tolerates(p); err != nil {
continue
}
if err := nodeClaimTemplate.Requirements.Compatible(scheduling.NewPodRequirements(p), scheduling.AllowUndefinedWellKnownLabels); err != nil {
continue
}
daemons = append(daemons, p)
// isDaemonPodCompatible determines if the daemon pod is compatible with the NodeClaimTemplate for daemon scheduling
func isDaemonPodCompatible(nodeClaimTemplate *NodeClaimTemplate, pod *corev1.Pod) bool {
preferences := &Preferences{}
// Add a toleration for PreferNoSchedule since a daemon pod shouldn't respect the preference
_ = preferences.toleratePreferNoScheduleTaints(pod)
if err := scheduling.Taints(nodeClaimTemplate.Spec.Taints).Tolerates(pod); err != nil {
return false
}
for {
// We use the strict pod requirements and don't consider pod preferences here, since pod preferences won't matter with DaemonSet scheduling
if nodeClaimTemplate.Requirements.IsCompatible(scheduling.NewStrictPodRequirements(pod), scheduling.AllowUndefinedWellKnownLabels) {
return true
}
// If the required Node Affinity term couldn't be relaxed any further (removeRequiredNodeAffinityTerm returned nil), then this DaemonSet can't schedule to this NodePool
// We don't consider other forms of relaxation here since we don't consider pod affinities/anti-affinities
// when considering DaemonSet schedulability
if preferences.removeRequiredNodeAffinityTerm(pod) == nil {
return false
}
overhead[nodeClaimTemplate] = resources.RequestsForPods(daemons...)
}
return overhead
}

// subtractMax returns the remaining resources after subtracting the max resource quantity per instance type. To avoid
Expand Down
Loading

0 comments on commit 8f8e759

Please sign in to comment.