optimize scheduling
Improve scheduling performance by ~30%, to roughly 1300 pods per second, on a sample benchmark
consisting of 2500 pods against 400 instance types.

Before:

scheduled 7610 against 24 nodes in total in 7.609408056s 1000.0777910706909 pods/sec
400 instances 10 pods    1 nodes  14.241995ms per scheduling   1.424199ms per pod
400 instances 100 pods   1 nodes  125.097021ms per scheduling  1.25097ms per pod
400 instances 500 pods   2 nodes  442.08217ms per scheduling   884.164µs per pod
400 instances 1000 pods  3 nodes  826.467467ms per scheduling  826.467µs per pod
400 instances 1500 pods  4 nodes  1.234402934s per scheduling  822.935µs per pod
400 instances 2000 pods  6 nodes  1.670170962s per scheduling  835.085µs per pod
400 instances 2500 pods  7 nodes  2.301283317s per scheduling  920.513µs per pod

After:

scheduled 7610 against 24 nodes in total in 5.659754728s 1344.5812346516934 pods/sec
400 instances 10 pods    1 nodes  9.699477ms per scheduling    969.947µs per pod
400 instances 100 pods   1 nodes  81.907ms per scheduling      819.07µs per pod
400 instances 500 pods   2 nodes  283.424001ms per scheduling  566.848µs per pod
400 instances 1000 pods  3 nodes  564.304529ms per scheduling  564.304µs per pod
400 instances 1500 pods  4 nodes  854.86969ms per scheduling   569.913µs per pod
400 instances 2000 pods  6 nodes  1.166997512s per scheduling  583.498µs per pod
400 instances 2500 pods  7 nodes  1.571064146s per scheduling  628.425µs per pod
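
For reference, the headline numbers check out: 7610 pods in 7.609s is ~1000 pods/sec before, versus 7610 pods in 5.660s, ~1345 pods/sec, after — roughly a 34% throughput increase.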
tzneal committed Mar 23, 2022
1 parent 2f6e5e7 commit 754893a
Showing 8 changed files with 42 additions and 26 deletions.
15 changes: 10 additions & 5 deletions pkg/apis/provisioning/v1alpha5/requirements.go
@@ -87,10 +87,8 @@ func NewPodRequirements(pod *v1.Pod) Requirements {
 	return NewRequirements(requirements...)
 }
 
-// Add function returns a new Requirements object with new requirements inserted.
-func (r Requirements) Add(requirements ...v1.NodeSelectorRequirement) Requirements {
-	// Deep copy to avoid mutating existing requirements
-	r = *r.DeepCopy()
+// Combine modifies the Requirements r by inserting the new requirements.
+func (r *Requirements) Combine(requirements ...v1.NodeSelectorRequirement) {
 	// This fail-safe measurement can be removed later when we implement test webhook.
 	if r.requirements == nil {
 		r.requirements = map[string]sets.Set{}
@@ -119,7 +117,14 @@ func (r Requirements) Add(requirements ...v1.NodeSelectorRequirement) Requirements {
 		}
 		r.requirements[requirement.Key] = values
 	}
-	return r
 }
+
+// Add function returns a new Requirements object with new requirements inserted.
+func (r Requirements) Add(requirements ...v1.NodeSelectorRequirement) Requirements {
+	// Deep copy to avoid mutating existing requirements
+	cp := *r.DeepCopy()
+	cp.Combine(requirements...)
+	return cp
+}
 
 // Keys returns unique set of the label keys from the requirements
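
The heart of the change: the old Add deep-copied the entire Requirements map on every call, while the new Combine mutates the receiver in place; Add survives as a thin copying wrapper for callers that still need value semantics. Below is a minimal, self-contained sketch of the pattern using a simplified stand-in for the real Requirements type — the map-of-sets layout and method names mirror the diff, everything else is illustrative:

package main

import "fmt"

// Requirements is a simplified stand-in for the real type: key -> set of allowed values.
type Requirements struct {
	requirements map[string]map[string]struct{}
}

// DeepCopy duplicates every key and every value set. This is the per-call cost
// the old Add paid: O(total values) plus one allocation per set.
func (r Requirements) DeepCopy() *Requirements {
	cp := Requirements{requirements: make(map[string]map[string]struct{}, len(r.requirements))}
	for key, values := range r.requirements {
		set := make(map[string]struct{}, len(values))
		for v := range values {
			set[v] = struct{}{}
		}
		cp.requirements[key] = set
	}
	return &cp
}

// Combine mutates r in place: no copying, cheap to call in the scheduling hot path.
func (r *Requirements) Combine(key string, values ...string) {
	if r.requirements == nil {
		r.requirements = map[string]map[string]struct{}{}
	}
	set := r.requirements[key]
	if set == nil {
		set = map[string]struct{}{}
		r.requirements[key] = set
	}
	for _, v := range values {
		set[v] = struct{}{}
	}
}

// Add preserves the old copy-on-write semantics by running Combine on a deep copy.
func (r Requirements) Add(key string, values ...string) Requirements {
	cp := *r.DeepCopy()
	cp.Combine(key, values...)
	return cp
}

func main() {
	var r Requirements
	r.Combine("zone", "us-east-1a")                        // mutates r directly
	r2 := r.Add("arch", "arm64")                           // r untouched, r2 gets both keys
	fmt.Println(len(r.requirements), len(r2.requirements)) // prints: 1 2
}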
2 changes: 1 addition & 1 deletion pkg/apis/provisioning/v1alpha5/zz_generated.deepcopy.go

Some generated files are not rendered by default.

6 changes: 3 additions & 3 deletions pkg/controllers/provisioning/controller.go
@@ -103,9 +103,9 @@ func (c *Controller) Apply(ctx context.Context, provisioner *v1alpha5.Provisioner
 		return err
 	}
 	provisioner.Spec.Labels = functional.UnionStringMaps(provisioner.Spec.Labels, map[string]string{v1alpha5.ProvisionerNameLabelKey: provisioner.Name})
-	provisioner.Spec.Requirements = provisioner.Spec.Requirements.
-		Add(requirements(instanceTypes)...).
-		Add(v1alpha5.NewLabelRequirements(provisioner.Spec.Labels).Requirements...)
+	provisioner.Spec.Requirements.Combine(requirements(instanceTypes)...)
+	provisioner.Spec.Requirements.Combine(v1alpha5.NewLabelRequirements(provisioner.Spec.Labels).Requirements...)
+
 	if err := provisioner.Spec.Requirements.Validate(); err != nil {
 		return fmt.Errorf("requirements are not compatible with cloud provider, %w", err)
 	}
14 changes: 7 additions & 7 deletions pkg/controllers/provisioning/scheduling/node.go
@@ -92,18 +92,18 @@ func (n Node) Compatible(pod *v1.Pod) error {
 	// existing pods can support the pod resources and combined pod + provider requirements
 	podRequests := resources.RequestsForPods(pod)
 	for _, it := range n.InstanceTypeOptions {
-		newSize := resources.Merge(n.reservedResources(it), podRequests)
+		newSize := n.reservedResources(it, podRequests)
 		if compatibleInstanceType(tightened, it) &&
 			n.newPodCanFit(newSize, it) &&
-			n.hasCompatibleResources(resources.RequestsForPods(pod), it) {
+			n.hasCompatibleResources(podRequests, it) {
 			return nil
 		}
 	}
 	return errors.New("no matching instance type found")
 }
 
-func (n Node) reservedResources(it cloudprovider.InstanceType) v1.ResourceList {
-	return resources.Merge(it.Overhead(), n.daemonResources, n.podResources)
+func (n Node) reservedResources(it cloudprovider.InstanceType, extra v1.ResourceList) v1.ResourceList {
+	return resources.Merge(it.Overhead(), n.daemonResources, n.podResources, extra)
 }
 
 func (n *Node) newPodCanFit(newSize v1.ResourceList, it cloudprovider.InstanceType) bool {
@@ -124,15 +124,15 @@ func (n *Node) newPodCanFit(newSize v1.ResourceList, it cloudprovider.InstanceType) bool {
 // Add adds a pod to the Node which tightens constraints, possibly reducing the available instance type options for this
 // node
 func (n *Node) Add(pod *v1.Pod) {
-	n.Requirements = n.Requirements.Add(v1alpha5.NewPodRequirements(pod).Requirements...)
+	n.Requirements.Combine(v1alpha5.NewPodRequirements(pod).Requirements...)
 
 	podRequests := resources.RequestsForPods(pod)
 	var instanceTypeOptions []cloudprovider.InstanceType
 	for _, it := range n.InstanceTypeOptions {
-		newSize := resources.Merge(n.reservedResources(it), podRequests)
+		newSize := n.reservedResources(it, podRequests)
 		if compatibleInstanceType(n.Requirements, it) &&
 			n.newPodCanFit(newSize, it) &&
-			n.hasCompatibleResources(resources.RequestsForPods(pod), it) {
+			n.hasCompatibleResources(podRequests, it) {
 			instanceTypeOptions = append(instanceTypeOptions, it)
 		}
 	}
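
The node.go change removes one Merge call — and its intermediate map — per instance type considered: instead of merging the reserved resources first and the pod's requests second, the pod requests are passed through as extra and folded in by a single variadic Merge. It also reuses podRequests rather than recomputing RequestsForPods inside the condition. A toy sketch of why one merge beats two; ResourceList here is a plain map standing in for v1.ResourceList:

package main

import "fmt"

// ResourceList is a toy stand-in for v1.ResourceList (quantities flattened to int64).
type ResourceList map[string]int64

// Merge folds any number of lists into one result map, mirroring resources.Merge.
func Merge(lists ...ResourceList) ResourceList {
	if len(lists) == 0 {
		return ResourceList{}
	}
	result := make(ResourceList, len(lists[0]))
	for _, list := range lists {
		for name, qty := range list {
			result[name] += qty
		}
	}
	return result
}

func main() {
	overhead := ResourceList{"cpu": 100}
	daemons := ResourceList{"cpu": 200, "memory": 256}
	pods := ResourceList{"cpu": 500, "memory": 1024}
	podRequests := ResourceList{"cpu": 250}

	// Before: two calls per instance type, with a throwaway intermediate map.
	before := Merge(Merge(overhead, daemons, pods), podRequests)

	// After: one call, one result map, no intermediate.
	after := Merge(overhead, daemons, pods, podRequests)

	fmt.Println(before["cpu"], after["cpu"]) // prints: 1050 1050
}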
@@ -21,6 +21,7 @@ import (
 	"context"
 	"fmt"
 	"os"
+	"runtime/pprof"
 	"strings"
 	"testing"
 	"text/tabwriter"
@@ -45,13 +46,13 @@
 func TestSchedulingPerformance(t *testing.T) {
 	tw := tabwriter.NewWriter(os.Stdout, 8, 8, 2, ' ', 0)
 
-	/* cpuf, err := os.Create("schedule.cpuprofile")
-	if err != nil {
-		t.Fatalf("error creating CPU profile: %s", err)
-	}
-	pprof.StartCPUProfile(cpuf)
-	defer pprof.StopCPUProfile()
-	*/
+	cpuf, err := os.Create("schedule.cpuprofile")
+	if err != nil {
+		t.Fatalf("error creating CPU profile: %s", err)
+	}
+	pprof.StartCPUProfile(cpuf)
+	defer pprof.StopCPUProfile()
+
 	totalPods := 0
 	totalNodes := 0
 	var totalTime time.Duration
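
With the profiling block now active, running the benchmark drops schedule.cpuprofile into the package directory, which go tool pprof can inspect. Something like the following should work — the package path is an assumption, since the file header for this benchmark test is not shown:

go test -v -run TestSchedulingPerformance ./pkg/controllers/provisioning/scheduling/
go tool pprof -top schedule.cpuprofile          # hottest functions, flat view
go tool pprof -http=:8080 schedule.cpuprofile   # interactive graph/flame view in a browser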
2 changes: 1 addition & 1 deletion pkg/controllers/provisioning/scheduling/topology.go
@@ -102,7 +102,7 @@ func (t *Topology) computeHostnameTopology(topologyGroup *TopologyGroup, constraints
 	}
 	topologyGroup.Register(domains...)
 	// This is a bit of a hack that allows the constraints to recognize viable hostname topologies
-	constraints.Requirements = constraints.Requirements.Add(v1.NodeSelectorRequirement{Key: topologyGroup.Constraint.TopologyKey, Operator: v1.NodeSelectorOpIn, Values: domains})
+	constraints.Requirements.Combine(v1.NodeSelectorRequirement{Key: topologyGroup.Constraint.TopologyKey, Operator: v1.NodeSelectorOpIn, Values: domains})
 	return nil
 }
7 changes: 6 additions & 1 deletion pkg/utils/resources/resources.go
@@ -43,7 +43,12 @@ func LimitsForPods(pods ...*v1.Pod) v1.ResourceList {
 
 // Merge the resources from the variadic into a single v1.ResourceList
 func Merge(resources ...v1.ResourceList) v1.ResourceList {
-	result := v1.ResourceList{}
+	if len(resources) == 0 {
+		return v1.ResourceList{}
+
+	}
+	// reserve some capacity to avoid some re-allocations
+	result := make(v1.ResourceList, len(resources[0]))
 	for _, resourceList := range resources {
 		for resourceName, quantity := range resourceList {
 			current := result[resourceName]
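
Pre-sizing the result map matters because an empty map literal grows incrementally as entries are inserted, rehashing along the way, while make with a capacity hint allocates enough buckets up front. A micro-benchmark sketch of the general effect — not part of the commit; drop it in any _test.go file and run go test -bench=.:

package bench

import "testing"

const entries = 64

var keys = func() []string {
	ks := make([]string, entries)
	for i := range ks {
		ks[i] = string(rune('a'+i%26)) + string(rune('0'+i%10))
	}
	return ks
}()

// BenchmarkEmptyLiteral starts from an empty map, which must grow and rehash
// repeatedly as entries are inserted.
func BenchmarkEmptyLiteral(b *testing.B) {
	for i := 0; i < b.N; i++ {
		m := map[string]int64{}
		for _, k := range keys {
			m[k]++
		}
	}
}

// BenchmarkPresized allocates the final size up front, so inserts never trigger growth.
func BenchmarkPresized(b *testing.B) {
	for i := 0; i < b.N; i++ {
		m := make(map[string]int64, entries)
		for _, k := range keys {
			m[k]++
		}
	}
}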
7 changes: 6 additions & 1 deletion pkg/utils/sets/sets.go
@@ -62,8 +62,13 @@ func (s Set) Hash() (uint64, error) {
 // DeepCopy creates a deep copy of the set object
 // It is required by the Kubernetes CRDs code generation
 func (s Set) DeepCopy() Set {
+	// it's faster to manually copy this than to use UnsortedList() and the constructor
+	values := sets.NewString()
+	for k := range s.values {
+		values[k] = sets.Empty{}
+	}
 	return Set{
-		values:     sets.NewString(s.values.UnsortedList()...),
+		values:     values,
 		complement: s.complement,
 	}
 }
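
The DeepCopy rewrite avoids the round trip through UnsortedList, which allocates an intermediate []string and then re-hashes every element back into a new set; the manual loop visits each key once with no intermediate slice. A small standalone comparison of the two approaches against apimachinery's sets.String, the map[string]sets.Empty type underlying the Set wrapper here:

package main

import (
	"fmt"

	"k8s.io/apimachinery/pkg/util/sets"
)

// copyViaSlice is the old shape: dump the set to a slice, rebuild a new set.
// It allocates the intermediate []string and re-hashes every element on insert.
func copyViaSlice(s sets.String) sets.String {
	return sets.NewString(s.UnsortedList()...)
}

// copyDirect is the new shape: one pass over the underlying map, no intermediate slice.
func copyDirect(s sets.String) sets.String {
	out := sets.NewString()
	for k := range s {
		out[k] = sets.Empty{}
	}
	return out
}

func main() {
	s := sets.NewString("a", "b", "c")
	fmt.Println(copyViaSlice(s).Equal(copyDirect(s))) // prints: true
}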
