Skip to content

Commit

Permalink
scheduler: tooling for scheduler benchmarking (#11725)
Browse files Browse the repository at this point in the history
Adds a package `scheduler/benchmarks` with some examples of profiling
and benchmarking the scheduler, along with helpers for loading
real-world data for profiling.

This tooling comes out of work done for #11712. These test benchmarks
have not been added to CI because these particular profiles are mostly
examples and the runs will add an excessive amount of time to CI runs
for code that rarely changes in a way that has any chance of impacting
performance.
  • Loading branch information
tgross authored Dec 22, 2021
1 parent 2189a4c commit e32f024
Show file tree
Hide file tree
Showing 2 changed files with 281 additions and 0 deletions.
202 changes: 202 additions & 0 deletions scheduler/benchmarks/benchmarks_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,202 @@
package benchmarks

import (
"fmt"
"testing"

"github.com/stretchr/testify/require"

"github.com/hashicorp/nomad/helper/uuid"
"github.com/hashicorp/nomad/nomad/mock"
"github.com/hashicorp/nomad/nomad/structs"
"github.com/hashicorp/nomad/scheduler"
)

// BenchmarkSchedulerExample is an example of how to write a one-off
// benchmark for the Nomad scheduler. The starting state for your
// implementation will depend on the following environment variables:
//
// - NOMAD_BENCHMARK_DATADIR: path to data directory
// - NOMAD_BENCHMARK_SNAPSHOT: path to raft snapshot
// - neither: empty starting state
//
// Only the final loop is measured: the timer is reset once setup and
// verification are done, so building the cluster state does not skew
// the reported time per operation.
//
// You can run a profile for this benchmark with the usual -cpuprofile
// -memprofile flags.
func BenchmarkSchedulerExample(b *testing.B) {

	h := NewBenchmarkingHarness(b)
	var eval *structs.Evaluation

	// (implement me!) this is your setup for the state and the eval
	// you're going to process, all of which happens before benchmarking
	// starts. If you're benchmarking a real world datadir or snapshot,
	// you should assert your assumptions about the contents here.
	{
		upsertNodes(h, 5000, 100)

		// Count every node in the state store so the assertion below
		// verifies the cluster size we believe we are benchmarking.
		// NOTE(review): this counts all nodes, so it will fail if the
		// harness was loaded from a datadir or snapshot that already
		// holds nodes; adjust the expectation in that case.
		iter, err := h.State.Nodes(nil)
		require.NoError(b, err)
		nodes := 0
		for {
			raw := iter.Next()
			if raw == nil {
				break
			}
			nodes++
		}
		require.Equal(b, 5000, nodes)
		job := generateJob(true, 600)
		eval = upsertJob(h, job)
	}

	// (implement me!) Note that h.Process doesn't return errors for
	// most states that result in blocked plans, so it's recommended
	// you write an assertion section here so that you're sure you're
	// benchmarking a successful run and not a failed plan.
	{
		err := h.Process(scheduler.NewServiceScheduler, eval)
		require.NoError(b, err)
		require.Len(b, h.Plans, 1)
		require.False(b, h.Plans[0].IsNoOp())
	}

	// Discard the time (and allocation counts) spent on the setup and
	// verification above; without this they are charged to the benchmark.
	b.ResetTimer()
	for i := 0; i < b.N; i++ {
		err := h.Process(scheduler.NewServiceScheduler, eval)
		require.NoError(b, err)
	}
}

// BenchmarkServiceScheduler runs the service scheduler as a sub-benchmark
// for every combination of cluster size, rack count and job size listed
// below, once with a rack spread on the job and once without.
func BenchmarkServiceScheduler(b *testing.B) {
	type scenario struct {
		name        string
		clusterSize int
		racks       int
		jobSize     int
		withSpread  bool
	}

	nodeCounts := []int{1000, 5000, 10000}
	rackCounts := []int{10, 25, 50, 75}
	allocCounts := []int{300, 600, 900, 1200}

	var scenarios []scenario
	for _, nodeCount := range nodeCounts {
		for _, rackCount := range rackCounts {
			for _, allocCount := range allocCounts {
				// The spread variant is registered ahead of the
				// non-spread one for each combination.
				for _, spread := range []bool{true, false} {
					format := "%d nodes %d racks %d allocs no spread"
					if spread {
						format = "%d nodes %d racks %d allocs spread"
					}
					scenarios = append(scenarios, scenario{
						name:        fmt.Sprintf(format, nodeCount, rackCount, allocCount),
						clusterSize: nodeCount,
						racks:       rackCount,
						jobSize:     allocCount,
						withSpread:  spread,
					})
				}
			}
		}
	}

	for _, sc := range scenarios {
		b.Run(sc.name, func(b *testing.B) {
			// Each sub-benchmark gets its own harness, nodes and job.
			h := scheduler.NewHarness(b)
			upsertNodes(h, sc.clusterSize, sc.racks)
			eval := upsertJob(h, generateJob(sc.withSpread, sc.jobSize))

			// Keep the state setup above out of the measurement.
			b.ResetTimer()
			for n := 0; n < b.N; n++ {
				require.NoError(b, h.Process(scheduler.NewServiceScheduler, eval))
			}
		})
	}
}

// upsertJob writes job into the harness state store, then builds and stores
// a pending job-register evaluation for it in the default namespace. The
// stored evaluation is returned. It panics if either state write fails.
func upsertJob(h *scheduler.Harness, job *structs.Job) *structs.Evaluation {
	if err := h.State.UpsertJob(structs.MsgTypeTestSetup, h.NextIndex(), job); err != nil {
		panic(err)
	}

	registration := &structs.Evaluation{
		ID:          uuid.Generate(),
		Namespace:   structs.DefaultNamespace,
		JobID:       job.ID,
		Priority:    job.Priority,
		TriggeredBy: structs.EvalTriggerJobRegister,
		Status:      structs.EvalStatusPending,
	}

	pending := []*structs.Evaluation{registration}
	if err := h.State.UpsertEvals(structs.MsgTypeTestSetup, h.NextIndex(), pending); err != nil {
		panic(err)
	}
	return registration
}

// generateJob returns a mock job targeting datacenters dc-1 and dc-2 whose
// first task group asks for jobSize instances. Job constraints, group
// networks and group services are cleared, and the first task requests
// 6000 CPU and 6000 MB of memory. When withSpread is true the job spreads
// on the ${meta.rack} attribute.
func generateJob(withSpread bool, jobSize int) *structs.Job {
	taskResources := &structs.Resources{
		CPU:      6000,
		MemoryMB: 6000,
	}

	job := mock.Job()
	job.Datacenters = []string{"dc-1", "dc-2"}
	job.Constraints = []*structs.Constraint{}
	if withSpread {
		job.Spreads = []*structs.Spread{{Attribute: "${meta.rack}"}}
	}

	job.TaskGroups[0].Count = jobSize
	job.TaskGroups[0].Services = []*structs.Service{}
	job.TaskGroups[0].Networks = nil
	job.TaskGroups[0].Tasks[0].Resources = taskResources
	return job
}

// upsertNodes inserts count mock nodes into the harness state store. Nodes
// alternate between datacenters dc-1 and dc-2 and are assigned round-robin
// to racks r0..r(racks-1) through the "rack" meta key. Every node gets the
// same CPU, memory, disk and host-network resources. It panics if a node
// cannot be written.
func upsertNodes(h *scheduler.Harness, count, racks int) {
	const (
		cpuShares = 14000
		memoryMB  = 32000
		diskMB    = 100 * 1024
	)
	datacenters := []string{"dc-1", "dc-2"}

	for n := 0; n < count; n++ {
		node := mock.Node()
		node.Datacenter = datacenters[n%2]
		node.Meta = map[string]string{
			"rack": fmt.Sprintf("r%d", n%racks),
		}

		// A fresh resources value is built per node so nodes never
		// share the same pointer.
		node.NodeResources = &structs.NodeResources{
			Cpu:    structs.NodeCpuResources{CpuShares: cpuShares},
			Memory: structs.NodeMemoryResources{MemoryMB: memoryMB},
			Disk:   structs.NodeDiskResources{DiskMB: diskMB},
			Networks: []*structs.NetworkResource{{
				Mode:   "host",
				Device: "eth0",
				CIDR:   "192.168.0.100/32",
				MBits:  1000,
			}},
		}

		if err := h.State.UpsertNode(structs.MsgTypeTestSetup, h.NextIndex(), node); err != nil {
			panic(err)
		}
	}
}
79 changes: 79 additions & 0 deletions scheduler/benchmarks/helpers_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,79 @@
package benchmarks

// Test helper functions for running scheduling tests and benchmarks
// against real world state snapshots or data directories. These live
// here and not in the parent scheduler package because it would
// create circular imports between the scheduler and raftutil packages
// (via the nomad package)

import (
"errors"
"os"
"testing"

"github.com/hashicorp/nomad/helper/raftutil"
"github.com/hashicorp/nomad/scheduler"
"github.com/stretchr/testify/require"
)

// NewBenchmarkingHarness returns a scheduler test harness whose state
// store is seeded according to the environment, checked in this order:
// - NOMAD_BENCHMARK_DATADIR: state is loaded from that data directory
// - NOMAD_BENCHMARK_SNAPSHOT: state is restored from that raft snapshot
// - neither: the harness starts with empty state
//
// A failure to load the requested state fails t immediately.
func NewBenchmarkingHarness(t testing.TB) *scheduler.Harness {
	if datadir := os.Getenv("NOMAD_BENCHMARK_DATADIR"); datadir != "" {
		h, err := NewHarnessFromDataDir(t, datadir)
		require.NoError(t, err)
		return h
	}

	// The snapshot variable is only consulted when no datadir is set.
	if snapshotPath := os.Getenv("NOMAD_BENCHMARK_SNAPSHOT"); snapshotPath != "" {
		h, err := NewHarnessFromSnapshot(t, snapshotPath)
		require.NoError(t, err)
		return h
	}

	return scheduler.NewHarness(t)
}

// NewHarnessFromDataDir builds a scheduler test harness on top of the
// state obtained by opening the FSM for datadirPath and applying all of
// its entries. It returns an error if datadirPath is empty, if the FSM
// cannot be created, or if applying the entries fails.
func NewHarnessFromDataDir(t testing.TB, datadirPath string) (*scheduler.Harness, error) {
	if datadirPath == "" {
		return nil, errors.New("datadir path was not set")
	}

	fsm, err := raftutil.NewFSM(datadirPath)
	if err != nil {
		return nil, err
	}

	// Only the error matters here; the other two results are unused.
	if _, _, err := fsm.ApplyAll(); err != nil {
		return nil, err
	}

	return scheduler.NewHarnessWithState(t, fsm.State()), nil
}

// NewHarnessFromSnapshot builds a scheduler test harness on top of the
// state restored from the raft snapshot file at snapshotPath. It returns
// an error if snapshotPath is empty, if the file cannot be opened, or if
// the restore fails.
func NewHarnessFromSnapshot(t testing.TB, snapshotPath string) (*scheduler.Harness, error) {
	if snapshotPath == "" {
		return nil, errors.New("snapshot path was not set")
	}

	archive, err := os.Open(snapshotPath)
	if err != nil {
		return nil, err
	}
	// The file is only needed while the restore reads it.
	defer archive.Close()

	restored, _, err := raftutil.RestoreFromArchive(archive)
	if err != nil {
		return nil, err
	}

	return scheduler.NewHarnessWithState(t, restored), nil
}

0 comments on commit e32f024

Please sign in to comment.