Skip to content

Commit

Permalink
numa: fix cpu topology conversion for non linux systems (#18843)
Browse files Browse the repository at this point in the history
  • Loading branch information
shoenig authored Oct 24, 2023
1 parent cb3fde3 commit 951cde4
Show file tree
Hide file tree
Showing 8 changed files with 306 additions and 148 deletions.
7 changes: 3 additions & 4 deletions client/fingerprint/cpu_darwin_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -32,14 +32,13 @@ func TestCPUFingerprint_AppleSilicon(t *testing.T) {
attributes := response.Attributes
must.NotNil(t, attributes)
must.MapContainsKey(t, attributes, "cpu.modelname")
must.MapContainsKey(t, attributes, "cpu.numcores.power")
must.MapContainsKey(t, attributes, "cpu.numcores.performance")
must.MapContainsKey(t, attributes, "cpu.numcores.efficiency")
must.MapContainsKey(t, attributes, "cpu.frequency.power")
must.MapContainsKey(t, attributes, "cpu.frequency.performance")
must.MapContainsKey(t, attributes, "cpu.frequency.efficiency")
must.MapContainsKey(t, attributes, "cpu.totalcompute")
must.Positive(t, response.Resources.CPU)
must.Positive(t, response.NodeResources.Cpu.CpuShares)
must.Positive(t, response.NodeResources.Cpu.SharesPerCore())
must.Positive(t, response.NodeResources.Cpu.TotalCpuCores)
must.SliceEmpty(t, response.NodeResources.Cpu.ReservableCpuCores)

// not included for mixed core types (that we can detect)
Expand Down
80 changes: 80 additions & 0 deletions nomad/structs/cpucompat_default.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,80 @@
// Copyright (c) HashiCorp, Inc.
// SPDX-License-Identifier: BUSL-1.1

//go:build !linux

package structs

import (
"github.com/hashicorp/nomad/client/lib/idset"
"github.com/hashicorp/nomad/client/lib/numalib"
"github.com/hashicorp/nomad/client/lib/numalib/hw"
)

// Compatibility keeps the legacy and current CPU resource representations of
// a node in sync, converting LegacyNodeCpuResources into
// NodeProcessorResources or the other way around, whichever is missing.
//
// This is the implementation for non-linux operating systems, where there
// are no reservable cores to carry over.
//
// COMPAT: added in Nomad 1.7; can be removed in 1.9+
func (n *NodeResources) Compatibility() {
	// Nothing to translate when no resources are set.
	if n == nil {
		return
	}

	switch {
	case n.Processors.Topology == nil && !n.Cpu.empty():
		// A node update from a pre-1.7 client carries only the
		// LegacyNodeCpuResources field, so a pseudo topology is synthesized
		// from it to stand in for NodeProcessorResources.
		n.Processors.Topology = topologyFromLegacy(n.Cpu)

	case !n.Processors.empty():
		// A node update from a 1.7+ client carries NodeProcessorResources,
		// from which the LegacyNodeCpuResources field is back-filled.
		n.Cpu.CpuShares = int64(n.Processors.TotalCompute())
		n.Cpu.TotalCpuCores = uint16(n.Processors.Topology.UsableCores().Size())
	}
}

// topologyFromLegacy synthesizes a numalib.Topology from the
// LegacyNodeCpuResources reported by a pre-1.7 client.
//
// On non-linux systems there are no reservable cores, so the core set is
// derived from old.TotalCpuCores: cores are numbered 0..TotalCpuCores-1 and
// each one is given an equal share of old.CpuShares as its guessed speed.
//
// A legacy value reporting zero cores yields a topology with no cores (and a
// zero per-core frequency) instead of panicking on a division by zero.
func topologyFromLegacy(old LegacyNodeCpuResources) *numalib.Topology {
	coreCount := int(old.TotalCpuCores)

	// interpret per-core frequency given total compute and total core count;
	// guarded because integer division by zero is a run-time panic
	var frequency hw.MHz
	if coreCount > 0 {
		frequency = hw.MHz(old.CpuShares / int64(coreCount))
	}

	// synthesize a set of cores that abstractly matches the legacy cpu specs
	cores := make([]numalib.Core, 0, coreCount)
	for i := 0; i < coreCount; i++ {
		cores = append(cores, numalib.Core{
			ID:         hw.CoreID(i),
			SocketID:   0,                   // no numa support on non-linux
			NodeID:     0,                   // no numa support on non-linux
			Grade:      numalib.Performance, // assume P-cores
			Disable:    false,               // no reservable cores on non-linux
			GuessSpeed: frequency,
		})
	}

	// The per-core frequency is truncated by integer division, so the
	// synthesized total can fall short of CpuShares when the shares do not
	// divide evenly. Only a positive surplus counts as withheld compute; the
	// comparison avoids a negative result (or a wrap-around, should hw.MHz be
	// an unsigned type - TODO confirm).
	var withheld hw.MHz
	synthesized, total := frequency*hw.MHz(coreCount), hw.MHz(old.CpuShares)
	if synthesized > total {
		withheld = synthesized - total
	}

	return &numalib.Topology{
		// legacy: assume one node with id 0
		NodeIDs: idset.From[hw.NodeID]([]hw.NodeID{0}),

		// legacy: with one node the distance matrix is 1-D
		Distances: numalib.SLIT{{10}},

		// legacy: a pseudo representation of each actual core profile
		Cores: cores,

		// legacy: set since we have the value
		OverrideTotalCompute: total,

		// legacy: set since we can compute the value
		OverrideWitholdCompute: withheld,
	}
}
51 changes: 51 additions & 0 deletions nomad/structs/cpucompat_default_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
// Copyright (c) HashiCorp, Inc.
// SPDX-License-Identifier: BUSL-1.1

//go:build !linux

package structs

import (
"testing"

"github.com/hashicorp/nomad/ci"
"github.com/hashicorp/nomad/client/lib/idset"
"github.com/hashicorp/nomad/client/lib/numalib"
"github.com/hashicorp/nomad/client/lib/numalib/hw"
"github.com/shoenig/test/must"
)

// TestNUMA_topologyFromLegacy_plain checks the topology synthesized on
// non-linux systems from a legacy cpu resource with four cores and no
// reservable cores.
func TestNUMA_topologyFromLegacy_plain(t *testing.T) {
	ci.Parallel(t)

	legacy := LegacyNodeCpuResources{
		CpuShares:          12800,
		TotalCpuCores:      4,
		ReservableCpuCores: nil,
	}

	want := &numalib.Topology{
		NodeIDs:   idset.From[hw.NodeID]([]hw.NodeID{0}),
		Distances: numalib.SLIT{{10}},
		Cores: []numalib.Core{
			makeLegacyCore(0),
			makeLegacyCore(1),
			makeLegacyCore(2),
			makeLegacyCore(3),
		},
		OverrideTotalCompute:   12800,
		OverrideWitholdCompute: 0,
	}

	got := topologyFromLegacy(legacy)

	// whole-value comparison; only compares compute total
	must.Equal(t, want, got)

	// field-by-field comparison of the underlying values
	must.Eq(t, want.NodeIDs, got.NodeIDs)
	must.Eq(t, want.Distances, got.Distances)
	must.Eq(t, want.Cores, got.Cores)
	must.Eq(t, want.OverrideTotalCompute, got.OverrideTotalCompute)
	must.Eq(t, want.OverrideWitholdCompute, got.OverrideWitholdCompute)
}
83 changes: 83 additions & 0 deletions nomad/structs/cpucompat_linux.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,83 @@
// Copyright (c) HashiCorp, Inc.
// SPDX-License-Identifier: BUSL-1.1

//go:build linux

package structs

import (
"github.com/hashicorp/nomad/client/lib/idset"
"github.com/hashicorp/nomad/client/lib/numalib"
"github.com/hashicorp/nomad/client/lib/numalib/hw"
"github.com/hashicorp/nomad/helper"
)

// Compatibility keeps the legacy and current CPU resource representations of
// a node in sync, converting LegacyNodeCpuResources into
// NodeProcessorResources or the other way around, whichever is missing.
//
// This is the linux implementation: in addition to the compute and core
// totals, the usable core IDs are copied into the legacy reservable cores.
//
// COMPAT: added in Nomad 1.7; can be removed in 1.9+
func (n *NodeResources) Compatibility() {
	// Nothing to translate when no resources are set.
	if n == nil {
		return
	}

	switch {
	case n.Processors.Topology == nil && !n.Cpu.empty():
		// A node update from a pre-1.7 client carries only the
		// LegacyNodeCpuResources field, so a pseudo topology is synthesized
		// from it to stand in for NodeProcessorResources.
		n.Processors.Topology = topologyFromLegacy(n.Cpu)

	case !n.Processors.empty():
		// A node update from a 1.7+ client carries NodeProcessorResources,
		// from which the LegacyNodeCpuResources field is back-filled.
		n.Cpu.CpuShares = int64(n.Processors.TotalCompute())

		usable := n.Processors.Topology.UsableCores()
		n.Cpu.TotalCpuCores = uint16(usable.Size())

		toLegacyID := func(coreID hw.CoreID) uint16 { return uint16(coreID) }
		n.Cpu.ReservableCpuCores = helper.ConvertSlice(usable.Slice(), toLegacyID)
	}
}

// topologyFromLegacy synthesizes a single-socket, single-node
// numalib.Topology from the LegacyNodeCpuResources reported by a pre-1.7
// client.
//
// The synthesized cores mirror old.ReservableCpuCores. When that list is
// empty the cores are instead numbered 0..TotalCpuCores-1, so that dividing
// the shares across the cores cannot panic on a division by zero. If no core
// count is available at all, the topology has no cores and a zero per-core
// frequency.
func topologyFromLegacy(old LegacyNodeCpuResources) *numalib.Topology {
	// pick the core ids to synthesize; fall back to the total core count when
	// the legacy value lists no reservable cores
	ids := old.ReservableCpuCores
	if len(ids) == 0 {
		ids = make([]uint16, 0, old.TotalCpuCores)
		for i := 0; i < int(old.TotalCpuCores); i++ {
			ids = append(ids, uint16(i))
		}
	}

	// interpret per-core frequency given total compute and usable core count;
	// guarded because integer division by zero is a run-time panic
	var frequency hw.MHz
	if len(ids) > 0 {
		frequency = hw.MHz(old.CpuShares / int64(len(ids)))
	}

	cores := helper.ConvertSlice(
		ids,
		func(id uint16) numalib.Core {
			return numalib.Core{
				ID:         hw.CoreID(id),
				SocketID:   0, // legacy: assume single socket with id 0
				NodeID:     0, // legacy: assume single numa node with id 0
				Grade:      numalib.Performance,
				Disable:    false, // only usable cores in the source
				GuessSpeed: frequency,
			}
		},
	)

	// Compute attributed to cores that exist but are not usable. The per-core
	// frequency is truncated by integer division, so the product can fall
	// short of CpuShares; only a positive surplus counts as withheld. The
	// comparison avoids a negative result (or a wrap-around, should hw.MHz be
	// an unsigned type - TODO confirm).
	var withheld hw.MHz
	allCores, total := frequency*hw.MHz(old.TotalCpuCores), hw.MHz(old.CpuShares)
	if allCores > total {
		withheld = allCores - total
	}

	return &numalib.Topology{
		// legacy: assume one node with id 0
		NodeIDs: idset.From[hw.NodeID]([]hw.NodeID{0}),

		// legacy: with one node the distance matrix is 1-D
		Distances: numalib.SLIT{{10}},

		// legacy: a pseudo representation of each actual core profile
		Cores: cores,

		// legacy: set since we have the value
		OverrideTotalCompute: total,

		// legacy: set since we can compute the value
		OverrideWitholdCompute: withheld,
	}
}
89 changes: 89 additions & 0 deletions nomad/structs/cpucompat_linux_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,89 @@
// Copyright (c) HashiCorp, Inc.
// SPDX-License-Identifier: BUSL-1.1

//go:build linux

package structs

import (
"testing"

"github.com/hashicorp/nomad/ci"
"github.com/hashicorp/nomad/client/lib/idset"
"github.com/hashicorp/nomad/client/lib/numalib"
"github.com/hashicorp/nomad/client/lib/numalib/hw"
"github.com/shoenig/test/must"
)

// TestNUMA_topologyFromLegacy_plain checks the topology synthesized on linux
// from a legacy cpu resource in which all four cores are reservable.
func TestNUMA_topologyFromLegacy_plain(t *testing.T) {
	ci.Parallel(t)

	legacy := LegacyNodeCpuResources{
		CpuShares:          12800,
		TotalCpuCores:      4,
		ReservableCpuCores: []uint16{0, 1, 2, 3},
	}

	want := &numalib.Topology{
		NodeIDs:   idset.From[hw.NodeID]([]hw.NodeID{0}),
		Distances: numalib.SLIT{{10}},
		Cores: []numalib.Core{
			makeLegacyCore(0),
			makeLegacyCore(1),
			makeLegacyCore(2),
			makeLegacyCore(3),
		},
		OverrideTotalCompute:   12800,
		OverrideWitholdCompute: 0,
	}

	got := topologyFromLegacy(legacy)

	// whole-value comparison; only compares total compute
	must.Equal(t, want, got)

	// field-by-field comparison of the underlying values
	must.Eq(t, want.NodeIDs, got.NodeIDs)
	must.Eq(t, want.Distances, got.Distances)
	must.Eq(t, want.Cores, got.Cores)
	must.Eq(t, want.OverrideTotalCompute, got.OverrideTotalCompute)
	must.Eq(t, want.OverrideWitholdCompute, got.OverrideWitholdCompute)
}

// TestNUMA_topologyFromLegacy_reservations checks the topology synthesized on
// linux when one of the four cores (core 0) is not reservable: the remaining
// three cores are synthesized and one core's worth of compute is withheld.
func TestNUMA_topologyFromLegacy_reservations(t *testing.T) {
	ci.Parallel(t)

	legacy := LegacyNodeCpuResources{
		CpuShares:          9600,
		TotalCpuCores:      4,
		ReservableCpuCores: []uint16{1, 2, 3}, // core 0 excluded
	}

	want := &numalib.Topology{
		NodeIDs:   idset.From[hw.NodeID]([]hw.NodeID{0}),
		Distances: numalib.SLIT{{10}},
		Cores: []numalib.Core{
			makeLegacyCore(1),
			makeLegacyCore(2),
			makeLegacyCore(3),
		},
		OverrideTotalCompute:   9600,
		OverrideWitholdCompute: 3200, // core 0 excluded
	}

	got := topologyFromLegacy(legacy)

	// whole-value comparison; only compares total compute
	must.Equal(t, want, got)

	// field-by-field comparison of the underlying values
	must.Eq(t, want.NodeIDs, got.NodeIDs)
	must.Eq(t, want.Distances, got.Distances)
	must.Eq(t, want.Cores, got.Cores)
	must.Eq(t, want.OverrideTotalCompute, got.OverrideTotalCompute)
	must.Eq(t, want.OverrideWitholdCompute, got.OverrideWitholdCompute)
}
41 changes: 0 additions & 41 deletions nomad/structs/numa.go
Original file line number Diff line number Diff line change
Expand Up @@ -7,10 +7,7 @@ import (
"errors"
"fmt"

"github.com/hashicorp/nomad/client/lib/idset"
"github.com/hashicorp/nomad/client/lib/numalib"
"github.com/hashicorp/nomad/client/lib/numalib/hw"
"github.com/hashicorp/nomad/helper"
)

const (
Expand Down Expand Up @@ -141,41 +138,3 @@ func (r *NodeProcessorResources) TotalCompute() int {
}
return int(r.Topology.TotalCompute())
}

// topologyFromLegacy synthesizes a single-socket, single-node
// numalib.Topology from the LegacyNodeCpuResources reported by a pre-1.7
// client.
//
// The synthesized cores mirror old.ReservableCpuCores. When that list is
// empty the cores are instead numbered 0..TotalCpuCores-1, so that dividing
// the shares across the cores cannot panic on a division by zero. If no core
// count is available at all, the topology has no cores and a zero per-core
// frequency.
func topologyFromLegacy(old LegacyNodeCpuResources) *numalib.Topology {
	// pick the core ids to synthesize; fall back to the total core count when
	// the legacy value lists no reservable cores
	ids := old.ReservableCpuCores
	if len(ids) == 0 {
		ids = make([]uint16, 0, old.TotalCpuCores)
		for i := 0; i < int(old.TotalCpuCores); i++ {
			ids = append(ids, uint16(i))
		}
	}

	// interpret per-core frequency given total compute and usable core count;
	// guarded because integer division by zero is a run-time panic
	var frequency hw.MHz
	if len(ids) > 0 {
		frequency = hw.MHz(old.CpuShares / int64(len(ids)))
	}

	cores := helper.ConvertSlice(
		ids,
		func(id uint16) numalib.Core {
			return numalib.Core{
				ID:         hw.CoreID(id),
				SocketID:   0, // legacy: assume single socket with id 0
				NodeID:     0, // legacy: assume single numa node with id 0
				Grade:      numalib.Performance,
				Disable:    false, // only usable cores in the source
				GuessSpeed: frequency,
			}
		},
	)

	// Compute attributed to cores that exist but are not usable. The per-core
	// frequency is truncated by integer division, so the product can fall
	// short of CpuShares; only a positive surplus counts as withheld. The
	// comparison avoids a negative result (or a wrap-around, should hw.MHz be
	// an unsigned type - TODO confirm).
	var withheld hw.MHz
	allCores, total := frequency*hw.MHz(old.TotalCpuCores), hw.MHz(old.CpuShares)
	if allCores > total {
		withheld = allCores - total
	}

	return &numalib.Topology{
		// legacy: assume one node with id 0
		NodeIDs: idset.From[hw.NodeID]([]hw.NodeID{0}),

		// legacy: with one node the distance matrix is 1-D
		Distances: numalib.SLIT{{10}},

		// legacy: a pseudo representation of each actual core profile
		Cores: cores,

		// legacy: set since we have the value
		OverrideTotalCompute: total,

		// legacy: set since we can compute the value
		OverrideWitholdCompute: withheld,
	}
}
Loading

0 comments on commit 951cde4

Please sign in to comment.