From 951cde4e3bffedd8b2dcfa6c96718c9b0690f00c Mon Sep 17 00:00:00 2001 From: Seth Hoenig Date: Tue, 24 Oct 2023 09:12:34 -0500 Subject: [PATCH] numa: fix cpu topology conversion for non linux systems (#18843) --- client/fingerprint/cpu_darwin_test.go | 7 +- nomad/structs/cpucompat_default.go | 80 ++++++++++++++++++++++ nomad/structs/cpucompat_default_test.go | 51 ++++++++++++++ nomad/structs/cpucompat_linux.go | 83 +++++++++++++++++++++++ nomad/structs/cpucompat_linux_test.go | 89 +++++++++++++++++++++++++ nomad/structs/numa.go | 41 ------------ nomad/structs/numa_test.go | 74 -------------------- nomad/structs/structs.go | 29 -------- 8 files changed, 306 insertions(+), 148 deletions(-) create mode 100644 nomad/structs/cpucompat_default.go create mode 100644 nomad/structs/cpucompat_default_test.go create mode 100644 nomad/structs/cpucompat_linux.go create mode 100644 nomad/structs/cpucompat_linux_test.go diff --git a/client/fingerprint/cpu_darwin_test.go b/client/fingerprint/cpu_darwin_test.go index a9f16ad8209..36b0555ba3d 100644 --- a/client/fingerprint/cpu_darwin_test.go +++ b/client/fingerprint/cpu_darwin_test.go @@ -32,14 +32,13 @@ func TestCPUFingerprint_AppleSilicon(t *testing.T) { attributes := response.Attributes must.NotNil(t, attributes) must.MapContainsKey(t, attributes, "cpu.modelname") - must.MapContainsKey(t, attributes, "cpu.numcores.power") + must.MapContainsKey(t, attributes, "cpu.numcores.performance") must.MapContainsKey(t, attributes, "cpu.numcores.efficiency") - must.MapContainsKey(t, attributes, "cpu.frequency.power") + must.MapContainsKey(t, attributes, "cpu.frequency.performance") must.MapContainsKey(t, attributes, "cpu.frequency.efficiency") must.MapContainsKey(t, attributes, "cpu.totalcompute") - must.Positive(t, response.Resources.CPU) must.Positive(t, response.NodeResources.Cpu.CpuShares) - must.Positive(t, response.NodeResources.Cpu.SharesPerCore()) + must.Positive(t, response.NodeResources.Cpu.TotalCpuCores) must.SliceEmpty(t, response.NodeResources.Cpu.ReservableCpuCores) // not included for mixed core types (that we can detect) diff --git a/nomad/structs/cpucompat_default.go b/nomad/structs/cpucompat_default.go new file mode 100644 index 00000000000..72685a3d4a4 --- /dev/null +++ b/nomad/structs/cpucompat_default.go @@ -0,0 +1,80 @@ +// Copyright (c) HashiCorp, Inc. +// SPDX-License-Identifier: BUSL-1.1 + +//go:build !linux + +package structs + +import ( + "github.com/hashicorp/nomad/client/lib/idset" + "github.com/hashicorp/nomad/client/lib/numalib" + "github.com/hashicorp/nomad/client/lib/numalib/hw" +) + +// Compatibility will translate the LegacyNodeCpuResources into NodeProcessor +// Resources, or the other way around as needed. +// +// This implementation is specific to non-linux operating systems where +// there are no reservable cores. +func (n *NodeResources) Compatibility() { + // If resources are not set there is nothing to do. + if n == nil { + return + } + + // Copy values from n.Processors to n.Cpu for compatibility + // + // COMPAT: added in Nomad 1.7; can be removed in 1.9+ + if n.Processors.Topology == nil && !n.Cpu.empty() { + // When we receive a node update from a pre-1.7 client it contains only + // the LegacyNodeCpuResources field, and so we synthesize a pseudo + // NodeProcessorResources field + n.Processors.Topology = topologyFromLegacy(n.Cpu) + } else if !n.Processors.empty() { + // When we receive a node update from a 1.7+ client it contains a + // NodeProcessorResources field, and we populate the LegacyNodeCpuResources + // field using that information. + n.Cpu.CpuShares = int64(n.Processors.TotalCompute()) + n.Cpu.TotalCpuCores = uint16(n.Processors.Topology.UsableCores().Size()) + } +} + +func topologyFromLegacy(old LegacyNodeCpuResources) *numalib.Topology { + coreCount := old.TotalCpuCores + + // interpret per-core frequency given total compute and total core count + frequency := hw.MHz(old.CpuShares / (int64(coreCount))) + + // synthesize a set of cores that abstractly matches the legacy cpu specs + cores := make([]numalib.Core, 0, coreCount) + + for i := 0; i < int(coreCount); i++ { + cores = append(cores, numalib.Core{ + ID: hw.CoreID(i), + SocketID: 0, // no numa support on non-linux + NodeID: 0, // no numa support on non-linux + Grade: numalib.Performance, // assume P-cores + Disable: false, // no reservable cores on non-linux + GuessSpeed: frequency, + }) + } + + withheld := (frequency * hw.MHz(coreCount)) - hw.MHz(old.CpuShares) + + return &numalib.Topology{ + // legacy: assume one node with id 0 + NodeIDs: idset.From[hw.NodeID]([]hw.NodeID{0}), + + // legacy: with one node the distance matrix is 1-D + Distances: numalib.SLIT{{10}}, + + // legacy: a pseudo representation of each actual core profile + Cores: cores, + + // legacy: set since we have the value + OverrideTotalCompute: hw.MHz(old.CpuShares), + + // legacy: set since we can compute the value + OverrideWitholdCompute: withheld, + } +} diff --git a/nomad/structs/cpucompat_default_test.go b/nomad/structs/cpucompat_default_test.go new file mode 100644 index 00000000000..4f6a9c83512 --- /dev/null +++ b/nomad/structs/cpucompat_default_test.go @@ -0,0 +1,51 @@ +// Copyright (c) HashiCorp, Inc. +// SPDX-License-Identifier: BUSL-1.1 + +//go:build !linux + +package structs + +import ( + "testing" + + "github.com/hashicorp/nomad/ci" + "github.com/hashicorp/nomad/client/lib/idset" + "github.com/hashicorp/nomad/client/lib/numalib" + "github.com/hashicorp/nomad/client/lib/numalib/hw" + "github.com/shoenig/test/must" +) + +func TestNUMA_topologyFromLegacy_plain(t *testing.T) { + ci.Parallel(t) + + old := LegacyNodeCpuResources{ + CpuShares: 12800, + TotalCpuCores: 4, + ReservableCpuCores: nil, + } + + result := topologyFromLegacy(old) + + exp := &numalib.Topology{ + NodeIDs: idset.From[hw.NodeID]([]hw.NodeID{0}), + Distances: numalib.SLIT{{10}}, + Cores: []numalib.Core{ + makeLegacyCore(0), + makeLegacyCore(1), + makeLegacyCore(2), + makeLegacyCore(3), + }, + OverrideTotalCompute: 12800, + OverrideWitholdCompute: 0, + } + + // only compares compute total + must.Equal(t, exp, result) + + // check underlying fields + must.Eq(t, exp.NodeIDs, result.NodeIDs) + must.Eq(t, exp.Distances, result.Distances) + must.Eq(t, exp.Cores, result.Cores) + must.Eq(t, exp.OverrideTotalCompute, result.OverrideTotalCompute) + must.Eq(t, exp.OverrideWitholdCompute, result.OverrideWitholdCompute) +} diff --git a/nomad/structs/cpucompat_linux.go b/nomad/structs/cpucompat_linux.go new file mode 100644 index 00000000000..9140b5e6e6b --- /dev/null +++ b/nomad/structs/cpucompat_linux.go @@ -0,0 +1,83 @@ +// Copyright (c) HashiCorp, Inc. +// SPDX-License-Identifier: BUSL-1.1 + +//go:build linux + +package structs + +import ( + "github.com/hashicorp/nomad/client/lib/idset" + "github.com/hashicorp/nomad/client/lib/numalib" + "github.com/hashicorp/nomad/client/lib/numalib/hw" + "github.com/hashicorp/nomad/helper" +) + +// Compatibility will translate the LegacyNodeCpuResources into NodeProcessor +// Resources, or the other way around as needed. +// +// This implementation is specific to non-linux operating systems where +// there are no reservable cores. +func (n *NodeResources) Compatibility() { + // If resources are not set there is nothing to do. + if n == nil { + return + } + + // Copy values from n.Processors to n.Cpu for compatibility + // + // COMPAT: added in Nomad 1.7; can be removed in 1.9+ + if n.Processors.Topology == nil && !n.Cpu.empty() { + // When we receive a node update from a pre-1.7 client it contains only + // the LegacyNodeCpuResources field, and so we synthesize a pseudo + // NodeProcessorResources field + n.Processors.Topology = topologyFromLegacy(n.Cpu) + } else if !n.Processors.empty() { + // When we receive a node update from a 1.7+ client it contains a + // NodeProcessorResources field, and we populate the LegacyNodeCpuResources + // field using that information. + n.Cpu.CpuShares = int64(n.Processors.TotalCompute()) + n.Cpu.TotalCpuCores = uint16(n.Processors.Topology.UsableCores().Size()) + cores := n.Processors.Topology.UsableCores().Slice() + n.Cpu.ReservableCpuCores = helper.ConvertSlice(cores, func(coreID hw.CoreID) uint16 { + return uint16(coreID) + }) + } +} + +func topologyFromLegacy(old LegacyNodeCpuResources) *numalib.Topology { + // interpret per-core frequency given total compute and total core count + frequency := hw.MHz(old.CpuShares / (int64(len(old.ReservableCpuCores)))) + + cores := helper.ConvertSlice( + old.ReservableCpuCores, + func(id uint16) numalib.Core { + return numalib.Core{ + ID: hw.CoreID(id), + SocketID: 0, // legacy: assume single socket with id 0 + NodeID: 0, // legacy: assume single numa node with id 0 + Grade: numalib.Performance, + Disable: false, // only usable cores in the source + GuessSpeed: frequency, + } + }, + ) + + withheld := (frequency * hw.MHz(old.TotalCpuCores)) - hw.MHz(old.CpuShares) + + return &numalib.Topology{ + // legacy: assume one node with id 0 + NodeIDs: idset.From[hw.NodeID]([]hw.NodeID{0}), + + // legacy: with one node the distance matrix is 1-D + Distances: numalib.SLIT{{10}}, + + // legacy: a pseudo representation of each actual core profile + Cores: cores, + + // legacy: set since we have the value + OverrideTotalCompute: hw.MHz(old.CpuShares), + + // legacy: set since we can compute the value + OverrideWitholdCompute: withheld, + } +} diff --git a/nomad/structs/cpucompat_linux_test.go b/nomad/structs/cpucompat_linux_test.go new file mode 100644 index 00000000000..da4fbd100b5 --- /dev/null +++ b/nomad/structs/cpucompat_linux_test.go @@ -0,0 +1,89 @@ +// Copyright (c) HashiCorp, Inc. +// SPDX-License-Identifier: BUSL-1.1 + +//go:build linux + +package structs + +import ( + "testing" + + "github.com/hashicorp/nomad/ci" + "github.com/hashicorp/nomad/client/lib/idset" + "github.com/hashicorp/nomad/client/lib/numalib" + "github.com/hashicorp/nomad/client/lib/numalib/hw" + "github.com/shoenig/test/must" +) + +func TestNUMA_topologyFromLegacy_plain(t *testing.T) { + ci.Parallel(t) + + old := LegacyNodeCpuResources{ + CpuShares: 12800, + TotalCpuCores: 4, + ReservableCpuCores: []uint16{ + 0, 1, 2, 3, + }, + } + + result := topologyFromLegacy(old) + + exp := &numalib.Topology{ + NodeIDs: idset.From[hw.NodeID]([]hw.NodeID{0}), + Distances: numalib.SLIT{{10}}, + Cores: []numalib.Core{ + makeLegacyCore(0), + makeLegacyCore(1), + makeLegacyCore(2), + makeLegacyCore(3), + }, + OverrideTotalCompute: 12800, + OverrideWitholdCompute: 0, + } + + // only compares total compute + must.Equal(t, exp, result) + + // check underlying fields + must.Eq(t, exp.NodeIDs, result.NodeIDs) + must.Eq(t, exp.Distances, result.Distances) + must.Eq(t, exp.Cores, result.Cores) + must.Eq(t, exp.OverrideTotalCompute, result.OverrideTotalCompute) + must.Eq(t, exp.OverrideWitholdCompute, result.OverrideWitholdCompute) +} + +func TestNUMA_topologyFromLegacy_reservations(t *testing.T) { + ci.Parallel(t) + + old := LegacyNodeCpuResources{ + CpuShares: 9600, + TotalCpuCores: 4, + ReservableCpuCores: []uint16{ + 1, 2, 3, // core 0 excluded + }, + } + + result := topologyFromLegacy(old) + + exp := &numalib.Topology{ + NodeIDs: idset.From[hw.NodeID]([]hw.NodeID{0}), + Distances: numalib.SLIT{{10}}, + Cores: []numalib.Core{ + makeLegacyCore(1), + makeLegacyCore(2), + makeLegacyCore(3), + }, + OverrideTotalCompute: 9600, + OverrideWitholdCompute: 3200, // core 0 excluded + } + + // only compares total compute + must.Equal(t, exp, result) + + // check underlying fields + must.Eq(t, exp.NodeIDs, result.NodeIDs) + must.Eq(t, exp.Distances, result.Distances) + must.Eq(t, exp.Cores, result.Cores) + must.Eq(t, exp.OverrideTotalCompute, result.OverrideTotalCompute) + must.Eq(t, exp.OverrideWitholdCompute, result.OverrideWitholdCompute) +} diff --git a/nomad/structs/numa.go b/nomad/structs/numa.go index 027427d482f..2405fc59d59 100644 --- a/nomad/structs/numa.go +++ b/nomad/structs/numa.go @@ -7,10 +7,7 @@ import ( "errors" "fmt" - "github.com/hashicorp/nomad/client/lib/idset" "github.com/hashicorp/nomad/client/lib/numalib" - "github.com/hashicorp/nomad/client/lib/numalib/hw" - "github.com/hashicorp/nomad/helper" ) const ( @@ -141,41 +138,3 @@ func (r *NodeProcessorResources) TotalCompute() int { } return int(r.Topology.TotalCompute()) } - -func topologyFromLegacy(old LegacyNodeCpuResources) *numalib.Topology { - // interpret per-core frequency given total compute and total core count - frequency := hw.MHz(old.CpuShares / (int64(len(old.ReservableCpuCores)))) - - cores := helper.ConvertSlice( - old.ReservableCpuCores, - func(id uint16) numalib.Core { - return numalib.Core{ - ID: hw.CoreID(id), - SocketID: 0, // legacy: assume single socket with id 0 - NodeID: 0, // legacy: assume single numa node with id 0 - Grade: numalib.Performance, - Disable: false, // only usable cores in the source - GuessSpeed: frequency, - } - }, - ) - - withheld := (frequency * hw.MHz(old.TotalCpuCores)) - hw.MHz(old.CpuShares) - - return &numalib.Topology{ - // legacy: assume one node with id 0 - NodeIDs: idset.From[hw.NodeID]([]hw.NodeID{0}), - - // legacy: with one node the distance matrix is 1-D - Distances: numalib.SLIT{{10}}, - - // legacy: a pseudo representation of each actual core profile - Cores: cores, - - // legacy: set since we have the value - OverrideTotalCompute: hw.MHz(old.CpuShares), - - // legacy: set since we can compute the value - OverrideWitholdCompute: withheld, - } -} diff --git a/nomad/structs/numa_test.go b/nomad/structs/numa_test.go index 2aa82382553..325a318acdc 100644 --- a/nomad/structs/numa_test.go +++ b/nomad/structs/numa_test.go @@ -8,7 +8,6 @@ import ( "testing" "github.com/hashicorp/nomad/ci" - "github.com/hashicorp/nomad/client/lib/idset" "github.com/hashicorp/nomad/client/lib/numalib" "github.com/hashicorp/nomad/client/lib/numalib/hw" "github.com/shoenig/test/must" @@ -97,76 +96,3 @@ func makeLegacyCore(id hw.CoreID) numalib.Core { GuessSpeed: 3200, } } - -func TestNUMA_topologyFromLegacy_plain(t *testing.T) { - ci.Parallel(t) - - old := LegacyNodeCpuResources{ - CpuShares: 12800, - TotalCpuCores: 4, - ReservableCpuCores: []uint16{ - 0, 1, 2, 3, - }, - } - - result := topologyFromLegacy(old) - - exp := &numalib.Topology{ - NodeIDs: idset.From[hw.NodeID]([]hw.NodeID{0}), - Distances: numalib.SLIT{{10}}, - Cores: []numalib.Core{ - makeLegacyCore(0), - makeLegacyCore(1), - makeLegacyCore(2), - makeLegacyCore(3), - }, - OverrideTotalCompute: 12800, - OverrideWitholdCompute: 0, - } - - // only compares total compute - must.Equal(t, exp, result) - - // check underlying fields - must.Eq(t, exp.NodeIDs, result.NodeIDs) - must.Eq(t, exp.Distances, result.Distances) - must.Eq(t, exp.Cores, result.Cores) - must.Eq(t, exp.OverrideTotalCompute, result.OverrideTotalCompute) - must.Eq(t, exp.OverrideWitholdCompute, result.OverrideWitholdCompute) -} - -func TestNUMA_topologyFromLegacy_reservations(t *testing.T) { - ci.Parallel(t) - - old := LegacyNodeCpuResources{ - CpuShares: 9600, - TotalCpuCores: 4, - ReservableCpuCores: []uint16{ - 1, 2, 3, // core 0 excluded - }, - } - - result := topologyFromLegacy(old) - - exp := &numalib.Topology{ - NodeIDs: idset.From[hw.NodeID]([]hw.NodeID{0}), - Distances: numalib.SLIT{{10}}, - Cores: []numalib.Core{ - makeLegacyCore(1), - makeLegacyCore(2), - makeLegacyCore(3), - }, - OverrideTotalCompute: 9600, - OverrideWitholdCompute: 3200, // core 0 excluded - } - - // only compares total compute - must.Equal(t, exp, result) - - // check underlying fields - must.Eq(t, exp.NodeIDs, result.NodeIDs) - must.Eq(t, exp.Distances, result.Distances) - must.Eq(t, exp.Cores, result.Cores) - must.Eq(t, exp.OverrideTotalCompute, result.OverrideTotalCompute) - must.Eq(t, exp.OverrideWitholdCompute, result.OverrideWitholdCompute) -} diff --git a/nomad/structs/structs.go b/nomad/structs/structs.go index a756c6ec3ef..5d1413dd148 100644 --- a/nomad/structs/structs.go +++ b/nomad/structs/structs.go @@ -3127,35 +3127,6 @@ type NodeResources struct { MaxDynamicPort int } -// Compatibility will translate the LegacyNodeCpuResources into NodeProcessor -// Resources, or the other way around as needed. -func (n *NodeResources) Compatibility() { - // If resources are not set there is nothing to do. - if n == nil { - return - } - - // Copy values from n.Processors to n.Cpu for compatibility - // - // COMPAT: added in Nomad 1.7; can be removed in 1.9+ - if n.Processors.Topology == nil && !n.Cpu.empty() { - // When we receive a node update from a pre-1.7 client it contains only - // the LegacyNodeCpuResources field, and so we synthesize a pseudo - // NodeProcessorResources field - n.Processors.Topology = topologyFromLegacy(n.Cpu) - } else if !n.Processors.empty() { - // When we receive a node update from a 1.7+ client it contains a - // NodeProcessorResources field, and we populate the LegacyNodeCpuResources - // field using that information. - n.Cpu.CpuShares = int64(n.Processors.TotalCompute()) - n.Cpu.TotalCpuCores = uint16(n.Processors.Topology.UsableCores().Size()) - cores := n.Processors.Topology.UsableCores().Slice() - n.Cpu.ReservableCpuCores = helper.ConvertSlice(cores, func(coreID hw.CoreID) uint16 { - return uint16(coreID) - }) - } -} - func (n *NodeResources) Copy() *NodeResources { if n == nil { return nil