Skip to content

Commit

Permalink
client: fixed a bug where AMD CPUs were not correctly fingerprinting …
Browse files Browse the repository at this point in the history
…base speed (#24415)

Relates to: #19468
  • Loading branch information
mvegter authored Nov 21, 2024
1 parent 6ccfcc3 commit bfb7141
Show file tree
Hide file tree
Showing 4 changed files with 113 additions and 14 deletions.
3 changes: 3 additions & 0 deletions .changelog/24415.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
```release-note:bug
client: fixed a bug where AMD CPUs were not correctly fingerprinting base speed
```
51 changes: 37 additions & 14 deletions client/lib/numalib/detect_linux.go
Original file line number Diff line number Diff line change
Expand Up @@ -30,16 +30,18 @@ func PlatformScanners() []SystemScanner {
}

const (
sysRoot = "/sys/devices/system"
nodeOnline = sysRoot + "/node/online"
cpuOnline = sysRoot + "/cpu/online"
distanceFile = sysRoot + "/node/node%d/distance"
cpulistFile = sysRoot + "/node/node%d/cpulist"
cpuMaxFile = sysRoot + "/cpu/cpu%d/cpufreq/cpuinfo_max_freq"
cpuBaseFile = sysRoot + "/cpu/cpu%d/cpufreq/base_frequency"
cpuSocketFile = sysRoot + "/cpu/cpu%d/topology/physical_package_id"
cpuSiblingFile = sysRoot + "/cpu/cpu%d/topology/thread_siblings_list"
deviceFiles = "/sys/bus/pci/devices"
sysRoot = "/sys/devices/system"
nodeOnline = sysRoot + "/node/online"
cpuOnline = sysRoot + "/cpu/online"
distanceFile = sysRoot + "/node/node%d/distance"
cpulistFile = sysRoot + "/node/node%d/cpulist"
cpuDriverFile = sysRoot + "/cpu/cpu%d/cpufreq/scaling_driver"
cpuMaxFile = sysRoot + "/cpu/cpu%d/cpufreq/cpuinfo_max_freq"
cpuCpccNominalFile = sysRoot + "/cpu/cpu%d/acpi_cppc/nominal_freq"
cpuIntelBaseFile = sysRoot + "/cpu/cpu%d/cpufreq/base_frequency"
cpuSocketFile = sysRoot + "/cpu/cpu%d/topology/physical_package_id"
cpuSiblingFile = sysRoot + "/cpu/cpu%d/topology/thread_siblings_list"
deviceFiles = "/sys/bus/pci/devices"
)

// pathReaderFn is a path reader function, injected into all value getters to
Expand Down Expand Up @@ -131,8 +133,8 @@ func (*Sysfs) discoverCores(st *Topology, readerFunc pathReaderFn) {
st.nodeIDs = idset.From[hw.NodeID]([]hw.NodeID{0})
const node = 0
const socket = 0
cpuMax, _ := getNumeric[hw.KHz](cpuMaxFile, 64, readerFunc, core)
base, _ := getNumeric[hw.KHz](cpuBaseFile, 64, readerFunc, core)

base, cpuMax := discoverCoreSpeeds(core, readerFunc)
st.insert(node, socket, core, Performance, cpuMax, base)
st.Nodes = st.nodeIDs.Slice()
return nil
Expand All @@ -149,9 +151,8 @@ func (*Sysfs) discoverCores(st *Topology, readerFunc pathReaderFn) {
_ = cores.ForEach(func(core hw.CoreID) error {
// best effort, zero values are defaults
socket, _ := getNumeric[hw.SocketID](cpuSocketFile, 8, readerFunc, core)
cpuMax, _ := getNumeric[hw.KHz](cpuMaxFile, 64, readerFunc, core)
base, _ := getNumeric[hw.KHz](cpuBaseFile, 64, readerFunc, core)
siblings, _ := getIDSet[hw.CoreID](cpuSiblingFile, readerFunc, core)
base, cpuMax := discoverCoreSpeeds(core, readerFunc)

// if we get an incorrect core number, this means we're not getting the right
// data from SysFS. In this case we bail and set default values.
Expand All @@ -167,6 +168,28 @@ func (*Sysfs) discoverCores(st *Topology, readerFunc pathReaderFn) {
}
}

func discoverCoreSpeeds(core hw.CoreID, readerFunc pathReaderFn) (hw.KHz, hw.KHz) {
baseSpeed := hw.KHz(0)
maxSpeed := hw.KHz(0)

driver, _ := getString(cpuDriverFile, readerFunc, core)

switch driver {
case "acpi-cpufreq":
// Indicates the highest sustained performance level of the processor
baseSpeedMHz, _ := getNumeric[hw.MHz](cpuCpccNominalFile, 64, readerFunc, core)
baseSpeed = baseSpeedMHz.KHz()
default:
// COMPAT(1.9.x): while the `base_frequency` file is specific to the `intel_pstate` scaling driver, we should
// preserve the default while we may uncover more scaling driver specific implementations.
baseSpeed, _ = getNumeric[hw.KHz](cpuIntelBaseFile, 64, readerFunc, core)
}

maxSpeed, _ = getNumeric[hw.KHz](cpuMaxFile, 64, readerFunc, core)

return baseSpeed, maxSpeed
}

func getIDSet[T idset.ID](path string, readerFunc pathReaderFn, args ...any) (*idset.Set[T], error) {
path = fmt.Sprintf(path, args...)
s, err := readerFunc(path)
Expand Down
69 changes: 69 additions & 0 deletions client/lib/numalib/detect_linux_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,37 @@ func goodSysData(path string) ([]byte, error) {
}[path], nil
}

func goodSysDataAMD(path string) ([]byte, error) {
return map[string][]byte{
"/sys/devices/system/node/online": []byte("0-1"),
"/sys/devices/system/cpu/online": []byte("0-3"),
"/sys/devices/system/node/node0/distance": []byte("10"),
"/sys/devices/system/node/node0/cpulist": []byte("0-3"),
"/sys/devices/system/node/node1/distance": []byte("10"),
"/sys/devices/system/node/node1/cpulist": []byte("0-3"),
"/sys/devices/system/cpu/cpu0/acpi_cppc/nominal_freq": []byte("2450"),
"/sys/devices/system/cpu/cpu0/cpufreq/cpuinfo_max_freq": []byte("3500000"),
"/sys/devices/system/cpu/cpu0/cpufreq/scaling_driver": []byte("acpi-cpufreq"),
"/sys/devices/system/cpu/cpu0/topology/physical_package_id": []byte("0"),
"/sys/devices/system/cpu/cpu0/topology/thread_siblings_list": []byte("0,2"),
"/sys/devices/system/cpu/cpu1/acpi_cppc/nominal_freq": []byte("2450"),
"/sys/devices/system/cpu/cpu1/cpufreq/cpuinfo_max_freq": []byte("3500000"),
"/sys/devices/system/cpu/cpu1/cpufreq/scaling_driver": []byte("acpi-cpufreq"),
"/sys/devices/system/cpu/cpu1/topology/physical_package_id": []byte("0"),
"/sys/devices/system/cpu/cpu1/topology/thread_siblings_list": []byte("1,3"),
"/sys/devices/system/cpu/cpu2/acpi_cppc/nominal_freq": []byte("2450"),
"/sys/devices/system/cpu/cpu2/cpufreq/cpuinfo_max_freq": []byte("3500000"),
"/sys/devices/system/cpu/cpu2/cpufreq/scaling_driver": []byte("acpi-cpufreq"),
"/sys/devices/system/cpu/cpu2/topology/physical_package_id": []byte("0"),
"/sys/devices/system/cpu/cpu2/topology/thread_siblings_list": []byte("0,2"),
"/sys/devices/system/cpu/cpu3/acpi_cppc/nominal_freq": []byte("2450"),
"/sys/devices/system/cpu/cpu3/cpufreq/cpuinfo_max_freq": []byte("3500000"),
"/sys/devices/system/cpu/cpu3/cpufreq/scaling_driver": []byte("acpi-cpufreq"),
"/sys/devices/system/cpu/cpu3/topology/physical_package_id": []byte("0"),
"/sys/devices/system/cpu/cpu3/topology/thread_siblings_list": []byte("1,3"),
}[path], nil
}

func TestSysfs_discoverOnline(t *testing.T) {
st := MockTopology(&idset.Set[hw.NodeID]{}, SLIT{}, []Core{})
goodIDSet := idset.From[hw.NodeID]([]uint8{0, 1})
Expand Down Expand Up @@ -195,6 +226,44 @@ func TestSysfs_discoverCores(t *testing.T) {
},
},
}},
{"two nodes and good sys AMD data", twoNodes, goodSysDataAMD, &Topology{
nodeIDs: twoNodes,
Nodes: twoNodes.Slice(),
Cores: []Core{
{
SocketID: 1,
NodeID: 0,
ID: 0,
Grade: Performance,
BaseSpeed: 2450,
MaxSpeed: 3500,
},
{
SocketID: 1,
NodeID: 0,
ID: 1,
Grade: Performance,
BaseSpeed: 2450,
MaxSpeed: 3500,
},
{
SocketID: 1,
NodeID: 0,
ID: 2,
Grade: Performance,
BaseSpeed: 2450,
MaxSpeed: 3500,
},
{
SocketID: 1,
NodeID: 0,
ID: 3,
Grade: Performance,
BaseSpeed: 2450,
MaxSpeed: 3500,
},
},
}},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
Expand Down
4 changes: 4 additions & 0 deletions client/lib/numalib/hw/speeds.go
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,10 @@ func (khz KHz) MHz() MHz {
return MHz(khz / 1000)
}

func (mhz MHz) KHz() KHz {
return KHz(mhz * 1000)
}

func (khz KHz) String() string {
return strconv.FormatUint(uint64(khz.MHz()), 10)
}

0 comments on commit bfb7141

Please sign in to comment.