From 027daaf0b638fd2dcffcecf3211c84fe82c7a06f Mon Sep 17 00:00:00 2001 From: Gabriele Galiero Casay Date: Tue, 28 Nov 2023 17:14:39 +0000 Subject: [PATCH] First version of powertelemetry library --- .gitignore | 22 + .golangci.yml | 328 ++ CONTRIBUTING.md | 2 +- LICENSE | 201 + LICENSE_OF_DEPENDENCIES.md | 12 + Makefile | 44 + README.md | 580 +++ VERSION | 1 + builder.go | 482 ++ builder_test.go | 1168 +++++ clock.go | 36 + cmd/example/main.go | 298 ++ cpufreq.go | 67 + cpufreq_test.go | 99 + errors.go | 47 + file.go | 78 + go.mod | 27 + go.sum | 62 + internal/cpuid/cpuid_count_amd64.go | 9 + internal/cpuid/cpuid_count_amd64.s | 15 + internal/cpuid/hybrid.go | 28 + internal/cpumodel/intel_family.go | 174 + internal/log/default.go | 18 + internal/log/logger.go | 71 + internal/version/version.go | 42 + metrics.go | 225 + metrics_test.go | 255 + msr.go | 474 ++ msr_test.go | 1033 +++++ perf.go | 558 +++ perf_test.go | 1180 +++++ power.go | 755 +++ power_test.go | 4093 +++++++++++++++++ rapl.go | 604 +++ rapl_test.go | 1159 +++++ testdata/alderlake_goldencove_core.json | 299 ++ .../cpu0/cpufreq/scaling_cur_freq | 1 + .../cpu-freq/cpu0/cpufreq/scaling_cur_freq | 1 + testdata/cpu-msr-cpuID-msr-not-exist/0/dummy | 0 testdata/cpu-msr-cpuID-msr-softlink/0/msr | 1 + testdata/cpu-msr-cpuID-msr-softlink/1/msr | 1 + testdata/cpu-msr-directories-not-exist/dummy | 0 .../cpu-msr-invalid-cpuID-directories/01/msr | 1 + .../1invalid/msr | 1 + testdata/cpu-msr/0/msr | 1 + testdata/cpu-msr/1/msr | 1 + testdata/cpu-msr/10/msr | 1 + testdata/cpu-msr/100/msr | 1 + testdata/cpuinfo_bad1/cpuinfo | 26 + testdata/cpuinfo_bad2/cpuinfo | 26 + testdata/cpuinfo_bad3/cpuinfo | 26 + testdata/cpuinfo_good/cpuinfo | 26 + testdata/die-id-invalid/cpu1/topology/die_id | 1 + testdata/die-id-valid/cpu1/topology/die_id | 1 + .../intel-rapl{colon}1/name | 0 .../intel-rapl{colon}0/energy_uj | 1 + .../max_energy_range_uj | 1 + .../intel-rapl{colon}0{colon}1/name | 1 + 
.../intel-rapl{colon}0/max_energy_range_uj | 1 + .../intel-rapl{colon}0/name | 1 + .../intel-rapl{colon}0{colon}0/invalid-name | 1 + .../intel-rapl{colon}0/name | 1 + .../intel-rapl{colon}1/name | 1 + .../intel-rapl{colon}0{colon}1/energy_uj | 1 + .../max_energy_range_uj | 1 + .../intel-rapl{colon}0{colon}1/name | 1 + .../intel-rapl{colon}0/max_energy_range_uj | 1 + .../intel-rapl{colon}0/name | 1 + .../intel-rapl{colon}0/domain | 1 + .../constraint_0_max_power_uw | 1 + .../intel-rapl/intel-rapl{colon}0/energy_uj | 1 + .../intel-rapl{colon}0{colon}0/energy_uj | 1 + .../max_energy_range_uj | 1 + .../intel-rapl{colon}0{colon}0/name | 1 + .../intel-rapl{colon}0{colon}1/energy_uj | 1 + .../max_energy_range_uj | 1 + .../intel-rapl{colon}0{colon}1/name | 1 + .../intel-rapl{colon}0/max_energy_range_uj | 1 + testdata/intel-rapl/intel-rapl{colon}0/name | 1 + .../constraint_0_max_power_uw | 1 + .../intel-rapl/intel-rapl{colon}1/energy_uj | 1 + .../intel-rapl{colon}1{colon}0/name | 1 + .../intel-rapl{colon}1/max_energy_range_uj | 1 + testdata/intel-rapl/intel-rapl{colon}1/name | 1 + .../constraint_0_max_power_uw | 0 .../intel-rapl/intel-rapl{colon}2/energy_uj | 1 + .../intel-rapl{colon}2{colon}0/energy_uj | 1 + .../max_energy_range_uj | 0 .../intel-rapl{colon}2{colon}0/name | 1 + testdata/intel-rapl/intel-rapl{colon}2/name | 1 + .../intel-rapl/intel-rapl{colon}3/energy_uj | 1 + testdata/intel-rapl/intel-rapl{colon}3/name | 1 + .../intel-rapl/intel-rapl{colon}4/energy_uj | 1 + testdata/intel-rapl/intel-rapl{colon}4/name | 1 + .../intel-rapl/intel-rapl{colon}5/energy_uj | 1 + testdata/intel-rapl/intel-rapl{colon}5/name | 1 + .../package_09_die_12/initial_max_freq_khz | 1 + .../package_09_die_12/initial_min_freq_khz | 2 + .../package_09_die_12/max_freq_khz | 0 .../package_09_die_12/min_freq_khz | 1 + .../package_10_die_03/current_freq_khz | 1 + .../package_10_die_03/initial_max_freq_khz | 1 + .../package_10_die_03/initial_min_freq_khz | 1 + .../package_10_die_03/max_freq_khz | 1 
+ .../package_10_die_03/min_freq_khz | 1 + testdata/proc_modules_msr_loaded | 24 + testdata/proc_modules_msr_not_loaded | 24 + testdata/proc_modules_rapl_loaded | 32 + testdata/proc_modules_rapl_not_loaded | 31 + testdata/sapphirerapids_core.json | 299 ++ testdata/symlink | 1 + testdata_setup_test.go | 98 + topology.go | 269 ++ topology_test.go | 454 ++ turbofreq.go | 352 ++ turbofreq_test.go | 772 ++++ uncorefreq.go | 143 + uncorefreq_test.go | 270 ++ unit_converter.go | 14 + 119 files changed, 17492 insertions(+), 1 deletion(-) create mode 100644 .gitignore create mode 100644 .golangci.yml create mode 100644 LICENSE create mode 100644 LICENSE_OF_DEPENDENCIES.md create mode 100644 Makefile create mode 100644 README.md create mode 100644 VERSION create mode 100644 builder.go create mode 100644 builder_test.go create mode 100644 clock.go create mode 100644 cmd/example/main.go create mode 100644 cpufreq.go create mode 100644 cpufreq_test.go create mode 100644 errors.go create mode 100644 file.go create mode 100644 go.mod create mode 100644 go.sum create mode 100644 internal/cpuid/cpuid_count_amd64.go create mode 100644 internal/cpuid/cpuid_count_amd64.s create mode 100644 internal/cpuid/hybrid.go create mode 100644 internal/cpumodel/intel_family.go create mode 100644 internal/log/default.go create mode 100644 internal/log/logger.go create mode 100644 internal/version/version.go create mode 100644 metrics.go create mode 100644 metrics_test.go create mode 100644 msr.go create mode 100644 msr_test.go create mode 100644 perf.go create mode 100644 perf_test.go create mode 100644 power.go create mode 100644 power_test.go create mode 100644 rapl.go create mode 100644 rapl_test.go create mode 100644 testdata/alderlake_goldencove_core.json create mode 100644 testdata/cpu-freq-invalid/cpu0/cpufreq/scaling_cur_freq create mode 100644 testdata/cpu-freq/cpu0/cpufreq/scaling_cur_freq create mode 100644 testdata/cpu-msr-cpuID-msr-not-exist/0/dummy create mode 100644 
testdata/cpu-msr-cpuID-msr-softlink/0/msr create mode 120000 testdata/cpu-msr-cpuID-msr-softlink/1/msr create mode 100644 testdata/cpu-msr-directories-not-exist/dummy create mode 100644 testdata/cpu-msr-invalid-cpuID-directories/01/msr create mode 100644 testdata/cpu-msr-invalid-cpuID-directories/1invalid/msr create mode 100644 testdata/cpu-msr/0/msr create mode 100644 testdata/cpu-msr/1/msr create mode 100644 testdata/cpu-msr/10/msr create mode 100644 testdata/cpu-msr/100/msr create mode 100644 testdata/cpuinfo_bad1/cpuinfo create mode 100644 testdata/cpuinfo_bad2/cpuinfo create mode 100644 testdata/cpuinfo_bad3/cpuinfo create mode 100644 testdata/cpuinfo_good/cpuinfo create mode 100644 testdata/die-id-invalid/cpu1/topology/die_id create mode 100644 testdata/die-id-valid/cpu1/topology/die_id create mode 100644 testdata/intel-rapl-domain-name-empty/intel-rapl{colon}1/name create mode 100644 testdata/intel-rapl-dram-curr-energy-attr-file-not-exist/intel-rapl{colon}0/energy_uj create mode 100644 testdata/intel-rapl-dram-curr-energy-attr-file-not-exist/intel-rapl{colon}0/intel-rapl{colon}0{colon}1/max_energy_range_uj create mode 100644 testdata/intel-rapl-dram-curr-energy-attr-file-not-exist/intel-rapl{colon}0/intel-rapl{colon}0{colon}1/name create mode 100644 testdata/intel-rapl-dram-curr-energy-attr-file-not-exist/intel-rapl{colon}0/max_energy_range_uj create mode 100644 testdata/intel-rapl-dram-curr-energy-attr-file-not-exist/intel-rapl{colon}0/name create mode 100644 testdata/intel-rapl-dram-domain-name-not-exist/intel-rapl{colon}0/intel-rapl{colon}0{colon}0/invalid-name create mode 100644 testdata/intel-rapl-dram-domain-name-not-exist/intel-rapl{colon}0/name create mode 100644 testdata/intel-rapl-invalid-package-domain-name-id/intel-rapl{colon}1/name create mode 100644 testdata/intel-rapl-package-curr-energy-attr-file-not-exist/intel-rapl{colon}0/intel-rapl{colon}0{colon}1/energy_uj create mode 100644 
testdata/intel-rapl-package-curr-energy-attr-file-not-exist/intel-rapl{colon}0/intel-rapl{colon}0{colon}1/max_energy_range_uj create mode 100644 testdata/intel-rapl-package-curr-energy-attr-file-not-exist/intel-rapl{colon}0/intel-rapl{colon}0{colon}1/name create mode 100644 testdata/intel-rapl-package-curr-energy-attr-file-not-exist/intel-rapl{colon}0/max_energy_range_uj create mode 100644 testdata/intel-rapl-package-curr-energy-attr-file-not-exist/intel-rapl{colon}0/name create mode 100644 testdata/intel-rapl-package-domain-name-not-exist/intel-rapl{colon}0/domain create mode 100644 testdata/intel-rapl/intel-rapl{colon}0/constraint_0_max_power_uw create mode 100644 testdata/intel-rapl/intel-rapl{colon}0/energy_uj create mode 100644 testdata/intel-rapl/intel-rapl{colon}0/intel-rapl{colon}0{colon}0/energy_uj create mode 100644 testdata/intel-rapl/intel-rapl{colon}0/intel-rapl{colon}0{colon}0/max_energy_range_uj create mode 100644 testdata/intel-rapl/intel-rapl{colon}0/intel-rapl{colon}0{colon}0/name create mode 100644 testdata/intel-rapl/intel-rapl{colon}0/intel-rapl{colon}0{colon}1/energy_uj create mode 100644 testdata/intel-rapl/intel-rapl{colon}0/intel-rapl{colon}0{colon}1/max_energy_range_uj create mode 100644 testdata/intel-rapl/intel-rapl{colon}0/intel-rapl{colon}0{colon}1/name create mode 100644 testdata/intel-rapl/intel-rapl{colon}0/max_energy_range_uj create mode 100644 testdata/intel-rapl/intel-rapl{colon}0/name create mode 100644 testdata/intel-rapl/intel-rapl{colon}1/constraint_0_max_power_uw create mode 100644 testdata/intel-rapl/intel-rapl{colon}1/energy_uj create mode 100644 testdata/intel-rapl/intel-rapl{colon}1/intel-rapl{colon}1{colon}0/name create mode 100644 testdata/intel-rapl/intel-rapl{colon}1/max_energy_range_uj create mode 100644 testdata/intel-rapl/intel-rapl{colon}1/name create mode 100644 testdata/intel-rapl/intel-rapl{colon}2/constraint_0_max_power_uw create mode 100644 testdata/intel-rapl/intel-rapl{colon}2/energy_uj create mode 100644 
testdata/intel-rapl/intel-rapl{colon}2/intel-rapl{colon}2{colon}0/energy_uj create mode 100644 testdata/intel-rapl/intel-rapl{colon}2/intel-rapl{colon}2{colon}0/max_energy_range_uj create mode 100644 testdata/intel-rapl/intel-rapl{colon}2/intel-rapl{colon}2{colon}0/name create mode 100644 testdata/intel-rapl/intel-rapl{colon}2/name create mode 100644 testdata/intel-rapl/intel-rapl{colon}3/energy_uj create mode 100644 testdata/intel-rapl/intel-rapl{colon}3/name create mode 100644 testdata/intel-rapl/intel-rapl{colon}4/energy_uj create mode 100644 testdata/intel-rapl/intel-rapl{colon}4/name create mode 100644 testdata/intel-rapl/intel-rapl{colon}5/energy_uj create mode 100644 testdata/intel-rapl/intel-rapl{colon}5/name create mode 100644 testdata/intel_uncore_frequency/package_09_die_12/initial_max_freq_khz create mode 100644 testdata/intel_uncore_frequency/package_09_die_12/initial_min_freq_khz create mode 100644 testdata/intel_uncore_frequency/package_09_die_12/max_freq_khz create mode 100644 testdata/intel_uncore_frequency/package_09_die_12/min_freq_khz create mode 100644 testdata/intel_uncore_frequency/package_10_die_03/current_freq_khz create mode 100644 testdata/intel_uncore_frequency/package_10_die_03/initial_max_freq_khz create mode 100644 testdata/intel_uncore_frequency/package_10_die_03/initial_min_freq_khz create mode 100644 testdata/intel_uncore_frequency/package_10_die_03/max_freq_khz create mode 100644 testdata/intel_uncore_frequency/package_10_die_03/min_freq_khz create mode 100644 testdata/proc_modules_msr_loaded create mode 100644 testdata/proc_modules_msr_not_loaded create mode 100644 testdata/proc_modules_rapl_loaded create mode 100644 testdata/proc_modules_rapl_not_loaded create mode 100644 testdata/sapphirerapids_core.json create mode 120000 testdata/symlink create mode 100644 testdata_setup_test.go create mode 100644 topology.go create mode 100644 topology_test.go create mode 100644 turbofreq.go create mode 100644 turbofreq_test.go create mode 
100644 uncorefreq.go create mode 100644 uncorefreq_test.go create mode 100644 unit_converter.go diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..b030ec8 --- /dev/null +++ b/.gitignore @@ -0,0 +1,22 @@ +### Go template +# Binaries for programs and plugins +/example +*.exe +*.exe~ +*.dll +*.so +*.dylib +/coverage.out + +# Test binary, built with `go test -c` +*.test + +# Output of the go coverage tool, specifically when used with LiteIDE +*.out + +# VSCode config +.vscode + +# Idea files +*.iml +.idea/ diff --git a/.golangci.yml b/.golangci.yml new file mode 100644 index 0000000..8180bb5 --- /dev/null +++ b/.golangci.yml @@ -0,0 +1,328 @@ +linters: + disable-all: true + enable: + - asasalint + - asciicheck + - bidichk + - bodyclose + - decorder + - dogsled + - dupword + - durationcheck + - errchkjson + - errcheck + - errname + - errorlint + - exhaustive + - exportloopref + - gci + - gocheckcompilerdirectives + - gochecksumtype + - gocritic + - godot + - goprintffuncname + - gosec + - gosimple + - govet + - inamedparam + - ineffassign + - interfacebloat + - lll + - makezero + - mirror + - misspell + - musttag + - nakedret + - nestif + - nilerr + - nolintlint + - perfsprint + - prealloc + - predeclared + - reassign + - revive + - sqlclosecheck + - staticcheck + - tenv + - testifylint + - tparallel + - typecheck + - unconvert + - unparam + - usestdlibvars + - unused + - wastedassign + - whitespace + +linters-settings: + errcheck: + # List of functions to exclude from checking, where each entry is a single function to exclude. + # See https://github.com/kisielk/errcheck#excluding-functions for details. + exclude-functions: + - "(*hash/maphash.Hash).Write" + - "(*hash/maphash.Hash).WriteByte" + - "(*hash/maphash.Hash).WriteString" + gci: + # Section configuration to compare against. + # Section names are case-insensitive and may contain parameters in (). 
+ # The default order of sections is `standard > default > custom > blank > dot`, + # If `custom-order` is `true`, it follows the order of `sections` option. + # Default: ["standard", "default"] + sections: + - standard # Standard section: captures all standard packages. + - default # Default section: contains all imports that could not be matched to another section type. + - prefix(github.com/intel/powertelemetry) # Custom section: groups all imports with the specified Prefix. + gocritic: + # Which checks should be enabled; can't be combined with 'disabled-checks'. + # See https://go-critic.github.io/overview#checks-overview. + # To check which checks are enabled run `GL_DEBUG=gocritic golangci-lint run`. + # By default, list of stable checks is used. + enabled-checks: + - argOrder + - badCall + - badCond + - badLock + - badRegexp + - badSorting + - builtinShadowDecl + - caseOrder + - codegenComment + - commentedOutCode + - deferInLoop + - dupArg + - deprecatedComment + - dupBranchBody + - dupCase + - dupSubExpr + - dynamicFmtString + - emptyDecl + - evalOrder + - exitAfterDefer + - externalErrorReassign + - filepathJoin + - flagName + - mapKey + - nilValReturn + - offBy1 + - regexpPattern + - sloppyTestFuncName + - sloppyReassign + - sloppyTypeAssert + - sortSlice + - sprintfQuotedString + - sqlQuery + - syncMapLoadAndDelete + - truncateCmp + - uncheckedInlineErr + - unnecessaryDefer + - weakCond + gosec: + # To select a subset of rules to run. + # Available rules: https://github.com/securego/gosec#available-rules + # Default: [] - means include all rules + includes: + - G101 + - G102 + - G103 + - G106 + - G107 + - G108 + - G109 + - G110 + - G111 + - G112 + - G114 + - G201 + - G202 + - G203 + - G301 + - G302 + - G303 + - G305 + - G306 + - G401 + - G403 + - G404 + - G501 + - G502 + - G503 + - G505 + - G601 + # G104, G105, G113, G204, G304, G307, G402, G504 were not enabled intentionally + # To specify the configuration of rules. 
+ config: + # Maximum allowed permissions mode for os.OpenFile and os.Chmod + # Default: "0600" + G302: "0640" + # Maximum allowed permissions mode for os.WriteFile and ioutil.WriteFile + # Default: "0600" + G306: "0640" + govet: + settings: + ## Check calls to the logging functions below as if they were printf-style functions + printf: + funcs: + - (github.com/influxdata/telegraf.Logger).Debugf + - (github.com/influxdata/telegraf.Logger).Infof + - (github.com/influxdata/telegraf.Logger).Warnf + - (github.com/influxdata/telegraf.Logger).Errorf + - (github.com/influxdata/telegraf.Logger).Debug + - (github.com/influxdata/telegraf.Logger).Info + - (github.com/influxdata/telegraf.Logger).Warn + - (github.com/influxdata/telegraf.Logger).Error + lll: + # Max line length, lines longer will be reported. + # '\t' is counted as 1 character by default, and can be changed with the tab-width option. + # Default: 120. + line-length: 160 + # Tab width in spaces. + # Default: 1 + tab-width: 4 + nolintlint: + # Enable to require an explanation of nonzero length after each nolint directive. + # Default: false + require-explanation: true + # Enable to require nolint directives to mention the specific linter being suppressed. + # Default: false + require-specific: true + prealloc: + # Report pre-allocation suggestions only on simple loops that have no returns/breaks/continues/gotos in them. 
+ # Default: true + simple: false + revive: + rules: + - name: argument-limit + arguments: [ 6 ] + - name: atomic + - name: bare-return + - name: blank-imports + - name: bool-literal-in-expr + - name: call-to-gc + - name: confusing-naming + - name: confusing-results + - name: constant-logical-expr + - name: context-as-argument + - name: context-keys-type + - name: deep-exit + - name: defer + - name: dot-imports + - name: duplicated-imports + - name: early-return + - name: empty-block + - name: empty-lines + - name: error-naming + - name: error-return + - name: error-strings + - name: errorf + - name: flag-parameter + - name: function-result-limit + arguments: [ 4 ] + - name: identical-branches + - name: if-return + - name: import-shadowing + - name: increment-decrement + - name: indent-error-flow + - name: modifies-parameter + - name: modifies-value-receiver + - name: package-comments + - name: range + - name: range-val-address + - name: range-val-in-closure + - name: receiver-naming + - name: redefines-builtin-id + - name: string-of-int + - name: struct-tag + - name: superfluous-else + - name: time-naming + - name: unconditional-recursion + - name: unexported-naming + - name: unnecessary-stmt + - name: unreachable-code + - name: unused-parameter + - name: var-declaration + - name: var-naming + arguments: [[""], ["PMU"]] + - name: waitgroup-by-value + nakedret: + # make an issue if func has more lines of code than this setting and it has naked returns; default is 30 + max-func-lines: 1 + tenv: + # The option `all` will run against whole test files (`_test.go`) regardless of method/function signatures. + # Otherwise, only methods that take `*testing.T`, `*testing.B`, and `testing.TB` as arguments are checked. + # Default: false + all: true + testifylint: + # Enable specific checkers. 
+ # https://github.com/Antonboom/testifylint#checkers + # Default: ["bool-compare", "compares", "empty", "error-is-as", "error-nil", "expected-actual", "float-compare", "len", "require-error", "suite-dont-use-pkg", "suite-extra-assert-call"] + enable: + - bool-compare + - compares + - empty + - error-is-as + - error-nil + - expected-actual + - len + - require-error + - suite-dont-use-pkg + - suite-extra-assert-call + - suite-thelper + +run: + # timeout for analysis, e.g. 30s, 5m, default is 1m + timeout: 10m + +issues: + # Maximum issues count per one linter. Set to 0 to disable. Default is 50. + max-issues-per-linter: 0 + + # Maximum count of issues with the same text. Set to 0 to disable. Default is 3. + max-same-issues: 0 + + # List of regexps of issue texts to exclude. + # + # But independently of this option we use default exclude patterns, + # it can be disabled by `exclude-use-default: false`. + # To list all excluded by default patterns execute `golangci-lint run --help` + # + # Default: https://golangci-lint.run/usage/false-positives/#default-exclusions + exclude: + # revive:var-naming + - don't use an underscore in package name + # EXC0001 errcheck: Almost all programs ignore errors on these functions and in most cases it's ok + - Error return value of .((os\.)?std(out|err)\..*|.*Close.*|.*Flush|.*Disconnect|.*Clear|os\.Remove(All)?|.*print(f|ln)?|os\.(Un)?Setenv). is not checked + # EXC0013 revive: Annoying issue about not having a comment. The rare codebase has such comments + - package comment should be of the form "(.+)... + # EXC0015 revive: Annoying issue about not having a comment. 
The rare codebase has such comments + - should have a package comment + + # Excluding configuration per-path, per-linter, per-text and per-source + exclude-rules: + - path: _test\.go + text: "Potential hardcoded credentials" #gosec:G101 + + - path: _test\.go + text: "Use of weak random number generator" #gosec:G404 + + # Independently of option `exclude` we use default exclude patterns, + # it can be disabled by this option. + # To list all excluded by default patterns execute `golangci-lint run --help`. + # Default: true. + exclude-use-default: false + +# output configuration options +output: + # Format: colored-line-number|line-number|json|tab|checkstyle|code-climate|junit-xml|github-actions + # + # Multiple can be specified by separating them by comma, output can be provided + # for each of them by separating format name and path by colon symbol. + # Output path can be either `stdout`, `stderr` or path to the file to write to. + # Example: "checkstyle:report.json,colored-line-number" + # + # Default: colored-line-number + format: tab + # Make issues output unique by line. + # Default: true + uniq-by-line: false + # Sort results by: filepath, line and column. + sort-results: true diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index f682f4e..57eb56a 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -2,7 +2,7 @@ ### License - is licensed under the terms in [LICENSE]. By contributing to the project, you agree to the license and copyright terms therein and release your contribution under these terms. +Power Telemetry Library is licensed under the terms in [LICENSE](LICENSE). By contributing to the project, you agree to the license and copyright terms therein and release your contribution under these terms. 
### Sign your work diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000..261eeb9 --- /dev/null +++ b/LICENSE @@ -0,0 +1,201 @@ + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). 
+ + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. 
Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative 
Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. 
Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. 
+ + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
diff --git a/LICENSE_OF_DEPENDENCIES.md b/LICENSE_OF_DEPENDENCIES.md new file mode 100644 index 0000000..3f7f6a3 --- /dev/null +++ b/LICENSE_OF_DEPENDENCIES.md @@ -0,0 +1,12 @@ +# Licenses of dependencies + +`powertelemetry` may contain portions of the following works: + +- github.com/intel/iaevents [Apache License 2.0](https://github.com/intel/iaevents/blob/main/LICENSE) +- github.com/jmhodges/clock [MIT License](https://github.com/jmhodges/clock/blob/main/LICENSE) +- github.com/shirou/gopsutil [BSD License](https://github.com/shirou/gopsutil/blob/master/LICENSE) +- github.com/tklauser/go-sysconf [BSD 3-Clause "New" or "Revised" License](https://github.com/tklauser/go-sysconf/blob/main/LICENSE) +- github.com/tklauser/numcpus [Apache License 2.0](https://github.com/tklauser/numcpus/blob/main/LICENSE) +- golang.org/x/exp [BSD-3-Clause](https://pkg.go.dev/golang.org/x/exp?tab=licenses) +- golang.org/x/sync [BSD-3-Clause](https://pkg.go.dev/golang.org/x/sync?tab=licenses) +- golang.org/x/sys [BSD-3-Clause](https://pkg.go.dev/golang.org/x/sys?tab=licenses) diff --git a/Makefile b/Makefile new file mode 100644 index 0000000..85ee3d1 --- /dev/null +++ b/Makefile @@ -0,0 +1,44 @@ +# Copyright (C) 2023 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +package_version_name := github.com/intel/powertelemetry/internal/version +version := $(shell cat VERSION) +tag := $(shell git describe --exact-match --tags 2>/dev/null) + +branch := $(shell git rev-parse --abbrev-ref HEAD) +commit := $(shell git rev-parse --short=8 HEAD) + +LDFLAGS := $(LDFLAGS) -X $(package_version_name).Commit=$(commit) -X $(package_version_name).Branch=$(branch) +ifneq ($(tag),) + LDFLAGS += -X $(package_version_name).Version=$(version) +else + LDFLAGS += -X $(package_version_name).Version=$(version)-$(commit) +endif + +GOFILES ?= $(shell git ls-files '*.go') +GOFMT ?= $(shell gofmt -l -s $(GOFILES)) + +build: + go build -ldflags "$(LDFLAGS)" ./cmd/example + +test: + go test -race -cover -v ./...
+ +coverage: + go test ./... -coverprofile=coverage.out + +fmtcheck: + @if [ ! -z "$(GOFMT)" ]; then \ + echo "[ERROR] gofmt has found errors in the following files:" ; \ + echo "$(GOFMT)" ; \ + echo "" ;\ + echo "Run make fmt to fix them." ; \ + exit 1 ;\ + fi + +tidy: + go mod verify + go mod tidy + go fix ./... + +.PHONY : build test coverage fmtcheck tidy diff --git a/README.md b/README.md new file mode 100644 index 0000000..3ba29b0 --- /dev/null +++ b/README.md @@ -0,0 +1,580 @@ +# Power Telemetry Library + +`powertelemetry` is a Golang library that provides functionalities to get power management +related metrics for Intel processors. + +## Metrics + +Metric types can be distinguished based on the host topology attributes associated with them: + +- *CPU metric*: Metric value related to a specific logical CPU (CPU ID). +- *Package metric*: Metric value related to a specific package ID (socket ID). +- *Die metric*: Metric value related to a specific die ID. + +**The following metrics are supported by Power Telemetry Library:** + +| Metric name | Type | Description | Units | +|---------------------------------------|----------------|------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|-----------------| +| `CurrentPackagePowerConsumptionWatts` | Package | Current power consumption of processor package. | Watts | +| `CurrentDramPowerConsumptionWatts` | Package | Current power consumption of processor package DRAM subsystem. | Watts | +| `PackageThermalDesignPowerWatts` | Package | Maximum Thermal Design Power (TDP) available for processor package. | Watts | +| `MaxTurboFreqList` | Package | Maximum reachable turbo frequency for number of cores active. | MHz | +| `CurrentUncoreFrequency` | Package/Die | Current uncore frequency for die in processor package. 
This value is available from `intel-uncore-frequency` module for kernel >= 5.18. For older kernel versions it needs to be accessed via MSR. In case of lack of loaded `msr`, value will not be collected. | MHz | +| `InitialUncoreFrequencyMin` | Package/Die | Initial minimum uncore frequency limit for die in processor package. | MHz | +| `InitialUncoreFrequencyMax` | Package/Die | Initial maximum uncore frequency limit for die in processor package. | MHz | +| `CustomizedUncoreFrequencyMin` | Package/Die | Customized minimum uncore frequency limit for die in processor package. | MHz | +| `CustomizedUncoreFrequencyMax` | Package/Die | Customized maximum uncore frequency limit for die in processor package. | MHz | +| `CPUBaseFrequency` | Package | CPU Base Frequency (maximum non-turbo frequency) for the processor package. | MHz | +| `CPUFrequency` | CPU | Current operational frequency of CPU Core. | MHz | +| `CPUC0StateResidency` | CPU | Percentage of time that CPU Core spent in C0 Core residency state. | % | +| `CPUC1StateResidency` | CPU | Percentage of time that CPU Core spent in C1 Core residency state. | % | +| `CPUC3StateResidency` | CPU | Percentage of time that CPU Core spent in C3 Core residency state. | % | +| `CPUC6StateResidency` | CPU | Percentage of time that CPU Core spent in C6 Core residency state. | % | +| `CPUC7StateResidency` | CPU | Percentage of time that CPU Core spent in C7 Core residency state. | % | +| `CPUTemperature` | CPU | Current temperature of CPU Core. | degrees Celsius | +| `CPUBusyFrequencyMhz` | CPU | CPU Core Busy Frequency measured as frequency adjusted to CPU Core busy cycles. | MHz | +| `CPUC0SubstateC01Percent` | CPU | Percentage of time that CPU Core spent in C0.1 substate out of the total time in the C0 state. | % | +| `CPUC0SubstateC02Percent` | CPU | Percentage of time that CPU Core spent in C0.2 substate out of the total time in the C0 state. 
| % | +| `CPUC0SubstateC0WaitPercent` | CPU | Percentage of time that CPU Core spent in C0_Wait substate out of the total time in the C0 state. | % | + +> **Note**: Metrics that report processor C-state residencies or power consumption are calculated over elapsed intervals. + +## SW Dependencies + +### Kernel modules + +The library is mostly based on Linux Kernel modules that expose specific metrics over +`sysfs` or `devfs` interfaces. The following dependencies are expected by +the library: + +- `intel-rapl` kernel module which exposes Intel Running Average Power Limit (RAPL) metrics over + `sysfs` (`/sys/devices/virtual/powercap/intel-rapl`), +- `msr` kernel module that provides access to processor model specific + registers over `devfs` (`/dev/cpu/%d/msr`), +- `cpufreq` kernel module - which exposes per-CPU Frequency over `sysfs` + (`/sys/devices/system/cpu/cpu%d/cpufreq/scaling_cur_freq`), +- `intel-uncore-frequency` kernel module which exposes Intel uncore frequency metrics + over `sysfs` (`/sys/devices/system/cpu/intel_uncore_frequency`). + +Make sure that required kernel modules are loaded and running. Modules might have to be manually enabled by using `modprobe`. Depending on the kernel version, run commands: + +```sh +# rapl modules: +## kernel < 4.0 +sudo modprobe intel_rapl +## kernel >= 4.0 +sudo modprobe rapl +sudo modprobe intel_rapl_common +sudo modprobe intel_rapl_msr + +# msr module: +sudo modprobe msr + +# cpufreq module: +### integrated in kernel + +# intel-uncore-frequency module: +## only for kernel >= 5.6.0 +sudo modprobe intel-uncore-frequency +``` + +### Kernel's perf interface + +For perf-related metrics, when the application that uses this library is not running as root, +the following capability should be added to the application executable: + +```sh +sudo setcap cap_sys_admin+ep <path_to_application> +``` + +Alternatively, `/proc/sys/kernel/perf_event_paranoid` has to be set to +a value less than 1.
+ +Depending on environment and configuration (number of monitored CPUs +and number of enabled metrics), it might be required to increase +the limit on the number of open file descriptors allowed. +This can be done for example by using `ulimit -n` command. + +### Dependencies of metrics on system configuration + +Details of these dependencies are discussed above: + +| Metric name | Type | Dependency | +|---------------------------------------|----------------|------------------------------------------------| +| `CurrentPackagePowerConsumptionWatts` | Package | `rapl` kernel module(s) | +| `CurrentDramPowerConsumptionWatts` | Package | `rapl` kernel module(s) | +| `PackageThermalDesignPowerWatts` | Package | `rapl` kernel module(s) | +| `MaxTurboFreqList` | Package | `msr` kernel module | +| `CurrentUncoreFrequency` | Package/Die | `intel-uncore-frequency`/`msr` kernel modules* | +| `InitialUncoreFrequencyMin` | Package/Die | `intel-uncore-frequency` kernel module | +| `InitialUncoreFrequencyMax` | Package/Die | `intel-uncore-frequency` kernel module | +| `CustomizedUncoreFrequencyMin` | Package/Die | `intel-uncore-frequency` kernel module | +| `CustomizedUncoreFrequencyMax` | Package/Die | `intel-uncore-frequency` kernel module | +| `CPUBaseFrequency` | Package/Die | `msr` kernel module | +| `CPUFrequency` | CPU | `cpufreq` kernel module | +| `CPUC0StateResidency` | CPU | `msr` kernel module | +| `CPUC1StateResidency` | CPU | `msr` kernel module | +| `CPUC3StateResidency` | CPU | `msr` kernel module | +| `CPUC6StateResidency` | CPU | `msr` kernel module | +| `CPUC7StateResidency` | CPU | `msr` kernel module | +| `CPUTemperature` | CPU | `msr` kernel module | +| `CPUBusyFrequencyMhz` | CPU | `msr` kernel module | +| `CPUC0SubstateC01Percent` | CPU | kernel's `perf` interface | +| `CPUC0SubstateC02Percent` | CPU | kernel's `perf` interface | +| `CPUC0SubstateC0WaitPercent` | CPU | kernel's `perf` interface | + +*starting from kernel version 5.18, only the 
`intel-uncore-frequency` module +is required. For older kernel versions, the metric `CurrentUncoreFrequency` +requires the `msr` module to be enabled. + +### Root privileges + +**The application that uses this library may require +root privileges to read all the metrics** +(depending on OS type or configuration). + +Alternatively, the following capabilities can be added to +the application executable: + +```sh +# without perf-related metrics: +sudo setcap cap_sys_rawio,cap_dac_read_search+ep <path_to_application> + +# with perf-related metrics: +sudo setcap cap_sys_rawio,cap_dac_read_search,cap_sys_admin+ep <path_to_application> +``` + +## HW Dependencies + +Specific metrics require certain processor features to be present, otherwise +this library won't be able to read them. The user can detect supported +processor features by reading the `/proc/cpuinfo` file. +The library assumes crucial properties are the same for all CPU cores in the system. + +The following `processor` properties are examined in more detail +in this section: + +- `vendor_id` +- `cpu family` +- `model` +- `flags` + +The following processor properties are required by the library: + +- Processor `vendor_id` must be `GenuineIntel` and `cpu family` must be `6` - + since data used by the library are Intel-specific.
+- The following processor flags shall be present: + - `msr` shall be present for the library to read platform data from processor + model specific registers and collect the following metrics: + - `CPUC0StateResidency` + - `CPUC1StateResidency` + - `CPUC3StateResidency` + - `CPUC6StateResidency` + - `CPUC7StateResidency` + - `CPUBusyFrequencyMhz` + - `CPUTemperature` + - `CPUBaseFrequency` + - `MaxTurboFreqList` + - `CurrentUncoreFrequency` (for kernel < 5.18) + - `aperfmperf` shall be present to collect the following metrics: + - `CPUC0StateResidency` + - `CPUC1StateResidency` + - `CPUBusyFrequencyMhz` + - `dts` shall be present to collect: + - `CPUTemperature` +- Please consult the table below which metrics among those listed are supported by the host's processor `model`: + - `CPUC1StateResidency` + - `CPUC3StateResidency` + - `CPUC6StateResidency` + - `CPUC7StateResidency` + - `CPUTemperature` + - `CPUBaseFrequency` + - `CurrentUncoreFrequency` + - `InitialUncoreFrequencyMin` + - `InitialUncoreFrequencyMax` + - `CustomizedUncoreFrequencyMin` + - `CustomizedUncoreFrequencyMax` + + | Model number | Processor name | `CPUC1StateResidency`
`CPUC6StateResidency`
`CPUTemperature`
`CPUBaseFrequency` | `CPUC3StateResidency` | `CPUC7StateResidency` | `CurrentUncoreFrequency`
`InitialUncoreFrequencyMin`
`InitialUncoreFrequencyMax`
`CustomizedUncoreFrequencyMin`
`CustomizedUncoreFrequencyMax` | + |--------------|---------------------------------|:-------------------------------------------------------------------------------------------:|:---------------------:|:---------------------:|:--------------------------------------------------------------------------------------------------------------------------------------------------------------:| + | 0x1E | Intel Nehalem | ✓ | ✓ | | | + | 0x1F | Intel Nehalem-G | ✓ | ✓ | | | + | 0x1A | Intel Nehalem-EP | ✓ | ✓ | | | + | 0x2E | Intel Nehalem-EX | ✓ | ✓ | | | + | 0x25 | Intel Westmere | ✓ | ✓ | | | + | 0x2C | Intel Westmere-EP | ✓ | ✓ | | | + | 0x2F | Intel Westmere-EX | ✓ | ✓ | | | + | 0x2A | Intel Sandybridge | ✓ | ✓ | ✓ | | + | 0x2D | Intel Sandybridge-X | ✓ | ✓ | ✓ | | + | 0x3A | Intel Ivybridge | ✓ | ✓ | ✓ | | + | 0x3E | Intel Ivybridge-X | ✓ | ✓ | ✓ | | + | 0x3C | Intel Haswell | ✓ | ✓ | ✓ | | + | 0x3F | Intel Haswell-X | ✓ | ✓ | ✓ | | + | 0x45 | Intel Haswell-L | ✓ | ✓ | ✓ | | + | 0x46 | Intel Haswell-G | ✓ | ✓ | ✓ | | + | 0x3D | Intel Broadwell | ✓ | ✓ | ✓ | | + | 0x47 | Intel Broadwell-G | ✓ | ✓ | ✓ | ✓ | + | 0x4F | Intel Broadwell-X | ✓ | ✓ | | ✓ | + | 0x56 | Intel Broadwell-D | ✓ | ✓ | | ✓ | + | 0x4E | Intel Skylake-L | ✓ | ✓ | ✓ | | + | 0x5E | Intel Skylake | ✓ | ✓ | ✓ | | + | 0x55 | Intel Skylake-X | ✓ | | | ✓ | + | 0x8E | Intel KabyLake-L | ✓ | ✓ | ✓ | | + | 0x9E | Intel KabyLake | ✓ | ✓ | ✓ | | + | 0xA5 | Intel CometLake | ✓ | ✓ | ✓ | | + | 0xA6 | Intel CometLake-L | ✓ | ✓ | ✓ | | + | 0x66 | Intel CannonLake-L | ✓ | | ✓ | | + | 0x6A | Intel IceLake-X | ✓ | | | ✓ | + | 0x6C | Intel IceLake-D | ✓ | | | ✓ | + | 0x7D | Intel IceLake | ✓ | | | | + | 0x7E | Intel IceLake-L | ✓ | | ✓ | | + | 0x9D | Intel IceLake-NNPI | ✓ | | ✓ | | + | 0xA7 | Intel RocketLake | ✓ | | ✓ | | + | 0x8C | Intel TigerLake-L | ✓ | | ✓ | | + | 0x8D | Intel TigerLake | ✓ | | ✓ | | + | 0x8F | Intel Sapphire Rapids X | ✓ | | | ✓ | + | 0xCF | Intel Emerald Rapids X | ✓ | | | ✓ | + | 0xAD | Intel 
Granite Rapids X | ✓ | | | | + | 0x8A | Intel Lakefield | ✓ | | ✓ | | + | 0x97 | Intel AlderLake | ✓ | | ✓ | ✓ | + | 0x9A | Intel AlderLake-L | ✓ | | ✓ | ✓ | + | 0xB7 | Intel RaptorLake | ✓ | | ✓ | ✓ | + | 0xBA | Intel RaptorLake-P | ✓ | | ✓ | ✓ | + | 0xBF | Intel RaptorLake-S | ✓ | | ✓ | ✓ | + | 0xAC | Intel MeteorLake | ✓ | | ✓ | ✓ | + | 0xAA | Intel MeteorLake-L | ✓ | | ✓ | ✓ | + | 0xC6 | Intel ArrowLake | ✓ | | ✓ | | + | 0xBD | Intel LunarLake | ✓ | | ✓ | | + | 0x37 | Intel Atom® Bay Trail | ✓ | | | | + | 0x4D | Intel Atom® Avaton | ✓ | | | | + | 0x4A | Intel Atom® Merrifield | ✓ | | | | + | 0x5A | Intel Atom® Moorefield | ✓ | | | | + | 0x4C | Intel Atom® Airmont | ✓ | ✓ | | | + | 0x5C | Intel Atom® Apollo Lake | ✓ | ✓ | ✓ | | + | 0x5F | Intel Atom® Denverton | ✓ | | | | + | 0x7A | Intel Atom® Goldmont | ✓ | ✓ | ✓ | | + | 0x86 | Intel Atom® Jacobsville | ✓ | | | | + | 0x96 | Intel Atom® Elkhart Lake | ✓ | | ✓ | | + | 0x9C | Intel Atom® Jasper Lake | ✓ | | ✓ | | + | 0xBE | Intel AlderLake-N | ✓ | | ✓ | | + | 0xAF | Intel Sierra Forest | ✓ | | | | + | 0xB6 | Intel Grand Ridge | ✓ | | | | + | 0x57 | Intel Xeon® PHI Knights Landing | ✓ | | | | + | 0x85 | Intel Xeon® PHI Knights Mill | ✓ | | | | + +## How to use + +### Initialization + +The `powertelemetry` library implements the functional options pattern. It allows the user to specify configuration parameters, and the dependencies that need to be initialized to provide metrics information, via `WithX` exported functions. + +```go +ptel, err := New(opts...) +``` + +Supported options are: + +- `WithCoreFrequency`: Option that enables access to metrics which rely on `cpufreq` kernel module. +- `WithMsr`: Option that enables access to metrics which rely on `msr` kernel module. +- `WithMsrTimeout`: Same as `WithMsr`, but it accepts an additional argument to specify the timeout for MSR reads.
+- `WithIncludedCPUs/WithExcludedCPUs`: Option that allows to specify which logical CPU ID have access to `msr` and `cpufreq` kernel modules and `perf_events` kernel interface, by inclusion or exclusion. Notice that only one of these options can be used during instantiation. When omitted, all logical CPUs from host topology are accessible. +- `WithRapl`: Option that enables access to metrics which rely on `rapl` kernel module. +- `WithUncoreFrequency`: Option that enables access to metrics which rely on `intel-uncore-frequency` kernel module. +- `WithPerf`: Option that enables access to metrics which rely on `perf_events` kernel interface. It takes the path of a JSON file with perf event definitions specific for the host's CPU model. Files can be found in [`perfmon`](https://github.com/intel/perfmon) repository. +- `WithLogger`: The user can provide a custom logger. + +Refer to [Dependencies of metrics on system configuration](#dependencies-of-metrics-on-system-configuration) section to check which options need to be enabled for each metric. + +#### Example: Initialize `PowerTelemetry` using included CPUs option + +This example uses options to enable all supported metrics. Additionally, uses `WithIncludedCPUs` to limit the CPU IDs that can be used to retrieve metrics which rely on `msr` and `cpufreq` kernel modules. + +```go +pt, err := powertelemetry.New( + powertelemetry.WithIncludedCPUs([]int{0, 1, 2, 3}), + powertelemetry.WithMsr(), + powertelemetry.WithCoreFrequency(), + powertelemetry.WithRapl(), + powertelemetry.WithUncoreFrequency(), + powertelemetry.WithPerf("/path/to/events.json"), +) +``` + +#### Example: Initialize `PowerTelemetry` using excluded CPUs option + +This example uses options to enable metrics which rely on `msr` and `cpufreq` kernel modules. Additionally, uses `WithExcludedCPUs` to exclude CPU IDs that can be used to retrieve these metrics. 
+ +```go +pt, err := powertelemetry.New( + powertelemetry.WithExcludedCPUs([]int{0, 1, 2, 3}), + powertelemetry.WithMsr(), + powertelemetry.WithCoreFrequency(), +) +``` + +### Get Metric Values + +`powertelemetry` provides exported methods to get metric values defined in the [Metrics](#metrics) section. These methods accept an argument depending on the metric type. + +The exported methods to get metric values have the following naming convention: + +```go +// CPU metric type. +func (ptel *PowerTelemetry) Get<metric_name>(cpuID int) (<value_type>, error) + +// Package metric type. +func (ptel *PowerTelemetry) Get<metric_name>(packageID int) (<value_type>, error) + +// Package/die metric type. +func (ptel *PowerTelemetry) Get<metric_name>(packageID, dieID int) (<value_type>, error) +``` + +Where: + +- `metric_name` corresponds to the metric name of the supported [Metrics](#metrics) section. +- `value_type` can be either `float64` or `uint64`, depending on the metric. + +There are several types of metrics depending on how values are calculated: + +- Instantaneous: The metric value corresponds to the specific instant at which the method is called. +- Elapsed interval: The metric value corresponds to a specific time interval.
+ +### Instantaneous Metrics + +Following are the metrics that provide instantaneous values: + +- `CPUTemperature` +- `CPUFrequency` +- `CPUBaseFrequency` +- `CurrentUncoreFrequency` +- `InitialUncoreFrequencyMin` +- `InitialUncoreFrequencyMax` +- `CustomizedUncoreFrequencyMin` +- `CustomizedUncoreFrequencyMax` +- `MaxTurboFreqList` +- `PackageThermalDesignPowerWatts` + +#### Example: Get the instantaneous value of CPU temperature metric + +```go +// CPU temperature metric +cpuTemp, err := pt.GetCPUTemperature(cpuID) +if err != nil { + // handle error +} +``` + +### Elapsed Interval Metrics + +The following metrics depend on elapsed intervals: + +- Metrics that rely on `msr`: + - `CPUC0StateResidency` + - `CPUC1StateResidency` + - `CPUC3StateResidency` + - `CPUC6StateResidency` + - `CPUC7StateResidency` + - `CPUBusyFrequencyMhz` +- Metrics that rely on `perf`: + - `CPUC0SubstateC01Percent` + - `CPUC0SubstateC02Percent` + - `CPUC0SubstateC0WaitPercent` +- Metrics that rely on `rapl`: + - `CurrentPackagePowerConsumptionWatts` + - `CurrentDramPowerConsumptionWatts` + +The elapsed time interval is automatically calculated between subsequent calls to retrieve metric values. It is recommended to use a scheduler to consistently retrieve metrics over a fixed time interval. + +### Metrics relying on `rapl` + +The following example shows how to retrieve the value of power related metrics based on `rapl` kernel module. + +#### Example: Get metrics which rely on `rapl` kernel module + +```go +// First read of metrics at init (t0). +pt, err := powertelemetry.New(powertelemetry.WithRapl()) +if err != nil { + // handle error +} + +// Method call at t1. Metric value corresponds to time interval t1-t0. +powerInterval1, err := pt.GetCurrentPackagePowerConsumptionWatts(packageID) +if err != nil { + // handle error +} + +// Method call at t2. Metric value corresponds to time interval t2-t1.
+powerInterval2, err := pt.GetCurrentPackagePowerConsumptionWatts(packageID) +if err != nil { + // handle error +} +``` + +> **Note**: The first metric reading operation happens at initialization, when `WithRapl` option is present. + +### Metrics relying on `msr` + +C-state residency metrics need an additional method call to `UpdatePerCPUMetrics` that reads all required offsets of the corresponding MSR registers prior to providing their values. + +#### Example: Get time-elapsed metrics which rely on `msr` kernel module + +```go +// First offset reading of MSR registers (call to UpdatePerCPUMetrics) at init (t0). +ptel, err := powertelemetry.New(powertelemetry.WithMsr()) +if err != nil { + // handle error +} + +cpuID := 0 + +// Method call at t1. Elapsed time is calculated from current and previous call to UpdatePerCPUMetrics, t1-t0. +if err := ptel.UpdatePerCPUMetrics(cpuID); err != nil { + // handle error + return +} + +// Get CPUC0StateResidency corresponding to previous elapsed interval. +c0State, err := ptel.GetCPUC0StateResidency(cpuID) +if err != nil { + // handle error +} + +// Get CPUC1StateResidency corresponding to previous elapsed interval. +c1State, err := ptel.GetCPUC1StateResidency(cpuID) +if err != nil { + // handle error +} +``` + +> **Note**: The first reading operations of the MSR register happen at initialization, when `WithMsr` or `WithMsrTimeout` options are present. + +### Metrics relying on `perf` + +C0-substate metrics need an additional `ReadPerfEvents` method call that reads all required perf events, per-CPU, prior to providing their values. + +When an instance of `PowerTelemetry` has been successfully initialized with the option `WithPerf`, perf events are activated. This means multiple file descriptors remain open. Therefore, if the user no longer needs to get `perf` specific metrics these resources need to be released, via `DeactivatePerfEvents` method call. + +> **Note**: Event activation happens at initialization, when `WithPerf` option is added.
+ +#### Example: Get time-elapsed metrics which rely on `perf` kernel interface + +```go +// Read events related to perf-related metrics. +// Elapsed time is calculated from current and previous call to ReadPerfEvents. +if err := ptel.ReadPerfEvents(); err != nil { + // handle error + return +} + +// Get CPUC0SubstateC01Percent corresponding to previous elapsed interval. +c0SubstateC01, err := ptel.GetCPUC0SubstateC01Percent(cpuID) +if err != nil { + // handle error +} + +// Get CPUC0SubstateC02Percent corresponding to previous elapsed interval. +c0SubstateC02, err := ptel.GetCPUC0SubstateC02Percent(cpuID) +if err != nil { + // handle error +} + +// Get CPUC0SubstateC0WaitPercent corresponding to previous elapsed interval. +c0SubstateC0Wait, err := ptel.GetCPUC0SubstateC0WaitPercent(cpuID) +if err != nil { + // handle error +} + +// Release resources. Close file descriptors. +err = ptel.DeactivatePerfEvents() +if err != nil { + // handle error +} +``` + +### Error Handling + +This library exposes several types of errors, providing the user the flexibility to handle them differently: + +- `MultiError`: Holds a slice of error descriptions. It is used to mark errors that happened during the initialization of `PowerTelemetry` dependencies. +- `ModuleNotInitializedError`: Used to indicate that a dependency has not been initialized, and the user tried to access it. +- `MetricNotSupportedError`: Used to indicate that a metric is not supported by the host's CPU model. + +#### Usage of `MultiError` + +When creating a new `PowerTelemetry` instance, the `New` function returns a `MultiError` if any of the dependencies, requested via options, failed to initialize.
+ +```go +ptel, err := powertelemetry.New( + powertelemetry.WithMsr(), + powertelemetry.WithRapl(), + powertelemetry.WithCoreFrequency(), + powertelemetry.WithUncoreFrequency(), + powertelemetry.WithPerf("/path/to/events.json"), +) + +var initErr *powertelemetry.MultiError +if err != nil { + if !errors.As(err, &initErr) { + logger.Errorf("Failed to build powertelemetry instance: %v", err) + os.Exit(1) + } + logger.Warn(err) +} +``` + +Typical scenarios that return a `MultiError` type would be: + +- Requesting a dependency, but the corresponding kernel module was not loaded previously. +- Providing invalid paths to `WithX` options that allow specifying a custom path used to initialize the dependencies. +- Providing an invalid JSON file for perf event definitions via `WithPerf`. + +#### Usage of `ModuleNotInitializedError` + +Calls to a [get metric method](#get-metric-values) return an error of type `ModuleNotInitializedError` if the dependency on which it relies has not been initialized via the corresponding `WithX` option, or it failed to initialize. + +This might be used to: + +- Prevent subsequent calls to metrics relying on the same dependency. +- Prevent subsequent calls to the same metric getter, when looping through multiple logical CPU IDs or package/die IDs. + +##### Example: `ModuleNotInitializedError` error handling for CPU frequency metric + +```go +// CPU current frequency metric +cpuID := 0 +cpuFreq, err := ptel.GetCPUFrequency(cpuID) + +var moduleErr *powertelemetry.ModuleNotInitializedError +if err != nil { + if errors.As(err, &moduleErr) { + // Handle module not initialized error + } else { + // Handle other error types + } +} +``` + +#### Usage of `MetricNotSupportedError` + +As mentioned in [HW Dependencies](#hw-dependencies) section, specific metrics require certain processor features to be present, or specific processor models. + +`powertelemetry` library provides exported functions that allow the user to check if a metric is supported by the CPU.
If not supported, a `MetricNotSupportedError` is returned, for the user to handle it. + +The exported functions have the following naming convention: + +```go +func CheckIf<metric_name>Supported(cpuModel int) error +``` + +This might be used to disable metric requests based on CPU model compatibility. + +##### Example: `MetricNotSupportedError` error handling for CPU C1 state residency metric + +```go +var notSupportedErr *powertelemetry.MetricNotSupportedError + +err := powertelemetry.CheckIfCPUC1StateResidencySupported(cpuModel) +if err != nil && errors.As(err, &notSupportedErr) { + // handle not supported metric error +} +``` diff --git a/VERSION b/VERSION new file mode 100644 index 0000000..3eefcb9 --- /dev/null +++ b/VERSION @@ -0,0 +1 @@ +1.0.0 diff --git a/builder.go b/builder.go new file mode 100644 index 0000000..4f655df --- /dev/null +++ b/builder.go @@ -0,0 +1,482 @@ +// Copyright (C) 2023 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 + +//go:build linux && amd64 + +package powertelemetry + +import ( + "errors" + "fmt" + "slices" + "strings" + "time" + + "github.com/intel/powertelemetry/internal/cpumodel" + "github.com/intel/powertelemetry/internal/log" +) + +var ( + cStateOffsets = []uint32{c3Residency, c6Residency, c7Residency, maxFreqClockCount, actualFreqClockCount, timestampCounter} + cStatePerfEvents = []string{c01.String(), c02.String(), c0Wait.String(), thread.String()} +) + +// PowerTelemetry enables monitoring platform metrics. +type PowerTelemetry struct { + topology topologyReader + msr msrReaderWithStorage + uncoreFreq uncoreFreqReader + rapl raplReader + cpuFreq cpuFreqReader + perf perfReaderWithStorage + + busClock float64 + cpus []int +} + +// powerBuilder enables piecewise builds of PowerTelemetry instances. Implements functional options pattern. +// TODO: User should be able to specify custom base path for powertelemetry subsystems.
+type powerBuilder struct { + topology *topologyBuilder + msr *msrBuilder + rapl *raplBuilder + coreFreq *coreFreqBuilder + uncoreFreq *uncoreFreqBuilder + perf *perfBuilder + + includedCPUs []int + excludedCPUs []int +} + +type Option func(*powerBuilder) + +// WithExcludedCPUs returns a function closure that sets a slice with excluded CPU IDs +// of a builder. +func WithExcludedCPUs(cpuIDs []int) Option { + return func(b *powerBuilder) { + b.excludedCPUs = cpuIDs + } +} + +// WithIncludedCPUs returns a function closure that sets a slice with included CPU IDs +// of a builder. +func WithIncludedCPUs(cpuIDs []int) Option { + return func(b *powerBuilder) { + b.includedCPUs = cpuIDs + } +} + +// WithMsr returns a function closure that initializes the msrBuilder struct of a builder with the default configuration. +func WithMsr() Option { + return func(b *powerBuilder) { + if b.msr == nil { + b.msr = &msrBuilder{ + msrReaderWithStorage: &msrDataWithStorage{ + msrOffsets: cStateOffsets, + msrPath: defaultMsrBasePath, + }, + } + } + } +} + +// WithMsrTimeout returns a function closure that initializes the msrBuilder struct of a builder with the default configuration +// and given msr read timeout. +func WithMsrTimeout(timeout time.Duration) Option { + return func(b *powerBuilder) { + if b.msr == nil { + b.msr = &msrBuilder{ + msrReaderWithStorage: &msrDataWithStorage{ + msrOffsets: cStateOffsets, + msrPath: defaultMsrBasePath, + }, + } + } + b.msr.timeout = timeout + } +} + +// WithRapl returns a function closure that initializes the raplBuilder struct of a builder with the default configuration. 
+func WithRapl(basePath ...string) Option { + var path string + if len(basePath) != 0 { + path = basePath[0] + } else { + path = defaultRaplBasePath + } + return func(b *powerBuilder) { + b.rapl = &raplBuilder{ + raplReader: &raplData{ + basePath: path, + }, + } + } +} + +// WithCoreFrequency returns a function closure that initializes the coreFreqBuilder struct of a builder with the default configuration. +func WithCoreFrequency(basePath ...string) Option { + var path string + if len(basePath) != 0 { + path = basePath[0] + } else { + path = defaultCPUFreqBasePath + } + return func(b *powerBuilder) { + b.coreFreq = &coreFreqBuilder{ + cpuFreqReader: &cpuFreqData{ + cpuFrequencyFilePath: path, + }, + } + } +} + +// WithUncoreFrequency returns a function closure that initializes the uncoreFreqBuilder struct of a builder with the default configuration. +func WithUncoreFrequency(basePath ...string) Option { + var path string + if len(basePath) != 0 { + path = basePath[0] + } else { + path = defaultUncoreFreqBasePath + } + return func(b *powerBuilder) { + b.uncoreFreq = &uncoreFreqBuilder{ + uncoreFreqReader: &uncoreFreqData{ + uncoreFreqBasePath: path, + }, + } + } +} + +// WithPerf takes a file path with perf event definition in JSON format. It returns a function closure +// that initializes a perfBuilder struct with the given JSON event definition file. +func WithPerf(jsonFile string) Option { + return func(b *powerBuilder) { + b.perf = &perfBuilder{ + perfReaderWithStorage: &perfWithStorage{ + perfReader: newPerf(), + }, + jsonPath: jsonFile, + events: cStatePerfEvents, + } + } +} + +// WithLogger returns a function closure that sets a user provided logger structure to be used to log messages. +// Note: this option is supposed to go first in the list of arguments passed to New() when creating a PowerTelemetry instance. 
+func WithLogger(l log.Logger) Option { + return func(b *powerBuilder) { + log.SetLogger(l) + } +} + +// New returns a PowerTelemetry instance that allows to gather power-related metrics from the host. +// An error is returned if either topology could not be initialized, or if the CPU +// model is not supported. Otherwise, a MultiError is returned if one or more user requested +// subsystems (pieces) fail to initialize. +func New(opts ...Option) (*PowerTelemetry, error) { + b := &powerBuilder{ + topology: &topologyBuilder{ + topologyReader: &topologyData{ + dieIDPath: defaultDieBasePath, + }, + }, + } + + for _, opt := range opts { + opt(b) + } + + pt := &PowerTelemetry{} + + // initialize topology + if err := b.topology.initTopology(); err != nil { + return nil, err + } + pt.topology = b.topology + logTopologyDetails(pt.topology) + + // check if processor is supported + isSupported, err := isCPUSupported(b.topology) + if err != nil { + return nil, fmt.Errorf("error retrieving host processor: %w", err) + } + if !isSupported { + return nil, errors.New("host processor is not supported") + } + + // get available CPU IDs which can be accessed to get metrics from + // (and check if no calls to both WithIncludedCPUs and WithExcludedCPUs have been done) + cpus, err := b.getAvailableCPUs() + if err != nil { + return nil, fmt.Errorf("failed to get available CPUs: %w", err) + } + + // check that not all CPU IDs are excluded. 
+ if len(cpus) == 0 { + return nil, errors.New("no available CPUs were found") + } + pt.cpus = cpus + log.Debugf("CPU metrics related to MSR and coreFreq can be gathered for following CPUs: %v", pt.cpus) + + // custom error to mark non-critical initialization errors + multiErr := &MultiError{} + + // initialize msr + pt.msr, err = b.initMsr(cpus) + if err != nil { + multiErr.add(fmt.Sprintf("failed to initialize msr: %v", err)) + } + + // initialize rapl + pt.rapl, err = b.initRapl() + if err != nil { + multiErr.add(fmt.Sprintf("failed to initialize rapl: %v", err)) + } + + // initialize cpu frequency + // TODO: Add argument with enabled CPU IDs. + pt.cpuFreq, err = b.initCoreFreq() + if err != nil { + multiErr.add(fmt.Sprintf("failed to initialize core freq: %v", err)) + } + + // TODO: Consider to remove init method + // initialize uncore frequency + pt.uncoreFreq, err = b.initUncoreFreq() + if err != nil { + multiErr.add(fmt.Sprintf("failed to initialize uncore freq: %v", err)) + } + + // initialize perf + pt.perf, err = b.initPerf(cStatePerfEvents, cpus) + if err != nil { + multiErr.add(fmt.Sprintf("failed to initialize perf: %v", err)) + } + + // TODO: Remove this optimization. Call to bus clock should be done only when needed. + // TODO: Getting model can be done inside getBusClock, pt.topology.getModel() + model := b.topology.getCPUModel() + pt.busClock, err = pt.getBusClock(model) + if err != nil { + multiErr.add(fmt.Sprintf("failed to get bus clock for model: 0x%X: %v", model, err)) + } + + if len(multiErr.errs) > 0 { + return pt, fmt.Errorf("PowerTelemetry instance initialized with errors: %w", multiErr) + } + return pt, nil +} + +// topologyBuilder enables initialization of topology subsystem for PowerTelemetry instances. +type topologyBuilder struct { + topologyReader +} + +// msrBuilder enables configuration and initialization of msr subsystem for PowerTelemetry instances. 
+type msrBuilder struct { + msrReaderWithStorage + + timeout time.Duration +} + +// raplBuilder enables configuration and initialization of rapl subsystem for PowerTelemetry instances. +type raplBuilder struct { + raplReader +} + +// coreFreqBuilder enables configuration and initialization of coreFreq subsystem for PowerTelemetry instances. +type coreFreqBuilder struct { + cpuFreqReader +} + +// uncoreFreqBuilder enables configuration and initialization of uncoreFreq subsystem for PowerTelemetry instances. +type uncoreFreqBuilder struct { + uncoreFreqReader +} + +// perfBuilder enables configuration and initialization of perf subsystem for PowerTelemetry instances. +type perfBuilder struct { + perfReaderWithStorage + + jsonPath string + events []string +} + +// getAvailableCPUs returns a slice with available CPU IDs which can be accessed to get metrics from. +func (b *powerBuilder) getAvailableCPUs() ([]int, error) { + if len(b.excludedCPUs) != 0 && len(b.includedCPUs) != 0 { + return nil, errors.New("invalid CPU ID configuration, only one of both included or excluded modes allowed") + } + + numCPUs := b.topology.getCPUsNumber() + if err := validateFromRange(b.includedCPUs, 0, numCPUs-1); err != nil { + return nil, fmt.Errorf("failed to validate included CPU IDs: %w", err) + } + + if len(b.includedCPUs) != 0 { + return b.includedCPUs, nil + } + + if err := validateFromRange(b.excludedCPUs, 0, numCPUs-1); err != nil { + return nil, fmt.Errorf("failed to validate excluded CPU IDs: %w", err) + } + + cpus := make([]int, 0, numCPUs-len(b.excludedCPUs)) + for i := 0; i < numCPUs; i++ { + if slices.Contains(b.excludedCPUs, i) { + continue + } + cpus = append(cpus, i) + } + + return cpus, nil +} + +// validateFromRange takes a slice of ints, a low, and a high bound. It returns an error +// in case any element of the slice is not within the interval [low, high]. 
+func validateFromRange(nums []int, low, high int) error { + for _, n := range nums { + if n > high || n < low { + return fmt.Errorf("%v is out of bounds [%v, %v]", n, low, high) + } + } + return nil +} + +// initMsr takes a slice of CPU IDs and initializes the msrReaderWithStorage from the receiver's msrBuilder configuration. +// If successfully initialized, it returns an msrReaderWithStorage. Otherwise, returns +// an error. +func (b *powerBuilder) initMsr(cpus []int) (msrReaderWithStorage, error) { + if b.msr != nil { + if err := b.msr.initMsrMap(cpus, b.msr.timeout); err != nil { + return nil, err + } + return b.msr.msrReaderWithStorage, nil + } + return nil, nil +} + +// initRapl initializes the raplReader from the receiver's raplBuilder configuration. +// If successfully initialized, it returns an raplReader. Otherwise, returns an error. +func (b *powerBuilder) initRapl() (raplReader, error) { + if b.rapl != nil { + if err := b.rapl.initZoneMap(); err != nil { + return nil, err + } + return b.rapl.raplReader, nil + } + return nil, nil +} + +// initCoreFreq initializes the cpuFreqReader from the receiver's coreFreqBuilder configuration. +// If successfully initialized, it returns a cpuFreqReader. Otherwise, returns an error. +func (b *powerBuilder) initCoreFreq() (cpuFreqReader, error) { + if b.coreFreq != nil { + if err := b.coreFreq.init(); err != nil { + return nil, err + } + return b.coreFreq.cpuFreqReader, nil + } + return nil, nil +} + +// initUncoreFreq initializes the uncoreFreqReader from the receiver's uncoreFreqBuilder configuration. +// If successfully initialized, it returns a cpuFreqReader. Otherwise, returns an error. +func (b *powerBuilder) initUncoreFreq() (uncoreFreqReader, error) { + if b.uncoreFreq != nil { + if err := b.uncoreFreq.init(); err != nil { + return nil, err + } + return b.uncoreFreq.uncoreFreqReader, nil + } + return nil, nil +} + +// initPerf takes a slice of perf events and a slice of CPU IDs. 
It initializes the perfReaderWithStorage +// from the receiver's perfBuilder configuration. If successfully initialized, it returns an perfReaderWithStorage. +// Otherwise, returns an error. +func (b *powerBuilder) initPerf(events []string, cpus []int) (perfReaderWithStorage, error) { + if b.perf != nil { + // check if processor supports perf hardware events for cstates + model := b.topology.getCPUModel() + if !isPerfAllowed(model) { + return nil, fmt.Errorf("perf based metrics are not supported for processor model: 0x%X", model) + } + + if err := b.perf.initResolver(b.perf.jsonPath); err != nil { + return nil, fmt.Errorf("failed to init resolver: %w", err) + } + + if err := b.perf.activate(events, cpus); err != nil { + return nil, fmt.Errorf("failed to activate events: %w", err) + } + return b.perf.perfReaderWithStorage, nil + } + return nil, nil +} + +// isPerfAllowed is helper function that returns true if the processor model supports hardware +// perf events specific to cstate residency metrics. +func isPerfAllowed(model int) bool { + switch model { + case cpumodel.INTEL_FAM6_SAPPHIRERAPIDS_X: + case cpumodel.INTEL_FAM6_EMERALDRAPIDS_X: + //TODO: Hybrid models are not supported right now + //case cpumodel.INTEL_FAM6_ALDERLAKE: + //case cpumodel.INTEL_FAM6_ALDERLAKE_L: + //case cpumodel.INTEL_FAM6_RAPTORLAKE: + //case cpumodel.INTEL_FAM6_RAPTORLAKE_P: + //case cpumodel.INTEL_FAM6_RAPTORLAKE_S: + //case cpumodel.INTEL_FAM6_METEORLAKE: + //case cpumodel.INTEL_FAM6_METEORLAKE_L: + //Above list should be updated in the future with new processors supporting the required events. + default: + return false + } + return true +} + +// logTopologyDetails logs topology details such as CPU: vendor ID, family and model. +// It also logs core ID, package ID and die ID for every CPU ID. 
+func logTopologyDetails(t topologyReader) { + var sb strings.Builder + + sb.WriteString("Topology details:\n") + if vendorID, err := t.getCPUVendor(0); err != nil { + sb.WriteString(fmt.Sprintf(" Error retrieving the CPU vendor ID: %v\n", err)) + } else { + sb.WriteString(fmt.Sprintf(" CPU vendor ID: %s\n", vendorID)) + } + + if family, err := t.getCPUFamily(0); err != nil { + sb.WriteString(fmt.Sprintf(" Error retrieving the CPU family: %v\n", err)) + } else { + sb.WriteString(fmt.Sprintf(" CPU family: %s\n", family)) + } + + sb.WriteString(fmt.Sprintf(" CPU model: 0x%X\n", t.getCPUModel())) + + cpus := t.getCPUsNumber() + sb.WriteString(fmt.Sprintf(" Number of CPUs: %d\n", cpus)) + for cpuID := 0; cpuID < cpus; cpuID++ { + coreID, err := t.getCPUCoreID(cpuID) + if err != nil { + sb.WriteString(fmt.Sprintf(" Error retrieving the core ID: %v\n", err)) + continue + } + packageID, err := t.getCPUPackageID(cpuID) + if err != nil { + sb.WriteString(fmt.Sprintf(" Error retrieving the package ID: %v\n", err)) + continue + } + dieID, err := t.getCPUDieID(cpuID) + if err != nil { + sb.WriteString(fmt.Sprintf(" Error retrieving the die ID: %v\n", err)) + continue + } + + sb.WriteString(fmt.Sprintf(" CPU ID: %4d, core ID: %4d, package ID: %2d, die ID: %2d\n", cpuID, coreID, packageID, dieID)) + } + + log.Debugf(sb.String()) +} diff --git a/builder_test.go b/builder_test.go new file mode 100644 index 0000000..2c32d52 --- /dev/null +++ b/builder_test.go @@ -0,0 +1,1168 @@ +// Copyright (C) 2023 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 + +//go:build linux && amd64 + +package powertelemetry + +import ( + "errors" + "fmt" + "testing" + "time" + + "github.com/stretchr/testify/mock" + "github.com/stretchr/testify/require" + + "github.com/intel/powertelemetry/internal/cpumodel" +) + +func withTopologyMock(m *topologyMock) Option { + return func(b *powerBuilder) { + b.topology = &topologyBuilder{ + topologyReader: m, + } + } +} + +func withMsrMock(m *msrMock) Option 
{ + return func(b *powerBuilder) { + b.msr = &msrBuilder{ + msrReaderWithStorage: m, + } + } +} + +func withRaplMock(m *raplMock) Option { + return func(b *powerBuilder) { + b.rapl = &raplBuilder{ + raplReader: m, + } + } +} + +func withCoreFrequencyMock(m *coreFreqMock) Option { + return func(b *powerBuilder) { + b.coreFreq = &coreFreqBuilder{ + cpuFreqReader: m, + } + } +} + +func withUncoreFrequencyMock(m *uncoreFreqMock) Option { + return func(b *powerBuilder) { + b.uncoreFreq = &uncoreFreqBuilder{ + uncoreFreqReader: m, + } + } +} + +func withPerfMock(m *perfMock) Option { + return func(b *powerBuilder) { + b.perf = &perfBuilder{ + perfReaderWithStorage: m, + } + } +} + +func TestWithExcludedCPUs(t *testing.T) { + cpus := []int{0, 1, 2, 3} + exp := &powerBuilder{ + excludedCPUs: cpus, + } + + b := &powerBuilder{} + f := WithExcludedCPUs(cpus) + f(b) + + require.Equal(t, exp, b) +} + +func TestWithIncludedCPUs(t *testing.T) { + cpus := []int{0, 1, 2, 3} + exp := &powerBuilder{ + includedCPUs: cpus, + } + + b := &powerBuilder{} + f := WithIncludedCPUs(cpus) + f(b) + + require.Equal(t, exp, b) +} + +func TestWithMsr(t *testing.T) { + exp := &powerBuilder{ + msr: &msrBuilder{ + msrReaderWithStorage: &msrDataWithStorage{ + msrOffsets: cStateOffsets, + msrPath: defaultMsrBasePath, + }, + }, + } + + b := &powerBuilder{} + f := WithMsr() + f(b) + + require.Equal(t, exp, b) +} + +func TestWithMsrTimeout(t *testing.T) { + exp := &powerBuilder{ + msr: &msrBuilder{ + msrReaderWithStorage: &msrDataWithStorage{ + msrOffsets: cStateOffsets, + msrPath: defaultMsrBasePath, + }, + timeout: time.Minute, + }, + } + + b := &powerBuilder{} + f := WithMsrTimeout(time.Minute) + f(b) + + require.Equal(t, exp, b) +} + +func TestWithRapl(t *testing.T) { + t.Run("DefaultBasePath", func(t *testing.T) { + exp := &powerBuilder{ + rapl: &raplBuilder{ + raplReader: &raplData{ + basePath: defaultRaplBasePath, + }, + }, + } + + b := &powerBuilder{} + f := WithRapl() + f(b) + + require.Equal(t, 
exp, b) + }) + + t.Run("CustomBasePath", func(t *testing.T) { + customPath := "custom/rapl" + exp := &powerBuilder{ + rapl: &raplBuilder{ + raplReader: &raplData{ + basePath: customPath, + }, + }, + } + + b := &powerBuilder{} + f := WithRapl(customPath) + f(b) + + require.Equal(t, exp, b) + }) +} + +func TestWithCoreFrequency(t *testing.T) { + t.Run("DefaultBasePath", func(t *testing.T) { + exp := &powerBuilder{ + coreFreq: &coreFreqBuilder{ + cpuFreqReader: &cpuFreqData{ + cpuFrequencyFilePath: defaultCPUFreqBasePath, + }, + }, + } + + b := &powerBuilder{} + f := WithCoreFrequency() + f(b) + + require.Equal(t, exp, b) + }) + + t.Run("CustomBasePath", func(t *testing.T) { + customPath := "custom/core_freq" + exp := &powerBuilder{ + coreFreq: &coreFreqBuilder{ + cpuFreqReader: &cpuFreqData{ + cpuFrequencyFilePath: customPath, + }, + }, + } + + b := &powerBuilder{} + f := WithCoreFrequency(customPath) + f(b) + + require.Equal(t, exp, b) + }) +} + +func TestWithUncoreFrequency(t *testing.T) { + t.Run("DefaultBasePath", func(t *testing.T) { + exp := &powerBuilder{ + uncoreFreq: &uncoreFreqBuilder{ + uncoreFreqReader: &uncoreFreqData{ + uncoreFreqBasePath: defaultUncoreFreqBasePath, + }, + }, + } + + b := &powerBuilder{} + f := WithUncoreFrequency() + f(b) + + require.Equal(t, exp, b) + }) + + t.Run("CustomBasePath", func(t *testing.T) { + customPath := "custom/uncore_freq" + exp := &powerBuilder{ + uncoreFreq: &uncoreFreqBuilder{ + uncoreFreqReader: &uncoreFreqData{ + uncoreFreqBasePath: customPath, + }, + }, + } + + b := &powerBuilder{} + f := WithUncoreFrequency(customPath) + f(b) + + require.Equal(t, exp, b) + }) +} + +func TestWithPerf(t *testing.T) { + jsonFile := "testdata/sapphirerapids_core.json" + + b := &powerBuilder{} + f := WithPerf(jsonFile) + f(b) + + require.NotNil(t, b.perf) + require.NotNil(t, b.perf.perfReaderWithStorage) + require.Equal(t, jsonFile, b.perf.jsonPath) + require.Equal(t, cStatePerfEvents, b.perf.events) +} + +func TestGetAvailableCPUs(t 
*testing.T) { + t.Run("WithAllCPUs", func(t *testing.T) { + mTopo := &topologyMock{} + + // mock getting number of CPUs from powerBuilder.getAvailableCPUs + mTopo.On("getCPUsNumber").Return(10).Once() + + cpusExp := []int{0, 1, 2, 3, 4, 5, 6, 7, 8, 9} + + b := &powerBuilder{ + topology: &topologyBuilder{ + topologyReader: mTopo, + }, + } + + cpusOut, err := b.getAvailableCPUs() + require.NoError(t, err) + require.Equal(t, cpusExp, cpusOut) + }) + + t.Run("WithIncludedCPUs", func(t *testing.T) { + mTopo := &topologyMock{} + + // mock getting number of CPUs from powerBuilder.getAvailableCPUs + // valid CPU IDs within the range [0, 19] + mTopo.On("getCPUsNumber").Return(20) + + t.Run("OutOfBounds", func(t *testing.T) { + includedCPUs := []int{15, 16, 17, 18, 19, 20} + + b := &powerBuilder{ + topology: &topologyBuilder{ + topologyReader: mTopo, + }, + } + f := WithIncludedCPUs(includedCPUs) + f(b) + + cpusOut, err := b.getAvailableCPUs() + require.ErrorContains(t, err, "failed to validate included CPU IDs") + require.ErrorContains(t, err, "20 is out of bounds [0, 19]") + require.Nil(t, cpusOut) + mTopo.AssertExpectations(t) + }) + + t.Run("NilCPUsSlice", func(t *testing.T) { + b := &powerBuilder{ + topology: &topologyBuilder{ + topologyReader: mTopo, + }, + } + f := WithIncludedCPUs(nil) + f(b) + + cpusOut, err := b.getAvailableCPUs() + require.NoError(t, err) + require.Equal(t, []int{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19}, cpusOut) + mTopo.AssertExpectations(t) + }) + + t.Run("EmptyCPUsSlice", func(t *testing.T) { + b := &powerBuilder{ + topology: &topologyBuilder{ + topologyReader: mTopo, + }, + } + f := WithIncludedCPUs([]int{}) + f(b) + + cpusOut, err := b.getAvailableCPUs() + require.NoError(t, err) + require.Equal(t, []int{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19}, cpusOut) + mTopo.AssertExpectations(t) + }) + + t.Run("PartialCPUsIncluded", func(t *testing.T) { + cpusExp := []int{0, 1, 2, 3} + + b := 
&powerBuilder{ + topology: &topologyBuilder{ + topologyReader: mTopo, + }, + } + f := WithIncludedCPUs(cpusExp) + f(b) + + cpusOut, err := b.getAvailableCPUs() + require.NoError(t, err) + require.Equal(t, cpusExp, cpusOut) + mTopo.AssertExpectations(t) + }) + }) + + t.Run("WithExcludedCPUs", func(t *testing.T) { + mTopo := &topologyMock{} + + // mock getting number of CPUs from powerBuilder.getAvailableCPUs + mTopo.On("getCPUsNumber").Return(10) + + t.Run("OutOfBounds", func(t *testing.T) { + excludedCPUs := []int{2, 1, 0, 12, 13} + + b := &powerBuilder{ + topology: &topologyBuilder{ + topologyReader: mTopo, + }, + } + f := WithExcludedCPUs(excludedCPUs) + f(b) + + cpusOut, err := b.getAvailableCPUs() + require.ErrorContains(t, err, "failed to validate excluded CPU IDs") + require.ErrorContains(t, err, "12 is out of bounds [0, 9]") + require.Nil(t, cpusOut) + mTopo.AssertExpectations(t) + }) + + t.Run("AllCPUsExcluded", func(t *testing.T) { + excludedCPUs := []int{0, 1, 2, 3, 4, 5, 6, 7, 8, 9} + + b := &powerBuilder{ + topology: &topologyBuilder{ + topologyReader: mTopo, + }, + } + f := WithExcludedCPUs(excludedCPUs) + f(b) + + cpusOut, err := b.getAvailableCPUs() + require.NoError(t, err) + require.Empty(t, cpusOut) + mTopo.AssertExpectations(t) + }) + + t.Run("NilCPUsSlice", func(t *testing.T) { + b := &powerBuilder{ + topology: &topologyBuilder{ + topologyReader: mTopo, + }, + } + f := WithExcludedCPUs(nil) + f(b) + + cpusOut, err := b.getAvailableCPUs() + require.NoError(t, err) + require.Equal(t, []int{0, 1, 2, 3, 4, 5, 6, 7, 8, 9}, cpusOut) + mTopo.AssertExpectations(t) + }) + + t.Run("EmptyCPUsSlice", func(t *testing.T) { + b := &powerBuilder{ + topology: &topologyBuilder{ + topologyReader: mTopo, + }, + } + f := WithExcludedCPUs([]int{}) + f(b) + + cpusOut, err := b.getAvailableCPUs() + require.NoError(t, err) + require.Equal(t, []int{0, 1, 2, 3, 4, 5, 6, 7, 8, 9}, cpusOut) + mTopo.AssertExpectations(t) + }) + + t.Run("PartialCPUsExcluded", func(t 
*testing.T) { + cpusExcluded := []int{0, 1, 2, 3} + cpusExp := []int{4, 5, 6, 7, 8, 9} + + b := &powerBuilder{ + topology: &topologyBuilder{ + topologyReader: mTopo, + }, + } + f := WithExcludedCPUs(cpusExcluded) + f(b) + + cpusOut, err := b.getAvailableCPUs() + require.NoError(t, err) + require.Equal(t, cpusExp, cpusOut) + mTopo.AssertExpectations(t) + }) + }) + + t.Run("WithExcludedAndIncludedCPUs", func(t *testing.T) { + b := &powerBuilder{} + withIncluded := WithIncludedCPUs([]int{0, 1, 2, 3}) + withExcluded := WithExcludedCPUs([]int{0, 2}) + + withIncluded(b) + withExcluded(b) + + cpusOut, err := b.getAvailableCPUs() + require.ErrorContains(t, err, "invalid CPU ID configuration, only one of both included or excluded modes allowed") + require.Nil(t, cpusOut) + }) +} + +func TestNew(t *testing.T) { + mError := errors.New("mock error") + + t.Run("FailedToInitTopology", func(t *testing.T) { + mTopology := &topologyMock{} + + // mock initializing topology map + mTopology.On("initTopology").Return(mError).Once() + + pt, err := New( + withTopologyMock(mTopology), + ) + + require.ErrorContains(t, err, mError.Error()) + require.Nil(t, pt) + + mTopology.AssertExpectations(t) + }) + + t.Run("FailedToCheckCPU", func(t *testing.T) { + mTopology := &topologyMock{} + + // mock initializing topology map + mTopology.On("initTopology").Return(nil).Once() + + // TODO: Consider to make isCPUSupported as topology method + // mock getting CPU family and vendor from isCPUSupported + mTopology.On("getCPUFamily", 0).Return("6", nil).Once() + mTopology.On("getCPUVendor", 0).Return("", mError).Once() + + // mock getting topology CPU data from logTopologyDetails + mTopology.On("getCPUFamily", 0).Return("6", nil).Once() + mTopology.On("getCPUVendor", 0).Return("", mError).Once() + mTopology.On("getCPUModel").Return(cpumodel.INTEL_FAM6_ALDERLAKE, nil).Once() + mTopology.On("getCPUsNumber").Return(1, nil).Once() + mTopology.On("getCPUCoreID", 0).Return(0, nil).Once() + 
mTopology.On("getCPUPackageID", 0).Return(0, nil).Once() + mTopology.On("getCPUDieID", 0).Return(0, nil).Once() + + pt, err := New( + withTopologyMock(mTopology), + ) + + require.ErrorContains(t, err, "error retrieving host processor") + require.Nil(t, pt) + + mTopology.AssertExpectations(t) + }) + + t.Run("CPUNotSupported", func(t *testing.T) { + mTopology := &topologyMock{} + + // mock initializing topology map + mTopology.On("initTopology").Return(nil).Once() + + // mock getting CPU family and vendor from isCPUSupported + mTopology.On("getCPUFamily", 0).Return("6", nil).Once() + mTopology.On("getCPUVendor", 0).Return("AuthenticAMD", nil).Once() + + // mock getting topology CPU data from logTopologyDetails + mTopology.On("getCPUFamily", 0).Return("6", nil).Once() + mTopology.On("getCPUVendor", 0).Return("AuthenticAMD", nil).Once() + mTopology.On("getCPUModel").Return(cpumodel.INTEL_FAM6_ALDERLAKE, nil).Once() + mTopology.On("getCPUsNumber").Return(1, nil).Once() + mTopology.On("getCPUCoreID", 0).Return(0, nil).Once() + mTopology.On("getCPUPackageID", 0).Return(0, nil).Once() + mTopology.On("getCPUDieID", 0).Return(0, nil).Once() + + pt, err := New( + withTopologyMock(mTopology), + ) + + require.ErrorContains(t, err, "host processor is not supported") + require.Nil(t, pt) + + mTopology.AssertExpectations(t) + }) + + t.Run("FailedToGetAvailableCPUs", func(t *testing.T) { + mTopology := &topologyMock{} + + // mock initializing topology map + mTopology.On("initTopology").Return(nil) + + // mock getting CPU family and vendor from isCPUSupported + mTopology.On("getCPUFamily", 0).Return("6", nil) + mTopology.On("getCPUVendor", 0).Return("GenuineIntel", nil) + + // mock getting topology CPU data from logTopologyDetails + mTopology.On("getCPUModel").Return(cpumodel.INTEL_FAM6_ALDERLAKE, nil).Once() + mTopology.On("getCPUsNumber").Return(1, nil).Once() + mTopology.On("getCPUCoreID", 0).Return(0, nil).Once() + mTopology.On("getCPUPackageID", 0).Return(0, nil).Once() + 
mTopology.On("getCPUDieID", 0).Return(0, nil).Once() + + pt, err := New( + withTopologyMock(mTopology), + + WithExcludedCPUs([]int{5, 6, 7, 8, 9}), + WithIncludedCPUs([]int{0, 1, 2, 3, 4}), + ) + + require.ErrorContains(t, err, "failed to get available CPUs") + require.Nil(t, pt) + + mTopology.AssertExpectations(t) + }) + + t.Run("AllCPUsExcluded", func(t *testing.T) { + cpus := []int{0, 1, 2, 3, 4, 5, 6, 7, 8, 9} + + mTopology := &topologyMock{} + + // mock initializing topology map + mTopology.On("initTopology").Return(nil) + + // mock getting CPU family and vendor from isCPUSupported + mTopology.On("getCPUFamily", 0).Return("6", nil) + mTopology.On("getCPUVendor", 0).Return("GenuineIntel", nil) + + // mock getting topology CPU data from logTopologyDetails + mTopology.On("getCPUModel").Return(cpumodel.INTEL_FAM6_ALDERLAKE, nil).Once() + mTopology.On("getCPUsNumber").Return(1, nil).Once() + mTopology.On("getCPUCoreID", 0).Return(0, nil).Once() + mTopology.On("getCPUPackageID", 0).Return(0, nil).Once() + mTopology.On("getCPUDieID", 0).Return(0, nil).Once() + + // mock getting number of CPU IDs from powerBuilder.getAvailableCPUs + mTopology.On("getCPUsNumber").Return(len(cpus)) + + pt, err := New( + withTopologyMock(mTopology), + + WithExcludedCPUs(cpus), + ) + + require.ErrorContains(t, err, "no available CPUs were found") + require.Nil(t, pt) + + mTopology.AssertExpectations(t) + }) + + t.Run("With", func(t *testing.T) { + cpus := []int{0, 1, 2, 3, 4, 5, 6, 7, 8, 9} + + mTopology := &topologyMock{} + + // mock initializing topology map + mTopology.On("initTopology").Return(nil) + + // mock getting CPU family and vendor from isCPUSupported + mTopology.On("getCPUFamily", 0).Return("6", nil) + mTopology.On("getCPUVendor", 0).Return("GenuineIntel", nil) + + // mock getting model to calculate bus clock + mTopology.On("getCPUModel").Return(cpumodel.INTEL_FAM6_ICELAKE) + + // mock getting number of CPU IDs from powerBuilder.getAvailableCPUs + 
mTopology.On("getCPUsNumber").Return(len(cpus)) + + // mock getting topology CPU data from logTopologyDetails + mTopology.On("getCPUCoreID", mock.AnythingOfType("int")).Return(0, nil) + mTopology.On("getCPUPackageID", mock.AnythingOfType("int")).Return(0, nil) + mTopology.On("getCPUDieID", mock.AnythingOfType("int")).Return(0, nil) + + t.Run("Msr", func(t *testing.T) { + t.Run("FailedToInitMsrMap", func(t *testing.T) { + mMsr := &msrMock{} + + // mock initializing msr map from powerBuilder.initMsr + mMsr.On("initMsrMap", cpus, time.Duration(0)).Return(mError).Once() + + pt, err := New( + withTopologyMock(mTopology), + withMsrMock(mMsr), + ) + + require.ErrorContains(t, err, "failed to initialize msr") + require.NotNil(t, pt) + require.Nil(t, pt.msr) + require.Nil(t, pt.GetRaplPackageIDs()) + require.Equal(t, cpus, pt.GetMsrCPUIDs()) + + mTopology.AssertExpectations(t) + mMsr.AssertExpectations(t) + }) + + t.Run("IncludedCPUs", func(t *testing.T) { + includedCPUs := []int{0, 1, 2, 3, 4} + + mMsr := &msrMock{} + + // mock initializing msr map from powerBuilder.initMsr + mMsr.On("initMsrMap", includedCPUs, time.Duration(0)).Return(nil).Once() + + pt, err := New( + withTopologyMock(mTopology), + withMsrMock(mMsr), + + WithIncludedCPUs(includedCPUs), + ) + + require.NoError(t, err) + require.NotNil(t, pt) + require.NotNil(t, pt.msr) + require.Nil(t, pt.GetRaplPackageIDs()) + require.Equal(t, includedCPUs, pt.GetMsrCPUIDs()) + + mTopology.AssertExpectations(t) + mMsr.AssertExpectations(t) + }) + + t.Run("ExcludedCPUs", func(t *testing.T) { + excludedCPUs := []int{0, 1, 2, 3, 4} + availableCPUs := []int{5, 6, 7, 8, 9} + + mMsr := &msrMock{} + + // mock initializing msr map from powerBuilder.initMsr + mMsr.On("initMsrMap", availableCPUs, time.Duration(0)).Return(nil).Once() + + pt, err := New( + withTopologyMock(mTopology), + withMsrMock(mMsr), + + WithExcludedCPUs(excludedCPUs), + ) + + require.NoError(t, err) + require.NotNil(t, pt) + require.NotNil(t, pt.msr) + 
require.Nil(t, pt.GetRaplPackageIDs()) + require.Equal(t, availableCPUs, pt.GetMsrCPUIDs()) + + mTopology.AssertExpectations(t) + mMsr.AssertExpectations(t) + }) + }) + + t.Run("Rapl", func(t *testing.T) { + t.Run("FailedToInitZoneMap", func(t *testing.T) { + mRapl := &raplMock{} + + // mock initializing rapl zone map from powerBuilder.initRapl + mRapl.On("initZoneMap").Return(mError).Once() + + pt, err := New( + withTopologyMock(mTopology), + withRaplMock(mRapl), + ) + + require.ErrorContains(t, err, "failed to initialize rapl") + require.NotNil(t, pt) + require.Nil(t, pt.rapl) + require.Nil(t, pt.GetRaplPackageIDs()) + require.Equal(t, cpus, pt.GetMsrCPUIDs()) + + mTopology.AssertExpectations(t) + mRapl.AssertExpectations(t) + }) + + t.Run("Ok", func(t *testing.T) { + pt, err := New( + withTopologyMock(mTopology), + WithRapl(makeTestDataPath("testdata/intel-rapl")), + ) + + require.NoError(t, err) + require.NotNil(t, pt) + require.NotNil(t, pt.rapl) + require.Equal(t, []int{0, 1, 2, 3}, pt.GetRaplPackageIDs()) + require.Equal(t, cpus, pt.GetMsrCPUIDs()) + + mTopology.AssertExpectations(t) + }) + }) + + t.Run("CoreFrequency", func(t *testing.T) { + // TODO: Consider to remove + t.Run("FailedToInit", func(t *testing.T) { + mCoreFreq := &coreFreqMock{} + + // mock initializing core frequency from powerBuilder.initCoreFreq + mCoreFreq.On("init").Return(mError).Once() + + pt, err := New( + withTopologyMock(mTopology), + withCoreFrequencyMock(mCoreFreq), + ) + + require.ErrorContains(t, err, "failed to initialize core freq") + require.NotNil(t, pt) + require.Nil(t, pt.cpuFreq) + require.Nil(t, pt.GetRaplPackageIDs()) + require.Equal(t, cpus, pt.GetMsrCPUIDs()) + + mTopology.AssertExpectations(t) + mCoreFreq.AssertExpectations(t) + }) + + t.Run("Ok", func(t *testing.T) { + pt, err := New( + withTopologyMock(mTopology), + WithCoreFrequency("testdata/cpu-freq"), + ) + + require.NoError(t, err) + require.NotNil(t, pt) + require.NotNil(t, pt.cpuFreq) + require.Nil(t, 
pt.GetRaplPackageIDs()) + require.Equal(t, cpus, pt.GetMsrCPUIDs()) + + mTopology.AssertExpectations(t) + }) + }) + + t.Run("UncoreFrequency", func(t *testing.T) { + // TODO: Consider to remove + t.Run("FailedToInit", func(t *testing.T) { + mUncoreFreq := &uncoreFreqMock{} + + // mock initializing uncore frequency from powerBuilder.initUncoreFreq + mUncoreFreq.On("init").Return(mError).Once() + + pt, err := New( + withTopologyMock(mTopology), + withUncoreFrequencyMock(mUncoreFreq), + ) + + require.ErrorContains(t, err, "failed to initialize uncore freq") + require.NotNil(t, pt) + require.Nil(t, pt.cpuFreq) + require.Nil(t, pt.GetRaplPackageIDs()) + require.Equal(t, cpus, pt.GetMsrCPUIDs()) + + mTopology.AssertExpectations(t) + mUncoreFreq.AssertExpectations(t) + }) + + t.Run("Ok", func(t *testing.T) { + pt, err := New( + withTopologyMock(mTopology), + WithUncoreFrequency("testdata/intel_uncore_frequency"), + ) + + require.NoError(t, err) + require.NotNil(t, pt) + require.NotNil(t, pt.uncoreFreq) + require.Nil(t, pt.GetRaplPackageIDs()) + require.Equal(t, cpus, pt.GetMsrCPUIDs()) + + mTopology.AssertExpectations(t) + }) + }) + + t.Run("Perf", func(t *testing.T) { + // Reset mock object for perf + mTopology = &topologyMock{} + + // mock initializing topology map + mTopology.On("initTopology").Return(nil) + + // mock getting CPU family and vendor from isCPUSupported + mTopology.On("getCPUFamily", 0).Return("6", nil) + mTopology.On("getCPUVendor", 0).Return("GenuineIntel", nil) + + // mock getting number of CPUs from powerBuilder.getAvailableCPUs + mTopology.On("getCPUsNumber").Return(len(cpus)) + + // mock getting topology CPU data from logTopologyDetails + mTopology.On("getCPUCoreID", mock.AnythingOfType("int")).Return(0, nil) + mTopology.On("getCPUPackageID", mock.AnythingOfType("int")).Return(0, nil) + mTopology.On("getCPUDieID", mock.AnythingOfType("int")).Return(0, nil) + + t.Run("FailedToGetCPUModel", func(t *testing.T) { + model := cpumodel.INTEL_FAM6_ICELAKE + 
+ // mock getting model from powerBuilder.initPerf and logTopologyDetails + mTopology.On("getCPUModel").Return(0).Twice() + + // mock getting model to calculate bus clock + mTopology.On("getCPUModel").Return(model).Once() + + pt, err := New( + withTopologyMock(mTopology), + WithPerf("events.json"), + ) + + require.NotNil(t, pt) + require.Nil(t, pt.perf) + require.ErrorContains(t, err, "failed to initialize perf: perf based metrics are not supported for processor model: 0x0") + require.Nil(t, pt.GetRaplPackageIDs()) + require.Equal(t, cpus, pt.GetPerfCPUIDs()) + + mTopology.AssertExpectations(t) + }) + + t.Run("CPUModelNotAllowed", func(t *testing.T) { + model := cpumodel.INTEL_FAM6_ICELAKE + + // mock getting model from isCPUSupported, powerBuilder.initPerf and logTopologyDetails + mTopology.On("getCPUModel").Return(model).Times(3) + + pt, err := New( + withTopologyMock(mTopology), + WithPerf("events.json"), + ) + + require.NotNil(t, pt) + require.Nil(t, pt.perf) + require.ErrorContains(t, err, "failed to initialize perf") + require.ErrorContains(t, err, fmt.Sprintf("perf based metrics are not supported for processor model: 0x%X", model)) + require.Nil(t, pt.GetRaplPackageIDs()) + require.Equal(t, cpus, pt.GetPerfCPUIDs()) + + mTopology.AssertExpectations(t) + }) + + t.Run("FailedToInitResolver", func(t *testing.T) { + // mock getting model from isCPUSupported, powerBuilder.initPerf and logTopologyDetails + mTopology.On("getCPUModel").Return(cpumodel.INTEL_FAM6_EMERALDRAPIDS_X).Times(3) + + mPerf := &perfMock{} + + // mock initialize perf resolver from powerBuilder.initPerf + mPerf.On("initResolver", mock.AnythingOfType("string")).Return(mError).Once() + + pt, err := New( + withTopologyMock(mTopology), + withPerfMock(mPerf), + ) + + require.NotNil(t, pt) + require.Nil(t, pt.perf) + require.ErrorContains(t, err, "failed to initialize perf") + require.ErrorContains(t, err, "failed to init resolver") + require.Nil(t, pt.GetRaplPackageIDs()) + require.Equal(t, cpus, 
pt.GetPerfCPUIDs()) + + mTopology.AssertExpectations(t) + mPerf.AssertExpectations(t) + }) + + t.Run("FailedToActivate", func(t *testing.T) { + // mock getting model from isCPUSupported, powerBuilder.initPerf and logTopologyDetails + mTopology.On("getCPUModel").Return(cpumodel.INTEL_FAM6_EMERALDRAPIDS_X).Times(3) + + mPerf := &perfMock{} + + // mock initializing perf resolver and event activation from powerBuilder.initPerf + mPerf.On("initResolver", mock.AnythingOfType("string")).Return(nil).Once() + mPerf.On("activate", cStatePerfEvents, cpus).Return(mError).Once() + + pt, err := New( + withTopologyMock(mTopology), + withPerfMock(mPerf), + ) + + require.NotNil(t, pt) + require.Nil(t, pt.perf) + require.ErrorContains(t, err, "failed to initialize perf") + require.ErrorContains(t, err, "failed to activate events") + require.Nil(t, pt.GetRaplPackageIDs()) + require.Equal(t, cpus, pt.GetPerfCPUIDs()) + + mTopology.AssertExpectations(t) + mPerf.AssertExpectations(t) + }) + + t.Run("IncludedCPUs", func(t *testing.T) { + includedCPUs := []int{1, 2, 3, 4} + + // mock getting model from isCPUSupported, powerBuilder.initPerf and logTopologyDetails + mTopology.On("getCPUModel").Return(cpumodel.INTEL_FAM6_SAPPHIRERAPIDS_X).Times(3) + + mPerf := &perfMock{} + + // mock initializing perf resolver and event activation from powerBuilder.initPerf + mPerf.On("initResolver", mock.AnythingOfType("string")).Return(nil).Once() + mPerf.On("activate", cStatePerfEvents, includedCPUs).Return(nil).Once() + + pt, err := New( + withTopologyMock(mTopology), + withPerfMock(mPerf), + + WithIncludedCPUs(includedCPUs), + ) + + require.NotNil(t, pt) + require.NotNil(t, pt.perf) + require.NoError(t, err) + require.Nil(t, pt.GetRaplPackageIDs()) + require.Equal(t, includedCPUs, pt.GetPerfCPUIDs()) + + mTopology.AssertExpectations(t) + mPerf.AssertExpectations(t) + }) + + t.Run("ExcludedCPUs", func(t *testing.T) { + excludedCPUs := []int{0, 1, 8, 9} + availableCPUs := []int{2, 3, 4, 5, 6, 7} + + // 
mock getting model from isCPUSupported, powerBuilder.initPerf and logTopologyDetails + mTopology.On("getCPUModel").Return(cpumodel.INTEL_FAM6_SAPPHIRERAPIDS_X).Times(3) + + mPerf := &perfMock{} + + // mock initializing perf resolver and event activation from powerBuilder.initPerf + mPerf.On("initResolver", mock.AnythingOfType("string")).Return(nil).Once() + mPerf.On("activate", cStatePerfEvents, availableCPUs).Return(nil).Once() + + pt, err := New( + withTopologyMock(mTopology), + withPerfMock(mPerf), + + WithExcludedCPUs(excludedCPUs), + ) + + require.NotNil(t, pt) + require.NotNil(t, pt.perf) + require.NoError(t, err) + require.Nil(t, pt.GetRaplPackageIDs()) + require.Equal(t, availableCPUs, pt.GetPerfCPUIDs()) + + mTopology.AssertExpectations(t) + mPerf.AssertExpectations(t) + }) + }) + + t.Run("Multiple", func(t *testing.T) { + t.Run("FailedMsrAndPerf", func(t *testing.T) { + includedCPUs := []int{0, 2, 4, 6, 8} + + // mock getting model from isCPUSupported, powerBuilder.initPerf and logTopologyDetails + mTopology.On("getCPUModel").Return(cpumodel.INTEL_FAM6_EMERALDRAPIDS_X).Times(3) + + mMsr := &msrMock{} + + // mock initializing msr map from powerBuilder.initMsr + mMsr.On("initMsrMap", includedCPUs, time.Duration(0)).Return(mError).Once() + + mPerf := &perfMock{} + + // mock initializing perf resolver and event activation from powerBuilder.initPerf + mPerf.On("initResolver", mock.AnythingOfType("string")).Return(nil).Once() + mPerf.On("activate", cStatePerfEvents, includedCPUs).Return(mError).Once() + + pt, err := New( + withTopologyMock(mTopology), + withMsrMock(mMsr), + withPerfMock(mPerf), + WithRapl(makeTestDataPath("testdata/intel-rapl")), + WithCoreFrequency("testdata/cpu-freq"), + WithUncoreFrequency("testdata/intel_uncore_frequency"), + + WithIncludedCPUs(includedCPUs), + ) + + require.ErrorContains(t, err, "failed to initialize msr") + require.ErrorContains(t, err, "failed to initialize perf") + require.Nil(t, pt.msr) + require.Nil(t, pt.perf) + 
require.NotNil(t, pt.rapl) + require.NotNil(t, pt.cpuFreq) + require.NotNil(t, pt.uncoreFreq) + require.Equal(t, []int{0, 1, 2, 3}, pt.GetRaplPackageIDs()) + require.Equal(t, includedCPUs, pt.GetPerfCPUIDs()) + + mTopology.AssertExpectations(t) + mMsr.AssertExpectations(t) + mPerf.AssertExpectations(t) + }) + + t.Run("Ok", func(t *testing.T) { + excludedCPUs := []int{1, 3, 5, 7, 9} + + // mock getting model from isCPUSupported and logTopologyDetails + mTopology.On("getCPUModel").Return(cpumodel.INTEL_FAM6_ALDERLAKE).Twice() + + pt, err := New( + withTopologyMock(mTopology), + WithCoreFrequency("testdata/cpu-freq"), + WithUncoreFrequency("testdata/intel_uncore_frequency"), + + WithExcludedCPUs(excludedCPUs), + ) + + require.NoError(t, err) + require.NotNil(t, pt) + require.NotNil(t, pt.cpuFreq) + require.NotNil(t, pt.uncoreFreq) + require.Nil(t, pt.GetRaplPackageIDs()) + require.Equal(t, []int{0, 2, 4, 6, 8}, pt.GetPerfCPUIDs()) + + mTopology.AssertExpectations(t) + }) + }) + }) + + t.Run("FailedToGetBusClock", func(t *testing.T) { + cpus := []int{2, 3, 4, 5, 6, 7, 8, 9} + model := cpumodel.INTEL_FAM6_ATOM_SILVERMONT + + mTopology := &topologyMock{} + + // mock initializing topology map + mTopology.On("initTopology").Return(nil) + + // mock getting CPU family and vendor from isCPUSupported + mTopology.On("getCPUFamily", 0).Return("6", nil) + mTopology.On("getCPUVendor", 0).Return("GenuineIntel", nil) + + // mock getting model to calculate bus clock + mTopology.On("getCPUModel").Return(model) + + // mock getting number of CPUs from powerBuilder.getAvailableCPUs + mTopology.On("getCPUsNumber").Return(10) + + // mock getting topology CPU data from logTopologyDetails + mTopology.On("getCPUCoreID", mock.AnythingOfType("int")).Return(0, nil) + mTopology.On("getCPUPackageID", mock.AnythingOfType("int")).Return(0, nil) + mTopology.On("getCPUDieID", mock.AnythingOfType("int")).Return(0, nil) + + t.Run("MsrIsNil", func(t *testing.T) { + pt, err := New( + 
withTopologyMock(mTopology), + WithIncludedCPUs(cpus), + ) + + require.ErrorContains(t, err, fmt.Sprintf("failed to get bus clock for model: 0x%X", model)) + require.ErrorContains(t, err, "\"msr\" is not initialized") + require.NotNil(t, pt) + require.Equal(t, 0.0, pt.busClock) + require.Nil(t, pt.GetRaplPackageIDs()) + require.Equal(t, cpus, pt.GetMsrCPUIDs()) + + mTopology.AssertExpectations(t) + }) + + t.Run("WithMsr", func(t *testing.T) { + t.Run("ExcludedCPUs", func(t *testing.T) { + excludedCPUs := []int{0, 1, 2, 3, 4} + availableCPUs := []int{5, 6, 7, 8, 9} + + mMsr := &msrMock{} + + // mock initializing msr map from powerBuilder.initMsr + mMsr.On("initMsrMap", availableCPUs, time.Duration(0)).Return(nil).Once() + + // mock reading msr offset MSR_FSB_FREQ + mMsr.On("read", uint32(fsbFreq), availableCPUs[0]).Return(uint64(0), mError).Once() + + // TODO: Call to WithIncludedCPUs, for instance {1,2,3, ...} check that the mock still works + pt, err := New( + withTopologyMock(mTopology), + withMsrMock(mMsr), + + WithExcludedCPUs(excludedCPUs), + ) + + require.ErrorContains(t, err, fmt.Sprintf("failed to get bus clock for model: 0x%X", model)) + require.NotNil(t, pt) + require.Equal(t, 0.0, pt.busClock) + require.Nil(t, pt.GetRaplPackageIDs()) + require.Equal(t, availableCPUs, pt.GetMsrCPUIDs()) + + mTopology.AssertExpectations(t) + mMsr.AssertExpectations(t) + }) + + t.Run("IncludedCPUs", func(t *testing.T) { + includedCPUs := []int{5, 6, 7, 8, 9} + + mMsr := &msrMock{} + + // mock initializing msr map from powerBuilder.initMsr + mMsr.On("initMsrMap", includedCPUs, time.Duration(0)).Return(nil).Once() + + // mock reading msr offset MSR_FSB_FREQ + mMsr.On("read", uint32(fsbFreq), includedCPUs[0]).Return(uint64(0), mError).Once() + + pt, err := New( + withTopologyMock(mTopology), + withMsrMock(mMsr), + + WithIncludedCPUs(includedCPUs), + ) + + require.ErrorContains(t, err, fmt.Sprintf("failed to get bus clock for model: 0x%X", model)) + require.NotNil(t, pt) + 
require.Equal(t, 0.0, pt.busClock) + require.Nil(t, pt.GetRaplPackageIDs()) + require.Equal(t, includedCPUs, pt.GetMsrCPUIDs()) + + mTopology.AssertExpectations(t) + mMsr.AssertExpectations(t) + }) + }) + }) +} + +func Test_IsPerfAllowed(t *testing.T) { + models := []int{ + 0xCF, //INTEL_FAM6_EMERALDRAPIDS_X + 0x8F, //INTEL_FAM6_SAPPHIRERAPIDS_X + //TODO: Hybrid models are not supported right now + //0x97, //INTEL_FAM6_ALDERLAKE + //0x9A, //INTEL_FAM6_ALDERLAKE_L + //0xB7, //INTEL_FAM6_RAPTORLAKE + //0xBA, //INTEL_FAM6_RAPTORLAKE_P + //0xBF, //INTEL_FAM6_RAPTORLAKE_S + //0xAC, //INTEL_FAM6_METEORLAKE + //0xAA, //INTEL_FAM6_METEORLAKE_L + } + + m := map[int]interface{}{} + for _, model := range models { + m[model] = struct{}{} + } + + for model := 0; model < 0xFF; model++ { + isAllowed := isPerfAllowed(model) + require.Equalf(t, m[model] != nil, isAllowed, "Model 0x%X", model) + } +} diff --git a/clock.go b/clock.go new file mode 100644 index 0000000..274e8ef --- /dev/null +++ b/clock.go @@ -0,0 +1,36 @@ +// Copyright (C) 2023 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 + +//go:build linux && amd64 + +package powertelemetry + +import ( + "time" + + "github.com/jmhodges/clock" +) + +var ( + // holds function definition to retrieve the current local time. + timeNowFn func() time.Time + + // holds a fake clock used to test time-sensitive code. + fakeClock clock.FakeClock +) + +// setFakeClock gates the use of a fake clock for unit tests to retrieve +// the current local time. +func setFakeClock() { + timeNowFn = fakeClock.Now +} + +// unsetFakeClock restores timeNowFn function to retrieve the current time from the host. 
+func unsetFakeClock() { + timeNowFn = time.Now +} + +func init() { + timeNowFn = time.Now + fakeClock = clock.NewFake() +} diff --git a/cmd/example/main.go b/cmd/example/main.go new file mode 100644 index 0000000..327bc3d --- /dev/null +++ b/cmd/example/main.go @@ -0,0 +1,298 @@ +// Copyright (C) 2023 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 + +//go:build linux && amd64 + +package main + +import ( + "context" + "errors" + "fmt" + "log" + "os" + "time" + + powertelemetry "github.com/intel/powertelemetry" + "github.com/intel/powertelemetry/internal/version" +) + +const ( + interval = 5 * time.Second // sample interval in seconds + duration = 26 * time.Second // duration of each experiment in seconds +) + +func main() { + logger := simpleLogger{} + + // Print the current version of the application + logger.Infof("Using: %s", version.GetFullVersion()) + + // TODO: Add logic to parse CPU IDs (and package IDs?) from command line + cpuID := 0 + packageID := 0 + dieID := 0 + includedCPUs := []int{cpuID} + + pt, err := powertelemetry.New( + powertelemetry.WithLogger(&logger), + // powertelemetry.WithExcludedCPUs(excludedCPUs), + powertelemetry.WithIncludedCPUs(includedCPUs), + powertelemetry.WithMsr(), + powertelemetry.WithRapl(), + powertelemetry.WithCoreFrequency(), + powertelemetry.WithUncoreFrequency(), + //powertelemetry.WithPerf(""), + ) + + var initErr *powertelemetry.MultiError + if err != nil { + if !errors.As(err, &initErr) { + logger.Errorf("Failed to build powertelemetry instance: %v", err) + os.Exit(1) + } + logger.Warn(err) + } + + // + // Per CPU ID metrics + // + logger.Info("=== Per CPU ID metrics ===") + + // CPU current frequency metric + cpuFreq, err := pt.GetCPUFrequency(cpuID) + if err != nil { + logger.Errorf("Error getting current frequency for CPU ID %v: %v", cpuID, err) + } else { + logger.Infof("CPU ID: %v, CPU current frequency[MHz]: %0.1f", cpuID, cpuFreq) + } + + // CPU temperature metric + cpuTemp, err := 
pt.GetCPUTemperature(cpuID) + if err != nil { + logger.Errorf("Error getting temperature for CPU ID %v: %v", cpuID, err) + } else { + logger.Infof("CPU ID: %v, CPU temperature[°C]: %v", cpuID, cpuTemp) + } + + // + // CPU MSR time-based metrics: + // + // * CPU C0/C1/C3/C6/C7 state residency + // * CPU busy frequency + // + logger.Info("=== CPU time-based metrics ===") + + ctx, cancel := context.WithCancel(context.Background()) + defer cancel() + + getCPUMSRMetrics := func() (string, error) { + return func(cpuID int) (string, error) { + if err := pt.UpdatePerCPUMetrics(cpuID); err != nil { + return "", fmt.Errorf("error gathering per CPU metrics for CPU ID %v: %w", cpuID, err) + } + c0State, err := pt.GetCPUC0StateResidency(cpuID) + if err != nil { + return "", fmt.Errorf("error getting CPU C0 state residency for CPU ID %v: %w", cpuID, err) + } + c1State, err := pt.GetCPUC1StateResidency(cpuID) + if err != nil { + return "", fmt.Errorf("error getting CPU C1 state residency for CPU ID %v: %w", cpuID, err) + } + c3State, err := pt.GetCPUC3StateResidency(cpuID) + if err != nil { + return "", fmt.Errorf("error getting CPU C3 state residency for CPU ID %v: %w", cpuID, err) + } + c6State, err := pt.GetCPUC6StateResidency(cpuID) + if err != nil { + return "", fmt.Errorf("error getting CPU C6 state residency for CPU ID %v: %w", cpuID, err) + } + c7State, err := pt.GetCPUC7StateResidency(cpuID) + if err != nil { + return "", fmt.Errorf("error getting CPU C7 state residency for CPU ID %v: %w", cpuID, err) + } + busyFreq, err := pt.GetCPUBusyFrequencyMhz(cpuID) + if err != nil { + return "", fmt.Errorf("error getting CPU busy frequency for CPU ID %v: %w", cpuID, err) + } + return fmt.Sprintf("CPU ID: %v, C0[%%]: %.4f, C1[%%]: %.4f, C3[%%]: %.4f, C6[%%]: %.4f, C7[%%]: %.4f, Busy freq[MHz]: %.4f", + cpuID, c0State, c1State, c3State, c6State, c7State, busyFreq), nil + }(cpuID) + } + + err = printMetricsIteratively(ctx, logger, getCPUMSRMetrics) + if err != nil { + 
logger.Errorf("Error while getting CPU MSR metrics: %v", err) + } + + // + // Per package ID and die ID metrics + // + logger.Info("=== Per package ID metrics ===") + + // CPU base frequency metric + cpuBaseFreq, err := pt.GetCPUBaseFrequency(packageID) + if err != nil { + logger.Errorf("Error getting CPU base frequency: %v", err) + } else { + logger.Infof("Package ID: %v, CPU base frequency[MHz]: %v", packageID, cpuBaseFreq) + } + + // Package uncore frequency limits + // Package customized uncore maximum frequency + customizedMaxFreq, err := pt.GetCustomizedUncoreFrequencyMax(packageID, dieID) + if err != nil { + logger.Errorf("Error reading customized max frequency of packageID: %v, dieID: %v: %v", packageID, dieID, err) + } else { + logger.Infof("Package ID: %v, die ID: %v, customized uncore frequency max[MHz]: %v", packageID, dieID, customizedMaxFreq) + } + + // Package customized uncore minimum frequency + customizedMinFreq, err := pt.GetCustomizedUncoreFrequencyMin(packageID, dieID) + if err != nil { + logger.Errorf("Error reading customized min frequency of packageID: %v, dieID: %v: %v", packageID, dieID, err) + } else { + logger.Infof("Package ID: %v, die ID: %v, customized uncore frequency min[MHz]: %v", packageID, dieID, customizedMinFreq) + } + + // Package initial uncore maximum frequency + initialMaxFreq, err := pt.GetInitialUncoreFrequencyMax(packageID, dieID) + if err != nil { + logger.Errorf("Error reading initial max frequency of packageID: %v, dieID: %v: %v", packageID, dieID, err) + } else { + logger.Infof("Package ID: %v, die ID: %v, initial uncore frequency max[MHz]: %v", packageID, dieID, initialMaxFreq) + } + + // Package initial uncore minimum frequency + initialMinFreq, err := pt.GetInitialUncoreFrequencyMin(packageID, dieID) + if err != nil { + logger.Errorf("Error reading initial min frequency of packageID: %v, dieID: %v: %v", packageID, dieID, err) + } else { + logger.Infof("Package ID: %v, die ID: %v, initial uncore frequency min[MHz]: 
%v", packageID, dieID, initialMinFreq) + } + + // Package current uncore frequency metric + currentFreq, err := pt.GetCurrentUncoreFrequency(packageID, dieID) + if err != nil { + logger.Errorf("Error reading current frequency of packageID: %v, dieID: %v: %v", packageID, dieID, err) + } else { + logger.Infof("Package ID: %v, die ID: %v, current uncore frequency[MHz]: %v", packageID, dieID, currentFreq) + } + + // Package thermal design power metric + thermalDesignPower, err := pt.GetPackageThermalDesignPowerWatts(packageID) + if err != nil { + logger.Errorf("Error getting thermal design power for package ID %v: %v", packageID, err) + } else { + logger.Infof("Package ID: %v, thermal design power[W]: %v", packageID, thermalDesignPower) + } + + maxTurboFreqList, err := pt.GetMaxTurboFreqList(packageID) + if err != nil { + logger.Errorf("Error getting max turbo frequency limit list: %v", err) + } else { + for _, v := range maxTurboFreqList { + str := fmt.Sprintf("Package ID: %v, die ID: %v, max turbo frequency: %v MHz, active cores: %v", packageID, dieID, v.Value, v.ActiveCores) + if v.Secondary { + str += ", secondary" + } + logger.Info(str) + } + } + + // + // Current power consumption metrics: + // + // * Package current power consumption + // * Package DRAM current power consumption + // + logger.Info("=== Current Power Consumption ===") + + getPowerConsumptionMetrics := func() (string, error) { + return func(packageID int) (string, error) { + packageCurrPower, err := pt.GetCurrentPackagePowerConsumptionWatts(packageID) + if err != nil { + return "", fmt.Errorf("error getting package power consumption for package ID %v: %w", packageID, err) + } + dramCurrPower, err := pt.GetCurrentDramPowerConsumptionWatts(packageID) + if err != nil { + return "", fmt.Errorf("error getting dram power consumption for package ID %v: %w", packageID, err) + } + return fmt.Sprintf("PackageID: %v, package[W]: %.4f, dram[W]: %.4f", packageID, packageCurrPower, dramCurrPower), nil + 
}(packageID) + } + + err = printMetricsIteratively(ctx, logger, getPowerConsumptionMetrics) + if err != nil { + logger.Errorf("Error while getting power consumption metrics: %v", err) + } + + // CPU flag support + supported, err := pt.IsFlagSupported("msr") + if err != nil { + logger.Errorf("Error while checking if flag is supported by first CPU: %v", err) + } else { + logger.Infof("Is flag 'msr' supported for first CPU: %t", supported) + } +} + +func printMetricsIteratively(ctx context.Context, logger simpleLogger, getMetrics func() (string, error)) error { + tInterval := time.NewTicker(interval) + tDuration := time.NewTicker(duration) + count := 0 + + for { + select { + case <-ctx.Done(): + tInterval.Stop() + tDuration.Stop() + return ctx.Err() + case <-tDuration.C: + tInterval.Stop() + tDuration.Stop() + return nil + case <-tInterval.C: + count++ + if line, err := getMetrics(); err != nil { + logger.Errorf("Error fetching metrics: %v", err) + } else { + logger.Infof("Sample %v: %q", count, line) + } + } + } +} + +type simpleLogger struct { +} + +func (l *simpleLogger) Debugf(format string, args ...interface{}) { + log.Printf("D! "+format, args...) +} + +func (l *simpleLogger) Debug(args ...interface{}) { + log.Print(append([]interface{}{"D! "}, args...)...) +} + +func (l *simpleLogger) Infof(format string, args ...interface{}) { + log.Printf("I! "+format, args...) +} + +func (l *simpleLogger) Info(args ...interface{}) { + log.Print(append([]interface{}{"I! "}, args...)...) +} + +func (l *simpleLogger) Warnf(format string, args ...interface{}) { + log.Printf("W! "+format, args...) +} + +func (l *simpleLogger) Warn(args ...interface{}) { + log.Print(append([]interface{}{"W! "}, args...)...) +} + +func (l *simpleLogger) Errorf(format string, args ...interface{}) { + log.Printf("E! "+format, args...) +} + +func (l *simpleLogger) Error(args ...interface{}) { + log.Print(append([]interface{}{"E! "}, args...)...) 
+} diff --git a/cpufreq.go b/cpufreq.go new file mode 100644 index 0000000..56c10fe --- /dev/null +++ b/cpufreq.go @@ -0,0 +1,67 @@ +// Copyright (C) 2023 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 + +//go:build linux && amd64 + +package powertelemetry + +import ( + "fmt" + "path/filepath" + "strconv" + "strings" +) + +const ( + // Path to folder with a collection of both global and individual CPU attributes. + defaultCPUFreqBasePath = "/sys/devices/system/cpu" + + // Relative path of the file that provides the current operating frequency of a CPU. + cpuFrequencyPath = "cpufreq/scaling_cur_freq" +) + +// cpuFreqReader represents a mechanism for reading core frequency values exposed via filesystem. +type cpuFreqReader interface { + init() error + + getCPUFrequencyMhz(cpuID int) (float64, error) +} + +// cpuFreqData reads core frequency values exposed via filesystem. Implements cpuFreqReader interface. +type cpuFreqData struct { + cpuFrequencyFilePath string +} + +// getCPUFrequencyMhz returns the value read from the frequency file of cpuID, scaled by fromKiloHertzToMegaHertzRatio. +func (c *cpuFreqData) getCPUFrequencyMhz(cpuID int) (float64, error) { + cpuFrequencyFile := c.getCPUFrequencyFilePath(cpuID) + + fileContent, err := readFile(cpuFrequencyFile) + if err != nil { + return 0, fmt.Errorf("error reading file %q: %w", cpuFrequencyFile, err) + } + + cpuFrequency, err := strconv.ParseFloat(strings.TrimRight(string(fileContent), "\n"), 64) + if err != nil { + return 0, fmt.Errorf("error while converting value from file %q: %w", cpuFrequencyFile, err) + } + return cpuFrequency * fromKiloHertzToMegaHertzRatio, nil +} + +// init checks that cpuFrequencyFilePath is non-empty and passes checkFile. +// TODO: Consider removing this method. 
+func (c *cpuFreqData) init() error { + if len(c.cpuFrequencyFilePath) == 0 { + return fmt.Errorf("base path of CPU core frequency cannot be empty") + } + if err := checkFile(c.cpuFrequencyFilePath); err != nil { + return fmt.Errorf("invalid base path of CPU core frequency: %w", err) + } + return nil +} + +// getCPUFrequencyFilePath returns the file path from which the current frequency of cpuID can be read. +func (c *cpuFreqData) getCPUFrequencyFilePath(cpuID int) string { + cpuDir := fmt.Sprintf("cpu%d", cpuID) // format only the constant part; the base path may contain '%' + return filepath.Join(c.cpuFrequencyFilePath, cpuDir, cpuFrequencyPath) +} diff --git a/cpufreq_test.go b/cpufreq_test.go new file mode 100644 index 0000000..5dd962e --- /dev/null +++ b/cpufreq_test.go @@ -0,0 +1,99 @@ +// Copyright (C) 2023 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 + +//go:build linux && amd64 + +package powertelemetry + +import ( + "errors" + "testing" + + "github.com/stretchr/testify/require" +) + +func TestGetCPUFrequencyMhz(t *testing.T) { + testCases := []struct { + name string + baseCPUFrequencyPath string + cpuID int + expected float64 + err error + }{ + { + name: "Correct value of cpu's frequency has been returned.", + baseCPUFrequencyPath: "testdata/cpu-freq", + cpuID: 0, + expected: 888.888, + err: nil, + }, + { + name: "NonNumericContent", + baseCPUFrequencyPath: "testdata/cpu-freq-invalid", + cpuID: 0, + expected: 0, + err: errors.New("error while converting value from file \"testdata/cpu-freq-invalid/cpu0/cpufreq/scaling_cur_freq\""), + }, + { + name: "InvalidPath", + baseCPUFrequencyPath: "testdata/cpu-freq-invalid-path", + cpuID: 0, + expected: 0, + err: errors.New("error reading file \"testdata/cpu-freq-invalid-path/cpu0" + + "/cpufreq/scaling_cur_freq\": file \"testdata/cpu-freq-invalid-path/cpu0/cpufreq/scaling_cur_freq\" does not exist"), + }, + } + for _, tc := range testCases { + t.Run(tc.name, func(t *testing.T) { + c := cpuFreqData{ + cpuFrequencyFilePath: 
tc.baseCPUFrequencyPath, + } + cpuFrequencyValue, err := c.getCPUFrequencyMhz(tc.cpuID) + if tc.err != nil { + require.Error(t, err) + require.ErrorContains(t, err, tc.err.Error()) + } else { + require.NoError(t, err) + require.Equal(t, tc.expected, cpuFrequencyValue) + } + }) + } +} + +func TestCPUFreqData_Init(t *testing.T) { + testCases := []struct { + name string + cpuFreqPath string + err error + }{ + { + name: "Initialized", + cpuFreqPath: "testdata/cpu-freq", + err: nil, + }, + { + name: "EmptyString", + cpuFreqPath: "", + err: errors.New("base path of CPU core frequency cannot be empty"), + }, + { + name: "WrongPath", + cpuFreqPath: "/dummy/path", + err: errors.New("invalid base path of CPU core frequency"), + }, + } + for _, tc := range testCases { + t.Run(tc.name, func(t *testing.T) { + cpuFreq := &cpuFreqData{ + cpuFrequencyFilePath: tc.cpuFreqPath, + } + + err := cpuFreq.init() + if tc.err != nil { + require.ErrorContains(t, err, tc.err.Error()) + } else { + require.NoError(t, err) + } + }) + } +} diff --git a/errors.go b/errors.go new file mode 100644 index 0000000..80d47cb --- /dev/null +++ b/errors.go @@ -0,0 +1,47 @@ +// Copyright (C) 2023 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 + +//go:build linux && amd64 + +package powertelemetry + +import ( + "fmt" + "strings" +) + +// MultiError holds a slice of error descriptions. Implements Error interface. +// It is used to mark errors that happened during the initialization of PowerTelemetry dependencies. +type MultiError struct { + errs []string +} + +// add takes an error message and appends it to the receiver's slice of error descriptions. +func (e *MultiError) add(errMsg string) { + e.errs = append(e.errs, errMsg) +} + +// Error returns a string with all error descriptions. Implements error.Error. +func (e *MultiError) Error() string { + return strings.Join(e.errs, "; ") +} + +// ModuleNotInitializedError indicates that a module has not been initialized. 
+type ModuleNotInitializedError struct { + Name string //holds name of not initialized module +} + +// Error returns a reason of this error. +func (e *ModuleNotInitializedError) Error() string { + return fmt.Sprintf("module %q is not initialized", e.Name) +} + +// MetricNotSupportedError indicates that a metric is not supported. +type MetricNotSupportedError struct { + reason string +} + +// Error returns a reason of this error. +func (e *MetricNotSupportedError) Error() string { + return e.reason +} diff --git a/file.go b/file.go new file mode 100644 index 0000000..37bf428 --- /dev/null +++ b/file.go @@ -0,0 +1,78 @@ +// Copyright (C) 2023 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 + +//go:build linux && amd64 + +package powertelemetry + +import ( + "errors" + "fmt" + "io/fs" + "os" + "time" +) + +// readFile reads the contents of a file at the given path and returns them as a byte slice. +// If the file doesn't exist or can't be read, an error is returned. +func readFile(filePath string) ([]byte, error) { + // Check if the file exists and can be read. + if err := checkFile(filePath); err != nil { + return nil, err + } + + // Read the entire contents of the file into a byte slice. + fileContent, err := os.ReadFile(filePath) + if err != nil { + return nil, fmt.Errorf("error while reading file from path %q: %w", filePath, err) + } + + return fileContent, nil +} + +// readFileWithTimestamp reads the content of the given file specified as argument. +// If no error occurred, it returns a slice of bytes with file content and a timestamp. +// Otherwise, returns an error. +func readFileWithTimestamp(filePath string) ([]byte, time.Time, error) { + // Check if the file exists and can be read. + if err := checkFile(filePath); err != nil { + return nil, time.Time{}, err + } + + // Read the entire contents of the file into a byte slice. 
+ timestamp := timeNowFn() + fileContent, err := os.ReadFile(filePath) + if err != nil { + return nil, time.Time{}, fmt.Errorf("error while reading file from path %q: %w", filePath, err) + } + return fileContent, timestamp, nil +} + +// checkFile is a helper function that returns nil if the given file path exists, +// and it is not a symlink. Otherwise, it returns an error. +func checkFile(path string) error { + if len(path) == 0 { + return errors.New("file path is empty") + } + fInfo, err := os.Lstat(path) + if err != nil { + if errors.Is(err, fs.ErrNotExist) { + return fmt.Errorf("file %q does not exist", path) + } + return fmt.Errorf("could not get the info for file %q: %w", path, err) + } + if fMode := fInfo.Mode(); fMode&os.ModeSymlink != 0 { + return fmt.Errorf("file %q is a symlink", path) + } + return nil +} + +// fileExists reports whether os.Stat can find filePath; it returns an error only for an empty path. +// NOTE: any os.Stat error other than "does not exist" is ignored and reported as true. +func fileExists(filePath string) (bool, error) { + if len(filePath) == 0 { + return false, errors.New("file path is empty") + } + _, err := os.Stat(filePath) + return !os.IsNotExist(err), nil +} diff --git a/go.mod b/go.mod new file mode 100644 index 0000000..ecabeb9 --- /dev/null +++ b/go.mod @@ -0,0 +1,27 @@ +module github.com/intel/powertelemetry + +go 1.21 + +require ( + github.com/intel/iaevents v1.1.0 + github.com/jmhodges/clock v1.2.0 + github.com/shirou/gopsutil/v3 v3.23.10 + github.com/stretchr/testify v1.8.4 + golang.org/x/sync v0.5.0 +) + +require ( + github.com/davecgh/go-spew v1.1.1 // indirect + github.com/go-ole/go-ole v1.2.6 // indirect + github.com/lufia/plan9stats v0.0.0-20230110061619-bbe2e5e100de // indirect + github.com/pmezard/go-difflib v1.0.0 // indirect + github.com/power-devops/perfstat v0.0.0-20221212215047-62379fc7944b // indirect + github.com/shoenig/go-m1cpu v0.1.6 // indirect + github.com/stretchr/objx v0.5.0 // indirect + github.com/tklauser/go-sysconf v0.3.12 // indirect + 
github.com/tklauser/numcpus v0.6.1 // indirect + github.com/yusufpapurcu/wmi v1.2.3 // indirect + golang.org/x/exp v0.0.0-20231006140011-7918f672742d // indirect + golang.org/x/sys v0.13.0 // indirect + gopkg.in/yaml.v3 v3.0.1 // indirect +) diff --git a/go.sum b/go.sum new file mode 100644 index 0000000..8027680 --- /dev/null +++ b/go.sum @@ -0,0 +1,62 @@ +github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= +github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= +github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= +github.com/go-ole/go-ole v1.2.6 h1:/Fpf6oFPoeFik9ty7siob0G6Ke8QvQEuVcuChpwXzpY= +github.com/go-ole/go-ole v1.2.6/go.mod h1:pprOEPIfldk/42T2oK7lQ4v4JSDwmV0As9GaiUsvbm0= +github.com/google/go-cmp v0.5.6/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= +github.com/google/go-cmp v0.5.9/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY= +github.com/google/go-cmp v0.6.0 h1:ofyhxvXcZhMsU5ulbFiLKl/XBFqE1GSq7atu8tAmTRI= +github.com/google/go-cmp v0.6.0/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY= +github.com/intel/iaevents v1.1.0 h1:FzxMBfXk/apG2EUXUCfaq3gUQ+q+TgZ1HNMjjUILUGE= +github.com/intel/iaevents v1.1.0/go.mod h1:CyUUzXw0lHRCsmyyF7Pwco9Y7NiTNQUUlcJ7RJAazKs= +github.com/jmhodges/clock v1.2.0 h1:eq4kys+NI0PLngzaHEe7AmPT90XMGIEySD1JfV1PDIs= +github.com/jmhodges/clock v1.2.0/go.mod h1:qKjhA7x7u/lQpPB1XAqX1b1lCI/w3/fNuYpI/ZjLynI= +github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY= +github.com/kr/text v0.2.0/go.mod h1:eLer722TekiGuMkidMxC/pM04lWEeraHUUmBw8l2grE= +github.com/lufia/plan9stats v0.0.0-20211012122336-39d0f177ccd0/go.mod h1:zJYVVT2jmtg6P3p1VtQj7WsuWi/y4VnjVBn7F8KPB3I= +github.com/lufia/plan9stats v0.0.0-20230110061619-bbe2e5e100de h1:V53FWzU6KAZVi1tPp5UIsMoUWJ2/PNwYIDXnu7QuBCE= +github.com/lufia/plan9stats v0.0.0-20230110061619-bbe2e5e100de/go.mod h1:JKx41uQRwqlTZabZc+kILPrO/3jlKnQ2Z8b7YiVw5cE= 
+github.com/niemeyer/pretty v0.0.0-20200227124842-a10e7caefd8e h1:fD57ERR4JtEqsWbfPhv4DMiApHyliiK5xCTNVSPiaAs= +github.com/niemeyer/pretty v0.0.0-20200227124842-a10e7caefd8e/go.mod h1:zD1mROLANZcx1PVRCS0qkT7pwLkGfwJo4zjcN/Tysno= +github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= +github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= +github.com/power-devops/perfstat v0.0.0-20210106213030-5aafc221ea8c/go.mod h1:OmDBASR4679mdNQnz2pUhc2G8CO2JrUAVFDRBDP/hJE= +github.com/power-devops/perfstat v0.0.0-20221212215047-62379fc7944b h1:0LFwY6Q3gMACTjAbMZBjXAqTOzOwFaj2Ld6cjeQ7Rig= +github.com/power-devops/perfstat v0.0.0-20221212215047-62379fc7944b/go.mod h1:OmDBASR4679mdNQnz2pUhc2G8CO2JrUAVFDRBDP/hJE= +github.com/shirou/gopsutil/v3 v3.23.10 h1:/N42opWlYzegYaVkWejXWJpbzKv2JDy3mrgGzKsh9hM= +github.com/shirou/gopsutil/v3 v3.23.10/go.mod h1:JIE26kpucQi+innVlAUnIEOSBhBUkirr5b44yr55+WE= +github.com/shoenig/go-m1cpu v0.1.6 h1:nxdKQNcEB6vzgA2E2bvzKIYRuNj7XNJ4S/aRSwKzFtM= +github.com/shoenig/go-m1cpu v0.1.6/go.mod h1:1JJMcUBvfNwpq05QDQVAnx3gUHr9IYF7GNg9SUEw2VQ= +github.com/shoenig/test v0.6.4 h1:kVTaSd7WLz5WZ2IaoM0RSzRsUD+m8wRR+5qvntpn4LU= +github.com/shoenig/test v0.6.4/go.mod h1:byHiCGXqrVaflBLAMq/srcZIHynQPQgeyvkvXnjqq0k= +github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= +github.com/stretchr/objx v0.4.0/go.mod h1:YvHI0jy2hoMjB+UWwv71VJQ9isScKT/TqJzVSSt89Yw= +github.com/stretchr/objx v0.5.0 h1:1zr/of2m5FGMsad5YfcqgdqdWrIhu+EBEJRhR1U7z/c= +github.com/stretchr/objx v0.5.0/go.mod h1:Yh+to48EsGEfYuaHDzXPcE3xhTkx73EhmCGUpEOglKo= +github.com/stretchr/testify v1.7.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= +github.com/stretchr/testify v1.8.0/go.mod h1:yNjHg4UonilssWZ8iaSj1OCr/vHnekPRkoO+kdMU+MU= +github.com/stretchr/testify v1.8.4 h1:CcVxjf3Q8PM0mHUKJCdn+eZZtm5yQwehR5yeSVQQcUk= +github.com/stretchr/testify v1.8.4/go.mod h1:sz/lmYIOXD/1dqDmKjjqLyZ2RngseejIcXlSw2iwfAo= 
+github.com/tklauser/go-sysconf v0.3.12 h1:0QaGUFOdQaIVdPgfITYzaTegZvdCjmYO52cSFAEVmqU= +github.com/tklauser/go-sysconf v0.3.12/go.mod h1:Ho14jnntGE1fpdOqQEEaiKRpvIavV0hSfmBq8nJbHYI= +github.com/tklauser/numcpus v0.6.1 h1:ng9scYS7az0Bk4OZLvrNXNSAO2Pxr1XXRAPyjhIx+Fk= +github.com/tklauser/numcpus v0.6.1/go.mod h1:1XfjsgE2zo8GVw7POkMbHENHzVg3GzmoZ9fESEdAacY= +github.com/yusufpapurcu/wmi v1.2.3 h1:E1ctvB7uKFMOJw3fdOW32DwGE9I7t++CRUEMKvFoFiw= +github.com/yusufpapurcu/wmi v1.2.3/go.mod h1:SBZ9tNy3G9/m5Oi98Zks0QjeHVDvuK0qfxQmPyzfmi0= +golang.org/x/exp v0.0.0-20231006140011-7918f672742d h1:jtJma62tbqLibJ5sFQz8bKtEM8rJBtfilJ2qTU199MI= +golang.org/x/exp v0.0.0-20231006140011-7918f672742d/go.mod h1:ldy0pHrwJyGW56pPQzzkH36rKxoZW1tw7ZJpeKx+hdo= +golang.org/x/sync v0.5.0 h1:60k92dhOjHxJkrqnwsfl8KuaHbn/5dl0lUPUklKo3qE= +golang.org/x/sync v0.5.0/go.mod h1:Czt+wKu1gCyEFDUtn0jG5QVvpJ6rzVqr5aXyt9drQfk= +golang.org/x/sys v0.0.0-20190916202348-b4ddaad3f8a3/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20201204225414-ed752295db88/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.8.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.11.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.13.0 h1:Af8nKPmuFypiUBjVoU9V20FiaFXOcuZI21p0ycVYYGE= +golang.org/x/sys v0.13.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= +gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= +gopkg.in/check.v1 v1.0.0-20200227125254-8fa46927fb4f h1:BLraFXnmrev5lT+xlilqcH8XK9/i0At2xKjWk4p6zsU= +gopkg.in/check.v1 v1.0.0-20200227125254-8fa46927fb4f/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= +gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= +gopkg.in/yaml.v3 v3.0.1 
h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA= +gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= diff --git a/internal/cpuid/cpuid_count_amd64.go b/internal/cpuid/cpuid_count_amd64.go new file mode 100644 index 0000000..d5a773c --- /dev/null +++ b/internal/cpuid/cpuid_count_amd64.go @@ -0,0 +1,9 @@ +// Copyright (C) 2023 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 + +//go:build linux && amd64 + +package cpuid + +//nolint:revive // This is to keep the function name same as in Linux kernel sources +func cpuid_count(level, count uint32) (eax, ebx, ecx, edx uint32) // implemented in cpuid_count_amd64.s diff --git a/internal/cpuid/cpuid_count_amd64.s b/internal/cpuid/cpuid_count_amd64.s new file mode 100644 index 0000000..34f2d61 --- /dev/null +++ b/internal/cpuid/cpuid_count_amd64.s @@ -0,0 +1,15 @@ +// Copyright (C) 2023 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 + +#include "textflag.h" + +// func cpuid_count(level, count uint32) (eax, ebx, ecx, edx uint32) +TEXT ·cpuid_count(SB), NOSPLIT, $0-24 + MOVL level+0(FP), AX + MOVL count+4(FP), CX + CPUID + MOVL AX, eax+8(FP) + MOVL BX, ebx+12(FP) + MOVL CX, ecx+16(FP) + MOVL DX, edx+20(FP) + RET diff --git a/internal/cpuid/hybrid.go b/internal/cpuid/hybrid.go new file mode 100644 index 0000000..4281718 --- /dev/null +++ b/internal/cpuid/hybrid.go @@ -0,0 +1,28 @@ +// Copyright (C) 2023 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 + +//go:build linux && amd64 + +package cpuid + +var ( + cpuIsHybrid bool + cpuIsHybridCheckedOnce bool +) + +// IsCPUHybrid checks if CPU is hybrid (Alder Lake, Raptor Lake, Meteor Lake, etc.) +// The function ensures the actual cpuid reading is done only once. +// The cpuid value handling is aligned with the process_cpuid() function of turbostat. 
+func IsCPUHybrid() bool { + if !cpuIsHybridCheckedOnce { + cpuIsHybridCheckedOnce = true + maxLevel, _, _, _ := cpuid_count(0, 0) + if maxLevel >= 0x7 { + _, _, _, edx := cpuid_count(7, 0) + if (edx & (1 << 15)) != 0 { + cpuIsHybrid = true + } + } + } + return cpuIsHybrid +} diff --git a/internal/cpumodel/intel_family.go b/internal/cpumodel/intel_family.go new file mode 100644 index 0000000..714cf08 --- /dev/null +++ b/internal/cpumodel/intel_family.go @@ -0,0 +1,174 @@ +// Copyright (C) 2023 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 + +//go:build linux && amd64 + +package cpumodel + +// Model definitions for CPU models are taken from the following kernel source link: +// https://github.com/torvalds/linux/blob/master/arch/x86/include/asm/intel-family.h + +/* + * "Big Core" Processors (Branded as Core, Xeon, etc...) + * + * While adding a new CPUID for a new microarchitecture, add a new + * group to keep logically sorted out in chronological order. Within + * that group keep the CPUID for the variants sorted by model number. + * + * The defined symbol names have the following form: + * INTEL_FAM6{OPTFAMILY}_{MICROARCH}{OPTDIFF} + * where: + * OPTFAMILY Describes the family of CPUs that this belongs to. Default + * is assumed to be "_CORE" (and should be omitted). Other values + * currently in use are _ATOM and _XEON_PHI + * MICROARCH Is the code name for the micro-architecture for this core. + * N.B. Not the platform name. + * OPTDIFF If needed, a short string to differentiate by market segment. + * + * Common OPTDIFFs: + * + * - regular client parts + * _L - regular mobile parts + * _G - parts with extra graphics on + * _X - regular server parts + * _D - micro server parts + * _N,_P - other mobile parts + * _H - premium mobile parts + * _S - other client parts + * + * Historical OPTDIFFs: + * + * _EP - 2 socket server parts + * _EX - 4+ socket server parts + * + * Lines may optionally include a comment including platform or core + * names. 
// Model numbers of Intel family 6 CPUs. Names and values mirror the Linux kernel
// header arch/x86/include/asm/intel-family.h; the trailing comments carry the
// core or platform code names used there.
//
//nolint:revive,godot // this is to keep CPU model definitions same as in Linux kernel sources
const (
	INTEL_FAM6_CORE_YONAH = 0x0E

	INTEL_FAM6_CORE2_MEROM      = 0x0F
	INTEL_FAM6_CORE2_MEROM_L    = 0x16
	INTEL_FAM6_CORE2_PENRYN     = 0x17
	INTEL_FAM6_CORE2_DUNNINGTON = 0x1D

	INTEL_FAM6_NEHALEM    = 0x1E
	INTEL_FAM6_NEHALEM_G  = 0x1F /* Auburndale / Havendale */
	INTEL_FAM6_NEHALEM_EP = 0x1A
	INTEL_FAM6_NEHALEM_EX = 0x2E

	INTEL_FAM6_WESTMERE    = 0x25
	INTEL_FAM6_WESTMERE_EP = 0x2C
	INTEL_FAM6_WESTMERE_EX = 0x2F

	INTEL_FAM6_SANDYBRIDGE   = 0x2A
	INTEL_FAM6_SANDYBRIDGE_X = 0x2D
	INTEL_FAM6_IVYBRIDGE     = 0x3A
	INTEL_FAM6_IVYBRIDGE_X   = 0x3E

	INTEL_FAM6_HASWELL   = 0x3C
	INTEL_FAM6_HASWELL_X = 0x3F
	INTEL_FAM6_HASWELL_L = 0x45
	INTEL_FAM6_HASWELL_G = 0x46

	INTEL_FAM6_BROADWELL   = 0x3D
	INTEL_FAM6_BROADWELL_G = 0x47
	INTEL_FAM6_BROADWELL_X = 0x4F
	INTEL_FAM6_BROADWELL_D = 0x56

	INTEL_FAM6_SKYLAKE_L = 0x4E /* Sky Lake */
	INTEL_FAM6_SKYLAKE   = 0x5E /* Sky Lake */
	INTEL_FAM6_SKYLAKE_X = 0x55 /* Sky Lake */
	/* CASCADELAKE_X = 0x55 Sky Lake -- s: 7 */
	/* COOPERLAKE_X = 0x55 Sky Lake -- s: 11 */

	INTEL_FAM6_KABYLAKE_L = 0x8E /* Sky Lake */
	/* AMBERLAKE_L = 0x8E Sky Lake -- s: 9 */
	/* COFFEELAKE_L = 0x8E Sky Lake -- s: 10 */
	/* WHISKEYLAKE_L = 0x8E Sky Lake -- s: 11,12 */

	INTEL_FAM6_KABYLAKE = 0x9E /* Sky Lake */
	/* COFFEELAKE = 0x9E Sky Lake -- s: 10-13 */

	INTEL_FAM6_COMETLAKE   = 0xA5 /* Sky Lake */
	INTEL_FAM6_COMETLAKE_L = 0xA6 /* Sky Lake */

	INTEL_FAM6_CANNONLAKE_L = 0x66 /* Palm Cove */

	INTEL_FAM6_ICELAKE_X    = 0x6A /* Sunny Cove */
	INTEL_FAM6_ICELAKE_D    = 0x6C /* Sunny Cove */
	INTEL_FAM6_ICELAKE      = 0x7D /* Sunny Cove */
	INTEL_FAM6_ICELAKE_L    = 0x7E /* Sunny Cove */
	INTEL_FAM6_ICELAKE_NNPI = 0x9D /* Sunny Cove */

	INTEL_FAM6_ROCKETLAKE = 0xA7 /* Cypress Cove */

	INTEL_FAM6_TIGERLAKE_L = 0x8C /* Willow Cove */
	INTEL_FAM6_TIGERLAKE   = 0x8D /* Willow Cove */

	INTEL_FAM6_SAPPHIRERAPIDS_X = 0x8F /* Golden Cove */

	INTEL_FAM6_EMERALDRAPIDS_X = 0xCF

	INTEL_FAM6_GRANITERAPIDS_X = 0xAD
	INTEL_FAM6_GRANITERAPIDS_D = 0xAE

	/* "Hybrid" Processors (P-Core/E-Core) */

	INTEL_FAM6_LAKEFIELD = 0x8A /* Sunny Cove / Tremont */

	INTEL_FAM6_ALDERLAKE   = 0x97 /* Golden Cove / Gracemont */
	INTEL_FAM6_ALDERLAKE_L = 0x9A /* Golden Cove / Gracemont */

	INTEL_FAM6_RAPTORLAKE   = 0xB7 /* Raptor Cove / Enhanced Gracemont */
	INTEL_FAM6_RAPTORLAKE_P = 0xBA
	INTEL_FAM6_RAPTORLAKE_S = 0xBF

	INTEL_FAM6_METEORLAKE   = 0xAC
	INTEL_FAM6_METEORLAKE_L = 0xAA

	INTEL_FAM6_ARROWLAKE_H = 0xC5
	INTEL_FAM6_ARROWLAKE   = 0xC6

	INTEL_FAM6_LUNARLAKE_M = 0xBD

	/* "Small Core" Processors (Atom/E-Core) */

	INTEL_FAM6_ATOM_BONNELL     = 0x1C /* Diamondville, Pineview */
	INTEL_FAM6_ATOM_BONNELL_MID = 0x26 /* Silverthorne, Lincroft */

	INTEL_FAM6_ATOM_SALTWELL        = 0x36 /* Cedarview */
	INTEL_FAM6_ATOM_SALTWELL_MID    = 0x27 /* Penwell */
	INTEL_FAM6_ATOM_SALTWELL_TABLET = 0x35 /* Cloverview */

	INTEL_FAM6_ATOM_SILVERMONT            = 0x37 /* Bay Trail, Valleyview */
	INTEL_FAM6_ATOM_SILVERMONT_D          = 0x4D /* Avoton, Rangely */
	INTEL_FAM6_ATOM_SILVERMONT_MID        = 0x4A /* Merrifield */
	INTEL_FAM6_ATOM_SILVERMONT_SMARTPHONE = 0x5A // INTEL_FAM6_ATOM_AIRMONT_MID in turbostat

	INTEL_FAM6_ATOM_AIRMONT    = 0x4C /* Cherry Trail, Braswell */
	INTEL_FAM6_ATOM_AIRMONT_NP = 0x75 /* Lightning Mountain */

	INTEL_FAM6_ATOM_GOLDMONT   = 0x5C /* Apollo Lake */
	INTEL_FAM6_ATOM_GOLDMONT_D = 0x5F /* Denverton */

	/* Note: the micro-architecture is "Goldmont Plus" */
	INTEL_FAM6_ATOM_GOLDMONT_PLUS = 0x7A /* Gemini Lake */

	INTEL_FAM6_ATOM_TREMONT_D = 0x86 /* Jacobsville */
	INTEL_FAM6_ATOM_TREMONT   = 0x96 /* Elkhart Lake */
	INTEL_FAM6_ATOM_TREMONT_L = 0x9C /* Jasper Lake */

	INTEL_FAM6_ATOM_GRACEMONT = 0xBE /* Alderlake N */

	INTEL_FAM6_ATOM_CRESTMONT_X = 0xAF /* Sierra Forest */
	INTEL_FAM6_ATOM_CRESTMONT   = 0xB6 /* Grand Ridge */

	/* Xeon Phi */

	INTEL_FAM6_XEON_PHI_KNL = 0x57 /* Knights Landing */
	INTEL_FAM6_XEON_PHI_KNM = 0x85 /* Knights Mill */
)

// defaultLogger defines a default no-op logging structure.
// Every method has an empty body, so all messages are discarded.
type defaultLogger struct{}

// Errorf discards the formatted error message.
func (l *defaultLogger) Errorf(_ string, _ ...interface{}) {}

// Error discards the error message.
func (l *defaultLogger) Error(_ ...interface{}) {}

// Debugf discards the formatted debug message.
func (l *defaultLogger) Debugf(_ string, _ ...interface{}) {}

// Debug discards the debug message.
func (l *defaultLogger) Debug(_ ...interface{}) {}

// Warnf discards the formatted warning message.
func (l *defaultLogger) Warnf(_ string, _ ...interface{}) {}

// Warn discards the warning message.
func (l *defaultLogger) Warn(_ ...interface{}) {}

// Infof discards the formatted information message.
func (l *defaultLogger) Infof(_ string, _ ...interface{}) {}

// Info discards the information message.
func (l *defaultLogger) Info(_ ...interface{}) {}

// Logger defines an interface for logging.
// It offers a plain and a format-string variant for each of the four
// levels: error, debug, warning and information.
type Logger interface {
	Errorf(format string, args ...interface{})
	Error(args ...interface{})
	Debugf(format string, args ...interface{})
	Debug(args ...interface{})
	Warnf(format string, args ...interface{})
	Warn(args ...interface{})
	Infof(format string, args ...interface{})
	Info(args ...interface{})
}

// log defines a variable that stores the actual logger pointer.
// It starts out as the no-op defaultLogger.
var log Logger = &defaultLogger{}
+func SetLogger(l Logger) { + if l != nil { + log = l + } else { + log = &defaultLogger{} + } +} + +// Errorf logs an error message. +func Errorf(format string, args ...interface{}) { + log.Errorf(format, args...) +} + +// Error logs an error message. +func Error(args ...interface{}) { + log.Error(args...) +} + +// Debugf logs a debug message. +func Debugf(format string, args ...interface{}) { + log.Debugf(format, args...) +} + +// Debug logs a debug message. +func Debug(args ...interface{}) { + log.Debug(args...) +} + +// Warnf logs a warning message. +func Warnf(format string, args ...interface{}) { + log.Warnf(format, args...) +} + +// Warn logs a warning message. +func Warn(args ...interface{}) { + log.Warn(args...) +} + +// Infof logs an information message. +func Infof(format string, args ...interface{}) { + log.Infof(format, args...) +} + +// Info logs an information message. +func Info(args ...interface{}) { + log.Info(args...) +} diff --git a/internal/version/version.go b/internal/version/version.go new file mode 100644 index 0000000..fb6573c --- /dev/null +++ b/internal/version/version.go @@ -0,0 +1,42 @@ +// Copyright (C) 2023 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 + +//go:build linux && amd64 + +package version + +import ( + "fmt" + "strings" +) + +// Set via LDFLAGS -X. 
+var ( + LibName = "powertelemetry" + Version = "unknown" + Branch = "" + Commit = "" +) + +func GetFullVersion() string { + var parts = []string{LibName} + + if Version != "" { + parts = append(parts, Version) + } else { + parts = append(parts, "unknown") + } + + if Branch != "" || Commit != "" { + if Branch == "" { + Branch = "unknown" + } + if Commit == "" { + Commit = "unknown" + } + git := fmt.Sprintf("(git: %s@%s)", Branch, Commit) + parts = append(parts, git) + } + + return strings.Join(parts, " ") +} diff --git a/metrics.go b/metrics.go new file mode 100644 index 0000000..61b822f --- /dev/null +++ b/metrics.go @@ -0,0 +1,225 @@ +// Copyright (C) 2023 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 + +//go:build linux && amd64 + +package powertelemetry + +import ( + "fmt" + + "github.com/intel/powertelemetry/internal/cpumodel" +) + +// CheckIfCPUC1StateResidencySupported checks if CPU C1 state residency metric is supported by CPU model. +// Returns MetricNotSupportedError if metric is not supported by the CPU model; otherwise, returns nil. +func CheckIfCPUC1StateResidencySupported(cpuModel int) error { + if !isC1C6BaseTempSupported(cpuModel) { + return &MetricNotSupportedError{fmt.Sprintf("c1 state residency metric not supported by CPU model: 0x%X", cpuModel)} + } + + return nil +} + +// CheckIfCPUC3StateResidencySupported checks if CPU C3 state residency metric is supported by CPU model. +// Returns MetricNotSupportedError if metric is not supported by the CPU model; otherwise, returns nil. +func CheckIfCPUC3StateResidencySupported(cpuModel int) error { + if !isC3Supported(cpuModel) { + return &MetricNotSupportedError{fmt.Sprintf("c3 state residency metric not supported by CPU model: 0x%X", cpuModel)} + } + + return nil +} + +// CheckIfCPUC6StateResidencySupported checks if CPU C6 state residency metric is supported by CPU model. +// Returns MetricNotSupportedError if metric is not supported by the CPU model; otherwise, returns nil. 
+func CheckIfCPUC6StateResidencySupported(cpuModel int) error { + if !isC1C6BaseTempSupported(cpuModel) { + return &MetricNotSupportedError{fmt.Sprintf("c6 state residency metric not supported by CPU model: 0x%X", cpuModel)} + } + + return nil +} + +// CheckIfCPUC7StateResidencySupported checks if CPU C7 state residency metric is supported by CPU model. +// Returns MetricNotSupportedError if metric is not supported by the CPU model; otherwise, returns nil. +func CheckIfCPUC7StateResidencySupported(cpuModel int) error { + if !isC7Supported(cpuModel) { + return &MetricNotSupportedError{fmt.Sprintf("c7 state residency metric not supported by CPU model: 0x%X", cpuModel)} + } + + return nil +} + +// CheckIfCPUBaseFrequencySupported checks if CPU base frequency metric is supported by CPU model. +// Returns MetricNotSupportedError if metric is not supported by the CPU model; otherwise, returns nil. +func CheckIfCPUBaseFrequencySupported(cpuModel int) error { + if !isC1C6BaseTempSupported(cpuModel) { + return &MetricNotSupportedError{fmt.Sprintf("cpu base frequency metric not supported by CPU model: 0x%X", cpuModel)} + } + + return nil +} + +// CheckIfCPUTemperatureSupported checks if CPU temperature metric is supported by CPU model. +// Returns MetricNotSupportedError if metric is not supported by the CPU model; otherwise, returns nil. 
+func CheckIfCPUTemperatureSupported(cpuModel int) error { + if !isC1C6BaseTempSupported(cpuModel) { + return &MetricNotSupportedError{fmt.Sprintf("cpu temperature metric not supported by CPU model: 0x%X", cpuModel)} + } + + return nil +} + +func isC1C6BaseTempSupported(cpuModel int) bool { + switch cpuModel { + case + cpumodel.INTEL_FAM6_NEHALEM, + cpumodel.INTEL_FAM6_NEHALEM_G, + cpumodel.INTEL_FAM6_NEHALEM_EP, + cpumodel.INTEL_FAM6_NEHALEM_EX, + cpumodel.INTEL_FAM6_WESTMERE, + cpumodel.INTEL_FAM6_WESTMERE_EP, + cpumodel.INTEL_FAM6_WESTMERE_EX, + cpumodel.INTEL_FAM6_SANDYBRIDGE, + cpumodel.INTEL_FAM6_SANDYBRIDGE_X, + cpumodel.INTEL_FAM6_IVYBRIDGE, + cpumodel.INTEL_FAM6_IVYBRIDGE_X, + cpumodel.INTEL_FAM6_HASWELL, + cpumodel.INTEL_FAM6_HASWELL_X, + cpumodel.INTEL_FAM6_HASWELL_L, + cpumodel.INTEL_FAM6_HASWELL_G, + cpumodel.INTEL_FAM6_BROADWELL, + cpumodel.INTEL_FAM6_BROADWELL_G, + cpumodel.INTEL_FAM6_BROADWELL_X, + cpumodel.INTEL_FAM6_BROADWELL_D, + cpumodel.INTEL_FAM6_SKYLAKE_L, + cpumodel.INTEL_FAM6_SKYLAKE, + cpumodel.INTEL_FAM6_SKYLAKE_X, + cpumodel.INTEL_FAM6_KABYLAKE_L, + cpumodel.INTEL_FAM6_KABYLAKE, + cpumodel.INTEL_FAM6_COMETLAKE, + cpumodel.INTEL_FAM6_COMETLAKE_L, + cpumodel.INTEL_FAM6_CANNONLAKE_L, + cpumodel.INTEL_FAM6_ICELAKE_X, + cpumodel.INTEL_FAM6_ICELAKE_D, + cpumodel.INTEL_FAM6_ICELAKE, + cpumodel.INTEL_FAM6_ICELAKE_L, + cpumodel.INTEL_FAM6_ICELAKE_NNPI, + cpumodel.INTEL_FAM6_ROCKETLAKE, + cpumodel.INTEL_FAM6_TIGERLAKE_L, + cpumodel.INTEL_FAM6_TIGERLAKE, + cpumodel.INTEL_FAM6_SAPPHIRERAPIDS_X, + cpumodel.INTEL_FAM6_EMERALDRAPIDS_X, + cpumodel.INTEL_FAM6_GRANITERAPIDS_X, + cpumodel.INTEL_FAM6_LAKEFIELD, + cpumodel.INTEL_FAM6_ALDERLAKE, + cpumodel.INTEL_FAM6_ALDERLAKE_L, + cpumodel.INTEL_FAM6_RAPTORLAKE, + cpumodel.INTEL_FAM6_RAPTORLAKE_P, + cpumodel.INTEL_FAM6_RAPTORLAKE_S, + cpumodel.INTEL_FAM6_METEORLAKE, + cpumodel.INTEL_FAM6_METEORLAKE_L, + cpumodel.INTEL_FAM6_ARROWLAKE, + cpumodel.INTEL_FAM6_LUNARLAKE_M, + cpumodel.INTEL_FAM6_ATOM_SILVERMONT, + 
cpumodel.INTEL_FAM6_ATOM_SILVERMONT_D, + cpumodel.INTEL_FAM6_ATOM_SILVERMONT_MID, + cpumodel.INTEL_FAM6_ATOM_SILVERMONT_SMARTPHONE, + cpumodel.INTEL_FAM6_ATOM_AIRMONT, + cpumodel.INTEL_FAM6_ATOM_GOLDMONT, + cpumodel.INTEL_FAM6_ATOM_GOLDMONT_D, + cpumodel.INTEL_FAM6_ATOM_GOLDMONT_PLUS, + cpumodel.INTEL_FAM6_ATOM_TREMONT_D, + cpumodel.INTEL_FAM6_ATOM_TREMONT, + cpumodel.INTEL_FAM6_ATOM_TREMONT_L, + cpumodel.INTEL_FAM6_ATOM_GRACEMONT, + cpumodel.INTEL_FAM6_ATOM_CRESTMONT_X, + cpumodel.INTEL_FAM6_ATOM_CRESTMONT, + cpumodel.INTEL_FAM6_XEON_PHI_KNL, + cpumodel.INTEL_FAM6_XEON_PHI_KNM: + return true + } + return false +} + +func isC3Supported(cpuModel int) bool { + switch cpuModel { + case + cpumodel.INTEL_FAM6_NEHALEM, + cpumodel.INTEL_FAM6_NEHALEM_G, + cpumodel.INTEL_FAM6_NEHALEM_EP, + cpumodel.INTEL_FAM6_NEHALEM_EX, + cpumodel.INTEL_FAM6_WESTMERE, + cpumodel.INTEL_FAM6_WESTMERE_EP, + cpumodel.INTEL_FAM6_WESTMERE_EX, + cpumodel.INTEL_FAM6_SANDYBRIDGE, + cpumodel.INTEL_FAM6_SANDYBRIDGE_X, + cpumodel.INTEL_FAM6_IVYBRIDGE, + cpumodel.INTEL_FAM6_IVYBRIDGE_X, + cpumodel.INTEL_FAM6_HASWELL, + cpumodel.INTEL_FAM6_HASWELL_X, + cpumodel.INTEL_FAM6_HASWELL_L, + cpumodel.INTEL_FAM6_HASWELL_G, + cpumodel.INTEL_FAM6_BROADWELL, + cpumodel.INTEL_FAM6_BROADWELL_G, + cpumodel.INTEL_FAM6_BROADWELL_X, + cpumodel.INTEL_FAM6_BROADWELL_D, + cpumodel.INTEL_FAM6_SKYLAKE_L, + cpumodel.INTEL_FAM6_SKYLAKE, + cpumodel.INTEL_FAM6_KABYLAKE_L, + cpumodel.INTEL_FAM6_KABYLAKE, + cpumodel.INTEL_FAM6_COMETLAKE, + cpumodel.INTEL_FAM6_COMETLAKE_L, + cpumodel.INTEL_FAM6_ATOM_AIRMONT, + cpumodel.INTEL_FAM6_ATOM_GOLDMONT, + cpumodel.INTEL_FAM6_ATOM_GOLDMONT_PLUS: + return true + } + return false +} + +func isC7Supported(cpuModel int) bool { + switch cpuModel { + case + cpumodel.INTEL_FAM6_SANDYBRIDGE, + cpumodel.INTEL_FAM6_SANDYBRIDGE_X, + cpumodel.INTEL_FAM6_IVYBRIDGE, + cpumodel.INTEL_FAM6_IVYBRIDGE_X, + cpumodel.INTEL_FAM6_HASWELL, + cpumodel.INTEL_FAM6_HASWELL_X, + cpumodel.INTEL_FAM6_HASWELL_L, + 
cpumodel.INTEL_FAM6_HASWELL_G, + cpumodel.INTEL_FAM6_BROADWELL, + cpumodel.INTEL_FAM6_BROADWELL_G, + cpumodel.INTEL_FAM6_SKYLAKE_L, + cpumodel.INTEL_FAM6_SKYLAKE, + cpumodel.INTEL_FAM6_KABYLAKE_L, + cpumodel.INTEL_FAM6_KABYLAKE, + cpumodel.INTEL_FAM6_COMETLAKE, + cpumodel.INTEL_FAM6_COMETLAKE_L, + cpumodel.INTEL_FAM6_CANNONLAKE_L, + cpumodel.INTEL_FAM6_ICELAKE_L, + cpumodel.INTEL_FAM6_ICELAKE_NNPI, + cpumodel.INTEL_FAM6_ROCKETLAKE, + cpumodel.INTEL_FAM6_TIGERLAKE_L, + cpumodel.INTEL_FAM6_TIGERLAKE, + cpumodel.INTEL_FAM6_LAKEFIELD, + cpumodel.INTEL_FAM6_ALDERLAKE, + cpumodel.INTEL_FAM6_ALDERLAKE_L, + cpumodel.INTEL_FAM6_RAPTORLAKE, + cpumodel.INTEL_FAM6_RAPTORLAKE_P, + cpumodel.INTEL_FAM6_RAPTORLAKE_S, + cpumodel.INTEL_FAM6_METEORLAKE, + cpumodel.INTEL_FAM6_METEORLAKE_L, + cpumodel.INTEL_FAM6_ARROWLAKE, + cpumodel.INTEL_FAM6_LUNARLAKE_M, + cpumodel.INTEL_FAM6_ATOM_GOLDMONT, + cpumodel.INTEL_FAM6_ATOM_GOLDMONT_PLUS, + cpumodel.INTEL_FAM6_ATOM_TREMONT, + cpumodel.INTEL_FAM6_ATOM_TREMONT_L, + cpumodel.INTEL_FAM6_ATOM_GRACEMONT: + return true + } + return false +} diff --git a/metrics_test.go b/metrics_test.go new file mode 100644 index 0000000..0050291 --- /dev/null +++ b/metrics_test.go @@ -0,0 +1,255 @@ +// Copyright (C) 2023 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 + +//go:build linux && amd64 + +package powertelemetry + +import ( + "fmt" + "testing" + + "github.com/stretchr/testify/require" +) + +func TestCheckIfCPUC1StateResidencySupported(t *testing.T) { + m := make(map[int]interface{}) + for _, v := range c1c6BaseTempModels { + m[v] = struct{}{} + } + + for model := 0; model < 0xFF; model++ { + err := CheckIfCPUC1StateResidencySupported(model) + if m[model] != nil { + require.NoError(t, err, "CPU model 0x%X should support c1 state residency", model) + } else { + require.ErrorContains(t, err, fmt.Sprintf("c1 state residency metric not supported by CPU model: 0x%X", model), + "CPU model 0x%X shouldn't support c1 state residency", model) + } + } +} 
+ +func TestCheckIfCPUC3StateResidencySupported(t *testing.T) { + m := make(map[int]interface{}) + for _, v := range c3Models { + m[v] = struct{}{} + } + + for model := 0; model < 0xFF; model++ { + err := CheckIfCPUC3StateResidencySupported(model) + if m[model] != nil { + require.NoError(t, err, "CPU model 0x%X should support c3 state residency", model) + } else { + require.ErrorContains(t, err, fmt.Sprintf("c3 state residency metric not supported by CPU model: 0x%X", model), + "CPU model 0x%X shouldn't support c3 state residency", model) + } + } +} + +func TestCheckIfCPUC6StateResidencySupported(t *testing.T) { + m := make(map[int]interface{}) + for _, v := range c1c6BaseTempModels { + m[v] = struct{}{} + } + + for model := 0; model < 0xFF; model++ { + err := CheckIfCPUC6StateResidencySupported(model) + if m[model] != nil { + require.NoError(t, err, "CPU model 0x%X should support c6 state residency", model) + } else { + require.ErrorContains(t, err, fmt.Sprintf("c6 state residency metric not supported by CPU model: 0x%X", model), + "CPU model 0x%X shouldn't support c6 state residency", model) + } + } +} + +func TestCheckIfCPUC7StateResidencySupported(t *testing.T) { + m := make(map[int]interface{}) + for _, v := range c7Models { + m[v] = struct{}{} + } + + for model := 0; model < 0xFF; model++ { + err := CheckIfCPUC7StateResidencySupported(model) + if m[model] != nil { + require.NoError(t, err, "CPU model 0x%X should support c7 state residency", model) + } else { + require.ErrorContains(t, err, fmt.Sprintf("c7 state residency metric not supported by CPU model: 0x%X", model), + "CPU model 0x%X shouldn't support c7 state residency", model) + } + } +} + +func TestCheckIfCPUBaseFrequencySupported(t *testing.T) { + m := make(map[int]interface{}) + for _, v := range c1c6BaseTempModels { + m[v] = struct{}{} + } + + for model := 0; model < 0xFF; model++ { + err := CheckIfCPUBaseFrequencySupported(model) + if m[model] != nil { + require.NoError(t, err, "CPU model 0x%X 
should support cpu base frequency", model) + } else { + require.ErrorContains(t, err, fmt.Sprintf("cpu base frequency metric not supported by CPU model: 0x%X", model), + "CPU model 0x%X shouldn't support cpu base frequency", model) + } + } +} + +func TestCheckIfCPUTemperatureSupported(t *testing.T) { + m := make(map[int]interface{}) + for _, v := range c1c6BaseTempModels { + m[v] = struct{}{} + } + + for model := 0; model < 0xFF; model++ { + err := CheckIfCPUTemperatureSupported(model) + if m[model] != nil { + require.NoError(t, err, "CPU model 0x%X should support cpu temperature", model) + } else { + require.ErrorContains(t, err, fmt.Sprintf("cpu temperature metric not supported by CPU model: 0x%X", model), + "CPU model 0x%X shouldn't support cpu temperature", model) + } + } +} + +var ( + c1c6BaseTempModels = []int{ + 0x1E, // INTEL_FAM6_NEHALEM + 0x1F, // INTEL_FAM6_NEHALEM_G + 0x1A, // INTEL_FAM6_NEHALEM_EP + 0x2E, // INTEL_FAM6_NEHALEM_EX + 0x25, // INTEL_FAM6_WESTMERE + 0x2C, // INTEL_FAM6_WESTMERE_EP + 0x2F, // INTEL_FAM6_WESTMERE_EX + 0x2A, // INTEL_FAM6_SANDYBRIDGE + 0x2D, // INTEL_FAM6_SANDYBRIDGE_X + 0x3A, // INTEL_FAM6_IVYBRIDGE + 0x3E, // INTEL_FAM6_IVYBRIDGE_X + 0x3C, // INTEL_FAM6_HASWELL + 0x3F, // INTEL_FAM6_HASWELL_X + 0x45, // INTEL_FAM6_HASWELL_L + 0x46, // INTEL_FAM6_HASWELL_G + 0x3D, // INTEL_FAM6_BROADWELL + 0x47, // INTEL_FAM6_BROADWELL_G + 0x4F, // INTEL_FAM6_BROADWELL_X + 0x56, // INTEL_FAM6_BROADWELL_D + 0x4E, // INTEL_FAM6_SKYLAKE_L + 0x5E, // INTEL_FAM6_SKYLAKE + 0x55, // INTEL_FAM6_SKYLAKE_X + 0x8E, // INTEL_FAM6_KABYLAKE_L + 0x9E, // INTEL_FAM6_KABYLAKE + 0xA5, // INTEL_FAM6_COMETLAKE + 0xA6, // INTEL_FAM6_COMETLAKE_L + 0x66, // INTEL_FAM6_CANNONLAKE_L + 0x6A, // INTEL_FAM6_ICELAKE_X + 0x6C, // INTEL_FAM6_ICELAKE_D + 0x7D, // INTEL_FAM6_ICELAKE + 0x7E, // INTEL_FAM6_ICELAKE_L + 0x9D, // INTEL_FAM6_ICELAKE_NNPI + 0xA7, // INTEL_FAM6_ROCKETLAKE + 0x8C, // INTEL_FAM6_TIGERLAKE_L + 0x8D, // INTEL_FAM6_TIGERLAKE + 0x8F, // 
INTEL_FAM6_SAPPHIRERAPIDS_X + 0xCF, // INTEL_FAM6_EMERALDRAPIDS_X + 0xAD, // INTEL_FAM6_GRANITERAPIDS_X + 0x8A, // INTEL_FAM6_LAKEFIELD + 0x97, // INTEL_FAM6_ALDERLAKE + 0x9A, // INTEL_FAM6_ALDERLAKE_L + 0xB7, // INTEL_FAM6_RAPTORLAKE + 0xBA, // INTEL_FAM6_RAPTORLAKE_P + 0xBF, // INTEL_FAM6_RAPTORLAKE_S + 0xAC, // INTEL_FAM6_METEORLAKE + 0xAA, // INTEL_FAM6_METEORLAKE_L + 0xC6, // INTEL_FAM6_ARROWLAKE + 0xBD, // INTEL_FAM6_LUNARLAKE_M + 0x37, // INTEL_FAM6_ATOM_SILVERMONT + 0x4D, // INTEL_FAM6_ATOM_SILVERMONT_D + 0x4A, // INTEL_FAM6_ATOM_SILVERMONT_MID + 0x5A, // INTEL_FAM6_ATOM_SILVERMONT_SMARTPHONE + 0x4C, // INTEL_FAM6_ATOM_AIRMONT + 0x5C, // INTEL_FAM6_ATOM_GOLDMONT + 0x5F, // INTEL_FAM6_ATOM_GOLDMONT_D + 0x7A, // INTEL_FAM6_ATOM_GOLDMONT_PLUS + 0x86, // INTEL_FAM6_ATOM_TREMONT_D + 0x96, // INTEL_FAM6_ATOM_TREMONT + 0x9C, // INTEL_FAM6_ATOM_TREMONT_L + 0xBE, // INTEL_FAM6_ATOM_GRACEMONT + 0xAF, // INTEL_FAM6_ATOM_CRESTMONT_X + 0xB6, // INTEL_FAM6_ATOM_CRESTMONT + 0x57, // INTEL_FAM6_XEON_PHI_KNL + 0x85, // INTEL_FAM6_XEON_PHI_KNM + } + + c3Models = []int{ + 0x1E, // INTEL_FAM6_NEHALEM + 0x1F, // INTEL_FAM6_NEHALEM_G + 0x1A, // INTEL_FAM6_NEHALEM_EP + 0x2E, // INTEL_FAM6_NEHALEM_EX + 0x25, // INTEL_FAM6_WESTMERE + 0x2C, // INTEL_FAM6_WESTMERE_EP + 0x2F, // INTEL_FAM6_WESTMERE_EX + 0x2A, // INTEL_FAM6_SANDYBRIDGE + 0x2D, // INTEL_FAM6_SANDYBRIDGE_X + 0x3A, // INTEL_FAM6_IVYBRIDGE + 0x3E, // INTEL_FAM6_IVYBRIDGE_X + 0x3C, // INTEL_FAM6_HASWELL + 0x3F, // INTEL_FAM6_HASWELL_X + 0x45, // INTEL_FAM6_HASWELL_L + 0x46, // INTEL_FAM6_HASWELL_G + 0x3D, // INTEL_FAM6_BROADWELL + 0x47, // INTEL_FAM6_BROADWELL_G + 0x4F, // INTEL_FAM6_BROADWELL_X + 0x56, // INTEL_FAM6_BROADWELL_D + 0x4E, // INTEL_FAM6_SKYLAKE_L + 0x5E, // INTEL_FAM6_SKYLAKE + 0x8E, // INTEL_FAM6_KABYLAKE_L + 0x9E, // INTEL_FAM6_KABYLAKE + 0xA5, // INTEL_FAM6_COMETLAKE + 0xA6, // INTEL_FAM6_COMETLAKE_L + 0x4C, // INTEL_FAM6_ATOM_AIRMONT + 0x5C, // INTEL_FAM6_ATOM_GOLDMONT + 0x7A, // 
INTEL_FAM6_ATOM_GOLDMONT_PLUS + } + + c7Models = []int{ + 0x2A, // INTEL_FAM6_SANDYBRIDGE + 0x2D, // INTEL_FAM6_SANDYBRIDGE_X + 0x3A, // INTEL_FAM6_IVYBRIDGE + 0x3E, // INTEL_FAM6_IVYBRIDGE_X + 0x3C, // INTEL_FAM6_HASWELL + 0x3F, // INTEL_FAM6_HASWELL_X + 0x45, // INTEL_FAM6_HASWELL_L + 0x46, // INTEL_FAM6_HASWELL_G + 0x3D, // INTEL_FAM6_BROADWELL + 0x47, // INTEL_FAM6_BROADWELL_G + 0x4E, // INTEL_FAM6_SKYLAKE_L + 0x5E, // INTEL_FAM6_SKYLAKE + 0x8E, // INTEL_FAM6_KABYLAKE_L + 0x9E, // INTEL_FAM6_KABYLAKE + 0xA5, // INTEL_FAM6_COMETLAKE + 0xA6, // INTEL_FAM6_COMETLAKE_L + 0x66, // INTEL_FAM6_CANNONLAKE_L + 0x7E, // INTEL_FAM6_ICELAKE_L + 0x9D, // INTEL_FAM6_ICELAKE_NNPI + 0xA7, // INTEL_FAM6_ROCKETLAKE + 0x8C, // INTEL_FAM6_TIGERLAKE_L + 0x8D, // INTEL_FAM6_TIGERLAKE + 0x8A, // INTEL_FAM6_LAKEFIELD + 0x97, // INTEL_FAM6_ALDERLAKE + 0x9A, // INTEL_FAM6_ALDERLAKE_L + 0xB7, // INTEL_FAM6_RAPTORLAKE + 0xBA, // INTEL_FAM6_RAPTORLAKE_P + 0xBF, // INTEL_FAM6_RAPTORLAKE_S + 0xAC, // INTEL_FAM6_METEORLAKE + 0xAA, // INTEL_FAM6_METEORLAKE_L + 0xC6, // INTEL_FAM6_ARROWLAKE + 0xBD, // INTEL_FAM6_LUNARLAKE_M + 0x5C, // INTEL_FAM6_ATOM_GOLDMONT + 0x7A, // INTEL_FAM6_ATOM_GOLDMONT_PLUS + 0x96, // INTEL_FAM6_ATOM_TREMONT + 0x9C, // INTEL_FAM6_ATOM_TREMONT_L + 0xBE, // INTEL_FAM6_ATOM_GRACEMONT + } +) diff --git a/msr.go b/msr.go new file mode 100644 index 0000000..182a623 --- /dev/null +++ b/msr.go @@ -0,0 +1,474 @@ +// Copyright (C) 2023 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 + +//go:build linux && amd64 + +package powertelemetry + +import ( + "encoding/binary" + "errors" + "fmt" + "io" + "os" + "path/filepath" + "regexp" + "strconv" + "time" +) + +const ( + // base path comprising all per-CPU ID MSR files. + defaultMsrBasePath = "/dev/cpu" + + // file name of the binary MSR file specific for each CPU ID. + msrFile = "msr" +) + +var ( + // regex used to check CPU ID format as numeric value without leading zeroes. 
+ cpuIDRegex = regexp.MustCompile("^(0|[1-9][0-9]*)$") + + // regex used to check MSR module within the loaded kernel modules list. + msrModuleRegex = regexp.MustCompile(`\bmsr\b`) +) + +// msrReg represents a CPU ID specific MSR register with the ability to read offset +// values. +type msrReg interface { + // getPath gets the absolute path of the MSR file. + getPath() string + + // getCPUID gets the CPU ID corresponding to the MSR register. + getCPUID() int + + // read returns the MSR value of the given offset. + read(offset uint32) (uint64, error) + + // readAll takes a slice of offsets and returns a map with offset key + // and content of the MSR offset value. + readAll(offsets []uint32) (map[uint32]uint64, error) +} + +// msr represents a CPU ID specific MSR register. Implements msrReg interface. +type msr struct { + path string + cpuID int + timeout time.Duration +} + +// resultError is used for transmitting a value or an err through the channel. +type resultError struct { + value uint64 + err error +} + +// newMsr creates a new MSR register, initializing the CPU ID for this specific +// register and the path where to find the MSR file. +func newMsr(path string, timeout time.Duration) (msrReg, error) { + cpuIDStr := filepath.Base(path) + if !cpuIDRegex.MatchString(cpuIDStr) { + return nil, fmt.Errorf("invalid format for CPU ID in path %q", path) + } + cpuID, err := strconv.Atoi(cpuIDStr) + if err != nil { + return nil, fmt.Errorf("error converting parsed CPU ID from path to numeric") + } + cpuMsr := filepath.Join(path, msrFile) + if err := checkFile(cpuMsr); err != nil { + return nil, fmt.Errorf("invalid MSR file for cpu ID %v: %w", cpuID, err) + } + return &msr{ + path: cpuMsr, + cpuID: cpuID, + timeout: timeout, + }, nil +} + +// getPath returns the MSR file path of the receiver. +func (m *msr) getPath() string { + return m.path +} + +// getCPUID returns the CPU ID corresponding to the receiver. 
+func (m *msr) getCPUID() int { + return m.cpuID +} + +// read takes an address, specified as offset, and returns an 8-byte value with +// the address content of the given CPU ID's MSR. +func (m *msr) read(offset uint32) (uint64, error) { + f, err := os.OpenFile(m.path, os.O_RDONLY, 0400) + if err != nil { + return 0, err + } + defer f.Close() + return readOffset(offset, f, m.timeout) +} + +// readAll takes a slice of addresses, specified as offsets, and returns a map +// with offset key and the offset content of the given CPU ID's MSR as value. +// Each read offset operation is performed in a separate goroutine. In case an +// error occurs, the function returns a nil map with the corresponding error. +func (m *msr) readAll(offsets []uint32) (map[uint32]uint64, error) { + f, err := os.OpenFile(m.path, os.O_RDONLY, 0400) + if err != nil { + return nil, err + } + defer f.Close() + + errCh := make(chan error) + msrOffsetChannels := make(map[uint32]chan uint64) + for _, offset := range offsets { + msrOffsetChannels[offset] = make(chan uint64) + } + + for offset, ch := range msrOffsetChannels { + go func(off uint32, ch chan uint64, errCh chan error) { + v, err := readOffset(off, f, m.timeout) + if err != nil { + errCh <- err + return + } + ch <- v + }(offset, ch, errCh) + } + + valuesMap := make(map[uint32]uint64) + for offset, ch := range msrOffsetChannels { + select { + case err := <-errCh: + return nil, fmt.Errorf("error reading MSR offsets: %w", err) + case v := <-ch: + valuesMap[offset] = v + } + } + return valuesMap, nil +} + +// readOffset is a helper function that takes an address, specified as offset, an io.ReaderAt interface, and timeout. +// It returns an 8-byte value with the address content of the given reader argument. 
+func readOffset(offset uint32, reader io.ReaderAt, timeout time.Duration) (uint64, error) { + // read without timeout + if timeout <= 0 { + return readOffsetWithoutTimeout(offset, reader) + } + + // read with timeout + resultCh := make(chan resultError, 1) + go func(resCh chan<- resultError) { + buf := make([]byte, 8) + if _, err := reader.ReadAt(buf, int64(offset)); err != nil { + if errors.Is(err, io.EOF) { + resCh <- resultError{value: 0, err: fmt.Errorf("offset 0x%x is out-of-bounds", offset)} + } else { + resCh <- resultError{value: 0, err: fmt.Errorf("error when reading file at offset 0x%x: %w", offset, err)} + } + } else { + resCh <- resultError{value: binary.LittleEndian.Uint64(buf), err: nil} + } + + close(resCh) + }(resultCh) + + t := time.NewTimer(timeout) + select { + case <-t.C: + return 0, fmt.Errorf("timeout when reading file at offset 0x%x", offset) + case result := <-resultCh: + if !t.Stop() { + <-t.C + } + if result.err != nil { + return 0, result.err + } + + return result.value, nil + } +} + +// readOffsetWithoutTimeout is a helper function that takes an address, specified as offset and an io.ReaderAt interface. +// It returns an 8-byte value with the address content of the given reader argument. +func readOffsetWithoutTimeout(offset uint32, reader io.ReaderAt) (uint64, error) { + buf := make([]byte, 8) + if _, err := reader.ReadAt(buf, int64(offset)); err != nil { + if errors.Is(err, io.EOF) { + return 0, fmt.Errorf("offset 0x%x is out-of-bounds", offset) + } + return 0, fmt.Errorf("error when reading file at offset 0x%x: %w", offset, err) + } + + return binary.LittleEndian.Uint64(buf), nil +} + +// msrRegWithStorage represents a CPU ID specific MSR register with the ability to read and +// store offset values. Two types of stored offset values are supported: +// - offset values from the last read operation. 
// - delta offset values defined as the subtraction between offset values from last read
// and offset values from the previous read operation.
type msrRegWithStorage interface {
	msrReg

	// getOffsetValues gets a map with offset key and offset value of the latest read operation.
	getOffsetValues() map[uint32]uint64

	// getOffsetDeltas gets a map with offset key and delta offset value between the latest and
	// the previous read operation.
	getOffsetDeltas() map[uint32]uint64

	// getTimestampDelta gets the timestamp delta between the last offset values reading operation
	// and its previous reading operation.
	getTimestampDelta() time.Duration

	// update gets MSR values and updates the storage.
	update() error
}

// msrWithStorage represents a CPU ID specific MSR register with the ability to read and
// store offset values. Implements msrRegWithStorage interface.
// The offset values in the storage correspond to values for offsets specified in offsets
// field.
type msrWithStorage struct {
	msrReg                           // underlying register used to perform the actual reads
	offsets        []uint32          // offsets read on every update
	offsetValues   map[uint32]uint64 // offset values from the last read operation
	offsetDeltas   map[uint32]uint64 // delta offset values between the latest and its previous reading operation
	timestamp      time.Time         // timestamp of the last reading operation
	timestampDelta time.Duration     // timestamp delta between the last read and its previous reading operation
}

// newMsrWithStorage creates a new MSR register with the ability to read and store multiple MSR
// offset values, provided as argument. First creates an MSR register, then decorates it adding
// storage for both offset values from the last read operation and delta offset values between
// the latest and its previous reading operation.
+func newMsrWithStorage(path string, offsets []uint32, timeout time.Duration) (msrRegWithStorage, error) { + if len(offsets) == 0 { + return nil, errors.New("no offsets were provided") + } + + msr, err := newMsr(path, timeout) + if err != nil { + return nil, fmt.Errorf("error creating MSR register for CPU path %q: %w", path, err) + } + + return &msrWithStorage{ + msrReg: msr, + offsets: offsets, + offsetValues: make(map[uint32]uint64), + offsetDeltas: make(map[uint32]uint64), + }, nil +} + +// getOffsetValues returns a map with offset key and offset values from the last read operation +// of the receiver. +func (m *msrWithStorage) getOffsetValues() map[uint32]uint64 { + return m.offsetValues +} + +// setOffsetValues sets the given map of offset key and offset values to the receiver. +func (m *msrWithStorage) setOffsetValues(offsetValues map[uint32]uint64) { + m.offsetValues = offsetValues +} + +// getOffsetDeltas returns a map with offset key and delta offset values from the last read operation +// of the receiver. +func (m *msrWithStorage) getOffsetDeltas() map[uint32]uint64 { + return m.offsetDeltas +} + +// setOffsetDeltas sets the given map of offset key and delta offset values to the receiver. +func (m *msrWithStorage) setOffsetDeltas(offsetDeltas map[uint32]uint64) { + m.offsetDeltas = offsetDeltas +} + +// update performs reading operations along the offsets specified by the receiver. It updates +// last read offset values and delta offset values of the receiver. 
+func (m *msrWithStorage) update() error { + latest, err := m.readAll(m.offsets) + if err != nil { + return err + } + + // Get time interval between offset MSR read and its previous reading operation + newTimestamp := timeNowFn() + m.timestampDelta, m.timestamp = newTimestamp.Sub(m.timestamp), newTimestamp + + prev := m.getOffsetValues() + deltasMap := make(map[uint32]uint64, len(latest)) + for offset := range latest { + deltasMap[offset] = latest[offset] - prev[offset] + } + + m.setOffsetDeltas(deltasMap) + m.setOffsetValues(latest) + return nil +} + +// getTimestampDelta returns the timestamp delta between the offset values last reading operations +// and its previous reading operation. +func (m *msrWithStorage) getTimestampDelta() time.Duration { + return m.timestampDelta +} + +// msrReaderWithStorage represents per-CPU ID MSR registers of the host with the ability to read single +// MSR offset values, read and store multiple MSR offset values, and eventually provide the MSR delta +// offset values between latest and its previous reading operation. +type msrReaderWithStorage interface { + initMsrMap(cpuIDs []int, timeout time.Duration) error + + // isMsrLoaded check if MSR kernel module is loaded. + isMsrLoaded(modulesPath string) (bool, error) + + // read returns the MSR value for a given offset and CPU ID. + read(offset uint32, cpuID int) (uint64, error) + + // update takes a CPU ID, reads multiple MSR offset values and updates the storage. + update(cpuID int) error + + // getOffsetDeltas takes a CPU ID and returns MSR delta offset values between latest and its previous reading operation. + getOffsetDeltas(cpuID int) (map[uint32]uint64, error) + + // getTimestampDelta takes a CPU ID and returns the time interval between the last offset value reading operation + // and its previous reading operation. 
+ getTimestampDelta(cpuID int) (time.Duration, error) +} + +// msrDataWithStorage represents per-CPU ID MSR registers of the host with offset values storage capabilities. +// +// It represents the hierarchy tree for /dev/cpu directory: +// +// /dev/cpu +// ├── 0 (CPU ID) +// │ └── msr (MSR binary file) +// ├── 1 +// │ └── msr +// ├── 2 +// │ └── msr +// ∙ +// └── n +// +// Each map entry corresponds to an MSR register, which allows reading operations +// to specific addresses (offsets), as well as storage of the offset values read. +type msrDataWithStorage struct { + msrPath string + msrOffsets []uint32 + + msrMap map[int]msrRegWithStorage +} + +// initMsrMap initializes a map of CPU ID key and MSR register value with storage. Each MSR register is able to update +// the storage values and deltas of provided offsets in the offsets slice field. Field msrCPUIDs holds values of CPU IDs +// for which an MSR register is initialized. In case msrCPUIDs is nil, MSR registers for all CPU IDs found in system +// file will be initialized. It ensures that each CPU ID directory is properly formatted and binary MSR file exists. +// In case of malformed base path tree, an error is returned. 
+func (m *msrDataWithStorage) initMsrMap(cpuIDs []int, timeout time.Duration) error { + if len(m.msrOffsets) == 0 { + return errors.New("MSR offsets argument cannot be empty") + } + + if len(m.msrPath) == 0 { + return errors.New("base path for MSR files cannot be an empty string") + } + if err := checkFile(m.msrPath); err != nil { + return fmt.Errorf("invalid MSR base path %q: %w", m.msrPath, err) + } + + cpuDirs, err := os.ReadDir(m.msrPath) + if err != nil { + return fmt.Errorf("error reading directory %q: %w", m.msrPath, err) + } + + // Declaring map for constant time search + filterCPUIDsMap := make(map[string]struct{}) + for _, cpuID := range cpuIDs { + cpuIDStr := strconv.FormatUint(uint64(cpuID), 10) + filterCPUIDsMap[cpuIDStr] = struct{}{} + } + isFilterEmpty := len(filterCPUIDsMap) == 0 + + msrMap := make(map[int]msrRegWithStorage) + for _, cpuDirEntry := range cpuDirs { + cpuDir := cpuDirEntry.Name() + if !cpuDirEntry.IsDir() || !cpuIDRegex.MatchString(cpuDir) { + continue + } + + // Skip only if filterCPUIDs are not empty and there is no corresponding value in the map to cpuDir. + // if filterCPUIDs are empty, then we read all values from the directory. 
+ if _, isCPUIDFiltered := filterCPUIDsMap[cpuDir]; !isFilterEmpty && !isCPUIDFiltered { + continue + } + + cpuPath := filepath.Join(m.msrPath, cpuDir) + cpuMsrWithStorage, err := newMsrWithStorage(cpuPath, m.msrOffsets, timeout) + if err != nil { + return fmt.Errorf("error creating MSR register with storage for CPU path %q: %w", cpuPath, err) + } + + err = cpuMsrWithStorage.update() + if err != nil { + return fmt.Errorf("error initializing the MSR register storage for CPU ID %v: %w", cpuMsrWithStorage.getCPUID(), err) + } + msrMap[cpuMsrWithStorage.getCPUID()] = cpuMsrWithStorage + } + + if len(msrMap) == 0 { + return fmt.Errorf("could not find valid CPU MSR files for path: %q", m.msrPath) + } + + m.msrMap = msrMap + return nil +} + +// isMsrLoaded returns true if MSR kernel module is loaded, otherwise returns false. +func (m *msrDataWithStorage) isMsrLoaded(modulesPath string) (bool, error) { + if err := checkFile(modulesPath); err != nil { + return false, err + } + data, err := os.ReadFile(modulesPath) + if err != nil { + return false, fmt.Errorf("could not read file %q: %w", modulesPath, err) + } + matches := msrModuleRegex.FindAll(data, -1) + return len(matches) > 0, nil +} + +// read takes a CPU ID and offset and returns an 8-byte value with the contents +// of the associated MSR register. +func (m *msrDataWithStorage) read(offset uint32, cpuID int) (uint64, error) { + reg, ok := m.msrMap[cpuID] + if !ok { + return 0, fmt.Errorf("could not find MSR register for CPU ID: %v", cpuID) + } + return reg.read(offset) +} + +// update takes a CPU ID, performs reading operations along the offsets, storing the results +// within the storage. 
+func (m *msrDataWithStorage) update(cpuID int) error { + reg, ok := m.msrMap[cpuID] + if !ok { + return fmt.Errorf("could not find MSR register for CPU ID: %v", cpuID) + } + return reg.update() +} + +// getOffsetDeltas takes a CPU ID and returns a map with offset keys and delta offset values between +// latest and its previous reading offsets operation. +func (m *msrDataWithStorage) getOffsetDeltas(cpuID int) (map[uint32]uint64, error) { + reg, ok := m.msrMap[cpuID] + if !ok { + return nil, fmt.Errorf("could not find MSR register for CPU ID: %v", cpuID) + } + return reg.getOffsetDeltas(), nil +} + +// getTimestampDelta takes a CPU ID and returns the time interval between the last offset value reading +// operation and its previous reading operation. +func (m *msrDataWithStorage) getTimestampDelta(cpuID int) (time.Duration, error) { + reg, ok := m.msrMap[cpuID] + if !ok { + return time.Duration(0), fmt.Errorf("could not find MSR register for CPU ID: %v", cpuID) + } + return reg.getTimestampDelta(), nil +} diff --git a/msr_test.go b/msr_test.go new file mode 100644 index 0000000..b883600 --- /dev/null +++ b/msr_test.go @@ -0,0 +1,1033 @@ +// Copyright (C) 2023 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 + +//go:build linux && amd64 + +package powertelemetry + +import ( + "errors" + "fmt" + "path/filepath" + "testing" + "time" + + "github.com/stretchr/testify/require" + "github.com/stretchr/testify/suite" +) + +func TestNewMsr(t *testing.T) { + testCases := []struct { + name string + path string + msr msrReg + err error + }{ + { + name: "CPUIDNonNumeric", + path: "testdata/cpu-msr-invalid-cpuID-directories/1invalid", + msr: nil, + err: errors.New("invalid format for CPU ID in path"), + }, + { + name: "CPUIDWithLeadingZeroes", + path: "testdata/cpu-msr-invalid-cpuID-directories/01", + msr: nil, + err: errors.New("invalid format for CPU ID in path"), + }, + { + name: "CPUIDMsrFileNotExist", + path: "testdata/cpu-msr-cpuID-msr-not-exist/1", + msr: nil, + 
err: fmt.Errorf("invalid MSR file for cpu ID 1: file \"testdata/cpu-msr-cpuID-msr-not-exist/1/msr\" does not exist"), + }, + { + name: "CPUIDMsrFileSymlink", + path: "testdata/cpu-msr-cpuID-msr-softlink/1", + msr: nil, + err: fmt.Errorf("invalid MSR file for cpu ID 1: file \"testdata/cpu-msr-cpuID-msr-softlink/1/msr\" is a symlink"), + }, + { + name: "Valid", + path: "testdata/cpu-msr/0", + msr: &msr{ + path: "testdata/cpu-msr/0/msr", + cpuID: 0, + }, + }, + } + + for _, tc := range testCases { + t.Run(tc.name, func(t *testing.T) { + m, err := newMsr(tc.path, 0) + require.Equal(t, tc.msr, m) + if tc.err != nil { + require.ErrorContains(t, err, tc.err.Error()) + } else { + require.NoError(t, err) + } + }) + } +} + +func TestMsrGetters(t *testing.T) { + cpuID := 0 + cpuPath := "testdata/cpu-msr/0" + m, err := newMsr(cpuPath, 0) + require.NoError(t, err) + require.Equal(t, filepath.Join(cpuPath, msrFile), m.getPath()) + require.Equal(t, cpuID, m.getCPUID()) +} + +// TestMsrRead tests read a single offset value of the corresponding MSR register. Valid MSR registers have +// paths within "testdata/cpu-msr" directory. Each directory corresponds to CPU ID-specific MSR path, which +// comprises a binary file named "msr". All valid MSR binary files have the same 16-byte content, written byte +// by byte, using little endian. +// +// 0x01 0x23 0x45 0x67 0x89 0xab 0xcd 0xef +// 0xfe 0xdc 0xba 0x98 0x76 0x54 0x32 0x10 +// +// Example: +// The result of reading operation given offset 0, would result in an 8-byte value corresponding to the first row: +// +// 0x01 0x23 0x45 0x67 0x89 0xab 0xcd 0xef +// +// Since values were written byte by byte using little endian, the result of the operation is: +// +// 0xef 0xcd 0xab 0x89 0x67 0x45 0x23 0x01 -> 0xefcdab8967452301. 
+func TestMsrRead(t *testing.T) { + testCases := []struct { + name string + cpuMsrPath string + offset uint32 + timeout time.Duration + expected uint64 + err error + }{ + { + name: "MsrFileNotExists", + cpuMsrPath: "testdata/cpu-msr-cpuID-msr-not-exist/0/msr", + offset: 0x0, + expected: 0, + err: errors.New("open testdata/cpu-msr-cpuID-msr-not-exist/0/msr: no such file or directory"), + }, + { + name: "ZeroBytesOffset", + cpuMsrPath: "testdata/cpu-msr/0/msr", + offset: 0x0, + expected: 0xefcdab8967452301, + err: nil, + }, + { + name: "4BytesOffset", + cpuMsrPath: "testdata/cpu-msr/0/msr", + offset: 0x4, + expected: 0x98badcfeefcdab89, + err: nil, + }, + { + name: "8BytesOffset", + cpuMsrPath: "testdata/cpu-msr/0/msr", + offset: 0x8, + expected: 0x1032547698badcfe, + err: nil, + }, + { + name: "OutOfBoundsOffset", + cpuMsrPath: "testdata/cpu-msr/0/msr", + offset: 0xb, + expected: 0, + err: errors.New("offset 0xb is out-of-bounds"), + }, + { + name: "MsrFileNotExistsWithExtremelyLargeTimeout", + cpuMsrPath: "testdata/cpu-msr-cpuID-msr-not-exist/0/msr", + offset: 0x0, + timeout: time.Hour, + expected: 0, + err: errors.New("open testdata/cpu-msr-cpuID-msr-not-exist/0/msr: no such file or directory"), + }, + { + name: "ZeroBytesOffsetWithExtremelyLargeTimeout", + cpuMsrPath: "testdata/cpu-msr/0/msr", + offset: 0x0, + timeout: time.Hour, + expected: 0xefcdab8967452301, + err: nil, + }, + { + name: "4BytesOffsetWithExtremelyLargeTimeout", + cpuMsrPath: "testdata/cpu-msr/0/msr", + offset: 0x4, + timeout: time.Hour, + expected: 0x98badcfeefcdab89, + err: nil, + }, + { + name: "8BytesOffsetWithExtremelyLargeTimeout", + cpuMsrPath: "testdata/cpu-msr/0/msr", + offset: 0x8, + timeout: time.Hour, + expected: 0x1032547698badcfe, + err: nil, + }, + { + name: "OutOfBoundsOffsetWithExtremelyLargeTimeout", + cpuMsrPath: "testdata/cpu-msr/0/msr", + offset: 0xb, + timeout: time.Hour, + expected: 0, + err: errors.New("offset 0xb is out-of-bounds"), + }, + { + name: 
"MsrFileNotExistsWithExtremelySmallTimeout", + cpuMsrPath: "testdata/cpu-msr-cpuID-msr-not-exist/0/msr", + offset: 0x0, + timeout: time.Nanosecond, + expected: 0, + err: errors.New("open testdata/cpu-msr-cpuID-msr-not-exist/0/msr: no such file or directory"), + }, + { + name: "4BytesOffsetWithExtremelySmallTimeout", + cpuMsrPath: "testdata/cpu-msr/0/msr", + offset: 0x4, + timeout: time.Nanosecond, + expected: 0, + err: errors.New("timeout when reading file at offset 0x4"), + }, + { + name: "8BytesOffsetWithExtremelySmallTimeout", + cpuMsrPath: "testdata/cpu-msr/0/msr", + offset: 0x8, + timeout: time.Nanosecond, + expected: 0, + err: errors.New("timeout when reading file at offset 0x8"), + }, + { + name: "OutOfBoundsOffsetWithExtremelySmallTimeout", + cpuMsrPath: "testdata/cpu-msr/0/msr", + offset: 0xb, + timeout: time.Nanosecond, + expected: 0, + err: errors.New("timeout when reading file at offset 0xb"), + }, + } + + for _, tc := range testCases { + t.Run(tc.name, func(t *testing.T) { + m := &msr{ + path: tc.cpuMsrPath, + timeout: tc.timeout, + } + + out, err := m.read(tc.offset) + if tc.err != nil { + require.ErrorContains(t, err, tc.err.Error()) + } else { + require.NoError(t, err) + } + require.Equal(t, tc.expected, out) + }) + } +} + +func TestMsrReadAll(t *testing.T) { + testCases := []struct { + name string + cpuMsrPath string + offsets []uint32 + timeout time.Duration + expected map[uint32]uint64 + err error + }{ + { + name: "NoError", + cpuMsrPath: "testdata/cpu-msr/0/msr", + offsets: []uint32{0x00, 0x02, 0x04, 0x05, 0x06, 0x08}, + expected: map[uint32]uint64{ + 0x00: 0xefcdab8967452301, + 0x02: 0xdcfeefcdab896745, + 0x04: 0x98badcfeefcdab89, + 0x05: 0x7698badcfeefcdab, + 0x06: 0x547698badcfeefcd, + 0x08: 0x1032547698badcfe, + }, + err: nil, + }, + { + name: "OutOfBoundsMsrError", + cpuMsrPath: "testdata/cpu-msr/0/msr", + offsets: []uint32{0x00, 0x02, 0x04, 0x05, 0x06, 0x0b}, + expected: nil, + err: errors.New("is out-of-bounds"), + }, + { + name: 
"MsrFileNotExist", + cpuMsrPath: "testdata/cpu-msr-cpuID-msr-not-exist/0/msr", + offsets: []uint32{0x00, 0x02, 0x04, 0x05, 0x06, 0x0b}, + expected: nil, + err: errors.New("open testdata/cpu-msr-cpuID-msr-not-exist/0/msr: no such file or directory"), + }, + { + name: "NoErrorWithExtremelyLargeTimeout", + cpuMsrPath: "testdata/cpu-msr/0/msr", + offsets: []uint32{0x00, 0x02, 0x04, 0x05, 0x06, 0x08}, + timeout: time.Hour, + expected: map[uint32]uint64{ + 0x00: 0xefcdab8967452301, + 0x02: 0xdcfeefcdab896745, + 0x04: 0x98badcfeefcdab89, + 0x05: 0x7698badcfeefcdab, + 0x06: 0x547698badcfeefcd, + 0x08: 0x1032547698badcfe, + }, + err: nil, + }, + { + name: "OutOfBoundsMsrErrorWithExtremelyLargeTimeout", + cpuMsrPath: "testdata/cpu-msr/0/msr", + offsets: []uint32{0x00, 0x02, 0x04, 0x05, 0x06, 0x0b}, + timeout: time.Hour, + expected: nil, + err: errors.New("is out-of-bounds"), + }, + { + name: "NoError", + cpuMsrPath: "testdata/cpu-msr/0/msr", + offsets: []uint32{0x00, 0x02, 0x04, 0x05, 0x06, 0x08}, + timeout: time.Nanosecond, + expected: nil, + err: errors.New("error reading MSR offsets: timeout when reading file at offset"), + }, + } + + for _, tc := range testCases { + t.Run(tc.name, func(t *testing.T) { + m := &msr{ + path: tc.cpuMsrPath, + timeout: tc.timeout, + } + + out, err := m.readAll(tc.offsets) + if tc.err != nil { + require.ErrorContains(t, err, tc.err.Error()) + } else { + require.NoError(t, err) + } + require.Equal(t, tc.expected, out) + }) + } +} + +func TestNewMsrWithStorage(t *testing.T) { + t.Run("WithoutOffsets", func(t *testing.T) { + reqOffsets := []uint32{} + cpuMsrDir := "testdata/cpu-msr/0" + + m, err := newMsrWithStorage(cpuMsrDir, reqOffsets, 0) + require.Nil(t, m) + require.ErrorContains(t, err, "no offsets were provided") + }) + + t.Run("WithOffsets", func(t *testing.T) { + reqOffsets := []uint32{c3Residency, c6Residency, c7Residency, maxFreqClockCount, actualFreqClockCount, timestampCounter} + cpuMsrDir := "testdata/cpu-msr/0" + cpuMsrFile := 
filepath.Join(cpuMsrDir, msrFile) + + expected := &msrWithStorage{ + msrReg: &msr{ + path: cpuMsrFile, + cpuID: 0, + }, + offsets: reqOffsets, + offsetValues: map[uint32]uint64{}, + offsetDeltas: map[uint32]uint64{}, + } + + m, err := newMsrWithStorage(cpuMsrDir, reqOffsets, 0) + require.NoError(t, err) + require.Equal(t, expected, m) + }) +} + +func TestMsrWithStorageGetters(t *testing.T) { + mReg, err := newMsr("testdata/cpu-msr/0", 0) + require.NoError(t, err) + + expectedValues := map[uint32]uint64{ + 0x00: 2, + 0x01: 2, + 0x02: 2, + 0x03: 2, + 0x04: 2, + 0x05: 2, + } + + expectedDeltas := map[uint32]uint64{ + 0x00: 1, + 0x01: 1, + 0x02: 1, + 0x03: 1, + 0x04: 1, + 0x05: 1, + } + + expectedTimestampDelta := 10 * time.Second + + m := &msrWithStorage{ + msrReg: mReg, + timestampDelta: expectedTimestampDelta, + offsetValues: expectedValues, + offsetDeltas: expectedDeltas, + } + require.Equal(t, expectedValues, m.getOffsetValues()) + require.Equal(t, expectedDeltas, m.getOffsetDeltas()) + require.Equal(t, expectedTimestampDelta, m.getTimestampDelta()) +} + +func TestMsrWithStorageSetters(t *testing.T) { + mReg, err := newMsr("testdata/cpu-msr/0", 0) + require.NoError(t, err) + + m := &msrWithStorage{ + msrReg: mReg, + offsetValues: nil, + offsetDeltas: nil, + } + + expectedValues := map[uint32]uint64{ + 0x00: 2, + 0x01: 2, + 0x02: 2, + 0x03: 2, + 0x04: 2, + 0x05: 2, + } + + expectedDeltas := map[uint32]uint64{ + 0x00: 1, + 0x01: 1, + 0x02: 1, + 0x03: 1, + 0x04: 1, + 0x05: 1, + } + + m.setOffsetValues(expectedValues) + m.setOffsetDeltas(expectedDeltas) + require.Equal(t, expectedValues, m.getOffsetValues()) + require.Equal(t, expectedDeltas, m.getOffsetDeltas()) +} + +type msrTimeSensitiveSuite struct { + suite.Suite +} + +func (s *msrTimeSensitiveSuite) SetupTest() { + setFakeClock() + fakeClock.Set(time.Now()) +} + +func (s *msrTimeSensitiveSuite) TearDownTest() { + unsetFakeClock() +} + +func (s *msrTimeSensitiveSuite) TestMsrWithStorageUpdate() { + mReg, err := 
newMsr("testdata/cpu-msr/0", 0) + s.Require().NoError(err) + + m := &msrWithStorage{ + msrReg: mReg, + offsets: []uint32{0x00, 0x02, 0x04, 0x05, 0x06, 0x08}, + offsetValues: map[uint32]uint64{}, + offsetDeltas: map[uint32]uint64{}, + timestamp: fakeClock.Now(), + } + + expectedValues := map[uint32]uint64{ + 0x00: uint64(0xefcdab8967452301), + 0x02: uint64(0xdcfeefcdab896745), + 0x04: uint64(0x98badcfeefcdab89), + 0x05: uint64(0x7698badcfeefcdab), + 0x06: uint64(0x547698badcfeefcd), + 0x08: uint64(0x1032547698badcfe), + } + + d1 := 10 * time.Second + fakeClock.Add(d1) + + s.Require().NoError(m.update()) + s.Require().Equal(expectedValues, m.getOffsetValues()) + s.Require().Equal(expectedValues, m.getOffsetDeltas()) + s.Require().Equal(d1, m.getTimestampDelta()) + + expectedDeltas := map[uint32]uint64{ + 0x00: 0, + 0x02: 0, + 0x04: 0, + 0x05: 0, + 0x06: 0, + 0x08: 0, + } + + d2 := 5 * time.Second + fakeClock.Add(d2) + + s.Require().NoError(m.update()) + s.Require().Equal(expectedValues, m.getOffsetValues()) + s.Require().Equal(expectedDeltas, m.getOffsetDeltas()) + s.Require().Equal(d2, m.getTimestampDelta()) +} + +func TestMsrTimeSensitive(t *testing.T) { + suite.Run(t, new(msrTimeSensitiveSuite)) +} + +func (s *msrTimeSensitiveSuite) TestInitMsrMap() { + testCases := []struct { + name string + msrPath string + offsets []uint32 + cpuIDs []int + msrMap map[int]msrRegWithStorage + err error + }{ + { + name: "OffsetsNotProvided", + offsets: []uint32{}, + msrMap: nil, + err: errors.New("MSR offsets argument cannot be empty"), + }, + { + name: "InvalidMsrBasePath", + msrPath: "", + offsets: []uint32{0x00, 0x02, 0x04, 0x05, 0x06, 0x08}, + msrMap: nil, + err: errors.New("base path for MSR files cannot be an empty string"), + }, + { + name: "MsrBasePathNotExist", + msrPath: "/dummy/path", + offsets: []uint32{0x00, 0x02, 0x04, 0x05, 0x06, 0x08}, + msrMap: nil, + err: errors.New("invalid MSR base path \"/dummy/path\""), + }, + { + name: "MsrBasePathCPUDirectoriesNotExist", + 
msrPath: "testdata/cpu-msr-directories-not-exist", + offsets: []uint32{0x00, 0x02, 0x04, 0x05, 0x06, 0x08}, + msrMap: nil, + err: errors.New("could not find valid CPU MSR files for path: \"testdata/cpu-msr-directories-not-exist\""), + }, + { + name: "MsrBasePathIsNotADir", + msrPath: "testdata/cpu-msr/0/msr", + offsets: []uint32{0x00, 0x02, 0x04, 0x05, 0x06, 0x08}, + msrMap: nil, + err: errors.New("error reading directory \"testdata/cpu-msr/0/msr\""), + }, + { + name: "MsrBasePathDirectoriesCPUIDNotFound", + msrPath: "testdata/cpu-msr-invalid-cpuID-directories", + offsets: []uint32{0x00, 0x02, 0x04, 0x05, 0x06, 0x08}, + msrMap: nil, + err: errors.New("could not find valid CPU MSR files for path: \"testdata/cpu-msr-invalid-cpuID-directories\""), + }, + { + name: "MsrPathWithMissingCPUIDMsrFile", + msrPath: "testdata/cpu-msr-cpuID-msr-not-exist", + offsets: []uint32{0x00, 0x02, 0x04, 0x05, 0x06, 0x08}, + msrMap: nil, + err: errors.New("error creating MSR register for CPU path \"testdata/cpu-msr-cpuID-msr-not-exist/0\": invalid MSR file for cpu ID 0: " + + "file \"testdata/cpu-msr-cpuID-msr-not-exist/0/msr\" does not exist"), + }, + { + name: "MsrPathWithCPUIDMsrSymlink", + msrPath: "testdata/cpu-msr-cpuID-msr-softlink", + offsets: []uint32{0x00, 0x02, 0x04, 0x05, 0x06, 0x08}, + msrMap: nil, + err: errors.New("error creating MSR register for CPU path \"testdata/cpu-msr-cpuID-msr-softlink/1\": invalid MSR file for cpu ID 1: " + + "file \"testdata/cpu-msr-cpuID-msr-softlink/1/msr\" is a symlink"), + }, + { + name: "OffsetoutOfBounds", + msrPath: "testdata/cpu-msr", + offsets: []uint32{0x00, 0x02, 0x04, 0x05, 0x06, 0x0b}, + msrMap: nil, + err: errors.New("error initializing the MSR register storage for CPU ID 0: " + + "error reading MSR offsets: offset 0xb is out-of-bounds"), + }, + { + name: "MsrPathValid", + msrPath: "testdata/cpu-msr", + offsets: []uint32{0x00, 0x02, 0x04, 0x05, 0x06, 0x08}, + msrMap: map[int]msrRegWithStorage{ + 0: &msrWithStorage{ + msrReg: &msr{ + 
cpuID: 0, + path: "testdata/cpu-msr/0/msr", + }, + offsets: []uint32{0x00, 0x02, 0x04, 0x05, 0x06, 0x08}, + timestamp: fakeClock.Now(), + timestampDelta: fakeClock.Now().Sub(time.Time{}), + offsetValues: map[uint32]uint64{ + 0x00: uint64(0xefcdab8967452301), + 0x02: uint64(0xdcfeefcdab896745), + 0x04: uint64(0x98badcfeefcdab89), + 0x05: uint64(0x7698badcfeefcdab), + 0x06: uint64(0x547698badcfeefcd), + 0x08: uint64(0x1032547698badcfe), + }, + offsetDeltas: map[uint32]uint64{ + 0x00: uint64(0xefcdab8967452301), + 0x02: uint64(0xdcfeefcdab896745), + 0x04: uint64(0x98badcfeefcdab89), + 0x05: uint64(0x7698badcfeefcdab), + 0x06: uint64(0x547698badcfeefcd), + 0x08: uint64(0x1032547698badcfe), + }, + }, + 1: &msrWithStorage{ + msrReg: &msr{ + cpuID: 1, + path: "testdata/cpu-msr/1/msr", + }, + offsets: []uint32{0x00, 0x02, 0x04, 0x05, 0x06, 0x08}, + offsetValues: map[uint32]uint64{ + 0x00: uint64(0xefcdab8967452301), + 0x02: uint64(0xdcfeefcdab896745), + 0x04: uint64(0x98badcfeefcdab89), + 0x05: uint64(0x7698badcfeefcdab), + 0x06: uint64(0x547698badcfeefcd), + 0x08: uint64(0x1032547698badcfe), + }, + offsetDeltas: map[uint32]uint64{ + 0x00: uint64(0xefcdab8967452301), + 0x02: uint64(0xdcfeefcdab896745), + 0x04: uint64(0x98badcfeefcdab89), + 0x05: uint64(0x7698badcfeefcdab), + 0x06: uint64(0x547698badcfeefcd), + 0x08: uint64(0x1032547698badcfe), + }, + timestamp: fakeClock.Now(), + timestampDelta: fakeClock.Now().Sub(time.Time{}), + }, + 10: &msrWithStorage{ + msrReg: &msr{ + cpuID: 10, + path: "testdata/cpu-msr/10/msr", + }, + offsets: []uint32{0x00, 0x02, 0x04, 0x05, 0x06, 0x08}, + offsetValues: map[uint32]uint64{ + 0x00: uint64(0xefcdab8967452301), + 0x02: uint64(0xdcfeefcdab896745), + 0x04: uint64(0x98badcfeefcdab89), + 0x05: uint64(0x7698badcfeefcdab), + 0x06: uint64(0x547698badcfeefcd), + 0x08: uint64(0x1032547698badcfe), + }, + offsetDeltas: map[uint32]uint64{ + 0x00: uint64(0xefcdab8967452301), + 0x02: uint64(0xdcfeefcdab896745), + 0x04: uint64(0x98badcfeefcdab89), + 
0x05: uint64(0x7698badcfeefcdab), + 0x06: uint64(0x547698badcfeefcd), + 0x08: uint64(0x1032547698badcfe), + }, + timestamp: fakeClock.Now(), + timestampDelta: fakeClock.Now().Sub(time.Time{}), + }, + 100: &msrWithStorage{ + msrReg: &msr{ + cpuID: 100, + path: "testdata/cpu-msr/100/msr", + }, + offsets: []uint32{0x00, 0x02, 0x04, 0x05, 0x06, 0x08}, + offsetValues: map[uint32]uint64{ + 0x00: uint64(0xefcdab8967452301), + 0x02: uint64(0xdcfeefcdab896745), + 0x04: uint64(0x98badcfeefcdab89), + 0x05: uint64(0x7698badcfeefcdab), + 0x06: uint64(0x547698badcfeefcd), + 0x08: uint64(0x1032547698badcfe), + }, + offsetDeltas: map[uint32]uint64{ + 0x00: uint64(0xefcdab8967452301), + 0x02: uint64(0xdcfeefcdab896745), + 0x04: uint64(0x98badcfeefcdab89), + 0x05: uint64(0x7698badcfeefcdab), + 0x06: uint64(0x547698badcfeefcd), + 0x08: uint64(0x1032547698badcfe), + }, + timestamp: fakeClock.Now(), + timestampDelta: fakeClock.Now().Sub(time.Time{}), + }, + }, + err: nil, + }, + { + name: "MsrPathValidSpecified_0_10", + msrPath: "testdata/cpu-msr", + offsets: []uint32{0x00, 0x02}, + cpuIDs: []int{0, 10}, + msrMap: map[int]msrRegWithStorage{ + 0: &msrWithStorage{ + msrReg: &msr{ + cpuID: 0, + path: "testdata/cpu-msr/0/msr", + }, + offsets: []uint32{0x00, 0x02}, + offsetValues: map[uint32]uint64{ + 0x00: uint64(0xefcdab8967452301), + 0x02: uint64(0xdcfeefcdab896745), + }, + offsetDeltas: map[uint32]uint64{ + 0x00: uint64(0xefcdab8967452301), + 0x02: uint64(0xdcfeefcdab896745), + }, + timestamp: fakeClock.Now(), + timestampDelta: fakeClock.Now().Sub(time.Time{}), + }, + 10: &msrWithStorage{ + msrReg: &msr{ + cpuID: 10, + path: "testdata/cpu-msr/10/msr", + }, + offsets: []uint32{0x00, 0x02}, + offsetValues: map[uint32]uint64{ + 0x00: uint64(0xefcdab8967452301), + 0x02: uint64(0xdcfeefcdab896745), + }, + offsetDeltas: map[uint32]uint64{ + 0x00: uint64(0xefcdab8967452301), + 0x02: uint64(0xdcfeefcdab896745), + }, + timestamp: fakeClock.Now(), + timestampDelta: 
fakeClock.Now().Sub(time.Time{}), + }, + }, + err: nil, + }, + } + + for _, tc := range testCases { + s.Run(tc.name, func() { + m := &msrDataWithStorage{ + msrPath: tc.msrPath, + msrOffsets: tc.offsets, + } + + err := m.initMsrMap(tc.cpuIDs, 0) + s.Require().Equal(tc.msrMap, m.msrMap) + if tc.err != nil { + s.Require().ErrorContains(err, tc.err.Error()) + } else { + s.Require().NoError(err) + } + }) + } +} + +func TestMsrDataWithStorageRead(t *testing.T) { + testCases := []struct { + name string + msrPath string + cpuID int + offset uint32 + expected uint64 + err error + }{ + { + name: "InvalidCPUID", + msrPath: "testdata/cpu-msr", + cpuID: 2, + expected: 0, + err: errors.New("could not find MSR register for CPU ID: 2"), + }, + { + name: "ZeroBytesOffset", + msrPath: "testdata/cpu-msr", + cpuID: 0, + offset: 0x0, + expected: uint64(0xefcdab8967452301), + err: nil, + }, + { + name: "4BytesOffset", + msrPath: "testdata/cpu-msr", + cpuID: 0, + offset: 0x4, + expected: uint64(0x98badcfeefcdab89), + err: nil, + }, + { + name: "8BytesOffset", + msrPath: "testdata/cpu-msr", + cpuID: 0, + offset: 0x8, + expected: uint64(0x1032547698badcfe), + err: nil, + }, + { + name: "OutOfBoundsOffset", + msrPath: "testdata/cpu-msr", + cpuID: 0, + offset: 0xb, + expected: uint64(0), + err: errors.New("offset 0xb is out-of-bounds"), + }, + } + + mReg, err := newMsr("testdata/cpu-msr/0", 0) + require.NoError(t, err) + m := &msrDataWithStorage{ + msrMap: map[int]msrRegWithStorage{ + 0: &msrWithStorage{ + msrReg: mReg, + }, + }, + } + + for _, tc := range testCases { + t.Run(tc.name, func(t *testing.T) { + out, errRead := m.read(tc.offset, tc.cpuID) + require.Equal(t, tc.expected, out) + if tc.err != nil { + require.ErrorContains(t, errRead, tc.err.Error()) + } else { + require.NoError(t, errRead) + } + }) + } +} + +func TestMsrDataWithStorageIsMsrLoaded(t *testing.T) { + testCases := []struct { + desc string + filePath string + expected bool + err error + }{ + { + desc: "EmptyFilename", + 
filePath: "", + expected: false, + err: errors.New("file path is empty"), + }, + { + desc: "FileNotExist", + filePath: "dummy_proc_modules_file", + expected: false, + err: errors.New("file \"dummy_proc_modules_file\" does not exist"), + }, + { + desc: "PathIsADir", + filePath: "testdata", + expected: false, + err: errors.New("could not read file \"testdata\": read testdata: is a directory"), + }, + { + desc: "Symlink", + filePath: "testdata/symlink", + expected: false, + err: errors.New("file \"testdata/symlink\" is a symlink"), + }, + { + desc: "NotLoaded", + filePath: "testdata/proc_modules_msr_not_loaded", + expected: false, + err: nil, + }, + { + desc: "Loaded", + filePath: "testdata/proc_modules_msr_loaded", + expected: true, + err: nil, + }, + } + + for _, tc := range testCases { + t.Run(tc.desc, func(t *testing.T) { + m := &msrDataWithStorage{} + out, err := m.isMsrLoaded(tc.filePath) + require.Equal(t, tc.expected, out) + if tc.err != nil { + require.EqualError(t, err, tc.err.Error()) + } else { + require.NoError(t, err) + } + }) + } +} + +func TestMsrDataWithStorageUpdate(t *testing.T) { + testCases := []struct { + name string + msrPath string + cpuID int + offsets []uint32 + expectedMsrInfo map[uint32]uint64 + expectedMsrInfoDelta map[uint32]uint64 + err error + }{ + { + name: "InvalidCPUID", + msrPath: "testdata/cpu-msr", + cpuID: 2, + err: errors.New("could not find MSR register for CPU ID: 2"), + }, + { + name: "Valid", + msrPath: "testdata/cpu-msr", + cpuID: 0, + offsets: []uint32{0x00, 0x02, 0x04, 0x05, 0x06, 0x08}, + expectedMsrInfo: map[uint32]uint64{ + 0x00: uint64(0xefcdab8967452301), + 0x02: uint64(0xdcfeefcdab896745), + 0x04: uint64(0x98badcfeefcdab89), + 0x05: uint64(0x7698badcfeefcdab), + 0x06: uint64(0x547698badcfeefcd), + 0x08: uint64(0x1032547698badcfe), + }, + expectedMsrInfoDelta: map[uint32]uint64{ + 0x00: uint64(0xefcdab8967452301), + 0x02: uint64(0xdcfeefcdab896745), + 0x04: uint64(0x98badcfeefcdab89), + 0x05: 
uint64(0x7698badcfeefcdab), + 0x06: uint64(0x547698badcfeefcd), + 0x08: uint64(0x1032547698badcfe), + }, + err: nil, + }, + } + + for _, tc := range testCases { + t.Run(tc.name, func(t *testing.T) { + m := &msrDataWithStorage{ + msrMap: map[int]msrRegWithStorage{ + 0: &msrWithStorage{ + msrReg: &msr{ + path: "testdata/cpu-msr/0/msr", + cpuID: 0, + }, + offsets: tc.offsets, + offsetValues: map[uint32]uint64{}, + offsetDeltas: map[uint32]uint64{}, + }, + }, + } + + err := m.update(tc.cpuID) + if tc.err != nil { + require.ErrorContains(t, err, tc.err.Error()) + } else { + require.NoError(t, err) + require.Equal(t, tc.expectedMsrInfo, m.msrMap[tc.cpuID].getOffsetValues()) + require.Equal(t, tc.expectedMsrInfoDelta, m.msrMap[tc.cpuID].getOffsetDeltas()) + } + }) + } +} + +func TestMsrDataWithStorageGetOffsetDeltas(t *testing.T) { + m := &msrDataWithStorage{ + msrMap: map[int]msrRegWithStorage{ + 0: &msrWithStorage{ + msrReg: &msr{ + path: "testdata/cpu-msr/0/msr", + cpuID: 0, + }, + offsets: []uint32{0x00, 0x02, 0x04}, + offsetValues: map[uint32]uint64{}, + offsetDeltas: map[uint32]uint64{}, + }, + }, + } + + t.Run("InvalidCPUID", func(t *testing.T) { + cpuID := 2 + deltas, err := m.getOffsetDeltas(cpuID) + require.Nil(t, deltas) + require.ErrorContains(t, err, fmt.Sprintf("could not find MSR register for CPU ID: %v", cpuID)) + }) + + t.Run("WithoutUpdate", func(t *testing.T) { + cpuID := 0 + deltasExp := map[uint32]uint64{} + deltasOut, err := m.getOffsetDeltas(cpuID) + require.NoError(t, err) + require.Equal(t, deltasExp, deltasOut) + }) + + t.Run("WithUpdate", func(t *testing.T) { + cpuID := 0 + + require.NoError(t, m.update(cpuID)) + + deltasExp := map[uint32]uint64{ + 0x00: uint64(0xefcdab8967452301), + 0x02: uint64(0xdcfeefcdab896745), + 0x04: uint64(0x98badcfeefcdab89), + } + deltasOut, err := m.getOffsetDeltas(cpuID) + require.NoError(t, err) + require.Equal(t, deltasExp, deltasOut) + + require.NoError(t, m.update(cpuID)) + deltasExp = map[uint32]uint64{ + 0x00: 
0, + 0x02: 0, + 0x04: 0, + } + deltasOut, err = m.getOffsetDeltas(cpuID) + require.NoError(t, err) + require.Equal(t, deltasExp, deltasOut) + }) +} + +func (s *msrTimeSensitiveSuite) TestMsrDataWithStorageGetTimestampDelta() { + m := &msrDataWithStorage{ + msrMap: map[int]msrRegWithStorage{ + 0: &msrWithStorage{ + msrReg: &msr{ + path: "testdata/cpu-msr/0/msr", + cpuID: 0, + }, + offsets: []uint32{0x00, 0x02, 0x04}, + offsetValues: map[uint32]uint64{}, + offsetDeltas: map[uint32]uint64{}, + }, + }, + } + + s.Run("InvalidCPUID", func() { + cpuID := 2 + tsDeltaExp := time.Duration(0) + tsDeltaOut, err := m.getTimestampDelta(cpuID) + s.Require().Equal(tsDeltaExp, tsDeltaOut) + s.Require().ErrorContains(err, fmt.Sprintf("could not find MSR register for CPU ID: %v", cpuID)) + }) + + s.Run("WithoutUpdate", func() { + cpuID := 0 + tsDeltaExp := time.Duration(0) + tsDeltaOut, err := m.getTimestampDelta(cpuID) + s.Require().NoError(err) + s.Require().Equal(tsDeltaExp, tsDeltaOut) + }) + + s.Run("WithUpdate", func() { + cpuID := 0 + + s.Require().NoError(m.update(cpuID)) + + d := 10 * time.Second + fakeClock.Add(d) + + s.Require().NoError(m.update(cpuID)) + + tsDeltaOut, err := m.getTimestampDelta(cpuID) + s.Require().NoError(err) + s.Require().Equal(d, tsDeltaOut) + }) +} diff --git a/perf.go b/perf.go new file mode 100644 index 0000000..bb52ab8 --- /dev/null +++ b/perf.go @@ -0,0 +1,558 @@ +// Copyright (C) 2023 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 + +//go:build linux && amd64 + +package powertelemetry + +import ( + "bytes" + "errors" + "fmt" + "math/big" + "os" + "strconv" + "strings" + "syscall" + + ev "github.com/intel/iaevents" + "golang.org/x/sync/errgroup" +) + +// File path which monitors the maximum number of file-handles that the Linux +// kernel can allocate. +const fileMaxPath = "/proc/sys/fs/file-max" + +// fileInfoProvider reads contents of files and provides the maximum number of +// file descriptors that a process may allocate. 
+// TODO: Consider to move rlimit into a new single method interface. +// TODO: Move this interface to a separate file. +type fileInfoProvider interface { + // readFile reads the contents of a file. + readFile(path string) ([]byte, error) + + // rlimit returns the maximum number of file descriptors that a process may allocate. + rlimit() (uint64, error) +} + +// fsHelper implements fileInfoProvider interface. +type fsHelper struct{} + +// readFile reads the contents of the given file path. +func (*fsHelper) readFile(path string) ([]byte, error) { + return os.ReadFile(path) +} + +// rlimit returns the maximum number of file descriptors that a process may allocate. +// It makes a syscall to get RLIMIT_NOFILE property. +func (*fsHelper) rlimit() (uint64, error) { + var rLimit syscall.Rlimit + err := syscall.Getrlimit(syscall.RLIMIT_NOFILE, &rLimit) + return rLimit.Cur, err +} + +// getMaxFd is a helper function that takes a fileInfoProvider interface and returns +// the maximum number of file-handles that the Linux kernel can allocate. +func getMaxFd(fp fileInfoProvider) (uint64, error) { + buf, err := fp.readFile(fileMaxPath) + if err != nil { + return 0, fmt.Errorf("could not read file %q: %w", fileMaxPath, err) + } + + maxFd, err := strconv.ParseUint(string(bytes.TrimRight(buf, "\n")), 10, 64) + if err != nil { + return 0, fmt.Errorf("could not parse file content %v to uint64: %w", maxFd, err) + } + + return maxFd, nil +} + +// checkFileDescriptors is a helper function that takes the number of estimated file descriptors +// needed and a fileInfoProvider, and returns nil if the number of estimated file descriptors does +// not exceed the maximum number of file-handles that the Linux kernel can allocate. Otherwise, returns +// an error. +// TODO: Add information about max number of file handles into README.md. 
+func checkFileDescriptors(fd uint64, reader fileInfoProvider) error { + maxFd, err := getMaxFd(reader) + if err != nil { + return fmt.Errorf("error retrieving kernel max file descriptors: %w", err) + } + + if fd > maxFd { + return fmt.Errorf("required file descriptors %d, exceeds the maximum number of available file descriptors %d", fd, maxFd) + } + + limit, err := reader.rlimit() + if err != nil { + return fmt.Errorf("error retrieving process max file descriptors: %w", err) + } + + if fd > limit { + return fmt.Errorf("required file descriptors %d, exceeds the maximum number of available file descriptors that a process may allocate %d", fd, limit) + } + return nil +} + +// multiply is a helper function that calculates the product of two uint64 values. +// If overflow occurs, it returns an error. +func multiply(a, b uint64) (uint64, error) { + bigA := new(big.Int).SetUint64(a) + bigB := new(big.Int).SetUint64(b) + + res := new(big.Int).Mul(bigA, bigB) + if !res.IsUint64() { + return 0, fmt.Errorf("value could not be represented as uint64: %v", res) + } + return res.Uint64(), nil +} + +// c0StateType is an enum type to identify event names corresponding to C0 substate metrics. +type c0StateType int + +// c0StateType enum defines supported event names for C0 substate metrics. +const ( + c01 c0StateType = iota + c02 + c0Wait + thread +) + +// Helper function to return a string representation of c0StateType. +func (t c0StateType) String() string { + switch t { + case c01: + return "CPU_CLK_UNHALTED.C01" + case c02: + return "CPU_CLK_UNHALTED.C02" + case c0Wait: + return "CPU_CLK_UNHALTED.C0_WAIT" + case thread: + return "CPU_CLK_UNHALTED.THREAD" + default: + return "" + } +} + +// coreMetric represents the values of a core event read at a specific time instant. +type coreMetric struct { + name string + cpuID int + + values ev.CounterValue + scaled uint64 +} + +// eventsResolver resolves event names, from a core event group, to custom events which +// be activated. 
+type eventsResolver interface { + resolveEvents(events []string) ([]ev.CustomizableEvent, error) +} + +// eventsResolverImpl implements eventsResolver interface. +type eventsResolverImpl struct { + reader ev.Reader + transformer ev.Transformer +} + +// resolveEvents takes a core event group with event names and resolves them into +// custom events that can be activated. +func (r *eventsResolverImpl) resolveEvents(events []string) ([]ev.CustomizableEvent, error) { + if len(events) == 0 { + return nil, errors.New("event group cannot be empty") + } + + customEvents := make([]ev.CustomizableEvent, len(events)) + for i, event := range events { + var err error + customEvents[i], err = r.resolveEvent(event) + if err != nil { + return nil, fmt.Errorf("error resolving event %q: %w", event, err) + } + } + return customEvents, nil +} + +// resolveEvent takes an event name string and returns a custom event that be activated. +func (r *eventsResolverImpl) resolveEvent(name string) (ev.CustomizableEvent, error) { + if r.transformer == nil { + return ev.CustomizableEvent{}, errors.New("transformer is nil") + } + + perfEvents, err := r.transformer.Transform(r.reader, ev.NewNameMatcher(name)) + if err != nil { + return ev.CustomizableEvent{}, err + } + + if len(perfEvents) == 0 { + return ev.CustomizableEvent{}, errors.New("event could not be resolved") + } + + return ev.CustomizableEvent{ + Event: perfEvents[0], + }, nil +} + +// placementMaker takes a slice of cores and an event, which is the leader of +// the event group, and returns core placements needed for activation of each +// event of the group. +type placementMaker interface { + makeCorePlacement(cpuIDs []int, factory ev.PlacementFactory) ([]ev.PlacementProvider, error) +} + +// placementMakerImpl implements placementMaker interface. +type placementMakerImpl struct{} + +// makeCorePlacement takes a slice of cores and makes core placements for the given +// PlacementProvider. 
+func (*placementMakerImpl) makeCorePlacement(cpuIDs []int, factory ev.PlacementFactory) ([]ev.PlacementProvider, error) { + var cpuPlacements []ev.PlacementProvider + var err error + + switch len(cpuIDs) { + case 0: + return nil, errors.New("no CPU IDs were provided") + case 1: + cpuPlacements, err = ev.NewCorePlacements(factory, cpuIDs[0]) + if err != nil { + return nil, fmt.Errorf("failed to create single core placement: %w", err) + } + default: + cpuPlacements, err = ev.NewCorePlacements(factory, cpuIDs[0], cpuIDs[1:]...) + if err != nil { + return nil, fmt.Errorf("failed to create multiple core placements: %w", err) + } + } + return cpuPlacements, nil +} + +// eventGroupActivator activates custom core events using the given core PlacementProvider. +type eventGroupActivator interface { + activateEventsAsGroup(p ev.PlacementProvider, events []ev.CustomizableEvent) ([]*ev.ActiveEvent, error) +} + +// eventGroupActivatorImpl implements eventGroupActivator interface. +type eventGroupActivatorImpl struct{} + +// activateEventsAsGroup takes a core PlacementProvider and a slice of custom events, and +// returns a slice of events which have been successfully activated. +func (*eventGroupActivatorImpl) activateEventsAsGroup(p ev.PlacementProvider, events []ev.CustomizableEvent) ([]*ev.ActiveEvent, error) { + activeEventGroup, err := ev.ActivateGroup(p, ev.NewEventTargetProcess(-1, 0), events) + return activeEventGroup.Events(), err +} + +// eventsActivator activates a group of core events. +type eventsActivator interface { + activateEvents(customEvents []ev.CustomizableEvent, cores []int) ([]*ev.ActiveEvent, error) +} + +// eventsActivatorImpl implements eventsActivator interface. +type eventsActivatorImpl struct { + placementMaker placementMaker + perfActivator eventGroupActivator +} + +// activateGroup takes a group of core events and activates them. 
+func (a *eventsActivatorImpl) activateEvents(customEvents []ev.CustomizableEvent, cores []int) ([]*ev.ActiveEvent, error) { + if len(customEvents) == 0 { + return nil, errors.New("no custom events provided") + } + + if len(cores) == 0 { + return nil, errors.New("no cores provided") + } + + leader := customEvents[0] + placements, err := a.placementMaker.makeCorePlacement(cores, leader.Event) + if err != nil { + return nil, fmt.Errorf("failed to make core placements: %w", err) + } + + activeEvents := make([]*ev.ActiveEvent, 0) + for _, placement := range placements { + events, err := a.perfActivator.activateEventsAsGroup(placement, customEvents) + if err != nil { + return activeEvents, fmt.Errorf("failed to activate events as a group: %w", err) + } + activeEvents = append(activeEvents, events...) + } + return activeEvents, nil +} + +// valuesReader reads values of an active core event. +type valuesReader interface { + readValue(event *ev.ActiveEvent) (ev.CounterValue, error) +} + +// valuesReaderImpl implements valuesReader interface. +type valuesReaderImpl struct{} + +// readValue takes an active event and returns its values. +// It is a wrapper of ReadValue method of an ev.ActiveEvent value type. +func (*valuesReaderImpl) readValue(event *ev.ActiveEvent) (ev.CounterValue, error) { + return event.ReadValue() +} + +// eventsReader reads the values of a group of active core events. +type eventsReader interface { + readEvents(events []*ev.ActiveEvent) ([]coreMetric, error) +} + +// eventsReaderImpl implements eventsReader interface. +type eventsReaderImpl struct { + eventReader valuesReader +} + +// readEvents takes a group of active core events and returns a slice of coreMetrics. +// Each coreMetric has read values specific for an event name and core. +// TODO: Rework implementation to accept context propagated from top of the call stack. 
+func (r *eventsReaderImpl) readEvents(events []*ev.ActiveEvent) ([]coreMetric, error) { + if len(events) == 0 { + return nil, errors.New("no active events provided") + } + + metrics := make([]coreMetric, len(events)) + errGroup := errgroup.Group{} + for i, event := range events { + if event == nil || event.PerfEvent == nil { + return nil, errors.New("invalid active event") + } + + index := i + activeEvent := event + + errGroup.Go(func() error { + values, err := r.eventReader.readValue(activeEvent) + if err != nil { + return fmt.Errorf("failed to read values for event %q: %w", activeEvent, err) + } + + cpu, _ := activeEvent.PMUPlacement() + metrics[index] = coreMetric{ + values: values, + cpuID: cpu, + name: activeEvent.PerfEvent.Name, + } + return nil + }) + } + + if err := errGroup.Wait(); err != nil { + return nil, err + } + return metrics, nil +} + +// eventDeactivator deactivates an active core event. +type eventDeactivator interface { + deactivateEvent(event *ev.ActiveEvent) error +} + +// eventDeactivatorImpl implements eventDeactivator interface. +type eventDeactivatorImpl struct{} + +// deactivateEvent takes an active core event and deactivates it. If the event could not +// be deactivated successfully an error is returned. This method is a wrapper of Deactivate +// method of ev.ActiveEvent value type. +func (*eventDeactivatorImpl) deactivateEvent(event *ev.ActiveEvent) error { + return event.Deactivate() +} + +// eventsDeactivator deactivates a group of active core events. +type eventsDeactivator interface { + deactivateEvents(events []*ev.ActiveEvent) ([]*ev.ActiveEvent, error) +} + +// eventsDeactivatorImpl implements eventsDeactivator interface. +type eventsDeactivatorImpl struct { + perfDeactivator eventDeactivator +} + +// deactivateEvents takes a slice of active core events and deactivates them. 
+func (d *eventsDeactivatorImpl) deactivateEvents(events []*ev.ActiveEvent) ([]*ev.ActiveEvent, error) { + var err error + failedToDeactivate := make([]string, 0) + activeEvents := make([]*ev.ActiveEvent, 0) + + for _, event := range events { + if event == nil || event.PerfEvent == nil { + continue + } + + if err := d.perfDeactivator.deactivateEvent(event); err != nil { + failedToDeactivate = append(failedToDeactivate, event.PerfEvent.Name) + activeEvents = append(activeEvents, event) + } + } + + if len(failedToDeactivate) != 0 { + err = fmt.Errorf("failed to deactivate events: %q", strings.Join(failedToDeactivate, ", ")) + } + return activeEvents, err +} + +// perfReader activates, reads and deactivates groups of core events accessible via `perf_events` +// kernel interface. +type perfReader interface { + initResolver(jsonFile string) error + + activate(events []string, cores []int) error + + read() ([]coreMetric, error) + + deactivate() error +} + +// perf implements perfReader interface. It keeps track of the current active events. +type perf struct { + resolver eventsResolver + activator eventsActivator + deactivator eventsDeactivator + valuesReader eventsReader + fileInfoReader fileInfoProvider + + activeEvents []*ev.ActiveEvent +} + +// newPerf takes a path string, corresponding to a JSON file which comprises processor model +// specific events. 
+func newPerf() perfReader { + return &perf{ + activator: &eventsActivatorImpl{ + placementMaker: &placementMakerImpl{}, + perfActivator: &eventGroupActivatorImpl{}, + }, + deactivator: &eventsDeactivatorImpl{&eventDeactivatorImpl{}}, + valuesReader: &eventsReaderImpl{&valuesReaderImpl{}}, + fileInfoReader: &fsHelper{}, + } +} + +func (p *perf) initResolver(jsonFile string) error { + reader := ev.NewFilesReader() + if err := reader.AddFiles(jsonFile); err != nil { + return fmt.Errorf("error adding file to reader: %w", err) + } + + p.resolver = &eventsResolverImpl{ + reader: reader, + transformer: ev.NewPerfTransformer(), + } + return nil +} + +// activate takes a slice of core event names and cores. It resolves the given event +// names into perf events and activates them. If number of file descriptors needed to +// read the events it returns an error. +// TODO: Do not receive events from arguments. +func (p *perf) activate(events []string, cores []int) error { + // resolve + customEvents, err := p.resolver.resolveEvents(events) + if err != nil { + return fmt.Errorf("error resolving event: %w", err) + } + + // calculate file descriptors needed to access all events + numEvents := uint64(len(customEvents)) + numCores := uint64(len(cores)) + fd, err := multiply(numEvents, numCores) + if err != nil { + return err + } + + // check maximum allowed number of file descriptors + err = checkFileDescriptors(fd, p.fileInfoReader) + if err != nil { + return fmt.Errorf("error checking available file descriptors: %w", err) + } + + // activate + p.activeEvents, err = p.activator.activateEvents(customEvents, cores) + if err != nil { + return fmt.Errorf("error during event activation: %w", err) + } + return nil +} + +// deactivate deactivates all active events. If an event or events could not +// be successfully deactivated, an error is returned. 
+func (p *perf) deactivate() error { + var err error + p.activeEvents, err = p.deactivator.deactivateEvents(p.activeEvents) + return err +} + +// read performs a single read of all active events and returns a slice with the metrics for each one. +// Events need to be activated previously by calling resolve method. +// TODO: Rework implementation to accept context propagated from top of the call stack. +func (p *perf) read() ([]coreMetric, error) { + return p.valuesReader.readEvents(p.activeEvents) +} + +// perfReaderWithStorage decorates perfReader with the ability to store core event read +// values and to retrieve all metrics that belong to a specific CPU ID. +type perfReaderWithStorage interface { + perfReader + + update() error + + getCoreMetrics(cpuID int) []coreMetric +} + +// perfWithStorage implements perfReaderWithStorage interface. The content of metrics field +// are the core event values read from the last call to read method. +type perfWithStorage struct { + // TODO: Evaluate implications of either embedding perf or perfReader + perfReader + + metrics []coreMetric +} + +// update reads values for active core events specified by the receiver. It updates the metrics +// field with the latest values returned by read method and calculates scaled value of a metric. +func (p *perfWithStorage) update() error { + var err error + p.metrics, err = p.read() + if err != nil { + return err + } + + for i := range p.metrics { + p.metrics[i].scaled, err = scaleMetricValues(p.metrics[i].values) + if err != nil { + return err + } + } + return nil +} + +// scaleMetricValues calculates scaled value from metric values. Scaled value is equal to +// raw * enabled / running. If running value is equal to 0, then the raw value will be returned. 
+func scaleMetricValues(values ev.CounterValue) (uint64, error) { + enabledBig := new(big.Int).SetUint64(values.Enabled) + runningBig := new(big.Int).SetUint64(values.Running) + rawBig := new(big.Int).SetUint64(values.Raw) + + if values.Enabled != values.Running && values.Running != uint64(0) { + product := new(big.Int).Mul(rawBig, enabledBig) + scaled := new(big.Int).Div(product, runningBig) + + if !scaled.IsUint64() { + return 0, fmt.Errorf("scaled value could not be represented as uint64: %v", scaled) + } + return scaled.Uint64(), nil + } + return rawBig.Uint64(), nil +} + +// getCoreMetrics takes a CPU ID as argument and returns all core metrics specific to this core +// stored in metrics field. +func (p *perfWithStorage) getCoreMetrics(cpuID int) []coreMetric { + metrics := make([]coreMetric, 0) + for _, metric := range p.metrics { + if metric.cpuID == cpuID { + metrics = append(metrics, metric) + } + } + + return metrics +} diff --git a/perf_test.go b/perf_test.go new file mode 100644 index 0000000..78eebaf --- /dev/null +++ b/perf_test.go @@ -0,0 +1,1180 @@ +// Copyright (C) 2023 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 + +//go:build linux && amd64 + +package powertelemetry + +import ( + "errors" + "fmt" + "math" + "testing" + "time" + + ev "github.com/intel/iaevents" + "github.com/stretchr/testify/mock" + "github.com/stretchr/testify/require" +) + +func TestPerf_C0StateType_String(t *testing.T) { + t.Run("C01", func(t *testing.T) { + c0State := c0StateType(0) + require.Equal(t, "CPU_CLK_UNHALTED.C01", c0State.String()) + }) + + t.Run("C02", func(t *testing.T) { + c0State := c0StateType(1) + require.Equal(t, "CPU_CLK_UNHALTED.C02", c0State.String()) + }) + + t.Run("C0_Wait", func(t *testing.T) { + c0State := c0StateType(2) + require.Equal(t, "CPU_CLK_UNHALTED.C0_WAIT", c0State.String()) + }) + + t.Run("Thread", func(t *testing.T) { + c0State := c0StateType(3) + require.Equal(t, "CPU_CLK_UNHALTED.THREAD", c0State.String()) + }) + + 
t.Run("Invalid", func(t *testing.T) { + c0State := c0StateType(4) + require.Equal(t, "", c0State.String()) + }) +} + +type mockTransformer struct { + mock.Mock +} + +func (m *mockTransformer) Transform(reader ev.Reader, matcher ev.Matcher) ([]*ev.PerfEvent, error) { + args := m.Called(reader, matcher) + if args.Get(0) == nil { + return nil, args.Error(1) + } + return args.Get(0).([]*ev.PerfEvent), args.Error(1) +} + +func TestPerf_EventsResolver_ResolveEvent(t *testing.T) { + mError := "error mock" + mEventName := "Event.Mock" + mTransformer := &mockTransformer{} + mResolver := &eventsResolverImpl{ + transformer: mTransformer, + } + + t.Run("TransformerIsNil", func(t *testing.T) { + mResolver := &eventsResolverImpl{} + + _, err := mResolver.resolveEvent(mEventName) + require.Error(t, err) + require.ErrorContains(t, err, "transformer is nil") + }) + + t.Run("TransformReturnsError", func(t *testing.T) { + mTransformer.On("Transform", nil, ev.NewNameMatcher(mEventName)).Return(nil, errors.New(mError)).Once() + + _, err := mResolver.resolveEvent(mEventName) + require.Error(t, err) + require.ErrorContains(t, err, mError) + mTransformer.AssertExpectations(t) + }) + + t.Run("NoTransformedEvents", func(t *testing.T) { + mCustom := ev.CustomizableEvent{} + mTransformer.On("Transform", nil, ev.NewNameMatcher(mEventName)).Return([]*ev.PerfEvent{}, nil).Once() + + res, err := mResolver.resolveEvent(mEventName) + require.Error(t, err) + require.ErrorContains(t, err, "event could not be resolved") + require.Equal(t, mCustom, res) + mTransformer.AssertExpectations(t) + }) + + t.Run("EventSuccessfullyTransformed", func(t *testing.T) { + mPerfEvent := &ev.PerfEvent{ + Name: mEventName, + } + mCustomEvent := ev.CustomizableEvent{ + Event: mPerfEvent, + } + + mTransformer.On("Transform", nil, ev.NewNameMatcher(mEventName)).Return([]*ev.PerfEvent{mPerfEvent}, nil).Once() + res, err := mResolver.resolveEvent(mEventName) + + require.NoError(t, err) + require.Equal(t, res, mCustomEvent) 
+ }) +} + +func TestPerf_EventsResolver_ResolveEvents(t *testing.T) { + t.Run("NoEvents", func(t *testing.T) { + mTransformer := &mockTransformer{} + var mResolver eventsResolver = &eventsResolverImpl{ + transformer: mTransformer, + } + + customEvents, err := mResolver.resolveEvents(nil) + require.Nil(t, customEvents) + require.ErrorContains(t, err, "event group cannot be empty") + mTransformer.AssertExpectations(t) + }) + + t.Run("FailedToResolveEvent", func(t *testing.T) { + events := []string{ + "Event.Mock.1", + "Event.Mock.2", + "Event.Mock.3", + } + + matcher1 := ev.NewNameMatcher(events[0]) + matcher2 := ev.NewNameMatcher(events[1]) + + perfEvent := ev.PerfEvent{Name: events[0]} + + mTransformer := &mockTransformer{} + mTransformer.On("Transform", nil, matcher1).Return([]*ev.PerfEvent{&perfEvent}, nil).Once() + mTransformer.On("Transform", nil, matcher2).Return(nil, errors.New("mock error")).Once() + + var mResolver eventsResolver = &eventsResolverImpl{ + transformer: mTransformer, + } + + customEvents, err := mResolver.resolveEvents(events) + require.Nil(t, customEvents) + require.ErrorContains(t, err, fmt.Sprintf("error resolving event %q", events[1])) + mTransformer.AssertExpectations(t) + }) + + t.Run("EventsResolved", func(t *testing.T) { + events := []string{ + "Event.Mock.1", + "Event.Mock.2", + "Event.Mock.3", + } + + mTransformer := &mockTransformer{} + customEventsExp := []ev.CustomizableEvent{} + for _, event := range events { + matcher := ev.NewNameMatcher(event) + perfEvent := &ev.PerfEvent{Name: event} + mTransformer.On("Transform", nil, matcher).Return([]*ev.PerfEvent{perfEvent}, nil).Once() + + customEventsExp = append(customEventsExp, ev.CustomizableEvent{Event: perfEvent}) + } + + var mResolver eventsResolver = &eventsResolverImpl{ + transformer: mTransformer, + } + + customEvents, err := mResolver.resolveEvents(events) + require.Equal(t, customEventsExp, customEvents) + require.NoError(t, err) + mTransformer.AssertExpectations(t) + }) +} + 
+type mockValuesReader struct { + mock.Mock +} + +func (m *mockValuesReader) readValue(event *ev.ActiveEvent) (ev.CounterValue, error) { + args := m.Called(event) + return args.Get(0).(ev.CounterValue), args.Error(1) +} + +type eventWithValues struct { + event *ev.ActiveEvent + values ev.CounterValue +} + +func TestPerf_EventsReader_ReadEvents(t *testing.T) { + t.Run("NoActiveEvents", func(t *testing.T) { + mReader := &mockValuesReader{} + var mEventsReader eventsReader = &eventsReaderImpl{mReader} + + metrics, err := mEventsReader.readEvents(nil) + require.Nil(t, metrics) + require.ErrorContains(t, err, "no active events provided") + mReader.AssertExpectations(t) + }) + + t.Run("InvalidActiveEvent", func(t *testing.T) { + var mEventsReader eventsReader = &eventsReaderImpl{} + activeEvents := []*ev.ActiveEvent{nil} + + metrics, err := mEventsReader.readEvents(activeEvents) + require.Nil(t, metrics) + require.ErrorContains(t, err, "invalid active event") + + activeEvents = []*ev.ActiveEvent{{PerfEvent: nil}} + + metrics, err = mEventsReader.readEvents(activeEvents) + require.Nil(t, metrics) + require.ErrorContains(t, err, "invalid active event") + }) + + t.Run("FailedToReadValue", func(t *testing.T) { + mReader := &mockValuesReader{} + var mEventsReader eventsReader = &eventsReaderImpl{mReader} + + mErr := errors.New("mock error") + + events := []string{ + "Event.Mock.1", + "Event.Mock.2", + "Event.Mock.3", + } + + activeEvents := []*ev.ActiveEvent{} + for _, event := range events { + activeEvent := &ev.ActiveEvent{ + PerfEvent: &ev.PerfEvent{ + Name: event, + }, + } + activeEvents = append(activeEvents, activeEvent) + } + + mReader.On("readValue", activeEvents[0]).Return(ev.CounterValue{}, nil).Once() + mReader.On("readValue", activeEvents[1]).Return(ev.CounterValue{}, mErr).Once() + mReader.On("readValue", activeEvents[2]).Return(ev.CounterValue{}, nil).Once() + + metrics, err := mEventsReader.readEvents(activeEvents) + require.Nil(t, metrics) + 
require.ErrorContains(t, err, fmt.Sprintf("failed to read values for event %q", activeEvents[1])) + mReader.AssertExpectations(t) + }) + + t.Run("EventValuesRead", func(t *testing.T) { + mReader := &mockValuesReader{} + var mEventsReader eventsReader = &eventsReaderImpl{mReader} + + metricsExp := []coreMetric{} + mEvents := []eventWithValues{ + { + event: &ev.ActiveEvent{PerfEvent: &ev.PerfEvent{Name: "Event.1"}}, + values: ev.CounterValue{ + Raw: 123456789, + Enabled: 1289175421, + Running: 2374652324, + }, + }, + { + event: &ev.ActiveEvent{PerfEvent: &ev.PerfEvent{Name: "Event.2"}}, + values: ev.CounterValue{ + Raw: 987654321, + Enabled: 4217641289, + Running: 4901621382, + }, + }, + } + + activeEvents := []*ev.ActiveEvent{} + for _, activeEv := range mEvents { + activeEvents = append(activeEvents, activeEv.event) + + cpu, _ := activeEv.event.PMUPlacement() + metric := coreMetric{ + name: activeEv.event.PerfEvent.Name, + cpuID: cpu, + + values: activeEv.values, + } + metricsExp = append(metricsExp, metric) + + mReader.On("readValue", activeEv.event).Return(activeEv.values, nil).Once() + } + + metricsOut, err := mEventsReader.readEvents(activeEvents) + require.NoError(t, err) + require.Equal(t, metricsExp, metricsOut) + mReader.AssertExpectations(t) + }) +} + +type mockPlacementMaker struct { + mock.Mock +} + +func (m *mockPlacementMaker) makeCorePlacement(cpuIDs []int, factory ev.PlacementFactory) ([]ev.PlacementProvider, error) { + args := m.Called(cpuIDs, factory) + if args.Get(0) == nil { + return nil, args.Error(1) + } + return args.Get(0).([]ev.PlacementProvider), args.Error(1) +} + +type mockEventsActivator struct { + mock.Mock +} + +func (m *mockEventsActivator) activateEventsAsGroup(p ev.PlacementProvider, events []ev.CustomizableEvent) ([]*ev.ActiveEvent, error) { + args := m.Called(p, events) + if args.Get(0) == nil { + return nil, args.Error(1) + } + return args.Get(0).([]*ev.ActiveEvent), args.Error(1) +} + +func 
TestPerf_EventsActivator_ActivateEvents(t *testing.T) { + var mEventsActivator eventsActivator + + mPlacementMaker := &mockPlacementMaker{} + mActivator := &mockEventsActivator{} + mEventsActivator = &eventsActivatorImpl{ + placementMaker: mPlacementMaker, + perfActivator: mActivator, + } + mError := errors.New("mock error") + + leader := &ev.PerfEvent{Name: "Event.Mock.1"} + event := &ev.PerfEvent{Name: "Event.Mock.2"} + + customEvents := []ev.CustomizableEvent{ + {Event: leader}, + {Event: event}, + } + placements := []ev.PlacementProvider{&ev.Placement{}, &ev.Placement{}} + + cores := []int{0, 1, 2, 3} + + activeEvents := []*ev.ActiveEvent{} + for _, customEvent := range customEvents { + activeEvent := &ev.ActiveEvent{PerfEvent: customEvent.Event} + activeEvents = append(activeEvents, activeEvent) + } + + t.Run("NoCustomEvents", func(t *testing.T) { + activeEvents, err := mEventsActivator.activateEvents(nil, cores) + require.Nil(t, activeEvents) + require.ErrorContains(t, err, "no custom events provided") + }) + + t.Run("NoCores", func(t *testing.T) { + activeEvents, err := mEventsActivator.activateEvents(customEvents, nil) + require.Nil(t, activeEvents) + require.ErrorContains(t, err, "no cores provided") + }) + + t.Run("FailedToMakePlacement", func(t *testing.T) { + mPlacementMaker.On("makeCorePlacement", cores, leader).Return(nil, mError).Once() + + activeEvents, err := mEventsActivator.activateEvents(customEvents, cores) + require.Nil(t, activeEvents) + require.ErrorContains(t, err, fmt.Sprintf("failed to make core placements: %s", mError.Error())) + mPlacementMaker.AssertExpectations(t) + }) + + t.Run("FailedToActivateEvents", func(t *testing.T) { + activeEventsExp := []*ev.ActiveEvent{ + {PerfEvent: customEvents[0].Event}, + {PerfEvent: customEvents[1].Event}, + } + mPlacementMaker.On("makeCorePlacement", cores, leader).Return(placements, nil).Once() + mActivator.On("activateEventsAsGroup", placements[0], customEvents).Return(activeEventsExp, nil).Once() + 
mActivator.On("activateEventsAsGroup", placements[1], customEvents).Return(nil, mError).Once() + + activeEventsOut, err := mEventsActivator.activateEvents(customEvents, cores) + require.Equal(t, activeEventsExp, activeEventsOut) + require.ErrorContains(t, err, "failed to activate events as a group") + mPlacementMaker.AssertExpectations(t) + mActivator.AssertExpectations(t) + }) + + t.Run("EventsActivated", func(t *testing.T) { + activeEventsExp := []*ev.ActiveEvent{} + + mPlacementMaker.On("makeCorePlacement", cores, leader).Return(placements, nil).Once() + for _, placement := range placements { + mActivator.On("activateEventsAsGroup", placement, customEvents).Return(activeEvents, nil).Once() + activeEventsExp = append(activeEventsExp, activeEvents...) + } + + activeEventsOut, err := mEventsActivator.activateEvents(customEvents, cores) + require.Equal(t, activeEventsExp, activeEventsOut) + require.NoError(t, err) + mPlacementMaker.AssertExpectations(t) + mActivator.AssertExpectations(t) + }) +} + +type mockPlacementFactory struct { + err error +} + +func (m *mockPlacementFactory) NewPlacements(_ string, cpu int, cpus ...int) ([]ev.PlacementProvider, error) { + if m.err != nil { + return nil, m.err + } + + placements := make([]ev.PlacementProvider, 0) + placements = append(placements, &ev.Placement{ + CPU: cpu, + PMUType: 4, + }) + + for _, cpu := range cpus { + placements = append(placements, &ev.Placement{ + CPU: cpu, + PMUType: 4, + }) + } + return placements, nil +} + +func TestPerf_PlacementMaker_MakeCorePlacement(t *testing.T) { + mockError := errors.New("mock error") + + testCases := []struct { + name string + cpuIDs []int + perfEvent ev.PlacementFactory + expected []ev.PlacementProvider + err error + }{ + { + name: "NoCPUIDsProvided", + cpuIDs: nil, + perfEvent: &ev.PerfEvent{}, + expected: nil, + err: errors.New("no CPU IDs were provided"), + }, + { + name: "SingleCorePlacementFailed", + cpuIDs: []int{0}, + perfEvent: &mockPlacementFactory{mockError}, + 
expected: nil, + err: errors.New("failed to create single core placement"), + }, + { + name: "MultipleCorePlacementFailed", + cpuIDs: []int{0, 1}, + perfEvent: &mockPlacementFactory{mockError}, + expected: nil, + err: errors.New("failed to create multiple core placements"), + }, + { + name: "SingleCorePlacement", + cpuIDs: []int{0}, + perfEvent: &mockPlacementFactory{nil}, + expected: []ev.PlacementProvider{ + &ev.Placement{CPU: 0, PMUType: 4}, + }, + err: nil, + }, + { + name: "MultipleCorePlacements", + cpuIDs: []int{0, 1, 2, 3}, + perfEvent: &mockPlacementFactory{nil}, + expected: []ev.PlacementProvider{ + &ev.Placement{CPU: 0, PMUType: 4}, + &ev.Placement{CPU: 1, PMUType: 4}, + &ev.Placement{CPU: 2, PMUType: 4}, + &ev.Placement{CPU: 3, PMUType: 4}, + }, + err: nil, + }, + } + + for _, tc := range testCases { + t.Run(tc.name, func(t *testing.T) { + maker := &placementMakerImpl{} + providers, err := maker.makeCorePlacement(tc.cpuIDs, tc.perfEvent) + + require.Equal(t, tc.expected, providers) + if tc.err != nil { + require.ErrorContains(t, err, tc.err.Error()) + } else { + require.NoError(t, err) + } + }) + } +} + +type mockEventDeactivator struct { + mock.Mock +} + +func (m *mockEventDeactivator) deactivateEvent(event *ev.ActiveEvent) error { + args := m.Called(event) + return args.Error(0) +} + +func TestPerf_EventsDeactivator_DeactivateEvents(t *testing.T) { + events := []*ev.ActiveEvent{ + {PerfEvent: &ev.PerfEvent{Name: "Event.Mock.1", Uncore: false}}, + {PerfEvent: &ev.PerfEvent{Name: "Event.Mock.2", Uncore: false}}, + {PerfEvent: &ev.PerfEvent{Name: "Event.Mock.3", Uncore: false}}, + {PerfEvent: nil}, + nil, + } + + t.Run("FailToDeactivate", func(t *testing.T) { + mEventDeactivator := &mockEventDeactivator{} + mEventDeactivator.On("deactivateEvent", events[0]).Return(nil).Once() + mEventDeactivator.On("deactivateEvent", events[1]).Return(errors.New("mock error")).Once() + mEventDeactivator.On("deactivateEvent", events[2]).Return(nil).Once() + + 
activeEventsExp := []*ev.ActiveEvent{events[1]} + + var mEventsDeactivator eventsDeactivator = &eventsDeactivatorImpl{ + perfDeactivator: mEventDeactivator, + } + + activeEventsOut, err := mEventsDeactivator.deactivateEvents(events) + require.Equal(t, activeEventsExp, activeEventsOut) + require.ErrorContains(t, err, "failed to deactivate events") + mEventDeactivator.AssertExpectations(t) + }) + + t.Run("EventsDeactivated", func(t *testing.T) { + mEventDeactivator := &mockEventDeactivator{} + mEventDeactivator.On("deactivateEvent", events[0]).Return(nil).Once() + mEventDeactivator.On("deactivateEvent", events[1]).Return(nil).Once() + mEventDeactivator.On("deactivateEvent", events[2]).Return(nil).Once() + + activeEventsExp := []*ev.ActiveEvent{} + + var mEventsDeactivator eventsDeactivator = &eventsDeactivatorImpl{ + perfDeactivator: mEventDeactivator, + } + + activeEventsOut, err := mEventsDeactivator.deactivateEvents(events) + require.Empty(t, activeEventsOut) + require.Equal(t, activeEventsExp, activeEventsOut) + require.NoError(t, err) + mEventDeactivator.AssertExpectations(t) + }) +} + +type mockFileInfoProvider struct { + mock.Mock +} + +func (m *mockFileInfoProvider) readFile(name string) ([]byte, error) { + args := m.Called(name) + if args.Get(0) == nil { + return nil, args.Error(1) + } + return args.Get(0).([]byte), args.Error(1) +} + +func (m *mockFileInfoProvider) rlimit() (uint64, error) { + args := m.Called() + return args.Get(0).(uint64), args.Error(1) +} + +func TestPerf_Helper_Multiply(t *testing.T) { + t.Run("Overflow", func(t *testing.T) { + maxUint64 := uint64(math.MaxUint64) + b := uint64(10000) + + res, err := multiply(maxUint64, b) + require.Equal(t, uint64(0), res) + require.ErrorContains(t, err, "could not be represented as uint64") + }) + + t.Run("Valid", func(t *testing.T) { + a := uint64(math.MaxUint64 >> 2) + b := uint64(2) + + _, err := multiply(a, b) + require.NoError(t, err) + }) +} + +func TestPerf_Helper_GetMaxFd(t *testing.T) { + 
t.Run("ReadFileMaxError", func(t *testing.T) { + mError := errors.New("mock error") + + mFileInfoProvider := &mockFileInfoProvider{} + mFileInfoProvider.On("readFile", fileMaxPath).Return([]byte{}, mError).Once() + + _, err := getMaxFd(mFileInfoProvider) + require.ErrorContains(t, err, mError.Error()) + mFileInfoProvider.AssertExpectations(t) + }) + + t.Run("FileContentError", func(t *testing.T) { + fileContent := []byte("invalid") + + mFileInfoProvider := &mockFileInfoProvider{} + mFileInfoProvider.On("readFile", fileMaxPath).Return(fileContent, nil).Once() + + fd, err := getMaxFd(mFileInfoProvider) + require.Equal(t, uint64(0), fd) + require.ErrorContains(t, err, "could not parse file content") + mFileInfoProvider.AssertExpectations(t) + }) + + t.Run("Valid", func(t *testing.T) { + fdExp := uint64(25) + fileContent := []byte(fmt.Sprintf("%d\n", fdExp)) + + mFileInfoProvider := &mockFileInfoProvider{} + mFileInfoProvider.On("readFile", fileMaxPath).Return(fileContent, nil).Once() + + fd, err := getMaxFd(mFileInfoProvider) + require.Equal(t, fdExp, fd) + require.NoError(t, err) + mFileInfoProvider.AssertExpectations(t) + }) +} + +func TestPerf_Helper_CheckFileDescriptor(t *testing.T) { + t.Run("ReadHardLimitError", func(t *testing.T) { + fd := uint64(25) + fdMax := uint64(0) + fileContent := []byte(fmt.Sprintf("%d\n", fdMax)) + + mFileInfoProvider := &mockFileInfoProvider{} + mFileInfoProvider.On("readFile", fileMaxPath).Return(fileContent, errors.New("mock error")).Once() + + err := checkFileDescriptors(fd, mFileInfoProvider) + require.ErrorContains(t, err, "error retrieving kernel max file descriptors") + mFileInfoProvider.AssertExpectations(t) + }) + + t.Run("HardLimitExceeded", func(t *testing.T) { + fd := uint64(100) + fdMax := uint64(25) + fileContent := []byte(fmt.Sprintf("%d\n", fdMax)) + + mFileInfoProvider := &mockFileInfoProvider{} + mFileInfoProvider.On("readFile", fileMaxPath).Return(fileContent, nil).Once() + + err := checkFileDescriptors(fd, 
mFileInfoProvider) + require.ErrorContains(t, err, fmt.Sprintf("required file descriptors %d, exceeds the maximum number of "+ + "available file descriptors %d", fd, fdMax)) + mFileInfoProvider.AssertExpectations(t) + }) + + t.Run("ReadSoftLimitError", func(t *testing.T) { + fd := uint64(100) + fdMax := uint64(125) + fileContent := []byte(fmt.Sprintf("%d\n", fdMax)) + + mError := errors.New("mock error") + rlimit := uint64(0) + + mFileInfoProvider := &mockFileInfoProvider{} + mFileInfoProvider.On("readFile", fileMaxPath).Return(fileContent, nil).Once() + mFileInfoProvider.On("rlimit").Return(rlimit, mError).Once() + + err := checkFileDescriptors(fd, mFileInfoProvider) + require.ErrorContains(t, err, "error retrieving process max file descriptors") + mFileInfoProvider.AssertExpectations(t) + }) + + t.Run("SoftLimitExceeded", func(t *testing.T) { + fd := uint64(25) + fdMax := uint64(100) + fileContent := []byte(fmt.Sprintf("%d\n", fdMax)) + rlimit := uint64(20) + + mFileInfoProvider := &mockFileInfoProvider{} + mFileInfoProvider.On("readFile", fileMaxPath).Return(fileContent, nil).Once() + mFileInfoProvider.On("rlimit").Return(rlimit, nil).Once() + + err := checkFileDescriptors(fd, mFileInfoProvider) + require.Error(t, err, fmt.Sprintf("required file descriptors %d, exceeds the maximum number of"+ + "available file descriptors that a process may allocate %d", fd, rlimit)) + mFileInfoProvider.AssertExpectations(t) + }) + + t.Run("Valid", func(t *testing.T) { + fd := uint64(25) + fdMax := uint64(100) + fileContent := []byte(fmt.Sprintf("%d\n", fdMax)) + rlimit := uint64(50) + + mFileInfoProvider := &mockFileInfoProvider{} + mFileInfoProvider.On("readFile", fileMaxPath).Return(fileContent, nil).Once() + mFileInfoProvider.On("rlimit").Return(rlimit, nil).Once() + + err := checkFileDescriptors(fd, mFileInfoProvider) + require.NoError(t, err) + mFileInfoProvider.AssertExpectations(t) + }) +} + +func TestPerf_Perf_InitResolver(t *testing.T) { + t.Run("WithValidJSONFile", 
func(t *testing.T) { + perf := newPerf() + require.NotNil(t, perf) + require.NoError(t, perf.initResolver("testdata/sapphirerapids_core.json")) + }) + + t.Run("WithInvalidJSONFile", func(t *testing.T) { + perf := newPerf() + require.NotNil(t, perf) + require.ErrorContains(t, perf.initResolver("dummy.json"), "error adding file to reader") + }) +} + +type mockResolver struct { + mock.Mock +} + +func (m *mockResolver) resolveEvents(events []string) ([]ev.CustomizableEvent, error) { + args := m.Called(events) + if args.Get(0) == nil { + return nil, args.Error(1) + } + return args.Get(0).([]ev.CustomizableEvent), args.Error(1) +} + +type mockActivator struct { + mock.Mock +} + +func (m *mockActivator) activateEvents(customEvents []ev.CustomizableEvent, cores []int) ([]*ev.ActiveEvent, error) { + args := m.Called(customEvents, cores) + if args.Get(0) == nil { + return nil, args.Error(1) + } + return args.Get(0).([]*ev.ActiveEvent), args.Error(1) +} + +func TestPerf_Perf_Activate(t *testing.T) { + events := []string{ + "Event.Mock.1", + "Event.Mock.2", + } + + cores := []int{0, 1, 2, 3} + + customEvents := []ev.CustomizableEvent{} + activeEvents := []*ev.ActiveEvent{} + for _, event := range events { + perfEvent := &ev.PerfEvent{Name: event, Uncore: false, PMUName: "cpu", PMUTypes: []ev.NamedPMUType{{Name: "cpu", PMUType: 4}}} + customEvents = append(customEvents, ev.CustomizableEvent{ + Event: perfEvent, + }) + + activeEvents = append(activeEvents, &ev.ActiveEvent{ + PerfEvent: perfEvent, + }) + } + + mError := errors.New("mock error") + + t.Run("FailedToResolve", func(t *testing.T) { + mResolver := &mockResolver{} + mResolver.On("resolveEvents", events).Return(nil, mError).Once() + + perf := &perf{ + resolver: mResolver, + } + + require.ErrorContains(t, perf.activate(events, cores), "error resolving event") + mResolver.AssertExpectations(t) + }) + + t.Run("FailedToCheckFileDescriptors", func(t *testing.T) { + mResolver := &mockResolver{} + mResolver.On("resolveEvents", 
events).Return(customEvents, nil).Once() + + mFileInfoProvider := &mockFileInfoProvider{} + mFileInfoProvider.On("readFile", fileMaxPath).Return(nil, mError).Once() + + perf := &perf{ + resolver: mResolver, + fileInfoReader: mFileInfoProvider, + } + + require.ErrorContains(t, perf.activate(events, cores), "error checking available file descriptors") + mResolver.AssertExpectations(t) + mFileInfoProvider.AssertExpectations(t) + }) + + t.Run("FailedToActivateEvents", func(t *testing.T) { + mResolver := &mockResolver{} + mResolver.On("resolveEvents", events).Return(customEvents, nil).Once() + + fdMax := uint64(10) + fileContent := []byte(fmt.Sprintf("%d\n", fdMax)) + rlimit := uint64(10) + + mFileInfoProvider := &mockFileInfoProvider{} + mFileInfoProvider.On("readFile", fileMaxPath).Return(fileContent, nil).Once() + mFileInfoProvider.On("rlimit").Return(rlimit, nil).Once() + + // Only two events were activated + activeEvents := []*ev.ActiveEvent{ + {PerfEvent: customEvents[0].Event}, + {PerfEvent: customEvents[1].Event}, + } + mActivator := &mockActivator{} + mActivator.On("activateEvents", customEvents, cores).Return(activeEvents, mError) + + perf := &perf{ + resolver: mResolver, + activator: mActivator, + fileInfoReader: mFileInfoProvider, + } + + require.ErrorContains(t, perf.activate(events, cores), "error during event activation") + require.Equal(t, activeEvents, perf.activeEvents) + mResolver.AssertExpectations(t) + mFileInfoProvider.AssertExpectations(t) + mActivator.AssertExpectations(t) + }) + + t.Run("EventsActivated", func(t *testing.T) { + mResolver := &mockResolver{} + mResolver.On("resolveEvents", events).Return(customEvents, nil).Once() + + fdMax := uint64(10) + fileContent := []byte(fmt.Sprintf("%d\n", fdMax)) + rlimit := uint64(10) + + mFileInfoProvider := &mockFileInfoProvider{} + mFileInfoProvider.On("readFile", fileMaxPath).Return(fileContent, nil).Once() + mFileInfoProvider.On("rlimit").Return(rlimit, nil).Once() + + mActivator := &mockActivator{} 
+ mActivator.On("activateEvents", customEvents, cores).Return(activeEvents, nil) + + perf := &perf{ + resolver: mResolver, + activator: mActivator, + fileInfoReader: mFileInfoProvider, + } + + require.NoError(t, perf.activate(events, cores)) + require.Equal(t, activeEvents, perf.activeEvents) + mResolver.AssertExpectations(t) + mFileInfoProvider.AssertExpectations(t) + mActivator.AssertExpectations(t) + }) +} + +type mockEventsReader struct { + mock.Mock +} + +func (m *mockEventsReader) readEvents(events []*ev.ActiveEvent) ([]coreMetric, error) { + args := m.Called(events) + if args.Get(0) == nil { + return nil, args.Error(1) + } + return args.Get(0).([]coreMetric), args.Error(1) +} + +func TestPerf_Perf_Read(t *testing.T) { + setFakeClock() + fakeClock.Set(time.Now()) + defer unsetFakeClock() + + mEvents := []eventWithValues{ + { + event: &ev.ActiveEvent{PerfEvent: &ev.PerfEvent{Name: "Event.1"}}, + values: ev.CounterValue{ + Raw: 123456789, + Enabled: 1289175421, + Running: 2374652324, + }, + }, + { + event: &ev.ActiveEvent{PerfEvent: &ev.PerfEvent{Name: "Event.2"}}, + values: ev.CounterValue{ + Raw: 987654321, + Enabled: 4217641289, + Running: 4901621382, + }, + }, + } + + activeEvents := []*ev.ActiveEvent{} + metricsExp := []coreMetric{} + for _, mEvent := range mEvents { + activeEvents = append(activeEvents, mEvent.event) + + cpu, _ := mEvent.event.PMUPlacement() + metric := coreMetric{ + name: mEvent.event.PerfEvent.Name, + cpuID: cpu, + + values: mEvent.values, + } + metricsExp = append(metricsExp, metric) + } + + t.Run("FailedToRead", func(t *testing.T) { + mReader := &mockEventsReader{} + mReader.On("readEvents", activeEvents).Return(nil, errors.New("mock error")) + + perf := &perf{ + valuesReader: mReader, + activeEvents: activeEvents, + } + + metrics, err := perf.read() + require.Nil(t, metrics) + require.ErrorContains(t, err, "mock error") + mReader.AssertExpectations(t) + }) + + t.Run("EventValuesRead", func(t *testing.T) { + mReader := 
&mockEventsReader{} + mReader.On("readEvents", activeEvents).Return(metricsExp, nil) + + perf := &perf{ + valuesReader: mReader, + activeEvents: activeEvents, + } + + metricsOut, err := perf.read() + require.Equal(t, metricsExp, metricsOut) + require.NoError(t, err) + mReader.AssertExpectations(t) + }) +} + +type mockEventsDeactivator struct { + mock.Mock +} + +func (m *mockEventsDeactivator) deactivateEvents(events []*ev.ActiveEvent) ([]*ev.ActiveEvent, error) { + args := m.Called(events) + if args.Get(0) == nil { + return nil, args.Error(1) + } + return args.Get(0).([]*ev.ActiveEvent), args.Error(1) +} + +func TestPerf_Perf_Deactivate(t *testing.T) { + events := []*ev.ActiveEvent{ + {PerfEvent: &ev.PerfEvent{Name: "Event.Mock.1", Uncore: false}}, + {PerfEvent: &ev.PerfEvent{Name: "Event.Mock.2", Uncore: false}}, + {PerfEvent: &ev.PerfEvent{Name: "Event.Mock.3", Uncore: false}}, + {PerfEvent: nil}, + nil, + } + + t.Run("FailedToDeactivate", func(t *testing.T) { + activeEventsExp := []*ev.ActiveEvent{events[1]} + mError := fmt.Errorf("failed to deactivate events: %s", events[1].PerfEvent.Name) + + mDeactivator := &mockEventsDeactivator{} + mDeactivator.On("deactivateEvents", events).Return(activeEventsExp, mError).Once() + + perf := &perf{ + deactivator: mDeactivator, + + activeEvents: events, + } + + err := perf.deactivate() + require.Equal(t, activeEventsExp, perf.activeEvents) + require.ErrorContains(t, err, mError.Error()) + mDeactivator.AssertExpectations(t) + }) + + t.Run("EventsDeactivated", func(t *testing.T) { + activeEventsExp := []*ev.ActiveEvent{} + + mDeactivator := &mockEventsDeactivator{} + mDeactivator.On("deactivateEvents", events).Return(activeEventsExp, nil).Once() + + perf := &perf{ + deactivator: mDeactivator, + + activeEvents: events, + } + + err := perf.deactivate() + require.Equal(t, activeEventsExp, perf.activeEvents) + require.NoError(t, err) + mDeactivator.AssertExpectations(t) + }) +} + +func TestPerf_PerfWithStorage_Update(t *testing.T) 
{ + t.Run("FailedToRead", func(t *testing.T) { + mEventName := "Event.1" + mActiveEvents := []*ev.ActiveEvent{{PerfEvent: &ev.PerfEvent{Name: mEventName}}} + mReader := &mockEventsReader{} + mReader.On("readEvents", mActiveEvents).Return(nil, errors.New("mock error")).Once() + + perfWithStorage := &perfWithStorage{ + perfReader: &perf{ + valuesReader: mReader, + activeEvents: mActiveEvents, + }, + metrics: make([]coreMetric, 0), + } + + err := perfWithStorage.update() + require.ErrorContains(t, err, "mock error") + mReader.AssertExpectations(t) + }) + + t.Run("FailedToScale", func(t *testing.T) { + mEventName := "Event.1" + mActiveEvents := []*ev.ActiveEvent{{PerfEvent: &ev.PerfEvent{Name: mEventName}}} + mMetrics := []coreMetric{ + { + name: mEventName, + cpuID: 0, + values: ev.CounterValue{ + Raw: 500, + Enabled: math.MaxUint64, + Running: 1, + }, + }, + } + + metricsExp := []coreMetric{ + mMetrics[0], + } + metricsExp[0].scaled = 0 + + mReader := &mockEventsReader{} + mReader.On("readEvents", mActiveEvents).Return(mMetrics, nil).Once() + + perfWithStorage := &perfWithStorage{ + perfReader: &perf{ + valuesReader: mReader, + activeEvents: mActiveEvents, + }, + metrics: make([]coreMetric, 0), + } + + err := perfWithStorage.update() + require.Equal(t, metricsExp, perfWithStorage.metrics) + require.ErrorContains(t, err, "scaled value could not be represented as uint64") + mReader.AssertExpectations(t) + }) + + t.Run("UpdatedWithoutScaling", func(t *testing.T) { + mEventName1 := "Event.1" + mEventName2 := "Event.2" + mActiveEvents := []*ev.ActiveEvent{ + {PerfEvent: &ev.PerfEvent{Name: mEventName1}}, + {PerfEvent: &ev.PerfEvent{Name: mEventName2}}, + } + mMetrics := []coreMetric{ + { + name: "Event.1", + cpuID: 0, + values: ev.CounterValue{ + Raw: 881235, + Enabled: 881235, + Running: 881235, + }, + }, + { + name: "Event.2", + cpuID: 0, + values: ev.CounterValue{ + Raw: 123456, + Enabled: 123456, + Running: 0, + }, + }, + } + + metricsExp := []coreMetric{ + 
mMetrics[0], + mMetrics[1], + } + metricsExp[0].scaled = mMetrics[0].values.Raw + metricsExp[1].scaled = mMetrics[1].values.Raw + + mReader := &mockEventsReader{} + mReader.On("readEvents", mActiveEvents).Return(mMetrics, nil).Once() + + perfWithStorage := &perfWithStorage{ + perfReader: &perf{ + valuesReader: mReader, + activeEvents: mActiveEvents, + }, + metrics: make([]coreMetric, 0), + } + + err := perfWithStorage.update() + require.Equal(t, metricsExp, perfWithStorage.metrics) + require.NoError(t, err) + mReader.AssertExpectations(t) + }) + + t.Run("UpdatedWithScaling", func(t *testing.T) { + mEventName1 := "Event.1" + mEventName2 := "Event.2" + mActiveEvents := []*ev.ActiveEvent{ + {PerfEvent: &ev.PerfEvent{Name: mEventName1}}, + {PerfEvent: &ev.PerfEvent{Name: mEventName2}}, + } + mMetrics := []coreMetric{ + { + name: mEventName1, + cpuID: 0, + values: ev.CounterValue{ + Raw: 123456789, + Enabled: 1289175421, + Running: 2374652324, + }, + }, + { + name: mEventName2, + cpuID: 0, + values: ev.CounterValue{ + Raw: 987654321, + Enabled: 4217641289, + Running: 4901621382, + }, + }, + } + + metricsExp := []coreMetric{ + mMetrics[0], + mMetrics[1], + } + metricsExp[0].scaled = 67023478 + metricsExp[1].scaled = 849835456 + + mReader := &mockEventsReader{} + mReader.On("readEvents", mActiveEvents).Return(mMetrics, nil).Once() + + perfWithStorage := &perfWithStorage{ + perfReader: &perf{ + valuesReader: mReader, + activeEvents: mActiveEvents, + }, + metrics: make([]coreMetric, 0), + } + + err := perfWithStorage.update() + require.Equal(t, metricsExp, perfWithStorage.metrics) + require.NoError(t, err) + mReader.AssertExpectations(t) + }) +} + +func TestPerf_PerfWithStorage_GetCoreMetrics(t *testing.T) { + metrics := []coreMetric{ + {name: "Mock.Event.1", cpuID: 1}, + {name: "Mock.Event.1", cpuID: 2}, + {name: "Mock.Event.2", cpuID: 1}, + {name: "Mock.Event.3", cpuID: 2}, + } + + var perf perfReaderWithStorage = &perfWithStorage{ + metrics: metrics, + } + + 
t.Run("CoreNotFound", func(t *testing.T) { + metricsOut := perf.getCoreMetrics(0) + require.Equal(t, []coreMetric{}, metricsOut) + }) + + t.Run("CoreMetricsFound", func(t *testing.T) { + metricsExp := []coreMetric{metrics[0], metrics[2]} + metricsOut := perf.getCoreMetrics(1) + require.Equal(t, metricsExp, metricsOut) + }) +} diff --git a/power.go b/power.go new file mode 100644 index 0000000..f9b67c7 --- /dev/null +++ b/power.go @@ -0,0 +1,755 @@ +// Copyright (C) 2023 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 + +//go:build linux && amd64 + +package powertelemetry + +import ( + "errors" + "fmt" + "strings" + + "github.com/intel/powertelemetry/internal/cpumodel" +) + +// MSR offset definitions. +const ( + uncorePerfStatus = 0x621 // UNCORE_PERF_STATUS + + fsbFreq = 0xCD // MSR_FSB_FREQ + platformInfo = 0xCE // MSR_PLATFORM_INFO + + temperatureTarget = 0x1A2 // MSR_TEMPERATURE_TARGET + thermalStatus = 0x19C // IA32_THERM_STATUS + + c3Residency = 0x3FC // MSR_CORE_C3_RESIDENCY + c6Residency = 0x3FD // MSR_CORE_C6_RESIDENCY + c7Residency = 0x3FE // MSR_CORE_C7_RESIDENCY + maxFreqClockCount = 0xE7 // IA32_MPERF + actualFreqClockCount = 0xE8 // IA32_APERF + timestampCounter = 0x10 // IA32_TIME_STAMP_COUNTER + + turboRatioLimit = 0x1AD // MSR_TURBO_RATIO_LIMIT + turboRatioLimit1 = 0x1AE // MSR_TURBO_RATIO_LIMIT1 + turboRatioLimit2 = 0x1AF // MSR_TURBO_RATIO_LIMIT2 + secondaryTurboRatioLimit = 0x650 // MSR_SECONDARY_TURBO_RATIO_LIMIT + atomCoreTurboRatios = 0x66C // MSR_ATOM_CORE_TURBO_RATIOS +) + +// GetInitialUncoreFrequencyMin retrieves the minimum initial uncore frequency limit (in MHz) for the specified package and die. 
+func (pt *PowerTelemetry) GetInitialUncoreFrequencyMin(packageID, dieID int) (float64, error) { + if pt.uncoreFreq == nil { + return 0.0, &ModuleNotInitializedError{Name: "uncore_frequency"} + } + return pt.uncoreFreq.getUncoreFrequencyMhz(packageID, dieID, "initial_min") +} + +// GetCustomizedUncoreFrequencyMin retrieves the minimum customized uncore frequency limit (in MHz) for the specified package and die. +func (pt *PowerTelemetry) GetCustomizedUncoreFrequencyMin(packageID, dieID int) (float64, error) { + if pt.uncoreFreq == nil { + return 0.0, &ModuleNotInitializedError{Name: "uncore_frequency"} + } + return pt.uncoreFreq.getUncoreFrequencyMhz(packageID, dieID, "min") +} + +// GetInitialUncoreFrequencyMax retrieves the maximum initial uncore frequency limit (in MHz) for the specified package and die. +func (pt *PowerTelemetry) GetInitialUncoreFrequencyMax(packageID, dieID int) (float64, error) { + if pt.uncoreFreq == nil { + return 0.0, &ModuleNotInitializedError{Name: "uncore_frequency"} + } + return pt.uncoreFreq.getUncoreFrequencyMhz(packageID, dieID, "initial_max") +} + +// GetCustomizedUncoreFrequencyMax retrieves the maximum customized uncore frequency limit (in MHz) for the specified package and die. +func (pt *PowerTelemetry) GetCustomizedUncoreFrequencyMax(packageID, dieID int) (float64, error) { + if pt.uncoreFreq == nil { + return 0.0, &ModuleNotInitializedError{Name: "uncore_frequency"} + } + return pt.uncoreFreq.getUncoreFrequencyMhz(packageID, dieID, "max") +} + +// GetCurrentUncoreFrequency takes a package ID and returns the current uncore frequency +// value (in MHz). First it tries to retrieve this value from sysfs. In case of error, +// it attempts to get this value from the CPU ID's MSR corresponding to the given package ID. 
+func (pt *PowerTelemetry) GetCurrentUncoreFrequency(packageID, dieID int) (float64, error) { + // Get current uncore frequency value from sysfs + if pt.uncoreFreq != nil { + currFreq, err := pt.uncoreFreq.getUncoreFrequencyMhz(packageID, dieID, "current") + if err == nil { + return currFreq, nil + } + } + + // Fallback method to get the value via MSR + if pt.msr == nil { + return 0.0, &ModuleNotInitializedError{Name: "msr"} + } + + // Get CPU ID within the package ID + cpuID, err := pt.getCPUIDFromPackageID(packageID) + if err != nil { + return 0.0, err + } + + // 32-bit [31:0] value of UNCORE_PERF_STATUS msr offset. + res, err := pt.msr.read(uncorePerfStatus, cpuID) + if err != nil { + return 0.0, err + } + + // Mask to obtain 6:0 bits corresponding to CURRENT_CLR_RATIO + // in steps of 100 MHz. Multiplying by a factor of 100 results in MHz unit. + return float64(res&0x3F) * 100, nil +} + +// GetCPUBaseFrequency returns the base frequency of the CPU in MHz. It takes +// a package ID as an argument and calculates the frequency using msr value. +// If an error occurs, it returns 0 and the error message. +func (pt *PowerTelemetry) GetCPUBaseFrequency(packageID int) (uint64, error) { + if pt.msr == nil { + return 0, &ModuleNotInitializedError{Name: "msr"} + } + + model := pt.topology.getCPUModel() + if err := CheckIfCPUBaseFrequencySupported(model); err != nil { + return 0, err + } + + cpuID, err := pt.getCPUIDFromPackageID(packageID) + if err != nil { + return 0, fmt.Errorf("could not find CPU ID for package ID %v: %w", packageID, err) + } + + res, err := pt.msr.read(platformInfo, cpuID) + if err != nil { + return 0, err + } + + // Mask to obtain 15:8 bits corresponding to MSR_PLATFORM_INFO + // and the nominal TSC frequency can be determined by multiplying this number by + // the proper bus speed. + return uint64(float64((res>>8)&0xFF) * pt.busClock), nil +} + +// GetCPUFrequency returns current frequency value of specified cpu (in MHz). 
+func (pt *PowerTelemetry) GetCPUFrequency(cpuID int) (float64, error) { + if pt.cpuFreq == nil { + return 0.0, &ModuleNotInitializedError{Name: "cpu_frequency"} + } + return pt.cpuFreq.getCPUFrequencyMhz(cpuID) +} + +// isCPUSupported returns true if the processor is supported by the library and false otherwise. +func isCPUSupported(t topologyReader) (bool, error) { + family, err := t.getCPUFamily(0) + if err != nil { + return false, fmt.Errorf("error retrieving the CPU family: %w", err) + } + vendorID, err := t.getCPUVendor(0) + if err != nil { + return false, fmt.Errorf("error retrieving the CPU vendorID: %w", err) + } + return strings.Contains(family, "6") && strings.Contains(vendorID, "GenuineIntel"), nil +} + +// GetCurrentPackagePowerConsumptionWatts takes a package ID and returns the current package domain +// power consumption package, in Watts. +func (pt *PowerTelemetry) GetCurrentPackagePowerConsumptionWatts(packageID int) (float64, error) { + if pt.rapl == nil { + return 0.0, &ModuleNotInitializedError{Name: "rapl"} + } + return pt.rapl.getCurrentPowerConsumptionWatts(packageID, packageDomain.String()) +} + +// GetCurrentDramPowerConsumptionWatts takes a package ID and returns the current package domain +// power consumption package, in Watts. +func (pt *PowerTelemetry) GetCurrentDramPowerConsumptionWatts(packageID int) (float64, error) { + if pt.rapl == nil { + return 0.0, &ModuleNotInitializedError{Name: "rapl"} + } + return pt.rapl.getCurrentPowerConsumptionWatts(packageID, dramDomain.String()) +} + +// GetPackageThermalDesignPowerWatts takes a package ID and returns its maximum allowed power, in Watts. +func (pt *PowerTelemetry) GetPackageThermalDesignPowerWatts(packageID int) (float64, error) { + if pt.rapl == nil { + return 0.0, &ModuleNotInitializedError{Name: "rapl"} + } + return pt.rapl.getMaxPowerConstraintWatts(packageID) +} + +// getBusClock returns the bus clock of CPU according to its model. 
If calculating the +// bus clock speed requires reading of MSR, then cpuID is being used to access the appropriate +// register. If the model is unknown, then 0 is returned as the bus clock with an appropriate error. +func (pt *PowerTelemetry) getBusClock(model int) (float64, error) { + switch model { + case + cpumodel.INTEL_FAM6_ATOM_SILVERMONT, + cpumodel.INTEL_FAM6_ATOM_SILVERMONT_D, + cpumodel.INTEL_FAM6_ATOM_SILVERMONT_MID, + cpumodel.INTEL_FAM6_ATOM_SILVERMONT_SMARTPHONE: + cpuID, err := pt.getFirstAvailableCPU() + if err != nil { + return 0.0, err + } + return pt.getSilvermontBusClock(cpuID) + + case cpumodel.INTEL_FAM6_ATOM_AIRMONT: + cpuID, err := pt.getFirstAvailableCPU() + if err != nil { + return 0.0, err + } + return pt.getAirmontBusClock(cpuID) + + case + cpumodel.INTEL_FAM6_SANDYBRIDGE, + cpumodel.INTEL_FAM6_SANDYBRIDGE_X, + cpumodel.INTEL_FAM6_IVYBRIDGE, + cpumodel.INTEL_FAM6_IVYBRIDGE_X, + cpumodel.INTEL_FAM6_HASWELL, + cpumodel.INTEL_FAM6_HASWELL_X, + cpumodel.INTEL_FAM6_HASWELL_L, + cpumodel.INTEL_FAM6_HASWELL_G, + cpumodel.INTEL_FAM6_BROADWELL, + cpumodel.INTEL_FAM6_BROADWELL_G, + cpumodel.INTEL_FAM6_BROADWELL_X, + cpumodel.INTEL_FAM6_BROADWELL_D, + cpumodel.INTEL_FAM6_SKYLAKE_L, + cpumodel.INTEL_FAM6_SKYLAKE, + cpumodel.INTEL_FAM6_SKYLAKE_X, + cpumodel.INTEL_FAM6_KABYLAKE_L, + cpumodel.INTEL_FAM6_KABYLAKE, + cpumodel.INTEL_FAM6_COMETLAKE, + cpumodel.INTEL_FAM6_COMETLAKE_L, + cpumodel.INTEL_FAM6_CANNONLAKE_L, + cpumodel.INTEL_FAM6_ICELAKE_X, + cpumodel.INTEL_FAM6_ICELAKE_D, + cpumodel.INTEL_FAM6_ICELAKE, + cpumodel.INTEL_FAM6_ICELAKE_L, + cpumodel.INTEL_FAM6_ICELAKE_NNPI, + cpumodel.INTEL_FAM6_ROCKETLAKE, + cpumodel.INTEL_FAM6_TIGERLAKE_L, + cpumodel.INTEL_FAM6_TIGERLAKE, + cpumodel.INTEL_FAM6_SAPPHIRERAPIDS_X, + cpumodel.INTEL_FAM6_EMERALDRAPIDS_X, + cpumodel.INTEL_FAM6_GRANITERAPIDS_X, + cpumodel.INTEL_FAM6_LAKEFIELD, + cpumodel.INTEL_FAM6_ALDERLAKE, + cpumodel.INTEL_FAM6_ALDERLAKE_L, + cpumodel.INTEL_FAM6_RAPTORLAKE, + 
cpumodel.INTEL_FAM6_RAPTORLAKE_P, + cpumodel.INTEL_FAM6_RAPTORLAKE_S, + cpumodel.INTEL_FAM6_METEORLAKE, + cpumodel.INTEL_FAM6_METEORLAKE_L, + cpumodel.INTEL_FAM6_ARROWLAKE, + cpumodel.INTEL_FAM6_LUNARLAKE_M, + cpumodel.INTEL_FAM6_ATOM_GOLDMONT, + cpumodel.INTEL_FAM6_ATOM_GOLDMONT_D, + cpumodel.INTEL_FAM6_ATOM_GOLDMONT_PLUS, + cpumodel.INTEL_FAM6_ATOM_TREMONT_D, + cpumodel.INTEL_FAM6_ATOM_TREMONT, + cpumodel.INTEL_FAM6_ATOM_TREMONT_L, + cpumodel.INTEL_FAM6_ATOM_GRACEMONT, + cpumodel.INTEL_FAM6_ATOM_CRESTMONT_X, + cpumodel.INTEL_FAM6_ATOM_CRESTMONT, + cpumodel.INTEL_FAM6_XEON_PHI_KNL, + cpumodel.INTEL_FAM6_XEON_PHI_KNM: + return 100.0, nil + + case + cpumodel.INTEL_FAM6_NEHALEM, + cpumodel.INTEL_FAM6_NEHALEM_G, + cpumodel.INTEL_FAM6_NEHALEM_EP, + cpumodel.INTEL_FAM6_NEHALEM_EX, + cpumodel.INTEL_FAM6_WESTMERE, + cpumodel.INTEL_FAM6_WESTMERE_EP, + cpumodel.INTEL_FAM6_WESTMERE_EX: + return 133.0, nil + + default: + return 0.0, fmt.Errorf("busClock is not supported by the CPU model: %v", model) + } +} + +// getSilvermontBusClock returns busClock for Silvermont-based processors. It +// takes a cpuID argument and reads the frequency value from the MSR. +// If an error occurs during the execution, it returns 0.0 and a proper error message. 
+func (pt *PowerTelemetry) getSilvermontBusClock(cpuID int) (float64, error) { + if pt.msr == nil { + return 0.0, &ModuleNotInitializedError{Name: "msr"} + } + + // From MSR_FSB_FREQ for Silvermont Microarchitecture + silvermontFreqTable := []float64{83.3, 100.0, 133.3, 116.7, 80.0} + res, err := pt.msr.read(fsbFreq, cpuID) + if err != nil { + return 0.0, fmt.Errorf("error while reading MSR value: %w", err) + } + // Since register has 3 bits we mask 0x7 or 111 to extract three least significant bits + indx := int(res & 0x7) + if indx >= len(silvermontFreqTable) { + return 0.0, fmt.Errorf("error while getting bus clock: index %d is outside of bounds", indx) + } + return silvermontFreqTable[indx], nil +} + +// getAirmontBusClock returns busClock for Airmont-based processors. It +// takes a cpuID argument and reads the frequency value from the MSR. +// If an error occurs during the execution, it returns 0.0 and a proper error message. +func (pt *PowerTelemetry) getAirmontBusClock(cpuID int) (float64, error) { + if pt.msr == nil { + return 0.0, &ModuleNotInitializedError{Name: "msr"} + } + + // From MSR_FSB_FREQ for Airmont Microarchitecture + airmontFreqTable := []float64{83.3, 100.0, 133.3, 116.7, 80.0, 93.3, 90.0, 88.9, 87.5} + res, err := pt.msr.read(fsbFreq, cpuID) + if err != nil { + return 0.0, fmt.Errorf("error while reading MSR value: %w", err) + } + // Since register has 4 bits we mask 0xF or 1111 to extract four least significant bits + indx := int(res & 0xF) + if indx >= len(airmontFreqTable) { + return 0.0, fmt.Errorf("error while getting bus clock: index %d is outside of bounds", indx) + } + return airmontFreqTable[indx], nil +} + +// GetCPUTemperature takes a cpu ID and returns its temperature, in degrees Celsius. 
+// CPU temperature is calculated based on cpu-specific msr offsets: +// temp[C] = MSR_TEMPERATURE_TARGET[23:16] - IA32_THERM_STATUS[22:16] +// If an error occurs while reading msr offsets, the function returns zero value for +// the temperature and the corresponding error. +func (pt *PowerTelemetry) GetCPUTemperature(cpuID int) (uint64, error) { + if pt.msr == nil { + return 0, &ModuleNotInitializedError{Name: "msr"} + } + + model := pt.topology.getCPUModel() + if err := CheckIfCPUTemperatureSupported(model); err != nil { + return 0, err + } + + // 64-bit [63:0] value of MSR_TEMPERATURE_TARGET msr offset. + res, err := pt.msr.read(uint32(temperatureTarget), cpuID) + if err != nil { + return 0, err + } + // Throttle temperature corresponds to MSR_TEMPERATURE_TARGET[23:16] in degree Celsius. + throttleTemp := (res >> 16) & 0xFF + + // 64-bit [63:0] value of IA32_THERM_STATUS msr offset. + res, err = pt.msr.read(uint32(thermalStatus), cpuID) + if err != nil { + return 0, err + } + // Temperature offset corresponds to IA32_THERM_STATUS[22:16] in degree Celsius. + temp := (res >> 16) & 0x7F + return throttleTemp - temp, nil +} + +// GetCPUC0StateResidency takes a CPU ID and returns its C0 state residency metric, as a percentage. 
+func (pt *PowerTelemetry) GetCPUC0StateResidency(cpuID int) (float64, error) { + if pt.msr == nil { + return 0, &ModuleNotInitializedError{Name: "msr"} + } + + deltas, err := pt.msr.getOffsetDeltas(cpuID) + if err != nil { + return 0.0, fmt.Errorf("error retrieving offset deltas for CPU ID %v: %w", cpuID, err) + } + + mperfDelta, ok := deltas[maxFreqClockCount] + if !ok { + return 0.0, fmt.Errorf("mperf offset delta not found for CPU ID: %v", cpuID) + } + + tscDelta, ok := deltas[timestampCounter] + if !ok { + return 0.0, fmt.Errorf("timestamp counter offset delta not found for CPU ID: %v", cpuID) + } + + if tscDelta == 0 { + return 0.0, fmt.Errorf("timestamp counter offset delta is zero for CPU ID: %v", cpuID) + } + return (float64(mperfDelta) / float64(tscDelta)) * 100, nil +} + +// GetCPUC1StateResidency takes a CPU ID and returns its C1 state residency metric, as a percentage. +func (pt *PowerTelemetry) GetCPUC1StateResidency(cpuID int) (float64, error) { + if pt.msr == nil { + return 0, &ModuleNotInitializedError{Name: "msr"} + } + + model := pt.topology.getCPUModel() + if err := CheckIfCPUC1StateResidencySupported(model); err != nil { + return 0, err + } + + deltas, err := pt.msr.getOffsetDeltas(cpuID) + if err != nil { + return 0.0, fmt.Errorf("error retrieving offset deltas for CPU ID %v: %w", cpuID, err) + } + + mperfDelta, ok := deltas[maxFreqClockCount] + if !ok { + return 0.0, fmt.Errorf("mperf offset delta not found for CPU ID: %v", cpuID) + } + + c3Delta, ok := deltas[c3Residency] + if !ok { + return 0.0, fmt.Errorf("c3 state residency offset delta not found for CPU ID: %v", cpuID) + } + + c6Delta, ok := deltas[c6Residency] + if !ok { + return 0.0, fmt.Errorf("c6 state residency offset delta not found for CPU ID: %v", cpuID) + } + + c7Delta, ok := deltas[c7Residency] + if !ok { + return 0.0, fmt.Errorf("c7 state residency offset delta not found for CPU ID: %v", cpuID) + } + + tscDelta, ok := deltas[timestampCounter] + if !ok { + return 0.0, 
fmt.Errorf("timestamp counter offset delta not found for CPU ID: %v", cpuID) + } + + if tscDelta == 0 { + return 0.0, fmt.Errorf("timestamp counter offset delta is zero for CPU ID: %v", cpuID) + } + c1Norm := float64(tscDelta-mperfDelta-c3Delta-c6Delta-c7Delta) / float64(tscDelta) + return c1Norm * 100, nil +} + +// GetCPUC3StateResidency takes a CPU ID and returns its C3 state residency metric, as a percentage. +func (pt *PowerTelemetry) GetCPUC3StateResidency(cpuID int) (float64, error) { + if pt.msr == nil { + return 0, &ModuleNotInitializedError{Name: "msr"} + } + + model := pt.topology.getCPUModel() + if err := CheckIfCPUC3StateResidencySupported(model); err != nil { + return 0, err + } + + deltas, err := pt.msr.getOffsetDeltas(cpuID) + if err != nil { + return 0.0, fmt.Errorf("error retrieving offset deltas for CPU ID %v: %w", cpuID, err) + } + + // MSR_CORE_C3_RESIDENCY[63:0]_2 - MSR_CORE_C3_RESIDENCY[63:0]_1 + c3Delta, ok := deltas[c3Residency] + if !ok { + return 0.0, fmt.Errorf("c3 state residency offset delta not found for CPU ID: %v", cpuID) + } + + // IA32_TIME_STAMP_COUNTER[63:0]_2 - IA32_TIME_STAMP_COUNTER[63:0]_1 + tscDelta, ok := deltas[timestampCounter] + if !ok { + return 0.0, fmt.Errorf("timestamp counter offset delta not found for CPU ID: %v", cpuID) + } + + if tscDelta == 0 { + return 0.0, fmt.Errorf("timestamp counter offset delta is zero for CPU ID: %v", cpuID) + } + return (float64(c3Delta) / float64(tscDelta)) * 100, nil +} + +// GetCPUC6StateResidency takes a CPU ID and returns its C6 state residency metric, as a percentage. +// C6 state residency is calculated within a time interval and the formula is as follows: +// c6[%] = 100 *(MSR_CORE_C6_RESIDENCY_2 - MSR_CORE_C6_RESIDENCY_1) / (IA32_TIME_STAMP_COUNTER_2 - IA32_TIME_STAMP_COUNTER_1). 
+func (pt *PowerTelemetry) GetCPUC6StateResidency(cpuID int) (float64, error) { + if pt.msr == nil { + return 0, &ModuleNotInitializedError{Name: "msr"} + } + + model := pt.topology.getCPUModel() + if err := CheckIfCPUC6StateResidencySupported(model); err != nil { + return 0, err + } + + deltas, err := pt.msr.getOffsetDeltas(cpuID) + if err != nil { + return 0.0, fmt.Errorf("error retrieving offset deltas for CPU ID %v: %w", cpuID, err) + } + + // MSR_CORE_C6_RESIDENCY[63:0]_2 - MSR_CORE_C6_RESIDENCY[63:0]_1 + c6Delta, ok := deltas[c6Residency] + if !ok { + return 0.0, fmt.Errorf("c6 state residency offset delta not found for CPU ID: %v", cpuID) + } + + // IA32_TIME_STAMP_COUNTER[63:0]_2 - IA32_TIME_STAMP_COUNTER[63:0]_1 + tscDelta, ok := deltas[timestampCounter] + if !ok { + return 0.0, fmt.Errorf("timestamp counter offset delta not found for CPU ID: %v", cpuID) + } + + if tscDelta == 0 { + return 0.0, fmt.Errorf("timestamp counter offset delta is zero for CPU ID: %v", cpuID) + } + return (float64(c6Delta) / float64(tscDelta)) * 100, nil +} + +// GetCPUC7StateResidency takes a CPU ID and returns its C7 state residency metric, as a percentage. 
+func (pt *PowerTelemetry) GetCPUC7StateResidency(cpuID int) (float64, error) { + if pt.msr == nil { + return 0, &ModuleNotInitializedError{Name: "msr"} + } + + model := pt.topology.getCPUModel() + if err := CheckIfCPUC7StateResidencySupported(model); err != nil { + return 0, err + } + + deltas, err := pt.msr.getOffsetDeltas(cpuID) + if err != nil { + return 0.0, fmt.Errorf("error retrieving offset deltas for CPU ID %v: %w", cpuID, err) + } + + // MSR_CORE_C7_RESIDENCY[63:0]_2 - MSR_CORE_C7_RESIDENCY[63:0]_1 + c7Delta, ok := deltas[c7Residency] + if !ok { + return 0.0, fmt.Errorf("c7 state residency offset delta not found for CPU ID: %v", cpuID) + } + + // IA32_TIME_STAMP_COUNTER[63:0]_2 - IA32_TIME_STAMP_COUNTER[63:0]_1 + tscDelta, ok := deltas[timestampCounter] + if !ok { + return 0.0, fmt.Errorf("timestamp counter offset delta not found for CPU ID: %v", cpuID) + } + + if tscDelta == 0 { + return 0.0, fmt.Errorf("timestamp counter offset delta is zero for CPU ID: %v", cpuID) + } + return (float64(c7Delta) / float64(tscDelta)) * 100, nil +} + +// GetCPUBusyFrequencyMhz takes a CPU ID and returns its busy frequency metric, in MHz. 
+func (pt *PowerTelemetry) GetCPUBusyFrequencyMhz(cpuID int) (float64, error) { + if pt.msr == nil { + return 0, &ModuleNotInitializedError{Name: "msr"} + } + + deltas, err := pt.msr.getOffsetDeltas(cpuID) + if err != nil { + return 0.0, fmt.Errorf("error retrieving offset deltas for CPU ID %v: %w", cpuID, err) + } + + timestampDelta, err := pt.msr.getTimestampDelta(cpuID) + if err != nil { + return 0.0, fmt.Errorf("error retrieving timestamp delta for CPU ID %v: %w", cpuID, err) + } + if timestampDelta <= 0 { + return 0.0, errors.New("timestamp delta must be greater than zero") + } + + mperfDelta, ok := deltas[maxFreqClockCount] + if !ok { + return 0.0, fmt.Errorf("mperf offset delta not found for CPU ID: %v", cpuID) + } + if mperfDelta == 0 { + return 0.0, fmt.Errorf("mperf offset delta is zero for CPU ID: %v", cpuID) + } + + aperfDelta, ok := deltas[actualFreqClockCount] + if !ok { + return 0.0, fmt.Errorf("aperf offset delta not found for CPU ID: %v", cpuID) + } + + tscDelta, ok := deltas[timestampCounter] + if !ok { + return 0.0, fmt.Errorf("timestamp counter offset delta not found for CPU ID: %v", cpuID) + } + + return float64(tscDelta) * fromProcessorCyclesToHertz * + (float64(aperfDelta) / float64(mperfDelta)) / (float64(timestampDelta.Nanoseconds()) * fromNanosecondsToSecondsRatio), nil +} + +// UpdatePerCPUMetrics takes a CPU ID and updates the msr storage with offset values corresponding to +// msr file for CPU ID. +func (pt *PowerTelemetry) UpdatePerCPUMetrics(cpuID int) error { + if pt.msr == nil { + return &ModuleNotInitializedError{Name: "msr"} + } + return pt.msr.update(cpuID) +} + +// IsFlagSupported takes a flag's value and returns true if first CPU supports it and false if it doesn't. 
+func (pt *PowerTelemetry) IsFlagSupported(flag string) (bool, error) { + flags, err := pt.topology.getCPUFlags(0) + if err != nil { + return false, fmt.Errorf("error retrieving CPU flags: %w", err) + } + for _, f := range flags { + if f == flag { + return true, nil + } + } + return false, nil +} + +// ReadPerfEvents reads the perf events related to supported C0 state residency metrics +// and updates the storage to make metrics available. If one or more events could not be +// read an error is returned. +func (pt *PowerTelemetry) ReadPerfEvents() error { + if pt.perf == nil { + return &ModuleNotInitializedError{Name: "perf"} + } + return pt.perf.update() +} + +// DeactivatePerfEvents deactivates all active events. If an event or events could not +// be successfully deactivated, an error is returned. +// This method should be explicitly called to avoid resource leakage. +func (pt *PowerTelemetry) DeactivatePerfEvents() error { + if pt.perf == nil { + return &ModuleNotInitializedError{Name: "perf"} + } + return pt.perf.deactivate() +} + +// GetCPUC0SubstateC01Percent takes a CPU ID and returns a value indicating the percentage of time +// the processor spent in its C0.1 substate out of the total time in the C0 state. +// C0.1 is characterized by a light-weight slower wakeup time but more power-saving optimized state. +func (pt *PowerTelemetry) GetCPUC0SubstateC01Percent(cpuID int) (float64, error) { + return pt.getPerfMetricRatio(cpuID, c01.String(), thread.String()) +} + +// GetCPUC0SubstateC02Percent takes a CPU ID and returns a value indicating the percentage of time +// the processor spent in its C0.2 substate out of the total time in the C0 state. +// C0.2 is characterized by a light-weight faster wakeup time but less power saving optimized state. 
+func (pt *PowerTelemetry) GetCPUC0SubstateC02Percent(cpuID int) (float64, error) { + return pt.getPerfMetricRatio(cpuID, c02.String(), thread.String()) +} + +// GetCPUC0SubstateC0WaitPercent takes a CPU ID and returns a value indicating the percentage of time +// the processor spent in its C0_Wait substate out of the total time in the C0 state. +// CPU is in C0_Wait substate when the thread is in the C0.1 or C0.2 or running a PAUSE in C0 ACPI state. +func (pt *PowerTelemetry) GetCPUC0SubstateC0WaitPercent(cpuID int) (float64, error) { + return pt.getPerfMetricRatio(cpuID, c0Wait.String(), thread.String()) +} + +// getPerfMetricRatio is a helper method that takes a CPU ID, a target metric name and reference metric name. +// First, it fetches the specified metrics from the perf storage. Then, it calculates the percentage of the target +// metric, with respect to the reference metric. +func (pt *PowerTelemetry) getPerfMetricRatio(cpuID int, target, reference string) (float64, error) { + if pt.perf == nil { + return 0.0, &ModuleNotInitializedError{Name: "perf"} + } + coreMetrics := pt.perf.getCoreMetrics(cpuID) + if len(coreMetrics) == 0 { + return 0.0, fmt.Errorf("no core metrics found for CPU ID: %v", cpuID) + } + + targetMetric, err := getMetric(coreMetrics, target) + if err != nil { + return 0.0, err + } + + refMetric, err := getMetric(coreMetrics, reference) + if err != nil { + return 0.0, err + } + + if refMetric.scaled == 0 { + return 0.0, fmt.Errorf("zero scaled value for reference metric: %q", reference) + } + + return float64(targetMetric.scaled) / float64(refMetric.scaled) * 100, nil +} + +// getMetric is a helper function that takes a slice of coreMetrics and a string name, +// and returns the first coreMetric corresponding to the name specified. 
+func getMetric(metrics []coreMetric, name string) (coreMetric, error) { + for _, metric := range metrics { + if strings.Contains(metric.name, name) { + return metric, nil + } + } + return coreMetric{}, fmt.Errorf("could not find metric: %q", name) +} + +// getCPUIDFromPackageID takes a package ID and returns a CPU ID within that package ID +// that can be used to read msr values from. +func (pt *PowerTelemetry) getCPUIDFromPackageID(packageID int) (int, error) { + for _, cpu := range pt.cpus { + pkgID, _ := pt.topology.getCPUPackageID(cpu) + if pkgID == packageID { + return cpu, nil + } + } + return 0, fmt.Errorf("unable to get CPU ID for package ID: %v", packageID) +} + +// getFirstAvailableCPU returns the first CPU ID from the slice of available CPUs +// for which msr can be accessed. If no CPUs are available it returns an error. +func (pt *PowerTelemetry) getFirstAvailableCPU() (int, error) { + if len(pt.cpus) == 0 { + return 0, errors.New("no available CPUs were found") + } + return pt.cpus[0], nil +} + +// GetPackageIDs returns a slice with ordered package IDs of the host. +func (pt *PowerTelemetry) GetPackageIDs() []int { + return pt.topology.getPackageIDs() +} + +// GetRaplPackageIDs returns a slice with package IDs of the host for which rapl has access to. +// If rapl is not initialized, it returns nil. +func (pt *PowerTelemetry) GetRaplPackageIDs() []int { + if pt.rapl == nil { + return nil + } + return pt.rapl.getPackageIDs() +} + +// GetMsrCPUIDs returns a slice with available CPU IDs of the host, for which msr has access to. +func (pt *PowerTelemetry) GetMsrCPUIDs() []int { + return pt.cpus +} + +// GetPerfCPUIDs returns a slice with available CPU IDs of the host, for which perf has access to. +func (pt *PowerTelemetry) GetPerfCPUIDs() []int { + // TODO: This implementation should be changed when this library will + // support hybrid CPUs. Only performance cores should be returned here + // so that the result may be a subset of slice pt.cpus. 
+ return pt.cpus +} + +// GetCPUPackageID gets cpu's package ID value. If no cpu is found for the corresponding cpuID +// an error is returned. +func (pt *PowerTelemetry) GetCPUPackageID(cpuID int) (int, error) { + packageID, err := pt.topology.getCPUPackageID(cpuID) + if err != nil { + return 0, fmt.Errorf("error retrieving package ID: %w", err) + } + + return packageID, nil +} + +// GetCPUCoreID gets cpu's core ID value. If no cpu is found for the corresponding cpuID +// an error is returned. +func (pt *PowerTelemetry) GetCPUCoreID(cpuID int) (int, error) { + coreID, err := pt.topology.getCPUCoreID(cpuID) + if err != nil { + return 0, fmt.Errorf("error retrieving core ID: %w", err) + } + + return coreID, nil +} + +// GetPackageDieIDs gets package's die ID values. If no package is found for the corresponding packageID +// an error is returned. +func (pt *PowerTelemetry) GetPackageDieIDs(packageID int) ([]int, error) { + dies, err := pt.topology.getPackageDieIDs(packageID) + if err != nil { + return nil, fmt.Errorf("error retrieving dies: %w", err) + } + + return dies, nil +} diff --git a/power_test.go b/power_test.go new file mode 100644 index 0000000..306085b --- /dev/null +++ b/power_test.go @@ -0,0 +1,4093 @@ +// Copyright (C) 2023 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 + +//go:build linux && amd64 + +package powertelemetry + +import ( + "errors" + "fmt" + "testing" + "time" + + "github.com/stretchr/testify/mock" + "github.com/stretchr/testify/require" + + "github.com/intel/powertelemetry/internal/cpumodel" +) + +// msrMock represents a mock for msrDataWithStorage type. Implements msrReaderWithStorage interface. 
+type msrMock struct { + mock.Mock +} + +func (m *msrMock) initMsrMap(cpuIDs []int, timeout time.Duration) error { + args := m.Called(cpuIDs, timeout) + return args.Error(0) +} + +func (m *msrMock) read(offset uint32, cpuID int) (uint64, error) { + args := m.Called(offset, cpuID) + return args.Get(0).(uint64), args.Error(1) +} + +func (m *msrMock) isMsrLoaded(modulesPath string) (bool, error) { + args := m.Called(modulesPath) + return args.Bool(0), args.Error(1) +} + +func (m *msrMock) update(cpuID int) error { + args := m.Called(cpuID) + return args.Error(0) +} + +func (m *msrMock) getOffsetDeltas(cpuID int) (map[uint32]uint64, error) { + args := m.Called(cpuID) + return args.Get(0).(map[uint32]uint64), args.Error(1) +} + +func (m *msrMock) getTimestampDelta(cpuID int) (time.Duration, error) { + args := m.Called(cpuID) + return args.Get(0).(time.Duration), args.Error(1) +} + +type coreFreqMock struct { + mock.Mock +} + +func (m *coreFreqMock) init() error { + args := m.Called() + return args.Error(0) +} + +func (m *coreFreqMock) getCPUFrequencyMhz(cpuID int) (float64, error) { + args := m.Called(cpuID) + return args.Get(0).(float64), args.Error(1) +} + +// uncoreFreqMock represents a mock for uncoreFreqData type. Implements uncoreFreqReader interface. 
+type uncoreFreqMock struct { + mock.Mock +} + +func (m *uncoreFreqMock) init() error { + args := m.Called() + return args.Error(0) +} + +func (m *uncoreFreqMock) getUncoreFrequencyMhz(packageID, dieID int, freqType string) (float64, error) { + args := m.Called(packageID, dieID, freqType) + return args.Get(0).(float64), args.Error(1) +} + +func TestGetInitialUncoreFrequencyMin(t *testing.T) { + pt := &PowerTelemetry{ + uncoreFreq: &uncoreFreqData{ + uncoreFreqBasePath: "testdata/intel_uncore_frequency", + }, + } + + t.Run("UncoreFreqIsNil", func(t *testing.T) { + packageID := 0 + dieID := 0 + freqExp := 0.0 + + ptel := &PowerTelemetry{} + + freqOut, err := ptel.GetInitialUncoreFrequencyMin(packageID, dieID) + require.Equal(t, freqExp, freqOut) + require.ErrorContains(t, err, "\"uncore_frequency\" is not initialized") + }) + + t.Run("FreqFileNotExist", func(t *testing.T) { + packageID := 10 + dieID := 5 + freqExp := 0.0 + + freqOut, err := pt.GetInitialUncoreFrequencyMin(packageID, dieID) + require.ErrorContains(t, err, "failed to read frequency file") + require.Equal(t, freqExp, freqOut) + }) + + t.Run("InvalidFreqValue", func(t *testing.T) { + packageID := 9 + dieID := 12 + freqExp := 0.0 + + freqOut, err := pt.GetInitialUncoreFrequencyMin(packageID, dieID) + require.ErrorContains(t, err, "failed to convert frequency file content to float64") + require.Equal(t, freqExp, freqOut) + }) + + t.Run("Valid", func(t *testing.T) { + packageID := 10 + dieID := 3 + freqExp := 1000.0 + + freqOut, err := pt.GetInitialUncoreFrequencyMin(packageID, dieID) + require.NoError(t, err) + require.Equal(t, freqExp, freqOut) + }) +} + +func TestGetCustomizedUncoreFrequencyMin(t *testing.T) { + pt := &PowerTelemetry{ + uncoreFreq: &uncoreFreqData{ + uncoreFreqBasePath: "testdata/intel_uncore_frequency", + }, + } + + t.Run("UncoreFreqIsNil", func(t *testing.T) { + packageID := 0 + dieID := 0 + freqExp := 0.0 + + ptel := &PowerTelemetry{} + + freqOut, err := 
ptel.GetCustomizedUncoreFrequencyMin(packageID, dieID) + require.Equal(t, freqExp, freqOut) + require.ErrorContains(t, err, "\"uncore_frequency\" is not initialized") + }) + + t.Run("FreqFileNotExist", func(t *testing.T) { + packageID := 10 + dieID := 5 + freqExp := 0.0 + + freqOut, err := pt.GetCustomizedUncoreFrequencyMin(packageID, dieID) + require.ErrorContains(t, err, "failed to read frequency file") + require.Equal(t, freqExp, freqOut) + }) + + t.Run("InvalidFreqValue", func(t *testing.T) { + packageID := 9 + dieID := 12 + freqExp := 0.0 + + freqOut, err := pt.GetCustomizedUncoreFrequencyMin(packageID, dieID) + require.ErrorContains(t, err, "failed to convert frequency file content to float64") + require.Equal(t, freqExp, freqOut) + }) + + t.Run("Valid", func(t *testing.T) { + packageID := 10 + dieID := 3 + freqExp := 1100.0 + + freqOut, err := pt.GetCustomizedUncoreFrequencyMin(packageID, dieID) + require.NoError(t, err) + require.Equal(t, freqExp, freqOut) + }) +} + +func TestGetInitialUncoreFrequencyMax(t *testing.T) { + pt := &PowerTelemetry{ + uncoreFreq: &uncoreFreqData{ + uncoreFreqBasePath: "testdata/intel_uncore_frequency", + }, + } + + t.Run("UncoreFreqIsNil", func(t *testing.T) { + packageID := 0 + dieID := 0 + freqExp := 0.0 + + ptel := &PowerTelemetry{} + + freqOut, err := ptel.GetInitialUncoreFrequencyMax(packageID, dieID) + require.Equal(t, freqExp, freqOut) + require.ErrorContains(t, err, "\"uncore_frequency\" is not initialized") + }) + + t.Run("FreqFileNotExist", func(t *testing.T) { + packageID := 10 + dieID := 5 + freqExp := 0.0 + + freqOut, err := pt.GetInitialUncoreFrequencyMax(packageID, dieID) + require.ErrorContains(t, err, "failed to read frequency file") + require.Equal(t, freqExp, freqOut) + }) + + t.Run("InvalidFreqValue", func(t *testing.T) { + packageID := 9 + dieID := 12 + freqExp := 0.0 + + freqOut, err := pt.GetInitialUncoreFrequencyMax(packageID, dieID) + require.ErrorContains(t, err, "failed to convert frequency file 
content to float64") + require.Equal(t, freqExp, freqOut) + }) + + t.Run("Valid", func(t *testing.T) { + packageID := 10 + dieID := 3 + freqExp := 2000.0 + + freqOut, err := pt.GetInitialUncoreFrequencyMax(packageID, dieID) + require.NoError(t, err) + require.Equal(t, freqExp, freqOut) + }) +} + +func TestGetCustomizedUncoreFrequencyMax(t *testing.T) { + pt := &PowerTelemetry{ + uncoreFreq: &uncoreFreqData{ + uncoreFreqBasePath: "testdata/intel_uncore_frequency", + }, + } + + t.Run("UncoreFreqIsNil", func(t *testing.T) { + packageID := 0 + dieID := 0 + freqExp := 0.0 + + ptel := &PowerTelemetry{} + + freqOut, err := ptel.GetCustomizedUncoreFrequencyMax(packageID, dieID) + require.Equal(t, freqExp, freqOut) + require.ErrorContains(t, err, "\"uncore_frequency\" is not initialized") + }) + + t.Run("FreqFileNotExist", func(t *testing.T) { + packageID := 10 + dieID := 5 + freqExp := 0.0 + + freqOut, err := pt.GetCustomizedUncoreFrequencyMax(packageID, dieID) + require.ErrorContains(t, err, "failed to read frequency file") + require.Equal(t, freqExp, freqOut) + }) + + t.Run("InvalidFreqValue", func(t *testing.T) { + packageID := 9 + dieID := 12 + freqExp := 0.0 + + freqOut, err := pt.GetCustomizedUncoreFrequencyMax(packageID, dieID) + require.ErrorContains(t, err, "failed to convert frequency file content to float64") + require.Equal(t, freqExp, freqOut) + }) + + t.Run("Valid", func(t *testing.T) { + packageID := 10 + dieID := 3 + freqExp := 1900.0 + + freqOut, err := pt.GetCustomizedUncoreFrequencyMax(packageID, dieID) + require.NoError(t, err) + require.Equal(t, freqExp, freqOut) + }) +} + +func TestGetCurrentUncoreFrequency(t *testing.T) { + newTopology := &topologyData{ + topologyMap: map[int]*cpuInfo{ + 0: { + vendorID: "IdOfVendor", + family: "13", + packageID: 0, + }, + 1: { + vendorID: "IdOfVendor", + family: "13", + packageID: 1, + }, + 2: { + vendorID: "IdOfVendor", + family: "13", + packageID: 0, + }, + }, + } + + t.Run("FromFileSystem", func(t *testing.T) { + 
packageID := 0 + dieID := 1 + uncoreFreqExp := 2000.0 + + m := &uncoreFreqMock{} + + // mock getting the current uncore frequency + m.On("getUncoreFrequencyMhz", packageID, dieID, "current").Return(uncoreFreqExp, nil).Once() + + pt := &PowerTelemetry{ + uncoreFreq: m, + } + + uncoreFreqOut, err := pt.GetCurrentUncoreFrequency(packageID, dieID) + + require.NoError(t, err) + require.Equal(t, uncoreFreqExp, uncoreFreqOut) + m.AssertExpectations(t) + }) + + t.Run("FromMsr", func(t *testing.T) { + t.Run("UncoreFreqFailed", func(t *testing.T) { + packageID := 1 + dieID := 0 + msrValue := uint64(0xffffff08) + uncoreFreqExp := 800.0 + + mUncoreFreq := &uncoreFreqMock{} + + // mock getting current uncore frequency + mUncoreFreq.On("getUncoreFrequencyMhz", packageID, dieID, "current").Return(0.0, errors.New("failed to read current uncore frequency file")).Once() + + mMsr := &msrMock{} + + // mock reading msr offset UNCORE_PERF_STATUS of CPU ID 1 + mMsr.On("read", uint32(uncorePerfStatus), 1).Return(msrValue, nil).Once() + + pt := &PowerTelemetry{ + topology: newTopology, + uncoreFreq: mUncoreFreq, + msr: mMsr, + + cpus: []int{0, 1, 2}, + } + + uncoreFreqOut, err := pt.GetCurrentUncoreFrequency(packageID, dieID) + + require.Equal(t, uncoreFreqExp, uncoreFreqOut) + require.NoError(t, err) + mUncoreFreq.AssertExpectations(t) + mMsr.AssertExpectations(t) + }) + + t.Run("UncoreFreqIsNil", func(t *testing.T) { + packageID := 1 + dieID := 0 + msrValue := uint64(0xffffff08) + uncoreFreqExp := 800.0 + + mMsr := &msrMock{} + + // mock reading msr offset UNCORE_PERF_STATUS of CPU ID 1 + mMsr.On("read", uint32(uncorePerfStatus), 1).Return(msrValue, nil).Once() + + pt := &PowerTelemetry{ + topology: newTopology, + msr: mMsr, + + cpus: []int{0, 1, 2}, + } + + uncoreFreqOut, err := pt.GetCurrentUncoreFrequency(packageID, dieID) + + require.Equal(t, uncoreFreqExp, uncoreFreqOut) + require.NoError(t, err) + mMsr.AssertExpectations(t) + }) + }) + + t.Run("FromMsrFailed", func(t *testing.T) { 
+ t.Run("MsrIsNil", func(t *testing.T) { + packageID := 0 + dieID := 1 + uncoreFreqExp := 0.0 + + pt := &PowerTelemetry{} + + uncoreFreqOut, err := pt.GetCurrentUncoreFrequency(packageID, dieID) + + require.ErrorContains(t, err, "\"msr\" is not initialized") + require.Equal(t, uncoreFreqExp, uncoreFreqOut) + }) + + t.Run("PackageIDNotFound", func(t *testing.T) { + packageID := 2 + dieID := 1 + uncoreFreqExp := 0.0 + + mMsr := &msrMock{} + mUncoreFreq := &uncoreFreqMock{} + + // mock getting current uncore frequency + mUncoreFreq.On("getUncoreFrequencyMhz", packageID, dieID, "current").Return(0.0, errors.New("failed to read current uncore frequency file")).Once() + + pt := &PowerTelemetry{ + topology: newTopology, + uncoreFreq: mUncoreFreq, + msr: mMsr, + + cpus: []int{0, 1, 2}, + } + + uncoreFreqOut, err := pt.GetCurrentUncoreFrequency(packageID, dieID) + require.Equal(t, uncoreFreqExp, uncoreFreqOut) + require.ErrorContains(t, err, "unable to get CPU ID for package ID: 2") + mUncoreFreq.AssertExpectations(t) + mMsr.AssertExpectations(t) + }) + + t.Run("ReadMsrError", func(t *testing.T) { + packageID := 1 + dieID := 0 + uncoreFreqExp := 0.0 + errMsg := "error reading msr file" + + mUncoreFreq := &uncoreFreqMock{} + + // mock getting current uncore frequency + mUncoreFreq.On("getUncoreFrequencyMhz", packageID, dieID, "current").Return(0.0, errors.New("failed to read current uncore frequency file")).Once() + + mMsr := &msrMock{} + + // mock reading msr offset UNCORE_PERF_STATUS of CPU ID 1 + mMsr.On("read", uint32(uncorePerfStatus), 1).Return(uint64(0), errors.New(errMsg)).Once() + + pt := &PowerTelemetry{ + topology: newTopology, + uncoreFreq: mUncoreFreq, + msr: mMsr, + + cpus: []int{0, 1, 2}, + } + + uncoreFreqOut, err := pt.GetCurrentUncoreFrequency(packageID, dieID) + + require.Equal(t, uncoreFreqExp, uncoreFreqOut) + require.ErrorContains(t, err, errMsg) + mUncoreFreq.AssertExpectations(t) + mMsr.AssertExpectations(t) + }) + }) +} + +func TestGetCPUFrequency(t 
*testing.T) { + pt := &PowerTelemetry{ + cpuFreq: &cpuFreqData{ + cpuFrequencyFilePath: "testdata/cpu-freq", + }, + } + + t.Run("CPUFreqIsNil", func(t *testing.T) { + expectedFreq := 0.0 + + ptel := &PowerTelemetry{} + + actualFreq, err := ptel.GetCPUFrequency(0) + require.Equal(t, expectedFreq, actualFreq) + require.ErrorContains(t, err, "\"cpu_frequency\" is not initialized") + }) + + t.Run("Valid", func(t *testing.T) { + expectedFreq := 888.888 + actualFreq, err := pt.GetCPUFrequency(0) + require.Equal(t, expectedFreq, actualFreq) + require.NoError(t, err) + }) + + t.Run("Invalid", func(t *testing.T) { + expectedFreq := 0.0 + expectedError := "error reading file" + actualFreq, err := pt.GetCPUFrequency(1) + require.Equal(t, expectedFreq, actualFreq) + require.ErrorContains(t, err, expectedError) + }) +} + +func TestGetBusClock(t *testing.T) { + type msrFreqTuple struct { + msrValue uint64 + freq float64 + } + + models100 := []int{ + 0x2A, // INTEL_FAM6_SANDYBRIDGE + 0x2D, // INTEL_FAM6_SANDYBRIDGE_X + 0x3A, // INTEL_FAM6_IVYBRIDGE + 0x3E, // INTEL_FAM6_IVYBRIDGE_X + 0x3C, // INTEL_FAM6_HASWELL + 0x3F, // INTEL_FAM6_HASWELL_X + 0x45, // INTEL_FAM6_HASWELL_L + 0x46, // INTEL_FAM6_HASWELL_G + 0x3D, // INTEL_FAM6_BROADWELL + 0x47, // INTEL_FAM6_BROADWELL_G + 0x4F, // INTEL_FAM6_BROADWELL_X + 0x56, // INTEL_FAM6_BROADWELL_D + 0x4E, // INTEL_FAM6_SKYLAKE_L + 0x5E, // INTEL_FAM6_SKYLAKE + 0x55, // INTEL_FAM6_SKYLAKE_X + 0x8E, // INTEL_FAM6_KABYLAKE_L + 0x9E, // INTEL_FAM6_KABYLAKE + 0xA5, // INTEL_FAM6_COMETLAKE + 0xA6, // INTEL_FAM6_COMETLAKE_L + 0x66, // INTEL_FAM6_CANNONLAKE_L + 0x6A, // INTEL_FAM6_ICELAKE_X + 0x6C, // INTEL_FAM6_ICELAKE_D + 0x7D, // INTEL_FAM6_ICELAKE + 0x7E, // INTEL_FAM6_ICELAKE_L + 0x9D, // INTEL_FAM6_ICELAKE_NNPI + 0xA7, // INTEL_FAM6_ROCKETLAKE + 0x8C, // INTEL_FAM6_TIGERLAKE_L + 0x8D, // INTEL_FAM6_TIGERLAKE + 0x8F, // INTEL_FAM6_SAPPHIRERAPIDS_X + 0xCF, // INTEL_FAM6_EMERALDRAPIDS_X + 0xAD, // INTEL_FAM6_GRANITERAPIDS_X + 0x8A, // 
INTEL_FAM6_LAKEFIELD + 0x97, // INTEL_FAM6_ALDERLAKE + 0x9A, // INTEL_FAM6_ALDERLAKE_L + 0xB7, // INTEL_FAM6_RAPTORLAKE + 0xBA, // INTEL_FAM6_RAPTORLAKE_P + 0xBF, // INTEL_FAM6_RAPTORLAKE_S + 0xAC, // INTEL_FAM6_METEORLAKE + 0xAA, // INTEL_FAM6_METEORLAKE_L + 0xC6, // INTEL_FAM6_ARROWLAKE + 0xBD, // INTEL_FAM6_LUNARLAKE_M + 0x5C, // INTEL_FAM6_ATOM_GOLDMONT + 0x5F, // INTEL_FAM6_ATOM_GOLDMONT_D + 0x7A, // INTEL_FAM6_ATOM_GOLDMONT_PLUS + 0x86, // INTEL_FAM6_ATOM_TREMONT_D + 0x96, // INTEL_FAM6_ATOM_TREMONT + 0x9C, // INTEL_FAM6_ATOM_TREMONT_L + 0xBE, // INTEL_FAM6_ATOM_GRACEMONT + 0xAF, // INTEL_FAM6_ATOM_CRESTMONT_X + 0xB6, // INTEL_FAM6_ATOM_CRESTMONT + 0x57, // INTEL_FAM6_XEON_PHI_KNL + 0x85, // INTEL_FAM6_XEON_PHI_KNM + } + + models133 := []int{ + 0x1E, // INTEL_FAM6_NEHALEM + 0x1F, // INTEL_FAM6_NEHALEM_G + 0x1A, // INTEL_FAM6_NEHALEM_EP + 0x2E, // INTEL_FAM6_NEHALEM_EX + 0x25, // INTEL_FAM6_WESTMERE + 0x2C, // INTEL_FAM6_WESTMERE_EP + 0x2F, // INTEL_FAM6_WESTMERE_EX + } + + modelsSilvermont := []int{ + 0x37, // INTEL_FAM6_ATOM_SILVERMONT + 0x4D, // INTEL_FAM6_ATOM_SILVERMONT_D + 0x4A, // INTEL_FAM6_ATOM_SILVERMONT_MID + 0x5A, // INTEL_FAM6_ATOM_SILVERMONT_SMARTPHONE + } + + modelsAirmont := []int{ + 0x4C, // INTEL_FAM6_ATOM_AIRMONT + } + + supportedModels := make([]int, 0) + supportedModels = append(supportedModels, models100...) + supportedModels = append(supportedModels, models133...) + supportedModels = append(supportedModels, modelsSilvermont...) + supportedModels = append(supportedModels, modelsAirmont...) 
+ + supportedModelMap := map[int]interface{}{} + for _, m := range supportedModels { + supportedModelMap[m] = struct{}{} + } + + t.Run("BusClockSilvermont", func(t *testing.T) { + t.Run("NoCPUsAvailable", func(t *testing.T) { + busClockExp := 0.0 + + for _, model := range modelsSilvermont { + pt := &PowerTelemetry{ + cpus: []int{}, // no CPU IDs available + } + + busClockOut, err := pt.getBusClock(model) + require.Equal(t, busClockExp, busClockOut) + require.ErrorContains(t, err, "no available CPUs were found") + } + }) + + t.Run("MsrIsNil", func(t *testing.T) { + busClockExp := 0.0 + + for _, model := range modelsSilvermont { + pt := &PowerTelemetry{ + cpus: []int{0}, + } + + busClockOut, err := pt.getBusClock(model) + require.Equal(t, busClockExp, busClockOut) + require.ErrorContains(t, err, "\"msr\" is not initialized") + } + }) + + t.Run("FailedToReadMsr", func(t *testing.T) { + cpuID := 0 + busClockExp := 0.0 + mError := errors.New("mock error") + + for _, model := range modelsSilvermont { + mMsr := msrMock{} + mMsr.On("read", uint32(fsbFreq), cpuID).Return(uint64(0), mError).Once() + + pt := &PowerTelemetry{ + msr: &mMsr, + cpus: []int{cpuID}, + } + + busClockOut, err := pt.getBusClock(model) + require.Equal(t, busClockExp, busClockOut) + require.ErrorContains(t, err, mError.Error()) + mMsr.AssertExpectations(t) + } + }) + + t.Run("InvalidFrequencyIndex", func(t *testing.T) { + cpuID := 0 + busClockExp := 0.0 + + for _, model := range modelsSilvermont { + mMsr := msrMock{} + mMsrValue := uint64(0xF5) + mMsr.On("read", uint32(fsbFreq), cpuID).Return(mMsrValue, nil).Once() + + pt := &PowerTelemetry{ + msr: &mMsr, + cpus: []int{cpuID}, + } + + busClockOut, err := pt.getBusClock(model) + require.Equal(t, busClockExp, busClockOut) + require.ErrorContains(t, err, fmt.Sprintf("error while getting bus clock: index %d is outside of bounds", 5)) + mMsr.AssertExpectations(t) + } + }) + + t.Run("Ok", func(t *testing.T) { + cpuID := 0 + silvermontTuples := []msrFreqTuple{ 
+ {0x00, 83.3}, + {0x01, 100.0}, + {0x02, 133.3}, + {0x03, 116.7}, + {0x04, 80.0}, + } + for _, model := range modelsSilvermont { + for _, tuple := range silvermontTuples { + mMsr := msrMock{} + mMsrValue := tuple.msrValue + mMsr.On("read", uint32(fsbFreq), cpuID).Return(mMsrValue, nil).Once() + + pt := &PowerTelemetry{ + msr: &mMsr, + cpus: []int{cpuID}, + } + + busClockExp := tuple.freq + busClockOut, err := pt.getBusClock(model) + require.Equal(t, busClockExp, busClockOut) + require.NoError(t, err) + mMsr.AssertExpectations(t) + } + } + }) + }) + + t.Run("BusClockAirmont", func(t *testing.T) { + t.Run("NoCPUsAvailable", func(t *testing.T) { + busClockExp := 0.0 + + for _, model := range modelsAirmont { + pt := &PowerTelemetry{ + cpus: []int{}, // no CPU IDs available + } + + busClockOut, err := pt.getBusClock(model) + require.Equal(t, busClockExp, busClockOut) + require.ErrorContains(t, err, "no available CPUs were found") + } + }) + + t.Run("MsrIsNil", func(t *testing.T) { + busClockExp := 0.0 + + for _, model := range modelsAirmont { + pt := &PowerTelemetry{ + cpus: []int{0}, + } + + busClockOut, err := pt.getBusClock(model) + require.Equal(t, busClockExp, busClockOut) + require.ErrorContains(t, err, "\"msr\" is not initialized") + } + }) + + t.Run("FailedToReadMsr", func(t *testing.T) { + cpuID := 0 + busClockExp := 0.0 + mError := errors.New("mock error") + + for _, model := range modelsAirmont { + mMsr := msrMock{} + mMsr.On("read", uint32(fsbFreq), cpuID).Return(uint64(0), mError).Once() + + pt := &PowerTelemetry{ + msr: &mMsr, + cpus: []int{cpuID}, + } + + busClockOut, err := pt.getBusClock(model) + require.Equal(t, busClockExp, busClockOut) + require.ErrorContains(t, err, mError.Error()) + mMsr.AssertExpectations(t) + } + }) + + t.Run("InvalidFrequencyIndex", func(t *testing.T) { + cpuID := 0 + busClockExp := 0.0 + + for _, model := range modelsAirmont { + mMsr := msrMock{} + mMsrValue := uint64(0xF9) + mMsr.On("read", uint32(fsbFreq), 
cpuID).Return(mMsrValue, nil).Once() + + pt := &PowerTelemetry{ + msr: &mMsr, + cpus: []int{cpuID}, + } + + busClockOut, err := pt.getBusClock(model) + require.Equal(t, busClockExp, busClockOut) + require.ErrorContains(t, err, fmt.Sprintf("error while getting bus clock: index %d is outside of bounds", 9)) + mMsr.AssertExpectations(t) + } + }) + + t.Run("Ok", func(t *testing.T) { + cpuID := 0 + airmontTuples := []msrFreqTuple{ + {0x00, 83.3}, + {0x01, 100.0}, + {0x02, 133.3}, + {0x03, 116.7}, + {0x04, 80.0}, + {0x05, 93.3}, + {0x06, 90.0}, + {0x07, 88.9}, + {0x08, 87.5}, + } + for _, model := range modelsAirmont { + for _, tuple := range airmontTuples { + mMsr := msrMock{} + mMsrValue := tuple.msrValue + mMsr.On("read", uint32(fsbFreq), cpuID).Return(mMsrValue, nil).Once() + + pt := &PowerTelemetry{ + msr: &mMsr, + cpus: []int{cpuID}, + } + + busClockExp := tuple.freq + busClockOut, err := pt.getBusClock(model) + require.Equal(t, busClockExp, busClockOut) + require.NoError(t, err) + mMsr.AssertExpectations(t) + } + } + }) + }) + + t.Run("BusClock100.0", func(t *testing.T) { + busClockExp := 100.0 + pt := &PowerTelemetry{} + + for _, model := range models100 { + busClockOut, err := pt.getBusClock(model) + require.NoError(t, err) + require.Equalf(t, busClockExp, busClockOut, "Model 0x%X", model) + } + }) + + t.Run("BusClock133.0", func(t *testing.T) { + busClockExp := 133.0 + pt := &PowerTelemetry{} + + for _, model := range models133 { + busClockOut, err := pt.getBusClock(model) + require.NoError(t, err) + require.Equalf(t, busClockExp, busClockOut, "Model 0x%X", model) + } + }) + + t.Run("UnsupportedModels", func(t *testing.T) { + busClockExp := 0.0 + pt := &PowerTelemetry{} + + for model := 0; model < 0xFF; model++ { + if supportedModelMap[model] == nil { + busClockOut, err := pt.getBusClock(model) + require.Equalf(t, busClockExp, busClockOut, "Model 0x%X", model) + require.ErrorContains(t, err, fmt.Sprintf("busClock is not supported by the CPU model: %v", model)) 
+ } + } + }) +} + +func TestGetCPUTemperature(t *testing.T) { + t.Run("MsrIsNil", func(t *testing.T) { + cpuID := 0 + + tempExp := uint64(0) + + pt := &PowerTelemetry{} + + tempOut, err := pt.GetCPUTemperature(cpuID) + require.Equal(t, tempExp, tempOut) + require.ErrorContains(t, err, "\"msr\" is not initialized") + }) + + t.Run("FailedToReadTemperatureTarget", func(t *testing.T) { + cpuID := 0 + mError := errors.New("mock error") + + tempExp := uint64(0) + + m := &msrMock{} + m.On("read", uint32(temperatureTarget), cpuID).Return(uint64(0), mError).Once() + + pt := &PowerTelemetry{ + topology: &topologyData{ + model: cpumodel.INTEL_FAM6_SAPPHIRERAPIDS_X, + }, + msr: m, + } + + temOut, err := pt.GetCPUTemperature(cpuID) + require.Equal(t, tempExp, temOut) + require.ErrorContains(t, err, mError.Error()) + m.AssertExpectations(t) + }) + + t.Run("ModelNotSupported", func(t *testing.T) { + cpuID := 0 + // CPU temp metric not supported by this model. + cpuModel := cpumodel.INTEL_FAM6_GRANITERAPIDS_D + + tempExp := uint64(0) + + m := &msrMock{} + + pt := &PowerTelemetry{ + msr: m, + topology: &topologyData{ + model: cpuModel, + }, + } + + tempOut, err := pt.GetCPUTemperature(cpuID) + require.Equal(t, tempExp, tempOut) + require.ErrorContains(t, err, fmt.Sprintf("cpu temperature metric not supported by CPU model: 0x%X", cpuModel)) + m.AssertExpectations(t) + }) + + t.Run("FailedToReadThermalStatus", func(t *testing.T) { + cpuID := 0 + tempTargetValue := uint64(0x680a00) + mError := errors.New("mock error") + + tempExp := uint64(0) + + m := &msrMock{} + m.On("read", uint32(temperatureTarget), cpuID).Return(tempTargetValue, nil).Once() + m.On("read", uint32(thermalStatus), cpuID).Return(uint64(0), mError).Once() + + pt := &PowerTelemetry{ + topology: &topologyData{ + model: cpumodel.INTEL_FAM6_SAPPHIRERAPIDS_X, + }, + msr: m, + } + + temOut, err := pt.GetCPUTemperature(cpuID) + require.Equal(t, tempExp, temOut) + require.ErrorContains(t, err, mError.Error()) + 
m.AssertExpectations(t) + }) + + t.Run("Temp23Celsius", func(t *testing.T) { + cpuID := 0 + tempTargetValue := uint64(0x680a00) + thermalStatusValue := uint64(0x88510000) + + tempExp := uint64(23) + + m := &msrMock{} + m.On("read", uint32(temperatureTarget), cpuID).Return(tempTargetValue, nil).Once() + m.On("read", uint32(thermalStatus), cpuID).Return(thermalStatusValue, nil).Once() + + pt := &PowerTelemetry{ + topology: &topologyData{ + model: cpumodel.INTEL_FAM6_EMERALDRAPIDS_X, + }, + msr: m, + } + + temOut, err := pt.GetCPUTemperature(cpuID) + require.Equal(t, tempExp, temOut) + require.NoError(t, err) + m.AssertExpectations(t) + }) + + t.Run("Temp36Celsius", func(t *testing.T) { + cpuID := 0 + tempTargetValue := uint64(0x630a00) + thermalStatusValue := uint64(0x883f0800) + + tempExp := uint64(36) + + m := &msrMock{} + m.On("read", uint32(temperatureTarget), cpuID).Return(tempTargetValue, nil).Once() + m.On("read", uint32(thermalStatus), cpuID).Return(thermalStatusValue, nil).Once() + + pt := &PowerTelemetry{ + topology: &topologyData{ + model: cpumodel.INTEL_FAM6_EMERALDRAPIDS_X, + }, + msr: m, + } + + temOut, err := pt.GetCPUTemperature(cpuID) + require.Equal(t, tempExp, temOut) + require.NoError(t, err) + m.AssertExpectations(t) + }) +} + +func TestGetCPUBaseFrequency(t *testing.T) { + t.Run("MsrIsNil", func(t *testing.T) { + // input arguments for test case + cpuID := 0 + + // expected output + expectedFreq := uint64(0) + + // power telemetry instance definition + pt := PowerTelemetry{ + topology: &topologyData{}, + } + + actualFreq, err := pt.GetCPUBaseFrequency(cpuID) + require.ErrorContains(t, err, "\"msr\" is not initialized") + require.Equal(t, expectedFreq, actualFreq) + }) + + t.Run("NoCPUIDsAvailable", func(t *testing.T) { + // input arguments for test case + packageID := 0 + busClk := 100.0 + + // expected output + expectedFreq := uint64(0) + + // topology definition + topo := &topologyData{ + topologyMap: map[int]*cpuInfo{ + 0: { + packageID: 0, + 
}, + 1: { + packageID: 0, + }, + 2: { + packageID: 0, + }, + 3: { + packageID: 0, + }, + 4: { + packageID: 0, + }, + }, + model: cpumodel.INTEL_FAM6_EMERALDRAPIDS_X, + } + + // msr definition + m := &msrMock{} + + // power telemetry instance definition + pt := PowerTelemetry{ + topology: topo, + msr: m, + cpus: []int{}, // no cpus available + busClock: busClk, + } + + actualFreq, err := pt.GetCPUBaseFrequency(packageID) + require.ErrorContains(t, err, fmt.Sprintf("could not find CPU ID for package ID %v", packageID)) + require.Equal(t, expectedFreq, actualFreq) + m.AssertExpectations(t) + }) + + t.Run("CPUIDNotAvailable", func(t *testing.T) { + // input arguments for test case + packageID := 1 + busClk := 100.0 + + // expected output + expectedFreq := uint64(0) + + // topology definition + topo := &topologyData{ + topologyMap: map[int]*cpuInfo{ + 0: { + packageID: 0, + }, + 1: { + packageID: 1, + }, + 2: { + packageID: 0, + }, + 3: { + packageID: 1, + }, + 4: { + packageID: 0, + }, + }, + model: cpumodel.INTEL_FAM6_EMERALDRAPIDS_X, + } + + // msr definition + m := &msrMock{} + + // power telemetry instance definition + pt := PowerTelemetry{ + topology: topo, + msr: m, + cpus: []int{0, 2, 4}, + busClock: busClk, + } + + actualFreq, err := pt.GetCPUBaseFrequency(packageID) + require.ErrorContains(t, err, fmt.Sprintf("could not find CPU ID for package ID %v", packageID)) + require.Equal(t, expectedFreq, actualFreq) + m.AssertExpectations(t) + }) + + t.Run("BaseFreq", func(t *testing.T) { + // input arguments for test case + cpuID := 2 + packageID := 1 + cpuModel := cpumodel.INTEL_FAM6_ATOM_SILVERMONT + msrValue := uint64(0x1234) + busClk := 100.0 + + // expected output + expectedFreq := uint64(1800) + + // msr definition + m := &msrMock{} + m.On("read", uint32(platformInfo), cpuID).Return(msrValue, nil) + + // topology definition + topo := &topologyData{ + topologyMap: map[int]*cpuInfo{ + 0: { + packageID: 0, + }, + 1: { + packageID: 0, + }, + 2: { + packageID: 
packageID, + }, + }, + model: cpuModel, + } + + // power telemetry instance definition + pt := PowerTelemetry{ + msr: m, + topology: topo, + cpus: []int{0, 1, 2}, + busClock: busClk, + } + + actualFreq, err := pt.GetCPUBaseFrequency(packageID) + require.NoError(t, err) + require.Equal(t, expectedFreq, actualFreq) + m.AssertExpectations(t) + }) + + t.Run("BaseFreqFractional", func(t *testing.T) { + // input arguments for test case + packageID := 0 + cpuID := 0 + cpuModel := cpumodel.INTEL_FAM6_ATOM_SILVERMONT + msrValue := uint64(0x1234) + busClk := 116.7 + + // expected output + expectedFreq := uint64(2100) + + // msr definition + m := &msrMock{} + m.On("read", uint32(platformInfo), cpuID).Return(msrValue, nil) + + // topology definition + topo := &topologyData{ + topologyMap: map[int]*cpuInfo{ + 0: { + packageID: packageID, + }, + 1: { + packageID: packageID, + }, + }, + model: cpuModel, + } + + // power telemetry instance definition + pt := PowerTelemetry{ + msr: m, + topology: topo, + busClock: busClk, + cpus: []int{0, 1}, + } + + actualFreq, err := pt.GetCPUBaseFrequency(packageID) + require.NoError(t, err) + require.Equal(t, expectedFreq, actualFreq) + m.AssertExpectations(t) + }) + + t.Run("UnsupportedModel", func(t *testing.T) { + // input arguments for test case + packageID := 1 + cpuModel := cpumodel.INTEL_FAM6_CORE2_MEROM + + // expected output + expectedFreq := uint64(0) + expectedErr := fmt.Errorf("base frequency metric not supported by CPU model: 0x%X", cpuModel) + + // topology definition + topo := &topologyData{ + topologyMap: map[int]*cpuInfo{ + 0: { + packageID: 0, + }, + 1: { + packageID: 0, + }, + 2: { + packageID: packageID, + }, + }, + model: cpuModel, + } + + // msr definition + m := &msrMock{} + + // power telemetry instance definition + pt := PowerTelemetry{ + topology: topo, + msr: m, + cpus: []int{0, 1, 2}, + } + + actualFreq, err := pt.GetCPUBaseFrequency(packageID) + require.Equal(t, expectedFreq, actualFreq) + require.ErrorContains(t, 
err, expectedErr.Error()) + m.AssertExpectations(t) + }) + + t.Run("ErrorReadingMsr", func(t *testing.T) { + // input arguments for test case + packageID := 0 + cpuID := 0 + cpuModel := cpumodel.INTEL_FAM6_ATOM_SILVERMONT + msrValue := uint64(0x1234) + + // expected output + expectedFreq := uint64(0) + expectedErr := errors.New("error reading msr") + + // msr definition + m := &msrMock{} + m.On("read", uint32(platformInfo), cpuID).Return(msrValue, errors.New("error reading msr")) + + // topology definition + topo := &topologyData{ + topologyMap: map[int]*cpuInfo{ + 0: { + packageID: packageID, + }, + 1: { + packageID: packageID, + }, + }, + model: cpuModel, + } + + // power telemetry instance definition + pt := PowerTelemetry{ + msr: m, + topology: topo, + cpus: []int{0, 1}, + } + + actualFreq, err := pt.GetCPUBaseFrequency(cpuID) + require.ErrorContains(t, err, expectedErr.Error()) + require.Equal(t, expectedFreq, actualFreq) + m.AssertExpectations(t) + }) +} + +type msrGetOffsetDeltasResult struct { + values map[uint32]uint64 + err error +} + +type msrGetTimestampDeltaResult struct { + value time.Duration + err error +} + +func TestGetCPUC0StateResidency(t *testing.T) { + t.Run("MsrIsNil", func(t *testing.T) { + cpuID := 1 + c0Exp := 0.0 + + pt := &PowerTelemetry{} + + c0Out, err := pt.GetCPUC0StateResidency(cpuID) + require.Equal(t, c0Exp, c0Out) + require.ErrorContains(t, err, "\"msr\" is not initialized") + }) + + t.Run("InvalidCPUID", func(t *testing.T) { + // input arguments for test case + cpuID := 1 + mockOffsetDeltas := &msrGetOffsetDeltasResult{ + values: nil, + err: errors.New("CPU ID 1 not found"), + } + + // expected output + expectedResult := 0.0 + expectedErr := errors.New("error retrieving offset deltas for CPU ID") + + // msr definition + m := &msrMock{} + m.On("getOffsetDeltas", cpuID).Return(mockOffsetDeltas.values, mockOffsetDeltas.err).Once() + + // power telemetry instance definition + pt := &PowerTelemetry{ + msr: m, + } + + out, err := 
pt.GetCPUC0StateResidency(cpuID) + require.Equal(t, expectedResult, out) + require.ErrorContains(t, err, expectedErr.Error()) + m.AssertExpectations(t) + }) + + t.Run("TSCDeltaZero", func(t *testing.T) { + // input arguments for test case + cpuID := 0 + mockOffsetDeltas := &msrGetOffsetDeltasResult{ + values: map[uint32]uint64{ + maxFreqClockCount: 200, + timestampCounter: 0, + }, + err: nil, + } + // expected output + expectedResult := 0.0 + expectedErr := errors.New("timestamp counter offset delta is zero for CPU ID") + + // msr definition + m := &msrMock{} + m.On("getOffsetDeltas", cpuID).Return(mockOffsetDeltas.values, mockOffsetDeltas.err).Once() + + // power telemetry instance definition + pt := &PowerTelemetry{ + msr: m, + } + + out, err := pt.GetCPUC0StateResidency(cpuID) + require.Equal(t, expectedResult, out) + require.ErrorContains(t, err, expectedErr.Error()) + m.AssertExpectations(t) + }) + + t.Run("MperfOffsetDeltaNotFound", func(t *testing.T) { + // input arguments for test case + cpuID := 0 + mockOffsetDeltas := &msrGetOffsetDeltasResult{ + values: map[uint32]uint64{ + timestampCounter: 5000, + }, + err: nil, + } + + // expected output + expectedResult := 0.0 + expectedErr := errors.New("mperf offset delta not found for CPU ID") + + // msr definition + m := &msrMock{} + m.On("getOffsetDeltas", cpuID).Return(mockOffsetDeltas.values, mockOffsetDeltas.err).Once() + + // power telemetry instance definition + pt := &PowerTelemetry{ + msr: m, + } + + out, err := pt.GetCPUC0StateResidency(cpuID) + require.Equal(t, expectedResult, out) + require.ErrorContains(t, err, expectedErr.Error()) + m.AssertExpectations(t) + }) + + t.Run("TSCOffsetDeltaNotFound", func(t *testing.T) { + // input arguments for test case + cpuID := 0 + mockOffsetDeltas := &msrGetOffsetDeltasResult{ + values: map[uint32]uint64{ + maxFreqClockCount: 200, + }, + err: nil, + } + + // expected output + expectedResult := 0.0 + expectedErr := errors.New("timestamp counter offset delta not 
found for CPU ID") + + // msr definition + m := &msrMock{} + m.On("getOffsetDeltas", cpuID).Return(mockOffsetDeltas.values, mockOffsetDeltas.err).Once() + + // power telemetry instance definition + pt := &PowerTelemetry{ + msr: m, + } + + out, err := pt.GetCPUC0StateResidency(cpuID) + require.Equal(t, expectedResult, out) + require.ErrorContains(t, err, expectedErr.Error()) + m.AssertExpectations(t) + }) + + t.Run("C0State2Per", func(t *testing.T) { + // input arguments for test case + cpuID := 0 + mockOffsetDeltas := &msrGetOffsetDeltasResult{ + values: map[uint32]uint64{ + maxFreqClockCount: 100000000, + timestampCounter: 5000000000, + }, + err: nil, + } + + // expected output + expectedResult := 2.0 + + // msr definition + m := &msrMock{} + m.On("getOffsetDeltas", cpuID).Return(mockOffsetDeltas.values, mockOffsetDeltas.err).Once() + + // power telemetry instance definition + pt := &PowerTelemetry{ + msr: m, + } + + out, err := pt.GetCPUC0StateResidency(cpuID) + require.Equal(t, expectedResult, out) + require.NoError(t, err) + m.AssertExpectations(t) + }) +} + +func TestGetCPUC1StateResidency(t *testing.T) { + t.Run("MsrIsNil", func(t *testing.T) { + cpuID := 1 + c1Exp := 0.0 + + pt := PowerTelemetry{ + topology: &topologyData{}, + } + + c1Out, err := pt.GetCPUC1StateResidency(cpuID) + require.Equal(t, c1Exp, c1Out) + require.ErrorContains(t, err, "\"msr\" is not initialized") + }) + + t.Run("InvalidCPUID", func(t *testing.T) { + // input arguments for test case + cpuID := 1 + cpuModel := cpumodel.INTEL_FAM6_ATOM_SILVERMONT + mockOffsetDeltas := &msrGetOffsetDeltasResult{ + values: nil, + err: errors.New("CPU ID 1 not found"), + } + + // expected output + expectedResult := 0.0 + expectedErr := errors.New("error retrieving offset deltas for CPU ID") + + // msr definition + m := &msrMock{} + m.On("getOffsetDeltas", cpuID).Return(mockOffsetDeltas.values, mockOffsetDeltas.err).Once() + + // topology definition + topo := &topologyData{ + topologyMap: map[int]*cpuInfo{ 
+ 0: {}, + }, + model: cpuModel, + } + + // power telemetry instance definition + pt := &PowerTelemetry{ + topology: topo, + msr: m, + } + + out, err := pt.GetCPUC1StateResidency(cpuID) + require.Equal(t, expectedResult, out) + require.ErrorContains(t, err, expectedErr.Error()) + m.AssertExpectations(t) + }) + + t.Run("MperfOffsetDeltaNotFound", func(t *testing.T) { + // input arguments for test case + cpuID := 0 + cpuModel := cpumodel.INTEL_FAM6_ATOM_SILVERMONT + mockOffsetDeltas := &msrGetOffsetDeltasResult{ + values: map[uint32]uint64{ + timestampCounter: 5000, + }, + err: nil, + } + + // expected output + expectedResult := 0.0 + expectedErr := errors.New("mperf offset delta not found for CPU ID") + + // msr definition + m := &msrMock{} + m.On("getOffsetDeltas", cpuID).Return(mockOffsetDeltas.values, mockOffsetDeltas.err).Once() + + // topology definition + topo := &topologyData{ + topologyMap: map[int]*cpuInfo{ + 0: {}, + }, + model: cpuModel, + } + + // power telemetry instance definition + pt := &PowerTelemetry{ + topology: topo, + msr: m, + } + + out, err := pt.GetCPUC1StateResidency(cpuID) + require.Equal(t, expectedResult, out) + require.ErrorContains(t, err, expectedErr.Error()) + m.AssertExpectations(t) + }) + + t.Run("C3OffsetDeltaNotFound", func(t *testing.T) { + // input arguments for test case + cpuID := 0 + cpuModel := cpumodel.INTEL_FAM6_ATOM_SILVERMONT + mockOffsetDeltas := &msrGetOffsetDeltasResult{ + values: map[uint32]uint64{ + timestampCounter: 5000, + maxFreqClockCount: 100, + }, + err: nil, + } + + // expected output + expectedResult := 0.0 + expectedErr := errors.New("c3 state residency offset delta not found for CPU ID") + + // msr definition + m := &msrMock{} + m.On("getOffsetDeltas", cpuID).Return(mockOffsetDeltas.values, mockOffsetDeltas.err).Once() + + // topology definition + topo := &topologyData{ + topologyMap: map[int]*cpuInfo{ + 0: {}, + }, + model: cpuModel, + } + + // power telemetry instance definition + pt := &PowerTelemetry{ 
+ topology: topo, + msr: m, + } + + out, err := pt.GetCPUC1StateResidency(cpuID) + require.Equal(t, expectedResult, out) + require.ErrorContains(t, err, expectedErr.Error()) + m.AssertExpectations(t) + }) + + t.Run("C6OffsetDeltaNotFound", func(t *testing.T) { + // input arguments for test case + cpuID := 0 + cpuModel := cpumodel.INTEL_FAM6_ATOM_SILVERMONT + mockOffsetDeltas := &msrGetOffsetDeltasResult{ + values: map[uint32]uint64{ + timestampCounter: 5000, + maxFreqClockCount: 100, + c3Residency: 100, + }, + err: nil, + } + + // expected output + expectedResult := 0.0 + expectedErr := errors.New("c6 state residency offset delta not found for CPU ID") + + // msr definition + m := &msrMock{} + m.On("getOffsetDeltas", cpuID).Return(mockOffsetDeltas.values, mockOffsetDeltas.err).Once() + + // topology definition + topo := &topologyData{ + topologyMap: map[int]*cpuInfo{ + 0: {}, + }, + model: cpuModel, + } + + // power telemetry instance definition + pt := &PowerTelemetry{ + topology: topo, + msr: m, + } + + out, err := pt.GetCPUC1StateResidency(cpuID) + require.Equal(t, expectedResult, out) + require.ErrorContains(t, err, expectedErr.Error()) + m.AssertExpectations(t) + }) + + t.Run("C7OffsetDeltaNotFound", func(t *testing.T) { + // input arguments for test case + cpuID := 0 + cpuModel := cpumodel.INTEL_FAM6_ATOM_SILVERMONT + mockOffsetDeltas := &msrGetOffsetDeltasResult{ + values: map[uint32]uint64{ + timestampCounter: 5000, + maxFreqClockCount: 100, + c3Residency: 100, + c6Residency: 100, + }, + err: nil, + } + + // expected output + expectedResult := 0.0 + expectedErr := errors.New("c7 state residency offset delta not found for CPU ID") + + // msr definition + m := &msrMock{} + m.On("getOffsetDeltas", cpuID).Return(mockOffsetDeltas.values, mockOffsetDeltas.err).Once() + + // topology definition + topo := &topologyData{ + topologyMap: map[int]*cpuInfo{ + 0: {}, + }, + model: cpuModel, + } + + // power telemetry instance definition + pt := &PowerTelemetry{ + 
topology: topo, + msr: m, + } + + out, err := pt.GetCPUC1StateResidency(cpuID) + require.Equal(t, expectedResult, out) + require.ErrorContains(t, err, expectedErr.Error()) + m.AssertExpectations(t) + }) + + t.Run("TSCOffsetDeltaNotFound", func(t *testing.T) { + // input arguments for test case + cpuID := 0 + cpuModel := cpumodel.INTEL_FAM6_ATOM_SILVERMONT + mockOffsetDeltas := &msrGetOffsetDeltasResult{ + values: map[uint32]uint64{ + maxFreqClockCount: 100, + c3Residency: 100, + c6Residency: 100, + c7Residency: 100, + }, + err: nil, + } + + // expected output + expectedResult := 0.0 + expectedErr := errors.New("timestamp counter offset delta not found for CPU ID") + + // msr definition + m := &msrMock{} + m.On("getOffsetDeltas", cpuID).Return(mockOffsetDeltas.values, mockOffsetDeltas.err).Once() + + // topology definition + topo := &topologyData{ + topologyMap: map[int]*cpuInfo{ + 0: {}, + }, + model: cpuModel, + } + + // power telemetry instance definition + pt := &PowerTelemetry{ + topology: topo, + msr: m, + } + + out, err := pt.GetCPUC1StateResidency(cpuID) + require.Equal(t, expectedResult, out) + require.ErrorContains(t, err, expectedErr.Error()) + m.AssertExpectations(t) + }) + + t.Run("TSCDeltaZero", func(t *testing.T) { + // input arguments for test case + cpuID := 0 + cpuModel := cpumodel.INTEL_FAM6_ATOM_SILVERMONT + mockOffsetDeltas := &msrGetOffsetDeltasResult{ + values: map[uint32]uint64{ + timestampCounter: 0, + maxFreqClockCount: 100, + c3Residency: 100, + c6Residency: 100, + c7Residency: 100, + }, + err: nil, + } + + // expected output + expectedResult := 0.0 + expectedErr := errors.New("timestamp counter offset delta is zero for CPU ID") + + // msr definition + m := &msrMock{} + m.On("getOffsetDeltas", cpuID).Return(mockOffsetDeltas.values, mockOffsetDeltas.err).Once() + + // topology definition + topo := &topologyData{ + topologyMap: map[int]*cpuInfo{ + 0: {}, + }, + model: cpuModel, + } + + // power telemetry instance definition + pt := 
&PowerTelemetry{ + topology: topo, + msr: m, + } + + out, err := pt.GetCPUC1StateResidency(cpuID) + require.Equal(t, expectedResult, out) + require.ErrorContains(t, err, expectedErr.Error()) + m.AssertExpectations(t) + }) + + t.Run("C1Status20Per", func(t *testing.T) { + // input arguments for test case + cpuID := 0 + cpuModel := cpumodel.INTEL_FAM6_ATOM_SILVERMONT + mockOffsetDeltas := &msrGetOffsetDeltasResult{ + values: map[uint32]uint64{ + timestampCounter: 500, + maxFreqClockCount: 100, + c3Residency: 100, + c6Residency: 100, + c7Residency: 100, + }, + err: nil, + } + + // expected output + expectedResult := 20.0 + + // msr definition + m := &msrMock{} + m.On("getOffsetDeltas", cpuID).Return(mockOffsetDeltas.values, mockOffsetDeltas.err).Once() + + // topology definition + topo := &topologyData{ + topologyMap: map[int]*cpuInfo{ + 0: {}, + }, + model: cpuModel, + } + + // power telemetry instance definition + pt := &PowerTelemetry{ + topology: topo, + msr: m, + } + + out, err := pt.GetCPUC1StateResidency(cpuID) + require.Equal(t, expectedResult, out) + require.NoError(t, err) + m.AssertExpectations(t) + }) + + t.Run("UnsupportedModel", func(t *testing.T) { + // input arguments for test case + cpuID := 0 + cpuModel := cpumodel.INTEL_FAM6_CORE2_MEROM + + // expected output + expectedResult := 0.0 + expectedErr := fmt.Errorf("c1 state residency metric not supported by CPU model: 0x%X", cpuModel) + + // msr definition + m := &msrMock{} + + // topology definition + topo := &topologyData{ + topologyMap: map[int]*cpuInfo{ + 0: {}, + }, + model: cpuModel, + } + + // power telemetry instance definition + pt := &PowerTelemetry{ + topology: topo, + msr: m, + } + + out, err := pt.GetCPUC1StateResidency(cpuID) + require.Equal(t, expectedResult, out) + require.ErrorContains(t, err, expectedErr.Error()) + m.AssertExpectations(t) + }) + + t.Run("InvalidCPUModel", func(t *testing.T) { + // input arguments for test case + cpuID := 0 + + // expected output + expectedResult := 0.0 
+ expectedErr := errors.New("c1 state residency metric not supported by CPU model: 0x0") + + // msr definition + m := &msrMock{} + + // topology definition + topo := &topologyData{} + + // power telemetry instance definition + pt := &PowerTelemetry{ + topology: topo, + msr: m, + } + + out, err := pt.GetCPUC1StateResidency(cpuID) + require.Equal(t, expectedResult, out) + require.ErrorContains(t, err, expectedErr.Error()) + m.AssertExpectations(t) + }) +} + +func TestGetCPUC3StateResidency(t *testing.T) { + t.Run("MsrIsNil", func(t *testing.T) { + cpuID := 1 + c3Exp := 0.0 + + pt := PowerTelemetry{ + topology: &topologyData{}, + } + + c3Out, err := pt.GetCPUC3StateResidency(cpuID) + require.Equal(t, c3Exp, c3Out) + require.ErrorContains(t, err, "\"msr\" is not initialized") + }) + + t.Run("InvalidCPUID", func(t *testing.T) { + // input arguments for test case + cpuID := 1 + cpuModel := cpumodel.INTEL_FAM6_HASWELL + mockOffsetDeltas := &msrGetOffsetDeltasResult{ + values: nil, + err: errors.New("CPU ID 1 not found"), + } + + // expected output + expectedErr := errors.New("error retrieving offset deltas for CPU ID") + expectedResult := 0.0 + + // msr definition + m := &msrMock{} + m.On("getOffsetDeltas", cpuID).Return(mockOffsetDeltas.values, mockOffsetDeltas.err).Once() + + // topology definition + topo := &topologyData{ + topologyMap: map[int]*cpuInfo{ + 0: {}, + }, + model: cpuModel, + } + + // power telemetry instance definition + pt := &PowerTelemetry{ + topology: topo, + msr: m, + } + + out, err := pt.GetCPUC3StateResidency(cpuID) + require.Equal(t, expectedResult, out) + require.ErrorContains(t, err, expectedErr.Error()) + m.AssertExpectations(t) + }) + + t.Run("C3OffsetDeltaNotFound", func(t *testing.T) { + // input arguments for test case + cpuID := 0 + cpuModel := cpumodel.INTEL_FAM6_HASWELL + mockOffsetDeltas := &msrGetOffsetDeltasResult{ + values: map[uint32]uint64{ + timestampCounter: 5000, + }, + err: nil, + } + + // expected output + expectedResult := 
0.0 + expectedErr := errors.New("c3 state residency offset delta not found for CPU ID") + + // msr definition + m := &msrMock{} + m.On("getOffsetDeltas", cpuID).Return(mockOffsetDeltas.values, mockOffsetDeltas.err).Once() + + // topology definition + topo := &topologyData{ + topologyMap: map[int]*cpuInfo{ + 0: {}, + }, + model: cpuModel, + } + + // power telemetry instance definition + pt := &PowerTelemetry{ + topology: topo, + msr: m, + } + + out, err := pt.GetCPUC3StateResidency(cpuID) + require.Equal(t, expectedResult, out) + require.ErrorContains(t, err, expectedErr.Error()) + m.AssertExpectations(t) + }) + + t.Run("C3State4Per", func(t *testing.T) { + // input arguments for test case + cpuID := 0 + cpuModel := cpumodel.INTEL_FAM6_HASWELL + mockOffsetDeltas := &msrGetOffsetDeltasResult{ + values: map[uint32]uint64{ + c3Residency: 200000000, + timestampCounter: 5000000000, + }, + err: nil, + } + + // expected output + expectedResult := 4.0 + + // msr definition + m := &msrMock{} + m.On("getOffsetDeltas", cpuID).Return(mockOffsetDeltas.values, mockOffsetDeltas.err).Once() + + // topology definition + topo := &topologyData{ + topologyMap: map[int]*cpuInfo{ + 0: {}, + }, + model: cpuModel, + } + + // power telemetry instance definition + pt := &PowerTelemetry{ + topology: topo, + msr: m, + } + + out, err := pt.GetCPUC3StateResidency(cpuID) + require.Equal(t, expectedResult, out) + require.NoError(t, err) + m.AssertExpectations(t) + }) + + t.Run("TSCOffsetDeltaNotFound", func(t *testing.T) { + // input arguments for test case + cpuID := 0 + cpuModel := cpumodel.INTEL_FAM6_HASWELL + mockOffsetDeltas := &msrGetOffsetDeltasResult{ + values: map[uint32]uint64{ + c3Residency: 200000000, + }, + err: nil, + } + + // expected output + expectedResult := 0.0 + expectedErr := errors.New("timestamp counter offset delta not found for CPU ID") + + // msr definition + m := &msrMock{} + m.On("getOffsetDeltas", cpuID).Return(mockOffsetDeltas.values, mockOffsetDeltas.err).Once() + + 
// topology definition + topo := &topologyData{ + topologyMap: map[int]*cpuInfo{ + 0: {}, + }, + model: cpuModel, + } + + // power telemetry instance definition + pt := &PowerTelemetry{ + topology: topo, + msr: m, + } + + out, err := pt.GetCPUC3StateResidency(cpuID) + require.Equal(t, expectedResult, out) + require.ErrorContains(t, err, expectedErr.Error()) + m.AssertExpectations(t) + }) + + t.Run("TSCDeltaZero", func(t *testing.T) { + // input arguments for test case + cpuID := 0 + cpuModel := cpumodel.INTEL_FAM6_HASWELL + mockOffsetDeltas := &msrGetOffsetDeltasResult{ + values: map[uint32]uint64{ + c3Residency: 200, + timestampCounter: 0, + }, + err: nil, + } + + // expected output + expectedResult := 0.0 + expectedErr := errors.New("timestamp counter offset delta is zero for CPU ID") + + // msr definition + m := &msrMock{} + m.On("getOffsetDeltas", cpuID).Return(mockOffsetDeltas.values, mockOffsetDeltas.err).Once() + + // topology definition + topo := &topologyData{ + topologyMap: map[int]*cpuInfo{ + 0: {}, + }, + model: cpuModel, + } + + // power telemetry instance definition + pt := &PowerTelemetry{ + topology: topo, + msr: m, + } + + out, err := pt.GetCPUC3StateResidency(cpuID) + require.Equal(t, expectedResult, out) + require.ErrorContains(t, err, expectedErr.Error()) + m.AssertExpectations(t) + }) + + t.Run("UnsupportedModel", func(t *testing.T) { + // input arguments for test case + cpuModel := cpumodel.INTEL_FAM6_CORE2_MEROM + cpuID := 0 + + // expected output + expectedResult := 0.0 + expectedErr := fmt.Errorf("c3 state residency metric not supported by CPU model: 0x%X", cpuModel) + + // msr definition + m := &msrMock{} + + // topology definition + topo := &topologyData{ + topologyMap: map[int]*cpuInfo{ + 0: {}, + }, + model: cpuModel, + } + + // power telemetry instance definition + pt := &PowerTelemetry{ + topology: topo, + msr: m, + } + + out, err := pt.GetCPUC3StateResidency(cpuID) + require.Equal(t, expectedResult, out) + require.ErrorContains(t, 
err, expectedErr.Error())
		m.AssertExpectations(t)
	})

	t.Run("InvalidCPUModel", func(t *testing.T) {
		// input arguments for test case
		cpuID := 0

		// expected output
		expectedResult := 0.0
		expectedErr := errors.New("c3 state residency metric not supported by CPU model: 0x0")

		// msr definition
		m := &msrMock{}

		// topology definition
		topo := &topologyData{}

		// power telemetry instance definition
		pt := &PowerTelemetry{
			topology: topo,
			msr:      m,
		}

		out, err := pt.GetCPUC3StateResidency(cpuID)
		require.Equal(t, expectedResult, out)
		require.ErrorContains(t, err, expectedErr.Error())
		m.AssertExpectations(t)
	})
}

// TestGetCPUC6StateResidency exercises PowerTelemetry.GetCPUC6StateResidency with a mocked
// MSR reader. It covers the uninitialized-msr guard, errors while fetching offset deltas,
// missing or zero deltas, the successful percentage computation and CPU models for which
// the metric is reported as unsupported.
func TestGetCPUC6StateResidency(t *testing.T) {
	t.Run("MsrIsNil", func(t *testing.T) {
		cpuID := 1
		c6Exp := 0.0

		pt := PowerTelemetry{
			topology: &topologyData{},
		}

		c6Out, err := pt.GetCPUC6StateResidency(cpuID)
		require.Equal(t, c6Exp, c6Out)
		require.ErrorContains(t, err, "\"msr\" is not initialized")
	})

	// supportedTopo returns a fresh single-CPU topology using the model the
	// successful cases below rely on.
	supportedTopo := func() *topologyData {
		return &topologyData{
			topologyMap: map[int]*cpuInfo{
				0: {},
			},
			model: cpumodel.INTEL_FAM6_ATOM_SILVERMONT,
		}
	}

	tests := []struct {
		name     string
		cpuID    int
		topology topologyReader
		// deltas is the mocked result of getOffsetDeltas; nil means the call is not expected.
		deltas   *msrGetOffsetDeltasResult
		expected float64
		// errMsg is the expected error substring; empty means no error is expected.
		errMsg string
	}{
		{
			name:     "InvalidCPUID",
			cpuID:    1,
			topology: supportedTopo(),
			deltas: &msrGetOffsetDeltasResult{
				values: nil,
				err:    errors.New("CPU ID 1 not found"),
			},
			expected: 0.0,
			errMsg:   "error retrieving offset deltas for CPU ID",
		},
		{
			name:     "C6OffsetDeltaNotFound",
			cpuID:    0,
			topology: supportedTopo(),
			deltas: &msrGetOffsetDeltasResult{
				values: map[uint32]uint64{
					timestampCounter: 5000,
				},
				err: nil,
			},
			expected: 0.0,
			errMsg:   "c6 state residency offset delta not found for CPU ID",
		},
		{
			name:     "C6State4Per",
			cpuID:    0,
			topology: supportedTopo(),
			deltas: &msrGetOffsetDeltasResult{
				values: map[uint32]uint64{
					c6Residency:      200000000,
					timestampCounter: 5000000000,
				},
				err: nil,
			},
			expected: 4.0,
		},
		{
			name:     "TSCOffsetDeltaNotFound",
			cpuID:    0,
			topology: supportedTopo(),
			deltas: &msrGetOffsetDeltasResult{
				values: map[uint32]uint64{
					c6Residency: 200000000,
				},
				err: nil,
			},
			expected: 0.0,
			errMsg:   "timestamp counter offset delta not found for CPU ID",
		},
		{
			name:     "TSCDeltaZero",
			cpuID:    0,
			topology: supportedTopo(),
			deltas: &msrGetOffsetDeltasResult{
				values: map[uint32]uint64{
					c6Residency:      200,
					timestampCounter: 0,
				},
				err: nil,
			},
			expected: 0.0,
			errMsg:   "timestamp counter offset delta is zero for CPU ID",
		},
		{
			name:  "UnsupportedModel",
			cpuID: 0,
			topology: &topologyData{
				topologyMap: map[int]*cpuInfo{
					0: {},
				},
				model: cpumodel.INTEL_FAM6_CORE2_MEROM,
			},
			expected: 0.0,
			errMsg: fmt.Sprintf("c6 state residency metric not supported by CPU model: 0x%X",
				cpumodel.INTEL_FAM6_CORE2_MEROM),
		},
		{
			name:     "InvalidCPUModel",
			cpuID:    0,
			topology: &topologyData{},
			expected: 0.0,
			errMsg:   "c6 state residency metric not supported by CPU model: 0x0",
		},
	}

	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			// msr definition
			m := &msrMock{}
			if tt.deltas != nil {
				m.On("getOffsetDeltas", tt.cpuID).Return(tt.deltas.values, tt.deltas.err).Once()
			}

			// power telemetry instance definition
			pt := &PowerTelemetry{
				topology: tt.topology,
				msr:      m,
			}

			out, err := pt.GetCPUC6StateResidency(tt.cpuID)
			require.Equal(t, tt.expected, out)
			if tt.errMsg != "" {
				require.ErrorContains(t, err, tt.errMsg)
			} else {
				require.NoError(t, err)
			}
			m.AssertExpectations(t)
		})
	}
}

// TestGetCPUC7StateResidency exercises PowerTelemetry.GetCPUC7StateResidency with a mocked
// MSR reader. It covers the uninitialized-msr guard, errors while fetching offset deltas,
// missing or zero deltas, the successful percentage computation and CPU models for which
// the metric is reported as unsupported.
func TestGetCPUC7StateResidency(t *testing.T) {
	t.Run("MsrIsNil", func(t *testing.T) {
		cpuID := 1
		c7Exp := 0.0

		pt := PowerTelemetry{
			topology: &topologyData{},
		}

		c7Out, err := pt.GetCPUC7StateResidency(cpuID)
		require.Equal(t, c7Exp, c7Out)
		require.ErrorContains(t, err, "\"msr\" is not initialized")
	})

	// supportedTopo returns a fresh single-CPU topology using the model the
	// successful cases below rely on.
	supportedTopo := func() *topologyData {
		return &topologyData{
			topologyMap: map[int]*cpuInfo{
				0: {},
			},
			model: cpumodel.INTEL_FAM6_SKYLAKE,
		}
	}

	tests := []struct {
		name     string
		cpuID    int
		topology topologyReader
		// deltas is the mocked result of getOffsetDeltas; nil means the call is not expected.
		deltas   *msrGetOffsetDeltasResult
		expected float64
		// errMsg is the expected error substring; empty means no error is expected.
		errMsg string
	}{
		{
			name:     "InvalidCPUID",
			cpuID:    1,
			topology: supportedTopo(),
			deltas: &msrGetOffsetDeltasResult{
				values: nil,
				err:    errors.New("CPU ID 1 not found"),
			},
			expected: 0.0,
			errMsg:   "error retrieving offset deltas for CPU ID",
		},
		{
			name:     "C7OffsetDeltaNotFound",
			cpuID:    0,
			topology: supportedTopo(),
			deltas: &msrGetOffsetDeltasResult{
				values: map[uint32]uint64{
					timestampCounter: 5000,
				},
				err: nil,
			},
			expected: 0.0,
			errMsg:   "c7 state residency offset delta not found for CPU ID",
		},
		{
			name:     "C7State4Per",
			cpuID:    0,
			topology: supportedTopo(),
			deltas: &msrGetOffsetDeltasResult{
				values: map[uint32]uint64{
					c7Residency:      200000000,
					timestampCounter: 5000000000,
				},
				err: nil,
			},
			expected: 4.0,
		},
		{
			name:     "TSCOffsetDeltaNotFound",
			cpuID:    0,
			topology: supportedTopo(),
			deltas: &msrGetOffsetDeltasResult{
				values: map[uint32]uint64{
					c7Residency: 200000000,
				},
				err: nil,
			},
			expected: 0.0,
			errMsg:   "timestamp counter offset delta not found for CPU ID",
		},
		{
			name:     "TSCDeltaZero",
			cpuID:    0,
			topology: supportedTopo(),
			deltas: &msrGetOffsetDeltasResult{
				values: map[uint32]uint64{
					c7Residency:      200,
					timestampCounter: 0,
				},
				err: nil,
			},
			expected: 0.0,
			errMsg:   "timestamp counter offset delta is zero for CPU ID",
		},
		{
			name:  "UnsupportedModel",
			cpuID: 0,
			topology: &topologyData{
				topologyMap: map[int]*cpuInfo{
					0: {},
				},
				model: cpumodel.INTEL_FAM6_CORE2_MEROM,
			},
			expected: 0.0,
			errMsg: fmt.Sprintf("c7 state residency metric not supported by CPU model: 0x%X",
				cpumodel.INTEL_FAM6_CORE2_MEROM),
		},
		{
			// The zero-value topology carries model 0; the expected message pins the
			// formatted model, in line with the C3 and C6 variants of this case.
			name:     "InvalidCPUModel",
			cpuID:    0,
			topology: &topologyData{},
			expected: 0.0,
			errMsg:   "c7 state residency metric not supported by CPU model: 0x0",
		},
	}

	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			// msr definition
			m := &msrMock{}
			if tt.deltas != nil {
				m.On("getOffsetDeltas", tt.cpuID).Return(tt.deltas.values, tt.deltas.err).Once()
			}

			// power telemetry instance definition
			pt := &PowerTelemetry{
				topology: tt.topology,
				msr:      m,
			}

			out, err := pt.GetCPUC7StateResidency(tt.cpuID)
			require.Equal(t, tt.expected, out)
			if tt.errMsg != "" {
				require.ErrorContains(t, err, tt.errMsg)
			} else {
				require.NoError(t, err)
			}
			m.AssertExpectations(t)
		})
	}
}

// TestGetCPUBusyFrequencyMhz exercises PowerTelemetry.GetCPUBusyFrequencyMhz with a mocked
// MSR reader. It covers the uninitialized-msr guard, failures while fetching offset deltas
// or the timestamp delta, invalid time intervals, missing or zero counters and the
// successful computation.
func TestGetCPUBusyFrequencyMhz(t *testing.T) {
	t.Run("MsrIsNil", func(t *testing.T) {
		cpuID := 1
		busyFreqExp := 0.0

		pt := &PowerTelemetry{}

		busyFreqOut, err := pt.GetCPUBusyFrequencyMhz(cpuID)
		require.Equal(t, busyFreqExp, busyFreqOut)
		require.ErrorContains(t, err, "\"msr\" is not initialized")
	})

	// allDeltas returns a fresh, complete set of offset deltas (aperf, mperf and TSC).
	allDeltas := func() map[uint32]uint64 {
		return map[uint32]uint64{
			actualFreqClockCount: 100000000000,
			maxFreqClockCount:    200000000,
			timestampCounter:     1000000,
		}
	}

	tests := []struct {
		name  string
		cpuID int
		// topology is left nil for the cases that build PowerTelemetry without one.
		topology topologyReader
		deltas   msrGetOffsetDeltasResult
		// tsDelta is the mocked result of getTimestampDelta; nil means the call is not expected.
		tsDelta  *msrGetTimestampDeltaResult
		expected float64
		// errMsg is the expected error substring; empty means no error is expected.
		errMsg string
	}{
		{
			name:  "DeltasInvalidCPUID",
			cpuID: 1,
			deltas: msrGetOffsetDeltasResult{
				values: nil,
				err:    errors.New("CPU ID 1 not found"),
			},
			expected: 0.0,
			errMsg:   "error retrieving offset deltas for CPU ID",
		},
		{
			name:  "TimestampDeltaInvalidCPUID",
			cpuID: 1,
			deltas: msrGetOffsetDeltasResult{
				values: allDeltas(),
				err:    nil,
			},
			tsDelta: &msrGetTimestampDeltaResult{
				value: time.Duration(0),
				err:   errors.New("CPU ID 1 not found"),
			},
			expected: 0.0,
			errMsg:   "error retrieving timestamp delta for CPU ID",
		},
		{
			// Same failure as above but for CPU ID 0. This case used to share the name
			// "TimestampDeltaInvalidCPUID" with the previous one, which made the testing
			// package report it as "TimestampDeltaInvalidCPUID#01".
			name:  "TimestampDeltaError",
			cpuID: 0,
			deltas: msrGetOffsetDeltasResult{
				values: allDeltas(),
				err:    nil,
			},
			tsDelta: &msrGetTimestampDeltaResult{
				value: time.Duration(0),
				err:   errors.New("CPU ID 1 not found"),
			},
			expected: 0.0,
			errMsg:   "error retrieving timestamp delta for CPU ID",
		},
		{
			name:  "InvalidTimeInterval",
			cpuID: 0,
			deltas: msrGetOffsetDeltasResult{
				values: allDeltas(),
				err:    nil,
			},
			tsDelta: &msrGetTimestampDeltaResult{
				value: -100,
				err:   nil,
			},
			expected: 0.0,
			errMsg:   "timestamp delta must be greater than zero",
		},
		{
			name:  "MperfOffsetDeltaNotFound",
			cpuID: 0,
			deltas: msrGetOffsetDeltasResult{
				values: map[uint32]uint64{
					timestampCounter: 5000,
				},
				err: nil,
			},
			tsDelta: &msrGetTimestampDeltaResult{
				value: 1,
				err:   nil,
			},
			expected: 0.0,
			errMsg:   "mperf offset delta not found for CPU ID",
		},
		{
			name:  "MperfOffsetDeltaZero",
			cpuID: 0,
			deltas: msrGetOffsetDeltasResult{
				values: map[uint32]uint64{
					timestampCounter:  5000,
					maxFreqClockCount: 0,
				},
				err: nil,
			},
			tsDelta: &msrGetTimestampDeltaResult{
				value: 1,
				err:   nil,
			},
			expected: 0.0,
			errMsg:   "mperf offset delta is zero for CPU ID",
		},
		{
			name:  "AperfOffsetDeltaNotFound",
			cpuID: 0,
			deltas: msrGetOffsetDeltasResult{
				values: map[uint32]uint64{
					timestampCounter:  5000,
					maxFreqClockCount: 100,
				},
				err: nil,
			},
			tsDelta: &msrGetTimestampDeltaResult{
				value: 1,
				err:   nil,
			},
			expected: 0.0,
			errMsg:   "aperf offset delta not found for CPU ID",
		},
		{
			name:  "TSCOffsetDeltaNotFound",
			cpuID: 0,
			deltas: msrGetOffsetDeltasResult{
				values: map[uint32]uint64{
					actualFreqClockCount: 100000000000,
					maxFreqClockCount:    200000000,
				},
				err: nil,
			},
			tsDelta: &msrGetTimestampDeltaResult{
				value: 1,
				err:   nil,
			},
			expected: 0.0,
			errMsg:   "timestamp counter offset delta not found for CPU ID",
		},
		{
			name:  "BusyFreq",
			cpuID: 0,
			topology: &topologyData{
				topologyMap: map[int]*cpuInfo{
					0: {},
				},
				model: cpumodel.INTEL_FAM6_ATOM_SILVERMONT,
			},
			deltas: msrGetOffsetDeltasResult{
				values: allDeltas(),
				err:    nil,
			},
			tsDelta: &msrGetTimestampDeltaResult{
				value: 1000000000,
				err:   nil,
			},
			expected: 500.0,
		},
		{
			// Despite its name this case expects a successful result: the same value
			// is returned for INTEL_FAM6_CORE2_MEROM as for the model used in "BusyFreq".
			name:  "UnsupportedModel",
			cpuID: 0,
			topology: &topologyData{
				topologyMap: map[int]*cpuInfo{
					0: {},
				},
				model: cpumodel.INTEL_FAM6_CORE2_MEROM,
			},
			deltas: msrGetOffsetDeltasResult{
				values: allDeltas(),
				err:    nil,
			},
			tsDelta: &msrGetTimestampDeltaResult{
				value: 1000000000,
				err:   nil,
			},
			expected: 500.0,
		},
	}

	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			// msr definition
			m := &msrMock{}
			m.On("getOffsetDeltas", tt.cpuID).Return(tt.deltas.values, tt.deltas.err).Once()
			if tt.tsDelta != nil {
				m.On("getTimestampDelta", tt.cpuID).Return(tt.tsDelta.value, tt.tsDelta.err).Once()
			}

			// power telemetry instance definition
			pt := &PowerTelemetry{
				topology: tt.topology,
				msr:      m,
			}

			out, err := pt.GetCPUBusyFrequencyMhz(tt.cpuID)
			require.Equal(t, tt.expected, out)
			if tt.errMsg != "" {
				require.ErrorContains(t, err, tt.errMsg)
			} else {
				require.NoError(t, err)
			}
			m.AssertExpectations(t)
		})
	}
}

// TestUpdatePerCPUMetrics checks that PowerTelemetry.UpdatePerCPUMetrics rejects an
// uninitialized msr and otherwise returns whatever the mocked msr update call returns.
func TestUpdatePerCPUMetrics(t *testing.T) {
	t.Run("MsrIsNil", func(t *testing.T) {
		cpuID := 0

		pt := &PowerTelemetry{}

		err := pt.UpdatePerCPUMetrics(cpuID)
		require.ErrorContains(t, err, "\"msr\" is not initialized")
	})

	t.Run("FailedToUpdate", func(t *testing.T) {
		cpuID := 0
		errExpected := errors.New("error while updating storage")
		m := &msrMock{}
		m.On("update", cpuID).Return(errExpected).Once()

		pt := &PowerTelemetry{
			msr: m,
		}

		err := pt.UpdatePerCPUMetrics(cpuID)
		require.ErrorContains(t, err, errExpected.Error())
		m.AssertExpectations(t)
	})

	t.Run("Updated", func(t *testing.T) {
		cpuID := 0
		m := &msrMock{}
		m.On("update", cpuID).Return(nil).Once()

		pt := &PowerTelemetry{
			msr: m,
		}

		err := pt.UpdatePerCPUMetrics(cpuID)
		require.NoError(t, err)
		m.AssertExpectations(t)
	})
}

// TestIsCPUSupported checks isCPUSupported against a mocked topology: it must report true
// only for family "6" with vendor "GenuineIntel", and propagate topology lookup errors.
func TestIsCPUSupported(t *testing.T) {
	t.Run("True", func(t *testing.T) {
		mTopology := &topologyMock{}
		mTopology.On("getCPUFamily", 0).Return("6", nil).Once()
		mTopology.On("getCPUVendor", 0).Return("GenuineIntel", nil).Once()

		isSupported, err := isCPUSupported(mTopology)
		require.True(t, isSupported)
		require.NoError(t, err)
		mTopology.AssertExpectations(t)
	})

	t.Run("False", func(t *testing.T) {
		t.Run("FamilyNotIntel6", func(t *testing.T) {
			mTopology := &topologyMock{}
			mTopology.On("getCPUFamily", 0).Return("", nil).Once()
			mTopology.On("getCPUVendor", 0).Return("GenuineIntel", nil).Once()

			isSupported, err := isCPUSupported(mTopology)
			require.False(t, isSupported)
			require.NoError(t, err)
			mTopology.AssertExpectations(t)
		})

		t.Run("VendorNotGenuineIntel", func(t *testing.T) {
			mTopology := &topologyMock{}
			mTopology.On("getCPUFamily", 0).Return("6", nil).Once()
			mTopology.On("getCPUVendor", 0).Return("AuthenticAMD", nil).Once()

			isSupported, err := isCPUSupported(mTopology)
			require.False(t, isSupported)
			require.NoError(t, err)
			mTopology.AssertExpectations(t)
		})

		t.Run("FailedToGetFamily", func(t *testing.T) {
			mError := errors.New("mock error")
			mTopology := &topologyMock{}
			// Only the family lookup is expected: the vendor must not be queried after it fails.
			mTopology.On("getCPUFamily", 0).Return("", mError).Once()

			isSupported, err := isCPUSupported(mTopology)
			require.False(t, isSupported)
			require.ErrorContains(t, err, mError.Error())
			mTopology.AssertExpectations(t)
		})

		t.Run("FailedToGetVendor", func(t *testing.T) {
			mError := errors.New("mock error")
			mTopology := &topologyMock{}
			mTopology.On("getCPUFamily", 0).Return("6", nil).Once()
			mTopology.On("getCPUVendor", 0).Return("", mError).Once()

			isSupported, err := isCPUSupported(mTopology)
			require.False(t, isSupported)
			require.ErrorContains(t, err, mError.Error())
			mTopology.AssertExpectations(t)
		})
	})
}

// TestIsFlagSupported checks PowerTelemetry.IsFlagSupported for the flag "flag" against
// topologies with no CPUs, with a different flag and with the requested flag.
func TestIsFlagSupported(t *testing.T) {
	tests := []struct {
		name     string
		topology topologyReader
		err      error
		expected bool
	}{
		{
			name:     "InvalidCPUID",
			topology: &topologyData{},
			err:      errors.New("error retrieving CPU flags"),
			expected: false,
		},
		{
			name: "FlagNotSupported",
			topology: &topologyData{
				topologyMap: map[int]*cpuInfo{
					0: {
						flags: []string{"flag2"},
					},
				},
			},
			err:      nil,
			expected: false,
		},
		{
			name: "FlagSupported",
			topology: &topologyData{
				topologyMap: map[int]*cpuInfo{
					0: {
						flags: []string{"flag"},
					},
				},
			},
			err:      nil,
			expected: true,
		},
	}

	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			// power telemetry instance definition
			pt := &PowerTelemetry{
				topology: tt.topology,
			}
			// actual output
			actual, err := pt.IsFlagSupported("flag")
			require.Equal(t, tt.expected, actual)
			if tt.err != nil {
				require.ErrorContains(t, err, tt.err.Error())
			} else {
				require.NoError(t, err)
			}
		})
	}
}

// TestGetCPUPackageID checks PowerTelemetry.GetCPUPackageID for CPU ID 0 against topologies
// with no CPUs, without CPU 0 and with CPU 0 present.
func TestGetCPUPackageID(t *testing.T) {
	tests := []struct {
		name     string
		topology topologyReader
		err      error
		expected int
	}{
		{
			name:     "InvalidPackageIDNoCPUs",
			topology: &topologyData{},
			err:      errors.New("error retrieving package ID"),
			expected: 0,
		},
		{
			name: "InvalidPackageIDCPUDoesNotExist",
			topology: &topologyData{
				topologyMap: map[int]*cpuInfo{
					1: {
						packageID: 1,
					},
				},
			},
			err:      errors.New("error retrieving package ID"),
			expected: 0,
		},
		{
			name: "ValidPackageID",
			topology: &topologyData{
				topologyMap: map[int]*cpuInfo{
					0: {
						packageID: 1,
					},
				},
			},
			err:      nil,
			expected: 1,
		},
	}

	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			// power telemetry instance definition
			pt := &PowerTelemetry{
				topology: tt.topology,
			}
			// actual output
			actual, err := pt.GetCPUPackageID(0)
			require.Equal(t, tt.expected, actual)
			if tt.err != nil {
				require.ErrorContains(t, err, tt.err.Error())
			} else {
				require.NoError(t, err)
			}
		})
	}
}

// TestGetCPUCoreID checks PowerTelemetry.GetCPUCoreID for CPU ID 0 against topologies
// with no CPUs, without CPU 0 and with CPU 0 present.
func TestGetCPUCoreID(t *testing.T) {
	tests := []struct {
		name     string
		topology topologyReader
		err      error
		expected int
	}{
		{
			name:     "InvalidCoreIDNoCPUs",
			topology: &topologyData{},
			err:      errors.New("error retrieving core ID"),
			expected: 0,
		},
		{
			name: "InvalidCoreIDCPUDoesNotExist",
			topology: &topologyData{
				topologyMap: map[int]*cpuInfo{
					1: {
						coreID: 1,
					},
				},
			},
			err:      errors.New("error retrieving core ID"),
			expected: 0,
		},
		{
			name: "ValidCoreID",
			topology: &topologyData{
				topologyMap: map[int]*cpuInfo{
					0: {
						coreID: 1,
					},
				},
			},
			err:      nil,
			expected: 1,
		},
	}

	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			// power telemetry instance definition
			pt := &PowerTelemetry{
				topology: tt.topology,
			}
			// actual output
			actual, err := pt.GetCPUCoreID(0)
			require.Equal(t, tt.expected, actual)
			if tt.err != nil {
				require.ErrorContains(t, err, tt.err.Error())
			} else {
				require.NoError(t, err)
			}
		})
	}
}

// TestGetPackageDieIDs checks PowerTelemetry.GetPackageDieIDs for package ID 0 against
// topologies with no dies, without package 0 and with package 0 present.
func TestGetPackageDieIDs(t *testing.T) {
	tests := []struct {
		name     string
		topology topologyReader
		err      error
		expected []int
	}{
		{
			name:     "InvalidPackageIDNoDies",
			topology: &topologyData{},
			err:      errors.New("error retrieving dies"),
			expected: nil,
		},
		{
			name: "InvalidPackageIDPackageDoesNotExist",
			topology: &topologyData{
				packageDies: map[int][]int{
					1: {
						0, 1,
					},
				},
			},
			err:      errors.New("error retrieving dies"),
			expected: nil,
		},
		{
			name: "ValidPackageID",
			topology: &topologyData{
				packageDies: map[int][]int{
					0: {
						1, 2,
					},
				},
			},
			err:      nil,
			expected: []int{1, 2},
		},
	}

	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			// power telemetry instance definition
			pt := &PowerTelemetry{
				topology: tt.topology,
			}
			// actual output
			actual, err := pt.GetPackageDieIDs(0)
			require.Equal(t, tt.expected, actual)
			if tt.err != nil {
				require.ErrorContains(t, err, tt.err.Error())
			} else {
				require.NoError(t, err)
			}
		})
	}
}

// raplMock is a testify mock standing in for the rapl dependency of PowerTelemetry.
type raplMock struct {
	mock.Mock
}

// initZoneMap returns the error configured for the mocked call.
func (m *raplMock) initZoneMap() error {
	args := m.Called()
	return args.Error(0)
}

// getPackageIDs returns the package IDs configured for the mocked call.
// A nil return value configured on the mock yields a nil slice instead of a
// failed type assertion, mirroring perfMock.read.
func (m *raplMock) getPackageIDs() []int {
	args := m.Called()
	if args.Get(0) == nil {
		return nil
	}
	return args.Get(0).([]int)
}

// isRaplLoaded returns the flag and error configured for the mocked call.
func (m *raplMock) isRaplLoaded(modulesPath string) (bool, error) {
	args := m.Called(modulesPath)
	return args.Bool(0), args.Error(1)
}

// getCurrentPowerConsumptionWatts returns the value and error configured for the mocked call.
func (m *raplMock) getCurrentPowerConsumptionWatts(packageID int, domain string) (float64, error) {
	args := m.Called(packageID, domain)
	return args.Get(0).(float64), args.Error(1)
}

// getMaxPowerConstraintWatts returns the value and error configured for the mocked call.
func (m *raplMock) getMaxPowerConstraintWatts(packageID int) (float64, error) {
	args := m.Called(packageID)
	return args.Get(0).(float64), args.Error(1)
}

// TestPower_GetCurrentPackagePowerConsumptionWatts checks that the package power getter
// rejects an uninitialized rapl and otherwise forwards the mocked rapl result queried
// for the package domain.
func TestPower_GetCurrentPackagePowerConsumptionWatts(t *testing.T) {
	t.Run("RaplIsNil", func(t *testing.T) {
		packageID := 0
		currPowerExp := 0.0

		pt := &PowerTelemetry{}
		currPowerOut, err := pt.GetCurrentPackagePowerConsumptionWatts(packageID)
		require.Equal(t, currPowerExp, currPowerOut)
		require.ErrorContains(t, err, "\"rapl\" is not initialized")
	})

	t.Run("FailedToGetCurrentPower", func(t *testing.T) {
		packageID := 0
		currPowerExp := 0.0

		mError := errors.New("mock error")
		mRapl := &raplMock{}
		mRapl.On("getCurrentPowerConsumptionWatts", packageID, packageDomain.String()).Return(currPowerExp, mError).Once()

		pt := &PowerTelemetry{
			rapl: mRapl,
		}

		currPowerOut, err := pt.GetCurrentPackagePowerConsumptionWatts(packageID)
		require.Equal(t, currPowerExp, currPowerOut)
		require.ErrorContains(t, err, mError.Error())
		mRapl.AssertExpectations(t)
	})

	t.Run("Ok", func(t *testing.T) {
		packageID := 0
		currPowerExp := 30.0

		mRapl := &raplMock{}
		mRapl.On("getCurrentPowerConsumptionWatts", packageID, packageDomain.String()).Return(currPowerExp, nil).Once()

		pt := &PowerTelemetry{
			rapl: mRapl,
		}

		currPowerOut, err := pt.GetCurrentPackagePowerConsumptionWatts(packageID)
		require.Equal(t, currPowerExp, currPowerOut)
		require.NoError(t, err)
		mRapl.AssertExpectations(t)
	})
}

// TestPower_GetCurrentDramPowerConsumptionWatts checks that the DRAM power getter rejects
// an uninitialized rapl and otherwise forwards the mocked rapl result queried for the
// dram domain.
func TestPower_GetCurrentDramPowerConsumptionWatts(t *testing.T) {
	t.Run("RaplIsNil", func(t *testing.T) {
		packageID := 0
		currPowerExp := 0.0

		pt := &PowerTelemetry{}
		currPowerOut, err := pt.GetCurrentDramPowerConsumptionWatts(packageID)
		require.Equal(t, currPowerExp, currPowerOut)
		require.ErrorContains(t, err, "\"rapl\" is not initialized")
	})

	t.Run("FailedToGetCurrentPower", func(t *testing.T) {
		packageID := 0
		currPowerExp := 0.0

		mError := errors.New("mock error")
		mRapl := &raplMock{}
		mRapl.On("getCurrentPowerConsumptionWatts", packageID, dramDomain.String()).Return(currPowerExp, mError).Once()

		pt := &PowerTelemetry{
			rapl: mRapl,
		}

		currPowerOut, err := pt.GetCurrentDramPowerConsumptionWatts(packageID)
		require.Equal(t, currPowerExp, currPowerOut)
		require.ErrorContains(t, err, mError.Error())
		mRapl.AssertExpectations(t)
	})

	t.Run("Ok", func(t *testing.T) {
		packageID := 0
		currPowerExp := 30.0

		mRapl := &raplMock{}
		mRapl.On("getCurrentPowerConsumptionWatts", packageID, dramDomain.String()).Return(currPowerExp, nil).Once()

		pt := &PowerTelemetry{
			rapl: mRapl,
		}

		currPowerOut, err := pt.GetCurrentDramPowerConsumptionWatts(packageID)
		require.Equal(t, currPowerExp, currPowerOut)
		require.NoError(t, err)
		mRapl.AssertExpectations(t)
	})
}

// TestPower_GetPackageThermalDesignPowerWatts checks that the TDP getter rejects an
// uninitialized rapl and otherwise forwards the mocked max power constraint result.
func TestPower_GetPackageThermalDesignPowerWatts(t *testing.T) {
	t.Run("RaplIsNil", func(t *testing.T) {
		packageID := 0
		maxPowerExp := 0.0

		pt := &PowerTelemetry{}
		maxPowerOut, err := pt.GetPackageThermalDesignPowerWatts(packageID)
		require.Equal(t, maxPowerExp, maxPowerOut)
		require.ErrorContains(t, err, "\"rapl\" is not initialized")
	})

	t.Run("FailedToGetMaxPower", func(t *testing.T) {
		packageID := 0
		maxPowerExp := 0.0

		mError := errors.New("mock error")
		mRapl := &raplMock{}
		mRapl.On("getMaxPowerConstraintWatts", packageID).Return(maxPowerExp, mError).Once()

		pt := &PowerTelemetry{
			rapl: mRapl,
		}

		maxPowerOut, err := pt.GetPackageThermalDesignPowerWatts(packageID)
		require.Equal(t, maxPowerExp, maxPowerOut)
		require.ErrorContains(t, err, mError.Error())
		mRapl.AssertExpectations(t)
	})

	t.Run("Ok", func(t *testing.T) {
		packageID := 0
		maxPowerExp := 30.0

		mRapl := &raplMock{}
		mRapl.On("getMaxPowerConstraintWatts", packageID).Return(maxPowerExp, nil).Once()

		pt := &PowerTelemetry{
			rapl: mRapl,
		}

		maxPowerOut, err := pt.GetPackageThermalDesignPowerWatts(packageID)
		require.Equal(t, maxPowerExp, maxPowerOut)
		require.NoError(t, err)
		mRapl.AssertExpectations(t)
	})
}

// perfMock is a testify mock standing in for the perf dependency of PowerTelemetry.
type perfMock struct {
	mock.Mock
}

// activate returns the error configured for the mocked call.
func (m *perfMock) activate(events []string, cores []int) error {
	args := m.Called(events, cores)
	return args.Error(0)
}

// read returns the core metrics and error configured for the mocked call.
// A nil first return value is passed through as a nil slice.
func (m *perfMock) read() ([]coreMetric, error) {
	args := m.Called()
	if args.Get(0) == nil {
		return nil, args.Error(1)
	}
	return args.Get(0).([]coreMetric), args.Error(1)
}

// deactivate returns the error configured for the mocked call.
func (m *perfMock) deactivate() error {
	args := m.Called()
	return args.Error(0)
}

// initResolver returns the error configured for the mocked call.
func (m *perfMock) initResolver(jsonFile string) error {
	args := m.Called(jsonFile)
	return args.Error(0)
}

// update returns the error configured for the mocked call.
func (m *perfMock) update() error {
	args := m.Called()
	return args.Error(0)
}

// getCoreMetrics returns the core metrics configured for the mocked call.
// A nil return value is passed through as a nil slice.
func (m *perfMock) getCoreMetrics(cpuID int) []coreMetric {
	args := m.Called(cpuID)
	if args.Get(0) == nil {
		return nil
	}
	return args.Get(0).([]coreMetric)
}

func TestPower_GetCPUC0SubstateC01Percent(t *testing.T) {
	t.Run("PerfIsNil", func(t *testing.T) {
		cpuID := 0
		c01Exp := 0.0

		pt := &PowerTelemetry{}

		c01Out, err := pt.GetCPUC0SubstateC01Percent(cpuID)
		require.ErrorContains(t, err, "\"perf\" is not initialized")
		require.Equal(t, c01Exp, c01Out)
	})

	t.Run("GetCoreMetricsError", func(t *testing.T) {
		cpuID := 0
c01Exp := 0.0 + + mPerf := &perfMock{} + mMetrics := []coreMetric{} + mPerf.On("getCoreMetrics", cpuID).Return(mMetrics).Once() + + pt := &PowerTelemetry{ + perf: mPerf, + } + + c01Out, err := pt.GetCPUC0SubstateC01Percent(cpuID) + require.ErrorContains(t, err, fmt.Sprintf("no core metrics found for CPU ID: %v", cpuID)) + require.Equal(t, c01Exp, c01Out) + mPerf.AssertExpectations(t) + }) + + t.Run("C01MetricNotFound", func(t *testing.T) { + cpuID := 0 + c01Exp := 0.0 + + mPerf := &perfMock{} + mMetrics := []coreMetric{ + { + name: c02.String(), + cpuID: cpuID, + scaled: 100, + }, + { + name: thread.String(), + cpuID: cpuID, + scaled: 5000, + }, + } + mPerf.On("getCoreMetrics", cpuID).Return(mMetrics).Once() + + pt := &PowerTelemetry{ + perf: mPerf, + } + + c01Out, err := pt.GetCPUC0SubstateC01Percent(cpuID) + require.ErrorContains(t, err, fmt.Sprintf("could not find metric: %q", c01.String())) + require.Equal(t, c01Exp, c01Out) + mPerf.AssertExpectations(t) + }) + + t.Run("ThreadMetricNotFound", func(t *testing.T) { + cpuID := 0 + c01Exp := 0.0 + + mPerf := &perfMock{} + mMetrics := []coreMetric{ + { + name: c01.String(), + cpuID: cpuID, + scaled: 100, + }, + } + mPerf.On("getCoreMetrics", cpuID).Return(mMetrics).Once() + + pt := &PowerTelemetry{ + perf: mPerf, + } + + c01Out, err := pt.GetCPUC0SubstateC01Percent(cpuID) + require.ErrorContains(t, err, fmt.Sprintf("could not find metric: %q", thread.String())) + require.Equal(t, c01Exp, c01Out) + mPerf.AssertExpectations(t) + }) + + t.Run("ThreadMetricIsZero", func(t *testing.T) { + cpuID := 0 + c01Exp := 0.0 + + mPerf := &perfMock{} + mMetrics := []coreMetric{ + { + name: c01.String(), + cpuID: cpuID, + scaled: 100, + }, + { + name: thread.String(), + cpuID: cpuID, + scaled: 0, + }, + } + mPerf.On("getCoreMetrics", cpuID).Return(mMetrics).Once() + + pt := &PowerTelemetry{ + perf: mPerf, + } + + c01Out, err := pt.GetCPUC0SubstateC01Percent(cpuID) + require.ErrorContains(t, err, fmt.Sprintf("zero scaled value for 
reference metric: %q", thread.String())) + require.Equal(t, c01Exp, c01Out) + mPerf.AssertExpectations(t) + }) + + t.Run("C01State2Per", func(t *testing.T) { + cpuID := 0 + c01Exp := 2.0 + + mPerf := &perfMock{} + mMetrics := []coreMetric{ + { + name: c01.String(), + cpuID: cpuID, + scaled: 100, + }, + { + name: thread.String(), + cpuID: cpuID, + scaled: 5000, + }, + } + mPerf.On("getCoreMetrics", cpuID).Return(mMetrics).Once() + + pt := &PowerTelemetry{ + perf: mPerf, + } + + c01Out, err := pt.GetCPUC0SubstateC01Percent(cpuID) + require.NoError(t, err) + require.Equal(t, c01Exp, c01Out) + mPerf.AssertExpectations(t) + }) +} + +func TestPower_GetCPUC0SubstateC02Percent(t *testing.T) { + t.Run("PerfIsNil", func(t *testing.T) { + cpuID := 0 + c02Exp := 0.0 + + pt := &PowerTelemetry{} + + c02Out, err := pt.GetCPUC0SubstateC02Percent(cpuID) + require.ErrorContains(t, err, "\"perf\" is not initialized") + require.Equal(t, c02Exp, c02Out) + }) + + t.Run("GetCoreMetricsError", func(t *testing.T) { + cpuID := 0 + c02Exp := 0.0 + + mPerf := &perfMock{} + mMetrics := []coreMetric{} + mPerf.On("getCoreMetrics", cpuID).Return(mMetrics).Once() + + pt := &PowerTelemetry{ + perf: mPerf, + } + + c02Out, err := pt.GetCPUC0SubstateC02Percent(cpuID) + require.ErrorContains(t, err, fmt.Sprintf("no core metrics found for CPU ID: %v", cpuID)) + require.Equal(t, c02Exp, c02Out) + mPerf.AssertExpectations(t) + }) + + t.Run("C02MetricNotFound", func(t *testing.T) { + cpuID := 0 + c02Exp := 0.0 + + mPerf := &perfMock{} + mMetrics := []coreMetric{ + { + name: c01.String(), + cpuID: cpuID, + scaled: 100, + }, + { + name: thread.String(), + cpuID: cpuID, + scaled: 5000, + }, + } + mPerf.On("getCoreMetrics", cpuID).Return(mMetrics).Once() + + pt := &PowerTelemetry{ + perf: mPerf, + } + + c02Out, err := pt.GetCPUC0SubstateC02Percent(cpuID) + require.ErrorContains(t, err, fmt.Sprintf("could not find metric: %q", c02.String())) + require.Equal(t, c02Exp, c02Out) + mPerf.AssertExpectations(t) + }) 
+ + t.Run("ThreadMetricNotFound", func(t *testing.T) { + cpuID := 0 + c02Exp := 0.0 + + mPerf := &perfMock{} + mMetrics := []coreMetric{ + { + name: c02.String(), + cpuID: cpuID, + scaled: 100, + }, + } + mPerf.On("getCoreMetrics", cpuID).Return(mMetrics).Once() + + pt := &PowerTelemetry{ + perf: mPerf, + } + + c02Out, err := pt.GetCPUC0SubstateC02Percent(cpuID) + require.ErrorContains(t, err, fmt.Sprintf("could not find metric: %q", thread.String())) + require.Equal(t, c02Exp, c02Out) + mPerf.AssertExpectations(t) + }) + + t.Run("ThreadMetricIsZero", func(t *testing.T) { + cpuID := 0 + c02Exp := 0.0 + + mPerf := &perfMock{} + mMetrics := []coreMetric{ + { + name: c02.String(), + cpuID: cpuID, + scaled: 100, + }, + { + name: thread.String(), + cpuID: cpuID, + scaled: 0, + }, + } + mPerf.On("getCoreMetrics", cpuID).Return(mMetrics).Once() + + pt := &PowerTelemetry{ + perf: mPerf, + } + + c02Out, err := pt.GetCPUC0SubstateC02Percent(cpuID) + require.ErrorContains(t, err, fmt.Sprintf("zero scaled value for reference metric: %q", thread.String())) + require.Equal(t, c02Exp, c02Out) + mPerf.AssertExpectations(t) + }) + + t.Run("C02State4Per", func(t *testing.T) { + cpuID := 0 + c02Exp := 4.0 + + mPerf := &perfMock{} + mMetrics := []coreMetric{ + { + name: c02.String(), + cpuID: cpuID, + scaled: 200, + }, + { + name: thread.String(), + cpuID: cpuID, + scaled: 5000, + }, + } + mPerf.On("getCoreMetrics", cpuID).Return(mMetrics).Once() + + pt := &PowerTelemetry{ + perf: mPerf, + } + + c02Out, err := pt.GetCPUC0SubstateC02Percent(cpuID) + require.NoError(t, err) + require.Equal(t, c02Exp, c02Out) + mPerf.AssertExpectations(t) + }) +} + +func TestPower_GetCPUC0SubstateC0WaitPercent(t *testing.T) { + t.Run("PerfIsNil", func(t *testing.T) { + cpuID := 0 + c0WaitExp := 0.0 + + pt := &PowerTelemetry{} + + c0WaitOut, err := pt.GetCPUC0SubstateC0WaitPercent(cpuID) + require.ErrorContains(t, err, "\"perf\" is not initialized") + require.Equal(t, c0WaitExp, c0WaitOut) + }) + + 
t.Run("GetCoreMetricsError", func(t *testing.T) { + cpuID := 0 + c0WaitExp := 0.0 + + mPerf := &perfMock{} + mMetrics := []coreMetric{} + mPerf.On("getCoreMetrics", cpuID).Return(mMetrics).Once() + + pt := &PowerTelemetry{ + perf: mPerf, + } + + c0WaitOut, err := pt.GetCPUC0SubstateC0WaitPercent(cpuID) + require.ErrorContains(t, err, fmt.Sprintf("no core metrics found for CPU ID: %v", cpuID)) + require.Equal(t, c0WaitExp, c0WaitOut) + mPerf.AssertExpectations(t) + }) + + t.Run("C0WaitMetricNotFound", func(t *testing.T) { + cpuID := 0 + c0WaitExp := 0.0 + + mPerf := &perfMock{} + mMetrics := []coreMetric{ + { + name: c01.String(), + cpuID: cpuID, + scaled: 100, + }, + { + name: thread.String(), + cpuID: cpuID, + scaled: 5000, + }, + } + mPerf.On("getCoreMetrics", cpuID).Return(mMetrics).Once() + + pt := &PowerTelemetry{ + perf: mPerf, + } + + c0WaitOut, err := pt.GetCPUC0SubstateC0WaitPercent(cpuID) + require.ErrorContains(t, err, fmt.Sprintf("could not find metric: %q", c0Wait.String())) + require.Equal(t, c0WaitExp, c0WaitOut) + mPerf.AssertExpectations(t) + }) + + t.Run("ThreadMetricNotFound", func(t *testing.T) { + cpuID := 0 + c0WaitExp := 0.0 + + mPerf := &perfMock{} + mMetrics := []coreMetric{ + { + name: c0Wait.String(), + cpuID: cpuID, + scaled: 100, + }, + } + mPerf.On("getCoreMetrics", cpuID).Return(mMetrics).Once() + + pt := &PowerTelemetry{ + perf: mPerf, + } + + c0WaitOut, err := pt.GetCPUC0SubstateC0WaitPercent(cpuID) + require.ErrorContains(t, err, fmt.Sprintf("could not find metric: %q", thread.String())) + require.Equal(t, c0WaitExp, c0WaitOut) + mPerf.AssertExpectations(t) + }) + + t.Run("ThreadMetricIsZero", func(t *testing.T) { + cpuID := 0 + c0WaitExp := 0.0 + + mPerf := &perfMock{} + mMetrics := []coreMetric{ + { + name: c0Wait.String(), + cpuID: cpuID, + scaled: 100, + }, + { + name: thread.String(), + cpuID: cpuID, + scaled: 0, + }, + } + mPerf.On("getCoreMetrics", cpuID).Return(mMetrics).Once() + + pt := &PowerTelemetry{ + perf: mPerf, + } 
+ + c0WaitOut, err := pt.GetCPUC0SubstateC0WaitPercent(cpuID) + require.ErrorContains(t, err, fmt.Sprintf("zero scaled value for reference metric: %q", thread.String())) + require.Equal(t, c0WaitExp, c0WaitOut) + mPerf.AssertExpectations(t) + }) + + t.Run("C0WaitState10Per", func(t *testing.T) { + cpuID := 0 + c0WaitExp := 10.0 + + mPerf := &perfMock{} + mMetrics := []coreMetric{ + { + name: c0Wait.String(), + cpuID: cpuID, + scaled: 500, + }, + { + name: thread.String(), + cpuID: cpuID, + scaled: 5000, + }, + } + mPerf.On("getCoreMetrics", cpuID).Return(mMetrics).Once() + + pt := &PowerTelemetry{ + perf: mPerf, + } + + c0WaitOut, err := pt.GetCPUC0SubstateC0WaitPercent(cpuID) + require.NoError(t, err) + require.Equal(t, c0WaitExp, c0WaitOut) + mPerf.AssertExpectations(t) + }) +} + +func TestPower_ReadPerfEvents(t *testing.T) { + t.Run("PerfIsNil", func(t *testing.T) { + pt := &PowerTelemetry{} + + err := pt.ReadPerfEvents() + require.ErrorContains(t, err, "\"perf\" is not initialized") + }) + + t.Run("FailedToRead", func(t *testing.T) { + mError := errors.New("error while reading perf events") + mPerf := &perfMock{} + mPerf.On("update").Return(mError).Once() + + pt := &PowerTelemetry{ + perf: mPerf, + } + + err := pt.ReadPerfEvents() + require.ErrorContains(t, err, mError.Error()) + mPerf.AssertExpectations(t) + }) + + t.Run("SuccessfulRead", func(t *testing.T) { + mPerf := &perfMock{} + mPerf.On("update").Return(nil).Once() + + pt := &PowerTelemetry{ + perf: mPerf, + } + + err := pt.ReadPerfEvents() + require.NoError(t, err) + mPerf.AssertExpectations(t) + }) +} + +func TestPower_DeactivatePerfEvents(t *testing.T) { + t.Run("PerfIsNil", func(t *testing.T) { + pt := &PowerTelemetry{} + + err := pt.DeactivatePerfEvents() + require.ErrorContains(t, err, "\"perf\" is not initialized") + }) + + t.Run("FailedToDeactivate", func(t *testing.T) { + mError := errors.New("error while reading perf events") + mPerf := &perfMock{} + mPerf.On("deactivate").Return(mError).Once() 
+ + pt := &PowerTelemetry{ + perf: mPerf, + } + + err := pt.DeactivatePerfEvents() + require.ErrorContains(t, err, mError.Error()) + mPerf.AssertExpectations(t) + }) + + t.Run("SuccessfulDeactivation", func(t *testing.T) { + mPerf := &perfMock{} + mPerf.On("deactivate").Return(nil).Once() + + pt := &PowerTelemetry{ + perf: mPerf, + } + + err := pt.DeactivatePerfEvents() + require.NoError(t, err) + mPerf.AssertExpectations(t) + }) +} + +func TestPower_GetPackageIDs(t *testing.T) { + testCases := []struct { + name string + packageIDs []int + }{ + { + name: "Empty", + packageIDs: []int{}, + }, + { + name: "NotEmpty", + packageIDs: []int{0, 1, 2}, + }, + } + + for _, tc := range testCases { + t.Run(tc.name, func(t *testing.T) { + pt := &PowerTelemetry{ + topology: &topologyData{ + packageIDs: tc.packageIDs, + }, + } + + require.Equal(t, tc.packageIDs, pt.GetPackageIDs()) + }) + } +} + +func TestPower_GetRaplPackageIDs(t *testing.T) { + testCases := []struct { + name string + raplZones map[int]powerZone + packageIDs []int + }{ + { + name: "MapZonesIsNil", + raplZones: nil, + packageIDs: []int{}, + }, + { + name: "MapZonesIsEmpty", + raplZones: map[int]powerZone{}, + packageIDs: []int{}, + }, + { + name: "MapZonesIsUnordered", + raplZones: map[int]powerZone{ + 1: &zone{}, + 0: &zone{}, + 4: &zone{}, + 2: &zone{}, + }, + packageIDs: []int{0, 1, 2, 4}, + }, + } + + for _, tc := range testCases { + t.Run(tc.name, func(t *testing.T) { + pt := &PowerTelemetry{ + rapl: &raplData{ + zones: tc.raplZones, + }, + } + + require.Equal(t, tc.packageIDs, pt.GetRaplPackageIDs()) + }) + } + + t.Run("RaplIsNil", func(t *testing.T) { + pt := &PowerTelemetry{ + rapl: nil, + } + + require.Nil(t, pt.GetRaplPackageIDs()) + }) +} diff --git a/rapl.go b/rapl.go new file mode 100644 index 0000000..aab0fd8 --- /dev/null +++ b/rapl.go @@ -0,0 +1,604 @@ +// Copyright (C) 2023 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 + +//go:build linux && amd64 + +package powertelemetry + +import ( 
+ "bufio" + "errors" + "fmt" + "os" + "path/filepath" + "regexp" + "slices" + "strconv" + "strings" + "time" +) + +const ( + // control zone path where rapl exposes power capping capabilities to userspace. + defaultRaplBasePath = "/sys/devices/virtual/powercap/intel-rapl" + + // pattern string to identify the path of a package domain zone. + zonePattern = "intel-rapl\\:[0-9]*" + + // pattern string to identify the path of a package domain subzone, i.e. dram domain. + subzonePattern = "intel-rapl\\:[0-9]*\\:[0-9]*" + + // file name of the maximum energy attribute supported by power capping. + maxEnergyAttrFile = "max_energy_range_uj" + + // file name of the current energy attribute supported by power capping. + currEnergyAttrFile = "energy_uj" + + // file name of the maximum allowed power constraint attribute supported by power capping. + maxPowerConstraintAttrFile = "constraint_0_max_power_uw" +) + +var ( + // regex used to check name format of a package domain zone. + packageNameRegex = regexp.MustCompile("^package-(0|[1-9][0-9]*)$") + + // regex used to check the path format of a package domain zone. + zoneRegex = regexp.MustCompile(zonePattern) + + // regex used to check the path format of a package domain subzone. + subzoneRegex = regexp.MustCompile(subzonePattern) +) + +// domainType is an enum type to identify specific intel rapl control domains. +type domainType int + +// domainType enum defines supported intel rapl control domains. +const ( + packageDomain domainType = iota + dramDomain +) + +// Helper function to return a string representation of domainType. +func (d domainType) String() string { + switch d { + case packageDomain: + return "package" + case dramDomain: + return "dram" + default: + return "" + } +} + +// attrType is an enum type to identify specific zone attributes. +type attrType int + +// attrType enum defines supported zone attributes. 
+const ( + currEnergyAttr attrType = iota + maxEnergyAttr + maxPowerConstraintAttr +) + +// Helper function to return a string representation of attrType. +func (e attrType) String() string { + switch e { + case currEnergyAttr: + return "currEnergy" + case maxEnergyAttr: + return "maxEnergy" + case maxPowerConstraintAttr: + return "maxPower" + default: + return "" + } +} + +// attrSample represents a timestamped zone attribute sample. +type attrSample struct { + value float64 + timestamp time.Time +} + +// powerZone represents a generic power zone accessible by power capping interface. +type powerZone interface { + // getName gets the name of a zone. + getName() string + + // getPath gets the absolute path of a zone. + getPath() string + + // addSubzone takes a powerZone and adds it as a child zone. + addSubzone(subzone powerZone) + + // getSubzones returns all the child zones of the given zone. + getSubzones() []powerZone + + // getDomainSubzone returns the child zone with specified domain. + getDomainSubzone(domain string) powerZone + + // getEnergySample returns the last energy sample stored in the given zone. + getEnergySample() attrSample + + // setEnergySample sets the given energy sample to the given zone. + setEnergySample(s attrSample) + + // readAttribute gets a timestamped sample of the specified attribute. + readAttribute(attribute string) (attrSample, error) +} + +// zone represents a generic power zone that can be monitored using power capping +// method determined by the control type the given zone belongs to. +// zones are hierarchical, meaning a parent zone can have multiple child zones +// or subzones, representing different parts of the system. 
+// +// An example of hierarchical directory tree for a zone is as follows: +// +// root-zone +// ├── package-zone:0 +// │ ├── package-zone:0:0 (device subzone) +// │ ├── package-zone:0:1 +// │ ∙ +// | └── package-zone:0:n +// ├── package-zone:1 +// │ ├── package-zone:1:0 (device subzone) +// │ ├── package-zone:1:1 +// │ ∙ +// │ └── package-zone:1:m +// ∙ +// └── package-zone:l +// +// Each zone has capabilities to retrieve timestamped values of zone attributes +// from files. +// +// package-zone:0 +// ├── constraint_0_max_power_uw (maximum allowed power attribute) +// ├── energy_uj (current energy attribute) +// └── max_energy_range_uj (maximum energy attribute) +// +// energy field of a zone stores the last measured current energy attribute. +type zone struct { + name string + path string + energy attrSample + subzones []powerZone +} + +// newZoneFromPath creates a new zone, initializing its name with +// contents of name file located at the specified path. If name file +// does not exist, or it is empty an error is returned. +func newZoneFromPath(path string) (powerZone, error) { + f := filepath.Join(path, "name") + data, err := readFile(f) + if err != nil { + return nil, err + } + + name := strings.TrimRight(string(data), "\n") + if len(name) == 0 { + return nil, fmt.Errorf("zone domain cannot be empty") + } + + return &zone{ + name: name, + path: path, + energy: attrSample{ + value: 0.0, + timestamp: time.Time{}, + }, + subzones: make([]powerZone, 0), + }, nil +} + +// addSubzone takes a zone as an argument and adds it as a child +// of the receiver zone. +func (z *zone) addSubzone(subzone powerZone) { + z.subzones = append(z.subzones, subzone) +} + +// getName returns the name of a zone. +func (z *zone) getName() string { + return z.name +} + +// getPath returns the path at which a zone is located. 
+func (z *zone) getPath() string { + return z.path +} + +// getDomainSubzone loops through the subzones of the receiver zone, +// and returns the zone matching the name given as argument. +// If there are no matches it returns nil. +func (z *zone) getDomainSubzone(domain string) powerZone { + for _, subzone := range z.subzones { + if strings.Contains(subzone.getName(), domain) { + return subzone + } + } + return nil +} + +// getSubzones returns a slice with subzones of the receiver zone. +func (z *zone) getSubzones() []powerZone { + return z.subzones +} + +// getEnergySample returns the last energy sample collected for the +// receiver zone. +func (z *zone) getEnergySample() attrSample { + return z.energy +} + +// setEnergySample sets the given energy sample to the receiver zone. +func (z *zone) setEnergySample(s attrSample) { + z.energy = s +} + +// readAttribute returns a timestamped sample of the specified attribute +// for a given zone. +func (z *zone) readAttribute(attribute string) (attrSample, error) { + var attrFilePath string + switch attribute { + case currEnergyAttr.String(): + attrFilePath = filepath.Join(z.path, currEnergyAttrFile) + case maxEnergyAttr.String(): + attrFilePath = filepath.Join(z.path, maxEnergyAttrFile) + case maxPowerConstraintAttr.String(): + attrFilePath = filepath.Join(z.path, maxPowerConstraintAttrFile) + default: + return attrSample{}, fmt.Errorf("unsupported attribute %q", attribute) + } + + data, timestamp, err := readFileWithTimestamp(attrFilePath) + if err != nil { + return attrSample{}, fmt.Errorf("error reading file %q: %w", attrFilePath, err) + } + val, err := strconv.ParseFloat(strings.TrimRight(string(data), "\n"), 64) + if err != nil { + return attrSample{}, fmt.Errorf("error converting attribute file content to float64: %w", err) + } + return attrSample{ + value: val, + timestamp: timestamp, + }, nil +} + +// packageZone is a specialized case of powerZone. 
It extends functionality +// of a generic zone adding validation for fields specific to package domain zones. +type packageZone struct { + powerZone +} + +// getPackageID returns the package ID of the package domain zone. It performs +// validation between the content name file of the package zone and its path. +func (p *packageZone) getPackageID() (int, error) { + name := p.getName() + path := p.getPath() + + if !packageNameRegex.MatchString(name) { + return 0, fmt.Errorf("invalid package domain name for zone at path %q", path) + } + packageIDFromName := strings.Split(name, "-")[1] + + if !zoneRegex.MatchString(filepath.Base(path)) { + return 0, fmt.Errorf("invalid package domain zone path %q", path) + } + packageIDFromPath := strings.Split(path, ":")[1] + + if packageIDFromPath != packageIDFromName { + return 0, fmt.Errorf("package ID mismatch between zone path %q and zone name %q", path, name) + } + return strconv.Atoi(packageIDFromName) +} + +// isPackageZone is a helper function that returns true if the power zone provided +// as argument is a package zone. Otherwise, it returns false. +func isPackageZone(z powerZone) bool { + return packageNameRegex.MatchString(z.getName()) +} + +// raplReader checks if rapl kernel module is loaded and exposes power metrics supported by +// power capping interface. +// +// Exposed metric are: +// - Per-package ID current power consumption. +// - Per-dram current power consumption. +// - Per-package ID maximum allowed power. +type raplReader interface { + // initZoneMap initializes a map of zones that represents the hierarchy tree for intel-rapl + // control zones of the host. + initZoneMap() error + + // getPackageIDs returns an ordered slice with package IDs within the map of zones. + getPackageIDs() []int + + // isRaplLoaded check if intel-rapl kernel module is loaded. 
+ isRaplLoaded(modulesPath string) (bool, error) + + // getCurrentPowerConsumptionWatts takes a package ID and domain, and returns the current power consumption. + getCurrentPowerConsumptionWatts(packageID int, domain string) (float64, error) + + // getMaxPowerConstraintWatts takes a package ID and returns the maximum allowed power. + getMaxPowerConstraintWatts(packageID int) (float64, error) +} + +// raplData represents per-package ID power zone tree of the intel rapl control zone +// of the host. +// +// It represents the hierarchy tree for intel-rapl control zone: +// +// /sys/devices/virtual/powercap/intel-rapl/ +// ├── intel-rapl:0 (package zone) +// │ ├── intel-rapl:0:0 (device subzone) +// │ ├── intel-rapl:0:1 +// │ ∙ +// | └── intel-rapl:0:n +// ├── intel-rapl:1 (package zone) +// │ ├── intel-rapl:1:0 (device subzone) +// │ ├── intel-rapl:1:1 +// │ ∙ +// │ └── intel-rapl:1:m +// ∙ +// └── intel-rapl:l (package zone) +// +// Each entry map corresponds to a package zone, which in turns has subzones corresponding +// to specific devices. +type raplData struct { + basePath string + zones map[int]powerZone +} + +// initZoneMap initializes the zone map of the receiver with the power zone tree corresponding +// to the host configuration. It validates that the root zone is a valid package domain +// zone. In case of malformed power zone trees, an error is returned. 
+func (r *raplData) initZoneMap() error { + if len(r.basePath) == 0 { + return errors.New("base path of rapl control zone cannot be empty") + } + if err := checkFile(r.basePath); err != nil { + return fmt.Errorf("invalid base path of rapl control zone: %w", err) + } + + zoneDirs, err := os.ReadDir(r.basePath) + if err != nil { + return fmt.Errorf("error reading path %q: %w", r.basePath, err) + } + + // initialize package domain zones + zones := make(map[int]powerZone, len(zoneDirs)) + for _, zoneDir := range zoneDirs { + zoneName := zoneDir.Name() + if !zoneDir.IsDir() || !zoneRegex.MatchString(zoneName) { + continue + } + zonePath := filepath.Join(r.basePath, zoneName) + newZone, err := newZoneFromPath(zonePath) + if err != nil { + return fmt.Errorf("error creating zone for path %q: %w", zonePath, err) + } + + // skip if zone is not a package zone + if !isPackageZone(newZone) { + continue + } + + // validate fields for the package zone + pkgZone := &packageZone{newZone} + packageID, err := pkgZone.getPackageID() + if err != nil { + return fmt.Errorf("error validating package domain zone: %w", err) + } + + // initialize per package domain subzones + subzoneDirs, err := os.ReadDir(zonePath) + if err != nil { + return fmt.Errorf("error reading directory %q: %w", zonePath, err) + } + + for _, subzoneDir := range subzoneDirs { + subzoneName := subzoneDir.Name() + if !subzoneDir.IsDir() || !subzoneRegex.MatchString(subzoneName) { + continue + } + subzonePath := filepath.Join(zonePath, subzoneName) + subzone, err := newZoneFromPath(subzonePath) + if err != nil { + return fmt.Errorf("error creating subzone for path %q: %w", subzonePath, err) + } + newZone.addSubzone(subzone) + } + zones[packageID] = newZone + } + + if len(zones) == 0 { + return fmt.Errorf("no package zones found for base path %q", r.basePath) + } + + // read and store a timestamped value of current energy attribute + // for each package and dram domain zones. 
+ for _, pkgZone := range zones { + s, err := pkgZone.readAttribute(currEnergyAttr.String()) + if err != nil { + return fmt.Errorf("error initializing current energy attribute for package domain zone %q: %w", pkgZone.getPath(), err) + } + pkgZone.setEnergySample(s) + if dramZone := pkgZone.getDomainSubzone(dramDomain.String()); dramZone != nil { + s, err = dramZone.readAttribute(currEnergyAttr.String()) + if err != nil { + return fmt.Errorf("error initializing current energy attribute for dram domain zone %q: %w", dramZone.getPath(), err) + } + dramZone.setEnergySample(s) + } + } + r.zones = zones + return nil +} + +// getPackageIDs returns an ordered slice with package IDs within the map of zones. +func (r *raplData) getPackageIDs() []int { + pkgIDs := make([]int, 0, len(r.zones)) + + for packageID := range r.zones { + pkgIDs = append(pkgIDs, packageID) + } + slices.Sort(pkgIDs) + return pkgIDs +} + +// isRaplLoaded returns true if intel rapl kernel module and its dependencies are +// loaded, otherwise returns false. +// TODO: Review implementation of this function to cover older kernel versions. 
+func (r *raplData) isRaplLoaded(modulesPath string) (bool, error) { + if err := checkFile(modulesPath); err != nil { + return false, err + } + + f, err := os.Open(modulesPath) + if err != nil { + return false, fmt.Errorf("error opening file %q: %w", modulesPath, err) + } + defer f.Close() + + raplModules := map[string]bool{ + "rapl": false, + "intel_rapl_msr": false, + "intel_rapl_common": false, + } + scanner := bufio.NewScanner(f) + for scanner.Scan() { + mod := strings.Split(scanner.Text(), " ")[0] + if _, ok := raplModules[mod]; ok { + raplModules[mod] = true + } + } + + if err := scanner.Err(); err != nil { + return false, fmt.Errorf("could not read file %q: %w", modulesPath, err) + } + + res := raplModules["rapl"] && raplModules["intel_rapl_msr"] && + raplModules["intel_rapl_common"] + return res, nil +} + +// getEnergyAttributeWithTimestamp returns per-domain energy attribute, in Microjoules, for +// a specific package ID, and the timestamp of the operation. +func (r *raplData) getEnergyAttributeWithTimestamp(packageID int, domain, energyAttribute string) (attrSample, error) { + z, ok := r.zones[packageID] + if !ok { + return attrSample{}, fmt.Errorf("could not find zone for package ID: %v", packageID) + } + + switch domain { + case packageDomain.String(): + case dramDomain.String(): + z = z.getDomainSubzone(domain) + if z == nil { + return attrSample{}, fmt.Errorf("could not find dram subzone for package ID: %v", packageID) + } + default: + return attrSample{}, fmt.Errorf("unsupported rapl domain %q", domain) + } + + sample, err := z.readAttribute(energyAttribute) + if err != nil { + return attrSample{}, fmt.Errorf("error reading energy attribute %q: %w", energyAttribute, err) + } + return sample, nil +} + +// getLastMeasuredEnergyAttribute gets the per-domain last measured current energy attribute for a specific +// package ID. 
+func (r *raplData) getLastMeasuredEnergyAttribute(packageID int, domain string) (attrSample, error) { + z, ok := r.zones[packageID] + if !ok { + return attrSample{}, fmt.Errorf("could not find zone for package ID: %v", packageID) + } + + switch domain { + case packageDomain.String(): + case dramDomain.String(): + z = z.getDomainSubzone(domain) + if z == nil { + return attrSample{}, fmt.Errorf("could not find dram subzone for package ID: %v", packageID) + } + default: + return attrSample{}, fmt.Errorf("unsupported rapl domain %q", domain) + } + return z.getEnergySample(), nil +} + +// setLastMeasuredEnergyAttribute sets the per-domain last measured current energy attribute to the one +// provided as argument, for a specific package ID. +func (r *raplData) setLastMeasuredEnergyAttribute(packageID int, domain string, sample attrSample) error { + z, ok := r.zones[packageID] + if !ok { + return fmt.Errorf("could not find zone for package ID: %v", packageID) + } + + switch domain { + case packageDomain.String(): + case dramDomain.String(): + z = z.getDomainSubzone(domain) + if z == nil { + return fmt.Errorf("could not find dram subzone for package ID: %v", packageID) + } + default: + return fmt.Errorf("unsupported rapl domain %q", domain) + } + + z.setEnergySample(sample) + return nil +} + +// getCurrentPowerConsumptionWatts returns per-domain current power consumption, in Watts, for a +// specific package ID. 
+//
+// The power is the energy consumed since the last measured sample divided by the time elapsed
+// between that sample and a freshly read one. On success, the freshly read sample replaces the
+// last measured one. An error is returned, and the last measured sample is left untouched, if
+// any of the samples cannot be obtained or if no time has elapsed between both samples.
+func (r *raplData) getCurrentPowerConsumptionWatts(packageID int, domain string) (float64, error) {
+	// Get last measured current energy attribute for the corresponding zone
+	s1, err := r.getLastMeasuredEnergyAttribute(packageID, domain)
+	if err != nil {
+		return 0.0, fmt.Errorf("error getting last measured current energy attribute for %q domain: %w", domain, err)
+	}
+
+	// Get current energy attribute for the corresponding zone
+	s2, err := r.getEnergyAttributeWithTimestamp(packageID, domain, currEnergyAttr.String())
+	if err != nil {
+		return 0.0, fmt.Errorf("error reading current energy attribute for %q domain: %w", domain, err)
+	}
+
+	// The time delta is the divisor of the power calculation. A delta of zero would produce an
+	// infinite or NaN result, and a negative one a power with the wrong sign, so both are
+	// rejected instead of being propagated to the caller as a measurement.
+	timeDelta := s2.timestamp.Sub(s1.timestamp).Seconds()
+	if timeDelta <= 0 {
+		return 0.0, fmt.Errorf("non-positive time delta between energy samples for %q domain", domain)
+	}
+
+	// Calculate the energy delta between both samples. Equal values mean that no energy has been
+	// consumed, so only a strictly lower current value is treated as a reset of the counter.
+	energyDelta := s2.value - s1.value
+	if energyDelta < 0 {
+		// The value of current energy attribute is reset to zero when it reaches the value of maximum
+		// energy attribute. In this case the value of maximum energy attribute is used to calculate the
+		// energy delta.
+		sMax, err := r.getEnergyAttributeWithTimestamp(packageID, domain, maxEnergyAttr.String())
+		if err != nil {
+			return 0.0, fmt.Errorf("error reading maximum energy attribute for %q domain: %w", domain, err)
+		}
+		energyDelta = sMax.value + s2.value - s1.value
+	}
+
+	// Calculate power as the ratio between the delta of energy samples and time delta
+	power := fromMicrojoulesToJoulesRatio * energyDelta / timeDelta
+
+	// Set current energy attribute of the corresponding zone as last measured
+	if err = r.setLastMeasuredEnergyAttribute(packageID, domain, s2); err != nil {
+		return 0.0, fmt.Errorf("error setting current energy attribute for %q domain: %w", domain, err)
+	}
+	return power, nil
+}
+
+// getMaxPowerConstraintWatts returns the maximum allowed power, in Watts, for a specific package ID.
+func (r *raplData) getMaxPowerConstraintWatts(packageID int) (float64, error) { + z, ok := r.zones[packageID] + if !ok { + return 0.0, fmt.Errorf("could not find zone for package ID: %v", packageID) + } + s, err := z.readAttribute(maxPowerConstraintAttr.String()) + if err != nil { + return 0.0, fmt.Errorf("error reading max power constraint attribute for package ID: %v: %w", packageID, err) + } + return s.value * fromMicrowattsToWatts, nil +} diff --git a/rapl_test.go b/rapl_test.go new file mode 100644 index 0000000..e345dac --- /dev/null +++ b/rapl_test.go @@ -0,0 +1,1159 @@ +// Copyright (C) 2023 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 + +//go:build linux && amd64 + +package powertelemetry + +import ( + "errors" + "fmt" + "testing" + "time" + + "github.com/stretchr/testify/mock" + "github.com/stretchr/testify/require" + "github.com/stretchr/testify/suite" +) + +func TestDomainTypeToString(t *testing.T) { + t.Run("Package", func(t *testing.T) { + packageType := domainType(0) + require.Equal(t, "package", packageType.String()) + }) + + t.Run("Dram", func(t *testing.T) { + dramType := domainType(1) + require.Equal(t, "dram", dramType.String()) + }) + + t.Run("Invalid", func(t *testing.T) { + invalidType := domainType(2) + require.Equal(t, "", invalidType.String()) + }) +} + +func TestAttrTypeToString(t *testing.T) { + t.Run("CurrentEnergy", func(t *testing.T) { + currEnergyType := attrType(0) + require.Equal(t, "currEnergy", currEnergyType.String()) + }) + + t.Run("MaximumEnergy", func(t *testing.T) { + maxEnergyType := attrType(1) + require.Equal(t, "maxEnergy", maxEnergyType.String()) + }) + + t.Run("MaximumPower", func(t *testing.T) { + maxPowerType := attrType(2) + require.Equal(t, "maxPower", maxPowerType.String()) + }) + + t.Run("Invalid", func(t *testing.T) { + invalidType := attrType(3) + require.Equal(t, "", invalidType.String()) + }) +} + +func TestZoneGetters(t *testing.T) { + zoneName := "package-0" + zonePath := 
"testdata/intel-rapl/intel-rapl:0" + + dramZone := &zone{ + name: "dram", + path: "testdata/intel-rapl/intel-rapl:0/intel-rapl:0:0", + subzones: make([]powerZone, 0), + } + subZones := []powerZone{dramZone} + + sample := attrSample{ + value: 100000, + timestamp: time.Now(), + } + + z := &zone{ + name: zoneName, + path: zonePath, + energy: sample, + subzones: subZones, + } + + require.Equal(t, zoneName, z.getName()) + require.Equal(t, zonePath, z.getPath()) + require.Equal(t, sample, z.getEnergySample()) + require.Equal(t, subZones, z.getSubzones()) + require.Equal(t, dramZone, z.getDomainSubzone(dramDomain.String())) +} + +func TestZoneSetters(t *testing.T) { + z := &zone{ + name: "package-0", + path: "testdata/intel-rapl/intel-rapl:0", + energy: attrSample{}, + subzones: make([]powerZone, 0), + } + + // set energy sample to package domain zone + s := attrSample{ + value: 100000, + timestamp: time.Now(), + } + z.setEnergySample(s) + + // set dram subzone as child of package zone + dramZone := &zone{ + name: "dram", + path: "testdata/intel-rapl/intel-rapl:0/intel-rapl:0:0", + subzones: make([]powerZone, 0), + } + z.addSubzone(dramZone) + + require.Equal(t, s, z.getEnergySample()) + require.Equal(t, dramZone, z.getDomainSubzone(dramDomain.String())) +} + +func (s *raplTimeSensitiveTestSuite) TestZoneReadAttribute() { + testCases := []struct { + name string + path string + attr string + sampleExp attrSample + err error + }{ + { + name: "Unsupported", + path: makeTestDataPath("testdata/intel-rapl/intel-rapl:0"), + attr: "minEnergy", + sampleExp: attrSample{}, + err: errors.New("unsupported attribute \"minEnergy\""), + }, + { + name: "MissingFile", + path: makeTestDataPath("testdata/intel-rapl/intel-rapl:1/intel-rapl:1:0"), + attr: currEnergyAttr.String(), + sampleExp: attrSample{}, + err: errors.New(`error reading file "` + makeTestDataPath("testdata/intel-rapl/intel-rapl:1/intel-rapl:1:0/energy_uj") + `"`), + }, + { + name: "FileContentNonNumeric", + path: 
makeTestDataPath("testdata/intel-rapl/intel-rapl:1"), + attr: maxEnergyAttr.String(), + sampleExp: attrSample{}, + err: errors.New("error converting attribute file content to float64"), + }, + { + name: "CurrEnergy", + path: makeTestDataPath("testdata/intel-rapl/intel-rapl:0"), + attr: currEnergyAttr.String(), + sampleExp: attrSample{ + value: 206999074695, + timestamp: fakeClock.Now(), + }, + err: nil, + }, + { + name: "MaxEnergy", + path: makeTestDataPath("testdata/intel-rapl/intel-rapl:0/intel-rapl:0:1"), + attr: maxEnergyAttr.String(), + sampleExp: attrSample{ + value: 65712999613, + timestamp: fakeClock.Now(), + }, + err: nil, + }, + { + name: "MaxPowerConstraint", + path: makeTestDataPath("testdata/intel-rapl/intel-rapl:0"), + attr: maxPowerConstraintAttr.String(), + sampleExp: attrSample{ + value: 250000000, + timestamp: fakeClock.Now(), + }, + err: nil, + }, + } + + for _, tc := range testCases { + s.Run(tc.name, func() { + z := &zone{ + name: tc.name, + path: tc.path, + } + + sampleOut, err := z.readAttribute(tc.attr) + s.Require().Equal(tc.sampleExp, sampleOut) + if tc.err != nil { + s.Require().ErrorContains(err, tc.err.Error()) + } else { + s.Require().NoError(err) + } + }) + } +} + +func TestPackageZoneGetPackageID(t *testing.T) { + testCases := []struct { + name string + zoneName string + zonePath string + packageIDExp int + err error + }{ + { + name: "InvalidName", + zoneName: "package-socket", + zonePath: "intel-rapl:0", + packageIDExp: 0, + err: errors.New("invalid package domain name for zone at path \"intel-rapl:0\""), + }, + { + name: "InvalidPath", + zoneName: "package-0", + zonePath: "rapl:0", + packageIDExp: 0, + err: errors.New("invalid package domain zone path \"rapl:0\""), + }, + { + name: "PackageIDMismatch", + zoneName: "package-1", + zonePath: "intel-rapl:0", + packageIDExp: 0, + err: errors.New("package ID mismatch between zone path \"intel-rapl:0\" and zone name \"package-1\""), + }, + { + name: "PackageNameWithLeadingZeroes", + 
zoneName: "package-01", + zonePath: "intel-rapl:1", + packageIDExp: 0, + err: errors.New("invalid package domain name for zone at path \"intel-rapl:1\""), + }, + { + name: "PackageZonePathWithLeadingZeroes", + zoneName: "package-1", + zonePath: "intel-rapl:01", + packageIDExp: 0, + err: errors.New("package ID mismatch between zone path \"intel-rapl:01\" and zone name \"package-1\""), + }, + { + name: "PackageID_1", + zoneName: "package-1", + zonePath: "intel-rapl:1", + packageIDExp: 1, + err: nil, + }, + { + name: "PackageID_10", + zoneName: "package-10", + zonePath: "intel-rapl:10", + packageIDExp: 10, + err: nil, + }, + } + + for _, tc := range testCases { + t.Run(tc.name, func(t *testing.T) { + z := &packageZone{ + &zone{ + name: tc.zoneName, + path: tc.zonePath, + }, + } + + packageIDOut, err := z.getPackageID() + require.Equal(t, tc.packageIDExp, packageIDOut) + if err != nil { + require.ErrorContains(t, err, tc.err.Error()) + } else { + require.NoError(t, err) + } + }) + } +} + +func TestIsRaplLoaded(t *testing.T) { + testCases := []struct { + name string + filePath string + expected bool + err error + }{ + { + name: "EmptyFilename", + filePath: "", + expected: false, + err: errors.New("file path is empty"), + }, + { + name: "FileNotExist", + filePath: "dummy_proc_modules_file", + expected: false, + err: errors.New("file \"dummy_proc_modules_file\" does not exist"), + }, + { + name: "PathIsADir", + filePath: "testdata", + expected: false, + err: errors.New("could not read file \"testdata\": read testdata: is a directory"), + }, + { + name: "Symlink", + filePath: "testdata/symlink", + expected: false, + err: errors.New("file \"testdata/symlink\" is a symlink"), + }, + { + name: "NotLoaded", + filePath: "testdata/proc_modules_rapl_not_loaded", + expected: false, + err: nil, + }, + { + name: "Loaded", + filePath: "testdata/proc_modules_rapl_loaded", + expected: true, + err: nil, + }, + } + + for _, tc := range testCases { + t.Run(tc.name, func(t *testing.T) { + 
r := &raplData{} + out, err := r.isRaplLoaded(tc.filePath) + require.Equal(t, tc.expected, out) + if tc.err != nil { + require.EqualError(t, err, tc.err.Error()) + } else { + require.NoError(t, err) + } + }) + } +} + +type raplTimeSensitiveTestSuite struct { + suite.Suite +} + +func (s *raplTimeSensitiveTestSuite) SetupTest() { + setFakeClock() + fakeClock.Set(time.Now()) +} + +func (s *raplTimeSensitiveTestSuite) TearDownTest() { + unsetFakeClock() +} + +func TestRaplTimeSensitive(t *testing.T) { + suite.Run(t, new(raplTimeSensitiveTestSuite)) +} + +func (s *raplTimeSensitiveTestSuite) TestInitZoneMap() { + testCases := []struct { + name string + raplPath string + raplZones map[int]powerZone + err error + }{ + { + name: "RaplPathEmpty", + raplPath: "", + raplZones: nil, + err: errors.New("base path of rapl control zone cannot be empty"), + }, + { + name: "RaplPathNotExist", + raplPath: "/dummy/path", + raplZones: nil, + err: errors.New("file \"/dummy/path\" does not exist"), + }, + { + name: "RaplPathInvalid", + raplPath: makeTestDataPath("testdata/"), + raplZones: nil, + err: errors.New(`no package zones found for base path "` + makeTestDataPath("testdata/") + `"`), + }, + { + name: "RaplPathIsNotADir", + raplPath: makeTestDataPath("testdata/intel-rapl/intel-rapl:0/name"), + raplZones: nil, + err: errors.New(`error reading path "` + makeTestDataPath("testdata/intel-rapl/intel-rapl:0/name") + `"`), + }, + { + name: "PackageDomainNameFileNotExist", + raplPath: makeTestDataPath("testdata/intel-rapl-package-domain-name-not-exist"), + raplZones: nil, + err: errors.New(`file "` + + makeTestDataPath("testdata/intel-rapl-package-domain-name-not-exist/intel-rapl:0/name") + `" does not exist`), + }, + { + name: "PackageDomainNameFileEmpty", + raplPath: makeTestDataPath("testdata/intel-rapl-domain-name-empty"), + raplZones: nil, + err: errors.New("zone domain cannot be empty"), + }, + { + name: "DramDomainNameFileNotExist", + raplPath: 
makeTestDataPath("testdata/intel-rapl-dram-domain-name-not-exist"), + raplZones: nil, + err: errors.New(`file "` + + makeTestDataPath("testdata/intel-rapl-dram-domain-name-not-exist/intel-rapl:0/intel-rapl:0:0/name") + `" does not exist`), + }, + { + name: "MismatchPackageDomainID", + raplPath: makeTestDataPath("testdata/intel-rapl-invalid-package-domain-name-id"), + raplZones: nil, + err: errors.New(`package ID mismatch between zone path "` + + makeTestDataPath("testdata/intel-rapl-invalid-package-domain-name-id/intel-rapl:1") + `" and zone name "package-0"`), + }, + { + name: "PackageCurrentEnergyAttributeFileNotExist", + raplPath: makeTestDataPath("testdata/intel-rapl-package-curr-energy-attr-file-not-exist"), + raplZones: nil, + err: errors.New(`error reading file "` + + makeTestDataPath("testdata/intel-rapl-package-curr-energy-attr-file-not-exist/intel-rapl:0/energy_uj") + `"`), + }, + { + name: "DramCurrentEnergyAttributeFileNotExist", + raplPath: makeTestDataPath("testdata/intel-rapl-dram-curr-energy-attr-file-not-exist"), + raplZones: nil, + err: errors.New(`error reading file "` + + makeTestDataPath("testdata/intel-rapl-dram-curr-energy-attr-file-not-exist/intel-rapl:0/intel-rapl:0:1/energy_uj") + `"`), + }, + { + name: "RaplPathValid", + raplPath: makeTestDataPath("testdata/intel-rapl"), + raplZones: map[int]powerZone{ + 0: &zone{ + name: "package-0", + path: makeTestDataPath("testdata/intel-rapl/intel-rapl:0"), + energy: attrSample{ + value: 206999074695, + timestamp: fakeClock.Now(), + }, + subzones: []powerZone{ + &zone{ + name: "domain", + path: makeTestDataPath("testdata/intel-rapl/intel-rapl:0/intel-rapl:0:0"), + subzones: make([]powerZone, 0), + }, + &zone{ + name: "dram", + path: makeTestDataPath("testdata/intel-rapl/intel-rapl:0/intel-rapl:0:1"), + energy: attrSample{ + value: 64155753419, + timestamp: fakeClock.Now(), + }, + subzones: make([]powerZone, 0), + }, + }, + }, + 1: &zone{ + name: "package-1", + path: 
makeTestDataPath("testdata/intel-rapl/intel-rapl:1"), + energy: attrSample{ + value: 206999075695, + timestamp: fakeClock.Now(), + }, + subzones: []powerZone{ + &zone{ + name: "socket", + path: makeTestDataPath("testdata/intel-rapl/intel-rapl:1/intel-rapl:1:0"), + subzones: make([]powerZone, 0), + }, + }, + }, + 2: &zone{ + name: "package-2", + path: makeTestDataPath("testdata/intel-rapl/intel-rapl:2"), + energy: attrSample{ + value: 205999075695, + timestamp: fakeClock.Now(), + }, + subzones: []powerZone{ + &zone{ + name: "dram", + path: makeTestDataPath("testdata/intel-rapl/intel-rapl:2/intel-rapl:2:0"), + energy: attrSample{ + value: 66155553419, + timestamp: fakeClock.Now(), + }, + subzones: make([]powerZone, 0), + }, + }, + }, + 3: &zone{ + name: "package-3", + path: makeTestDataPath("testdata/intel-rapl/intel-rapl:3"), + energy: attrSample{ + value: 205888075695, + timestamp: fakeClock.Now(), + }, + subzones: make([]powerZone, 0), + }, + }, + }, + } + + for _, tc := range testCases { + s.Run(tc.name, func() { + rapl := &raplData{ + basePath: tc.raplPath, + } + + err := rapl.initZoneMap() + s.Require().Equal(tc.raplZones, rapl.zones) + if tc.err != nil { + s.Require().ErrorContains(err, tc.err.Error()) + } else { + s.Require().NoError(err) + } + }) + } +} + +func (s *raplTimeSensitiveTestSuite) TestGetEnergyAttributeWithTimestamp() { + testCase := []struct { + name string + packageID int + domain string + energyAttr string + sample attrSample + err error + }{ + { + name: "InvalidPackageID", + packageID: 3, + domain: "package", + energyAttr: "currEnergy", + err: errors.New("could not find zone for package ID: 3"), + }, + { + name: "InvalidDomainType", + packageID: 0, + domain: "invalid", + energyAttr: "currEnergy", + err: errors.New("unsupported rapl domain \"invalid\""), + }, + { + name: "DramDomainNotExist", + packageID: 1, + domain: "dram", + energyAttr: "currEnergy", + err: errors.New("could not find dram subzone for package ID: 1"), + }, + { + name: 
"InvalidEnergyAttribute", + packageID: 0, + domain: "package", + energyAttr: "invalid", + err: errors.New("unsupported attribute \"invalid\""), + }, + { + name: "EnergyAttributeFileNotExist", + packageID: 2, + domain: "package", + energyAttr: "maxEnergy", + err: errors.New(`file "` + makeTestDataPath("testdata/intel-rapl/intel-rapl:2/max_energy_range_uj") + `" does not exist`), + }, + { + name: "EnergyAttributeFileNonNumeric", + packageID: 1, + domain: "package", + energyAttr: "maxEnergy", + err: errors.New("error reading energy attribute \"maxEnergy\": error converting attribute file content to float64"), + }, + { + name: "EnergyAttributeFileEmpty", + packageID: 2, + domain: "dram", + energyAttr: "maxEnergy", + err: errors.New("error reading energy attribute \"maxEnergy\": error converting attribute file content to float64"), + }, + { + name: "PackageMaxEnergyAttribute", + packageID: 0, + domain: "package", + energyAttr: "maxEnergy", + sample: attrSample{ + value: 262143328850.0, + timestamp: fakeClock.Now(), + }, + err: nil, + }, + { + name: "DramCurrEnergyAttribute", + packageID: 0, + domain: "dram", + energyAttr: "currEnergy", + sample: attrSample{ + value: 64155753419.0, + timestamp: fakeClock.Now(), + }, + err: nil, + }, + } + + r := &raplData{ + zones: map[int]powerZone{ + 0: &zone{ + name: "package-0", + path: makeTestDataPath("testdata/intel-rapl/intel-rapl:0"), + subzones: []powerZone{ + &zone{ + name: "dram", + path: makeTestDataPath("testdata/intel-rapl/intel-rapl:0/intel-rapl:0:1"), + subzones: make([]powerZone, 0), + }, + }, + }, + 1: &zone{ + name: "package-1", + path: makeTestDataPath("testdata/intel-rapl/intel-rapl:1"), + subzones: []powerZone{}, + }, + 2: &zone{ + name: "package-2", + path: makeTestDataPath("testdata/intel-rapl/intel-rapl:2"), + subzones: []powerZone{ + &zone{ + name: "dram", + path: makeTestDataPath("testdata/intel-rapl/intel-rapl:2/intel-rapl:2:0"), + subzones: make([]powerZone, 0), + }, + }, + }, + }, + } + + for _, tc := range 
testCase { + s.Run(tc.name, func() { + outSample, err := r.getEnergyAttributeWithTimestamp(tc.packageID, tc.domain, tc.energyAttr) + if tc.err != nil { + s.Require().ErrorContains(err, tc.err.Error()) + } else { + s.Require().NoError(err) + s.Require().Equal(tc.sample, outSample) + } + }) + } +} + +func (s *raplTimeSensitiveTestSuite) TestGetLastMeasuredEnergyAttribute() { + testCases := []struct { + name string + packageID int + domain string + sample attrSample + err error + }{ + { + name: "InvalidPackageID", + packageID: 3, + domain: "package", + err: errors.New("could not find zone for package ID: 3"), + }, + { + name: "InvalidDomainType", + packageID: 0, + domain: "socket", + err: errors.New("unsupported rapl domain \"socket\""), + }, + { + name: "DramSubzoneNotExist", + packageID: 1, + domain: "dram", + err: errors.New("could not find dram subzone for package ID: 1"), + }, + { + name: "PackageCurrEnergy", + packageID: 0, + domain: "package", + sample: attrSample{ + value: 206999074695.0, + timestamp: fakeClock.Now(), + }, + err: nil, + }, + { + name: "DramCurrEnergy", + packageID: 0, + domain: "dram", + sample: attrSample{ + value: 64155753419.0, + timestamp: fakeClock.Now(), + }, + err: nil, + }, + } + + r := &raplData{ + zones: map[int]powerZone{ + 0: &zone{ + name: "package-0", + path: "testdata/intel-rapl/intel-rapl:0", + energy: attrSample{ + value: 206999074695.0, + timestamp: fakeClock.Now(), + }, + subzones: []powerZone{ + &zone{ + name: "dram", + path: "testdata/intel-rapl/intel-rapl:0/intel-rapl:0:1", + energy: attrSample{ + value: 64155753419.0, + timestamp: fakeClock.Now(), + }, + subzones: make([]powerZone, 0), + }, + }, + }, + 1: &zone{ + name: "package-1", + path: "testdata/intel-rapl/intel-rapl:1", + subzones: []powerZone{}, + }, + }, + } + + for _, tc := range testCases { + s.Run(tc.name, func() { + outSample, err := r.getLastMeasuredEnergyAttribute(tc.packageID, tc.domain) + s.Require().Equal(tc.sample, outSample) + if tc.err != nil { + 
s.Require().ErrorContains(err, tc.err.Error()) + } + }) + } +} + +func (s *raplTimeSensitiveTestSuite) TestSetLastMeasuredEnergyAttribute() { + testCases := []struct { + name string + packageID int + domain string + sample attrSample + err error + }{ + { + name: "InvalidPackageID", + packageID: 5, + domain: "package", + err: errors.New("could not find zone for package ID: 5"), + }, + { + name: "InvalidDomainType", + packageID: 0, + domain: "socket", + err: errors.New("unsupported rapl domain \"socket\""), + }, + { + name: "DramSubzoneNotExist", + packageID: 1, + domain: "dram", + err: errors.New("could not find dram subzone for package ID: 1"), + }, + { + name: "PackageCurrEnergy", + packageID: 0, + domain: "package", + sample: attrSample{ + value: 206999074695.0, + timestamp: fakeClock.Now(), + }, + err: nil, + }, + { + name: "DramCurrEnergy", + packageID: 0, + domain: "dram", + sample: attrSample{ + value: 64155753419.0, + timestamp: fakeClock.Now(), + }, + err: nil, + }, + } + + r := &raplData{ + zones: map[int]powerZone{ + 0: &zone{ + name: "package-0", + path: "testdata/intel-rapl/intel-rapl:0", + subzones: []powerZone{ + &zone{ + name: "dram", + path: "testdata/intel-rapl/intel-rapl:0/intel-rapl:0:1", + subzones: make([]powerZone, 0), + }, + }, + }, + 1: &zone{ + name: "package-1", + path: "testdata/intel-rapl/intel-rapl:1", + subzones: []powerZone{}, + }, + }, + } + + for _, tc := range testCases { + s.Run(tc.name, func() { + err := r.setLastMeasuredEnergyAttribute(tc.packageID, tc.domain, tc.sample) + if tc.err != nil { + s.Require().ErrorContains(err, tc.err.Error()) + } else { + outSample, err := r.getLastMeasuredEnergyAttribute(tc.packageID, tc.domain) + s.Require().NoError(err) + s.Require().Equal(tc.sample, outSample) + } + }) + } +} + +// zoneMock represents a mock for raplData type. Implements raplReader interface. 
+type zoneMock struct { + mock.Mock +} + +func (m *zoneMock) getName() string { + args := m.Called() + return args.String(0) +} + +func (m *zoneMock) getPath() string { + args := m.Called() + return args.String(0) +} + +func (m *zoneMock) addSubzone(subzone powerZone) { + m.Called(subzone) +} + +func (m *zoneMock) getDomainSubzone(domain string) powerZone { + args := m.Called(domain) + if args.Get(0) == nil { + return nil + } + return args.Get(0).(powerZone) +} + +func (m *zoneMock) getSubzones() []powerZone { + args := m.Called() + return args.Get(0).([]powerZone) +} + +func (m *zoneMock) getEnergySample() attrSample { + args := m.Called() + return args.Get(0).(attrSample) +} + +func (m *zoneMock) setEnergySample(e attrSample) { + m.Called(e) +} + +func (m *zoneMock) readAttribute(attribute string) (attrSample, error) { + args := m.Called(attribute) + return args.Get(0).(attrSample), args.Error(1) +} + +func (s *raplTimeSensitiveTestSuite) TestGetCurrentPowerConsumptionWatt() { + s.Run("InvalidPackageID", func() { + packageID := 1 + domain := packageDomain.String() + expPower := 0.0 + errMsg := fmt.Sprintf("error getting last measured current energy attribute for %q domain: could not find zone for package ID: %v", domain, packageID) + + m := &zoneMock{} + r := &raplData{ + zones: map[int]powerZone{ + 0: &zone{}, + }, + } + + outPower, err := r.getCurrentPowerConsumptionWatts(packageID, domain) + s.Require().Equal(expPower, outPower) + s.Require().ErrorContains(err, errMsg) + m.AssertExpectations(s.T()) + }) + + s.Run("InvalidDomain", func() { + packageID := 0 + domain := "socket" + expPower := 0.0 + errMsg := fmt.Sprintf("error getting last measured current energy attribute for %q domain: unsupported rapl domain %q", domain, domain) + + m := &zoneMock{} + r := &raplData{ + zones: map[int]powerZone{ + 0: m, + }, + } + + outPower, err := r.getCurrentPowerConsumptionWatts(packageID, domain) + s.Require().Equal(expPower, outPower) + s.Require().ErrorContains(err, 
errMsg) + m.AssertExpectations(s.T()) + }) + + s.Run("PackageCurrentEnergyAttrError", func() { + packageID := 0 + pkg := packageDomain.String() + energyAttr := currEnergyAttr.String() + errMsg := fmt.Sprintf("error reading energy attribute %q", energyAttr) + expPower := 0.0 + + m := &zoneMock{} + m.On("getEnergySample").Return(attrSample{1000, fakeClock.Now()}, nil).Once() + m.On("readAttribute", energyAttr).Return(attrSample{}, errors.New(errMsg)).Once() + r := &raplData{ + zones: map[int]powerZone{ + 0: m, + }, + } + + outPower, err := r.getCurrentPowerConsumptionWatts(packageID, pkg) + s.Require().Equal(expPower, outPower) + s.Require().ErrorContains(err, errMsg) + m.AssertExpectations(s.T()) + }) + + s.Run("PackageWithoutResetCount", func() { + packageID := 0 + pkg := packageDomain.String() + s1 := attrSample{4000000, fakeClock.Now()} + s2 := attrSample{5000000, fakeClock.Now().Add(time.Second)} + expPower := 1.0 + + m := &zoneMock{} + m.On("getEnergySample").Return(s1, nil).Once() + m.On("readAttribute", currEnergyAttr.String()).Return(s2, nil).Once() + m.On("setEnergySample", s2).Once() + r := &raplData{ + zones: map[int]powerZone{ + 0: m, + }, + } + + outPower, err := r.getCurrentPowerConsumptionWatts(packageID, pkg) + s.Require().Equal(expPower, outPower) + s.Require().NoError(err) + m.AssertExpectations(s.T()) + }) + + s.Run("DramSetLastEnergyAttrError", func() { + packageID := 0 + dram := dramDomain.String() + s1 := attrSample{4000000, fakeClock.Now()} + s2 := attrSample{5000000, fakeClock.Now().Add(time.Second)} + errMsg := fmt.Sprintf("could not find dram subzone for package ID: %v", packageID) + expPower := 0.0 + + m := &zoneMock{} + m.On("getDomainSubzone", dram).Return(m).Once() + m.On("getEnergySample").Return(s1, nil).Once() + m.On("getDomainSubzone", dram).Return(m).Once() + m.On("readAttribute", currEnergyAttr.String()).Return(s2, nil).Once() + m.On("getDomainSubzone", dram).Return(nil).Once() + r := &raplData{ + zones: map[int]powerZone{ + 0: m, 
+ }, + } + + outPower, err := r.getCurrentPowerConsumptionWatts(packageID, dram) + s.Require().Equal(expPower, outPower) + s.Require().ErrorContains(err, errMsg) + m.AssertExpectations(s.T()) + }) + + s.Run("PackageWithResetCountError", func() { + packageID := 0 + pkg := packageDomain.String() + expPower := 0.0 + energyAttr := maxEnergyAttr.String() + errMsg := fmt.Sprintf("error reading energy attribute %q", energyAttr) + + m := &zoneMock{} + m.On("getEnergySample").Return(attrSample{4000000, fakeClock.Now()}, nil).Once() + m.On("readAttribute", currEnergyAttr.String()).Return(attrSample{1000000, fakeClock.Now().Add(time.Second)}, nil).Once() + m.On("readAttribute", energyAttr).Return(attrSample{}, errors.New(errMsg)).Once() + r := &raplData{ + zones: map[int]powerZone{ + 0: m, + }, + } + + outPower, err := r.getCurrentPowerConsumptionWatts(packageID, pkg) + s.Require().Equal(expPower, outPower) + s.Require().ErrorContains(err, errMsg) + m.AssertExpectations(s.T()) + }) + + s.Run("PackageWithResetCount", func() { + packageID := 0 + domain := packageDomain.String() + s1 := attrSample{4000000, fakeClock.Now()} + s2 := attrSample{1000000, fakeClock.Now().Add(time.Second)} + sMax := attrSample{4000000, time.Time{}} + expPower := 1.0 + + m := &zoneMock{} + m.On("getEnergySample").Return(s1, nil).Once() + m.On("readAttribute", currEnergyAttr.String()).Return(s2, nil).Once() + m.On("readAttribute", maxEnergyAttr.String()).Return(sMax, nil).Once() + m.On("setEnergySample", s2).Once() + r := &raplData{ + zones: map[int]powerZone{ + 0: m, + }, + } + + outPower, err := r.getCurrentPowerConsumptionWatts(packageID, domain) + s.Require().Equal(expPower, outPower) + s.Require().NoError(err) + m.AssertExpectations(s.T()) + }) + + s.Run("DramWithoutResetCount", func() { + packageID := 0 + dram := dramDomain.String() + s1 := attrSample{3000000, fakeClock.Now()} + s2 := attrSample{5000000, fakeClock.Now().Add(time.Second)} + expPower := 2.0 + + m := &zoneMock{} + 
m.On("getDomainSubzone", dram).Return(m) + m.On("getEnergySample").Return(s1, nil).Once() + m.On("readAttribute", currEnergyAttr.String()).Return(s2, nil).Once() + m.On("setEnergySample", s2).Once() + r := &raplData{ + zones: map[int]powerZone{ + 0: m, + }, + } + + outPower, err := r.getCurrentPowerConsumptionWatts(packageID, dram) + s.Require().Equal(expPower, outPower) + s.Require().NoError(err) + m.AssertExpectations(s.T()) + }) + + s.Run("DramWithResetCount", func() { + packageID := 0 + dram := dramDomain.String() + s1 := attrSample{3000000, fakeClock.Now()} + s2 := attrSample{1000000, fakeClock.Now().Add(time.Second)} + sMax := attrSample{4000000, time.Time{}} + expPower := 2.0 + + m := &zoneMock{} + m.On("getDomainSubzone", dram).Return(m) + m.On("getEnergySample").Return(s1, nil).Once() + m.On("readAttribute", currEnergyAttr.String()).Return(s2, nil).Once() + m.On("readAttribute", maxEnergyAttr.String()).Return(sMax, nil).Once() + m.On("setEnergySample", s2).Once() + r := &raplData{ + zones: map[int]powerZone{ + 0: m, + }, + } + + outPower, err := r.getCurrentPowerConsumptionWatts(packageID, dram) + s.Require().Equal(expPower, outPower) + s.Require().NoError(err) + m.AssertExpectations(s.T()) + }) +} + +func TestGetMaxPowerConstraintWatts(t *testing.T) { + testCases := []struct { + name string + packageID int + power float64 + err error + }{ + { + name: "InvalidPackageID", + packageID: 4, + power: 0.0, + err: errors.New("could not find zone for package ID: 4"), + }, + { + name: "AttributeFileNotExist", + packageID: 3, + power: 0.0, + err: errors.New(`error reading file "` + makeTestDataPath("testdata/intel-rapl/intel-rapl:3/constraint_0_max_power_uw") + `"`), + }, + { + name: "AttributeFileEmpty", + packageID: 2, + power: 0.0, + err: errors.New("error converting attribute file content to float64"), + }, + { + name: "AttributeFileNonNumeric", + packageID: 1, + power: 0.0, + err: errors.New("error converting attribute file content to float64"), + }, + { + 
name: "Valid", + packageID: 0, + power: 250.0, + err: nil, + }, + } + + r := &raplData{ + basePath: makeTestDataPath("testdata/intel-rapl"), + } + require.NoError(t, r.initZoneMap()) + + for _, tc := range testCases { + t.Run(tc.name, func(t *testing.T) { + outPower, err := r.getMaxPowerConstraintWatts(tc.packageID) + if tc.err != nil { + require.ErrorContains(t, err, tc.err.Error()) + } else { + require.NoError(t, err) + require.Equal(t, tc.power, outPower) + } + }) + } +} + +func TestGetPackageIDs(t *testing.T) { + testCases := []struct { + name string + raplZones map[int]powerZone + packageIDs []int + }{ + { + name: "ZonesMapIsNil", + raplZones: nil, + packageIDs: []int{}, + }, + { + name: "ZonesMapIsEmpty", + raplZones: map[int]powerZone{}, + packageIDs: []int{}, + }, + { + name: "ZonesMapIsUnordered", + raplZones: map[int]powerZone{ + 1: &zone{}, + 0: &zone{}, + 4: &zone{}, + 3: &zone{}, + }, + packageIDs: []int{0, 1, 3, 4}, + }, + } + + for _, tc := range testCases { + t.Run(tc.name, func(t *testing.T) { + r := &raplData{ + zones: tc.raplZones, + } + require.Equal(t, tc.packageIDs, r.getPackageIDs()) + }) + } +} diff --git a/testdata/alderlake_goldencove_core.json b/testdata/alderlake_goldencove_core.json new file mode 100644 index 0000000..c35336f --- /dev/null +++ b/testdata/alderlake_goldencove_core.json @@ -0,0 +1,299 @@ +{ + "Header": { + "Copyright": "Copyright (c) 2001 - 2023 Intel Corporation. All rights reserved.", + "Info": "Performance Monitoring Events for 12th and 13th Generation Intel(R) Core(TM) Processor - V1.21", + "DatePublished": "04/25/2023", + "Version": "1.21", + "Legend": "" + }, + "Events": [ + { + "EventCode": "0x00", + "UMask": "0x02", + "EventName": "CPU_CLK_UNHALTED.THREAD", + "BriefDescription": "Core cycles when the thread is not in halt state", + "PublicDescription": "Counts the number of core cycles while the thread is not in a halt state. The thread enters the halt state when it is running the HLT instruction. 
This event is a component in many key event ratios. The core frequency may change from time to time due to transitions associated with Enhanced Intel SpeedStep Technology or TM2. For this reason this event may have a changing ratio with regards to time. When the core frequency is constant, this event can approximate elapsed time while the core was not in the halt state. It is counted on a dedicated fixed counter, leaving the eight programmable counters available for other events.", + "Counter": "Fixed counter 1", + "PEBScounters": "33", + "SampleAfterValue": "2000003", + "MSRIndex": "0x00", + "MSRValue": "0x00", + "CollectPEBSRecord": "2", + "TakenAlone": "0", + "CounterMask": "0", + "Invert": "0", + "EdgeDetect": "0", + "PEBS": "0", + "Data_LA": "0", + "L1_Hit_Indication": "0", + "Errata": "null", + "Offcore": "0", + "Deprecated": "0", + "Speculative": "1" + }, + { + "EventCode": "0x00", + "UMask": "0x03", + "EventName": "CPU_CLK_UNHALTED.REF_TSC", + "BriefDescription": "Reference cycles when the core is not in halt state.", + "PublicDescription": "Counts the number of reference cycles when the core is not in a halt state. The core enters the halt state when it is running the HLT instruction or the MWAIT instruction. This event is not affected by core frequency changes (for example, P states, TM2 transitions) but has the same incrementing frequency as the time stamp counter. This event can approximate elapsed time while the core was not in a halt state. It is counted on a dedicated fixed counter, leaving the eight programmable counters available for other events. Note: On all current platforms this event stops counting during 'throttling (TM)' states duty off periods the processor is 'halted'. The counter update is done at a lower clock rate then the core clock the overflow status bit for this counter may appear 'sticky'. After the counter has overflowed and software clears the overflow status bit and resets the counter to less than MAX. 
The reset value to the counter is not clocked immediately so the overflow status bit will flip 'high (1)' and generate another PMI (if enabled) after which the reset value gets clocked into the counter. Therefore, software will get the interrupt, read the overflow status bit '1 for bit 34 while the counter value is less than MAX. Software should ignore this case.", + "Counter": "Fixed counter 2", + "PEBScounters": "34", + "SampleAfterValue": "2000003", + "MSRIndex": "0x00", + "MSRValue": "0x00", + "CollectPEBSRecord": "2", + "TakenAlone": "0", + "CounterMask": "0", + "Invert": "0", + "EdgeDetect": "0", + "PEBS": "0", + "Data_LA": "0", + "L1_Hit_Indication": "0", + "Errata": "null", + "Offcore": "0", + "Deprecated": "0", + "Speculative": "1" + }, + { + "EventCode": "0x3c", + "UMask": "0x00", + "EventName": "CPU_CLK_UNHALTED.THREAD_P", + "BriefDescription": "Thread cycles when thread is not in halt state", + "PublicDescription": "This is an architectural event that counts the number of thread cycles while the thread is not in a halt state. The thread enters the halt state when it is running the HLT instruction. The core frequency may change from time to time due to power or thermal throttling. For this reason, this event may have a changing ratio with regards to wall clock time.", + "Counter": "0,1,2,3,4,5,6,7", + "PEBScounters": "0,1,2,3,4,5,6,7", + "SampleAfterValue": "2000003", + "MSRIndex": "0x00", + "MSRValue": "0x00", + "CollectPEBSRecord": "2", + "TakenAlone": "0", + "CounterMask": "0", + "Invert": "0", + "EdgeDetect": "0", + "PEBS": "0", + "Data_LA": "0", + "L1_Hit_Indication": "0", + "Errata": "null", + "Offcore": "0", + "Deprecated": "0", + "Speculative": "1" + }, + { + "EventCode": "0x3c", + "UMask": "0x01", + "EventName": "CPU_CLK_UNHALTED.REF_TSC_P", + "BriefDescription": "Reference cycles when the core is not in halt state.", + "PublicDescription": "Counts the number of reference cycles when the core is not in a halt state. 
The core enters the halt state when it is running the HLT instruction or the MWAIT instruction. This event is not affected by core frequency changes (for example, P states, TM2 transitions) but has the same incrementing frequency as the time stamp counter. This event can approximate elapsed time while the core was not in a halt state. It is counted on a dedicated fixed counter, leaving the four (eight when Hyperthreading is disabled) programmable counters available for other events. Note: On all current platforms this event stops counting during 'throttling (TM)' states duty off periods the processor is 'halted'. The counter update is done at a lower clock rate then the core clock the overflow status bit for this counter may appear 'sticky'. After the counter has overflowed and software clears the overflow status bit and resets the counter to less than MAX. The reset value to the counter is not clocked immediately so the overflow status bit will flip 'high (1)' and generate another PMI (if enabled) after which the reset value gets clocked into the counter. Therefore, software will get the interrupt, read the overflow status bit '1 for bit 34 while the counter value is less than MAX. 
Software should ignore this case.", + "Counter": "0,1,2,3,4,5,6,7", + "PEBScounters": "0,1,2,3,4,5,6,7", + "SampleAfterValue": "2000003", + "MSRIndex": "0x00", + "MSRValue": "0x00", + "CollectPEBSRecord": "2", + "TakenAlone": "0", + "CounterMask": "0", + "Invert": "0", + "EdgeDetect": "0", + "PEBS": "0", + "Data_LA": "0", + "L1_Hit_Indication": "0", + "Errata": "null", + "Offcore": "0", + "Deprecated": "0", + "Speculative": "1" + }, + { + "EventCode": "0x3c", + "UMask": "0x02", + "EventName": "CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE", + "BriefDescription": "Core crystal clock cycles when this thread is unhalted and the other thread is halted.", + "PublicDescription": "Counts Core crystal clock cycles when current thread is unhalted and the other thread is halted.", + "Counter": "0,1,2,3,4,5,6,7", + "PEBScounters": "0,1,2,3,4,5,6,7", + "SampleAfterValue": "25003", + "MSRIndex": "0x00", + "MSRValue": "0x00", + "CollectPEBSRecord": "2", + "TakenAlone": "0", + "CounterMask": "0", + "Invert": "0", + "EdgeDetect": "0", + "PEBS": "0", + "Data_LA": "0", + "L1_Hit_Indication": "0", + "Errata": "null", + "Offcore": "0", + "Deprecated": "0", + "Speculative": "1" + }, + { + "EventCode": "0x3c", + "UMask": "0x08", + "EventName": "CPU_CLK_UNHALTED.REF_DISTRIBUTED", + "BriefDescription": "Core crystal clock cycles. Cycle counts are evenly distributed between active threads in the Core.", + "PublicDescription": "This event distributes Core crystal clock cycle counts between active hyperthreads, i.e., those in C0 sleep-state. A hyperthread becomes inactive when it executes the HLT or MWAIT instructions. If one thread is active in a core, all counts are attributed to this hyperthread. 
To obtain the full count when the Core is active, sum the counts from each hyperthread.", + "Counter": "0,1,2,3,4,5,6,7", + "PEBScounters": "0,1,2,3,4,5,6,7", + "SampleAfterValue": "2000003", + "MSRIndex": "0x00", + "MSRValue": "0x00", + "CollectPEBSRecord": "2", + "TakenAlone": "0", + "CounterMask": "0", + "Invert": "0", + "EdgeDetect": "0", + "PEBS": "0", + "Data_LA": "0", + "L1_Hit_Indication": "0", + "Errata": "null", + "Offcore": "0", + "Deprecated": "0", + "Speculative": "1" + }, + { + "EventCode": "0xec", + "UMask": "0x02", + "EventName": "CPU_CLK_UNHALTED.DISTRIBUTED", + "BriefDescription": "Cycle counts are evenly distributed between active threads in the Core.", + "PublicDescription": "This event distributes cycle counts between active hyperthreads, i.e., those in C0. A hyperthread becomes inactive when it executes the HLT or MWAIT instructions. If all other hyperthreads are inactive (or disabled or do not exist), all counts are attributed to this hyperthread. To obtain the full count when the Core is active, sum the counts from each hyperthread.", + "Counter": "0,1,2,3,4,5,6,7", + "PEBScounters": "0,1,2,3,4,5,6,7", + "SampleAfterValue": "2000003", + "MSRIndex": "0x00", + "MSRValue": "0x00", + "CollectPEBSRecord": "2", + "TakenAlone": "0", + "CounterMask": "0", + "Invert": "0", + "EdgeDetect": "0", + "PEBS": "0", + "Data_LA": "0", + "L1_Hit_Indication": "0", + "Errata": "null", + "Offcore": "0", + "Deprecated": "0", + "Speculative": "1" + }, + { + "EventCode": "0xec", + "UMask": "0x10", + "EventName": "CPU_CLK_UNHALTED.C01", + "BriefDescription": "Core clocks when the thread is in the C0.1 light-weight slower wakeup time but more power saving optimized state.", + "PublicDescription": "Counts core clocks when the thread is in the C0.1 light-weight slower wakeup time but more power saving optimized state. 
This state can be entered via the TPAUSE or UMWAIT instructions.", + "Counter": "0,1,2,3,4,5,6,7", + "PEBScounters": "0,1,2,3,4,5,6,7", + "SampleAfterValue": "2000003", + "MSRIndex": "0x00", + "MSRValue": "0x00", + "CollectPEBSRecord": "2", + "TakenAlone": "0", + "CounterMask": "0", + "Invert": "0", + "EdgeDetect": "0", + "PEBS": "0", + "Data_LA": "0", + "L1_Hit_Indication": "0", + "Errata": "null", + "Offcore": "0", + "Deprecated": "0", + "Speculative": "1" + }, + { + "EventCode": "0xec", + "UMask": "0x20", + "EventName": "CPU_CLK_UNHALTED.C02", + "BriefDescription": "Core clocks when the thread is in the C0.2 light-weight faster wakeup time but less power saving optimized state.", + "PublicDescription": "Counts core clocks when the thread is in the C0.2 light-weight faster wakeup time but less power saving optimized state. This state can be entered via the TPAUSE or UMWAIT instructions.", + "Counter": "0,1,2,3,4,5,6,7", + "PEBScounters": "0,1,2,3,4,5,6,7", + "SampleAfterValue": "2000003", + "MSRIndex": "0x00", + "MSRValue": "0x00", + "CollectPEBSRecord": "2", + "TakenAlone": "0", + "CounterMask": "0", + "Invert": "0", + "EdgeDetect": "0", + "PEBS": "0", + "Data_LA": "0", + "L1_Hit_Indication": "0", + "Errata": "null", + "Offcore": "0", + "Deprecated": "0", + "Speculative": "1" + }, + { + "EventCode": "0xec", + "UMask": "0x40", + "EventName": "CPU_CLK_UNHALTED.PAUSE", + "BriefDescription": "CPU_CLK_UNHALTED.PAUSE", + "PublicDescription": "CPU_CLK_UNHALTED.PAUSE", + "Counter": "0,1,2,3,4,5,6,7", + "PEBScounters": "0,1,2,3,4,5,6,7", + "SampleAfterValue": "2000003", + "MSRIndex": "0x00", + "MSRValue": "0x00", + "CollectPEBSRecord": "2", + "TakenAlone": "0", + "CounterMask": "0", + "Invert": "0", + "EdgeDetect": "0", + "PEBS": "0", + "Data_LA": "0", + "L1_Hit_Indication": "0", + "Errata": "null", + "Offcore": "0", + "Deprecated": "0", + "Speculative": "1" + }, + { + "EventCode": "0xec", + "UMask": "0x40", + "EventName": "CPU_CLK_UNHALTED.PAUSE_INST", + 
"BriefDescription": "CPU_CLK_UNHALTED.PAUSE_INST", + "PublicDescription": "CPU_CLK_UNHALTED.PAUSE_INST", + "Counter": "0,1,2,3,4,5,6,7", + "PEBScounters": "0,1,2,3,4,5,6,7", + "SampleAfterValue": "2000003", + "MSRIndex": "0x00", + "MSRValue": "0x00", + "CollectPEBSRecord": "2", + "TakenAlone": "0", + "CounterMask": "1", + "Invert": "0", + "EdgeDetect": "1", + "PEBS": "0", + "Data_LA": "0", + "L1_Hit_Indication": "0", + "Errata": "null", + "Offcore": "0", + "Deprecated": "0", + "Speculative": "1" + }, + { + "EventCode": "0xec", + "UMask": "0x70", + "EventName": "CPU_CLK_UNHALTED.C0_WAIT", + "BriefDescription": "Core clocks when the thread is in the C0.1 or C0.2 or running a PAUSE in C0 ACPI state.", + "PublicDescription": "Counts core clocks when the thread is in the C0.1 or C0.2 power saving optimized states (TPAUSE or UMWAIT instructions) or running the PAUSE instruction.", + "Counter": "0,1,2,3,4,5,6,7", + "PEBScounters": "0,1,2,3,4,5,6,7", + "SampleAfterValue": "2000003", + "MSRIndex": "0x00", + "MSRValue": "0x00", + "CollectPEBSRecord": "2", + "TakenAlone": "0", + "CounterMask": "0", + "Invert": "0", + "EdgeDetect": "0", + "PEBS": "0", + "Data_LA": "0", + "L1_Hit_Indication": "0", + "Errata": "null", + "Offcore": "0", + "Deprecated": "0", + "Speculative": "1" + } + ] +} \ No newline at end of file diff --git a/testdata/cpu-freq-invalid/cpu0/cpufreq/scaling_cur_freq b/testdata/cpu-freq-invalid/cpu0/cpufreq/scaling_cur_freq new file mode 100644 index 0000000..2e65efe --- /dev/null +++ b/testdata/cpu-freq-invalid/cpu0/cpufreq/scaling_cur_freq @@ -0,0 +1 @@ +a \ No newline at end of file diff --git a/testdata/cpu-freq/cpu0/cpufreq/scaling_cur_freq b/testdata/cpu-freq/cpu0/cpufreq/scaling_cur_freq new file mode 100644 index 0000000..d71f7cf --- /dev/null +++ b/testdata/cpu-freq/cpu0/cpufreq/scaling_cur_freq @@ -0,0 +1 @@ +888888 diff --git a/testdata/cpu-msr-cpuID-msr-not-exist/0/dummy b/testdata/cpu-msr-cpuID-msr-not-exist/0/dummy new file mode 100644 index 
0000000..e69de29 diff --git a/testdata/cpu-msr-cpuID-msr-softlink/0/msr b/testdata/cpu-msr-cpuID-msr-softlink/0/msr new file mode 100644 index 0000000..d79c8ee --- /dev/null +++ b/testdata/cpu-msr-cpuID-msr-softlink/0/msr @@ -0,0 +1 @@ +#Eg�����ܺ�vT2 \ No newline at end of file diff --git a/testdata/cpu-msr-cpuID-msr-softlink/1/msr b/testdata/cpu-msr-cpuID-msr-softlink/1/msr new file mode 120000 index 0000000..f8162e1 --- /dev/null +++ b/testdata/cpu-msr-cpuID-msr-softlink/1/msr @@ -0,0 +1 @@ +iapower/testdata/cpu-msr-cpuID-msr-softlink/0/msr \ No newline at end of file diff --git a/testdata/cpu-msr-directories-not-exist/dummy b/testdata/cpu-msr-directories-not-exist/dummy new file mode 100644 index 0000000..e69de29 diff --git a/testdata/cpu-msr-invalid-cpuID-directories/01/msr b/testdata/cpu-msr-invalid-cpuID-directories/01/msr new file mode 100644 index 0000000..d79c8ee --- /dev/null +++ b/testdata/cpu-msr-invalid-cpuID-directories/01/msr @@ -0,0 +1 @@ +#Eg�����ܺ�vT2 \ No newline at end of file diff --git a/testdata/cpu-msr-invalid-cpuID-directories/1invalid/msr b/testdata/cpu-msr-invalid-cpuID-directories/1invalid/msr new file mode 100644 index 0000000..d79c8ee --- /dev/null +++ b/testdata/cpu-msr-invalid-cpuID-directories/1invalid/msr @@ -0,0 +1 @@ +#Eg�����ܺ�vT2 \ No newline at end of file diff --git a/testdata/cpu-msr/0/msr b/testdata/cpu-msr/0/msr new file mode 100644 index 0000000..d79c8ee --- /dev/null +++ b/testdata/cpu-msr/0/msr @@ -0,0 +1 @@ +#Eg�����ܺ�vT2 \ No newline at end of file diff --git a/testdata/cpu-msr/1/msr b/testdata/cpu-msr/1/msr new file mode 100644 index 0000000..d79c8ee --- /dev/null +++ b/testdata/cpu-msr/1/msr @@ -0,0 +1 @@ +#Eg�����ܺ�vT2 \ No newline at end of file diff --git a/testdata/cpu-msr/10/msr b/testdata/cpu-msr/10/msr new file mode 100644 index 0000000..d79c8ee --- /dev/null +++ b/testdata/cpu-msr/10/msr @@ -0,0 +1 @@ +#Eg�����ܺ�vT2 \ No newline at end of file diff --git a/testdata/cpu-msr/100/msr b/testdata/cpu-msr/100/msr 
new file mode 100644 index 0000000..d79c8ee --- /dev/null +++ b/testdata/cpu-msr/100/msr @@ -0,0 +1 @@ +#Eg�����ܺ�vT2 \ No newline at end of file diff --git a/testdata/cpuinfo_bad1/cpuinfo b/testdata/cpuinfo_bad1/cpuinfo new file mode 100644 index 0000000..cfdf073 --- /dev/null +++ b/testdata/cpuinfo_bad1/cpuinfo @@ -0,0 +1,26 @@ +processor : + +vendor_id : IdOfVendor +cpu family : 13 +model : 23 +model name : NameOfModel +stepping : 1 +microcode : 111 +cpu MHz : 55.5 +cache size : 12 KB +physical id : 2 +siblings : 5 +core id : 66 +cpu cores : 2 +apicid : 22 +initial apicid : 1 +fpu : no +fpu_exception : no +cpuid level : 0 +wp : no +flags : no flags +bugs : no bugs +bogomips : 2.2222 +clflush size : 64 +cache_alignment : 64 +address sizes : 10 bits physical, 607 bits virtual +power management: \ No newline at end of file diff --git a/testdata/cpuinfo_bad2/cpuinfo b/testdata/cpuinfo_bad2/cpuinfo new file mode 100644 index 0000000..7d79eab --- /dev/null +++ b/testdata/cpuinfo_bad2/cpuinfo @@ -0,0 +1,26 @@ +processor : 0 +vendor_id : IdOfVendor +cpu family : 13 +model : 23 +model name : NameOfModel +stepping : + +microcode : 111 +cpu MHz : 55.5 +cache size : 12 KB +physical id : 2 +siblings : 5 +core id : 66 +cpu cores : 2 +apicid : 22 +initial apicid : 1 +fpu : no +fpu_exception : no +cpuid level : 0 +wp : no +flags : no flags +bugs : no bugs +bogomips : 2.2222 +clflush size : 64 +cache_alignment : 64 +address sizes : 10 bits physical, 607 bits virtual +power management: \ No newline at end of file diff --git a/testdata/cpuinfo_bad3/cpuinfo b/testdata/cpuinfo_bad3/cpuinfo new file mode 100644 index 0000000..ba6f129 --- /dev/null +++ b/testdata/cpuinfo_bad3/cpuinfo @@ -0,0 +1,26 @@ +processor : 0 +vendor_id : IdOfVendor +cpu family : 13 +model : 23 +model name : NameOfModel +stepping : 1 +microcode : 111 +cpu MHz : 55.5 +cache size : + +physical id : 2 +siblings : 5 +core id : 66 +cpu cores : 2 +apicid : 22 +initial apicid : 1 +fpu : no +fpu_exception : no +cpuid 
level : 0 +wp : no +flags : no flags +bugs : no bugs +bogomips : 2.2222 +clflush size : 64 +cache_alignment : 64 +address sizes : 10 bits physical, 607 bits virtual +power management: \ No newline at end of file diff --git a/testdata/cpuinfo_good/cpuinfo b/testdata/cpuinfo_good/cpuinfo new file mode 100644 index 0000000..ca4f68e --- /dev/null +++ b/testdata/cpuinfo_good/cpuinfo @@ -0,0 +1,26 @@ +processor : 1 +vendor_id : IdOfVendor +cpu family : 13 +model : 23 +model name : NameOfModel +stepping : 1 +microcode : 111 +cpu MHz : 55.5 +cache size : 12 KB +physical id : 2 +siblings : 5 +core id : 66 +cpu cores : 2 +apicid : 22 +initial apicid : 1 +fpu : no +fpu_exception : no +cpuid level : 0 +wp : no +flags : no flags +bugs : no bugs +bogomips : 2.2222 +clflush size : 64 +cache_alignment : 64 +address sizes : 10 bits physical, 607 bits virtual +power management: \ No newline at end of file diff --git a/testdata/die-id-invalid/cpu1/topology/die_id b/testdata/die-id-invalid/cpu1/topology/die_id new file mode 100644 index 0000000..c04142e --- /dev/null +++ b/testdata/die-id-invalid/cpu1/topology/die_id @@ -0,0 +1 @@ +badValue \ No newline at end of file diff --git a/testdata/die-id-valid/cpu1/topology/die_id b/testdata/die-id-valid/cpu1/topology/die_id new file mode 100644 index 0000000..56a6051 --- /dev/null +++ b/testdata/die-id-valid/cpu1/topology/die_id @@ -0,0 +1 @@ +1 \ No newline at end of file diff --git a/testdata/intel-rapl-domain-name-empty/intel-rapl{colon}1/name b/testdata/intel-rapl-domain-name-empty/intel-rapl{colon}1/name new file mode 100644 index 0000000..e69de29 diff --git a/testdata/intel-rapl-dram-curr-energy-attr-file-not-exist/intel-rapl{colon}0/energy_uj b/testdata/intel-rapl-dram-curr-energy-attr-file-not-exist/intel-rapl{colon}0/energy_uj new file mode 100644 index 0000000..cd87cc7 --- /dev/null +++ b/testdata/intel-rapl-dram-curr-energy-attr-file-not-exist/intel-rapl{colon}0/energy_uj @@ -0,0 +1 @@ +206999074695 diff --git 
a/testdata/intel-rapl-dram-curr-energy-attr-file-not-exist/intel-rapl{colon}0/intel-rapl{colon}0{colon}1/max_energy_range_uj b/testdata/intel-rapl-dram-curr-energy-attr-file-not-exist/intel-rapl{colon}0/intel-rapl{colon}0{colon}1/max_energy_range_uj new file mode 100644 index 0000000..89276e1 --- /dev/null +++ b/testdata/intel-rapl-dram-curr-energy-attr-file-not-exist/intel-rapl{colon}0/intel-rapl{colon}0{colon}1/max_energy_range_uj @@ -0,0 +1 @@ +65712999613 \ No newline at end of file diff --git a/testdata/intel-rapl-dram-curr-energy-attr-file-not-exist/intel-rapl{colon}0/intel-rapl{colon}0{colon}1/name b/testdata/intel-rapl-dram-curr-energy-attr-file-not-exist/intel-rapl{colon}0/intel-rapl{colon}0{colon}1/name new file mode 100644 index 0000000..6ca84d3 --- /dev/null +++ b/testdata/intel-rapl-dram-curr-energy-attr-file-not-exist/intel-rapl{colon}0/intel-rapl{colon}0{colon}1/name @@ -0,0 +1 @@ +dram diff --git a/testdata/intel-rapl-dram-curr-energy-attr-file-not-exist/intel-rapl{colon}0/max_energy_range_uj b/testdata/intel-rapl-dram-curr-energy-attr-file-not-exist/intel-rapl{colon}0/max_energy_range_uj new file mode 100644 index 0000000..7f43201 --- /dev/null +++ b/testdata/intel-rapl-dram-curr-energy-attr-file-not-exist/intel-rapl{colon}0/max_energy_range_uj @@ -0,0 +1 @@ +262143328850 \ No newline at end of file diff --git a/testdata/intel-rapl-dram-curr-energy-attr-file-not-exist/intel-rapl{colon}0/name b/testdata/intel-rapl-dram-curr-energy-attr-file-not-exist/intel-rapl{colon}0/name new file mode 100644 index 0000000..772d67f --- /dev/null +++ b/testdata/intel-rapl-dram-curr-energy-attr-file-not-exist/intel-rapl{colon}0/name @@ -0,0 +1 @@ +package-0 \ No newline at end of file diff --git a/testdata/intel-rapl-dram-domain-name-not-exist/intel-rapl{colon}0/intel-rapl{colon}0{colon}0/invalid-name b/testdata/intel-rapl-dram-domain-name-not-exist/intel-rapl{colon}0/intel-rapl{colon}0{colon}0/invalid-name new file mode 100644 index 0000000..6ca84d3 --- /dev/null 
+++ b/testdata/intel-rapl-dram-domain-name-not-exist/intel-rapl{colon}0/intel-rapl{colon}0{colon}0/invalid-name @@ -0,0 +1 @@ +dram diff --git a/testdata/intel-rapl-dram-domain-name-not-exist/intel-rapl{colon}0/name b/testdata/intel-rapl-dram-domain-name-not-exist/intel-rapl{colon}0/name new file mode 100644 index 0000000..a35383c --- /dev/null +++ b/testdata/intel-rapl-dram-domain-name-not-exist/intel-rapl{colon}0/name @@ -0,0 +1 @@ +package-0 diff --git a/testdata/intel-rapl-invalid-package-domain-name-id/intel-rapl{colon}1/name b/testdata/intel-rapl-invalid-package-domain-name-id/intel-rapl{colon}1/name new file mode 100644 index 0000000..772d67f --- /dev/null +++ b/testdata/intel-rapl-invalid-package-domain-name-id/intel-rapl{colon}1/name @@ -0,0 +1 @@ +package-0 \ No newline at end of file diff --git a/testdata/intel-rapl-package-curr-energy-attr-file-not-exist/intel-rapl{colon}0/intel-rapl{colon}0{colon}1/energy_uj b/testdata/intel-rapl-package-curr-energy-attr-file-not-exist/intel-rapl{colon}0/intel-rapl{colon}0{colon}1/energy_uj new file mode 100644 index 0000000..85dae6b --- /dev/null +++ b/testdata/intel-rapl-package-curr-energy-attr-file-not-exist/intel-rapl{colon}0/intel-rapl{colon}0{colon}1/energy_uj @@ -0,0 +1 @@ +64155753419 diff --git a/testdata/intel-rapl-package-curr-energy-attr-file-not-exist/intel-rapl{colon}0/intel-rapl{colon}0{colon}1/max_energy_range_uj b/testdata/intel-rapl-package-curr-energy-attr-file-not-exist/intel-rapl{colon}0/intel-rapl{colon}0{colon}1/max_energy_range_uj new file mode 100644 index 0000000..89276e1 --- /dev/null +++ b/testdata/intel-rapl-package-curr-energy-attr-file-not-exist/intel-rapl{colon}0/intel-rapl{colon}0{colon}1/max_energy_range_uj @@ -0,0 +1 @@ +65712999613 \ No newline at end of file diff --git a/testdata/intel-rapl-package-curr-energy-attr-file-not-exist/intel-rapl{colon}0/intel-rapl{colon}0{colon}1/name 
b/testdata/intel-rapl-package-curr-energy-attr-file-not-exist/intel-rapl{colon}0/intel-rapl{colon}0{colon}1/name new file mode 100644 index 0000000..6ca84d3 --- /dev/null +++ b/testdata/intel-rapl-package-curr-energy-attr-file-not-exist/intel-rapl{colon}0/intel-rapl{colon}0{colon}1/name @@ -0,0 +1 @@ +dram diff --git a/testdata/intel-rapl-package-curr-energy-attr-file-not-exist/intel-rapl{colon}0/max_energy_range_uj b/testdata/intel-rapl-package-curr-energy-attr-file-not-exist/intel-rapl{colon}0/max_energy_range_uj new file mode 100644 index 0000000..7f43201 --- /dev/null +++ b/testdata/intel-rapl-package-curr-energy-attr-file-not-exist/intel-rapl{colon}0/max_energy_range_uj @@ -0,0 +1 @@ +262143328850 \ No newline at end of file diff --git a/testdata/intel-rapl-package-curr-energy-attr-file-not-exist/intel-rapl{colon}0/name b/testdata/intel-rapl-package-curr-energy-attr-file-not-exist/intel-rapl{colon}0/name new file mode 100644 index 0000000..772d67f --- /dev/null +++ b/testdata/intel-rapl-package-curr-energy-attr-file-not-exist/intel-rapl{colon}0/name @@ -0,0 +1 @@ +package-0 \ No newline at end of file diff --git a/testdata/intel-rapl-package-domain-name-not-exist/intel-rapl{colon}0/domain b/testdata/intel-rapl-package-domain-name-not-exist/intel-rapl{colon}0/domain new file mode 100644 index 0000000..a35383c --- /dev/null +++ b/testdata/intel-rapl-package-domain-name-not-exist/intel-rapl{colon}0/domain @@ -0,0 +1 @@ +package-0 diff --git a/testdata/intel-rapl/intel-rapl{colon}0/constraint_0_max_power_uw b/testdata/intel-rapl/intel-rapl{colon}0/constraint_0_max_power_uw new file mode 100644 index 0000000..cb6adc5 --- /dev/null +++ b/testdata/intel-rapl/intel-rapl{colon}0/constraint_0_max_power_uw @@ -0,0 +1 @@ +250000000 \ No newline at end of file diff --git a/testdata/intel-rapl/intel-rapl{colon}0/energy_uj b/testdata/intel-rapl/intel-rapl{colon}0/energy_uj new file mode 100644 index 0000000..cd87cc7 --- /dev/null +++ 
b/testdata/intel-rapl/intel-rapl{colon}0/energy_uj @@ -0,0 +1 @@ +206999074695 diff --git a/testdata/intel-rapl/intel-rapl{colon}0/intel-rapl{colon}0{colon}0/energy_uj b/testdata/intel-rapl/intel-rapl{colon}0/intel-rapl{colon}0{colon}0/energy_uj new file mode 100644 index 0000000..573541a --- /dev/null +++ b/testdata/intel-rapl/intel-rapl{colon}0/intel-rapl{colon}0{colon}0/energy_uj @@ -0,0 +1 @@ +0 diff --git a/testdata/intel-rapl/intel-rapl{colon}0/intel-rapl{colon}0{colon}0/max_energy_range_uj b/testdata/intel-rapl/intel-rapl{colon}0/intel-rapl{colon}0{colon}0/max_energy_range_uj new file mode 100644 index 0000000..c227083 --- /dev/null +++ b/testdata/intel-rapl/intel-rapl{colon}0/intel-rapl{colon}0{colon}0/max_energy_range_uj @@ -0,0 +1 @@ +0 \ No newline at end of file diff --git a/testdata/intel-rapl/intel-rapl{colon}0/intel-rapl{colon}0{colon}0/name b/testdata/intel-rapl/intel-rapl{colon}0/intel-rapl{colon}0{colon}0/name new file mode 100644 index 0000000..d23ab7a --- /dev/null +++ b/testdata/intel-rapl/intel-rapl{colon}0/intel-rapl{colon}0{colon}0/name @@ -0,0 +1 @@ +domain diff --git a/testdata/intel-rapl/intel-rapl{colon}0/intel-rapl{colon}0{colon}1/energy_uj b/testdata/intel-rapl/intel-rapl{colon}0/intel-rapl{colon}0{colon}1/energy_uj new file mode 100644 index 0000000..85dae6b --- /dev/null +++ b/testdata/intel-rapl/intel-rapl{colon}0/intel-rapl{colon}0{colon}1/energy_uj @@ -0,0 +1 @@ +64155753419 diff --git a/testdata/intel-rapl/intel-rapl{colon}0/intel-rapl{colon}0{colon}1/max_energy_range_uj b/testdata/intel-rapl/intel-rapl{colon}0/intel-rapl{colon}0{colon}1/max_energy_range_uj new file mode 100644 index 0000000..89276e1 --- /dev/null +++ b/testdata/intel-rapl/intel-rapl{colon}0/intel-rapl{colon}0{colon}1/max_energy_range_uj @@ -0,0 +1 @@ +65712999613 \ No newline at end of file diff --git a/testdata/intel-rapl/intel-rapl{colon}0/intel-rapl{colon}0{colon}1/name b/testdata/intel-rapl/intel-rapl{colon}0/intel-rapl{colon}0{colon}1/name new file mode 
100644 index 0000000..6ca84d3 --- /dev/null +++ b/testdata/intel-rapl/intel-rapl{colon}0/intel-rapl{colon}0{colon}1/name @@ -0,0 +1 @@ +dram diff --git a/testdata/intel-rapl/intel-rapl{colon}0/max_energy_range_uj b/testdata/intel-rapl/intel-rapl{colon}0/max_energy_range_uj new file mode 100644 index 0000000..7f43201 --- /dev/null +++ b/testdata/intel-rapl/intel-rapl{colon}0/max_energy_range_uj @@ -0,0 +1 @@ +262143328850 \ No newline at end of file diff --git a/testdata/intel-rapl/intel-rapl{colon}0/name b/testdata/intel-rapl/intel-rapl{colon}0/name new file mode 100644 index 0000000..772d67f --- /dev/null +++ b/testdata/intel-rapl/intel-rapl{colon}0/name @@ -0,0 +1 @@ +package-0 \ No newline at end of file diff --git a/testdata/intel-rapl/intel-rapl{colon}1/constraint_0_max_power_uw b/testdata/intel-rapl/intel-rapl{colon}1/constraint_0_max_power_uw new file mode 100644 index 0000000..d96dc95 --- /dev/null +++ b/testdata/intel-rapl/intel-rapl{colon}1/constraint_0_max_power_uw @@ -0,0 +1 @@ +abcdef \ No newline at end of file diff --git a/testdata/intel-rapl/intel-rapl{colon}1/energy_uj b/testdata/intel-rapl/intel-rapl{colon}1/energy_uj new file mode 100644 index 0000000..81940e6 --- /dev/null +++ b/testdata/intel-rapl/intel-rapl{colon}1/energy_uj @@ -0,0 +1 @@ +206999075695 \ No newline at end of file diff --git a/testdata/intel-rapl/intel-rapl{colon}1/intel-rapl{colon}1{colon}0/name b/testdata/intel-rapl/intel-rapl{colon}1/intel-rapl{colon}1{colon}0/name new file mode 100644 index 0000000..3cdd2fd --- /dev/null +++ b/testdata/intel-rapl/intel-rapl{colon}1/intel-rapl{colon}1{colon}0/name @@ -0,0 +1 @@ +socket diff --git a/testdata/intel-rapl/intel-rapl{colon}1/max_energy_range_uj b/testdata/intel-rapl/intel-rapl{colon}1/max_energy_range_uj new file mode 100644 index 0000000..d96dc95 --- /dev/null +++ b/testdata/intel-rapl/intel-rapl{colon}1/max_energy_range_uj @@ -0,0 +1 @@ +abcdef \ No newline at end of file diff --git a/testdata/intel-rapl/intel-rapl{colon}1/name 
b/testdata/intel-rapl/intel-rapl{colon}1/name new file mode 100644 index 0000000..c4cf4c4 --- /dev/null +++ b/testdata/intel-rapl/intel-rapl{colon}1/name @@ -0,0 +1 @@ +package-1 \ No newline at end of file diff --git a/testdata/intel-rapl/intel-rapl{colon}2/constraint_0_max_power_uw b/testdata/intel-rapl/intel-rapl{colon}2/constraint_0_max_power_uw new file mode 100644 index 0000000..e69de29 diff --git a/testdata/intel-rapl/intel-rapl{colon}2/energy_uj b/testdata/intel-rapl/intel-rapl{colon}2/energy_uj new file mode 100644 index 0000000..139c235 --- /dev/null +++ b/testdata/intel-rapl/intel-rapl{colon}2/energy_uj @@ -0,0 +1 @@ +205999075695 \ No newline at end of file diff --git a/testdata/intel-rapl/intel-rapl{colon}2/intel-rapl{colon}2{colon}0/energy_uj b/testdata/intel-rapl/intel-rapl{colon}2/intel-rapl{colon}2{colon}0/energy_uj new file mode 100644 index 0000000..ef2de58 --- /dev/null +++ b/testdata/intel-rapl/intel-rapl{colon}2/intel-rapl{colon}2{colon}0/energy_uj @@ -0,0 +1 @@ +66155553419 diff --git a/testdata/intel-rapl/intel-rapl{colon}2/intel-rapl{colon}2{colon}0/max_energy_range_uj b/testdata/intel-rapl/intel-rapl{colon}2/intel-rapl{colon}2{colon}0/max_energy_range_uj new file mode 100644 index 0000000..e69de29 diff --git a/testdata/intel-rapl/intel-rapl{colon}2/intel-rapl{colon}2{colon}0/name b/testdata/intel-rapl/intel-rapl{colon}2/intel-rapl{colon}2{colon}0/name new file mode 100644 index 0000000..6ca84d3 --- /dev/null +++ b/testdata/intel-rapl/intel-rapl{colon}2/intel-rapl{colon}2{colon}0/name @@ -0,0 +1 @@ +dram diff --git a/testdata/intel-rapl/intel-rapl{colon}2/name b/testdata/intel-rapl/intel-rapl{colon}2/name new file mode 100644 index 0000000..56d0a46 --- /dev/null +++ b/testdata/intel-rapl/intel-rapl{colon}2/name @@ -0,0 +1 @@ +package-2 diff --git a/testdata/intel-rapl/intel-rapl{colon}3/energy_uj b/testdata/intel-rapl/intel-rapl{colon}3/energy_uj new file mode 100644 index 0000000..198b7c9 --- /dev/null +++ 
b/testdata/intel-rapl/intel-rapl{colon}3/energy_uj @@ -0,0 +1 @@ +205888075695 \ No newline at end of file diff --git a/testdata/intel-rapl/intel-rapl{colon}3/name b/testdata/intel-rapl/intel-rapl{colon}3/name new file mode 100644 index 0000000..6baf10b --- /dev/null +++ b/testdata/intel-rapl/intel-rapl{colon}3/name @@ -0,0 +1 @@ +package-3 \ No newline at end of file diff --git a/testdata/intel-rapl/intel-rapl{colon}4/energy_uj b/testdata/intel-rapl/intel-rapl{colon}4/energy_uj new file mode 100644 index 0000000..198b7c9 --- /dev/null +++ b/testdata/intel-rapl/intel-rapl{colon}4/energy_uj @@ -0,0 +1 @@ +205888075695 \ No newline at end of file diff --git a/testdata/intel-rapl/intel-rapl{colon}4/name b/testdata/intel-rapl/intel-rapl{colon}4/name new file mode 100644 index 0000000..1d838a2 --- /dev/null +++ b/testdata/intel-rapl/intel-rapl{colon}4/name @@ -0,0 +1 @@ +psys \ No newline at end of file diff --git a/testdata/intel-rapl/intel-rapl{colon}5/energy_uj b/testdata/intel-rapl/intel-rapl{colon}5/energy_uj new file mode 100644 index 0000000..198b7c9 --- /dev/null +++ b/testdata/intel-rapl/intel-rapl{colon}5/energy_uj @@ -0,0 +1 @@ +205888075695 \ No newline at end of file diff --git a/testdata/intel-rapl/intel-rapl{colon}5/name b/testdata/intel-rapl/intel-rapl{colon}5/name new file mode 100644 index 0000000..0f31176 --- /dev/null +++ b/testdata/intel-rapl/intel-rapl{colon}5/name @@ -0,0 +1 @@ +core \ No newline at end of file diff --git a/testdata/intel_uncore_frequency/package_09_die_12/initial_max_freq_khz b/testdata/intel_uncore_frequency/package_09_die_12/initial_max_freq_khz new file mode 100644 index 0000000..54c125d --- /dev/null +++ b/testdata/intel_uncore_frequency/package_09_die_12/initial_max_freq_khz @@ -0,0 +1 @@ +abac diff --git a/testdata/intel_uncore_frequency/package_09_die_12/initial_min_freq_khz b/testdata/intel_uncore_frequency/package_09_die_12/initial_min_freq_khz new file mode 100644 index 0000000..9a66829 --- /dev/null +++ 
b/testdata/intel_uncore_frequency/package_09_die_12/initial_min_freq_khz @@ -0,0 +1,2 @@ + 200000 + \ No newline at end of file diff --git a/testdata/intel_uncore_frequency/package_09_die_12/max_freq_khz b/testdata/intel_uncore_frequency/package_09_die_12/max_freq_khz new file mode 100644 index 0000000..e69de29 diff --git a/testdata/intel_uncore_frequency/package_09_die_12/min_freq_khz b/testdata/intel_uncore_frequency/package_09_die_12/min_freq_khz new file mode 100644 index 0000000..b498fd4 --- /dev/null +++ b/testdata/intel_uncore_frequency/package_09_die_12/min_freq_khz @@ -0,0 +1 @@ +/ diff --git a/testdata/intel_uncore_frequency/package_10_die_03/current_freq_khz b/testdata/intel_uncore_frequency/package_10_die_03/current_freq_khz new file mode 100644 index 0000000..f829b34 --- /dev/null +++ b/testdata/intel_uncore_frequency/package_10_die_03/current_freq_khz @@ -0,0 +1 @@ +1500000 \ No newline at end of file diff --git a/testdata/intel_uncore_frequency/package_10_die_03/initial_max_freq_khz b/testdata/intel_uncore_frequency/package_10_die_03/initial_max_freq_khz new file mode 100644 index 0000000..deebb18 --- /dev/null +++ b/testdata/intel_uncore_frequency/package_10_die_03/initial_max_freq_khz @@ -0,0 +1 @@ +2000000 diff --git a/testdata/intel_uncore_frequency/package_10_die_03/initial_min_freq_khz b/testdata/intel_uncore_frequency/package_10_die_03/initial_min_freq_khz new file mode 100644 index 0000000..749fce6 --- /dev/null +++ b/testdata/intel_uncore_frequency/package_10_die_03/initial_min_freq_khz @@ -0,0 +1 @@ +1000000 diff --git a/testdata/intel_uncore_frequency/package_10_die_03/max_freq_khz b/testdata/intel_uncore_frequency/package_10_die_03/max_freq_khz new file mode 100644 index 0000000..0f89143 --- /dev/null +++ b/testdata/intel_uncore_frequency/package_10_die_03/max_freq_khz @@ -0,0 +1 @@ +1900000 diff --git a/testdata/intel_uncore_frequency/package_10_die_03/min_freq_khz b/testdata/intel_uncore_frequency/package_10_die_03/min_freq_khz new file 
mode 100644 index 0000000..1b1150c --- /dev/null +++ b/testdata/intel_uncore_frequency/package_10_die_03/min_freq_khz @@ -0,0 +1 @@ +1100000 diff --git a/testdata/proc_modules_msr_loaded b/testdata/proc_modules_msr_loaded new file mode 100644 index 0000000..2d05efb --- /dev/null +++ b/testdata/proc_modules_msr_loaded @@ -0,0 +1,24 @@ +msr 16384 0 - Live 0x0000000000000000 +intel_uncore_frequency 16384 0 - Live 0x0000000000000000 +binfmt_misc 24576 1 - Live 0x0000000000000000 +xt_conntrack 16384 3 - Live 0x0000000000000000 +xt_tcpudp 20480 3 - Live 0x0000000000000000 +xt_REDIRECT 20480 1 - Live 0x0000000000000000 +xt_comment 16384 872 - Live 0x0000000000000000 +nft_compat 20480 879 - Live 0x0000000000000000 +nft_counter 16384 874 - Live 0x0000000000000000 +nft_chain_nat 16384 2 - Live 0x0000000000000000 +nf_nat 49152 2 xt_REDIRECT,nft_chain_nat, Live 0x0000000000000000 +nf_conntrack 172032 3 xt_conntrack,xt_REDIRECT,nf_nat, Live 0x0000000000000000 +nf_defrag_ipv6 24576 1 nf_conntrack, Live 0x0000000000000000 +nf_defrag_ipv4 16384 1 nf_conntrack, Live 0x0000000000000000 +nf_tables 249856 3085 nft_compat,nft_counter,nft_chain_nat, Live 0x0000000000000000 +nfnetlink 20480 2 nft_compat,nf_tables, Live 0x0000000000000000 +8021q 36864 0 - Live 0x0000000000000000 +garp 20480 1 8021q, Live 0x0000000000000000 +mrp 20480 1 8021q, Live 0x0000000000000000 +stp 16384 1 garp, Live 0x0000000000000000 +llc 16384 2 garp,stp, Live 0x0000000000000000 +bonding 196608 0 - Live 0x0000000000000000 +tls 114688 1 bonding, Live 0x0000000000000000 +ipmi_ssif 40960 0 - Live 0x0000000000000000 diff --git a/testdata/proc_modules_msr_not_loaded b/testdata/proc_modules_msr_not_loaded new file mode 100644 index 0000000..c9755ee --- /dev/null +++ b/testdata/proc_modules_msr_not_loaded @@ -0,0 +1,24 @@ +binfmt_misc 24576 1 - Live 0x0000000000000000 +xt_conntrack 16384 3 - Live 0x0000000000000000 +xt_tcpudp 20480 3 - Live 0x0000000000000000 +xt_REDIRECT 20480 1 - Live 0x0000000000000000 +xt_comment 
16384 872 - Live 0x0000000000000000 +nft_compat 20480 879 - Live 0x0000000000000000 +nft_counter 16384 874 - Live 0x0000000000000000 +nft_chain_nat 16384 2 - Live 0x0000000000000000 +nf_nat 49152 2 xt_REDIRECT,nft_chain_nat, Live 0x0000000000000000 +nf_conntrack 172032 3 xt_conntrack,xt_REDIRECT,nf_nat, Live 0x0000000000000000 +nf_defrag_ipv6 24576 1 nf_conntrack, Live 0x0000000000000000 +nf_defrag_ipv4 16384 1 nf_conntrack, Live 0x0000000000000000 +nf_tables 249856 3085 nft_compat,nft_counter,nft_chain_nat, Live 0x0000000000000000 +nfnetlink 20480 2 nft_compat,nf_tables, Live 0x0000000000000000 +8021q 36864 0 - Live 0x0000000000000000 +garp 20480 1 8021q, Live 0x0000000000000000 +mrp 20480 1 8021q, Live 0x0000000000000000 +stp 16384 1 garp, Live 0x0000000000000000 +llc 16384 2 garp,stp, Live 0x0000000000000000 +bonding 196608 0 - Live 0x0000000000000000 +tls 114688 1 bonding, Live 0x0000000000000000 +ipmi_ssif 40960 0 - Live 0x0000000000000000 +intel_rapl_msr 20480 0 - Live 0x0000000000000000 +intel_rapl_common 40960 1 intel_rapl_msr, Live 0x0000000000000000 diff --git a/testdata/proc_modules_rapl_loaded b/testdata/proc_modules_rapl_loaded new file mode 100644 index 0000000..efa395b --- /dev/null +++ b/testdata/proc_modules_rapl_loaded @@ -0,0 +1,32 @@ +msr 16384 0 - Live 0x0000000000000000 +intel_uncore_frequency 16384 0 - Live 0x0000000000000000 +binfmt_misc 24576 1 - Live 0x0000000000000000 +xt_conntrack 16384 3 - Live 0x0000000000000000 +xt_tcpudp 20480 3 - Live 0x0000000000000000 +xt_REDIRECT 20480 1 - Live 0x0000000000000000 +xt_comment 16384 872 - Live 0x0000000000000000 +nft_compat 20480 879 - Live 0x0000000000000000 +nft_counter 16384 874 - Live 0x0000000000000000 +nft_chain_nat 16384 2 - Live 0x0000000000000000 +nf_nat 49152 2 xt_REDIRECT,nft_chain_nat, Live 0x0000000000000000 +nf_conntrack 172032 3 xt_conntrack,xt_REDIRECT,nf_nat, Live 0x0000000000000000 +nf_defrag_ipv6 24576 1 nf_conntrack, Live 0x0000000000000000 +nf_defrag_ipv4 16384 1 nf_conntrack, 
Live 0x0000000000000000 +nf_tables 249856 3085 nft_compat,nft_counter,nft_chain_nat, Live 0x0000000000000000 +nfnetlink 20480 2 nft_compat,nf_tables, Live 0x0000000000000000 +8021q 36864 0 - Live 0x0000000000000000 +garp 20480 1 8021q, Live 0x0000000000000000 +mrp 20480 1 8021q, Live 0x0000000000000000 +stp 16384 1 garp, Live 0x0000000000000000 +llc 16384 2 garp,stp, Live 0x0000000000000000 +bonding 196608 0 - Live 0x0000000000000000 +tls 114688 1 bonding, Live 0x0000000000000000 +ipmi_ssif 40960 0 - Live 0x0000000000000000 +intel_rapl_msr 20480 0 - Live 0x0000000000000000 +intel_rapl_common 40960 1 intel_rapl_msr, Live 0x0000000000000000 +sb_edac 36864 0 - Live 0x0000000000000000 +nls_iso8859_1 16384 1 - Live 0x0000000000000000 +x86_pkg_temp_thermal 20480 0 - Live 0x0000000000000000 +intel_powerclamp 20480 0 - Live 0x0000000000000000 +rapl 20480 0 - Live 0x0000000000000000 +intel_cstate 20480 0 - Live 0x0000000000000000 diff --git a/testdata/proc_modules_rapl_not_loaded b/testdata/proc_modules_rapl_not_loaded new file mode 100644 index 0000000..7ef6435 --- /dev/null +++ b/testdata/proc_modules_rapl_not_loaded @@ -0,0 +1,31 @@ +msr 16384 0 - Live 0x0000000000000000 +intel_uncore_frequency 16384 0 - Live 0x0000000000000000 +binfmt_misc 24576 1 - Live 0x0000000000000000 +xt_conntrack 16384 3 - Live 0x0000000000000000 +xt_tcpudp 20480 3 - Live 0x0000000000000000 + +xt_REDIRECT 20480 1 - Live 0x0000000000000000 +xt_comment 16384 872 - Live 0x0000000000000000 +nft_compat 20480 879 - Live 0x0000000000000000 +nft_counter 16384 874 - Live 0x0000000000000000 +nft_chain_nat 16384 2 - Live 0x0000000000000000 +nf_nat 49152 2 xt_REDIRECT,nft_chain_nat, Live 0x0000000000000000 +nf_conntrack 172032 3 xt_conntrack,xt_REDIRECT,nf_nat, Live 0x0000000000000000 +nf_defrag_ipv6 24576 1 nf_conntrack, Live 0x0000000000000000 +nf_defrag_ipv4 16384 1 nf_conntrack, Live 0x0000000000000000 +nf_tables 249856 3085 nft_compat,nft_counter,nft_chain_nat, Live 0x0000000000000000 +nfnetlink 20480 2 
nft_compat,nf_tables, Live 0x0000000000000000 +8021q 36864 0 - Live 0x0000000000000000 +garp 20480 1 8021q, Live 0x0000000000000000 +mrp 20480 1 8021q, Live 0x0000000000000000 +stp 16384 1 garp, Live 0x0000000000000000 +llc 16384 2 garp,stp, Live 0x0000000000000000 +bonding 196608 0 - Live 0x0000000000000000 +tls 114688 1 bonding, Live 0x0000000000000000 +ipmi_ssif 40960 0 - Live 0x0000000000000000 +sb_edac 36864 0 - Live 0x0000000000000000 +nls_iso8859_1 16384 1 - Live 0x0000000000000000 +x86_pkg_temp_thermal 20480 0 - Live 0x0000000000000000 +intel_powerclamp 20480 0 - Live 0x0000000000000000 +rapl 20480 0 - Live 0x0000000000000000 +intel_cstate 20480 0 - Live 0x0000000000000000 diff --git a/testdata/sapphirerapids_core.json b/testdata/sapphirerapids_core.json new file mode 100644 index 0000000..6451550 --- /dev/null +++ b/testdata/sapphirerapids_core.json @@ -0,0 +1,299 @@ +{ + "Header": { + "Copyright": "Copyright (c) 2001 - 2023 Intel Corporation. All rights reserved.", + "Info": "Performance Monitoring Events for 4th Generation Intel(R) Xeon(R) Processor Scalable Family based on Sapphire Rapids microarchitecture - V1.15", + "DatePublished": "06/28/2023", + "Version": "1.15", + "Legend": "" + }, + "Events": [ + { + "EventCode": "0x00", + "UMask": "0x02", + "EventName": "CPU_CLK_UNHALTED.THREAD", + "BriefDescription": "Core cycles when the thread is not in halt state", + "PublicDescription": "Counts the number of core cycles while the thread is not in a halt state. The thread enters the halt state when it is running the HLT instruction. This event is a component in many key event ratios. The core frequency may change from time to time due to transitions associated with Enhanced Intel SpeedStep Technology or TM2. For this reason this event may have a changing ratio with regards to time. When the core frequency is constant, this event can approximate elapsed time while the core was not in the halt state. 
It is counted on a dedicated fixed counter, leaving the eight programmable counters available for other events.", + "Counter": "Fixed counter 1", + "PEBScounters": "33", + "SampleAfterValue": "2000003", + "MSRIndex": "0x00", + "MSRValue": "0x00", + "CollectPEBSRecord": "2", + "TakenAlone": "0", + "CounterMask": "0", + "Invert": "0", + "EdgeDetect": "0", + "PEBS": "0", + "Data_LA": "0", + "L1_Hit_Indication": "0", + "Errata": "null", + "Offcore": "0", + "Deprecated": "0", + "Speculative": "1" + }, + { + "EventCode": "0x00", + "UMask": "0x03", + "EventName": "CPU_CLK_UNHALTED.REF_TSC", + "BriefDescription": "Reference cycles when the core is not in halt state.", + "PublicDescription": "Counts the number of reference cycles when the core is not in a halt state. The core enters the halt state when it is running the HLT instruction or the MWAIT instruction. This event is not affected by core frequency changes (for example, P states, TM2 transitions) but has the same incrementing frequency as the time stamp counter. This event can approximate elapsed time while the core was not in a halt state. It is counted on a dedicated fixed counter, leaving the eight programmable counters available for other events. Note: On all current platforms this event stops counting during 'throttling (TM)' states duty off periods the processor is 'halted'. The counter update is done at a lower clock rate then the core clock the overflow status bit for this counter may appear 'sticky'. After the counter has overflowed and software clears the overflow status bit and resets the counter to less than MAX. The reset value to the counter is not clocked immediately so the overflow status bit will flip 'high (1)' and generate another PMI (if enabled) after which the reset value gets clocked into the counter. Therefore, software will get the interrupt, read the overflow status bit '1 for bit 34 while the counter value is less than MAX. 
Software should ignore this case.", + "Counter": "Fixed counter 2", + "PEBScounters": "34", + "SampleAfterValue": "2000003", + "MSRIndex": "0x00", + "MSRValue": "0x00", + "CollectPEBSRecord": "2", + "TakenAlone": "0", + "CounterMask": "0", + "Invert": "0", + "EdgeDetect": "0", + "PEBS": "0", + "Data_LA": "0", + "L1_Hit_Indication": "0", + "Errata": "null", + "Offcore": "0", + "Deprecated": "0", + "Speculative": "1" + }, + { + "EventCode": "0x3c", + "UMask": "0x00", + "EventName": "CPU_CLK_UNHALTED.THREAD_P", + "BriefDescription": "Thread cycles when thread is not in halt state", + "PublicDescription": "This is an architectural event that counts the number of thread cycles while the thread is not in a halt state. The thread enters the halt state when it is running the HLT instruction. The core frequency may change from time to time due to power or thermal throttling. For this reason, this event may have a changing ratio with regards to wall clock time.", + "Counter": "0,1,2,3,4,5,6,7", + "PEBScounters": "0,1,2,3,4,5,6,7", + "SampleAfterValue": "2000003", + "MSRIndex": "0x00", + "MSRValue": "0x00", + "CollectPEBSRecord": "2", + "TakenAlone": "0", + "CounterMask": "0", + "Invert": "0", + "EdgeDetect": "0", + "PEBS": "0", + "Data_LA": "0", + "L1_Hit_Indication": "0", + "Errata": "null", + "Offcore": "0", + "Deprecated": "0", + "Speculative": "1" + }, + { + "EventCode": "0x3c", + "UMask": "0x01", + "EventName": "CPU_CLK_UNHALTED.REF_TSC_P", + "BriefDescription": "Reference cycles when the core is not in halt state.", + "PublicDescription": "Counts the number of reference cycles when the core is not in a halt state. The core enters the halt state when it is running the HLT instruction or the MWAIT instruction. This event is not affected by core frequency changes (for example, P states, TM2 transitions) but has the same incrementing frequency as the time stamp counter. This event can approximate elapsed time while the core was not in a halt state. 
It is counted on a dedicated fixed counter, leaving the four (eight when Hyperthreading is disabled) programmable counters available for other events. Note: On all current platforms this event stops counting during 'throttling (TM)' states duty off periods the processor is 'halted'. The counter update is done at a lower clock rate then the core clock the overflow status bit for this counter may appear 'sticky'. After the counter has overflowed and software clears the overflow status bit and resets the counter to less than MAX. The reset value to the counter is not clocked immediately so the overflow status bit will flip 'high (1)' and generate another PMI (if enabled) after which the reset value gets clocked into the counter. Therefore, software will get the interrupt, read the overflow status bit '1 for bit 34 while the counter value is less than MAX. Software should ignore this case.", + "Counter": "0,1,2,3,4,5,6,7", + "PEBScounters": "0,1,2,3,4,5,6,7", + "SampleAfterValue": "2000003", + "MSRIndex": "0x00", + "MSRValue": "0x00", + "CollectPEBSRecord": "2", + "TakenAlone": "0", + "CounterMask": "0", + "Invert": "0", + "EdgeDetect": "0", + "PEBS": "0", + "Data_LA": "0", + "L1_Hit_Indication": "0", + "Errata": "null", + "Offcore": "0", + "Deprecated": "0", + "Speculative": "1" + }, + { + "EventCode": "0x3c", + "UMask": "0x02", + "EventName": "CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE", + "BriefDescription": "Core crystal clock cycles when this thread is unhalted and the other thread is halted.", + "PublicDescription": "Counts Core crystal clock cycles when current thread is unhalted and the other thread is halted.", + "Counter": "0,1,2,3,4,5,6,7", + "PEBScounters": "0,1,2,3,4,5,6,7", + "SampleAfterValue": "25003", + "MSRIndex": "0x00", + "MSRValue": "0x00", + "CollectPEBSRecord": "2", + "TakenAlone": "0", + "CounterMask": "0", + "Invert": "0", + "EdgeDetect": "0", + "PEBS": "0", + "Data_LA": "0", + "L1_Hit_Indication": "0", + "Errata": "null", + "Offcore": "0", + 
"Deprecated": "0", + "Speculative": "1" + }, + { + "EventCode": "0x3c", + "UMask": "0x08", + "EventName": "CPU_CLK_UNHALTED.REF_DISTRIBUTED", + "BriefDescription": "Core crystal clock cycles. Cycle counts are evenly distributed between active threads in the Core.", + "PublicDescription": "This event distributes Core crystal clock cycle counts between active hyperthreads, i.e., those in C0 sleep-state. A hyperthread becomes inactive when it executes the HLT or MWAIT instructions. If one thread is active in a core, all counts are attributed to this hyperthread. To obtain the full count when the Core is active, sum the counts from each hyperthread.", + "Counter": "0,1,2,3,4,5,6,7", + "PEBScounters": "0,1,2,3,4,5,6,7", + "SampleAfterValue": "2000003", + "MSRIndex": "0x00", + "MSRValue": "0x00", + "CollectPEBSRecord": "2", + "TakenAlone": "0", + "CounterMask": "0", + "Invert": "0", + "EdgeDetect": "0", + "PEBS": "0", + "Data_LA": "0", + "L1_Hit_Indication": "0", + "Errata": "null", + "Offcore": "0", + "Deprecated": "0", + "Speculative": "1" + }, + { + "EventCode": "0xec", + "UMask": "0x02", + "EventName": "CPU_CLK_UNHALTED.DISTRIBUTED", + "BriefDescription": "Cycle counts are evenly distributed between active threads in the Core.", + "PublicDescription": "This event distributes cycle counts between active hyperthreads, i.e., those in C0. A hyperthread becomes inactive when it executes the HLT or MWAIT instructions. If all other hyperthreads are inactive (or disabled or do not exist), all counts are attributed to this hyperthread. 
To obtain the full count when the Core is active, sum the counts from each hyperthread.", + "Counter": "0,1,2,3,4,5,6,7", + "PEBScounters": "0,1,2,3,4,5,6,7", + "SampleAfterValue": "2000003", + "MSRIndex": "0x00", + "MSRValue": "0x00", + "CollectPEBSRecord": "2", + "TakenAlone": "0", + "CounterMask": "0", + "Invert": "0", + "EdgeDetect": "0", + "PEBS": "0", + "Data_LA": "0", + "L1_Hit_Indication": "0", + "Errata": "null", + "Offcore": "0", + "Deprecated": "0", + "Speculative": "1" + }, + { + "EventCode": "0xec", + "UMask": "0x10", + "EventName": "CPU_CLK_UNHALTED.C01", + "BriefDescription": "Core clocks when the thread is in the C0.1 light-weight slower wakeup time but more power saving optimized state.", + "PublicDescription": "Counts core clocks when the thread is in the C0.1 light-weight slower wakeup time but more power saving optimized state. This state can be entered via the TPAUSE or UMWAIT instructions.", + "Counter": "0,1,2,3,4,5,6,7", + "PEBScounters": "0,1,2,3,4,5,6,7", + "SampleAfterValue": "2000003", + "MSRIndex": "0x00", + "MSRValue": "0x00", + "CollectPEBSRecord": "2", + "TakenAlone": "0", + "CounterMask": "0", + "Invert": "0", + "EdgeDetect": "0", + "PEBS": "0", + "Data_LA": "0", + "L1_Hit_Indication": "0", + "Errata": "null", + "Offcore": "0", + "Deprecated": "0", + "Speculative": "1" + }, + { + "EventCode": "0xec", + "UMask": "0x20", + "EventName": "CPU_CLK_UNHALTED.C02", + "BriefDescription": "Core clocks when the thread is in the C0.2 light-weight faster wakeup time but less power saving optimized state.", + "PublicDescription": "Counts core clocks when the thread is in the C0.2 light-weight faster wakeup time but less power saving optimized state. 
This state can be entered via the TPAUSE or UMWAIT instructions.", + "Counter": "0,1,2,3,4,5,6,7", + "PEBScounters": "0,1,2,3,4,5,6,7", + "SampleAfterValue": "2000003", + "MSRIndex": "0x00", + "MSRValue": "0x00", + "CollectPEBSRecord": "2", + "TakenAlone": "0", + "CounterMask": "0", + "Invert": "0", + "EdgeDetect": "0", + "PEBS": "0", + "Data_LA": "0", + "L1_Hit_Indication": "0", + "Errata": "null", + "Offcore": "0", + "Deprecated": "0", + "Speculative": "1" + }, + { + "EventCode": "0xec", + "UMask": "0x40", + "EventName": "CPU_CLK_UNHALTED.PAUSE", + "BriefDescription": "CPU_CLK_UNHALTED.PAUSE", + "PublicDescription": "CPU_CLK_UNHALTED.PAUSE", + "Counter": "0,1,2,3,4,5,6,7", + "PEBScounters": "0,1,2,3,4,5,6,7", + "SampleAfterValue": "2000003", + "MSRIndex": "0x00", + "MSRValue": "0x00", + "CollectPEBSRecord": "2", + "TakenAlone": "0", + "CounterMask": "0", + "Invert": "0", + "EdgeDetect": "0", + "PEBS": "0", + "Data_LA": "0", + "L1_Hit_Indication": "0", + "Errata": "null", + "Offcore": "0", + "Deprecated": "0", + "Speculative": "1" + }, + { + "EventCode": "0xec", + "UMask": "0x40", + "EventName": "CPU_CLK_UNHALTED.PAUSE_INST", + "BriefDescription": "CPU_CLK_UNHALTED.PAUSE_INST", + "PublicDescription": "CPU_CLK_UNHALTED.PAUSE_INST", + "Counter": "0,1,2,3,4,5,6,7", + "PEBScounters": "0,1,2,3,4,5,6,7", + "SampleAfterValue": "2000003", + "MSRIndex": "0x00", + "MSRValue": "0x00", + "CollectPEBSRecord": "2", + "TakenAlone": "0", + "CounterMask": "1", + "Invert": "0", + "EdgeDetect": "1", + "PEBS": "0", + "Data_LA": "0", + "L1_Hit_Indication": "0", + "Errata": "null", + "Offcore": "0", + "Deprecated": "0", + "Speculative": "1" + }, + { + "EventCode": "0xec", + "UMask": "0x70", + "EventName": "CPU_CLK_UNHALTED.C0_WAIT", + "BriefDescription": "Core clocks when the thread is in the C0.1 or C0.2 or running a PAUSE in C0 ACPI state.", + "PublicDescription": "Counts core clocks when the thread is in the C0.1 or C0.2 power saving optimized states (TPAUSE or UMWAIT instructions) 
or running the PAUSE instruction.", + "Counter": "0,1,2,3,4,5,6,7", + "PEBScounters": "0,1,2,3,4,5,6,7", + "SampleAfterValue": "2000003", + "MSRIndex": "0x00", + "MSRValue": "0x00", + "CollectPEBSRecord": "2", + "TakenAlone": "0", + "CounterMask": "0", + "Invert": "0", + "EdgeDetect": "0", + "PEBS": "0", + "Data_LA": "0", + "L1_Hit_Indication": "0", + "Errata": "null", + "Offcore": "0", + "Deprecated": "0", + "Speculative": "1" + } + ] +} \ No newline at end of file diff --git a/testdata/symlink b/testdata/symlink new file mode 120000 index 0000000..298b6eb --- /dev/null +++ b/testdata/symlink @@ -0,0 +1 @@ +./proc_modules_msr_loaded \ No newline at end of file diff --git a/testdata_setup_test.go b/testdata_setup_test.go new file mode 100644 index 0000000..3259991 --- /dev/null +++ b/testdata_setup_test.go @@ -0,0 +1,98 @@ +// Copyright (C) 2023 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 + +//go:build linux && amd64 + +package powertelemetry + +import ( + "fmt" + "io/fs" + "os" + "path/filepath" + "strings" + "testing" +) + +// TestMain rolls out testdata in a temporary directory, runs all tests, and cleans up. +func TestMain(m *testing.M) { + err := setupTestData() + if err == nil { + m.Run() + } + + teardownTestData() + + if err != nil { + fmt.Printf("TestData setup failed: %v\n", err) + os.Exit(1) + } +} + +var tempTestDataDir string + +// makeTestDataPath returns an absolute path created from the temporary directory and +// the relative path. +func makeTestDataPath(rel string) string { + return filepath.Join(tempTestDataDir, rel) +} + +// setupTestData copies all testdata directory structure and files to a temporary directory. +// While copying, files and directories are renamed to replace a colon placeholder "{colon}" +// with a regular colon character. +// +// E.g. the directory "intel-rapl{colon}0{colon}0" will be renamed to "intel-rapl:0:0". 
+// +// This workaround eliminates the issue with the ZIP archiver, which is used by the Go +// toolchain when importing a library. The ZIP archiver doesn't allow several characters +// including colon to be used in file or directory names. +// +// The workaround requires files and directories in the testdata directory to be named +// the way that colon characters are replaced with a "{colon}" placeholder. +// +// E.g. an "intel-rapl:0:0" directory should be named "intel-rapl{colon}0{colon}0". +// +// TODO: this whole implementation of the ZIP archiver workaround has to be moved under +// the rapl package along with rapl related unit tests when the powertelemetry library architecture +// is reworked to be package based. +func setupTestData() error { + var err error + tempTestDataDir, err = os.MkdirTemp("", "sampledir") + if err != nil { + return err + } + + srcDir := "testdata" + + return filepath.WalkDir(srcDir, func(path string, d fs.DirEntry, _ error) error { + v := path + colonPlaceholder := "{colon}" + colonCharacter := ":" + if strings.Contains(v, colonPlaceholder) { + v = strings.ReplaceAll(v, colonPlaceholder, colonCharacter) + } + + destPath := filepath.Join(tempTestDataDir, v) + + if d.IsDir() { + return os.Mkdir(destPath, 0750) + } else if d.Type() == fs.ModeSymlink { + link, err := os.Readlink(path) + if err != nil { + return err + } + return os.Symlink(link, destPath) + } + + data, err := os.ReadFile(path) + if err != nil { + return err + } + return os.WriteFile(destPath, data, 0640) + }) +} + +// teardownTestData removes the temporary directory with all its contents. 
+func teardownTestData() { + _ = os.RemoveAll(tempTestDataDir) +} diff --git a/topology.go b/topology.go new file mode 100644 index 0000000..439e8e8 --- /dev/null +++ b/topology.go @@ -0,0 +1,269 @@ +// Copyright (C) 2023 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 + +//go:build linux && amd64 + +package powertelemetry + +import ( + "fmt" + "path/filepath" + "slices" + "strconv" + "strings" + + cpuUtil "github.com/shirou/gopsutil/v3/cpu" +) + +const ( + // base path which holds global and individual CPU attributes. + defaultDieBasePath = "/sys/devices/system/cpu" + + // name for die ID attribute file, corresponding to a specific CPU ID. + dieFilename = "topology/die_id" +) + +// topologyGetter gets topology information of the host. +type topologyGetter interface { + getCPUVendor(cpuID int) (string, error) + getCPUFamily(cpuID int) (string, error) + getCPUDieID(cpuID int) (int, error) + getCPUPackageID(cpuID int) (int, error) + getCPUCoreID(cpuID int) (int, error) + getCPUModel() int + getCPUFlags(cpuID int) ([]string, error) + getCPUsNumber() int + getPackageDieIDs(packageID int) ([]int, error) + getPackageIDs() []int +} + +// topologyReader provides per-CPU ID attribute information of the host. +type topologyReader interface { + // initTopology parses topology information from the host. + initTopology() error + + topologyGetter +} + +// cpuInfo represents attribute information of a CPU. +type cpuInfo struct { + vendorID string + family string + dieID int + packageID int + coreID int + flags []string +} + +// topologyData provides information about the processor of the host, including the number of CPUs present, +// CPUs details, CPU model, mapping between packages and dies and all package IDs. +// Implements topologyReader interface. 
+type topologyData struct { + dieIDPath string + + topologyMap map[int]*cpuInfo + packageDies map[int][]int + packageIDs []int + model int +} + +// initTopology initializes information about the processor of the host, including the number of CPUs present, +// CPUs details, CPU model, mapping between packages and dies and all package IDs. +func (t *topologyData) initTopology() error { + cpus, err := cpuUtil.Info() + if err != nil { + return fmt.Errorf("error occurred while parsing CPU information: %w", err) + } + if len(cpus) == 0 { + return fmt.Errorf("no CPUs were found") + } + + modelParsed := false + cpuInfoMap := make(map[int]*cpuInfo, len(cpus)) + for _, singleCPUInfo := range cpus { + info, err := parseCPUInfo(singleCPUInfo) + if err != nil { + return err + } + + if !modelParsed { + t.model, err = strconv.Atoi(singleCPUInfo.Model) + if err != nil { + return fmt.Errorf("error parsing model: %w", err) + } + modelParsed = true + } + + cpuInfoMap[int(singleCPUInfo.CPU)] = info + } + + t.packageDies = make(map[int][]int) + // Attempt to retrieve die ID for each CPU ID from sysfs + // If not retrieved, default value is zero, as in turbostat. + for cpuID, cInfo := range cpuInfoMap { + cpuDir := "cpu" + strconv.Itoa(cpuID) + dieName := filepath.Join(t.dieIDPath, cpuDir, dieFilename) + dieID, err := extractDieID(dieName) + if err != nil { + continue + } + cpuInfoMap[cpuID].dieID = dieID + t.addDieToPackageDies(cInfo.packageID, dieID) + } + t.topologyMap = cpuInfoMap + + // slices.Compact replaces consecutive runs of equal elements with a single copy + // (therefore, the slice must be sorted earlier to remove duplicates) + for packageID, dies := range t.packageDies { + slices.Sort(dies) + t.packageDies[packageID] = slices.Compact(dies) + } + + // Get ordered slice of unique package IDs. + t.packageIDs = packageIDs(t.topologyMap) + + return nil +} + +// parseCPUInfo parses information from single CPU. 
+func parseCPUInfo(infoStat cpuUtil.InfoStat) (*cpuInfo, error) {
+	// The physical ID is checked first, so it is the one reported when both IDs are malformed.
+	pkgID, err := strconv.Atoi(infoStat.PhysicalID)
+	if err != nil {
+		return nil, fmt.Errorf("error parsing physical ID: %w", err)
+	}
+
+	core, err := strconv.Atoi(infoStat.CoreID)
+	if err != nil {
+		return nil, fmt.Errorf("error parsing core ID: %w", err)
+	}
+
+	// The die ID is not set here and keeps its zero value.
+	parsed := &cpuInfo{
+		vendorID:  infoStat.VendorID,
+		family:    infoStat.Family,
+		packageID: pkgID,
+		coreID:    core,
+		flags:     infoStat.Flags,
+	}
+	return parsed, nil
+}
+
+// packageIDs collects the package ID of every CPU in the topology map and returns them
+// sorted in ascending order with duplicates removed.
+func packageIDs(topologyMap map[int]*cpuInfo) []int {
+	ids := make([]int, 0, len(topologyMap))
+	for cpuID := range topologyMap {
+		ids = append(ids, topologyMap[cpuID].packageID)
+	}
+	slices.Sort(ids)
+	return slices.Compact(ids)
+}
+
+// extractDieID reads the die ID stored in dieFile. A file that does not exist yields
+// die ID 0 and no error. An error is returned when the existence check or the read fails,
+// or when the trimmed file content is not a valid integer.
+func extractDieID(dieFile string) (dieID int, err error) {
+	exists, err := fileExists(dieFile)
+	switch {
+	case err != nil:
+		return 0, fmt.Errorf("error opening file %q: %w", dieFile, err)
+	case !exists:
+		// Return 0 in case die_id does not exist.
+		return 0, nil
+	}
+
+	raw, err := readFile(dieFile)
+	if err != nil {
+		return 0, fmt.Errorf("error reading file %q: %w", dieFile, err)
+	}
+
+	if dieID, err = strconv.Atoi(strings.TrimSpace(string(raw))); err != nil {
+		return 0, fmt.Errorf("error converting die ID value from the file %q to int: %w", dieFile, err)
+	}
+	return dieID, nil
+}
+
+// addDieToPackageDies appends dieID to the dies recorded for packageID. Duplicates are
+// not filtered here.
+func (t *topologyData) addDieToPackageDies(packageID int, dieID int) {
+	// Appending to the nil slice of a missing key creates the first entry of the package.
+	t.packageDies[packageID] = append(t.packageDies[packageID], dieID)
+}
+
+// getCPUVendor gets cpu's vendorID value. If no cpu is found for the corresponding cpuID
+// an error is returned.
+func (t *topologyData) getCPUVendor(cpuID int) (string, error) {
+	info, found := t.topologyMap[cpuID]
+	if !found {
+		return "", fmt.Errorf("cpu: %d doesn't exist", cpuID)
+	}
+	return info.vendorID, nil
+}
+
+// getCPUFamily returns the family of the CPU with the given ID, or an error when the
+// topology map holds no entry for that ID.
+func (t *topologyData) getCPUFamily(cpuID int) (string, error) {
+	info, found := t.topologyMap[cpuID]
+	if !found {
+		return "", fmt.Errorf("cpu: %d doesn't exist", cpuID)
+	}
+	return info.family, nil
+}
+
+// getCPUDieID returns the die ID of the CPU with the given ID, or an error when the
+// topology map holds no entry for that ID.
+func (t *topologyData) getCPUDieID(cpuID int) (int, error) {
+	info, found := t.topologyMap[cpuID]
+	if !found {
+		return 0, fmt.Errorf("cpu: %d doesn't exist", cpuID)
+	}
+	return info.dieID, nil
+}
+
+// getCPUPackageID returns the package ID of the CPU with the given ID, or an error when
+// the topology map holds no entry for that ID.
+func (t *topologyData) getCPUPackageID(cpuID int) (int, error) {
+	info, found := t.topologyMap[cpuID]
+	if !found {
+		return 0, fmt.Errorf("cpu: %d doesn't exist", cpuID)
+	}
+	return info.packageID, nil
+}
+
+// getCPUCoreID returns the core ID of the CPU with the given ID, or an error when the
+// topology map holds no entry for that ID.
+func (t *topologyData) getCPUCoreID(cpuID int) (int, error) {
+	info, found := t.topologyMap[cpuID]
+	if !found {
+		return 0, fmt.Errorf("cpu: %d doesn't exist", cpuID)
+	}
+	return info.coreID, nil
+}
+
+// getCPUModel returns the CPU model stored in the topology data.
+func (t *topologyData) getCPUModel() int {
+	return t.model
+}
+
+// getCPUFlags returns the flags of the CPU with the given ID, or a nil slice and an error
+// when the topology map holds no entry for that ID.
+func (t *topologyData) getCPUFlags(cpuID int) ([]string, error) {
+	info, found := t.topologyMap[cpuID]
+	if !found {
+		return nil, fmt.Errorf("cpu: %d doesn't exist", cpuID)
+	}
+	return info.flags, nil
+}
+
+// getCPUsNumber returns the number of logical CPUs on a server.
+func (t *topologyData) getCPUsNumber() int { + return len(t.topologyMap) +} + +// getPackageDieIDs gets the die IDs recorded for a package. If no package is found for the corresponding packageID +// an error is returned. +func (t *topologyData) getPackageDieIDs(packageID int) ([]int, error) { + if dies, ok := t.packageDies[packageID]; ok { + return dies, nil + } + return nil, fmt.Errorf("package: %d doesn't exist", packageID) +} + +// getPackageIDs returns a slice with ordered package IDs of the host topology. +func (t *topologyData) getPackageIDs() []int { + return t.packageIDs +} diff --git a/topology_test.go b/topology_test.go new file mode 100644 index 0000000..4332f79 --- /dev/null +++ b/topology_test.go @@ -0,0 +1,454 @@ +// Copyright (C) 2023 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 + +//go:build linux && amd64 + +package powertelemetry + +import ( + "errors" + "fmt" + "testing" + + "github.com/stretchr/testify/mock" + "github.com/stretchr/testify/require" +) + +type topologyMock struct { + mock.Mock +} + +func (m *topologyMock) initTopology() error { + args := m.Called() + return args.Error(0) +} + +func (m *topologyMock) getCPUVendor(cpuID int) (string, error) { + args := m.Called(cpuID) + return args.String(0), args.Error(1) +} + +func (m *topologyMock) getCPUFamily(cpuID int) (string, error) { + args := m.Called(cpuID) + return args.String(0), args.Error(1) +} + +func (m *topologyMock) getCPUDieID(cpuID int) (int, error) { + args := m.Called(cpuID) + return args.Int(0), args.Error(1) +} + +func (m *topologyMock) getCPUPackageID(cpuID int) (int, error) { + args := m.Called(cpuID) + return args.Int(0), args.Error(1) +} + +func (m *topologyMock) getCPUCoreID(cpuID int) (int, error) { + args := m.Called(cpuID) + return args.Int(0), args.Error(1) +} + +func (m *topologyMock) getCPUModel() int { + args := m.Called() + return args.Int(0) +} + +func (m *topologyMock) getCPUFlags(cpuID int) ([]string, error) { + args := m.Called(cpuID) + if args.Get(0) == nil { + return nil,
args.Error(1) + } + return args.Get(0).([]string), args.Error(1) +} + +func (m *topologyMock) getCPUsNumber() int { + args := m.Called() + return args.Int(0) +} + +func (m *topologyMock) getPackageDieIDs(packageID int) ([]int, error) { + args := m.Called(packageID) + if args.Get(0) == nil { + return nil, args.Error(1) + } + return args.Get(0).([]int), args.Error(1) +} + +func (m *topologyMock) getPackageIDs() []int { + args := m.Called() + if args.Get(0) == nil { + return nil + } + return args.Get(0).([]int) +} + +func TestCpuFields(t *testing.T) { + flagsExp := []string{"msr", "dts"} + topology := &topologyData{ + topologyMap: map[int]*cpuInfo{ + 0: { + vendorID: "vendorID", + family: "family", + flags: flagsExp, + dieID: 2, + packageID: 2, + }, + }, + } + + t.Run("Validate cpu's family value", func(t *testing.T) { + cpuID := 0 + expected := "family" + res, err := topology.getCPUFamily(cpuID) + require.NoError(t, err) + require.EqualValues(t, expected, res) + }) + + t.Run("Validate cpu's vendorID value", func(t *testing.T) { + cpuID := 0 + expected := "vendorID" + res, err := topology.getCPUVendor(cpuID) + require.NoError(t, err) + require.EqualValues(t, expected, res) + }) + + t.Run("Family's value doesn't exist in map for given key", func(t *testing.T) { + cpuID := 1 + expected := "" + res, err := topology.getCPUFamily(cpuID) + require.EqualValues(t, expected, res) + require.Error(t, err) + require.ErrorContains(t, err, fmt.Sprintf("cpu: %d doesn't exist", cpuID)) + }) + + t.Run("VendorID's value doesn't exist in map for given key", func(t *testing.T) { + cpuID := 1 + expected := "" + res, err := topology.getCPUVendor(cpuID) + require.Error(t, err) + require.ErrorContains(t, err, fmt.Sprintf("cpu: %d doesn't exist", cpuID)) + require.EqualValues(t, expected, res) + }) + + t.Run("PackageID value doesn't exist in map for given key", func(t *testing.T) { + cpuID := 1 + expected := 0 + res, err := topology.getCPUPackageID(cpuID) + require.Error(t, err) + 
require.ErrorContains(t, err, fmt.Sprintf("cpu: %d doesn't exist", cpuID)) + require.EqualValues(t, expected, res) + }) + + t.Run("CoreID value doesn't exist in map for given key", func(t *testing.T) { + cpuID := 1000 + expected := 0 + res, err := topology.getCPUCoreID(cpuID) + require.Error(t, err) + require.ErrorContains(t, err, fmt.Sprintf("cpu: %d doesn't exist", cpuID)) + require.EqualValues(t, expected, res) + }) + + t.Run("DieID's value doesn't exist in map for given key", func(t *testing.T) { + cpuID := 1 + expected := 0 + res, err := topology.getCPUDieID(cpuID) + require.Error(t, err) + require.ErrorContains(t, err, fmt.Sprintf("cpu: %d doesn't exist", cpuID)) + require.EqualValues(t, expected, res) + }) + + t.Run("Validate cpu's packageID value", func(t *testing.T) { + cpuID := 0 + expected := 2 + res, err := topology.getCPUPackageID(cpuID) + require.NoError(t, err) + require.EqualValues(t, expected, res) + }) + + t.Run("Validate cpu's coreID value", func(t *testing.T) { + cpuID := 0 + expected := 0 + res, err := topology.getCPUCoreID(cpuID) + require.NoError(t, err) + require.EqualValues(t, expected, res) + }) + + t.Run("Validate cpu's dieID value", func(t *testing.T) { + cpuID := 0 + expected := 2 + res, err := topology.getCPUDieID(cpuID) + require.NoError(t, err) + require.EqualValues(t, expected, res) + }) + + t.Run("ValidCPUID", func(t *testing.T) { + cpuID := 0 + expected := flagsExp + res, err := topology.getCPUFlags(cpuID) + require.NoError(t, err) + require.EqualValues(t, expected, res) + }) + + t.Run("InvalidCPUID", func(t *testing.T) { + cpuID := 1 + expected := []string(nil) + res, err := topology.getCPUFlags(cpuID) + require.Error(t, err) + require.ErrorContains(t, err, fmt.Sprintf("cpu: %d doesn't exist", cpuID)) + require.EqualValues(t, expected, res) + }) +} + +// TestExtractDieID checks if dieID value extracted from file is correct in different test cases. 
+func TestExtractDieID(t *testing.T) {
+	// Each case gives the die_id path to read, the die ID expected back and, when the
+	// call must fail, a fragment the error message has to contain. An empty wantErr
+	// means extractDieID must succeed.
+	cases := []struct {
+		desc    string
+		path    string
+		wantID  int
+		wantErr string
+	}{
+		{desc: "Extracted", path: "testdata/die-id-valid/cpu1/topology/die_id", wantID: 1},
+		{desc: "EmptyFilename", path: "", wantErr: "error opening file \"\""},
+		{
+			desc:    "DirectoryInsteadOfFile",
+			path:    "testdata",
+			wantErr: "error reading file \"testdata\": error while reading file from path \"testdata\"",
+		},
+		// A path that does not exist is expected to yield die ID 0 without an error.
+		{desc: "FileNotExist", path: "testdata/die-id-valid/cpu1/topology/die_id_badPath"},
+		{
+			desc:    "NotExtracted",
+			path:    "testdata/die-id-invalid/cpu1/topology/die_id",
+			wantErr: "error converting die ID value from the file \"testdata/die-id-invalid/cpu1/topology/die_id\" to int",
+		},
+	}
+
+	for _, tc := range cases {
+		t.Run(tc.desc, func(t *testing.T) {
+			gotID, err := extractDieID(tc.path)
+
+			// The returned ID is verified first, for failing cases as well.
+			require.Equal(t, tc.wantID, gotID)
+
+			if tc.wantErr == "" {
+				require.NoError(t, err)
+				return
+			}
+			require.ErrorContains(t, err, tc.wantErr)
+		})
+	}
+}
+
+// TestInitTopology checks if cpuInfo value of topology struct is correct in different test cases.
+func TestInitTopology(t *testing.T) {
+	// Each case points HOST_PROC at a cpuinfo fixture directory and dieIDPath at a die-id
+	// fixture directory, then lists the topology state expected after initTopology.
+	// Fields left unset in a case (e.g. packageDiesExp for the failing cases) are
+	// compared as nil.
+	testCases := []struct {
+		name           string
+		cpuInfoPath    string
+		diePath        string
+		topologyMapExp map[int]*cpuInfo
+		packageDiesExp map[int][]int
+		packageIDsExp  []int
+		err            error
+	}{
+		{
+			name:        "InitializedWithValidDieIDPath",
+			diePath:     "testdata/die-id-valid",
+			cpuInfoPath: "testdata/cpuinfo_good/",
+			topologyMapExp: map[int]*cpuInfo{
+				1: {
+					vendorID:  "IdOfVendor",
+					family:    "13",
+					dieID:     1,
+					packageID: 2,
+					coreID:    66,
+					flags:     []string{"no", "flags"},
+				},
+			},
+			packageDiesExp: map[int][]int{
+				2: {
+					1,
+				},
+			},
+			packageIDsExp: []int{2},
+			err:           nil,
+		},
+		{
+			name:        "InitializedWithInvalidDieIDPath",
+			diePath:     "testdata/die-id-invalid",
+			cpuInfoPath: "testdata/cpuinfo_good/",
+			topologyMapExp: map[int]*cpuInfo{
+				1: {
+					vendorID:  "IdOfVendor",
+					family:    "13",
+					dieID:     0,
+					packageID: 2,
+					coreID:    66,
+					flags:     []string{"no", "flags"},
+				},
+			},
+			packageDiesExp: map[int][]int{},
+			packageIDsExp:  []int{2},
+			err:            nil,
+		},
+		{
+			name:        "InitializedWithoutDieIDPath",
+			cpuInfoPath: "testdata/cpuinfo_good/",
+			topologyMapExp: map[int]*cpuInfo{
+				1: {
+					vendorID:  "IdOfVendor",
+					family:    "13",
+					dieID:     0,
+					packageID: 2,
+					coreID:    66,
+					flags:     []string{"no", "flags"},
+				},
+			},
+			packageDiesExp: map[int][]int{
+				2: {
+					0,
+				},
+			},
+			packageIDsExp: []int{2},
+			err:           nil,
+		},
+		{
+			name:           "InvalidProcessorField",
+			diePath:        "testdata/die-id-valid",
+			cpuInfoPath:    "testdata/cpuinfo_bad1/",
+			topologyMapExp: nil,
+			err:            errors.New("error occurred while parsing CPU information"),
+		},
+		{
+			name:           "InvalidSteppingField",
+			diePath:        "testdata/die-id-valid",
+			cpuInfoPath:    "testdata/cpuinfo_bad2/",
+			topologyMapExp: nil,
+			err:            errors.New("error occurred while parsing CPU information"),
+		},
+		{
+			name:           "InvalidCacheSizeField",
+			diePath:        "testdata/die-id-valid",
+			cpuInfoPath:    "testdata/cpuinfo_bad3/",
+			topologyMapExp: nil,
+			err:            errors.New("error occurred while parsing CPU information"),
+		},
+		{
+			name:           "InvalidCpuInfoPath",
+			diePath:        "testdata/die-id-valid",
+			cpuInfoPath:    "testdata/cpuinfo_bad_path",
+			topologyMapExp: nil,
+			err:            errors.New("no CPUs were found"),
+		},
+	}
+
+	for _, tc := range testCases {
+		t.Run(tc.name, func(t *testing.T) {
+			// NOTE(review): HOST_PROC is presumably what the cpuinfo reader behind
+			// initTopology uses to locate the cpuinfo file - confirm against the parser.
+			t.Setenv("HOST_PROC", tc.cpuInfoPath)
+
+			newTopology := &topologyData{
+				dieIDPath: tc.diePath,
+			}
+
+			err := newTopology.initTopology()
+			// State is checked before the error, so a failed initialization is also
+			// required to leave the topology fields at the expected (nil) values.
+			require.Equal(t, tc.topologyMapExp, newTopology.topologyMap)
+			require.Equal(t, tc.packageDiesExp, newTopology.packageDies)
+			require.Equal(t, tc.packageIDsExp, newTopology.getPackageIDs())
+			if tc.err != nil {
+				require.ErrorContains(t, err, tc.err.Error())
+			} else {
+				require.NoError(t, err)
+			}
+		})
+	}
+}
+
+// TestGetCPUsNumber checks that getCPUsNumber, called through the topologyReader
+// interface, reports the number of entries in the topology map.
+func TestGetCPUsNumber(t *testing.T) {
+	testCases := []struct {
+		name         string
+		topology     topologyReader
+		numberOfCPUs int
+	}{
+		{
+			name: "0_CPUs",
+			topology: &topologyData{
+				topologyMap: make(map[int]*cpuInfo),
+			},
+			numberOfCPUs: 0,
+		},
+		{
+			// Entries with nil values still count: only the map length matters.
+			name: "3_CPUs",
+			topology: &topologyData{
+				topologyMap: map[int]*cpuInfo{
+					0: nil,
+					1: nil,
+					2: nil,
+				},
+			},
+			numberOfCPUs: 3,
+		},
+	}
+
+	for _, tc := range testCases {
+		t.Run(tc.name, func(t *testing.T) {
+			actual := tc.topology.getCPUsNumber()
+			require.Equal(t, tc.numberOfCPUs, actual)
+		})
+	}
+}
+
+// TestPackageIDs checks the packageIDs helper: the expected result lists every package ID
+// found in the topology map once, in ascending order, and is an empty (non-nil) slice
+// for an empty map.
+func TestPackageIDs(t *testing.T) {
+	testCases := []struct {
+		name        string
+		topologyMap map[int]*cpuInfo
+		packageIDs  []int
+	}{
+		{
+			name:        "EmptyTopologyMap",
+			topologyMap: map[int]*cpuInfo{},
+			packageIDs:  []int{},
+		},
+		{
+			name: "Found",
+			topologyMap: map[int]*cpuInfo{
+				0: {
+					packageID: 0,
+				},
+				1: {
+					packageID: 1,
+				},
+				2: {
+					packageID: 0,
+				},
+				3: {
+					packageID: 0,
+				},
+				4: {
+					packageID: 1,
+				},
+				5: {
+					packageID: 2,
+				},
+			},
+			packageIDs: []int{0, 1, 2},
+		},
+	}
+
+	for _, tc := range testCases {
+		t.Run(tc.name, func(t *testing.T) {
+			require.Equal(t, tc.packageIDs, packageIDs(tc.topologyMap))
+		})
+	}
+}
diff --git a/turbofreq.go b/turbofreq.go
new file mode 100644
index 0000000..fb75982
--- /dev/null
+++ b/turbofreq.go
@@ -0,0 +1,352 @@
+// Copyright (C) 2023 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+
+//go:build linux && amd64
+
+package powertelemetry
+
+import (
+	"fmt"
+
+	"github.com/intel/powertelemetry/internal/cpuid"
+	"github.com/intel/powertelemetry/internal/cpumodel"
+)
+
+// MaxTurboFreq is an item of a list of max turbo frequencies and related active cores.
+type MaxTurboFreq struct {
+	Value       uint64 // Maximum reachable turbo frequency in MHz
+	ActiveCores uint32 // Maximum number of active cores for the reachable turbo frequency
+	Secondary   bool   // Attribute indicating if the list item is related to secondary cores of a hybrid architecture
+}
+
+// isHybrid points to a function that checks if CPU is hybrid.
+// It is a package-level variable and is nil-checked before use.
+var isHybrid = cpuid.IsCPUHybrid
+
+// GetMaxTurboFreqList returns a list of max turbo frequencies and related active cores
+// according to the package ID.
+//
+// The list is assembled from every turbo ratio MSR layout the CPU model supports, in this
+// order: Haswell, Ivy Bridge, generic (followed by the secondary limits when the CPU is
+// hybrid), Atom and Knights Landing. The returned list is never nil on success.
+//
+// A ModuleNotInitializedError is returned when the msr module is not set. An error from
+// resolving the package ID to a CPU ID is returned unchanged. Any failure while dumping
+// a layout is wrapped with the name of the layout that failed.
+func (pt *PowerTelemetry) GetMaxTurboFreqList(packageID int) ([]MaxTurboFreq, error) {
+	if pt.msr == nil {
+		return nil, &ModuleNotInitializedError{Name: "msr"}
+	}
+
+	cpuID, err := pt.getCPUIDFromPackageID(packageID)
+	if err != nil {
+		return nil, err
+	}
+
+	model := pt.topology.getCPUModel()
+	list := []MaxTurboFreq{}
+	if hasHswTurboRatioLimit(model) {
+		out, err := pt.dumpHswTurboRatioLimits(cpuID)
+		if err != nil {
+			return nil, fmt.Errorf("dump hsw: %w", err)
+		}
+		list = append(list, out...)
+	}
+
+	if hasIvtTurboRatioLimit(model) {
+		out, err := pt.dumpIvtTurboRatioLimits(cpuID)
+		if err != nil {
+			return nil, fmt.Errorf("dump ivt: %w", err)
+		}
+		list = append(list, out...)
+	}
+
+	if hasTurboRatioLimit(model) {
+		out, err := pt.dumpTurboRatioLimits(turboRatioLimit, model, cpuID)
+		if err != nil {
+			return nil, fmt.Errorf("dump turbo ratio limits: %w", err)
+		}
+		list = append(list, out...)
+
+		// Hybrid CPUs expose a second set of limits in a separate MSR.
+		if isHybrid != nil && isHybrid() {
+			out, err := pt.dumpTurboRatioLimits(secondaryTurboRatioLimit, model, cpuID)
+			if err != nil {
+				// Wrapped like every other dump failure so the caller can tell which
+				// layout could not be read.
+				return nil, fmt.Errorf("dump secondary turbo ratio limits: %w", err)
+			}
+			list = append(list, out...)
+		}
+	}
+
+	if hasAtomTurboRatioLimit(model) {
+		out, err := pt.dumpAtomTurboRatioLimits(cpuID)
+		if err != nil {
+			return nil, fmt.Errorf("dump atom: %w", err)
+		}
+		list = append(list, out...)
+	}
+
+	if hasKnlTurboRatioLimit(model) {
+		out, err := pt.dumpKnlTurboRatioLimits(cpuID)
+		if err != nil {
+			return nil, fmt.Errorf("dump knl: %w", err)
+		}
+		list = append(list, out...)
+	}
+
+	return list, nil
+}
+
+// hasHswTurboRatioLimit checks if the model supports the Haswell turbo ratio limit.
+func hasHswTurboRatioLimit(model int) bool {
+	return model == cpumodel.INTEL_FAM6_HASWELL_X // HSW Xeon
+}
+
+// hasKnlTurboRatioLimit checks if the model supports the Knights Landing turbo ratio limit.
+func hasKnlTurboRatioLimit(model int) bool {
+	switch model {
+	case
+		cpumodel.INTEL_FAM6_XEON_PHI_KNL, // Knights Landing
+		cpumodel.INTEL_FAM6_XEON_PHI_KNM: // Knights Mill
+		return true
+	}
+	return false
+}
+
+// hasIvtTurboRatioLimit checks if the model supports the Ivy Bridge turbo ratio limit.
+// Haswell Xeon is included as well, so that model reports both the Haswell and the
+// Ivy Bridge limits.
+func hasIvtTurboRatioLimit(model int) bool {
+	switch model {
+	case
+		cpumodel.INTEL_FAM6_IVYBRIDGE_X, // IVB Xeon
+		cpumodel.INTEL_FAM6_HASWELL_X: // HSW Xeon
+		return true
+	}
+	return false
+}
+
+// hasSlvMsrs checks if the model supports Silvermont MSRs.
+func hasSlvMsrs(model int) bool {
+	switch model {
+	case
+		cpumodel.INTEL_FAM6_ATOM_SILVERMONT,
+		cpumodel.INTEL_FAM6_ATOM_SILVERMONT_MID,
+		cpumodel.INTEL_FAM6_ATOM_SILVERMONT_SMARTPHONE: // INTEL_FAM6_ATOM_AIRMONT_MID in turbostat
+		return true
+	}
+	return false
+}
+
+// hasAtomTurboRatioLimit checks if the model supports the Atom turbo ratio limit.
+// It is true for exactly the models that have Silvermont MSRs.
+func hasAtomTurboRatioLimit(model int) bool {
+	return hasSlvMsrs(model)
+}
+
+// hasTurboRatioLimit checks if the model has turbo ratio limit support.
+func hasTurboRatioLimit(model int) bool {
+	// Models with Silvermont MSRs never report the generic limit.
+	if hasSlvMsrs(model) {
+		return false
+	}
+
+	// Nehalem compatible, but do not include turbo-ratio limit support
+	excluded := model == cpumodel.INTEL_FAM6_NEHALEM_EX || // Nehalem-EX Xeon - Beckton
+		model == cpumodel.INTEL_FAM6_WESTMERE_EX ||
+		model == cpumodel.INTEL_FAM6_XEON_PHI_KNL || // PHI - Knights Landing (different MSR definition)
+		model == cpumodel.INTEL_FAM6_XEON_PHI_KNM // Knights Mill
+
+	return !excluded
+}
+
+// hasTurboRatioGroupLimits reports whether the model supports turbo ratio group limits.
+func hasTurboRatioGroupLimits(model int) bool {
+	switch model {
+	case
+		cpumodel.INTEL_FAM6_ATOM_GOLDMONT,
+		cpumodel.INTEL_FAM6_SKYLAKE_X,
+		cpumodel.INTEL_FAM6_ICELAKE_X,
+		cpumodel.INTEL_FAM6_ICELAKE_D,
+		cpumodel.INTEL_FAM6_SAPPHIRERAPIDS_X,
+		cpumodel.INTEL_FAM6_EMERALDRAPIDS_X,
+		cpumodel.INTEL_FAM6_ATOM_GOLDMONT_D,
+		cpumodel.INTEL_FAM6_ATOM_TREMONT_D:
+		return true
+	}
+	return false
+}
+
+// dumpHswTurboRatioLimits reads the Haswell turbo ratio limit MSR on the given CPU and
+// returns the max turbo frequencies for 18 and 17 active cores, in that order.
+// Entries whose ratio is zero are left out; the returned list is never nil on success.
+func (pt *PowerTelemetry) dumpHswTurboRatioLimits(cpuID int) ([]MaxTurboFreq, error) {
+	raw, err := pt.msr.read(turboRatioLimit2, cpuID)
+	if err != nil {
+		return nil, fmt.Errorf("can't read MSR 0x%X: %w", turboRatioLimit2, err)
+	}
+
+	// Only the two least significant octets of the MSR value carry ratios:
+	// bits 15:8 apply to 18 active cores and bits 7:0 to 17 active cores.
+	octets := [...]struct {
+		shift       uint
+		activeCores uint32
+	}{
+		{shift: 8, activeCores: 18},
+		{shift: 0, activeCores: 17},
+	}
+
+	result := []MaxTurboFreq{}
+	for _, o := range octets {
+		ratio := (raw >> o.shift) & 0xFF
+		if ratio == 0 {
+			continue
+		}
+		result = append(result, MaxTurboFreq{
+			ActiveCores: o.activeCores,
+			Value:       uint64(float64(ratio) * pt.busClock),
+		})
+	}
+
+	return result, nil
+}
+
+// dumpIvtTurboRatioLimits returns a list of max turbo frequencies and related active cores
+// of an Ivy Bridge based CPU.
+func (pt *PowerTelemetry) dumpIvtTurboRatioLimits(cpuID int) ([]MaxTurboFreq, error) {
+	msrValue, err := pt.msr.read(turboRatioLimit1, cpuID)
+	if err != nil {
+		return nil, fmt.Errorf("can't read MSR 0x%X: %w", turboRatioLimit1, err)
+	}
+
+	// The MSR value holds one ratio per octet. Octet 0 (least significant) applies to
+	// 9 active cores and octet 7 (most significant) to 16 active cores.
+	const (
+		octetsNo       = 8
+		minActiveCores = 9
+	)
+
+	// Non-nil even when every ratio is zero, so callers always get an empty list
+	// rather than nil.
+	list := make([]MaxTurboFreq, 0, octetsNo)
+
+	// Walk from the most significant octet down so the list is ordered by decreasing
+	// number of active cores; octets with a zero ratio are skipped.
+	for octet := octetsNo - 1; octet >= 0; octet-- {
+		ratio := (msrValue >> (8 * uint(octet))) & 0xFF
+		if ratio == 0 {
+			continue
+		}
+		list = append(list, MaxTurboFreq{
+			ActiveCores: uint32(minActiveCores + octet),
+			Value:       uint64(float64(ratio) * pt.busClock),
+		})
+	}
+
+	return list, nil
+}
+
+// dumpTurboRatioLimits returns a list of max turbo frequencies and related active cores
+// of a CPU supporting turbo ratio limits.
+func (pt *PowerTelemetry) dumpTurboRatioLimits(trlMsrOffset uint32, model int, cpuID int) ([]MaxTurboFreq, error) { + msrValue, err := pt.msr.read(trlMsrOffset, cpuID) + if err != nil { + return nil, fmt.Errorf("can't read MSR 0x%X: %w", trlMsrOffset, err) + } + + secondary := trlMsrOffset == secondaryTurboRatioLimit + + var coreCounts uint64 + if hasTurboRatioGroupLimits(model) { + coreCounts, err = pt.msr.read(turboRatioLimit1, cpuID) + if err != nil { + return nil, fmt.Errorf("can't read MSR 0x%X: %w", turboRatioLimit1, err) + } + } else { + coreCounts = 0x0807060504030201 + } + + list := []MaxTurboFreq{} + + // Iterate over 8 octets of the 64-bit MSR value and the core counts value, get the ratio and the group size, + // then add items with positive ratios to the list. + for shift := 56; shift >= 0; shift -= 8 { + ratio := (msrValue >> shift) & 0xFF + groupSize := (coreCounts >> shift) & 0xFF + if ratio > 0 { + list = append(list, MaxTurboFreq{ + ActiveCores: uint32(groupSize), + Value: uint64(float64(ratio) * pt.busClock), + Secondary: secondary, + }) + } + } + + return list, nil +} + +// dumpAtomTurboRatioLimits returns a list of max turbo frequencies and related active cores +// of an Atom based CPU. +func (pt *PowerTelemetry) dumpAtomTurboRatioLimits(cpuID int) ([]MaxTurboFreq, error) { + msrValue, err := pt.msr.read(atomCoreTurboRatios, cpuID) + if err != nil { + return nil, fmt.Errorf("can't read MSR 0x%X: %w", atomCoreTurboRatios, err) + } + + list := []MaxTurboFreq{} + + // Get 4 least significant octets of the 64-bit MSR value, which represent ratios, + // and add items with positive ratios to the list. 
+ ratio := (msrValue >> 24) & 0x3F + if ratio > 0 { + list = append(list, MaxTurboFreq{ActiveCores: 4, Value: uint64(float64(ratio) * pt.busClock)}) + } + ratio = (msrValue >> 16) & 0x3F + if ratio > 0 { + list = append(list, MaxTurboFreq{ActiveCores: 3, Value: uint64(float64(ratio) * pt.busClock)}) + } + ratio = (msrValue >> 8) & 0x3F + if ratio > 0 { + list = append(list, MaxTurboFreq{ActiveCores: 2, Value: uint64(float64(ratio) * pt.busClock)}) + } + ratio = (msrValue >> 0) & 0x3F + if ratio > 0 { + list = append(list, MaxTurboFreq{ActiveCores: 1, Value: uint64(float64(ratio) * pt.busClock)}) + } + + return list, nil +} + +// dumpKnlTurboRatioLimits returns a list of max turbo frequencies and related active cores +// of a Knights Landing based CPU. +func (pt *PowerTelemetry) dumpKnlTurboRatioLimits(cpuID int) ([]MaxTurboFreq, error) { + msrValue, err := pt.msr.read(turboRatioLimit, cpuID) + if err != nil { + return nil, fmt.Errorf("can't read MSR 0x%X: %w", turboRatioLimit, err) + } + + list := []MaxTurboFreq{} + + const bucketsNo = 7 + cores := [bucketsNo]uint64{} + ratio := [bucketsNo]uint64{} + + bNr := 0 + cores[bNr] = (msrValue & 0xFF) >> 1 // Get maximum number of cores in Group 0 + ratio[bNr] = (msrValue >> 8) & 0xFF // Get maximum ratio limit for Group 0 + + // Iterate over octets 3..8 of the 64-bit MSR value, get the number of incremental cores added to Group 1..6 + // and get the group ratio delta for Group 1..6 + for i := 16; i < 64; i += 8 { + deltaCores := (msrValue >> i) & 0x1F + deltaRatio := (msrValue >> (i + 5)) & 0x7 + + cores[bNr+1] = cores[bNr] + deltaCores + ratio[bNr+1] = ratio[bNr] - deltaRatio + bNr++ + } + + // Iterate over the pairs of cores and ratios, add the first pair along with others with unique ratios to the list + for i := bucketsNo - 1; i >= 0; i-- { + if ((i > 0) && (ratio[i] != ratio[i-1])) || (i == 0) { + list = append(list, MaxTurboFreq{ + ActiveCores: uint32(cores[i]), + Value: uint64(float64(ratio[i]) * pt.busClock), + }) 
+ } + } + + return list, nil +} diff --git a/turbofreq_test.go b/turbofreq_test.go new file mode 100644 index 0000000..2c0cea5 --- /dev/null +++ b/turbofreq_test.go @@ -0,0 +1,772 @@ +// Copyright (C) 2023 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 + +//go:build linux && amd64 + +package powertelemetry + +import ( + "strconv" + "testing" + + "github.com/stretchr/testify/require" +) + +func TestMaxTurboFreq_hasHswTurboRatioLimit(t *testing.T) { + selectedModels := []int{ + 0x3F, // INTEL_FAM6_HASWELL_X + } + + m := make(map[int]interface{}) + for _, v := range selectedModels { + m[v] = struct{}{} + } + + for model := 0; model < 0xFF; model++ { + ret := hasHswTurboRatioLimit(model) + require.Equalf(t, m[model] != nil, ret, "Model 0x%X", model) + } +} + +func TestMaxTurboFreq_hasKnlTurboRatioLimit(t *testing.T) { + selectedModels := []int{ + 0x57, // INTEL_FAM6_XEON_PHI_KNL + 0x85, // INTEL_FAM6_XEON_PHI_KNM + } + + m := make(map[int]interface{}) + for _, v := range selectedModels { + m[v] = struct{}{} + } + + for model := 0; model < 0xFF; model++ { + ret := hasKnlTurboRatioLimit(model) + require.Equalf(t, m[model] != nil, ret, "Model 0x%X", model) + } +} + +func TestMaxTurboFreq_hasIvtTurboRatioLimit(t *testing.T) { + selectedModels := []int{ + 0x3E, // INTEL_FAM6_IVYBRIDGE_X + 0x3F, // INTEL_FAM6_HASWELL_X + } + + m := make(map[int]interface{}) + for _, v := range selectedModels { + m[v] = struct{}{} + } + + for model := 0; model < 0xFF; model++ { + ret := hasIvtTurboRatioLimit(model) + require.Equalf(t, m[model] != nil, ret, "Model 0x%X", model) + } +} + +func TestMaxTurboFreq_hasSlvMsrs(t *testing.T) { + selectedModels := []int{ + 0x37, // INTEL_FAM6_ATOM_SILVERMONT + 0x4A, // INTEL_FAM6_ATOM_SILVERMONT_MID + 0x5A, // INTEL_FAM6_ATOM_AIRMONT_MID + } + + m := make(map[int]interface{}) + for _, v := range selectedModels { + m[v] = struct{}{} + } + + for model := 0; model < 0xFF; model++ { + ret := hasSlvMsrs(model) + require.Equalf(t, m[model] != nil, 
ret, "Model 0x%X", model) + } +} + +func TestMaxTurboFreq_hasAtomTurboRatioLimit(t *testing.T) { + selectedModels := []int{ + 0x37, // INTEL_FAM6_ATOM_SILVERMONT + 0x4A, // INTEL_FAM6_ATOM_SILVERMONT_MID + 0x5A, // INTEL_FAM6_ATOM_AIRMONT_MID + } + + m := make(map[int]interface{}) + for _, v := range selectedModels { + m[v] = struct{}{} + } + + for model := 0; model < 0xFF; model++ { + ret := hasAtomTurboRatioLimit(model) + require.Equalf(t, m[model] != nil, ret, "Model 0x%X", model) + } +} + +func TestMaxTurboFreq_hasTurboRatioLimit(t *testing.T) { + selectedModels := []int{ + 0x37, // INTEL_FAM6_ATOM_SILVERMONT + 0x4A, // INTEL_FAM6_ATOM_SILVERMONT_MID + 0x5A, // INTEL_FAM6_ATOM_AIRMONT_MID + 0x2E, // INTEL_FAM6_NEHALEM_EX + 0x2F, // INTEL_FAM6_WESTMERE_EX + 0x57, // INTEL_FAM6_XEON_PHI_KNL + 0x85, // INTEL_FAM6_XEON_PHI_KNM + } + + m := make(map[int]interface{}) + for _, v := range selectedModels { + m[v] = struct{}{} + } + + for model := 0; model < 0xFF; model++ { + ret := hasTurboRatioLimit(model) + + // Note: inverse test + require.Equalf(t, m[model] == nil, ret, "Model 0x%X", model) + } +} + +func TestMaxTurboFreq_hasTurboRatioGroupLimits(t *testing.T) { + selectedModels := []int{ + 0x5C, // INTEL_FAM6_ATOM_GOLDMONT + 0x55, // INTEL_FAM6_SKYLAKE_X + 0x6A, // INTEL_FAM6_ICELAKE_X + 0x6C, // INTEL_FAM6_ICELAKE_D + 0x8F, // INTEL_FAM6_SAPPHIRERAPIDS_X + 0x5F, // INTEL_FAM6_ATOM_GOLDMONT_D + 0x86, // INTEL_FAM6_ATOM_TREMONT_D + 0xCF, // INTEL_FAM6_EMERALDRAPIDS_X + } + + m := make(map[int]interface{}) + for _, v := range selectedModels { + m[v] = struct{}{} + } + + for model := 0; model < 0xFF; model++ { + ret := hasTurboRatioGroupLimits(model) + require.Equalf(t, m[model] != nil, ret, "Model 0x%X", model) + } +} + +func TestMaxTurboFreq_dumpHswTurboRatioLimits(t *testing.T) { + cases := []struct { + description string + cpuID int + busClock float64 + msrValue uint64 + expected []MaxTurboFreq + }{ + { + description: "Normal case A", busClock: 133.3, msrValue: 
0x0000000000001716, + expected: []MaxTurboFreq{ + {ActiveCores: 18, Value: 3065}, + {ActiveCores: 17, Value: 2932}, + }, + }, + { + description: "Normal case B", busClock: 100.0, msrValue: 0x0000000000001110, + expected: []MaxTurboFreq{ + {ActiveCores: 18, Value: 1700}, + {ActiveCores: 17, Value: 1600}, + }, + }, + { + description: "One ratio limit is zero", busClock: 100.0, msrValue: 0x0000000000001100, + expected: []MaxTurboFreq{ + {ActiveCores: 18, Value: 1700}, + }, + }, + { + description: "More ratio limits returned", busClock: 100.0, msrValue: 0x1716151413121110, + expected: []MaxTurboFreq{ + {ActiveCores: 18, Value: 1700}, + {ActiveCores: 17, Value: 1600}, + }, + }, + { + description: "Ratio limits zeroed", busClock: 100.0, msrValue: 0x0000000000000000, + expected: []MaxTurboFreq{}, + }, + } + + for _, v := range cases { + t.Run(v.description, func(t *testing.T) { + m := &msrMock{} + pt := &PowerTelemetry{msr: m, busClock: v.busClock} + + // Mock reading from MSR_TURBO_RATIO_LIMIT2 + m.On("read", uint32(0x1AF), 0).Return(v.msrValue, nil).Once() + + list, err := pt.dumpHswTurboRatioLimits(v.cpuID) + require.NoError(t, err) + require.Equal(t, v.expected, list) + }) + } +} + +func TestMaxTurboFreq_dumpIvtTurboRatioLimits(t *testing.T) { + cases := []struct { + description string + cpuID int + busClock float64 + msrValue uint64 + expected []MaxTurboFreq + }{ + { + description: "Normal case A", busClock: 133.3, msrValue: 0x1817161514131211, + expected: []MaxTurboFreq{ + {ActiveCores: 16, Value: 3199}, + {ActiveCores: 15, Value: 3065}, + {ActiveCores: 14, Value: 2932}, + {ActiveCores: 13, Value: 2799}, + {ActiveCores: 12, Value: 2666}, + {ActiveCores: 11, Value: 2532}, + {ActiveCores: 10, Value: 2399}, + {ActiveCores: 9, Value: 2266}, + }, + }, + { + description: "Normal case B", busClock: 100.0, msrValue: 0x1716151413121110, + expected: []MaxTurboFreq{ + {ActiveCores: 16, Value: 2300}, + {ActiveCores: 15, Value: 2200}, + {ActiveCores: 14, Value: 2100}, + 
{ActiveCores: 13, Value: 2000}, + {ActiveCores: 12, Value: 1900}, + {ActiveCores: 11, Value: 1800}, + {ActiveCores: 10, Value: 1700}, + {ActiveCores: 9, Value: 1600}, + }, + }, + { + description: "One ratio limit is zero", busClock: 100.0, msrValue: 0x1716150013121110, + expected: []MaxTurboFreq{ + {ActiveCores: 16, Value: 2300}, + {ActiveCores: 15, Value: 2200}, + {ActiveCores: 14, Value: 2100}, + {ActiveCores: 12, Value: 1900}, + {ActiveCores: 11, Value: 1800}, + {ActiveCores: 10, Value: 1700}, + {ActiveCores: 9, Value: 1600}, + }, + }, + { + description: "Ratio limits zeroed", busClock: 100.0, msrValue: 0x0000000000000000, + expected: []MaxTurboFreq{}, + }, + } + + for _, v := range cases { + t.Run(v.description, func(t *testing.T) { + m := &msrMock{} + pt := &PowerTelemetry{msr: m, busClock: v.busClock} + + // Mock reading from MSR_TURBO_RATIO_LIMIT1 + m.On("read", uint32(0x1AE), 0).Return(v.msrValue, nil).Once() + + list, err := pt.dumpIvtTurboRatioLimits(v.cpuID) + require.NoError(t, err) + require.Equal(t, v.expected, list) + }) + } +} + +func TestMaxTurboFreq_dumpTurboRatioLimits(t *testing.T) { + cases := []struct { + description string + cpuID int + busClock float64 + model int + msrValue1 uint64 // Mock reading from MSR_TURBO_RATIO_LIMIT + msrValue2 uint64 // Mock reading from MSR_TURBO_RATIO_LIMIT1 + expected []MaxTurboFreq + }{ + { + description: "Normal case, the model supports group limits", // INTEL_FAM6_ICELAKE_D + busClock: 133.3, msrValue1: 0x1817161514131211, msrValue2: 0x100E0C0A08060402, model: 0x6C, + expected: []MaxTurboFreq{ + {ActiveCores: 16, Value: 3199}, + {ActiveCores: 14, Value: 3065}, + {ActiveCores: 12, Value: 2932}, + {ActiveCores: 10, Value: 2799}, + {ActiveCores: 8, Value: 2666}, + {ActiveCores: 6, Value: 2532}, + {ActiveCores: 4, Value: 2399}, + {ActiveCores: 2, Value: 2266}, + }, + }, + { + description: "Normal case, the model doesn't support group limits", + busClock: 100.0, msrValue1: 0x1716151413121110, msrValue2: 
0x0000000000000000, model: 0x00, + expected: []MaxTurboFreq{ + {ActiveCores: 8, Value: 2300}, + {ActiveCores: 7, Value: 2200}, + {ActiveCores: 6, Value: 2100}, + {ActiveCores: 5, Value: 2000}, + {ActiveCores: 4, Value: 1900}, + {ActiveCores: 3, Value: 1800}, + {ActiveCores: 2, Value: 1700}, + {ActiveCores: 1, Value: 1600}, + }, + }, + { + description: "One ratio limit is zero", + busClock: 100.0, msrValue1: 0x1700151413121110, msrValue2: 0x0000000000000000, model: 0x00, + expected: []MaxTurboFreq{ + {ActiveCores: 8, Value: 2300}, + {ActiveCores: 6, Value: 2100}, + {ActiveCores: 5, Value: 2000}, + {ActiveCores: 4, Value: 1900}, + {ActiveCores: 3, Value: 1800}, + {ActiveCores: 2, Value: 1700}, + {ActiveCores: 1, Value: 1600}, + }, + }, + { + description: "Ratio limits zeroed", + busClock: 100.0, msrValue1: 0x0000000000000000, msrValue2: 0x0000000000000000, model: 0x00, + expected: []MaxTurboFreq{}, + }, + } + + for _, v := range cases { + t.Run(v.description, func(t *testing.T) { + m := &msrMock{} + pt := &PowerTelemetry{msr: m, busClock: v.busClock} + + // Mock reading from MSR_TURBO_RATIO_LIMIT + m.On("read", uint32(0x1AD), 0).Return(v.msrValue1, nil).Once() + + // Mock reading from MSR_TURBO_RATIO_LIMIT1 + m.On("read", uint32(0x1AE), 0).Return(v.msrValue2, nil).Once() + + list, err := pt.dumpTurboRatioLimits(turboRatioLimit, v.model, v.cpuID) + require.NoError(t, err) + require.Equal(t, v.expected, list) + }) + } +} + +func TestMaxTurboFreq_dumpAtomTurboRatioLimits(t *testing.T) { + cases := []struct { + description string + cpuID int + busClock float64 + msrValue uint64 + expected []MaxTurboFreq + }{ + { + description: "Normal case A", busClock: 133.3, msrValue: 0x0000000014131211, + expected: []MaxTurboFreq{ + {ActiveCores: 4, Value: 2666}, + {ActiveCores: 3, Value: 2532}, + {ActiveCores: 2, Value: 2399}, + {ActiveCores: 1, Value: 2266}, + }, + }, + { + description: "Normal case B", busClock: 100.0, msrValue: 0x0000000013121110, + expected: []MaxTurboFreq{ + 
{ActiveCores: 4, Value: 1900}, + {ActiveCores: 3, Value: 1800}, + {ActiveCores: 2, Value: 1700}, + {ActiveCores: 1, Value: 1600}, + }, + }, + { + description: "One ratio limit is zero", busClock: 100.0, msrValue: 0x0000000013120010, + expected: []MaxTurboFreq{ + {ActiveCores: 4, Value: 1900}, + {ActiveCores: 3, Value: 1800}, + {ActiveCores: 1, Value: 1600}, + }, + }, + { + description: "More ratio limits returned", busClock: 100.0, msrValue: 0x1716151413121110, + expected: []MaxTurboFreq{ + {ActiveCores: 4, Value: 1900}, + {ActiveCores: 3, Value: 1800}, + {ActiveCores: 2, Value: 1700}, + {ActiveCores: 1, Value: 1600}, + }, + }, + { + description: "Ratio limits zeroed", busClock: 100.0, msrValue: 0x0000000000000000, + expected: []MaxTurboFreq{}, + }, + } + + for _, v := range cases { + t.Run(v.description, func(t *testing.T) { + m := &msrMock{} + pt := &PowerTelemetry{msr: m, busClock: v.busClock} + + // Mock reading from MSR_ATOM_CORE_TURBO_RATIOS + m.On("read", uint32(0x66C), 0).Return(v.msrValue, nil).Once() + + list, err := pt.dumpAtomTurboRatioLimits(v.cpuID) + require.NoError(t, err) + require.Equal(t, v.expected, list) + }) + } +} + +func TestMaxTurboFreq_dumpKnlTurboRatioLimits(t *testing.T) { + cases := []struct { + description string + cpuID int + busClock float64 + msrBinaryValue string // String binary representation for easier understanding of bit field values + expected []MaxTurboFreq + }{ + { + description: "Normal case A", busClock: 133.3, + msrBinaryValue: "" + // Note: this it to fix indentation to improve readability + "001" + "00010" + // bits 63:56 + "001" + "00010" + // bits 55:48 + "001" + "00010" + // bits 47:40 + "001" + "00010" + // bits 39:32 + "001" + "00010" + // bits 31:24 + "001" + "00010" + // bits 23:16 + "00010000" + // bits 15:8 + "0000010" + "0", // bits 7:0 + expected: []MaxTurboFreq{ + {ActiveCores: 14, Value: 1333}, + {ActiveCores: 12, Value: 1466}, + {ActiveCores: 10, Value: 1599}, + {ActiveCores: 8, Value: 1732}, + 
{ActiveCores: 6, Value: 1866}, + {ActiveCores: 4, Value: 1999}, + {ActiveCores: 2, Value: 2132}, + }, + }, + { + description: "Normal case B", busClock: 100.0, + msrBinaryValue: "" + + "001" + "00001" + // bits 63:56 + "001" + "00001" + // bits 55:48 + "001" + "00001" + // bits 47:40 + "001" + "00001" + // bits 39:32 + "001" + "00001" + // bits 31:24 + "001" + "00001" + // bits 23:16 + "00010100" + // bits 15:8 + "0000100" + "0", // bits 7:0 + expected: []MaxTurboFreq{ + {ActiveCores: 10, Value: 1400}, + {ActiveCores: 9, Value: 1500}, + {ActiveCores: 8, Value: 1600}, + {ActiveCores: 7, Value: 1700}, + {ActiveCores: 6, Value: 1800}, + {ActiveCores: 5, Value: 1900}, + {ActiveCores: 4, Value: 2000}, + }, + }, + { + description: "One group ratio delta is zero", busClock: 100.0, + msrBinaryValue: "" + + "001" + "00001" + // bits 63:56 + "001" + "00001" + // bits 55:48 + "000" + "00001" + // bits 47:40 + "001" + "00001" + // bits 39:32 + "001" + "00001" + // bits 31:24 + "001" + "00001" + // bits 23:16 + "00010100" + // bits 15:8 + "0000100" + "0", // bits 7:0 + expected: []MaxTurboFreq{ + {ActiveCores: 10, Value: 1500}, + {ActiveCores: 9, Value: 1600}, + {ActiveCores: 7, Value: 1700}, + {ActiveCores: 6, Value: 1800}, + {ActiveCores: 5, Value: 1900}, + {ActiveCores: 4, Value: 2000}, + }, + }, + { + description: "One cores delta is zero", busClock: 100.0, + msrBinaryValue: "" + + "001" + "00001" + // bits 63:56 + "001" + "00001" + // bits 55:48 + "001" + "00001" + // bits 47:40 + "001" + "00000" + // bits 39:32 + "001" + "00001" + // bits 31:24 + "001" + "00001" + // bits 23:16 + "00010100" + // bits 15:8 + "0000100" + "0", // bits 7:0 + expected: []MaxTurboFreq{ + {ActiveCores: 9, Value: 1400}, + {ActiveCores: 8, Value: 1500}, + {ActiveCores: 7, Value: 1600}, + {ActiveCores: 6, Value: 1700}, + {ActiveCores: 6, Value: 1800}, + {ActiveCores: 5, Value: 1900}, + {ActiveCores: 4, Value: 2000}, + }, + }, + { + description: "Ratio limits zeroed", busClock: 100.0, + 
msrBinaryValue: "" + + "000" + "00000" + // bits 63:56 + "000" + "00000" + // bits 55:48 + "000" + "00000" + // bits 47:40 + "000" + "00000" + // bits 39:32 + "000" + "00000" + // bits 31:24 + "000" + "00000" + // bits 23:16 + "00000000" + // bits 15:8 + "0000000" + "0", // bits 7:0 + expected: []MaxTurboFreq{ + {ActiveCores: 0, Value: 0}, + }, + }, + } + + for _, v := range cases { + t.Run(v.description, func(t *testing.T) { + m := &msrMock{} + pt := &PowerTelemetry{msr: m, busClock: v.busClock} + + msr, err := strconv.ParseUint(v.msrBinaryValue, 2, 64) + require.NoError(t, err) + + // Mock reading from MSR_TURBO_RATIO_LIMIT + m.On("read", uint32(0x1AD), 0).Return(msr, nil).Once() + + list, err := pt.dumpKnlTurboRatioLimits(v.cpuID) + require.NoError(t, err) + require.Equal(t, v.expected, list) + }) + } +} + +func TestGetMaxTurboFreqList(t *testing.T) { + cases := []struct { + description string + model int + hybrid bool + busClock float64 + msrValue1 uint64 // MSR_TURBO_RATIO_LIMIT + msrValue2 uint64 // MSR_TURBO_RATIO_LIMIT1 + msrValue3 uint64 // MSR_TURBO_RATIO_LIMIT2 + msrValue4 uint64 // MSR_ATOM_CORE_TURBO_RATIOS + msrValue5 uint64 // MSR_SECONDARY_TURBO_RATIO_LIMIT + msrBinaryValue string // MSR_TURBO_RATIO_LIMIT (string binary representation) + expected []MaxTurboFreq + }{ + { + description: "Haswell X", busClock: 100.0, model: 0x3F, // INTEL_FAM6_HASWELL_X + hybrid: false, + msrValue1: 0x1716151413121110, + msrValue2: 0x1F1E1D1C1B1A1918, + msrValue3: 0x0000000000002120, + msrValue4: 0x0000000000000000, + msrValue5: 0x0000000000000000, + msrBinaryValue: "", + expected: []MaxTurboFreq{ + {ActiveCores: 18, Value: 3300}, + {ActiveCores: 17, Value: 3200}, + {ActiveCores: 16, Value: 3100}, + {ActiveCores: 15, Value: 3000}, + {ActiveCores: 14, Value: 2900}, + {ActiveCores: 13, Value: 2800}, + {ActiveCores: 12, Value: 2700}, + {ActiveCores: 11, Value: 2600}, + {ActiveCores: 10, Value: 2500}, + {ActiveCores: 9, Value: 2400}, + {ActiveCores: 8, Value: 2300}, + 
{ActiveCores: 7, Value: 2200}, + {ActiveCores: 6, Value: 2100}, + {ActiveCores: 5, Value: 2000}, + {ActiveCores: 4, Value: 1900}, + {ActiveCores: 3, Value: 1800}, + {ActiveCores: 2, Value: 1700}, + {ActiveCores: 1, Value: 1600}, + }, + }, + { + description: "Ivy Bridge X", busClock: 100.0, model: 0x3E, // INTEL_FAM6_IVYBRIDGE_X + hybrid: false, + msrValue1: 0x1716151413121110, + msrValue2: 0x1F1E1D1C1B1A1918, + msrValue3: 0x0000000000000000, + msrValue4: 0x0000000000000000, + msrValue5: 0x0000000000000000, + msrBinaryValue: "", + expected: []MaxTurboFreq{ + {ActiveCores: 16, Value: 3100}, + {ActiveCores: 15, Value: 3000}, + {ActiveCores: 14, Value: 2900}, + {ActiveCores: 13, Value: 2800}, + {ActiveCores: 12, Value: 2700}, + {ActiveCores: 11, Value: 2600}, + {ActiveCores: 10, Value: 2500}, + {ActiveCores: 9, Value: 2400}, + {ActiveCores: 8, Value: 2300}, + {ActiveCores: 7, Value: 2200}, + {ActiveCores: 6, Value: 2100}, + {ActiveCores: 5, Value: 2000}, + {ActiveCores: 4, Value: 1900}, + {ActiveCores: 3, Value: 1800}, + {ActiveCores: 2, Value: 1700}, + {ActiveCores: 1, Value: 1600}, + }, + }, + { + description: "Yonah", busClock: 100.0, model: 0x0E, // INTEL_FAM6_CORE_YONAH + hybrid: false, + msrValue1: 0x1716151413121110, + msrValue2: 0x0000000000000000, + msrValue3: 0x0000000000000000, + msrValue4: 0x0000000000000000, + msrValue5: 0x0000000000000000, + msrBinaryValue: "", + expected: []MaxTurboFreq{ + {ActiveCores: 8, Value: 2300}, + {ActiveCores: 7, Value: 2200}, + {ActiveCores: 6, Value: 2100}, + {ActiveCores: 5, Value: 2000}, + {ActiveCores: 4, Value: 1900}, + {ActiveCores: 3, Value: 1800}, + {ActiveCores: 2, Value: 1700}, + {ActiveCores: 1, Value: 1600}, + }, + }, + { + description: "Alderlake", busClock: 100.0, model: 0x97, // INTEL_FAM6_ALDERLAKE + hybrid: true, + msrValue1: 0x1716151413121110, + msrValue2: 0x0000000000000000, + msrValue3: 0x0000000000000000, + msrValue4: 0x0000000000000000, + msrValue5: 0x1716151413121110, + msrBinaryValue: "", + expected: 
[]MaxTurboFreq{ + {ActiveCores: 8, Value: 2300, Secondary: false}, + {ActiveCores: 7, Value: 2200, Secondary: false}, + {ActiveCores: 6, Value: 2100, Secondary: false}, + {ActiveCores: 5, Value: 2000, Secondary: false}, + {ActiveCores: 4, Value: 1900, Secondary: false}, + {ActiveCores: 3, Value: 1800, Secondary: false}, + {ActiveCores: 2, Value: 1700, Secondary: false}, + {ActiveCores: 1, Value: 1600, Secondary: false}, + {ActiveCores: 8, Value: 2300, Secondary: true}, + {ActiveCores: 7, Value: 2200, Secondary: true}, + {ActiveCores: 6, Value: 2100, Secondary: true}, + {ActiveCores: 5, Value: 2000, Secondary: true}, + {ActiveCores: 4, Value: 1900, Secondary: true}, + {ActiveCores: 3, Value: 1800, Secondary: true}, + {ActiveCores: 2, Value: 1700, Secondary: true}, + {ActiveCores: 1, Value: 1600, Secondary: true}, + }, + }, + { + description: "Atom Silvermont", busClock: 100.0, model: 0x37, // INTEL_FAM6_ATOM_SILVERMONT + hybrid: false, + msrValue1: 0x0000000000000000, + msrValue2: 0x0000000000000000, + msrValue3: 0x0000000000000000, + msrValue4: 0x0000000013121110, + msrValue5: 0x0000000000000000, + msrBinaryValue: "", + expected: []MaxTurboFreq{ + {ActiveCores: 4, Value: 1900}, + {ActiveCores: 3, Value: 1800}, + {ActiveCores: 2, Value: 1700}, + {ActiveCores: 1, Value: 1600}, + }, + }, + { + description: "Nehalem Ex", busClock: 100.0, model: 0x2E, // INTEL_FAM6_NEHALEM_EX + hybrid: false, + msrValue1: 0x1716151413121110, + msrValue2: 0x1817161514131211, + msrValue3: 0x1918171615141312, + msrValue4: 0x1A19181716151413, + msrValue5: 0x0000000000000000, + msrBinaryValue: "", + expected: []MaxTurboFreq{}, + }, + { + description: "Knights Landing", busClock: 100.0, model: 0x57, // INTEL_FAM6_XEON_PHI_KNL + hybrid: false, + msrValue1: 0x0000000000000000, + msrValue2: 0x0000000000000000, + msrValue3: 0x0000000000000000, + msrValue4: 0x0000000000000000, + msrValue5: 0x0000000000000000, + msrBinaryValue: "" + + "001" + "00010" + // bits 63:56 + "001" + "00010" + // bits 
55:48 + "001" + "00010" + // bits 47:40 + "001" + "00010" + // bits 39:32 + "001" + "00010" + // bits 31:24 + "001" + "00010" + // bits 23:16 + "00010000" + // bits 15:8 + "0000010" + "0", // bits 7:0 + expected: []MaxTurboFreq{ + {ActiveCores: 14, Value: 1000}, + {ActiveCores: 12, Value: 1100}, + {ActiveCores: 10, Value: 1200}, + {ActiveCores: 8, Value: 1300}, + {ActiveCores: 6, Value: 1400}, + {ActiveCores: 4, Value: 1500}, + {ActiveCores: 2, Value: 1600}, + }, + }, + { + description: "Unknown model, zeroed MSRs", busClock: 100.0, model: 0x00, + hybrid: false, + msrValue1: 0x0000000000000000, + msrValue2: 0x0000000000000000, + msrValue3: 0x0000000000000000, + msrValue4: 0x0000000000000000, + msrValue5: 0x0000000000000000, + msrBinaryValue: "", + expected: []MaxTurboFreq{}, + }, + } + + for _, v := range cases { + t.Run(v.description, func(t *testing.T) { + m := &msrMock{} + pt := &PowerTelemetry{ + msr: m, + busClock: v.busClock, + topology: &topologyData{ + topologyMap: map[int]*cpuInfo{ + 0: { + packageID: 0, + }, + }, + model: v.model, + }, + cpus: []int{0}, + } + + isHybrid = func() bool { + return v.hybrid + } + + // If msrBinaryValue is not defined, use msrValue1 to pass to MSR_TURBO_RATIO_LIMIT + var msr uint64 + msr, err := strconv.ParseUint(v.msrBinaryValue, 2, 64) + if err != nil { + msr = v.msrValue1 + } + + // Mock reading from MSR_TURBO_RATIO_LIMIT + m.On("read", uint32(0x1AD), 0).Return(msr, nil).Once() + + // Mock reading from MSR_TURBO_RATIO_LIMIT1 + m.On("read", uint32(0x1AE), 0).Return(v.msrValue2, nil).Once() + + // Mock reading from MSR_TURBO_RATIO_LIMIT2 + m.On("read", uint32(0x1AF), 0).Return(v.msrValue3, nil).Once() + + // Mock reading from MSR_ATOM_CORE_TURBO_RATIOS + m.On("read", uint32(0x66C), 0).Return(v.msrValue4, nil).Once() + + // Mock reading from MSR_SECONDARY_TURBO_RATIO_LIMIT + m.On("read", uint32(0x650), 0).Return(v.msrValue5, nil).Once() + + list, err := pt.GetMaxTurboFreqList(0) + require.NoError(t, err) + require.Equal(t, 
v.expected, list) + }) + } +} diff --git a/uncorefreq.go b/uncorefreq.go new file mode 100644 index 0000000..468f945 --- /dev/null +++ b/uncorefreq.go @@ -0,0 +1,143 @@ +// Copyright (C) 2023 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 + +//go:build linux && amd64 + +package powertelemetry + +import ( + "errors" + "fmt" + "path/filepath" + "strconv" + "strings" +) + +const ( + // base path comprising the uncore frequency type files. + defaultUncoreFreqBasePath = "/sys/devices/system/cpu/intel_uncore_frequency" + + // pattern string to identify the specific package ID of an uncore frequency path. + packageIDPattern = "package_%s" + + // pattern string to identify the specific die ID of an uncore frequency path. + dieIDPattern = "die_%s" + + // pattern to identify file name of the uncore frequency type path. + freqTypeFilePattern = "%s_freq_khz" +) + +// uncoreFreqType is an enum type to identify specific uncore frequency parameters. +type uncoreFreqType int + +// uncoreFreqType enum defines supported uncore frequency parameters. +const ( + initialMax uncoreFreqType = iota + initialMin + customizedMax + customizedMin + current +) + +// Helper function to return a string representation of uncoreFreqType. +func (f uncoreFreqType) String() string { + switch f { + case initialMax: + return "initial_max" + case initialMin: + return "initial_min" + case customizedMax: + return "max" + case customizedMin: + return "min" + case current: + return "current" + default: + return "" + } +} + +// Helper function takes a string representation of uncoreFreqType and returns its equivalent +// uncoreFreqType. If the string is not a valid frequency type, an error is returned. 
+func toUncoreFreqType(freqType string) (uncoreFreqType, error) { + switch freqType { + case "initial_max": + return initialMax, nil + case "initial_min": + return initialMin, nil + case "max": + return customizedMax, nil + case "min": + return customizedMin, nil + case "current": + return current, nil + default: + return 0, fmt.Errorf("unsupported uncore frequency type %q", freqType) + } +} + +// getUncoreFreqPath is a helper function that takes a package ID, a die ID and a frequency type and returns the +// relative path (package/die directory joined with the frequency type file name), or an error for an unsupported type. +func getUncoreFreqPath(packageID, dieID int, freqType string) (string, error) { + uFreqType, err := toUncoreFreqType(freqType) + if err != nil { + return "", err + } + freqTypeFile := fmt.Sprintf(freqTypeFilePattern, uFreqType) + + // zero-pad packageID and dieID to a minimum width of two digits + packageIDPrefix := fmt.Sprintf(packageIDPattern, fmt.Sprintf("%02d", packageID)) + dieIDPrefix := fmt.Sprintf(dieIDPattern, fmt.Sprintf("%02d", dieID)) + prefix := fmt.Sprintf("%s_%s", packageIDPrefix, dieIDPrefix) + + return filepath.Join(prefix, freqTypeFile), nil +} + +// uncoreFreqReader represents a mechanism for reading uncore frequency values exposed via filesystem. +type uncoreFreqReader interface { + init() error + + // getUncoreFrequencyMhz takes a package ID, die ID and a frequency type and returns its value. + getUncoreFrequencyMhz(packageID, dieID int, freqType string) (float64, error) +} + +// uncoreFreqData reads uncore frequency values exposed via filesystem. Implements uncoreFreqReader interface. +type uncoreFreqData struct { + uncoreFreqBasePath string +} + +// init checks that uncoreFreqBasePath is non-empty and passes the checkFile validation. +// TODO: Consider removing this method.
+func (u *uncoreFreqData) init() error { + if len(u.uncoreFreqBasePath) == 0 { + return errors.New("base path of uncore frequency cannot be empty") + } + if err := checkFile(u.uncoreFreqBasePath); err != nil { + return fmt.Errorf("invalid base path of uncore frequency: %w", err) + } + return nil +} + +// getUncoreFrequencyMhz retrieves the uncore frequency value, in MHz, for the given package ID and die ID +// and the specified frequency type. +func (u *uncoreFreqData) getUncoreFrequencyMhz(packageID, dieID int, freqType string) (float64, error) { + // Create the path to the frequency file based on the input parameters. + freqPath, err := getUncoreFreqPath(packageID, dieID, freqType) + if err != nil { + return 0, fmt.Errorf("failed to get frequency path: %w", err) + } + + // Read the contents of the frequency file. + path := filepath.Join(u.uncoreFreqBasePath, freqPath) + content, err := readFile(path) + if err != nil { + return 0, fmt.Errorf("failed to read frequency file: %w", err) + } + + // Convert the file contents to a float64 value. 
+ freqKhz, err := strconv.ParseFloat(strings.TrimRight(string(content), "\n"), 64) + if err != nil { + return 0, fmt.Errorf("failed to convert frequency file content to float64: %w", err) + } + return freqKhz * fromKiloHertzToMegaHertzRatio, nil +} diff --git a/uncorefreq_test.go b/uncorefreq_test.go new file mode 100644 index 0000000..05f210a --- /dev/null +++ b/uncorefreq_test.go @@ -0,0 +1,270 @@ +// Copyright (C) 2023 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 + +//go:build linux && amd64 + +package powertelemetry + +import ( + "errors" + "testing" + + "github.com/stretchr/testify/require" +) + +func TestUncoreFreqTypeToString(t *testing.T) { + t.Run("InitialMax", func(t *testing.T) { + freqType := uncoreFreqType(0) + require.Equal(t, "initial_max", freqType.String()) + }) + t.Run("InitialMin", func(t *testing.T) { + freqType := uncoreFreqType(1) + require.Equal(t, "initial_min", freqType.String()) + }) + t.Run("CustomizedMax", func(t *testing.T) { + freqType := uncoreFreqType(2) + require.Equal(t, "max", freqType.String()) + }) + t.Run("CustomizedMin", func(t *testing.T) { + freqType := uncoreFreqType(3) + require.Equal(t, "min", freqType.String()) + }) + t.Run("Current", func(t *testing.T) { + freqType := uncoreFreqType(4) + require.Equal(t, "current", freqType.String()) + }) + t.Run("Invalid", func(t *testing.T) { + freqType := uncoreFreqType(5) + require.Equal(t, "", freqType.String()) + }) +} + +func TestGetUncoreFrequencyPath(t *testing.T) { + testCases := []struct { + name string + packageID int + dieID int + freqType string + expected string + err error + }{ + { + name: "InvalidFreqType", + packageID: 1, + dieID: 0, + freqType: "invalid", + expected: "", + err: errors.New("unsupported uncore frequency type \"invalid\""), + }, + { + name: "InitialMax", + packageID: 1, + dieID: 0, + freqType: "initial_max", + expected: "package_01_die_00/initial_max_freq_khz", + err: nil, + }, + { + name: "InitialMin", + packageID: 1, + dieID: 0, + 
freqType: "initial_min", + expected: "package_01_die_00/initial_min_freq_khz", + err: nil, + }, + { + name: "CustomizedMax", + packageID: 1, + dieID: 0, + freqType: "max", + expected: "package_01_die_00/max_freq_khz", + err: nil, + }, + { + name: "CustomizedMin", + packageID: 1, + dieID: 0, + freqType: "min", + expected: "package_01_die_00/min_freq_khz", + err: nil, + }, + { + name: "Current", + packageID: 0, + dieID: 1, + freqType: "current", + expected: "package_00_die_01/current_freq_khz", + err: nil, + }, + } + + for _, tc := range testCases { + t.Run(tc.name, func(t *testing.T) { + uncoreFreqPath, err := getUncoreFreqPath(tc.packageID, tc.dieID, tc.freqType) + if tc.err != nil { + require.Error(t, err) + require.ErrorContains(t, err, tc.err.Error()) + } else { + require.NoError(t, err) + require.Equal(t, tc.expected, uncoreFreqPath) + } + }) + } +} + +func TestUncoreFreqData_Init(t *testing.T) { + testCases := []struct { + name string + uncoreFreqPath string + err error + }{ + { + name: "UncoreFreqPathEmpty", + uncoreFreqPath: "", + err: errors.New("base path of uncore frequency cannot be empty"), + }, + { + name: "UncoreFreqPathNotExist", + uncoreFreqPath: "/dummy/path", + err: errors.New("invalid base path of uncore frequency"), + }, + { + name: "UncoreFreqPathValid", + uncoreFreqPath: "testdata/intel_uncore_frequency", + err: nil, + }, + } + + for _, tc := range testCases { + t.Run(tc.name, func(t *testing.T) { + uFreqData := &uncoreFreqData{ + uncoreFreqBasePath: tc.uncoreFreqPath, + } + + err := uFreqData.init() + if tc.err != nil { + require.ErrorContains(t, err, tc.err.Error()) + } else { + require.NoError(t, err) + } + }) + } +} + +func TestGetUncoreFrequencyMhz(t *testing.T) { + testCases := []struct { + name string + packageID int + dieID int + freqType string + expected float64 + err error + }{ + { + name: "InitialMax", + packageID: 10, + dieID: 3, + freqType: "initial_max", + expected: 2000, + err: nil, + }, + { + name: "InitialMin", + packageID: 
10, + dieID: 3, + freqType: "initial_min", + expected: 1000, + err: nil, + }, + { + name: "CustomizedMax", + packageID: 10, + dieID: 3, + freqType: "max", + expected: 1900, + err: nil, + }, + { + name: "CustomizedMin", + packageID: 10, + dieID: 3, + freqType: "min", + expected: 1100, + err: nil, + }, + { + name: "Current", + packageID: 10, + dieID: 3, + freqType: "current", + expected: 1500, + err: nil, + }, + { + name: "InvalidInitialMaxValue", + packageID: 9, + dieID: 12, + freqType: "initial_max", + expected: 0, + err: errors.New("failed to convert frequency file content to float64"), + }, + { + name: "InvalidInitialMinValue", + packageID: 9, + dieID: 12, + freqType: "initial_min", + expected: 0, + err: errors.New("failed to convert frequency file content to float64"), + }, + { + name: "InvalidCustomizedMaxValue", + packageID: 9, + dieID: 12, + freqType: "max", + expected: 0, + err: errors.New("failed to convert frequency file content to float64"), + }, + { + name: "InvalidCustomizedMinValue", + packageID: 9, + dieID: 12, + freqType: "min", + expected: 0, + err: errors.New("failed to convert frequency file content to float64"), + }, + { + name: "InvalidFreqType", + packageID: 9, + dieID: 12, + freqType: "invalid", + expected: 0, + err: errors.New("failed to get frequency path: unsupported uncore frequency type \"invalid\""), + }, + { + name: "FreqTypeFileNotExist", + packageID: 9, + dieID: 12, + freqType: "current", + expected: 0, + err: errors.New("failed to read frequency file: file \"testdata/intel_uncore_frequency/package_09_die_12/current_freq_khz\" does not exist"), + }, + } + + u := &uncoreFreqData{ + uncoreFreqBasePath: "testdata/intel_uncore_frequency", + } + require.NoError(t, u.init()) + + for _, tc := range testCases { + t.Run(tc.name, func(t *testing.T) { + uncoreFreq, err := u.getUncoreFrequencyMhz(tc.packageID, tc.dieID, tc.freqType) + if tc.err != nil { + require.Error(t, err) + require.ErrorContains(t, err, tc.err.Error()) + } else { + 
require.NoError(t, err) + require.Equal(t, tc.expected, uncoreFreq) + } + }) + } +} diff --git a/unit_converter.go b/unit_converter.go new file mode 100644 index 0000000..8afa763 --- /dev/null +++ b/unit_converter.go @@ -0,0 +1,14 @@ +// Copyright (C) 2023 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 + +//go:build linux && amd64 + +package powertelemetry + +const ( + fromKiloHertzToMegaHertzRatio = 1e-3 + fromMicrojoulesToJoulesRatio = 1e-6 + fromMicrowattsToWatts = 1e-6 + fromProcessorCyclesToHertz = 1e-6 + fromNanosecondsToSecondsRatio = 1e-9 +)