From 2a0c5e7fa5289ef6bfb5a030b7ea3aadaa7c02f8 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Thu, 4 Jan 2024 19:30:39 +0000 Subject: [PATCH 01/16] Bump microsoft/setup-msbuild from 1.1.3 to 1.3.1 Bumps [microsoft/setup-msbuild](https://github.com/microsoft/setup-msbuild) from 1.1.3 to 1.3.1. - [Release notes](https://github.com/microsoft/setup-msbuild/releases) - [Changelog](https://github.com/microsoft/setup-msbuild/blob/main/building-release.md) - [Commits](https://github.com/microsoft/setup-msbuild/compare/34cfbaee7f672c76950673338facd8a73f637506...1ff57057b5cfdc39105cd07a01d78e9b0ea0c14c) --- updated-dependencies: - dependency-name: microsoft/setup-msbuild dependency-type: direct:production update-type: version-update:semver-minor ... Signed-off-by: dependabot[bot] --- .github/workflows/ci-cov-windows-report.yml | 2 +- .github/workflows/ci-cov-windows.yml | 2 +- .github/workflows/ci-windows.yml | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/ci-cov-windows-report.yml b/.github/workflows/ci-cov-windows-report.yml index 5f1b6a9b..11338e72 100644 --- a/.github/workflows/ci-cov-windows-report.yml +++ b/.github/workflows/ci-cov-windows-report.yml @@ -22,7 +22,7 @@ jobs: submodules: recursive - name: Add MSBuild to PATH - uses: microsoft/setup-msbuild@34cfbaee7f672c76950673338facd8a73f637506 # v1.1.3 + uses: microsoft/setup-msbuild@1ff57057b5cfdc39105cd07a01d78e9b0ea0c14c # v1.3.1 - name: coverity-windows run: | diff --git a/.github/workflows/ci-cov-windows.yml b/.github/workflows/ci-cov-windows.yml index 9e0da344..2eacc286 100644 --- a/.github/workflows/ci-cov-windows.yml +++ b/.github/workflows/ci-cov-windows.yml @@ -26,7 +26,7 @@ jobs: submodules: recursive - name: Add MSBuild to PATH - uses: microsoft/setup-msbuild@34cfbaee7f672c76950673338facd8a73f637506 # v1.1.3 + uses: microsoft/setup-msbuild@1ff57057b5cfdc39105cd07a01d78e9b0ea0c14c # v1.3.1 - name: coverity-windows run: | diff --git a/.github/workflows/ci-windows.yml b/.github/workflows/ci-windows.yml index 5576953d..8d6644d7 100644 --- a/.github/workflows/ci-windows.yml +++ b/.github/workflows/ci-windows.yml @@ -35,7 +35,7 @@ jobs: run: | cmake --build ${{github.workspace}}\build --config ${{env.BUILD_TYPE}} --parallel - name: Add MSBuild to PATH - uses: microsoft/setup-msbuild@34cfbaee7f672c76950673338facd8a73f637506 # v1.1.3 + uses: microsoft/setup-msbuild@1ff57057b5cfdc39105cd07a01d78e9b0ea0c14c # v1.3.1 - name: Build MSR driver run: | chdir ${{github.workspace}}\src\WinMSRDriver From b6c960039c42f94882392d61f1ffc86fb4b1e89c Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Thu, 4 Jan 2024 19:30:43 +0000 Subject: [PATCH 02/16] Bump docker/setup-buildx-action from 1.7.0 to 3.0.0 Bumps [docker/setup-buildx-action](https://github.com/docker/setup-buildx-action) from 1.7.0 to 3.0.0. - [Release notes](https://github.com/docker/setup-buildx-action/releases) - [Commits](https://github.com/docker/setup-buildx-action/compare/f211e3e9ded2d9377c8cadc4489a4e38014bc4c9...f95db51fddba0c2d1ec667646a06c2ce06100226) --- updated-dependencies: - dependency-name: docker/setup-buildx-action dependency-type: direct:production update-type: version-update:semver-major ... Signed-off-by: dependabot[bot] --- .github/workflows/docker.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/docker.yml b/.github/workflows/docker.yml index c7e77546..f5c4a211 100644 --- a/.github/workflows/docker.yml +++ b/.github/workflows/docker.yml @@ -34,7 +34,7 @@ jobs: uses: docker/setup-qemu-action@68827325e0b33c7199eb31dd4e31fbe9023e06e3 # v3.0.0 - name: Set up Docker Buildx - uses: docker/setup-buildx-action@f211e3e9ded2d9377c8cadc4489a4e38014bc4c9 # v1.7.0 + uses: docker/setup-buildx-action@f95db51fddba0c2d1ec667646a06c2ce06100226 # v3.0.0 - name: Cache Docker layers uses: actions/cache@704facf57e6136b1bc63b828d79edcd491f0ee84 # v3.3.2 From aa565e6f6bab99ae542020e08b4003869b2b2c78 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Thu, 4 Jan 2024 19:30:50 +0000 Subject: [PATCH 03/16] Bump cross-platform-actions/action from 0.21.0 to 0.22.0 Bumps [cross-platform-actions/action](https://github.com/cross-platform-actions/action) from 0.21.0 to 0.22.0. - [Release notes](https://github.com/cross-platform-actions/action/releases) - [Changelog](https://github.com/cross-platform-actions/action/blob/master/changelog.md) - [Commits](https://github.com/cross-platform-actions/action/compare/0e13474a2ba7ba382e217ad80230150266611323...5800fa0060a22edf69992a779adac3d2bb3a6f8a) --- updated-dependencies: - dependency-name: cross-platform-actions/action dependency-type: direct:production update-type: version-update:semver-minor ... Signed-off-by: dependabot[bot] --- .github/workflows/freebsd_build.yml | 2 +- .github/workflows/freebsd_scan_build.yml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/freebsd_build.yml b/.github/workflows/freebsd_build.yml index b8ca676e..3b256ba2 100644 --- a/.github/workflows/freebsd_build.yml +++ b/.github/workflows/freebsd_build.yml @@ -25,7 +25,7 @@ jobs: submodules: recursive - name: build in FreeBSD VM id: build - uses: cross-platform-actions/action@0e13474a2ba7ba382e217ad80230150266611323 # v0.21.0 + uses: cross-platform-actions/action@5800fa0060a22edf69992a779adac3d2bb3a6f8a # v0.22.0 with: memory: 2048 shell: sh diff --git a/.github/workflows/freebsd_scan_build.yml b/.github/workflows/freebsd_scan_build.yml index a82bc34e..893e90a9 100644 --- a/.github/workflows/freebsd_scan_build.yml +++ b/.github/workflows/freebsd_scan_build.yml @@ -25,7 +25,7 @@ jobs: submodules: recursive - name: clang scan build in FreeBSD VM id: clang-scan-build - uses: cross-platform-actions/action@0e13474a2ba7ba382e217ad80230150266611323 # v0.21.0 + uses: cross-platform-actions/action@5800fa0060a22edf69992a779adac3d2bb3a6f8a # v0.22.0 with: memory: 2048 shell: sh From ad5d7c97b8fc01349dbaae9de2798bb7de2f4f55 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Thu, 4 Jan 2024 19:30:46 +0000 Subject: [PATCH 04/16] Bump actions/dependency-review-action from 3.1.4 to 3.1.5 Bumps [actions/dependency-review-action](https://github.com/actions/dependency-review-action) from 3.1.4 to 3.1.5. - [Release notes](https://github.com/actions/dependency-review-action/releases) - [Commits](https://github.com/actions/dependency-review-action/compare/01bc87099ba56df1e897b6874784491ea6309bc4...c74b580d73376b7750d3d2a50bfb8adc2c937507) --- updated-dependencies: - dependency-name: actions/dependency-review-action dependency-type: direct:production update-type: version-update:semver-patch ... Signed-off-by: dependabot[bot] --- .github/workflows/dependency-review.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/dependency-review.yml b/.github/workflows/dependency-review.yml index 49fda441..f4a8bb47 100644 --- a/.github/workflows/dependency-review.yml +++ b/.github/workflows/dependency-review.yml @@ -26,4 +26,4 @@ jobs: - name: 'Checkout Repository' uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1 - name: 'Dependency Review' - uses: actions/dependency-review-action@01bc87099ba56df1e897b6874784491ea6309bc4 # v3.1.4 + uses: actions/dependency-review-action@c74b580d73376b7750d3d2a50bfb8adc2c937507 # v3.1.5 From fb3c0b134ac20582a430e2e3e376c88dbd2bb6b4 Mon Sep 17 00:00:00 2001 From: "Dementiev, Roman" Date: Wed, 10 Jan 2024 13:34:22 +0100 Subject: [PATCH 05/16] make PCM::getMaxNumOfCBoxes more robust Change-Id: I88571ef5803963285ebef98b67344452a81f5b8a --- src/cpucounters.cpp | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/src/cpucounters.cpp b/src/cpucounters.cpp index 0e7e36fa..095fc71b 100644 --- a/src/cpucounters.cpp +++ b/src/cpucounters.cpp @@ -9169,6 +9169,10 @@ uint32 PCM::getMaxNumOfCBoxes() const num += (uint32)weight32(value); delete h; } + else + { + num = 0; + } } catch (std::exception& e) { @@ -9201,11 +9205,11 @@ uint32 PCM::getMaxNumOfCBoxes() const num = (uint32)num_phys_cores_per_socket; } #ifdef PCM_USE_PERF - if (num == 0) + if (num <= 0) { num = (uint32)enumeratePerfPMUs("cbox", 100).size(); } - if (num == 0) + if (num <= 0) { num = (uint32)enumeratePerfPMUs("cha", 100).size(); } From 78e7235412413f502d888f06091482d208dfb9ce Mon Sep 17 00:00:00 2001 From: Roman Dementiev Date: Mon, 15 Jan 2024 08:54:45 +0100 Subject: [PATCH 06/16] pcm-numa add -c option, pcm-msr add a few options (#522) * pcm-numa add -c option, pcm-msr add a few options --------- Authored-by: Galbi, Duane E --- src/pcm-msr.cpp | 164 +++++++++++++++++++++++++++++++++-------------- src/pcm-numa.cpp | 16 ++++- src/utils.cpp | 37 +++++++++++ src/utils.h | 2 + 4 files changed, 170 insertions(+), 49 deletions(-) diff --git a/src/pcm-msr.cpp b/src/pcm-msr.cpp index 86b3dccd..ca6f5263 100644 --- a/src/pcm-msr.cpp +++ b/src/pcm-msr.cpp @@ -16,6 +16,9 @@ #ifdef _MSC_VER #include "freegetopt/getopt.h" #endif +#include +#include +#include using namespace pcm; @@ -24,16 +27,25 @@ void print_usage(const char * progname) std::cout << "Usage " << progname << " [-w value] [-c core] [-a] [-d] msr\n\n"; std::cout << " Reads/writes specified msr (model specific register) \n"; std::cout << " -w value : write the value before reading \n"; - std::cout << " -c core : perform msr read/write on specified core (default is 0)\n"; + std::cout << " -c corelist : perform msr read/write on specified cores (default is 0)\n"; + std::cout << " (examples: -c 10 -c 10-11 -c 4,6,12-20,6)\n"; + std::cout << " -x : print core number in hex (instead of decimal)\n"; std::cout << " -b low:high : read or write only low..high bits of the register\n"; std::cout << " -d : output all numbers in dec (default is hex)\n"; - std::cout << " -a : perform msr read/write operations on all cores\n"; + std::cout << " -a : perform msr read/write operations on all cores (same as -c -1)\n"; + std::cout << " -s : iterate with seconds between each iteration\n"; + std::cout << " -o : write results of each iteration to file \n"; std::cout << " --version : print application version\n"; std::cout << "\n"; } PCM_MAIN_NOTHROW; +bool outflag = false; +FILE *ofile; +int loop_cnt = 0; +std::list corelist; + int mainThrows(int argc, char * argv[]) { if(print_version(argc, argv)) @@ -45,13 +57,15 @@ int mainThrows(int argc, char * argv[]) uint64 value = 0; bool write = false; - int core = 0; + bool core_in_dec = true; int msr = -1; bool dec = false; std::pair bits{-1, -1}; + float sleep_delay = -1; + std::string outfile; int my_opt = -1; - while ((my_opt = getopt(argc, argv, "w:c:dab:")) != -1) + while ((my_opt = getopt(argc, argv, "xw:c:dab:s:o:")) != -1) { switch (my_opt) { @@ -59,14 +73,24 @@ int mainThrows(int argc, char * argv[]) write = true; value = read_number(optarg); break; + case 'x': + core_in_dec = false; + break; + case 's': + sleep_delay = atof(optarg); + break; + case 'o': + outfile = optarg; + break; case 'c': - core = (int)read_number(optarg); + corelist = extract_integer_list(optarg); break; case 'd': dec = true; break; case 'a': - core = -1; + corelist.clear(); + corelist.push_back(-1); break; case 'b': bits = parseBitsParameter(optarg); @@ -76,6 +100,12 @@ int mainThrows(int argc, char * argv[]) return -1; } } + if (corelist.size()==0) corelist.push_back(0); + if (1==2){ + for (auto const &v : corelist){ + printf("coreid=%d\n",v); + } + } if (optind >= argc) { @@ -99,56 +129,94 @@ int mainThrows(int argc, char * argv[]) return -1; } #endif - auto doOne = [&dec, &write, &msr, &bits](int core, uint64 value) - { - try { - MsrHandle h(core); - if (!dec) std::cout << std::hex << std::showbase; - if (!readOldValueHelper(bits, value, write, [&h, &msr](uint64 & old_value){ return h.read(msr, &old_value) == 8; })) - { - std::cout << " Read error!\n"; - return; - } - if (write) + if (outfile.length() > 0){ + outflag = true; + ofile = fopen(outfile.c_str(),"w"); + if (ofile==NULL){ + printf("ERROR: can not open '%s' (skipping write)\n",outfile.c_str()); + printf(" (maybe a sudo issue .. need o+rwx on directory)\n"); + outflag = false; + } + } + while(1){ + for (std::list::iterator it=corelist.begin(); it != corelist.end(); ++it){ + int core = *it; + + // lambda funtion [) + auto doOne = [&dec, &write, &msr, &bits, &it, &core_in_dec](int core, uint64 value) { - std::cout << " Writing " << value << " to MSR " << msr << " on core " << core << "\n"; - if (h.write(msr, value) != 8) + try { + MsrHandle h(core); + if (!dec) std::cout << std::hex << std::showbase; + if (!readOldValueHelper(bits, value, write, [&h, &msr](uint64 & old_value){ return h.read(msr, &old_value) == 8; })) + { + std::cout << " Read error!\n"; + return; + } + if (write) + { + std::cout << " Writing " << value << " to MSR " << msr << " on core " << core << "\n"; + if (h.write(msr, value) != 8) + { + std::cout << " Write error!\n"; + } + } + value = 0; + if (h.read(msr, &value) == 8) + { + uint64 value2 = value; + extractBitsPrintHelper(bits, value, dec); + char cname[100]; + if (core_in_dec) sprintf(cname,"%d",core); + else sprintf(cname,"0x%x",core); + std::cout << " from MSR " << msr << " on core " << cname << "\n"; + auto itx = it; + itx++; + if (itx == corelist.end()) std::cout << "\n"; + if (outflag){ + if (bits.first >= 0){ + uint32 value3 = extract_bits(value2,bits.first,bits.second); + if (dec)fprintf(ofile,"%d,%u\n",loop_cnt,value3); + else fprintf(ofile,"%d,0x%x\n",loop_cnt,value3); + }else{ + if (dec)fprintf(ofile,"%d,%llu\n",loop_cnt,value2); + else fprintf(ofile,"%d,0x%llx\n",loop_cnt,value2); + } + fflush(ofile); + } + } + else + { + std::cout << " Read error!\n"; + } + } + catch (std::exception & e) { - std::cout << " Write error!\n"; + std::cerr << "Error accessing MSRs: " << e.what() << "\n"; + std::cerr << "Please check if the program can access MSR drivers.\n"; } - } - value = 0; - if (h.read(msr, &value) == 8) + }; // end of lambda definition + + if (core >= 0) { - extractBitsPrintHelper(bits, value, dec); - std::cout << " from MSR " << msr << " on core " << core << "\n\n"; + doOne(core, value); } else { - std::cout << " Read error!\n"; - } - } - catch (std::exception & e) - { - std::cerr << "Error accessing MSRs: " << e.what() << "\n"; - std::cerr << "Please check if the program can access MSR drivers.\n"; - } - }; - if (core >= 0) - { - doOne(core, value); - } - else - { - set_signal_handlers(); - auto m = PCM::getInstance(); - for (uint32 i = 0; i < m->getNumCores(); ++i) - { - if (m->isCoreOnline(i)) - { - doOne(i, value); + set_signal_handlers(); + auto m = PCM::getInstance(); + for (uint32 i = 0; i < m->getNumCores(); ++i) + { + if (m->isCoreOnline(i)) + { + doOne(i, value); + } + } } - } + } + if (sleep_delay == -1) break; + loop_cnt++; + MySleepMs(sleep_delay*1000.0); } return 0; } diff --git a/src/pcm-numa.cpp b/src/pcm-numa.cpp index e900a2c2..a6af76d0 100644 --- a/src/pcm-numa.cpp +++ b/src/pcm-numa.cpp @@ -22,6 +22,7 @@ #include #include #include +#include #include "cpucounters.h" #include "utils.h" #ifdef _MSC_VER @@ -45,6 +46,8 @@ void print_usage(const string & progname) cout << " Supported are: \n"; cout << " -h | --help | /h => print this help and exit\n"; cout << " -silent => silence information output and print only measurements\n"; + cout << " -c=corelist => check specified cores (default all cores)\n"; + cout << " (examples: -c=10 -c=10-11 -c=4,6,12-20,6)\n"; cout << " --version => print application version\n"; cout << " -pid PID | /pid PID => collect core metrics only for specified process ID\n"; cout << " -csv[=file.csv] | /csv[=file.csv] => output compact CSV format to screen or\n" @@ -120,6 +123,8 @@ int mainThrows(int argc, char * argv[]) parsePID(argc, argv, pid); + std::list corelist; + if (argc > 1) do { argv++; @@ -130,6 +135,11 @@ int mainThrows(int argc, char * argv[]) { continue; } + else if (extract_argument_value(*argv, {"-c"}, arg_value)) + { + const char *pstr = arg_value.c_str(); + corelist = extract_integer_list(pstr); + } else if (check_argument_equals(*argv, {"--help", "-h", "/h"})) { print_usage(program); @@ -213,6 +223,9 @@ int mainThrows(int argc, char * argv[]) uint64 BeforeTime = 0, AfterTime = 0; SystemCounterState SysBeforeState, SysAfterState; const uint32 ncores = m->getNumCores(); + if (corelist.size()==0){ + for (int ii = 0; ii < (int)ncores; ++ii) corelist.push_back(ii); + } vector BeforeState, AfterState; vector DummySocketStates; @@ -262,8 +275,9 @@ int mainThrows(int argc, char * argv[]) else cout << "Core | IPC | Instructions | Cycles | Local DRAM accesses | Remote DRAM Accesses \n"; - for (uint32 i = 0; i < ncores; ++i) + for (int ix : corelist) { + uint32 i = ix; if (csv) cout << i << ","; else diff --git a/src/utils.cpp b/src/utils.cpp index 2f68d53b..c8de7e87 100644 --- a/src/utils.cpp +++ b/src/utils.cpp @@ -750,6 +750,43 @@ double parse_delay(const char *arg, const std::string& progname, print_usage_fun } } +std::list extract_integer_list(const char *optarg){ + const char *pstr = optarg; + std::list corelist; + std::string snum1, snum2; + std::string *pnow = &snum1; + char nchar = ','; + while(*pstr != '\0' || nchar != ','){ + nchar = ','; + if (*pstr != '\0'){ + nchar = *pstr; + pstr++; + } + //printf("c=%c\n",nchar); + if (nchar=='-' && pnow == &snum1 && snum1.size()>0){ + pnow = &snum2; + }else if (nchar == ','){ + if (!snum1.empty() && !snum2.empty()){ + int num1 = atoi(snum1.c_str()), num2 =atoi(snum2.c_str()); + if (num2 < num1) std::swap(num1,num2); + if (num1 < 0) num1 = 0; + for (int ix=num1; ix <= num2; ix++){ + corelist.push_back(ix); + } + }else if (!snum1.empty()){ + int num1 = atoi(snum1.c_str()); + corelist.push_back(num1); + } + snum1.clear(); + snum2.clear(); + pnow = &snum1; + }else if (nchar != ' '){ + pnow->push_back(nchar); + } + } + return(corelist); +} + bool extract_argument_value(const char* arg, std::initializer_list arg_names, std::string& value) { const auto arg_len = strlen(arg); diff --git a/src/utils.h b/src/utils.h index 06ebd823..09fea74c 100644 --- a/src/utils.h +++ b/src/utils.h @@ -15,6 +15,7 @@ #include #include "types.h" #include +#include #include #include #include @@ -73,6 +74,7 @@ namespace pcm { #endif // _MSC_VER typedef void (* print_usage_func)(const std::string & progname); +std::list extract_integer_list(const char *optarg); double parse_delay(const char * arg, const std::string & progname, print_usage_func print_usage_func); bool extract_argument_value(const char * arg, std::initializer_list arg_names, std::string & value); bool check_argument_equals(const char * arg, std::initializer_list arg_names); From f6acbda32375cffa7cc2e3ed69c86cd7f66b8ba8 Mon Sep 17 00:00:00 2001 From: "Dementiev, Roman" Date: Wed, 17 Jan 2024 15:03:16 +0100 Subject: [PATCH 07/16] refactor cbo uncore pmu code Change-Id: I176db386886c53a2df758db34191c8cb3b461d59 --- src/cpucounters.cpp | 286 +++++++++++++++++--------------------------- src/cpucounters.h | 209 +++++++++++++++++++++++++++++--- src/pcm-memory.cpp | 4 +- src/pcm-raw.cpp | 10 +- src/utils.h | 101 ++++++++++++++++ 5 files changed, 408 insertions(+), 202 deletions(-) diff --git a/src/cpucounters.cpp b/src/cpucounters.cpp index 095fc71b..f631e7f5 100644 --- a/src/cpucounters.cpp +++ b/src/cpucounters.cpp @@ -125,106 +125,6 @@ bool PCM::initWinRing0Lib() #define cpu_set_t cpuset_t #endif -class TemporalThreadAffinity // speedup trick for Linux, FreeBSD, DragonFlyBSD, Windows -{ - TemporalThreadAffinity(); // forbidden -#if defined(__FreeBSD__) || (defined(__DragonFly__) && __DragonFly_version >= 400707) - cpu_set_t old_affinity; - bool restore; - -public: - TemporalThreadAffinity(uint32 core_id, bool checkStatus = true, const bool restore_ = true) - : restore(restore_) - { - assert(core_id < 1024); - auto res = pthread_getaffinity_np(pthread_self(), sizeof(cpu_set_t), &old_affinity); - if (res != 0) - { - std::cerr << "ERROR: pthread_getaffinity_np for core " << core_id << " failed with code " << res << "\n"; - throw std::exception(); - } - cpu_set_t new_affinity; - CPU_ZERO(&new_affinity); - CPU_SET(core_id, &new_affinity); - // CPU_CMP() returns true if old_affinity is NOT equal to new_affinity - if (!(CPU_CMP(&old_affinity, &new_affinity))) - { - restore = false; - return; // the same affinity => return - } - res = pthread_setaffinity_np(pthread_self(), sizeof(cpu_set_t), &new_affinity); - if (res != 0 && checkStatus) - { - std::cerr << "ERROR: pthread_setaffinity_np for core " << core_id << " failed with code " << res << "\n"; - throw std::exception(); - } - } - ~TemporalThreadAffinity() - { - if (restore) pthread_setaffinity_np(pthread_self(), sizeof(cpu_set_t), &old_affinity); - } - bool supported() const { return true; } - -#elif defined(__linux__) - cpu_set_t * old_affinity; - static constexpr auto maxCPUs = 8192; - const size_t set_size; - bool restore; - -public: - TemporalThreadAffinity(const uint32 core_id, bool checkStatus = true, const bool restore_ = true) - : set_size(CPU_ALLOC_SIZE(maxCPUs)), restore(restore_) - { - assert(core_id < maxCPUs); - old_affinity = CPU_ALLOC(maxCPUs); - assert(old_affinity); - auto res = pthread_getaffinity_np(pthread_self(), set_size, old_affinity); - if (res != 0) - { - std::cerr << "ERROR: pthread_getaffinity_np for core " << core_id << " failed with code " << res << "\n"; - throw std::exception(); - } - cpu_set_t * new_affinity = CPU_ALLOC(maxCPUs); - assert(new_affinity); - CPU_ZERO_S(set_size, new_affinity); - CPU_SET_S(core_id, set_size, new_affinity); - if (CPU_EQUAL_S(set_size, old_affinity, new_affinity)) - { - CPU_FREE(new_affinity); - restore = false; - return; - } - res = pthread_setaffinity_np(pthread_self(), set_size, new_affinity); - CPU_FREE(new_affinity); - if (res != 0 && checkStatus) - { - std::cerr << "ERROR: pthread_setaffinity_np for core " << core_id << " failed with code " << res << "\n"; - throw std::exception(); - } - } - ~TemporalThreadAffinity() - { - if (restore) pthread_setaffinity_np(pthread_self(), set_size, old_affinity); - CPU_FREE(old_affinity); - } - bool supported() const { return true; } -#elif defined(_MSC_VER) - ThreadGroupTempAffinity affinity; -public: - TemporalThreadAffinity(uint32 core, bool checkStatus = true, const bool restore = true) - : affinity(core, checkStatus, restore) - { - } - bool supported() const { return true; } -#else // not implemented for os x -public: - TemporalThreadAffinity(uint32) { } - TemporalThreadAffinity(uint32, bool) {} - bool supported() const { return false; } -#endif -}; - - PCM * PCM::instance = NULL; /* @@ -1602,7 +1502,7 @@ void PCM::printSystemTopology() const if (hybrid == false) { - std::cerr << "Last level cache slices per socket: " << getMaxNumOfCBoxes() << "\n"; + std::cerr << "Last level cache slices per socket: " << getMaxNumOfUncorePMUs(CBO_PMU_ID) << "\n"; } std::cerr << "Core PMU (perfmon) version: " << perfmon_version << "\n"; std::cerr << "Number of core PMU generic (programmable) counters: " << core_gen_counter_num_max << "\n"; @@ -2026,6 +1926,13 @@ void PCM::initUncoreObjects() initUncorePMUsDirect(); } + auto countPMUs = [&](const uint32 s, const int pmu_id) + { + size_t count = 0; + forAllUncorePMUs(s, pmu_id, [&count](UncorePMU&) { ++count; }); + return count; + }; + std::cerr << "Info: " << uboxPMUs.size() << " UBOX units detected.\n"; for (uint32 s = 0; s < (uint32)num_sockets; ++s) { @@ -2033,7 +1940,7 @@ void PCM::initUncoreObjects() " " << ((s < pcuPMUs.size()) ? pcuPMUs[s].size() : 0) << " PCU units detected." " " << ((s < iioPMUs.size()) ? iioPMUs[s].size() : 0) << " IIO units detected." " " << ((s < irpPMUs.size()) ? irpPMUs[s].size() : 0) << " IRP units detected." - " " << ((s < cboPMUs.size()) ? cboPMUs[s].size() : 0) << " CHA/CBO units detected." + " " << countPMUs(s, CBO_PMU_ID) << " CHA/CBO units detected." " " << ((s < mdfPMUs.size()) ? mdfPMUs[s].size() : 0) << " MDF units detected." " " << ((s < cxlPMUs.size()) ? cxlPMUs[s].size() : 0) << " CXL units detected." "\n"; @@ -2080,6 +1987,7 @@ void PCM::globalFreezeUncoreCountersInternal(const unsigned long long int freeze void PCM::initUncorePMUsDirect() { + uncorePMUs.resize(num_sockets); pcuPMUs.resize(num_sockets); for (uint32 s = 0; s < (uint32)num_sockets; ++s) { @@ -2528,17 +2436,16 @@ void PCM::initUncorePMUsDirect() if (hasPCICFGUncore() && MSR.size()) { - cboPMUs.resize(num_sockets); for (uint32 s = 0; s < (uint32)num_sockets; ++s) { + uncorePMUs[s].resize(1); auto & handle = MSR[socketRefCore[s]]; - for (uint32 cbo = 0; cbo < getMaxNumOfCBoxes(); ++cbo) + for (uint32 cbo = 0; cbo < getMaxNumOfCBoxesInternal(); ++cbo) { assert(CX_MSR_PMON_BOX_CTL(cbo)); const auto filter1MSR = CX_MSR_PMON_BOX_FILTER1(cbo); std::shared_ptr filter1MSRHandle = filter1MSR ? std::make_shared(handle, filter1MSR) : std::shared_ptr(); - cboPMUs[s].push_back( - UncorePMU( + uncorePMUs[s][0][CBO_PMU_ID].push_back(std::make_shared( std::make_shared(handle, CX_MSR_PMON_BOX_CTL(cbo)), std::make_shared(handle, CX_MSR_PMON_CTLY(cbo, 0)), std::make_shared(handle, CX_MSR_PMON_CTLY(cbo, 1)), @@ -2600,6 +2507,7 @@ void PCM::initUncorePMUsDirect() #ifdef PCM_USE_PERF std::vector enumeratePerfPMUs(const std::string & type, int max_id); void populatePerfPMUs(unsigned socket_, const std::vector & ids, std::vector & pmus, bool fixed, bool filter0 = false, bool filter1 = false); +void populatePerfPMUs(unsigned socket_, const std::vector& ids, std::vector& pmus, bool fixed, bool filter0 = false, bool filter1 = false); std::vector > enumerateIDXPerfPMUs(const std::string & type, int max_id); void populateIDXPerfPMUs(unsigned socket_, const std::vector > & ids, std::vector & pmus); @@ -2608,17 +2516,18 @@ void populateIDXPerfPMUs(unsigned socket_, const std::vector > & out) { @@ -4726,13 +4635,9 @@ void PCM::cleanupUncorePMUs(const bool silent) pmu.second.cleanup(); } } - for (auto & sCBOPMUs : cboPMUs) - { - for (auto & pmu : sCBOPMUs) - { - pmu.cleanup(); - } - } + + forAllUncorePMUs([](UncorePMU & p) { p.cleanup(); }); + for (auto& sMDFPMUs : mdfPMUs) { for (auto& pmu : sMDFPMUs) @@ -5856,13 +5761,7 @@ void PCM::freezeServerUncoreCounters() } } - if (size_t(i) < cboPMUs.size()) - { - for (auto& pmu : cboPMUs[i]) - { - pmu.freeze(UNC_PMON_UNIT_CTL_FRZ_EN); - } - } + forAllUncorePMUs(i, CBO_PMU_ID, [](UncorePMU& pmu) { pmu.freeze(UNC_PMON_UNIT_CTL_FRZ_EN); }); if (size_t(i) < mdfPMUs.size()) { @@ -5911,13 +5810,7 @@ void PCM::unfreezeServerUncoreCounters() } } - if (size_t(i) < cboPMUs.size()) - { - for (auto& pmu : cboPMUs[i]) - { - pmu.unfreeze(UNC_PMON_UNIT_CTL_FRZ_EN); - } - } + forAllUncorePMUs(i, CBO_PMU_ID, [](UncorePMU& pmu) { pmu.unfreeze(UNC_PMON_UNIT_CTL_FRZ_EN); }); if (size_t(i) < mdfPMUs.size()) { @@ -6039,8 +5932,8 @@ void PCM::readAndAggregateUncoreMCCounters(const uint32 socket, CounterStateType { if (LLCReadMissLatencyMetricsAvailable()) { - result.TOROccupancyIAMiss += getCBOCounterState(socket, EventPosition::TOR_OCCUPANCY); - result.TORInsertsIAMiss += getCBOCounterState(socket, EventPosition::TOR_INSERTS); + result.TOROccupancyIAMiss += getUncoreCounterState(CBO_PMU_ID, socket, EventPosition::TOR_OCCUPANCY); + result.TORInsertsIAMiss += getUncoreCounterState(CBO_PMU_ID, socket, EventPosition::TOR_INSERTS); } if (LLCReadMissLatencyMetricsAvailable() || uncoreFrequencyMetricAvailable()) @@ -6074,8 +5967,8 @@ void PCM::readAndAggregateUncoreMCCounters(const uint32 socket, CounterStateType { if (hasCHA()) { - result.UncHARequests += getCBOCounterState(socket, EventPosition::REQUESTS_ALL); - result.UncHALocalRequests += getCBOCounterState(socket, EventPosition::REQUESTS_LOCAL); + result.UncHARequests += getUncoreCounterState(CBO_PMU_ID, socket, EventPosition::REQUESTS_ALL); + result.UncHALocalRequests += getUncoreCounterState(CBO_PMU_ID, socket, EventPosition::REQUESTS_LOCAL); } else { @@ -6687,13 +6580,9 @@ ServerUncoreCounterState PCM::getServerUncoreCounterState(uint32 socket) { uint32 refCore = socketRefCore[socket]; TemporalThreadAffinity tempThreadAffinity(refCore); - for (uint32 cbo = 0; socket < cboPMUs.size() && cbo < cboPMUs[socket].size() && cbo < ServerUncoreCounterState::maxCBOs; ++cbo) - { - for (int i = 0; i < ServerUncoreCounterState::maxCounters && size_t(i) < cboPMUs[socket][cbo].size(); ++i) - { - result.CBOCounter[cbo][i] = *(cboPMUs[socket][cbo].counterValue[i]); - } - } + + readUncoreCounterValues(result, socket, CBO_PMU_ID); + for (uint32 mdf = 0; socket < mdfPMUs.size() && mdf < mdfPMUs[socket].size() && mdf < ServerUncoreCounterState::maxMDFs; ++mdf) { for (int i = 0; i < ServerUncoreCounterState::maxCounters && size_t(i) < mdfPMUs[socket][mdf].size(); ++i) @@ -7987,6 +7876,44 @@ void populatePerfPMUs(unsigned socket_, const std::vector & ids, std::vecto } } +void populatePerfPMUs(unsigned socket_, const std::vector& ids, std::vector& pmus, bool fixed, bool filter0, bool filter1) +{ + for (const auto& id : ids) + { + std::array, 4> controlRegs = { + std::make_shared(socket_, id), + std::make_shared(socket_, id), + std::make_shared(socket_, id), + std::make_shared(socket_, id) + }; + std::shared_ptr counterReg0 = std::make_shared(controlRegs[0]); + std::shared_ptr counterReg1 = std::make_shared(controlRegs[1]); + std::shared_ptr counterReg2 = std::make_shared(controlRegs[2]); + std::shared_ptr counterReg3 = std::make_shared(controlRegs[3]); + std::shared_ptr fixedControlReg = std::make_shared(socket_, id, true); + std::shared_ptr fixedCounterReg = std::make_shared(fixedControlReg); + std::shared_ptr filterReg0 = std::make_shared(controlRegs, 0); + std::shared_ptr filterReg1 = std::make_shared(controlRegs, 1); + pmus.push_back( + std::make_shared( + std::make_shared(), + controlRegs[0], + controlRegs[1], + controlRegs[2], + controlRegs[3], + counterReg0, + counterReg1, + counterReg2, + counterReg3, + fixed ? fixedControlReg : std::shared_ptr(), + fixed ? fixedCounterReg : std::shared_ptr(), + filter0 ? filterReg0 : std::shared_ptr(), + filter1 ? filterReg1 : std::shared_ptr() + ) + ); + } +} + std::vector > enumerateIDXPerfPMUs(const std::string & type, int max_id) { uint32 numaNode=0xff; @@ -9146,7 +9073,7 @@ inline uint32 weight32(uint32 n) return count; } -uint32 PCM::getMaxNumOfCBoxes() const +uint32 PCM::getMaxNumOfCBoxesInternal() const { static int num = -1; if (num >= 0) @@ -9397,60 +9324,50 @@ void PCM::programPCIeEventGroup(eventGroup_t &eventGroup) void PCM::programCbo(const uint64 * events, const uint32 opCode, const uint32 nc_, const uint32 llc_lookup_tid_filter, const uint32 loc, const uint32 rem) { - for (size_t i = 0; (i < cboPMUs.size()) && MSR.size(); ++i) - { - uint32 refCore = socketRefCore[i]; - TemporalThreadAffinity tempThreadAffinity(refCore); // speedup trick for Linux - - for(uint32 cbo = 0; cbo < getMaxNumOfCBoxes() && cbo < cboPMUs[i].size(); ++cbo) + programUncorePMUs(CBO_PMU_ID, [&](UncorePMU & pmu) { - cboPMUs[i][cbo].initFreeze(UNC_PMON_UNIT_CTL_FRZ_EN); + pmu.initFreeze(UNC_PMON_UNIT_CTL_FRZ_EN); if (ICX != cpu_model && SNOWRIDGE != cpu_model && SPR != cpu_model) - programCboOpcodeFilter(opCode, cboPMUs[i][cbo], nc_, 0, loc, rem); + programCboOpcodeFilter(opCode, pmu, nc_, 0, loc, rem); - if((HASWELLX == cpu_model || BDX_DE == cpu_model || BDX == cpu_model || SKX == cpu_model) && llc_lookup_tid_filter != 0) - *cboPMUs[i][cbo].filter[0] = llc_lookup_tid_filter; + if ((HASWELLX == cpu_model || BDX_DE == cpu_model || BDX == cpu_model || SKX == cpu_model) && llc_lookup_tid_filter != 0) + *pmu.filter[0] = llc_lookup_tid_filter; - PCM::program(cboPMUs[i][cbo], events, events + ServerUncoreCounterState::maxCounters, UNC_PMON_UNIT_CTL_FRZ_EN); + PCM::program(pmu, events, events + ServerUncoreCounterState::maxCounters, UNC_PMON_UNIT_CTL_FRZ_EN); - for (int c = 0; c < ServerUncoreCounterState::maxCounters && size_t(c) < cboPMUs[i][cbo].size(); ++c) + for (int c = 0; c < ServerUncoreCounterState::maxCounters && size_t(c) < pmu.size(); ++c) { - *cboPMUs[i][cbo].counterValue[c] = 0; + *pmu.counterValue[c] = 0; } } - } + ); } void PCM::programCboRaw(const uint64* events, const uint64 filter0, const uint64 filter1) { - for (size_t i = 0; (i < cboPMUs.size()) && MSR.size(); ++i) - { - uint32 refCore = socketRefCore[i]; - TemporalThreadAffinity tempThreadAffinity(refCore); // speedup trick for Linux - - for (uint32 cbo = 0; cbo < getMaxNumOfCBoxes(); ++cbo) + programUncorePMUs(CBO_PMU_ID, [&](UncorePMU& pmu) { - cboPMUs[i][cbo].initFreeze(UNC_PMON_UNIT_CTL_FRZ_EN); + pmu.initFreeze(UNC_PMON_UNIT_CTL_FRZ_EN); - if (cboPMUs[i][cbo].filter[0].get()) + if (pmu.filter[0].get()) { - *cboPMUs[i][cbo].filter[0] = filter0; + *pmu.filter[0] = filter0; } - if (cboPMUs[i][cbo].filter[1].get()) + if (pmu.filter[1].get()) { - *cboPMUs[i][cbo].filter[1] = filter1; + *pmu.filter[1] = filter1; } - PCM::program(cboPMUs[i][cbo], events, events + 4, UNC_PMON_UNIT_CTL_FRZ_EN); + PCM::program(pmu, events, events + 4, UNC_PMON_UNIT_CTL_FRZ_EN); - for (int c = 0; c < 4; ++c) + for (int c = 0; c < ServerUncoreCounterState::maxCounters && size_t(c) < pmu.size(); ++c) { - *cboPMUs[i][cbo].counterValue[c] = 0; + *pmu.counterValue[c] = 0; } } - } + ); } void PCM::programMDF(const uint64* events) @@ -9676,16 +9593,27 @@ bool PCM::supportIDXAccelDev() const return retval; } -uint64 PCM::getCBOCounterState(const uint32 socket_, const uint32 ctr_) +uint64 PCM::getUncoreCounterState(const int pmu_id, const size_t socket, const uint32 ctr) const { uint64 result = 0; - const uint32 refCore = socketRefCore[socket_]; - TemporalThreadAffinity tempThreadAffinity(refCore); // speedup trick for Linux - - for(auto & pmu: cboPMUs[socket_]) + if (socket < uncorePMUs.size() && ctr < ServerUncoreCounterState::maxCounters) { - result += *pmu.counterValue[ctr_]; + for (size_t die = 0; die < uncorePMUs[socket].size(); ++die) + { + TemporalThreadAffinity tempThreadAffinity(socketRefCore[socket]); // speedup trick for Linux + const auto pmusIter = uncorePMUs[socket][die].find(pmu_id); + if (pmusIter != uncorePMUs[socket][die].end()) + { + for (const auto& pmu : pmusIter->second) + { + if (pmu.get()) + { + result += *(pmu->counterValue[ctr]); + } + } + } + } } return result; } @@ -9703,13 +9631,13 @@ uint64 PCM::getUncoreClocks(const uint32 socket_) PCIeCounterState PCM::getPCIeCounterState(const uint32 socket_, const uint32 ctr_) { PCIeCounterState result; - result.data = getCBOCounterState(socket_, ctr_); + result.data = getUncoreCounterState(CBO_PMU_ID, socket_, ctr_); return result; } uint64 PCM::getPCIeCounterData(const uint32 socket_, const uint32 ctr_) { - return getCBOCounterState(socket_, ctr_); + return getUncoreCounterState(CBO_PMU_ID, socket_, ctr_); } void PCM::initLLCReadMissLatencyEvents(uint64 * events, uint32 & opCode) diff --git a/src/cpucounters.h b/src/cpucounters.h index 8dffcad4..f52899d7 100644 --- a/src/cpucounters.h +++ b/src/cpucounters.h @@ -251,6 +251,9 @@ class UncorePMU HWRegisterPtr fixedCounterControl; HWRegisterPtr fixedCounterValue; HWRegisterPtr filter[2]; + enum { + maxCounters = 8 + }; UncorePMU(const HWRegisterPtr& unitControl_, const HWRegisterPtr& counterControl0, @@ -288,6 +291,8 @@ class UncorePMU void resetUnfreeze(const uint32 extra); }; +typedef std::shared_ptr UncorePMURef; + class IDX_PMU { typedef std::shared_ptr HWRegisterPtr; @@ -635,6 +640,129 @@ class PCM_API PCM bool programmed_core_pmu{false}; std::vector > MSR; std::vector > serverUncorePMUs; + + typedef std::vector UncorePMUArrayType; +public: + enum UncorePMUIDs + { + CBO_PMU_ID, + INVALID_PMU_ID + }; +private: + typedef std::unordered_map UncorePMUMapType; + // socket -> die -> pmu map -> pmu ref array + std::vector< std::vector > uncorePMUs; + + template + void forAllUncorePMUs(F f) + { + for (auto& s : uncorePMUs) + { + for (auto& d : s) + { + for (auto& p : d) + { + for (auto& e : p.second) + { + if (e.get()) + { + f(*e); + } + } + } + } + } + } + + template + void forAllUncorePMUs(const int pmu_id, F f) + { + for (auto& s : uncorePMUs) + { + for (auto& d : s) + { + for (auto& e : d[pmu_id]) + { + if (e.get()) + { + f(*e); + } + } + } + } + } + + template + void forAllUncorePMUs(const size_t socket_id, const int pmu_id, F f) + { + if (socket_id < uncorePMUs.size()) + { + for (auto& d : uncorePMUs[socket_id]) + { + for (auto& e : d[pmu_id]) + { + if (e.get()) + { + f(*e); + } + } + } + } + } + + template + void readUncoreCounterValues(T& result, const size_t socket, const int pmu_id) const + { + if (socket < uncorePMUs.size()) + { + result.Counters.resize(uncorePMUs[socket].size()); + for (size_t die = 0; die < uncorePMUs[socket].size(); ++die) + { + TemporalThreadAffinity tempThreadAffinity(socketRefCore[socket]); // speedup trick for Linux + + const auto& pmuIter = uncorePMUs[socket][die].find(pmu_id); + if (pmuIter != uncorePMUs[socket][die].end()) + { + result.Counters[die][pmu_id].resize(pmuIter->second.size()); + for (size_t unit = 0; unit < pmuIter->second.size(); ++unit) + { + auto& pmu = pmuIter->second[unit]; + for (size_t i = 0; pmu.get() != nullptr && i < pmu->size(); ++i) + { + // std::cerr << "s " << socket << " d " << die << " pmu " << pmu_id << " unit " << unit << " ctr " << i << "\n"; + result.Counters[die][pmu_id][unit][i] = *(pmu->counterValue[i]); + } + } + } + } + } + } + + uint64 getUncoreCounterState(const int pmu_id, const size_t socket, const uint32 ctr) const; + + template + void programUncorePMUs(const int pmu_id, F pmuFunc) + { + if (MSR.empty()) return; + + for (size_t socket = 0; socket < uncorePMUs.size(); ++socket) + { + for (size_t die = 0; die < uncorePMUs[socket].size(); ++die) + { + TemporalThreadAffinity tempThreadAffinity(socketRefCore[socket]); // speedup trick for Linux + + for (size_t unit = 0; unit < uncorePMUs[socket][die][pmu_id].size(); ++unit) + { + auto& pmu = uncorePMUs[socket][die][pmu_id][unit]; + if (pmu.get()) + { + pmuFunc(*pmu); + } + } + } + } + } + std::vector > pcuPMUs; std::vector > iioPMUs; std::vector > irpPMUs; @@ -645,7 +773,6 @@ class PCM_API PCM std::vector > energy_status; std::vector > dram_energy_status; std::vector > pp_energy_status; - std::vector > cboPMUs; std::vector > mdfPMUs; std::vector>> cxlPMUs; // socket X CXL ports X UNIT {0,1} @@ -693,6 +820,23 @@ class PCM_API PCM bool linux_arch_perfmon = false; public: + + size_t getMaxNumOfUncorePMUs(const int pmu_id) const + { + size_t count = 0ULL; + for (auto& s : uncorePMUs) + { + for (auto& d : s) + { + const auto iter = d.find(pmu_id); + if (iter != d.end()) + { + count += iter->second.size(); + } + } + } + return count; + } enum { MAX_PP = 1 }; // max power plane number on Intel architecture (client) enum { MAX_C_STATE = 10 }; // max C-state on Intel architecture @@ -1039,11 +1183,11 @@ class PCM_API PCM uint64 CX_MSR_PMON_BOX_FILTER1(uint32 Cbo) const; uint64 CX_MSR_PMON_CTLY(uint32 Cbo, uint32 Ctl) const; uint64 CX_MSR_PMON_BOX_CTL(uint32 Cbo) const; + uint32 getMaxNumOfCBoxesInternal() const; void programCboOpcodeFilter(const uint32 opc0, UncorePMU & pmu, const uint32 nc_, const uint32 opc1, const uint32 loc, const uint32 rem); void initLLCReadMissLatencyEvents(uint64 * events, uint32 & opCode); void initCHARequestEvents(uint64 * events); void programCbo(); - uint64 getCBOCounterState(const uint32 socket, const uint32 ctr_); template static void program(UncorePMU& pmu, const Iterator& eventsBegin, const Iterator& eventsEnd, const uint32 extra) { @@ -1185,9 +1329,6 @@ class PCM_API PCM */ unsigned getMaxRMID() const; - //! \brief Returns the number of CBO or CHA units per socket - uint32 getMaxNumOfCBoxes() const; - //! \brief Returns the number of IIO stacks per socket uint32 getMaxNumOfIIOStacks() const; @@ -2880,16 +3021,32 @@ uint64 getM3UPICounter(uint32 port, uint32 counter, const CounterStateType& befo return after.M3UPICounter[port][counter] - before.M3UPICounter[port][counter]; } -/*! \brief Direct read of CHA or CBO PMU counter (counter meaning depends on the programming: power/performance/etc) +/*! \brief Direct read of uncore PMU counter (counter meaning depends on the programming: power/performance/etc) \param counter counter number - \param cbo cbo or cha number + \param pmu_id ID of PMU (unit type: CBO, etc) + \param unit uncore unit ID \param before CPU counter state before the experiment \param after CPU counter state after the experiment */ template -uint64 getCBOCounter(uint32 cbo, uint32 counter, const CounterStateType& before, const CounterStateType& after) +uint64 getUncoreCounter(const int pmu_id, uint32 unit, uint32 counter, const CounterStateType& before, const CounterStateType& after) { - return after.CBOCounter[cbo][counter] - before.CBOCounter[cbo][counter]; + for (size_t die = 0; counter < UncorePMU::maxCounters && die < after.Counters.size(); ++die) + { + assert(die < before.Counters.size()); + const auto afterIter = after.Counters[die].find(pmu_id); + const auto beforeIter = before.Counters[die].find(pmu_id); + if (afterIter != after.Counters[die].end() && beforeIter != before.Counters[die].end()) + { + assert(afterIter->second.size() == beforeIter->second.size()); + if (unit < afterIter->second.size()) + { + return afterIter->second[unit][counter] - beforeIter->second[unit][counter]; + } + unit -= afterIter->second.size(); + } + } + return 0ULL; } /*! \brief Direct read of MDF PMU counter (counter meaning depends on the programming: power/performance/etc) @@ -3253,7 +3410,7 @@ class UncoreCounterState }; -//! \brief Server uncore power counter state +//! \brief Server uncore counter state //! class ServerUncoreCounterState : public UncoreCounterState { @@ -3262,12 +3419,11 @@ class ServerUncoreCounterState : public UncoreCounterState maxControllers = 4, maxChannels = 32, maxXPILinks = 6, - maxCBOs = 128, maxMDFs = 128, maxIIOStacks = 16, maxCXLPorts = 6, maxPUnits = 5, - maxCounters = 8 + maxCounters = UncorePMU::maxCounters }; enum EventPosition { @@ -3282,10 +3438,32 @@ class ServerUncoreCounterState : public UncoreCounterState PMMReads, PMMWrites }; -private: + + // typedef std::array CounterArrayType; + class CounterArrayType + { + std::array data; + public: + CounterArrayType() : data{{}} + { + std::fill(data.begin(), data.end(), 0ULL); + } + const uint64& operator [] (size_t i) const + { + return data[i]; + } + uint64& operator [] (size_t i) + { + return data[i]; + } + }; + typedef std::vector PMUCounterArrayType; + typedef std::unordered_map PMUMapCounterArrayType; + // die -> pmu map -> PMUs -> counters + std::vector Counters; + std::array, maxXPILinks> xPICounter; std::array, maxXPILinks> M3UPICounter; - std::array, maxCBOs> CBOCounter; std::array, maxMDFs> MDFCounter; std::array, maxIIOStacks> IIOCounter; std::array, maxIIOStacks> IRPCounter; @@ -3316,7 +3494,7 @@ class ServerUncoreCounterState : public UncoreCounterState template friend uint64 getM3UPICounter(uint32 port, uint32 counter, const CounterStateType& before, const CounterStateType& after); template - friend uint64 getCBOCounter(uint32 cbo, uint32 counter, const CounterStateType& before, const CounterStateType& after); + friend uint64 getUncoreCounter(const int pmu_id, uint32 unit, uint32 counter, const CounterStateType& before, const CounterStateType& after); template friend uint64 getMDFCounter(uint32 mdf, uint32 counter, const CounterStateType& before, const CounterStateType& after); template @@ -3352,7 +3530,6 @@ class ServerUncoreCounterState : public UncoreCounterState ServerUncoreCounterState() : xPICounter{{}}, M3UPICounter{{}}, - CBOCounter{{}}, MDFCounter{{}}, IIOCounter{{}}, IRPCounter{{}}, diff --git a/src/pcm-memory.cpp b/src/pcm-memory.cpp index a049ade9..328c3bf1 100644 --- a/src/pcm-memory.cpp +++ b/src/pcm-memory.cpp @@ -1116,11 +1116,11 @@ class CHAEventCollector uint64 result = 0; for (uint32 i = 0; i < pcm->getNumSockets(); ++i) { - for (uint32 cbo = 0; cbo < pcm->getMaxNumOfCBoxes(); ++cbo) + for (uint32 cbo = 0; cbo < pcm->getMaxNumOfUncorePMUs(PCM::CBO_PMU_ID); ++cbo) { for (uint32 ctr = 0; ctr < 4 && ctr < eventGroups[curGroup].size(); ++ctr) { - result += getCBOCounter(cbo, ctr, before[i], after[i]); + result += getUncoreCounter(PCM::CBO_PMU_ID, cbo, ctr, before[i], after[i]); } } } diff --git a/src/pcm-raw.cpp b/src/pcm-raw.cpp index 7c61d27c..b97a2aea 100644 --- a/src/pcm-raw.cpp +++ b/src/pcm-raw.cpp @@ -1674,9 +1674,9 @@ void printTransposed(const PCM::RawPMUConfigs& curPMUConfigs, else if (type == "cbo" || type == "cha") { choose(outputType, - [&]() { printUncoreRows(nullptr, (uint32) m->getMaxNumOfCBoxes(), "C"); }, - [&]() { printUncoreRows(nullptr, (uint32) m->getMaxNumOfCBoxes(), type); }, - [&]() { printUncoreRows([](const uint32 u, const uint32 i, const ServerUncoreCounterState& before, const ServerUncoreCounterState& after) { return getCBOCounter(u, i, before, after); }, (uint32)m->getMaxNumOfCBoxes(), "C"); + [&]() { printUncoreRows(nullptr, (uint32) m->getMaxNumOfUncorePMUs(PCM::CBO_PMU_ID), "C"); }, + [&]() { printUncoreRows(nullptr, (uint32) m->getMaxNumOfUncorePMUs(PCM::CBO_PMU_ID), type); }, + [&]() { printUncoreRows([](const uint32 u, const uint32 i, const ServerUncoreCounterState& before, const ServerUncoreCounterState& after) { return getUncoreCounter(PCM::CBO_PMU_ID, u, i, before, after); }, (uint32)m->getMaxNumOfUncorePMUs(PCM::CBO_PMU_ID), "C"); }); } else if (type == "mdf") @@ -2046,7 +2046,7 @@ void print(const PCM::RawPMUConfigs& curPMUConfigs, { for (uint32 s = 0; s < m->getNumSockets(); ++s) { - for (uint32 cbo = 0; cbo < m->getMaxNumOfCBoxes(); ++cbo) + for (uint32 cbo = 0; cbo < m->getMaxNumOfUncorePMUs(PCM::CBO_PMU_ID); ++cbo) { int i = 0; for (auto& event : events) @@ -2054,7 +2054,7 @@ void print(const PCM::RawPMUConfigs& curPMUConfigs, choose(outputType, [s, cbo]() { cout << "SKT" << s << "C" << cbo << separator; }, [&event, &i]() { if (event.second.empty()) cout << "CBOEvent" << i << separator; else cout << event.second << separator; }, - [&]() { cout << getCBOCounter(cbo, i, BeforeUncoreState[s], AfterUncoreState[s]) << separator; }); + [&]() { cout << getUncoreCounter(PCM::CBO_PMU_ID, cbo, i, BeforeUncoreState[s], AfterUncoreState[s]) << separator; }); ++i; } } diff --git a/src/utils.h b/src/utils.h index 09fea74c..de5a2858 100644 --- a/src/utils.h +++ b/src/utils.h @@ -583,4 +583,105 @@ inline void extractBitsPrintHelper(const std::pair & bits, T & valu void restrictDriverAccessNative(LPCTSTR path); #endif + +class TemporalThreadAffinity // speedup trick for Linux, FreeBSD, DragonFlyBSD, Windows +{ + TemporalThreadAffinity(); // forbidden +#if defined(__FreeBSD__) || (defined(__DragonFly__) && __DragonFly_version >= 400707) + cpu_set_t old_affinity; + bool restore; + +public: + TemporalThreadAffinity(uint32 core_id, bool checkStatus = true, const bool restore_ = true) + : restore(restore_) + { + assert(core_id < 1024); + auto res = pthread_getaffinity_np(pthread_self(), sizeof(cpu_set_t), &old_affinity); + if (res != 0) + { + std::cerr << "ERROR: pthread_getaffinity_np for core " << core_id << " failed with code " << res << "\n"; + throw std::exception(); + } + cpu_set_t new_affinity; + CPU_ZERO(&new_affinity); + CPU_SET(core_id, &new_affinity); + // CPU_CMP() returns true if old_affinity is NOT equal to new_affinity + if (!(CPU_CMP(&old_affinity, &new_affinity))) + { + restore = false; + return; // the same affinity => return + } + res = pthread_setaffinity_np(pthread_self(), sizeof(cpu_set_t), &new_affinity); + if (res != 0 && checkStatus) + { + std::cerr << "ERROR: pthread_setaffinity_np for core " << core_id << " failed with code " << res << "\n"; + throw std::exception(); + } + } + ~TemporalThreadAffinity() + { + if (restore) pthread_setaffinity_np(pthread_self(), sizeof(cpu_set_t), &old_affinity); + } + bool supported() const { return true; } + +#elif defined(__linux__) + cpu_set_t* old_affinity; + static constexpr auto maxCPUs = 8192; + const size_t set_size; + bool restore; + +public: + TemporalThreadAffinity(const uint32 core_id, bool checkStatus = true, const bool restore_ = true) + : set_size(CPU_ALLOC_SIZE(maxCPUs)), restore(restore_) + { + assert(core_id < maxCPUs); + old_affinity = CPU_ALLOC(maxCPUs); + assert(old_affinity); + auto res = pthread_getaffinity_np(pthread_self(), set_size, old_affinity); + if (res != 0) + { + std::cerr << "ERROR: pthread_getaffinity_np for core " << core_id << " failed with code " << res << "\n"; + throw std::exception(); + } + cpu_set_t* new_affinity = CPU_ALLOC(maxCPUs); + assert(new_affinity); + CPU_ZERO_S(set_size, new_affinity); + CPU_SET_S(core_id, set_size, new_affinity); + if (CPU_EQUAL_S(set_size, old_affinity, new_affinity)) + { + CPU_FREE(new_affinity); + restore = false; + return; + } + res = pthread_setaffinity_np(pthread_self(), set_size, new_affinity); + CPU_FREE(new_affinity); + if (res != 0 && checkStatus) + { + std::cerr << "ERROR: pthread_setaffinity_np for core " << core_id << " failed with code " << res << "\n"; + throw std::exception(); + } + } + ~TemporalThreadAffinity() + { + if (restore) pthread_setaffinity_np(pthread_self(), set_size, old_affinity); + CPU_FREE(old_affinity); + } + bool supported() const { return true; } +#elif defined(_MSC_VER) + ThreadGroupTempAffinity affinity; +public: + TemporalThreadAffinity(uint32 core, bool checkStatus = true, const bool restore = true) + : affinity(core, checkStatus, restore) + { + } + bool supported() const { return true; } +#else // not implemented for os x +public: + TemporalThreadAffinity(uint32) { } + TemporalThreadAffinity(uint32, bool) {} + bool supported() const { return false; } +#endif +}; + + } // namespace pcm From bb57b7f43f4eb5c7523d370c70f3c0202fde3f5b Mon Sep 17 00:00:00 2001 From: "Dementiev, Roman" Date: Wed, 17 Jan 2024 19:03:44 +0100 Subject: [PATCH 08/16] refactor MDF PMU code Change-Id: I33f28712ef6e9e2f779ed61205401c6bab256e61 --- src/cpucounters.cpp | 101 ++++++++++++++++++++------------------------ src/cpucounters.h | 22 +--------- src/pcm-raw.cpp | 10 ++--- 3 files changed, 51 insertions(+), 82 deletions(-) diff --git a/src/cpucounters.cpp b/src/cpucounters.cpp index f631e7f5..669c70a9 100644 --- a/src/cpucounters.cpp +++ b/src/cpucounters.cpp @@ -1941,7 +1941,7 @@ void PCM::initUncoreObjects() " " << ((s < iioPMUs.size()) ? iioPMUs[s].size() : 0) << " IIO units detected." " " << ((s < irpPMUs.size()) ? irpPMUs[s].size() : 0) << " IRP units detected." " " << countPMUs(s, CBO_PMU_ID) << " CHA/CBO units detected." - " " << ((s < mdfPMUs.size()) ? mdfPMUs[s].size() : 0) << " MDF units detected." + " " << countPMUs(s, MDF_PMU_ID) << " MDF units detected." " " << ((s < cxlPMUs.size()) ? cxlPMUs[s].size() : 0) << " CXL units detected." "\n"; } @@ -2080,6 +2080,36 @@ void PCM::initUncorePMUsDirect() } }; + auto addPMUsFromDiscoveryRef = [this, &handle, &s](std::vector& out, const unsigned int pmuType, const int filter0 = -1) + { + if (uncorePMUDiscovery.get()) + { + for (size_t box = 0; box < uncorePMUDiscovery->getNumBoxes(pmuType, s); ++box) + { + if (uncorePMUDiscovery->getBoxAccessType(pmuType, s, box) == UncorePMUDiscovery::accessTypeEnum::MSR + && uncorePMUDiscovery->getBoxNumRegs(pmuType, s, box) >= 4) + { + out.push_back( + std::make_shared( + std::make_shared(handle, uncorePMUDiscovery->getBoxCtlAddr(pmuType, s, box)), + std::make_shared(handle, uncorePMUDiscovery->getBoxCtlAddr(pmuType, s, box, 0)), + std::make_shared(handle, uncorePMUDiscovery->getBoxCtlAddr(pmuType, s, box, 1)), + std::make_shared(handle, uncorePMUDiscovery->getBoxCtlAddr(pmuType, s, box, 2)), + std::make_shared(handle, uncorePMUDiscovery->getBoxCtlAddr(pmuType, s, box, 3)), + std::make_shared(handle, uncorePMUDiscovery->getBoxCtrAddr(pmuType, s, box, 0)), + std::make_shared(handle, uncorePMUDiscovery->getBoxCtrAddr(pmuType, s, box, 1)), + std::make_shared(handle, uncorePMUDiscovery->getBoxCtrAddr(pmuType, s, box, 2)), + std::make_shared(handle, uncorePMUDiscovery->getBoxCtrAddr(pmuType, s, box, 3)), + std::shared_ptr(), + std::shared_ptr(), + (filter0 < 0) ? std::shared_ptr() : std::make_shared(handle, uncorePMUDiscovery->getBoxCtlAddr(pmuType, s, box) + filter0) // filters not supported by discovery + ) + ); + } + } + } + }; + switch (cpu_model) { case IVYTOWN: @@ -2138,9 +2168,9 @@ void PCM::initUncorePMUsDirect() switch (cpu_model) { case SPR: - mdfPMUs.resize(num_sockets); - addPMUsFromDiscovery(mdfPMUs[s], SPR_MDF_BOX_TYPE); - if (mdfPMUs[s].empty()) + uncorePMUs[s].resize(1); + addPMUsFromDiscoveryRef(uncorePMUs[s][0][MDF_PMU_ID], SPR_MDF_BOX_TYPE); + if (uncorePMUs[s][0][MDF_PMU_ID].empty()) { std::cerr << "ERROR: MDF PMU not found\n"; } @@ -2519,7 +2549,6 @@ void PCM::initUncorePMUsPerf() uncorePMUs.resize(num_sockets); iioPMUs.resize(num_sockets); irpPMUs.resize(num_sockets); - mdfPMUs.resize(num_sockets); pcuPMUs.resize(num_sockets); for (uint32 s = 0; s < (uint32)num_sockets; ++s) { @@ -2528,7 +2557,7 @@ void PCM::initUncorePMUsPerf() populatePerfPMUs(s, enumeratePerfPMUs("ubox", 100), uboxPMUs, true); populatePerfPMUs(s, enumeratePerfPMUs("cbox", 100), uncorePMUs[s][0][CBO_PMU_ID], false, true, true); populatePerfPMUs(s, enumeratePerfPMUs("cha", 200), uncorePMUs[s][0][CBO_PMU_ID], false, true, true); - populatePerfPMUs(s, enumeratePerfPMUs("mdf", 200), mdfPMUs[s], false, true, true); + populatePerfPMUs(s, enumeratePerfPMUs("mdf", 200), uncorePMUs[s][0][MDF_PMU_ID], false, true, true); auto populateMapPMUs = [&s](const std::string& type, std::vector > & out) { std::vector PMUVector; @@ -4638,13 +4667,6 @@ void PCM::cleanupUncorePMUs(const bool silent) forAllUncorePMUs([](UncorePMU & p) { p.cleanup(); }); - for (auto& sMDFPMUs : mdfPMUs) - { - for (auto& pmu : sMDFPMUs) - { - pmu.cleanup(); - } - } for (auto& spcuPMUs : pcuPMUs) { for (auto& pmu : spcuPMUs) @@ -5763,13 +5785,8 @@ void PCM::freezeServerUncoreCounters() forAllUncorePMUs(i, CBO_PMU_ID, [](UncorePMU& pmu) { pmu.freeze(UNC_PMON_UNIT_CTL_FRZ_EN); }); - if (size_t(i) < mdfPMUs.size()) - { - for (auto& pmu : mdfPMUs[i]) - { - pmu.freeze(UNC_PMON_UNIT_CTL_FRZ_EN); - } - } + forAllUncorePMUs(i, MDF_PMU_ID, [](UncorePMU& pmu) { pmu.freeze(UNC_PMON_UNIT_CTL_FRZ_EN); }); + } for (auto& sPMUs : cxlPMUs) { @@ -5812,13 +5829,8 @@ void PCM::unfreezeServerUncoreCounters() forAllUncorePMUs(i, CBO_PMU_ID, [](UncorePMU& pmu) { pmu.unfreeze(UNC_PMON_UNIT_CTL_FRZ_EN); }); - if (size_t(i) < mdfPMUs.size()) - { - for (auto& pmu : mdfPMUs[i]) - { - pmu.unfreeze(UNC_PMON_UNIT_CTL_FRZ_EN); - } - } + forAllUncorePMUs(i, MDF_PMU_ID, [](UncorePMU& pmu) { pmu.unfreeze(UNC_PMON_UNIT_CTL_FRZ_EN); }); + } for (auto& sPMUs : cxlPMUs) { @@ -6583,16 +6595,8 @@ ServerUncoreCounterState PCM::getServerUncoreCounterState(uint32 socket) readUncoreCounterValues(result, socket, CBO_PMU_ID); - for (uint32 mdf = 0; socket < mdfPMUs.size() && mdf < mdfPMUs[socket].size() && mdf < ServerUncoreCounterState::maxMDFs; ++mdf) - { - for (int i = 0; i < ServerUncoreCounterState::maxCounters && size_t(i) < mdfPMUs[socket][mdf].size(); ++i) - { - if (mdfPMUs[socket][mdf].counterValue[i].get()) - { - result.MDFCounter[mdf][i] = *(mdfPMUs[socket][mdf].counterValue[i]); - } - } - } + readUncoreCounterValues(result, socket, MDF_PMU_ID); + for (uint32 stack = 0; socket < iioPMUs.size() && stack < iioPMUs[socket].size() && stack < ServerUncoreCounterState::maxIIOStacks; ++stack) { for (int i = 0; i < ServerUncoreCounterState::maxCounters && size_t(i) < iioPMUs[socket][stack].size(); ++i) @@ -9155,15 +9159,6 @@ uint32 PCM::getMaxNumOfIIOStacks() const return 0; } -uint32 PCM::getMaxNumOfMDFs() const -{ - if (mdfPMUs.size() > 0) - { - return (uint32)mdfPMUs[0].size(); - } - return 0; -} - void PCM::programCboOpcodeFilter(const uint32 opc0, UncorePMU & pmu, const uint32 nc_, const uint32 opc1, const uint32 loc, const uint32 rem) { if(JAKETOWN == cpu_model) @@ -9372,18 +9367,12 @@ void PCM::programCboRaw(const uint64* events, const uint64 filter0, const uint64 void PCM::programMDF(const uint64* events) { - for (size_t i = 0; (i < mdfPMUs.size()) && MSR.size(); ++i) + programUncorePMUs(MDF_PMU_ID, [&](UncorePMU& pmu) { - uint32 refCore = socketRefCore[i]; - TemporalThreadAffinity tempThreadAffinity(refCore); // speedup trick for Linux - - for (auto & pmu : mdfPMUs[i]) - { - pmu.initFreeze(UNC_PMON_UNIT_CTL_FRZ_EN); + pmu.initFreeze(UNC_PMON_UNIT_CTL_FRZ_EN); - PCM::program(pmu, events, events + 4, UNC_PMON_UNIT_CTL_FRZ_EN); - } - } + PCM::program(pmu, events, events + 4, UNC_PMON_UNIT_CTL_FRZ_EN); + }); } void PCM::programUBOX(const uint64* events) diff --git a/src/cpucounters.h b/src/cpucounters.h index f52899d7..27a6adb7 100644 --- a/src/cpucounters.h +++ b/src/cpucounters.h @@ -646,6 +646,7 @@ class PCM_API PCM enum UncorePMUIDs { CBO_PMU_ID, + MDF_PMU_ID, INVALID_PMU_ID }; private: @@ -773,7 +774,6 @@ class PCM_API PCM std::vector > energy_status; std::vector > dram_energy_status; std::vector > pp_energy_status; - std::vector > mdfPMUs; std::vector>> cxlPMUs; // socket X CXL ports X UNIT {0,1} std::vector > memory_bw_local; @@ -1332,9 +1332,6 @@ class PCM_API PCM //! \brief Returns the number of IIO stacks per socket uint32 getMaxNumOfIIOStacks() const; - //! \brief Returns the number of MDFs boxes per socket - uint32 getMaxNumOfMDFs() const; - /*! \brief Returns the number of IDX accel devs \param accel index of IDX accel */ @@ -3049,18 +3046,6 @@ uint64 getUncoreCounter(const int pmu_id, uint32 unit, uint32 counter, const Cou return 0ULL; } -/*! \brief Direct read of MDF PMU counter (counter meaning depends on the programming: power/performance/etc) - \param counter counter number - \param mdf mdf number - \param before CPU counter state before the experiment - \param after CPU counter state after the experiment -*/ -template -uint64 getMDFCounter(uint32 mdf, uint32 counter, const CounterStateType& before, const CounterStateType& after) -{ - return after.MDFCounter[mdf][counter] - before.MDFCounter[mdf][counter]; -} - /*! \brief Direct read of UBOX PMU counter (counter meaning depends on the programming: power/performance/etc) \param counter counter number \param before CPU counter state before the experiment @@ -3419,7 +3404,6 @@ class ServerUncoreCounterState : public UncoreCounterState maxControllers = 4, maxChannels = 32, maxXPILinks = 6, - maxMDFs = 128, maxIIOStacks = 16, maxCXLPorts = 6, maxPUnits = 5, @@ -3464,7 +3448,6 @@ class ServerUncoreCounterState : public UncoreCounterState std::array, maxXPILinks> xPICounter; std::array, maxXPILinks> M3UPICounter; - std::array, maxMDFs> MDFCounter; std::array, maxIIOStacks> IIOCounter; std::array, maxIIOStacks> IRPCounter; std::array, maxCXLPorts> CXLCMCounter; @@ -3496,8 +3479,6 @@ class ServerUncoreCounterState : public UncoreCounterState template friend uint64 getUncoreCounter(const int pmu_id, uint32 unit, uint32 counter, const CounterStateType& before, const CounterStateType& after); template - friend uint64 getMDFCounter(uint32 mdf, uint32 counter, const CounterStateType& before, const CounterStateType& after); - template friend uint64 getUBOXCounter(uint32 counter, const CounterStateType& before, const CounterStateType& after); template friend uint64 getIIOCounter(uint32 stack, uint32 counter, const CounterStateType& before, const CounterStateType& after); @@ -3530,7 +3511,6 @@ class ServerUncoreCounterState : public UncoreCounterState ServerUncoreCounterState() : xPICounter{{}}, M3UPICounter{{}}, - MDFCounter{{}}, IIOCounter{{}}, IRPCounter{{}}, CXLCMCounter{{}}, diff --git a/src/pcm-raw.cpp b/src/pcm-raw.cpp index b97a2aea..67d1d873 100644 --- a/src/pcm-raw.cpp +++ b/src/pcm-raw.cpp @@ -1682,9 +1682,9 @@ void printTransposed(const PCM::RawPMUConfigs& curPMUConfigs, else if (type == "mdf") { choose(outputType, - [&]() { printUncoreRows(nullptr, (uint32) m->getMaxNumOfMDFs(), "MDF"); }, - [&]() { printUncoreRows(nullptr, (uint32) m->getMaxNumOfMDFs(), type); }, - [&]() { printUncoreRows([](const uint32 u, const uint32 i, const ServerUncoreCounterState& before, const ServerUncoreCounterState& after) { return getMDFCounter(u, i, before, after); }, (uint32)m->getMaxNumOfMDFs(), "MDF"); + [&]() { printUncoreRows(nullptr, (uint32) m->getMaxNumOfUncorePMUs(PCM::MDF_PMU_ID), "MDF"); }, + [&]() { printUncoreRows(nullptr, (uint32) m->getMaxNumOfUncorePMUs(PCM::MDF_PMU_ID), type); }, + [&]() { printUncoreRows([](const uint32 u, const uint32 i, const ServerUncoreCounterState& before, const ServerUncoreCounterState& after) { return getUncoreCounter(PCM::MDF_PMU_ID, u, i, before, after); }, (uint32)m->getMaxNumOfUncorePMUs(PCM::MDF_PMU_ID), "MDF"); }); } else if (type == "irp") @@ -2064,7 +2064,7 @@ void print(const PCM::RawPMUConfigs& curPMUConfigs, { for (uint32 s = 0; s < m->getNumSockets(); ++s) { - for (uint32 mdf = 0; mdf < m->getMaxNumOfMDFs(); ++mdf) + for (uint32 mdf = 0; mdf < m->getMaxNumOfUncorePMUs(PCM::MDF_PMU_ID); ++mdf) { int i = 0; for (auto& event : events) @@ -2072,7 +2072,7 @@ void print(const PCM::RawPMUConfigs& curPMUConfigs, choose(outputType, [s, mdf]() { cout << "SKT" << s << "MDF" << mdf << separator; }, [&event, &i]() { if (event.second.empty()) cout << "MDFEvent" << i << separator; else cout << event.second << separator; }, - [&]() { cout << getMDFCounter(mdf, i, BeforeUncoreState[s], AfterUncoreState[s]) << separator; }); + [&]() { cout << getUncoreCounter(PCM::MDF_PMU_ID, mdf, i, BeforeUncoreState[s], AfterUncoreState[s]) << separator; }); ++i; } } From 64adee93d1fad0f7f9ba1ef569f5c0615816dba5 Mon Sep 17 00:00:00 2001 From: "Dementiev, Roman" Date: Thu, 18 Jan 2024 18:52:04 +0100 Subject: [PATCH 09/16] refactor PCU PMU code Change-Id: If09e3232a49edad86bdc7b29311e7d8d023ddd97 --- src/cpucounters.cpp | 76 ++++++++++++++------------------------------- src/cpucounters.h | 27 ++-------------- src/pcm-power.cpp | 18 +++++------ src/pcm-raw.cpp | 10 +++--- 4 files changed, 40 insertions(+), 91 deletions(-) diff --git a/src/cpucounters.cpp b/src/cpucounters.cpp index 669c70a9..f2a8caae 100644 --- a/src/cpucounters.cpp +++ b/src/cpucounters.cpp @@ -1937,7 +1937,7 @@ void PCM::initUncoreObjects() for (uint32 s = 0; s < (uint32)num_sockets; ++s) { std::cerr << "Socket " << s << ":" << - " " << ((s < pcuPMUs.size()) ? pcuPMUs[s].size() : 0) << " PCU units detected." + " " << countPMUs(s, PCU_PMU_ID) << " PCU units detected." " " << ((s < iioPMUs.size()) ? iioPMUs[s].size() : 0) << " IIO units detected." " " << ((s < irpPMUs.size()) ? irpPMUs[s].size() : 0) << " IRP units detected." " " << countPMUs(s, CBO_PMU_ID) << " CHA/CBO units detected." @@ -1988,7 +1988,6 @@ void PCM::globalFreezeUncoreCountersInternal(const unsigned long long int freeze void PCM::initUncorePMUsDirect() { uncorePMUs.resize(num_sockets); - pcuPMUs.resize(num_sockets); for (uint32 s = 0; s < (uint32)num_sockets; ++s) { auto & handle = MSR[socketRefCore[s]]; @@ -2114,8 +2113,9 @@ void PCM::initUncorePMUsDirect() { case IVYTOWN: case JAKETOWN: - pcuPMUs[s].push_back( - UncorePMU( + uncorePMUs[s].resize(1); + uncorePMUs[s][0][PCU_PMU_ID].push_back( + std::make_shared( std::make_shared(handle, JKTIVT_PCU_MSR_PMON_BOX_CTL_ADDR), std::make_shared(handle, JKTIVT_PCU_MSR_PMON_CTL0_ADDR), std::make_shared(handle, JKTIVT_PCU_MSR_PMON_CTL1_ADDR), @@ -2137,8 +2137,9 @@ void PCM::initUncorePMUsDirect() case HASWELLX: case SKX: case ICX: - pcuPMUs[s].push_back( - UncorePMU( + uncorePMUs[s].resize(1); + uncorePMUs[s][0][PCU_PMU_ID].push_back( + std::make_shared( std::make_shared(handle, HSX_PCU_MSR_PMON_BOX_CTL_ADDR), std::make_shared(handle, HSX_PCU_MSR_PMON_CTL0_ADDR), std::make_shared(handle, HSX_PCU_MSR_PMON_CTL1_ADDR), @@ -2155,14 +2156,14 @@ void PCM::initUncorePMUsDirect() ); break; case SPR: - addPMUsFromDiscovery(pcuPMUs[s], SPR_PCU_BOX_TYPE, 0xE); - if (pcuPMUs[s].empty()) + uncorePMUs[s].resize(1); + addPMUsFromDiscoveryRef(uncorePMUs[s][0][PCU_PMU_ID], SPR_PCU_BOX_TYPE, 0xE); + if (uncorePMUs[s][0][PCU_PMU_ID].empty()) { std::cerr << "ERROR: PCU PMU not found\n"; } break; } - assert(pcuPMUs[s].size() <= ServerUncoreCounterState::maxPUnits); // add MDF PMUs switch (cpu_model) @@ -2549,11 +2550,10 @@ void PCM::initUncorePMUsPerf() uncorePMUs.resize(num_sockets); iioPMUs.resize(num_sockets); irpPMUs.resize(num_sockets); - pcuPMUs.resize(num_sockets); for (uint32 s = 0; s < (uint32)num_sockets; ++s) { uncorePMUs[s].resize(1); - populatePerfPMUs(s, enumeratePerfPMUs("pcu", 100), pcuPMUs[s], false, true); + populatePerfPMUs(s, enumeratePerfPMUs("pcu", 100), uncorePMUs[s][0][PCU_PMU_ID], false, true); populatePerfPMUs(s, enumeratePerfPMUs("ubox", 100), uboxPMUs, true); populatePerfPMUs(s, enumeratePerfPMUs("cbox", 100), uncorePMUs[s][0][CBO_PMU_ID], false, true, true); populatePerfPMUs(s, enumeratePerfPMUs("cha", 200), uncorePMUs[s][0][CBO_PMU_ID], false, true, true); @@ -4667,13 +4667,6 @@ void PCM::cleanupUncorePMUs(const bool silent) forAllUncorePMUs([](UncorePMU & p) { p.cleanup(); }); - for (auto& spcuPMUs : pcuPMUs) - { - for (auto& pmu : spcuPMUs) - { - pmu.cleanup(); - } - } for (auto& sPMUs : cxlPMUs) { for (auto& pmus : sPMUs) @@ -5436,28 +5429,17 @@ PCM::ErrorCode PCM::programServerUncorePowerMetrics(int mc_profile, int pcu_prof void PCM::programPCU(uint32* PCUCntConf, const uint64 filter) { - for (int i = 0; (i < (int)serverUncorePMUs.size()) && MSR.size(); ++i) + programUncorePMUs(PCU_PMU_ID, [&PCUCntConf, &filter](UncorePMU& pmu) { - if (i >= (int)pcuPMUs.size()) - { - continue; - } - - uint32 refCore = socketRefCore[i]; - TemporalThreadAffinity tempThreadAffinity(refCore); // speedup trick for Linux + pmu.initFreeze(UNC_PMON_UNIT_CTL_FRZ_EN); - for (auto& pmu : pcuPMUs[i]) + if (pmu.filter[0].get()) { - pmu.initFreeze(UNC_PMON_UNIT_CTL_FRZ_EN); - - if (pmu.filter[0].get()) - { - *pmu.filter[0] = filter; - } - - program(pmu, &PCUCntConf[0], &PCUCntConf[4], UNC_PMON_UNIT_CTL_FRZ_EN); + *pmu.filter[0] = filter; } - } + + program(pmu, &PCUCntConf[0], &PCUCntConf[4], UNC_PMON_UNIT_CTL_FRZ_EN); + }); } PCM::ErrorCode PCM::program(const RawPMUConfigs& curPMUConfigs_, const bool silent, const int pid) @@ -5762,10 +5744,7 @@ void PCM::freezeServerUncoreCounters() const auto refCore = socketRefCore[i]; TemporalThreadAffinity tempThreadAffinity(refCore); // speedup trick for Linux - for (auto& pmu : pcuPMUs[i]) - { - pmu.freeze(UNC_PMON_UNIT_CTL_FRZ_EN); - } + forAllUncorePMUs(i, PCU_PMU_ID, [](UncorePMU& pmu) { pmu.freeze(UNC_PMON_UNIT_CTL_FRZ_EN); }); if (IIOEventsAvailable()) { @@ -5806,10 +5785,7 @@ void PCM::unfreezeServerUncoreCounters() const auto refCore = socketRefCore[i]; TemporalThreadAffinity tempThreadAffinity(refCore); // speedup trick for Linux - for (auto& pmu : pcuPMUs[i]) - { - pmu.unfreeze(UNC_PMON_UNIT_CTL_FRZ_EN); - } + forAllUncorePMUs(i, PCU_PMU_ID, [](UncorePMU& pmu) { pmu.unfreeze(UNC_PMON_UNIT_CTL_FRZ_EN); }); if (IIOEventsAvailable()) { @@ -6619,14 +6595,9 @@ ServerUncoreCounterState PCM::getServerUncoreCounterState(uint32 socket) result.UBOXCounter[i] = *(uboxPMUs[socket].counterValue[i]); result.UncClocks = getUncoreClocks(socket); } - for (size_t u = 0; socket < pcuPMUs.size() && u < pcuPMUs[socket].size(); ++u) - { - for (int i = 0; i < ServerUncoreCounterState::maxCounters && size_t(i) < pcuPMUs[socket][u].size(); ++i) - { - assert(u < ServerUncoreCounterState::maxPUnits); - result.PCUCounter[u][i] = *pcuPMUs[socket][u].counterValue[i]; - } - } + + readUncoreCounterValues(result, socket, PCU_PMU_ID); + for (size_t p = 0; p < getNumCXLPorts(socket); ++p) { for (int i = 0; i < ServerUncoreCounterState::maxCounters && socket < cxlPMUs.size() && size_t(i) < cxlPMUs[socket][p].first.size(); ++i) @@ -6638,7 +6609,6 @@ ServerUncoreCounterState PCM::getServerUncoreCounterState(uint32 socket) result.CXLDPCounter[p][i] = *cxlPMUs[socket][p].second.counterValue[i]; } } - // std::cout << "values read: " << result.PCUCounter[0] << " " << result.PCUCounter[1] << " " << result.PCUCounter[2] << " " << result.PCUCounter[3] << "\n"; uint64 val=0; //MSR[refCore]->read(MSR_PKG_ENERGY_STATUS,&val); //std::cout << "Energy status: " << val << "\n"; diff --git a/src/cpucounters.h b/src/cpucounters.h index 27a6adb7..46925bac 100644 --- a/src/cpucounters.h +++ b/src/cpucounters.h @@ -647,6 +647,7 @@ class PCM_API PCM { CBO_PMU_ID, MDF_PMU_ID, + PCU_PMU_ID, INVALID_PMU_ID }; private: @@ -764,7 +765,7 @@ class PCM_API PCM } } - std::vector > pcuPMUs; + // TODO: gradually move other PMUs to the uncorePMUs structure std::vector > iioPMUs; std::vector > irpPMUs; std::vector uboxPMUs; @@ -1875,12 +1876,6 @@ class PCM_API PCM } return 0; } - //! \brief Returns the number of PUnits per socket - //! \return number of PUnits per socket - uint64 getPUnitsPerSocket() const - { - return (pcuPMUs.empty() == false) ? pcuPMUs[0].size() : 0; - } //! \brief Returns the number of detected integrated memory controllers per socket uint32 getMCPerSocket() const @@ -3131,17 +3126,6 @@ uint64 getEDCCounter(uint32 channel, uint32 counter, const CounterStateType & be return 0ULL; } -/*! \brief Direct read of power control unit PMU counter (counter meaning depends on the programming: power/performance/etc) - \param counter counter number - \param before CPU counter state before the experiment - \param after CPU counter state after the experiment -*/ -template -uint64 getPCUCounter(uint32 unit, uint32 counter, const CounterStateType & before, const CounterStateType & after) -{ - return after.PCUCounter[unit][counter] - before.PCUCounter[unit][counter]; -} - /*! \brief Returns clock ticks of power control unit \param before CPU counter state before the experiment \param after CPU counter state after the experiment @@ -3149,7 +3133,7 @@ uint64 getPCUCounter(uint32 unit, uint32 counter, const CounterStateType & befor template uint64 getPCUClocks(uint32 unit, const CounterStateType & before, const CounterStateType & after) { - return getPCUCounter(unit, 0, before, after); + return getUncoreCounter(PCM::PCU_PMU_ID, unit, 0, before, after); } /*! \brief Returns energy consumed by processor, excluding DRAM (measured in internal units) @@ -3406,7 +3390,6 @@ class ServerUncoreCounterState : public UncoreCounterState maxXPILinks = 6, maxIIOStacks = 16, maxCXLPorts = 6, - maxPUnits = 5, maxCounters = UncorePMU::maxCounters }; enum EventPosition @@ -3459,7 +3442,6 @@ class ServerUncoreCounterState : public UncoreCounterState std::array, maxControllers> M2MCounter; // M2M/iMC boxes x counter std::array, maxControllers> HACounter; // HA boxes x counter std::array, maxChannels> EDCCounter; // EDC controller X counter - std::array, maxPUnits> PCUCounter; std::unordered_map freeRunningCounter; int32 PackageThermalHeadroom; uint64 InvariantTSC; // invariant time stamp counter @@ -3493,8 +3475,6 @@ class ServerUncoreCounterState : public UncoreCounterState template friend uint64 getEDCCounter(uint32 channel, uint32 counter, const CounterStateType & before, const CounterStateType & after); template - friend uint64 getPCUCounter(uint32 unit, uint32 counter, const CounterStateType & before, const CounterStateType & after); - template friend uint64 getConsumedEnergy(const CounterStateType & before, const CounterStateType & after); template friend uint64 getDRAMConsumedEnergy(const CounterStateType & before, const CounterStateType & after); @@ -3522,7 +3502,6 @@ class ServerUncoreCounterState : public UncoreCounterState M2MCounter{{}}, HACounter{{}}, EDCCounter{{}}, - PCUCounter{{}}, PackageThermalHeadroom(0), InvariantTSC(0) { diff --git a/src/pcm-power.cpp b/src/pcm-power.cpp index 581e486d..b3220534 100644 --- a/src/pcm-power.cpp +++ b/src/pcm-power.cpp @@ -76,14 +76,14 @@ uint64 getPPDCycles(uint32 channel, const ServerUncoreCounterState & before, con double getNormalizedPCUCounter(uint32 unit, uint32 counter, const ServerUncoreCounterState & before, const ServerUncoreCounterState & after) { - return double(getPCUCounter(unit, counter, before, after)) / double(getPCUClocks(unit, before, after)); + return double(getUncoreCounter(PCM::PCU_PMU_ID, unit, counter, before, after)) / double(getPCUClocks(unit, before, after)); } double getNormalizedPCUCounter(uint32 unit, uint32 counter, const ServerUncoreCounterState & before, const ServerUncoreCounterState & after, PCM * m) { const uint64 PCUClocks = (m->getPCUFrequency() * getInvariantTSC(before, after)) / m->getNominalFrequency(); // cout << "PCM Debug: PCU clocks " << PCUClocks << " PCU frequency: " << m->getPCUFrequency() << "\n"; - return double(getPCUCounter(unit, counter, before, after)) / double(PCUClocks); + return double(getUncoreCounter(PCM::PCU_PMU_ID, unit, counter, before, after)) / double(PCUClocks); } int default_freq_band[3] = { 12, 20, 40 }; @@ -422,12 +422,12 @@ int mainThrows(int argc, char * argv[]) } } - for (uint32 u = 0; u < m->getPUnitsPerSocket(); ++u) + for (uint32 u = 0; u < m->getMaxNumOfUncorePMUs(PCM::PCU_PMU_ID); ++u) { auto printHeader = [&socket,&m,&u, &BeforeState, &AfterState] (const bool printPCUClocks) { cout << "S" << socket; - if (m->getPUnitsPerSocket() > 1) + if (m->getMaxNumOfUncorePMUs(PCM::PCU_PMU_ID) > 1) { cout << "U" << u; } @@ -488,10 +488,10 @@ int mainThrows(int argc, char * argv[]) break; case 5: printHeader(true); - cout << "; Frequency transition count: " << getPCUCounter(u, 1, BeforeState[socket], AfterState[socket]) << " " + cout << "; Frequency transition count: " << getUncoreCounter(PCM::PCU_PMU_ID, u, 1, BeforeState[socket], AfterState[socket]) << " " << "; Cycles spent changing frequency: " << getNormalizedPCUCounter(u, 2, BeforeState[socket], AfterState[socket], m) * 100. << " %"; if (PCM::HASWELLX == cpu_model) { - cout << "; UFS transition count: " << getPCUCounter(u, 3, BeforeState[socket], AfterState[socket]) << " "; + cout << "; UFS transition count: " << getUncoreCounter(PCM::PCU_PMU_ID, u, 3, BeforeState[socket], AfterState[socket]) << " "; cout << "; UFS transition cycles: " << getNormalizedPCUCounter(u, 0, BeforeState[socket], AfterState[socket], m) * 100. << " %"; } cout << "\n"; @@ -500,7 +500,7 @@ int mainThrows(int argc, char * argv[]) printHeader(false); if (cpu_model == PCM::HASWELLX || PCM::BDX_DE == cpu_model) cout << "; PC1e+ residency: " << getNormalizedPCUCounter(u, 0, BeforeState[socket], AfterState[socket], m) * 100. << " %" - "; PC1e+ transition count: " << getPCUCounter(u, 1, BeforeState[socket], AfterState[socket]) << " "; + "; PC1e+ transition count: " << getUncoreCounter(PCM::PCU_PMU_ID, u, 1, BeforeState[socket], AfterState[socket]) << " "; switch (cpu_model) { @@ -512,10 +512,10 @@ int mainThrows(int argc, char * argv[]) case PCM::SNOWRIDGE: case PCM::SPR: cout << "; PC2 residency: " << getPackageCStateResidency(2, BeforeState[socket], AfterState[socket]) * 100. << " %"; - cout << "; PC2 transitions: " << getPCUCounter(u, 2, BeforeState[socket], AfterState[socket]) << " "; + cout << "; PC2 transitions: " << getUncoreCounter(PCM::PCU_PMU_ID, u, 2, BeforeState[socket], AfterState[socket]) << " "; cout << "; PC3 residency: " << getPackageCStateResidency(3, BeforeState[socket], AfterState[socket]) * 100. << " %"; cout << "; PC6 residency: " << getPackageCStateResidency(6, BeforeState[socket], AfterState[socket]) * 100. << " %"; - cout << "; PC6 transitions: " << getPCUCounter(u, 3, BeforeState[socket], AfterState[socket]) << " "; + cout << "; PC6 transitions: " << getUncoreCounter(PCM::PCU_PMU_ID, u, 3, BeforeState[socket], AfterState[socket]) << " "; break; } diff --git a/src/pcm-raw.cpp b/src/pcm-raw.cpp index 67d1d873..2787a1ad 100644 --- a/src/pcm-raw.cpp +++ b/src/pcm-raw.cpp @@ -1657,9 +1657,9 @@ void printTransposed(const PCM::RawPMUConfigs& curPMUConfigs, else if (type == "pcu") { choose(outputType, - [&]() { printUncoreRows(nullptr, (uint32) m->getPUnitsPerSocket(), "P"); }, - [&]() { printUncoreRows(nullptr, (uint32) m->getPUnitsPerSocket(), type); }, - [&]() { printUncoreRows([](const uint32 u, const uint32 i, const ServerUncoreCounterState& before, const ServerUncoreCounterState& after) { return getPCUCounter(u, i, before, after); }, 1U, ""); + [&]() { printUncoreRows(nullptr, (uint32) m->getMaxNumOfUncorePMUs(PCM::PCU_PMU_ID), "P"); }, + [&]() { printUncoreRows(nullptr, (uint32) m->getMaxNumOfUncorePMUs(PCM::PCU_PMU_ID), type); }, + [&]() { printUncoreRows([](const uint32 u, const uint32 i, const ServerUncoreCounterState& before, const ServerUncoreCounterState& after) { return getUncoreCounter(PCM::PCU_PMU_ID, u, i, before, after); }, 1U, ""); }); } else if (type == "ubox") @@ -1952,7 +1952,7 @@ void print(const PCM::RawPMUConfigs& curPMUConfigs, { for (uint32 s = 0; s < m->getNumSockets(); ++s) { - for (uint32 u = 0; u < m->getPUnitsPerSocket(); ++u) + for (uint32 u = 0; u < m->getMaxNumOfUncorePMUs(PCM::PCU_PMU_ID); ++u) { int i = 0; for (auto& event : events) @@ -1960,7 +1960,7 @@ void print(const PCM::RawPMUConfigs& curPMUConfigs, choose(outputType, [s, u]() { cout << "SKT" << s << "P" << u << separator; }, [&event, &i]() { if (event.second.empty()) cout << "PCUEvent" << i << separator; else cout << event.second << separator; }, - [&]() { cout << getPCUCounter(u, i, BeforeUncoreState[s], AfterUncoreState[s]) << separator; }); + [&]() { cout << getUncoreCounter(PCM::PCU_PMU_ID, u, i, BeforeUncoreState[s], AfterUncoreState[s]) << separator; }); ++i; } } From 16e270f17c7207332ae6c6390567c069f17d779b Mon Sep 17 00:00:00 2001 From: "Dementiev, Roman" Date: Fri, 19 Jan 2024 11:00:36 +0100 Subject: [PATCH 10/16] fixes Change-Id: I7dc199499e54c3d873644b3bbf240bae8c142c35 --- src/cpucounters.h | 5 +++-- src/pcm-power.cpp | 7 ++++++- 2 files changed, 9 insertions(+), 3 deletions(-) diff --git a/src/cpucounters.h b/src/cpucounters.h index 46925bac..3bf36eba 100644 --- a/src/cpucounters.h +++ b/src/cpucounters.h @@ -822,11 +822,12 @@ class PCM_API PCM public: - size_t getMaxNumOfUncorePMUs(const int pmu_id) const + size_t getMaxNumOfUncorePMUs(const int pmu_id, const size_t socket = 0) const { size_t count = 0ULL; - for (auto& s : uncorePMUs) + if (socket < uncorePMUs.size()) { + const auto & s = uncorePMUs[socket]; for (auto& d : s) { const auto iter = d.find(pmu_id); diff --git a/src/pcm-power.cpp b/src/pcm-power.cpp index b3220534..a8fa7899 100644 --- a/src/pcm-power.cpp +++ b/src/pcm-power.cpp @@ -76,7 +76,12 @@ uint64 getPPDCycles(uint32 channel, const ServerUncoreCounterState & before, con double getNormalizedPCUCounter(uint32 unit, uint32 counter, const ServerUncoreCounterState & before, const ServerUncoreCounterState & after) { - return double(getUncoreCounter(PCM::PCU_PMU_ID, unit, counter, before, after)) / double(getPCUClocks(unit, before, after)); + const auto clk = getPCUClocks(unit, before, after); + if (clk) + { + return double(getUncoreCounter(PCM::PCU_PMU_ID, unit, counter, before, after)) / double(clk); + } + return -1.0; } double getNormalizedPCUCounter(uint32 unit, uint32 counter, const ServerUncoreCounterState & before, const ServerUncoreCounterState & after, PCM * m) From 6a47355e930d04db2b940174099b80159204793f Mon Sep 17 00:00:00 2001 From: "Dementiev, Roman" Date: Fri, 19 Jan 2024 11:50:59 +0100 Subject: [PATCH 11/16] refactor UBOX PMU code Change-Id: Id6ae34f8c0fb38d2243cb02a523c9c01dd107888 --- src/cpucounters.cpp | 111 ++++++++++++++++++++++++-------------------- src/cpucounters.h | 19 +------- src/pcm-raw.cpp | 4 +- 3 files changed, 65 insertions(+), 69 deletions(-) diff --git a/src/cpucounters.cpp b/src/cpucounters.cpp index f2a8caae..be2df1ac 100644 --- a/src/cpucounters.cpp +++ b/src/cpucounters.cpp @@ -1926,22 +1926,15 @@ void PCM::initUncoreObjects() initUncorePMUsDirect(); } - auto countPMUs = [&](const uint32 s, const int pmu_id) - { - size_t count = 0; - forAllUncorePMUs(s, pmu_id, [&count](UncorePMU&) { ++count; }); - return count; - }; - - std::cerr << "Info: " << uboxPMUs.size() << " UBOX units detected.\n"; for (uint32 s = 0; s < (uint32)num_sockets; ++s) { std::cerr << "Socket " << s << ":" << - " " << countPMUs(s, PCU_PMU_ID) << " PCU units detected." + " " << getMaxNumOfUncorePMUs(PCU_PMU_ID, s) << " PCU units detected." " " << ((s < iioPMUs.size()) ? iioPMUs[s].size() : 0) << " IIO units detected." " " << ((s < irpPMUs.size()) ? irpPMUs[s].size() : 0) << " IRP units detected." - " " << countPMUs(s, CBO_PMU_ID) << " CHA/CBO units detected." - " " << countPMUs(s, MDF_PMU_ID) << " MDF units detected." + " " << getMaxNumOfUncorePMUs(CBO_PMU_ID, s) << " CHA/CBO units detected." + " " << getMaxNumOfUncorePMUs(MDF_PMU_ID, s) << " MDF units detected." + " " << getMaxNumOfUncorePMUs(UBOX_PMU_ID, s) << " UBOX units detected." " " << ((s < cxlPMUs.size()) ? cxlPMUs[s].size() : 0) << " CXL units detected." "\n"; } @@ -1996,17 +1989,20 @@ void PCM::initUncorePMUsDirect() if (IVYTOWN == cpu_model || JAKETOWN == cpu_model) { - uboxPMUs.push_back( - UncorePMU( - std::shared_ptr(), + uncorePMUs[s].resize(1); + std::vector > CounterControlRegs{ std::make_shared(handle, JKTIVT_UBOX_MSR_PMON_CTL0_ADDR), - std::make_shared(handle, JKTIVT_UBOX_MSR_PMON_CTL1_ADDR), - std::shared_ptr(), - std::shared_ptr(), + std::make_shared(handle, JKTIVT_UBOX_MSR_PMON_CTL1_ADDR) + }, + CounterValueRegs{ std::make_shared(handle, JKTIVT_UBOX_MSR_PMON_CTR0_ADDR), - std::make_shared(handle, JKTIVT_UBOX_MSR_PMON_CTR1_ADDR), - std::shared_ptr(), + std::make_shared(handle, JKTIVT_UBOX_MSR_PMON_CTR1_ADDR) + }; + uncorePMUs[s][0][UBOX_PMU_ID].push_back( + std::make_shared( std::shared_ptr(), + CounterControlRegs, + CounterValueRegs, std::make_shared(handle, JKTIVT_UCLK_FIXED_CTL_ADDR), std::make_shared(handle, JKTIVT_UCLK_FIXED_CTR_ADDR) ) @@ -2014,17 +2010,20 @@ void PCM::initUncorePMUsDirect() } else if (SPR == cpu_model) { - uboxPMUs.push_back( - UncorePMU( - std::make_shared(handle, SPR_UBOX_MSR_PMON_BOX_CTL_ADDR), + uncorePMUs[s].resize(1); + std::vector > CounterControlRegs{ std::make_shared(handle, SPR_UBOX_MSR_PMON_CTL0_ADDR), - std::make_shared(handle, SPR_UBOX_MSR_PMON_CTL1_ADDR), - std::shared_ptr(), - std::shared_ptr(), + std::make_shared(handle, SPR_UBOX_MSR_PMON_CTL1_ADDR) + }, + CounterValueRegs{ std::make_shared(handle, SPR_UBOX_MSR_PMON_CTR0_ADDR), - std::make_shared(handle, SPR_UBOX_MSR_PMON_CTR1_ADDR), - std::shared_ptr(), - std::shared_ptr(), + std::make_shared(handle, SPR_UBOX_MSR_PMON_CTR1_ADDR) + }; + uncorePMUs[s][0][UBOX_PMU_ID].push_back( + std::make_shared( + std::make_shared(handle, SPR_UBOX_MSR_PMON_BOX_CTL_ADDR), + CounterControlRegs, + CounterValueRegs, std::make_shared(handle, SPR_UCLK_FIXED_CTL_ADDR), std::make_shared(handle, SPR_UCLK_FIXED_CTR_ADDR) ) @@ -2032,17 +2031,20 @@ void PCM::initUncorePMUsDirect() } else if (isServerCPU() && hasPCICFGUncore()) { - uboxPMUs.push_back( - UncorePMU( - std::shared_ptr(), + uncorePMUs[s].resize(1); + std::vector > CounterControlRegs{ std::make_shared(handle, UBOX_MSR_PMON_CTL0_ADDR), std::make_shared(handle, UBOX_MSR_PMON_CTL1_ADDR), - std::shared_ptr(), - std::shared_ptr(), + }, + CounterValueRegs{ std::make_shared(handle, UBOX_MSR_PMON_CTR0_ADDR), std::make_shared(handle, UBOX_MSR_PMON_CTR1_ADDR), + }; + uncorePMUs[s][0][UBOX_PMU_ID].push_back( + std::make_shared( std::shared_ptr(), - std::shared_ptr(), + CounterControlRegs, + CounterValueRegs, std::make_shared(handle, UCLK_FIXED_CTL_ADDR), std::make_shared(handle, UCLK_FIXED_CTR_ADDR) ) @@ -2554,7 +2556,7 @@ void PCM::initUncorePMUsPerf() { uncorePMUs[s].resize(1); populatePerfPMUs(s, enumeratePerfPMUs("pcu", 100), uncorePMUs[s][0][PCU_PMU_ID], false, true); - populatePerfPMUs(s, enumeratePerfPMUs("ubox", 100), uboxPMUs, true); + populatePerfPMUs(s, enumeratePerfPMUs("ubox", 100), uncorePMUs[s][0][UBOX_PMU_ID], true); populatePerfPMUs(s, enumeratePerfPMUs("cbox", 100), uncorePMUs[s][0][CBO_PMU_ID], false, true, true); populatePerfPMUs(s, enumeratePerfPMUs("cha", 200), uncorePMUs[s][0][CBO_PMU_ID], false, true, true); populatePerfPMUs(s, enumeratePerfPMUs("mdf", 200), uncorePMUs[s][0][MDF_PMU_ID], false, true, true); @@ -6590,11 +6592,9 @@ ServerUncoreCounterState PCM::getServerUncoreCounterState(uint32 socket) } } } - for (int i = 0; i < 2 && socket < uboxPMUs.size(); ++i) - { - result.UBOXCounter[i] = *(uboxPMUs[socket].counterValue[i]); - result.UncClocks = getUncoreClocks(socket); - } + + readUncoreCounterValues(result, socket, UBOX_PMU_ID); + result.UncClocks = getUncoreClocks(socket); readUncoreCounterValues(result, socket, PCU_PMU_ID); @@ -9347,19 +9347,17 @@ void PCM::programMDF(const uint64* events) void PCM::programUBOX(const uint64* events) { - for (size_t s = 0; (s < uboxPMUs.size()) && MSR.size(); ++s) + programUncorePMUs(UBOX_PMU_ID, [&events](UncorePMU& pmu) { - uint32 refCore = socketRefCore[s]; - TemporalThreadAffinity tempThreadAffinity(refCore); // speedup trick for Linux - uboxPMUs[s].initFreeze(UNC_PMON_UNIT_CTL_FRZ_EN); + pmu.initFreeze(UNC_PMON_UNIT_CTL_FRZ_EN); - *uboxPMUs[s].fixedCounterControl = UCLK_FIXED_CTL_EN; + *pmu.fixedCounterControl = UCLK_FIXED_CTL_EN; if (events) { - PCM::program(uboxPMUs[s], events, events + 2, 0); + PCM::program(pmu, events, events + 2, 0); } - } + }); } void PCM::controlQATTelemetry(uint32 dev, uint32 operation) @@ -9577,12 +9575,25 @@ uint64 PCM::getUncoreCounterState(const int pmu_id, const size_t socket, const u return result; } -uint64 PCM::getUncoreClocks(const uint32 socket_) +uint64 PCM::getUncoreClocks(const uint32 socket_id) { uint64 result = 0; - if (socket_ < uboxPMUs.size()) + if (socket_id < uncorePMUs.size()) { - result = *uboxPMUs[socket_].fixedCounterValue; + for (auto& d : uncorePMUs[socket_id]) + { + const auto iter = d.find(UBOX_PMU_ID); + if (iter != d.end()) + { + for (auto& pmu : iter->second) + { + if (pmu.get()) + { + result += *pmu->fixedCounterValue; + } + } + } + } } return result; } diff --git a/src/cpucounters.h b/src/cpucounters.h index 3bf36eba..c386a54b 100644 --- a/src/cpucounters.h +++ b/src/cpucounters.h @@ -648,6 +648,7 @@ class PCM_API PCM CBO_PMU_ID, MDF_PMU_ID, PCU_PMU_ID, + UBOX_PMU_ID, INVALID_PMU_ID }; private: @@ -768,7 +769,6 @@ class PCM_API PCM // TODO: gradually move other PMUs to the uncorePMUs structure std::vector > iioPMUs; std::vector > irpPMUs; - std::vector uboxPMUs; std::vector > idxPMUs; double joulesPerEnergyUnit; @@ -2425,7 +2425,7 @@ class PCM_API PCM bool uncoreFrequencyMetricAvailable() const { - return MSR.empty() == false && uboxPMUs.size() == getNumSockets() && getNumCores() == getNumOnlineCores(); + return MSR.empty() == false && getMaxNumOfUncorePMUs(UBOX_PMU_ID) > 0ULL && getNumCores() == getNumOnlineCores(); } bool LatencyMetricsAvailable() const @@ -3042,17 +3042,6 @@ uint64 getUncoreCounter(const int pmu_id, uint32 unit, uint32 counter, const Cou return 0ULL; } -/*! \brief Direct read of UBOX PMU counter (counter meaning depends on the programming: power/performance/etc) - \param counter counter number - \param before CPU counter state before the experiment - \param after CPU counter state after the experiment -*/ -template -uint64 getUBOXCounter(uint32 counter, const CounterStateType& before, const CounterStateType& after) -{ - return after.UBOXCounter[counter] - before.UBOXCounter[counter]; -} - /*! \brief Direct read of IIO PMU counter (counter meaning depends on the programming: power/performance/etc) \param counter counter number \param stack IIO stack number @@ -3436,7 +3425,6 @@ class ServerUncoreCounterState : public UncoreCounterState std::array, maxIIOStacks> IRPCounter; std::array, maxCXLPorts> CXLCMCounter; std::array, maxCXLPorts> CXLDPCounter; - std::array UBOXCounter; std::array DRAMClocks; std::array HBMClocks; std::array, maxChannels> MCCounter; // channel X counter @@ -3462,8 +3450,6 @@ class ServerUncoreCounterState : public UncoreCounterState template friend uint64 getUncoreCounter(const int pmu_id, uint32 unit, uint32 counter, const CounterStateType& before, const CounterStateType& after); template - friend uint64 getUBOXCounter(uint32 counter, const CounterStateType& before, const CounterStateType& after); - template friend uint64 getIIOCounter(uint32 stack, uint32 counter, const CounterStateType& before, const CounterStateType& after); template friend uint64 getIRPCounter(uint32 stack, uint32 counter, const CounterStateType& before, const CounterStateType& after); @@ -3496,7 +3482,6 @@ class ServerUncoreCounterState : public UncoreCounterState IRPCounter{{}}, CXLCMCounter{{}}, CXLDPCounter{{}}, - UBOXCounter{{}}, DRAMClocks{{}}, HBMClocks{{}}, MCCounter{{}}, diff --git a/src/pcm-raw.cpp b/src/pcm-raw.cpp index 2787a1ad..47b9f78c 100644 --- a/src/pcm-raw.cpp +++ b/src/pcm-raw.cpp @@ -1667,7 +1667,7 @@ void printTransposed(const PCM::RawPMUConfigs& curPMUConfigs, choose(outputType, [&]() { printUncoreRows(nullptr, 1U, ""); }, [&]() { printUncoreRows(nullptr, 1U, type); }, - [&]() { printUncoreRows([](const uint32, const uint32 i, const ServerUncoreCounterState& before, const ServerUncoreCounterState& after) { return getUBOXCounter(i, before, after); }, 1U, + [&]() { printUncoreRows([](const uint32, const uint32 i, const ServerUncoreCounterState& before, const ServerUncoreCounterState& after) { return getUncoreCounter(PCM::UBOX_PMU_ID, 0, i, before, after); }, 1U, "UncoreClocks", [](const uint32, const ServerUncoreCounterState& before, const ServerUncoreCounterState& after) { return getUncoreClocks(before, after); }); }); } @@ -2037,7 +2037,7 @@ void print(const PCM::RawPMUConfigs& curPMUConfigs, choose(outputType, [s]() { cout << "SKT" << s << separator; }, [&event, &i]() { if (event.second.empty()) cout << "UBOXEvent" << i << separator; else cout << event.second << separator; }, - [&]() { cout << getUBOXCounter(i, BeforeUncoreState[s], AfterUncoreState[s]) << separator; }); + [&]() { cout << getUncoreCounter(PCM::UBOX_PMU_ID, 0, i, BeforeUncoreState[s], AfterUncoreState[s]) << separator; }); ++i; } } From 70940483f01ba57bbe1885ce2fb4a61439b8363c Mon Sep 17 00:00:00 2001 From: "Dementiev, Roman" Date: Fri, 19 Jan 2024 12:04:34 +0100 Subject: [PATCH 12/16] drop unused function Change-Id: I8d90f19c82ef8ca86a990d574808dfe0f083e252 --- src/cpucounters.cpp | 30 ------------------------------ 1 file changed, 30 deletions(-) diff --git a/src/cpucounters.cpp b/src/cpucounters.cpp index be2df1ac..42309195 100644 --- a/src/cpucounters.cpp +++ b/src/cpucounters.cpp @@ -2051,36 +2051,6 @@ void PCM::initUncorePMUsDirect() ); } - auto addPMUsFromDiscovery = [this, &handle, &s](std::vector & out, const unsigned int pmuType, const int filter0 = -1) - { - if (uncorePMUDiscovery.get()) - { - for (size_t box = 0; box < uncorePMUDiscovery->getNumBoxes(pmuType, s); ++box) - { - if (uncorePMUDiscovery->getBoxAccessType(pmuType, s, box) == UncorePMUDiscovery::accessTypeEnum::MSR - && uncorePMUDiscovery->getBoxNumRegs(pmuType, s, box) >= 4) - { - out.push_back( - UncorePMU( - std::make_shared(handle, uncorePMUDiscovery->getBoxCtlAddr(pmuType, s, box)), - std::make_shared(handle, uncorePMUDiscovery->getBoxCtlAddr(pmuType, s, box, 0)), - std::make_shared(handle, uncorePMUDiscovery->getBoxCtlAddr(pmuType, s, box, 1)), - std::make_shared(handle, uncorePMUDiscovery->getBoxCtlAddr(pmuType, s, box, 2)), - std::make_shared(handle, uncorePMUDiscovery->getBoxCtlAddr(pmuType, s, box, 3)), - std::make_shared(handle, uncorePMUDiscovery->getBoxCtrAddr(pmuType, s, box, 0)), - std::make_shared(handle, uncorePMUDiscovery->getBoxCtrAddr(pmuType, s, box, 1)), - std::make_shared(handle, uncorePMUDiscovery->getBoxCtrAddr(pmuType, s, box, 2)), - std::make_shared(handle, uncorePMUDiscovery->getBoxCtrAddr(pmuType, s, box, 3)), - std::shared_ptr(), - std::shared_ptr(), - (filter0 < 0) ? std::shared_ptr() : std::make_shared(handle, uncorePMUDiscovery->getBoxCtlAddr(pmuType, s, box) + filter0) // filters not supported by discovery - ) - ); - } - } - } - }; - auto addPMUsFromDiscoveryRef = [this, &handle, &s](std::vector& out, const unsigned int pmuType, const int filter0 = -1) { if (uncorePMUDiscovery.get()) From ff3a34bd7b1c296b8fa19bf706b0c7c4aab438db Mon Sep 17 00:00:00 2001 From: "Dementiev, Roman" Date: Fri, 19 Jan 2024 12:29:22 +0100 Subject: [PATCH 13/16] fix llc slice display Change-Id: Ibb4b4ea153914949c35b6bea4c4516de8e99f90c --- src/cpucounters.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/cpucounters.cpp b/src/cpucounters.cpp index 42309195..da0305d4 100644 --- a/src/cpucounters.cpp +++ b/src/cpucounters.cpp @@ -1502,7 +1502,8 @@ void PCM::printSystemTopology() const if (hybrid == false) { - std::cerr << "Last level cache slices per socket: " << getMaxNumOfUncorePMUs(CBO_PMU_ID) << "\n"; + // TODO: deprecate this output and move it to uncore PMU section (use getMaxNumOfUncorePMUs(CBO_PMU_ID) ) + std::cerr << "Last level cache slices per socket: " << getMaxNumOfCBoxesInternal() << "\n"; } std::cerr << "Core PMU (perfmon) version: " << perfmon_version << "\n"; std::cerr << "Number of core PMU generic (programmable) counters: " << core_gen_counter_num_max << "\n"; From 5f61cdd7908a6f6725cf0eca20b66e3a09f8dbdf Mon Sep 17 00:00:00 2001 From: "Dementiev, Roman" Date: Fri, 19 Jan 2024 15:58:32 +0100 Subject: [PATCH 14/16] code re-formatting Change-Id: Ibda03e41f5f61e87ae4b92c4841a2ff183e76802 --- src/cpucounters.cpp | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/src/cpucounters.cpp b/src/cpucounters.cpp index da0305d4..01ec9f6e 100644 --- a/src/cpucounters.cpp +++ b/src/cpucounters.cpp @@ -1991,6 +1991,7 @@ void PCM::initUncorePMUsDirect() if (IVYTOWN == cpu_model || JAKETOWN == cpu_model) { uncorePMUs[s].resize(1); + { std::vector > CounterControlRegs{ std::make_shared(handle, JKTIVT_UBOX_MSR_PMON_CTL0_ADDR), std::make_shared(handle, JKTIVT_UBOX_MSR_PMON_CTL1_ADDR) @@ -2008,10 +2009,12 @@ void PCM::initUncorePMUsDirect() std::make_shared(handle, JKTIVT_UCLK_FIXED_CTR_ADDR) ) ); + } } else if (SPR == cpu_model) { uncorePMUs[s].resize(1); + { std::vector > CounterControlRegs{ std::make_shared(handle, SPR_UBOX_MSR_PMON_CTL0_ADDR), std::make_shared(handle, SPR_UBOX_MSR_PMON_CTL1_ADDR) @@ -2029,10 +2032,12 @@ void PCM::initUncorePMUsDirect() std::make_shared(handle, SPR_UCLK_FIXED_CTR_ADDR) ) ); + } } else if (isServerCPU() && hasPCICFGUncore()) { uncorePMUs[s].resize(1); + { std::vector > CounterControlRegs{ std::make_shared(handle, UBOX_MSR_PMON_CTL0_ADDR), std::make_shared(handle, UBOX_MSR_PMON_CTL1_ADDR), @@ -2050,6 +2055,7 @@ void PCM::initUncorePMUsDirect() std::make_shared(handle, UCLK_FIXED_CTR_ADDR) ) ); + } } auto addPMUsFromDiscoveryRef = [this, &handle, &s](std::vector& out, const unsigned int pmuType, const int filter0 = -1) From d6191f76370b3ebd5daa2bf4f56386345bcef49b Mon Sep 17 00:00:00 2001 From: "Dementiev, Roman" Date: Fri, 19 Jan 2024 17:20:43 +0100 Subject: [PATCH 15/16] add EMR support Change-Id: I28f96032de0a656949b7d8e02e1559509c2cd6c7 Co-authored-by: Alexander Antonov Co-authored-by: Dementiev, Roman --- .../GenuineIntel-6-CF-1.json | 144 ++++++++++ src/cpucounters.cpp | 74 ++++-- src/cpucounters.h | 27 +- src/opCode-207.txt | 45 ++++ src/pcm-iio.cpp | 247 +++++++++++++----- src/pcm-memory.cpp | 28 +- src/pcm-pcie.cpp | 1 + src/pcm-power.cpp | 5 +- src/pcm-tsx.cpp | 1 + 9 files changed, 474 insertions(+), 98 deletions(-) create mode 100644 src/PMURegisterDeclarations/GenuineIntel-6-CF-1.json create mode 100644 src/opCode-207.txt diff --git a/src/PMURegisterDeclarations/GenuineIntel-6-CF-1.json b/src/PMURegisterDeclarations/GenuineIntel-6-CF-1.json new file mode 100644 index 00000000..60f71ca6 --- /dev/null +++ b/src/PMURegisterDeclarations/GenuineIntel-6-CF-1.json @@ -0,0 +1,144 @@ +{ + "core" : { + "programmable" : { + "EventCode": {"Config": 0, "Position": 0, "Width": 8}, + "UMask": {"Config": 0, "Position": 8, "Width": 8}, + "User": {"Config": 0, "Position": 16, "Width": 1, "DefaultValue": 1}, + "OS": {"Config": 0, "Position": 17, "Width": 1, "DefaultValue": 1}, + "EdgeDetect": {"Config": 0, "Position": 18, "Width": 1}, + "PinControl": {"Config": 0, "Position": 19, "Width": 1, "DefaultValue": 0}, + "APICInt": {"Config": 0, "Position": 20, "Width": 1, "DefaultValue": 0}, + "Enable": {"Config": 0, "Position": 22, "Width": 1, "DefaultValue": 1}, + "Invert": {"Config": 0, "Position": 23, "Width": 1}, + "CounterMask": {"Config": 0, "Position": 24, "Width": 8}, + "InTX": {"Config": 0, "Position": 32, "Width": 1, "DefaultValue": 0}, + "InTXCheckpointed": {"Config": 0, "Position": 33, "Width": 1, "DefaultValue": 0}, + "MSRIndex": { + "0x1a6" : {"Config": 1, "Position": 0, "Width": 64}, + "0x1a7" : {"Config": 2, "Position": 0, "Width": 64}, + "0x3f6" : {"Config": 3, "Position": 0, "Width": 64}, + "0x3f7" : {"Config": 4, "Position": 0, "Width": 64} + } + }, + "fixed0" : { + "OS": {"Config": 0, "Position": 0, "Width": 1, "DefaultValue": 1}, + "User": {"Config": 0, "Position": 1, "Width": 1, "DefaultValue": 1}, + "EnablePMI": {"Config": 0, "Position": 3, "Width": 1, "DefaultValue": 0}, + "EventCode": {"Config": 0, "Position": -1, "__comment": "position=-1 means field ignored"}, + "UMask": {"Config": 0, "Position": -1, "__comment": "position=-1 means field ignored"}, + "EdgeDetect": {"Config": 0, "Position": -1, "__comment": "position=-1 means field ignored"}, + "Invert": {"Config": 0, "Position": -1, "__comment": "position=-1 means field ignored"}, + "CounterMask": {"Config": 0, "Position": -1, "__comment": "position=-1 means field ignored"} + }, + "fixed1" : { + "OS": {"Config": 0, "Position": 4, "Width": 1, "DefaultValue": 1}, + "User": {"Config": 0, "Position": 5, "Width": 1, "DefaultValue": 1}, + "EnablePMI": {"Config": 0, "Position": 7, "Width": 1, "DefaultValue": 0}, + "EventCode": {"Config": 0, "Position": -1, "__comment": "position=-1 means field ignored"}, + "UMask": {"Config": 0, "Position": -1, "__comment": "position=-1 means field ignored"}, + "EdgeDetect": {"Config": 0, "Position": -1, "__comment": "position=-1 means field ignored"}, + "Invert": {"Config": 0, "Position": -1, "__comment": "position=-1 means field ignored"}, + "CounterMask": {"Config": 0, "Position": -1, "__comment": "position=-1 means field ignored"} + }, + "fixed2" : { + "OS": {"Config": 0, "Position": 8, "Width": 1, "DefaultValue": 1}, + "User": {"Config": 0, "Position": 9, "Width": 1, "DefaultValue": 1}, + "EnablePMI": {"Config": 0, "Position": 11, "Width": 1, "DefaultValue": 0}, + "EventCode": {"Config": 0, "Position": -1, "__comment": "position=-1 means field ignored"}, + "UMask": {"Config": 0, "Position": -1, "__comment": "position=-1 means field ignored"}, + "EdgeDetect": {"Config": 0, "Position": -1, "__comment": "position=-1 means field ignored"}, + "Invert": {"Config": 0, "Position": -1, "__comment": "position=-1 means field ignored"}, + "CounterMask": {"Config": 0, "Position": -1, "__comment": "position=-1 means field ignored"} + }, + "fixed3" : { + "OS": {"Config": 0, "Position": 12, "Width": 1, "DefaultValue": 1}, + "User": {"Config": 0, "Position": 13, "Width": 1, "DefaultValue": 1}, + "EnablePMI": {"Config": 0, "Position": 15, "Width": 1, "DefaultValue": 0}, + "EventCode": {"Config": 0, "Position": -1, "__comment": "position=-1 means field ignored"}, + "UMask": {"Config": 0, "Position": -1, "__comment": "position=-1 means field ignored"}, + "EdgeDetect": {"Config": 0, "Position": -1, "__comment": "position=-1 means field ignored"}, + "Invert": {"Config": 0, "Position": -1, "__comment": "position=-1 means field ignored"}, + "CounterMask": {"Config": 0, "Position": -1, "__comment": "position=-1 means field ignored"}, + "PerfMetrics": {"Config": 2, "Position": 0, "Width": 1, "DefaultValue": 0, "__comment": "fake field to tell the collector to also print the L1 top-down metrics, not just raw slots count"} + } + }, + "cha" : { + "programmable" : { + "EventCode": {"Config": 0, "Position": 0, "Width": 8}, + "UMask": {"Config": 0, "Position": 8, "Width": 8}, + "TIDEnable": {"Config": 0, "Position": 16, "Width": 1, "DefaultValue": 0}, + "EdgeDetect": {"Config": 0, "Position": 18, "Width": 1, "DefaultValue": 0}, + "Threshold": {"Config": 0, "Position": 24, "Width": 8, "DefaultValue": 0}, + "UMaskExt": {"Config": 0, "Position": 32, "Width": 26}, + "TID": {"Config": 1, "Position": 0, "Width": 10, "DefaultValue": 0} + } + }, + "imc" : { + "programmable" : { + "EventCode": {"Config": 0, "Position": 0, "Width": 8}, + "UMask": {"Config": 0, "Position": 8, "Width": 8}, + "EdgeDetect": {"Config": 0, "Position": 18, "Width": 1, "DefaultValue": 0}, + "Threshold": {"Config": 0, "Position": 24, "Width": 8, "DefaultValue": 0} + } + }, + "xpi" : { + "__comment" : "this is for UPI LL and QPI LL uncore PMUs", + "programmable" : { + "EventCode": {"Config": 0, "Position": 0, "Width": 8}, + "UMask": {"Config": 0, "Position": 8, "Width": 8}, + "EdgeDetect": {"Config": 0, "Position": 18, "Width": 1, "DefaultValue": 0}, + "Threshold": {"Config": 0, "Position": 24, "Width": 8, "DefaultValue": 0}, + "UMaskExt": {"Config": 0, "Position": 32, "Width": 24} + } + }, + "m2m" : { + "programmable" : { + "EventCode": {"Config": 0, "Position": 0, "Width": 8}, + "UMask": {"Config": 0, "Position": 8, "Width": 8}, + "EdgeDetect": {"Config": 0, "Position": 18, "Width": 1, "DefaultValue": 0}, + "Threshold": {"Config": 0, "Position": 24, "Width": 8, "DefaultValue": 0}, + "UMaskExt": {"Config": 0, "Position": 32, "Width": 8} + } + }, + "m3upi" : { + "programmable" : { + "EventCode": {"Config": 0, "Position": 0, "Width": 8}, + "UMask": {"Config": 0, "Position": 8, "Width": 8}, + "EdgeDetect": {"Config": 0, "Position": 18, "Width": 1, "DefaultValue": 0}, + "Threshold": {"Config": 0, "Position": 24, "Width": 8, "DefaultValue": 0} + } + }, + "mdf" : { + "programmable" : { + "EventCode": {"Config": 0, "Position": 0, "Width": 8}, + "UMask": {"Config": 0, "Position": 8, "Width": 8}, + "EdgeDetect": {"Config": 0, "Position": 18, "Width": 1, "DefaultValue": 0}, + "Threshold": {"Config": 0, "Position": 24, "Width": 8, "DefaultValue": 0} + } + }, + "irp" : { + "programmable" : { + "EventCode": {"Config": 0, "Position": 0, "Width": 8}, + "UMask": {"Config": 0, "Position": 8, "Width": 8}, + "EdgeDetect": {"Config": 0, "Position": 18, "Width": 1, "DefaultValue": 0}, + "Threshold": {"Config": 0, "Position": 24, "Width": 8, "DefaultValue": 0} + } + }, + "pcu" : { + "programmable" : { + "EventCode": {"Config": 0, "Position": 0, "Width": 8}, + "UMask": {"Config": 0, "Position": 8, "Width": 8}, + "EdgeDetect": {"Config": 0, "Position": 18, "Width": 1, "DefaultValue": 0} + } + }, + "iio" : { + "programmable" : { + "EventCode": {"Config": 0, "Position": 0, "Width": 8}, + "UMask": {"Config": 0, "Position": 8, "Width": 8}, + "EdgeDetect": {"Config": 0, "Position": 18, "Width": 1, "DefaultValue": 0}, + "Threshold": {"Config": 0, "Position": 24, "Width": 12, "DefaultValue": 0}, + "PortMask": {"Config": 0, "Position": 36, "Width": 12}, + "FCMask": {"Config": 0, "Position": 48, "Width": 3} + } + } +} diff --git a/src/cpucounters.cpp b/src/cpucounters.cpp index 01ec9f6e..a937ccff 100644 --- a/src/cpucounters.cpp +++ b/src/cpucounters.cpp @@ -721,6 +721,7 @@ void PCM::initCStateSupportTables() case SKX: case ICX: case SPR: + case EMR: PCM_CSTATE_ARRAY(pkgCStateMsr, PCM_PARAM_PROTECT({0, 0, 0x60D, 0, 0, 0, 0x3F9, 0, 0, 0, 0}) ); case HASWELL_ULT: case BROADWELL: @@ -771,6 +772,7 @@ void PCM::initCStateSupportTables() case SNOWRIDGE: case ICX: case SPR: + case EMR: PCM_CSTATE_ARRAY(coreCStateMsr, PCM_PARAM_PROTECT({0, 0, 0, 0x3FC, 0, 0, 0x3FD, 0x3FE, 0, 0, 0}) ); case KNL: PCM_CSTATE_ARRAY(coreCStateMsr, PCM_PARAM_PROTECT({0, 0, 0, 0, 0, 0, 0x3FF, 0, 0, 0, 0}) ); @@ -1596,6 +1598,7 @@ bool PCM::detectNominalFrequency() || cpu_model == SKX || cpu_model == ICX || cpu_model == SPR + || cpu_model == EMR ) ? (100000000ULL) : (133333333ULL); nominal_frequency = ((freq >> 8) & 255) * bus_freq; @@ -1884,7 +1887,7 @@ void PCM::initUncoreObjects() #endif } } - if (cpu_model == ICX || cpu_model == SNOWRIDGE || cpu_model == SPR) + if (cpu_model == ICX || cpu_model == SNOWRIDGE || cpu_model == SPR || cpu_model == EMR) { bool failed = false; try @@ -1961,6 +1964,7 @@ void PCM::globalFreezeUncoreCountersInternal(const unsigned long long int freeze switch (cpu_model) { case SPR: + case EMR: handle->write(SPR_MSR_UNCORE_PMON_GLOBAL_CTL, freeze); break; case SKX: @@ -2011,7 +2015,7 @@ void PCM::initUncorePMUsDirect() ); } } - else if (SPR == cpu_model) + else if (SPR == cpu_model || EMR == cpu_model) { uncorePMUs[s].resize(1); { @@ -2135,6 +2139,7 @@ void PCM::initUncorePMUsDirect() ); break; case SPR: + case EMR: uncorePMUs[s].resize(1); addPMUsFromDiscoveryRef(uncorePMUs[s][0][PCU_PMU_ID], SPR_PCU_BOX_TYPE, 0xE); if (uncorePMUs[s][0][PCU_PMU_ID].empty()) @@ -2148,6 +2153,7 @@ void PCM::initUncorePMUsDirect() switch (cpu_model) { case SPR: + case EMR: uncorePMUs[s].resize(1); addPMUsFromDiscoveryRef(uncorePMUs[s][0][MDF_PMU_ID], SPR_MDF_BOX_TYPE); if (uncorePMUs[s][0][MDF_PMU_ID].empty()) @@ -2160,7 +2166,7 @@ void PCM::initUncorePMUsDirect() // init IIO addresses iioPMUs.resize(num_sockets); - switch (getCPUModel()) + switch (cpu_model) { case PCM::SKX: for (uint32 s = 0; s < (uint32)num_sockets; ++s) @@ -2222,7 +2228,9 @@ void PCM::initUncorePMUsDirect() } } break; + case PCM::SPR: + case PCM::EMR: for (uint32 s = 0; s < (uint32)num_sockets; ++s) { auto & handle = MSR[socketRefCore[s]]; @@ -2415,6 +2423,7 @@ void PCM::initUncorePMUsDirect() IRP_UNIT_CTL = SNR_IRP_UNIT_CTL; break; case SPR: + case EMR: irpStacks = SPR_M2IOSF_NUM; IRP_CTL_REG_OFFSET = SPR_IRP_CTL_REG_OFFSET; IRP_CTR_REG_OFFSET = SPR_IRP_CTR_REG_OFFSET; @@ -2500,7 +2509,7 @@ void PCM::initUncorePMUsDirect() } return UncorePMU(std::make_shared(handle, unitControlAddr - unitControlAddrAligned), CounterControlRegs, CounterValueRegs); }; - if (getCPUModel() == PCM::SPR) + if (getCPUModel() == PCM::SPR || getCPUModel() == PCM::EMR) { const auto n_units = (std::min)(uncorePMUDiscovery->getNumBoxes(SPR_CXLCM_BOX_TYPE, s), uncorePMUDiscovery->getNumBoxes(SPR_CXLDP_BOX_TYPE, s)); @@ -2970,6 +2979,7 @@ bool PCM::isCPUModelSupported(const int model_) || model_ == SKX || model_ == ICX || model_ == SPR + || model_ == EMR ); } @@ -3260,6 +3270,7 @@ PCM::ErrorCode PCM::program(const PCM::ProgramMode mode_, const void * parameter case SKX: case ICX: case SPR: + case EMR: assert(useSkylakeEvents()); coreEventDesc[0].event_number = SKL_MEM_LOAD_RETIRED_L3_MISS_EVTNR; coreEventDesc[0].umask_value = SKL_MEM_LOAD_RETIRED_L3_MISS_UMASK; @@ -4547,6 +4558,8 @@ const char * PCM::getUArchCodename(const int32 cpu_model_param) const return "Icelake-SP"; case SPR: return "Sapphire Rapids-SP"; + case EMR: + return "Emerald Rapids-SP"; } return "unknown"; } @@ -5223,7 +5236,7 @@ PCM::ErrorCode PCM::programServerUncoreLatencyMetrics(bool enable_pmm) if (enable_pmm == false) { //DDR is false - if (ICX == cpu_model || SPR == cpu_model) + if (ICX == cpu_model || SPR == cpu_model || EMR == cpu_model) { DDRConfig[0] = MC_CH_PCI_PMON_CTL_EVENT(0x80) + MC_CH_PCI_PMON_CTL_UMASK(1); // DRAM RPQ occupancy DDRConfig[1] = MC_CH_PCI_PMON_CTL_EVENT(0x10) + MC_CH_PCI_PMON_CTL_UMASK(1); // DRAM RPQ Insert @@ -5276,6 +5289,7 @@ PCM::ErrorCode PCM::programServerUncorePowerMetrics(int mc_profile, int pcu_prof switch (cpu_model) { case SPR: + case EMR: PCUCntConf[0] = PCU_MSR_PMON_CTL_EVENT(1); // clock ticks break; default: @@ -5293,6 +5307,7 @@ PCM::ErrorCode PCM::programServerUncorePowerMetrics(int mc_profile, int pcu_prof switch (cpu_model) { case SPR: + case EMR: PCUCntConf[1] = PCU_MSR_PMON_CTL_EVENT(0x35); // POWER_STATE_OCCUPANCY.C0 PCUCntConf[2] = PCU_MSR_PMON_CTL_EVENT(0x36); // POWER_STATE_OCCUPANCY.C3 PCUCntConf[3] = PCU_MSR_PMON_CTL_EVENT(0x37); // POWER_STATE_OCCUPANCY.C6 @@ -5311,9 +5326,9 @@ PCM::ErrorCode PCM::programServerUncorePowerMetrics(int mc_profile, int pcu_prof case 3: PCUCntConf[1] = PCU_MSR_PMON_CTL_EVENT(0x04); // Thermal frequency limit cycles: FREQ_MAX_LIMIT_THERMAL_CYCLES PCUCntConf[2] = PCU_MSR_PMON_CTL_EVENT(0x05); // Power frequency limit cycles: FREQ_MAX_POWER_CYCLES - PCUCntConf[3] = PCU_MSR_PMON_CTL_EVENT(0x07); // Clipped frequency limit cycles: FREQ_MAX_CURRENT_CYCLES (not supported on SKX,ICX,SNOWRIDGE,SPR) + PCUCntConf[3] = PCU_MSR_PMON_CTL_EVENT(0x07); // Clipped frequency limit cycles: FREQ_MAX_CURRENT_CYCLES (not supported on SKX,ICX,SNOWRIDGE,SPR,EMR) break; - case 4: // not supported on SKX, ICX, SNOWRIDGE, SPR + case 4: // not supported on SKX, ICX, SNOWRIDGE, SPR, EMR PCUCntConf[1] = PCU_MSR_PMON_CTL_EVENT(0x06); // OS frequency limit cycles: FREQ_MAX_OS_CYCLES PCUCntConf[2] = PCU_MSR_PMON_CTL_EVENT(0x05); // Power frequency limit cycles: FREQ_MAX_POWER_CYCLES PCUCntConf[3] = PCU_MSR_PMON_CTL_EVENT(0x07); // Clipped frequency limit cycles: FREQ_MAX_CURRENT_CYCLES (not supported on SKX and ICX and SNOWRIDGE) @@ -5327,7 +5342,8 @@ PCM::ErrorCode PCM::programServerUncorePowerMetrics(int mc_profile, int pcu_prof { PCUCntConf[1] = PCU_MSR_PMON_CTL_EVENT(0x60) + PCU_MSR_PMON_CTL_EDGE_DET ; // number of frequency transitions PCUCntConf[2] = PCU_MSR_PMON_CTL_EVENT(0x60) ; // cycles spent changing frequency: FREQ_TRANS_CYCLES - } else if (HASWELLX == cpu_model || BDX_DE == cpu_model || BDX == cpu_model || SKX == cpu_model || ICX == cpu_model || SNOWRIDGE == cpu_model || SPR == cpu_model) + } else if (HASWELLX == cpu_model || BDX_DE == cpu_model || BDX == cpu_model || SKX == cpu_model + || ICX == cpu_model || SNOWRIDGE == cpu_model || SPR == cpu_model || EMR == cpu_model) { PCUCntConf[1] = PCU_MSR_PMON_CTL_EVENT(0x74) + PCU_MSR_PMON_CTL_EDGE_DET ; // number of frequency transitions PCUCntConf[2] = PCU_MSR_PMON_CTL_EVENT(0x74) ; // cycles spent changing frequency: FREQ_TRANS_CYCLES @@ -5346,10 +5362,10 @@ PCM::ErrorCode PCM::programServerUncorePowerMetrics(int mc_profile, int pcu_prof { PCUCntConf[2] = PCU_MSR_PMON_CTL_EVENT(0x2B) + PCU_MSR_PMON_CTL_EDGE_DET ; // PC2 transitions PCUCntConf[3] = PCU_MSR_PMON_CTL_EVENT(0x2D) + PCU_MSR_PMON_CTL_EDGE_DET ; // PC6 transitions - } else if (HASWELLX == cpu_model || BDX_DE == cpu_model || BDX == cpu_model || SKX == cpu_model || ICX == cpu_model || SNOWRIDGE == cpu_model || SPR == cpu_model) + } else if (HASWELLX == cpu_model || BDX_DE == cpu_model || BDX == cpu_model || SKX == cpu_model || ICX == cpu_model || SNOWRIDGE == cpu_model || SPR == cpu_model || EMR == cpu_model) { - PCUCntConf[0] = PCU_MSR_PMON_CTL_EVENT(0x4E) ; // PC1e residenicies (not supported on SKX,ICX,SNOWRIDGE,SPR) - PCUCntConf[1] = PCU_MSR_PMON_CTL_EVENT(0x4E) + PCU_MSR_PMON_CTL_EDGE_DET ; // PC1 transitions (not supported on SKX,ICX,SNOWRIDGE,SPR) + PCUCntConf[0] = PCU_MSR_PMON_CTL_EVENT(0x4E) ; // PC1e residenicies (not supported on SKX,ICX,SNOWRIDGE,SPR,EMR) + PCUCntConf[1] = PCU_MSR_PMON_CTL_EVENT(0x4E) + PCU_MSR_PMON_CTL_EDGE_DET ; // PC1 transitions (not supported on SKX,ICX,SNOWRIDGE,SPR,EMR) PCUCntConf[2] = PCU_MSR_PMON_CTL_EVENT(0x2B) + PCU_MSR_PMON_CTL_EDGE_DET ; // PC2 transitions PCUCntConf[3] = PCU_MSR_PMON_CTL_EVENT(0x2D) + PCU_MSR_PMON_CTL_EDGE_DET ; // PC6 transitions } else @@ -6990,7 +7006,7 @@ void ServerUncorePMUs::initRegisterLocations(const PCM * pcm) PCM_PCICFG_M2M_INIT(2, SERVER) PCM_PCICFG_M2M_INIT(3, SERVER) } - else if (cpu_model == PCM::SPR) + else if (cpu_model == PCM::SPR || cpu_model == PCM::EMR) { PCM_PCICFG_QPI_INIT(0, SPR); PCM_PCICFG_QPI_INIT(1, SPR); @@ -7234,7 +7250,7 @@ void ServerUncorePMUs::initDirect(uint32 socket_, const PCM * pcm) for (auto & handle : m2mHandles) { - if (cpu_model == PCM::ICX || cpu_model == PCM::SNOWRIDGE || cpu_model == PCM::SPR) + if (cpu_model == PCM::ICX || cpu_model == PCM::SNOWRIDGE || cpu_model == PCM::SPR || cpu_model == PCM::EMR) { m2mPMUs.push_back( UncorePMU( @@ -7272,7 +7288,7 @@ void ServerUncorePMUs::initDirect(uint32 socket_, const PCM * pcm) populateM2MPMUs(groupnr, M2Mbus, cpu_model, HBM_M2MRegisterLocation, hbm_m2mPMUs); int numChannels = 0; - if (cpu_model == PCM::SPR) + if (cpu_model == PCM::SPR || cpu_model == PCM::EMR) { numChannels = 3; } @@ -7398,6 +7414,7 @@ void ServerUncorePMUs::initDirect(uint32 socket_, const PCM * pcm) { case PCM::ICX: case PCM::SPR: + case PCM::EMR: m3upiPMUs.push_back( UncorePMU( std::make_shared(handle, ICX_M3UPI_PCI_PMON_BOX_CTL_ADDR), @@ -7542,7 +7559,7 @@ void ServerUncorePMUs::initDirect(uint32 socket_, const PCM * pcm) ) ); } - else if (cpu_model == PCM::SPR) + else if (cpu_model == PCM::SPR || cpu_model == PCM::EMR) { xpiPMUs.push_back( UncorePMU( @@ -8056,6 +8073,7 @@ void ServerUncorePMUs::programServerUncoreMemoryMetrics(const ServerUncoreMemory } break; case PCM::SPR: + case PCM::EMR: { EDCCntConfig[EventPosition::READ] = MCCntConfig[EventPosition::READ] = MC_CH_PCI_PMON_CTL_EVENT(0x05) + MC_CH_PCI_PMON_CTL_UMASK(0xcf); // monitor reads on counter 0: CAS_COUNT.RD EDCCntConfig[EventPosition::WRITE] = MCCntConfig[EventPosition::WRITE] = MC_CH_PCI_PMON_CTL_EVENT(0x05) + MC_CH_PCI_PMON_CTL_UMASK(0xf0); // monitor writes on counter 1: CAS_COUNT.WR @@ -8130,6 +8148,7 @@ void ServerUncorePMUs::program() MCCntConfig[EventPosition::WRITE] = MC_CH_PCI_PMON_CTL_EVENT(0x04) + MC_CH_PCI_PMON_CTL_UMASK(0x30); // monitor writes on counter 1: CAS_COUNT.WR break; case PCM::SPR: + case PCM::EMR: EDCCntConfig[EventPosition::READ] = MCCntConfig[EventPosition::READ] = MC_CH_PCI_PMON_CTL_EVENT(0x05) + MC_CH_PCI_PMON_CTL_UMASK(0xcf); // monitor reads on counter 0: CAS_COUNT.RD EDCCntConfig[EventPosition::WRITE] = MCCntConfig[EventPosition::WRITE] = MC_CH_PCI_PMON_CTL_EVENT(0x05) + MC_CH_PCI_PMON_CTL_UMASK(0xf0); // monitor writes on counter 1: CAS_COUNT.WR break; @@ -8359,6 +8378,7 @@ void ServerUncorePMUs::program_power_metrics(int mc_profile) case PCM::ICX: case PCM::SNOWRIDGE: case PCM::SPR: + case PCM::EMR: UNC_M_POWER_CKE_CYCLES = 0x47; break; } @@ -8448,6 +8468,7 @@ void ServerUncorePMUs::programM2M() switch (cpu_model) { case PCM::SPR: + case PCM::EMR: cfg[EventPosition::M2M_CLOCKTICKS] = M2M_PCI_PMON_CTL_EVENT(0x01); // CLOCKTICKS cfg[EventPosition::PMM_READ] = M2M_PCI_PMON_CTL_EVENT(0x24) + M2M_PCI_PMON_CTL_UMASK(0x20) + UNC_PMON_CTL_UMASK_EXT(0x03); // UNC_M2M_IMC_READS.TO_PMM cfg[EventPosition::PMM_WRITE] = M2M_PCI_PMON_CTL_EVENT(0x25) + M2M_PCI_PMON_CTL_UMASK(0x80) + UNC_PMON_CTL_UMASK_EXT(0x18); // UNC_M2M_IMC_WRITES.TO_PMM @@ -8879,7 +8900,7 @@ uint64 PCM::CX_MSR_PMON_CTRY(uint32 Cbo, uint32 Ctr) const { return CX_MSR_PMON_BOX_CTL(Cbo) + SERVER_CHA_MSR_PMON_CTR0_OFFSET + Ctr; } - else if (SPR == cpu_model) + else if (SPR == cpu_model || EMR == cpu_model) { return SPR_CHA0_MSR_PMON_CTR0 + SPR_CHA_MSR_STEP * Cbo + Ctr; } @@ -8903,7 +8924,7 @@ uint64 PCM::CX_MSR_PMON_BOX_FILTER(uint32 Cbo) const { return CX_MSR_PMON_BOX_CTL(Cbo) + SERVER_CHA_MSR_PMON_BOX_FILTER_OFFSET; } - else if (SPR == cpu_model) + else if (SPR == cpu_model || EMR == cpu_model) { return SPR_CHA0_MSR_PMON_BOX_FILTER + SPR_CHA_MSR_STEP * Cbo; } @@ -8937,7 +8958,7 @@ uint64 PCM::CX_MSR_PMON_CTLY(uint32 Cbo, uint32 Ctl) const { return CX_MSR_PMON_BOX_CTL(Cbo) + SERVER_CHA_MSR_PMON_CTL0_OFFSET + Ctl; } - else if (SPR == cpu_model) + else if (SPR == cpu_model || EMR == cpu_model) { return SPR_CHA0_MSR_PMON_CTL0 + SPR_CHA_MSR_STEP * Cbo + Ctl; } @@ -8961,7 +8982,7 @@ uint64 PCM::CX_MSR_PMON_BOX_CTL(uint32 Cbo) const { return ICX_CHA_MSR_PMON_BOX_CTL[Cbo]; } - else if (SPR == cpu_model) + else if (SPR == cpu_model || EMR == cpu_model) { return SPR_CHA0_MSR_PMON_BOX_CTRL + SPR_CHA_MSR_STEP * Cbo; } @@ -9036,6 +9057,7 @@ uint32 PCM::getMaxNumOfCBoxesInternal() const switch (cpu_model) { case SPR: + case EMR: try { PciHandleType * h = getDeviceHandle(PCM_INTEL_PCI_VENDOR_ID, 0x325b); if (h) @@ -9141,6 +9163,7 @@ void PCM::programIIOCounters(uint64 rawEvents[4], int IIOStack) switch (getCPUModel()) { case PCM::SPR: + case PCM::EMR: stacks_count = SPR_M2IOSF_NUM; break; case PCM::ICX: @@ -9226,6 +9249,7 @@ void PCM::programPCIeEventGroup(eventGroup_t &eventGroup) switch (cpu_model) { case PCM::SPR: + case PCM::EMR: case PCM::ICX: case PCM::SNOWRIDGE: for (uint32 idx = 0; idx < eventGroup.size(); ++idx) @@ -9270,7 +9294,7 @@ void PCM::programCbo(const uint64 * events, const uint32 opCode, const uint32 nc { pmu.initFreeze(UNC_PMON_UNIT_CTL_FRZ_EN); - if (ICX != cpu_model && SNOWRIDGE != cpu_model && SPR != cpu_model) + if (ICX != cpu_model && SNOWRIDGE != cpu_model && SPR != cpu_model && EMR != cpu_model) programCboOpcodeFilter(opCode, pmu, nc_, 0, loc, rem); if ((HASWELLX == cpu_model || BDX_DE == cpu_model || BDX == cpu_model || SKX == cpu_model) && llc_lookup_tid_filter != 0) @@ -9516,6 +9540,7 @@ bool PCM::supportIDXAccelDev() const switch (this->getCPUModel()) { case PCM::SPR: + case PCM::EMR: retval = true; break; @@ -9746,12 +9771,12 @@ void UncorePMU::cleanup() void UncorePMU::freeze(const uint32 extra) { - *unitControl = (getCPUModel() == PCM::SPR) ? SPR_UNC_PMON_UNIT_CTL_FRZ : (extra + UNC_PMON_UNIT_CTL_FRZ); + *unitControl = (getCPUModel() == PCM::SPR || getCPUModel() == PCM::EMR) ? SPR_UNC_PMON_UNIT_CTL_FRZ : (extra + UNC_PMON_UNIT_CTL_FRZ); } void UncorePMU::unfreeze(const uint32 extra) { - *unitControl = (getCPUModel() == PCM::SPR) ? 0 : extra; + *unitControl = (getCPUModel() == PCM::SPR || getCPUModel() == PCM::EMR) ? 0 : extra; } bool UncorePMU::initFreeze(const uint32 extra, const char* xPICheckMsg) @@ -9760,7 +9785,7 @@ bool UncorePMU::initFreeze(const uint32 extra, const char* xPICheckMsg) { return true; // this PMU does not have unit control register => no op } - if (getCPUModel() == PCM::SPR) + if (getCPUModel() == PCM::SPR || getCPUModel() == PCM::EMR) { *unitControl = SPR_UNC_PMON_UNIT_CTL_FRZ; // freeze *unitControl = SPR_UNC_PMON_UNIT_CTL_FRZ + SPR_UNC_PMON_UNIT_CTL_RST_CONTROL; // freeze and reset control registers @@ -9795,7 +9820,7 @@ bool UncorePMU::initFreeze(const uint32 extra, const char* xPICheckMsg) void UncorePMU::resetUnfreeze(const uint32 extra) { - if (getCPUModel() == PCM::SPR) + if (getCPUModel() == PCM::SPR || getCPUModel() == PCM::EMR) { *unitControl = SPR_UNC_PMON_UNIT_CTL_FRZ + SPR_UNC_PMON_UNIT_CTL_RST_COUNTERS; // freeze and reset counter registers *unitControl = 0; // unfreeze @@ -9980,6 +10005,7 @@ void PCM::setupCustomCoreEventsForNuma(PCM::ExtendedCustomCoreEventDescription& conf.OffcoreResponseMsrValue[1] = 0x0730000477; break; case PCM::SPR: + case PCM::EMR: std::cout << "INFO: Monitored accesses include demand + L2 cache prefetcher, code read and RFO.\n"; // OCR.READS_TO_CORE.LOCAL_DRAM conf.OffcoreResponseMsrValue[0] = 0x104004477; diff --git a/src/cpucounters.h b/src/cpucounters.h index c386a54b..7bf216a9 100644 --- a/src/cpucounters.h +++ b/src/cpucounters.h @@ -1201,7 +1201,7 @@ class PCM_API PCM auto ctrl = pmu.counterControl[c]; if (ctrl.get() != nullptr) { - if (PCM::SPR == cpu_model) + if (PCM::SPR == cpu_model || PCM::EMR == cpu_model) { *ctrl = *curEvent; } @@ -1258,6 +1258,7 @@ class PCM_API PCM return isHWTMAL1Supported() && ( SPR == cpu_model + || EMR == cpu_model ); } @@ -1552,6 +1553,7 @@ class PCM_API PCM switch (cpu_model) { case SPR: + case EMR: case ADL: // ADL big core (GLC) case RPL: useGLCOCREvent = true; @@ -1785,6 +1787,7 @@ class PCM_API PCM ICX_D = 108, ICX = 106, SPR = 143, + EMR = 207, END_OF_MODEL_LIST = 0x0ffff }; @@ -1873,6 +1876,7 @@ class PCM_API PCM case SKX: case ICX: case SPR: + case EMR: return (serverUncorePMUs.size() && serverUncorePMUs[0].get()) ? (serverUncorePMUs[0]->getNumQPIPorts()) : 0; } return 0; @@ -1897,6 +1901,7 @@ class PCM_API PCM case SKX: case ICX: case SPR: + case EMR: case BDX: case KNL: return (serverUncorePMUs.size() && serverUncorePMUs[0].get()) ? (serverUncorePMUs[0]->getNumMC()) : 0; @@ -1923,6 +1928,7 @@ class PCM_API PCM case SKX: case ICX: case SPR: + case EMR: case BDX: case KNL: case SNOWRIDGE: @@ -1952,6 +1958,7 @@ class PCM_API PCM case SKX: case ICX: case SPR: + case EMR: case BDX: case KNL: case SNOWRIDGE: @@ -2009,6 +2016,7 @@ class PCM_API PCM case ICX: return 5; case SPR: + case EMR: return 6; } if (isAtom()) @@ -2059,6 +2067,7 @@ class PCM_API PCM case ICX: case SNOWRIDGE: case SPR: + case EMR: case KNL: return true; default: @@ -2318,6 +2327,7 @@ class PCM_API PCM || cpu_model == PCM::ADL || cpu_model == PCM::RPL || cpu_model == PCM::SPR + || cpu_model == PCM::EMR ); } @@ -2333,6 +2343,7 @@ class PCM_API PCM || cpu_model == PCM::SKX || cpu_model == PCM::ICX || cpu_model == PCM::SPR + || cpu_model == PCM::EMR ); } @@ -2353,7 +2364,8 @@ class PCM_API PCM || cpu_model == PCM::BDX || cpu_model == PCM::SKX || cpu_model == PCM::ICX - || cpu_model == PCM::SPR + || cpu_model == PCM::SPR + || cpu_model == PCM::EMR ); } @@ -2367,7 +2379,8 @@ class PCM_API PCM || cpu_model == PCM::IVYTOWN || (cpu_model == PCM::SKX && cpu_stepping > 1) || cpu_model == PCM::ICX - || cpu_model == PCM::SPR + || cpu_model == PCM::SPR + || cpu_model == PCM::EMR ); } @@ -2420,6 +2433,7 @@ class PCM_API PCM || cpu_model == PCM::ICX || cpu_model == PCM::SNOWRIDGE || cpu_model == PCM::SPR + || cpu_model == PCM::EMR ); } @@ -2436,6 +2450,7 @@ class PCM_API PCM || cpu_model == PCM::SKX || cpu_model == PCM::ICX || cpu_model == PCM::SPR + || cpu_model == PCM::EMR || useSKLPath() ); } @@ -2446,6 +2461,7 @@ class PCM_API PCM cpu_model == PCM::SKX || cpu_model == PCM::ICX || cpu_model == PCM::SPR + || cpu_model == PCM::EMR ); } @@ -2457,6 +2473,7 @@ class PCM_API PCM || cpu_model == PCM::ICX || cpu_model == PCM::SNOWRIDGE || cpu_model == SPR + || cpu_model == EMR ); } @@ -2512,6 +2529,7 @@ class PCM_API PCM || cpu_model == PCM::SKX || cpu_model == PCM::ICX || cpu_model == PCM::SPR + || cpu_model == PCM::EMR || cpu_model == PCM::BDX || cpu_model == PCM::KNL ); @@ -2530,6 +2548,7 @@ class PCM_API PCM cpu_model_ == PCM::SKX || cpu_model_ == PCM::ICX || cpu_model_ == PCM::SPR + || cpu_model_ == PCM::EMR ); } @@ -2552,6 +2571,7 @@ class PCM_API PCM cpu_model == PCM::SKX || cpu_model == PCM::ICX || cpu_model == PCM::SPR + || cpu_model == PCM::EMR ); } @@ -2565,6 +2585,7 @@ class PCM_API PCM || PCM::SKX == cpu_model || PCM::ICX == cpu_model || PCM::SPR == cpu_model + || PCM::EMR == cpu_model ; } diff --git a/src/opCode-207.txt b/src/opCode-207.txt new file mode 100644 index 00000000..f2360e2a --- /dev/null +++ b/src/opCode-207.txt @@ -0,0 +1,45 @@ +#Clockticks +#ctr=0,ev_sel=0x1,umask=0x0,en=1,ch_mask=0,fc_mask=0x0,multiplier=1,divider=1,hname=Clockticks,vname=Total +# Inbound (PCIe device DMA into system) payload events +ctr=0,ev_sel=0x83,umask=0x1,ch_mask=1,fc_mask=0x7,multiplier=4,divider=1,hname=IB write,vname=Part0 +ctr=1,ev_sel=0x83,umask=0x1,ch_mask=2,fc_mask=0x7,multiplier=4,divider=1,hname=IB write,vname=Part1 +ctr=0,ev_sel=0x83,umask=0x1,ch_mask=4,fc_mask=0x7,multiplier=4,divider=1,hname=IB write,vname=Part2 +ctr=1,ev_sel=0x83,umask=0x1,ch_mask=8,fc_mask=0x7,multiplier=4,divider=1,hname=IB write,vname=Part3 +ctr=0,ev_sel=0x83,umask=0x1,ch_mask=16,fc_mask=0x7,multiplier=4,divider=1,hname=IB write,vname=Part4 +ctr=1,ev_sel=0x83,umask=0x1,ch_mask=32,fc_mask=0x7,multiplier=4,divider=1,hname=IB write,vname=Part5 +ctr=0,ev_sel=0x83,umask=0x1,ch_mask=64,fc_mask=0x7,multiplier=4,divider=1,hname=IB write,vname=Part6 +ctr=1,ev_sel=0x83,umask=0x1,ch_mask=128,fc_mask=0x7,multiplier=4,divider=1,hname=IB write,vname=Part7 +ctr=0,ev_sel=0x83,umask=0x4,ch_mask=1,fc_mask=0x7,multiplier=4,divider=1,hname=IB read,vname=Part0 +ctr=1,ev_sel=0x83,umask=0x4,ch_mask=2,fc_mask=0x7,multiplier=4,divider=1,hname=IB read,vname=Part1 +ctr=0,ev_sel=0x83,umask=0x4,ch_mask=4,fc_mask=0x7,multiplier=4,divider=1,hname=IB read,vname=Part2 +ctr=1,ev_sel=0x83,umask=0x4,ch_mask=8,fc_mask=0x7,multiplier=4,divider=1,hname=IB read,vname=Part3 +ctr=0,ev_sel=0x83,umask=0x4,ch_mask=16,fc_mask=0x7,multiplier=4,divider=1,hname=IB read,vname=Part4 +ctr=1,ev_sel=0x83,umask=0x4,ch_mask=32,fc_mask=0x7,multiplier=4,divider=1,hname=IB read,vname=Part5 +ctr=0,ev_sel=0x83,umask=0x4,ch_mask=64,fc_mask=0x7,multiplier=4,divider=1,hname=IB read,vname=Part6 +ctr=1,ev_sel=0x83,umask=0x4,ch_mask=128,fc_mask=0x7,multiplier=4,divider=1,hname=IB read,vname=Part7 +# Outbound (CPU MMIO to the PCIe device) payload events +ctr=2,ev_sel=0x83,umask=0x80,ch_mask=1,fc_mask=0x7,multiplier=4,divider=1,hname=OB read,vname=Part0 +ctr=3,ev_sel=0x83,umask=0x80,ch_mask=2,fc_mask=0x7,multiplier=4,divider=1,hname=OB read,vname=Part1 +ctr=2,ev_sel=0x83,umask=0x80,ch_mask=4,fc_mask=0x7,multiplier=4,divider=1,hname=OB read,vname=Part2 +ctr=3,ev_sel=0x83,umask=0x80,ch_mask=8,fc_mask=0x7,multiplier=4,divider=1,hname=OB read,vname=Part3 +ctr=2,ev_sel=0x83,umask=0x80,ch_mask=16,fc_mask=0x7,multiplier=4,divider=1,hname=OB read,vname=Part4 +ctr=3,ev_sel=0x83,umask=0x80,ch_mask=32,fc_mask=0x7,multiplier=4,divider=1,hname=OB read,vname=Part5 +ctr=2,ev_sel=0x83,umask=0x80,ch_mask=64,fc_mask=0x7,multiplier=4,divider=1,hname=OB read,vname=Part6 +ctr=3,ev_sel=0x83,umask=0x80,ch_mask=128,fc_mask=0x7,multiplier=4,divider=1,hname=OB read,vname=Part7 +ctr=2,ev_sel=0xc0,umask=0x1,ch_mask=1,fc_mask=0x7,multiplier=4,divider=1,hname=OB write,vname=Part0 +ctr=3,ev_sel=0xc0,umask=0x1,ch_mask=2,fc_mask=0x7,multiplier=4,divider=1,hname=OB write,vname=Part1 +ctr=2,ev_sel=0xc0,umask=0x1,ch_mask=4,fc_mask=0x7,multiplier=4,divider=1,hname=OB write,vname=Part2 +ctr=3,ev_sel=0xc0,umask=0x1,ch_mask=8,fc_mask=0x7,multiplier=4,divider=1,hname=OB write,vname=Part3 +ctr=2,ev_sel=0xc0,umask=0x1,ch_mask=16,fc_mask=0x7,multiplier=4,divider=1,hname=OB write,vname=Part4 +ctr=3,ev_sel=0xc0,umask=0x1,ch_mask=32,fc_mask=0x7,multiplier=4,divider=1,hname=OB write,vname=Part5 +ctr=2,ev_sel=0xc0,umask=0x1,ch_mask=64,fc_mask=0x7,multiplier=4,divider=1,hname=OB write,vname=Part6 +ctr=3,ev_sel=0xc0,umask=0x1,ch_mask=128,fc_mask=0x7,multiplier=4,divider=1,hname=OB write,vname=Part7 +# IOMMU events +ctr=0,ev_sel=0x40,umask=0x01,ch_mask=0x0,fc_mask=0x0,multiplier=1,divider=1,hname=IOTLB Lookup,vname=Total +ctr=1,ev_sel=0x40,umask=0x20,ch_mask=0x0,fc_mask=0x0,multiplier=1,divider=1,hname=IOTLB Miss,vname=Total +ctr=2,ev_sel=0x40,umask=0x80,ch_mask=0x0,fc_mask=0x0,multiplier=1,divider=1,hname=Ctxt Cache Hit,vname=Total +ctr=3,ev_sel=0x41,umask=0x10,ch_mask=0x0,fc_mask=0x0,multiplier=1,divider=1,hname=256T Cache Hit,vname=Total +ctr=0,ev_sel=0x41,umask=0x08,ch_mask=0x0,fc_mask=0x0,multiplier=1,divider=1,hname=512G Cache Hit,vname=Total +ctr=1,ev_sel=0x41,umask=0x04,ch_mask=0x0,fc_mask=0x0,multiplier=1,divider=1,hname=1G Cache Hit,vname=Total +ctr=2,ev_sel=0x41,umask=0x02,ch_mask=0x0,fc_mask=0x0,multiplier=1,divider=1,hname=2M Cache Hit,vname=Total +ctr=3,ev_sel=0x41,umask=0xc0,ch_mask=0x0,fc_mask=0x0,multiplier=1,divider=1,hname=IOMMU Mem Access,vname=Total diff --git a/src/pcm-iio.cpp b/src/pcm-iio.cpp index 830aa149..6782c2ef 100644 --- a/src/pcm-iio.cpp +++ b/src/pcm-iio.cpp @@ -143,8 +143,7 @@ static const std::map snr_sad_to_pmu_id_mapping = { #define IAX_DID 0x0cfe // In-Memory Database Analytics Accelerator (IAX) #define QATV2_DID 0x4940 // QuickAssist (CPM) v2 -#define SPR_XCC_DMI_PART_ID 7 -#define SPR_MCC_DMI_PART_ID 2 +#define SPR_DMI_PART_ID 7 #define SPR_XCC_HQM_PART_ID 5 #define SPR_MCC_HQM_PART_ID 4 #define SPR_XCC_QAT_PART_ID 4 @@ -234,6 +233,124 @@ static const std::string spr_mcc_iio_stack_names[] = { "IIO Stack 10 - DMI ", }; +#define EMR_DMI_PMON_ID 7 +#define EMR_PCIE_GEN5_0_PMON_ID 1 +#define EMR_PCIE_GEN5_1_PMON_ID 2 +#define EMR_PCIE_GEN5_2_PMON_ID 3 +#define EMR_PCIE_GEN5_3_PMON_ID 8 +#define EMR_PCIE_GEN5_4_PMON_ID 6 +#define EMR_IDX0_PMON_ID 0 +#define EMR_IDX1_PMON_ID 4 +#define EMR_IDX2_PMON_ID 5 +#define EMR_IDX3_PMON_ID 9 + +const std::map emr_sad_to_pmu_id_mapping = { + { 0, EMR_DMI_PMON_ID }, + { 1, EMR_PCIE_GEN5_0_PMON_ID }, + { 2, EMR_PCIE_GEN5_1_PMON_ID }, + { 3, EMR_PCIE_GEN5_2_PMON_ID }, + { 4, EMR_PCIE_GEN5_3_PMON_ID }, + { 5, EMR_PCIE_GEN5_4_PMON_ID }, + { 8, EMR_IDX0_PMON_ID }, + { 9, EMR_IDX1_PMON_ID }, + { 10, EMR_IDX2_PMON_ID }, + { 11, EMR_IDX3_PMON_ID } +}; + +static const std::string emr_iio_stack_names[] = { + "IIO Stack 0 - IDX0 ", + "IIO Stack 1 - PCIe3 ", + "IIO Stack 2 - PCIe0 ", + "IIO Stack 3 - IDX1 ", + "IIO Stack 4 - PCIe1 ", + "IIO Stack 5 - IDX2 ", + "IIO Stack 6 - PCIe2 ", + "IIO Stack 7 - DMI", + "IIO Stack 8 - IDX3 ", + "IIO Stack 9 - PCIe4", + "IIO Stack 10 - NONE ", + "IIO Stack 11 - NONE ", +}; + +enum EagleStreamPlatformStacks +{ + esDMI = 0, + esPCIe0, + esPCIe1, + esPCIe2, + esPCIe3, + esPCIe4, + esDINO0, + esDINO1, + esDINO2, + esDINO3, + esEndOfList +}; + +const std::vector spr_xcc_stacks_enumeration = { + /* esDMI */ SPR_XCC_DMI_PMON_ID, + /* esPCIe0 */ SPR_XCC_PCIE_GEN5_0_PMON_ID, + /* esPCIe1 */ SPR_XCC_PCIE_GEN5_1_PMON_ID, + /* esPCIe2 */ SPR_XCC_PCIE_GEN5_2_PMON_ID, + /* esPCIe3 */ SPR_XCC_PCIE_GEN5_3_PMON_ID, + /* esPCIe4 */ SPR_XCC_PCIE_GEN5_4_PMON_ID, + /* esDINO0 */ SPR_XCC_IDX0_PMON_ID, + /* esDINO1 */ SPR_XCC_IDX1_PMON_ID, + /* esDINO2 */ SPR_XCC_IDX2_PMON_ID, + /* esDINO3 */ SPR_XCC_IDX3_PMON_ID, +}; + +const std::vector spr_mcc_stacks_enumeration = { + /* esDMI */ SPR_MCC_DMI_PMON_ID, + /* esPCIe0 */ SPR_MCC_PCIE_GEN5_0_PMON_ID, + /* esPCIe1 */ SPR_MCC_PCIE_GEN5_1_PMON_ID, + /* esPCIe2 */ SPR_MCC_PCIE_GEN5_2_PMON_ID, + /* esPCIe3 */ SPR_MCC_PCIE_GEN5_3_PMON_ID, + /* esPCIe4 */ SPR_MCC_PCIE_GEN5_4_PMON_ID, + /* esDINO0 */ SPR_MCC_IDX0_PMON_ID, +}; + +const std::vector emr_stacks_enumeration = { + /* esDMI */ EMR_DMI_PMON_ID, + /* esPCIe0 */ EMR_PCIE_GEN5_0_PMON_ID, + /* esPCIe1 */ EMR_PCIE_GEN5_1_PMON_ID, + /* esPCIe2 */ EMR_PCIE_GEN5_2_PMON_ID, + /* esPCIe3 */ EMR_PCIE_GEN5_3_PMON_ID, + /* esPCIe4 */ EMR_PCIE_GEN5_4_PMON_ID, + /* esDINO0 */ EMR_IDX0_PMON_ID, + /* esDINO1 */ EMR_IDX1_PMON_ID, + /* esDINO2 */ EMR_IDX2_PMON_ID, + /* esDINO3 */ EMR_IDX3_PMON_ID, +}; + +enum class EagleStreamSupportedTypes +{ + esInvalid = -1, + esSprXcc, + esSprMcc, + esEmrXcc +}; + +typedef EagleStreamSupportedTypes estype; + +const std::map> es_stacks_enumeration = { + {estype::esSprXcc, spr_xcc_stacks_enumeration}, + {estype::esSprMcc, spr_mcc_stacks_enumeration}, + {estype::esEmrXcc, emr_stacks_enumeration }, +}; + +const std::map es_stack_names = { + {estype::esSprXcc, spr_xcc_iio_stack_names}, + {estype::esSprMcc, spr_mcc_iio_stack_names}, + {estype::esEmrXcc, emr_iio_stack_names }, +}; + +const std::map> es_sad_to_pmu_id_mapping = { + {estype::esSprXcc, spr_xcc_sad_to_pmu_id_mapping}, + {estype::esSprMcc, spr_mcc_sad_to_pmu_id_mapping}, + {estype::esEmrXcc, emr_sad_to_pmu_id_mapping }, +}; + struct iio_counter : public counter { std::vector data; }; @@ -478,15 +595,17 @@ vector build_csv(vector& iios, vector &child_pci_devs, int domain, int secondary, int subordinate); public: - IPlatformMapping(uint32_t sockets_count) : m_sockets(sockets_count) {} + IPlatformMapping(int cpu_model, uint32_t sockets_count) : m_sockets(sockets_count), m_model(cpu_model) {} virtual ~IPlatformMapping() {}; static std::unique_ptr getPlatformMapping(int cpu_model, uint32_t sockets_count); virtual bool pciTreeDiscover(std::vector& iios) = 0; uint32_t socketsCount() const { return m_sockets; } + uint32_t cpuId() const { return m_model; } }; // Mapping for SkyLake Server. @@ -494,7 +613,7 @@ class PurleyPlatformMapping: public IPlatformMapping { private: void getUboxBusNumbers(std::vector& ubox); public: - PurleyPlatformMapping(uint32_t sockets_count) : IPlatformMapping(sockets_count) {} + PurleyPlatformMapping(int cpu_model, uint32_t sockets_count) : IPlatformMapping(cpu_model, sockets_count) {} ~PurleyPlatformMapping() = default; bool pciTreeDiscover(std::vector& iios) override; }; @@ -586,7 +705,7 @@ bool PurleyPlatformMapping::pciTreeDiscover(std::vector& sad_id_bus_map); }; @@ -635,7 +754,7 @@ class WhitleyPlatformMapping: public IPlatformMapping10Nm { const std::map& sad_to_pmu_id_mapping; const std::string * iio_stack_names; public: - WhitleyPlatformMapping(uint32_t sockets_count) : IPlatformMapping10Nm(sockets_count), + WhitleyPlatformMapping(int cpu_model, uint32_t sockets_count) : IPlatformMapping10Nm(cpu_model, sockets_count), icx_d(PCM::getInstance()->getCPUModelFromCPUID() == PCM::ICX_D), sad_to_pmu_id_mapping(icx_d ? icx_d_sad_to_pmu_id_mapping : icx_sad_to_pmu_id_mapping), iio_stack_names(icx_d ? icx_d_iio_stack_names : icx_iio_stack_names) @@ -764,7 +883,7 @@ bool WhitleyPlatformMapping::pciTreeDiscover(std::vector& iios) override; bool JacobsvilleAccelerators(const std::pair& sad_id_bus_pair, struct iio_stack& stack); @@ -930,22 +1049,24 @@ class EagleStreamPlatformMapping: public IPlatformMapping { private: bool getRootBuses(std::map> &root_buses); - bool stackProbeXcc(int unit, const struct bdf &address, struct iio_stacks_on_socket &iio_on_socket); - bool stackProbeMcc(int unit, const struct bdf &address, struct iio_stacks_on_socket &iio_on_socket); + bool stackProbe(int unit, const struct bdf &address, struct iio_stacks_on_socket &iio_on_socket); bool eagleStreamDmiStackProbe(int unit, const struct bdf &address, struct iio_stacks_on_socket &iio_on_socket); bool eagleStreamPciStackProbe(int unit, const struct bdf &address, struct iio_stacks_on_socket &iio_on_socket); bool eagleStreamAcceleratorStackProbe(int unit, const struct bdf &address, struct iio_stacks_on_socket &iio_on_socket); + bool isDmiStack(int unit); + bool isPcieStack(int unit); + bool isDinoStack(int unit); + std::uint32_t m_chop; + EagleStreamSupportedTypes m_es_type; +public: + EagleStreamPlatformMapping(int cpu_model, uint32_t sockets_count) : IPlatformMapping(cpu_model, sockets_count), m_chop(0), m_es_type(estype::esInvalid) {} + ~EagleStreamPlatformMapping() = default; bool setChopValue(); bool isXccPlatform() const { return m_chop == kXccChop; } const std::uint32_t kXccChop = 0b11; const std::uint32_t kMccChop = 0b01; - std::uint32_t m_chop; -public: - EagleStreamPlatformMapping(uint32_t sockets_count) : IPlatformMapping(sockets_count), m_chop(0) {} - ~EagleStreamPlatformMapping() = default; - bool pciTreeDiscover(std::vector& iios) override; }; @@ -969,6 +1090,7 @@ bool EagleStreamPlatformMapping::setChopValue() capid4 = SPR_CAPID4_GET_PHYSICAL_CHOP(capid4); if (capid4 == kXccChop || capid4 == kMccChop) { m_chop = capid4; + m_es_type = cpuId() == PCM::SPR ? (m_chop == kXccChop ? estype::esSprXcc : estype::esSprMcc) : estype::esEmrXcc; } else { std::cerr << "Unknown chop value " << capid4 << std::endl; @@ -1005,13 +1127,13 @@ bool EagleStreamPlatformMapping::getRootBuses(std::map> cpuBusId) & 0x1)) { cout << "CPU bus " << cpuBusId << " is disabled on package " << package_id << endl; continue; } - auto &sad_to_pmu_id_mapping = isXccPlatform() ? spr_xcc_sad_to_pmu_id_mapping : spr_mcc_sad_to_pmu_id_mapping; if (sad_to_pmu_id_mapping.find(cpuBusId) == sad_to_pmu_id_mapping.end()) { cerr << "Cannot map CPU bus " << cpuBusId << " to IO PMU ID" << endl; @@ -1035,12 +1157,12 @@ bool EagleStreamPlatformMapping::eagleStreamDmiStackProbe(int unit, const struct { struct iio_stack stack; stack.iio_unit_id = unit; - stack.stack_name = isXccPlatform() ? spr_xcc_iio_stack_names[unit] : spr_mcc_iio_stack_names[unit]; + stack.stack_name = es_stack_names.at(m_es_type)[unit]; stack.busno = address.busno; stack.domain = address.domainno; struct iio_bifurcated_part pch_part; struct pci *pci = &pch_part.root_pci_dev; - auto dmi_part_id = isXccPlatform() ? SPR_XCC_DMI_PART_ID : SPR_MCC_DMI_PART_ID; + auto dmi_part_id = SPR_DMI_PART_ID; pch_part.part_id = dmi_part_id; pci->bdf = address; if (!probe_pci(pci)) { @@ -1070,7 +1192,7 @@ bool EagleStreamPlatformMapping::eagleStreamPciStackProbe(int unit, const struct stack.domain = address.domainno; stack.busno = address.busno; stack.iio_unit_id = unit; - stack.stack_name = isXccPlatform() ? spr_xcc_iio_stack_names[unit] : spr_mcc_iio_stack_names[unit]; + stack.stack_name = es_stack_names.at(m_es_type)[unit]; for (int slot = 1; slot < 9; ++slot) { // Check if port is enabled @@ -1110,7 +1232,7 @@ bool EagleStreamPlatformMapping::eagleStreamAcceleratorStackProbe(int unit, cons // Channel mappings are checked on B0 stepping auto rb = address.busno; const std::vector acceleratorBuses{ rb, rb + 1, rb + 2, rb + 3 }; - stack.stack_name = isXccPlatform() ? spr_xcc_iio_stack_names[unit] : spr_mcc_iio_stack_names[unit]; + stack.stack_name = es_stack_names.at(m_es_type)[unit]; for (auto& b : acceleratorBuses) { for (auto d = 0; d < 32; ++d) { for (auto f = 0; f < 8; ++f) { @@ -1151,45 +1273,43 @@ bool EagleStreamPlatformMapping::eagleStreamAcceleratorStackProbe(int unit, cons return true; } -bool EagleStreamPlatformMapping::stackProbeXcc(int unit, const struct bdf &address, struct iio_stacks_on_socket &iio_on_socket) +bool EagleStreamPlatformMapping::isDmiStack(int unit) { - switch (unit) - { - case SPR_XCC_DMI_PMON_ID: - return eagleStreamDmiStackProbe(unit, address, iio_on_socket); - case SPR_XCC_PCIE_GEN5_0_PMON_ID: - case SPR_XCC_PCIE_GEN5_1_PMON_ID: - case SPR_XCC_PCIE_GEN5_2_PMON_ID: - case SPR_XCC_PCIE_GEN5_3_PMON_ID: - case SPR_XCC_PCIE_GEN5_4_PMON_ID: - return eagleStreamPciStackProbe(unit, address, iio_on_socket); - case SPR_XCC_IDX0_PMON_ID: - case SPR_XCC_IDX1_PMON_ID: - case SPR_XCC_IDX2_PMON_ID: - case SPR_XCC_IDX3_PMON_ID: - return eagleStreamAcceleratorStackProbe(unit, address, iio_on_socket); - default: - return false; - } + const auto& stacks_enumeration = es_stacks_enumeration.at(m_es_type); + + return stacks_enumeration[esDMI] == unit; } -bool EagleStreamPlatformMapping::stackProbeMcc(int unit, const struct bdf &address, struct iio_stacks_on_socket &iio_on_socket) +bool EagleStreamPlatformMapping::isPcieStack(int unit) { - switch (unit) - { - case SPR_MCC_DMI_PMON_ID: - return eagleStreamDmiStackProbe(unit, address, iio_on_socket); - case SPR_MCC_PCIE_GEN5_0_PMON_ID: - case SPR_MCC_PCIE_GEN5_1_PMON_ID: - case SPR_MCC_PCIE_GEN5_2_PMON_ID: - case SPR_MCC_PCIE_GEN5_3_PMON_ID: - case SPR_MCC_PCIE_GEN5_4_PMON_ID: - return eagleStreamPciStackProbe(unit, address, iio_on_socket); - case SPR_MCC_IDX0_PMON_ID: - return eagleStreamAcceleratorStackProbe(unit, address, iio_on_socket); - default: - return false; + const auto& stacks_enumeration = es_stacks_enumeration.at(m_es_type); + + return stacks_enumeration[esPCIe0] == unit || stacks_enumeration[esPCIe1] == unit || + stacks_enumeration[esPCIe2] == unit || stacks_enumeration[esPCIe3] == unit || + stacks_enumeration[esPCIe4] == unit; +} + +bool EagleStreamPlatformMapping::isDinoStack(int unit) +{ + const auto& stacks_enumeration = es_stacks_enumeration.at(m_es_type); + + return stacks_enumeration[esDINO0] == unit || stacks_enumeration[esDINO1] == unit || + stacks_enumeration[esDINO2] == unit || stacks_enumeration[esDINO3] == unit; +} + +bool EagleStreamPlatformMapping::stackProbe(int unit, const struct bdf &address, struct iio_stacks_on_socket &iio_on_socket) +{ + if (isDmiStack(unit)) { + return eagleStreamDmiStackProbe(unit, address, iio_on_socket); } + else if (isPcieStack(unit)) { + return eagleStreamPciStackProbe(unit, address, iio_on_socket); + } + else if (isDinoStack(unit)) { + return eagleStreamAcceleratorStackProbe(unit, address, iio_on_socket); + } + + return false; } bool EagleStreamPlatformMapping::pciTreeDiscover(std::vector& iios) @@ -1207,15 +1327,8 @@ bool EagleStreamPlatformMapping::pciTreeDiscover(std::vectorfirst; for (auto rb = rbs_on_socket.cbegin(); rb != rbs_on_socket.cend(); ++rb) { - if (isXccPlatform()) { - if (!stackProbeXcc(rb->first, rb->second, iio_on_socket)) { - return false; - } - } - else { - if (!stackProbeMcc(rb->first, rb->second, iio_on_socket)) { - return false; - } + if (!stackProbe(rb->first, rb->second, iio_on_socket)) { + return false; } } std::sort(iio_on_socket.stacks.begin(), iio_on_socket.stacks.end()); @@ -1250,13 +1363,14 @@ std::unique_ptr IPlatformMapping::getPlatformMapping(int cpu_m { switch (cpu_model) { case PCM::SKX: - return std::unique_ptr{new PurleyPlatformMapping(sockets_count)}; + return std::unique_ptr{new PurleyPlatformMapping(cpu_model, sockets_count)}; case PCM::ICX: - return std::unique_ptr{new WhitleyPlatformMapping(sockets_count)}; + return std::unique_ptr{new WhitleyPlatformMapping(cpu_model, sockets_count)}; case PCM::SNOWRIDGE: - return std::unique_ptr{new JacobsvillePlatformMapping(sockets_count)}; + return std::unique_ptr{new JacobsvillePlatformMapping(cpu_model, sockets_count)}; case PCM::SPR: - return std::unique_ptr{new EagleStreamPlatformMapping(sockets_count)}; + case PCM::EMR: + return std::unique_ptr{new EagleStreamPlatformMapping(cpu_model, sockets_count)}; default: return nullptr; } @@ -1271,6 +1385,7 @@ ccr* get_ccr(PCM* m, uint64_t& ccr) case PCM::ICX: case PCM::SNOWRIDGE: case PCM::SPR: + case PCM::EMR: return new icx_ccr(ccr); default: cerr << m->getCPUFamilyModelString() << " is not supported! Program aborted" << endl; diff --git a/src/pcm-memory.cpp b/src/pcm-memory.cpp index 328c3bf1..3e17b73c 100644 --- a/src/pcm-memory.cpp +++ b/src/pcm-memory.cpp @@ -38,7 +38,7 @@ constexpr uint32 max_sockets = 256; uint32 max_imc_channels = ServerUncoreCounterState::maxChannels; const uint32 max_edc_channels = ServerUncoreCounterState::maxChannels; const uint32 max_imc_controllers = ServerUncoreCounterState::maxControllers; -bool SPR_CXL = false; +bool SPR_CXL = false; // use SPR CXL monitoring implementation typedef struct memdata { float iMC_Rd_socket_chan[max_sockets][ServerUncoreCounterState::maxChannels]{}; @@ -1162,6 +1162,27 @@ class CHAEventCollector } }; break; + case PCM::EMR: + eventGroups = { + { + UNC_PMON_CTL_EVENT(0x35) + UNC_PMON_CTL_UMASK(0x01) + UNC_PMON_CTL_UMASK_EXT(0x20C80682) , // UNC_CHA_TOR_INSERTS.IA_MISS_RFO_CXL_EXP_LOCAL + UNC_PMON_CTL_EVENT(0x35) + UNC_PMON_CTL_UMASK(0x01) + UNC_PMON_CTL_UMASK_EXT(0x20C81682) , // UNC_CHA_TOR_INSERTS.IA_MISS_DRD_CXL_EXP_LOCAL + UNC_PMON_CTL_EVENT(0x35) + UNC_PMON_CTL_UMASK(0x01) + UNC_PMON_CTL_UMASK_EXT(0x20C88682) // UNC_CHA_TOR_INSERTS.IA_MISS_LLCPREFRFO_CXL_EXP_LOCAL + }, + { + UNC_PMON_CTL_EVENT(0x35) + UNC_PMON_CTL_UMASK(0x01) + UNC_PMON_CTL_UMASK_EXT(0x20CCC682) , // UNC_CHA_TOR_INSERTS.IA_MISS_RFO_PREF_CXL_EXP_LOCAL + UNC_PMON_CTL_EVENT(0x35) + UNC_PMON_CTL_UMASK(0x01) + UNC_PMON_CTL_UMASK_EXT(0x20C89682) , // UNC_CHA_TOR_INSERTS.IA_MISS_DRD_PREF_CXL_EXP_LOCAL + UNC_PMON_CTL_EVENT(0x35) + UNC_PMON_CTL_UMASK(0x01) + UNC_PMON_CTL_UMASK_EXT(0x20CCD682) , // UNC_CHA_TOR_INSERTS.IA_MISS_LLCPREFDATA_CXL_EXP_LOCAL + UNC_PMON_CTL_EVENT(0x35) + UNC_PMON_CTL_UMASK(0x40) + UNC_PMON_CTL_UMASK_EXT(0x20E87E82) , // UNC_CHA_TOR_INSERTS.RRQ_MISS_INVXTOM_CXL_EXP_LOCAL + }, + { + UNC_PMON_CTL_EVENT(0x35) + UNC_PMON_CTL_UMASK(0x40) + UNC_PMON_CTL_UMASK_EXT(0x20E80682) , // UNC_CHA_TOR_INSERTS.RRQ_MISS_RDCUR_CXL_EXP_LOCAL + UNC_PMON_CTL_EVENT(0x35) + UNC_PMON_CTL_UMASK(0x40) + UNC_PMON_CTL_UMASK_EXT(0x20E80E82) , // UNC_CHA_TOR_INSERTS.RRQ_MISS_RDCODE_CXL_EXP_LOCAL + UNC_PMON_CTL_EVENT(0x35) + UNC_PMON_CTL_UMASK(0x40) + UNC_PMON_CTL_UMASK_EXT(0x20E81682) , // UNC_CHA_TOR_INSERTS.RRQ_MISS_RDDATA_CXL_EXP_LOCAL + UNC_PMON_CTL_EVENT(0x35) + UNC_PMON_CTL_UMASK(0x40) + UNC_PMON_CTL_UMASK_EXT(0x20E82682) , // UNC_CHA_TOR_INSERTS.RRQ_MISS_RDINVOWN_OPT_CXL_EXP_LOCAL + } + }; + break; } assert(eventGroups.size() > 1); @@ -1390,9 +1411,10 @@ int mainThrows(int argc, char * argv[]) m->disableJKTWorkaround(); print_cpu_details(); + const auto cpu_model = m->getCPUModel(); if (!m->hasPCICFGUncore()) { - cerr << "Unsupported processor model (" << m->getCPUModel() << ").\n"; + cerr << "Unsupported processor model (" << cpu_model << ").\n"; if (m->memoryTrafficMetricsAvailable()) cerr << "For processor-level memory bandwidth statistics please use 'pcm' utility\n"; exit(EXIT_FAILURE); @@ -1450,7 +1472,7 @@ int mainThrows(int argc, char * argv[]) shared_ptr chaEventCollector; - SPR_CXL = (PCM::SPR == m->getCPUModel()) && (getNumCXLPorts(m) > 0); + SPR_CXL = (PCM::SPR == cpu_model || PCM::EMR == cpu_model) && (getNumCXLPorts(m) > 0); if (SPR_CXL) { chaEventCollector = std::make_shared(delay, sysCmd, mainLoop, m); diff --git a/src/pcm-pcie.cpp b/src/pcm-pcie.cpp index 36021d9a..c18f21d7 100644 --- a/src/pcm-pcie.cpp +++ b/src/pcm-pcie.cpp @@ -97,6 +97,7 @@ IPlatform *IPlatform::getPlatform(PCM *m, bool csv, bool print_bandwidth, bool p { switch (m->getCPUModel()) { case PCM::SPR: + case PCM::EMR: return new EagleStreamPlatform(m, csv, print_bandwidth, print_additional_info, delay); case PCM::ICX: case PCM::SNOWRIDGE: diff --git a/src/pcm-power.cpp b/src/pcm-power.cpp index a8fa7899..08a015d8 100644 --- a/src/pcm-power.cpp +++ b/src/pcm-power.cpp @@ -474,13 +474,13 @@ int mainThrows(int argc, char * argv[]) printHeader(true); cout << "; Thermal freq limit cycles: " << getNormalizedPCUCounter(u, 1, BeforeState[socket], AfterState[socket]) * 100. << " %" << "; Power freq limit cycles:" << getNormalizedPCUCounter(u, 2, BeforeState[socket], AfterState[socket]) * 100. << " %"; - if (cpu_model != PCM::SKX && cpu_model != PCM::ICX && cpu_model != PCM::SNOWRIDGE && cpu_model != PCM::SPR) + if(cpu_model != PCM::SKX && cpu_model != PCM::ICX && cpu_model != PCM::SNOWRIDGE && cpu_model != PCM::SPR && cpu_model != PCM::EMR) cout << "; Clipped freq limit cycles:" << getNormalizedPCUCounter(u, 3, BeforeState[socket], AfterState[socket]) * 100. << " %"; cout << "\n"; break; case 4: - if (cpu_model == PCM::SKX || cpu_model == PCM::ICX || cpu_model == PCM::SNOWRIDGE || cpu_model == PCM::SPR) + if (cpu_model == PCM::SKX || cpu_model == PCM::ICX || cpu_model == PCM::SNOWRIDGE || cpu_model == PCM::SPR || cpu_model == PCM::EMR) { cout << "This PCU profile is not supported on your processor\n"; break; @@ -516,6 +516,7 @@ int mainThrows(int argc, char * argv[]) case PCM::ICX: case PCM::SNOWRIDGE: case PCM::SPR: + case PCM::EMR: cout << "; PC2 residency: " << getPackageCStateResidency(2, BeforeState[socket], AfterState[socket]) * 100. << " %"; cout << "; PC2 transitions: " << getUncoreCounter(PCM::PCU_PMU_ID, u, 2, BeforeState[socket], AfterState[socket]) << " "; cout << "; PC3 residency: " << getPackageCStateResidency(3, BeforeState[socket], AfterState[socket]) * 100. << " %"; diff --git a/src/pcm-tsx.cpp b/src/pcm-tsx.cpp index a5fb12c5..01a18c0f 100644 --- a/src/pcm-tsx.cpp +++ b/src/pcm-tsx.cpp @@ -320,6 +320,7 @@ int mainThrows(int argc, char * argv[]) eventDefinition = iclEventDefinition; break; case PCM::SPR: + case PCM::EMR: eventDefinition = sprEventDefinition; break; } From 11fc19f043adc8e33c3d6860384b18f9ba518c57 Mon Sep 17 00:00:00 2001 From: "Dementiev, Roman" Date: Wed, 24 Jan 2024 09:09:53 +0100 Subject: [PATCH 16/16] fix the workflow Change-Id: If8a8c852efb03f83a268c91c77e75728c902a773 --- .github/workflows/ci-cov-linux-report.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/ci-cov-linux-report.yml b/.github/workflows/ci-cov-linux-report.yml index 0dfc9c2b..03f4d809 100644 --- a/.github/workflows/ci-cov-linux-report.yml +++ b/.github/workflows/ci-cov-linux-report.yml @@ -17,7 +17,7 @@ jobs: with: egress-policy: audit - - uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1 + - uses: actions/checkout@f43a0e5ff2bd294095638e18286ca9a3d1956744 # v3.6.0 with: submodules: recursive @@ -40,4 +40,4 @@ jobs: uses: actions/upload-artifact@83fd05a356d7e2593de66fc9913b3002723633cb # v3.1.1 with: name: coverity-linux-and-python-report-${{ github.sha }} - path: "*-Report.pdf" \ No newline at end of file + path: "*-Report.pdf"