Skip to content

Commit

Permalink
define flops events/metrics for AMD Zen4 (#217)
Browse files Browse the repository at this point in the history
Summary:
Pull Request resolved: #217

Define new events `zen3/4::fp_ret_x87_fp_ops.all`, `zen3::fp_ret_sse_avx_ops.all`, and `zen4::fp_ret_sse_avx_ops.all` to count scalar/vector fp ops.

Also define a new metric `fp_ops_all` that uses the AMD events above on Zen3/Zen4 hosts and falls back to Intel events on other hosts.

Reviewed By: bigzachattack

Differential Revision: D52861377

fbshipit-source-id: 4219c8b6483c6d97c639da80167abf1c46a04641
  • Loading branch information
Alston Tang authored and facebook-github-bot committed Jan 23, 2024
1 parent bfdae99 commit 18f46eb
Show file tree
Hide file tree
Showing 2 changed files with 132 additions and 0 deletions.
26 changes: 26 additions & 0 deletions hbt/src/perf_event/AmdEvents.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,32 @@ void addEvents(PmuDeviceManager& pmu_manager) {
"L3 Cache misses",
"L3 Cache misses"),
std::vector<EventId>({"l3-cache-misses"}));

// FLOPs events for AMD Zen3/Zen4
//
// Each call below registers one EventDef on the cpu PMU. The EventDef carries
// the event name, its encoding (taken from an amd_msr constant), a brief
// description and a full description. The trailing std::vector<EventId> is
// presumably a list of alias IDs for the event -- TODO confirm against
// PmuDeviceManager::addEvent.

// x87 floating-point ops: a single definition whose name carries the
// "zen3/4" prefix and whose encoding is amd_msr::kRetiredX87Flops.
pmu_manager.addEvent(
std::make_shared<EventDef>(
PmuType::cpu,
"zen3/4::fp_ret_x87_fp_ops.all",
EventDef::Encoding{.code = amd_msr::kRetiredX87Flops.val},
"Retired x87 floating-point ops of all types.",
"The number of all x87 floating-point Ops that have retired."),
std::vector<EventId>({"zen3/4-ret-x87-fp-ops-all"}));
// SSE/AVX floating-point ops, Zen3 variant: encoded with the Zen3-specific
// constant amd_msr::kZen3RetiredSseAvxFlops.
pmu_manager.addEvent(
std::make_shared<EventDef>(
PmuType::cpu,
"zen3::fp_ret_sse_avx_ops.all",
EventDef::Encoding{.code = amd_msr::kZen3RetiredSseAvxFlops.val},
"Retired SSE and AVX floating-point ops of all types.",
"The number of all SSE/AVX floating-point Ops that have retired."),
std::vector<EventId>({"zen3-ret-sse-avx-fp-ops-all"}));
// SSE/AVX floating-point ops, Zen4 variant: same descriptions as the Zen3
// event, but encoded with amd_msr::kZen4RetiredSseAvxFlops.
pmu_manager.addEvent(
std::make_shared<EventDef>(
PmuType::cpu,
"zen4::fp_ret_sse_avx_ops.all",
EventDef::Encoding{.code = amd_msr::kZen4RetiredSseAvxFlops.val},
"Retired SSE and AVX floating-point ops of all types.",
"The number of all SSE/AVX floating-point Ops that have retired."),
std::vector<EventId>({"zen4-ret-sse-avx-fp-ops-all"}));
}
} // namespace milan

Expand Down
106 changes: 106 additions & 0 deletions hbt/src/perf_event/BuiltinMetrics.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -783,6 +783,112 @@ std::shared_ptr<Metrics> makeAvailableMetrics() {
System::Permissions{},
std::vector<std::string>{}));

metrics->add(std::make_shared<MetricDesc>(
    // Metric "fp_ops_all": floating-point operation counters.
    //
    // The event set is selected per CPU architecture:
    //   - MILAN          -> zen3/4 x87 event + zen3 SSE/AVX event
    //   - BERGAMO, GENOA -> zen3/4 x87 event + zen4 SSE/AVX event
    //   - std::nullopt   -> fallback entry holding the Intel
    //                       FP_ARITH_INST_RETIRED.* events
    "fp_ops_all",
    "Total floating points operations",
    // The adjacent literals are concatenated into one description, so each
    // sentence needs its own terminator and trailing space.
    "Counts number of floating points operations of single precision type, double precision type, and bfloat types "
    "executed by the processor. "
    "For AMD, each event counts the # retired floating point operations. "
    "For Intel, each event counts the # retired instructions. "
    "Multiply # of instructions by # of operations packed inside an instruction to calculate # operations.",
    std::map<TOptCpuArch, EventRefs>{
        // AMD entry using the zen3 SSE/AVX event.
        {CpuArch::MILAN,
         EventRefs{
             EventRef{
                 "flops_scalar",
                 PmuType::cpu,
                 "zen3/4::fp_ret_x87_fp_ops.all",
                 EventExtraAttr{},
                 {}},
             EventRef{
                 "flops_vector",
                 PmuType::cpu,
                 "zen3::fp_ret_sse_avx_ops.all",
                 EventExtraAttr{},
                 {}}}},
        // AMD entries using the zen4 SSE/AVX event. BERGAMO and GENOA carry
        // identical event lists.
        {CpuArch::BERGAMO,
         EventRefs{
             EventRef{
                 "flops_scalar",
                 PmuType::cpu,
                 "zen3/4::fp_ret_x87_fp_ops.all",
                 EventExtraAttr{},
                 {}},
             EventRef{
                 "flops_vector",
                 PmuType::cpu,
                 "zen4::fp_ret_sse_avx_ops.all",
                 EventExtraAttr{},
                 {}}}},
        {CpuArch::GENOA,
         EventRefs{
             EventRef{
                 "flops_scalar",
                 PmuType::cpu,
                 "zen3/4::fp_ret_x87_fp_ops.all",
                 EventExtraAttr{},
                 {}},
             EventRef{
                 "flops_vector",
                 PmuType::cpu,
                 "zen4::fp_ret_sse_avx_ops.all",
                 EventExtraAttr{},
                 {}}}},
        // Intel by default: one event per (precision, vector width) pair.
        {std::nullopt,
         EventRefs{
             // Double precision: scalar, 128b, 256b, 512b packed.
             EventRef{
                 "instr_dp_scalar",
                 PmuType::cpu,
                 "FP_ARITH_INST_RETIRED.SCALAR_DOUBLE",
                 EventExtraAttr{},
                 {}},
             EventRef{
                 "instr_dp_128b_packed",
                 PmuType::cpu,
                 "FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE",
                 EventExtraAttr{},
                 {}},
             EventRef{
                 "instr_dp_256b_packed",
                 PmuType::cpu,
                 "FP_ARITH_INST_RETIRED.256B_PACKED_DOUBLE",
                 EventExtraAttr{},
                 {}},
             EventRef{
                 "instr_dp_512b_packed",
                 PmuType::cpu,
                 "FP_ARITH_INST_RETIRED.512B_PACKED_DOUBLE",
                 EventExtraAttr{},
                 {}},
             // Single precision: scalar, 128b, 256b, 512b packed.
             EventRef{
                 "instr_sp_scalar",
                 PmuType::cpu,
                 "FP_ARITH_INST_RETIRED.SCALAR_SINGLE",
                 EventExtraAttr{},
                 {}},
             EventRef{
                 "instr_sp_128b_packed",
                 PmuType::cpu,
                 "FP_ARITH_INST_RETIRED.128B_PACKED_SINGLE",
                 EventExtraAttr{},
                 {}},
             EventRef{
                 "instr_sp_256b_packed",
                 PmuType::cpu,
                 "FP_ARITH_INST_RETIRED.256B_PACKED_SINGLE",
                 EventExtraAttr{},
                 {}},
             EventRef{
                 "instr_sp_512b_packed",
                 PmuType::cpu,
                 "FP_ARITH_INST_RETIRED.512B_PACKED_SINGLE",
                 EventExtraAttr{},
                 {}}}}},
    // NOTE(review): presumably the default sampling period for this metric --
    // confirm against the MetricDesc constructor.
    100'000'000,
    System::Permissions{},
    std::vector<std::string>{}));

metrics->add(std::make_shared<MetricDesc>(
"cpu_clock",
"High-resolution sys and user CPU clock",
Expand Down

0 comments on commit 18f46eb

Please sign in to comment.