Skip to content

Commit

Permalink
Profiler - Migrate from rocprofv1 to rocprofv3
Browse files Browse the repository at this point in the history
Signed-off-by: Galantsev, Dmitrii <[email protected]>

Fixed RDC for Rocprofv3

Updates

Signed-off-by: adapryor <[email protected]>
Change-Id: Ic9162bacf1322b265e6bbcdd9fbb9b1fdef414fd

last updates

Change-Id: I12e168501327c5e4cff8a9273b0512fb0e098fe7

comment

Change-Id: I61da61e66dcc017ec46f98ff4c90fb064c9679e8
  • Loading branch information
dmitrii-galantsev authored and adam360x committed Dec 20, 2024
1 parent 35eb8e7 commit 7c91a07
Show file tree
Hide file tree
Showing 11 changed files with 540 additions and 259 deletions.
6 changes: 6 additions & 0 deletions .github/workflows/main.yml
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,12 @@ jobs:
rocm-smi-lib \
rocm-validation-suite \
rocprofiler-dev \
rocprofiler-plugins \
rocprofiler-register \
rocprofiler-sdk \
hip-dev \
hip-runtime-amd \
hipcc \
build-essential \
ccache \
cmake \
Expand Down
21 changes: 7 additions & 14 deletions include/rdc_modules/rdc_rocp/RdcRocpBase.h
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ THE SOFTWARE.

#ifndef RDC_MODULES_RDC_ROCP_RDCROCPBASE_H_
#define RDC_MODULES_RDC_ROCP_RDCROCPBASE_H_
#include <rocprofiler/rocprofiler.h>
#include <rocprofiler-sdk/agent.h>

#include <cstdint>
#include <map>
Expand All @@ -32,16 +32,11 @@ THE SOFTWARE.

#include "rdc/rdc.h"
#include "rdc_lib/RdcTelemetryLibInterface.h"
#include "rdc_modules/rdc_rocp/RdcRocpCounterSampler.h"

namespace amd {
namespace rdc {

typedef struct {
hsa_agent_t* agents;
unsigned count;
unsigned capacity;
} hsa_agent_arr_t;

/// Common interface for RocP tests and samples
class RdcRocpBase {
public:
Expand All @@ -68,18 +63,16 @@ class RdcRocpBase {
protected:
private:
typedef std::pair<uint32_t, rdc_field_t> rdc_field_pair_t;
static const size_t buffer_length_k = 5;
/**
* @brief Tweak this to change for how long each metric is collected
*/
static const uint32_t collection_duration_us_k = 10000;

double read_feature(rocprofiler_t* context, uint32_t gpu_index);
double read_feature(rocprofiler_record_counter_t* record, uint32_t gpu_index);
double run_profiler(uint32_t gpu_index, rdc_field_t field);

hsa_agent_arr_t agent_arr = {};
std::vector<hsa_queue_t*> queues;
std::map<uint32_t, rocprofiler_feature_t> gpuid_to_feature;
std::vector<rocprofiler_agent_v0_t> agents = {};
std::vector<std::shared_ptr<CounterSampler>> samplers = {};
std::map<rdc_field_t, const char*> field_to_metric = {};

// these fields must be divided by time passed
Expand All @@ -89,9 +82,9 @@ class RdcRocpBase {
};

/**
* @brief Convert from rocmtools status into RDC status
* @brief Convert from profiler status into RDC status
*/
rdc_status_t Rocp2RdcError(hsa_status_t status);
rdc_status_t Rocp2RdcError(rocprofiler_status_t status);
};

} // namespace rdc
Expand Down
92 changes: 92 additions & 0 deletions include/rdc_modules/rdc_rocp/RdcRocpCounterSampler.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,92 @@
// MIT License
//
// Copyright (c) 2025 Advanced Micro Devices, Inc. All rights reserved.
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in all
// copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
// SOFTWARE.

#ifndef RDC_MODULES_RDC_ROCP_RDCROCPCOUNTERSAMPLER_H_
#define RDC_MODULES_RDC_ROCP_RDCROCPCOUNTERSAMPLER_H_

#include <rocprofiler-sdk/fwd.h>
#include <rocprofiler-sdk/registration.h>
#include <rocprofiler-sdk/rocprofiler.h>

#include <cstddef>
#include <cstdint>
#include <map>
#include <memory>
#include <string>
#include <unordered_map>
#include <vector>

namespace amd {
namespace rdc {
/**
 * @brief Samples rocprofiler-sdk counters for a single agent.
 *
 * Every instance is bound to the agent given at construction and keeps the
 * profiler handles (context, buffer, active profile) it works with as members.
 * The static helpers enumerate agents, list the counters an agent supports,
 * and expose the shared list of samplers.
 */
class CounterSampler {
 public:
  /// Set up system profiling for an agent.
  explicit CounterSampler(rocprofiler_agent_id_t agent);

  ~CounterSampler();

  // This class declares a destructor and stores profiler handles by value, so
  // an implicit copy would leave two objects referring to the same handles.
  // Copying is therefore disabled (which also suppresses the implicit moves);
  // share instances through std::shared_ptr, as get_samplers() does.
  CounterSampler(const CounterSampler&) = delete;
  CounterSampler& operator=(const CounterSampler&) = delete;

  /// Decode the counter name of a record.
  /// @return reference to the name; NOTE(review): the lifetime of the
  ///         referenced string is decided by the implementation — confirm
  ///         before storing the reference.
  const std::string& decode_record_name(const rocprofiler_record_counter_t& rec) const;

  /// Get the dimensions of a record (which CU/SE/etc. the counter is for).
  /// This is a high-cost operation; callers should cache the result if
  /// possible.
  /// @return map from a name to a position (presumably dimension name to the
  ///         record's index in that dimension — confirm against the
  ///         implementation).
  std::unordered_map<std::string, size_t> get_record_dimensions(
      const rocprofiler_record_counter_t& rec);

  /// Sample the counter values for a set of counters.
  /// @param counters names of the counters to sample
  /// @param out      receives the sampled records
  /// @param duration sampling duration; units are not stated in this header
  ///                 (presumably microseconds — TODO confirm)
  void sample_counter_values(const std::vector<std::string>& counters,
                             std::vector<rocprofiler_record_counter_t>& out, uint64_t duration);

  /// Agent this sampler was constructed for.
  rocprofiler_agent_id_t get_agent() const { return agent_; }

  /// Get the supported counters for an agent, keyed by counter name.
  static std::unordered_map<std::string, rocprofiler_counter_id_t> get_supported_counters(
      rocprofiler_agent_id_t agent);

  /// Get the available agents on the system.
  static std::vector<rocprofiler_agent_v0_t> get_available_agents();

  /// Access the shared list of samplers (presumably the static samplers_
  /// member below — the definition lives in the .cc file).
  static std::vector<std::shared_ptr<CounterSampler>>& get_samplers();

 private:
  // Profiler handles; all start out value-initialized.
  rocprofiler_agent_id_t agent_ = {};
  rocprofiler_context_id_t ctx_ = {};
  rocprofiler_buffer_id_t buf_ = {};
  // Value-initialization zeroes the handle, same as the other members above
  // (and avoids a C++20-only designated initializer).
  rocprofiler_profile_config_id_t profile_ = {};

  // Profile configurations keyed by a list of strings (presumably the
  // requested counter names, so repeated requests can reuse a profile).
  std::map<std::vector<std::string>, rocprofiler_profile_config_id_t> cached_profiles_;
  // NOTE(review): looks like a counter-handle -> record-count cache used by
  // get_counter_size(); confirm against the implementation.
  std::map<uint64_t, uint64_t> profile_sizes_;

  // Internal function used to set the profile for the agent when start_context is called
  void set_profile(rocprofiler_context_id_t ctx, rocprofiler_agent_set_profile_callback_t cb) const;

  // Get the size of a counter in number of records
  size_t get_counter_size(rocprofiler_counter_id_t counter);

  // Get the dimensions of a counter
  std::vector<rocprofiler_record_dimension_info_t> get_counter_dimensions(
      rocprofiler_counter_id_t counter);

  // Samplers shared across the process; declared here, defined in the .cc file.
  static std::vector<std::shared_ptr<CounterSampler>> samplers_;
};

} // namespace rdc
} // namespace amd

#endif // RDC_MODULES_RDC_ROCP_RDCROCPCOUNTERSAMPLER_H_
3 changes: 2 additions & 1 deletion python_binding/RdcReader.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,8 @@
rdc_field_t.RDC_FI_POWER_USAGE,
rdc_field_t.RDC_FI_GPU_CLOCK,
rdc_field_t.RDC_FI_GPU_UTIL,
rdc_field_t.RDC_FI_GPU_TEMP
rdc_field_t.RDC_FI_GPU_TEMP,
rdc_field_t.RDC_FI_GPU_MEMORY_USAGE
]

default_unit_coverter = {
Expand Down
3 changes: 2 additions & 1 deletion python_binding/rdc_collectd.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,8 @@
rdc_field_t.RDC_FI_POWER_USAGE,
rdc_field_t.RDC_FI_GPU_CLOCK,
rdc_field_t.RDC_FI_GPU_UTIL,
rdc_field_t.RDC_FI_GPU_TEMP
rdc_field_t.RDC_FI_GPU_TEMP,
rdc_field_t.RDC_FI_GPU_MEMORY_USAGE,
]


Expand Down
1 change: 1 addition & 0 deletions python_binding/rdc_prometheus.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
rdc_field_t.RDC_FI_GPU_TEMP,
rdc_field_t.RDC_FI_PROF_ACTIVE_CYCLES,
rdc_field_t.RDC_FI_PROF_ACTIVE_WAVES,
rdc_field_t.RDC_FI_PROF_OCCUPANCY_PERCENT,
]

class PrometheusReader(RdcReader):
Expand Down
17 changes: 8 additions & 9 deletions rdc_libs/rdc/src/RdcRocpLib.cc
Original file line number Diff line number Diff line change
Expand Up @@ -168,7 +168,7 @@ std::string RdcRocpLib::get_rocm_path() {

std::string line;
while (getline(file, line)) {
size_t index_end = line.find("librocprofiler64.so");
size_t index_end = line.find("librocprofiler-register.so");
size_t index_start = index_end;
if (index_end == std::string::npos) {
// no library on this line
Expand All @@ -189,28 +189,27 @@ std::string RdcRocpLib::get_rocm_path() {
}

rdc_status_t RdcRocpLib::set_rocprofiler_path() {
// rocprofiler requires ROCP_METRICS to be set
std::string rocprofiler_metrics_path =
get_rocm_path() + "/libexec/rocprofiler/counters/derived_counters.xml";
// rocprofiler requires ROCPROFILER_METRICS_PATH to be set
std::string rocprofiler_metrics_path = get_rocm_path() + "/share/rocprofiler-sdk/";

// set rocm prefix
int result = setenv("ROCP_METRICS", rocprofiler_metrics_path.c_str(), 0);
int result = setenv("ROCPROFILER_METRICS_PATH", rocprofiler_metrics_path.c_str(), 0);
if (result != 0) {
RDC_LOG(RDC_ERROR, "setenv ROCP_METRICS failed! " << result);
RDC_LOG(RDC_ERROR, "setenv ROCPROFILER_METRICS_PATH failed! " << result);
return RDC_ST_PERM_ERROR;
}

// check that env exists
const char* rocprofiler_metrics_env = getenv("ROCP_METRICS");
const char* rocprofiler_metrics_env = getenv("ROCPROFILER_METRICS_PATH");
if (rocprofiler_metrics_env == nullptr) {
RDC_LOG(RDC_ERROR, "ROCP_METRICS is not set!");
RDC_LOG(RDC_ERROR, "ROCPROFILER_METRICS_PATH is not set!");
return RDC_ST_NO_DATA;
}

// check that file can be accessed
std::ifstream test_file(rocprofiler_metrics_env);
if (!test_file.good()) {
RDC_LOG(RDC_ERROR, "failed to open ROCP_METRICS: " << rocprofiler_metrics_env);
RDC_LOG(RDC_ERROR, "failed to open ROCPROFILER_METRICS_PATH: " << rocprofiler_metrics_env);
return RDC_ST_FILE_ERROR;
}

Expand Down
13 changes: 7 additions & 6 deletions rdc_libs/rdc_modules/rdc_rocp/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -9,30 +9,31 @@ set(RDC_ROCP_LIB_COMPONENT "lib${RDC_ROCP_LIB}")
set(RDC_ROCP_LIB_SRC_LIST
"${BOOTSTRAP_LIB_SRC_DIR}/RdcLogger.cc"
"${SRC_DIR}/RdcTelemetryLib.cc"
"${SRC_DIR}/RdcRocpCounterSampler.cc"
"${SRC_DIR}/RdcRocpBase.cc")
set(RDC_ROCP_LIB_INC_LIST
"${PROJECT_SOURCE_DIR}/include/rdc/rdc.h"
"${RDC_LIB_INC_DIR}/RdcDiagnosticLibInterface.h"
"${RDC_LIB_INC_DIR}/rdc_common.h"
"${RDC_LIB_INC_DIR}/RdcLogger.h"
"${INC_DIR}/RdcRocpBase.h")
"${INC_DIR}/RdcRocpBase.h"
"${INC_DIR}/RdcRocpCounterSampler.h")

if(BUILD_PROFILER)
message("Build librdc_rocp.so is enabled, make sure ROCmTools is installed.")

message("RDC_ROCP_LIB_INC_LIST=${RDC_ROCP_LIB_INC_LIST}")

set(ROCPROFILER_LIB rocprofiler::rocprofiler)
# below provides rocprofiler::rocprofiler package
include(Findrocprofiler)

find_package(rocprofiler-sdk
HINTS ${ROCM_DIR}/lib/cmake
CONFIGURE REQUIRED)
find_package(hsa-runtime64
NAMES hsa-runtime64
HINTS ${ROCM_DIR}/lib/cmake
CONFIGURE REQUIRED)
set(RDC_LIB_MODULES ${RDC_LIB_MODULES} ${RDC_ROCP_LIB} PARENT_SCOPE)
add_library(${RDC_ROCP_LIB} SHARED ${RDC_ROCP_LIB_SRC_LIST} ${RDC_ROCP_LIB_INC_LIST})
target_link_libraries(${RDC_ROCP_LIB} PRIVATE ${RDC_LIB} ${BOOTSTRAP_LIB} hsa-runtime64::hsa-runtime64 rocprofiler::rocprofiler pthread dl)
target_link_libraries(${RDC_ROCP_LIB} PRIVATE ${RDC_LIB} ${BOOTSTRAP_LIB} hsa-runtime64::hsa-runtime64 rocprofiler-sdk::rocprofiler-sdk pthread dl)
target_include_directories(${RDC_ROCP_LIB} PRIVATE
"${PROJECT_SOURCE_DIR}"
"${PROJECT_SOURCE_DIR}/include"
Expand Down
Loading

0 comments on commit 7c91a07

Please sign in to comment.