Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix issue#66 #75

Open
wants to merge 19 commits into
base: amd-master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
19 commits
Select commit Hold shift + click to select a range
48cc785
SWDEV-282961: dependency arrows missing
chrispaquot Jun 23, 2021
9f0ca10
Add support for gfx90a
Jun 16, 2021
a369af3
SWDEV-282961 Skip barrier events. Process hipMemSet events
rkebichi May 19, 2021
c55464d
Merge remote-tracking branch 'remotes/origin/amd-staging' into amd-ma…
Sushma1920 Jul 8, 2021
2f18979
Add dependency on rocm-core
amd-isparry Jul 7, 2021
613fc6e
Merge remote-tracking branch 'remotes/origin/amd-staging' into amd-ma…
Jul 21, 2021
7707d60
Merge remote-tracking branch 'remotes/origin/amd-staging' into amd-ma…
Aug 3, 2021
4430b04
Merge remote-tracking branch 'remotes/origin/amd-staging' into amd-ma…
Aug 14, 2021
4c3053c
Merge "Add dependency on rocm-core" into amd-master
amd-isparry Aug 17, 2021
d630e60
Merge remote-tracking branch 'remotes/origin/amd-staging' into amd-ma…
Aug 26, 2021
c764803
Merge remote-tracking branch 'remotes/origin/amd-staging' into amd-ma…
Sep 2, 2021
b641632
SWDEV-296922 : Incorrect rounding due to integer division in rocprofi…
cyamder Aug 12, 2021
bb3fdb0
SWDEV-283942 SWDEV-292075
cyamder Sep 30, 2021
2f8bd45
SWDEV-296922 : Incorrect rounding due to integer division in rocprofi…
cyamder Aug 12, 2021
f2df81a
Install merge_traces.sh in $prefix/bin
lmoriche Oct 6, 2021
fc19a41
SWDEV-283942 SWDEV-292075: Fixed corrupted multithread map handling
cyamder Sep 30, 2021
e140f47
Merge "SWDEV-283942 SWDEV-292075" into release/rocm-rel-4.5
zhang2amd Oct 8, 2021
268e596
SWDEV-283942 : when --ctx-limit option is enabled, profiler hangs if
kikimych Dec 2, 2021
3395f79
github issue#66
kikimych Feb 16, 2022
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 8 additions & 2 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -152,6 +152,7 @@ install ( FILES
DESTINATION include/${DEST_NAME} )
# rpl_run.sh tblextr.py txt2xml.sh
install ( FILES
${CMAKE_CURRENT_SOURCE_DIR}/bin/merge_traces.sh
${CMAKE_CURRENT_SOURCE_DIR}/bin/rpl_run.sh
${CMAKE_CURRENT_SOURCE_DIR}/bin/txt2xml.sh
${CMAKE_CURRENT_SOURCE_DIR}/bin/txt2params.py
Expand Down Expand Up @@ -203,7 +204,7 @@ else()
endif()
message ( "Using CPACK_DEBIAN_PACKAGE_RELEASE ${CPACK_DEBIAN_PACKAGE_RELEASE}" )
set ( CPACK_DEBIAN_FILE_NAME "DEB-DEFAULT" )
set ( CPACK_DEBIAN_PACKAGE_DEPENDS "hsa-rocr-dev" )
set ( CPACK_DEBIAN_PACKAGE_DEPENDS "hsa-rocr-dev, rocm-core" )
## Process the Debian install/remove scripts to update the CPACK variables
configure_file ( ${CMAKE_CURRENT_SOURCE_DIR}/DEBIAN/postinst.in DEBIAN/postinst @ONLY )
configure_file ( ${CMAKE_CURRENT_SOURCE_DIR}/DEBIAN/prerm.in DEBIAN/prerm @ONLY )
Expand All @@ -228,12 +229,17 @@ if ( PROC_RESULT EQUAL "0" AND NOT EVAL_RESULT STREQUAL "" )
string ( APPEND CPACK_RPM_PACKAGE_RELEASE "%{?dist}" )
endif()
set ( CPACK_RPM_FILE_NAME "RPM-DEFAULT" )
set ( CPACK_RPM_PACKAGE_DEPENDS "hsa-rocr-dev" )
set ( CPACK_RPM_PACKAGE_REQUIRES "hsa-rocr-dev, rocm-core" )
## Process the Rpm install/remove scripts to update the CPACK variables
configure_file ( "${CMAKE_CURRENT_SOURCE_DIR}/RPM/post.in" RPM/post @ONLY )
configure_file ( "${CMAKE_CURRENT_SOURCE_DIR}/RPM/postun.in" RPM/postun @ONLY )

set ( CPACK_RPM_POST_INSTALL_SCRIPT_FILE "${CMAKE_CURRENT_BINARY_DIR}/RPM/post" )
set ( CPACK_RPM_POST_UNINSTALL_SCRIPT_FILE "${CMAKE_CURRENT_BINARY_DIR}/RPM/postun" )
# Remove dependency on rocm-core if -DROCM_DEP_ROCMCORE=ON not given to cmake
if(NOT ROCM_DEP_ROCMCORE)
string(REGEX REPLACE ",? ?rocm-core" "" CPACK_RPM_PACKAGE_REQUIRES ${CPACK_RPM_PACKAGE_REQUIRES})
string(REGEX REPLACE ",? ?rocm-core" "" CPACK_DEBIAN_PACKAGE_DEPENDS ${CPACK_DEBIAN_PACKAGE_DEPENDS})
endif()

include ( CPack )
5 changes: 4 additions & 1 deletion bin/tblextr.py
Original file line number Diff line number Diff line change
Expand Up @@ -115,7 +115,10 @@ def parse_res(infile):
beg_pattern = re.compile("^dispatch\[(\d*)\], (.*) kernel-name\(\"([^\"]*)\"\)")
prop_pattern = re.compile("([\w-]+)\((\w+)\)");
ts_pattern = re.compile(", time\((\d*),(\d*),(\d*),(\d*)\)")
var_pattern = re.compile("^\s*([^\s]*)\s+\((\d*)\)")
# The var pattern below matches a variable name and its value on a single
# line of text, for example "WRITE_SIZE (0.2500000000)" or
# "GRBM_GUI_ACTIVE (27867)"; the name may carry an index, as in "TA_TA_BUSY[0]"
var_pattern = re.compile("^\s*([a-zA-Z0-9_]+(?:\[\d+\])?)\s+\((\d+(?:\.\d+)?)\)")

dispatch_number = 0
for line in inp.readlines():
Expand Down
6 changes: 3 additions & 3 deletions src/core/context.h
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,7 @@ inline unsigned align_size(unsigned size, unsigned alignment) {
template <class Map> class MetricArgs : public xml::args_cache_t {
public:
MetricArgs(const Map& map) : map_(map) {}
bool Lookup(const std::string& name, uint64_t& result) const {
bool Lookup(const std::string& name, double& result) const {
rocprofiler_feature_t* info = NULL;
auto it = map_.find(name);
if (it == map_.end()) EXC_RAISING(HSA_STATUS_ERROR, "var '" << name << "' is not found");
Expand Down Expand Up @@ -311,8 +311,8 @@ class Context {
if (it == info_map_.end())
EXC_RAISING(HSA_STATUS_ERROR, "metric '" << name << "', rocprofiler info is not found " << this);
rocprofiler_feature_t* info = it->second;
info->data.result_int64 = expr->Eval(args);
info->data.kind = ROCPROFILER_DATA_KIND_INT64;
info->data.result_double = expr->Eval(args);
info->data.kind = ROCPROFILER_DATA_KIND_DOUBLE;
}
}
}
Expand Down
9 changes: 2 additions & 7 deletions src/core/gpu_command.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -97,13 +97,10 @@ struct gpu_cmd_fncomp_t {
};
typedef std::map<gpu_cmd_key_t, gpu_cmd_entry_t, gpu_cmd_fncomp_t> gpu_cmd_map_t;

typedef std::mutex gpu_cmd_mutex_t;
gpu_cmd_mutex_t gpu_cmd_mutex;

size_t GetGpuCommand(gpu_cmd_op_t op,
const rocprofiler::util::AgentInfo* agent_info,
packet_t** command_out) {
static gpu_cmd_map_t* map = NULL;
thread_local gpu_cmd_map_t map;

// Getting chip-id
uint32_t chip_id = 0;
Expand All @@ -112,9 +109,7 @@ size_t GetGpuCommand(gpu_cmd_op_t op,
if (status != HSA_STATUS_SUCCESS) EXC_RAISING(status, "hsa_agent_get_info failed");

// Query/create a command
std::lock_guard<gpu_cmd_mutex_t> lck(gpu_cmd_mutex);
if (map == NULL) map = new gpu_cmd_map_t;
auto ret = map->insert({gpu_cmd_key_t{op, chip_id}, gpu_cmd_entry_t{}});
auto ret = map.insert({gpu_cmd_key_t{op, chip_id}, gpu_cmd_entry_t{}});
gpu_cmd_map_t::iterator it = ret.first;
if (ret.second) {
it->second.size = CreateGpuCommand(op, agent_info, it->second.command, Profile::LEGACY_SLOT_SIZE_PKT);
Expand Down
36 changes: 16 additions & 20 deletions src/core/rocprofiler.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -400,42 +400,38 @@ PUBLIC_API bool OnLoad(HsaApiTable* table, uint64_t runtime_version, uint64_t fa
ONLOAD_TRACE_BEG();
rocprofiler::SaveHsaApi(table);
rocprofiler::ProxyQueue::InitFactory();
bool intercept_mode = false;

// Checking environment to enable intercept mode
const char* intercept_env = getenv("ROCP_HSA_INTERCEPT");

int intercept_env_value = 0;
if (intercept_env != NULL) {
switch (atoi(intercept_env)) {
// Intercepting disabled
intercept_env_value = atoi(intercept_env);

switch (intercept_env_value) {
case 0:
intercept_mode = false;
rocprofiler::InterceptQueue::TrackerOn(false);
break;
// Intercepting enabled without timestamping
case 1:
intercept_mode = true;
// 0: Intercepting disabled
// 1: Intercepting enabled without timestamping
rocprofiler::InterceptQueue::TrackerOn(false);
break;
// Intercepting enabled with timestamping
case 2:
intercept_mode = true;
// Intercepting enabled with timestamping
rocprofiler::InterceptQueue::TrackerOn(true);
break;
default:
ERR_LOGGING("Bad ROCP_HSA_INTERCEPT env var value (" << intercept_env << ")");
ERR_LOGGING("Bad ROCP_HSA_INTERCEPT env var value (" << intercept_env << "): " <<
"valid values are 0 (standalone), 1 (intercepting without timestamp), 2 (intercepting with timestamp)");
return false;
}
}

// always enable executable tracking
rocprofiler::util::HsaRsrcFactory::EnableExecutableTracking(table);

// Loading a tool lib and setting of intercept mode
const uint32_t intercept_mode_mask = rocprofiler::LoadTool();
if (intercept_mode_mask & rocprofiler::DISPATCH_INTERCEPT_MODE) {
intercept_mode = true;
}
if (intercept_mode_mask & rocprofiler::CODE_OBJ_TRACKING_MODE) {
if (intercept_mode == false) EXC_RAISING(HSA_STATUS_ERROR, "code objects tracking without intercept mode enabled");
rocprofiler::util::HsaRsrcFactory::EnableExecutableTracking(table);
}

if (intercept_mode_mask & rocprofiler::MEMCOPY_INTERCEPT_MODE) {
hsa_status_t status = hsa_amd_profiling_async_copy_enable(true);
if (status != HSA_STATUS_SUCCESS) EXC_ABORT(status, "hsa_amd_profiling_async_copy_enable");
Expand All @@ -453,14 +449,14 @@ PUBLIC_API bool OnLoad(HsaApiTable* table, uint64_t runtime_version, uint64_t fa
}

// HSA intercepting
if (intercept_mode) {
if (intercept_env_value != 0) {
rocprofiler::ProxyQueue::HsaIntercept(table);
rocprofiler::InterceptQueue::HsaIntercept(table);
} else {
rocprofiler::StandaloneIntercept();
}

ONLOAD_TRACE("end intercept_mode(" << std::hex << intercept_mode << ")" <<
ONLOAD_TRACE("end intercept_mode(" << std::hex << intercept_env_value << ")" <<
" intercept_mode_mask(" << std::hex << intercept_mode_mask << ")" << std::dec);
return true;
}
Expand Down
6 changes: 6 additions & 0 deletions src/util/hsa_rsrc_factory.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -737,6 +737,12 @@ const char* HsaRsrcFactory::GetKernelNameRef(uint64_t addr) {

void HsaRsrcFactory::EnableExecutableTracking(HsaApiTable* table) {
std::lock_guard<mutex_t> lck(mutex_);
// Prevent infinite recursion
//
if (hsa_api_.hsa_executable_freeze == hsa_executable_freeze &&
hsa_api_.hsa_executable_destroy == hsa_executable_destroy)
return;

executable_tracking_on_ = true;
table->core_->hsa_executable_freeze_fn = hsa_executable_freeze_interceptor;
table->core_->hsa_executable_destroy_fn = hsa_executable_destroy_interceptor;
Expand Down
5 changes: 3 additions & 2 deletions src/xml/expr.h
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@ THE SOFTWARE.
#include <iostream>
#include <sstream>
#include <string.h>
#include <float.h>

namespace xml {
class exception_t : public std::exception {
Expand All @@ -45,8 +46,8 @@ class div_zero_exception_t : public exception_t {
explicit div_zero_exception_t(const std::string& msg) : exception_t("Divide by zero exception " + msg) {}
};

typedef uint64_t args_t;
static const args_t ARGS_MAX = UINT64_MAX;
typedef double args_t;
static const args_t ARGS_MAX = DBL_MAX;
typedef std::map<std::string, args_t> args_map_t;
class Expr;

Expand Down
3 changes: 3 additions & 0 deletions test/app/intercept_test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -124,6 +124,9 @@ void dump_context_entry(context_entry_t* entry, rocprofiler_feature_t* features,
case ROCPROFILER_DATA_KIND_INT64:
fprintf(stdout, "= (%lu)\n", p->data.result_int64);
break;
case ROCPROFILER_DATA_KIND_DOUBLE:
fprintf(stdout, "= (%lf)\n", p->data.result_double);
break;
default:
fprintf(stderr, "Undefined data kind(%u)\n", p->data.kind);
abort();
Expand Down
3 changes: 3 additions & 0 deletions test/app/standalone_test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,9 @@ void print_features(rocprofiler_feature_t* feature, uint32_t feature_count) {
case ROCPROFILER_DATA_KIND_INT64:
std::cout << std::dec << " result64 (" << p->data.result_int64 << ")" << std::endl;
break;
case ROCPROFILER_DATA_KIND_DOUBLE:
std::cout << " result64 (" << p->data.result_double << ")" << std::endl;
break;
case ROCPROFILER_DATA_KIND_BYTES: {
const char* ptr = reinterpret_cast<const char*>(p->data.result_bytes.ptr);
uint64_t size = 0;
Expand Down
2 changes: 1 addition & 1 deletion test/run.sh
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,7 @@ eval_test() {
}

# paths to ROC profiler and other libraries
export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:$PWD:$PWD/../../lib:/home/jenkins/compute-package/lib
export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:$PWD:$PWD/../../lib:/opt/rocm/lib

# enable tools load failure reporting
export HSA_TOOLS_REPORT_LOAD_FAILURE=1
Expand Down
12 changes: 10 additions & 2 deletions test/tool/tool.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -351,19 +351,23 @@ void output_results(const context_entry_t* entry, const char* label) {
case ROCPROFILER_DATA_KIND_INT64:
fprintf(file, "(%lu)\n", p->data.result_int64);
break;
case ROCPROFILER_DATA_KIND_DOUBLE:
fprintf(file, "(%.10lf)\n", p->data.result_double);
break;
default:
fprintf(stderr, "RPL-tool: undefined data kind(%u)\n", p->data.kind);
abort();
}
}
}

// Output group intermeadate profiling results, created internally for complex metrics
// Output group intermediate profiling results, created internally for complex metrics
void output_group(const context_entry_t* entry, const char* label) {
const rocprofiler_group_t* group = &(entry->group);
context_entry_t group_entry = *entry;
for (unsigned i = 0; i < group->feature_count; ++i) {
if (group->features[i]->data.kind == ROCPROFILER_DATA_KIND_INT64) {
if (group->features[i]->data.kind == ROCPROFILER_DATA_KIND_INT64 ||
group->features[i]->data.kind == ROCPROFILER_DATA_KIND_DOUBLE) {
group_entry.features = group->features[i];
group_entry.feature_count = 1;
output_results(&group_entry, label);
Expand Down Expand Up @@ -1126,6 +1130,10 @@ extern "C" PUBLIC_API void OnLoadToolProp(rocprofiler_settings_t* settings)

const uint32_t features_found = metrics_vec.size();

if (!features_found) {
CTX_OUTSTANDING_MAX = 0;
}

// Context array allocation
context_array = new context_array_t;

Expand Down