From 48cc7855d52e0024071447aa061c65c6fa204fcd Mon Sep 17 00:00:00 2001 From: Christophe Paquot Date: Tue, 22 Jun 2021 21:12:08 -0700 Subject: [PATCH 01/11] SWDEV-282961: dependency arrows missing When building the json data flow, from_us_list has (timestamp, stream_id, thread_id). stream_id used to be interpreted as from_tid and tid as to_tid. But that's not correct. stream_id is always a destination and tid is the initiator (source). Change-Id: I2f5bb86a387b4003b17271c90bdf9de4b59a79bf --- bin/sqlitedb.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/bin/sqlitedb.py b/bin/sqlitedb.py index dc5358ff..00a7dba2 100644 --- a/bin/sqlitedb.py +++ b/bin/sqlitedb.py @@ -159,11 +159,11 @@ def flow_json(self, base_id, from_pid, from_us_list, to_pid, to_us_dict, corr_id for ind in range(len(from_us_list)): corr_id = corr_id_list[ind] if (len(corr_id_list) != 0) else ind if corr_id in to_us_dict: - (from_ts, from_tid, to_tid) = from_us_list[ind] + (from_ts, stream_id, tid) = from_us_list[ind] to_ts = to_us_dict[corr_id] if from_ts > to_ts: from_ts = to_ts - fd.write(',{"ts":%d,"ph":"s","cat":"DataFlow","id":%d,"pid":%d,"tid":%d,"name":"dep"}\n' % (from_ts, dep_id, from_pid, from_tid)) - fd.write(',{"ts":%d,"ph":"t","cat":"DataFlow","id":%d,"pid":%d,"tid":%d,"name":"dep"}\n' % (to_ts, dep_id, to_pid, to_tid)) + fd.write(',{"ts":%d,"ph":"s","cat":"DataFlow","id":%d,"pid":%d,"tid":%d,"name":"dep"}\n' % (from_ts, dep_id, from_pid, tid)) + fd.write(',{"ts":%d,"ph":"t","cat":"DataFlow","id":%d,"pid":%d,"tid":%d,"name":"dep"}\n' % (to_ts, dep_id, to_pid, stream_id)) dep_id += 1 def metadata_json(self, jsonfile, sysinfo_file): From 9f0ca101ec2286827c9b1cd8ccfa2ee3438f6a15 Mon Sep 17 00:00:00 2001 From: AMD Date: Wed, 16 Jun 2021 18:33:58 -0500 Subject: [PATCH 02/11] Add support for gfx90a Merge gfx90a support from the 'amd-npi' branch. 
Change-Id: I9b51711ed4a1d2f1ed42ba9b83cb12136be228b8 (cherry picked from commit 4df3e0bd9ae6e5982b43fd2fc3867cf5f0b87a53) --- src/core/metrics.h | 6 +++--- test/tool/gfx_metrics.xml | 17 +++++++++++++++++ test/tool/metrics.xml | 8 ++++++-- 3 files changed, 26 insertions(+), 5 deletions(-) diff --git a/src/core/metrics.h b/src/core/metrics.h index a221168a..6eeebee3 100644 --- a/src/core/metrics.h +++ b/src/core/metrics.h @@ -196,9 +196,9 @@ class MetricsDict { xml_->AddConst("top.const.metric", "SE_NUM", agent_info->se_num); ImportMetrics(agent_info, "const"); agent_name_ = agent_info->name; - if (std::string("gfx906") == agent_info->name) { - ImportMetrics(agent_info, agent_info->name); - } else if (std::string("gfx908") == agent_info->name) { + if (std::string("gfx906") == agent_info->name || + std::string("gfx908") == agent_info->name || + std::string("gfx90a") == agent_info->name) { ImportMetrics(agent_info, agent_info->name); } else { agent_name_ = agent_info->gfxip; diff --git a/test/tool/gfx_metrics.xml b/test/tool/gfx_metrics.xml index 698826c6..c2a79af2 100644 --- a/test/tool/gfx_metrics.xml +++ b/test/tool/gfx_metrics.xml @@ -101,3 +101,20 @@ + + + + + + + + + + + + + + + + + diff --git a/test/tool/metrics.xml b/test/tool/metrics.xml index c340a439..a920ff04 100644 --- a/test/tool/metrics.xml +++ b/test/tool/metrics.xml @@ -65,12 +65,16 @@ + + -# VG20 +# Vega20 -# MI100 +# Arcturus +# Aldebaran + # GPUBusy The percentage of time GPU was busy. From a369af3049bc3a6d2faca537f4e47d5a046f01cf Mon Sep 17 00:00:00 2001 From: rachida Date: Tue, 18 May 2021 20:11:48 -0400 Subject: [PATCH 03/11] SWDEV-282961 Skip barrier events. Process hipMemSet events Marker events inside hcc_ops_trace.txt are from barriers so they are not meant to be stored in ops_patch_data map. Added support for hipMemset events which are a kind of memory copy. 
Change-Id: I213fe959bcd35ff0371613ba5bffd95bc53e06b5 (cherry picked from commit caa5f323007734fd0b14b3fa49618a5d7cc7acdd) --- bin/tblextr.py | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/bin/tblextr.py b/bin/tblextr.py index 61644e2a..deafb199 100755 --- a/bin/tblextr.py +++ b/bin/tblextr.py @@ -354,7 +354,7 @@ def fill_api_db(table_name, db, indir, api_name, api_pid, dep_pid, dep_list, dep copy_index = 0 ptrn_val = re.compile(r'(\d+):(\d+) (\d+):(\d+) ([^\(]+)(\(.*)$') - hip_mcopy_ptrn = re.compile(r'hipMemcpy') + hip_mcopy_ptrn = re.compile(r'hipMemcpy|hipMemset') hip_wait_event_ptrn = re.compile(r'WaitEvent') hip_sync_event_ptrn = re.compile(r'hipStreamSynchronize') hip_sync_dev_event_ptrn = re.compile(r'hipDeviceSynchronize') @@ -430,7 +430,6 @@ def fill_api_db(table_name, db, indir, api_name, api_pid, dep_pid, dep_list, dep if corr_id == 0: corr_id = record_id rec_vals.append(corr_id) - # extracting/converting stream id (stream_id, stream_found) = get_field(record_args, 'stream') if stream_found == 0: @@ -489,9 +488,6 @@ def fill_api_db(table_name, db, indir, api_name, api_pid, dep_pid, dep_list, dep mcopy_found = 1 op_found = 1 - if op_found: - ops_patch_data[(corr_id, proc_id)] = (thread_id, stream_id, kernel_str) - # HIP WaitEvent API if wait_event_ptrn.search(record_name): op_found = 1 @@ -505,6 +501,9 @@ def fill_api_db(table_name, db, indir, api_name, api_pid, dep_pid, dep_list, dep hsa_patch_data[(copy_index, proc_id)] = thread_id copy_index += 1 + if op_found: + ops_patch_data[(corr_id, proc_id)] = (thread_id, stream_id, kernel_str) + if op_found: op_found = 0 beg_ns = int(rec_vals[0]) @@ -650,6 +649,7 @@ def fill_ops_db(kernel_table_name, mcopy_table_name, db, indir): proc_id = int(m.group(3)) # checking name for memcopy pattern + is_barrier = 0 if ptrn_mcopy.search(name): rec_table_name = mcopy_table_name table_handle = mcopy_table_handle @@ -664,6 +664,7 @@ def fill_ops_db(kernel_table_name, mcopy_table_name, db, 
indir): if ptrn_barrier.search(name): name = '""' + is_barrier = 1 thread_id = 0 stream_id = 0 @@ -671,7 +672,8 @@ def fill_ops_db(kernel_table_name, mcopy_table_name, db, indir): (thread_id, stream_id, name_patch) = ops_patch_data[(corr_id, proc_id)] if name_patch != '': name = name_patch else: - fatal("hcc ops data not found: '" + record + "', " + str(corr_id) + ", " + str(proc_id)) + if is_barrier: continue + else: fatal("hcc ops data not found: '" + record + "', " + str(corr_id) + ", " + str(proc_id)) # activity record rec_vals[4] = name # Name @@ -878,3 +880,4 @@ def fill_ops_db(kernel_table_name, mcopy_table_name, db, indir): sys.exit(0) ############################################################# + From 2f189791a5cf6a6beb0aea9572d8760f792f0b32 Mon Sep 17 00:00:00 2001 From: Icarus Sparry Date: Wed, 7 Jul 2021 15:49:02 +0000 Subject: [PATCH 04/11] Add dependency on rocm-core Signed-off-by: Icarus Sparry Change-Id: Icb935e9230888fd005d9ca3617e28f6173173cc8 --- CMakeLists.txt | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index c34f7cc9..e47f06df 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -204,7 +204,7 @@ else() endif() message ( "Using CPACK_DEBIAN_PACKAGE_RELEASE ${CPACK_DEBIAN_PACKAGE_RELEASE}" ) set ( CPACK_DEBIAN_FILE_NAME "DEB-DEFAULT" ) -set ( CPACK_DEBIAN_PACKAGE_DEPENDS "hsa-rocr-dev" ) +set ( CPACK_DEBIAN_PACKAGE_DEPENDS "hsa-rocr-dev, rocm-core" ) ## Process the Debian install/remove scripts to update the CPACK variables configure_file ( ${CMAKE_CURRENT_SOURCE_DIR}/DEBIAN/postinst.in DEBIAN/postinst @ONLY ) configure_file ( ${CMAKE_CURRENT_SOURCE_DIR}/DEBIAN/prerm.in DEBIAN/prerm @ONLY ) @@ -229,12 +229,17 @@ if ( PROC_RESULT EQUAL "0" AND NOT EVAL_RESULT STREQUAL "" ) string ( APPEND CPACK_RPM_PACKAGE_RELEASE "%{?dist}" ) endif() set ( CPACK_RPM_FILE_NAME "RPM-DEFAULT" ) -set ( CPACK_RPM_PACKAGE_DEPENDS "hsa-rocr-dev" ) +set ( CPACK_RPM_PACKAGE_REQUIRES "hsa-rocr-dev, rocm-core" ) 
## Process the Rpm install/remove scripts to update the CPACK variables configure_file ( "${CMAKE_CURRENT_SOURCE_DIR}/RPM/post.in" RPM/post @ONLY ) configure_file ( "${CMAKE_CURRENT_SOURCE_DIR}/RPM/postun.in" RPM/postun @ONLY ) set ( CPACK_RPM_POST_INSTALL_SCRIPT_FILE "${CMAKE_CURRENT_BINARY_DIR}/RPM/post" ) set ( CPACK_RPM_POST_UNINSTALL_SCRIPT_FILE "${CMAKE_CURRENT_BINARY_DIR}/RPM/postun" ) +# Remove dependency on rocm-core if -DROCM_DEP_ROCMCORE=ON not given to cmake +if(NOT ROCM_DEP_ROCMCORE) + string(REGEX REPLACE ",? ?rocm-core" "" CPACK_RPM_PACKAGE_REQUIRES ${CPACK_RPM_PACKAGE_REQUIRES}) + string(REGEX REPLACE ",? ?rocm-core" "" CPACK_DEBIAN_PACKAGE_DEPENDS ${CPACK_DEBIAN_PACKAGE_DEPENDS}) +endif() include ( CPack ) From b641632fefb462bb4f17e1bc2ae9f91312657bd7 Mon Sep 17 00:00:00 2001 From: Chun Yang Date: Wed, 11 Aug 2021 19:55:31 -0700 Subject: [PATCH 05/11] SWDEV-296922 : Incorrect rounding due to integer division in rocprofiler metrics Changed derived metrics to double from int64. Fixed standalone test due to int64 to float change Fixed intercept test due to int64 to float change. 
Change-Id: I49631c187406ae9dd94a869b3bb13772012e8cdf (cherry picked from commit f9017cbdc57166c417474b6794bfc08b703e356a) --- bin/tblextr.py | 5 ++++- src/core/context.h | 6 +++--- src/xml/expr.h | 5 +++-- test/app/intercept_test.cpp | 3 +++ test/app/standalone_test.cpp | 3 +++ test/tool/tool.cpp | 8 ++++++-- 6 files changed, 22 insertions(+), 8 deletions(-) diff --git a/bin/tblextr.py b/bin/tblextr.py index 66be2739..4a8c085b 100755 --- a/bin/tblextr.py +++ b/bin/tblextr.py @@ -115,7 +115,10 @@ def parse_res(infile): beg_pattern = re.compile("^dispatch\[(\d*)\], (.*) kernel-name\(\"([^\"]*)\"\)") prop_pattern = re.compile("([\w-]+)\((\w+)\)"); ts_pattern = re.compile(", time\((\d*),(\d*),(\d*),(\d*)\)") - var_pattern = re.compile("^\s*([^\s]*)\s+\((\d*)\)") + # var pattern below matches a variable name and a variable value from a one + # line text in the format of for example "WRITE_SIZE (0.2500000000)" or + # "GRBM_GUI_ACTIVE (27867)" + var_pattern = re.compile("^\s*([a-zA-Z0-9_]+)\s+\((\d+(?:\.\d+)?)\)") dispatch_number = 0 for line in inp.readlines(): diff --git a/src/core/context.h b/src/core/context.h index a8026dd3..f629ef1c 100644 --- a/src/core/context.h +++ b/src/core/context.h @@ -55,7 +55,7 @@ inline unsigned align_size(unsigned size, unsigned alignment) { template class MetricArgs : public xml::args_cache_t { public: MetricArgs(const Map& map) : map_(map) {} - bool Lookup(const std::string& name, uint64_t& result) const { + bool Lookup(const std::string& name, double& result) const { rocprofiler_feature_t* info = NULL; auto it = map_.find(name); if (it == map_.end()) EXC_RAISING(HSA_STATUS_ERROR, "var '" << name << "' is not found"); @@ -311,8 +311,8 @@ class Context { if (it == info_map_.end()) EXC_RAISING(HSA_STATUS_ERROR, "metric '" << name << "', rocprofiler info is not found " << this); rocprofiler_feature_t* info = it->second; - info->data.result_int64 = expr->Eval(args); - info->data.kind = ROCPROFILER_DATA_KIND_INT64; + info->data.result_double 
= expr->Eval(args); + info->data.kind = ROCPROFILER_DATA_KIND_DOUBLE; } } } diff --git a/src/xml/expr.h b/src/xml/expr.h index 731e25e4..7f754b4c 100644 --- a/src/xml/expr.h +++ b/src/xml/expr.h @@ -29,6 +29,7 @@ THE SOFTWARE. #include #include #include +#include namespace xml { class exception_t : public std::exception { @@ -45,8 +46,8 @@ class div_zero_exception_t : public exception_t { explicit div_zero_exception_t(const std::string& msg) : exception_t("Divide by zero exception " + msg) {} }; -typedef uint64_t args_t; -static const args_t ARGS_MAX = UINT64_MAX; +typedef double args_t; +static const args_t ARGS_MAX = DBL_MAX; typedef std::map args_map_t; class Expr; diff --git a/test/app/intercept_test.cpp b/test/app/intercept_test.cpp index e62bf6ce..bbcdf806 100644 --- a/test/app/intercept_test.cpp +++ b/test/app/intercept_test.cpp @@ -124,6 +124,9 @@ void dump_context_entry(context_entry_t* entry, rocprofiler_feature_t* features, case ROCPROFILER_DATA_KIND_INT64: fprintf(stdout, "= (%lu)\n", p->data.result_int64); break; + case ROCPROFILER_DATA_KIND_DOUBLE: + fprintf(stdout, "= (%lf)\n", p->data.result_double); + break; default: fprintf(stderr, "Undefined data kind(%u)\n", p->data.kind); abort(); diff --git a/test/app/standalone_test.cpp b/test/app/standalone_test.cpp index 34bc05ea..1344e0eb 100644 --- a/test/app/standalone_test.cpp +++ b/test/app/standalone_test.cpp @@ -78,6 +78,9 @@ void print_features(rocprofiler_feature_t* feature, uint32_t feature_count) { case ROCPROFILER_DATA_KIND_INT64: std::cout << std::dec << " result64 (" << p->data.result_int64 << ")" << std::endl; break; + case ROCPROFILER_DATA_KIND_DOUBLE: + std::cout << " result64 (" << p->data.result_double << ")" << std::endl; + break; case ROCPROFILER_DATA_KIND_BYTES: { const char* ptr = reinterpret_cast(p->data.result_bytes.ptr); uint64_t size = 0; diff --git a/test/tool/tool.cpp b/test/tool/tool.cpp index 4bdce5dd..1ccce956 100644 --- a/test/tool/tool.cpp +++ b/test/tool/tool.cpp @@ -351,6 
+351,9 @@ void output_results(const context_entry_t* entry, const char* label) { case ROCPROFILER_DATA_KIND_INT64: fprintf(file, "(%lu)\n", p->data.result_int64); break; + case ROCPROFILER_DATA_KIND_DOUBLE: + fprintf(file, "(%.10lf)\n", p->data.result_double); + break; default: fprintf(stderr, "RPL-tool: undefined data kind(%u)\n", p->data.kind); abort(); @@ -358,12 +361,13 @@ void output_results(const context_entry_t* entry, const char* label) { } } -// Output group intermeadate profiling results, created internally for complex metrics +// Output group intermediate profiling results, created internally for complex metrics void output_group(const context_entry_t* entry, const char* label) { const rocprofiler_group_t* group = &(entry->group); context_entry_t group_entry = *entry; for (unsigned i = 0; i < group->feature_count; ++i) { - if (group->features[i]->data.kind == ROCPROFILER_DATA_KIND_INT64) { + if (group->features[i]->data.kind == ROCPROFILER_DATA_KIND_INT64 || + group->features[i]->data.kind == ROCPROFILER_DATA_KIND_DOUBLE) { group_entry.features = group->features[i]; group_entry.feature_count = 1; output_results(&group_entry, label); From bb3fdb0b76d23fd72f21399c8b5ade3122101f8b Mon Sep 17 00:00:00 2001 From: Chun Yang Date: Wed, 29 Sep 2021 18:51:36 -0700 Subject: [PATCH 06/11] SWDEV-283942 SWDEV-292075 Fixed exception thrown when ROCP_HSA_INTERCEPT not set or set to 0; Fixed ROCM hsa_init() failed with error 4096 when trying to read hardware performance counters; Fixed LD_LIBRARY_PATH to include necessary library; Change-Id: Idcb7ff807a79f4267374c34041d3bca33d85f532 --- src/core/rocprofiler.cpp | 36 ++++++++++++++++-------------------- test/run.sh | 2 +- 2 files changed, 17 insertions(+), 21 deletions(-) diff --git a/src/core/rocprofiler.cpp b/src/core/rocprofiler.cpp index bbb97e3f..b50bd51d 100644 --- a/src/core/rocprofiler.cpp +++ b/src/core/rocprofiler.cpp @@ -400,42 +400,38 @@ PUBLIC_API bool OnLoad(HsaApiTable* table, uint64_t runtime_version, 
uint64_t fa ONLOAD_TRACE_BEG(); rocprofiler::SaveHsaApi(table); rocprofiler::ProxyQueue::InitFactory(); - bool intercept_mode = false; // Checking environment to enable intercept mode const char* intercept_env = getenv("ROCP_HSA_INTERCEPT"); + + int intercept_env_value = 0; if (intercept_env != NULL) { - switch (atoi(intercept_env)) { - // Intercepting disabled + intercept_env_value = atoi(intercept_env); + + switch (intercept_env_value) { case 0: - intercept_mode = false; - rocprofiler::InterceptQueue::TrackerOn(false); - break; - // Intercepting enabled without timestamping case 1: - intercept_mode = true; + // 0: Intercepting disabled + // 1: Intercepting enabled without timestamping rocprofiler::InterceptQueue::TrackerOn(false); break; - // Intercepting enabled with timestamping case 2: - intercept_mode = true; + // Intercepting enabled with timestamping rocprofiler::InterceptQueue::TrackerOn(true); break; default: - ERR_LOGGING("Bad ROCP_HSA_INTERCEPT env var value (" << intercept_env << ")"); + ERR_LOGGING("Bad ROCP_HSA_INTERCEPT env var value (" << intercept_env << "): " << + "valid values are 0 (standalone), 1 (intercepting without timestamp), 2 (intercepting with timestamp)"); return false; } } + // always enable executable tracking + rocprofiler::util::HsaRsrcFactory::EnableExecutableTracking(table); + // Loading a tool lib and setting of intercept mode const uint32_t intercept_mode_mask = rocprofiler::LoadTool(); - if (intercept_mode_mask & rocprofiler::DISPATCH_INTERCEPT_MODE) { - intercept_mode = true; - } - if (intercept_mode_mask & rocprofiler::CODE_OBJ_TRACKING_MODE) { - if (intercept_mode == false) EXC_RAISING(HSA_STATUS_ERROR, "code objects tracking without intercept mode enabled"); - rocprofiler::util::HsaRsrcFactory::EnableExecutableTracking(table); - } + if (intercept_mode_mask & rocprofiler::MEMCOPY_INTERCEPT_MODE) { hsa_status_t status = hsa_amd_profiling_async_copy_enable(true); if (status != HSA_STATUS_SUCCESS) EXC_ABORT(status, 
"hsa_amd_profiling_async_copy_enable"); @@ -453,14 +449,14 @@ PUBLIC_API bool OnLoad(HsaApiTable* table, uint64_t runtime_version, uint64_t fa } // HSA intercepting - if (intercept_mode) { + if (intercept_env_value != 0) { rocprofiler::ProxyQueue::HsaIntercept(table); rocprofiler::InterceptQueue::HsaIntercept(table); } else { rocprofiler::StandaloneIntercept(); } - ONLOAD_TRACE("end intercept_mode(" << std::hex << intercept_mode << ")" << + ONLOAD_TRACE("end intercept_mode(" << std::hex << intercept_env_value << ")" << " intercept_mode_mask(" << std::hex << intercept_mode_mask << ")" << std::dec); return true; } diff --git a/test/run.sh b/test/run.sh index f4f07166..135d3bb3 100755 --- a/test/run.sh +++ b/test/run.sh @@ -57,7 +57,7 @@ eval_test() { } # paths to ROC profiler and oher libraries -export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:$PWD:$PWD/../../lib:/home/jenkins/compute-package/lib +export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:$PWD:$PWD/../../lib:/opt/rocm/lib # enable tools load failure reporting export HSA_TOOLS_REPORT_LOAD_FAILURE=1 From 2f8bd45c284953197976fe62dced014f38ccdac5 Mon Sep 17 00:00:00 2001 From: Chun Yang Date: Wed, 11 Aug 2021 19:55:31 -0700 Subject: [PATCH 07/11] SWDEV-296922 : Incorrect rounding due to integer division in rocprofiler metrics Changed var_pattern in tblextr.py to include pattern like "name[0]" Change-Id: Ibe1c512595cfbdcaca8fa5bddceb3f6a570caf43 (cherry picked from commit ff43ca1542b22b7f906ceac00d156258f5e3c4de) --- bin/tblextr.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/bin/tblextr.py b/bin/tblextr.py index 4a8c085b..c58b4fac 100755 --- a/bin/tblextr.py +++ b/bin/tblextr.py @@ -117,8 +117,8 @@ def parse_res(infile): ts_pattern = re.compile(", time\((\d*),(\d*),(\d*),(\d*)\)") # var pattern below matches a variable name and a variable value from a one # line text in the format of for example "WRITE_SIZE (0.2500000000)" or - # "GRBM_GUI_ACTIVE (27867)" - var_pattern = 
re.compile("^\s*([a-zA-Z0-9_]+)\s+\((\d+(?:\.\d+)?)\)") + # "GRBM_GUI_ACTIVE (27867)" or "TA_TA_BUSY[0]" + var_pattern = re.compile("^\s*([a-zA-Z0-9_]+(?:\[\d+\])?)\s+\((\d+(?:\.\d+)?)\)") dispatch_number = 0 for line in inp.readlines(): From f2df81ae5c18ee0d56ed99f2de26d34db7e13063 Mon Sep 17 00:00:00 2001 From: Laurent Morichetti Date: Tue, 5 Oct 2021 21:29:53 -0700 Subject: [PATCH 08/11] Install merge_traces.sh in $prefix/bin Change-Id: I6cea078e5b64a68d7bd269dc3aab976a7ab7b5c7 --- CMakeLists.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/CMakeLists.txt b/CMakeLists.txt index 3b51b021..12574d1e 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -152,6 +152,7 @@ install ( FILES DESTINATION include/${DEST_NAME} ) # rpl_run.sh tblextr.py txt2xml.sh install ( FILES + ${CMAKE_CURRENT_SOURCE_DIR}/bin/merge_traces.sh ${CMAKE_CURRENT_SOURCE_DIR}/bin/rpl_run.sh ${CMAKE_CURRENT_SOURCE_DIR}/bin/txt2xml.sh ${CMAKE_CURRENT_SOURCE_DIR}/bin/txt2params.py From fc19a41406b11edd3f8b1045b0b8245cdaa12578 Mon Sep 17 00:00:00 2001 From: Chun Yang Date: Wed, 29 Sep 2021 18:50:44 -0700 Subject: [PATCH 09/11] SWDEV-283942 SWDEV-292075: Fixed corrupted multithread map handling Change-Id: Ib7d33a4b7f3306b7195ff89c28b021fb1fa6bc88 (cherry picked from commit 2519d00c1743d39d0ced89d3b000a216504da0ad) --- src/core/gpu_command.cpp | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) diff --git a/src/core/gpu_command.cpp b/src/core/gpu_command.cpp index 48e4fba7..e337367b 100644 --- a/src/core/gpu_command.cpp +++ b/src/core/gpu_command.cpp @@ -97,13 +97,10 @@ struct gpu_cmd_fncomp_t { }; typedef std::map gpu_cmd_map_t; -typedef std::mutex gpu_cmd_mutex_t; -gpu_cmd_mutex_t gpu_cmd_mutex; - size_t GetGpuCommand(gpu_cmd_op_t op, const rocprofiler::util::AgentInfo* agent_info, packet_t** command_out) { - static gpu_cmd_map_t* map = NULL; + thread_local gpu_cmd_map_t map; // Getting chip-id uint32_t chip_id = 0; @@ -112,9 +109,7 @@ size_t GetGpuCommand(gpu_cmd_op_t op, if (status != 
HSA_STATUS_SUCCESS) EXC_RAISING(status, "hsa_agent_get_info failed"); // Query/create a command - std::lock_guard lck(gpu_cmd_mutex); - if (map == NULL) map = new gpu_cmd_map_t; - auto ret = map->insert({gpu_cmd_key_t{op, chip_id}, gpu_cmd_entry_t{}}); + auto ret = map.insert({gpu_cmd_key_t{op, chip_id}, gpu_cmd_entry_t{}}); gpu_cmd_map_t::iterator it = ret.first; if (ret.second) { it->second.size = CreateGpuCommand(op, agent_info, it->second.command, Profile::LEGACY_SLOT_SIZE_PKT); From 268e596885e3aa64d613fe80224ce769c3242e6b Mon Sep 17 00:00:00 2001 From: Aleksey Date: Thu, 2 Dec 2021 18:14:33 +0300 Subject: [PATCH 10/11] SWDEV-283942 : when --ctx-limit option is enabled, profiler hangs if user provided empty feature set to track. Fixed this behaviour --- test/tool/tool.cpp | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/test/tool/tool.cpp b/test/tool/tool.cpp index 1ccce956..d24100c6 100644 --- a/test/tool/tool.cpp +++ b/test/tool/tool.cpp @@ -1130,6 +1130,10 @@ extern "C" PUBLIC_API void OnLoadToolProp(rocprofiler_settings_t* settings) const uint32_t features_found = metrics_vec.size(); + if (!features_found) { + CTX_OUTSTANDING_MAX = 0; + } + // Context array aloocation context_array = new context_array_t; From 3395f794c55f98ff1fdc12a8a9258e31aac8a740 Mon Sep 17 00:00:00 2001 From: Aleksey Date: Wed, 16 Feb 2022 21:15:22 +0300 Subject: [PATCH 11/11] github issue#66 It's impossible to guarantee order of the InitHsaApiTable and EnableExecutableTracking calls. Added explicit check for infinite recursion in EnableExecutableTracking call. 
--- src/util/hsa_rsrc_factory.cpp | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/src/util/hsa_rsrc_factory.cpp b/src/util/hsa_rsrc_factory.cpp index 9d980312..0a5a7ec9 100644 --- a/src/util/hsa_rsrc_factory.cpp +++ b/src/util/hsa_rsrc_factory.cpp @@ -737,6 +737,12 @@ const char* HsaRsrcFactory::GetKernelNameRef(uint64_t addr) { void HsaRsrcFactory::EnableExecutableTracking(HsaApiTable* table) { std::lock_guard lck(mutex_); + // Prevent infinite recursion + // + if (hsa_api_.hsa_executable_freeze == hsa_executable_freeze && + hsa_api_.hsa_executable_destroy == hsa_executable_destroy) + return; + executable_tracking_on_ = true; table->core_->hsa_executable_freeze_fn = hsa_executable_freeze_interceptor; table->core_->hsa_executable_destroy_fn = hsa_executable_destroy_interceptor;