Skip to content

Commit

Permalink
RCP 5.5 release
Browse files Browse the repository at this point in the history
  • Loading branch information
chesik-amd committed Aug 22, 2018
1 parent 76e3633 commit 45393f9
Show file tree
Hide file tree
Showing 29 changed files with 157 additions and 332 deletions.
3 changes: 1 addition & 2 deletions Build/Linux/Common.mk
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,7 @@ COMMON_SRC = $(COMMON_DIR)/Src
COMMON_LIB = $(COMMON_DIR)/Lib
COMMON_LIB_AMD = $(COMMON_DIR)/Lib/AMD
COMMON_LIB_EXT = $(COMMON_DIR)/Lib/Ext
GPU_PERF_API_DIR = $(COMMON_LIB_AMD)/GPUPerfAPI/3_1/Include
GPU_PERF_API_DIR = $(COMMON_LIB_AMD)/GPUPerfAPI/3_2/Include
APPSDK_DIR = $(COMMON_LIB_AMD)/APPSDK/3-0
CAL_DIR = $(COMMON_LIB_AMD)/CAL/8.95
ACL_DIR = $(COMMON_LIB_AMD)/ACL/TOT
Expand All @@ -77,7 +77,6 @@ UTF8CPP_DIR = $(COMMON_LIB_EXT)/utf8cpp/source
ADL_DIR = $(COMMON_LIB_AMD)/ADL
ADLUTIL_DIR = $(COMMON_SRC)/ADLUtil
COMMON_PROJ_DIR = $(COMMON_DIR)/Src
CELF_DIR = $(COMMON_SRC)/CElf
HSAUTILS_DIR = $(COMMON_SRC)/HSAUtils

GPU_PROFILER_LIB_PREFIX=RCP
Expand Down
3 changes: 0 additions & 3 deletions Build/Linux/CommonTargets.mk
Original file line number Diff line number Diff line change
Expand Up @@ -94,9 +94,6 @@ $(OBJ_DIR)/%.o: $(SRC_SPROFILE_DIR)/%.cpp
$(OBJ_DIR)/%.o: $(SRC_SANALYZE_DIR)/%.cpp
$(BUILD_SRC)

$(OBJ_DIR)/%.o: $(CELF_DIR)/Src/%.cpp
$(BUILD_SRC)

$(OBJ_DIR)/%.o: $(HSATESTCOMMON_DIR)/%.cpp
$(BUILD_SRC)

Expand Down
2 changes: 1 addition & 1 deletion Build/Linux/build_rcp.sh
Original file line number Diff line number Diff line change
Expand Up @@ -161,7 +161,7 @@ HSAFDNTRACE="$SRCDIR/HSAFdnTrace"
PRELOADXINITTHREADS="$SRCDIR/PreloadXInitThreads"
ACTIVITYLOGGER="CXLActivityLogger"
ACTIVITYLOGGERDIR="$COMMONSRC/AMDTActivityLogger/"
GPA="$COMMON/Lib/AMD/GPUPerfAPI/3_1"
GPA="$COMMON/Lib/AMD/GPUPerfAPI/3_2"
VKSTABLECLOCKS="$COMMON/Lib/AMD/VKStableClocks/VKStableClocks/VkStableClocks"
JQPLOT_PATH="$SRCCOMMON/jqPlot"
PROFILEDATAPARSERSRC="$SRCDIR/ProfileDataParser"
Expand Down
2 changes: 1 addition & 1 deletion Build/VS2015/RCP-GPUPerfAPI.props
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
<?xml version="1.0" encoding="utf-8"?>
<Project DefaultTargets="Build" ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
<PropertyGroup Label="UserMacros">
<GPUPerfAPIDir>$(CommonDir)\Lib\AMD\GPUPerfAPI\3_1</GPUPerfAPIDir>
<GPUPerfAPIDir>$(CommonDir)\Lib\AMD\GPUPerfAPI\3_2</GPUPerfAPIDir>
</PropertyGroup>
<PropertyGroup>
<_ProjectFileVersion>10.0.30319.1</_ProjectFileVersion>
Expand Down
6 changes: 0 additions & 6 deletions Build/VS2015/RCPCLProfileAgent.vcxproj
Original file line number Diff line number Diff line change
Expand Up @@ -40,10 +40,8 @@
<Import Project="RCP-VKStableClocks.props" />
<Import Project="..\..\..\RCP-Internal\Build\VS2015\RCP-Internal.props" Condition="exists('..\..\..\RCP-Internal\Build\VS2015\RCP-Internal.props')" />
<Import Project="$(CommonDir)\Lib\AMD\ACL\Global-ACL.props" />
<Import Project="$(CommonDir)\Src\CElf\Global-CElf.props" />
<Import Project="$(CommonDir)\Src\ADLUtil\Global-ADLUtil.props" />
<Import Project="$(CommonDir)\Src\ACLModuleManager\Global-ACLModuleManager.props" />
<Import Project="$(CommonDir)\Lib\AMD\CAL\Global-CAL-NoLib.props" />
</ImportGroup>
<PropertyGroup Label="UserMacros" />
<PropertyGroup>
Expand All @@ -63,10 +61,6 @@
</ItemDefinitionGroup>
<ItemGroup>
<ClCompile Include="..\..\..\Common\Src\ADLUtil\ADLUtil.cpp" />
<ClCompile Include="..\..\..\Common\Src\CElf\Src\CElf.cpp" />
<ClCompile Include="..\..\..\Common\Src\CElf\Src\CElfSection.cpp" />
<ClCompile Include="..\..\..\Common\Src\CElf\Src\CElfStringTable.cpp" />
<ClCompile Include="..\..\..\Common\Src\CElf\Src\CElfSymbolTable.cpp" />
<ClCompile Include="..\..\..\Common\Src\DynamicLibraryModule\ACLModule.cpp" />
<ClCompile Include="..\..\..\Common\Src\ACLModuleManager\ACLModuleManager.cpp" />
<ClCompile Include="..\..\Src\CLProfileAgent\CLBuffer.cpp" />
Expand Down
15 changes: 0 additions & 15 deletions Build/VS2015/RCPCLProfileAgent.vcxproj.filters
Original file line number Diff line number Diff line change
Expand Up @@ -10,18 +10,6 @@
<ClCompile Include="..\..\..\Common\Src\ADLUtil\ADLUtil.cpp">
<Filter>ADLUtil</Filter>
</ClCompile>
<ClCompile Include="..\..\..\Common\Src\CElf\Src\CElf.cpp">
<Filter>CElf</Filter>
</ClCompile>
<ClCompile Include="..\..\..\Common\Src\CElf\Src\CElfSection.cpp">
<Filter>CElf</Filter>
</ClCompile>
<ClCompile Include="..\..\..\Common\Src\CElf\Src\CElfStringTable.cpp">
<Filter>CElf</Filter>
</ClCompile>
<ClCompile Include="..\..\..\Common\Src\CElf\Src\CElfSymbolTable.cpp">
<Filter>CElf</Filter>
</ClCompile>
<ClCompile Include="..\..\Src\CLProfileAgent\CLBuffer.cpp">
<Filter>Source Files</Filter>
</ClCompile>
Expand Down Expand Up @@ -98,9 +86,6 @@
<Filter Include="Resource Files">
<UniqueIdentifier>{7124187b-7b76-4cc9-bdd2-0ff0af4cbbdc}</UniqueIdentifier>
</Filter>
<Filter Include="CElf">
<UniqueIdentifier>{59c4443f-a055-4755-8763-238265e9bc85}</UniqueIdentifier>
</Filter>
<Filter Include="Header Files">
<UniqueIdentifier>{22d938b8-1adb-4011-ac9a-850e37321634}</UniqueIdentifier>
</Filter>
Expand Down
15 changes: 4 additions & 11 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -32,20 +32,13 @@ RCP was formerly delivered as part of CodeXL with the executable name
* When used with CodeXL, all profiler data can be visualized in a user-friendly graphical user interface.

## What's New
* Version 5.4 (6/22/18)
* Adds support for additional GPUs and APUs.
* Support for profiling OpenCL applications running on ROCm
* OpenCL: Support for tracing OpenCL 2.1 and 2.2 APIs
* ROCm/HSA: Support for ROCm 1.8.
* ROCm/HSA: Support for tracing AMD vendor extensions.
* Fixes an issue parsing occupancy data collected on systems with certain locale settings.
* ROCm/HSA: Fixes an issue with garbage characters in the .atp file for some HSA API string parameters.
* OpenCL: Fixes profiling on recent amdgpu-pro drivers using the legacy OpenCL stack.
* OpenCL: Works around a driver issue where GPU clock frequencies remain fixed after profiling on GFX9-based GPUs.
* Version 5.5 (8/22/18)
* Adds support for additional GPUs and APUs.
* ROCm/HSA: Fixes several issues with incorrect or missing data transfer timestamps.

## System Requirements
* An AMD Radeon GCN-based GPU or APU
* Radeon Software Crimson ReLive Edition 18.5.1 or later (Driver Packaging Version 18.10 or later).
* Radeon Software Adrenalin Edition 18.8.1 or later (Driver Packaging Version 18.30 or later).
* ROCm 1.8. See system requirements for ROCm: https://rocm.github.io/install.html and https://rocm.github.io/hardware.html
* Windows 7, 8.1, and 10
* For Windows, the `Visual C++ Redistributable for Visual Studio 2015` is required. It can be downloaded from https://www.microsoft.com/en-us/download/details.aspx?id=48145
Expand Down
4 changes: 4 additions & 0 deletions ReleaseNotes.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,9 @@
# Radeon Compute Profiler Release Notes
---
## Version 5.5 (8/22/18)
* Adds support for additional GPUs and APUs.
* ROCm/HSA: Fixes several issues with incorrect or missing data transfer timestamps.

## Version 5.4 (6/22/18)
* Adds support for additional GPUs and APUs.
* Support for profiling OpenCL applications running on ROCm
Expand Down
6 changes: 2 additions & 4 deletions Scripts/UpdateCommonMap.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,6 @@
"common-lib-amd-ADL" : "../Common/Lib/AMD/ADL",
"common-lib-AMD-ACL" : "../Common/Lib/AMD/ACL",
"common-lib-amd-APPSDK-3.0" : "../Common/Lib/AMD/APPSDK",
"common-lib-AMD-CAL-8.95" : "../Common/Lib/AMD/CAL",
"common-lib-ext-Boost-1.59" : "../Common/Lib/Ext/Boost",
"common-lib-ext-mhook" : "../Common/Lib/Ext/mhook",
"common-lib-ext-tinyxml-2.6.2" : "../Common/Lib/Ext/tinyxml",
Expand All @@ -21,7 +20,6 @@
"common-src-AMDTBaseTools" : "../Common/Src/AMDTBaseTools",
"common-src-AMDTInterceptor" : "../Common/Src/AMDTInterceptor",
"common-src-AMDTOSWrappers" : "../Common/Src/AMDTOSWrappers",
"common-src-CElf" : "../Common/Src/CElf",
"common-src-DeviceInfo" : "../Common/Src/DeviceInfo",
"common-src-DynamicLibraryModule" : "../Common/Src/DynamicLibraryModule",
"common-src-HSAUtils" : "../Common/Src/HSAUtils",
Expand All @@ -33,12 +31,12 @@
}

downloadMappingWin = {
"https://github.com/GPUOpen-Tools/GPA/releases/download/v3.1a/GPUPerfAPI-3.1.571.0.zip" : "../Common/Lib/AMD/GPUPerfAPI",
"https://github.com/GPUOpen-Tools/GPA/releases/download/v3.2/GPUPerfAPI-3.2.623.0.zip" : "../Common/Lib/AMD/GPUPerfAPI",
"https://github.com/GPUOpen-Tools/RCP/releases/download/v5.4/RadeonComputeProfiler-v5.4.6937.zip" : "../Common/Lib/AMD/VKStableClocks"
}

downloadMappingLin = {
"https://github.com/GPUOpen-Tools/GPA/releases/download/v3.1a/GPUPerfAPI.3.1.793-lnx.tgz" : "../Common/Lib/AMD/GPUPerfAPI",
"https://github.com/GPUOpen-Tools/GPA/releases/download/v3.2/GPUPerfAPI.3.2.858-lnx.tgz" : "../Common/Lib/AMD/GPUPerfAPI",
"https://github.com/GPUOpen-Tools/RCP/releases/download/v5.4/RadeonComputeProfiler-v5.4.6906.tgz" : "../Common/Lib/AMD/VKStableClocks"
}

Expand Down
34 changes: 2 additions & 32 deletions Src/CLProfileAgent/CLKernelAssembly.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -8,8 +8,8 @@

#include <sstream>
#include <algorithm>
#include <fstream>
#include <boost/algorithm/string.hpp>
#include <CElf.h>

// ADL headers
#include <ADLUtil.h>
Expand Down Expand Up @@ -43,8 +43,7 @@ KernelAssembly::KernelAssembly() : m_strFilePrefix(KERNEL_ASSEMBLY_FILE_PREFIX),
m_bOutputIL(false),
m_bOutputISA(false),
m_bOutputCL(false),
m_bOutputHSAIL(false),
m_bInitCAL(false)
m_bOutputHSAIL(false)
{
};

Expand Down Expand Up @@ -196,25 +195,6 @@ bool SaveBifToFile(const char* pszFileName, ACLModule* mod, aclBinary* pBin)
return true;
}

/// Reads the ".astext" section of an ELF object and returns its contents as text.
/// \param elf the ELF object to query for the ".astext" section
/// \param[out] strISAOut receives the section's bytes as a string; not modified on failure
/// \return true if the section was found and copied, false (after logging an error) otherwise
bool GetCPUISA(CElf& elf, string& strISAOut)
{
    const CElfSection* pSection = elf.GetSection(".astext");

    // Guard clause: without the section there is nothing to extract
    if (NULL == pSection)
    {
        Log(logERROR, "Failed to retrieve astext section.\n");
        return false;
    }

    // Take a local copy of the section data, then hand its bytes to the caller
    const vector<char> sectionBytes(pSection->GetData());
    strISAOut.assign(sectionBytes.begin(), sectionBytes.end());

    return true;
}

bool KernelAssembly::GenerateKernelFilesFromACLModule(ACLModule* pAclModule,
aclCompiler* pAclCompiler,
std::vector<char>& vBinary,
Expand Down Expand Up @@ -297,16 +277,6 @@ bool KernelAssembly::GenerateKernelFilesFromACLModule(ACLModule* pAclMod
strISA = pAsText;
bISA = true;
}
else
{
// Fallback to CELF path
CElf elf(vBinary);

if (elf.good())
{
bISA = GetCPUISA(elf, strISA);
}
}

if (bISA)
{
Expand Down
1 change: 0 additions & 1 deletion Src/CLProfileAgent/CLKernelAssembly.h
Original file line number Diff line number Diff line change
Expand Up @@ -181,7 +181,6 @@ class KernelAssembly
bool m_bOutputISA; ///< flag indicating whether or not to write out the ISA file
bool m_bOutputCL; ///< flag indicating whether or not to write out the CL file
bool m_bOutputHSAIL; ///< flag indicating whether or not to write out the HSAIL file
bool m_bInitCAL; ///< flag indicating whether or not CALRT is initialized
static std::string m_sTmpDisassemblyLoggerISA; ///< string to hold the ISA text contents from the disassembly logger
static std::string m_sTmpDisassemblyLoggerHSAIL; ///< string to hold the HSAIL text contents from the disassembly logger
static unsigned int m_sDisassembleCount; ///< count of the number of unique text items from the disassembly logger
Expand Down
5 changes: 0 additions & 5 deletions Src/CLProfileAgent/makefile
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,6 @@ INCLUDES = \
-I$(DEVICEINFO_DIR) \
-I$(TSINGLETON_DIR) \
-isystem$(BOOST_DIR) \
-isystem$(CELF_DIR)/Include \
-I$(UTF8CPP_DIR) \
-I$(ACL_DIR)/include \
-I$(ADL_DIR)/include \
Expand Down Expand Up @@ -46,10 +45,6 @@ SO_OBJS = \
./$(OBJ_DIR)/CLDeferredKernel.o \
./$(OBJ_DIR)/CLProfilerMineCLEntry.o \
./$(OBJ_DIR)/CLProfilerMineCLMemory.o \
./$(OBJ_DIR)/CElf.o \
./$(OBJ_DIR)/CElfSection.o \
./$(OBJ_DIR)/CElfStringTable.o \
./$(OBJ_DIR)/CElfSymbolTable.o \
./$(OBJ_DIR)/GPUPerfAPICounterLoader.o \

include $(DEPTH)/Build/Linux/CommonTargets.mk
Expand Down
2 changes: 1 addition & 1 deletion Src/Common/Version.h
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@
// @{

#define RCP_MAJOR_VERSION 5
#define RCP_MINOR_VERSION 4
#define RCP_MINOR_VERSION 5
#define RCP_BUILD_NUMBER 0
#define RCP_UPDATE_VERSION 0

Expand Down
1 change: 0 additions & 1 deletion Src/HSAFdnCommon/makefile
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,6 @@ LIB_OBJS = \
./$(OBJ_DIR)/HSAToolsRTModule.o \
./$(OBJ_DIR)/HSAAgentUtils.o \
./$(OBJ_DIR)/HSAAgentIterateReplacer.o \
./$(OBJ_DIR)/HSAAPITableVersions.o \
./$(OBJ_DIR)/HSAKernelDemangler.o \

include $(DEPTH)/Build/Linux/CommonTargets.mk
Expand Down
24 changes: 2 additions & 22 deletions Src/HSAFdnPMC/HSAPMCAgent.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -14,9 +14,6 @@
#include "FileUtils.h"
#include "GlobalSettings.h"

#include "HSAPMCInterceptionTable1_0.h"
#include "HSAPMCInterceptionTable1_2.h"
#include "HSAAPITableVersions.h"
#include "HSAAgentUtils.h"

#include "HSAGPAProfiler.h"
Expand Down Expand Up @@ -52,26 +49,9 @@ extern "C" bool DLL_PUBLIC OnLoad(void* pTable, uint64_t runtimeVersion, uint64_

std::cout << RCP_PRODUCT_NAME " " << RCP_VERSION_STRING << " is enabled\n";

if (ROCM_1_1_X_AND_EARLIER_ROOT_RUNTIME_VERSION == runtimeVersion)
{
// ROCm versions 1.1.1 and earlier
InitHSAAPIInterceptPMC1_0(reinterpret_cast<ApiTable1_0*>(pTable));
}
else
{
HsaApiTable* pHsaTable = reinterpret_cast<HsaApiTable*>(pTable);
HsaApiTable* pHsaTable = reinterpret_cast<HsaApiTable*>(pTable);

if (IsROCm12(pHsaTable))
{
// ROCm 1.2 backwards compatibility
HsaApiTable1_2* pHsaTable1_2 = reinterpret_cast<HsaApiTable1_2*>(pTable);
InitHSAAPIInterceptPMC1_2(pHsaTable1_2);
}
else
{
InitHSAAPIInterceptPMC(pHsaTable);
}
}
InitHSAAPIInterceptPMC(pHsaTable);

Parameters params;
FileUtils::GetParametersFromFile(params);
Expand Down
2 changes: 0 additions & 2 deletions Src/HSAFdnPMC/makefile
Original file line number Diff line number Diff line change
Expand Up @@ -33,8 +33,6 @@ LIBPATH = $(COMMON_LIB_PATHS)
SO_OBJS = \
./$(OBJ_DIR)/HSAPMCAgent.o \
./$(OBJ_DIR)/HSAPMCInterceptionHelpers.o \
./$(OBJ_DIR)/HSAPMCInterceptionTable1_0.o \
./$(OBJ_DIR)/HSAPMCInterceptionTable1_2.o \
./$(OBJ_DIR)/HSAGPAProfiler.o \
./$(OBJ_DIR)/HSAPMCInterception.o \
./$(OBJ_DIR)/CLCUInfoBase.o \
Expand Down
16 changes: 15 additions & 1 deletion Src/HSAFdnTrace/AutoGenerated/HSAAMDExtensionAPITraceClasses.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -833,7 +833,8 @@ void HSA_APITrace_hsa_amd_memory_async_copy::Create(
uint32_t num_dep_signals,
const hsa_signal_t* dep_signals,
hsa_signal_t completion_signal,
hsa_status_t retVal)
hsa_status_t retVal,
ULONGLONG asyncCopyIdentifier)
{
m_ullStart = ullStartTime;
m_ullEnd = ullEndTime;
Expand All @@ -853,6 +854,19 @@ void HSA_APITrace_hsa_amd_memory_async_copy::Create(

m_completion_signal = completion_signal;
m_retVal = retVal;

m_asyncCopyIdentifier = asyncCopyIdentifier;
}

/// Writes the timestamp entry for this hsa_amd_memory_async_copy call.
/// The common part of the entry is written by HSAAPIBase::WriteTimestampEntry; only when
/// that call succeeds is the async copy identifier appended, as a left-aligned field
/// that is 21 characters wide.
/// \param sout output stream
/// \param bTimeout a flag indicating output mode
/// \return the result of HSAAPIBase::WriteTimestampEntry, i.e. true if timestamps are ready
bool HSA_APITrace_hsa_amd_memory_async_copy::WriteTimestampEntry(std::ostream& sout, bool bTimeout)
{
    const bool bBaseEntryWritten = HSAAPIBase::WriteTimestampEntry(sout, bTimeout);

    if (bBaseEntryWritten)
    {
        // async copy identifier
        sout << std::left << std::setw(21) << m_asyncCopyIdentifier;
    }

    // Propagate the base-class result instead of unconditionally reporting success,
    // so the return value matches the documented "true if timestamps are ready" contract.
    return bBaseEntryWritten;
}

///////////////////////////////////////////////////
Expand Down
11 changes: 10 additions & 1 deletion Src/HSAFdnTrace/AutoGenerated/HSAAMDExtensionAPITraceClasses.h
Original file line number Diff line number Diff line change
Expand Up @@ -840,6 +840,7 @@ class HSA_APITrace_hsa_amd_memory_async_copy : public HSAAPIBase
/// \param dep_signals Parameter passed to hsa_amd_memory_async_copy
/// \param completion_signal Parameter passed to hsa_amd_memory_async_copy
/// \param retVal the return value for hsa_amd_memory_async_copy
/// \param asyncCopyIdentifier identifier generated by the tracer for this async copy (not an argument of hsa_amd_memory_async_copy)
void Create(ULONGLONG ullStartTime,
ULONGLONG ullEndTime,
void* dst,
Expand All @@ -850,7 +851,14 @@ class HSA_APITrace_hsa_amd_memory_async_copy : public HSAAPIBase
uint32_t num_dep_signals,
const hsa_signal_t* dep_signals,
hsa_signal_t completion_signal,
hsa_status_t retVal);
hsa_status_t retVal,
ULONGLONG asyncCopyIdentifier);

/// Write timestamp entry
/// \param sout output stream
/// \param bTimeout a flag indicating output mode
/// \return True if timestamps are ready
bool WriteTimestampEntry(std::ostream& sout, bool bTimeout);

private:
/// Disabled copy constructor
Expand All @@ -871,6 +879,7 @@ class HSA_APITrace_hsa_amd_memory_async_copy : public HSAAPIBase
hsa_signal_t m_dep_signalsVal; ///< Member to hold value passed to hsa_amd_memory_async_copy in dep_signals parameter
hsa_signal_t m_completion_signal; ///< Parameter passed to hsa_amd_memory_async_copy
hsa_status_t m_retVal; ///< Parameter passed to hsa_amd_memory_async_copy
ULONGLONG m_asyncCopyIdentifier; ///< async copy identifier
};

///////////////////////////////////////////////////
Expand Down
7 changes: 5 additions & 2 deletions Src/HSAFdnTrace/AutoGenerated/HSATraceInterception.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3758,7 +3758,9 @@ hsa_status_t HSA_API_Trace_hsa_amd_memory_pool_free(void* ptr)
hsa_status_t HSA_API_Trace_hsa_amd_memory_async_copy(void* dst, hsa_agent_t dst_agent, const void* src, hsa_agent_t src_agent, size_t size, uint32_t num_dep_signals, const hsa_signal_t* dep_signals, hsa_signal_t completion_signal)
{
hsa_signal_t origSignal = completion_signal;
HSA_APITrace_hsa_amd_memory_async_copy_PreCallHelper(dst, dst_agent, src, src_agent, size, num_dep_signals, dep_signals, completion_signal);
ULONGLONG asyncCopyIdentifier = OSUtils::Instance()->GetTimeNanos();
HSA_APITrace_hsa_amd_memory_async_copy_PreCallHelper(dst, dst_agent, src, src_agent, size, num_dep_signals, dep_signals, completion_signal, asyncCopyIdentifier);

ULONGLONG ullStart = OSUtils::Instance()->GetTimeNanos();
hsa_status_t retVal = g_pRealAmdExtFunctions->hsa_amd_memory_async_copy_fn(dst, dst_agent, src, src_agent, size, num_dep_signals, dep_signals, completion_signal);
ULONGLONG ullEnd = OSUtils::Instance()->GetTimeNanos();
Expand All @@ -3777,7 +3779,8 @@ hsa_status_t HSA_API_Trace_hsa_amd_memory_async_copy(void* dst, hsa_agent_t dst_
num_dep_signals,
dep_signals,
origSignal,
retVal);
retVal,
asyncCopyIdentifier);

RECORD_STACK_TRACE_FOR_API(pAPIInfo);
HSAAPIInfoManager::Instance()->AddAPIInfoEntry(pAPIInfo);
Expand Down
Loading

0 comments on commit 45393f9

Please sign in to comment.