Skip to content

Commit

Permalink
RCP 5.1 release
Browse files Browse the repository at this point in the history
  • Loading branch information
chesik-amd committed Jun 27, 2017
1 parent 532282d commit a620537
Show file tree
Hide file tree
Showing 67 changed files with 1,102 additions and 564 deletions.
2 changes: 1 addition & 1 deletion Build/Linux/Common.mk
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,7 @@ COMMON_SRC = $(COMMON_DIR)/Src
COMMON_LIB = $(COMMON_DIR)/Lib
COMMON_LIB_AMD = $(COMMON_DIR)/Lib/AMD
COMMON_LIB_EXT = $(COMMON_DIR)/Lib/Ext
GPU_PERF_API_DIR = $(COMMON_LIB_AMD)/GPUPerfAPI/2_22/Include
GPU_PERF_API_DIR = $(COMMON_LIB_AMD)/GPUPerfAPI/2_23/Include
ATI_STREAM_SDK_DIR = $(COMMON_LIB_AMD)/APPSDK/3-0
APPSDK_DIR = $(ATI_STREAM_SDK_DIR)
CAL_DIR = $(COMMON_LIB_AMD)/CAL/8.95
Expand Down
3 changes: 3 additions & 0 deletions Build/Linux/CommonTargets.mk
Original file line number Diff line number Diff line change
Expand Up @@ -2,14 +2,17 @@ default: $(TARGET) $(TARGETSO) $(TARGETLIB)

# build targets
$(TARGET) : makedir $(OBJS)
$(PRE_BUILD_ACTION)
$(CC) $(LINKFLAGS_EXE) $(PLATFORM_LFLAG) $(OBJS) $(LIBPATH) $(LIBS) -o $(TARGET) $(STATIC_LIBS)
$(POST_BUILD_ACTION)

$(TARGETLIB) : makedir $(LIB_OBJS)
$(PRE_BUILD_ACTION)
ar rcs $(TARGETLIB) $(LIB_OBJS)
$(POST_BUILD_ACTION)

$(TARGETSO) : makedir $(SO_OBJS)
$(PRE_BUILD_ACTION)
$(CC) $(LINKFLAGS_SO) $(PLATFORM_LFLAG) $(SO_OBJS) $(LIBPATH) $(LIBS) -o $(TARGETSO) $(STATIC_LIBS)
$(POST_BUILD_ACTION)

Expand Down
2 changes: 1 addition & 1 deletion Build/Linux/build_rcp.sh
Original file line number Diff line number Diff line change
Expand Up @@ -157,7 +157,7 @@ HSAFDNTRACE="$SRCDIR/HSAFdnTrace"
PRELOADXINITTHREADS="$SRCDIR/PreloadXInitThreads"
ACTIVITYLOGGER="CXLActivityLogger"
ACTIVITYLOGGERDIR="$COMMONSRC/AMDTActivityLogger/"
GPA="$COMMON/Lib/AMD/GPUPerfAPI/2_22"
GPA="$COMMON/Lib/AMD/GPUPerfAPI/2_23"
JQPLOT_PATH="$RCPROOT/Src/Common/jqPlot"
DOXYGENBIN="$COMMON/DK/Doxygen/doxygen-1.5.6/bin/doxygen"
PROFILEDATAPARSERSRC="$SRCDIR/ProfileDataParser"
Expand Down
2 changes: 1 addition & 1 deletion Build/VS2015/RCP-GPUPerfAPI.props
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
<?xml version="1.0" encoding="utf-8"?>
<Project DefaultTargets="Build" ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
<PropertyGroup Label="UserMacros">
<GPUPerfAPIDir>$(CommonDir)\Lib\AMD\GPUPerfAPI\2_22</GPUPerfAPIDir>
<GPUPerfAPIDir>$(CommonDir)\Lib\AMD\GPUPerfAPI\2_23</GPUPerfAPIDir>
</PropertyGroup>
<PropertyGroup>
<_ProjectFileVersion>10.0.30319.1</_ProjectFileVersion>
Expand Down
2 changes: 2 additions & 0 deletions Build/VS2015/RCPCLCommon.vcxproj
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,7 @@
</PropertyGroup>
<ItemGroup>
<ClCompile Include="..\..\Src\CLCommon\CLCUInfoBase.cpp" />
<ClCompile Include="..\..\Src\CLCommon\CLDeviceReplacer.cpp" />
<ClCompile Include="..\..\Src\CLCommon\CLFunctionDefs.cpp" />
<ClCompile Include="..\..\Src\CLCommon\CLFunctionEnumDefs.cpp" />
<ClCompile Include="..\..\Src\CLCommon\CLInternalFunctionDefs.cpp" />
Expand All @@ -54,6 +55,7 @@
</ItemGroup>
<ItemGroup>
<ClInclude Include="..\..\Src\CLCommon\CLCUInfoBase.h" />
<ClInclude Include="..\..\Src\CLCommon\CLDeviceReplacer.h" />
<ClInclude Include="..\..\Src\CLCommon\CLFunctionDefs.h" />
<ClInclude Include="..\..\Src\CLCommon\CLFunctionEnumDefs.h" />
<ClInclude Include="..\..\Src\CLCommon\CLInternalFunctionDefs.h" />
Expand Down
6 changes: 6 additions & 0 deletions Build/VS2015/RCPCLCommon.vcxproj.filters
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,9 @@
<ClCompile Include="..\..\..\Common\Src\DynamicLibraryModule\OpenCLModule.cpp">
<Filter>DynamicLibraryModule</Filter>
</ClCompile>
<ClCompile Include="..\..\Src\CLCommon\CLDeviceReplacer.cpp">
<Filter>Source Files</Filter>
</ClCompile>
</ItemGroup>
<ItemGroup>
<ClInclude Include="..\..\Src\CLCommon\CLCUInfoBase.h">
Expand All @@ -45,6 +48,9 @@
<ClInclude Include="..\..\..\Common\Src\DynamicLibraryModule\OpenCLModule.h">
<Filter>DynamicLibraryModule</Filter>
</ClInclude>
<ClInclude Include="..\..\Src\CLCommon\CLDeviceReplacer.h">
<Filter>Header Files</Filter>
</ClInclude>
</ItemGroup>
<ItemGroup>
<Filter Include="DynamicLibraryModule">
Expand Down
30 changes: 22 additions & 8 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ RCP was formerly delivered as part of CodeXL with the executable name
* [Cloning the Repository](#cloning-the-repository)
* [Source Code Directory Layout](#source-code-directory-layout)
* [Why version 5.x?](#why-version-5x)
* [Known Issues](#known-issues)
* [Building the Source Code](BUILD.md)
* [License](LICENSE)

Expand All @@ -30,18 +31,20 @@ RCP was formerly delivered as part of CodeXL with the executable name
* When used with CodeXL, all profiler data can be visualized in a user-friendly graphical user interface.

## What's New
* Version 5.0 (in comparison to CodeXL 2.2)
* Adds support for additional GPUs and APUs.
* ROCm/HSA: Support for ROCm 1.5
* Support for demangling names of HIP and HCC kernels. Requires c++filt to be installed on the system. c++filt can be installed using *sudo apt-get install binutils*
* Version 5.1 (6/28/17)
* Adds support for additional GPUs, including Vega series GPUs
* ROCm/HSA: Support for ROCm 1.6
* Improves display of pointer parameters for some HSA APIs in the ATP file
* Fixes an issue with parsing an ATP file which has non-ascii characters (affected Summary page generation and display within CodeXL)

## System Requirements
* An AMD Radeon GCN-based GPU or APU
* Radeon Software Crimson Edition 17.2.2 or later (Driver Packaging Version 16.60 or later).
* ROCm 1.5. See system requirements for ROCm: https://rocm.github.io/install.html and https://rocm.github.io/hardware.html.
* Radeon Software Crimson ReLive Edition 17.4.3 or later (Driver Packaging Version 17.10 or later).
* For Vega support, a driver with Driver Packaging Version 17.20 or later is required
* ROCm 1.6. See system requirements for ROCm: https://rocm.github.io/install.html and https://rocm.github.io/hardware.html.
* Windows 7, 8.1, and 10
* For Windows, the __Visual C++ Redistributable for Visual Studio 2015__ is required. It can be downloaded from https://www.microsoft.com/en-us/download/details.aspx?id=48145
* Ubuntu (14.04 and later) and RHEL (7 and later) distributions
* For Windows, the `Visual C++ Redistributable for Visual Studio 2015` is required. It can be downloaded from https://www.microsoft.com/en-us/download/details.aspx?id=48145
* Ubuntu (14.04 and later, 16.04 or later for ROCm support) and RHEL (7 and later) distributions

## Cloning the Repository
To clone the RCP repository, execute the following git commands
Expand Down Expand Up @@ -80,3 +83,14 @@ APP Profiler product, which progressed from version 1.x to 3.x. Then the profile
was included in CodeXL, and the codebase was labelled as version 4.x. Now that RCP
is being pulled out of CodeXL and into its own codebase again, we've bumped the
version number up to 5.x.

## Known Issues
* For the OpenCL™ Profiler
* Collecting Performance Counters for an OpenCL™ application is not currently working for Vega GPUs on Windows when using a 17.20-based driver. This is due to missing driver support in the 17.20 driver. Future driver versions should provide the support needed.
* Collecting Performance Counters using --perfcounter for an OpenCL™ application when running OpenCL-on-ROCm is not currently supported. The workaround is to profile using the ROCm profiler (using the --hsapmc command-line switch).
* For the ROCm Profiler
* API Trace and Perf Counter data may be truncated or missing if the application being profiled does not call hsa_shut_down
* Kernel occupancy information will only be written to disk if the application being profiled calls hsa_shut_down
* When collecting a trace for an application that performs memory transfers using hsa_amd_memory_async_copy, if the application asks for the data transfer timestamps directly, it will not get correct timestamps. The profiler will show the correct timestamps, however.
* When collecting an aql packet trace, if the application asks for the kernel dispatch timestamps directly, it will not get correct timestamps. The profiler will show the correct timestamps, however.
* When the rocm-profiler package (.deb or .rpm) is installed along with rocm, it may not be able to generate the default single-pass counter files. If you do not see counter files in /opt/rocm/profiler/counterfiles, you can generate them manually with this command: "sudo /opt/rocm/profiler/bin/CodeXLGpuProfiler --list --outputfile /opt/rocm/profiler/counterfiles/counters --maxpassperfile 1"
13 changes: 13 additions & 0 deletions ReleaseNotes.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
# Radeon Compute Profiler Release Notes
---

## Version 5.1 (6/28/17)
* Adds support for additional GPUs, including Vega series GPUs
* ROCm/HSA: Support for ROCm 1.6
* Improves display of pointer parameters for some HSA APIs in the ATP file
* Fixes an issue with parsing an ATP file which has non-ascii characters (affected Summary page generation and display within CodeXL)

## Version 5.0 (in comparison to CodeXL 2.2) (6/2/17)
* Adds support for additional GPUs and APUs.
* ROCm/HSA: Support for ROCm 1.5
* Support for demangling names of HIP and HCC kernels. Requires c++filt to be installed on the system. c++filt can be installed using *sudo apt-get install binutils*
4 changes: 2 additions & 2 deletions Scripts/UpdateCommonMap.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,11 +36,11 @@
}

downloadMappingWin = {
"https://github.com/GPUOpen-Tools/GPA/releases/download/v2.22.1/GPUPerfAPI-2.22.2262.1.zip" : "../Common/Lib/AMD/GPUPerfAPI",
"https://github.com/GPUOpen-Tools/GPA/releases/download/v2.23/GPUPerfAPI-2.23.2382.0.zip" : "../Common/Lib/AMD/GPUPerfAPI",
}

downloadMappingLin = {
"https://github.com/GPUOpen-Tools/GPA/releases/download/v2.22.1/GPUPerfAPI.2.22.1834-lnx.tgz" : "../Common/Lib/AMD/GPUPerfAPI",
"https://github.com/GPUOpen-Tools/GPA/releases/download/v2.23/GPUPerfAPI.2.23.1973-lnx.tgz" : "../Common/Lib/AMD/GPUPerfAPI",
}


170 changes: 170 additions & 0 deletions Src/CLCommon/CLDeviceReplacer.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,170 @@
//==============================================================================
// Copyright (c) 2017 Advanced Micro Devices, Inc. All rights reserved.
/// \author AMD Developer Tools Team
/// \file
/// \brief This file contains the implementation of the CLDeviceReplacer class
//==============================================================================

#include "CLDeviceReplacer.h"
#include "CLFunctionDefs.h"

/// Adjusts the results of a device-id query so that, of all the GPU devices
/// cached for \p platform, only the one at position \p gpuIndex is reported.
///
/// On the first call the GPU devices returned by GetDeviceIDs for a null
/// platform are enumerated once and cached per owning platform.
///
/// \param[in]     platform        platform id the query was made for
/// \param[in]     device_type     device type bitfield of the query
/// \param[in]     num_entries     number of entries in device_list
/// \param[in,out] device_list     device list of the query (may be nullptr when only the count is queried)
/// \param[in,out] num_devices     device count of the query (may be nullptr)
/// \param[in]     gpuIndex        index of the GPU, within the platform's cached GPU list, that stays visible
/// \param[in]     originalRetCode status code of the query being adjusted
/// \return originalRetCode when nothing was adjusted, otherwise CL_SUCCESS or CL_DEVICE_NOT_FOUND
cl_int CLDeviceReplacer::ReplaceDeviceIds(cl_platform_id platform, cl_device_type device_type, cl_uint num_entries, cl_device_id* device_list, cl_uint* num_devices, unsigned int gpuIndex, cl_uint originalRetCode)
{
    // OpenCL status codes are signed; the parameter is declared unsigned, so convert back explicitly.
    const cl_int originalStatus = static_cast<cl_int>(originalRetCode);

    if (!m_bIsGPUIterated)
    {
        // One-time enumeration of the GPU devices; the flag is set up front so a failed attempt is not repeated.
        m_bIsGPUIterated = true;
        cl_uint deviceCount = 0u;
        bool success = CL_SUCCESS == g_realDispatchTable.GetDeviceIDs(nullptr, CL_DEVICE_TYPE_GPU, 0u, nullptr, &deviceCount);

        if (!success)
        {
            // Query the platforms first and then retry the device count query.
            cl_uint platformCount = 0u;
            g_realDispatchTable.GetPlatformIDs(0u, nullptr, &platformCount);
            success = CL_SUCCESS == g_realDispatchTable.GetDeviceIDs(nullptr, CL_DEVICE_TYPE_GPU, 0u, nullptr, &deviceCount);
        }

        if (success && 0u < deviceCount)
        {
            cl_device_id* deviceIds = new(std::nothrow) cl_device_id[deviceCount];

            if (nullptr != deviceIds)
            {
                // Only successful allocations are tracked for release in the destructor.
                m_CLGPUDeviceIdsList.push_back(deviceIds);

                if (CL_SUCCESS == g_realDispatchTable.GetDeviceIDs(nullptr, CL_DEVICE_TYPE_GPU, deviceCount, deviceIds, nullptr))
                {
                    for (unsigned int deviceIndex = 0; deviceIndex < deviceCount; deviceIndex++)
                    {
                        cl_platform_id devicePlatform = nullptr;
                        const bool gotPlatform = CL_SUCCESS == g_realDispatchTable.GetDeviceInfo(deviceIds[deviceIndex], CL_DEVICE_PLATFORM, sizeof(cl_platform_id), &devicePlatform, nullptr);

                        if (gotPlatform && nullptr != devicePlatform)
                        {
                            // operator[] creates the per-platform list on first use.
                            m_clPlatformDeviceIdsMap[devicePlatform].push_back(deviceIds[deviceIndex]);
                        }
                    }
                }
            }
        }
    }

    if (CL_DEVICE_TYPE_GPU != (CL_DEVICE_TYPE_GPU & device_type))
    {
        // Not querying for GPU devices - don't do anything
        return originalStatus;
    }

    // Look the platform up without inserting an empty entry for platforms that have no cached GPU.
    const auto platformIt = m_clPlatformDeviceIdsMap.find(platform);
    const cl_uint gpuDeviceCountForThePlatform = (m_clPlatformDeviceIdsMap.end() == platformIt) ? 0u : static_cast<cl_uint>(platformIt->second.size());
    const bool isRequestedGPUAvailable = gpuIndex < gpuDeviceCountForThePlatform;

    // Check whether any device other than GPUs exists for the specified device type.
    // NOTE(review): when device_type is exactly CL_DEVICE_TYPE_GPU the mask below is 0, so this query
    // presumably fails and the "count was zero" path further down leaves *num_devices untouched - confirm
    // whether that is intended.
    cl_uint nonGPUDeviceCount = 0u;
    const bool success = CL_SUCCESS == g_realDispatchTable.GetDeviceIDs(platform, (device_type ^ CL_DEVICE_TYPE_GPU), 0u, nullptr, &nonGPUDeviceCount);

    cl_int ret = originalStatus;

    if (nullptr == device_list && nullptr != num_devices)
    {
        // Case: querying only the device count
        if (0u < *num_devices)
        {
            if (*num_devices >= gpuDeviceCountForThePlatform)
            {
                // The count covers only GPU devices, or GPU devices along with other devices:
                // drop every cached GPU and add one back if the requested GPU exists.
                *num_devices = (*num_devices - gpuDeviceCountForThePlatform) + (isRequestedGPUAvailable ? 1u : 0u);
            }
        }
        else if (success)
        {
            // Either this function was called before the actual query, or no device of the
            // specified type exists: report the non-GPU devices plus the requested GPU, if any.
            *num_devices = nonGPUDeviceCount + (isRequestedGPUAvailable ? 1u : 0u);
        }

        ret = 0 == *num_devices ? CL_DEVICE_NOT_FOUND : CL_SUCCESS;
    }
    else if (nullptr != device_list)
    {
        // Case: querying for the device list (not only the count)
        if (isRequestedGPUAvailable)
        {
            if (nullptr != num_devices)
            {
                // Only one GPU is available.
                // NOTE(review): non-GPU devices kept in device_list are not counted here, unlike in the
                // count-only case above - confirm against the caller.
                *num_devices = 1;
            }

            if (num_entries > 0u)
            {
                const cl_device_id selectedGPU = platformIt->second[gpuIndex];

                // NOTE(review): every one of the num_entries slots is inspected; this assumes the caller
                // has filled all of them with valid device ids - confirm.
                for (unsigned int i = 0; i < num_entries; i++)
                {
                    cl_device_type clCurrentDeviceType;

                    if (CL_SUCCESS == g_realDispatchTable.GetDeviceInfo(device_list[i], CL_DEVICE_TYPE, sizeof(cl_device_type), &clCurrentDeviceType, nullptr))
                    {
                        // Blank out every GPU except the selected one; the slot itself is kept.
                        if (CL_DEVICE_TYPE_GPU == clCurrentDeviceType && device_list[i] != selectedGPU)
                        {
                            device_list[i] = nullptr;
                        }
                    }
                }

                ret = CL_SUCCESS;
            }
            else
            {
                ret = originalStatus;
            }
        }
        else
        {
            ret = CL_DEVICE_NOT_FOUND;
        }
    }
    else
    {
        // Neither a list nor a count was requested: nothing was adjusted, so the status of the
        // original query is passed through instead of the status of the internal non-GPU query.
        ret = originalStatus;
    }

    return ret;
}

/// Releases every device id array recorded in the housekeeping list.
CLDeviceReplacer::~CLDeviceReplacer()
{
    for (cl_device_id* pDeviceIdArray : m_CLGPUDeviceIdsList)
    {
        delete[] pDeviceIdArray;
    }
}
52 changes: 52 additions & 0 deletions Src/CLCommon/CLDeviceReplacer.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
//==============================================================================
// Copyright (c) 2017 Advanced Micro Devices, Inc. All rights reserved.
/// \author AMD Developer Tools Team
/// \file
/// \brief This file contains a class to replace a device id
//==============================================================================

#ifndef _CL_DEVICE_REPLACER_
#define _CL_DEVICE_REPLACER_

#include <CL/opencl.h>
#include <TSingleton.h>
#include <vector>
#include <map>

/// Singleton which replaces the cl device ids reported by a device query so
/// that only the GPU with a specified index is exposed
class CLDeviceReplacer : public TSingleton<CLDeviceReplacer>
{
    friend class TSingleton<CLDeviceReplacer>;

public:
    /// Destructor; frees the device id arrays kept for housekeeping
    ~CLDeviceReplacer();

    /// Replaces the CL device ids for the GPU device and specified GPU index
    /// \param[in] platform platform id
    /// \param[in] device_type type of the device
    /// \param[in] num_entries number of entries for the devices to be updated
    /// \param[out] device_list list of the device
    /// \param[out] num_devices number of devices of specified device type
    /// \param[in] gpuIndex index of the GPU
    /// \param[in] originalRetCode return code of the original device query; returned as-is when device_type does not include CL_DEVICE_TYPE_GPU
    /// \return an OpenCL status code: originalRetCode, CL_SUCCESS or CL_DEVICE_NOT_FOUND
    cl_int ReplaceDeviceIds(cl_platform_id platform,
                            cl_device_type device_type,
                            cl_uint        num_entries,
                            cl_device_id*  device_list,
                            cl_uint*       num_devices,
                            unsigned int   gpuIndex,
                            cl_uint        originalRetCode);

private:
    /// Constructor (private; TSingleton is a friend)
    CLDeviceReplacer()
    {}

    std::map<cl_platform_id, std::vector<cl_device_id>> m_clPlatformDeviceIdsMap;   ///< GPU device ids, keyed by the platform they belong to
    std::vector<cl_device_id*>                          m_CLGPUDeviceIdsList;       ///< device id arrays kept for housekeeping; deleted by the destructor
    bool                                                m_bIsGPUIterated = false;   ///< flag indicating that the GPU devices have already been iterated
};

#endif // _CL_DEVICE_REPLACER_
1 change: 1 addition & 0 deletions Src/CLCommon/makefile
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ LIB_OBJS = \
./$(OBJ_DIR)/CLCUInfoBase.o \
./$(OBJ_DIR)/CLPlatformInfo.o \
./$(OBJ_DIR)/OpenCLModule.o \
./$(OBJ_DIR)/CLDeviceReplacer.o \

include $(DEPTH)/Build/Linux/CommonTargets.mk

Expand Down
Loading

0 comments on commit a620537

Please sign in to comment.