Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

changed the CMake option from AMDGPU_TARGETS to GPU_TARGETS #1435

Open
wants to merge 2 commits into
base: develop
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
26 changes: 13 additions & 13 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -68,33 +68,33 @@ if (BUILD_LOCAL_GPU_TARGET_ONLY)
endif()

# Determine which GPU architectures to build for
set(AMDGPU_TARGETS "${DEFAULT_GPUS}" CACHE STRING "Target default GPUs if AMDGPU_TARGETS is not defined.")
set(GPU_TARGETS "${DEFAULT_GPUS}" CACHE STRING "Target default GPUs if GPU_TARGETS is not defined.")

# Modify GPU architectures for Address Sanitizer builds by appending "xnack+"
if (BUILD_ADDRESS_SANITIZER)
SET(amdgpu_targets "")
foreach(amdgpu_target IN LISTS AMDGPU_TARGETS)
foreach(amdgpu_target IN LISTS GPU_TARGETS)
if(NOT amdgpu_target STREQUAL "")
list(APPEND amdgpu_targets "${amdgpu_target}:xnack+")
endif()
endforeach()
SET(AMDGPU_TARGETS "${amdgpu_targets}" CACHE STRING "Modified GPU list for Address-Sanitizer enabled build." FORCE)
SET(GPU_TARGETS "${amdgpu_targets}" CACHE STRING "Modified GPU list for Address-Sanitizer enabled build." FORCE)
endif()

# Check if clang compiler can offload to AMDGPU_TARGETS
# Check if clang compiler can offload to GPU_TARGETS
if (COMMAND rocm_check_target_ids)
message(STATUS "Checking for ROCm support for GPU targets: " "${AMDGPU_TARGETS}")
rocm_check_target_ids(SUPPORTED_GPUS TARGETS ${AMDGPU_TARGETS})
message(STATUS "Checking for ROCm support for GPU targets: " "${GPU_TARGETS}")
rocm_check_target_ids(SUPPORTED_GPUS TARGETS ${GPU_TARGETS})
else()
message(WARNING "Unable to check for supported GPU targets. Falling back to default GPUs.")
set(SUPPORTED_GPUS ${DEFAULT_GPUS})
endif()

set(GPU_TARGETS "${SUPPORTED_GPUS}" CACHE STRING "GPU targets to compile for.")
message(STATUS "Compiling for ${GPU_TARGETS}")
set(COMPILING_TARGETS "${SUPPORTED_GPUS}" CACHE STRING "GPU targets to compile for.")
message(STATUS "Compiling for ${COMPILING_TARGETS}")

## NOTE: Reload rocm-cmake in order to update GPU_TARGETS
include(cmake/Dependencies.cmake) # Reloading to use desired GPU_TARGETS instead of defaults
## NOTE: Reload rocm-cmake in order to update COMPILING_TARGETS
include(cmake/Dependencies.cmake) # Reloading to use desired COMPILING_TARGETS instead of defaults

# Try to establish ROCM_PATH (for find_package)
#==================================================================================================
Expand Down Expand Up @@ -296,7 +296,7 @@ endif()

## Disable building MSCCL++ if the build environment is invalid
## Currently MSCCL++ is supported only on gfx942
if (ENABLE_MSCCLPP AND NOT ("gfx942" IN_LIST GPU_TARGETS OR "gfx942:xnack-" IN_LIST GPU_TARGETS OR "gfx942:xnack+" IN_LIST GPU_TARGETS))
if (ENABLE_MSCCLPP AND NOT ("gfx942" IN_LIST COMPILING_TARGETS OR "gfx942:xnack-" IN_LIST COMPILING_TARGETS OR "gfx942:xnack+" IN_LIST COMPILING_TARGETS))
set(ENABLE_MSCCLPP OFF)
message(WARNING "Can only build MSCCL++ for gfx942; disabling MSCCL++ build")
endif()
Expand Down Expand Up @@ -737,7 +737,7 @@ if (HAVE_KERNARG_PRELOAD)
endif()

## NOTE: This is currently being handled by rocm-cmake, however may need to be re-enabled in the future
#foreach(target ${GPU_TARGETS})
#foreach(target ${COMPILING_TARGETS})
# target_compile_options(rccl PRIVATE --offload-arch=${target})
#endforeach()

Expand Down Expand Up @@ -859,7 +859,7 @@ if(NOT BUILD_SHARED_LIBS)
endif()
endforeach()

foreach(target ${GPU_TARGETS})
foreach(target ${COMPILING_TARGETS})
list(APPEND static_link_flags --offload-arch=${target})
endforeach()
list(JOIN static_link_flags " " flags_str)
Expand Down
4 changes: 2 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@ RCCL build & installation helper script
-i|--install Install RCCL library (see --prefix argument below)
-j|--jobs Specify how many parallel compilation jobs to run ($nproc by default)
-l|--local_gpu_only Only compile for local GPU architecture
--amdgpu_targets Only compile for specified GPU architecture(s). For multiple targets, seperate by ';' (builds for all supported GPU architectures by default)
--gpu_targets Only compile for specified GPU architecture(s). For multiple targets, separate by ';' (builds for all supported GPU architectures by default)
--no_clean Don't delete files if they already exist
--npkit-enable Compile with npkit enabled
--openmp-test-enable Enable OpenMP in rccl unit tests
Expand All @@ -61,7 +61,7 @@ RCCL build & installation helper script
--verbose Show compile commands
```

By default, RCCL builds for all GPU targets defined in `DEFAULT_GPUS` in `CMakeLists.txt`. To target specific GPU(s), and potentially reduce build time, use `--amdgpu_targets` as a `;` separated string listing GPU(s) to target.
By default, RCCL builds for all GPU targets defined in `DEFAULT_GPUS` in `CMakeLists.txt`. To target specific GPU(s), and potentially reduce build time, pass `--gpu_targets` a `;`-separated list of the GPU(s) to build for.

## Manual build

Expand Down
6 changes: 3 additions & 3 deletions docs/install/installation.rst
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,7 @@ RCCL build & installation helper script options:
-i|--install Install RCCL library (see --prefix argument below)
-j|--jobs Specify how many parallel compilation jobs to run ($nproc by default)
-l|--local_gpu_only Only compile for local GPU architecture
--amdgpu_targets Only compile for specified GPU architecture(s). For multiple targets, seperate by ';' (builds for all supported GPU architectures by default)
--gpu_targets Only compile for specified GPU architecture(s). For multiple targets, separate by ';' (builds for all supported GPU architectures by default)
--no_clean Don't delete files if they already exist
--npkit-enable Compile with npkit enabled
--openmp-test-enable Enable OpenMP in rccl unit tests
Expand All @@ -66,7 +66,7 @@ RCCL build & installation helper script options:
--verbose Show compile commands

.. tip::
By default, RCCL builds for all GPU targets defined in ``DEFAULT_GPUS`` in `CMakeLists.txt <https://github.com/ROCm/rccl/blob/develop/CMakeLists.txt>`_. To target specific GPU(s), and potentially reduce build time, use ``--amdgpu_targets`` as a ``;`` separated string listing GPU(s) to target.
By default, RCCL builds for all GPU targets defined in ``DEFAULT_GPUS`` in `CMakeLists.txt <https://github.com/ROCm/rccl/blob/develop/CMakeLists.txt>`_. To target specific GPU(s), and potentially reduce build time, pass ``--gpu_targets`` a ``;``-separated list of the GPU(s) to build for.

Manual build
============
Expand Down Expand Up @@ -158,7 +158,7 @@ Improving performance on MI300 when using less than 8 GPUs
==========================================================

On a system with 8\*MI300X GPUs, each pair of GPUs is connected with dedicated XGMI links in a fully-connected topology. So, for collective operations, one can achieve good performance when all 8 GPUs (and all XGMI links) are used. When using fewer than 8 GPUs, one can only achieve a fraction of the potential bandwidth on the system.
But, if your workload warrants using less than 8 MI300 GPUs on a system, you can set the run-time variable `NCCL_MIN_NCHANNELS` to increase the number of channels.
But, if your workload warrants using fewer than 8 MI300 GPUs on a system, you can set the run-time variable ``NCCL_MIN_NCHANNELS`` to increase the number of channels.

For example: ``export NCCL_MIN_NCHANNELS=32``

Expand Down
12 changes: 6 additions & 6 deletions install.sh
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ build_address_sanitizer=false
build_bfd=false
build_freorg_bkwdcomp=false
build_local_gpu_only=false
build_amdgpu_targets=""
build_gpu_targets=""
build_package=false
build_release=true
build_static=false
Expand Down Expand Up @@ -52,7 +52,7 @@ function display_help()
echo " -i|--install Install RCCL library (see --prefix argument below)"
echo " -j|--jobs Specify how many parallel compilation jobs to run ($num_parallel_jobs by default)"
echo " -l|--local_gpu_only Only compile for local GPU architecture"
echo " --amdgpu_targets Only compile for specified GPU architecture(s). For multiple targets, seperate by ';' (builds for all supported GPU architectures by default)"
echo " --gpu_targets Only compile for specified GPU architecture(s). For multiple targets, seperate by ';' (builds for all supported GPU architectures by default)"
echo " --no_clean Don't delete files if they already exist"
echo " --npkit-enable Compile with npkit enabled"
echo " --openmp-test-enable Enable OpenMP in rccl unit tests"
Expand All @@ -75,7 +75,7 @@ function display_help()
# check if we have a modern version of getopt that can handle whitespace and long parameters
getopt -T
if [[ "$?" -eq 4 ]]; then
GETOPT_PARSE=$(getopt --name "${0}" --options dfhij:lprt --longoptions address-sanitizer,dependencies,debug,enable_backtrace,disable-colltrace,disable-msccl-kernel,disable-mscclpp,fast,help,install,jobs:,local_gpu_only,amdgpu_targets:,no_clean,npkit-enable,openmp-test-enable,roctx-enable,package_build,prefix:,rm-legacy-include-dir,run_tests_all,run_tests_quick,static,tests_build,time-trace,verbose -- "$@")
GETOPT_PARSE=$(getopt --name "${0}" --options dfhij:lprt --longoptions address-sanitizer,dependencies,debug,enable_backtrace,disable-colltrace,disable-msccl-kernel,disable-mscclpp,fast,help,install,jobs:,local_gpu_only,gpu_targets:,no_clean,npkit-enable,openmp-test-enable,roctx-enable,package_build,prefix:,rm-legacy-include-dir,run_tests_all,run_tests_quick,static,tests_build,time-trace,verbose -- "$@")
else
echo "Need a new version of getopt"
exit 1
Expand All @@ -102,7 +102,7 @@ while true; do
-i | --install) install_library=true; shift ;;
-j | --jobs) num_parallel_jobs=${2}; shift 2 ;;
-l | --local_gpu_only) build_local_gpu_only=true; shift ;;
--amdgpu_targets) build_amdgpu_targets=${2}; shift 2 ;;
--gpu_targets) build_gpu_targets=${2}; shift 2 ;;
--no_clean) clean_build=false; shift ;;
--npkit-enable) npkit_enabled=true; shift ;;
--openmp-test-enable) openmp_test_enabled=true; shift ;;
Expand Down Expand Up @@ -218,8 +218,8 @@ if [[ "${build_local_gpu_only}" == true ]]; then
fi

# Build for specified GPU target(s) only
if [[ ! -z "${build_amdgpu_targets}" ]]; then
cmake_common_options="${cmake_common_options} -DAMDGPU_TARGETS=${build_amdgpu_targets}"
if [[ ! -z "${build_gpu_targets}" ]]; then
cmake_common_options="${cmake_common_options} -DAMDGPU_TARGETS=${build_gpu_targets}"
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

-DGPU_TARGETS?

fi

# shared vs static
Expand Down