Skip to content

Commit

Permalink
Fix auto-detecting GPU architectures (#7593)
Browse files Browse the repository at this point in the history
Fixes regression from #7579 in auto-detecting GPU architectures when `-DCMAKE_CUDA_ARCHITECTURES=` is passed on the CLI.

Because the cached `CMAKE_CUDA_ARCHITECTURES` is no longer unset before `enable_language(CUDA)` is called, passing an empty value makes that call throw an error and configuration fails. This change ensures `enable_language(CUDA)` is called only after any potential rewrite of `CMAKE_CUDA_ARCHITECTURES`.

This PR also aligns the detection logic with RMM's `EvalGPUArchs.cmake` and prints `SUPPORTED_CUDA_ARCHITECTURES` instead of `"ALL"` when the current machine is a CPU-only node.

Related: rapidsai/rmm#727

Authors:
  - Paul Taylor (@trxcllnt)
  - Robert Maynard (@robertmaynard)

Approvers:
  - Keith Kraus (@kkraus14)

URL: #7593
  • Loading branch information
trxcllnt authored Mar 15, 2021
1 parent 36f18c8 commit 05bb2f0
Show file tree
Hide file tree
Showing 4 changed files with 55 additions and 52 deletions.
2 changes: 1 addition & 1 deletion build.sh
Original file line number Diff line number Diff line change
Expand Up @@ -135,7 +135,7 @@ if hasArg clean; then
fi

if (( ${BUILD_ALL_GPU_ARCH} == 0 )); then
CUDF_CMAKE_CUDA_ARCHITECTURES="-DCMAKE_CUDA_ARCHITECTURES=ALL"
CUDF_CMAKE_CUDA_ARCHITECTURES="-DCMAKE_CUDA_ARCHITECTURES="
echo "Building for the architecture of the GPU in the system..."
else
CUDF_CMAKE_CUDA_ARCHITECTURES=""
Expand Down
22 changes: 6 additions & 16 deletions cpp/cmake/Modules/ConfigureCUDA.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -17,26 +17,16 @@
# Find the CUDAToolkit
find_package(CUDAToolkit REQUIRED)

# Must come after find_package(CUDAToolkit) because we symlink
# ccache as a compiler front-end for nvcc in gpuCI CPU builds.
enable_language(CUDA)

if(CMAKE_CUDA_COMPILER_VERSION)
# Compute the version from CMAKE_CUDA_COMPILER_VERSION
string(REGEX REPLACE "([0-9]+)\\.([0-9]+).*" "\\1" CUDA_VERSION_MAJOR ${CMAKE_CUDA_COMPILER_VERSION})
string(REGEX REPLACE "([0-9]+)\\.([0-9]+).*" "\\2" CUDA_VERSION_MINOR ${CMAKE_CUDA_COMPILER_VERSION})
set(CUDA_VERSION "${CUDA_VERSION_MAJOR}.${CUDA_VERSION_MINOR}")
endif()

message(VERBOSE "CUDF: CUDA_VERSION_MAJOR: ${CUDA_VERSION_MAJOR}")
message(VERBOSE "CUDF: CUDA_VERSION_MINOR: ${CUDA_VERSION_MINOR}")
message(STATUS "CUDF: CUDA_VERSION: ${CUDA_VERSION}")

# Auto-detect available GPU compute architectures

include(${CUDF_SOURCE_DIR}/cmake/Modules/SetGPUArchs.cmake)
message(STATUS "CUDF: Building CUDF for GPU architectures: ${CMAKE_CUDA_ARCHITECTURES}")

# Must come after find_package(CUDAToolkit) because we symlink
# ccache as a compiler front-end for nvcc in gpuCI CPU builds.
# Must also come after we detect and potentially rewrite
# CMAKE_CUDA_ARCHITECTURES
enable_language(CUDA)

if(CMAKE_COMPILER_IS_GNUCXX)
list(APPEND CUDF_CXX_FLAGS -Wall -Werror -Wno-unknown-pragmas -Wno-error=deprecated-declarations)
if(CUDF_BUILD_TESTS OR CUDF_BUILD_BENCHMARKS)
Expand Down
37 changes: 22 additions & 15 deletions cpp/cmake/Modules/EvalGPUArchs.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -14,12 +14,21 @@
# limitations under the License.
#=============================================================================

# Override this with OFF first in case it's set to <empty_string>
set(CMAKE_CUDA_ARCHITECTURES OFF)

# Enable CUDA so we can invoke nvcc
enable_language(CUDA)

# Function uses the CUDA runtime API to query the compute capability of the device, so if a user
# doesn't pass any architecture options to CMake we only build the current architecture
function(evaluate_gpu_archs gpu_archs)
set(eval_file ${PROJECT_BINARY_DIR}/eval_gpu_archs.cu)
set(eval_exe ${PROJECT_BINARY_DIR}/eval_gpu_archs)
set(error_file ${PROJECT_BINARY_DIR}/eval_gpu_archs.stderr.log)
file(WRITE ${eval_file}
[=[
file(
WRITE ${eval_file}
"
#include <cstdio>
#include <set>
#include <string>
Expand All @@ -32,32 +41,30 @@ int main(int argc, char** argv) {
char buff[32];
cudaDeviceProp prop;
if(cudaGetDeviceProperties(&prop, dev) != cudaSuccess) continue;
sprintf(buff, "%d%d", prop.major, prop.minor);
sprintf(buff, \"%d%d\", prop.major, prop.minor);
archs.insert(buff);
}
}
if(archs.empty()) {
printf("ALL");
printf(\"${SUPPORTED_CUDA_ARCHITECTURES}\");
} else {
bool first = true;
for(const auto& arch : archs) {
printf(first? "%s" : ";%s", arch.c_str());
printf(first? \"%s\" : \";%s\", arch.c_str());
first = false;
}
}
printf("\n");
printf(\"\\n\");
return 0;
}
]=])
")
execute_process(
COMMAND ${CMAKE_CUDA_COMPILER}
-std=c++11
-o ${eval_exe}
--run
${eval_file}
COMMAND ${CMAKE_CUDA_COMPILER} -std=c++11 -o ${eval_exe} --run ${eval_file}
OUTPUT_VARIABLE __gpu_archs
OUTPUT_STRIP_TRAILING_WHITESPACE
ERROR_FILE ${error_file})
message(VERBOSE "CUDF: Auto detection of gpu-archs: ${__gpu_archs}")
set(${gpu_archs} ${__gpu_archs} PARENT_SCOPE)
endfunction()
message(STATUS "CUDF: Auto detection of gpu-archs: ${__gpu_archs}")
set(${gpu_archs}
${__gpu_archs}
PARENT_SCOPE)
endfunction(evaluate_gpu_archs)
46 changes: 26 additions & 20 deletions cpp/cmake/Modules/SetGPUArchs.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -25,35 +25,41 @@ else()
list(REMOVE_ITEM SUPPORTED_CUDA_ARCHITECTURES "62" "72")
endif()

if(CMAKE_CUDA_COMPILER_VERSION VERSION_LESS 11)
# CMake < 3.20 has a bug in FindCUDAToolkit where it won't properly detect the CUDAToolkit version
# when find_package(CUDAToolkit) occurs before enable_language(CUDA)
if(NOT DEFINED CUDAToolkit_VERSION AND CMAKE_CUDA_COMPILER)
execute_process(COMMAND ${CMAKE_CUDA_COMPILER} "--version" OUTPUT_VARIABLE NVCC_OUT)
if(NVCC_OUT MATCHES [=[ V([0-9]+)\.([0-9]+)\.([0-9]+)]=])
set(CUDAToolkit_VERSION_MAJOR "${CMAKE_MATCH_1}")
set(CUDAToolkit_VERSION_MINOR "${CMAKE_MATCH_2}")
set(CUDAToolkit_VERSION_PATCH "${CMAKE_MATCH_3}")
set(CUDAToolkit_VERSION "${CMAKE_MATCH_1}.${CMAKE_MATCH_2}.${CMAKE_MATCH_3}")
endif()
unset(NVCC_OUT)
endif()

if(CUDAToolkit_VERSION_MAJOR LESS 11)
list(REMOVE_ITEM SUPPORTED_CUDA_ARCHITECTURES "80")
endif()
if(CMAKE_CUDA_COMPILER_VERSION VERSION_LESS 10)
if(CUDAToolkit_VERSION_MAJOR LESS 10)
list(REMOVE_ITEM SUPPORTED_CUDA_ARCHITECTURES "75")
endif()
if(CMAKE_CUDA_COMPILER_VERSION VERSION_LESS 9)
if(CUDAToolkit_VERSION_MAJOR LESS 9)
list(REMOVE_ITEM SUPPORTED_CUDA_ARCHITECTURES "70")
endif()

if(CUDF_BUILD_FOR_DETECTED_ARCHS)
include(${CUDF_SOURCE_DIR}/cmake/Modules/EvalGPUArchs.cmake)
evaluate_gpu_archs(CMAKE_CUDA_ARCHITECTURES)
if(CMAKE_CUDA_ARCHITECTURES STREQUAL "ALL")
unset(CMAKE_CUDA_ARCHITECTURES CACHE)
set(CUDF_BUILD_FOR_ALL_ARCHS TRUE)
else()
set(CUDF_BUILD_FOR_ALL_ARCHS FALSE)
list(TRANSFORM CMAKE_CUDA_ARCHITECTURES APPEND "-real")
endif()
endif()

if(CUDF_BUILD_FOR_ALL_ARCHS)
if(${PROJECT_NAME}_BUILD_FOR_ALL_ARCHS)
set(CMAKE_CUDA_ARCHITECTURES ${SUPPORTED_CUDA_ARCHITECTURES})
# CMake architecture list entry of "80" means to build compute and sm.
# What we want is for only the newest arch to be built that way,
# while the rest are built only for sm.
list(SORT CMAKE_CUDA_ARCHITECTURES ORDER ASCENDING)

# CMake architecture list entry of "80" means to build compute and sm. What we want is for only the
# newest arch to be built that way, while the rest are built only for sm.
list(POP_BACK CMAKE_CUDA_ARCHITECTURES latest_arch)
list(TRANSFORM CMAKE_CUDA_ARCHITECTURES APPEND "-real")
list(APPEND CMAKE_CUDA_ARCHITECTURES ${latest_arch})

elseif(${PROJECT_NAME}_BUILD_FOR_DETECTED_ARCHS)
include(${PROJECT_SOURCE_DIR}/cmake/Modules/EvalGPUArchs.cmake)
evaluate_gpu_archs(CMAKE_CUDA_ARCHITECTURES)

list(TRANSFORM CMAKE_CUDA_ARCHITECTURES APPEND "-real")
endif()

0 comments on commit 05bb2f0

Please sign in to comment.