From 05bb2f06ad05b4db1cde08e947797729e4a4b9dd Mon Sep 17 00:00:00 2001 From: Paul Taylor Date: Mon, 15 Mar 2021 18:56:30 -0500 Subject: [PATCH] Fix auto-detecting GPU architectures (#7593) Fixes regression from https://github.com/rapidsai/cudf/pull/7579 in auto-detecting GPU architectures when `-DCMAKE_CUDA_ARCHITECTURES=` is passed on the CLI. Now that the cached `CMAKE_CUDA_ARCHITECTURES` isn't unset before calling `enable_language(CUDA)`, this call throws an error and configuration fails. This change ensures we call `enable_language(CUDA)` after any potential rewrites of `CMAKE_CUDA_ARCHITECTURES`. This PR also aligns with RMM's `EvalGPUArchs.cmake` logic and prints `SUPPORTED_CUDA_ARCHITECTURES` instead of `"ALL"` in the case the current machine is a CPU-only node. Related: https://github.com/rapidsai/rmm/pull/727 Authors: - Paul Taylor (@trxcllnt) - Robert Maynard (@robertmaynard) Approvers: - Keith Kraus (@kkraus14) URL: https://github.com/rapidsai/cudf/pull/7593 --- build.sh | 2 +- cpp/cmake/Modules/ConfigureCUDA.cmake | 22 ++++--------- cpp/cmake/Modules/EvalGPUArchs.cmake | 37 ++++++++++++--------- cpp/cmake/Modules/SetGPUArchs.cmake | 46 +++++++++++++++------------ 4 files changed, 55 insertions(+), 52 deletions(-) diff --git a/build.sh b/build.sh index 5eb404d02a8..d75053f8849 100755 --- a/build.sh +++ b/build.sh @@ -135,7 +135,7 @@ if hasArg clean; then fi if (( ${BUILD_ALL_GPU_ARCH} == 0 )); then - CUDF_CMAKE_CUDA_ARCHITECTURES="-DCMAKE_CUDA_ARCHITECTURES=ALL" + CUDF_CMAKE_CUDA_ARCHITECTURES="-DCMAKE_CUDA_ARCHITECTURES=" echo "Building for the architecture of the GPU in the system..." 
else CUDF_CMAKE_CUDA_ARCHITECTURES="" diff --git a/cpp/cmake/Modules/ConfigureCUDA.cmake b/cpp/cmake/Modules/ConfigureCUDA.cmake index 44699a13206..d4be6e65021 100644 --- a/cpp/cmake/Modules/ConfigureCUDA.cmake +++ b/cpp/cmake/Modules/ConfigureCUDA.cmake @@ -17,26 +17,16 @@ # Find the CUDAToolkit find_package(CUDAToolkit REQUIRED) -# Must come after find_package(CUDAToolkit) because we symlink -# ccache as a compiler front-end for nvcc in gpuCI CPU builds. -enable_language(CUDA) - -if(CMAKE_CUDA_COMPILER_VERSION) - # Compute the version. from CMAKE_CUDA_COMPILER_VERSION - string(REGEX REPLACE "([0-9]+)\\.([0-9]+).*" "\\1" CUDA_VERSION_MAJOR ${CMAKE_CUDA_COMPILER_VERSION}) - string(REGEX REPLACE "([0-9]+)\\.([0-9]+).*" "\\2" CUDA_VERSION_MINOR ${CMAKE_CUDA_COMPILER_VERSION}) - set(CUDA_VERSION "${CUDA_VERSION_MAJOR}.${CUDA_VERSION_MINOR}") -endif() - -message(VERBOSE "CUDF: CUDA_VERSION_MAJOR: ${CUDA_VERSION_MAJOR}") -message(VERBOSE "CUDF: CUDA_VERSION_MINOR: ${CUDA_VERSION_MINOR}") -message(STATUS "CUDF: CUDA_VERSION: ${CUDA_VERSION}") - # Auto-detect available GPU compute architectures - include(${CUDF_SOURCE_DIR}/cmake/Modules/SetGPUArchs.cmake) message(STATUS "CUDF: Building CUDF for GPU architectures: ${CMAKE_CUDA_ARCHITECTURES}") +# Must come after find_package(CUDAToolkit) because we symlink +# ccache as a compiler front-end for nvcc in gpuCI CPU builds. +# Must also come after we detect and potentially rewrite +# CMAKE_CUDA_ARCHITECTURES +enable_language(CUDA) + if(CMAKE_COMPILER_IS_GNUCXX) list(APPEND CUDF_CXX_FLAGS -Wall -Werror -Wno-unknown-pragmas -Wno-error=deprecated-declarations) if(CUDF_BUILD_TESTS OR CUDF_BUILD_BENCHMARKS) diff --git a/cpp/cmake/Modules/EvalGPUArchs.cmake b/cpp/cmake/Modules/EvalGPUArchs.cmake index 6c747a0b867..09e42c6cc7a 100644 --- a/cpp/cmake/Modules/EvalGPUArchs.cmake +++ b/cpp/cmake/Modules/EvalGPUArchs.cmake @@ -14,12 +14,21 @@ # limitations under the License. 
#============================================================================= +# Unset this first in case it's set to an empty string +set(CMAKE_CUDA_ARCHITECTURES OFF) + +# Enable CUDA so we can invoke nvcc +enable_language(CUDA) + +# Function uses the CUDA runtime API to query the compute capability of the device, so if a user +# doesn't pass any architecture options to CMake we only build the current architecture function(evaluate_gpu_archs gpu_archs) set(eval_file ${PROJECT_BINARY_DIR}/eval_gpu_archs.cu) set(eval_exe ${PROJECT_BINARY_DIR}/eval_gpu_archs) set(error_file ${PROJECT_BINARY_DIR}/eval_gpu_archs.stderr.log) - file(WRITE ${eval_file} -[=[ + file( + WRITE ${eval_file} + " #include <cstdio> #include <set> #include <string> @@ -32,32 +41,30 @@ int main(int argc, char** argv) { char buff[32]; cudaDeviceProp prop; if(cudaGetDeviceProperties(&prop, dev) != cudaSuccess) continue; - sprintf(buff, "%d%d", prop.major, prop.minor); + sprintf(buff, \"%d%d\", prop.major, prop.minor); archs.insert(buff); } } if(archs.empty()) { - printf("ALL"); + printf(\"${SUPPORTED_CUDA_ARCHITECTURES}\"); } else { bool first = true; for(const auto& arch : archs) { - printf(first? "%s" : ";%s", arch.c_str()); + printf(first?
\"%s\" : \";%s\", arch.c_str()); first = false; } } - printf("\n"); + printf(\"\\n\"); return 0; } -]=]) +") execute_process( - COMMAND ${CMAKE_CUDA_COMPILER} - -std=c++11 - -o ${eval_exe} - --run - ${eval_file} + COMMAND ${CMAKE_CUDA_COMPILER} -std=c++11 -o ${eval_exe} --run ${eval_file} OUTPUT_VARIABLE __gpu_archs OUTPUT_STRIP_TRAILING_WHITESPACE ERROR_FILE ${error_file}) - message(VERBOSE "CUDF: Auto detection of gpu-archs: ${__gpu_archs}") - set(${gpu_archs} ${__gpu_archs} PARENT_SCOPE) -endfunction() + message(STATUS "CUDF: Auto detection of gpu-archs: ${__gpu_archs}") + set(${gpu_archs} + ${__gpu_archs} + PARENT_SCOPE) +endfunction(evaluate_gpu_archs) diff --git a/cpp/cmake/Modules/SetGPUArchs.cmake b/cpp/cmake/Modules/SetGPUArchs.cmake index 396023ee9a9..61e4e6bc198 100644 --- a/cpp/cmake/Modules/SetGPUArchs.cmake +++ b/cpp/cmake/Modules/SetGPUArchs.cmake @@ -25,35 +25,41 @@ else() list(REMOVE_ITEM SUPPORTED_CUDA_ARCHITECTURES "62" "72") endif() -if(CMAKE_CUDA_COMPILER_VERSION VERSION_LESS 11) +# CMake < 3.20 has a bug in FindCUDAToolkit where it won't properly detect the CUDAToolkit version +# when find_package(CUDAToolkit) occurs before enable_language(CUDA) +if(NOT DEFINED CUDAToolkit_VERSION AND CMAKE_CUDA_COMPILER) + execute_process(COMMAND ${CMAKE_CUDA_COMPILER} "--version" OUTPUT_VARIABLE NVCC_OUT) + if(NVCC_OUT MATCHES [=[ V([0-9]+)\.([0-9]+)\.([0-9]+)]=]) + set(CUDAToolkit_VERSION_MAJOR "${CMAKE_MATCH_1}") + set(CUDAToolkit_VERSION_MINOR "${CMAKE_MATCH_2}") + set(CUDAToolkit_VERSION_PATCH "${CMAKE_MATCH_3}") + set(CUDAToolkit_VERSION "${CMAKE_MATCH_1}.${CMAKE_MATCH_2}.${CMAKE_MATCH_3}") + endif() + unset(NVCC_OUT) +endif() + +if(CUDAToolkit_VERSION_MAJOR LESS 11) list(REMOVE_ITEM SUPPORTED_CUDA_ARCHITECTURES "80") endif() -if(CMAKE_CUDA_COMPILER_VERSION VERSION_LESS 10) +if(CUDAToolkit_VERSION_MAJOR LESS 10) list(REMOVE_ITEM SUPPORTED_CUDA_ARCHITECTURES "75") endif() -if(CMAKE_CUDA_COMPILER_VERSION VERSION_LESS 9) +if(CUDAToolkit_VERSION_MAJOR LESS 
9) list(REMOVE_ITEM SUPPORTED_CUDA_ARCHITECTURES "70") endif() -if(CUDF_BUILD_FOR_DETECTED_ARCHS) - include(${CUDF_SOURCE_DIR}/cmake/Modules/EvalGPUArchs.cmake) - evaluate_gpu_archs(CMAKE_CUDA_ARCHITECTURES) - if(CMAKE_CUDA_ARCHITECTURES STREQUAL "ALL") - unset(CMAKE_CUDA_ARCHITECTURES CACHE) - set(CUDF_BUILD_FOR_ALL_ARCHS TRUE) - else() - set(CUDF_BUILD_FOR_ALL_ARCHS FALSE) - list(TRANSFORM CMAKE_CUDA_ARCHITECTURES APPEND "-real") - endif() -endif() - -if(CUDF_BUILD_FOR_ALL_ARCHS) +if(${PROJECT_NAME}_BUILD_FOR_ALL_ARCHS) set(CMAKE_CUDA_ARCHITECTURES ${SUPPORTED_CUDA_ARCHITECTURES}) - # CMake architecture list entry of "80" means to build compute and sm. - # What we want is for the newest arch only to build that way - # while the rest built only for sm. - list(SORT CMAKE_CUDA_ARCHITECTURES ORDER ASCENDING) + + # CMake architecture list entry of "80" means to build compute and sm. What we want is for the + # newest arch only to build that way while the rest built only for sm. list(POP_BACK CMAKE_CUDA_ARCHITECTURES latest_arch) list(TRANSFORM CMAKE_CUDA_ARCHITECTURES APPEND "-real") list(APPEND CMAKE_CUDA_ARCHITECTURES ${latest_arch}) + +elseif(${PROJECT_NAME}_BUILD_FOR_DETECTED_ARCHS) + include(${PROJECT_SOURCE_DIR}/cmake/Modules/EvalGPUArchs.cmake) + evaluate_gpu_archs(CMAKE_CUDA_ARCHITECTURES) + + list(TRANSFORM CMAKE_CUDA_ARCHITECTURES APPEND "-real") endif()