From 05bb2f06ad05b4db1cde08e947797729e4a4b9dd Mon Sep 17 00:00:00 2001
From: Paul Taylor <paul.e.taylor@me.com>
Date: Mon, 15 Mar 2021 18:56:30 -0500
Subject: [PATCH] Fix auto-detecting GPU architectures (#7593)

Fixes regression from https://github.com/rapidsai/cudf/pull/7579 in auto-detecting GPU architectures when `-DCMAKE_CUDA_ARCHITECTURES=` is passed on the CLI.

Now that the cached `CMAKE_CUDA_ARCHITECTURES` isn't unset before calling `enable_language(CUDA)`, this call throws an error and configuration fails. This change ensures we call `enable_language(CUDA)` after any potential rewrites of `CMAKE_CUDA_ARCHITECTURES`.

This PR also aligns with RMM's `EvalGPUArchs.cmake` logic and prints `SUPPORTED_CUDA_ARCHITECTURES` instead of `"ALL"` in the case the current machine is a CPU-only node.

Related: https://github.com/rapidsai/rmm/pull/727

Authors:
  - Paul Taylor (@trxcllnt)
  - Robert Maynard (@robertmaynard)

Approvers:
  - Keith Kraus (@kkraus14)

URL: https://github.com/rapidsai/cudf/pull/7593
---
 build.sh                              |  2 +-
 cpp/cmake/Modules/ConfigureCUDA.cmake | 22 ++++---------
 cpp/cmake/Modules/EvalGPUArchs.cmake  | 37 ++++++++++++---------
 cpp/cmake/Modules/SetGPUArchs.cmake   | 46 +++++++++++++++------------
 4 files changed, 55 insertions(+), 52 deletions(-)

diff --git a/build.sh b/build.sh
index 5eb404d02a8..d75053f8849 100755
--- a/build.sh
+++ b/build.sh
@@ -135,7 +135,7 @@ if hasArg clean; then
 fi
 
 if (( ${BUILD_ALL_GPU_ARCH} == 0 )); then
-    CUDF_CMAKE_CUDA_ARCHITECTURES="-DCMAKE_CUDA_ARCHITECTURES=ALL"
+    CUDF_CMAKE_CUDA_ARCHITECTURES="-DCMAKE_CUDA_ARCHITECTURES="
     echo "Building for the architecture of the GPU in the system..."
 else
     CUDF_CMAKE_CUDA_ARCHITECTURES=""
diff --git a/cpp/cmake/Modules/ConfigureCUDA.cmake b/cpp/cmake/Modules/ConfigureCUDA.cmake
index 44699a13206..d4be6e65021 100644
--- a/cpp/cmake/Modules/ConfigureCUDA.cmake
+++ b/cpp/cmake/Modules/ConfigureCUDA.cmake
@@ -17,26 +17,16 @@
 # Find the CUDAToolkit
 find_package(CUDAToolkit REQUIRED)
 
-# Must come after find_package(CUDAToolkit) because we symlink
-# ccache as a compiler front-end for nvcc in gpuCI CPU builds.
-enable_language(CUDA)
-
-if(CMAKE_CUDA_COMPILER_VERSION)
-  # Compute the version. from  CMAKE_CUDA_COMPILER_VERSION
-  string(REGEX REPLACE "([0-9]+)\\.([0-9]+).*" "\\1" CUDA_VERSION_MAJOR ${CMAKE_CUDA_COMPILER_VERSION})
-  string(REGEX REPLACE "([0-9]+)\\.([0-9]+).*" "\\2" CUDA_VERSION_MINOR ${CMAKE_CUDA_COMPILER_VERSION})
-  set(CUDA_VERSION "${CUDA_VERSION_MAJOR}.${CUDA_VERSION_MINOR}")
-endif()
-
-message(VERBOSE "CUDF: CUDA_VERSION_MAJOR: ${CUDA_VERSION_MAJOR}")
-message(VERBOSE "CUDF: CUDA_VERSION_MINOR: ${CUDA_VERSION_MINOR}")
-message(STATUS "CUDF: CUDA_VERSION: ${CUDA_VERSION}")
-
 # Auto-detect available GPU compute architectures
-
 include(${CUDF_SOURCE_DIR}/cmake/Modules/SetGPUArchs.cmake)
 message(STATUS "CUDF: Building CUDF for GPU architectures: ${CMAKE_CUDA_ARCHITECTURES}")
 
+# Must come after find_package(CUDAToolkit) because we symlink
+# ccache as a compiler front-end for nvcc in gpuCI CPU builds.
+# Must also come after we detect and potentially rewrite
+# CMAKE_CUDA_ARCHITECTURES
+enable_language(CUDA)
+
 if(CMAKE_COMPILER_IS_GNUCXX)
     list(APPEND CUDF_CXX_FLAGS -Wall -Werror -Wno-unknown-pragmas -Wno-error=deprecated-declarations)
     if(CUDF_BUILD_TESTS OR CUDF_BUILD_BENCHMARKS)
diff --git a/cpp/cmake/Modules/EvalGPUArchs.cmake b/cpp/cmake/Modules/EvalGPUArchs.cmake
index 6c747a0b867..09e42c6cc7a 100644
--- a/cpp/cmake/Modules/EvalGPUArchs.cmake
+++ b/cpp/cmake/Modules/EvalGPUArchs.cmake
@@ -14,12 +14,21 @@
 # limitations under the License.
 #=============================================================================
 
+# Unset this first in case it's set to <empty_string>
+set(CMAKE_CUDA_ARCHITECTURES OFF)
+
+# Enable CUDA so we can invoke nvcc
+enable_language(CUDA)
+
+# Function uses the CUDA runtime API to query the compute capability of the device, so if a user
+# doesn't pass any architecture options to CMake we only build the current architecture
 function(evaluate_gpu_archs gpu_archs)
   set(eval_file ${PROJECT_BINARY_DIR}/eval_gpu_archs.cu)
   set(eval_exe ${PROJECT_BINARY_DIR}/eval_gpu_archs)
   set(error_file ${PROJECT_BINARY_DIR}/eval_gpu_archs.stderr.log)
-  file(WRITE ${eval_file}
-[=[
+  file(
+    WRITE ${eval_file}
+    "
 #include <cstdio>
 #include <set>
 #include <string>
@@ -32,32 +41,30 @@ int main(int argc, char** argv) {
       char buff[32];
       cudaDeviceProp prop;
       if(cudaGetDeviceProperties(&prop, dev) != cudaSuccess) continue;
-      sprintf(buff, "%d%d", prop.major, prop.minor);
+      sprintf(buff, \"%d%d\", prop.major, prop.minor);
       archs.insert(buff);
     }
   }
   if(archs.empty()) {
-    printf("ALL");
+    printf(\"${SUPPORTED_CUDA_ARCHITECTURES}\");
   } else {
     bool first = true;
     for(const auto& arch : archs) {
-      printf(first? "%s" : ";%s", arch.c_str());
+      printf(first? \"%s\" : \";%s\", arch.c_str());
       first = false;
     }
   }
-  printf("\n");
+  printf(\"\\n\");
   return 0;
 }
-]=])
+")
   execute_process(
-    COMMAND ${CMAKE_CUDA_COMPILER}
-      -std=c++11
-      -o ${eval_exe}
-      --run
-      ${eval_file}
+    COMMAND ${CMAKE_CUDA_COMPILER} -std=c++11 -o ${eval_exe} --run ${eval_file}
     OUTPUT_VARIABLE __gpu_archs
     OUTPUT_STRIP_TRAILING_WHITESPACE
     ERROR_FILE ${error_file})
-  message(VERBOSE "CUDF: Auto detection of gpu-archs: ${__gpu_archs}")
-  set(${gpu_archs} ${__gpu_archs} PARENT_SCOPE)
-endfunction()
+  message(STATUS "CUDF: Auto detection of gpu-archs: ${__gpu_archs}")
+  set(${gpu_archs}
+      ${__gpu_archs}
+      PARENT_SCOPE)
+endfunction(evaluate_gpu_archs)
diff --git a/cpp/cmake/Modules/SetGPUArchs.cmake b/cpp/cmake/Modules/SetGPUArchs.cmake
index 396023ee9a9..61e4e6bc198 100644
--- a/cpp/cmake/Modules/SetGPUArchs.cmake
+++ b/cpp/cmake/Modules/SetGPUArchs.cmake
@@ -25,35 +25,41 @@ else()
   list(REMOVE_ITEM SUPPORTED_CUDA_ARCHITECTURES "62" "72")
 endif()
 
-if(CMAKE_CUDA_COMPILER_VERSION VERSION_LESS 11)
+# CMake < 3.20 has a bug in FindCUDAToolkit where it won't properly detect the CUDAToolkit version
+# when find_package(CUDAToolkit) occurs before enable_language(CUDA)
+if(NOT DEFINED CUDAToolkit_VERSION AND CMAKE_CUDA_COMPILER)
+  execute_process(COMMAND ${CMAKE_CUDA_COMPILER} "--version" OUTPUT_VARIABLE NVCC_OUT)
+  if(NVCC_OUT MATCHES [=[ V([0-9]+)\.([0-9]+)\.([0-9]+)]=])
+    set(CUDAToolkit_VERSION_MAJOR "${CMAKE_MATCH_1}")
+    set(CUDAToolkit_VERSION_MINOR "${CMAKE_MATCH_2}")
+    set(CUDAToolkit_VERSION_PATCH "${CMAKE_MATCH_3}")
+    set(CUDAToolkit_VERSION "${CMAKE_MATCH_1}.${CMAKE_MATCH_2}.${CMAKE_MATCH_3}")
+  endif()
+  unset(NVCC_OUT)
+endif()
+
+if(CUDAToolkit_VERSION_MAJOR LESS 11)
   list(REMOVE_ITEM SUPPORTED_CUDA_ARCHITECTURES "80")
 endif()
-if(CMAKE_CUDA_COMPILER_VERSION VERSION_LESS 10)
+if(CUDAToolkit_VERSION_MAJOR LESS 10)
   list(REMOVE_ITEM SUPPORTED_CUDA_ARCHITECTURES "75")
 endif()
-if(CMAKE_CUDA_COMPILER_VERSION VERSION_LESS 9)
+if(CUDAToolkit_VERSION_MAJOR LESS 9)
   list(REMOVE_ITEM SUPPORTED_CUDA_ARCHITECTURES "70")
 endif()
 
-if(CUDF_BUILD_FOR_DETECTED_ARCHS)
-  include(${CUDF_SOURCE_DIR}/cmake/Modules/EvalGPUArchs.cmake)
-  evaluate_gpu_archs(CMAKE_CUDA_ARCHITECTURES)
-  if(CMAKE_CUDA_ARCHITECTURES STREQUAL "ALL")
-    unset(CMAKE_CUDA_ARCHITECTURES CACHE)
-    set(CUDF_BUILD_FOR_ALL_ARCHS TRUE)
-  else()
-    set(CUDF_BUILD_FOR_ALL_ARCHS FALSE)
-    list(TRANSFORM CMAKE_CUDA_ARCHITECTURES APPEND "-real")
-  endif()
-endif()
-
-if(CUDF_BUILD_FOR_ALL_ARCHS)
+if(${PROJECT_NAME}_BUILD_FOR_ALL_ARCHS)
   set(CMAKE_CUDA_ARCHITECTURES ${SUPPORTED_CUDA_ARCHITECTURES})
-  # CMake architecture list entry of "80" means to build compute and sm.
-  # What we want is for the newest arch only to build that way
-  # while the rest built only for sm.
-  list(SORT CMAKE_CUDA_ARCHITECTURES ORDER ASCENDING)
+
+  # CMake architecture list entry of "80" means to build compute and sm. What we want is for the
+  # newest arch only to build that way while the rest built only for sm.
   list(POP_BACK CMAKE_CUDA_ARCHITECTURES latest_arch)
   list(TRANSFORM CMAKE_CUDA_ARCHITECTURES APPEND "-real")
   list(APPEND CMAKE_CUDA_ARCHITECTURES ${latest_arch})
+
+elseif(${PROJECT_NAME}_BUILD_FOR_DETECTED_ARCHS)
+  include(${PROJECT_SOURCE_DIR}/cmake/Modules/EvalGPUArchs.cmake)
+  evaluate_gpu_archs(CMAKE_CUDA_ARCHITECTURES)
+
+  list(TRANSFORM CMAKE_CUDA_ARCHITECTURES APPEND "-real")
 endif()