From 7a6e60e8051347ec19cc30d376be04ddcd89c465 Mon Sep 17 00:00:00 2001
From: Robert Maynard <rmaynard@nvidia.com>
Date: Fri, 19 Feb 2021 13:33:00 -0500
Subject: [PATCH] Use CMAKE_CUDA_ARCHITECTURES (#7391)

When using CMake >= 3.18 you now get `CMP0104` warnings for each target that uses CUDA.

To correct this issue I have cherry-picked a collection of changes from https://github.com/rapidsai/cudf/pull/7107 which updated cudf to use `CMAKE_CUDA_ARCHITECTURES`.

When using CMake < 3.18 cudf will transform `CMAKE_CUDA_ARCHITECTURES` into the correct flags and store it into `CMAKE_CUDA_FLAGS`.

When I was porting this over from !7107 I noticed that when cudf is built for multi arch ( sm60, sm70, sm80, ... ) it only compiles compute for the newest arch. This behavior is preserved, and !7107 will need to be updated with this change.

Authors:
  - Robert Maynard (@robertmaynard)
  - Paul Taylor (@trxcllnt)

Approvers:
  - Mark Harris (@harrism)
  - Keith Kraus (@kkraus14)

URL: https://github.com/rapidsai/cudf/pull/7391
---
 build.sh                              |   6 +-
 cpp/CMakeLists.txt                    | 104 +++++---------------------
 cpp/cmake/EvalGpuArchs.cmake          |  18 ++---
 cpp/cmake/Modules/ConfigureCUDA.cmake |  61 +++++++++++++++
 cpp/cmake/Modules/SetGPUArchs.cmake   |  61 +++++++++++++++
 5 files changed, 152 insertions(+), 98 deletions(-)
 create mode 100644 cpp/cmake/Modules/ConfigureCUDA.cmake
 create mode 100644 cpp/cmake/Modules/SetGPUArchs.cmake

diff --git a/build.sh b/build.sh
index df74e0a537e..b51e503fc39 100755
--- a/build.sh
+++ b/build.sh
@@ -135,10 +135,10 @@ if hasArg clean; then
 fi
 
 if (( ${BUILD_ALL_GPU_ARCH} == 0 )); then
-    GPU_ARCH="-DGPU_ARCHS="
+    CUDF_CMAKE_CUDA_ARCHITECTURES="-DCMAKE_CUDA_ARCHITECTURES="
     echo "Building for the architecture of the GPU in the system..."
 else
-    GPU_ARCH="-DGPU_ARCHS=ALL"
+    CUDF_CMAKE_CUDA_ARCHITECTURES=""
     echo "Building for *ALL* supported GPU architectures..."
 fi
 
@@ -148,7 +148,7 @@ fi
 if buildAll || hasArg libcudf; then
     cmake -S $REPODIR/cpp -B ${LIB_BUILD_DIR} \
           -DCMAKE_INSTALL_PREFIX=${INSTALL_PREFIX} \
-          ${GPU_ARCH} \
+          ${CUDF_CMAKE_CUDA_ARCHITECTURES} \
           -DUSE_NVTX=${BUILD_NVTX} \
           -DBUILD_BENCHMARKS=${BUILD_BENCHMARKS} \
           -DDISABLE_DEPRECATION_WARNING=${BUILD_DISABLE_DEPRECATION_WARNING} \
diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt
index 918500f5f9e..921481ffa5c 100644
--- a/cpp/CMakeLists.txt
+++ b/cpp/CMakeLists.txt
@@ -16,11 +16,21 @@
 
 cmake_minimum_required(VERSION 3.14...3.17 FATAL_ERROR)
 
+# If `CMAKE_CUDA_ARCHITECTURES` is not defined, build for all supported architectures. If
+# `CMAKE_CUDA_ARCHITECTURES` is set to an empty string (""), build for only the current
+# architecture. If `CMAKE_CUDA_ARCHITECTURES` is specified by the user, use user setting.
+
+# This needs to be run before enabling the CUDA language due to the default initialization behavior
+# of `CMAKE_CUDA_ARCHITECTURES`, https://gitlab.kitware.com/cmake/cmake/-/issues/21302
+if(NOT DEFINED CMAKE_CUDA_ARCHITECTURES)
+  set(CUDA_DATAFRAME_BUILD_FOR_ALL_ARCHS TRUE)
+elseif(CMAKE_CUDA_ARCHITECTURES STREQUAL "")
+  unset(CMAKE_CUDA_ARCHITECTURES CACHE)
+  set(CUDA_DATAFRAME_BUILD_FOR_DETECTED_ARCHS TRUE)
+endif()
+
 project(CUDA_DATAFRAME VERSION 0.19.0 LANGUAGES C CXX CUDA)
 
-if(NOT CMAKE_CUDA_COMPILER)
-  message(SEND_ERROR "CMake cannot locate a CUDA compiler")
-endif(NOT CMAKE_CUDA_COMPILER)
 
 ###################################################################################################
 # - build type ------------------------------------------------------------------------------------
@@ -57,91 +67,13 @@ if(CMAKE_COMPILER_IS_GNUCXX)
     set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -Xcompiler=-Wno-parentheses")
 endif(CMAKE_COMPILER_IS_GNUCXX)
 
-if(CMAKE_CUDA_COMPILER_VERSION)
-  # Compute the version. from  CMAKE_CUDA_COMPILER_VERSION
-  string(REGEX REPLACE "([0-9]+)\\.([0-9]+).*" "\\1" CUDA_VERSION_MAJOR ${CMAKE_CUDA_COMPILER_VERSION})
-  string(REGEX REPLACE "([0-9]+)\\.([0-9]+).*" "\\2" CUDA_VERSION_MINOR ${CMAKE_CUDA_COMPILER_VERSION})
-  set(CUDA_VERSION "${CUDA_VERSION_MAJOR}.${CUDA_VERSION_MINOR}" CACHE STRING "Version of CUDA as computed from nvcc.")
-  mark_as_advanced(CUDA_VERSION)
-endif()
-
-message(STATUS "CUDA_VERSION_MAJOR: ${CUDA_VERSION_MAJOR}")
-message(STATUS "CUDA_VERSION_MINOR: ${CUDA_VERSION_MINOR}")
-message(STATUS "CUDA_VERSION: ${CUDA_VERSION}")
-
-# Always set this convenience variable
-set(CUDA_VERSION_STRING "${CUDA_VERSION}")
-
-# Auto-detect available GPU compute architectures
-set(GPU_ARCHS "ALL" CACHE STRING
-  "List of GPU architectures (semicolon-separated) to be compiled for. Pass 'ALL' if you want to compile for all supported GPU architectures. Empty string means to auto-detect the GPUs on the current system")
-
-if("${GPU_ARCHS}" STREQUAL "")
-  include(cmake/EvalGpuArchs.cmake)
-  evaluate_gpu_archs(GPU_ARCHS)
-endif()
-
-if("${GPU_ARCHS}" STREQUAL "ALL")
-  
-  # Check for embedded vs workstation architectures
-  if(CMAKE_SYSTEM_PROCESSOR MATCHES "aarch64")
-    # This is being built for Linux4Tegra or SBSA ARM64
-    set(GPU_ARCHS "62")
-    if((CUDA_VERSION_MAJOR EQUAL 9) OR (CUDA_VERSION_MAJOR GREATER 9))
-      set(GPU_ARCHS "${GPU_ARCHS};72")
-    endif()
-    if((CUDA_VERSION_MAJOR EQUAL 11) OR (CUDA_VERSION_MAJOR GREATER 11))
-      set(GPU_ARCHS "${GPU_ARCHS};75;80")
-    endif()
-
-  else()
-    # This is being built for an x86 or x86_64 architecture
-    set(GPU_ARCHS "60")
-    if((CUDA_VERSION_MAJOR EQUAL 9) OR (CUDA_VERSION_MAJOR GREATER 9))
-      set(GPU_ARCHS "${GPU_ARCHS};70")
-    endif()
-    if((CUDA_VERSION_MAJOR EQUAL 10) OR (CUDA_VERSION_MAJOR GREATER 10))
-      set(GPU_ARCHS "${GPU_ARCHS};75")
-    endif()
-    if((CUDA_VERSION_MAJOR EQUAL 11) OR (CUDA_VERSION_MAJOR GREATER 11))
-      set(GPU_ARCHS "${GPU_ARCHS};80")
-    endif()
-
-  endif()
-  
-endif()
-message("GPU_ARCHS = ${GPU_ARCHS}")
-
-foreach(arch ${GPU_ARCHS})
-  set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -gencode=arch=compute_${arch},code=sm_${arch}")
-endforeach()
-
-list(GET GPU_ARCHS -1 ptx)
-set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -gencode=arch=compute_${ptx},code=compute_${ptx}")
-
-set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} --expt-extended-lambda --expt-relaxed-constexpr")
-
-# set warnings as errors
-# TODO: remove `no-maybe-unitialized` used to suppress warnings in rmm::exec_policy
-set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -Werror=cross-execution-space-call -Xcompiler=-Wall,-Werror,-Wno-error=deprecated-declarations")
-
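+# ConfigureCUDA.cmake (included below, after the options it reads are declared):
+# * determines the GPU architectures to build for
+# * derives CUDA_VERSION from the CUDA compiler version
+# * sets the remaining CUDA and C++ compilation flags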
 option(DISABLE_DEPRECATION_WARNING "Disable warnings generated from deprecated declarations." OFF)
-if(DISABLE_DEPRECATION_WARNING)
-    set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -Xcompiler=-Wno-deprecated-declarations")
-    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-deprecated-declarations")
-endif(DISABLE_DEPRECATION_WARNING)
-
-# Option to enable line info in CUDA device compilation to allow introspection when profiling / memchecking
 option(CMAKE_CUDA_LINEINFO "Enable the -lineinfo option for nvcc (useful for cuda-memcheck / profiler" OFF)
-if(CMAKE_CUDA_LINEINFO)
-    set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -lineinfo")
-endif(CMAKE_CUDA_LINEINFO)
-
-# Debug options
-if(CMAKE_BUILD_TYPE MATCHES Debug)
-    message(STATUS "Building with debugging flags")
-    set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -G -Xcompiler=-rdynamic")
-endif(CMAKE_BUILD_TYPE MATCHES Debug)
+include(cmake/Modules/ConfigureCUDA.cmake)
 
 # To apply RUNPATH to transitive dependencies (this is a temporary solution)
 set(CMAKE_SHARED_LINKER_FLAGS "-Wl,--disable-new-dtags")
diff --git a/cpp/cmake/EvalGpuArchs.cmake b/cpp/cmake/EvalGpuArchs.cmake
index ce097b94914..6c747a0b867 100644
--- a/cpp/cmake/EvalGpuArchs.cmake
+++ b/cpp/cmake/EvalGpuArchs.cmake
@@ -1,5 +1,5 @@
 #=============================================================================
-# Copyright (c) 2019-2020, NVIDIA CORPORATION.
+# Copyright (c) 2019-2021, NVIDIA CORPORATION.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -19,7 +19,7 @@ function(evaluate_gpu_archs gpu_archs)
   set(eval_exe ${PROJECT_BINARY_DIR}/eval_gpu_archs)
   set(error_file ${PROJECT_BINARY_DIR}/eval_gpu_archs.stderr.log)
   file(WRITE ${eval_file}
-    "
+[=[
 #include <cstdio>
 #include <set>
 #include <string>
@@ -32,23 +32,23 @@ int main(int argc, char** argv) {
       char buff[32];
       cudaDeviceProp prop;
       if(cudaGetDeviceProperties(&prop, dev) != cudaSuccess) continue;
-      sprintf(buff, \"%d%d\", prop.major, prop.minor);
+      sprintf(buff, "%d%d", prop.major, prop.minor);
       archs.insert(buff);
     }
   }
   if(archs.empty()) {
-    printf(\"ALL\");
+    printf("ALL");
   } else {
     bool first = true;
     for(const auto& arch : archs) {
-      printf(first? \"%s\" : \";%s\", arch.c_str());
+      printf(first? "%s" : ";%s", arch.c_str());
       first = false;
     }
   }
-  printf(\"\\n\");
+  printf("\n");
   return 0;
 }
-")
+]=])
   execute_process(
     COMMAND ${CMAKE_CUDA_COMPILER}
       -std=c++11
@@ -58,6 +58,6 @@ int main(int argc, char** argv) {
     OUTPUT_VARIABLE __gpu_archs
     OUTPUT_STRIP_TRAILING_WHITESPACE
     ERROR_FILE ${error_file})
-  message("Auto detection of gpu-archs: ${__gpu_archs}")
+  message(VERBOSE "CUDF: Auto detection of gpu-archs: ${__gpu_archs}")
   set(${gpu_archs} ${__gpu_archs} PARENT_SCOPE)
-endfunction(evaluate_gpu_archs)
+endfunction()
diff --git a/cpp/cmake/Modules/ConfigureCUDA.cmake b/cpp/cmake/Modules/ConfigureCUDA.cmake
new file mode 100644
index 00000000000..344026da5cb
--- /dev/null
+++ b/cpp/cmake/Modules/ConfigureCUDA.cmake
@@ -0,0 +1,61 @@
+#=============================================================================
+# Copyright (c) 2018-2021, NVIDIA CORPORATION.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#=============================================================================
+
+# Auto-detect available GPU compute architectures
+
+include(${CUDA_DATAFRAME_SOURCE_DIR}/cmake/Modules/SetGPUArchs.cmake)
+message(STATUS "CUDF: Building CUDF for GPU architectures: ${CMAKE_CUDA_ARCHITECTURES}")
+
+if(CMAKE_CUDA_COMPILER_VERSION)
+  # Compute the version. from  CMAKE_CUDA_COMPILER_VERSION
+  string(REGEX REPLACE "([0-9]+)\\.([0-9]+).*" "\\1" CUDA_VERSION_MAJOR ${CMAKE_CUDA_COMPILER_VERSION})
+  string(REGEX REPLACE "([0-9]+)\\.([0-9]+).*" "\\2" CUDA_VERSION_MINOR ${CMAKE_CUDA_COMPILER_VERSION})
+  set(CUDA_VERSION "${CUDA_VERSION_MAJOR}.${CUDA_VERSION_MINOR}")
+endif()
+
+message(VERBOSE "CUDF: CUDA_VERSION_MAJOR: ${CUDA_VERSION_MAJOR}")
+message(VERBOSE "CUDF: CUDA_VERSION_MINOR: ${CUDA_VERSION_MINOR}")
+message(STATUS "CUDF: CUDA_VERSION: ${CUDA_VERSION}")
+
+if(CMAKE_COMPILER_IS_GNUCXX)
+    string(APPEND CMAKE_CXX_FLAGS " -Wall -Werror -Wno-unknown-pragmas -Wno-error=deprecated-declarations")
+    if(CUDF_BUILD_TESTS OR CUDF_BUILD_BENCHMARKS)
+        # Suppress parentheses warning which causes gmock to fail
+        string(APPEND CMAKE_CUDA_FLAGS " -Xcompiler=-Wno-parentheses")
+    endif()
+endif()
+
+string(APPEND CMAKE_CUDA_FLAGS " --expt-extended-lambda --expt-relaxed-constexpr")
+
+# set warnings as errors
+string(APPEND CMAKE_CUDA_FLAGS " -Werror=cross-execution-space-call")
+string(APPEND CMAKE_CUDA_FLAGS " -Xcompiler=-Wall,-Werror,-Wno-error=deprecated-declarations")
+
+if(DISABLE_DEPRECATION_WARNING)
+    string(APPEND CMAKE_CXX_FLAGS " -Wno-deprecated-declarations")
+    string(APPEND CMAKE_CUDA_FLAGS " -Xcompiler=-Wno-deprecated-declarations")
+endif()
+
+# Option to enable line info in CUDA device compilation to allow introspection when profiling / memchecking
+if(CMAKE_CUDA_LINEINFO)
+    string(APPEND CMAKE_CUDA_FLAGS " -lineinfo")
+endif()
+
+# Debug options
+if(CMAKE_BUILD_TYPE MATCHES Debug)
+    message(VERBOSE "CUDF: Building with debugging flags")
+    string(APPEND CMAKE_CUDA_FLAGS " -G -Xcompiler=-rdynamic")
+endif()
diff --git a/cpp/cmake/Modules/SetGPUArchs.cmake b/cpp/cmake/Modules/SetGPUArchs.cmake
new file mode 100644
index 00000000000..2480ed4ad79
--- /dev/null
+++ b/cpp/cmake/Modules/SetGPUArchs.cmake
@@ -0,0 +1,77 @@
+# =============================================================================
+# Copyright (c) 2021, NVIDIA CORPORATION.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
+# in compliance with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software distributed under the License
+# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
+# or implied. See the License for the specific language governing permissions and limitations under
+# the License.
+# =============================================================================
+
+# Build the list of supported architectures
+
+set(SUPPORTED_CUDA_ARCHITECTURES "60" "62" "70" "72" "75" "80")
+
+# Check for embedded vs workstation architectures
+if(CMAKE_SYSTEM_PROCESSOR MATCHES "aarch64")
+  # This is being built for Linux4Tegra or SBSA ARM64
+  list(REMOVE_ITEM SUPPORTED_CUDA_ARCHITECTURES "60" "70")
+else()
+  # This is being built for an x86 or x86_64 architecture
+  list(REMOVE_ITEM SUPPORTED_CUDA_ARCHITECTURES "62" "72")
+endif()
+
+if(CMAKE_CUDA_COMPILER_VERSION VERSION_LESS 11)
+  list(REMOVE_ITEM SUPPORTED_CUDA_ARCHITECTURES "80")
+endif()
+if(CMAKE_CUDA_COMPILER_VERSION VERSION_LESS 10)
+  list(REMOVE_ITEM SUPPORTED_CUDA_ARCHITECTURES "75")
+endif()
+if(CMAKE_CUDA_COMPILER_VERSION VERSION_LESS 9)
+  list(REMOVE_ITEM SUPPORTED_CUDA_ARCHITECTURES "70")
+endif()
+
+# Resolve the requested architectures. `<PROJECT_NAME>_BUILD_FOR_ALL_ARCHS` and
+# `<PROJECT_NAME>_BUILD_FOR_DETECTED_ARCHS` are set by the top-level CMakeLists.txt before
+# `project()`; when neither is set the user-provided `CMAKE_CUDA_ARCHITECTURES` is used as is.
+if(${PROJECT_NAME}_BUILD_FOR_DETECTED_ARCHS)
+  include(${PROJECT_SOURCE_DIR}/cmake/EvalGpuArchs.cmake)
+  evaluate_gpu_archs(CMAKE_CUDA_ARCHITECTURES)
+  # The detection program prints "ALL" when it finds no usable GPU, and the result is empty if it
+  # produced no output at all. Neither is a valid architecture list, so fall back to building
+  # for every supported architecture instead of passing the value on to the compiler.
+  if("${CMAKE_CUDA_ARCHITECTURES}" STREQUAL "ALL" OR "${CMAKE_CUDA_ARCHITECTURES}" STREQUAL "")
+    message(STATUS "CUDF: GPU detection found no architectures, building for all supported ones")
+    set(${PROJECT_NAME}_BUILD_FOR_ALL_ARCHS TRUE)
+  endif()
+endif()
+
+if(${PROJECT_NAME}_BUILD_FOR_ALL_ARCHS)
+  set(CMAKE_CUDA_ARCHITECTURES ${SUPPORTED_CUDA_ARCHITECTURES})
+endif()
+
+# Sort so that the last entry is the newest architecture in both branches below.
+list(SORT CMAKE_CUDA_ARCHITECTURES ORDER ASCENDING)
+
+if(CMAKE_VERSION VERSION_GREATER_EQUAL 3.18)
+  # CMake architecture list entry of "80" means to build compute and sm.
+  # What we want is for the newest arch only to build that way
+  # while the rest are built only for sm.
+  list(POP_BACK CMAKE_CUDA_ARCHITECTURES latest_arch)
+  # Only suffix bare numbers so user entries such as "70-real" are not turned into "70-real-real".
+  list(TRANSFORM CMAKE_CUDA_ARCHITECTURES REPLACE "^([0-9]+)$" "\\1-real")
+  list(APPEND CMAKE_CUDA_ARCHITECTURES ${latest_arch})
+else()
+  # CMake < 3.18 has no CMAKE_CUDA_ARCHITECTURES support: emit the -gencode flags by hand.
+  foreach(arch IN LISTS CMAKE_CUDA_ARCHITECTURES)
+    string(APPEND CMAKE_CUDA_FLAGS " -gencode=arch=compute_${arch},code=sm_${arch}")
+  endforeach()
+
+  list(GET CMAKE_CUDA_ARCHITECTURES -1 ptx)
+  string(APPEND CMAKE_CUDA_FLAGS " -gencode=arch=compute_${ptx},code=compute_${ptx}")
+  unset(CMAKE_CUDA_ARCHITECTURES)
+endif()