From 7a6e60e8051347ec19cc30d376be04ddcd89c465 Mon Sep 17 00:00:00 2001 From: Robert Maynard Date: Fri, 19 Feb 2021 13:33:00 -0500 Subject: [PATCH] Use CMAKE_CUDA_ARCHITECTURES (#7391) When using CMake >= 3.18 you now don't get `CMP0104` warnings for each target that use CUDA. To correct this issue I have cherry-picked a collection of changes from https://github.com/rapidsai/cudf/pull/7107 which updated cudf to use `CMAKE_CUDA_ARCHITECTURES`. When using CMake < 3.18 cudf will transform `CMAKE_CUDA_ARCHITECTURES` into the correct flags and store it into `CMAKE_CUDA_FLAGS`. When I was porting this over from !7107 I noticed that when cudf is built for multi arch ( sm60, sm70, sm80, ... ) it only compiles compute for the newest arch. This behavior is preserved, and !7107 will need to be updated with this change. Authors: - Robert Maynard (@robertmaynard) - Paul Taylor (@trxcllnt) Approvers: - Mark Harris (@harrism) - Keith Kraus (@kkraus14) URL: https://github.com/rapidsai/cudf/pull/7391 --- build.sh | 6 +- cpp/CMakeLists.txt | 104 +++++--------------------- cpp/cmake/EvalGpuArchs.cmake | 18 ++--- cpp/cmake/Modules/ConfigureCUDA.cmake | 61 +++++++++++++++ cpp/cmake/Modules/SetGPUArchs.cmake | 61 +++++++++++++++ 5 files changed, 152 insertions(+), 98 deletions(-) create mode 100644 cpp/cmake/Modules/ConfigureCUDA.cmake create mode 100644 cpp/cmake/Modules/SetGPUArchs.cmake diff --git a/build.sh b/build.sh index df74e0a537e..b51e503fc39 100755 --- a/build.sh +++ b/build.sh @@ -135,10 +135,10 @@ if hasArg clean; then fi if (( ${BUILD_ALL_GPU_ARCH} == 0 )); then - GPU_ARCH="-DGPU_ARCHS=" + CUDF_CMAKE_CUDA_ARCHITECTURES="-DCMAKE_CUDA_ARCHITECTURES=" echo "Building for the architecture of the GPU in the system..." else - GPU_ARCH="-DGPU_ARCHS=ALL" + CUDF_CMAKE_CUDA_ARCHITECTURES="" echo "Building for *ALL* supported GPU architectures..." 
fi @@ -148,7 +148,7 @@ fi if buildAll || hasArg libcudf; then cmake -S $REPODIR/cpp -B ${LIB_BUILD_DIR} \ -DCMAKE_INSTALL_PREFIX=${INSTALL_PREFIX} \ - ${GPU_ARCH} \ + ${CUDF_CMAKE_CUDA_ARCHITECTURES} \ -DUSE_NVTX=${BUILD_NVTX} \ -DBUILD_BENCHMARKS=${BUILD_BENCHMARKS} \ -DDISABLE_DEPRECATION_WARNING=${BUILD_DISABLE_DEPRECATION_WARNING} \ diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index 918500f5f9e..921481ffa5c 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -16,11 +16,21 @@ cmake_minimum_required(VERSION 3.14...3.17 FATAL_ERROR) +# If `CMAKE_CUDA_ARCHITECTURES` is not defined, build for all supported architectures. If +# `CMAKE_CUDA_ARCHITECTURES` is set to an empty string (""), build for only the current +# architecture. If `CMAKE_CUDA_ARCHITECTURES` is specified by the user, use user setting. + +# This needs to be run before enabling the CUDA language due to the default initialization behavior +# of `CMAKE_CUDA_ARCHITECTURES`, https://gitlab.kitware.com/cmake/cmake/-/issues/21302 +if(NOT DEFINED CMAKE_CUDA_ARCHITECTURES) + set(CUDA_DATAFRAME_BUILD_FOR_ALL_ARCHS TRUE) +elseif(CMAKE_CUDA_ARCHITECTURES STREQUAL "") + unset(CMAKE_CUDA_ARCHITECTURES CACHE) + set(CUDA_DATAFRAME_BUILD_FOR_DETECTED_ARCHS TRUE) +endif() + project(CUDA_DATAFRAME VERSION 0.19.0 LANGUAGES C CXX CUDA) -if(NOT CMAKE_CUDA_COMPILER) - message(SEND_ERROR "CMake cannot locate a CUDA compiler") -endif(NOT CMAKE_CUDA_COMPILER) ################################################################################################### # - build type ------------------------------------------------------------------------------------ @@ -57,91 +67,13 @@ if(CMAKE_COMPILER_IS_GNUCXX) set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -Xcompiler=-Wno-parentheses") endif(CMAKE_COMPILER_IS_GNUCXX) -if(CMAKE_CUDA_COMPILER_VERSION) - # Compute the version. 
from CMAKE_CUDA_COMPILER_VERSION - string(REGEX REPLACE "([0-9]+)\\.([0-9]+).*" "\\1" CUDA_VERSION_MAJOR ${CMAKE_CUDA_COMPILER_VERSION}) - string(REGEX REPLACE "([0-9]+)\\.([0-9]+).*" "\\2" CUDA_VERSION_MINOR ${CMAKE_CUDA_COMPILER_VERSION}) - set(CUDA_VERSION "${CUDA_VERSION_MAJOR}.${CUDA_VERSION_MINOR}" CACHE STRING "Version of CUDA as computed from nvcc.") - mark_as_advanced(CUDA_VERSION) -endif() - -message(STATUS "CUDA_VERSION_MAJOR: ${CUDA_VERSION_MAJOR}") -message(STATUS "CUDA_VERSION_MINOR: ${CUDA_VERSION_MINOR}") -message(STATUS "CUDA_VERSION: ${CUDA_VERSION}") - -# Always set this convenience variable -set(CUDA_VERSION_STRING "${CUDA_VERSION}") - -# Auto-detect available GPU compute architectures -set(GPU_ARCHS "ALL" CACHE STRING - "List of GPU architectures (semicolon-separated) to be compiled for. Pass 'ALL' if you want to compile for all supported GPU architectures. Empty string means to auto-detect the GPUs on the current system") - -if("${GPU_ARCHS}" STREQUAL "") - include(cmake/EvalGpuArchs.cmake) - evaluate_gpu_archs(GPU_ARCHS) -endif() - -if("${GPU_ARCHS}" STREQUAL "ALL") - - # Check for embedded vs workstation architectures - if(CMAKE_SYSTEM_PROCESSOR MATCHES "aarch64") - # This is being built for Linux4Tegra or SBSA ARM64 - set(GPU_ARCHS "62") - if((CUDA_VERSION_MAJOR EQUAL 9) OR (CUDA_VERSION_MAJOR GREATER 9)) - set(GPU_ARCHS "${GPU_ARCHS};72") - endif() - if((CUDA_VERSION_MAJOR EQUAL 11) OR (CUDA_VERSION_MAJOR GREATER 11)) - set(GPU_ARCHS "${GPU_ARCHS};75;80") - endif() - - else() - # This is being built for an x86 or x86_64 architecture - set(GPU_ARCHS "60") - if((CUDA_VERSION_MAJOR EQUAL 9) OR (CUDA_VERSION_MAJOR GREATER 9)) - set(GPU_ARCHS "${GPU_ARCHS};70") - endif() - if((CUDA_VERSION_MAJOR EQUAL 10) OR (CUDA_VERSION_MAJOR GREATER 10)) - set(GPU_ARCHS "${GPU_ARCHS};75") - endif() - if((CUDA_VERSION_MAJOR EQUAL 11) OR (CUDA_VERSION_MAJOR GREATER 11)) - set(GPU_ARCHS "${GPU_ARCHS};80") - endif() - - endif() - -endif() -message("GPU_ARCHS = 
${GPU_ARCHS}") - -foreach(arch ${GPU_ARCHS}) - set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -gencode=arch=compute_${arch},code=sm_${arch}") -endforeach() - -list(GET GPU_ARCHS -1 ptx) -set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -gencode=arch=compute_${ptx},code=compute_${ptx}") - -set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} --expt-extended-lambda --expt-relaxed-constexpr") - -# set warnings as errors -# TODO: remove `no-maybe-unitialized` used to suppress warnings in rmm::exec_policy -set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -Werror=cross-execution-space-call -Xcompiler=-Wall,-Werror,-Wno-error=deprecated-declarations") - +# * find CUDAToolkit package +# * determine GPU architectures +# * enable the CMake CUDA language +# * set other CUDA compilation flags option(DISABLE_DEPRECATION_WARNING "Disable warnings generated from deprecated declarations." OFF) -if(DISABLE_DEPRECATION_WARNING) - set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -Xcompiler=-Wno-deprecated-declarations") - set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-deprecated-declarations") -endif(DISABLE_DEPRECATION_WARNING) - -# Option to enable line info in CUDA device compilation to allow introspection when profiling / memchecking option(CMAKE_CUDA_LINEINFO "Enable the -lineinfo option for nvcc (useful for cuda-memcheck / profiler" OFF) -if(CMAKE_CUDA_LINEINFO) - set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -lineinfo") -endif(CMAKE_CUDA_LINEINFO) - -# Debug options -if(CMAKE_BUILD_TYPE MATCHES Debug) - message(STATUS "Building with debugging flags") - set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -G -Xcompiler=-rdynamic") -endif(CMAKE_BUILD_TYPE MATCHES Debug) +include(cmake/Modules/ConfigureCUDA.cmake) # To apply RUNPATH to transitive dependencies (this is a temporary solution) set(CMAKE_SHARED_LINKER_FLAGS "-Wl,--disable-new-dtags") diff --git a/cpp/cmake/EvalGpuArchs.cmake b/cpp/cmake/EvalGpuArchs.cmake index ce097b94914..6c747a0b867 100644 --- a/cpp/cmake/EvalGpuArchs.cmake +++ b/cpp/cmake/EvalGpuArchs.cmake @@ -1,5 
+1,5 @@ #============================================================================= -# Copyright (c) 2019-2020, NVIDIA CORPORATION. +# Copyright (c) 2019-2021, NVIDIA CORPORATION. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -19,7 +19,7 @@ function(evaluate_gpu_archs gpu_archs) set(eval_exe ${PROJECT_BINARY_DIR}/eval_gpu_archs) set(error_file ${PROJECT_BINARY_DIR}/eval_gpu_archs.stderr.log) file(WRITE ${eval_file} - " +[=[ #include #include #include @@ -32,23 +32,23 @@ int main(int argc, char** argv) { char buff[32]; cudaDeviceProp prop; if(cudaGetDeviceProperties(&prop, dev) != cudaSuccess) continue; - sprintf(buff, \"%d%d\", prop.major, prop.minor); + sprintf(buff, "%d%d", prop.major, prop.minor); archs.insert(buff); } } if(archs.empty()) { - printf(\"ALL\"); + printf("ALL"); } else { bool first = true; for(const auto& arch : archs) { - printf(first? \"%s\" : \";%s\", arch.c_str()); + printf(first? "%s" : ";%s", arch.c_str()); first = false; } } - printf(\"\\n\"); + printf("\n"); return 0; } -") +]=]) execute_process( COMMAND ${CMAKE_CUDA_COMPILER} -std=c++11 @@ -58,6 +58,6 @@ int main(int argc, char** argv) { OUTPUT_VARIABLE __gpu_archs OUTPUT_STRIP_TRAILING_WHITESPACE ERROR_FILE ${error_file}) - message("Auto detection of gpu-archs: ${__gpu_archs}") + message(VERBOSE "CUDF: Auto detection of gpu-archs: ${__gpu_archs}") set(${gpu_archs} ${__gpu_archs} PARENT_SCOPE) -endfunction(evaluate_gpu_archs) +endfunction() diff --git a/cpp/cmake/Modules/ConfigureCUDA.cmake b/cpp/cmake/Modules/ConfigureCUDA.cmake new file mode 100644 index 00000000000..344026da5cb --- /dev/null +++ b/cpp/cmake/Modules/ConfigureCUDA.cmake @@ -0,0 +1,61 @@ +#============================================================================= +# Copyright (c) 2018-2021, NVIDIA CORPORATION. 
+# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +#============================================================================= + +# Auto-detect available GPU compute architectures + +include(${CUDA_DATAFRAME_SOURCE_DIR}/cmake/Modules/SetGPUArchs.cmake) +message(STATUS "CUDF: Building CUDF for GPU architectures: ${CMAKE_CUDA_ARCHITECTURES}") + +if(CMAKE_CUDA_COMPILER_VERSION) + # Compute the version. from CMAKE_CUDA_COMPILER_VERSION + string(REGEX REPLACE "([0-9]+)\\.([0-9]+).*" "\\1" CUDA_VERSION_MAJOR ${CMAKE_CUDA_COMPILER_VERSION}) + string(REGEX REPLACE "([0-9]+)\\.([0-9]+).*" "\\2" CUDA_VERSION_MINOR ${CMAKE_CUDA_COMPILER_VERSION}) + set(CUDA_VERSION "${CUDA_VERSION_MAJOR}.${CUDA_VERSION_MINOR}") +endif() + +message(VERBOSE "CUDF: CUDA_VERSION_MAJOR: ${CUDA_VERSION_MAJOR}") +message(VERBOSE "CUDF: CUDA_VERSION_MINOR: ${CUDA_VERSION_MINOR}") +message(STATUS "CUDF: CUDA_VERSION: ${CUDA_VERSION}") + +if(CMAKE_COMPILER_IS_GNUCXX) + string(APPEND CMAKE_CXX_FLAGS " -Wall -Werror -Wno-unknown-pragmas -Wno-error=deprecated-declarations") + if(CUDF_BUILD_TESTS OR CUDF_BUILD_BENCHMARKS) + # Suppress parentheses warning which causes gmock to fail + string(APPEND CMAKE_CUDA_FLAGS " -Xcompiler=-Wno-parentheses") + endif() +endif() + +string(APPEND CMAKE_CUDA_FLAGS " --expt-extended-lambda --expt-relaxed-constexpr") + +# set warnings as errors +string(APPEND CMAKE_CUDA_FLAGS " -Werror=cross-execution-space-call") +string(APPEND CMAKE_CUDA_FLAGS " 
-Xcompiler=-Wall,-Werror,-Wno-error=deprecated-declarations") + +if(DISABLE_DEPRECATION_WARNING) + string(APPEND CMAKE_CXX_FLAGS " -Wno-deprecated-declarations") + string(APPEND CMAKE_CUDA_FLAGS " -Xcompiler=-Wno-deprecated-declarations") +endif() + +# Option to enable line info in CUDA device compilation to allow introspection when profiling / memchecking +if(CMAKE_CUDA_LINEINFO) + string(APPEND CMAKE_CUDA_FLAGS " -lineinfo") +endif() + +# Debug options +if(CMAKE_BUILD_TYPE MATCHES Debug) + message(VERBOSE "CUDF: Building with debugging flags") + string(APPEND CMAKE_CUDA_FLAGS " -G -Xcompiler=-rdynamic") +endif() diff --git a/cpp/cmake/Modules/SetGPUArchs.cmake b/cpp/cmake/Modules/SetGPUArchs.cmake new file mode 100644 index 00000000000..2480ed4ad79 --- /dev/null +++ b/cpp/cmake/Modules/SetGPUArchs.cmake @@ -0,0 +1,61 @@ +# ============================================================================= +# Copyright (c) 2021, NVIDIA CORPORATION. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except +# in compliance with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software distributed under the License +# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express +# or implied. See the License for the specific language governing permissions and limitations under +# the License. 
+# =============================================================================
+
+# Choose the GPU architectures to build for, starting from every architecture that is supported.
+set(SUPPORTED_CUDA_ARCHITECTURES "60" "62" "70" "72" "75" "80")
+# Check for embedded vs workstation architectures
+if(CMAKE_SYSTEM_PROCESSOR MATCHES "aarch64")
+  # This is being built for Linux4Tegra or SBSA ARM64
+  list(REMOVE_ITEM SUPPORTED_CUDA_ARCHITECTURES "60" "70")
+else()
+  # This is being built for an x86 or x86_64 architecture
+  list(REMOVE_ITEM SUPPORTED_CUDA_ARCHITECTURES "62" "72")
+endif()
+
+if(CMAKE_CUDA_COMPILER_VERSION VERSION_LESS 11)
+  list(REMOVE_ITEM SUPPORTED_CUDA_ARCHITECTURES "80")
+endif()
+if(CMAKE_CUDA_COMPILER_VERSION VERSION_LESS 10)
+  list(REMOVE_ITEM SUPPORTED_CUDA_ARCHITECTURES "75")
+endif()
+if(CMAKE_CUDA_COMPILER_VERSION VERSION_LESS 9)
+  list(REMOVE_ITEM SUPPORTED_CUDA_ARCHITECTURES "70")
+endif()
+
+if(${PROJECT_NAME}_BUILD_FOR_ALL_ARCHS)
+  set(CMAKE_CUDA_ARCHITECTURES ${SUPPORTED_CUDA_ARCHITECTURES})
+elseif(${PROJECT_NAME}_BUILD_FOR_DETECTED_ARCHS)
+  include(${PROJECT_SOURCE_DIR}/cmake/EvalGpuArchs.cmake)
+  evaluate_gpu_archs(CMAKE_CUDA_ARCHITECTURES)
+  # The probe prints "ALL" when it finds no GPU (and presumably nothing if it fails): use all.
+  if("${CMAKE_CUDA_ARCHITECTURES}" STREQUAL "ALL" OR "${CMAKE_CUDA_ARCHITECTURES}" STREQUAL "")
+    set(CMAKE_CUDA_ARCHITECTURES ${SUPPORTED_CUDA_ARCHITECTURES})
+  endif()
+endif()
+
+if(CMAKE_VERSION VERSION_GREATER_EQUAL 3.18)
+  # A bare entry such as "80" builds both compute and sm; "80-real" builds sm only. Only the
+  # newest arch keeps compute. Entries that already carry a suffix are not suffixed again.
+  list(SORT CMAKE_CUDA_ARCHITECTURES ORDER ASCENDING)
+  list(POP_BACK CMAKE_CUDA_ARCHITECTURES latest_arch)
+  list(TRANSFORM CMAKE_CUDA_ARCHITECTURES APPEND "-real" REGEX "^[0-9]+$")
+  list(APPEND CMAKE_CUDA_ARCHITECTURES ${latest_arch})
+else()
+  foreach(arch IN LISTS CMAKE_CUDA_ARCHITECTURES)
+    string(APPEND CMAKE_CUDA_FLAGS " -gencode=arch=compute_${arch},code=sm_${arch}")
+  endforeach()
+  list(GET CMAKE_CUDA_ARCHITECTURES -1 ptx)
+  string(APPEND CMAKE_CUDA_FLAGS " -gencode=arch=compute_${ptx},code=compute_${ptx}")
+  unset(CMAKE_CUDA_ARCHITECTURES)
+endif()