diff --git a/CHANGELOG.md b/CHANGELOG.md
index b811eccf35e..c136bc59225 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -20,6 +20,7 @@
 - PR #3032 Use `asarray` to coerce indices to a NumPy array
 - PR #2996 IO Readers: Replace `cuio::device_buffer` with `rmm::device_buffer`
 - PR #3029 Update gdf_ numeric types with stdint and move to cudf namespace
+- PR #2955 Add cmake option to only build for present GPU architecture
 - PR #3070 Move functions.h and related source to legacy
 
 ## Bug Fixes
diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt
index 4ed7280ce40..65f53cf5966 100644
--- a/cpp/CMakeLists.txt
+++ b/cpp/CMakeLists.txt
@@ -61,9 +61,45 @@ if(CMAKE_COMPILER_IS_GNUCXX)
     endif(CMAKE_CXX11_ABI)
 endif(CMAKE_COMPILER_IS_GNUCXX)
 
-#set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -gencode=arch=compute_60,code=sm_60 -gencode=arch=compute_61,code=sm_61")
-set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -gencode=arch=compute_60,code=sm_60")
-set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -gencode=arch=compute_70,code=sm_70 -gencode=arch=compute_70,code=compute_70")
+# GPU_ARCHS selects the GPU compute architectures that device code is built for:
+#   "ALL"        expand to every architecture enabled for the detected CUDA version
+#   ""           auto-detect the GPUs on the current system
+#   "60;70;..."  build exactly the listed architectures
+set(GPU_ARCHS "ALL" CACHE STRING
+  "List of GPU architectures (semicolon-separated) to be compiled for. Pass 'ALL' if you want to compile for all supported GPU architectures. Empty string means to auto-detect the GPUs on the current system")
+
+# Auto-detection: evaluate_gpu_archs() is handed the name of the variable to fill.
+if("${GPU_ARCHS}" STREQUAL "")
+  include(cmake/EvalGpuArchs.cmake)
+  evaluate_gpu_archs(GPU_ARCHS)
+endif()
+
+# "ALL": always build for 60; add 70 with CUDA >= 9 and 75 with CUDA >= 10.
+if("${GPU_ARCHS}" STREQUAL "ALL")
+  set(GPU_ARCHS "60")
+  if(CUDA_VERSION_MAJOR GREATER_EQUAL 9)
+    list(APPEND GPU_ARCHS "70")
+  endif()
+  if(CUDA_VERSION_MAJOR GREATER_EQUAL 10)
+    list(APPEND GPU_ARCHS "75")
+  endif()
+endif()
+
+# An empty list here would make list(GET ... -1) below fail with an opaque error,
+# so stop early with an actionable message instead.
+if("${GPU_ARCHS}" STREQUAL "")
+  message(FATAL_ERROR "GPU_ARCHS is empty: no GPU architecture could be determined. Pass -DGPU_ARCHS=ALL or an explicit list such as -DGPU_ARCHS='60;70'.")
+endif()
+message(STATUS "GPU_ARCHS = ${GPU_ARCHS}")
+
+# Generate machine code (code=sm_XX) for every requested architecture.
+foreach(arch ${GPU_ARCHS})
+  string(APPEND CMAKE_CUDA_FLAGS " -gencode arch=compute_${arch},code=sm_${arch}")
+endforeach()
+
+# Additionally embed PTX (code=compute_XX) for the last architecture in the list.
+list(GET GPU_ARCHS -1 ptx)
+string(APPEND CMAKE_CUDA_FLAGS " -gencode arch=compute_${ptx},code=compute_${ptx}")
 
 set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} --expt-extended-lambda --expt-relaxed-constexpr")
 
diff --git a/cpp/benchmarks/quantiles/group_quantiles_benchmark.cu b/cpp/benchmarks/quantiles/group_quantiles_benchmark.cu
index cc1954829ee..5556568354b 100644
--- a/cpp/benchmarks/quantiles/group_quantiles_benchmark.cu
+++ b/cpp/benchmarks/quantiles/group_quantiles_benchmark.cu
@@ -19,6 +19,7 @@
 
 #include
 #include
+#include
 
 #include "../fixture/benchmark_fixture.hpp"
 #include "../synchronization/synchronization.hpp"
diff --git a/cpp/cmake/EvalGpuArchs.cmake b/cpp/cmake/EvalGpuArchs.cmake
new file mode 100644
index 00000000000..740987e4785
--- /dev/null
+++ b/cpp/cmake/EvalGpuArchs.cmake
@@ -0,0 +1,62 @@
+# Copyright (c) 2019, NVIDIA CORPORATION.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and +# limitations under the License. +# + +function(evaluate_gpu_archs gpu_archs) + set(eval_file ${PROJECT_BINARY_DIR}/eval_gpu_archs.cu) + set(eval_exe ${PROJECT_BINARY_DIR}/eval_gpu_archs) + set(error_file ${PROJECT_BINARY_DIR}/eval_gpu_archs.stderr.log) + file(WRITE ${eval_file} + " +#include +#include +#include +using namespace std; +int main(int argc, char** argv) { + set archs; + int nDevices; + if((cudaGetDeviceCount(&nDevices) == cudaSuccess) && (nDevices > 0)) { + for(int dev=0;dev