Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix auto-detecting GPU architectures #7593

Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion build.sh
Original file line number Diff line number Diff line change
Expand Up @@ -135,7 +135,7 @@ if hasArg clean; then
fi

if (( ${BUILD_ALL_GPU_ARCH} == 0 )); then
CUDF_CMAKE_CUDA_ARCHITECTURES="-DCMAKE_CUDA_ARCHITECTURES=ALL"
CUDF_CMAKE_CUDA_ARCHITECTURES="-DCMAKE_CUDA_ARCHITECTURES="
echo "Building for the architecture of the GPU in the system..."
else
CUDF_CMAKE_CUDA_ARCHITECTURES=""
Expand Down
22 changes: 6 additions & 16 deletions cpp/cmake/Modules/ConfigureCUDA.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -17,26 +17,16 @@
# Find the CUDAToolkit
find_package(CUDAToolkit REQUIRED)

# Must come after find_package(CUDAToolkit) because we symlink
# ccache as a compiler front-end for nvcc in gpuCI CPU builds.
enable_language(CUDA)

if(CMAKE_CUDA_COMPILER_VERSION)
# Compute the version from CMAKE_CUDA_COMPILER_VERSION
string(REGEX REPLACE "([0-9]+)\\.([0-9]+).*" "\\1" CUDA_VERSION_MAJOR ${CMAKE_CUDA_COMPILER_VERSION})
string(REGEX REPLACE "([0-9]+)\\.([0-9]+).*" "\\2" CUDA_VERSION_MINOR ${CMAKE_CUDA_COMPILER_VERSION})
set(CUDA_VERSION "${CUDA_VERSION_MAJOR}.${CUDA_VERSION_MINOR}")
endif()

message(VERBOSE "CUDF: CUDA_VERSION_MAJOR: ${CUDA_VERSION_MAJOR}")
message(VERBOSE "CUDF: CUDA_VERSION_MINOR: ${CUDA_VERSION_MINOR}")
message(STATUS "CUDF: CUDA_VERSION: ${CUDA_VERSION}")

# Auto-detect available GPU compute architectures

include(${CUDF_SOURCE_DIR}/cmake/Modules/SetGPUArchs.cmake)
message(STATUS "CUDF: Building CUDF for GPU architectures: ${CMAKE_CUDA_ARCHITECTURES}")

# Must come after find_package(CUDAToolkit) because we symlink
# ccache as a compiler front-end for nvcc in gpuCI CPU builds.
# Must also come after we detect and potentially rewrite
# CMAKE_CUDA_ARCHITECTURES
enable_language(CUDA)

if(CMAKE_COMPILER_IS_GNUCXX)
list(APPEND CUDF_CXX_FLAGS -Wall -Werror -Wno-unknown-pragmas -Wno-error=deprecated-declarations)
if(CUDF_BUILD_TESTS OR CUDF_BUILD_BENCHMARKS)
Expand Down
37 changes: 22 additions & 15 deletions cpp/cmake/Modules/EvalGPUArchs.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -14,12 +14,21 @@
# limitations under the License.
#=============================================================================

# Set this to OFF first in case it's set to <empty_string>
set(CMAKE_CUDA_ARCHITECTURES OFF)

# Enable CUDA so we can invoke nvcc
enable_language(CUDA)

# Function uses the CUDA runtime API to query the compute capability of the device, so if a user
# doesn't pass any architecture options to CMake we only build the current architecture
function(evaluate_gpu_archs gpu_archs)
set(eval_file ${PROJECT_BINARY_DIR}/eval_gpu_archs.cu)
set(eval_exe ${PROJECT_BINARY_DIR}/eval_gpu_archs)
set(error_file ${PROJECT_BINARY_DIR}/eval_gpu_archs.stderr.log)
file(WRITE ${eval_file}
[=[
file(
WRITE ${eval_file}
"
trxcllnt marked this conversation as resolved.
Show resolved Hide resolved
#include <cstdio>
#include <set>
#include <string>
Expand All @@ -32,32 +41,30 @@ int main(int argc, char** argv) {
char buff[32];
cudaDeviceProp prop;
if(cudaGetDeviceProperties(&prop, dev) != cudaSuccess) continue;
sprintf(buff, "%d%d", prop.major, prop.minor);
sprintf(buff, \"%d%d\", prop.major, prop.minor);
archs.insert(buff);
}
}
if(archs.empty()) {
printf("ALL");
printf(\"${SUPPORTED_CUDA_ARCHITECTURES}\");
} else {
bool first = true;
for(const auto& arch : archs) {
printf(first? "%s" : ";%s", arch.c_str());
printf(first? \"%s\" : \";%s\", arch.c_str());
first = false;
}
}
printf("\n");
printf(\"\\n\");
return 0;
}
]=])
")
execute_process(
COMMAND ${CMAKE_CUDA_COMPILER}
-std=c++11
-o ${eval_exe}
--run
${eval_file}
COMMAND ${CMAKE_CUDA_COMPILER} -std=c++11 -o ${eval_exe} --run ${eval_file}
OUTPUT_VARIABLE __gpu_archs
OUTPUT_STRIP_TRAILING_WHITESPACE
ERROR_FILE ${error_file})
message(VERBOSE "CUDF: Auto detection of gpu-archs: ${__gpu_archs}")
set(${gpu_archs} ${__gpu_archs} PARENT_SCOPE)
endfunction()
message(STATUS "CUDF: Auto detection of gpu-archs: ${__gpu_archs}")
set(${gpu_archs}
${__gpu_archs}
PARENT_SCOPE)
endfunction(evaluate_gpu_archs)
46 changes: 26 additions & 20 deletions cpp/cmake/Modules/SetGPUArchs.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -25,35 +25,41 @@ else()
list(REMOVE_ITEM SUPPORTED_CUDA_ARCHITECTURES "62" "72")
endif()

if(CMAKE_CUDA_COMPILER_VERSION VERSION_LESS 11)
# CMake < 3.20 has a bug in FindCUDAToolkit where it won't properly detect the CUDAToolkit version
# when find_package(CUDAToolkit) occurs before enable_language(CUDA)
if(NOT DEFINED CUDAToolkit_VERSION AND CMAKE_CUDA_COMPILER)
execute_process(COMMAND ${CMAKE_CUDA_COMPILER} "--version" OUTPUT_VARIABLE NVCC_OUT)
if(NVCC_OUT MATCHES [=[ V([0-9]+)\.([0-9]+)\.([0-9]+)]=])
set(CUDAToolkit_VERSION_MAJOR "${CMAKE_MATCH_1}")
set(CUDAToolkit_VERSION_MINOR "${CMAKE_MATCH_2}")
set(CUDAToolkit_VERSION_PATCH "${CMAKE_MATCH_3}")
set(CUDAToolkit_VERSION "${CMAKE_MATCH_1}.${CMAKE_MATCH_2}.${CMAKE_MATCH_3}")
endif()
unset(NVCC_OUT)
endif()

if(CUDAToolkit_VERSION_MAJOR LESS 11)
trxcllnt marked this conversation as resolved.
Show resolved Hide resolved
list(REMOVE_ITEM SUPPORTED_CUDA_ARCHITECTURES "80")
endif()
if(CMAKE_CUDA_COMPILER_VERSION VERSION_LESS 10)
if(CUDAToolkit_VERSION_MAJOR LESS 10)
list(REMOVE_ITEM SUPPORTED_CUDA_ARCHITECTURES "75")
endif()
if(CMAKE_CUDA_COMPILER_VERSION VERSION_LESS 9)
if(CUDAToolkit_VERSION_MAJOR LESS 9)
list(REMOVE_ITEM SUPPORTED_CUDA_ARCHITECTURES "70")
endif()

if(CUDF_BUILD_FOR_DETECTED_ARCHS)
include(${CUDF_SOURCE_DIR}/cmake/Modules/EvalGPUArchs.cmake)
evaluate_gpu_archs(CMAKE_CUDA_ARCHITECTURES)
if(CMAKE_CUDA_ARCHITECTURES STREQUAL "ALL")
unset(CMAKE_CUDA_ARCHITECTURES CACHE)
set(CUDF_BUILD_FOR_ALL_ARCHS TRUE)
else()
set(CUDF_BUILD_FOR_ALL_ARCHS FALSE)
list(TRANSFORM CMAKE_CUDA_ARCHITECTURES APPEND "-real")
endif()
endif()

if(CUDF_BUILD_FOR_ALL_ARCHS)
if(${PROJECT_NAME}_BUILD_FOR_ALL_ARCHS)
set(CMAKE_CUDA_ARCHITECTURES ${SUPPORTED_CUDA_ARCHITECTURES})
# CMake architecture list entry of "80" means to build compute and sm.
# What we want is for the newest arch only to build that way
# while the rest are built only for sm.
list(SORT CMAKE_CUDA_ARCHITECTURES ORDER ASCENDING)

# CMake architecture list entry of "80" means to build compute and sm. What we want is for the
# newest arch only to build that way while the rest are built only for sm.
list(POP_BACK CMAKE_CUDA_ARCHITECTURES latest_arch)
list(TRANSFORM CMAKE_CUDA_ARCHITECTURES APPEND "-real")
list(APPEND CMAKE_CUDA_ARCHITECTURES ${latest_arch})

elseif(${PROJECT_NAME}_BUILD_FOR_DETECTED_ARCHS)
include(${PROJECT_SOURCE_DIR}/cmake/Modules/EvalGPUArchs.cmake)
evaluate_gpu_archs(CMAKE_CUDA_ARCHITECTURES)

list(TRANSFORM CMAKE_CUDA_ARCHITECTURES APPEND "-real")
endif()