Skip to content

Commit

Permalink
Merge pull request #1 from ironictoo/pr_build_action
Browse files Browse the repository at this point in the history
CMake Updates and Linux Package Build
  • Loading branch information
jimkring authored Oct 11, 2022
2 parents c64e19d + be45592 commit b4c5250
Show file tree
Hide file tree
Showing 9 changed files with 322 additions and 153 deletions.
40 changes: 28 additions & 12 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -6,26 +6,27 @@

cmake_minimum_required( VERSION 3.11 )
set_property( GLOBAL PROPERTY USE_FOLDERS ON )
cmake_policy(SET CMP0091 NEW)

if( NOT PROJECT_NAME )
project( Gpufit VERSION 1.2.0 )
project( Gpufit VERSION 1.2.0 LANGUAGES CUDA CXX)
include( CTest )
endif()

if( NOT CMAKE_CXX_STANDARD )
set( CMAKE_CXX_STANDARD 14 )
endif()

if( MSVC ) # link runtime statically with MSVC
foreach( type ${CMAKE_CONFIGURATION_TYPES} ${CMAKE_BUILD_TYPE} )
string( TOUPPER ${type} TYPE )
foreach( flags CMAKE_C_FLAGS_${TYPE} CMAKE_CXX_FLAGS_${TYPE} )
get_property( help CACHE ${flags} PROPERTY HELPSTRING )
string( REPLACE "/MD" "/MT" ${flags} "${${flags}}" )
set( ${flags} "${${flags}}" CACHE STRING "${help}" FORCE )
endforeach()
endforeach()
endif()
#if( MSVC ) # link runtime statically with MSVC
# foreach( type ${CMAKE_CONFIGURATION_TYPES} ${CMAKE_BUILD_TYPE} )
# string( TOUPPER ${type} TYPE )
# foreach( flags CMAKE_C_FLAGS_${TYPE} CMAKE_CXX_FLAGS_${TYPE} )
# get_property( help CACHE ${flags} PROPERTY HELPSTRING )
# string( REPLACE "/MD" "/MT" ${flags} "${${flags}}" )
# set( ${flags} "${${flags}}" CACHE STRING "${help}" FORCE )
# endforeach()
# endforeach()
#endif()

function( add_launcher target executable arguments working_directory )
if( MSVC12 OR MSVC14 )
Expand Down Expand Up @@ -144,6 +145,14 @@ add_subdirectory( Gpufit )

add_subdirectory( examples/c++/gpu_vs_cpu_profiling )


# Link the MSVC runtime statically: select the "MultiThreaded" runtime
# ("MultiThreadedDebug" in the Debug configuration) for both libraries.
set_target_properties( Gpufit Cpufit PROPERTIES
  MSVC_RUNTIME_LIBRARY "MultiThreaded$<$<CONFIG:Debug>:Debug>"
)


# Launcher
#
# Uses the following variables:
Expand Down Expand Up @@ -184,6 +193,13 @@ if( PYTHONINTERP_FOUND )
"${Python_WORKING_DIRECTORY}"
)
endif()
if( UNIX )
  # Copy the Python integration install script into the build tree and mark
  # it executable. The source path is resolved relative to this
  # CMakeLists.txt (CMAKE_CURRENT_SOURCE_DIR) rather than the top of the whole
  # build (CMAKE_SOURCE_DIR), so the script is still found when this project
  # is included from a parent project via add_subdirectory().
  file( COPY "${CMAKE_CURRENT_SOURCE_DIR}/package/install_gpufit_python.sh"
    DESTINATION "${CMAKE_BINARY_DIR}"
    FILE_PERMISSIONS
      OWNER_READ OWNER_WRITE OWNER_EXECUTE
      GROUP_READ GROUP_EXECUTE
      WORLD_READ WORLD_EXECUTE
  )
endif()
endif()

# Tests
Expand Down
189 changes: 49 additions & 140 deletions Gpufit/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,135 +1,61 @@

# CUDA
#
# Uses the following variables:
#
# CUDA_ARCHITECTURES (Default All)
# -- Argument passed to CUDA_SELECT_NVCC_ARCH_FLAGS(...)
# resulting in code_generation_flags
# (see http://cmake.org/cmake/help/v3.7/module/FindCUDA.html).
# CUDA_ARCHITECTURES: Auto | Common | All | ARCH_AND_PTX ...
# Auto: Detects local machine GPU architecture.
# Common: Covers common subset of architectures.
# All: Covers all known architectures.
# ARCH_AND_PTX: NAME | NUM.NUM | NUM.NUM(NUM.NUM) | NUM.NUM+PTX
# NAME: Fermi Kepler Maxwell Kepler+Tegra Kepler+Tesla Maxwell+Tegra Pascal
# NUM: Any number.
# Only those pairs are currently accepted by NVCC though:
# 2.0 2.1 3.0 3.2 3.5 3.7 5.0 5.2 5.3 6.0 6.2
# Examples:
# 2.1(2.0) results in
# -gencode;arch=compute_20,code=sm_21
# Kepler+Tesla results in
# -gencode;arch=compute_37,code=sm_37
# 6.2+PTX results in
# -gencode;arch=compute_62,code=sm_62;-gencode;arch=compute_62,code=compute_62
#
# CUDA_NVCC_FLAGS (Default ${code_generation_flags})
# -- Additional NVCC command line arguments
# (see http://cmake.org/cmake/help/v3.7/module/FindCUDA.html).
# NOTE that multiple arguments must be semi-colon delimited
# (e.g. --compiler-options;-Wall)
#
# Multiple CUDA versions installed, specify which version to use
# Set CUDA_BIN_PATH before running CMake or CUDA_TOOLKIT_ROOT_DIR after first configuration
# to installation folder of desired CUDA version

find_package( CUDA 6.5 REQUIRED )

set( CUDA_ARCHITECTURES ${DEFAULT_CUDA_ARCH} CACHE STRING
"Auto | Common | All | ... see CUDA_SELECT_NVCC_ARCH_FLAGS(...)" )

if( CUDA_ARCHITECTURES STREQUAL Auto )

set( file ${PROJECT_BINARY_DIR}/detect_cuda_architectures.cpp )
file( WRITE ${file} ""
"#include <cuda_runtime.h>\n"
"#include <cstdio>\n"
"int main()\n"
"{\n"
" int count = 0;\n"
" if (cudaSuccess != cudaGetDeviceCount(&count)) return -1;\n"
" if (count == 0) return -1;\n"
" for (int device = 0; device < count; ++device)\n"
" {\n"
" cudaDeviceProp prop;\n"
" if (cudaSuccess == cudaGetDeviceProperties(&prop, device))\n"
" std::printf(\"%d.%d \", prop.major, prop.minor);\n"
" }\n"
" return 0;\n"
"}\n"
)
try_run( run_result compile_result ${PROJECT_BINARY_DIR} ${file}
CMAKE_FLAGS "-DINCLUDE_DIRECTORIES=${CUDA_INCLUDE_DIRS}"
LINK_LIBRARIES ${CUDA_LIBRARIES}
RUN_OUTPUT_VARIABLE architectures
)
if( run_result EQUAL 0 )
string( REPLACE "2.1" "2.1(2.0)" architectures "${architectures}" )
if( CUDA_VERSION VERSION_LESS "7.0" )
string( REGEX REPLACE "3\\.[27]|5\\.[23]|6\\.[01]" "5.2+PTX" architectures "${architectures}" )
elseif( CUDA_VERSION VERSION_LESS "8.0" )
string( REGEX REPLACE "5\\.3|6\\.[01]" "5.3+PTX" architectures "${architectures}" )
endif()
set( CUDA_ARCHITECTURES "${architectures}" )
endif()

elseif( CUDA_ARCHITECTURES STREQUAL All )
# Uses the CMAKE standard CUDA tools

cmake_minimum_required(VERSION 3.18)

# Default the CUDA language standard to 14 and mark it as required, but only
# when CMAKE_CUDA_STANDARD has not already been defined before this point.
if(NOT DEFINED CMAKE_CUDA_STANDARD)
set(CMAKE_CUDA_STANDARD 14)
set(CMAKE_CUDA_STANDARD_REQUIRED ON)
endif()


message( STATUS "CMAKE_CUDA_COMPILER_VERSION=${CMAKE_CUDA_COMPILER_VERSION}")


# All does not include the latest PTX!
set( CUDA_ARCHITECTURES "" )
set( CMAKE_CUDA_ARCHITECTURES_D "" )

if( CUDA_VERSION VERSION_LESS "12.0" )
list( INSERT CUDA_ARCHITECTURES 0 "3.5" "5.0" "5.2" )
if( CMAKE_CUDA_COMPILER_VERSION VERSION_LESS "12.0" )
list( INSERT CMAKE_CUDA_ARCHITECTURES_D 0 35 50 52 )
endif()
if( CUDA_VERSION VERSION_LESS "11.0" )
list( INSERT CUDA_ARCHITECTURES 0 "3.0" "3.2")
if( CMAKE_CUDA_COMPILER_VERSION VERSION_LESS "11.0" )
list( INSERT CMAKE_CUDA_ARCHITECTURES_D 0 30 32)
endif()
if( CUDA_VERSION VERSION_LESS "9.0" )
list( INSERT CUDA_ARCHITECTURES 0 "2.0" "2.1(2.0)" )
if( CMAKE_CUDA_COMPILER_VERSION VERSION_LESS "9.0" )
list( INSERT CMAKE_CUDA_ARCHITECTURES_D 0 20 21 )
endif()

if( CUDA_VERSION VERSION_GREATER "6.5" )
list( APPEND CUDA_ARCHITECTURES "5.3" )


if( CMAKE_CUDA_COMPILER_VERSION VERSION_GREATER "6.5" )
list( APPEND CMAKE_CUDA_ARCHITECTURES_D 53 )
endif()

if( CUDA_VERSION VERSION_GREATER "7.5" )
list( APPEND CUDA_ARCHITECTURES "6.0" "6.1" )
if( CMAKE_CUDA_COMPILER_VERSION VERSION_GREATER "7.5" )
list( APPEND CMAKE_CUDA_ARCHITECTURES_D 60 61 )
endif()

if( CUDA_VERSION VERSION_GREATER "8.0" )
list( APPEND CUDA_ARCHITECTURES "7.0" "7.2")
if( CMAKE_CUDA_COMPILER_VERSION VERSION_GREATER "8.0" )
list( APPEND CMAKE_CUDA_ARCHITECTURES_D 70 72)
endif()

if( CUDA_VERSION VERSION_GREATER "9.2" )
list( APPEND CUDA_ARCHITECTURES "7.5" )
if( CMAKE_CUDA_COMPILER_VERSION VERSION_GREATER "9.2" )
list( APPEND CMAKE_CUDA_ARCHITECTURES_D 75 )
endif()

if( CUDA_VERSION VERSION_GREATER "10.2" )
list( APPEND CUDA_ARCHITECTURES "8.0" )
if( CMAKE_CUDA_COMPILER_VERSION VERSION_GREATER "10.2" )
list( APPEND CMAKE_CUDA_ARCHITECTURES_D 80 )
endif()

if( CUDA_VERSION VERSION_GREATER "11.0" )
list( APPEND CUDA_ARCHITECTURES "8.6" )
if( CMAKE_CUDA_COMPILER_VERSION VERSION_GREATER "11.0" )
list( APPEND CMAKE_CUDA_ARCHITECTURES_D 86 )
endif()

string( APPEND CUDA_ARCHITECTURES "+PTX" )

endif()

message( STATUS "CUDA_ARCHITECTURES=${CUDA_ARCHITECTURES}" )
# string( APPEND CMAKE_CUDA_ARCHITECTURES "+PTX" )
set( CMAKE_CUDA_ARCHITECTURES "${CMAKE_CUDA_ARCHITECTURES_D}" CACHE STRING "List of architectures to compile, default is all")

CUDA_SELECT_NVCC_ARCH_FLAGS( code_generation_flags "${CUDA_ARCHITECTURES}" )
list( APPEND CUDA_NVCC_FLAGS ${code_generation_flags} )

message( STATUS "CUDA_NVCC_FLAGS=${code_generation_flags}" )

if( NOT WIN32 )
list( APPEND CUDA_NVCC_FLAGS --std=c++11)
endif()

# Gpufit

set( GpuHeaders
gpufit.h
constants.h
Expand Down Expand Up @@ -177,7 +103,7 @@ source_group("CUDA Source Files" FILES ${GpuCudaSources})
source_group("CUDA Model Files" FILES ${GpuCudaModels})
source_group("CUDA Estimator Files" FILES ${GpuCudaEstimators})

cuda_add_library( Gpufit SHARED
add_library( Gpufit SHARED
${GpuHeaders}
${GpuSources}
${GpuCudaHeaders}
Expand All @@ -192,38 +118,21 @@ set_target_properties( Gpufit
CXX_VISIBILITY_PRESET hidden
)

# USE_CUBLAS
if( CMAKE_SIZEOF_VOID_P EQUAL 8 AND CUDA_VERSION VERSION_GREATER "6.5")
target_include_directories( Gpufit SYSTEM PUBLIC ${CMAKE_CUDA_TOOLKIT_INCLUDE_DIRECTORIES})

# USE_CUBLAS
if( CMAKE_SIZEOF_VOID_P EQUAL 8 AND CMAKE_CUDA_COMPILER_VERSION VERSION_GREATER "6.5")
set( USE_CUBLAS ${DEFAULT_USE_CUBLAS} CACHE BOOL "ON | OFF")
if( USE_CUBLAS )
if ( WIN32 )
if( CMAKE_SIZEOF_VOID_P EQUAL 8 AND CUDA_VERSION VERSION_GREATER_EQUAL "10")
set( CUBLAS_DLL "${CUDA_TOOLKIT_ROOT_DIR}/bin/cublas64_${CUDA_VERSION_MAJOR}.dll" )
else()
set( CUBLAS_DLL "${CUDA_TOOLKIT_ROOT_DIR}/bin/cublas64_${CUDA_VERSION_MAJOR}${CUDA_VERSION_MINOR}.dll" )
endif()
add_custom_command( TARGET Gpufit POST_BUILD
COMMAND ${CMAKE_COMMAND} -E
copy_if_different ${CUBLAS_DLL} $<TARGET_FILE_DIR:Gpufit> )
else()
find_cuda_helper_libs(cublas_static)
find_cuda_helper_libs(cublasLt_static)
find_cuda_helper_libs(culibos)

set( CUDA_CUBLAS_LIBRARIES
${CUDA_cublas_static_LIBRARY}
${CUDA_cublasLt_static_LIBRARY}
${CUDA_cudart_static_LIBRARY}
${CUDA_culibos_LIBRARY}
dl
pthread
rt )
endif()

add_definitions( -DUSE_CUBLAS )

target_link_libraries( Gpufit ${CUDA_CUBLAS_LIBRARIES} )
endif()
# cuBLAS is consumed through the imported targets of the FindCUDAToolkit module.
find_package( CUDAToolkit REQUIRED )
set( STATIC_CUBLAS ON CACHE BOOL "ON | OFF" )
# FindCUDAToolkit only defines an imported target when it located the
# corresponding library, so the static targets may be missing on toolkits that
# ship no static cuBLAS. Linking a missing "CUDA::" target is a hard configure
# error; fall back to the shared libraries in that case instead.
if( STATIC_CUBLAS AND TARGET CUDA::cublas_static AND TARGET CUDA::cublasLt_static )
  target_link_libraries( Gpufit CUDA::cublas_static CUDA::cublasLt_static )
else()
  if( STATIC_CUBLAS )
    message( STATUS "CUBLAS: static cuBLAS targets not available; linking the shared libraries instead." )
  endif()
  target_link_libraries( Gpufit CUDA::cublas CUDA::cublasLt )
endif()
# Enables the cuBLAS code path in the sources compiled in this directory.
add_definitions( -DUSE_CUBLAS )
endif()
elseif( CMAKE_SIZEOF_VOID_P EQUAL 4 )
message( STATUS "CUBLAS: 32-bit architecture detected; USE_CUBLAS flag ignored." )
elseif( CUDA_VERSION VERSION_LESS "7.0" )
Expand Down
1 change: 1 addition & 0 deletions Gpufit/info.cpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
#include "info.h"
#include <algorithm>
#include <limits>

Info::Info() :
n_parameters_(0),
Expand Down
8 changes: 8 additions & 0 deletions Gpufit/tests/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -10,3 +10,11 @@ add_boost_test( Gpufit Gauss_Fit_2D_Rotated )
add_boost_test( Gpufit Cauchy_Fit_2D_Elliptic )
add_boost_test( Gpufit Fletcher_Powell_Helix_Fit )
add_boost_test( Gpufit Brown_Dennis_Fit )

if( UNIX )
  # Copy the run-all-tests script into the build tree and mark it executable.
  # test_all.sh lives next to this CMakeLists.txt, so it is addressed through
  # CMAKE_CURRENT_SOURCE_DIR; CMAKE_SOURCE_DIR would point at the parent
  # project's tree when Gpufit is built as a subproject.
  file( COPY "${CMAKE_CURRENT_SOURCE_DIR}/test_all.sh"
    DESTINATION "${CMAKE_BINARY_DIR}"
    FILE_PERMISSIONS
      OWNER_READ OWNER_WRITE OWNER_EXECUTE
      GROUP_READ GROUP_EXECUTE
      WORLD_READ WORLD_EXECUTE
  )
endif()
34 changes: 34 additions & 0 deletions Gpufit/tests/test_all.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
#!/bin/bash
#
# Runs every Gpufit test executable in sequence, printing a heading before
# each one.
#
# The executables are addressed relative to the current working directory, so
# run this script from the directory that contains them.
#
# Exit status: 0 when every executable succeeded, 1 when at least one of them
# returned a non-zero status (all executables are still run).

failed_tests=()
separator=""

# run_test LABEL EXECUTABLE
#   Prints LABEL (preceded by a blank line for all but the first test), runs
#   ./EXECUTABLE and records LABEL if the executable reports a failure.
run_test() {
    local label=$1
    local executable=$2

    printf '%s%s\n' "$separator" "$label"
    separator=$'\n'

    "./$executable" || failed_tests+=("$label")
}

run_test "Gauss 2D Rotated"       Gpufit_Test_Gauss_Fit_2D_Rotated
run_test "Gauss 2D Elliptic"      Gpufit_Test_Gauss_Fit_2D_Elliptic
run_test "Gauss 2D"               Gpufit_Test_Gauss_Fit_2D
run_test "Gauss 1D"               Gpufit_Test_Gauss_Fit_1D
run_test "Linear 1D"              Gpufit_Test_Linear_Fit_1D
run_test "Fletcher Powell Helix"  Gpufit_Test_Fletcher_Powell_Helix_Fit
run_test "Error Handling"         Gpufit_Test_Error_Handling
run_test "Cauchy Fit 2D Elliptic" Gpufit_Test_Cauchy_Fit_2D_Elliptic
run_test "Brown Dennis"           Gpufit_Test_Brown_Dennis_Fit
run_test "Test Consistency"       Cpufit_Gpufit_Test_Consistency

# Report failures on stderr so the regular test output on stdout is unchanged.
if [ "${#failed_tests[@]}" -ne 0 ]; then
    printf '\n%d test executable(s) failed:\n' "${#failed_tests[@]}" >&2
    printf '  %s\n' "${failed_tests[@]}" >&2
    exit 1
fi



2 changes: 1 addition & 1 deletion examples/c++/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ function( add_example module name )
endfunction()

function( add_cuda_example module name )
cuda_add_executable( ${name} ${name}.cu )
add_executable( ${name} ${name}.cu )
target_link_libraries( ${name} ${module} )
set_property( TARGET ${name}
PROPERTY RUNTIME_OUTPUT_DIRECTORY "${PROJECT_BINARY_DIR}" )
Expand Down
10 changes: 10 additions & 0 deletions package/control
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
Package: gpufit
Version: version_string
Section: base
Priority: optional
Architecture: amd64
Depends:
Maintainer: Samuel Barnes <[email protected]>
Description: GPU-accelerated Levenberg-Marquardt curve fitting toolkit, implemented in CUDA
 Requires a CUDA-capable video card and driver.
 Includes Python, Java, and Matlab bindings.
Loading

0 comments on commit b4c5250

Please sign in to comment.