Skip to content

Commit

Permalink
Switch over to rapids-cmake patches for thrust (#11921)
Browse files Browse the repository at this point in the history
Now that rapids-cmake supports custom patches we can move cudf over to rapids-cmake for Thrust. This removes the need for custom install rules in cudf for Thrust, as rapids-cmake does that for us.
 
 This also separates out all Thrust patches so that we can better track upstream approval and remove as needed.

Authors:
  - Robert Maynard (https://github.com/robertmaynard)

Approvers:
  - Bradley Dice (https://github.com/bdice)
  - David Wendt (https://github.com/davidwendt)

URL: #11921
  • Loading branch information
robertmaynard authored Oct 24, 2022
1 parent 5c2150e commit 5a190b9
Show file tree
Hide file tree
Showing 8 changed files with 196 additions and 201 deletions.
1 change: 0 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -70,7 +70,6 @@ junit-cudf.xml
test-results

## Patching
*.diff
*.orig
*.rej

Expand Down
77 changes: 19 additions & 58 deletions cpp/cmake/thirdparty/get_thrust.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -13,73 +13,34 @@
# =============================================================================

# This function finds thrust and sets any additional necessary environment variables.
function(find_and_configure_thrust VERSION)
# We only want to set `UPDATE_DISCONNECTED` while the GIT tag hasn't moved from the last time we
# cloned
set(cpm_thrust_disconnect_update "UPDATE_DISCONNECTED TRUE")
set(CPM_THRUST_CURRENT_VERSION
${VERSION}
CACHE STRING "version of thrust we checked out"
)
if(NOT VERSION VERSION_EQUAL CPM_THRUST_CURRENT_VERSION)
set(CPM_THRUST_CURRENT_VERSION
${VERSION}
CACHE STRING "version of thrust we checked out" FORCE
)
set(cpm_thrust_disconnect_update "")
endif()
function(find_and_configure_thrust)

# We currently require cuDF to always build with a custom version of thrust. This is needed so
# that build times of of cudf are kept reasonable, without this CI builds of cudf will be killed
# as some source file can take over 45 minutes to build
#
set(CPM_DOWNLOAD_ALL TRUE)
rapids_cpm_find(
Thrust ${VERSION}
BUILD_EXPORT_SET cudf-exports
INSTALL_EXPORT_SET cudf-exports
CPM_ARGS
GIT_REPOSITORY https://github.com/NVIDIA/thrust.git
GIT_TAG ${VERSION}
GIT_SHALLOW TRUE ${cpm_thrust_disconnect_update}
PATCH_COMMAND patch --reject-file=- -p1 -N < ${CUDF_SOURCE_DIR}/cmake/thrust.patch || true
OPTIONS "THRUST_INSTALL TRUE"
)
include(${rapids-cmake-dir}/cpm/thrust.cmake)
include(${rapids-cmake-dir}/cpm/package_override.cmake)

if(NOT TARGET cudf::Thrust)
thrust_create_target(cudf::Thrust FROM_OPTIONS)
endif()
set(cudf_patch_dir "${CMAKE_CURRENT_FUNCTION_LIST_DIR}/patches")
rapids_cpm_package_override("${cudf_patch_dir}/thrust_override.json")

if(Thrust_SOURCE_DIR) # only install thrust when we have an in-source version
include(GNUInstallDirs)
install(
DIRECTORY "${Thrust_SOURCE_DIR}/thrust"
DESTINATION "${CMAKE_INSTALL_INCLUDEDIR}/libcudf/Thrust/"
FILES_MATCHING
REGEX "\\.(h|inl)$"
)
install(
DIRECTORY "${Thrust_SOURCE_DIR}/dependencies/cub/cub"
DESTINATION "${CMAKE_INSTALL_INCLUDEDIR}/libcudf/Thrust/dependencies/"
FILES_MATCHING
PATTERN "*.cuh"
)
# Make sure we install thrust into the `include/libcudf` subdirectory instead of the default
include(GNUInstallDirs)
set(CMAKE_INSTALL_INCLUDEDIR "${CMAKE_INSTALL_INCLUDEDIR}/libcudf")
set(CMAKE_INSTALL_LIBDIR "${CMAKE_INSTALL_INCLUDEDIR}/lib")

install(DIRECTORY "${Thrust_SOURCE_DIR}/thrust/cmake"
DESTINATION "${CMAKE_INSTALL_INCLUDEDIR}/libcudf/Thrust/thrust/"
)
install(DIRECTORY "${Thrust_SOURCE_DIR}/dependencies/cub/cub/cmake"
DESTINATION "${CMAKE_INSTALL_INCLUDEDIR}/libcudf/Thrust/dependencies/cub/"
)
# Find or install Thrust with our custom set of patches
rapids_cpm_thrust(
NAMESPACE cudf
BUILD_EXPORT_SET cudf-exports
INSTALL_EXPORT_SET cudf-exports
)

if(Thrust_SOURCE_DIR)
# Store where CMake can find our custom Thrust install
include("${rapids-cmake-dir}/export/find_package_root.cmake")
rapids_export_find_package_root(
INSTALL Thrust [=[${CMAKE_CURRENT_LIST_DIR}/../../../include/libcudf/Thrust/]=] cudf-exports
INSTALL Thrust [=[${CMAKE_CURRENT_LIST_DIR}/../../../include/libcudf/lib/cmake/thrust]=]
cudf-exports
)
endif()
endfunction()

set(CUDF_MIN_VERSION_Thrust 1.17.2)

find_and_configure_thrust(${CUDF_MIN_VERSION_Thrust})
find_and_configure_thrust()
29 changes: 29 additions & 0 deletions cpp/cmake/thirdparty/patches/thrust_disable_64bit_dispatching.diff
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
diff --git a/thrust/system/cuda/detail/dispatch.h b/thrust/system/cuda/detail/dispatch.h
index d0e3f94..76774b0 100644
--- a/thrust/system/cuda/detail/dispatch.h
+++ b/thrust/system/cuda/detail/dispatch.h
@@ -32,9 +32,8 @@
status = call arguments; \
} \
else { \
- auto THRUST_PP_CAT2(count, _fixed) = static_cast<thrust::detail::int64_t>(count); \
- status = call arguments; \
- }
+ throw std::runtime_error("THRUST_INDEX_TYPE_DISPATCH 64-bit count is unsupported in libcudf"); \
+ }

/**
* Dispatch between 32-bit and 64-bit index based versions of the same algorithm
@@ -52,10 +51,8 @@
status = call arguments; \
} \
else { \
- auto THRUST_PP_CAT2(count1, _fixed) = static_cast<thrust::detail::int64_t>(count1); \
- auto THRUST_PP_CAT2(count2, _fixed) = static_cast<thrust::detail::int64_t>(count2); \
- status = call arguments; \
- }
+ throw std::runtime_error("THRUST_DOUBLE_INDEX_TYPE_DISPATCH 64-bit count is unsupported in libcudf"); \
+ }
/**
* Dispatch between 32-bit and 64-bit index based versions of the same algorithm
* implementation. This version allows using different token sequences for callables
39 changes: 39 additions & 0 deletions cpp/cmake/thirdparty/patches/thrust_faster_scan_compile_times.diff
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
diff --git a/dependencies/cub/cub/device/dispatch/dispatch_radix_sort.cuh b/dependencies/cub/cub/device/dispatch/dispatch_radix_sort.cuh
index b188c75f..3f36656f 100644
--- a/dependencies/cub/cub/device/dispatch/dispatch_radix_sort.cuh
+++ b/dependencies/cub/cub/device/dispatch/dispatch_radix_sort.cuh
@@ -736,7 +736,7 @@ struct DeviceRadixSortPolicy


/// SM60 (GP100)
- struct Policy600 : ChainedPolicy<600, Policy600, Policy500>
+ struct Policy600 : ChainedPolicy<600, Policy600, Policy600>
{
enum {
PRIMARY_RADIX_BITS = (sizeof(KeyT) > 1) ? 7 : 5, // 6.9B 32b keys/s (Quadro P100)
diff --git a/dependencies/cub/cub/device/dispatch/dispatch_reduce.cuh b/dependencies/cub/cub/device/dispatch/dispatch_reduce.cuh
index e0470ccb..6a0c2ed6 100644
--- a/dependencies/cub/cub/device/dispatch/dispatch_reduce.cuh
+++ b/dependencies/cub/cub/device/dispatch/dispatch_reduce.cuh
@@ -280,7 +280,7 @@ struct DeviceReducePolicy
};

/// SM60
- struct Policy600 : ChainedPolicy<600, Policy600, Policy350>
+ struct Policy600 : ChainedPolicy<600, Policy600, Policy600>
{
// ReducePolicy (P100: 591 GB/s @ 64M 4B items; 583 GB/s @ 256M 1B items)
typedef AgentReducePolicy<
diff --git a/dependencies/cub/cub/device/dispatch/dispatch_scan.cuh b/dependencies/cub/cub/device/dispatch/dispatch_scan.cuh
index c2d04588..ac2d10e0 100644
--- a/dependencies/cub/cub/device/dispatch/dispatch_scan.cuh
+++ b/dependencies/cub/cub/device/dispatch/dispatch_scan.cuh
@@ -177,7 +177,7 @@ struct DeviceScanPolicy
};

/// SM600
- struct Policy600 : ChainedPolicy<600, Policy600, Policy520>
+ struct Policy600 : ChainedPolicy<600, Policy600, Policy600>
{
typedef AgentScanPolicy<
128, 15, ///< Threads per block, items per thread
49 changes: 49 additions & 0 deletions cpp/cmake/thirdparty/patches/thrust_faster_sort_compile_times.diff
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
diff --git a/dependencies/cub/cub/block/block_merge_sort.cuh b/dependencies/cub/cub/block/block_merge_sort.cuh
index 4769df36..d86d6342 100644
--- a/dependencies/cub/cub/block/block_merge_sort.cuh
+++ b/dependencies/cub/cub/block/block_merge_sort.cuh
@@ -91,7 +91,7 @@ __device__ __forceinline__ void SerialMerge(KeyT *keys_shared,
KeyT key1 = keys_shared[keys1_beg];
KeyT key2 = keys_shared[keys2_beg];

-#pragma unroll
+#pragma unroll 1
for (int item = 0; item < ITEMS_PER_THREAD; ++item)
{
bool p = (keys2_beg < keys2_end) &&
@@ -383,7 +383,7 @@ public:
//
KeyT max_key = oob_default;

- #pragma unroll
+ #pragma unroll 1
for (int item = 1; item < ITEMS_PER_THREAD; ++item)
{
if (ITEMS_PER_THREAD * linear_tid + item < valid_items)
@@ -407,7 +407,7 @@ public:
// each thread has sorted keys
// merge sort keys in shared memory
//
- #pragma unroll
+ #pragma unroll 1
for (int target_merged_threads_number = 2;
target_merged_threads_number <= NUM_THREADS;
target_merged_threads_number *= 2)
diff --git a/dependencies/cub/cub/thread/thread_sort.cuh b/dependencies/cub/cub/thread/thread_sort.cuh
index 5d486789..b42fb5f0 100644
--- a/dependencies/cub/cub/thread/thread_sort.cuh
+++ b/dependencies/cub/cub/thread/thread_sort.cuh
@@ -83,10 +83,10 @@ StableOddEvenSort(KeyT (&keys)[ITEMS_PER_THREAD],
{
constexpr bool KEYS_ONLY = std::is_same<ValueT, NullType>::value;

- #pragma unroll
+ #pragma unroll 1
for (int i = 0; i < ITEMS_PER_THREAD; ++i)
{
- #pragma unroll
+ #pragma unroll 1
for (int j = 1 & i; j < ITEMS_PER_THREAD - 1; j += 2)
{
if (compare_op(keys[j + 1], keys[j]))

34 changes: 34 additions & 0 deletions cpp/cmake/thirdparty/patches/thrust_override.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@

{
"packages" : {
"Thrust" : {
"patches" : [
{
"file" : "Thrust/install_rules.diff",
"issue" : "Thrust 1.X installs incorrect files [https://github.com/NVIDIA/thrust/issues/1790]",
"fixed_in" : "2.0.0"
},
{
"file" : "${current_json_dir}/thrust_transform_iter_with_reduce_by_key.diff",
"issue" : "Support transform_output_iterator as output of reduce by key [https://github.com/NVIDIA/thrust/pull/1805]",
"fixed_in" : "2.1"
},
{
"file" : "${current_json_dir}/thrust_disable_64bit_dispatching.diff",
"issue" : "Remove 64bit dispatching as not needed by libcudf and results in compiling twice as many kernels [https://github.com/rapidsai/cudf/pull/11437]",
"fixed_in" : ""
},
{
"file" : "${current_json_dir}/thrust_faster_sort_compile_times.diff",
"issue" : "Improve Thrust sort compile times by not unrolling loops for inlined comparators [https://github.com/rapidsai/cudf/pull/10577]",
"fixed_in" : ""
},
{
"file" : "${current_json_dir}/thrust_faster_scan_compile_times.diff",
"issue" : "Improve Thrust scan compile times by reducing the number of kernels generated [https://github.com/rapidsai/cudf/pull/8183]",
"fixed_in" : ""
}
]
}
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
diff --git a/thrust/iterator/transform_input_output_iterator.h b/thrust/iterator/transform_input_output_iterator.h
index f512a36..a5f725d 100644
--- a/thrust/iterator/transform_input_output_iterator.h
+++ b/thrust/iterator/transform_input_output_iterator.h
@@ -102,6 +102,8 @@ template <typename InputFunction, typename OutputFunction, typename Iterator>
/*! \endcond
*/

+ transform_input_output_iterator() = default;
+
/*! This constructor takes as argument a \c Iterator an \c InputFunction and an
* \c OutputFunction and copies them to a new \p transform_input_output_iterator
*
diff --git a/thrust/iterator/transform_output_iterator.h b/thrust/iterator/transform_output_iterator.h
index 66fb46a..4a68cb5 100644
--- a/thrust/iterator/transform_output_iterator.h
+++ b/thrust/iterator/transform_output_iterator.h
@@ -104,6 +104,8 @@ template <typename UnaryFunction, typename OutputIterator>
/*! \endcond
*/

+ transform_output_iterator() = default;
+
/*! This constructor takes as argument an \c OutputIterator and an \c
* UnaryFunction and copies them to a new \p transform_output_iterator
*
Loading

0 comments on commit 5a190b9

Please sign in to comment.