Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Switch over to rapids-cmake patches for thrust #11921

Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 0 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -70,7 +70,6 @@ junit-cudf.xml
test-results

## Patching
*.diff
*.orig
*.rej

Expand Down
77 changes: 19 additions & 58 deletions cpp/cmake/thirdparty/get_thrust.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -13,73 +13,34 @@
# =============================================================================

# This function finds thrust and sets any additional necessary environment variables.
function(find_and_configure_thrust VERSION)
# We only want to set `UPDATE_DISCONNECTED` while the GIT tag hasn't moved from the last time we
# cloned
set(cpm_thrust_disconnect_update "UPDATE_DISCONNECTED TRUE")
set(CPM_THRUST_CURRENT_VERSION
${VERSION}
CACHE STRING "version of thrust we checked out"
)
if(NOT VERSION VERSION_EQUAL CPM_THRUST_CURRENT_VERSION)
set(CPM_THRUST_CURRENT_VERSION
${VERSION}
CACHE STRING "version of thrust we checked out" FORCE
)
set(cpm_thrust_disconnect_update "")
endif()
function(find_and_configure_thrust)

# We currently require cuDF to always build with a custom version of thrust. This is needed so
# that build times of of cudf are kept reasonable, without this CI builds of cudf will be killed
# as some source file can take over 45 minutes to build
#
set(CPM_DOWNLOAD_ALL TRUE)
rapids_cpm_find(
Thrust ${VERSION}
BUILD_EXPORT_SET cudf-exports
INSTALL_EXPORT_SET cudf-exports
CPM_ARGS
GIT_REPOSITORY https://github.com/NVIDIA/thrust.git
GIT_TAG ${VERSION}
GIT_SHALLOW TRUE ${cpm_thrust_disconnect_update}
PATCH_COMMAND patch --reject-file=- -p1 -N < ${CUDF_SOURCE_DIR}/cmake/thrust.patch || true
OPTIONS "THRUST_INSTALL TRUE"
)
include(${rapids-cmake-dir}/cpm/thrust.cmake)
include(${rapids-cmake-dir}/cpm/package_override.cmake)

if(NOT TARGET cudf::Thrust)
thrust_create_target(cudf::Thrust FROM_OPTIONS)
endif()
set(cudf_patch_dir "${CMAKE_CURRENT_FUNCTION_LIST_DIR}/patches")
rapids_cpm_package_override("${cudf_patch_dir}/thrust_override.json")

if(Thrust_SOURCE_DIR) # only install thrust when we have an in-source version
include(GNUInstallDirs)
install(
DIRECTORY "${Thrust_SOURCE_DIR}/thrust"
DESTINATION "${CMAKE_INSTALL_INCLUDEDIR}/libcudf/Thrust/"
FILES_MATCHING
REGEX "\\.(h|inl)$"
)
install(
DIRECTORY "${Thrust_SOURCE_DIR}/dependencies/cub/cub"
DESTINATION "${CMAKE_INSTALL_INCLUDEDIR}/libcudf/Thrust/dependencies/"
FILES_MATCHING
PATTERN "*.cuh"
)
# Make sure we install thrust into the `include/libcudf` subdirectory instead of the default
include(GNUInstallDirs)
set(CMAKE_INSTALL_INCLUDEDIR "${CMAKE_INSTALL_INCLUDEDIR}/libcudf")
set(CMAKE_INSTALL_LIBDIR "${CMAKE_INSTALL_INCLUDEDIR}/lib")

install(DIRECTORY "${Thrust_SOURCE_DIR}/thrust/cmake"
DESTINATION "${CMAKE_INSTALL_INCLUDEDIR}/libcudf/Thrust/thrust/"
)
install(DIRECTORY "${Thrust_SOURCE_DIR}/dependencies/cub/cub/cmake"
DESTINATION "${CMAKE_INSTALL_INCLUDEDIR}/libcudf/Thrust/dependencies/cub/"
)
# Find or install Thrust with our custom set of patches
rapids_cpm_thrust(
NAMESPACE cudf
BUILD_EXPORT_SET cudf-exports
INSTALL_EXPORT_SET cudf-exports
)

if(Thrust_SOURCE_DIR)
# Store where CMake can find our custom Thrust install
include("${rapids-cmake-dir}/export/find_package_root.cmake")
rapids_export_find_package_root(
INSTALL Thrust [=[${CMAKE_CURRENT_LIST_DIR}/../../../include/libcudf/Thrust/]=] cudf-exports
INSTALL Thrust [=[${CMAKE_CURRENT_LIST_DIR}/../../../include/libcudf/lib/cmake/thrust]=]
cudf-exports
)
endif()
endfunction()

set(CUDF_MIN_VERSION_Thrust 1.17.2)

find_and_configure_thrust(${CUDF_MIN_VERSION_Thrust})
find_and_configure_thrust()
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
diff --git a/thrust/system/cuda/detail/dispatch.h b/thrust/system/cuda/detail/dispatch.h
index d0e3f94..76774b0 100644
--- a/thrust/system/cuda/detail/dispatch.h
+++ b/thrust/system/cuda/detail/dispatch.h
@@ -32,9 +32,8 @@
status = call arguments; \
} \
else { \
- auto THRUST_PP_CAT2(count, _fixed) = static_cast<thrust::detail::int64_t>(count); \
- status = call arguments; \
- }
+ throw std::runtime_error("THRUST_INDEX_TYPE_DISPATCH 64-bit count is unsupported in libcudf"); \
+ }

/**
* Dispatch between 32-bit and 64-bit index based versions of the same algorithm
@@ -52,10 +51,8 @@
status = call arguments; \
} \
else { \
- auto THRUST_PP_CAT2(count1, _fixed) = static_cast<thrust::detail::int64_t>(count1); \
- auto THRUST_PP_CAT2(count2, _fixed) = static_cast<thrust::detail::int64_t>(count2); \
- status = call arguments; \
- }
+ throw std::runtime_error("THRUST_DOUBLE_INDEX_TYPE_DISPATCH 64-bit count is unsupported in libcudf"); \
+ }
/**
* Dispatch between 32-bit and 64-bit index based versions of the same algorithm
* implementation. This version allows using different token sequences for callables
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
diff --git a/dependencies/cub/cub/device/dispatch/dispatch_radix_sort.cuh b/dependencies/cub/cub/device/dispatch/dispatch_radix_sort.cuh
index b188c75f..3f36656f 100644
--- a/dependencies/cub/cub/device/dispatch/dispatch_radix_sort.cuh
+++ b/dependencies/cub/cub/device/dispatch/dispatch_radix_sort.cuh
@@ -736,7 +736,7 @@ struct DeviceRadixSortPolicy


/// SM60 (GP100)
- struct Policy600 : ChainedPolicy<600, Policy600, Policy500>
+ struct Policy600 : ChainedPolicy<600, Policy600, Policy600>
{
enum {
PRIMARY_RADIX_BITS = (sizeof(KeyT) > 1) ? 7 : 5, // 6.9B 32b keys/s (Quadro P100)
diff --git a/dependencies/cub/cub/device/dispatch/dispatch_reduce.cuh b/dependencies/cub/cub/device/dispatch/dispatch_reduce.cuh
index e0470ccb..6a0c2ed6 100644
--- a/dependencies/cub/cub/device/dispatch/dispatch_reduce.cuh
+++ b/dependencies/cub/cub/device/dispatch/dispatch_reduce.cuh
@@ -280,7 +280,7 @@ struct DeviceReducePolicy
};

/// SM60
- struct Policy600 : ChainedPolicy<600, Policy600, Policy350>
+ struct Policy600 : ChainedPolicy<600, Policy600, Policy600>
{
// ReducePolicy (P100: 591 GB/s @ 64M 4B items; 583 GB/s @ 256M 1B items)
typedef AgentReducePolicy<
diff --git a/dependencies/cub/cub/device/dispatch/dispatch_scan.cuh b/dependencies/cub/cub/device/dispatch/dispatch_scan.cuh
index c2d04588..ac2d10e0 100644
--- a/dependencies/cub/cub/device/dispatch/dispatch_scan.cuh
+++ b/dependencies/cub/cub/device/dispatch/dispatch_scan.cuh
@@ -177,7 +177,7 @@ struct DeviceScanPolicy
};

/// SM600
- struct Policy600 : ChainedPolicy<600, Policy600, Policy520>
+ struct Policy600 : ChainedPolicy<600, Policy600, Policy600>
{
typedef AgentScanPolicy<
128, 15, ///< Threads per block, items per thread
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
diff --git a/dependencies/cub/cub/block/block_merge_sort.cuh b/dependencies/cub/cub/block/block_merge_sort.cuh
index 4769df36..d86d6342 100644
--- a/dependencies/cub/cub/block/block_merge_sort.cuh
+++ b/dependencies/cub/cub/block/block_merge_sort.cuh
@@ -91,7 +91,7 @@ __device__ __forceinline__ void SerialMerge(KeyT *keys_shared,
KeyT key1 = keys_shared[keys1_beg];
KeyT key2 = keys_shared[keys2_beg];

-#pragma unroll
+#pragma unroll 1
for (int item = 0; item < ITEMS_PER_THREAD; ++item)
{
bool p = (keys2_beg < keys2_end) &&
@@ -383,7 +383,7 @@ public:
//
KeyT max_key = oob_default;

- #pragma unroll
+ #pragma unroll 1
for (int item = 1; item < ITEMS_PER_THREAD; ++item)
{
if (ITEMS_PER_THREAD * linear_tid + item < valid_items)
@@ -407,7 +407,7 @@ public:
// each thread has sorted keys
// merge sort keys in shared memory
//
- #pragma unroll
+ #pragma unroll 1
for (int target_merged_threads_number = 2;
target_merged_threads_number <= NUM_THREADS;
target_merged_threads_number *= 2)
diff --git a/dependencies/cub/cub/thread/thread_sort.cuh b/dependencies/cub/cub/thread/thread_sort.cuh
index 5d486789..b42fb5f0 100644
--- a/dependencies/cub/cub/thread/thread_sort.cuh
+++ b/dependencies/cub/cub/thread/thread_sort.cuh
@@ -83,10 +83,10 @@ StableOddEvenSort(KeyT (&keys)[ITEMS_PER_THREAD],
{
constexpr bool KEYS_ONLY = std::is_same<ValueT, NullType>::value;

- #pragma unroll
+ #pragma unroll 1
for (int i = 0; i < ITEMS_PER_THREAD; ++i)
{
- #pragma unroll
+ #pragma unroll 1
for (int j = 1 & i; j < ITEMS_PER_THREAD - 1; j += 2)
{
if (compare_op(keys[j + 1], keys[j]))

34 changes: 34 additions & 0 deletions cpp/cmake/thirdparty/patches/thrust_override.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@

{
"packages" : {
"Thrust" : {
"patches" : [
{
"file" : "Thrust/install_rules.diff",
"issue" : "Thrust 1.X installs incorrect files [https://github.com/NVIDIA/thrust/issues/1790]",
"fixed_in" : "2.0.0"
},
{
"file" : "${current_json_dir}/thrust_transform_iter_with_reduce_by_key.diff",
"issue" : "Support transform_output_iterator as output of reduce by key [https://github.com/NVIDIA/thrust/pull/1805]",
"fixed_in" : "2.1"
},
{
"file" : "${current_json_dir}/thrust_disable_64bit_dispatching.diff",
"issue" : "Remove 64bit dispatching as not needed by libcudf and results in compiling twice as many kernels [https://github.com/rapidsai/cudf/pull/11437]",
"fixed_in" : ""
},
{
"file" : "${current_json_dir}/thrust_faster_sort_compile_times.diff",
"issue" : "Improve Thrust sort compile times by not unrolling loops for inlined comparators [https://github.com/rapidsai/cudf/pull/10577]",
"fixed_in" : ""
},
{
"file" : "${current_json_dir}/thrust_faster_scan_compile_times.diff",
"issue" : "Improve Thrust scan compile times by reducing the number of kernels generated [https://github.com/rapidsai/cudf/pull/8183]",
"fixed_in" : ""
}
]
}
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
diff --git a/thrust/iterator/transform_input_output_iterator.h b/thrust/iterator/transform_input_output_iterator.h
index f512a36..a5f725d 100644
--- a/thrust/iterator/transform_input_output_iterator.h
+++ b/thrust/iterator/transform_input_output_iterator.h
@@ -102,6 +102,8 @@ template <typename InputFunction, typename OutputFunction, typename Iterator>
/*! \endcond
*/

+ transform_input_output_iterator() = default;
+
/*! This constructor takes as argument a \c Iterator an \c InputFunction and an
* \c OutputFunction and copies them to a new \p transform_input_output_iterator
*
diff --git a/thrust/iterator/transform_output_iterator.h b/thrust/iterator/transform_output_iterator.h
index 66fb46a..4a68cb5 100644
--- a/thrust/iterator/transform_output_iterator.h
+++ b/thrust/iterator/transform_output_iterator.h
@@ -104,6 +104,8 @@ template <typename UnaryFunction, typename OutputIterator>
/*! \endcond
*/

+ transform_output_iterator() = default;
+
/*! This constructor takes as argument an \c OutputIterator and an \c
* UnaryFunction and copies them to a new \p transform_output_iterator
*
Loading