-
Notifications
You must be signed in to change notification settings - Fork 915
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Switch over to rapids-cmake patches for thrust (#11921)
Now that rapids-cmake supports custom patches, we can move cudf over to rapids-cmake for Thrust. This removes the need for custom Thrust install rules in cudf, because rapids-cmake now handles them for us. It also splits the Thrust patches into separate files so that we can better track upstream approval of each one and remove it when it is no longer needed. Authors: Robert Maynard (https://github.com/robertmaynard). Approvers: Bradley Dice (https://github.com/bdice), David Wendt (https://github.com/davidwendt). URL: #11921
- Loading branch information
1 parent
5c2150e
commit 5a190b9
Showing
8 changed files
with
196 additions
and
201 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -70,7 +70,6 @@ junit-cudf.xml | |
test-results | ||
|
||
## Patching | ||
*.diff | ||
*.orig | ||
*.rej | ||
|
||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
29 changes: 29 additions & 0 deletions
29
cpp/cmake/thirdparty/patches/thrust_disable_64bit_dispatching.diff
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,29 @@ | ||
diff --git a/thrust/system/cuda/detail/dispatch.h b/thrust/system/cuda/detail/dispatch.h | ||
index d0e3f94..76774b0 100644 | ||
--- a/thrust/system/cuda/detail/dispatch.h | ||
+++ b/thrust/system/cuda/detail/dispatch.h | ||
@@ -32,9 +32,8 @@ | ||
status = call arguments; \ | ||
} \ | ||
else { \ | ||
- auto THRUST_PP_CAT2(count, _fixed) = static_cast<thrust::detail::int64_t>(count); \ | ||
- status = call arguments; \ | ||
- } | ||
+ throw std::runtime_error("THRUST_INDEX_TYPE_DISPATCH 64-bit count is unsupported in libcudf"); \ | ||
+ } | ||
|
||
/** | ||
* Dispatch between 32-bit and 64-bit index based versions of the same algorithm | ||
@@ -52,10 +51,8 @@ | ||
status = call arguments; \ | ||
} \ | ||
else { \ | ||
- auto THRUST_PP_CAT2(count1, _fixed) = static_cast<thrust::detail::int64_t>(count1); \ | ||
- auto THRUST_PP_CAT2(count2, _fixed) = static_cast<thrust::detail::int64_t>(count2); \ | ||
- status = call arguments; \ | ||
- } | ||
+ throw std::runtime_error("THRUST_DOUBLE_INDEX_TYPE_DISPATCH 64-bit count is unsupported in libcudf"); \ | ||
+ } | ||
/** | ||
* Dispatch between 32-bit and 64-bit index based versions of the same algorithm | ||
* implementation. This version allows using different token sequences for callables |
39 changes: 39 additions & 0 deletions
39
cpp/cmake/thirdparty/patches/thrust_faster_scan_compile_times.diff
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,39 @@ | ||
diff --git a/dependencies/cub/cub/device/dispatch/dispatch_radix_sort.cuh b/dependencies/cub/cub/device/dispatch/dispatch_radix_sort.cuh | ||
index b188c75f..3f36656f 100644 | ||
--- a/dependencies/cub/cub/device/dispatch/dispatch_radix_sort.cuh | ||
+++ b/dependencies/cub/cub/device/dispatch/dispatch_radix_sort.cuh | ||
@@ -736,7 +736,7 @@ struct DeviceRadixSortPolicy | ||
|
||
|
||
/// SM60 (GP100) | ||
- struct Policy600 : ChainedPolicy<600, Policy600, Policy500> | ||
+ struct Policy600 : ChainedPolicy<600, Policy600, Policy600> | ||
{ | ||
enum { | ||
PRIMARY_RADIX_BITS = (sizeof(KeyT) > 1) ? 7 : 5, // 6.9B 32b keys/s (Quadro P100) | ||
diff --git a/dependencies/cub/cub/device/dispatch/dispatch_reduce.cuh b/dependencies/cub/cub/device/dispatch/dispatch_reduce.cuh | ||
index e0470ccb..6a0c2ed6 100644 | ||
--- a/dependencies/cub/cub/device/dispatch/dispatch_reduce.cuh | ||
+++ b/dependencies/cub/cub/device/dispatch/dispatch_reduce.cuh | ||
@@ -280,7 +280,7 @@ struct DeviceReducePolicy | ||
}; | ||
|
||
/// SM60 | ||
- struct Policy600 : ChainedPolicy<600, Policy600, Policy350> | ||
+ struct Policy600 : ChainedPolicy<600, Policy600, Policy600> | ||
{ | ||
// ReducePolicy (P100: 591 GB/s @ 64M 4B items; 583 GB/s @ 256M 1B items) | ||
typedef AgentReducePolicy< | ||
diff --git a/dependencies/cub/cub/device/dispatch/dispatch_scan.cuh b/dependencies/cub/cub/device/dispatch/dispatch_scan.cuh | ||
index c2d04588..ac2d10e0 100644 | ||
--- a/dependencies/cub/cub/device/dispatch/dispatch_scan.cuh | ||
+++ b/dependencies/cub/cub/device/dispatch/dispatch_scan.cuh | ||
@@ -177,7 +177,7 @@ struct DeviceScanPolicy | ||
}; | ||
|
||
/// SM600 | ||
- struct Policy600 : ChainedPolicy<600, Policy600, Policy520> | ||
+ struct Policy600 : ChainedPolicy<600, Policy600, Policy600> | ||
{ | ||
typedef AgentScanPolicy< | ||
128, 15, ///< Threads per block, items per thread |
49 changes: 49 additions & 0 deletions
49
cpp/cmake/thirdparty/patches/thrust_faster_sort_compile_times.diff
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,49 @@ | ||
diff --git a/dependencies/cub/cub/block/block_merge_sort.cuh b/dependencies/cub/cub/block/block_merge_sort.cuh | ||
index 4769df36..d86d6342 100644 | ||
--- a/dependencies/cub/cub/block/block_merge_sort.cuh | ||
+++ b/dependencies/cub/cub/block/block_merge_sort.cuh | ||
@@ -91,7 +91,7 @@ __device__ __forceinline__ void SerialMerge(KeyT *keys_shared, | ||
KeyT key1 = keys_shared[keys1_beg]; | ||
KeyT key2 = keys_shared[keys2_beg]; | ||
|
||
-#pragma unroll | ||
+#pragma unroll 1 | ||
for (int item = 0; item < ITEMS_PER_THREAD; ++item) | ||
{ | ||
bool p = (keys2_beg < keys2_end) && | ||
@@ -383,7 +383,7 @@ public: | ||
// | ||
KeyT max_key = oob_default; | ||
|
||
- #pragma unroll | ||
+ #pragma unroll 1 | ||
for (int item = 1; item < ITEMS_PER_THREAD; ++item) | ||
{ | ||
if (ITEMS_PER_THREAD * linear_tid + item < valid_items) | ||
@@ -407,7 +407,7 @@ public: | ||
// each thread has sorted keys | ||
// merge sort keys in shared memory | ||
// | ||
- #pragma unroll | ||
+ #pragma unroll 1 | ||
for (int target_merged_threads_number = 2; | ||
target_merged_threads_number <= NUM_THREADS; | ||
target_merged_threads_number *= 2) | ||
diff --git a/dependencies/cub/cub/thread/thread_sort.cuh b/dependencies/cub/cub/thread/thread_sort.cuh | ||
index 5d486789..b42fb5f0 100644 | ||
--- a/dependencies/cub/cub/thread/thread_sort.cuh | ||
+++ b/dependencies/cub/cub/thread/thread_sort.cuh | ||
@@ -83,10 +83,10 @@ StableOddEvenSort(KeyT (&keys)[ITEMS_PER_THREAD], | ||
{ | ||
constexpr bool KEYS_ONLY = std::is_same<ValueT, NullType>::value; | ||
|
||
- #pragma unroll | ||
+ #pragma unroll 1 | ||
for (int i = 0; i < ITEMS_PER_THREAD; ++i) | ||
{ | ||
- #pragma unroll | ||
+ #pragma unroll 1 | ||
for (int j = 1 & i; j < ITEMS_PER_THREAD - 1; j += 2) | ||
{ | ||
if (compare_op(keys[j + 1], keys[j])) | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,34 @@ | ||
|
||
{ | ||
"packages" : { | ||
"Thrust" : { | ||
"patches" : [ | ||
{ | ||
"file" : "Thrust/install_rules.diff", | ||
"issue" : "Thrust 1.X installs incorrect files [https://github.com/NVIDIA/thrust/issues/1790]", | ||
"fixed_in" : "2.0.0" | ||
}, | ||
{ | ||
"file" : "${current_json_dir}/thrust_transform_iter_with_reduce_by_key.diff", | ||
"issue" : "Support transform_output_iterator as output of reduce by key [https://github.com/NVIDIA/thrust/pull/1805]", | ||
"fixed_in" : "2.1" | ||
}, | ||
{ | ||
"file" : "${current_json_dir}/thrust_disable_64bit_dispatching.diff", | ||
"issue" : "Remove 64bit dispatching as not needed by libcudf and results in compiling twice as many kernels [https://github.com/rapidsai/cudf/pull/11437]", | ||
"fixed_in" : "" | ||
}, | ||
{ | ||
"file" : "${current_json_dir}/thrust_faster_sort_compile_times.diff", | ||
"issue" : "Improve Thrust sort compile times by not unrolling loops for inlined comparators [https://github.com/rapidsai/cudf/pull/10577]", | ||
"fixed_in" : "" | ||
}, | ||
{ | ||
"file" : "${current_json_dir}/thrust_faster_scan_compile_times.diff", | ||
"issue" : "Improve Thrust scan compile times by reducing the number of kernels generated [https://github.com/rapidsai/cudf/pull/8183]", | ||
"fixed_in" : "" | ||
} | ||
] | ||
} | ||
} | ||
} |
26 changes: 26 additions & 0 deletions
26
cpp/cmake/thirdparty/patches/thrust_transform_iter_with_reduce_by_key.diff
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,26 @@ | ||
diff --git a/thrust/iterator/transform_input_output_iterator.h b/thrust/iterator/transform_input_output_iterator.h | ||
index f512a36..a5f725d 100644 | ||
--- a/thrust/iterator/transform_input_output_iterator.h | ||
+++ b/thrust/iterator/transform_input_output_iterator.h | ||
@@ -102,6 +102,8 @@ template <typename InputFunction, typename OutputFunction, typename Iterator> | ||
/*! \endcond | ||
*/ | ||
|
||
+ transform_input_output_iterator() = default; | ||
+ | ||
/*! This constructor takes as argument a \c Iterator an \c InputFunction and an | ||
* \c OutputFunction and copies them to a new \p transform_input_output_iterator | ||
* | ||
diff --git a/thrust/iterator/transform_output_iterator.h b/thrust/iterator/transform_output_iterator.h | ||
index 66fb46a..4a68cb5 100644 | ||
--- a/thrust/iterator/transform_output_iterator.h | ||
+++ b/thrust/iterator/transform_output_iterator.h | ||
@@ -104,6 +104,8 @@ template <typename UnaryFunction, typename OutputIterator> | ||
/*! \endcond | ||
*/ | ||
|
||
+ transform_output_iterator() = default; | ||
+ | ||
/*! This constructor takes as argument an \c OutputIterator and an \c | ||
* UnaryFunction and copies them to a new \p transform_output_iterator | ||
* |
Oops, something went wrong.