Skip to content

Commit

Permalink
Use CCCL 2.2.0.
Browse files Browse the repository at this point in the history
  • Loading branch information
bdice committed Dec 14, 2023
1 parent cee6429 commit 74979ea
Show file tree
Hide file tree
Showing 9 changed files with 66 additions and 175 deletions.
14 changes: 6 additions & 8 deletions cpp/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -183,17 +183,15 @@ rapids_cpm_init()
include(cmake/thirdparty/get_jitify.cmake)
# find nvCOMP
include(cmake/thirdparty/get_nvcomp.cmake)
# find thrust/cub
include(cmake/thirdparty/get_thrust.cmake)
# find CCCL
include(cmake/thirdparty/get_cccl.cmake)
# find rmm
include(cmake/thirdparty/get_rmm.cmake)
# find arrow
include(cmake/thirdparty/get_arrow.cmake)
# find dlpack
include(cmake/thirdparty/get_dlpack.cmake)
# find libcu++
include(cmake/thirdparty/get_libcudacxx.cmake)
# find cuCollections Should come after including thrust and libcudacxx
# find cuCollections; this must come after including CCCL
include(cmake/thirdparty/get_cucollections.cmake)
# find or install GoogleTest
if(CUDF_BUILD_TESTUTIL)
Expand Down Expand Up @@ -758,7 +756,7 @@ add_dependencies(cudf jitify_preprocess_run)
# Specify the target module library dependencies
target_link_libraries(
cudf
PUBLIC ${ARROW_LIBRARIES} libcudacxx::libcudacxx cudf::Thrust rmm::rmm
PUBLIC ${ARROW_LIBRARIES} libcudacxx::libcudacxx CCCL::Thrust rmm::rmm
PRIVATE cuco::cuco ZLIB::ZLIB nvcomp::nvcomp kvikio::kvikio
$<TARGET_NAME_IF_EXISTS:cuFile_interface>
)
Expand Down Expand Up @@ -1011,8 +1009,8 @@ following IMPORTED GLOBAL targets:

set(common_code_string
[=[
if(NOT TARGET cudf::Thrust)
thrust_create_target(cudf::Thrust FROM_OPTIONS)
if(NOT TARGET CCCL::Thrust)
thrust_create_target(CCCL::Thrust FROM_OPTIONS)
endif()
]=]
)
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
# =============================================================================
# Copyright (c) 2020-2023, NVIDIA CORPORATION.
# Copyright (c) 2023, NVIDIA CORPORATION.
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
# in compliance with the License. You may obtain a copy of the License at
Expand All @@ -12,34 +12,30 @@
# the License.
# =============================================================================

# This function finds thrust and sets any additional necessary environment variables.
function(find_and_configure_thrust)
# This function finds CCCL and sets any additional necessary environment variables.
function(find_and_configure_cccl)

include(${rapids-cmake-dir}/cpm/thrust.cmake)
include(${rapids-cmake-dir}/cpm/cccl.cmake)
include(${rapids-cmake-dir}/cpm/package_override.cmake)

set(cudf_patch_dir "${CMAKE_CURRENT_FUNCTION_LIST_DIR}/patches")
rapids_cpm_package_override("${cudf_patch_dir}/thrust_override.json")
rapids_cpm_package_override("${cudf_patch_dir}/cccl_override.json")

# Make sure we install thrust into the `include/libcudf` subdirectory instead of the default
# Make sure we install CCCL into the `include/libcudf` subdirectory instead of the default location
include(GNUInstallDirs)
set(CMAKE_INSTALL_INCLUDEDIR "${CMAKE_INSTALL_INCLUDEDIR}/libcudf")
set(CMAKE_INSTALL_LIBDIR "${CMAKE_INSTALL_INCLUDEDIR}/lib")

# Find or install Thrust with our custom set of patches
rapids_cpm_thrust(
NAMESPACE cudf
BUILD_EXPORT_SET cudf-exports
INSTALL_EXPORT_SET cudf-exports
)
# Find or install CCCL with our custom set of patches
rapids_cpm_cccl(BUILD_EXPORT_SET cudf-exports INSTALL_EXPORT_SET cudf-exports)

# Store where CMake can find our custom Thrust install
# Store where CMake can find our custom CCCL install
include("${rapids-cmake-dir}/export/find_package_root.cmake")
rapids_export_find_package_root(
INSTALL Thrust [=[${CMAKE_CURRENT_LIST_DIR}/../../../include/libcudf/lib/rapids/cmake/thrust]=]
INSTALL CCCL [=[${CMAKE_CURRENT_LIST_DIR}/../../../include/libcudf/lib/rapids/cmake/cccl]=]
EXPORT_SET cudf-exports
CONDITION Thrust_SOURCE_DIR
CONDITION CCCL_SOURCE_DIR
)
endfunction()

find_and_configure_thrust()
find_and_configure_cccl()
35 changes: 0 additions & 35 deletions cpp/cmake/thirdparty/get_libcudacxx.cmake

This file was deleted.

Original file line number Diff line number Diff line change
@@ -1,18 +1,8 @@

{
"packages" : {
"Thrust" : {
"CCCL" : {
"patches" : [
{
"file" : "Thrust/install_rules.diff",
"issue" : "Thrust 1.X installs incorrect files [https://github.com/NVIDIA/thrust/issues/1790]",
"fixed_in" : "2.0.0"
},
{
"file" : "${current_json_dir}/thrust_transform_iter_with_reduce_by_key.diff",
"issue" : "Support transform_output_iterator as output of reduce by key [https://github.com/NVIDIA/thrust/pull/1805]",
"fixed_in" : "2.1"
},
{
"file" : "${current_json_dir}/thrust_disable_64bit_dispatching.diff",
"issue" : "Remove 64bit dispatching as not needed by libcudf and results in compiling twice as many kernels [https://github.com/rapidsai/cudf/pull/11437]",
Expand All @@ -27,11 +17,6 @@
"file" : "${current_json_dir}/thrust_faster_scan_compile_times.diff",
"issue" : "Improve Thrust scan compile times by reducing the number of kernels generated [https://github.com/rapidsai/cudf/pull/8183]",
"fixed_in" : ""
},
{
"file" : "${current_json_dir}/cub_segmented_sort_with_bool_key.diff",
"issue" : "Fix an error in CUB DeviceSegmentedSort when the keys are bool type [https://github.com/NVIDIA/cub/issues/594]",
"fixed_in" : "2.1"
}
]
}
Expand Down
14 changes: 0 additions & 14 deletions cpp/cmake/thirdparty/patches/cub_segmented_sort_with_bool_key.diff

This file was deleted.

24 changes: 10 additions & 14 deletions cpp/cmake/thirdparty/patches/thrust_disable_64bit_dispatching.diff
Original file line number Diff line number Diff line change
@@ -1,29 +1,25 @@
diff --git a/thrust/system/cuda/detail/dispatch.h b/thrust/system/cuda/detail/dispatch.h
index d0e3f94..76774b0 100644
--- a/thrust/system/cuda/detail/dispatch.h
+++ b/thrust/system/cuda/detail/dispatch.h
@@ -32,9 +32,8 @@
diff --git a/thrust/thrust/system/cuda/detail/dispatch.h b/thrust/thrust/system/cuda/detail/dispatch.h
index d0e3f94ec..5c32a9c60 100644
--- a/thrust/thrust/system/cuda/detail/dispatch.h
+++ b/thrust/thrust/system/cuda/detail/dispatch.h
@@ -32,8 +32,7 @@
status = call arguments; \
} \
else { \
- auto THRUST_PP_CAT2(count, _fixed) = static_cast<thrust::detail::int64_t>(count); \
- status = call arguments; \
- }
+ throw std::runtime_error("THRUST_INDEX_TYPE_DISPATCH 64-bit count is unsupported in libcudf"); \
+ }
+ throw std::runtime_error("THRUST_INDEX_TYPE_DISPATCH 64-bit count is unsupported in libcudf"); \
}

/**
* Dispatch between 32-bit and 64-bit index based versions of the same algorithm
@@ -52,10 +51,8 @@
@@ -52,9 +51,7 @@
status = call arguments; \
} \
else { \
- auto THRUST_PP_CAT2(count1, _fixed) = static_cast<thrust::detail::int64_t>(count1); \
- auto THRUST_PP_CAT2(count2, _fixed) = static_cast<thrust::detail::int64_t>(count2); \
- status = call arguments; \
- }
+ throw std::runtime_error("THRUST_DOUBLE_INDEX_TYPE_DISPATCH 64-bit count is unsupported in libcudf"); \
+ }
+ throw std::runtime_error("THRUST_DOUBLE_INDEX_TYPE_DISPATCH 64-bit count is unsupported in libcudf"); \
}
/**
* Dispatch between 32-bit and 64-bit index based versions of the same algorithm
* implementation. This version allows using different token sequences for callables
58 changes: 29 additions & 29 deletions cpp/cmake/thirdparty/patches/thrust_faster_scan_compile_times.diff
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
diff --git a/dependencies/cub/cub/device/dispatch/dispatch_radix_sort.cuh b/dependencies/cub/cub/device/dispatch/dispatch_radix_sort.cuh
index b188c75f..3f36656f 100644
--- a/dependencies/cub/cub/device/dispatch/dispatch_radix_sort.cuh
+++ b/dependencies/cub/cub/device/dispatch/dispatch_radix_sort.cuh
@@ -736,7 +736,7 @@ struct DeviceRadixSortPolicy
diff --git a/cub/cub/device/dispatch/dispatch_radix_sort.cuh b/cub/cub/device/dispatch/dispatch_radix_sort.cuh
index 84b6ccffd..25a237f93 100644
--- a/cub/cub/device/dispatch/dispatch_radix_sort.cuh
+++ b/cub/cub/device/dispatch/dispatch_radix_sort.cuh
@@ -808,7 +808,7 @@ struct DeviceRadixSortPolicy


/// SM60 (GP100)
Expand All @@ -11,29 +11,29 @@ index b188c75f..3f36656f 100644
{
enum {
PRIMARY_RADIX_BITS = (sizeof(KeyT) > 1) ? 7 : 5, // 6.9B 32b keys/s (Quadro P100)
diff --git a/dependencies/cub/cub/device/dispatch/dispatch_reduce.cuh b/dependencies/cub/cub/device/dispatch/dispatch_reduce.cuh
index e0470ccb..6a0c2ed6 100644
--- a/dependencies/cub/cub/device/dispatch/dispatch_reduce.cuh
+++ b/dependencies/cub/cub/device/dispatch/dispatch_reduce.cuh
@@ -280,7 +280,7 @@ struct DeviceReducePolicy
};
diff --git a/cub/cub/device/dispatch/dispatch_reduce.cuh b/cub/cub/device/dispatch/dispatch_reduce.cuh
index 994adc095..d3e6719a7 100644
--- a/cub/cub/device/dispatch/dispatch_reduce.cuh
+++ b/cub/cub/device/dispatch/dispatch_reduce.cuh
@@ -479,7 +479,7 @@ struct DeviceReducePolicy
};

/// SM60
- struct Policy600 : ChainedPolicy<600, Policy600, Policy350>
+ struct Policy600 : ChainedPolicy<600, Policy600, Policy600>
{
// ReducePolicy (P100: 591 GB/s @ 64M 4B items; 583 GB/s @ 256M 1B items)
typedef AgentReducePolicy<
diff --git a/dependencies/cub/cub/device/dispatch/dispatch_scan.cuh b/dependencies/cub/cub/device/dispatch/dispatch_scan.cuh
index c2d04588..ac2d10e0 100644
--- a/dependencies/cub/cub/device/dispatch/dispatch_scan.cuh
+++ b/dependencies/cub/cub/device/dispatch/dispatch_scan.cuh
@@ -177,7 +177,7 @@ struct DeviceScanPolicy
};
/// SM60
- struct Policy600 : ChainedPolicy<600, Policy600, Policy350>
+ struct Policy600 : ChainedPolicy<600, Policy600, Policy600>
{
static constexpr int threads_per_block = 256;
static constexpr int items_per_thread = 16;
diff --git a/cub/cub/device/dispatch/tuning/tuning_scan.cuh b/cub/cub/device/dispatch/tuning/tuning_scan.cuh
index 0ea5c41ad..1bcd8a111 100644
--- a/cub/cub/device/dispatch/tuning/tuning_scan.cuh
+++ b/cub/cub/device/dispatch/tuning/tuning_scan.cuh
@@ -303,7 +303,7 @@ struct DeviceScanPolicy
/// SM600
struct Policy600
: DefaultTuning
- , ChainedPolicy<600, Policy600, Policy520>
+ , ChainedPolicy<600, Policy600, Policy600>
{};

/// SM600
- struct Policy600 : ChainedPolicy<600, Policy600, Policy520>
+ struct Policy600 : ChainedPolicy<600, Policy600, Policy600>
{
typedef AgentScanPolicy<
128, 15, ///< Threads per block, items per thread
/// SM800
25 changes: 8 additions & 17 deletions cpp/cmake/thirdparty/patches/thrust_faster_sort_compile_times.diff
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
diff --git a/dependencies/cub/cub/block/block_merge_sort.cuh b/dependencies/cub/cub/block/block_merge_sort.cuh
index 4769df36..d86d6342 100644
--- a/dependencies/cub/cub/block/block_merge_sort.cuh
+++ b/dependencies/cub/cub/block/block_merge_sort.cuh
diff --git a/cub/cub/block/block_merge_sort.cuh b/cub/cub/block/block_merge_sort.cuh
index dc07ef6c2..a066c14da 100644
--- a/cub/cub/block/block_merge_sort.cuh
+++ b/cub/cub/block/block_merge_sort.cuh
@@ -91,7 +91,7 @@ __device__ __forceinline__ void SerialMerge(KeyT *keys_shared,
KeyT key1 = keys_shared[keys1_beg];
KeyT key2 = keys_shared[keys2_beg];
Expand All @@ -20,19 +20,10 @@ index 4769df36..d86d6342 100644
for (int item = 1; item < ITEMS_PER_THREAD; ++item)
{
if (ITEMS_PER_THREAD * linear_tid + item < valid_items)
@@ -407,7 +407,7 @@ public:
// each thread has sorted keys
// merge sort keys in shared memory
//
- #pragma unroll
+ #pragma unroll 1
for (int target_merged_threads_number = 2;
target_merged_threads_number <= NUM_THREADS;
target_merged_threads_number *= 2)
diff --git a/dependencies/cub/cub/thread/thread_sort.cuh b/dependencies/cub/cub/thread/thread_sort.cuh
index 5d486789..b42fb5f0 100644
--- a/dependencies/cub/cub/thread/thread_sort.cuh
+++ b/dependencies/cub/cub/thread/thread_sort.cuh
diff --git a/cub/cub/thread/thread_sort.cuh b/cub/cub/thread/thread_sort.cuh
index 5d4867896..b42fb5f00 100644
--- a/cub/cub/thread/thread_sort.cuh
+++ b/cub/cub/thread/thread_sort.cuh
@@ -83,10 +83,10 @@ StableOddEvenSort(KeyT (&keys)[ITEMS_PER_THREAD],
{
constexpr bool KEYS_ONLY = std::is_same<ValueT, NullType>::value;
Expand Down

This file was deleted.

0 comments on commit 74979ea

Please sign in to comment.