From a989230124f672212199f643ac027f980b84353d Mon Sep 17 00:00:00 2001 From: Bradley Dice Date: Sun, 10 Dec 2023 21:59:41 -0600 Subject: [PATCH 01/10] Use CCCL 2.2.0. --- cpp/CMakeLists.txt | 14 ++--- .../{get_thrust.cmake => get_cccl.cmake} | 25 ++++---- cpp/cmake/thirdparty/get_libcudacxx.cmake | 35 ----------- ...hrust_override.json => cccl_override.json} | 17 +----- .../cub_segmented_sort_with_bool_key.diff | 14 ----- .../thrust_disable_64bit_dispatching.diff | 24 ++++---- .../thrust_faster_scan_compile_times.diff | 58 +++++++++---------- .../thrust_faster_sort_compile_times.diff | 25 +++----- ...ust_transform_iter_with_reduce_by_key.diff | 26 --------- 9 files changed, 66 insertions(+), 172 deletions(-) rename cpp/cmake/thirdparty/{get_thrust.cmake => get_cccl.cmake} (63%) delete mode 100644 cpp/cmake/thirdparty/get_libcudacxx.cmake rename cpp/cmake/thirdparty/patches/{thrust_override.json => cccl_override.json} (54%) delete mode 100644 cpp/cmake/thirdparty/patches/cub_segmented_sort_with_bool_key.diff delete mode 100644 cpp/cmake/thirdparty/patches/thrust_transform_iter_with_reduce_by_key.diff diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index dbd67a1e6ac..aa28a65db49 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -183,17 +183,15 @@ rapids_cpm_init() include(cmake/thirdparty/get_jitify.cmake) # find nvCOMP include(cmake/thirdparty/get_nvcomp.cmake) -# find thrust/cub -include(cmake/thirdparty/get_thrust.cmake) +# find CCCL +include(cmake/thirdparty/get_cccl.cmake) # find rmm include(cmake/thirdparty/get_rmm.cmake) # find arrow include(cmake/thirdparty/get_arrow.cmake) # find dlpack include(cmake/thirdparty/get_dlpack.cmake) -# find libcu++ -include(cmake/thirdparty/get_libcudacxx.cmake) -# find cuCollections Should come after including thrust and libcudacxx +# find cuCollections, should come after including CCCL include(cmake/thirdparty/get_cucollections.cmake) # find or install GoogleTest if(CUDF_BUILD_TESTUTIL) @@ -758,7 +756,7 @@ add_dependencies(cudf jitify_preprocess_run) # Specify the target module library dependencies target_link_libraries( cudf - PUBLIC ${ARROW_LIBRARIES} libcudacxx::libcudacxx cudf::Thrust rmm::rmm + PUBLIC ${ARROW_LIBRARIES} libcudacxx::libcudacxx CCCL::Thrust rmm::rmm PRIVATE cuco::cuco ZLIB::ZLIB nvcomp::nvcomp kvikio::kvikio $ ) @@ -1011,8 +1009,8 @@ following IMPORTED GLOBAL targets: set(common_code_string [=[ -if(NOT TARGET cudf::Thrust) - thrust_create_target(cudf::Thrust FROM_OPTIONS) +if(NOT TARGET CCCL::Thrust) + thrust_create_target(CCCL::Thrust FROM_OPTIONS) endif() ]=] ) diff --git a/cpp/cmake/thirdparty/get_thrust.cmake b/cpp/cmake/thirdparty/get_cccl.cmake similarity index 63% rename from cpp/cmake/thirdparty/get_thrust.cmake rename to cpp/cmake/thirdparty/get_cccl.cmake index 67ed4287d7b..5d004edc49d 100644 --- a/cpp/cmake/thirdparty/get_thrust.cmake +++ b/cpp/cmake/thirdparty/get_cccl.cmake @@ -1,5 +1,5 @@ # ============================================================================= -# Copyright (c) 2020-2023, NVIDIA CORPORATION. +# Copyright (c) 2023, NVIDIA CORPORATION. # # Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except # in compliance with the License. You may obtain a copy of the License at @@ -12,34 +12,33 @@ # the License. # ============================================================================= -# This function finds thrust and sets any additional necessary environment variables. -function(find_and_configure_thrust) +# This function finds cccl and sets any additional necessary environment variables. +function(find_and_configure_cccl) - include(${rapids-cmake-dir}/cpm/thrust.cmake) + include(${rapids-cmake-dir}/cpm/cccl.cmake) include(${rapids-cmake-dir}/cpm/package_override.cmake) set(cudf_patch_dir "${CMAKE_CURRENT_FUNCTION_LIST_DIR}/patches") - rapids_cpm_package_override("${cudf_patch_dir}/thrust_override.json") + rapids_cpm_package_override("${cudf_patch_dir}/cccl_override.json") - # Make sure we install thrust into the `include/libcudf` subdirectory instead of the default + # Make sure we install cccl into the `include/libcudf` subdirectory instead of the default include(GNUInstallDirs) set(CMAKE_INSTALL_INCLUDEDIR "${CMAKE_INSTALL_INCLUDEDIR}/libcudf") set(CMAKE_INSTALL_LIBDIR "${CMAKE_INSTALL_INCLUDEDIR}/lib") - # Find or install Thrust with our custom set of patches - rapids_cpm_thrust( - NAMESPACE cudf + # Find or install CCCL with our custom set of patches + rapids_cpm_cccl( BUILD_EXPORT_SET cudf-exports INSTALL_EXPORT_SET cudf-exports ) - # Store where CMake can find our custom Thrust install + # Store where CMake can find our custom CCCL install include("${rapids-cmake-dir}/export/find_package_root.cmake") rapids_export_find_package_root( - INSTALL Thrust [=[${CMAKE_CURRENT_LIST_DIR}/../../../include/libcudf/lib/rapids/cmake/thrust]=] + INSTALL CCCL [=[${CMAKE_CURRENT_LIST_DIR}/../../../include/libcudf/lib/rapids/cmake/cccl]=] EXPORT_SET cudf-exports - CONDITION Thrust_SOURCE_DIR + CONDITION CCCL_SOURCE_DIR ) endfunction() -find_and_configure_thrust() +find_and_configure_cccl() diff --git a/cpp/cmake/thirdparty/get_libcudacxx.cmake b/cpp/cmake/thirdparty/get_libcudacxx.cmake deleted file mode 100644 index 285d66287f3..00000000000 --- a/cpp/cmake/thirdparty/get_libcudacxx.cmake +++ /dev/null @@ -1,35 +0,0 @@ -# ============================================================================= -# Copyright (c) 2023, NVIDIA CORPORATION. -# -# Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except -# in compliance with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software distributed under the License -# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express -# or implied. See the License for the specific language governing permissions and limitations under -# the License. -# ============================================================================= - -# This function finds thrust and sets any additional necessary environment variables. -function(find_and_configure_libcudacxx) - # Make sure we install libcudacxx beside our patched version of thrust - include(GNUInstallDirs) - set(CMAKE_INSTALL_INCLUDEDIR "${CMAKE_INSTALL_INCLUDEDIR}/libcudf") - set(CMAKE_INSTALL_LIBDIR "${CMAKE_INSTALL_INCLUDEDIR}/lib") - - include(${rapids-cmake-dir}/cpm/libcudacxx.cmake) - rapids_cpm_libcudacxx(BUILD_EXPORT_SET cudf-exports INSTALL_EXPORT_SET cudf-exports) - - # Store where CMake can find our custom Thrust install - include("${rapids-cmake-dir}/export/find_package_root.cmake") - rapids_export_find_package_root( - INSTALL libcudacxx - [=[${CMAKE_CURRENT_LIST_DIR}/../../../include/libcudf/lib/rapids/cmake/libcudacxx]=] - EXPORT_SET cudf-exports - CONDITION libcudacxx_SOURCE_DIR - ) -endfunction() - -find_and_configure_libcudacxx() diff --git a/cpp/cmake/thirdparty/patches/thrust_override.json b/cpp/cmake/thirdparty/patches/cccl_override.json similarity index 54% rename from cpp/cmake/thirdparty/patches/thrust_override.json rename to cpp/cmake/thirdparty/patches/cccl_override.json index ded2b90eeba..2f29578f7ae 100644 --- a/cpp/cmake/thirdparty/patches/thrust_override.json +++ b/cpp/cmake/thirdparty/patches/cccl_override.json @@ -1,18 +1,8 @@ { "packages" : { - "Thrust" : { + "CCCL" : { "patches" : [ - { - "file" : "Thrust/install_rules.diff", - "issue" : "Thrust 1.X installs incorrect files [https://github.com/NVIDIA/thrust/issues/1790]", - "fixed_in" : "2.0.0" - }, - { - "file" : "${current_json_dir}/thrust_transform_iter_with_reduce_by_key.diff", - "issue" : "Support transform_output_iterator as output of reduce by key [https://github.com/NVIDIA/thrust/pull/1805]", - "fixed_in" : "2.1" - }, { "file" : "${current_json_dir}/thrust_disable_64bit_dispatching.diff", "issue" : "Remove 64bit dispatching as not needed by libcudf and results in compiling twice as many kernels [https://github.com/rapidsai/cudf/pull/11437]", @@ -27,11 +17,6 @@ "file" : "${current_json_dir}/thrust_faster_scan_compile_times.diff", "issue" : "Improve Thrust scan compile times by reducing the number of kernels generated [https://github.com/rapidsai/cudf/pull/8183]", "fixed_in" : "" - }, - { - "file" : "${current_json_dir}/cub_segmented_sort_with_bool_key.diff", - "issue" : "Fix an error in CUB DeviceSegmentedSort when the keys are bool type [https://github.com/NVIDIA/cub/issues/594]", - "fixed_in" : "2.1" } ] } diff --git a/cpp/cmake/thirdparty/patches/cub_segmented_sort_with_bool_key.diff b/cpp/cmake/thirdparty/patches/cub_segmented_sort_with_bool_key.diff deleted file mode 100644 index 7c40fd4287d..00000000000 --- a/cpp/cmake/thirdparty/patches/cub_segmented_sort_with_bool_key.diff +++ /dev/null @@ -1,14 +0,0 @@ -diff --git a/dependencies/cub/cub/agent/agent_sub_warp_merge_sort.cuh b/dependencies/cub/cub/agent/agent_sub_warp_merge_sort.cuh -index ad65f2a3..ad45a21e 100644 ---- a/dependencies/cub/cub/agent/agent_sub_warp_merge_sort.cuh -+++ b/dependencies/cub/cub/agent/agent_sub_warp_merge_sort.cuh -@@ -221,7 +221,8 @@ public: - using UnsignedBitsT = typename Traits::UnsignedBits; - UnsignedBitsT default_key_bits = IS_DESCENDING ? Traits::LOWEST_KEY - : Traits::MAX_KEY; -- KeyT oob_default = reinterpret_cast(default_key_bits); -+ KeyT oob_default = std::is_same_v ? !IS_DESCENDING -+ : reinterpret_cast(default_key_bits); - - WarpLoadKeysT(storage.load_keys) - .Load(keys_input, keys, segment_size, oob_default); diff --git a/cpp/cmake/thirdparty/patches/thrust_disable_64bit_dispatching.diff b/cpp/cmake/thirdparty/patches/thrust_disable_64bit_dispatching.diff index 382f7dca1b0..d3f1a26781f 100644 --- a/cpp/cmake/thirdparty/patches/thrust_disable_64bit_dispatching.diff +++ b/cpp/cmake/thirdparty/patches/thrust_disable_64bit_dispatching.diff @@ -1,29 +1,25 @@ -diff --git a/thrust/system/cuda/detail/dispatch.h b/thrust/system/cuda/detail/dispatch.h -index d0e3f94..76774b0 100644 ---- a/thrust/system/cuda/detail/dispatch.h -+++ b/thrust/system/cuda/detail/dispatch.h -@@ -32,9 +32,8 @@ +diff --git a/thrust/thrust/system/cuda/detail/dispatch.h b/thrust/thrust/system/cuda/detail/dispatch.h +index d0e3f94ec..5c32a9c60 100644 +--- a/thrust/thrust/system/cuda/detail/dispatch.h ++++ b/thrust/thrust/system/cuda/detail/dispatch.h +@@ -32,8 +32,7 @@ status = call arguments; \ } \ else { \ - auto THRUST_PP_CAT2(count, _fixed) = static_cast(count); \ - status = call arguments; \ -- } -+ throw std::runtime_error("THRUST_INDEX_TYPE_DISPATCH 64-bit count is unsupported in libcudf"); \ -+ } ++ throw std::runtime_error("THRUST_INDEX_TYPE_DISPATCH 64-bit count is unsupported in libcudf"); \ + } /** - * Dispatch between 32-bit and 64-bit index based versions of the same algorithm -@@ -52,10 +51,8 @@ +@@ -52,9 +51,7 @@ status = call arguments; \ } \ else { \ - auto THRUST_PP_CAT2(count1, _fixed) = static_cast(count1); \ - auto THRUST_PP_CAT2(count2, _fixed) = static_cast(count2); \ - status = call arguments; \ -- } -+ throw std::runtime_error("THRUST_DOUBLE_INDEX_TYPE_DISPATCH 64-bit count is unsupported in libcudf"); \ -+ } ++ throw std::runtime_error("THRUST_DOUBLE_INDEX_TYPE_DISPATCH 64-bit count is unsupported in libcudf"); \ + } /** * Dispatch between 32-bit and 64-bit index based versions of the same algorithm - * implementation. This version allows using different token sequences for callables diff --git a/cpp/cmake/thirdparty/patches/thrust_faster_scan_compile_times.diff b/cpp/cmake/thirdparty/patches/thrust_faster_scan_compile_times.diff index 6bf165805cc..a606e21b92d 100644 --- a/cpp/cmake/thirdparty/patches/thrust_faster_scan_compile_times.diff +++ b/cpp/cmake/thirdparty/patches/thrust_faster_scan_compile_times.diff @@ -1,8 +1,8 @@ -diff --git a/dependencies/cub/cub/device/dispatch/dispatch_radix_sort.cuh b/dependencies/cub/cub/device/dispatch/dispatch_radix_sort.cuh -index b188c75f..3f36656f 100644 ---- a/dependencies/cub/cub/device/dispatch/dispatch_radix_sort.cuh -+++ b/dependencies/cub/cub/device/dispatch/dispatch_radix_sort.cuh -@@ -736,7 +736,7 @@ struct DeviceRadixSortPolicy +diff --git a/cub/cub/device/dispatch/dispatch_radix_sort.cuh b/cub/cub/device/dispatch/dispatch_radix_sort.cuh +index 84b6ccffd..25a237f93 100644 +--- a/cub/cub/device/dispatch/dispatch_radix_sort.cuh ++++ b/cub/cub/device/dispatch/dispatch_radix_sort.cuh +@@ -808,7 +808,7 @@ struct DeviceRadixSortPolicy /// SM60 (GP100) @@ -11,29 +11,29 @@ index b188c75f..3f36656f 100644 { enum { PRIMARY_RADIX_BITS = (sizeof(KeyT) > 1) ? 7 : 5, // 6.9B 32b keys/s (Quadro P100) -diff --git a/dependencies/cub/cub/device/dispatch/dispatch_reduce.cuh b/dependencies/cub/cub/device/dispatch/dispatch_reduce.cuh -index e0470ccb..6a0c2ed6 100644 ---- a/dependencies/cub/cub/device/dispatch/dispatch_reduce.cuh -+++ b/dependencies/cub/cub/device/dispatch/dispatch_reduce.cuh -@@ -280,7 +280,7 @@ struct DeviceReducePolicy - }; +diff --git a/cub/cub/device/dispatch/dispatch_reduce.cuh b/cub/cub/device/dispatch/dispatch_reduce.cuh +index 994adc095..d3e6719a7 100644 +--- a/cub/cub/device/dispatch/dispatch_reduce.cuh ++++ b/cub/cub/device/dispatch/dispatch_reduce.cuh +@@ -479,7 +479,7 @@ struct DeviceReducePolicy + }; - /// SM60 -- struct Policy600 : ChainedPolicy<600, Policy600, Policy350> -+ struct Policy600 : ChainedPolicy<600, Policy600, Policy600> - { - // ReducePolicy (P100: 591 GB/s @ 64M 4B items; 583 GB/s @ 256M 1B items) - typedef AgentReducePolicy< -diff --git a/dependencies/cub/cub/device/dispatch/dispatch_scan.cuh b/dependencies/cub/cub/device/dispatch/dispatch_scan.cuh -index c2d04588..ac2d10e0 100644 ---- a/dependencies/cub/cub/device/dispatch/dispatch_scan.cuh -+++ b/dependencies/cub/cub/device/dispatch/dispatch_scan.cuh -@@ -177,7 +177,7 @@ struct DeviceScanPolicy - }; + /// SM60 +- struct Policy600 : ChainedPolicy<600, Policy600, Policy350> ++ struct Policy600 : ChainedPolicy<600, Policy600, Policy600> + { + static constexpr int threads_per_block = 256; + static constexpr int items_per_thread = 16; +diff --git a/cub/cub/device/dispatch/tuning/tuning_scan.cuh b/cub/cub/device/dispatch/tuning/tuning_scan.cuh +index 0ea5c41ad..1bcd8a111 100644 +--- a/cub/cub/device/dispatch/tuning/tuning_scan.cuh ++++ b/cub/cub/device/dispatch/tuning/tuning_scan.cuh +@@ -303,7 +303,7 @@ struct DeviceScanPolicy + /// SM600 + struct Policy600 + : DefaultTuning +- , ChainedPolicy<600, Policy600, Policy520> ++ , ChainedPolicy<600, Policy600, Policy600> + {}; - /// SM600 -- struct Policy600 : ChainedPolicy<600, Policy600, Policy520> -+ struct Policy600 : ChainedPolicy<600, Policy600, Policy600> - { - typedef AgentScanPolicy< - 128, 15, ///< Threads per block, items per thread + /// SM800 diff --git a/cpp/cmake/thirdparty/patches/thrust_faster_sort_compile_times.diff b/cpp/cmake/thirdparty/patches/thrust_faster_sort_compile_times.diff index 864c89d4504..c34b6433d10 100644 --- a/cpp/cmake/thirdparty/patches/thrust_faster_sort_compile_times.diff +++ b/cpp/cmake/thirdparty/patches/thrust_faster_sort_compile_times.diff @@ -1,7 +1,7 @@ -diff --git a/dependencies/cub/cub/block/block_merge_sort.cuh b/dependencies/cub/cub/block/block_merge_sort.cuh -index 4769df36..d86d6342 100644 ---- a/dependencies/cub/cub/block/block_merge_sort.cuh -+++ b/dependencies/cub/cub/block/block_merge_sort.cuh +diff --git a/cub/cub/block/block_merge_sort.cuh b/cub/cub/block/block_merge_sort.cuh +index dc07ef6c2..a066c14da 100644 +--- a/cub/cub/block/block_merge_sort.cuh ++++ b/cub/cub/block/block_merge_sort.cuh @@ -91,7 +91,7 @@ __device__ __forceinline__ void SerialMerge(KeyT *keys_shared, KeyT key1 = keys_shared[keys1_beg]; KeyT key2 = keys_shared[keys2_beg]; @@ -20,19 +20,10 @@ index 4769df36..d86d6342 100644 for (int item = 1; item < ITEMS_PER_THREAD; ++item) { if (ITEMS_PER_THREAD * linear_tid + item < valid_items) -@@ -407,7 +407,7 @@ public: - // each thread has sorted keys - // merge sort keys in shared memory - // -- #pragma unroll -+ #pragma unroll 1 - for (int target_merged_threads_number = 2; - target_merged_threads_number <= NUM_THREADS; - target_merged_threads_number *= 2) -diff --git a/dependencies/cub/cub/thread/thread_sort.cuh b/dependencies/cub/cub/thread/thread_sort.cuh -index 5d486789..b42fb5f0 100644 ---- a/dependencies/cub/cub/thread/thread_sort.cuh -+++ b/dependencies/cub/cub/thread/thread_sort.cuh +diff --git a/cub/cub/thread/thread_sort.cuh b/cub/cub/thread/thread_sort.cuh +index 5d4867896..b42fb5f00 100644 +--- a/cub/cub/thread/thread_sort.cuh ++++ b/cub/cub/thread/thread_sort.cuh @@ -83,10 +83,10 @@ StableOddEvenSort(KeyT (&keys)[ITEMS_PER_THREAD], { constexpr bool KEYS_ONLY = std::is_same::value; diff --git a/cpp/cmake/thirdparty/patches/thrust_transform_iter_with_reduce_by_key.diff b/cpp/cmake/thirdparty/patches/thrust_transform_iter_with_reduce_by_key.diff deleted file mode 100644 index 6a56af90d0d..00000000000 --- a/cpp/cmake/thirdparty/patches/thrust_transform_iter_with_reduce_by_key.diff +++ /dev/null @@ -1,26 +0,0 @@ -diff --git a/thrust/iterator/transform_input_output_iterator.h b/thrust/iterator/transform_input_output_iterator.h -index f512a36..a5f725d 100644 ---- a/thrust/iterator/transform_input_output_iterator.h -+++ b/thrust/iterator/transform_input_output_iterator.h -@@ -102,6 +102,8 @@ template - /*! \endcond - */ - -+ transform_input_output_iterator() = default; -+ - /*! This constructor takes as argument a \c Iterator an \c InputFunction and an - * \c OutputFunction and copies them to a new \p transform_input_output_iterator - * -diff --git a/thrust/iterator/transform_output_iterator.h b/thrust/iterator/transform_output_iterator.h -index 66fb46a..4a68cb5 100644 ---- a/thrust/iterator/transform_output_iterator.h -+++ b/thrust/iterator/transform_output_iterator.h -@@ -104,6 +104,8 @@ template - /*! \endcond - */ - -+ transform_output_iterator() = default; -+ - /*! This constructor takes as argument an \c OutputIterator and an \c - * UnaryFunction and copies them to a new \p transform_output_iterator - * From 5b3bfe7e8a49d0639ee527ff8646aa755263b319 Mon Sep 17 00:00:00 2001 From: Bradley Dice Date: Sun, 10 Dec 2023 22:00:10 -0600 Subject: [PATCH 02/10] Use cuCollections built with CCCL 2.2.0. --- fetch_rapids.cmake | 2 ++ 1 file changed, 2 insertions(+) diff --git a/fetch_rapids.cmake b/fetch_rapids.cmake index 65b0b89042b..6bcf8402647 100644 --- a/fetch_rapids.cmake +++ b/fetch_rapids.cmake @@ -11,6 +11,8 @@ # or implied. See the License for the specific language governing permissions and limitations under # the License. # ============================================================================= +set(rapids-cmake-repo bdice/rapids-cmake) +set(rapids-cmake-branch cuco-cccl-2.2.0) if(NOT EXISTS ${CMAKE_CURRENT_BINARY_DIR}/CUDF_RAPIDS.cmake) file(DOWNLOAD https://raw.githubusercontent.com/rapidsai/rapids-cmake/branch-24.02/RAPIDS.cmake ${CMAKE_CURRENT_BINARY_DIR}/CUDF_RAPIDS.cmake From 48f4ebfec1e91ccca8265f371054c2e95ada6dd3 Mon Sep 17 00:00:00 2001 From: Bradley Dice Date: Sun, 10 Dec 2023 21:59:54 -0600 Subject: [PATCH 03/10] Use rmm artifacts. --- ci/build_cpp.sh | 3 +++ ci/build_python.sh | 10 ++++++++++ ci/test_cpp_common.sh | 3 +++ ci/test_python_common.sh | 4 ++++ 4 files changed, 20 insertions(+) diff --git a/ci/build_cpp.sh b/ci/build_cpp.sh index f1ad8ee7778..1719b66f65e 100755 --- a/ci/build_cpp.sh +++ b/ci/build_cpp.sh @@ -9,12 +9,15 @@ export CMAKE_GENERATOR=Ninja rapids-print-env +LIBRMM_CHANNEL=$(rapids-get-pr-conda-artifact rmm 1404 cpp) + version=$(rapids-generate-version) rapids-logger "Begin cpp build" # With boa installed conda build forward to boa RAPIDS_PACKAGE_VERSION=${version} rapids-conda-retry mambabuild \ + --channel "${LIBRMM_CHANNEL}" \ conda/recipes/libcudf rapids-upload-conda-to-s3 cpp diff --git a/ci/build_python.sh b/ci/build_python.sh index 32fe7b6b3ce..7252f566bcb 100755 --- a/ci/build_python.sh +++ b/ci/build_python.sh @@ -21,6 +21,8 @@ done rapids-logger "Begin py build" CPP_CHANNEL=$(rapids-download-conda-from-s3 cpp) +LIBRMM_CHANNEL=$(rapids-get-pr-conda-artifact rmm 1404 cpp) +RMM_CHANNEL=$(rapids-get-pr-conda-artifact rmm 1404 python) # TODO: Remove `--no-test` flag once importing on a CPU # node works correctly @@ -28,23 +30,31 @@ CPP_CHANNEL=$(rapids-download-conda-from-s3 cpp) RAPIDS_PACKAGE_VERSION=${version} rapids-conda-retry mambabuild \ --no-test \ --channel "${CPP_CHANNEL}" \ + --channel "${LIBRMM_CHANNEL}" \ + --channel "${RMM_CHANNEL}" \ conda/recipes/cudf RAPIDS_PACKAGE_VERSION=${version} rapids-conda-retry mambabuild \ --no-test \ --channel "${CPP_CHANNEL}" \ + --channel "${LIBRMM_CHANNEL}" \ + --channel "${RMM_CHANNEL}" \ --channel "${RAPIDS_CONDA_BLD_OUTPUT_DIR}" \ conda/recipes/dask-cudf RAPIDS_PACKAGE_VERSION=${version} rapids-conda-retry mambabuild \ --no-test \ --channel "${CPP_CHANNEL}" \ + --channel "${LIBRMM_CHANNEL}" \ + --channel "${RMM_CHANNEL}" \ --channel "${RAPIDS_CONDA_BLD_OUTPUT_DIR}" \ conda/recipes/cudf_kafka RAPIDS_PACKAGE_VERSION=${version} rapids-conda-retry mambabuild \ --no-test \ --channel "${CPP_CHANNEL}" \ + --channel "${LIBRMM_CHANNEL}" \ + --channel "${RMM_CHANNEL}" \ --channel "${RAPIDS_CONDA_BLD_OUTPUT_DIR}" \ conda/recipes/custreamz diff --git a/ci/test_cpp_common.sh b/ci/test_cpp_common.sh index 37aba751a09..b126773a2aa 100644 --- a/ci/test_cpp_common.sh +++ b/ci/test_cpp_common.sh @@ -22,6 +22,8 @@ conda activate test set -u CPP_CHANNEL=$(rapids-download-conda-from-s3 cpp) +LIBRMM_CHANNEL=$(rapids-get-pr-conda-artifact rmm 1404 cpp) + RESULTS_DIR=${RAPIDS_TESTS_DIR:-"$(mktemp -d)"} RAPIDS_TESTS_DIR=${RAPIDS_TESTS_DIR:-"${RESULTS_DIR}/test-results"}/ mkdir -p "${RAPIDS_TESTS_DIR}" @@ -30,6 +32,7 @@ rapids-print-env rapids-mamba-retry install \ --channel "${CPP_CHANNEL}" \ + --channel "${LIBRMM_CHANNEL}" \ libcudf libcudf_kafka libcudf-tests rapids-logger "Check GPU usage" diff --git a/ci/test_python_common.sh b/ci/test_python_common.sh index 1c330d47ac6..a18f113c2dd 100755 --- a/ci/test_python_common.sh +++ b/ci/test_python_common.sh @@ -26,6 +26,8 @@ set -u rapids-logger "Downloading artifacts from previous jobs" CPP_CHANNEL=$(rapids-download-conda-from-s3 cpp) PYTHON_CHANNEL=$(rapids-download-conda-from-s3 python) +LIBRMM_CHANNEL=$(rapids-get-pr-conda-artifact rmm 1404 cpp) +RMM_CHANNEL=$(rapids-get-pr-conda-artifact rmm 1404 python) RESULTS_DIR=${RAPIDS_TESTS_DIR:-"$(mktemp -d)"} RAPIDS_TESTS_DIR=${RAPIDS_TESTS_DIR:-"${RESULTS_DIR}/test-results"}/ @@ -37,4 +39,6 @@ rapids-print-env rapids-mamba-retry install \ --channel "${CPP_CHANNEL}" \ --channel "${PYTHON_CHANNEL}" \ + --channel "${LIBRMM_CHANNEL}" \ + --channel "${RMM_CHANNEL}" \ cudf libcudf From f02e48c5d3b1058bd3dda79e08dab853e6a54274 Mon Sep 17 00:00:00 2001 From: Bradley Dice Date: Thu, 14 Dec 2023 12:00:29 -0800 Subject: [PATCH 04/10] Try removing thrust_create_target. --- cpp/CMakeLists.txt | 13 +------------ 1 file changed, 1 insertion(+), 12 deletions(-) diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index aa28a65db49..aebfacde9b7 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -756,7 +756,7 @@ add_dependencies(cudf jitify_preprocess_run) # Specify the target module library dependencies target_link_libraries( cudf - PUBLIC ${ARROW_LIBRARIES} libcudacxx::libcudacxx CCCL::Thrust rmm::rmm + PUBLIC ${ARROW_LIBRARIES} CCCL::CCCL rmm::rmm PRIVATE cuco::cuco ZLIB::ZLIB nvcomp::nvcomp kvikio::kvikio $ ) @@ -1007,14 +1007,6 @@ following IMPORTED GLOBAL targets: ]=] ) -set(common_code_string - [=[ -if(NOT TARGET CCCL::Thrust) - thrust_create_target(CCCL::Thrust FROM_OPTIONS) -endif() -]=] -) - if(CUDF_ENABLE_ARROW_PARQUET) string( APPEND @@ -1038,7 +1030,6 @@ if(testing IN_LIST cudf_FIND_COMPONENTS) endif() ]=] ) -string(APPEND install_code_string "${common_code_string}") rapids_export( INSTALL cudf @@ -1062,8 +1053,6 @@ endif() ]=] ) -string(APPEND build_code_string "${common_code_string}") - rapids_export( BUILD cudf EXPORT_SET cudf-exports ${_components_export_string} From d93a9b426321fb1833761ba1d1325c29e76b8fcc Mon Sep 17 00:00:00 2001 From: Bradley Dice Date: Thu, 14 Dec 2023 14:37:45 -0800 Subject: [PATCH 05/10] cmake-format --- cpp/cmake/thirdparty/get_cccl.cmake | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/cpp/cmake/thirdparty/get_cccl.cmake b/cpp/cmake/thirdparty/get_cccl.cmake index 5d004edc49d..799bdd7f178 100644 --- a/cpp/cmake/thirdparty/get_cccl.cmake +++ b/cpp/cmake/thirdparty/get_cccl.cmake @@ -27,10 +27,7 @@ function(find_and_configure_cccl) set(CMAKE_INSTALL_LIBDIR "${CMAKE_INSTALL_INCLUDEDIR}/lib") # Find or install CCCL with our custom set of patches - rapids_cpm_cccl( - BUILD_EXPORT_SET cudf-exports - INSTALL_EXPORT_SET cudf-exports - ) + rapids_cpm_cccl(BUILD_EXPORT_SET cudf-exports INSTALL_EXPORT_SET cudf-exports) # Store where CMake can find our custom CCCL install include("${rapids-cmake-dir}/export/find_package_root.cmake") From 0c4634bbd6959955ca2eb2603fefbf52b2047613 Mon Sep 17 00:00:00 2001 From: Vyas Ramasubramani Date: Fri, 15 Dec 2023 17:50:09 -0800 Subject: [PATCH 06/10] Add in upstream CCCL patches --- cpp/cmake/thirdparty/patches/cccl_override.json | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/cpp/cmake/thirdparty/patches/cccl_override.json b/cpp/cmake/thirdparty/patches/cccl_override.json index 2f29578f7ae..c3356fe2711 100644 --- a/cpp/cmake/thirdparty/patches/cccl_override.json +++ b/cpp/cmake/thirdparty/patches/cccl_override.json @@ -3,6 +3,11 @@ "packages" : { "CCCL" : { "patches" : [ + { + "file" : "cccl/bug_fixes.diff", + "issue" : "CCCL installs header-search.cmake files in nondeterministic order and has a typo in checking target creation that leads to duplicates", + "fixed_in" : "2.3" + }, { "file" : "${current_json_dir}/thrust_disable_64bit_dispatching.diff", "issue" : "Remove 64bit dispatching as not needed by libcudf and results in compiling twice as many kernels [https://github.com/rapidsai/cudf/pull/11437]", From bc33a6ca603836be3321fd382be7f022a807823a Mon Sep 17 00:00:00 2001 From: Vyas Ramasubramani Date: Mon, 18 Dec 2023 09:16:45 -0800 Subject: [PATCH 07/10] Install rmm artifact in java build as well --- ci/test_java.sh | 3 +++ 1 file changed, 3 insertions(+) diff --git a/ci/test_java.sh b/ci/test_java.sh index 0863795162d..518b949f142 100755 --- a/ci/test_java.sh +++ b/ci/test_java.sh @@ -25,11 +25,14 @@ set -u rapids-print-env +LIBRMM_CHANNEL=$(rapids-get-pr-conda-artifact rmm 1404 cpp) + rapids-logger "Downloading artifacts from previous jobs" CPP_CHANNEL=$(rapids-download-conda-from-s3 cpp) rapids-mamba-retry install \ --channel "${CPP_CHANNEL}" \ + --channel "${LIBRMM_CHANNEL}" \ libcudf rapids-logger "Check GPU usage" From 5c07e832af0eaa849c2ad7a5694d877570f16ac6 Mon Sep 17 00:00:00 2001 From: Bradley Dice Date: Mon, 18 Dec 2023 14:02:44 -0600 Subject: [PATCH 08/10] Update cpp/CMakeLists.txt Co-authored-by: Vyas Ramasubramani --- cpp/CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index aebfacde9b7..4163275744e 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -183,7 +183,7 @@ rapids_cpm_init() include(cmake/thirdparty/get_jitify.cmake) # find nvCOMP include(cmake/thirdparty/get_nvcomp.cmake) -# find CCCL +# find CCCL before rmm so that we get cudf's patched version of CCCL include(cmake/thirdparty/get_cccl.cmake) # find rmm include(cmake/thirdparty/get_rmm.cmake) From 9772f4f80b6f32dac00e7f830354e971b1ac1fb8 Mon Sep 17 00:00:00 2001 From: Bradley Dice Date: Mon, 18 Dec 2023 16:10:31 -0800 Subject: [PATCH 09/10] Add patch for CCCL PR 211. --- cpp/cmake/thirdparty/patches/cccl_override.json | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/cpp/cmake/thirdparty/patches/cccl_override.json b/cpp/cmake/thirdparty/patches/cccl_override.json index c3356fe2711..fa82bfb5421 100644 --- a/cpp/cmake/thirdparty/patches/cccl_override.json +++ b/cpp/cmake/thirdparty/patches/cccl_override.json @@ -8,6 +8,11 @@ "issue" : "CCCL installs header-search.cmake files in nondeterministic order and has a typo in checking target creation that leads to duplicates", "fixed_in" : "2.3" }, + { + "file" : "cccl/revert_pr_211.diff", + "issue" : "thrust::copy introduced a change in behavior that causes failures with cudaErrorInvalidValue.", + "fixed_in" : "" + }, { "file" : "${current_json_dir}/thrust_disable_64bit_dispatching.diff", "issue" : "Remove 64bit dispatching as not needed by libcudf and results in compiling twice as many kernels [https://github.com/rapidsai/cudf/pull/11437]", From 68aca853473a8908330fa5389977a6c98702ed5c Mon Sep 17 00:00:00 2001 From: Bradley Dice Date: Tue, 19 Dec 2023 07:54:37 -0800 Subject: [PATCH 10/10] Revert testing changes. --- ci/build_cpp.sh | 3 --- ci/build_python.sh | 10 ---------- ci/test_cpp_common.sh | 2 -- ci/test_java.sh | 3 --- ci/test_python_common.sh | 4 ---- fetch_rapids.cmake | 2 -- 6 files changed, 24 deletions(-) diff --git a/ci/build_cpp.sh b/ci/build_cpp.sh index 1719b66f65e..f1ad8ee7778 100755 --- a/ci/build_cpp.sh +++ b/ci/build_cpp.sh @@ -9,15 +9,12 @@ export CMAKE_GENERATOR=Ninja rapids-print-env -LIBRMM_CHANNEL=$(rapids-get-pr-conda-artifact rmm 1404 cpp) - version=$(rapids-generate-version) rapids-logger "Begin cpp build" # With boa installed conda build forward to boa RAPIDS_PACKAGE_VERSION=${version} rapids-conda-retry mambabuild \ - --channel "${LIBRMM_CHANNEL}" \ conda/recipes/libcudf rapids-upload-conda-to-s3 cpp diff --git a/ci/build_python.sh b/ci/build_python.sh index 7252f566bcb..32fe7b6b3ce 100755 --- a/ci/build_python.sh +++ b/ci/build_python.sh @@ -21,8 +21,6 @@ done rapids-logger "Begin py build" CPP_CHANNEL=$(rapids-download-conda-from-s3 cpp) -LIBRMM_CHANNEL=$(rapids-get-pr-conda-artifact rmm 1404 cpp) -RMM_CHANNEL=$(rapids-get-pr-conda-artifact rmm 1404 python) # TODO: Remove `--no-test` flag once importing on a CPU # node works correctly @@ -30,31 +28,23 @@ RMM_CHANNEL=$(rapids-get-pr-conda-artifact rmm 1404 python) RAPIDS_PACKAGE_VERSION=${version} rapids-conda-retry mambabuild \ --no-test \ --channel "${CPP_CHANNEL}" \ - --channel "${LIBRMM_CHANNEL}" \ - --channel "${RMM_CHANNEL}" \ conda/recipes/cudf RAPIDS_PACKAGE_VERSION=${version} rapids-conda-retry mambabuild \ --no-test \ --channel "${CPP_CHANNEL}" \ - --channel "${LIBRMM_CHANNEL}" \ - --channel "${RMM_CHANNEL}" \ --channel "${RAPIDS_CONDA_BLD_OUTPUT_DIR}" \ conda/recipes/dask-cudf RAPIDS_PACKAGE_VERSION=${version} rapids-conda-retry mambabuild \ --no-test \ --channel "${CPP_CHANNEL}" \ - --channel "${LIBRMM_CHANNEL}" \ - --channel "${RMM_CHANNEL}" \ --channel "${RAPIDS_CONDA_BLD_OUTPUT_DIR}" \ conda/recipes/cudf_kafka RAPIDS_PACKAGE_VERSION=${version} rapids-conda-retry mambabuild \ --no-test \ --channel "${CPP_CHANNEL}" \ - --channel "${LIBRMM_CHANNEL}" \ - --channel "${RMM_CHANNEL}" \ --channel "${RAPIDS_CONDA_BLD_OUTPUT_DIR}" \ conda/recipes/custreamz diff --git a/ci/test_cpp_common.sh b/ci/test_cpp_common.sh index b126773a2aa..163d381c1d4 100644 --- a/ci/test_cpp_common.sh +++ b/ci/test_cpp_common.sh @@ -22,7 +22,6 @@ conda activate test set -u CPP_CHANNEL=$(rapids-download-conda-from-s3 cpp) -LIBRMM_CHANNEL=$(rapids-get-pr-conda-artifact rmm 1404 cpp) RESULTS_DIR=${RAPIDS_TESTS_DIR:-"$(mktemp -d)"} RAPIDS_TESTS_DIR=${RAPIDS_TESTS_DIR:-"${RESULTS_DIR}/test-results"}/ @@ -32,7 +31,6 @@ rapids-print-env rapids-mamba-retry install \ --channel "${CPP_CHANNEL}" \ - --channel "${LIBRMM_CHANNEL}" \ libcudf libcudf_kafka libcudf-tests rapids-logger "Check GPU usage" diff --git a/ci/test_java.sh b/ci/test_java.sh index 518b949f142..0863795162d 100755 --- a/ci/test_java.sh +++ b/ci/test_java.sh @@ -25,14 +25,11 @@ set -u rapids-print-env -LIBRMM_CHANNEL=$(rapids-get-pr-conda-artifact rmm 1404 cpp) - rapids-logger "Downloading artifacts from previous jobs" CPP_CHANNEL=$(rapids-download-conda-from-s3 cpp) rapids-mamba-retry install \ --channel "${CPP_CHANNEL}" \ - --channel "${LIBRMM_CHANNEL}" \ libcudf rapids-logger "Check GPU usage" diff --git a/ci/test_python_common.sh b/ci/test_python_common.sh index a18f113c2dd..1c330d47ac6 100755 --- a/ci/test_python_common.sh +++ b/ci/test_python_common.sh @@ -26,8 +26,6 @@ set -u rapids-logger "Downloading artifacts from previous jobs" CPP_CHANNEL=$(rapids-download-conda-from-s3 cpp) PYTHON_CHANNEL=$(rapids-download-conda-from-s3 python) -LIBRMM_CHANNEL=$(rapids-get-pr-conda-artifact rmm 1404 cpp) -RMM_CHANNEL=$(rapids-get-pr-conda-artifact rmm 1404 python) RESULTS_DIR=${RAPIDS_TESTS_DIR:-"$(mktemp -d)"} RAPIDS_TESTS_DIR=${RAPIDS_TESTS_DIR:-"${RESULTS_DIR}/test-results"}/ @@ -39,6 +37,4 @@ rapids-print-env rapids-mamba-retry install \ --channel "${CPP_CHANNEL}" \ --channel "${PYTHON_CHANNEL}" \ - --channel "${LIBRMM_CHANNEL}" \ - --channel "${RMM_CHANNEL}" \ cudf libcudf diff --git a/fetch_rapids.cmake b/fetch_rapids.cmake index 6bcf8402647..65b0b89042b 100644 --- a/fetch_rapids.cmake +++ b/fetch_rapids.cmake @@ -11,8 +11,6 @@ # or implied. See the License for the specific language governing permissions and limitations under # the License. # ============================================================================= -set(rapids-cmake-repo bdice/rapids-cmake) -set(rapids-cmake-branch cuco-cccl-2.2.0) if(NOT EXISTS ${CMAKE_CURRENT_BINARY_DIR}/CUDF_RAPIDS.cmake) file(DOWNLOAD https://raw.githubusercontent.com/rapidsai/rapids-cmake/branch-24.02/RAPIDS.cmake ${CMAKE_CURRENT_BINARY_DIR}/CUDF_RAPIDS.cmake