From 5b710d7dc1900b4343f2bd99b7cf432bb955668c Mon Sep 17 00:00:00 2001 From: Vyas Ramasubramani Date: Wed, 1 Mar 2023 16:47:16 -0800 Subject: [PATCH 01/17] Remove some params. --- cpp/include/cudf/detail/copy.hpp | 8 ++--- cpp/include/cudf/detail/gather.cuh | 15 +++++----- cpp/include/cudf/detail/hashing.hpp | 38 +++++++++++------------- cpp/include/cudf/lists/detail/gather.cuh | 32 +++++++++----------- cpp/tests/copying/detail_gather_tests.cu | 16 +++++----- 5 files changed, 50 insertions(+), 59 deletions(-) diff --git a/cpp/include/cudf/detail/copy.hpp b/cpp/include/cudf/detail/copy.hpp index 8c3f315284d..c144f27cc00 100644 --- a/cpp/include/cudf/detail/copy.hpp +++ b/cpp/include/cudf/detail/copy.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2022, NVIDIA CORPORATION. + * Copyright (c) 2018-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -285,9 +285,9 @@ std::unique_ptr copy_if_else( std::unique_ptr sample( table_view const& input, size_type const n, - sample_with_replacement replacement = sample_with_replacement::FALSE, - int64_t const seed = 0, - rmm::cuda_stream_view stream = cudf::get_default_stream(), + sample_with_replacement replacement, + int64_t const seed, + rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); /** diff --git a/cpp/include/cudf/detail/gather.cuh b/cpp/include/cudf/detail/gather.cuh index 57d834e6277..ac2865c05c5 100644 --- a/cpp/include/cudf/detail/gather.cuh +++ b/cpp/include/cudf/detail/gather.cuh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2022, NVIDIA CORPORATION. + * Copyright (c) 2019-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -647,13 +647,12 @@ void gather_bitmask(table_view const& source, * @return cudf::table Result of the gather */ template -std::unique_ptr
gather( - table_view const& source_table, - MapIterator gather_map_begin, - MapIterator gather_map_end, - out_of_bounds_policy bounds_policy = out_of_bounds_policy::DONT_CHECK, - rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()) +std::unique_ptr
gather(table_view const& source_table, + MapIterator gather_map_begin, + MapIterator gather_map_end, + out_of_bounds_policy bounds_policy, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr) { std::vector> destination_columns; diff --git a/cpp/include/cudf/detail/hashing.hpp b/cpp/include/cudf/detail/hashing.hpp index b7469d80a8d..771b3e150ec 100644 --- a/cpp/include/cudf/detail/hashing.hpp +++ b/cpp/include/cudf/detail/hashing.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2022, NVIDIA CORPORATION. + * Copyright (c) 2019-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -31,29 +31,25 @@ namespace detail { * * @param stream CUDA stream used for device memory operations and kernel launches. */ -std::unique_ptr hash( - table_view const& input, - hash_id hash_function = hash_id::HASH_MURMUR3, - uint32_t seed = cudf::DEFAULT_HASH_SEED, - rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); +std::unique_ptr hash(table_view const& input, + hash_id hash_function, + uint32_t seed, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr); -std::unique_ptr murmur_hash3_32( - table_view const& input, - uint32_t seed = cudf::DEFAULT_HASH_SEED, - rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); +std::unique_ptr murmur_hash3_32(table_view const& input, + uint32_t seed, + rmm::cuda_stream_view, + rmm::mr::device_memory_resource* mr); -std::unique_ptr spark_murmur_hash3_32( - table_view const& input, - uint32_t seed = cudf::DEFAULT_HASH_SEED, - rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); +std::unique_ptr spark_murmur_hash3_32(table_view const& input, + uint32_t seed, + rmm::cuda_stream_view, + rmm::mr::device_memory_resource* mr); -std::unique_ptr md5_hash( - table_view const& input, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); +std::unique_ptr md5_hash(table_view const& input, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr); /* Copyright 2005-2014 Daniel James. * diff --git a/cpp/include/cudf/lists/detail/gather.cuh b/cpp/include/cudf/lists/detail/gather.cuh index 48c0ed8f6e9..83710a49f6a 100644 --- a/cpp/include/cudf/lists/detail/gather.cuh +++ b/cpp/include/cudf/lists/detail/gather.cuh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020-2022, NVIDIA CORPORATION. + * Copyright (c) 2020-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -285,11 +285,10 @@ gather_data make_gather_data(cudf::lists_column_view const& source_column, * * @returns column with elements gathered based on `gather_data` */ -std::unique_ptr gather_list_nested( - lists_column_view const& list, - gather_data& gd, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); +std::unique_ptr gather_list_nested(lists_column_view const& list, + gather_data& gd, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr); /** * @brief Gather a leaf column from a hierarchy of list columns. @@ -303,11 +302,10 @@ std::unique_ptr gather_list_nested( * * @returns column with elements gathered based on `gather_data` */ -std::unique_ptr gather_list_leaf( - column_view const& column, - gather_data const& gd, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); +std::unique_ptr gather_list_leaf(column_view const& column, + gather_data const& gd, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr); /** * @copydoc cudf::lists::segmented_gather(lists_column_view const& source_column, @@ -317,13 +315,11 @@ std::unique_ptr gather_list_leaf( * * @param stream CUDA stream on which to execute kernels */ -std::unique_ptr segmented_gather( - lists_column_view const& source_column, - lists_column_view const& gather_map_list, - out_of_bounds_policy bounds_policy = out_of_bounds_policy::DONT_CHECK, - // Move before bounds_policy? - rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); +std::unique_ptr segmented_gather(lists_column_view const& source_column, + lists_column_view const& gather_map_list, + out_of_bounds_policy bounds_policy, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr); } // namespace detail } // namespace lists diff --git a/cpp/tests/copying/detail_gather_tests.cu b/cpp/tests/copying/detail_gather_tests.cu index bf2937ae8ab..d4898113afb 100644 --- a/cpp/tests/copying/detail_gather_tests.cu +++ b/cpp/tests/copying/detail_gather_tests.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020-2022, NVIDIA CORPORATION. + * Copyright (c) 2020-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -46,9 +46,11 @@ TYPED_TEST_SUITE(GatherTest, cudf::test::NumericTypes); TYPED_TEST(GatherTest, GatherDetailDeviceVectorTest) { constexpr cudf::size_type source_size{1000}; - rmm::device_uvector gather_map(source_size, cudf::get_default_stream()); - thrust::sequence( - rmm::exec_policy_nosync(cudf::get_default_stream()), gather_map.begin(), gather_map.end()); + rmm::device_uvector gather_map_vec(source_size, cudf::get_default_stream()); + auto gather_map = std::make_unique(std::move(gather_map_vec)); + thrust::sequence(rmm::exec_policy_nosync(cudf::get_default_stream()), + gather_map->mutable_view().begin(), + gather_map->mutable_view().end()); auto data = cudf::detail::make_counting_transform_iterator(0, [](auto i) { return i; }); cudf::test::fixed_width_column_wrapper source_column(data, data + source_size); @@ -57,8 +59,7 @@ TYPED_TEST(GatherTest, GatherDetailDeviceVectorTest) // test with device vector iterators { - std::unique_ptr result = - cudf::detail::gather(source_table, gather_map.begin(), gather_map.end()); + std::unique_ptr result = cudf::gather(source_table, gather_map->view()); for (auto i = 0; i < source_table.num_columns(); ++i) { CUDF_TEST_EXPECT_COLUMNS_EQUAL(source_table.column(i), result->view().column(i)); @@ -69,8 +70,7 @@ TYPED_TEST(GatherTest, GatherDetailDeviceVectorTest) // test with raw pointers { - std::unique_ptr result = - cudf::detail::gather(source_table, gather_map.data(), gather_map.data() + gather_map.size()); + std::unique_ptr result = cudf::gather(source_table, gather_map->view()); for (auto i = 0; i < source_table.num_columns(); ++i) { CUDF_TEST_EXPECT_COLUMNS_EQUAL(source_table.column(i), result->view().column(i)); From a26045c97cb57bd3ae021db6ee4a61753ea80506 Mon Sep 17 00:00:00 2001 From: Vyas Ramasubramani Date: Wed, 1 Mar 2023 17:00:48 -0800 Subject: [PATCH 02/17] copy APIs. --- cpp/include/cudf/detail/copy.hpp | 119 ++++++++++++--------------- cpp/src/copying/scatter.cu | 4 +- cpp/src/sort/segmented_sort_impl.cuh | 23 ++++-- 3 files changed, 71 insertions(+), 75 deletions(-) diff --git a/cpp/include/cudf/detail/copy.hpp b/cpp/include/cudf/detail/copy.hpp index c144f27cc00..83395f8fa90 100644 --- a/cpp/include/cudf/detail/copy.hpp +++ b/cpp/include/cudf/detail/copy.hpp @@ -144,12 +144,11 @@ std::vector split(table_view const& input, * * @param stream CUDA stream used for device memory operations and kernel launches. */ -std::unique_ptr shift( - column_view const& input, - size_type offset, - scalar const& fill_value, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); +std::unique_ptr shift(column_view const& input, + size_type offset, + scalar const& fill_value, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr); /** * @brief Performs segmented shifts for specified values. @@ -184,24 +183,22 @@ std::unique_ptr shift( * * @note If `offset == 0`, a copy of @p segmented_values is returned. */ -std::unique_ptr segmented_shift( - column_view const& segmented_values, - device_span segment_offsets, - size_type offset, - scalar const& fill_value, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); +std::unique_ptr segmented_shift(column_view const& segmented_values, + device_span segment_offsets, + size_type offset, + scalar const& fill_value, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr); /** * @copydoc cudf::contiguous_split * * @param stream CUDA stream used for device memory operations and kernel launches. **/ -std::vector contiguous_split( - cudf::table_view const& input, - std::vector const& splits, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); +std::vector contiguous_split(cudf::table_view const& input, + std::vector const& splits, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr); /** * @copydoc cudf::pack @@ -210,7 +207,7 @@ std::vector contiguous_split( **/ packed_columns pack(cudf::table_view const& input, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + rmm::mr::device_memory_resource* mr); /** * @copydoc cudf::allocate_like(column_view const&, size_type, mask_allocation_policy, @@ -218,12 +215,11 @@ packed_columns pack(cudf::table_view const& input, * * @param[in] stream CUDA stream used for device memory operations and kernel launches. */ -std::unique_ptr allocate_like( - column_view const& input, - size_type size, - mask_allocation_policy mask_alloc = mask_allocation_policy::RETAIN, - rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); +std::unique_ptr allocate_like(column_view const& input, + size_type size, + mask_allocation_policy mask_alloc, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr); /** * @copydoc cudf::copy_if_else( column_view const&, column_view const&, @@ -231,12 +227,11 @@ std::unique_ptr allocate_like( * * @param[in] stream CUDA stream used for device memory operations and kernel launches. */ -std::unique_ptr copy_if_else( - column_view const& lhs, - column_view const& rhs, - column_view const& boolean_mask, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); +std::unique_ptr copy_if_else(column_view const& lhs, + column_view const& rhs, + column_view const& boolean_mask, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr); /** * @copydoc cudf::copy_if_else( scalar const&, column_view const&, @@ -244,12 +239,11 @@ std::unique_ptr copy_if_else( * * @param[in] stream CUDA stream used for device memory operations and kernel launches. */ -std::unique_ptr copy_if_else( - scalar const& lhs, - column_view const& rhs, - column_view const& boolean_mask, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); +std::unique_ptr copy_if_else(scalar const& lhs, + column_view const& rhs, + column_view const& boolean_mask, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr); /** * @copydoc cudf::copy_if_else( column_view const&, scalar const&, @@ -257,12 +251,11 @@ std::unique_ptr copy_if_else( * * @param[in] stream CUDA stream used for device memory operations and kernel launches. */ -std::unique_ptr copy_if_else( - column_view const& lhs, - scalar const& rhs, - column_view const& boolean_mask, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); +std::unique_ptr copy_if_else(column_view const& lhs, + scalar const& rhs, + column_view const& boolean_mask, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr); /** * @copydoc cudf::copy_if_else( scalar const&, scalar const&, @@ -270,36 +263,33 @@ std::unique_ptr copy_if_else( * * @param[in] stream CUDA stream used for device memory operations and kernel launches. */ -std::unique_ptr copy_if_else( - scalar const& lhs, - scalar const& rhs, - column_view const& boolean_mask, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); +std::unique_ptr copy_if_else(scalar const& lhs, + scalar const& rhs, + column_view const& boolean_mask, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr); /** * @copydoc cudf::sample * * @param[in] stream CUDA stream used for device memory operations and kernel launches. */ -std::unique_ptr
sample( - table_view const& input, - size_type const n, - sample_with_replacement replacement, - int64_t const seed, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); +std::unique_ptr
sample(table_view const& input, + size_type const n, + sample_with_replacement replacement, + int64_t const seed, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr); /** * @copydoc cudf::get_element * * @param[in] stream CUDA stream used for device memory operations and kernel launches. */ -std::unique_ptr get_element( - column_view const& input, - size_type index, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); +std::unique_ptr get_element(column_view const& input, + size_type index, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr); /** * @copydoc cudf::has_nonempty_nulls @@ -320,10 +310,9 @@ bool may_have_nonempty_nulls(column_view const& input, rmm::cuda_stream_view str * * @param stream CUDA stream used for device memory operations and kernel launches. */ -std::unique_ptr purge_nonempty_nulls( - column_view const& input, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); +std::unique_ptr purge_nonempty_nulls(column_view const& input, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr); } // namespace detail } // namespace cudf diff --git a/cpp/src/copying/scatter.cu b/cpp/src/copying/scatter.cu index dd4912a216e..95248f0d98e 100644 --- a/cpp/src/copying/scatter.cu +++ b/cpp/src/copying/scatter.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2022, NVIDIA CORPORATION. + * Copyright (c) 2019-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -253,7 +253,7 @@ struct column_scalar_scatterer_impl { auto scatter_functor = column_scalar_scatterer{}; auto fields_iter_begin = make_counting_transform_iterator(0, [&](auto const& i) { - auto row_slr = get_element(typed_s->view().column(i), 0, stream); + auto row_slr = get_element(typed_s->view().column(i), 0, stream, mr); return type_dispatcher(row_slr->type(), scatter_functor, *row_slr, diff --git a/cpp/src/sort/segmented_sort_impl.cuh b/cpp/src/sort/segmented_sort_impl.cuh index a32382b840f..8c1d57f79bc 100644 --- a/cpp/src/sort/segmented_sort_impl.cuh +++ b/cpp/src/sort/segmented_sort_impl.cuh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021-2022, NVIDIA CORPORATION. + * Copyright (c) 2021-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -68,12 +68,13 @@ struct column_fast_sort_fn { column_view const& segment_offsets, mutable_column_view& indices, bool ascending, - rmm::cuda_stream_view stream) + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr) { // CUB's segmented sort functions cannot accept iterators. // We create a temporary column here for it to use. auto temp_col = - cudf::detail::allocate_like(input, input.size(), mask_allocation_policy::NEVER, stream); + cudf::detail::allocate_like(input, input.size(), mask_allocation_policy::NEVER, stream, mr); mutable_column_view output_view = temp_col->mutable_view(); // DeviceSegmentedSort is faster than DeviceSegmentedRadixSort at this time @@ -128,14 +129,19 @@ struct column_fast_sort_fn { column_view const& segment_offsets, mutable_column_view& indices, bool ascending, - rmm::cuda_stream_view stream) + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr) { - fast_sort(input, segment_offsets, indices, ascending, stream); + fast_sort(input, segment_offsets, indices, ascending, stream, mr); } template ())> - void operator()( - column_view const&, column_view const&, mutable_column_view&, bool, rmm::cuda_stream_view) + void operator()(column_view const&, + column_view const&, + mutable_column_view&, + bool, + rmm::cuda_stream_view, + rmm::mr::device_memory_resource*) { CUDF_FAIL("Column type cannot be used with fast-sort function"); } @@ -171,7 +177,8 @@ std::unique_ptr fast_segmented_sorted_order(column_view const& input, segment_offsets, indices_view, column_order == order::ASCENDING, - stream); + stream, + mr); return sorted_indices; } From fd5c0e9d1def5824824cff77ecf019dd62afcb87 Mon Sep 17 00:00:00 2001 From: Vyas Ramasubramani Date: Wed, 1 Mar 2023 17:09:44 -0800 Subject: [PATCH 03/17] Update arrow calls. --- cpp/include/cudf/detail/interop.hpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/cpp/include/cudf/detail/interop.hpp b/cpp/include/cudf/detail/interop.hpp index 25ce5b09eb8..452144da167 100644 --- a/cpp/include/cudf/detail/interop.hpp +++ b/cpp/include/cudf/detail/interop.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020-2022, NVIDIA CORPORATION. + * Copyright (c) 2020-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -111,9 +111,9 @@ data_type arrow_to_cudf_type(arrow::DataType const& arrow_type); * @param stream CUDA stream used for device memory operations and kernel launches. */ std::shared_ptr to_arrow(table_view input, - std::vector const& metadata = {}, - rmm::cuda_stream_view stream = cudf::get_default_stream(), - arrow::MemoryPool* ar_mr = arrow::default_memory_pool()); + std::vector const& metadata, + rmm::cuda_stream_view stream, + arrow::MemoryPool* ar_mr); /** * @copydoc cudf::arrow_to_cudf From 2d4c23fb398f7d656c7f70a23e9fde7c90a0a2e0 Mon Sep 17 00:00:00 2001 From: Vyas Ramasubramani Date: Wed, 1 Mar 2023 17:19:30 -0800 Subject: [PATCH 04/17] stream_compaction. --- cpp/include/cudf/detail/stream_compaction.hpp | 89 +++++++++---------- cpp/src/copying/scatter.cu | 2 +- cpp/src/stream_compaction/distinct.cu | 2 +- cpp/src/stream_compaction/stable_distinct.cu | 4 +- 4 files changed, 45 insertions(+), 52 deletions(-) diff --git a/cpp/include/cudf/detail/stream_compaction.hpp b/cpp/include/cudf/detail/stream_compaction.hpp index e725718ed22..e6cf9bb2c50 100644 --- a/cpp/include/cudf/detail/stream_compaction.hpp +++ b/cpp/include/cudf/detail/stream_compaction.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2022, NVIDIA CORPORATION. + * Copyright (c) 2019-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -32,12 +32,11 @@ namespace detail { * * @param[in] stream CUDA stream used for device memory operations and kernel launches. */ -std::unique_ptr
drop_nulls( - table_view const& input, - std::vector const& keys, - cudf::size_type keep_threshold, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); +std::unique_ptr
drop_nulls(table_view const& input, + std::vector const& keys, + cudf::size_type keep_threshold, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr); /** * @copydoc cudf::drop_nans(table_view const&, std::vector const&, @@ -45,50 +44,46 @@ std::unique_ptr
drop_nulls( * * @param[in] stream CUDA stream used for device memory operations and kernel launches. */ -std::unique_ptr
drop_nans( - table_view const& input, - std::vector const& keys, - cudf::size_type keep_threshold, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); +std::unique_ptr
drop_nans(table_view const& input, + std::vector const& keys, + cudf::size_type keep_threshold, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr); /** * @copydoc cudf::apply_boolean_mask * * @param[in] stream CUDA stream used for device memory operations and kernel launches. */ -std::unique_ptr
apply_boolean_mask( - table_view const& input, - column_view const& boolean_mask, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); +std::unique_ptr
apply_boolean_mask(table_view const& input, + column_view const& boolean_mask, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr); /** * @copydoc cudf::unique * * @param[in] stream CUDA stream used for device memory operations and kernel launches. */ -std::unique_ptr
unique( - table_view const& input, - std::vector const& keys, - duplicate_keep_option keep, - null_equality nulls_equal = null_equality::EQUAL, - rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); +std::unique_ptr
unique(table_view const& input, + std::vector const& keys, + duplicate_keep_option keep, + null_equality nulls_equal, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr); /** * @copydoc cudf::distinct * * @param[in] stream CUDA stream used for device memory operations and kernel launches. */ -std::unique_ptr
distinct( - table_view const& input, - std::vector const& keys, - duplicate_keep_option keep = duplicate_keep_option::KEEP_ANY, - null_equality nulls_equal = null_equality::EQUAL, - nan_equality nans_equal = nan_equality::ALL_EQUAL, - rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); +std::unique_ptr
distinct(table_view const& input, + std::vector const& keys, + duplicate_keep_option keep, + null_equality nulls_equal, + nan_equality nans_equal, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr); /** * @brief Create a new table without duplicate rows. @@ -110,14 +105,13 @@ std::unique_ptr
distinct( * @param mr Device memory resource used to allocate the returned table * @return A table containing the resulting distinct rows */ -std::unique_ptr
stable_distinct( - table_view const& input, - std::vector const& keys, - duplicate_keep_option keep = duplicate_keep_option::KEEP_ANY, - null_equality nulls_equal = null_equality::EQUAL, - nan_equality nans_equal = nan_equality::ALL_EQUAL, - rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); +std::unique_ptr
stable_distinct(table_view const& input, + std::vector const& keys, + duplicate_keep_option keep, + null_equality nulls_equal, + nan_equality nans_equal, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr); /** * @brief Create a column of indices of all distinct rows in the input table. @@ -133,13 +127,12 @@ std::unique_ptr
stable_distinct( * @param mr Device memory resource used to allocate the returned vector * @return A device_uvector containing the result indices */ -rmm::device_uvector get_distinct_indices( - table_view const& input, - duplicate_keep_option keep = duplicate_keep_option::KEEP_ANY, - null_equality nulls_equal = null_equality::EQUAL, - nan_equality nans_equal = nan_equality::ALL_EQUAL, - rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); +rmm::device_uvector get_distinct_indices(table_view const& input, + duplicate_keep_option keep, + null_equality nulls_equal, + nan_equality nans_equal, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr); /** * @copydoc cudf::unique_count(column_view const&, null_policy, nan_policy) diff --git a/cpp/src/copying/scatter.cu b/cpp/src/copying/scatter.cu index 95248f0d98e..273ecaf9adb 100644 --- a/cpp/src/copying/scatter.cu +++ b/cpp/src/copying/scatter.cu @@ -393,7 +393,7 @@ std::unique_ptr boolean_mask_scatter(column_view const& input, // The scatter map is actually a table with only one column, which is scatter map. auto scatter_map = - detail::apply_boolean_mask(table_view{{indices->view()}}, boolean_mask, stream); + detail::apply_boolean_mask(table_view{{indices->view()}}, boolean_mask, stream, mr); auto output_table = detail::scatter( table_view{{input}}, scatter_map->get_column(0).view(), table_view{{target}}, stream, mr); diff --git a/cpp/src/stream_compaction/distinct.cu b/cpp/src/stream_compaction/distinct.cu index e15d54b4251..9a5a52a5b90 100644 --- a/cpp/src/stream_compaction/distinct.cu +++ b/cpp/src/stream_compaction/distinct.cu @@ -146,7 +146,7 @@ std::unique_ptr
distinct(table_view const& input, } auto const gather_map = - get_distinct_indices(input.select(keys), keep, nulls_equal, nans_equal, stream); + get_distinct_indices(input.select(keys), keep, nulls_equal, nans_equal, stream, mr); return detail::gather(input, gather_map, out_of_bounds_policy::DONT_CHECK, diff --git a/cpp/src/stream_compaction/stable_distinct.cu b/cpp/src/stream_compaction/stable_distinct.cu index dc80a454777..142018d4336 100644 --- a/cpp/src/stream_compaction/stable_distinct.cu +++ b/cpp/src/stream_compaction/stable_distinct.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022, NVIDIA CORPORATION. + * Copyright (c) 2022-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -39,7 +39,7 @@ std::unique_ptr
stable_distinct(table_view const& input, } auto const distinct_indices = - get_distinct_indices(input.select(keys), keep, nulls_equal, nans_equal, stream); + get_distinct_indices(input.select(keys), keep, nulls_equal, nans_equal, stream, mr); // Markers to denote which rows to be copied to the output. auto const output_markers = [&] { From 385b629e43e3e5866177e1ff6174c23203852e57 Mon Sep 17 00:00:00 2001 From: Vyas Ramasubramani Date: Wed, 1 Mar 2023 21:36:06 -0800 Subject: [PATCH 05/17] Some binary_ops. --- cpp/src/binaryop/compiled/binary_ops.hpp | 80 +++++++++++------------- 1 file changed, 37 insertions(+), 43 deletions(-) diff --git a/cpp/src/binaryop/compiled/binary_ops.hpp b/cpp/src/binaryop/compiled/binary_ops.hpp index c51993409ef..47fd50c5d97 100644 --- a/cpp/src/binaryop/compiled/binary_ops.hpp +++ b/cpp/src/binaryop/compiled/binary_ops.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2022, NVIDIA CORPORATION. + * Copyright (c) 2018-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -32,29 +32,26 @@ class mutable_column_device_view; namespace binops { namespace compiled { -std::unique_ptr string_null_min_max( - scalar const& lhs, - column_view const& rhs, - binary_operator op, - data_type output_type, - rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); +std::unique_ptr string_null_min_max(scalar const& lhs, + column_view const& rhs, + binary_operator op, + data_type output_type, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr); -std::unique_ptr string_null_min_max( - column_view const& lhs, - scalar const& rhs, - binary_operator op, - data_type output_type, - rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); +std::unique_ptr string_null_min_max(column_view const& lhs, + scalar const& rhs, + binary_operator op, + data_type output_type, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr); -std::unique_ptr string_null_min_max( - column_view const& lhs, - column_view const& rhs, - binary_operator op, - data_type output_type, - rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); +std::unique_ptr string_null_min_max(column_view const& lhs, + column_view const& rhs, + binary_operator op, + data_type output_type, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr); /** * @brief Performs a binary operation between a string scalar and a string @@ -75,13 +72,12 @@ std::unique_ptr string_null_min_max( * @param mr Device memory resource used to allocate the returned column's device memory * @return std::unique_ptr Output column */ -std::unique_ptr binary_operation( - scalar const& lhs, - column_view const& rhs, - binary_operator op, - data_type output_type, - rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); +std::unique_ptr binary_operation(scalar const& lhs, + column_view const& rhs, + binary_operator op, + data_type output_type, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr); /** * @brief Performs a binary operation between a string column and a string @@ -102,13 +98,12 @@ std::unique_ptr binary_operation( * @param mr Device memory resource used to allocate the returned column's device memory * @return std::unique_ptr Output column */ -std::unique_ptr binary_operation( - column_view const& lhs, - scalar const& rhs, - binary_operator op, - data_type output_type, - rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); +std::unique_ptr binary_operation(column_view const& lhs, + scalar const& rhs, + binary_operator op, + data_type output_type, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr); /** * @brief Performs a binary operation between two string columns. @@ -128,13 +123,12 @@ std::unique_ptr binary_operation( * @param mr Device memory resource used to allocate the returned column's device memory * @return std::unique_ptr Output column */ -std::unique_ptr binary_operation( - column_view const& lhs, - column_view const& rhs, - binary_operator op, - data_type output_type, - rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); +std::unique_ptr binary_operation(column_view const& lhs, + column_view const& rhs, + binary_operator op, + data_type output_type, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr); void binary_operation(mutable_column_view& out, scalar const& lhs, From 2f24667dc237c31529ba1b766f16124a553cd8f3 Mon Sep 17 00:00:00 2001 From: Vyas Ramasubramani Date: Mon, 6 Mar 2023 11:04:37 -0800 Subject: [PATCH 06/17] Some stream_compaction. --- cpp/include/cudf/detail/stream_compaction.hpp | 8 ++++---- cpp/src/stream_compaction/distinct_count.cu | 3 ++- cpp/src/stream_compaction/unique_count.cu | 5 +++-- 3 files changed, 9 insertions(+), 7 deletions(-) diff --git a/cpp/include/cudf/detail/stream_compaction.hpp b/cpp/include/cudf/detail/stream_compaction.hpp index e6cf9bb2c50..e0fc7b71cd9 100644 --- a/cpp/include/cudf/detail/stream_compaction.hpp +++ b/cpp/include/cudf/detail/stream_compaction.hpp @@ -150,8 +150,8 @@ cudf::size_type unique_count(column_view const& input, * @param[in] stream CUDA stream used for device memory operations and kernel launches. */ cudf::size_type unique_count(table_view const& input, - null_equality nulls_equal = null_equality::EQUAL, - rmm::cuda_stream_view stream = cudf::get_default_stream()); + null_equality nulls_equal, + rmm::cuda_stream_view stream); /** * @copydoc cudf::distinct_count(column_view const&, null_policy, nan_policy) @@ -169,8 +169,8 @@ cudf::size_type distinct_count(column_view const& input, * @param[in] stream CUDA stream used for device memory operations and kernel launches. */ cudf::size_type distinct_count(table_view const& input, - null_equality nulls_equal = null_equality::EQUAL, - rmm::cuda_stream_view stream = cudf::get_default_stream()); + null_equality nulls_equal, + rmm::cuda_stream_view stream); } // namespace detail } // namespace cudf diff --git a/cpp/src/stream_compaction/distinct_count.cu b/cpp/src/stream_compaction/distinct_count.cu index 8cde6e0a7ed..fb6505daf06 100644 --- a/cpp/src/stream_compaction/distinct_count.cu +++ b/cpp/src/stream_compaction/distinct_count.cu @@ -14,6 +14,7 @@ * limitations under the License. */ +#include "cudf/utilities/default_stream.hpp" #include "stream_compaction_common.cuh" #include "stream_compaction_common.hpp" @@ -193,6 +194,6 @@ cudf::size_type distinct_count(column_view const& input, cudf::size_type distinct_count(table_view const& input, null_equality nulls_equal) { CUDF_FUNC_RANGE(); - return detail::distinct_count(input, nulls_equal); + return detail::distinct_count(input, nulls_equal, cudf::get_default_stream()); } } // namespace cudf diff --git a/cpp/src/stream_compaction/unique_count.cu b/cpp/src/stream_compaction/unique_count.cu index 8363ee8120b..11a0226e269 100644 --- a/cpp/src/stream_compaction/unique_count.cu +++ b/cpp/src/stream_compaction/unique_count.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022, NVIDIA CORPORATION. + * Copyright (c) 2022-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -14,6 +14,7 @@ * limitations under the License. */ +#include "cudf/utilities/default_stream.hpp" #include "stream_compaction_common.cuh" #include "stream_compaction_common.hpp" @@ -133,7 +134,7 @@ cudf::size_type unique_count(column_view const& input, cudf::size_type unique_count(table_view const& input, null_equality nulls_equal) { CUDF_FUNC_RANGE(); - return detail::unique_count(input, nulls_equal); + return detail::unique_count(input, nulls_equal, cudf::get_default_stream()); } } // namespace cudf From a50bf6b34037b910f54d1059bd3fe4e69e768dc2 Mon Sep 17 00:00:00 2001 From: Vyas Ramasubramani Date: Tue, 7 Mar 2023 12:50:47 -0800 Subject: [PATCH 07/17] Use current memory resource for temporaries instead of user-provided one. --- cpp/src/copying/scatter.cu | 7 ++++--- cpp/src/sort/segmented_sort_impl.cuh | 8 ++++++-- cpp/src/stream_compaction/distinct.cu | 9 +++++++-- cpp/src/stream_compaction/stable_distinct.cu | 8 ++++++-- 4 files changed, 23 insertions(+), 9 deletions(-) diff --git a/cpp/src/copying/scatter.cu b/cpp/src/copying/scatter.cu index 273ecaf9adb..316f39b616c 100644 --- a/cpp/src/copying/scatter.cu +++ b/cpp/src/copying/scatter.cu @@ -253,7 +253,8 @@ struct column_scalar_scatterer_impl { auto scatter_functor = column_scalar_scatterer{}; auto fields_iter_begin = make_counting_transform_iterator(0, [&](auto const& i) { - auto row_slr = get_element(typed_s->view().column(i), 0, stream, mr); + auto row_slr = + get_element(typed_s->view().column(i), 0, stream, rmm::mr::get_current_device_resource()); return type_dispatcher(row_slr->type(), scatter_functor, *row_slr, @@ -392,8 +393,8 @@ std::unique_ptr boolean_mask_scatter(column_view const& input, 0); // The scatter map is actually a table with only one column, which is scatter map. - auto scatter_map = - detail::apply_boolean_mask(table_view{{indices->view()}}, boolean_mask, stream, mr); + auto scatter_map = detail::apply_boolean_mask( + table_view{{indices->view()}}, boolean_mask, stream, rmm::mr::get_current_device_resource()); auto output_table = detail::scatter( table_view{{input}}, scatter_map->get_column(0).view(), table_view{{target}}, stream, mr); diff --git a/cpp/src/sort/segmented_sort_impl.cuh b/cpp/src/sort/segmented_sort_impl.cuh index 8c1d57f79bc..8a367209acf 100644 --- a/cpp/src/sort/segmented_sort_impl.cuh +++ b/cpp/src/sort/segmented_sort_impl.cuh @@ -24,6 +24,7 @@ #include #include +#include namespace cudf { namespace detail { @@ -73,8 +74,11 @@ struct column_fast_sort_fn { { // CUB's segmented sort functions cannot accept iterators. // We create a temporary column here for it to use. - auto temp_col = - cudf::detail::allocate_like(input, input.size(), mask_allocation_policy::NEVER, stream, mr); + auto temp_col = cudf::detail::allocate_like(input, + input.size(), + mask_allocation_policy::NEVER, + stream, + rmm::mr::get_current_device_resource()); mutable_column_view output_view = temp_col->mutable_view(); // DeviceSegmentedSort is faster than DeviceSegmentedRadixSort at this time diff --git a/cpp/src/stream_compaction/distinct.cu b/cpp/src/stream_compaction/distinct.cu index 9a5a52a5b90..83c97521abf 100644 --- a/cpp/src/stream_compaction/distinct.cu +++ b/cpp/src/stream_compaction/distinct.cu @@ -25,6 +25,7 @@ #include #include +#include #include #include #include @@ -145,8 +146,12 @@ std::unique_ptr
distinct(table_view const& input, return empty_like(input); } - auto const gather_map = - get_distinct_indices(input.select(keys), keep, nulls_equal, nans_equal, stream, mr); + auto const gather_map = get_distinct_indices(input.select(keys), + keep, + nulls_equal, + nans_equal, + stream, + rmm::mr::get_current_device_resource()); return detail::gather(input, gather_map, out_of_bounds_policy::DONT_CHECK, diff --git a/cpp/src/stream_compaction/stable_distinct.cu b/cpp/src/stream_compaction/stable_distinct.cu index 142018d4336..d45897930b0 100644 --- a/cpp/src/stream_compaction/stable_distinct.cu +++ b/cpp/src/stream_compaction/stable_distinct.cu @@ -38,8 +38,12 @@ std::unique_ptr
stable_distinct(table_view const& input, return empty_like(input); } - auto const distinct_indices = - get_distinct_indices(input.select(keys), keep, nulls_equal, nans_equal, stream, mr); + auto const distinct_indices = get_distinct_indices(input.select(keys), + keep, + nulls_equal, + nans_equal, + stream, + rmm::mr::get_current_device_resource()); // Markers to denote which rows to be copied to the output. auto const output_markers = [&] { From ba57a94f8413f5e2d6f2b5f7fcf0386f5885034a Mon Sep 17 00:00:00 2001 From: Vyas Ramasubramani Date: Wed, 8 Mar 2023 08:10:07 -0800 Subject: [PATCH 08/17] Bump From fc227b71a98c616907a5e8ceba984e7094191a41 Mon Sep 17 00:00:00 2001 From: Vyas Ramasubramani Date: Wed, 8 Mar 2023 14:24:28 -0500 Subject: [PATCH 09/17] Remove unused mr parameters. --- cpp/src/sort/segmented_sort_impl.cuh | 16 +++++----------- 1 file changed, 5 insertions(+), 11 deletions(-) diff --git a/cpp/src/sort/segmented_sort_impl.cuh b/cpp/src/sort/segmented_sort_impl.cuh index 8a367209acf..46195e1d2cc 100644 --- a/cpp/src/sort/segmented_sort_impl.cuh +++ b/cpp/src/sort/segmented_sort_impl.cuh @@ -69,8 +69,7 @@ struct column_fast_sort_fn { column_view const& segment_offsets, mutable_column_view& indices, bool ascending, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) + rmm::cuda_stream_view stream) { // CUB's segmented sort functions cannot accept iterators. // We create a temporary column here for it to use. @@ -133,19 +132,14 @@ struct column_fast_sort_fn { column_view const& segment_offsets, mutable_column_view& indices, bool ascending, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) + rmm::cuda_stream_view stream) { - fast_sort(input, segment_offsets, indices, ascending, stream, mr); + fast_sort(input, segment_offsets, indices, ascending, stream); } template ())> - void operator()(column_view const&, - column_view const&, - mutable_column_view&, - bool, - rmm::cuda_stream_view, - rmm::mr::device_memory_resource*) + void operator()( + column_view const&, column_view const&, mutable_column_view&, bool, rmm::cuda_stream_view) { CUDF_FAIL("Column type cannot be used with fast-sort function"); } From d0371d10d233e2cd0044661e68b75885336dfc4a Mon Sep 17 00:00:00 2001 From: Vyas Ramasubramani Date: Wed, 8 Mar 2023 14:34:48 -0500 Subject: [PATCH 10/17] Remove one more. --- cpp/src/sort/segmented_sort_impl.cuh | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/cpp/src/sort/segmented_sort_impl.cuh b/cpp/src/sort/segmented_sort_impl.cuh index 46195e1d2cc..b1fce380e28 100644 --- a/cpp/src/sort/segmented_sort_impl.cuh +++ b/cpp/src/sort/segmented_sort_impl.cuh @@ -175,8 +175,7 @@ std::unique_ptr fast_segmented_sorted_order(column_view const& input, segment_offsets, indices_view, column_order == order::ASCENDING, - stream, - mr); + stream); return sorted_indices; } From c6ad4cfd31f0075a57061b5d92407630f99db91b Mon Sep 17 00:00:00 2001 From: Vyas Ramasubramani Date: Thu, 9 Mar 2023 07:39:57 -0800 Subject: [PATCH 11/17] Reorder includes. --- cpp/src/sort/segmented_sort_impl.cuh | 2 +- cpp/src/stream_compaction/distinct_count.cu | 2 +- cpp/src/stream_compaction/unique_count.cu | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/cpp/src/sort/segmented_sort_impl.cuh b/cpp/src/sort/segmented_sort_impl.cuh index b1fce380e28..b7347974173 100644 --- a/cpp/src/sort/segmented_sort_impl.cuh +++ b/cpp/src/sort/segmented_sort_impl.cuh @@ -22,9 +22,9 @@ #include #include +#include #include -#include namespace cudf { namespace detail { diff --git a/cpp/src/stream_compaction/distinct_count.cu b/cpp/src/stream_compaction/distinct_count.cu index c5e0d9c6060..0dae26c18a9 100644 --- a/cpp/src/stream_compaction/distinct_count.cu +++ b/cpp/src/stream_compaction/distinct_count.cu @@ -14,7 +14,6 @@ * limitations under the License. */ -#include "cudf/utilities/default_stream.hpp" #include "stream_compaction_common.cuh" #include "stream_compaction_common.hpp" @@ -29,6 +28,7 @@ #include #include #include +#include #include #include diff --git a/cpp/src/stream_compaction/unique_count.cu b/cpp/src/stream_compaction/unique_count.cu index 5876363a61b..4c1cf2b2bc3 100644 --- a/cpp/src/stream_compaction/unique_count.cu +++ b/cpp/src/stream_compaction/unique_count.cu @@ -14,7 +14,6 @@ * limitations under the License. */ -#include "cudf/utilities/default_stream.hpp" #include "stream_compaction_common.cuh" #include "stream_compaction_common.hpp" @@ -29,6 +28,7 @@ #include #include #include +#include #include #include From 489985b1fee2734ca8cafdb69271ea18e798e7b5 Mon Sep 17 00:00:00 2001 From: Vyas Ramasubramani Date: Thu, 9 Mar 2023 14:34:23 -0800 Subject: [PATCH 12/17] Update java call. --- java/src/main/native/src/ColumnViewJni.cu | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/java/src/main/native/src/ColumnViewJni.cu b/java/src/main/native/src/ColumnViewJni.cu index 7e0b0f9330d..6914d29c8c0 100644 --- a/java/src/main/native/src/ColumnViewJni.cu +++ b/java/src/main/native/src/ColumnViewJni.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021-2022, NVIDIA CORPORATION. + * Copyright (c) 2021-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -31,6 +31,7 @@ #include #include #include +#include #include #include #include @@ -192,7 +193,7 @@ std::unique_ptr lists_distinct_by_key(cudf::lists_column_view cons child.child(0), child.child(1)}}, // input table std::vector{0, 1}, // key columns cudf::duplicate_keep_option::KEEP_LAST, cudf::null_equality::EQUAL, - cudf::nan_equality::ALL_EQUAL, stream) + cudf::nan_equality::ALL_EQUAL, stream, rmm::mr::get_current_device_resource()) ->release(); auto const out_labels = out_columns.front()->view(); From 146afb2b083b96083c5dc8aa3dd3d6f2ec8bee73 Mon Sep 17 00:00:00 2001 From: Vyas Ramasubramani Date: Thu, 9 Mar 2023 15:14:13 -0800 Subject: [PATCH 13/17] clang-format. --- java/src/main/native/src/ColumnViewJni.cu | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/java/src/main/native/src/ColumnViewJni.cu b/java/src/main/native/src/ColumnViewJni.cu index 6914d29c8c0..8a2c0b2b411 100644 --- a/java/src/main/native/src/ColumnViewJni.cu +++ b/java/src/main/native/src/ColumnViewJni.cu @@ -188,13 +188,14 @@ std::unique_ptr lists_distinct_by_key(cudf::lists_column_view cons // Use `cudf::duplicate_keep_option::KEEP_LAST` so this will produce the desired behavior when // being called in `create_map` in spark-rapids. // Other options comparing nulls and NaNs are set as all-equal. - auto out_columns = cudf::detail::stable_distinct( - table_view{{column_view{cudf::device_span{labels}}, - child.child(0), child.child(1)}}, // input table - std::vector{0, 1}, // key columns - cudf::duplicate_keep_option::KEEP_LAST, cudf::null_equality::EQUAL, - cudf::nan_equality::ALL_EQUAL, stream, rmm::mr::get_current_device_resource()) - ->release(); + auto out_columns = + cudf::detail::stable_distinct( + table_view{{column_view{cudf::device_span{labels}}, child.child(0), + child.child(1)}}, // input table + std::vector{0, 1}, // key columns + cudf::duplicate_keep_option::KEEP_LAST, cudf::null_equality::EQUAL, + cudf::nan_equality::ALL_EQUAL, stream, rmm::mr::get_current_device_resource()) + ->release(); auto const out_labels = out_columns.front()->view(); // Assemble a structs column of . From 91cb17741bd9ee32b189442a6f5fd2e7425f558e Mon Sep 17 00:00:00 2001 From: Vyas Ramasubramani Date: Fri, 10 Mar 2023 15:36:44 -0500 Subject: [PATCH 14/17] Update cpp/src/stream_compaction/distinct.cu Co-authored-by: David Wendt <45795991+davidwendt@users.noreply.github.com> --- cpp/src/stream_compaction/distinct.cu | 1 + 1 file changed, 1 insertion(+) diff --git a/cpp/src/stream_compaction/distinct.cu b/cpp/src/stream_compaction/distinct.cu index 83c97521abf..083b1b2eb46 100644 --- a/cpp/src/stream_compaction/distinct.cu +++ b/cpp/src/stream_compaction/distinct.cu @@ -26,6 +26,7 @@ #include #include + #include #include #include From 791d873f1feb31391a60619a165d5d4a22fc761b Mon Sep 17 00:00:00 2001 From: Vyas Ramasubramani Date: Mon, 13 Mar 2023 07:21:36 -0700 Subject: [PATCH 15/17] Go back to calling detail::gather and pass the now required parameters. --- cpp/tests/copying/detail_gather_tests.cu | 24 +++++++++++++++++------- 1 file changed, 17 insertions(+), 7 deletions(-) diff --git a/cpp/tests/copying/detail_gather_tests.cu b/cpp/tests/copying/detail_gather_tests.cu index d4898113afb..89d015ff275 100644 --- a/cpp/tests/copying/detail_gather_tests.cu +++ b/cpp/tests/copying/detail_gather_tests.cu @@ -46,11 +46,9 @@ TYPED_TEST_SUITE(GatherTest, cudf::test::NumericTypes); TYPED_TEST(GatherTest, GatherDetailDeviceVectorTest) { constexpr cudf::size_type source_size{1000}; - rmm::device_uvector gather_map_vec(source_size, cudf::get_default_stream()); - auto gather_map = std::make_unique(std::move(gather_map_vec)); - thrust::sequence(rmm::exec_policy_nosync(cudf::get_default_stream()), - gather_map->mutable_view().begin(), - gather_map->mutable_view().end()); + rmm::device_uvector gather_map(source_size, cudf::get_default_stream()); + thrust::sequence( + rmm::exec_policy_nosync(cudf::get_default_stream()), gather_map.begin(), gather_map.end()); auto data = cudf::detail::make_counting_transform_iterator(0, [](auto i) { return i; }); cudf::test::fixed_width_column_wrapper source_column(data, data + source_size); @@ -59,7 +57,13 @@ TYPED_TEST(GatherTest, GatherDetailDeviceVectorTest) // test with device vector iterators { - std::unique_ptr result = cudf::gather(source_table, gather_map->view()); + std::unique_ptr result = + cudf::detail::gather(source_table, + gather_map.begin(), + gather_map.end(), + cudf::out_of_bounds_policy::DONT_CHECK, + cudf::get_default_stream(), + rmm::mr::get_current_device_resource()); for (auto i = 0; i < source_table.num_columns(); ++i) { CUDF_TEST_EXPECT_COLUMNS_EQUAL(source_table.column(i), result->view().column(i)); @@ -70,7 +74,13 @@ TYPED_TEST(GatherTest, GatherDetailDeviceVectorTest) // test with raw pointers { - std::unique_ptr result = cudf::gather(source_table, gather_map->view()); + std::unique_ptr result = + cudf::detail::gather(source_table, + gather_map.begin(), + gather_map.data() + gather_map.size(), + cudf::out_of_bounds_policy::DONT_CHECK, + cudf::get_default_stream(), + rmm::mr::get_current_device_resource()); for (auto i = 0; i < source_table.num_columns(); ++i) { CUDF_TEST_EXPECT_COLUMNS_EQUAL(source_table.column(i), result->view().column(i)); From 3a3e8f36969874f00e61fc004e0d2dfe7abe86d0 Mon Sep 17 00:00:00 2001 From: Vyas Ramasubramani Date: Mon, 13 Mar 2023 07:34:50 -0700 Subject: [PATCH 16/17] Remove default mr from detail::gather and update all call sites. --- cpp/benchmarks/common/generate_input.cu | 4 +++- cpp/include/cudf/detail/gather.hpp | 28 ++++++++++------------ cpp/src/copying/copy.cu | 6 +++-- cpp/src/rolling/detail/lead_lag_nested.cuh | 6 +++-- cpp/tests/copying/detail_gather_tests.cu | 3 ++- cpp/tests/copying/gather_str_tests.cpp | 15 ++++++++---- 6 files changed, 36 insertions(+), 26 deletions(-) diff --git a/cpp/benchmarks/common/generate_input.cu b/cpp/benchmarks/common/generate_input.cu index 2829d14070c..a4650c369ba 100644 --- a/cpp/benchmarks/common/generate_input.cu +++ b/cpp/benchmarks/common/generate_input.cu @@ -32,6 +32,7 @@ #include #include +#include #include #include #include @@ -542,7 +543,8 @@ std::unique_ptr create_random_column(data_profi sample_indices, cudf::out_of_bounds_policy::DONT_CHECK, cudf::detail::negative_index_policy::NOT_ALLOWED, - cudf::get_default_stream()); + cudf::get_default_stream(), + rmm::mr::get_current_device_resource()); return std::move(str_table->release()[0]); } diff --git a/cpp/include/cudf/detail/gather.hpp b/cpp/include/cudf/detail/gather.hpp index 9d61a8de184..034eb6c1282 100644 --- a/cpp/include/cudf/detail/gather.hpp +++ b/cpp/include/cudf/detail/gather.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2022, NVIDIA CORPORATION. + * Copyright (c) 2019-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -61,13 +61,12 @@ enum class negative_index_policy : bool { ALLOWED, NOT_ALLOWED }; * @param[in] mr Device memory resource used to allocate the returned table's device memory * @return Result of the gather */ -std::unique_ptr
gather( - table_view const& source_table, - column_view const& gather_map, - out_of_bounds_policy bounds_policy, - negative_index_policy neg_indices, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); +std::unique_ptr
gather(table_view const& source_table, + column_view const& gather_map, + out_of_bounds_policy bounds_policy, + negative_index_policy neg_indices, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr); /** * @copydoc cudf::detail::gather(table_view const&,column_view const&,table_view @@ -76,13 +75,12 @@ std::unique_ptr
gather( * * @throws cudf::logic_error if `gather_map` span size is larger than max of `size_type`. */ -std::unique_ptr
gather( - table_view const& source_table, - device_span const gather_map, - out_of_bounds_policy bounds_policy, - negative_index_policy neg_indices, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); +std::unique_ptr
gather(table_view const& source_table, + device_span const gather_map, + out_of_bounds_policy bounds_policy, + negative_index_policy neg_indices, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr); } // namespace detail } // namespace cudf diff --git a/cpp/src/copying/copy.cu b/cpp/src/copying/copy.cu index 0978cf441d8..9cea2152996 100644 --- a/cpp/src/copying/copy.cu +++ b/cpp/src/copying/copy.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2022, NVIDIA CORPORATION. + * Copyright (c) 2019-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -30,6 +30,7 @@ #include #include +#include #include #include #include @@ -174,7 +175,8 @@ std::unique_ptr scatter_gather_based_if_else(cudf::column_view const& lh gather_map, out_of_bounds_policy::DONT_CHECK, negative_index_policy::NOT_ALLOWED, - stream); + stream, + rmm::mr::get_current_device_resource()); auto result = cudf::detail::scatter( table_view{std::vector{scatter_src_lhs->get_column(0).view()}}, diff --git a/cpp/src/rolling/detail/lead_lag_nested.cuh b/cpp/src/rolling/detail/lead_lag_nested.cuh index 859ed7e5d53..17b12c74518 100644 --- a/cpp/src/rolling/detail/lead_lag_nested.cuh +++ b/cpp/src/rolling/detail/lead_lag_nested.cuh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021-2022, NVIDIA CORPORATION. + * Copyright (c) 2021-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -27,6 +27,7 @@ #include +#include #include #include #include @@ -191,7 +192,8 @@ std::unique_ptr compute_lead_lag_for_nested(aggregation::Kind op, scatter_map, out_of_bounds_policy::DONT_CHECK, cudf::detail::negative_index_policy::NOT_ALLOWED, - stream); + stream, + rmm::mr::get_current_device_resource()); // Scatter defaults into locations where LEAD/LAG computed nulls. auto scattered_results = cudf::detail::scatter( diff --git a/cpp/tests/copying/detail_gather_tests.cu b/cpp/tests/copying/detail_gather_tests.cu index 89d015ff275..aae511413ef 100644 --- a/cpp/tests/copying/detail_gather_tests.cu +++ b/cpp/tests/copying/detail_gather_tests.cu @@ -107,7 +107,8 @@ TYPED_TEST(GatherTest, GatherDetailInvalidIndexTest) gather_map, cudf::out_of_bounds_policy::NULLIFY, cudf::detail::negative_index_policy::NOT_ALLOWED, - cudf::get_default_stream()); + cudf::get_default_stream(), + rmm::mr::get_current_device_resource()); auto expect_data = cudf::detail::make_counting_transform_iterator(0, [](auto i) { return (i % 2) ? 0 : i; }); diff --git a/cpp/tests/copying/gather_str_tests.cpp b/cpp/tests/copying/gather_str_tests.cpp index 3db2ce399cc..95ab4b25642 100644 --- a/cpp/tests/copying/gather_str_tests.cpp +++ b/cpp/tests/copying/gather_str_tests.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020-2022, NVIDIA CORPORATION. + * Copyright (c) 2020-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -24,6 +24,7 @@ #include #include #include +#include class GatherTestStr : public cudf::test::BaseFixture { }; @@ -87,7 +88,8 @@ TEST_F(GatherTestStr, Gather) gather_map, cudf::out_of_bounds_policy::NULLIFY, cudf::detail::negative_index_policy::NOT_ALLOWED, - cudf::get_default_stream()); + cudf::get_default_stream(), + rmm::mr::get_current_device_resource()); std::vector h_expected; std::vector expected_validity; @@ -118,7 +120,8 @@ TEST_F(GatherTestStr, GatherDontCheckOutOfBounds) gather_map, cudf::out_of_bounds_policy::DONT_CHECK, cudf::detail::negative_index_policy::NOT_ALLOWED, - cudf::get_default_stream()); + cudf::get_default_stream(), + rmm::mr::get_current_device_resource()); std::vector h_expected; for (auto itr = h_map.begin(); itr != h_map.end(); ++itr) { @@ -137,7 +140,8 @@ TEST_F(GatherTestStr, GatherEmptyMapStringsColumn) gather_map, cudf::out_of_bounds_policy::NULLIFY, cudf::detail::negative_index_policy::NOT_ALLOWED, - cudf::get_default_stream()); + cudf::get_default_stream(), + rmm::mr::get_current_device_resource()); cudf::test::expect_column_empty(results->get_column(0).view()); } @@ -151,6 +155,7 @@ TEST_F(GatherTestStr, GatherZeroSizeStringsColumn) gather_map, cudf::out_of_bounds_policy::NULLIFY, cudf::detail::negative_index_policy::NOT_ALLOWED, - cudf::get_default_stream()); + cudf::get_default_stream(), + rmm::mr::get_current_device_resource()); CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, results->get_column(0).view()); } From 3deb09c6274100d077520cb085ae060d5893381a Mon Sep 17 00:00:00 2001 From: Vyas Ramasubramani Date: Mon, 13 Mar 2023 14:37:30 -0400 Subject: [PATCH 17/17] Apply suggestions from code review Co-authored-by: David Wendt <45795991+davidwendt@users.noreply.github.com> --- cpp/benchmarks/common/generate_input.cu | 2 +- cpp/src/copying/copy.cu | 2 +- cpp/src/rolling/detail/lead_lag_nested.cuh | 2 +- cpp/tests/copying/gather_str_tests.cpp | 1 + 4 files changed, 4 insertions(+), 3 deletions(-) diff --git a/cpp/benchmarks/common/generate_input.cu b/cpp/benchmarks/common/generate_input.cu index 085332c697d..545028260b8 100644 --- a/cpp/benchmarks/common/generate_input.cu +++ b/cpp/benchmarks/common/generate_input.cu @@ -31,8 +31,8 @@ #include #include - #include + #include #include #include diff --git a/cpp/src/copying/copy.cu b/cpp/src/copying/copy.cu index 9cea2152996..9ec00612f2f 100644 --- a/cpp/src/copying/copy.cu +++ b/cpp/src/copying/copy.cu @@ -29,8 +29,8 @@ #include #include - #include + #include #include #include diff --git a/cpp/src/rolling/detail/lead_lag_nested.cuh b/cpp/src/rolling/detail/lead_lag_nested.cuh index 17b12c74518..d2fe9fabd1b 100644 --- a/cpp/src/rolling/detail/lead_lag_nested.cuh +++ b/cpp/src/rolling/detail/lead_lag_nested.cuh @@ -26,8 +26,8 @@ #include #include - #include + #include #include #include diff --git a/cpp/tests/copying/gather_str_tests.cpp b/cpp/tests/copying/gather_str_tests.cpp index 95ab4b25642..7810566fbf1 100644 --- a/cpp/tests/copying/gather_str_tests.cpp +++ b/cpp/tests/copying/gather_str_tests.cpp @@ -24,6 +24,7 @@ #include #include #include + #include class GatherTestStr : public cudf::test::BaseFixture {