Skip to content

Commit

Permalink
Merge remote-tracking branch 'upstream/branch-24.04' into rm/pandas_cre
Browse files Browse the repository at this point in the history
  • Loading branch information
mroeschke committed Jan 30, 2024
2 parents fa85f0c + 57bbe94 commit d3ba9ab
Show file tree
Hide file tree
Showing 18 changed files with 131 additions and 166 deletions.
46 changes: 46 additions & 0 deletions cpp/include/cudf/detail/cuco_helpers.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
/*
* Copyright (c) 2024, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#pragma once

#include <rmm/cuda_stream_view.hpp>
#include <rmm/mr/device/polymorphic_allocator.hpp>

namespace cudf::detail {

/**
* @brief Stream-ordered allocator adaptor used for cuco data structures
*
* The stream-ordered `rmm::mr::polymorphic_allocator` cannot be used in `cuco` directly since the
* later expects a standard C++ `Allocator` interface. This allocator helper provides a simple way
* to handle cuco memory allocation/deallocation with the given `stream` and the rmm default memory
* resource.
*/
class cuco_allocator
: public rmm::mr::stream_allocator_adaptor<rmm::mr::polymorphic_allocator<char>> {
/// Default stream-ordered allocator type
using default_allocator = rmm::mr::polymorphic_allocator<char>;
/// The base allocator adaptor type
using base_type = rmm::mr::stream_allocator_adaptor<default_allocator>;

public:
/**
* @brief Constructs the allocator adaptor with the given `stream`
*/
cuco_allocator(rmm::cuda_stream_view stream) : base_type{default_allocator{}, stream} {}
};

} // namespace cudf::detail
8 changes: 3 additions & 5 deletions cpp/include/cudf/detail/hash_reduce_by_row.cuh
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2022-2023, NVIDIA CORPORATION.
* Copyright (c) 2022-2024, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand All @@ -14,15 +14,14 @@
* limitations under the License.
*/

#include <cudf/hashing/detail/hash_allocator.cuh>
#include <cudf/detail/cuco_helpers.hpp>
#include <cudf/hashing/detail/helper_functions.cuh>
#include <cudf/table/experimental/row_operators.cuh>
#include <cudf/types.hpp>

#include <rmm/cuda_stream_view.hpp>
#include <rmm/device_uvector.hpp>
#include <rmm/exec_policy.hpp>
#include <rmm/mr/device/polymorphic_allocator.hpp>

#include <thrust/for_each.h>
#include <thrust/iterator/counting_iterator.h>
Expand All @@ -32,9 +31,8 @@

namespace cudf::detail {

using hash_table_allocator_type = rmm::mr::stream_allocator_adaptor<default_allocator<char>>;
using hash_map_type =
cuco::static_map<size_type, size_type, cuda::thread_scope_device, hash_table_allocator_type>;
cuco::static_map<size_type, size_type, cuda::thread_scope_device, cudf::detail::cuco_allocator>;

/**
* @brief The base struct for customized reduction functor to perform reduce-by-key with keys are
Expand Down
11 changes: 5 additions & 6 deletions cpp/include/cudf/detail/join.hpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2022-2023, NVIDIA CORPORATION.
* Copyright (c) 2022-2024, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand All @@ -24,7 +24,6 @@
#include <rmm/cuda_stream_view.hpp>
#include <rmm/device_buffer.hpp>
#include <rmm/device_uvector.hpp>
#include <rmm/mr/device/polymorphic_allocator.hpp>

#include <cuco/static_multimap.cuh>

Expand All @@ -33,16 +32,16 @@
#include <optional>

// Forward declaration
template <typename T>
class default_allocator;

namespace cudf::experimental::row::equality {
class preprocessed_table;
}

namespace cudf {
namespace detail {

// Forward declaration
class cuco_allocator;

constexpr int DEFAULT_JOIN_CG_SIZE = 2;

enum class join_kind { INNER_JOIN, LEFT_JOIN, FULL_JOIN, LEFT_SEMI_JOIN, LEFT_ANTI_JOIN };
Expand All @@ -62,7 +61,7 @@ struct hash_join {
cuco::static_multimap<hash_value_type,
cudf::size_type,
cuda::thread_scope_device,
rmm::mr::stream_allocator_adaptor<default_allocator<char>>,
cudf::detail::cuco_allocator,
cuco::double_hashing<DEFAULT_JOIN_CG_SIZE, Hasher, Hasher>>;

hash_join() = delete;
Expand Down
62 changes: 0 additions & 62 deletions cpp/include/cudf/hashing/detail/hash_allocator.cuh

This file was deleted.

6 changes: 3 additions & 3 deletions cpp/src/hash/concurrent_unordered_map.cuh
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2017-2023, NVIDIA CORPORATION. All rights reserved.
* Copyright (c) 2017-2024, NVIDIA CORPORATION. All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand All @@ -20,12 +20,12 @@

#include <cudf/detail/nvtx/ranges.hpp>
#include <cudf/hashing/detail/default_hash.cuh>
#include <cudf/hashing/detail/hash_allocator.cuh>
#include <cudf/hashing/detail/helper_functions.cuh>
#include <cudf/utilities/default_stream.hpp>
#include <cudf/utilities/error.hpp>

#include <rmm/cuda_stream_view.hpp>
#include <rmm/mr/device/polymorphic_allocator.hpp>

#include <thrust/pair.h>

Expand Down Expand Up @@ -117,7 +117,7 @@ template <typename Key,
typename Element,
typename Hasher = cudf::hashing::detail::default_hash<Key>,
typename Equality = equal_to<Key>,
typename Allocator = default_allocator<thrust::pair<Key, Element>>>
typename Allocator = rmm::mr::polymorphic_allocator<thrust::pair<Key, Element>>>
class concurrent_unordered_map {
public:
using size_type = size_t;
Expand Down
11 changes: 4 additions & 7 deletions cpp/src/io/json/json_tree.cu
Original file line number Diff line number Diff line change
Expand Up @@ -17,12 +17,12 @@
#include "nested_json.hpp"
#include <io/utilities/hostdevice_vector.hpp>

#include <cudf/detail/cuco_helpers.hpp>
#include <cudf/detail/nvtx/ranges.hpp>
#include <cudf/detail/scatter.cuh>
#include <cudf/detail/utilities/algorithm.cuh>
#include <cudf/detail/utilities/vector_factories.hpp>
#include <cudf/hashing/detail/default_hash.cuh>
#include <cudf/hashing/detail/hash_allocator.cuh>
#include <cudf/hashing/detail/hashing.hpp>
#include <cudf/hashing/detail/helper_functions.cuh>
#include <cudf/utilities/error.hpp>
Expand All @@ -31,7 +31,6 @@
#include <rmm/cuda_stream_view.hpp>
#include <rmm/device_uvector.hpp>
#include <rmm/exec_policy.hpp>
#include <rmm/mr/device/polymorphic_allocator.hpp>

#include <cub/device/device_radix_sort.cuh>

Expand Down Expand Up @@ -511,7 +510,6 @@ rmm::device_uvector<size_type> hash_node_type_with_field_name(device_span<Symbol
rmm::cuda_stream_view stream)
{
CUDF_FUNC_RANGE();
using hash_table_allocator_type = rmm::mr::stream_allocator_adaptor<default_allocator<char>>;

auto const num_nodes = d_tree.node_categories.size();
auto const num_fields = thrust::count(rmm::exec_policy(stream),
Expand Down Expand Up @@ -555,7 +553,7 @@ rmm::device_uvector<size_type> hash_node_type_with_field_name(device_span<Symbol
cuco::empty_key{empty_node_index_sentinel},
d_equal,
cuco::experimental::linear_probing<1, hasher_type>{d_hasher},
hash_table_allocator_type{default_allocator<char>{}, stream},
cudf::detail::cuco_allocator{stream},
stream.value()};
key_set.insert_if_async(iter,
iter + num_nodes,
Expand Down Expand Up @@ -735,15 +733,14 @@ std::pair<rmm::device_uvector<size_type>, rmm::device_uvector<size_type>> hash_n
};

constexpr size_type empty_node_index_sentinel = -1;
using hash_table_allocator_type = rmm::mr::stream_allocator_adaptor<default_allocator<char>>;
using hasher_type = decltype(d_hashed_cache);
using hasher_type = decltype(d_hashed_cache);

auto key_set = cuco::experimental::static_set{
cuco::experimental::extent{compute_hash_table_size(num_nodes)},
cuco::empty_key<cudf::size_type>{empty_node_index_sentinel},
d_equal,
cuco::experimental::linear_probing<1, hasher_type>{d_hashed_cache},
hash_table_allocator_type{default_allocator<char>{}, stream},
cudf::detail::cuco_allocator{stream},
stream.value()};

// insert and convert node ids to unique set ids
Expand Down
5 changes: 3 additions & 2 deletions cpp/src/join/hash_join.cu
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2020-2023, NVIDIA CORPORATION.
* Copyright (c) 2020-2024, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand All @@ -19,6 +19,7 @@
#include <cudf/detail/iterator.cuh>
#include <cudf/detail/join.hpp>
#include <cudf/detail/structs/utilities.hpp>
#include <cudf/hashing/detail/helper_functions.cuh>
#include <cudf/join.hpp>

#include <rmm/cuda_stream_view.hpp>
Expand Down Expand Up @@ -368,7 +369,7 @@ hash_join<Hasher>::hash_join(cudf::table_view const& build,
cuco::empty_key{std::numeric_limits<hash_value_type>::max()},
cuco::empty_value{cudf::detail::JoinNoneValue},
stream.value(),
detail::hash_table_allocator_type{default_allocator<char>{}, stream}},
cudf::detail::cuco_allocator{stream}},
_build{build},
_preprocessed_build{
cudf::experimental::row::equality::preprocessed_table::create(_build, stream)}
Expand Down
13 changes: 4 additions & 9 deletions cpp/src/join/join_common_utils.hpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2019-2023, NVIDIA CORPORATION.
* Copyright (c) 2019-2024, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand All @@ -15,16 +15,13 @@
*/
#pragma once

#include <cudf/detail/cuco_helpers.hpp>
#include <cudf/detail/join.hpp>
#include <cudf/hashing/detail/default_hash.cuh>
#include <cudf/hashing/detail/hash_allocator.cuh>
#include <cudf/hashing/detail/helper_functions.cuh>
#include <cudf/join.hpp>
#include <cudf/table/row_operators.cuh>
#include <cudf/table/table_view.hpp>

#include <rmm/mr/device/polymorphic_allocator.hpp>

#include <cuco/static_map.cuh>
#include <cuco/static_multimap.cuh>

Expand All @@ -43,8 +40,6 @@ using pair_type = cuco::pair<hash_value_type, size_type>;

using hash_type = cuco::murmurhash3_32<hash_value_type>;

using hash_table_allocator_type = rmm::mr::stream_allocator_adaptor<default_allocator<char>>;

using multimap_type = cudf::hash_join::impl_type::map_type;

// Multimap type used for mixed joins. TODO: This is a temporary alias used
Expand All @@ -53,11 +48,11 @@ using multimap_type = cudf::hash_join::impl_type::map_type;
using mixed_multimap_type = cuco::static_multimap<hash_value_type,
size_type,
cuda::thread_scope_device,
hash_table_allocator_type,
cudf::detail::cuco_allocator,
cuco::double_hashing<1, hash_type, hash_type>>;

using semi_map_type = cuco::
static_map<hash_value_type, size_type, cuda::thread_scope_device, hash_table_allocator_type>;
static_map<hash_value_type, size_type, cuda::thread_scope_device, cudf::detail::cuco_allocator>;

using row_hash_legacy =
cudf::row_hasher<cudf::hashing::detail::default_hash, cudf::nullate::DYNAMIC>;
Expand Down
25 changes: 12 additions & 13 deletions cpp/src/join/mixed_join.cu
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2022-2023, NVIDIA CORPORATION.
* Copyright (c) 2022-2024, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand All @@ -21,6 +21,7 @@
#include <cudf/ast/detail/expression_parser.hpp>
#include <cudf/ast/expressions.hpp>
#include <cudf/detail/utilities/cuda.cuh>
#include <cudf/hashing/detail/helper_functions.cuh>
#include <cudf/join.hpp>
#include <cudf/table/table.hpp>
#include <cudf/table/table_device_view.cuh>
Expand Down Expand Up @@ -127,12 +128,11 @@ mixed_join(
auto build_view = table_device_view::create(build, stream);

// Don't use multimap_type because we want a CG size of 1.
mixed_multimap_type hash_table{
compute_hash_table_size(build.num_rows()),
cuco::empty_key{std::numeric_limits<hash_value_type>::max()},
cuco::empty_value{cudf::detail::JoinNoneValue},
stream.value(),
detail::hash_table_allocator_type{default_allocator<char>{}, stream}};
mixed_multimap_type hash_table{compute_hash_table_size(build.num_rows()),
cuco::empty_key{std::numeric_limits<hash_value_type>::max()},
cuco::empty_value{cudf::detail::JoinNoneValue},
stream.value(),
cudf::detail::cuco_allocator{stream}};

// TODO: To add support for nested columns we will need to flatten in many
// places. However, this probably isn't worth adding any time soon since we
Expand Down Expand Up @@ -393,12 +393,11 @@ compute_mixed_join_output_size(table_view const& left_equality,
auto build_view = table_device_view::create(build, stream);

// Don't use multimap_type because we want a CG size of 1.
mixed_multimap_type hash_table{
compute_hash_table_size(build.num_rows()),
cuco::empty_key{std::numeric_limits<hash_value_type>::max()},
cuco::empty_value{cudf::detail::JoinNoneValue},
stream.value(),
detail::hash_table_allocator_type{default_allocator<char>{}, stream}};
mixed_multimap_type hash_table{compute_hash_table_size(build.num_rows()),
cuco::empty_key{std::numeric_limits<hash_value_type>::max()},
cuco::empty_value{cudf::detail::JoinNoneValue},
stream.value(),
cudf::detail::cuco_allocator{stream}};

// TODO: To add support for nested columns we will need to flatten in many
// places. However, this probably isn't worth adding any time soon since we
Expand Down
Loading

0 comments on commit d3ba9ab

Please sign in to comment.