Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add detail cuco_allocator #14877

Merged
merged 8 commits into from
Jan 30, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
46 changes: 46 additions & 0 deletions cpp/include/cudf/detail/cuco_helpers.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
/*
* Copyright (c) 2024, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#pragma once

#include <rmm/cuda_stream_view.hpp>
#include <rmm/mr/device/polymorphic_allocator.hpp>

namespace cudf::detail {

/**
* @brief Stream-ordered allocator adaptor used for cuco data structures
*
* The stream-ordered `rmm::mr::polymorphic_allocator` cannot be used in `cuco` directly since the
* later expects a standard C++ `Allocator` interface. This allocator helper provides a simple way
* to handle cuco memory allocation/deallocation with the given `stream` and the rmm default memory
* resource.
*/
class cuco_allocator
ttnghia marked this conversation as resolved.
Show resolved Hide resolved
: public rmm::mr::stream_allocator_adaptor<rmm::mr::polymorphic_allocator<char>> {
/// Default stream-ordered allocator type
using default_allocator = rmm::mr::polymorphic_allocator<char>;
/// The base allocator adaptor type
using base_type = rmm::mr::stream_allocator_adaptor<default_allocator>;

public:
/**
* @brief Constructs the allocator adaptor with the given `stream`
*/
cuco_allocator(rmm::cuda_stream_view stream) : base_type{default_allocator{}, stream} {}
};

} // namespace cudf::detail
8 changes: 3 additions & 5 deletions cpp/include/cudf/detail/hash_reduce_by_row.cuh
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2022-2023, NVIDIA CORPORATION.
* Copyright (c) 2022-2024, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand All @@ -14,15 +14,14 @@
* limitations under the License.
*/

#include <cudf/hashing/detail/hash_allocator.cuh>
#include <cudf/detail/cuco_helpers.hpp>
#include <cudf/hashing/detail/helper_functions.cuh>
#include <cudf/table/experimental/row_operators.cuh>
#include <cudf/types.hpp>

#include <rmm/cuda_stream_view.hpp>
#include <rmm/device_uvector.hpp>
#include <rmm/exec_policy.hpp>
#include <rmm/mr/device/polymorphic_allocator.hpp>

#include <thrust/for_each.h>
#include <thrust/iterator/counting_iterator.h>
Expand All @@ -32,9 +31,8 @@

namespace cudf::detail {

using hash_table_allocator_type = rmm::mr::stream_allocator_adaptor<default_allocator<char>>;
using hash_map_type =
cuco::static_map<size_type, size_type, cuda::thread_scope_device, hash_table_allocator_type>;
cuco::static_map<size_type, size_type, cuda::thread_scope_device, cudf::detail::cuco_allocator>;

/**
* @brief The base struct for customized reduction functor to perform reduce-by-key with keys are
Expand Down
11 changes: 5 additions & 6 deletions cpp/include/cudf/detail/join.hpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2022-2023, NVIDIA CORPORATION.
* Copyright (c) 2022-2024, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand All @@ -24,7 +24,6 @@
#include <rmm/cuda_stream_view.hpp>
#include <rmm/device_buffer.hpp>
#include <rmm/device_uvector.hpp>
#include <rmm/mr/device/polymorphic_allocator.hpp>

#include <cuco/static_multimap.cuh>

Expand All @@ -33,16 +32,16 @@
#include <optional>

// Forward declaration
template <typename T>
class default_allocator;

namespace cudf::experimental::row::equality {
class preprocessed_table;
}

namespace cudf {
namespace detail {

// Forward declaration
class cuco_allocator;

constexpr int DEFAULT_JOIN_CG_SIZE = 2;

enum class join_kind { INNER_JOIN, LEFT_JOIN, FULL_JOIN, LEFT_SEMI_JOIN, LEFT_ANTI_JOIN };
Expand All @@ -62,7 +61,7 @@ struct hash_join {
cuco::static_multimap<hash_value_type,
cudf::size_type,
cuda::thread_scope_device,
rmm::mr::stream_allocator_adaptor<default_allocator<char>>,
cudf::detail::cuco_allocator,
cuco::double_hashing<DEFAULT_JOIN_CG_SIZE, Hasher, Hasher>>;

hash_join() = delete;
Expand Down
62 changes: 0 additions & 62 deletions cpp/include/cudf/hashing/detail/hash_allocator.cuh

This file was deleted.

6 changes: 3 additions & 3 deletions cpp/src/hash/concurrent_unordered_map.cuh
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2017-2023, NVIDIA CORPORATION. All rights reserved.
* Copyright (c) 2017-2024, NVIDIA CORPORATION. All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand All @@ -20,12 +20,12 @@

#include <cudf/detail/nvtx/ranges.hpp>
#include <cudf/hashing/detail/default_hash.cuh>
#include <cudf/hashing/detail/hash_allocator.cuh>
#include <cudf/hashing/detail/helper_functions.cuh>
#include <cudf/utilities/default_stream.hpp>
#include <cudf/utilities/error.hpp>

#include <rmm/cuda_stream_view.hpp>
#include <rmm/mr/device/polymorphic_allocator.hpp>

#include <thrust/pair.h>

Expand Down Expand Up @@ -117,7 +117,7 @@ template <typename Key,
typename Element,
typename Hasher = cudf::hashing::detail::default_hash<Key>,
typename Equality = equal_to<Key>,
typename Allocator = default_allocator<thrust::pair<Key, Element>>>
typename Allocator = rmm::mr::polymorphic_allocator<thrust::pair<Key, Element>>>
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I guess this can't use the cuco_allocator because it needs to allocate in units that aren't just raw bytes, but instead based on the Key/Element pair size?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Exactly — cuco internally uses a rebind trait to convert the given allocator to the desired value type, so the allocator's declared data type doesn't really matter there. `concurrent_unordered_map` does not implement such a rebind, so we have to pass an allocator of the proper pair type to make it work.

class concurrent_unordered_map {
public:
using size_type = size_t;
Expand Down
11 changes: 4 additions & 7 deletions cpp/src/io/json/json_tree.cu
Original file line number Diff line number Diff line change
Expand Up @@ -17,12 +17,12 @@
#include "nested_json.hpp"
#include <io/utilities/hostdevice_vector.hpp>

#include <cudf/detail/cuco_helpers.hpp>
#include <cudf/detail/nvtx/ranges.hpp>
#include <cudf/detail/scatter.cuh>
#include <cudf/detail/utilities/algorithm.cuh>
#include <cudf/detail/utilities/vector_factories.hpp>
#include <cudf/hashing/detail/default_hash.cuh>
#include <cudf/hashing/detail/hash_allocator.cuh>
#include <cudf/hashing/detail/hashing.hpp>
#include <cudf/hashing/detail/helper_functions.cuh>
#include <cudf/utilities/error.hpp>
Expand All @@ -31,7 +31,6 @@
#include <rmm/cuda_stream_view.hpp>
#include <rmm/device_uvector.hpp>
#include <rmm/exec_policy.hpp>
#include <rmm/mr/device/polymorphic_allocator.hpp>

#include <cub/device/device_radix_sort.cuh>

Expand Down Expand Up @@ -511,7 +510,6 @@ rmm::device_uvector<size_type> hash_node_type_with_field_name(device_span<Symbol
rmm::cuda_stream_view stream)
{
CUDF_FUNC_RANGE();
using hash_table_allocator_type = rmm::mr::stream_allocator_adaptor<default_allocator<char>>;

auto const num_nodes = d_tree.node_categories.size();
auto const num_fields = thrust::count(rmm::exec_policy(stream),
Expand Down Expand Up @@ -555,7 +553,7 @@ rmm::device_uvector<size_type> hash_node_type_with_field_name(device_span<Symbol
cuco::empty_key{empty_node_index_sentinel},
d_equal,
cuco::experimental::linear_probing<1, hasher_type>{d_hasher},
hash_table_allocator_type{default_allocator<char>{}, stream},
cudf::detail::cuco_allocator{stream},
stream.value()};
key_set.insert_if_async(iter,
iter + num_nodes,
Expand Down Expand Up @@ -735,15 +733,14 @@ std::pair<rmm::device_uvector<size_type>, rmm::device_uvector<size_type>> hash_n
};

constexpr size_type empty_node_index_sentinel = -1;
using hash_table_allocator_type = rmm::mr::stream_allocator_adaptor<default_allocator<char>>;
using hasher_type = decltype(d_hashed_cache);
using hasher_type = decltype(d_hashed_cache);

auto key_set = cuco::experimental::static_set{
cuco::experimental::extent{compute_hash_table_size(num_nodes)},
cuco::empty_key<cudf::size_type>{empty_node_index_sentinel},
d_equal,
cuco::experimental::linear_probing<1, hasher_type>{d_hashed_cache},
hash_table_allocator_type{default_allocator<char>{}, stream},
cudf::detail::cuco_allocator{stream},
stream.value()};

// insert and convert node ids to unique set ids
Expand Down
5 changes: 3 additions & 2 deletions cpp/src/join/hash_join.cu
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2020-2023, NVIDIA CORPORATION.
* Copyright (c) 2020-2024, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand All @@ -19,6 +19,7 @@
#include <cudf/detail/iterator.cuh>
#include <cudf/detail/join.hpp>
#include <cudf/detail/structs/utilities.hpp>
#include <cudf/hashing/detail/helper_functions.cuh>
#include <cudf/join.hpp>

#include <rmm/cuda_stream_view.hpp>
Expand Down Expand Up @@ -368,7 +369,7 @@ hash_join<Hasher>::hash_join(cudf::table_view const& build,
cuco::empty_key{std::numeric_limits<hash_value_type>::max()},
cuco::empty_value{cudf::detail::JoinNoneValue},
stream.value(),
detail::hash_table_allocator_type{default_allocator<char>{}, stream}},
cudf::detail::cuco_allocator{stream}},
_build{build},
_preprocessed_build{
cudf::experimental::row::equality::preprocessed_table::create(_build, stream)}
Expand Down
13 changes: 4 additions & 9 deletions cpp/src/join/join_common_utils.hpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2019-2023, NVIDIA CORPORATION.
* Copyright (c) 2019-2024, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand All @@ -15,16 +15,13 @@
*/
#pragma once

#include <cudf/detail/cuco_helpers.hpp>
#include <cudf/detail/join.hpp>
#include <cudf/hashing/detail/default_hash.cuh>
#include <cudf/hashing/detail/hash_allocator.cuh>
#include <cudf/hashing/detail/helper_functions.cuh>
#include <cudf/join.hpp>
#include <cudf/table/row_operators.cuh>
#include <cudf/table/table_view.hpp>

#include <rmm/mr/device/polymorphic_allocator.hpp>

#include <cuco/static_map.cuh>
#include <cuco/static_multimap.cuh>

Expand All @@ -43,8 +40,6 @@ using pair_type = cuco::pair<hash_value_type, size_type>;

using hash_type = cuco::murmurhash3_32<hash_value_type>;

using hash_table_allocator_type = rmm::mr::stream_allocator_adaptor<default_allocator<char>>;

using multimap_type = cudf::hash_join::impl_type::map_type;

// Multimap type used for mixed joins. TODO: This is a temporary alias used
Expand All @@ -53,11 +48,11 @@ using multimap_type = cudf::hash_join::impl_type::map_type;
using mixed_multimap_type = cuco::static_multimap<hash_value_type,
size_type,
cuda::thread_scope_device,
hash_table_allocator_type,
cudf::detail::cuco_allocator,
cuco::double_hashing<1, hash_type, hash_type>>;

using semi_map_type = cuco::
static_map<hash_value_type, size_type, cuda::thread_scope_device, hash_table_allocator_type>;
static_map<hash_value_type, size_type, cuda::thread_scope_device, cudf::detail::cuco_allocator>;

using row_hash_legacy =
cudf::row_hasher<cudf::hashing::detail::default_hash, cudf::nullate::DYNAMIC>;
Expand Down
25 changes: 12 additions & 13 deletions cpp/src/join/mixed_join.cu
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2022-2023, NVIDIA CORPORATION.
* Copyright (c) 2022-2024, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand All @@ -21,6 +21,7 @@
#include <cudf/ast/detail/expression_parser.hpp>
#include <cudf/ast/expressions.hpp>
#include <cudf/detail/utilities/cuda.cuh>
#include <cudf/hashing/detail/helper_functions.cuh>
#include <cudf/join.hpp>
#include <cudf/table/table.hpp>
#include <cudf/table/table_device_view.cuh>
Expand Down Expand Up @@ -127,12 +128,11 @@ mixed_join(
auto build_view = table_device_view::create(build, stream);

// Don't use multimap_type because we want a CG size of 1.
mixed_multimap_type hash_table{
compute_hash_table_size(build.num_rows()),
cuco::empty_key{std::numeric_limits<hash_value_type>::max()},
cuco::empty_value{cudf::detail::JoinNoneValue},
stream.value(),
detail::hash_table_allocator_type{default_allocator<char>{}, stream}};
mixed_multimap_type hash_table{compute_hash_table_size(build.num_rows()),
cuco::empty_key{std::numeric_limits<hash_value_type>::max()},
cuco::empty_value{cudf::detail::JoinNoneValue},
stream.value(),
cudf::detail::cuco_allocator{stream}};

// TODO: To add support for nested columns we will need to flatten in many
// places. However, this probably isn't worth adding any time soon since we
Expand Down Expand Up @@ -393,12 +393,11 @@ compute_mixed_join_output_size(table_view const& left_equality,
auto build_view = table_device_view::create(build, stream);

// Don't use multimap_type because we want a CG size of 1.
mixed_multimap_type hash_table{
compute_hash_table_size(build.num_rows()),
cuco::empty_key{std::numeric_limits<hash_value_type>::max()},
cuco::empty_value{cudf::detail::JoinNoneValue},
stream.value(),
detail::hash_table_allocator_type{default_allocator<char>{}, stream}};
mixed_multimap_type hash_table{compute_hash_table_size(build.num_rows()),
cuco::empty_key{std::numeric_limits<hash_value_type>::max()},
cuco::empty_value{cudf::detail::JoinNoneValue},
stream.value(),
cudf::detail::cuco_allocator{stream}};

// TODO: To add support for nested columns we will need to flatten in many
// places. However, this probably isn't worth adding any time soon since we
Expand Down
Loading
Loading