Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Move template parameter to function parameter in cudf::detail::left_semi_anti_join #8914

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions cpp/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -305,6 +305,7 @@ add_library(cudf
src/join/cross_join.cu
src/join/hash_join.cu
src/join/join.cu
src/join/join_utils.cu
src/join/semi_join.cu
src/lists/contains.cu
src/lists/combine/concatenate_list_elements.cu
Expand Down
2 changes: 1 addition & 1 deletion cpp/src/join/join.cu
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2019-2020, NVIDIA CORPORATION.
* Copyright (c) 2019-2021, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand Down
134 changes: 22 additions & 112 deletions cpp/src/join/join_common_utils.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -21,17 +21,17 @@

#include <rmm/cuda_stream_view.hpp>
#include <rmm/device_uvector.hpp>
#include <rmm/exec_policy.hpp>

#include <thrust/sequence.h>
#include <cub/cub.cuh>

namespace cudf {
namespace detail {

/**
* @brief Computes the trivial left join operation for the case when the
* right table is empty. In this case all the valid indices of the left table
* right table is empty.
*
* In this case all the valid indices of the left table
* are returned with their corresponding right indices being set to
* JoinNoneValue, i.e. -1.
*
Expand All @@ -41,21 +41,12 @@ namespace detail {
*
* @return Join output indices vector pair
*/
inline std::pair<std::unique_ptr<rmm::device_uvector<size_type>>,
std::unique_ptr<rmm::device_uvector<size_type>>>
std::pair<std::unique_ptr<rmm::device_uvector<size_type>>,
std::unique_ptr<rmm::device_uvector<size_type>>>
get_trivial_left_join_indices(
table_view const& left,
rmm::cuda_stream_view stream,
rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource())
{
auto left_indices = std::make_unique<rmm::device_uvector<size_type>>(left.num_rows(), stream, mr);
thrust::sequence(rmm::exec_policy(stream), left_indices->begin(), left_indices->end(), 0);
auto right_indices =
std::make_unique<rmm::device_uvector<size_type>>(left.num_rows(), stream, mr);
thrust::uninitialized_fill(
rmm::exec_policy(stream), right_indices->begin(), right_indices->end(), JoinNoneValue);
return std::make_pair(std::move(left_indices), std::move(right_indices));
}
rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());

// Convenient alias for a pair of unique pointers to device uvectors.
using VectorPair = std::pair<std::unique_ptr<rmm::device_uvector<size_type>>,
Expand Down Expand Up @@ -83,47 +74,11 @@ using VectorPair = std::pair<std::unique_ptr<rmm::device_uvector<size_type>>,
*
* @return A pair of vectors containing the concatenated output.
*/
inline VectorPair concatenate_vector_pairs(VectorPair& a,
VectorPair& b,
rmm::cuda_stream_view stream)
{
// Both vectors of a pair must have the same length, otherwise the pair is malformed.
CUDF_EXPECTS((a.first->size() == a.second->size()),
"Mismatch between sizes of vectors in vector pair");
CUDF_EXPECTS((b.first->size() == b.second->size()),
"Mismatch between sizes of vectors in vector pair");
// If one side is empty there is nothing to copy: hand back the other pair by moving it out.
if (a.first->is_empty()) {
return std::move(b);
} else if (b.first->is_empty()) {
return std::move(a);
}
// Remember where the contents of `a` end before growing it; `b` is appended from there.
auto original_size = a.first->size();
a.first->resize(a.first->size() + b.first->size(), stream);
a.second->resize(a.second->size() + b.second->size(), stream);
// Append b.first behind the original contents of a.first.
thrust::copy(
rmm::exec_policy(stream), b.first->begin(), b.first->end(), a.first->begin() + original_size);
// Append b.second behind the original contents of a.second.
thrust::copy(rmm::exec_policy(stream),
b.second->begin(),
b.second->end(),
a.second->begin() + original_size);
// `a` is a reference parameter, so an explicit move is needed to transfer its buffers out.
return std::move(a);
}

/**
* @brief Device functor to determine if an index is contained in a range.
*/
template <typename T>
struct valid_range {
// Bounds of the half-open interval [start, stop).
T start, stop;
// Stores `begin` as the inclusive lower bound and `end` as the exclusive upper bound.
__host__ __device__ valid_range(const T begin, const T end) : start(begin), stop(end) {}

// Returns true when `index` lies in [start, stop), false otherwise.
__host__ __device__ __forceinline__ bool operator()(const T index)
{
return ((index >= start) && (index < stop));
}
};
VectorPair concatenate_vector_pairs(VectorPair& a, VectorPair& b, rmm::cuda_stream_view stream);

/**
* @brief Creates a table containing the complement of left join indices.
*
* This table has two columns. The first one is filled with JoinNoneValue(-1)
* and the second one contains values from 0 to right_table_row_count - 1
* excluding those found in the right_indices column.
Expand All @@ -136,72 +91,27 @@ struct valid_range {
*
* @return Pair of vectors containing the left join indices complement
*/
inline std::pair<std::unique_ptr<rmm::device_uvector<size_type>>,
std::unique_ptr<rmm::device_uvector<size_type>>>
std::pair<std::unique_ptr<rmm::device_uvector<size_type>>,
std::unique_ptr<rmm::device_uvector<size_type>>>
get_left_join_indices_complement(std::unique_ptr<rmm::device_uvector<size_type>>& right_indices,
size_type left_table_row_count,
size_type right_table_row_count,
rmm::cuda_stream_view stream,
rmm::mr::device_memory_resource* mr)
{
// Get array of indices that do not appear in right_indices

// Vector allocated for unmatched result
auto right_indices_complement =
std::make_unique<rmm::device_uvector<size_type>>(right_table_row_count, stream);

// If left table is empty in a full join call then all rows of the right table
// should be represented in the joined indices. This is an optimization since
// if left table is empty and full join is called all the elements in
// right_indices will be JoinNoneValue, i.e. -1. This if path should
// produce exactly the same result as the else path but will be faster.
if (left_table_row_count == 0) {
thrust::sequence(rmm::exec_policy(stream),
right_indices_complement->begin(),
right_indices_complement->end(),
0);
} else {
// Assume all the indices in invalid_index_map are invalid
auto invalid_index_map =
std::make_unique<rmm::device_uvector<size_type>>(right_table_row_count, stream);
thrust::uninitialized_fill(
rmm::exec_policy(stream), invalid_index_map->begin(), invalid_index_map->end(), int32_t{1});

// Functor to check for index validity since left joins can create invalid indices
valid_range<size_type> valid(0, right_table_row_count);
rmm::mr::device_memory_resource* mr);

// invalid_index_map[index_ptr[i]] = 0 for i = 0 to right_table_row_count
// Thus specifying that those locations are valid
thrust::scatter_if(rmm::exec_policy(stream),
thrust::make_constant_iterator(0),
thrust::make_constant_iterator(0) + right_indices->size(),
right_indices->begin(), // Index locations
right_indices->begin(), // Stencil - Check if index location is valid
invalid_index_map->begin(), // Output indices
valid); // Stencil Predicate
size_type begin_counter = static_cast<size_type>(0);
size_type end_counter = static_cast<size_type>(right_table_row_count);
/**
* @brief Device functor to determine if an index is contained in a range.
*/
template <typename T>
struct valid_range {
T start, stop;
__host__ __device__ valid_range(const T begin, const T end) : start(begin), stop(end) {}

// Create list of indices that have been marked as invalid
size_type indices_count = thrust::copy_if(rmm::exec_policy(stream),
thrust::make_counting_iterator(begin_counter),
thrust::make_counting_iterator(end_counter),
invalid_index_map->begin(),
right_indices_complement->begin(),
thrust::identity<size_type>()) -
right_indices_complement->begin();
right_indices_complement->resize(indices_count, stream);
__host__ __device__ __forceinline__ bool operator()(const T index)
{
return ((index >= start) && (index < stop));
}

auto left_invalid_indices =
std::make_unique<rmm::device_uvector<size_type>>(right_indices_complement->size(), stream);
thrust::uninitialized_fill(rmm::exec_policy(stream),
left_invalid_indices->begin(),
left_invalid_indices->end(),
JoinNoneValue);

return std::make_pair(std::move(left_invalid_indices), std::move(right_indices_complement));
}
};

/**
* @brief Adds a pair of indices to the shared memory cache
Expand Down
25 changes: 2 additions & 23 deletions cpp/src/join/join_common_utils.hpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2019-2020, NVIDIA CORPORATION.
* Copyright (c) 2019-2021, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand All @@ -19,8 +19,6 @@
#include <cudf/table/row_operators.cuh>
#include <cudf/table/table_view.hpp>

#include <rmm/device_uvector.hpp>

#include <hash/concurrent_unordered_multimap.cuh>

#include <limits>
Expand Down Expand Up @@ -49,26 +47,7 @@ using row_equality = cudf::row_equality_comparator<true>;

enum class join_kind { INNER_JOIN, LEFT_JOIN, FULL_JOIN, LEFT_SEMI_JOIN, LEFT_ANTI_JOIN };

// Returns true when the join described by `left`, `right` and `join_type` can be answered
// without running the full join; returns false otherwise.
inline bool is_trivial_join(table_view const& left, table_view const& right, join_kind join_type)
{
// If there is nothing to join, then send empty table with all columns
if (left.is_empty() || right.is_empty()) { return true; }

// If left join and the left table is empty, return immediately
if ((join_kind::LEFT_JOIN == join_type) && (0 == left.num_rows())) { return true; }

// If Inner Join and either table is empty, return immediately
if ((join_kind::INNER_JOIN == join_type) && ((0 == left.num_rows()) || (0 == right.num_rows()))) {
return true;
}

// If left semi join (contains) and right table is empty,
// return immediately
if ((join_kind::LEFT_SEMI_JOIN == join_type) && (0 == right.num_rows())) { return true; }

// No shortcut applies (this includes every FULL_JOIN and LEFT_ANTI_JOIN that got this far).
return false;
}
bool is_trivial_join(table_view const& left, table_view const& right, join_kind join_type);

} // namespace detail

} // namespace cudf
155 changes: 155 additions & 0 deletions cpp/src/join/join_utils.cu
Original file line number Diff line number Diff line change
@@ -0,0 +1,155 @@
/*
* Copyright (c) 2021, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#include <join/join_common_utils.cuh>

#include <rmm/exec_policy.hpp>

#include <thrust/copy.h>
#include <thrust/iterator/constant_iterator.h>
#include <thrust/scatter.h>
#include <thrust/sequence.h>

namespace cudf {
namespace detail {

/**
 * @brief Tells whether a join can be answered without running the full join.
 *
 * @param left The left table of the join
 * @param right The right table of the join
 * @param join_type The kind of join being performed
 *
 * @return true if either table reports `is_empty()`, or if the row counts make
 * the result trivial for the given join kind; false otherwise
 */
bool is_trivial_join(table_view const& left, table_view const& right, join_kind join_type)
{
  // Nothing to join at all: the caller sends back an empty table with all columns.
  if (left.is_empty() || right.is_empty()) { return true; }

  auto const left_has_no_rows  = (0 == left.num_rows());
  auto const right_has_no_rows = (0 == right.num_rows());

  switch (join_type) {
    // A left join over an empty left table has nothing to emit.
    case join_kind::LEFT_JOIN: return left_has_no_rows;
    // An inner join needs rows on both sides.
    case join_kind::INNER_JOIN: return left_has_no_rows || right_has_no_rows;
    // A left semi join (contains) cannot match anything in an empty right table.
    case join_kind::LEFT_SEMI_JOIN: return right_has_no_rows;
    // Every other join kind has no row-count shortcut.
    default: return false;
  }
}

/**
 * @brief Builds the left join result for the case where the right table is empty.
 *
 * The left output is the sequence 0, 1, ..., left.num_rows() - 1 and every
 * entry of the right output is JoinNoneValue.
 *
 * @param left Table whose row count determines the size of both outputs
 * @param stream CUDA stream used for the allocations and the fill kernels
 * @param mr Device memory resource used to allocate both returned vectors
 *
 * @return Pair of (left indices, right indices), each of length left.num_rows()
 */
std::pair<std::unique_ptr<rmm::device_uvector<size_type>>,
          std::unique_ptr<rmm::device_uvector<size_type>>>
get_trivial_left_join_indices(table_view const& left,
                              rmm::cuda_stream_view stream,
                              rmm::mr::device_memory_resource* mr)
{
  using index_vector = rmm::device_uvector<size_type>;

  auto const row_count = left.num_rows();

  // Left side: every left row appears exactly once, in order.
  auto left_map = std::make_unique<index_vector>(row_count, stream, mr);
  thrust::sequence(rmm::exec_policy(stream), left_map->begin(), left_map->end(), 0);

  // Right side: no row can match, so every slot holds the "no match" sentinel.
  auto right_map = std::make_unique<index_vector>(row_count, stream, mr);
  thrust::uninitialized_fill(
    rmm::exec_policy(stream), right_map->begin(), right_map->end(), JoinNoneValue);

  return std::make_pair(std::move(left_map), std::move(right_map));
}

/**
 * @brief Appends the contents of vector pair `b` to vector pair `a`.
 *
 * When either pair is empty the other one is moved out and returned without
 * any copying. Otherwise both vectors of `a` are grown in place, the matching
 * vectors of `b` are copied behind the existing data, and `a` is moved out.
 *
 * @throws cudf::logic_error (via CUDF_EXPECTS) if the two vectors inside `a`,
 * or the two vectors inside `b`, differ in size
 *
 * @param a First pair; its buffers are reused for the result
 * @param b Second pair; appended after the contents of `a`
 * @param stream CUDA stream used for the resizes and copies
 *
 * @return A pair of vectors containing the concatenated output
 */
VectorPair concatenate_vector_pairs(VectorPair& a, VectorPair& b, rmm::cuda_stream_view stream)
{
  CUDF_EXPECTS((a.first->size() == a.second->size()),
               "Mismatch between sizes of vectors in vector pair");
  CUDF_EXPECTS((b.first->size() == b.second->size()),
               "Mismatch between sizes of vectors in vector pair");

  // Guard clauses: with one side empty the other side already is the answer.
  if (a.first->is_empty()) { return std::move(b); }
  if (b.first->is_empty()) { return std::move(a); }

  // The checks above guarantee both vectors of a pair share one length, so a
  // single offset and a single combined length describe both resizes.
  auto const append_offset   = a.first->size();
  auto const combined_length = append_offset + b.first->size();

  a.first->resize(combined_length, stream);
  a.second->resize(combined_length, stream);

  auto const policy = rmm::exec_policy(stream);
  thrust::copy(policy, b.first->begin(), b.first->end(), a.first->begin() + append_offset);
  thrust::copy(policy, b.second->begin(), b.second->end(), a.second->begin() + append_offset);

  return std::move(a);
}

/**
 * @brief Creates the complement of a set of left join indices.
 *
 * The result has two vectors of equal length. The second holds every value in
 * [0, right_table_row_count) that does not occur in `right_indices`; the first
 * is filled with JoinNoneValue.
 *
 * @param right_indices Right-side indices produced by a left join; entries
 * outside [0, right_table_row_count) are ignored
 * @param left_table_row_count Number of rows in the left table
 * @param right_table_row_count Number of rows in the right table
 * @param stream CUDA stream used for allocations and kernel launches
 * @param mr Device memory resource used to allocate the two returned vectors
 *
 * @return Pair of vectors containing the left join indices complement
 */
std::pair<std::unique_ptr<rmm::device_uvector<size_type>>,
          std::unique_ptr<rmm::device_uvector<size_type>>>
get_left_join_indices_complement(std::unique_ptr<rmm::device_uvector<size_type>>& right_indices,
                                 size_type left_table_row_count,
                                 size_type right_table_row_count,
                                 rmm::cuda_stream_view stream,
                                 rmm::mr::device_memory_resource* mr)
{
  // Output vector for the unmatched right rows. It is returned to the caller,
  // so it is allocated from `mr`. It starts at the worst-case size (no right
  // row matched) and is shrunk once the real count is known.
  auto right_indices_complement =
    std::make_unique<rmm::device_uvector<size_type>>(right_table_row_count, stream, mr);

  // If left table is empty in a full join call then all rows of the right table
  // should be represented in the joined indices. This is an optimization since
  // if left table is empty and full join is called all the elements in
  // right_indices will be JoinNoneValue, i.e. -1. This if path should
  // produce exactly the same result as the else path but will be faster.
  if (left_table_row_count == 0) {
    thrust::sequence(rmm::exec_policy(stream),
                     right_indices_complement->begin(),
                     right_indices_complement->end(),
                     0);
  } else {
    // Scratch flags, one per right row: 1 = unmatched, 0 = matched. This
    // buffer never leaves the function, so it uses the default resource.
    // Start by assuming every right row is unmatched.
    rmm::device_uvector<size_type> invalid_index_map(right_table_row_count, stream);
    thrust::uninitialized_fill(
      rmm::exec_policy(stream), invalid_index_map.begin(), invalid_index_map.end(), int32_t{1});

    // Functor to check for index validity since left joins can create invalid indices
    valid_range<size_type> valid(0, right_table_row_count);

    // invalid_index_map[right_indices[i]] = 0 for every i in
    // [0, right_indices->size()) whose value passes the range check,
    // thus marking those right rows as matched.
    thrust::scatter_if(rmm::exec_policy(stream),
                       thrust::make_constant_iterator(0),
                       thrust::make_constant_iterator(0) + right_indices->size(),
                       right_indices->begin(),     // Index locations
                       right_indices->begin(),     // Stencil - Check if index location is valid
                       invalid_index_map.begin(),  // Output indices
                       valid);                     // Stencil Predicate

    // Compact the row numbers whose flag is still non-zero (i.e. unmatched)
    // to the front of the output vector.
    auto const complement_end = thrust::copy_if(rmm::exec_policy(stream),
                                                thrust::make_counting_iterator(size_type{0}),
                                                thrust::make_counting_iterator(right_table_row_count),
                                                invalid_index_map.begin(),
                                                right_indices_complement->begin(),
                                                thrust::identity<size_type>());
    auto const indices_count =
      static_cast<size_type>(complement_end - right_indices_complement->begin());
    right_indices_complement->resize(indices_count, stream);
  }

  // Left side of the complement: no left row corresponds to these right rows.
  // Also returned to the caller, hence allocated from `mr`.
  auto left_invalid_indices = std::make_unique<rmm::device_uvector<size_type>>(
    right_indices_complement->size(), stream, mr);
  thrust::uninitialized_fill(rmm::exec_policy(stream),
                             left_invalid_indices->begin(),
                             left_invalid_indices->end(),
                             JoinNoneValue);

  return std::make_pair(std::move(left_invalid_indices), std::move(right_indices_complement));
}

} // namespace detail
} // namespace cudf
Loading