diff --git a/Dockerfile b/Dockerfile deleted file mode 100644 index eef8a04067d..00000000000 --- a/Dockerfile +++ /dev/null @@ -1,76 +0,0 @@ -# Copyright (c) 2021, NVIDIA CORPORATION. - -# An integration test & dev container which builds and installs cuDF from main -ARG CUDA_VERSION=11.0 -ARG CUDA_SHORT_VERSION=${CUDA_VERSION} -ARG LINUX_VERSION=ubuntu18.04 -FROM nvidia/cuda:${CUDA_VERSION}-devel-${LINUX_VERSION} -ENV LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/usr/local/cuda/lib64:/usr/local/lib -ENV DEBIAN_FRONTEND=noninteractive - -ARG CC=9 -ARG CXX=9 -RUN apt update -y --fix-missing && \ - apt upgrade -y && \ - apt install -y --no-install-recommends software-properties-common && \ - add-apt-repository ppa:ubuntu-toolchain-r/test && \ - apt update -y --fix-missing && \ - apt install -y --no-install-recommends \ - git \ - gcc-${CC} \ - g++-${CXX} \ - tzdata && \ - apt-get autoremove -y && \ - apt-get clean && \ - rm -rf /var/lib/apt/lists/* - -# Install conda -ADD https://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh /miniconda.sh -RUN sh /miniconda.sh -b -p /conda && /conda/bin/conda update -n base conda -ENV PATH=${PATH}:/conda/bin -# Enables "source activate conda" -SHELL ["/bin/bash", "-c"] - -# Build cuDF conda env -ARG CUDA_SHORT_VERSION -ARG PYTHON_VERSION -ENV PYTHON_VERSION=$PYTHON_VERSION -ARG NUMBA_VERSION -ENV NUMBA_VERSION=$NUMBA_VERSION -ARG NUMPY_VERSION -ENV NUMPY_VERSION=$NUMPY_VERSION -ARG PANDAS_VERSION -ENV PANDAS_VERSION=$PANDAS_VERSION -ARG PYARROW_VERSION -ENV PYARROW_VERSION=$PYARROW_VERSION -ARG CYTHON_VERSION -ENV CYTHON_VERSION=$CYTHON_VERSION -ARG CMAKE_VERSION -ENV CMAKE_VERSION=$CMAKE_VERSION -ARG CUDF_REPO=https://github.com/rapidsai/cudf -ENV CUDF_REPO=$CUDF_REPO -ARG CUDF_BRANCH=main -ENV CUDF_BRANCH=$CUDF_BRANCH - -# Add everything from the local build context -ADD . /cudf/ - -# Checks if local build context has the source, if not clone it then run a bash script to modify -# the environment file based on versions set in build args -RUN ls -la /cudf -RUN if [ -f /cudf/docker/package_versions.sh ]; \ - then /cudf/docker/package_versions.sh /cudf/conda/environments/cudf_dev_cuda${CUDA_SHORT_VERSION}.yml && \ - conda env create --name cudf --file /cudf/conda/environments/cudf_dev_cuda${CUDA_SHORT_VERSION}.yml ; \ - else rm -rf /cudf && \ - git clone --recurse-submodules -b ${CUDF_BRANCH} ${CUDF_REPO} /cudf && \ - /cudf/docker/package_versions.sh /cudf/conda/environments/cudf_dev_cuda${CUDA_SHORT_VERSION}.yml && \ - conda env create --name cudf --file /cudf/conda/environments/cudf_dev_cuda${CUDA_SHORT_VERSION}.yml ; \ - fi - -ENV CC=/opts/conda/envs/rapids/bin/gcc-${CC} -ENV CXX=/opts/conda/envs/rapids/bin/g++-${CXX} - -# libcudf & cudf build/install -RUN source activate cudf && \ - cd /cudf/ && \ - ./build.sh libcudf cudf diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index 208053ef070..e4637408110 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -324,11 +324,16 @@ add_library( src/jit/parser.cpp src/jit/type.cpp src/join/conditional_join.cu - src/join/mixed_join.cu src/join/cross_join.cu src/join/hash_join.cu src/join/join.cu src/join/join_utils.cu + src/join/mixed_join.cu + src/join/mixed_join_kernels.cu + src/join/mixed_join_kernels_semi.cu + src/join/mixed_join_semi.cu + src/join/mixed_join_size_kernels.cu + src/join/mixed_join_size_kernels_semi.cu src/join/semi_join.cu src/lists/contains.cu src/lists/combine/concatenate_list_elements.cu diff --git a/cpp/benchmarks/fixture/benchmark_fixture.hpp b/cpp/benchmarks/fixture/benchmark_fixture.hpp index 83f79bd68c5..ca3a748ccad 100644 --- a/cpp/benchmarks/fixture/benchmark_fixture.hpp +++ b/cpp/benchmarks/fixture/benchmark_fixture.hpp @@ -29,9 +29,11 @@ namespace { // memory resource factory helpers inline auto make_cuda() { return std::make_shared(); } -inline auto make_pool() +inline auto make_pool_instance() { - return rmm::mr::make_owning_wrapper(make_cuda()); + static rmm::mr::cuda_memory_resource cuda_mr; + static rmm::mr::pool_memory_resource pool_mr{&cuda_mr}; + return std::shared_ptr(&pool_mr); } } // namespace @@ -68,9 +70,15 @@ inline auto make_pool() */ class benchmark : public ::benchmark::Fixture { public: + benchmark() : ::benchmark::Fixture() + { + const char* env_iterations = std::getenv("CUDF_BENCHMARK_ITERATIONS"); + if (env_iterations != nullptr) { this->Iterations(std::max(0L, atol(env_iterations))); } + } + void SetUp(const ::benchmark::State& state) override { - mr = make_pool(); + mr = make_pool_instance(); rmm::mr::set_current_device_resource(mr.get()); // set default resource to pool } diff --git a/cpp/include/cudf/join.hpp b/cpp/include/cudf/join.hpp index 8520cb1bb0d..f6efea5f2bb 100644 --- a/cpp/include/cudf/join.hpp +++ b/cpp/include/cudf/join.hpp @@ -1039,6 +1039,109 @@ mixed_full_join( std::optional>> output_size_data = {}, rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); +/** + * @brief Returns an index vector corresponding to all rows in the left tables + * where the columns of the equality table are equal and the predicate + * evaluates to true on the conditional tables. + * + * If the provided predicate returns NULL for a pair of rows (left, right), the + * left row is not included in the output. It is the user's responsiblity to + * choose a suitable compare_nulls value AND use appropriate null-safe + * operators in the expression. + * + * If the provided output size or per-row counts are incorrect, behavior is undefined. + * + * @code{.pseudo} + * left_equality: {{0, 1, 2}} + * right_equality: {{1, 2, 3}} + * left_conditional: {{4, 4, 4}} + * right_conditional: {{3, 4, 5}} + * Expression: Left.Column_0 > Right.Column_0 + * Result: {1} + * @endcode + * + * @throw cudf::logic_error If the binary predicate outputs a non-boolean result. + * @throw cudf::logic_error If the number of rows in left_equality and left_conditional do not + * match. + * @throw cudf::logic_error If the number of rows in right_equality and right_conditional do not + * match. + * + * @param left_equality The left table used for the equality join. + * @param right_equality The right table used for the equality join. + * @param left_conditional The left table used for the conditional join. + * @param right_conditional The right table used for the conditional join. + * @param binary_predicate The condition on which to join. + * @param compare_nulls Whether or not null values join to each other or not. + * @param output_size_data An optional pair of values indicating the exact output size and the + * number of matches for each row in the larger of the two input tables, left or right (may be + * precomputed using the corresponding mixed_full_join_size API). + * @param mr Device memory resource used to allocate the returned table and columns' device memory + * + * @return A pair of vectors [`left_indices`, `right_indices`] that can be used to construct + * the result of performing a mixed full join between the four input tables. + */ +std::unique_ptr> mixed_left_semi_join( + table_view const& left_equality, + table_view const& right_equality, + table_view const& left_conditional, + table_view const& right_conditional, + ast::expression const& binary_predicate, + null_equality compare_nulls = null_equality::EQUAL, + std::optional>> output_size_data = {}, + rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + +/** + * @brief Returns an index vector corresponding to all rows in the left tables + * for which there is no row in the right tables where the columns of the + * equality table are equal and the predicate evaluates to true on the + * conditional tables. + * + * If the provided predicate returns NULL for a pair of rows (left, right), the + * left row is not included in the output. It is the user's responsiblity to + * choose a suitable compare_nulls value AND use appropriate null-safe + * operators in the expression. + * + * If the provided output size or per-row counts are incorrect, behavior is undefined. + * + * @code{.pseudo} + * left_equality: {{0, 1, 2}} + * right_equality: {{1, 2, 3}} + * left_conditional: {{4, 4, 4}} + * right_conditional: {{3, 4, 5}} + * Expression: Left.Column_0 > Right.Column_0 + * Result: {0, 2} + * @endcode + * + * @throw cudf::logic_error If the binary predicate outputs a non-boolean result. + * @throw cudf::logic_error If the number of rows in left_equality and left_conditional do not + * match. + * @throw cudf::logic_error If the number of rows in right_equality and right_conditional do not + * match. + * + * @param left_equality The left table used for the equality join. + * @param right_equality The right table used for the equality join. + * @param left_conditional The left table used for the conditional join. + * @param right_conditional The right table used for the conditional join. + * @param binary_predicate The condition on which to join. + * @param compare_nulls Whether or not null values join to each other or not. + * @param output_size_data An optional pair of values indicating the exact output size and the + * number of matches for each row in the larger of the two input tables, left or right (may be + * precomputed using the corresponding mixed_full_join_size API). + * @param mr Device memory resource used to allocate the returned table and columns' device memory + * + * @return A pair of vectors [`left_indices`, `right_indices`] that can be used to construct + * the result of performing a mixed full join between the four input tables. + */ +std::unique_ptr> mixed_left_anti_join( + table_view const& left_equality, + table_view const& right_equality, + table_view const& left_conditional, + table_view const& right_conditional, + ast::expression const& binary_predicate, + null_equality compare_nulls = null_equality::EQUAL, + std::optional>> output_size_data = {}, + rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + /** * @brief Returns the exact number of matches (rows) when performing a * mixed inner join between the specified tables where the columns of the @@ -1125,6 +1228,90 @@ std::pair>> mixed_le null_equality compare_nulls = null_equality::EQUAL, rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); +/** + * @brief Returns the exact number of matches (rows) when performing a mixed + * left semi join between the specified tables where the columns of the + * equality table are equal and the predicate evaluates to true on the + * conditional tables. + * + * If the provided predicate returns NULL for a pair of rows (left, right), + * that pair is not included in the output. It is the user's responsiblity to + * choose a suitable compare_nulls value AND use appropriate null-safe + * operators in the expression. + * + * @throw cudf::logic_error If the binary predicate outputs a non-boolean result. + * @throw cudf::logic_error If the number of rows in left_equality and left_conditional do not + * match. + * @throw cudf::logic_error If the number of rows in right_equality and right_conditional do not + * match. + * + * @param left_equality The left table used for the equality join. + * @param right_equality The right table used for the equality join. + * @param left_conditional The left table used for the conditional join. + * @param right_conditional The right table used for the conditional join. + * @param binary_predicate The condition on which to join. + * @param compare_nulls Whether or not null values join to each other or not. + * @param output_size An optional pair of values indicating the exact output size and the number of + * matches for each row in the larger of the two input tables, left or right (may be precomputed + * using the corresponding mixed_inner_join_size API). + * @param mr Device memory resource used to allocate the returned table and columns' device memory + * + * @return A pair containing the size that would result from performing the + * requested join and the number of matches for each row in one of the two + * tables. Which of the two tables is an implementation detail and should not + * be relied upon, simply passed to the corresponding `mixed_left_join` API as + * is. + */ +std::pair>> mixed_left_semi_join_size( + table_view const& left_equality, + table_view const& right_equality, + table_view const& left_conditional, + table_view const& right_conditional, + ast::expression const& binary_predicate, + null_equality compare_nulls = null_equality::EQUAL, + rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + +/** + * @brief Returns the exact number of matches (rows) when performing a mixed + * left anti join between the specified tables. + * + * If the provided predicate returns NULL for a pair of rows (left, right), + * that pair is not included in the output. It is the user's responsiblity to + * choose a suitable compare_nulls value AND use appropriate null-safe + * operators in the expression. + * + * @throw cudf::logic_error If the binary predicate outputs a non-boolean result. + * @throw cudf::logic_error If the number of rows in left_equality and left_conditional do not + * match. + * @throw cudf::logic_error If the number of rows in right_equality and right_conditional do not + * match. + * + * @param left_equality The left table used for the equality join. + * @param right_equality The right table used for the equality join. + * @param left_conditional The left table used for the conditional join. + * @param right_conditional The right table used for the conditional join. + * @param binary_predicate The condition on which to join. + * @param compare_nulls Whether or not null values join to each other or not. + * @param output_size An optional pair of values indicating the exact output size and the number of + * matches for each row in the larger of the two input tables, left or right (may be precomputed + * using the corresponding mixed_inner_join_size API). + * @param mr Device memory resource used to allocate the returned table and columns' device memory + * + * @return A pair containing the size that would result from performing the + * requested join and the number of matches for each row in one of the two + * tables. Which of the two tables is an implementation detail and should not + * be relied upon, simply passed to the corresponding `mixed_left_join` API as + * is. + */ +std::pair>> mixed_left_anti_join_size( + table_view const& left_equality, + table_view const& right_equality, + table_view const& left_conditional, + table_view const& right_conditional, + ast::expression const& binary_predicate, + null_equality compare_nulls = null_equality::EQUAL, + rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + /** * @brief Returns the exact number of matches (rows) when performing a * conditional inner join between the specified tables where the predicate diff --git a/cpp/src/join/conditional_join.cu b/cpp/src/join/conditional_join.cu index c3dc343dd2d..dc62eeec539 100644 --- a/cpp/src/join/conditional_join.cu +++ b/cpp/src/join/conditional_join.cu @@ -110,7 +110,6 @@ conditional_join(table_view const& left, } else { // Allocate storage for the counter used to get the size of the join output rmm::device_scalar size(0, stream, mr); - CHECK_CUDA(stream.value()); if (has_nulls) { compute_conditional_join_output_size <<>>( @@ -130,7 +129,6 @@ conditional_join(table_view const& left, swap_tables, size.data()); } - CHECK_CUDA(stream.value()); join_size = size.value(stream); } @@ -178,8 +176,6 @@ conditional_join(table_view const& left, swap_tables); } - CHECK_CUDA(stream.value()); - auto join_indices = std::make_pair(std::move(left_indices), std::move(right_indices)); // For full joins, get the indices in the right table that were not joined to @@ -260,7 +256,6 @@ std::size_t compute_conditional_join_output_size(table_view const& left, // Allocate storage for the counter used to get the size of the join output rmm::device_scalar size(0, stream, mr); - CHECK_CUDA(stream.value()); // Determine number of output rows without actually building the output to simply // find what the size of the output will be. @@ -283,8 +278,6 @@ std::size_t compute_conditional_join_output_size(table_view const& left, swap_tables, size.data()); } - CHECK_CUDA(stream.value()); - return size.value(stream); } diff --git a/cpp/src/join/join_common_utils.hpp b/cpp/src/join/join_common_utils.hpp index 938a85247f8..526c22d1d5c 100644 --- a/cpp/src/join/join_common_utils.hpp +++ b/cpp/src/join/join_common_utils.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2021, NVIDIA CORPORATION. + * Copyright (c) 2019-2022, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -25,6 +25,7 @@ #include +#include #include #include @@ -60,6 +61,9 @@ using mixed_multimap_type = cuco::static_multimap>; +using semi_map_type = cuco:: + static_map; + using row_hash = cudf::row_hasher; using row_equality = cudf::row_equality_comparator; diff --git a/cpp/src/join/mixed_join.cu b/cpp/src/join/mixed_join.cu index c609b58132c..0eb0a8de352 100644 --- a/cpp/src/join/mixed_join.cu +++ b/cpp/src/join/mixed_join.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021, NVIDIA CORPORATION. + * Copyright (c) 2022, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -33,8 +33,6 @@ #include #include -#include - namespace cudf { namespace detail { @@ -57,6 +55,9 @@ mixed_join( CUDF_EXPECTS(right_conditional.num_rows() == right_equality.num_rows(), "The right conditional and equality tables must have the same number of rows."); + CUDF_EXPECTS((join_type != join_kind::LEFT_SEMI_JOIN) && (join_type != join_kind::LEFT_ANTI_JOIN), + "Left semi and anti joins should use mixed_join_semi."); + auto const right_num_rows{right_conditional.num_rows()}; auto const left_num_rows{left_conditional.num_rows()}; auto const swap_tables = (join_type == join_kind::INNER_JOIN) && (right_num_rows > left_num_rows); @@ -71,25 +72,21 @@ mixed_join( // null index for the right table; in others, we return an empty output. if (right_num_rows == 0) { switch (join_type) { - // Left, left anti, and full all return all the row indices from left - // with a corresponding NULL from the right. + // Left and full joins all return all the row indices from + // left with a corresponding NULL from the right. case join_kind::LEFT_JOIN: - case join_kind::LEFT_ANTI_JOIN: case join_kind::FULL_JOIN: return get_trivial_left_join_indices(left_conditional, stream); - // Inner and left semi joins return empty output because no matches can exist. + // Inner joins return empty output because no matches can exist. case join_kind::INNER_JOIN: - case join_kind::LEFT_SEMI_JOIN: return std::make_pair(std::make_unique>(0, stream, mr), std::make_unique>(0, stream, mr)); default: CUDF_FAIL("Invalid join kind."); break; } } else if (left_num_rows == 0) { switch (join_type) { - // Left, left anti, left semi, and inner joins all return empty sets. + // Left and inner joins all return empty sets. case join_kind::LEFT_JOIN: - case join_kind::LEFT_ANTI_JOIN: case join_kind::INNER_JOIN: - case join_kind::LEFT_SEMI_JOIN: return std::make_pair(std::make_unique>(0, stream, mr), std::make_unique>(0, stream, mr)); // Full joins need to return the trivial complement. @@ -160,7 +157,6 @@ mixed_join( } else { // Allocate storage for the counter used to get the size of the join output rmm::device_scalar size(0, stream, mr); - CHECK_CUDA(stream.value()); matches_per_row = rmm::device_uvector{static_cast(outer_num_rows), stream, mr}; @@ -199,7 +195,6 @@ mixed_join( size.data(), mutable_matches_per_row_span); } - CHECK_CUDA(stream.value()); join_size = size.value(stream); } @@ -229,7 +224,7 @@ mixed_join( auto const& join_output_r = right_indices->data(); if (has_nulls) { - mixed_join + mixed_join <<>>( *left_conditional_view, *right_conditional_view, @@ -244,7 +239,7 @@ mixed_join( join_result_offsets.data(), swap_tables); } else { - mixed_join + mixed_join <<>>( *left_conditional_view, *right_conditional_view, @@ -260,8 +255,6 @@ mixed_join( swap_tables); } - CHECK_CUDA(stream.value()); - auto join_indices = std::make_pair(std::move(left_indices), std::move(right_indices)); // For full joins, get the indices in the right table that were not joined to @@ -292,6 +285,10 @@ compute_mixed_join_output_size(table_view const& left_equality, CUDF_EXPECTS(join_type != join_kind::FULL_JOIN, "Size estimation is not available for full joins."); + CUDF_EXPECTS( + (join_type != join_kind::LEFT_SEMI_JOIN) && (join_type != join_kind::LEFT_ANTI_JOIN), + "Left semi and anti join size estimation should use compute_mixed_join_output_size_semi."); + CUDF_EXPECTS(left_conditional.num_rows() == left_equality.num_rows(), "The left conditional and equality tables must have the same number of rows."); CUDF_EXPECTS(right_conditional.num_rows() == right_equality.num_rows(), @@ -319,14 +316,12 @@ compute_mixed_join_output_size(table_view const& left_equality, // Left, left anti, and full all return all the row indices from left // with a corresponding NULL from the right. case join_kind::LEFT_JOIN: - case join_kind::LEFT_ANTI_JOIN: case join_kind::FULL_JOIN: { thrust::fill(matches_per_row->begin(), matches_per_row->end(), 1); return {left_num_rows, std::move(matches_per_row)}; } // Inner and left semi joins return empty output because no matches can exist. - case join_kind::INNER_JOIN: - case join_kind::LEFT_SEMI_JOIN: { + case join_kind::INNER_JOIN: { thrust::fill(matches_per_row->begin(), matches_per_row->end(), 0); return {0, std::move(matches_per_row)}; } @@ -336,9 +331,7 @@ compute_mixed_join_output_size(table_view const& left_equality, switch (join_type) { // Left, left anti, left semi, and inner joins all return empty sets. case join_kind::LEFT_JOIN: - case join_kind::LEFT_ANTI_JOIN: - case join_kind::INNER_JOIN: - case join_kind::LEFT_SEMI_JOIN: { + case join_kind::INNER_JOIN: { thrust::fill(matches_per_row->begin(), matches_per_row->end(), 0); return {0, std::move(matches_per_row)}; } @@ -397,7 +390,6 @@ compute_mixed_join_output_size(table_view const& left_equality, // Allocate storage for the counter used to get the size of the join output rmm::device_scalar size(0, stream, mr); - CHECK_CUDA(stream.value()); // Determine number of output rows without actually building the output to simply // find what the size of the output will be. @@ -430,7 +422,6 @@ compute_mixed_join_output_size(table_view const& left_equality, size.data(), matches_per_row_span); } - CHECK_CUDA(stream.value()); return {size.value(stream), std::move(matches_per_row)}; } diff --git a/cpp/src/join/mixed_join_common_utils.cuh b/cpp/src/join/mixed_join_common_utils.cuh new file mode 100644 index 00000000000..60c909702ab --- /dev/null +++ b/cpp/src/join/mixed_join_common_utils.cuh @@ -0,0 +1,150 @@ +/* + * Copyright (c) 2022, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#pragma once + +#include + +#include +#include + +#include +#include + +#include + +namespace cudf { +namespace detail { + +/** + * @brief Equality comparator for use with cuco map methods that require expression evaluation. + * + * This class just defines the construction of the class and the necessary + * attributes, specifically the equality operator for the non-conditional parts + * of the operator and the evaluator used for the conditional. + */ +template +struct expression_equality { + __device__ expression_equality( + cudf::ast::detail::expression_evaluator const& evaluator, + cudf::ast::detail::IntermediateDataType* thread_intermediate_storage, + bool const swap_tables, + row_equality const& equality_probe) + : evaluator{evaluator}, + thread_intermediate_storage{thread_intermediate_storage}, + swap_tables{swap_tables}, + equality_probe{equality_probe} + { + } + + cudf::ast::detail::IntermediateDataType* thread_intermediate_storage; + cudf::ast::detail::expression_evaluator const& evaluator; + bool const swap_tables; + row_equality const& equality_probe; +}; + +/** + * @brief Equality comparator for cuco::static_map queries. + * + * This equality comparator is designed for use with cuco::static_map's APIs. A + * probe hit indicates that the hashes of the keys are equal, at which point + * this comparator checks whether the keys themselves are equal (using the + * provided equality_probe) and then evaluates the conditional expression + */ +template +struct single_expression_equality : expression_equality { + using expression_equality::expression_equality; + + // The parameters are build/probe rather than left/right because the operator + // is called by cuco's kernels with parameters in this order (note that this + // is an implementation detail that we should eventually stop relying on by + // defining operators with suitable heterogeneous typing). Rather than + // converting to left/right semantics, we can operate directly on build/probe + // until we get to the expression evaluator, which needs to convert back to + // left/right semantics because the conditional expression need not be + // commutative. + // TODO: The input types should really be size_type. + __device__ __forceinline__ bool operator()(hash_value_type const build_row_index, + hash_value_type const probe_row_index) const noexcept + { + auto output_dest = cudf::ast::detail::value_expression_result(); + // Two levels of checks: + // 1. The contents of the columns involved in the equality condition are equal. + // 2. The predicate evaluated on the relevant columns (already encoded in the evaluator) + // evaluates to true. + if (this->equality_probe(probe_row_index, build_row_index)) { + auto const lrow_idx = this->swap_tables ? build_row_index : probe_row_index; + auto const rrow_idx = this->swap_tables ? probe_row_index : build_row_index; + this->evaluator.evaluate(output_dest, + static_cast(lrow_idx), + static_cast(rrow_idx), + 0, + this->thread_intermediate_storage); + return (output_dest.is_valid() && output_dest.value()); + } + return false; + } +}; + +/** + * @brief Equality comparator for cuco::static_multimap queries. + * + * This equality comparator is designed for use with cuco::static_multimap's + * pair* APIs, which will compare equality based on comparing (key, value) + * pairs. In the context of joins, these pairs are of the form + * (row_hash, row_id). A hash probe hit indicates that hash of a probe row's hash is + * equal to the hash of the hash of some row in the multimap, at which point we need an + * equality comparator that will check whether the contents of the rows are + * identical. This comparator does so by verifying key equality (i.e. that + * probe_row_hash == build_row_hash) and then using a row_equality_comparator + * to compare the contents of the row indices that are stored as the payload in + * the hash map. + */ +template +struct pair_expression_equality : public expression_equality { + using expression_equality::expression_equality; + + // The parameters are build/probe rather than left/right because the operator + // is called by cuco's kernels with parameters in this order (note that this + // is an implementation detail that we should eventually stop relying on by + // defining operators with suitable heterogeneous typing). Rather than + // converting to left/right semantics, we can operate directly on build/probe + // until we get to the expression evaluator, which needs to convert back to + // left/right semantics because the conditional expression need not be + // commutative. + __device__ __forceinline__ bool operator()(pair_type const& build_row, + pair_type const& probe_row) const noexcept + { + auto output_dest = cudf::ast::detail::value_expression_result(); + // Three levels of checks: + // 1. Row hashes of the columns involved in the equality condition are equal. + // 2. The contents of the columns involved in the equality condition are equal. + // 3. The predicate evaluated on the relevant columns (already encoded in the evaluator) + // evaluates to true. + if ((probe_row.first == build_row.first) && + this->equality_probe(probe_row.second, build_row.second)) { + auto const lrow_idx = this->swap_tables ? build_row.second : probe_row.second; + auto const rrow_idx = this->swap_tables ? probe_row.second : build_row.second; + this->evaluator.evaluate( + output_dest, lrow_idx, rrow_idx, 0, this->thread_intermediate_storage); + return (output_dest.is_valid() && output_dest.value()); + } + return false; + } +}; + +} // namespace detail + +} // namespace cudf diff --git a/cpp/src/join/mixed_join_kernels.cu b/cpp/src/join/mixed_join_kernels.cu new file mode 100644 index 00000000000..5638f0ddd38 --- /dev/null +++ b/cpp/src/join/mixed_join_kernels.cu @@ -0,0 +1,139 @@ +/* + * Copyright (c) 2022, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include +#include +#include +#include + +#include +#include +#include +#include +#include + +#include + +#include +#include + +namespace cudf { +namespace detail { +namespace cg = cooperative_groups; + +template +__global__ void mixed_join(table_device_view left_table, + table_device_view right_table, + table_device_view probe, + table_device_view build, + row_equality const equality_probe, + join_kind const join_type, + cudf::detail::mixed_multimap_type::device_view hash_table_view, + size_type* join_output_l, + size_type* join_output_r, + cudf::ast::detail::expression_device_view device_expression_data, + cudf::size_type const* join_result_offsets, + bool const swap_tables) +{ + // Normally the casting of a shared memory array is used to create multiple + // arrays of different types from the shared memory buffer, but here it is + // used to circumvent conflicts between arrays of different types between + // different template instantiations due to the extern specifier. + extern __shared__ char raw_intermediate_storage[]; + cudf::ast::detail::IntermediateDataType* intermediate_storage = + reinterpret_cast*>(raw_intermediate_storage); + auto thread_intermediate_storage = + &intermediate_storage[threadIdx.x * device_expression_data.num_intermediates]; + + cudf::size_type const left_num_rows = left_table.num_rows(); + cudf::size_type const right_num_rows = right_table.num_rows(); + auto const outer_num_rows = (swap_tables ? right_num_rows : left_num_rows); + + cudf::size_type outer_row_index = threadIdx.x + blockIdx.x * block_size; + + auto evaluator = cudf::ast::detail::expression_evaluator( + left_table, right_table, device_expression_data); + + row_hash hash_probe{nullate::DYNAMIC{has_nulls}, probe}; + auto const empty_key_sentinel = hash_table_view.get_empty_key_sentinel(); + make_pair_function pair_func{hash_probe, empty_key_sentinel}; + + if (outer_row_index < outer_num_rows) { + // Figure out the number of elements for this key. + cg::thread_block_tile<1> this_thread = cg::this_thread(); + // Figure out the number of elements for this key. + auto query_pair = pair_func(outer_row_index); + auto equality = pair_expression_equality{ + evaluator, thread_intermediate_storage, swap_tables, equality_probe}; + + auto probe_key_begin = thrust::make_discard_iterator(); + auto probe_value_begin = swap_tables ? join_output_r + join_result_offsets[outer_row_index] + : join_output_l + join_result_offsets[outer_row_index]; + auto contained_key_begin = thrust::make_discard_iterator(); + auto contained_value_begin = swap_tables ? join_output_l + join_result_offsets[outer_row_index] + : join_output_r + join_result_offsets[outer_row_index]; + + if (join_type == join_kind::LEFT_JOIN || join_type == join_kind::FULL_JOIN) { + hash_table_view.pair_retrieve_outer(this_thread, + query_pair, + probe_key_begin, + probe_value_begin, + contained_key_begin, + contained_value_begin, + equality); + } else { + hash_table_view.pair_retrieve(this_thread, + query_pair, + probe_key_begin, + probe_value_begin, + contained_key_begin, + contained_value_begin, + equality); + } + } +} + +template __global__ void mixed_join( + table_device_view left_table, + table_device_view right_table, + table_device_view probe, + table_device_view build, + row_equality const equality_probe, + join_kind const join_type, + cudf::detail::mixed_multimap_type::device_view hash_table_view, + size_type* join_output_l, + size_type* join_output_r, + cudf::ast::detail::expression_device_view device_expression_data, + cudf::size_type const* join_result_offsets, + bool const swap_tables); + +template __global__ void mixed_join( + table_device_view left_table, + table_device_view right_table, + table_device_view probe, + table_device_view build, + row_equality const equality_probe, + join_kind const join_type, + cudf::detail::mixed_multimap_type::device_view hash_table_view, + size_type* join_output_l, + size_type* join_output_r, + cudf::ast::detail::expression_device_view device_expression_data, + cudf::size_type const* join_result_offsets, + bool const swap_tables); + +} // namespace detail + +} // namespace cudf diff --git a/cpp/src/join/mixed_join_kernels.cuh b/cpp/src/join/mixed_join_kernels.cuh index 9812d4c4b7d..18d5e22fd1c 100644 --- a/cpp/src/join/mixed_join_kernels.cuh +++ b/cpp/src/join/mixed_join_kernels.cuh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021, NVIDIA CORPORATION. + * Copyright (c) 2022, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -16,93 +16,15 @@ #pragma once -#include #include +#include -#include #include -#include #include #include -#include - -#include - -#include -#include -#include -#include -#include namespace cudf { namespace detail { -namespace cg = cooperative_groups; - -/** - * @brief Device functor to determine if two pairs are identical. - * - * This equality comparator is designed for use with cuco::static_multimap's - * pair* APIs, which will compare equality based on comparing (key, value) - * pairs. In the context of joins, these pairs are of the form - * (row_hash, row_id). A hash probe hit indicates that hash of a probe row's hash is - * equal to the hash of the hash of some row in the multimap, at which point we need an - * equality comparator that will check whether the contents of the rows are - * identical. This comparator does so by verifying key equality (i.e. that - * probe_row_hash == build_row_hash) and then using a row_equality_comparator - * to compare the contents of the row indices that are stored as the payload in - * the hash map. - * - * This particular comparator is a specialized version of the pair_equality used in hash joins. This - * version also checks the expression_evaluator. - */ -template -class pair_expression_equality { - public: - __device__ pair_expression_equality( - cudf::ast::detail::expression_evaluator const& evaluator, - cudf::ast::detail::IntermediateDataType* thread_intermediate_storage, - bool const swap_tables, - row_equality const& equality_probe) - : evaluator{evaluator}, - thread_intermediate_storage{thread_intermediate_storage}, - swap_tables{swap_tables}, - equality_probe{equality_probe} - { - } - - // The parameters are build/probe rather than left/right because the operator - // is called by cuco's kernels with parameters in this order (note that this - // is an implementation detail that we should eventually stop relying on by - // defining operators with suitable heterogeneous typing). Rather than - // converting to left/right semantics, we can operate directly on build/probe - // until we get to the expression evaluator, which needs to convert back to - // left/right semantics because the conditional expression need not be - // commutative. - __device__ __forceinline__ bool operator()(const pair_type& build_row, - const pair_type& probe_row) const noexcept - { - auto output_dest = cudf::ast::detail::value_expression_result(); - // Three levels of checks: - // 1. Row hashes of the columns involved in the equality condition are equal. - // 2. The contents of the columns involved in the equality condition are equal. - // 3. The predicate evaluated on the relevant columns (already encoded in the evaluator) - // evaluates to true. - if ((probe_row.first == build_row.first) && - equality_probe(probe_row.second, build_row.second)) { - auto const lrow_idx = swap_tables ? build_row.second : probe_row.second; - auto const rrow_idx = swap_tables ? probe_row.second : build_row.second; - evaluator.evaluate(output_dest, lrow_idx, rrow_idx, 0, thread_intermediate_storage); - return (output_dest.is_valid() && output_dest.value()); - } - return false; - } - - private: - cudf::ast::detail::IntermediateDataType* thread_intermediate_storage; - cudf::ast::detail::expression_evaluator const& evaluator; - bool const swap_tables; - row_equality const& equality_probe; -}; /** * @brief Computes the output size of joining the left table to the right table. @@ -146,63 +68,7 @@ __global__ void compute_mixed_join_output_size( ast::detail::expression_device_view device_expression_data, bool const swap_tables, std::size_t* output_size, - cudf::device_span matches_per_row) -{ - // The (required) extern storage of the shared memory array leads to - // conflicting declarations between different templates. The easiest - // workaround is to declare an arbitrary (here char) array type then cast it - // after the fact to the appropriate type. - extern __shared__ char raw_intermediate_storage[]; - cudf::ast::detail::IntermediateDataType* intermediate_storage = - reinterpret_cast*>(raw_intermediate_storage); - auto thread_intermediate_storage = - intermediate_storage + (threadIdx.x * device_expression_data.num_intermediates); - - std::size_t thread_counter{0}; - cudf::size_type const start_idx = threadIdx.x + blockIdx.x * block_size; - cudf::size_type const stride = block_size * gridDim.x; - cudf::size_type const left_num_rows = left_table.num_rows(); - cudf::size_type const right_num_rows = right_table.num_rows(); - auto const outer_num_rows = (swap_tables ? right_num_rows : left_num_rows); - - auto evaluator = cudf::ast::detail::expression_evaluator( - left_table, right_table, device_expression_data); - - // TODO: The hash join code assumes that nulls exist here, so I'm doing the - // same but at some point we may want to benchmark that. - row_hash hash_probe{nullate::DYNAMIC{has_nulls}, probe}; - auto const empty_key_sentinel = hash_table_view.get_empty_key_sentinel(); - make_pair_function pair_func{hash_probe, empty_key_sentinel}; - - for (cudf::size_type outer_row_index = start_idx; outer_row_index < outer_num_rows; - outer_row_index += stride) { - // Figure out the number of elements for this key. - cg::thread_block_tile<1> this_thread = cg::this_thread(); - auto query_pair = pair_func(outer_row_index); - // TODO: Address asymmetry in operator. - auto count_equality = pair_expression_equality{ - evaluator, thread_intermediate_storage, swap_tables, equality_probe}; - // TODO: This entire kernel probably won't work for left anti joins since I - // need to use a normal map (not a multimap), so this condition is probably - // overspecified at the moment. - if (join_type == join_kind::LEFT_JOIN || join_type == join_kind::LEFT_ANTI_JOIN || - join_type == join_kind::FULL_JOIN) { - matches_per_row[outer_row_index] = - hash_table_view.pair_count_outer(this_thread, query_pair, count_equality); - } else { - matches_per_row[outer_row_index] = - hash_table_view.pair_count(this_thread, query_pair, count_equality); - } - thread_counter += matches_per_row[outer_row_index]; - } - - using BlockReduce = cub::BlockReduce; - __shared__ typename BlockReduce::TempStorage temp_storage; - std::size_t block_counter = BlockReduce(temp_storage).Sum(thread_counter); - - // Add block counter to global counter - if (threadIdx.x == 0) atomicAdd(output_size, block_counter); -} + cudf::device_span matches_per_row); /** * @brief Performs a join using the combination of a hash lookup to identify @@ -215,7 +81,6 @@ __global__ void compute_mixed_join_output_size( * between probe and build rows. * * @tparam block_size The number of threads per block for this kernel - * @tparam output_cache_size The side of the shared memory buffer to cache join * @tparam has_nulls Whether or not the inputs may contain nulls. * * @param[in] left_table The left table @@ -235,11 +100,7 @@ __global__ void compute_mixed_join_output_size( * @param[in] swap_tables If true, the kernel was launched with one thread per right row and * the kernel needs to internally loop over left rows. Otherwise, loop over right rows. */ -template +template __global__ void mixed_join(table_device_view left_table, table_device_view right_table, table_device_view probe, @@ -247,75 +108,11 @@ __global__ void mixed_join(table_device_view left_table, row_equality const equality_probe, join_kind const join_type, cudf::detail::mixed_multimap_type::device_view hash_table_view, - OutputIt1 join_output_l, - OutputIt2 join_output_r, + size_type* join_output_l, + size_type* join_output_r, cudf::ast::detail::expression_device_view device_expression_data, cudf::size_type const* join_result_offsets, - bool const swap_tables) -{ - // Normally the casting of a shared memory array is used to create multiple - // arrays of different types from the shared memory buffer, but here it is - // used to circumvent conflicts between arrays of different types between - // different template instantiations due to the extern specifier. - extern __shared__ char raw_intermediate_storage[]; - cudf::ast::detail::IntermediateDataType* intermediate_storage = - reinterpret_cast*>(raw_intermediate_storage); - auto thread_intermediate_storage = - &intermediate_storage[threadIdx.x * device_expression_data.num_intermediates]; - - cudf::size_type const left_num_rows = left_table.num_rows(); - cudf::size_type const right_num_rows = right_table.num_rows(); - auto const outer_num_rows = (swap_tables ? right_num_rows : left_num_rows); - - cudf::size_type outer_row_index = threadIdx.x + blockIdx.x * block_size; - - auto evaluator = cudf::ast::detail::expression_evaluator( - left_table, right_table, device_expression_data); - - // TODO: The hash join code assumes that nulls exist here, so I'm doing the - // same but at some point we may want to benchmark that. - row_hash hash_probe{nullate::DYNAMIC{has_nulls}, probe}; - auto const empty_key_sentinel = hash_table_view.get_empty_key_sentinel(); - make_pair_function pair_func{hash_probe, empty_key_sentinel}; - - if (outer_row_index < outer_num_rows) { - // Figure out the number of elements for this key. - cg::thread_block_tile<1> this_thread = cg::this_thread(); - // Figure out the number of elements for this key. - auto query_pair = pair_func(outer_row_index); - auto equality = pair_expression_equality{ - evaluator, thread_intermediate_storage, swap_tables, equality_probe}; - - auto probe_key_begin = thrust::make_discard_iterator(); - auto probe_value_begin = swap_tables ? join_output_r + join_result_offsets[outer_row_index] - : join_output_l + join_result_offsets[outer_row_index]; - auto contained_key_begin = thrust::make_discard_iterator(); - auto contained_value_begin = swap_tables ? join_output_l + join_result_offsets[outer_row_index] - : join_output_r + join_result_offsets[outer_row_index]; - - // TODO: This entire kernel probably won't work for left anti joins since I - // need to use a normal map (not a multimap), so this condition is probably - // overspecified at the moment. - if (join_type == join_kind::LEFT_JOIN || join_type == join_kind::LEFT_ANTI_JOIN || - join_type == join_kind::FULL_JOIN) { - hash_table_view.pair_retrieve_outer(this_thread, - query_pair, - probe_key_begin, - probe_value_begin, - contained_key_begin, - contained_value_begin, - equality); - } else { - hash_table_view.pair_retrieve(this_thread, - query_pair, - probe_key_begin, - probe_value_begin, - contained_key_begin, - contained_value_begin, - equality); - } - } -} + bool const swap_tables); } // namespace detail diff --git a/cpp/src/join/mixed_join_kernels_semi.cu b/cpp/src/join/mixed_join_kernels_semi.cu new file mode 100644 index 00000000000..c8cfc9998f0 --- /dev/null +++ b/cpp/src/join/mixed_join_kernels_semi.cu @@ -0,0 +1,108 @@ +/* + * Copyright (c) 2022, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include +#include +#include + +#include +#include +#include +#include +#include + +#include + +namespace cudf { +namespace detail { + +namespace cg = cooperative_groups; + +template +__global__ void mixed_join_semi(table_device_view left_table, + table_device_view right_table, + table_device_view probe, + table_device_view build, + row_equality const equality_probe, + join_kind const join_type, + cudf::detail::semi_map_type::device_view hash_table_view, + size_type* join_output_l, + cudf::ast::detail::expression_device_view device_expression_data, + cudf::size_type const* join_result_offsets, + bool const swap_tables) +{ + // Normally the casting of a shared memory array is used to create multiple + // arrays of different types from the shared memory buffer, but here it is + // used to circumvent conflicts between arrays of different types between + // different template instantiations due to the extern specifier. + extern __shared__ char raw_intermediate_storage[]; + cudf::ast::detail::IntermediateDataType* intermediate_storage = + reinterpret_cast*>(raw_intermediate_storage); + auto thread_intermediate_storage = + &intermediate_storage[threadIdx.x * device_expression_data.num_intermediates]; + + cudf::size_type const left_num_rows = left_table.num_rows(); + cudf::size_type const right_num_rows = right_table.num_rows(); + auto const outer_num_rows = (swap_tables ? right_num_rows : left_num_rows); + + cudf::size_type outer_row_index = threadIdx.x + blockIdx.x * block_size; + + auto evaluator = cudf::ast::detail::expression_evaluator( + left_table, right_table, device_expression_data); + + row_hash hash_probe{nullate::DYNAMIC{has_nulls}, probe}; + + if (outer_row_index < outer_num_rows) { + // Figure out the number of elements for this key. + auto equality = single_expression_equality{ + evaluator, thread_intermediate_storage, swap_tables, equality_probe}; + + if ((join_type == join_kind::LEFT_ANTI_JOIN) != + (hash_table_view.contains(outer_row_index, hash_probe, equality))) { + *(join_output_l + join_result_offsets[outer_row_index]) = outer_row_index; + } + } +} + +template __global__ void mixed_join_semi( + table_device_view left_table, + table_device_view right_table, + table_device_view probe, + table_device_view build, + row_equality const equality_probe, + join_kind const join_type, + cudf::detail::semi_map_type::device_view hash_table_view, + size_type* join_output_l, + cudf::ast::detail::expression_device_view device_expression_data, + cudf::size_type const* join_result_offsets, + bool const swap_tables); + +template __global__ void mixed_join_semi( + table_device_view left_table, + table_device_view right_table, + table_device_view probe, + table_device_view build, + row_equality const equality_probe, + join_kind const join_type, + cudf::detail::semi_map_type::device_view hash_table_view, + size_type* join_output_l, + cudf::ast::detail::expression_device_view device_expression_data, + cudf::size_type const* join_result_offsets, + bool const swap_tables); + +} // namespace detail + +} // namespace cudf diff --git a/cpp/src/join/mixed_join_kernels_semi.cuh b/cpp/src/join/mixed_join_kernels_semi.cuh new file mode 100644 index 00000000000..0a590f5b09a --- /dev/null +++ b/cpp/src/join/mixed_join_kernels_semi.cuh @@ -0,0 +1,117 @@ +/* + * Copyright (c) 2022, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include +#include + +#include +#include +#include + +namespace cudf { +namespace detail { + +/** + * @brief Computes the output size of joining the left table to the right table for semi/anti joins. + * + * This method probes the hash table with each row in the probe table using a + * custom equality comparator that also checks that the conditional expression + * evaluates to true between the left/right tables when a match is found + * between probe and build rows. + * + * @tparam block_size The number of threads per block for this kernel + * @tparam has_nulls Whether or not the inputs may contain nulls. + * + * @param[in] left_table The left table + * @param[in] right_table The right table + * @param[in] probe The table with which to probe the hash table for matches. + * @param[in] build The table with which the hash table was built. + * @param[in] equality_probe The equality comparator used when probing the hash table. + * @param[in] join_type The type of join to be performed + * @param[in] hash_table_view The hash table built from `build`. + * @param[in] device_expression_data Container of device data required to evaluate the desired + * expression. + * @param[in] swap_tables If true, the kernel was launched with one thread per right row and + * the kernel needs to internally loop over left rows. Otherwise, loop over right rows. + * @param[out] output_size The resulting output size + * @param[out] matches_per_row The number of matches in one pair of + * equality/conditional tables for each row in the other pair of tables. If + * swap_tables is true, matches_per_row corresponds to the right_table, + * otherwise it corresponds to the left_table. Note that corresponding swap of + * left/right tables to determine which is the build table and which is the + * probe table has already happened on the host. + */ +template +__global__ void compute_mixed_join_output_size_semi( + table_device_view left_table, + table_device_view right_table, + table_device_view probe, + table_device_view build, + row_equality const equality_probe, + join_kind const join_type, + cudf::detail::semi_map_type::device_view hash_table_view, + ast::detail::expression_device_view device_expression_data, + bool const swap_tables, + std::size_t* output_size, + cudf::device_span matches_per_row); + +/** + * @brief Performs a semi/anti join using the combination of a hash lookup to + * identify equal rows between one pair of tables and the evaluation of an + * expression containing an arbitrary expression. + * + * This method probes the hash table with each row in the probe table using a + * custom equality comparator that also checks that the conditional expression + * evaluates to true between the left/right tables when a match is found + * between probe and build rows. + * + * @tparam block_size The number of threads per block for this kernel + * @tparam has_nulls Whether or not the inputs may contain nulls. + * + * @param[in] left_table The left table + * @param[in] right_table The right table + * @param[in] probe The table with which to probe the hash table for matches. + * @param[in] build The table with which the hash table was built. + * @param[in] equality_probe The equality comparator used when probing the hash table. + * @param[in] join_type The type of join to be performed + * @param[in] hash_table_view The hash table built from `build`. + * @param[out] join_output_l The left result of the join operation + * @param[in] device_expression_data Container of device data required to evaluate the desired + * expression. + * @param[in] join_result_offsets The starting indices in join_output[l|r] + * where the matches for each row begin. Equivalent to a prefix sum of + * matches_per_row. + * @param[in] swap_tables If true, the kernel was launched with one thread per right row and + * the kernel needs to internally loop over left rows. Otherwise, loop over right rows. + */ +template +__global__ void mixed_join_semi(table_device_view left_table, + table_device_view right_table, + table_device_view probe, + table_device_view build, + row_equality const equality_probe, + join_kind const join_type, + cudf::detail::semi_map_type::device_view hash_table_view, + size_type* join_output_l, + cudf::ast::detail::expression_device_view device_expression_data, + cudf::size_type const* join_result_offsets, + bool const swap_tables); + +} // namespace detail + +} // namespace cudf diff --git a/cpp/src/join/mixed_join_semi.cu b/cpp/src/join/mixed_join_semi.cu new file mode 100644 index 00000000000..f38e653c4a6 --- /dev/null +++ b/cpp/src/join/mixed_join_semi.cu @@ -0,0 +1,569 @@ +/* + * Copyright (c) 2022, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +#include +#include + +namespace cudf { +namespace detail { + +namespace { +/** + * @brief Device functor to create a pair of hash value and index for a given row. + */ +struct make_pair_function_semi { + __device__ __forceinline__ cudf::detail::pair_type operator()(size_type i) const noexcept + { + // The value is irrelevant since we only ever use the hash map to check for + // membership of a particular row index. + return cuco::make_pair(i, 0); + } +}; + +/** + * @brief Equality comparator that composes two row_equality comparators. + */ +class double_row_equality { + public: + double_row_equality(row_equality equality_comparator, row_equality conditional_comparator) + : _equality_comparator{equality_comparator}, _conditional_comparator{conditional_comparator} + { + } + + __device__ bool operator()(size_type lhs_row_index, size_type rhs_row_index) const noexcept + { + return _equality_comparator(lhs_row_index, rhs_row_index) && + _conditional_comparator(lhs_row_index, rhs_row_index); + } + + private: + row_equality _equality_comparator; + row_equality _conditional_comparator; +}; + +} // namespace + +std::unique_ptr> mixed_join_semi( + table_view const& left_equality, + table_view const& right_equality, + table_view const& left_conditional, + table_view const& right_conditional, + ast::expression const& binary_predicate, + null_equality compare_nulls, + join_kind join_type, + std::optional>> output_size_data, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr) +{ + CUDF_EXPECTS((join_type != join_kind::INNER_JOIN) && (join_type != join_kind::LEFT_JOIN) && + (join_type != join_kind::FULL_JOIN), + "Inner, left, and full joins should use mixed_join."); + + CUDF_EXPECTS(left_conditional.num_rows() == left_equality.num_rows(), + "The left conditional and equality tables must have the same number of rows."); + CUDF_EXPECTS(right_conditional.num_rows() == right_equality.num_rows(), + "The right conditional and equality tables must have the same number of rows."); + + auto const right_num_rows{right_conditional.num_rows()}; + auto const left_num_rows{left_conditional.num_rows()}; + auto const swap_tables = (join_type == join_kind::INNER_JOIN) && (right_num_rows > left_num_rows); + + // The "outer" table is the larger of the two tables. The kernels are + // launched with one thread per row of the outer table, which also means that + // it is the probe table for the hash + auto const outer_num_rows{swap_tables ? right_num_rows : left_num_rows}; + + // We can immediately filter out cases where the right table is empty. In + // some cases, we return all the rows of the left table with a corresponding + // null index for the right table; in others, we return an empty output. + if (right_num_rows == 0) { + switch (join_type) { + // Anti and semi return all the row indices from left + // with a corresponding NULL from the right. + case join_kind::LEFT_ANTI_JOIN: + return get_trivial_left_join_indices(left_conditional, stream).first; + // Inner and left semi joins return empty output because no matches can exist. + case join_kind::LEFT_SEMI_JOIN: + return std::make_unique>(0, stream, mr); + default: CUDF_FAIL("Invalid join kind."); break; + } + } else if (left_num_rows == 0) { + switch (join_type) { + // Anti and semi joins both return empty sets. + case join_kind::LEFT_ANTI_JOIN: + case join_kind::LEFT_SEMI_JOIN: + return std::make_unique>(0, stream, mr); + default: CUDF_FAIL("Invalid join kind."); break; + } + } + + // If evaluating the expression may produce null outputs we create a nullable + // output column and follow the null-supporting expression evaluation code + // path. + auto const has_nulls = + cudf::has_nulls(left_equality) || cudf::has_nulls(right_equality) || + binary_predicate.may_evaluate_null(left_conditional, right_conditional, stream); + + auto const parser = ast::detail::expression_parser{ + binary_predicate, left_conditional, right_conditional, has_nulls, stream, mr}; + CUDF_EXPECTS(parser.output_type().id() == type_id::BOOL8, + "The expression must produce a boolean output."); + + // TODO: The non-conditional join impls start with a dictionary matching, + // figure out what that is and what it's needed for (and if conditional joins + // need to do the same). + auto& probe = swap_tables ? right_equality : left_equality; + auto& build = swap_tables ? left_equality : right_equality; + auto probe_view = table_device_view::create(probe, stream); + auto build_view = table_device_view::create(build, stream); + auto left_conditional_view = table_device_view::create(left_conditional, stream); + auto right_conditional_view = table_device_view::create(right_conditional, stream); + auto& build_conditional_view = swap_tables ? left_conditional_view : right_conditional_view; + row_equality equality_probe{ + cudf::nullate::DYNAMIC{has_nulls}, *probe_view, *build_view, compare_nulls}; + + semi_map_type hash_table{compute_hash_table_size(build.num_rows()), + std::numeric_limits::max(), + cudf::detail::JoinNoneValue, + detail::hash_table_allocator_type{default_allocator{}, stream}, + stream.value()}; + + // Create hash table containing all keys found in right table + // TODO: To add support for nested columns we will need to flatten in many + // places. However, this probably isn't worth adding any time soon since we + // won't be able to support AST conditions for those types anyway. + auto const build_nulls = cudf::nullate::DYNAMIC{cudf::has_nulls(build)}; + row_hash const hash_build{build_nulls, *build_view}; + // Since we may see multiple rows that are identical in the equality tables + // but differ in the conditional tables, the equality comparator used for + // insertion must account for both sets of tables. An alternative solution + // would be to use a multimap, but that solution would store duplicates where + // equality and conditional rows are equal, so this approach is preferable. + // One way to make this solution even more efficient would be to only include + // the columns of the conditional table that are used by the expression, but + // that requires additional plumbing through the AST machinery and is out of + // scope for now. + row_equality equality_build_equality{build_nulls, *build_view, *build_view, compare_nulls}; + row_equality equality_build_conditional{ + build_nulls, *build_conditional_view, *build_conditional_view, compare_nulls}; + double_row_equality equality_build{equality_build_equality, equality_build_conditional}; + make_pair_function_semi pair_func_build{}; + + auto iter = cudf::detail::make_counting_transform_iterator(0, pair_func_build); + + // skip rows that are null here. + if ((compare_nulls == null_equality::EQUAL) or (not nullable(build))) { + hash_table.insert(iter, iter + right_num_rows, hash_build, equality_build, stream.value()); + } else { + thrust::counting_iterator stencil(0); + auto const [row_bitmask, _] = cudf::detail::bitmask_and(build, stream); + row_is_valid pred{static_cast(row_bitmask.data())}; + + // insert valid rows + hash_table.insert_if( + iter, iter + right_num_rows, stencil, pred, hash_build, equality_build, stream.value()); + } + + auto hash_table_view = hash_table.get_device_view(); + + // For inner joins we support optimizing the join by launching one thread for + // whichever table is larger rather than always using the left table. + detail::grid_1d const config(outer_num_rows, DEFAULT_JOIN_BLOCK_SIZE); + auto const shmem_size_per_block = parser.shmem_per_thread * config.num_threads_per_block; + join_kind const kernel_join_type = + join_type == join_kind::FULL_JOIN ? join_kind::LEFT_JOIN : join_type; + + // If the join size data was not provided as an input, compute it here. + std::size_t join_size; + // Using an optional because we only need to allocate a new vector if one was + // not passed as input, and rmm::device_uvector is not default constructible + std::optional> matches_per_row{}; + device_span matches_per_row_span{}; + + if (output_size_data.has_value()) { + join_size = output_size_data->first; + matches_per_row_span = output_size_data->second; + } else { + // Allocate storage for the counter used to get the size of the join output + rmm::device_scalar size(0, stream, mr); + + matches_per_row = + rmm::device_uvector{static_cast(outer_num_rows), stream, mr}; + // Note that the view goes out of scope after this else statement, but the + // data owned by matches_per_row stays alive so the data pointer is valid. + auto mutable_matches_per_row_span = cudf::device_span{ + matches_per_row->begin(), static_cast(outer_num_rows)}; + matches_per_row_span = cudf::device_span{ + matches_per_row->begin(), static_cast(outer_num_rows)}; + if (has_nulls) { + compute_mixed_join_output_size_semi + <<>>( + *left_conditional_view, + *right_conditional_view, + *probe_view, + *build_view, + equality_probe, + kernel_join_type, + hash_table_view, + parser.device_expression_data, + swap_tables, + size.data(), + mutable_matches_per_row_span); + } else { + compute_mixed_join_output_size_semi + <<>>( + *left_conditional_view, + *right_conditional_view, + *probe_view, + *build_view, + equality_probe, + kernel_join_type, + hash_table_view, + parser.device_expression_data, + swap_tables, + size.data(), + mutable_matches_per_row_span); + } + join_size = size.value(stream); + } + + if (join_size == 0) { return std::make_unique>(0, stream, mr); } + + // Given the number of matches per row, we need to compute the offsets for insertion. + auto join_result_offsets = + rmm::device_uvector{static_cast(outer_num_rows), stream, mr}; + thrust::exclusive_scan(rmm::exec_policy{stream}, + matches_per_row_span.begin(), + matches_per_row_span.end(), + join_result_offsets.begin()); + + auto left_indices = std::make_unique>(join_size, stream, mr); + auto const& join_output_l = left_indices->data(); + + if (has_nulls) { + mixed_join_semi + <<>>( + *left_conditional_view, + *right_conditional_view, + *probe_view, + *build_view, + equality_probe, + kernel_join_type, + hash_table_view, + join_output_l, + parser.device_expression_data, + join_result_offsets.data(), + swap_tables); + } else { + mixed_join_semi + <<>>( + *left_conditional_view, + *right_conditional_view, + *probe_view, + *build_view, + equality_probe, + kernel_join_type, + hash_table_view, + join_output_l, + parser.device_expression_data, + join_result_offsets.data(), + swap_tables); + } + + return left_indices; +} + +std::pair>> +compute_mixed_join_output_size_semi(table_view const& left_equality, + table_view const& right_equality, + table_view const& left_conditional, + table_view const& right_conditional, + ast::expression const& binary_predicate, + null_equality compare_nulls, + join_kind join_type, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr) +{ + CUDF_EXPECTS( + (join_type != join_kind::INNER_JOIN) && (join_type != join_kind::LEFT_JOIN) && + (join_type != join_kind::FULL_JOIN), + "Inner, left, and full join size estimation should use compute_mixed_join_output_size."); + + CUDF_EXPECTS(left_conditional.num_rows() == left_equality.num_rows(), + "The left conditional and equality tables must have the same number of rows."); + CUDF_EXPECTS(right_conditional.num_rows() == right_equality.num_rows(), + "The right conditional and equality tables must have the same number of rows."); + + auto const right_num_rows{right_conditional.num_rows()}; + auto const left_num_rows{left_conditional.num_rows()}; + auto const swap_tables = (join_type == join_kind::INNER_JOIN) && (right_num_rows > left_num_rows); + + // The "outer" table is the larger of the two tables. The kernels are + // launched with one thread per row of the outer table, which also means that + // it is the probe table for the hash + auto const outer_num_rows{swap_tables ? right_num_rows : left_num_rows}; + + auto matches_per_row = std::make_unique>( + static_cast(outer_num_rows), stream, mr); + auto matches_per_row_span = cudf::device_span{ + matches_per_row->begin(), static_cast(outer_num_rows)}; + + // We can immediately filter out cases where one table is empty. In + // some cases, we return all the rows of the other table with a corresponding + // null index for the empty table; in others, we return an empty output. + if (right_num_rows == 0) { + switch (join_type) { + // Left, left anti, and full all return all the row indices from left + // with a corresponding NULL from the right. + case join_kind::LEFT_ANTI_JOIN: { + thrust::fill(matches_per_row->begin(), matches_per_row->end(), 1); + return {left_num_rows, std::move(matches_per_row)}; + } + // Inner and left semi joins return empty output because no matches can exist. + case join_kind::LEFT_SEMI_JOIN: return {0, std::move(matches_per_row)}; + default: CUDF_FAIL("Invalid join kind."); break; + } + } else if (left_num_rows == 0) { + switch (join_type) { + // Left, left anti, left semi, and inner joins all return empty sets. + case join_kind::LEFT_ANTI_JOIN: + case join_kind::LEFT_SEMI_JOIN: { + thrust::fill(matches_per_row->begin(), matches_per_row->end(), 0); + return {0, std::move(matches_per_row)}; + } + default: CUDF_FAIL("Invalid join kind."); break; + } + } + + // If evaluating the expression may produce null outputs we create a nullable + // output column and follow the null-supporting expression evaluation code + // path. + auto const has_nulls = + cudf::has_nulls(left_equality) || cudf::has_nulls(right_equality) || + binary_predicate.may_evaluate_null(left_conditional, right_conditional, stream); + + auto const parser = ast::detail::expression_parser{ + binary_predicate, left_conditional, right_conditional, has_nulls, stream, mr}; + CUDF_EXPECTS(parser.output_type().id() == type_id::BOOL8, + "The expression must produce a boolean output."); + + // TODO: The non-conditional join impls start with a dictionary matching, + // figure out what that is and what it's needed for (and if conditional joins + // need to do the same). + auto& probe = swap_tables ? right_equality : left_equality; + auto& build = swap_tables ? left_equality : right_equality; + auto probe_view = table_device_view::create(probe, stream); + auto build_view = table_device_view::create(build, stream); + auto left_conditional_view = table_device_view::create(left_conditional, stream); + auto right_conditional_view = table_device_view::create(right_conditional, stream); + auto& build_conditional_view = swap_tables ? left_conditional_view : right_conditional_view; + row_equality equality_probe{ + cudf::nullate::DYNAMIC{has_nulls}, *probe_view, *build_view, compare_nulls}; + + semi_map_type hash_table{compute_hash_table_size(build.num_rows()), + std::numeric_limits::max(), + cudf::detail::JoinNoneValue, + detail::hash_table_allocator_type{default_allocator{}, stream}, + stream.value()}; + + // Create hash table containing all keys found in right table + // TODO: To add support for nested columns we will need to flatten in many + // places. However, this probably isn't worth adding any time soon since we + // won't be able to support AST conditions for those types anyway. + auto const build_nulls = cudf::nullate::DYNAMIC{cudf::has_nulls(build)}; + row_hash const hash_build{build_nulls, *build_view}; + // Since we may see multiple rows that are identical in the equality tables + // but differ in the conditional tables, the equality comparator used for + // insertion must account for both sets of tables. An alternative solution + // would be to use a multimap, but that solution would store duplicates where + // equality and conditional rows are equal, so this approach is preferable. + // One way to make this solution even more efficient would be to only include + // the columns of the conditional table that are used by the expression, but + // that requires additional plumbing through the AST machinery and is out of + // scope for now. + row_equality equality_build_equality{build_nulls, *build_view, *build_view, compare_nulls}; + row_equality equality_build_conditional{ + build_nulls, *build_conditional_view, *build_conditional_view, compare_nulls}; + double_row_equality equality_build{equality_build_equality, equality_build_conditional}; + make_pair_function_semi pair_func_build{}; + + auto iter = cudf::detail::make_counting_transform_iterator(0, pair_func_build); + + // skip rows that are null here. + if ((compare_nulls == null_equality::EQUAL) or (not nullable(build))) { + hash_table.insert(iter, iter + right_num_rows, hash_build, equality_build, stream.value()); + } else { + thrust::counting_iterator stencil(0); + auto const [row_bitmask, _] = cudf::detail::bitmask_and(build, stream); + row_is_valid pred{static_cast(row_bitmask.data())}; + + // insert valid rows + hash_table.insert_if( + iter, iter + right_num_rows, stencil, pred, hash_build, equality_build, stream.value()); + } + + auto hash_table_view = hash_table.get_device_view(); + + // For inner joins we support optimizing the join by launching one thread for + // whichever table is larger rather than always using the left table. + detail::grid_1d const config(outer_num_rows, DEFAULT_JOIN_BLOCK_SIZE); + auto const shmem_size_per_block = parser.shmem_per_thread * config.num_threads_per_block; + + // Allocate storage for the counter used to get the size of the join output + rmm::device_scalar size(0, stream, mr); + + // Determine number of output rows without actually building the output to simply + // find what the size of the output will be. + if (has_nulls) { + compute_mixed_join_output_size_semi + <<>>( + *left_conditional_view, + *right_conditional_view, + *probe_view, + *build_view, + equality_probe, + join_type, + hash_table_view, + parser.device_expression_data, + swap_tables, + size.data(), + matches_per_row_span); + } else { + compute_mixed_join_output_size_semi + <<>>( + *left_conditional_view, + *right_conditional_view, + *probe_view, + *build_view, + equality_probe, + join_type, + hash_table_view, + parser.device_expression_data, + swap_tables, + size.data(), + matches_per_row_span); + } + + return {size.value(stream), std::move(matches_per_row)}; +} + +} // namespace detail + +std::pair>> mixed_left_semi_join_size( + table_view const& left_equality, + table_view const& right_equality, + table_view const& left_conditional, + table_view const& right_conditional, + ast::expression const& binary_predicate, + null_equality compare_nulls, + rmm::mr::device_memory_resource* mr) +{ + CUDF_FUNC_RANGE(); + return detail::compute_mixed_join_output_size_semi(left_equality, + right_equality, + left_conditional, + right_conditional, + binary_predicate, + compare_nulls, + detail::join_kind::LEFT_SEMI_JOIN, + rmm::cuda_stream_default, + mr); +} + +std::unique_ptr> mixed_left_semi_join( + table_view const& left_equality, + table_view const& right_equality, + table_view const& left_conditional, + table_view const& right_conditional, + ast::expression const& binary_predicate, + null_equality compare_nulls, + std::optional>> output_size_data, + rmm::mr::device_memory_resource* mr) +{ + CUDF_FUNC_RANGE(); + return detail::mixed_join_semi(left_equality, + right_equality, + left_conditional, + right_conditional, + binary_predicate, + compare_nulls, + detail::join_kind::LEFT_SEMI_JOIN, + output_size_data, + rmm::cuda_stream_default, + mr); +} + +std::pair>> mixed_left_anti_join_size( + table_view const& left_equality, + table_view const& right_equality, + table_view const& left_conditional, + table_view const& right_conditional, + ast::expression const& binary_predicate, + null_equality compare_nulls, + rmm::mr::device_memory_resource* mr) +{ + CUDF_FUNC_RANGE(); + return detail::compute_mixed_join_output_size_semi(left_equality, + right_equality, + left_conditional, + right_conditional, + binary_predicate, + compare_nulls, + detail::join_kind::LEFT_ANTI_JOIN, + rmm::cuda_stream_default, + mr); +} + +std::unique_ptr> mixed_left_anti_join( + table_view const& left_equality, + table_view const& right_equality, + table_view const& left_conditional, + table_view const& right_conditional, + ast::expression const& binary_predicate, + null_equality compare_nulls, + std::optional>> output_size_data, + rmm::mr::device_memory_resource* mr) +{ + CUDF_FUNC_RANGE(); + return detail::mixed_join_semi(left_equality, + right_equality, + left_conditional, + right_conditional, + binary_predicate, + compare_nulls, + detail::join_kind::LEFT_ANTI_JOIN, + output_size_data, + rmm::cuda_stream_default, + mr); +} + +} // namespace cudf diff --git a/cpp/src/join/mixed_join_size_kernels.cu b/cpp/src/join/mixed_join_size_kernels.cu new file mode 100644 index 00000000000..1a08b8792c2 --- /dev/null +++ b/cpp/src/join/mixed_join_size_kernels.cu @@ -0,0 +1,130 @@ +/* + * Copyright (c) 2022, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include +#include +#include +#include + +#include +#include +#include +#include +#include + +#include + +#include +#include + +namespace cudf { +namespace detail { +namespace cg = cooperative_groups; + +template +__global__ void compute_mixed_join_output_size( + table_device_view left_table, + table_device_view right_table, + table_device_view probe, + table_device_view build, + row_equality const equality_probe, + join_kind const join_type, + cudf::detail::mixed_multimap_type::device_view hash_table_view, + ast::detail::expression_device_view device_expression_data, + bool const swap_tables, + std::size_t* output_size, + cudf::device_span matches_per_row) +{ + // The (required) extern storage of the shared memory array leads to + // conflicting declarations between different templates. The easiest + // workaround is to declare an arbitrary (here char) array type then cast it + // after the fact to the appropriate type. + extern __shared__ char raw_intermediate_storage[]; + cudf::ast::detail::IntermediateDataType* intermediate_storage = + reinterpret_cast*>(raw_intermediate_storage); + auto thread_intermediate_storage = + intermediate_storage + (threadIdx.x * device_expression_data.num_intermediates); + + std::size_t thread_counter{0}; + cudf::size_type const start_idx = threadIdx.x + blockIdx.x * block_size; + cudf::size_type const stride = block_size * gridDim.x; + cudf::size_type const left_num_rows = left_table.num_rows(); + cudf::size_type const right_num_rows = right_table.num_rows(); + auto const outer_num_rows = (swap_tables ? right_num_rows : left_num_rows); + + auto evaluator = cudf::ast::detail::expression_evaluator( + left_table, right_table, device_expression_data); + + row_hash hash_probe{nullate::DYNAMIC{has_nulls}, probe}; + auto const empty_key_sentinel = hash_table_view.get_empty_key_sentinel(); + make_pair_function pair_func{hash_probe, empty_key_sentinel}; + + // Figure out the number of elements for this key. + cg::thread_block_tile<1> this_thread = cg::this_thread(); + // TODO: Address asymmetry in operator. + auto count_equality = pair_expression_equality{ + evaluator, thread_intermediate_storage, swap_tables, equality_probe}; + + for (cudf::size_type outer_row_index = start_idx; outer_row_index < outer_num_rows; + outer_row_index += stride) { + auto query_pair = pair_func(outer_row_index); + if (join_type == join_kind::LEFT_JOIN || join_type == join_kind::FULL_JOIN) { + matches_per_row[outer_row_index] = + hash_table_view.pair_count_outer(this_thread, query_pair, count_equality); + } else { + matches_per_row[outer_row_index] = + hash_table_view.pair_count(this_thread, query_pair, count_equality); + } + thread_counter += matches_per_row[outer_row_index]; + } + + using BlockReduce = cub::BlockReduce; + __shared__ typename BlockReduce::TempStorage temp_storage; + std::size_t block_counter = BlockReduce(temp_storage).Sum(thread_counter); + + // Add block counter to global counter + if (threadIdx.x == 0) atomicAdd(output_size, block_counter); +} + +template __global__ void compute_mixed_join_output_size( + table_device_view left_table, + table_device_view right_table, + table_device_view probe, + table_device_view build, + row_equality const equality_probe, + join_kind const join_type, + cudf::detail::mixed_multimap_type::device_view hash_table_view, + ast::detail::expression_device_view device_expression_data, + bool const swap_tables, + std::size_t* output_size, + cudf::device_span matches_per_row); + +template __global__ void compute_mixed_join_output_size( + table_device_view left_table, + table_device_view right_table, + table_device_view probe, + table_device_view build, + row_equality const equality_probe, + join_kind const join_type, + cudf::detail::mixed_multimap_type::device_view hash_table_view, + ast::detail::expression_device_view device_expression_data, + bool const swap_tables, + std::size_t* output_size, + cudf::device_span matches_per_row); + +} // namespace detail + +} // namespace cudf diff --git a/cpp/src/join/mixed_join_size_kernels_semi.cu b/cpp/src/join/mixed_join_size_kernels_semi.cu new file mode 100644 index 00000000000..2c077a698f8 --- /dev/null +++ b/cpp/src/join/mixed_join_size_kernels_semi.cu @@ -0,0 +1,116 @@ +/* + * Copyright (c) 2022, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include +#include +#include + +#include +#include +#include +#include +#include + +#include + +namespace cudf { +namespace detail { + +namespace cg = cooperative_groups; + +template +__global__ void compute_mixed_join_output_size_semi( + table_device_view left_table, + table_device_view right_table, + table_device_view probe, + table_device_view build, + row_equality const equality_probe, + join_kind const join_type, + cudf::detail::semi_map_type::device_view hash_table_view, + ast::detail::expression_device_view device_expression_data, + bool const swap_tables, + std::size_t* output_size, + cudf::device_span matches_per_row) +{ + // The (required) extern storage of the shared memory array leads to + // conflicting declarations between different templates. The easiest + // workaround is to declare an arbitrary (here char) array type then cast it + // after the fact to the appropriate type. + extern __shared__ char raw_intermediate_storage[]; + cudf::ast::detail::IntermediateDataType* intermediate_storage = + reinterpret_cast*>(raw_intermediate_storage); + auto thread_intermediate_storage = + intermediate_storage + (threadIdx.x * device_expression_data.num_intermediates); + + std::size_t thread_counter{0}; + cudf::size_type const start_idx = threadIdx.x + blockIdx.x * block_size; + cudf::size_type const stride = block_size * gridDim.x; + cudf::size_type const left_num_rows = left_table.num_rows(); + cudf::size_type const right_num_rows = right_table.num_rows(); + auto const outer_num_rows = (swap_tables ? right_num_rows : left_num_rows); + + auto evaluator = cudf::ast::detail::expression_evaluator( + left_table, right_table, device_expression_data); + row_hash hash_probe{nullate::DYNAMIC{has_nulls}, probe}; + // TODO: Address asymmetry in operator. + auto equality = single_expression_equality{ + evaluator, thread_intermediate_storage, swap_tables, equality_probe}; + + for (cudf::size_type outer_row_index = start_idx; outer_row_index < outer_num_rows; + outer_row_index += stride) { + matches_per_row[outer_row_index] = + ((join_type == join_kind::LEFT_ANTI_JOIN) != + (hash_table_view.contains(outer_row_index, hash_probe, equality))); + thread_counter += matches_per_row[outer_row_index]; + } + + using BlockReduce = cub::BlockReduce; + __shared__ typename BlockReduce::TempStorage temp_storage; + std::size_t block_counter = BlockReduce(temp_storage).Sum(thread_counter); + + // Add block counter to global counter + if (threadIdx.x == 0) atomicAdd(output_size, block_counter); +} + +template __global__ void compute_mixed_join_output_size_semi( + table_device_view left_table, + table_device_view right_table, + table_device_view probe, + table_device_view build, + row_equality const equality_probe, + join_kind const join_type, + cudf::detail::semi_map_type::device_view hash_table_view, + ast::detail::expression_device_view device_expression_data, + bool const swap_tables, + std::size_t* output_size, + cudf::device_span matches_per_row); + +template __global__ void compute_mixed_join_output_size_semi( + table_device_view left_table, + table_device_view right_table, + table_device_view probe, + table_device_view build, + row_equality const equality_probe, + join_kind const join_type, + cudf::detail::semi_map_type::device_view hash_table_view, + ast::detail::expression_device_view device_expression_data, + bool const swap_tables, + std::size_t* output_size, + cudf::device_span matches_per_row); + +} // namespace detail + +} // namespace cudf diff --git a/cpp/src/join/semi_join.cu b/cpp/src/join/semi_join.cu index 5eb8ca2452e..8563a2a3bd3 100644 --- a/cpp/src/join/semi_join.cu +++ b/cpp/src/join/semi_join.cu @@ -37,8 +37,6 @@ #include #include -#include - namespace cudf { namespace detail { @@ -91,13 +89,11 @@ std::unique_ptr> left_semi_anti_join( auto left_flattened_keys = left_flattened_tables.flattened_columns(); // Create hash table. - auto hash_table = cuco:: - static_map{ - compute_hash_table_size(right_num_rows), - std::numeric_limits::max(), - cudf::detail::JoinNoneValue, - hash_table_allocator_type{default_allocator{}, stream}, - stream.value()}; + semi_map_type hash_table{compute_hash_table_size(right_num_rows), + std::numeric_limits::max(), + cudf::detail::JoinNoneValue, + hash_table_allocator_type{default_allocator{}, stream}, + stream.value()}; // Create hash table containing all keys found in right table auto right_rows_d = table_device_view::create(right_flattened_keys, stream); diff --git a/cpp/tests/join/mixed_join_tests.cu b/cpp/tests/join/mixed_join_tests.cu index d6a348698b5..f9ed22150b7 100644 --- a/cpp/tests/join/mixed_join_tests.cu +++ b/cpp/tests/join/mixed_join_tests.cu @@ -641,3 +641,302 @@ TYPED_TEST(MixedFullJoinTest, Basic2) {JoinNoneValue, 1}, {JoinNoneValue, 2}}); } + +template +struct MixedJoinSingleReturnTest : public MixedJoinTest { + /* + * Perform a join of tables constructed from two input data sets according to + * verify that the outputs match the expected outputs (up to order). + */ + virtual void _test(cudf::table_view left_equality, + cudf::table_view right_equality, + cudf::table_view left_conditional, + cudf::table_view right_conditional, + cudf::ast::operation predicate, + std::vector expected_outputs, + cudf::null_equality compare_nulls = cudf::null_equality::EQUAL) + { + auto [result_size, actual_counts] = this->join_size( + left_equality, right_equality, left_conditional, right_conditional, predicate, compare_nulls); + EXPECT_TRUE(result_size == expected_outputs.size()); + + auto result = this->join( + left_equality, right_equality, left_conditional, right_conditional, predicate, compare_nulls); + std::vector resulting_indices; + for (size_t i = 0; i < result->size(); ++i) { + // Note: Not trying to be terribly efficient here since these tests are + // small, otherwise a batch copy to host before constructing the tuples + // would be important. + resulting_indices.push_back(result->element(i, rmm::cuda_stream_default)); + } + std::sort(resulting_indices.begin(), resulting_indices.end()); + std::sort(expected_outputs.begin(), expected_outputs.end()); + EXPECT_TRUE( + std::equal(resulting_indices.begin(), resulting_indices.end(), expected_outputs.begin())); + } + + /* + * Perform a join of tables constructed from two input data sets according to + * the provided predicate and verify that the outputs match the expected + * outputs (up to order). + */ + void test(ColumnVector left_data, + ColumnVector right_data, + std::vector equality_columns, + std::vector conditional_columns, + cudf::ast::operation predicate, + std::vector expected_outputs) + { + // Note that we need to maintain the column wrappers otherwise the + // resulting column views will be referencing potentially invalid memory. + auto [left_wrappers, + right_wrappers, + left_columns, + right_columns, + left_equality, + right_equality, + left_conditional, + right_conditional] = + this->parse_input(left_data, right_data, equality_columns, conditional_columns); + this->_test(left_equality, + right_equality, + left_conditional, + right_conditional, + predicate, + expected_outputs); + } + + /* + * Perform a join of tables constructed from two input data sets according to + * the provided predicate and verify that the outputs match the expected + * outputs (up to order). + */ + void test_nulls(NullableColumnVector left_data, + NullableColumnVector right_data, + std::vector equality_columns, + std::vector conditional_columns, + cudf::ast::operation predicate, + std::vector expected_outputs, + cudf::null_equality compare_nulls = cudf::null_equality::EQUAL) + { + // Note that we need to maintain the column wrappers otherwise the + // resulting column views will be referencing potentially invalid memory. + auto [left_wrappers, + right_wrappers, + left_columns, + right_columns, + left_equality, + right_equality, + left_conditional, + right_conditional] = + this->parse_input(left_data, right_data, equality_columns, conditional_columns); + this->_test(left_equality, + right_equality, + left_conditional, + right_conditional, + predicate, + expected_outputs, + compare_nulls); + } + + /** + * This method must be implemented by subclasses for specific types of joins. + * It should be a simply forwarding of arguments to the appropriate cudf + * mixed join API. + */ + virtual SingleJoinReturn join(cudf::table_view left_equality, + cudf::table_view right_equality, + cudf::table_view left_conditional, + cudf::table_view right_conditional, + cudf::ast::operation predicate, + cudf::null_equality compare_nulls = cudf::null_equality::EQUAL) = 0; + + /** + * This method must be implemented by subclasses for specific types of joins. + * It should be a simply forwarding of arguments to the appropriate cudf + * mixed join size computation API. + */ + virtual std::pair>> join_size( + cudf::table_view left_equality, + cudf::table_view right_equality, + cudf::table_view left_conditional, + cudf::table_view right_conditional, + cudf::ast::operation predicate, + cudf::null_equality compare_nulls = cudf::null_equality::EQUAL) = 0; +}; + +/** + * Tests of mixed left semi joins. + */ +template +struct MixedLeftSemiJoinTest : public MixedJoinSingleReturnTest { + SingleJoinReturn join(cudf::table_view left_equality, + cudf::table_view right_equality, + cudf::table_view left_conditional, + cudf::table_view right_conditional, + cudf::ast::operation predicate, + cudf::null_equality compare_nulls = cudf::null_equality::EQUAL) override + { + return cudf::mixed_left_semi_join( + left_equality, right_equality, left_conditional, right_conditional, predicate, compare_nulls); + } + + std::pair>> join_size( + cudf::table_view left_equality, + cudf::table_view right_equality, + cudf::table_view left_conditional, + cudf::table_view right_conditional, + cudf::ast::operation predicate, + cudf::null_equality compare_nulls = cudf::null_equality::EQUAL) override + { + return cudf::mixed_left_semi_join_size( + left_equality, right_equality, left_conditional, right_conditional, predicate, compare_nulls); + } +}; + +TYPED_TEST_SUITE(MixedLeftSemiJoinTest, cudf::test::IntegralTypesNotBool); + +TYPED_TEST(MixedLeftSemiJoinTest, BasicEquality) +{ + this->test({{0, 1, 2}, {3, 4, 5}, {10, 20, 30}}, + {{0, 1, 3}, {5, 4, 5}, {30, 40, 50}}, + {0}, + {1, 2}, + left_zero_eq_right_zero, + {1}); +} + +TYPED_TEST(MixedLeftSemiJoinTest, BasicEqualityDuplicates) +{ + this->test({{0, 1, 2, 1}, {3, 4, 5, 6}, {10, 20, 30, 40}}, + {{0, 1, 3, 1}, {5, 4, 5, 6}, {30, 40, 50, 40}}, + {0}, + {1, 2}, + left_zero_eq_right_zero, + {1, 3}); +} + +TYPED_TEST(MixedLeftSemiJoinTest, BasicNullEqualityEqual) +{ + this->test_nulls({{{0, 1, 2}, {1, 1, 0}}, {{3, 4, 5}, {1, 1, 1}}, {{10, 20, 30}, {1, 1, 1}}}, + {{{0, 1, 3}, {1, 1, 0}}, {{5, 4, 5}, {1, 1, 1}}, {{30, 40, 30}, {1, 1, 1}}}, + {0}, + {1, 2}, + left_zero_eq_right_zero, + {1, 2}, + cudf::null_equality::EQUAL); +}; + +TYPED_TEST(MixedLeftSemiJoinTest, BasicNullEqualityUnequal) +{ + this->test_nulls({{{0, 1, 2}, {1, 1, 0}}, {{3, 4, 5}, {1, 1, 1}}, {{10, 20, 30}, {1, 1, 1}}}, + {{{0, 1, 3}, {1, 1, 0}}, {{5, 4, 5}, {1, 1, 1}}, {{30, 40, 30}, {1, 1, 1}}}, + {0}, + {1, 2}, + left_zero_eq_right_zero, + {1}, + cudf::null_equality::UNEQUAL); +}; + +TYPED_TEST(MixedLeftSemiJoinTest, AsymmetricEquality) +{ + this->test({{0, 2, 1}, {3, 5, 4}, {10, 30, 20}}, + {{0, 1, 3}, {5, 4, 5}, {30, 40, 50}}, + {0}, + {1, 2}, + left_zero_eq_right_zero, + {2}); +} + +TYPED_TEST(MixedLeftSemiJoinTest, AsymmetricLeftLargerEquality) +{ + this->test({{0, 2, 1, 4}, {3, 5, 4, 10}, {10, 30, 20, 100}}, + {{0, 1, 3}, {5, 4, 5}, {30, 40, 50}}, + {0}, + {1, 2}, + left_zero_eq_right_zero, + {2}); +} + +/** + * Tests of mixed left semi joins. + */ +template +struct MixedLeftAntiJoinTest : public MixedJoinSingleReturnTest { + SingleJoinReturn join(cudf::table_view left_equality, + cudf::table_view right_equality, + cudf::table_view left_conditional, + cudf::table_view right_conditional, + cudf::ast::operation predicate, + cudf::null_equality compare_nulls = cudf::null_equality::EQUAL) override + { + return cudf::mixed_left_anti_join( + left_equality, right_equality, left_conditional, right_conditional, predicate, compare_nulls); + } + + std::pair>> join_size( + cudf::table_view left_equality, + cudf::table_view right_equality, + cudf::table_view left_conditional, + cudf::table_view right_conditional, + cudf::ast::operation predicate, + cudf::null_equality compare_nulls = cudf::null_equality::EQUAL) override + { + return cudf::mixed_left_anti_join_size( + left_equality, right_equality, left_conditional, right_conditional, predicate, compare_nulls); + } +}; + +TYPED_TEST_SUITE(MixedLeftAntiJoinTest, cudf::test::IntegralTypesNotBool); + +TYPED_TEST(MixedLeftAntiJoinTest, BasicEquality) +{ + this->test({{0, 1, 2}, {3, 4, 5}, {10, 20, 30}}, + {{0, 1, 3}, {5, 4, 5}, {30, 40, 50}}, + {0}, + {1, 2}, + left_zero_eq_right_zero, + {0, 2}); +} + +TYPED_TEST(MixedLeftAntiJoinTest, BasicNullEqualityEqual) +{ + this->test_nulls({{{0, 1, 2}, {1, 1, 0}}, {{3, 4, 5}, {1, 1, 1}}, {{10, 20, 30}, {1, 1, 1}}}, + {{{0, 1, 3}, {1, 1, 0}}, {{5, 4, 5}, {1, 1, 1}}, {{30, 40, 30}, {1, 1, 1}}}, + {0}, + {1, 2}, + left_zero_eq_right_zero, + {0}, + cudf::null_equality::EQUAL); +}; + +TYPED_TEST(MixedLeftAntiJoinTest, BasicNullEqualityUnequal) +{ + this->test_nulls({{{0, 1, 2}, {1, 1, 0}}, {{3, 4, 5}, {1, 1, 1}}, {{10, 20, 30}, {1, 1, 1}}}, + {{{0, 1, 3}, {1, 1, 0}}, {{5, 4, 5}, {1, 1, 1}}, {{30, 40, 30}, {1, 1, 1}}}, + {0}, + {1, 2}, + left_zero_eq_right_zero, + {0, 2}, + cudf::null_equality::UNEQUAL); +}; + +TYPED_TEST(MixedLeftAntiJoinTest, AsymmetricEquality) +{ + this->test({{0, 2, 1}, {3, 5, 4}, {10, 30, 20}}, + {{0, 1, 3}, {5, 4, 5}, {30, 40, 50}}, + {0}, + {1, 2}, + left_zero_eq_right_zero, + {0, 1}); +} + +TYPED_TEST(MixedLeftAntiJoinTest, AsymmetricLeftLargerEquality) +{ + this->test({{0, 2, 1, 4}, {3, 5, 4, 10}, {10, 30, 20, 100}}, + {{0, 1, 3}, {5, 4, 5}, {30, 40, 50}}, + {0}, + {1, 2}, + left_zero_eq_right_zero, + {0, 1, 3}); +} diff --git a/docker/package_versions.sh b/docker/package_versions.sh deleted file mode 100755 index c558f66d511..00000000000 --- a/docker/package_versions.sh +++ /dev/null @@ -1,53 +0,0 @@ -#!/usr/bin/env bash -# Usage: -# "./package_versions.sh /cudf/conda/environments/cudf_dev.yml" - Updates package versions in file based on Docker build-args - -FILENAME=$1 - -set_version() { - sed -i "s/\- $1\([<>=][^a-zA-Z]*\)\?$/\- $1=$2/" $FILENAME -} - -replace_text() { - sed -i "s/$1/$2/" $FILENAME -} - -add_package() { - sed -i "s/\- $1\([<>=][^a-zA-Z]*\)\?$/a \- $2=$3/" $FILENAME -} - -if [ "$PYTHON_VERSION" ]; then - PACKAGE_NAME="python" - set_version "$PACKAGE_NAME" "$PYTHON_VERSION" -fi - -if [ "$NUMBA_VERSION" ]; then - PACKAGE_NAME="numba" - set_version "$PACKAGE_NAME" "$NUMBA_VERSION" -fi - -if [ "$PANDAS_VERSION" ]; then - PACKAGE_NAME="pandas" - set_version "$PACKAGE_NAME" "$PANDAS_VERSION" -fi - -if [ "$PYARROW_VERSION" ]; then - PACKAGE_NAME="pyarrow" - set_version "$PACKAGE_NAME" "$PYARROW_VERSION" -fi - -if [ "$CYTHON_VERSION" ]; then - PACKAGE_NAME="cython" - set_version "$PACKAGE_NAME" "$CYTHON_VERSION" -fi - -if [ "$CMAKE_VERSION" ]; then - PACKAGE_NAME="cmake" - set_version "$PACKAGE_NAME" "$CMAKE_VERSION" -fi - -if [ "$NUMPY_VERSION" ]; then - ABOVE_PACKAGE="pandas" - PACKAGE_NAME="numpy" - add_package "$ABOVE_PACKAGE" "$PACKAGE_NAME" "$NUMPY_VERSION" -fi diff --git a/docker_build/Dockerfile b/docker_build/Dockerfile deleted file mode 100644 index 696a6969778..00000000000 --- a/docker_build/Dockerfile +++ /dev/null @@ -1,77 +0,0 @@ -# Copyright (c) 2021, NVIDIA CORPORATION. - -ARG CUDA_VERSION=11.2.2 -FROM nvidia/cuda:${CUDA_VERSION}-devel -ENV CUDA_SHORT_VERSION=11.2 - -SHELL ["/bin/bash", "-c"] -ENV LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/usr/local/cuda/lib64:/usr/local/cuda/extras/CUPTI/lib64:/usr/local/lib:/repos/dist/lib - -ENV DEBIAN_FRONTEND=noninteractive - -ENV CUDA_HOME=/usr/local/cuda -ENV CUDA_PATH=$CUDA_HOME -ENV PATH=${CUDA_HOME}/lib64/:${PATH}:${CUDA_HOME}/bin - -# Build env variables for arrow -ENV CMAKE_BUILD_TYPE=release -ENV PYARROW_WITH_PARQUET=1 -ENV PYARROW_WITH_CUDA=1 -ENV PYARROW_WITH_ORC=1 -ENV PYARROW_WITH_DATASET=1 - -ENV ARROW_HOME=/repos/dist - -# Build env variables for rmm -ENV INSTALL_PREFIX=/usr - - -RUN apt update -y --fix-missing && \ - apt upgrade -y && \ - apt install -y --no-install-recommends software-properties-common && \ - add-apt-repository ppa:deadsnakes/ppa && \ - apt update -y --fix-missing - -RUN apt install -y --no-install-recommends \ - git \ - python3.8-dev \ - build-essential \ - autoconf \ - bison \ - flex \ - libjemalloc-dev \ - wget \ - libssl-dev \ - protobuf-compiler && \ - apt-get autoremove -y && \ - apt-get clean && \ - rm -rf /var/lib/apt/lists/* && \ - update-alternatives --install /usr/bin/python python /usr/bin/python3.8 1 && \ - wget https://bootstrap.pypa.io/get-pip.py && \ - python get-pip.py - -# Install cmake -RUN version=3.18 && build=5 && mkdir ~/temp && cd ~/temp && wget https://cmake.org/files/v$version/cmake-$version.$build.tar.gz && \ - tar -xzvf cmake-$version.$build.tar.gz && cd cmake-$version.$build/ && ./bootstrap && make -j$(nproc) && make install - -# Install arrow from source -RUN git clone https://github.com/apache/arrow.git /repos/arrow && mkdir /repos/dist/ && cd /repos/arrow && git checkout apache-arrow-1.0.1 && git submodule init && \ - git submodule update && export PARQUET_TEST_DATA="${PWD}/cpp/submodules/parquet-testing/data" && export ARROW_TEST_DATA="${PWD}/testing/data" && \ - cd /repos/arrow/cpp && mkdir release && cd /repos/arrow/cpp/release && pip install -r /repos/arrow/python/requirements-build.txt && \ - cmake -DCMAKE_INSTALL_PREFIX=$ARROW_HOME -DCMAKE_INSTALL_LIBDIR=lib -DARROW_FLIGHT=ON -DARROW_GANDIVA=OFF -DARROW_ORC=ON -DARROW_WITH_BZ2=ON -DARROW_WITH_ZLIB=ON -DARROW_WITH_ZSTD=ON -DARROW_WITH_LZ4=ON -DARROW_WITH_SNAPPY=ON -DARROW_WITH_BROTLI=ON -DARROW_PARQUET=ON -DARROW_PYTHON=ON -DARROW_PLASMA=ON -DARROW_BUILD_TESTS=ON -DARROW_CUDA=ON -DARROW_DATASET=ON .. && \ - make -j$(nproc) && make install && cd /repos/arrow/python/ && python setup.py build_ext --build-type=release bdist_wheel && pip install /repos/arrow/python/dist/*.whl - - -# Install rmm from source -RUN cd /repos/ && git clone https://github.com/rapidsai/rmm.git && cd /repos/rmm/ && ./build.sh librmm && pip install /repos/rmm/python/. - -ADD . /repos/cudf/ - -# Build env for CUDF build -ENV CUDF_HOME=/repos/cudf/ -ENV CUDF_ROOT=/repos/cudf/cpp/build/ - -# Install cudf from source -RUN cd /repos/cudf/ && git submodule update --init --recursive && ./build.sh libcudf && \ - pip install /repos/cudf/python/cudf/. - diff --git a/java/src/main/java/ai/rapids/cudf/Table.java b/java/src/main/java/ai/rapids/cudf/Table.java index a021ded4588..bb0321d0a16 100644 --- a/java/src/main/java/ai/rapids/cudf/Table.java +++ b/java/src/main/java/ai/rapids/cudf/Table.java @@ -616,10 +616,6 @@ private static native long[] conditionalInnerJoinGatherMapsWithCount(long leftTa private static native long[] conditionalFullJoinGatherMaps(long leftTable, long rightTable, long condition) throws CudfException; - private static native long[] conditionalFullJoinGatherMapsWithCount(long leftTable, long rightTable, - long condition, - long rowCount) throws CudfException; - private static native long conditionalLeftSemiJoinRowCount(long leftTable, long rightTable, long condition) throws CudfException; @@ -670,6 +666,32 @@ private static native long[] mixedFullJoinGatherMaps(long leftKeysTable, long ri long leftConditionTable, long rightConditionTable, long condition, boolean compareNullsEqual); + private static native long[] mixedLeftSemiJoinSize(long leftKeysTable, long rightKeysTable, + long leftConditionTable, long rightConditionTable, + long condition, boolean compareNullsEqual); + + private static native long[] mixedLeftSemiJoinGatherMap(long leftKeysTable, long rightKeysTable, + long leftConditionTable, long rightConditionTable, + long condition, boolean compareNullsEqual); + + private static native long[] mixedLeftSemiJoinGatherMapWithSize(long leftKeysTable, long rightKeysTable, + long leftConditionTable, long rightConditionTable, + long condition, boolean compareNullsEqual, + long outputRowCount, long matchesColumnView); + + private static native long[] mixedLeftAntiJoinSize(long leftKeysTable, long rightKeysTable, + long leftConditionTable, long rightConditionTable, + long condition, boolean compareNullsEqual); + + private static native long[] mixedLeftAntiJoinGatherMap(long leftKeysTable, long rightKeysTable, + long leftConditionTable, long rightConditionTable, + long condition, boolean compareNullsEqual); + + private static native long[] mixedLeftAntiJoinGatherMapWithSize(long leftKeysTable, long rightKeysTable, + long leftConditionTable, long rightConditionTable, + long condition, boolean compareNullsEqual, + long outputRowCount, long matchesColumnView); + private static native long[] crossJoin(long leftTable, long rightTable) throws CudfException; private static native long[] concatenate(long[] cudfTablePointers) throws CudfException; @@ -2853,7 +2875,7 @@ public static GatherMap[] mixedFullJoinGatherMaps(Table leftKeys, Table rightKey return buildJoinGatherMaps(gatherMapData); } - private GatherMap buildSemiJoinGatherMap(long[] gatherMapData) { + private static GatherMap buildSemiJoinGatherMap(long[] gatherMapData) { long bufferSize = gatherMapData[0]; long leftAddr = gatherMapData[1]; long leftHandle = gatherMapData[2]; @@ -2939,6 +2961,94 @@ public GatherMap conditionalLeftSemiJoinGatherMap(Table rightTable, return buildSemiJoinGatherMap(gatherMapData); } + /** + * Computes output size information for a left semi join between two tables using a mix of + * equality and inequality conditions. The entire join condition is assumed to be a logical AND + * of the equality condition and inequality condition. + * NOTE: It is the responsibility of the caller to close the resulting size information object + * or native resources can be leaked! + * @param leftKeys the left table's key columns for the equality condition + * @param rightKeys the right table's key columns for the equality condition + * @param leftConditional the left table's columns needed to evaluate the inequality condition + * @param rightConditional the right table's columns needed to evaluate the inequality condition + * @param condition the inequality condition of the join + * @param nullEquality whether nulls should compare as equal + * @return size information for the join + */ + public static MixedJoinSize mixedLeftSemiJoinSize(Table leftKeys, Table rightKeys, + Table leftConditional, Table rightConditional, + CompiledExpression condition, + NullEquality nullEquality) { + long[] mixedSizeInfo = mixedLeftSemiJoinSize( + leftKeys.getNativeView(), rightKeys.getNativeView(), + leftConditional.getNativeView(), rightConditional.getNativeView(), + condition.getNativeHandle(), nullEquality == NullEquality.EQUAL); + assert mixedSizeInfo.length == 2; + long outputRowCount = mixedSizeInfo[0]; + long matchesColumnHandle = mixedSizeInfo[1]; + return new MixedJoinSize(outputRowCount, new ColumnVector(matchesColumnHandle)); + } + + /** + * Computes the gather map that can be used to manifest the result of a left semi join between + * two tables using a mix of equality and inequality conditions. The entire join condition is + * assumed to be a logical AND of the equality condition and inequality condition. + * A {@link GatherMap} instance will be returned that can be used to gather + * the left table to produce the result of the left semi join. + * It is the responsibility of the caller to close the resulting gather map instances. + * @param leftKeys the left table's key columns for the equality condition + * @param rightKeys the right table's key columns for the equality condition + * @param leftConditional the left table's columns needed to evaluate the inequality condition + * @param rightConditional the right table's columns needed to evaluate the inequality condition + * @param condition the inequality condition of the join + * @param nullEquality whether nulls should compare as equal + * @return left and right table gather maps + */ + public static GatherMap mixedLeftSemiJoinGatherMap(Table leftKeys, Table rightKeys, + Table leftConditional, Table rightConditional, + CompiledExpression condition, + NullEquality nullEquality) { + long[] gatherMapData = mixedLeftSemiJoinGatherMap( + leftKeys.getNativeView(), rightKeys.getNativeView(), + leftConditional.getNativeView(), rightConditional.getNativeView(), + condition.getNativeHandle(), + nullEquality == NullEquality.EQUAL); + return buildSemiJoinGatherMap(gatherMapData); + } + + /** + * Computes the gather map that can be used to manifest the result of a left semi join between + * two tables using a mix of equality and inequality conditions. The entire join condition is + * assumed to be a logical AND of the equality condition and inequality condition. + * A {@link GatherMap} instance will be returned that can be used to gather + * the left table to produce the result of the left semi join. + * It is the responsibility of the caller to close the resulting gather map instances. + * This interface allows passing the size result from + * {@link #mixedLeftSemiJoinSize(Table, Table, Table, Table, CompiledExpression, NullEquality)} + * when the output size was computed previously. + * @param leftKeys the left table's key columns for the equality condition + * @param rightKeys the right table's key columns for the equality condition + * @param leftConditional the left table's columns needed to evaluate the inequality condition + * @param rightConditional the right table's columns needed to evaluate the inequality condition + * @param condition the inequality condition of the join + * @param nullEquality whether nulls should compare as equal + * @param joinSize mixed join size result + * @return left and right table gather maps + */ + public static GatherMap mixedLeftSemiJoinGatherMap(Table leftKeys, Table rightKeys, + Table leftConditional, Table rightConditional, + CompiledExpression condition, + NullEquality nullEquality, + MixedJoinSize joinSize) { + long[] gatherMapData = mixedLeftSemiJoinGatherMapWithSize( + leftKeys.getNativeView(), rightKeys.getNativeView(), + leftConditional.getNativeView(), rightConditional.getNativeView(), + condition.getNativeHandle(), + nullEquality == NullEquality.EQUAL, + joinSize.getOutputRowCount(), joinSize.getMatches().getNativeView()); + return buildSemiJoinGatherMap(gatherMapData); + } + /** * Computes the gather map that can be used to manifest the result of a left anti-join between * two tables. It is assumed this table instance holds the key columns from the left table, and @@ -3018,6 +3128,94 @@ public GatherMap conditionalLeftAntiJoinGatherMap(Table rightTable, return buildSemiJoinGatherMap(gatherMapData); } + /** + * Computes output size information for a left anti join between two tables using a mix of + * equality and inequality conditions. The entire join condition is assumed to be a logical AND + * of the equality condition and inequality condition. + * NOTE: It is the responsibility of the caller to close the resulting size information object + * or native resources can be leaked! + * @param leftKeys the left table's key columns for the equality condition + * @param rightKeys the right table's key columns for the equality condition + * @param leftConditional the left table's columns needed to evaluate the inequality condition + * @param rightConditional the right table's columns needed to evaluate the inequality condition + * @param condition the inequality condition of the join + * @param nullEquality whether nulls should compare as equal + * @return size information for the join + */ + public static MixedJoinSize mixedLeftAntiJoinSize(Table leftKeys, Table rightKeys, + Table leftConditional, Table rightConditional, + CompiledExpression condition, + NullEquality nullEquality) { + long[] mixedSizeInfo = mixedLeftAntiJoinSize( + leftKeys.getNativeView(), rightKeys.getNativeView(), + leftConditional.getNativeView(), rightConditional.getNativeView(), + condition.getNativeHandle(), nullEquality == NullEquality.EQUAL); + assert mixedSizeInfo.length == 2; + long outputRowCount = mixedSizeInfo[0]; + long matchesColumnHandle = mixedSizeInfo[1]; + return new MixedJoinSize(outputRowCount, new ColumnVector(matchesColumnHandle)); + } + + /** + * Computes the gather map that can be used to manifest the result of a left anti join between + * two tables using a mix of equality and inequality conditions. The entire join condition is + * assumed to be a logical AND of the equality condition and inequality condition. + * A {@link GatherMap} instance will be returned that can be used to gather + * the left table to produce the result of the left anti join. + * It is the responsibility of the caller to close the resulting gather map instances. + * @param leftKeys the left table's key columns for the equality condition + * @param rightKeys the right table's key columns for the equality condition + * @param leftConditional the left table's columns needed to evaluate the inequality condition + * @param rightConditional the right table's columns needed to evaluate the inequality condition + * @param condition the inequality condition of the join + * @param nullEquality whether nulls should compare as equal + * @return left and right table gather maps + */ + public static GatherMap mixedLeftAntiJoinGatherMap(Table leftKeys, Table rightKeys, + Table leftConditional, Table rightConditional, + CompiledExpression condition, + NullEquality nullEquality) { + long[] gatherMapData = mixedLeftAntiJoinGatherMap( + leftKeys.getNativeView(), rightKeys.getNativeView(), + leftConditional.getNativeView(), rightConditional.getNativeView(), + condition.getNativeHandle(), + nullEquality == NullEquality.EQUAL); + return buildSemiJoinGatherMap(gatherMapData); + } + + /** + * Computes the gather map that can be used to manifest the result of a left anti join between + * two tables using a mix of equality and inequality conditions. The entire join condition is + * assumed to be a logical AND of the equality condition and inequality condition. + * A {@link GatherMap} instance will be returned that can be used to gather + * the left table to produce the result of the left anti join. + * It is the responsibility of the caller to close the resulting gather map instances. + * This interface allows passing the size result from + * {@link #mixedLeftAntiJoinSize(Table, Table, Table, Table, CompiledExpression, NullEquality)} + * when the output size was computed previously. + * @param leftKeys the left table's key columns for the equality condition + * @param rightKeys the right table's key columns for the equality condition + * @param leftConditional the left table's columns needed to evaluate the inequality condition + * @param rightConditional the right table's columns needed to evaluate the inequality condition + * @param condition the inequality condition of the join + * @param nullEquality whether nulls should compare as equal + * @param joinSize mixed join size result + * @return left and right table gather maps + */ + public static GatherMap mixedLeftAntiJoinGatherMap(Table leftKeys, Table rightKeys, + Table leftConditional, Table rightConditional, + CompiledExpression condition, + NullEquality nullEquality, + MixedJoinSize joinSize) { + long[] gatherMapData = mixedLeftAntiJoinGatherMapWithSize( + leftKeys.getNativeView(), rightKeys.getNativeView(), + leftConditional.getNativeView(), rightConditional.getNativeView(), + condition.getNativeHandle(), + nullEquality == NullEquality.EQUAL, + joinSize.getOutputRowCount(), joinSize.getMatches().getNativeView()); + return buildSemiJoinGatherMap(gatherMapData); + } + /** * For details about how this method functions refer to * {@link #convertToRowsFixedWidthOptimized()}. diff --git a/java/src/main/native/src/TableJni.cpp b/java/src/main/native/src/TableJni.cpp index dd84fcd9f12..10f295e27bf 100644 --- a/java/src/main/native/src/TableJni.cpp +++ b/java/src/main/native/src/TableJni.cpp @@ -948,6 +948,31 @@ jlongArray mixed_join_gather_maps(JNIEnv *env, jlong j_left_keys, jlong j_right_ CATCH_STD(env, NULL); } +template +jlongArray mixed_join_gather_single_map(JNIEnv *env, jlong j_left_keys, jlong j_right_keys, + jlong j_left_condition, jlong j_right_condition, + jlong j_condition, jboolean j_nulls_equal, T join_func) { + JNI_NULL_CHECK(env, j_left_keys, "left keys table is null", 0); + JNI_NULL_CHECK(env, j_right_keys, "right keys table is null", 0); + JNI_NULL_CHECK(env, j_left_condition, "left condition table is null", 0); + JNI_NULL_CHECK(env, j_right_condition, "right condition table is null", 0); + JNI_NULL_CHECK(env, j_condition, "condition is null", 0); + try { + cudf::jni::auto_set_device(env); + auto const left_keys = reinterpret_cast(j_left_keys); + auto const right_keys = reinterpret_cast(j_right_keys); + auto const left_condition = reinterpret_cast(j_left_condition); + auto const right_condition = reinterpret_cast(j_right_condition); + auto const condition = reinterpret_cast(j_condition); + auto const nulls_equal = + j_nulls_equal ? cudf::null_equality::EQUAL : cudf::null_equality::UNEQUAL; + return gather_map_to_java(env, + join_func(*left_keys, *right_keys, *left_condition, *right_condition, + condition->get_top_expression(), nulls_equal)); + } + CATCH_STD(env, NULL); +} + std::pair> get_mixed_size_info(JNIEnv *env, jlong j_output_row_count, jlong j_matches_view) { auto const row_count = static_cast(j_output_row_count); @@ -2537,6 +2562,50 @@ JNIEXPORT jlongArray JNICALL Java_ai_rapids_cudf_Table_conditionalLeftSemiJoinGa }); } +JNIEXPORT jlongArray JNICALL Java_ai_rapids_cudf_Table_mixedLeftSemiJoinSize( + JNIEnv *env, jclass, jlong j_left_keys, jlong j_right_keys, jlong j_left_condition, + jlong j_right_condition, jlong j_condition, jboolean j_nulls_equal) { + return cudf::jni::mixed_join_size( + env, j_left_keys, j_right_keys, j_left_condition, j_right_condition, j_condition, + j_nulls_equal, + [](cudf::table_view const &left_keys, cudf::table_view const &right_keys, + cudf::table_view const &left_condition, cudf::table_view const &right_condition, + cudf::ast::expression const &condition, cudf::null_equality nulls_equal) { + return cudf::mixed_left_semi_join_size(left_keys, right_keys, left_condition, + right_condition, condition, nulls_equal); + }); +} + +JNIEXPORT jlongArray JNICALL Java_ai_rapids_cudf_Table_mixedLeftSemiJoinGatherMap( + JNIEnv *env, jclass, jlong j_left_keys, jlong j_right_keys, jlong j_left_condition, + jlong j_right_condition, jlong j_condition, jboolean j_nulls_equal) { + return cudf::jni::mixed_join_gather_single_map( + env, j_left_keys, j_right_keys, j_left_condition, j_right_condition, j_condition, + j_nulls_equal, + [](cudf::table_view const &left_keys, cudf::table_view const &right_keys, + cudf::table_view const &left_condition, cudf::table_view const &right_condition, + cudf::ast::expression const &condition, cudf::null_equality nulls_equal) { + return cudf::mixed_left_semi_join(left_keys, right_keys, left_condition, right_condition, + condition, nulls_equal); + }); +} + +JNIEXPORT jlongArray JNICALL Java_ai_rapids_cudf_Table_mixedLeftSemiJoinGatherMapWithSize( + JNIEnv *env, jclass, jlong j_left_keys, jlong j_right_keys, jlong j_left_condition, + jlong j_right_condition, jlong j_condition, jboolean j_nulls_equal, jlong j_output_row_count, + jlong j_matches_view) { + auto size_info = cudf::jni::get_mixed_size_info(env, j_output_row_count, j_matches_view); + return cudf::jni::mixed_join_gather_single_map( + env, j_left_keys, j_right_keys, j_left_condition, j_right_condition, j_condition, + j_nulls_equal, + [&size_info](cudf::table_view const &left_keys, cudf::table_view const &right_keys, + cudf::table_view const &left_condition, cudf::table_view const &right_condition, + cudf::ast::expression const &condition, cudf::null_equality nulls_equal) { + return cudf::mixed_left_semi_join(left_keys, right_keys, left_condition, right_condition, + condition, nulls_equal, size_info); + }); +} + JNIEXPORT jlongArray JNICALL Java_ai_rapids_cudf_Table_leftAntiJoinGatherMap( JNIEnv *env, jclass, jlong j_left_keys, jlong j_right_keys, jboolean compare_nulls_equal) { return cudf::jni::join_gather_single_map( @@ -2585,6 +2654,50 @@ JNIEXPORT jlongArray JNICALL Java_ai_rapids_cudf_Table_conditionalLeftAntiJoinGa }); } +JNIEXPORT jlongArray JNICALL Java_ai_rapids_cudf_Table_mixedLeftAntiJoinSize( + JNIEnv *env, jclass, jlong j_left_keys, jlong j_right_keys, jlong j_left_condition, + jlong j_right_condition, jlong j_condition, jboolean j_nulls_equal) { + return cudf::jni::mixed_join_size( + env, j_left_keys, j_right_keys, j_left_condition, j_right_condition, j_condition, + j_nulls_equal, + [](cudf::table_view const &left_keys, cudf::table_view const &right_keys, + cudf::table_view const &left_condition, cudf::table_view const &right_condition, + cudf::ast::expression const &condition, cudf::null_equality nulls_equal) { + return cudf::mixed_left_anti_join_size(left_keys, right_keys, left_condition, + right_condition, condition, nulls_equal); + }); +} + +JNIEXPORT jlongArray JNICALL Java_ai_rapids_cudf_Table_mixedLeftAntiJoinGatherMap( + JNIEnv *env, jclass, jlong j_left_keys, jlong j_right_keys, jlong j_left_condition, + jlong j_right_condition, jlong j_condition, jboolean j_nulls_equal) { + return cudf::jni::mixed_join_gather_single_map( + env, j_left_keys, j_right_keys, j_left_condition, j_right_condition, j_condition, + j_nulls_equal, + [](cudf::table_view const &left_keys, cudf::table_view const &right_keys, + cudf::table_view const &left_condition, cudf::table_view const &right_condition, + cudf::ast::expression const &condition, cudf::null_equality nulls_equal) { + return cudf::mixed_left_anti_join(left_keys, right_keys, left_condition, right_condition, + condition, nulls_equal); + }); +} + +JNIEXPORT jlongArray JNICALL Java_ai_rapids_cudf_Table_mixedLeftAntiJoinGatherMapWithSize( + JNIEnv *env, jclass, jlong j_left_keys, jlong j_right_keys, jlong j_left_condition, + jlong j_right_condition, jlong j_condition, jboolean j_nulls_equal, jlong j_output_row_count, + jlong j_matches_view) { + auto size_info = cudf::jni::get_mixed_size_info(env, j_output_row_count, j_matches_view); + return cudf::jni::mixed_join_gather_single_map( + env, j_left_keys, j_right_keys, j_left_condition, j_right_condition, j_condition, + j_nulls_equal, + [&size_info](cudf::table_view const &left_keys, cudf::table_view const &right_keys, + cudf::table_view const &left_condition, cudf::table_view const &right_condition, + cudf::ast::expression const &condition, cudf::null_equality nulls_equal) { + return cudf::mixed_left_anti_join(left_keys, right_keys, left_condition, right_condition, + condition, nulls_equal, size_info); + }); +} + JNIEXPORT jlongArray JNICALL Java_ai_rapids_cudf_Table_crossJoin(JNIEnv *env, jclass, jlong left_table, jlong right_table) { diff --git a/java/src/test/java/ai/rapids/cudf/TableTest.java b/java/src/test/java/ai/rapids/cudf/TableTest.java index 47c468de8c8..db1327c5471 100644 --- a/java/src/test/java/ai/rapids/cudf/TableTest.java +++ b/java/src/test/java/ai/rapids/cudf/TableTest.java @@ -2380,6 +2380,222 @@ void testMixedFullJoinGatherMapsNulls() { } } + @Test + void testMixedLeftSemiJoinGatherMap() { + BinaryOperation expr = new BinaryOperation(BinaryOperator.GREATER, + new ColumnReference(1, TableReference.LEFT), + new ColumnReference(1, TableReference.RIGHT)); + try (CompiledExpression condition = expr.compile(); + Table left = new Table.TestBuilder() + .column(2, 3, 9, 0, 1, 7, 4, 6, 5, 8) + .column(1, 2, 3, 4, 5, 6, 7, 8, 9, 0) + .build(); + Table leftKeys = new Table(left.getColumn(0)); + Table right = new Table.TestBuilder() + .column(6, 5, 9, 8, 10, 32) + .column(0, 1, 2, 3, 4, 5) + .column(7, 8, 9, 0, 1, 2).build(); + Table rightKeys = new Table(right.getColumn(0)); + Table expected = new Table.TestBuilder() + .column(2, 7, 8) + .build(); + GatherMap map = Table.mixedLeftSemiJoinGatherMap(leftKeys, rightKeys, left, right, + condition, NullEquality.UNEQUAL)) { + verifySemiJoinGatherMap(map, expected); + } + } + + @Test + void testMixedLeftSemiJoinGatherMapNulls() { + BinaryOperation expr = new BinaryOperation(BinaryOperator.GREATER, + new ColumnReference(1, TableReference.LEFT), + new ColumnReference(1, TableReference.RIGHT)); + try (CompiledExpression condition = expr.compile(); + Table left = new Table.TestBuilder() + .column(null, 3, 9, 0, 1, 7, 4, null, 5, 8) + .column( 1, 2, 3, 4, 5, 6, 7, 8, 9, 0) + .build(); + Table leftKeys = new Table(left.getColumn(0)); + Table right = new Table.TestBuilder() + .column(null, 5, null, 8, 10, 32) + .column( 0, 1, 2, 3, 4, 5) + .column( 7, 8, 9, 0, 1, 2).build(); + Table rightKeys = new Table(right.getColumn(0)); + Table expected = new Table.TestBuilder() + .column(0, 7, 8) + .build(); + GatherMap map = Table.mixedLeftSemiJoinGatherMap(leftKeys, rightKeys, left, right, + condition, NullEquality.EQUAL)) { + verifySemiJoinGatherMap(map, expected); + } + } + + @Test + void testMixedLeftSemiJoinGatherMapWithSize() { + BinaryOperation expr = new BinaryOperation(BinaryOperator.GREATER, + new ColumnReference(1, TableReference.LEFT), + new ColumnReference(1, TableReference.RIGHT)); + try (CompiledExpression condition = expr.compile(); + Table left = new Table.TestBuilder() + .column(2, 3, 9, 0, 1, 7, 4, 6, 5, 8) + .column(1, 2, 3, 4, 5, 6, 7, 8, 9, 0) + .build(); + Table leftKeys = new Table(left.getColumn(0)); + Table right = new Table.TestBuilder() + .column(6, 5, 9, 8, 10, 32) + .column(0, 1, 2, 3, 4, 5) + .column(7, 8, 9, 0, 1, 2).build(); + Table rightKeys = new Table(right.getColumn(0)); + Table expected = new Table.TestBuilder() + .column(2, 7, 8) + .build(); + MixedJoinSize sizeInfo = Table.mixedLeftSemiJoinSize(leftKeys, rightKeys, left, right, + condition, NullEquality.UNEQUAL)) { + assertEquals(expected.getRowCount(), sizeInfo.getOutputRowCount()); + try (GatherMap map = Table.mixedLeftSemiJoinGatherMap(leftKeys, rightKeys, left, right, + condition, NullEquality.UNEQUAL, sizeInfo)) { + verifySemiJoinGatherMap(map, expected); + } + } + } + + @Test + void testMixedLeftSemiJoinGatherMapNullsWithSize() { + BinaryOperation expr = new BinaryOperation(BinaryOperator.GREATER, + new ColumnReference(1, TableReference.LEFT), + new ColumnReference(1, TableReference.RIGHT)); + try (CompiledExpression condition = expr.compile(); + Table left = new Table.TestBuilder() + .column(null, 3, 9, 0, 1, 7, 4, null, 5, 8) + .column( 1, 2, 3, 4, 5, 6, 7, 8, 9, 0) + .build(); + Table leftKeys = new Table(left.getColumn(0)); + Table right = new Table.TestBuilder() + .column(null, 5, null, 8, 10, 32) + .column( 0, 1, 2, 3, 4, 5) + .column( 7, 8, 9, 0, 1, 2).build(); + Table rightKeys = new Table(right.getColumn(0)); + Table expected = new Table.TestBuilder() + .column(0, 7, 8) + .build(); + MixedJoinSize sizeInfo = Table.mixedLeftSemiJoinSize(leftKeys, rightKeys, left, right, + condition, NullEquality.EQUAL)) { + assertEquals(expected.getRowCount(), sizeInfo.getOutputRowCount()); + try (GatherMap map = Table.mixedLeftSemiJoinGatherMap(leftKeys, rightKeys, left, right, + condition, NullEquality.EQUAL, sizeInfo)) { + verifySemiJoinGatherMap(map, expected); + } + } + } + + @Test + void testMixedLeftAntiJoinGatherMap() { + BinaryOperation expr = new BinaryOperation(BinaryOperator.GREATER, + new ColumnReference(1, TableReference.LEFT), + new ColumnReference(1, TableReference.RIGHT)); + try (CompiledExpression condition = expr.compile(); + Table left = new Table.TestBuilder() + .column(2, 3, 9, 0, 1, 7, 4, 6, 5, 8) + .column(1, 2, 3, 4, 5, 6, 7, 8, 9, 0) + .build(); + Table leftKeys = new Table(left.getColumn(0)); + Table right = new Table.TestBuilder() + .column(6, 5, 9, 8, 10, 32) + .column(0, 1, 2, 3, 4, 5) + .column(7, 8, 9, 0, 1, 2).build(); + Table rightKeys = new Table(right.getColumn(0)); + Table expected = new Table.TestBuilder() + .column(0, 1, 3, 4, 5, 6, 9) + .build(); + GatherMap map = Table.mixedLeftAntiJoinGatherMap(leftKeys, rightKeys, left, right, + condition, NullEquality.UNEQUAL)) { + verifySemiJoinGatherMap(map, expected); + } + } + + @Test + void testMixedLeftAntiJoinGatherMapNulls() { + BinaryOperation expr = new BinaryOperation(BinaryOperator.GREATER, + new ColumnReference(1, TableReference.LEFT), + new ColumnReference(1, TableReference.RIGHT)); + try (CompiledExpression condition = expr.compile(); + Table left = new Table.TestBuilder() + .column(null, 3, 9, 0, 1, 7, 4, null, 5, 8) + .column( 1, 2, 3, 4, 5, 6, 7, 8, 9, 0) + .build(); + Table leftKeys = new Table(left.getColumn(0)); + Table right = new Table.TestBuilder() + .column(null, 5, null, 8, 10, 32) + .column( 0, 1, 2, 3, 4, 5) + .column( 7, 8, 9, 0, 1, 2).build(); + Table rightKeys = new Table(right.getColumn(0)); + Table expected = new Table.TestBuilder() + .column(1, 2, 3, 4, 5, 6, 9) + .build(); + GatherMap map = Table.mixedLeftAntiJoinGatherMap(leftKeys, rightKeys, left, right, + condition, NullEquality.EQUAL)) { + verifySemiJoinGatherMap(map, expected); + } + } + + @Test + void testMixedLeftAntiJoinGatherMapWithSize() { + BinaryOperation expr = new BinaryOperation(BinaryOperator.GREATER, + new ColumnReference(1, TableReference.LEFT), + new ColumnReference(1, TableReference.RIGHT)); + try (CompiledExpression condition = expr.compile(); + Table left = new Table.TestBuilder() + .column(2, 3, 9, 0, 1, 7, 4, 6, 5, 8) + .column(1, 2, 3, 4, 5, 6, 7, 8, 9, 0) + .build(); + Table leftKeys = new Table(left.getColumn(0)); + Table right = new Table.TestBuilder() + .column(6, 5, 9, 8, 10, 32) + .column(0, 1, 2, 3, 4, 5) + .column(7, 8, 9, 0, 1, 2).build(); + Table rightKeys = new Table(right.getColumn(0)); + Table expected = new Table.TestBuilder() + .column(0, 1, 3, 4, 5, 6, 9) + .build(); + MixedJoinSize sizeInfo = Table.mixedLeftAntiJoinSize(leftKeys, rightKeys, left, right, + condition, NullEquality.UNEQUAL)) { + assertEquals(expected.getRowCount(), sizeInfo.getOutputRowCount()); + try (GatherMap map = Table.mixedLeftAntiJoinGatherMap(leftKeys, rightKeys, left, right, + condition, NullEquality.UNEQUAL, sizeInfo)) { + verifySemiJoinGatherMap(map, expected); + } + } + } + + @Test + void testMixedLeftAntiJoinGatherMapNullsWithSize() { + BinaryOperation expr = new BinaryOperation(BinaryOperator.GREATER, + new ColumnReference(1, TableReference.LEFT), + new ColumnReference(1, TableReference.RIGHT)); + try (CompiledExpression condition = expr.compile(); + Table left = new Table.TestBuilder() + .column(null, 3, 9, 0, 1, 7, 4, null, 5, 8) + .column( 1, 2, 3, 4, 5, 6, 7, 8, 9, 0) + .build(); + Table leftKeys = new Table(left.getColumn(0)); + Table right = new Table.TestBuilder() + .column(null, 5, null, 8, 10, 32) + .column( 0, 1, 2, 3, 4, 5) + .column( 7, 8, 9, 0, 1, 2).build(); + Table rightKeys = new Table(right.getColumn(0)); + Table expected = new Table.TestBuilder() + .column(1, 2, 3, 4, 5, 6, 9) + .build(); + MixedJoinSize sizeInfo = Table.mixedLeftAntiJoinSize(leftKeys, rightKeys, left, right, + condition, NullEquality.EQUAL)) { + assertEquals(expected.getRowCount(), sizeInfo.getOutputRowCount()); + try (GatherMap map = Table.mixedLeftAntiJoinGatherMap(leftKeys, rightKeys, left, right, + condition, NullEquality.EQUAL, sizeInfo)) { + verifySemiJoinGatherMap(map, expected); + } + } + } + @Test void testLeftSemiJoinGatherMap() { try (Table leftKeys = new Table.TestBuilder().column(2, 3, 9, 0, 1, 7, 4, 6, 5, 8).build(); diff --git a/python/cudf/cudf/tests/test_dataframe.py b/python/cudf/cudf/tests/test_dataframe.py index 3e359335719..fa7680df8a5 100644 --- a/python/cudf/cudf/tests/test_dataframe.py +++ b/python/cudf/cudf/tests/test_dataframe.py @@ -5474,11 +5474,17 @@ def test_memory_usage_list(): @pytest.mark.parametrize("rows", [10, 100]) def test_memory_usage_multi(rows): deep = True + # We need to sample without replacement to guarantee that the size of the + # levels are always the same. df = pd.DataFrame( { "A": np.arange(rows, dtype="int32"), - "B": np.random.choice(np.arange(3, dtype="int64"), rows), - "C": np.random.choice(np.arange(3, dtype="float64"), rows), + "B": np.random.choice( + np.arange(rows, dtype="int64"), rows, replace=False + ), + "C": np.random.choice( + np.arange(rows, dtype="float64"), rows, replace=False + ), } ).set_index(["B", "C"]) gdf = cudf.from_pandas(df) @@ -5486,8 +5492,8 @@ def test_memory_usage_multi(rows): # of the underlying columns, levels, and codes expect = rows * 16 # Source Columns expect += rows * 16 # Codes - expect += 3 * 8 # Level 0 - expect += 3 * 8 # Level 1 + expect += rows * 8 # Level 0 + expect += rows * 8 # Level 1 assert expect == gdf.index.memory_usage(deep=deep)