From ad5452d7eb417527ad6bd0b6a29a544466b38429 Mon Sep 17 00:00:00 2001 From: David <45795991+davidwendt@users.noreply.github.com> Date: Fri, 26 Mar 2021 10:24:00 -0400 Subject: [PATCH 01/20] Add gbenchmark for nvtext replace-tokens function (#7708) Reference #5696 Creates gbenchmarks for the `nvtext::replace_tokens()` function. The benchmarks measure various string lengths and number of rows with the default whitespace delimiter and 4 hardcoded tokens. This API already uses the `make_strings_children` utility. Authors: - David (@davidwendt) Approvers: - Karthikeyan (@karthikeyann) - Nghia Truong (@ttnghia) - @nvdbaranec - Keith Kraus (@kkraus14) URL: https://github.com/rapidsai/cudf/pull/7708 --- cpp/benchmarks/CMakeLists.txt | 5 +- cpp/benchmarks/text/replace_benchmark.cpp | 85 +++++++++++++++++++++++ 2 files changed, 88 insertions(+), 2 deletions(-) create mode 100644 cpp/benchmarks/text/replace_benchmark.cpp diff --git a/cpp/benchmarks/CMakeLists.txt b/cpp/benchmarks/CMakeLists.txt index 7fd84b508ac..43ca6de11b4 100644 --- a/cpp/benchmarks/CMakeLists.txt +++ b/cpp/benchmarks/CMakeLists.txt @@ -177,8 +177,9 @@ ConfigureBench(BINARYOP_BENCH binaryop/binaryop_benchmark.cu) ConfigureBench(TEXT_BENCH text/normalize_benchmark.cpp text/normalize_spaces_benchmark.cpp - text/tokenize_benchmark.cpp - text/subword_benchmark.cpp) + text/replace_benchmark.cpp + text/subword_benchmark.cpp + text/tokenize_benchmark.cpp) ################################################################################################### # - strings benchmark ------------------------------------------------------------------- diff --git a/cpp/benchmarks/text/replace_benchmark.cpp b/cpp/benchmarks/text/replace_benchmark.cpp new file mode 100644 index 00000000000..f5428aee225 --- /dev/null +++ b/cpp/benchmarks/text/replace_benchmark.cpp @@ -0,0 +1,85 @@ +/* + * Copyright (c) 2021, NVIDIA CORPORATION. 
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include +#include +#include +#include + +#include +#include +#include + +#include + +class TextReplace : public cudf::benchmark { +}; + +static void BM_replace(benchmark::State& state) +{ + auto const n_rows = static_cast(state.range(0)); + auto const n_length = static_cast(state.range(1)); + + std::vector words{" ", "one ", "two ", "three ", "four ", + "five ", "six ", "sevén ", "eight ", "nine ", + "ten ", "eleven ", "twelve ", "thirteen ", "fourteen ", + "fifteen ", "sixteen ", "seventeen ", "eighteen ", "nineteen "}; + + std::default_random_engine generator; + std::uniform_int_distribution tokens_dist(0, words.size() - 1); + std::string row; // build a row of random tokens + while (static_cast(row.size()) < n_length) row += words[tokens_dist(generator)]; + + std::uniform_int_distribution position_dist(0, 16); + + auto elements = cudf::detail::make_counting_transform_iterator( + 0, [&](auto idx) { return row.c_str() + position_dist(generator); }); + cudf::test::strings_column_wrapper input(elements, elements + n_rows); + cudf::strings_column_view view(input); + + cudf::test::strings_column_wrapper targets({"one", "two", "sevén", "zero"}); + cudf::test::strings_column_wrapper replacements({"1", "2", "7", "0"}); + + for (auto _ : state) { + cuda_event_timer raii(state, true, 0); + nvtext::replace_tokens( + view, cudf::strings_column_view(targets), cudf::strings_column_view(replacements)); + } + + 
state.SetBytesProcessed(state.iterations() * view.chars_size()); +} + +static void generate_bench_args(benchmark::internal::Benchmark* b) +{ + int const min_rows = 1 << 12; + int const max_rows = 1 << 24; + int const row_multiplier = 8; + int const min_row_length = 1 << 5; + int const max_row_length = 1 << 13; + int const length_multiplier = 4; + generate_string_bench_args( + b, min_rows, max_rows, row_multiplier, min_row_length, max_row_length, length_multiplier); +} + +#define NVTEXT_BENCHMARK_DEFINE(name) \ + BENCHMARK_DEFINE_F(TextReplace, name) \ + (::benchmark::State & st) { BM_replace(st); } \ + BENCHMARK_REGISTER_F(TextReplace, name) \ + ->Apply(generate_bench_args) \ + ->UseManualTime() \ + ->Unit(benchmark::kMillisecond); + +NVTEXT_BENCHMARK_DEFINE(replace) From bf2e96c70c9c7097ecf64ad413550be2f75374b8 Mon Sep 17 00:00:00 2001 From: Ashwin Srinath <3190405+shwina@users.noreply.github.com> Date: Fri, 26 Mar 2021 11:42:37 -0400 Subject: [PATCH 02/20] Add support for `unique` groupby aggregation (#7726) Adds support for `SeriesGroupBy.unique()`. Also adds support for `DataFrameGroupBy.unique()` but that's not tested, as Pandas doesn't support it (yet?). 
Resolves https://github.com/rapidsai/cudf/issues/2973 Authors: - Ashwin Srinath (@shwina) Approvers: - Keith Kraus (@kkraus14) URL: https://github.com/rapidsai/cudf/pull/7726 --- docs/cudf/source/groupby.md | 1 + python/cudf/cudf/_lib/aggregation.pyx | 7 +++--- python/cudf/cudf/_lib/groupby.pyx | 31 +++++++++++++++++++------- python/cudf/cudf/tests/test_groupby.py | 31 +++++++++++++++++++++++++- 4 files changed, 57 insertions(+), 13 deletions(-) diff --git a/docs/cudf/source/groupby.md b/docs/cudf/source/groupby.md index 7e96d4fe38c..5376df261e7 100644 --- a/docs/cudf/source/groupby.md +++ b/docs/cudf/source/groupby.md @@ -137,6 +137,7 @@ The following table summarizes the available aggregations and the types that sup | nunique | ✅ | ✅ | ✅ | ✅ | | | | nth | ✅ | ✅ | ✅ | | | | | collect | ✅ | ✅ | ✅ | | ✅ | | +| unique | ✅ | ✅ | ✅ | ✅ | | | ## GroupBy apply diff --git a/python/cudf/cudf/_lib/aggregation.pyx b/python/cudf/cudf/_lib/aggregation.pyx index 840f0c98987..7138bb49743 100644 --- a/python/cudf/cudf/_lib/aggregation.pyx +++ b/python/cudf/cudf/_lib/aggregation.pyx @@ -41,7 +41,7 @@ class AggregationKind(Enum): ALL = libcudf_aggregation.aggregation.Kind.ALL SUM_OF_SQUARES = libcudf_aggregation.aggregation.Kind.SUM_OF_SQUARES MEAN = libcudf_aggregation.aggregation.Kind.MEAN - VARIANCE = libcudf_aggregation.aggregation.Kind.VARIANCE + VAR = libcudf_aggregation.aggregation.Kind.VARIANCE STD = libcudf_aggregation.aggregation.Kind.STD MEDIAN = libcudf_aggregation.aggregation.Kind.MEDIAN QUANTILE = libcudf_aggregation.aggregation.Kind.QUANTILE @@ -50,13 +50,12 @@ class AggregationKind(Enum): NUNIQUE = libcudf_aggregation.aggregation.Kind.NUNIQUE NTH = libcudf_aggregation.aggregation.Kind.NTH_ELEMENT COLLECT = libcudf_aggregation.aggregation.Kind.COLLECT - COLLECT_SET = libcudf_aggregation.aggregation.Kind.COLLECT_SET + UNIQUE = libcudf_aggregation.aggregation.Kind.COLLECT_SET PTX = libcudf_aggregation.aggregation.Kind.PTX CUDA = 
libcudf_aggregation.aggregation.Kind.CUDA cdef class Aggregation: - def __init__(self, op, **kwargs): self.c_obj = move(make_aggregation(op, kwargs)) @@ -246,7 +245,7 @@ cdef class _AggregationFactory: return agg @classmethod - def collect_set(cls): + def unique(cls): cdef Aggregation agg = Aggregation.__new__(Aggregation) agg.c_obj = move(libcudf_aggregation.make_collect_set_aggregation()) return agg diff --git a/python/cudf/cudf/_lib/groupby.pyx b/python/cudf/cudf/_lib/groupby.pyx index 0f5cdc73d3b..713a2274a77 100644 --- a/python/cudf/cudf/_lib/groupby.pyx +++ b/python/cudf/cudf/_lib/groupby.pyx @@ -35,13 +35,15 @@ _GROUPBY_AGGS = { "median", "nunique", "nth", - "collect" + "collect", + "unique", } _CATEGORICAL_AGGS = { "count", "size", "nunique", + "unique", } _STRING_AGGS = { @@ -51,13 +53,15 @@ _STRING_AGGS = { "min", "nunique", "nth", - "collect" + "collect", + "unique", } _LIST_AGGS = { - "collect" + "collect", } + cdef class GroupBy: cdef unique_ptr[libcudf_groupby.groupby] c_obj cdef dict __dict__ @@ -145,12 +149,23 @@ cdef class GroupBy: vector[libcudf_groupby.aggregation_result] ] c_result - with nogil: - c_result = move( - self.c_obj.get()[0].aggregate( - c_agg_requests + try: + with nogil: + c_result = move( + self.c_obj.get()[0].aggregate( + c_agg_requests + ) ) - ) + except RuntimeError as e: + # TODO: remove this try..except after + # https://github.com/rapidsai/cudf/issues/7611 + # is resolved + if ("make_empty_column") in str(e): + raise NotImplementedError( + "Aggregation not supported for empty columns" + ) from e + else: + raise grouped_keys = Table.from_unique_ptr( move(c_result.first), diff --git a/python/cudf/cudf/tests/test_groupby.py b/python/cudf/cudf/tests/test_groupby.py index 8011510d340..a96db59dee3 100644 --- a/python/cudf/cudf/tests/test_groupby.py +++ b/python/cudf/cudf/tests/test_groupby.py @@ -12,7 +12,13 @@ import cudf from cudf.core import DataFrame, Series from cudf.core._compat import PANDAS_GE_110 -from cudf.tests.utils 
import assert_eq, assert_exceptions_equal +from cudf.tests.utils import ( + DATETIME_TYPES, + SIGNED_TYPES, + TIMEDELTA_TYPES, + assert_eq, + assert_exceptions_equal, +) _now = np.datetime64("now") _tomorrow = _now + np.timedelta64(1, "D") @@ -1532,3 +1538,26 @@ def test_groupby_nonempty_no_keys(pdf): lambda: gdf.groupby([]), compare_error_message=False, ) + + +@pytest.mark.parametrize( + "by,data", + [ + # ([], []), # error? + ([1, 1, 2, 2], [0, 0, 1, 1]), + ([1, 2, 3, 4], [0, 0, 0, 0]), + ([1, 2, 1, 2], [0, 1, 1, 1]), + ], +) +@pytest.mark.parametrize( + "dtype", + SIGNED_TYPES + DATETIME_TYPES + TIMEDELTA_TYPES + ["string", "category"], +) +def test_groupby_unique(by, data, dtype): + pdf = pd.DataFrame({"by": by, "data": data}) + pdf["data"] = pdf["data"].astype(dtype) + gdf = cudf.from_pandas(pdf) + + expect = pdf.groupby("by")["data"].unique() + got = gdf.groupby("by")["data"].unique() + assert_eq(expect, got) From b0586c4e8988b836d8bcdeddfd5d384b7011af6f Mon Sep 17 00:00:00 2001 From: "Robert (Bobby) Evans" Date: Fri, 26 Mar 2021 14:23:06 -0500 Subject: [PATCH 03/20] Added JNI support for new is_integer (#7739) Adds JNI bindings for improved is_integer with bounds checks Authors: - Robert (Bobby) Evans (@revans2) Approvers: - Jason Lowe (@jlowe) URL: https://github.com/rapidsai/cudf/pull/7739 --- .../main/java/ai/rapids/cudf/ColumnView.java | 21 ++++++- java/src/main/native/src/ColumnViewJni.cpp | 17 +++++ .../java/ai/rapids/cudf/ColumnVectorTest.java | 63 +++++++++++++++++++ 3 files changed, 99 insertions(+), 2 deletions(-) diff --git a/java/src/main/java/ai/rapids/cudf/ColumnView.java b/java/src/main/java/ai/rapids/cudf/ColumnView.java index e50a9e86ead..b29b873092d 100644 --- a/java/src/main/java/ai/rapids/cudf/ColumnView.java +++ b/java/src/main/java/ai/rapids/cudf/ColumnView.java @@ -288,19 +288,34 @@ public final ColumnVector isNull() { /** * Returns a Boolean vector with the same number of rows as this instance, that has * TRUE for any entry that is an 
integer, and FALSE if its not an integer. A null will be returned - * for null entries + * for null entries. * * NOTE: Integer doesn't mean a 32-bit integer. It means a number that is not a fraction. * i.e. If this method returns true for a value it could still result in an overflow or underflow * if you convert it to a Java integral type * - * @return - Boolean vector + * @return Boolean vector */ public final ColumnVector isInteger() { assert type.equals(DType.STRING); return new ColumnVector(isInteger(getNativeView())); } + /** + * Returns a Boolean vector with the same number of rows as this instance, that has + * TRUE for any entry that is an integer, and FALSE if it is not an integer. A null will be returned + * for null entries. + * + * @param intType the data type that should be used for bounds checking. Note that only + * integer types are allowed. + * @return Boolean vector + */ + public final ColumnVector isInteger(DType intType) { + assert type.equals(DType.STRING); + return new ColumnVector(isIntegerWithType(getNativeView(), + intType.getTypeId().getNativeId(), intType.getScale())); + } + /** * Returns a Boolean vector with the same number of rows as this instance, that has * TRUE for any entry that is a float, and FALSE if its not a float. 
A null will be returned @@ -2845,6 +2860,8 @@ private static native long rollingWindow( private static native long isInteger(long viewHandle); + private static native long isIntegerWithType(long viewHandle, int typeId, int typeScale); + private static native long isNotNanNative(long viewHandle); private static native long isNotNullNative(long viewHandle); diff --git a/java/src/main/native/src/ColumnViewJni.cpp b/java/src/main/native/src/ColumnViewJni.cpp index 4132016d85c..3928794b55c 100644 --- a/java/src/main/native/src/ColumnViewJni.cpp +++ b/java/src/main/native/src/ColumnViewJni.cpp @@ -1788,6 +1788,23 @@ JNIEXPORT jlong JNICALL Java_ai_rapids_cudf_ColumnView_isInteger(JNIEnv *env, jo CATCH_STD(env, 0) } +JNIEXPORT jlong JNICALL Java_ai_rapids_cudf_ColumnView_isIntegerWithType(JNIEnv *env, jobject, + jlong handle, + jint j_dtype, + jint scale) { + + JNI_NULL_CHECK(env, handle, "native view handle is null", 0) + + try { + cudf::jni::auto_set_device(env); + cudf::column_view *view = reinterpret_cast(handle); + cudf::data_type int_dtype = cudf::jni::make_data_type(j_dtype, scale); + std::unique_ptr result = cudf::strings::is_integer(*view, int_dtype); + return reinterpret_cast(result.release()); + } + CATCH_STD(env, 0) +} + JNIEXPORT jlong JNICALL Java_ai_rapids_cudf_ColumnView_copyColumnViewToCV(JNIEnv *env, jobject j_object, jlong handle) { diff --git a/java/src/test/java/ai/rapids/cudf/ColumnVectorTest.java b/java/src/test/java/ai/rapids/cudf/ColumnVectorTest.java index 02fbe56431b..5a9404f5760 100644 --- a/java/src/test/java/ai/rapids/cudf/ColumnVectorTest.java +++ b/java/src/test/java/ai/rapids/cudf/ColumnVectorTest.java @@ -3339,6 +3339,69 @@ void testNansToNulls() { } } + @Test + void testIsIntegerWithBounds() { + String[] intStrings = {"A", "nan", "Inf", "-Inf", "3.5", + String.valueOf(Byte.MIN_VALUE), + String.valueOf(Byte.MIN_VALUE + 1L), + String.valueOf(Byte.MIN_VALUE - 1L), + String.valueOf(Byte.MAX_VALUE), + String.valueOf(Byte.MAX_VALUE + 1L), + 
String.valueOf(Byte.MAX_VALUE - 1L), + String.valueOf(Short.MIN_VALUE), + String.valueOf(Short.MIN_VALUE + 1L), + String.valueOf(Short.MIN_VALUE - 1L), + String.valueOf(Short.MAX_VALUE), + String.valueOf(Short.MAX_VALUE + 1L), + String.valueOf(Short.MAX_VALUE - 1L), + String.valueOf(Integer.MIN_VALUE), + String.valueOf(Integer.MIN_VALUE + 1L), + String.valueOf(Integer.MIN_VALUE - 1L), + String.valueOf(Integer.MAX_VALUE), + String.valueOf(Integer.MAX_VALUE + 1L), + String.valueOf(Integer.MAX_VALUE - 1L), + String.valueOf(Long.MIN_VALUE), + String.valueOf(Long.MIN_VALUE + 1L), + "-9223372036854775809", + String.valueOf(Long.MAX_VALUE), + "9223372036854775808", + String.valueOf(Long.MAX_VALUE - 1L)}; + try (ColumnVector intStringCV = ColumnVector.fromStrings(intStrings); + ColumnVector isByte = intStringCV.isInteger(DType.INT8); + ColumnVector expectedByte = ColumnVector.fromBoxedBooleans( + false, false, false, false, false, + true, true, false, true, false, true, + false, false, false, false, false, false, + false, false, false, false, false, false, + false, false, false, false, false, false); + ColumnVector isShort = intStringCV.isInteger(DType.INT16); + ColumnVector expectedShort = ColumnVector.fromBoxedBooleans( + false, false, false, false, false, + true, true, true, true, true, true, + true, true, false, true, false, true, + false, false, false, false, false, false, + false, false, false, false, false, false); + ColumnVector isInt = intStringCV.isInteger(DType.INT32); + ColumnVector expectedInt = ColumnVector.fromBoxedBooleans( + false, false, false, false, false, + true, true, true, true, true, true, + true, true, true, true, true, true, + true, true, false, true, false, true, + false, false, false, false, false, false); + ColumnVector isLong = intStringCV.isInteger(DType.INT64); + ColumnVector expectedLong = ColumnVector.fromBoxedBooleans( + false, false, false, false, false, + true, true, true, true, true, true, + true, true, true, true, true, true, + true, 
true, true, true, true, true, + true, true, false, true, false, true)) { + assertColumnsAreEqual(expectedByte, isByte); + assertColumnsAreEqual(expectedShort, isShort); + assertColumnsAreEqual(expectedInt, isInt); + assertColumnsAreEqual(expectedLong, isLong); + } + } + @Test void testIsInteger() { String[] intStrings = {"A", "nan", "Inf", "-Inf", "Infinity", "infinity", "2147483647", From add4b4535999dcc200b7fdf83298b90d0495af96 Mon Sep 17 00:00:00 2001 From: Kumar Aatish Date: Fri, 26 Mar 2021 22:26:02 -0400 Subject: [PATCH 04/20] Fix string length in stripe dictionary building (#7744) In PR #7676 the length of the current string being referred to while building stripe dictionaries was always set to 0 while incrementing the dictionary character count of a StripeDictionary. This led to corrupted strings when the dictionary encoding was used as noted in issue #7741. This has been fixed in this PR. Fixes #7741 Authors: - Kumar Aatish (@kaatish) Approvers: - Vukasin Milovanovic (@vuule) - Nghia Truong (@ttnghia) URL: https://github.com/rapidsai/cudf/pull/7744 --- cpp/src/io/orc/dict_enc.cu | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/cpp/src/io/orc/dict_enc.cu b/cpp/src/io/orc/dict_enc.cu index 5695e882a95..e69a61bde66 100644 --- a/cpp/src/io/orc/dict_enc.cu +++ b/cpp/src/io/orc/dict_enc.cu @@ -396,7 +396,10 @@ __global__ void __launch_bounds__(block_size) uint32_t cur = (i + t < num_strings) ? 
dict_data[i + t] : 0; uint32_t cur_len = 0; bool is_dupe = false; - if (i + t < num_strings) { current_string = s->stripe.leaf_column->element(cur); } + if (i + t < num_strings) { + current_string = s->stripe.leaf_column->element(cur); + cur_len = current_string.size_bytes(); + } if (i + t != 0 && i + t < num_strings) { uint32_t prev = dict_data[i + t - 1]; is_dupe = (current_string == (s->stripe.leaf_column->element(prev))); From 44adf97fc49e5569b83b31ad5c7f05f6b64c20bd Mon Sep 17 00:00:00 2001 From: Vukasin Milovanovic Date: Fri, 26 Mar 2021 22:48:53 -0700 Subject: [PATCH 05/20] Fix dictionary size computation in ORC writer (#7737) Fixes #7661 Corrects the field order in `std::accumulate` that computes the string column size w.r.t encoding. Authors: - Vukasin Milovanovic (@vuule) Approvers: - Kumar Aatish (@kaatish) - Ram (Ramakrishna Prabhu) (@rgsl888prabhu) URL: https://github.com/rapidsai/cudf/pull/7737 --- cpp/src/io/orc/writer_impl.cu | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/cpp/src/io/orc/writer_impl.cu b/cpp/src/io/orc/writer_impl.cu index cb75698fd8d..10050806552 100644 --- a/cpp/src/io/orc/writer_impl.cu +++ b/cpp/src/io/orc/writer_impl.cu @@ -321,8 +321,8 @@ void writer::impl::build_dictionaries(orc_column_view *columns, string_column_cost{}, [&](auto cost, auto rg_idx) -> string_column_cost { const auto &dt = dict[rg_idx * str_col_ids.size() + col_idx]; - return {cost.dictionary + dt.dict_char_count + dt.num_dict_strings, - cost.direct + dt.string_char_count}; + return {cost.direct + dt.string_char_count, + cost.dictionary + dt.dict_char_count + dt.num_dict_strings}; }); // Disable dictionary if it does not reduce the output size if (col_cost.dictionary >= col_cost.direct) { From ccc28d55202f6f6bb14718ed9022881ef0176b6e Mon Sep 17 00:00:00 2001 From: Karthikeyan <6488848+karthikeyann@users.noreply.github.com> Date: Sat, 27 Mar 2021 13:56:20 +0530 Subject: [PATCH 06/20] Use stream in groupby calls (#7705) 
**sort_groupby_helper::** - [x] sorted_values() - [x] grouped_values() - unique_keys() - sorted_keys() - [x] num_groups() - num_keys() - [x] key_sort_order() - [x] group_offsets() - [x] group_labels() - [x] unsorted_keys_labels() - [x] keys_bitmask_column() **groupby::** - [x] - dispatch_aggregation() Authors: - Karthikeyan (@karthikeyann) Approvers: - David (@davidwendt) - Ram (Ramakrishna Prabhu) (@rgsl888prabhu) URL: https://github.com/rapidsai/cudf/pull/7705 --- .../cudf/detail/groupby/sort_helper.hpp | 26 ++++---- cpp/src/groupby/groupby.cu | 4 +- cpp/src/groupby/sort/aggregate.cpp | 60 ++++++++++--------- cpp/src/groupby/sort/functors.hpp | 4 +- cpp/src/groupby/sort/scan.cpp | 13 ++-- cpp/src/groupby/sort/sort_helper.cu | 26 ++++---- cpp/src/rolling/grouped_rolling.cu | 4 +- 7 files changed, 72 insertions(+), 65 deletions(-) diff --git a/cpp/include/cudf/detail/groupby/sort_helper.hpp b/cpp/include/cudf/detail/groupby/sort_helper.hpp index a68d649b8c8..bfc9673d3cb 100644 --- a/cpp/include/cudf/detail/groupby/sort_helper.hpp +++ b/cpp/include/cudf/detail/groupby/sort_helper.hpp @@ -93,7 +93,7 @@ struct sort_groupby_helper { */ std::unique_ptr sorted_values( column_view const& values, - rmm::cuda_stream_view stream = rmm::cuda_stream_default, + rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); /** @@ -108,7 +108,7 @@ struct sort_groupby_helper { */ std::unique_ptr grouped_values( column_view const& values, - rmm::cuda_stream_view stream = rmm::cuda_stream_default, + rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); /** @@ -117,7 +117,7 @@ struct sort_groupby_helper { * @return a new table in which each row is a unique row in the sorted key table. 
*/ std::unique_ptr unique_keys( - rmm::cuda_stream_view stream = rmm::cuda_stream_default, + rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); /** @@ -126,13 +126,13 @@ struct sort_groupby_helper { * @return a new table containing the sorted keys. */ std::unique_ptr
sorted_keys( - rmm::cuda_stream_view stream = rmm::cuda_stream_default, + rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); /** * @brief Get the number of groups in `keys` */ - size_type num_groups() { return group_offsets().size() - 1; } + size_type num_groups(rmm::cuda_stream_view stream) { return group_offsets(stream).size() - 1; } /** * @brief Return the effective number of keys @@ -141,7 +141,7 @@ struct sort_groupby_helper { * When include_null_keys = NO, returned value is the number of rows in `keys` * in which no element is null */ - size_type num_keys(rmm::cuda_stream_view stream = rmm::cuda_stream_default); + size_type num_keys(rmm::cuda_stream_view stream); /** * @brief Get the sorted order of `keys`. @@ -156,7 +156,7 @@ struct sort_groupby_helper { * * @return the sort order indices for `keys`. */ - column_view key_sort_order(rmm::cuda_stream_view stream = rmm::cuda_stream_default); + column_view key_sort_order(rmm::cuda_stream_view stream); /** * @brief Get each group's offset into the sorted order of `keys`. @@ -169,13 +169,13 @@ struct sort_groupby_helper { * @return vector of offsets of the starting point of each group in the sorted * key table */ - index_vector const& group_offsets(rmm::cuda_stream_view stream = rmm::cuda_stream_default); + index_vector const& group_offsets(rmm::cuda_stream_view stream); /** * @brief Get the group labels corresponding to the sorted order of `keys`. * * Each group is assigned a unique numerical "label" in - * `[0, 1, 2, ... , num_groups() - 1, num_groups())`. + * `[0, 1, 2, ... , num_groups() - 1, num_groups(stream))`. * For a row in sorted `keys`, its corresponding group label indicates which * group it belongs to. 
* @@ -184,7 +184,7 @@ struct sort_groupby_helper { * * @return vector of group labels for each row in the sorted key column */ - index_vector const& group_labels(rmm::cuda_stream_view stream = rmm::cuda_stream_default); + index_vector const& group_labels(rmm::cuda_stream_view stream); private: /** @@ -192,7 +192,7 @@ struct sort_groupby_helper { * * Returns the group label for every row in the original `keys` table. For a * given unique key row, its group label is equivalent to what is returned by - * `group_labels()`. However, if a row contains a null value, and + * `group_labels(stream)`. However, if a row contains a null value, and * `include_null_keys == NO`, then its label is NULL. * * Computes and stores unsorted labels on first invocation and returns stored @@ -201,7 +201,7 @@ struct sort_groupby_helper { * @return A nullable column of `INT32` containing group labels in the order * of the unsorted key table */ - column_view unsorted_keys_labels(rmm::cuda_stream_view stream = rmm::cuda_stream_default); + column_view unsorted_keys_labels(rmm::cuda_stream_view stream); /** * @brief Get the column representing the row bitmask for the `keys` @@ -215,7 +215,7 @@ struct sort_groupby_helper { * Computes and stores bitmask on first invocation and returns stored column * on subsequent calls. 
*/ - column_view keys_bitmask_column(rmm::cuda_stream_view stream = rmm::cuda_stream_default); + column_view keys_bitmask_column(rmm::cuda_stream_view stream); private: column_ptr _key_sorted_order; ///< Indices to produce _keys in sorted order diff --git a/cpp/src/groupby/groupby.cu b/cpp/src/groupby/groupby.cu index 0312d17a37c..34c57996af3 100644 --- a/cpp/src/groupby/groupby.cu +++ b/cpp/src/groupby/groupby.cu @@ -156,7 +156,7 @@ std::pair, std::vector> groupby::aggr if (_keys.num_rows() == 0) { return std::make_pair(empty_like(_keys), empty_results(requests)); } - return dispatch_aggregation(requests, 0, mr); + return dispatch_aggregation(requests, rmm::cuda_stream_default, mr); } // Compute scan requests @@ -190,7 +190,7 @@ groupby::groups groupby::get_groups(table_view values, rmm::mr::device_memory_re if (values.num_columns()) { auto grouped_values = cudf::detail::gather(values, - helper().key_sort_order(), + helper().key_sort_order(rmm::cuda_stream_default), cudf::out_of_bounds_policy::DONT_CHECK, cudf::detail::negative_index_policy::NOT_ALLOWED, rmm::cuda_stream_default, diff --git a/cpp/src/groupby/sort/aggregate.cpp b/cpp/src/groupby/sort/aggregate.cpp index 86e2837967e..4e2303c8b9b 100644 --- a/cpp/src/groupby/sort/aggregate.cpp +++ b/cpp/src/groupby/sort/aggregate.cpp @@ -70,8 +70,9 @@ void aggregrate_result_functor::operator()(aggregation agg, get_grouped_values().nullable() ? 
detail::group_count_valid( - get_grouped_values(), helper.group_labels(), helper.num_groups(), stream, mr) - : detail::group_count_all(helper.group_offsets(), helper.num_groups(), stream, mr)); + get_grouped_values(), helper.group_labels(stream), helper.num_groups(stream), stream, mr) + : detail::group_count_all( + helper.group_offsets(stream), helper.num_groups(stream), stream, mr)); } template <> @@ -80,7 +81,9 @@ void aggregrate_result_functor::operator()(aggregation c if (cache.has_result(col_idx, agg)) return; cache.add_result( - col_idx, agg, detail::group_count_all(helper.group_offsets(), helper.num_groups(), stream, mr)); + col_idx, + agg, + detail::group_count_all(helper.group_offsets(stream), helper.num_groups(stream), stream, mr)); } template <> @@ -88,10 +91,11 @@ void aggregrate_result_functor::operator()(aggregation const& { if (cache.has_result(col_idx, agg)) return; - cache.add_result(col_idx, - agg, - detail::group_sum( - get_grouped_values(), helper.num_groups(), helper.group_labels(), stream, mr)); + cache.add_result( + col_idx, + agg, + detail::group_sum( + get_grouped_values(), helper.num_groups(stream), helper.group_labels(stream), stream, mr)); }; template <> @@ -102,9 +106,9 @@ void aggregrate_result_functor::operator()(aggregation cons cache.add_result(col_idx, agg, detail::group_argmax(get_grouped_values(), - helper.num_groups(), - helper.group_labels(), - helper.key_sort_order(), + helper.num_groups(stream), + helper.group_labels(stream), + helper.key_sort_order(stream), stream, mr)); }; @@ -117,9 +121,9 @@ void aggregrate_result_functor::operator()(aggregation cons cache.add_result(col_idx, agg, detail::group_argmin(get_grouped_values(), - helper.num_groups(), - helper.group_labels(), - helper.key_sort_order(), + helper.num_groups(stream), + helper.group_labels(stream), + helper.key_sort_order(stream), stream, mr)); }; @@ -132,7 +136,7 @@ void aggregrate_result_functor::operator()(aggregation const& auto result = [&]() { if 
(cudf::is_fixed_width(values.type())) { return detail::group_min( - get_grouped_values(), helper.num_groups(), helper.group_labels(), stream, mr); + get_grouped_values(), helper.num_groups(stream), helper.group_labels(stream), stream, mr); } else { auto argmin_agg = make_argmin_aggregation(); operator()(*argmin_agg); @@ -169,7 +173,7 @@ void aggregrate_result_functor::operator()(aggregation const& auto result = [&]() { if (cudf::is_fixed_width(values.type())) { return detail::group_max( - get_grouped_values(), helper.num_groups(), helper.group_labels(), stream, mr); + get_grouped_values(), helper.num_groups(stream), helper.group_labels(stream), stream, mr); } else { auto argmax_agg = make_argmax_aggregation(); operator()(*argmax_agg); @@ -238,7 +242,7 @@ void aggregrate_result_functor::operator()(aggregation co auto result = detail::group_var(get_grouped_values(), mean_result, group_sizes, - helper.group_labels(), + helper.group_labels(stream), var_agg._ddof, stream, mr); @@ -271,8 +275,8 @@ void aggregrate_result_functor::operator()(aggregation co auto result = detail::group_quantiles(get_sorted_values(), group_sizes, - helper.group_offsets(), - helper.num_groups(), + helper.group_offsets(stream), + helper.num_groups(stream), quantile_agg._quantiles, quantile_agg._interpolation, stream, @@ -291,8 +295,8 @@ void aggregrate_result_functor::operator()(aggregation cons auto result = detail::group_quantiles(get_sorted_values(), group_sizes, - helper.group_offsets(), - helper.num_groups(), + helper.group_offsets(stream), + helper.num_groups(stream), {0.5}, interpolation::LINEAR, stream, @@ -308,9 +312,9 @@ void aggregrate_result_functor::operator()(aggregation con auto nunique_agg = static_cast(agg); auto result = detail::group_nunique(get_sorted_values(), - helper.group_labels(), - helper.num_groups(), - helper.group_offsets(), + helper.group_labels(stream), + helper.num_groups(stream), + helper.group_offsets(stream), nunique_agg._null_handling, stream, mr); @@ -337,9 
+341,9 @@ void aggregrate_result_functor::operator()(aggregation agg, detail::group_nth_element(get_grouped_values(), group_sizes, - helper.group_labels(), - helper.group_offsets(), - helper.num_groups(), + helper.group_labels(stream), + helper.group_offsets(stream), + helper.num_groups(stream), nth_element_agg._n, nth_element_agg._null_handling, stream, @@ -357,7 +361,7 @@ void aggregrate_result_functor::operator()(aggregatio if (cache.has_result(col_idx, agg)) return; auto result = detail::group_collect( - get_grouped_values(), helper.group_offsets(), helper.num_groups(), stream, mr); + get_grouped_values(), helper.group_offsets(stream), helper.num_groups(stream), stream, mr); cache.add_result(col_idx, agg, std::move(result)); }; @@ -373,7 +377,7 @@ void aggregrate_result_functor::operator()(aggregation if (cache.has_result(col_idx, agg)) { return; } auto const collect_result = detail::group_collect( - get_grouped_values(), helper.group_offsets(), helper.num_groups(), stream, mr); + get_grouped_values(), helper.group_offsets(stream), helper.num_groups(stream), stream, mr); auto const nulls_equal = static_cast(agg)._null_equal; cache.add_result(col_idx, diff --git a/cpp/src/groupby/sort/functors.hpp b/cpp/src/groupby/sort/functors.hpp index 565320fbe80..afb92f8e141 100644 --- a/cpp/src/groupby/sort/functors.hpp +++ b/cpp/src/groupby/sort/functors.hpp @@ -64,7 +64,7 @@ struct store_result_functor { // It's overridden in scan implementation. return sorted_values->view(); else - return (grouped_values = helper.grouped_values(values))->view(); + return (grouped_values = helper.grouped_values(values, stream))->view(); }; /** @@ -76,7 +76,7 @@ struct store_result_functor { column_view get_sorted_values() { return sorted_values ? 
sorted_values->view() - : (sorted_values = helper.sorted_values(values))->view(); + : (sorted_values = helper.sorted_values(values, stream))->view(); }; protected: diff --git a/cpp/src/groupby/sort/scan.cpp b/cpp/src/groupby/sort/scan.cpp index 3d7ccf18242..336a6777ffa 100644 --- a/cpp/src/groupby/sort/scan.cpp +++ b/cpp/src/groupby/sort/scan.cpp @@ -59,7 +59,7 @@ struct scan_result_functor final : store_result_functor { if (grouped_values) return grouped_values->view(); else - return (grouped_values = helper.grouped_values(values))->view(); + return (grouped_values = helper.grouped_values(values, stream))->view(); }; }; @@ -71,7 +71,8 @@ void scan_result_functor::operator()(aggregation const& agg) cache.add_result( col_idx, agg, - detail::sum_scan(get_grouped_values(), helper.num_groups(), helper.group_labels(), stream, mr)); + detail::sum_scan( + get_grouped_values(), helper.num_groups(stream), helper.group_labels(stream), stream, mr)); } template <> @@ -82,7 +83,8 @@ void scan_result_functor::operator()(aggregation const& agg) cache.add_result( col_idx, agg, - detail::min_scan(get_grouped_values(), helper.num_groups(), helper.group_labels(), stream, mr)); + detail::min_scan( + get_grouped_values(), helper.num_groups(stream), helper.group_labels(stream), stream, mr)); } template <> @@ -93,7 +95,8 @@ void scan_result_functor::operator()(aggregation const& agg) cache.add_result( col_idx, agg, - detail::max_scan(get_grouped_values(), helper.num_groups(), helper.group_labels(), stream, mr)); + detail::max_scan( + get_grouped_values(), helper.num_groups(stream), helper.group_labels(stream), stream, mr)); } template <> @@ -101,7 +104,7 @@ void scan_result_functor::operator()(aggregation const& { if (cache.has_result(col_idx, agg)) return; - cache.add_result(col_idx, agg, detail::count_scan(helper.group_labels(), stream, mr)); + cache.add_result(col_idx, agg, detail::count_scan(helper.group_labels(stream), stream, mr)); } } // namespace detail diff --git 
a/cpp/src/groupby/sort/sort_helper.cu b/cpp/src/groupby/sort/sort_helper.cu index 6a9da36e21b..5e944f75712 100644 --- a/cpp/src/groupby/sort/sort_helper.cu +++ b/cpp/src/groupby/sort/sort_helper.cu @@ -141,7 +141,7 @@ column_view sort_groupby_helper::key_sort_order(rmm::cuda_stream_view stream) // presence of a null value within a row. This allows moving all rows that // contain a null value to the end of the sorted order. - auto augmented_keys = table_view({table_view({keys_bitmask_column()}), _keys}); + auto augmented_keys = table_view({table_view({keys_bitmask_column(stream)}), _keys}); _key_sorted_order = cudf::detail::stable_sorted_order( augmented_keys, @@ -164,7 +164,7 @@ sort_groupby_helper::index_vector const& sort_groupby_helper::group_offsets( _group_offsets = std::make_unique(num_keys(stream) + 1, stream); auto device_input_table = table_device_view::create(_keys, stream); - auto sorted_order = key_sort_order().data(); + auto sorted_order = key_sort_order(stream).data(); decltype(_group_offsets->begin()) result_end; if (has_nulls(_keys)) { @@ -207,9 +207,9 @@ sort_groupby_helper::index_vector const& sort_groupby_helper::group_labels( group_labels.end(), index_vector::value_type{0}); thrust::scatter(rmm::exec_policy(stream), - thrust::make_constant_iterator(1, decltype(num_groups())(1)), - thrust::make_constant_iterator(1, num_groups()), - group_offsets().begin() + 1, + thrust::make_constant_iterator(1, decltype(num_groups(stream))(1)), + thrust::make_constant_iterator(1, num_groups(stream)), + group_offsets(stream).begin() + 1, group_labels.begin()); thrust::inclusive_scan( @@ -226,9 +226,9 @@ column_view sort_groupby_helper::unsorted_keys_labels(rmm::cuda_stream_view stre data_type(type_to_id()), _keys.num_rows(), mask_state::ALL_NULL, stream); auto group_labels_view = cudf::column_view( - data_type(type_to_id()), group_labels().size(), group_labels().data()); + data_type(type_to_id()), group_labels(stream).size(), group_labels(stream).data()); - auto 
scatter_map = key_sort_order(); + auto scatter_map = key_sort_order(stream); std::unique_ptr
t_unsorted_keys_labels = cudf::detail::scatter(table_view({group_labels_view}), @@ -267,7 +267,7 @@ sort_groupby_helper::column_ptr sort_groupby_helper::sorted_values( column_view const& values, rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) { column_ptr values_sort_order = - cudf::detail::stable_sorted_order(table_view({unsorted_keys_labels(), values}), + cudf::detail::stable_sorted_order(table_view({unsorted_keys_labels(stream), values}), {}, std::vector(2, null_order::AFTER), stream, @@ -289,7 +289,7 @@ sort_groupby_helper::column_ptr sort_groupby_helper::sorted_values( sort_groupby_helper::column_ptr sort_groupby_helper::grouped_values( column_view const& values, rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) { - auto gather_map = key_sort_order(); + auto gather_map = key_sort_order(stream); auto grouped_values_table = cudf::detail::gather(table_view({values}), gather_map, @@ -304,14 +304,14 @@ sort_groupby_helper::column_ptr sort_groupby_helper::grouped_values( std::unique_ptr
sort_groupby_helper::unique_keys(rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) { - auto idx_data = key_sort_order().data(); + auto idx_data = key_sort_order(stream).data(); auto gather_map_it = thrust::make_transform_iterator( - group_offsets().begin(), [idx_data] __device__(size_type i) { return idx_data[i]; }); + group_offsets(stream).begin(), [idx_data] __device__(size_type i) { return idx_data[i]; }); return cudf::detail::gather(_keys, gather_map_it, - gather_map_it + num_groups(), + gather_map_it + num_groups(stream), out_of_bounds_policy::DONT_CHECK, stream, mr); @@ -321,7 +321,7 @@ std::unique_ptr
sort_groupby_helper::sorted_keys(rmm::cuda_stream_view st rmm::mr::device_memory_resource* mr) { return cudf::detail::gather(_keys, - key_sort_order(), + key_sort_order(stream), cudf::out_of_bounds_policy::DONT_CHECK, cudf::detail::negative_index_policy::NOT_ALLOWED, stream, diff --git a/cpp/src/rolling/grouped_rolling.cu b/cpp/src/rolling/grouped_rolling.cu index b8cb5e45fec..34d6d5fa194 100644 --- a/cpp/src/rolling/grouped_rolling.cu +++ b/cpp/src/rolling/grouped_rolling.cu @@ -838,8 +838,8 @@ std::unique_ptr grouped_time_range_rolling_window(table_view const& grou index_vector group_offsets(0, stream), group_labels(0, stream); if (group_keys.num_columns() > 0) { sort_groupby_helper helper{group_keys, cudf::null_policy::INCLUDE, cudf::sorted::YES}; - group_offsets = index_vector(helper.group_offsets(), stream); - group_labels = index_vector(helper.group_labels(), stream); + group_offsets = index_vector(helper.group_offsets(stream), stream); + group_labels = index_vector(helper.group_labels(stream), stream); } // Assumes that `timestamp_column` is actually of a timestamp type. From fe7ec857c01a410521cffbb215527742510c642c Mon Sep 17 00:00:00 2001 From: Conor Hoekstra <36027403+codereport@users.noreply.github.com> Date: Mon, 29 Mar 2021 05:35:28 -0400 Subject: [PATCH 07/20] Fix `cudf::cast` overflow for `decimal64` to `int32_t` or smaller in certain cases (#7733) @galipremsagar found an issue with `cudf::cast` for `decimal64`. His test case was when you have a value un-representable in `int32_t`. The cast operation would cast too early and therefore overflow. This PR fixes that issue. 
Resolves https://github.com/rapidsai/cudf/issues/7689 Authors: - Conor Hoekstra (@codereport) Approvers: - Mike Wilson (@hyperbolic2346) - Ram (Ramakrishna Prabhu) (@rgsl888prabhu) URL: https://github.com/rapidsai/cudf/pull/7733 --- cpp/include/cudf/fixed_point/fixed_point.hpp | 51 ++++++++++++++++---- cpp/tests/unary/cast_tests.cpp | 27 +++++++++++ 2 files changed, 69 insertions(+), 9 deletions(-) diff --git a/cpp/include/cudf/fixed_point/fixed_point.hpp b/cpp/include/cudf/fixed_point/fixed_point.hpp index eb752a8a0ea..952075b1703 100644 --- a/cpp/include/cudf/fixed_point/fixed_point.hpp +++ b/cpp/include/cudf/fixed_point/fixed_point.hpp @@ -218,14 +218,15 @@ class fixed_point { using rep = Rep; /** - * @brief Constructor that will perform shifting to store value appropriately + * @brief Constructor that will perform shifting to store value appropriately (from floating point + * types) * - * @tparam T The type that you are constructing from (integral or floating) + * @tparam T The floating point type that you are constructing from * @param value The value that will be constructed from * @param scale The exponent that is applied to Rad to perform shifting */ template () && + typename cuda::std::enable_if_t() && is_supported_representation_type()>* = nullptr> CUDA_HOST_DEVICE_CALLABLE explicit fixed_point(T const& value, scale_type const& scale) : _value{static_cast(detail::shift(value, scale))}, _scale{scale} @@ -233,8 +234,25 @@ class fixed_point { } /** - * @brief Constructor that will not perform shifting (assumes value already - * shifted) + * @brief Constructor that will perform shifting to store value appropriately (from integral + * types) + * + * @tparam T The integral type that you are constructing from + * @param value The value that will be constructed from + * @param scale The exponent that is applied to Rad to perform shifting + */ + template () && + is_supported_representation_type()>* = nullptr> + CUDA_HOST_DEVICE_CALLABLE explicit fixed_point(T 
const& value, scale_type const& scale) + // `value` is cast to `Rep` to avoid overflow in cases where + // constructing to `Rep` that is wider than `T` + : _value{detail::shift(static_cast(value), scale)}, _scale{scale} + { + } + + /** + * @brief Constructor that will not perform shifting (assumes value already shifted) * * @param s scaled_integer that contains scale and already shifted value */ @@ -260,18 +278,33 @@ class fixed_point { fixed_point() : _value{0}, _scale{scale_type{0}} {} /** - * @brief Explicit conversion operator + * @brief Explicit conversion operator for casting to floating point types * - * @tparam U The type that is being explicitly converted to (integral or floating) + * @tparam U The floating point type that is being explicitly converted to * @return The `fixed_point` number in base 10 (aka human readable format) */ template ()>* = nullptr> - CUDA_HOST_DEVICE_CALLABLE explicit constexpr operator U() const + typename cuda::std::enable_if_t::value>* = nullptr> + explicit constexpr operator U() const { return detail::shift(static_cast(_value), detail::negate(_scale)); } + /** + * @brief Explicit conversion operator for casting to integral types + * + * @tparam U The integral type that is being explicitly converted to + * @return The `fixed_point` number in base 10 (aka human readable format) + */ + template ::value>* = nullptr> + explicit constexpr operator U() const + { + // Don't cast to U until converting to Rep because in certain cases casting to U before shifting + // will result in integer overflow (i.e. 
if U = int32_t, Rep = int64_t and _value > 2 billion) + return static_cast(detail::shift(_value, detail::negate(_scale))); + } + CUDA_HOST_DEVICE_CALLABLE operator scaled_integer() const { return scaled_integer{_value, _scale}; diff --git a/cpp/tests/unary/cast_tests.cpp b/cpp/tests/unary/cast_tests.cpp index e8953ab9a30..15d014f9d9c 100644 --- a/cpp/tests/unary/cast_tests.cpp +++ b/cpp/tests/unary/cast_tests.cpp @@ -537,6 +537,9 @@ inline auto make_fixed_point_data_type(int32_t scale) return cudf::data_type{cudf::type_to_id(), scale}; } +struct FixedPointTestSingleType : public cudf::test::BaseFixture { +}; + template struct FixedPointTests : public cudf::test::BaseFixture { }; @@ -592,6 +595,18 @@ TYPED_TEST(FixedPointTests, CastToInt32) CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, result->view()); } +TEST_F(FixedPointTestSingleType, CastDecimal64ToInt32) +{ + using fp_wrapper = cudf::test::fixed_point_column_wrapper; + using fw_wrapper = cudf::test::fixed_width_column_wrapper; + + auto const input = fp_wrapper{{7246212000}, numeric::scale_type{-5}}; + auto const expected = fw_wrapper{72462}; + auto const result = cudf::cast(input, make_data_type()); + + CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, result->view()); +} + TYPED_TEST(FixedPointTests, CastToIntLarge) { using namespace numeric; @@ -659,6 +674,18 @@ TYPED_TEST(FixedPointTests, CastFromInt) CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, result->view()); } +TEST_F(FixedPointTestSingleType, CastInt32ToDecimal64) +{ + using fp_wrapper = cudf::test::fixed_point_column_wrapper; + using fw_wrapper = cudf::test::fixed_width_column_wrapper; + + auto const input = fw_wrapper{-48938}; + auto const expected = fp_wrapper{{-4893800000LL}, numeric::scale_type{-5}}; + auto const result = cudf::cast(input, make_fixed_point_data_type(-5)); + + CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, result->view()); +} + TYPED_TEST(FixedPointTests, CastFromIntLarge) { using namespace numeric; From d9103c4b7998610abc05aa9d85a5a89f3b347251 Mon Sep 17 
00:00:00 2001 From: David <45795991+davidwendt@users.noreply.github.com> Date: Mon, 29 Mar 2021 11:50:06 -0400 Subject: [PATCH 08/20] Add gbenchmark for nvtext ngrams functions (#7693) Reference #5696 Creates a gbenchmark for `nvtext::generate_ngrams()` and `nvtext::generate_character_ngrams()` functions. The benchmark measures various string lengths and number of rows. The `nvtext::generate_ngrams()` was refactored to use the more efficient `make_strings_children` which improved its performance by about 50%. Authors: - David (@davidwendt) Approvers: - Nghia Truong (@ttnghia) - Mark Harris (@harrism) URL: https://github.com/rapidsai/cudf/pull/7693 --- cpp/benchmarks/CMakeLists.txt | 1 + cpp/benchmarks/text/ngrams_benchmark.cpp | 76 ++++++++++++++++++++++++ cpp/src/text/generate_ngrams.cu | 37 ++++-------- 3 files changed, 87 insertions(+), 27 deletions(-) create mode 100644 cpp/benchmarks/text/ngrams_benchmark.cpp diff --git a/cpp/benchmarks/CMakeLists.txt b/cpp/benchmarks/CMakeLists.txt index 43ca6de11b4..5aa7e0132f8 100644 --- a/cpp/benchmarks/CMakeLists.txt +++ b/cpp/benchmarks/CMakeLists.txt @@ -175,6 +175,7 @@ ConfigureBench(BINARYOP_BENCH binaryop/binaryop_benchmark.cu) ################################################################################################### # - nvtext benchmark ------------------------------------------------------------------- ConfigureBench(TEXT_BENCH + text/ngrams_benchmark.cpp text/normalize_benchmark.cpp text/normalize_spaces_benchmark.cpp text/replace_benchmark.cpp diff --git a/cpp/benchmarks/text/ngrams_benchmark.cpp b/cpp/benchmarks/text/ngrams_benchmark.cpp new file mode 100644 index 00000000000..1fe8e3b7f2e --- /dev/null +++ b/cpp/benchmarks/text/ngrams_benchmark.cpp @@ -0,0 +1,76 @@ +/* + * Copyright (c) 2021, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include +#include +#include +#include +#include + +#include +#include +#include + +#include + +class TextNGrams : public cudf::benchmark { +}; + +enum class ngrams_type { tokens, characters }; + +static void BM_ngrams(benchmark::State& state, ngrams_type nt) +{ + auto const n_rows = static_cast(state.range(0)); + auto const max_str_length = static_cast(state.range(1)); + data_profile table_profile; + table_profile.set_distribution_params( + cudf::type_id::STRING, distribution_id::NORMAL, 0, max_str_length); + auto const table = + create_random_table({cudf::type_id::STRING}, 1, row_count{n_rows}, table_profile); + cudf::strings_column_view input(table->view().column(0)); + + for (auto _ : state) { + cuda_event_timer raii(state, true, 0); + switch (nt) { + case ngrams_type::tokens: nvtext::generate_ngrams(input); break; + case ngrams_type::characters: nvtext::generate_character_ngrams(input); break; + } + } + + state.SetBytesProcessed(state.iterations() * input.chars_size()); +} + +static void generate_bench_args(benchmark::internal::Benchmark* b) +{ + int const min_rows = 1 << 12; + int const max_rows = 1 << 24; + int const row_mult = 8; + int const min_rowlen = 5; + int const max_rowlen = 40; + int const len_mult = 2; + generate_string_bench_args(b, min_rows, max_rows, row_mult, min_rowlen, max_rowlen, len_mult); +} + +#define NVTEXT_BENCHMARK_DEFINE(name) \ + BENCHMARK_DEFINE_F(TextNGrams, name) \ + (::benchmark::State & st) { BM_ngrams(st, ngrams_type::name); } \ + BENCHMARK_REGISTER_F(TextNGrams, name) \ + ->Apply(generate_bench_args) \ 
+ ->UseManualTime() \ + ->Unit(benchmark::kMillisecond); + +NVTEXT_BENCHMARK_DEFINE(tokens) +NVTEXT_BENCHMARK_DEFINE(characters) diff --git a/cpp/src/text/generate_ngrams.cu b/cpp/src/text/generate_ngrams.cu index 3c583622ed8..4a41dacbd30 100644 --- a/cpp/src/text/generate_ngrams.cu +++ b/cpp/src/text/generate_ngrams.cu @@ -50,7 +50,7 @@ struct ngram_generator_fn { cudf::column_device_view const d_strings; cudf::size_type ngrams; cudf::string_view const d_separator; - int32_t const* d_offsets{}; + int32_t* d_offsets{}; char* d_chars{}; /** @@ -62,7 +62,7 @@ struct ngram_generator_fn { * @param idx Index of the kernel thread. * @return Number of bytes required for the string for this thread. */ - __device__ cudf::size_type operator()(cudf::size_type idx) + __device__ void operator()(cudf::size_type idx) { char* out_ptr = d_chars ? d_chars + d_offsets[idx] : nullptr; cudf::size_type bytes = 0; @@ -74,7 +74,7 @@ struct ngram_generator_fn { bytes += d_separator.size_bytes(); if (out_ptr) out_ptr = cudf::strings::detail::copy_string(out_ptr, d_separator); } - return bytes; + if (!d_chars) d_offsets[idx] = bytes; } }; @@ -109,11 +109,11 @@ std::unique_ptr generate_ngrams( if (d_strings.is_null(idx)) return false; return !d_strings.element(idx).empty(); }, - stream, - mr) + stream) ->release(); strings_count = table_offsets.front()->size() - 1; - return std::move(table_offsets.front()); + auto result = std::move(table_offsets.front()); + return result; }(); // this allows freeing the temporary table_offsets CUDF_EXPECTS(strings_count >= ngrams, "Insufficient number of strings to generate ngrams"); @@ -131,30 +131,13 @@ std::unique_ptr generate_ngrams( // compute the number of strings of ngrams auto const ngrams_count = strings_count - ngrams + 1; - // build output offsets by computing the output bytes for each generated ngram - auto offsets_transformer_itr = cudf::detail::make_counting_transform_iterator( - 0, ngram_generator_fn{d_strings, ngrams, d_separator}); - auto 
offsets_column = cudf::strings::detail::make_offsets_child_column( - offsets_transformer_itr, offsets_transformer_itr + ngrams_count, stream, mr); - auto d_offsets = offsets_column->view().data(); - - // build the chars column - // generate the ngrams from the input strings and copy them into the chars data buffer - cudf::size_type const total_bytes = thrust::device_pointer_cast(d_offsets)[ngrams_count]; - auto chars_column = - cudf::strings::detail::create_chars_child_column(ngrams_count, 0, total_bytes, stream, mr); - char* const d_chars = chars_column->mutable_view().data(); - - thrust::for_each_n(rmm::exec_policy(stream), - thrust::make_counting_iterator(0), - ngrams_count, - ngram_generator_fn{d_strings, ngrams, d_separator, d_offsets, d_chars}); - chars_column->set_null_count(0); + auto children = cudf::strings::detail::make_strings_children( + ngram_generator_fn{d_strings, ngrams, d_separator}, ngrams_count, 0, stream, mr); // make the output strings column from the offsets and chars column return cudf::make_strings_column(ngrams_count, - std::move(offsets_column), - std::move(chars_column), + std::move(children.first), + std::move(children.second), 0, rmm::device_buffer{0, stream, mr}, stream, From 54dfaaa9e99a15e6e8f76106adba842f424fb160 Mon Sep 17 00:00:00 2001 From: Paul Taylor Date: Mon, 29 Mar 2021 12:13:04 -0500 Subject: [PATCH 09/20] Create and promote library aliases in libcudf installations (#7734) This PR ensures all `cudf::*` library aliases are created and promoted to `IMPORTED_GLOBAL` when `find_package(cudf)` finds cudf in a local build directory. ~This PR shouldn't affect CI or the targets you'd see when `libcudf` is installed (e.g. by conda), only local source builds.~ edit: This now fixes `cudf::*` alias targets for the `libcudf` installations too, needed by https://github.com/rapidsai/cuspatial/pull/365. 
Validation method: ```shell $ docker run --rm -it \ -w /tmp/findpackagecudf \ -v "/tmp/findpackagecudf:/tmp/findpackagecudf" \ gpuci/miniconda-cuda:10.2-devel-ubuntu18.04 bash # Set up mamba environment conda install -y -n base -c conda-forge mamba mamba update -y -n base -c defaults conda && mamba update -y -n base -c conda-forge mamba mamba install -y -n base -c conda-forge -c rapidsai-nightly \ git gtest gmock ninja cmake=3.18 gdal=3.0.2 boost-cpp=1.72.0 cudatoolkit=10.2 libcudf=0.19 # Copy changes in this PR (from the host) to container's /opt/conda/lib/cmake/cudf # cmake --install $CUDF_ROOT --prefix $CUDF_ROOT/local-install # docker cp $CUDF_ROOT/local-install/lib/cmake/cudf frosty_agnesi:/opt/conda/lib/cmake/ # Clone cuspatial git clone https://github.com/trxcllnt/cuspatial.git && cd cuspatial && git checkout fix/cmake-exports # Configure cuspatial rm -rf cpp/build && mkdir -p cpp/build \ && cmake -GNinja -B cpp/build -S cpp \ -DBUILD_TESTS=ON -DBUILD_BENCHMARKS=ON -DCMAKE_CUDA_ARCHITECTURES= ``` Authors: - Paul Taylor (@trxcllnt) - Robert Maynard (@robertmaynard) Approvers: - Robert Maynard (@robertmaynard) - Keith Kraus (@kkraus14) - Ray Douglass (@raydouglass) URL: https://github.com/rapidsai/cudf/pull/7734 --- conda/recipes/libcudf/meta.yaml | 2 +- cpp/CMakeLists.txt | 15 ++++--- cpp/cmake/cudf-build-config.cmake.in | 44 ++++++++++++++++--- cpp/cmake/cudf-config.cmake.in | 28 ++++++------ cpp/cmake/thirdparty/CUDF_GetGTest.cmake | 10 +---- cpp/cmake/thirdparty/CUDF_GetRMM.cmake | 5 --- .../cmake/thirdparty/CUDF_KAFKA_GetCUDF.cmake | 6 --- cpp/libcudf_kafka/tests/CMakeLists.txt | 6 +-- 8 files changed, 62 insertions(+), 54 deletions(-) diff --git a/conda/recipes/libcudf/meta.yaml b/conda/recipes/libcudf/meta.yaml index 1be8a6b450a..39587b4bd05 100644 --- a/conda/recipes/libcudf/meta.yaml +++ b/conda/recipes/libcudf/meta.yaml @@ -33,7 +33,7 @@ build: requirements: build: - - cmake >=3.17.0 + - cmake >=3.18 host: - librmm {{ minor_version }}.* - cudatoolkit 
{{ cuda_version }}.* diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index fc439ebfa7f..48562476070 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -554,12 +554,6 @@ if(CUDF_BUILD_BENCHMARKS) GIT_SHALLOW TRUE OPTIONS "BENCHMARK_ENABLE_TESTING OFF" "BENCHMARK_ENABLE_INSTALL OFF") - if(benchmark_ADDED) - install(TARGETS benchmark - benchmark_main - DESTINATION lib - EXPORT cudf-targets) - endif() add_subdirectory(benchmarks) endif() @@ -636,6 +630,15 @@ elseif(TARGET arrow_static) endif() endif() +if(TARGET gtest) + get_target_property(gtest_is_imported gtest IMPORTED) + if(NOT gtest_is_imported) + export(TARGETS gtest gmock gtest_main gmock_main + FILE ${CUDF_BINARY_DIR}/cudf-gtesting-targets.cmake + NAMESPACE GTest::) + endif() +endif() + export(EXPORT cudf-targets FILE ${CUDF_BINARY_DIR}/cudf-targets.cmake NAMESPACE cudf::) diff --git a/cpp/cmake/cudf-build-config.cmake.in b/cpp/cmake/cudf-build-config.cmake.in index d0c5a608e45..ed1926f20f0 100644 --- a/cpp/cmake/cudf-build-config.cmake.in +++ b/cpp/cmake/cudf-build-config.cmake.in @@ -2,6 +2,22 @@ cmake_minimum_required(VERSION 3.18) +set(_possible_targets_to_promote + cudf::cudf + GTest::gmock + GTest::gmock_main + GTest::gtest + GTest::gtest_main + cudf::cudftestutil + rmm::rmm + arrow_shared + arrow_cuda_shared ) +foreach(target IN LISTS _possible_targets_to_promote) + if(NOT TARGET ${target}) + list(APPEND _targets_to_promote ${target}) + endif() +endforeach() + set(CUDF_VERSION @CUDF_VERSION@) set(CUDF_VERSION_MAJOR @CUDF_VERSION_MAJOR@) set(CUDF_VERSION_MINOR @CUDF_VERSION_MINOR@) @@ -36,21 +52,29 @@ include(@CUDF_SOURCE_DIR@/cmake/thirdparty/CUDF_GetThrust.cmake) # find rmm set(CUDF_MIN_VERSION_rmm "${CUDF_VERSION_MAJOR}.${CUDF_VERSION_MINOR}") include(@CUDF_SOURCE_DIR@/cmake/thirdparty/CUDF_GetRMM.cmake) -# find gtest -include(@CUDF_SOURCE_DIR@/cmake/thirdparty/CUDF_GetGTest.cmake) # find arrow -if(NOT EXISTS "${CMAKE_CURRENT_LIST_DIR}/cudf-arrow-targets.cmake") +if(EXISTS 
"${CMAKE_CURRENT_LIST_DIR}/cudf-arrow-targets.cmake") + include("${CMAKE_CURRENT_LIST_DIR}/cudf-arrow-targets.cmake") +else() + if(NOT DEFINED CUDF_USE_ARROW_STATIC) + set(CUDF_USE_ARROW_STATIC OFF) + endif() include(@CUDF_SOURCE_DIR@/cmake/thirdparty/CUDF_GetArrow.cmake) endif() +# find GTest +if(EXISTS "${CMAKE_CURRENT_LIST_DIR}/cudf-gtesting-targets.cmake") + include("${CMAKE_CURRENT_LIST_DIR}/cudf-gtesting-targets.cmake") +else() + # find gtest + include(@CUDF_SOURCE_DIR@/cmake/thirdparty/CUDF_GetGTest.cmake) +endif() + list(POP_FRONT CMAKE_MODULE_PATH) -if(EXISTS "${CMAKE_CURRENT_LIST_DIR}/cudf-arrow-targets.cmake") - include("${CMAKE_CURRENT_LIST_DIR}/cudf-arrow-targets.cmake") -endif() -include("${CMAKE_CURRENT_LIST_DIR}/cudf-targets.cmake") +include("${CMAKE_CURRENT_LIST_DIR}/cudf-targets.cmake") if(EXISTS "${CMAKE_CURRENT_LIST_DIR}/cudf-testing-targets.cmake") include("${CMAKE_CURRENT_LIST_DIR}/cudf-testing-targets.cmake") endif() @@ -59,6 +83,12 @@ include("${CMAKE_CURRENT_LIST_DIR}/cudf-config-version.cmake") check_required_components(cudf) +foreach(target IN LISTS _targets_to_promote) + if(TARGET ${target}) + fix_cmake_global_defaults(${target}) + endif() +endforeach() + set(${CMAKE_FIND_PACKAGE_NAME}_CONFIG "${CMAKE_CURRENT_LIST_FILE}") include(FindPackageHandleStandardArgs) diff --git a/cpp/cmake/cudf-config.cmake.in b/cpp/cmake/cudf-config.cmake.in index 6a280264d3c..66c669851fa 100644 --- a/cpp/cmake/cudf-config.cmake.in +++ b/cpp/cmake/cudf-config.cmake.in @@ -26,11 +26,6 @@ This module offers an optional testing component which defines the following IMPORTED GLOBAL targets: cudf::cudftestutil - The main cudf testing library - cudf::gmock - cudf::gmock_main - cudf::gtest - cudf::gtest_main - Result Variables ^^^^^^^^^^^^^^^^ @@ -49,13 +44,11 @@ cmake_minimum_required(VERSION 3.18) set(_possible_targets_to_promote cudf::cudf - cudf::benchmark - cudf::benchmark_main - cudf::gmock - cudf::gtest - cudf::gmock_main - cudf::gtest_main cudf::cudftestutil 
+ GTest::gmock + GTest::gmock_main + GTest::gtest + GTest::gtest_main rmm::rmm arrow_shared arrow_cuda_shared ) @@ -101,17 +94,22 @@ include("${CMAKE_CURRENT_LIST_DIR}/cudf-targets.cmake") if(testing IN_LIST cudf_FIND_COMPONENTS) enable_language(CUDA) - find_dependency(GTest @CUDF_MIN_VERSION_GTest@) + find_dependency(GTest @CUDF_MIN_VERSION_GTest@ CONFIG) + include("${CMAKE_CURRENT_LIST_DIR}/cudf-testing-targets.cmake") + endif() include("${CMAKE_CURRENT_LIST_DIR}/cudf-config-version.cmake") check_required_components(cudf) -foreach(t IN LISTS _targets_to_promote) - if(TARGET ${t}) - set_target_properties(${t} PROPERTIES IMPORTED_GLOBAL TRUE) +foreach(target IN LISTS _targets_to_promote) + if(TARGET ${target}) + get_target_property(_already_global ${target} IMPORTED_GLOBAL) + if(NOT _already_global) + set_target_properties(${target} PROPERTIES IMPORTED_GLOBAL TRUE) + endif() endif() endforeach() set(${CMAKE_FIND_PACKAGE_NAME}_CONFIG "${CMAKE_CURRENT_LIST_FILE}") diff --git a/cpp/cmake/thirdparty/CUDF_GetGTest.cmake b/cpp/cmake/thirdparty/CUDF_GetGTest.cmake index 666ba0fbb2c..9e4f3c137b1 100644 --- a/cpp/cmake/thirdparty/CUDF_GetGTest.cmake +++ b/cpp/cmake/thirdparty/CUDF_GetGTest.cmake @@ -26,7 +26,7 @@ function(find_and_configure_gtest VERSION) GIT_REPOSITORY https://github.com/google/googletest.git GIT_TAG release-${VERSION} GIT_SHALLOW TRUE - OPTIONS "INSTALL_GTEST OFF" + OPTIONS "INSTALL_GTEST ON" # googletest >= 1.10.0 provides a cmake config file -- use it if it exists FIND_PACKAGE_ARGUMENTS "CONFIG") # Add GTest aliases if they don't already exist. 
@@ -43,14 +43,6 @@ function(find_and_configure_gtest VERSION) fix_cmake_global_defaults(GTest::gmock) fix_cmake_global_defaults(GTest::gtest_main) fix_cmake_global_defaults(GTest::gmock_main) - if(GTest_ADDED) - install(TARGETS gmock - gtest - gmock_main - gtest_main - DESTINATION lib - EXPORT cudf-testing-targets) - endif() endfunction() set(CUDF_MIN_VERSION_GTest 1.10.0) diff --git a/cpp/cmake/thirdparty/CUDF_GetRMM.cmake b/cpp/cmake/thirdparty/CUDF_GetRMM.cmake index e5d1f2f07a9..136947674f9 100644 --- a/cpp/cmake/thirdparty/CUDF_GetRMM.cmake +++ b/cpp/cmake/thirdparty/CUDF_GetRMM.cmake @@ -55,11 +55,6 @@ function(find_and_configure_rmm VERSION) # Make sure consumers of cudf can also see rmm::rmm fix_cmake_global_defaults(rmm::rmm) - - if(NOT rmm_BINARY_DIR IN_LIST CMAKE_PREFIX_PATH) - list(APPEND CMAKE_PREFIX_PATH "${rmm_BINARY_DIR}") - set(CMAKE_PREFIX_PATH ${CMAKE_PREFIX_PATH} PARENT_SCOPE) - endif() endfunction() set(CUDF_MIN_VERSION_rmm "${CUDF_VERSION_MAJOR}.${CUDF_VERSION_MINOR}") diff --git a/cpp/libcudf_kafka/cmake/thirdparty/CUDF_KAFKA_GetCUDF.cmake b/cpp/libcudf_kafka/cmake/thirdparty/CUDF_KAFKA_GetCUDF.cmake index 4796495413e..1f7c15d4f75 100644 --- a/cpp/libcudf_kafka/cmake/thirdparty/CUDF_KAFKA_GetCUDF.cmake +++ b/cpp/libcudf_kafka/cmake/thirdparty/CUDF_KAFKA_GetCUDF.cmake @@ -40,12 +40,6 @@ function(find_and_configure_cudf VERSION) "BUILD_BENCHMARKS OFF") cudfkafka_restore_if_enabled(BUILD_TESTS) cudfkafka_restore_if_enabled(BUILD_BENCHMARKS) - - if(NOT cudf_BINARY_DIR IN_LIST CMAKE_PREFIX_PATH) - list(APPEND CMAKE_PREFIX_PATH "${cudf_BINARY_DIR}") - set(CMAKE_PREFIX_PATH ${CMAKE_PREFIX_PATH} PARENT_SCOPE) - endif() - endfunction() set(CUDF_KAFKA_MIN_VERSION_cudf 0.19) diff --git a/cpp/libcudf_kafka/tests/CMakeLists.txt b/cpp/libcudf_kafka/tests/CMakeLists.txt index f556d36d9d2..e813ed5439e 100644 --- a/cpp/libcudf_kafka/tests/CMakeLists.txt +++ b/cpp/libcudf_kafka/tests/CMakeLists.txt @@ -21,11 +21,7 @@ function(ConfigureTest CMAKE_TEST_NAME ) 
add_executable(${CMAKE_TEST_NAME} ${ARGN}) set_target_properties(${CMAKE_TEST_NAME} PROPERTIES RUNTIME_OUTPUT_DIRECTORY "$") - if(TARGET cudf::gmock_main) - target_link_libraries(${CMAKE_TEST_NAME} PRIVATE cudf::gmock_main cudf::gtest_main cudf_kafka) - else() - target_link_libraries(${CMAKE_TEST_NAME} PRIVATE GTest::gmock_main GTest::gtest_main cudf_kafka) - endif() + target_link_libraries(${CMAKE_TEST_NAME} PRIVATE GTest::gmock_main GTest::gtest_main cudf_kafka) target_include_directories(${CMAKE_TEST_NAME} PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}/../include) add_test(NAME ${CMAKE_TEST_NAME} COMMAND ${CMAKE_TEST_NAME}) endfunction() From cddafd9b1dd3ab815020a513626a611cd8a50de0 Mon Sep 17 00:00:00 2001 From: Jason Lowe Date: Mon, 29 Mar 2021 12:35:27 -0500 Subject: [PATCH 10/20] Add replacements column support for Java replaceNulls (#7750) Adds Java bindings for `cudf::replace_nulls` with a columnar replacement parameter. Authors: - Jason Lowe (@jlowe) Approvers: - Robert (Bobby) Evans (@revans2) URL: https://github.com/rapidsai/cudf/pull/7750 --- .../main/java/ai/rapids/cudf/ColumnView.java | 18 ++++++- java/src/main/native/src/ColumnViewJni.cpp | 20 +++++++- .../java/ai/rapids/cudf/ColumnVectorTest.java | 50 ++++++++++++++++--- 3 files changed, 78 insertions(+), 10 deletions(-) diff --git a/java/src/main/java/ai/rapids/cudf/ColumnView.java b/java/src/main/java/ai/rapids/cudf/ColumnView.java index b29b873092d..90fe3553abc 100644 --- a/java/src/main/java/ai/rapids/cudf/ColumnView.java +++ b/java/src/main/java/ai/rapids/cudf/ColumnView.java @@ -388,7 +388,19 @@ public final ColumnVector findAndReplaceAll(ColumnView oldValues, ColumnView new * @return - ColumnVector with nulls replaced by scalar */ public final ColumnVector replaceNulls(Scalar scalar) { - return new ColumnVector(replaceNulls(getNativeView(), scalar.getScalarHandle())); + return new ColumnVector(replaceNullsScalar(getNativeView(), scalar.getScalarHandle())); + } + + /** + * Returns a ColumnVector with any 
null values replaced with the corresponding row in the + * specified replacement column. + * This column and the replacement column must have the same type and number of rows. + * + * @param replacements column of replacement values + * @return column with nulls replaced by corresponding row of replacements column + */ + public final ColumnVector replaceNulls(ColumnView replacements) { + return new ColumnVector(replaceNullsColumn(getNativeView(), replacements.getNativeView())); } /** @@ -2840,7 +2852,9 @@ private static native long rollingWindow( private static native long charLengths(long viewHandle) throws CudfException; - private static native long replaceNulls(long viewHandle, long scalarHandle) throws CudfException; + private static native long replaceNullsScalar(long viewHandle, long scalarHandle) throws CudfException; + + private static native long replaceNullsColumn(long viewHandle, long replaceViewHandle) throws CudfException; private static native long ifElseVV(long predVec, long trueVec, long falseVec) throws CudfException; diff --git a/java/src/main/native/src/ColumnViewJni.cpp b/java/src/main/native/src/ColumnViewJni.cpp index 3928794b55c..dc1acc50b5f 100644 --- a/java/src/main/native/src/ColumnViewJni.cpp +++ b/java/src/main/native/src/ColumnViewJni.cpp @@ -121,8 +121,9 @@ JNIEXPORT jlong JNICALL Java_ai_rapids_cudf_ColumnView_lowerStrings(JNIEnv *env, CATCH_STD(env, 0); } -JNIEXPORT jlong JNICALL Java_ai_rapids_cudf_ColumnView_replaceNulls(JNIEnv *env, jclass, - jlong j_col, jlong j_scalar) { +JNIEXPORT jlong JNICALL Java_ai_rapids_cudf_ColumnView_replaceNullsScalar(JNIEnv *env, jclass, + jlong j_col, + jlong j_scalar) { JNI_NULL_CHECK(env, j_col, "column is null", 0); JNI_NULL_CHECK(env, j_scalar, "scalar is null", 0); try { @@ -135,6 +136,21 @@ JNIEXPORT jlong JNICALL Java_ai_rapids_cudf_ColumnView_replaceNulls(JNIEnv *env, CATCH_STD(env, 0); } +JNIEXPORT jlong JNICALL Java_ai_rapids_cudf_ColumnView_replaceNullsColumn(JNIEnv *env, jclass, + jlong 
j_col, + jlong j_replace_col) { + JNI_NULL_CHECK(env, j_col, "column is null", 0); + JNI_NULL_CHECK(env, j_replace_col, "replacement column is null", 0); + try { + cudf::jni::auto_set_device(env); + auto col = reinterpret_cast(j_col); + auto replacements = reinterpret_cast(j_replace_col); + std::unique_ptr result = cudf::replace_nulls(*col, *replacements); + return reinterpret_cast(result.release()); + } + CATCH_STD(env, 0); +} + JNIEXPORT jlong JNICALL Java_ai_rapids_cudf_ColumnView_ifElseVV(JNIEnv *env, jclass, jlong j_pred_vec, jlong j_true_vec, diff --git a/java/src/test/java/ai/rapids/cudf/ColumnVectorTest.java b/java/src/test/java/ai/rapids/cudf/ColumnVectorTest.java index 5a9404f5760..fe1cba5ceb1 100644 --- a/java/src/test/java/ai/rapids/cudf/ColumnVectorTest.java +++ b/java/src/test/java/ai/rapids/cudf/ColumnVectorTest.java @@ -1368,7 +1368,7 @@ void testFromScalarNullByte() { } @Test - void testReplaceEmptyColumn() { + void testReplaceNullsScalarEmptyColumn() { try (ColumnVector input = ColumnVector.fromBoxedBooleans(); ColumnVector expected = ColumnVector.fromBoxedBooleans(); Scalar s = Scalar.fromBool(false); @@ -1378,7 +1378,7 @@ void testReplaceEmptyColumn() { } @Test - void testReplaceNullBoolsWithAllNulls() { + void testReplaceNullsScalarBoolsWithAllNulls() { try (ColumnVector input = ColumnVector.fromBoxedBooleans(null, null, null, null); ColumnVector expected = ColumnVector.fromBoxedBooleans(false, false, false, false); Scalar s = Scalar.fromBool(false); @@ -1388,7 +1388,7 @@ void testReplaceNullBoolsWithAllNulls() { } @Test - void testReplaceSomeNullBools() { + void testReplaceNullsScalarSomeNullBools() { try (ColumnVector input = ColumnVector.fromBoxedBooleans(false, null, null, false); ColumnVector expected = ColumnVector.fromBoxedBooleans(false, true, true, false); Scalar s = Scalar.fromBool(true); @@ -1398,7 +1398,7 @@ void testReplaceSomeNullBools() { } @Test - void testReplaceNullIntegersWithAllNulls() { + void 
testReplaceNullsScalarIntegersWithAllNulls() { try (ColumnVector input = ColumnVector.fromBoxedInts(null, null, null, null); ColumnVector expected = ColumnVector.fromBoxedInts(0, 0, 0, 0); Scalar s = Scalar.fromInt(0); @@ -1408,7 +1408,7 @@ void testReplaceNullIntegersWithAllNulls() { } @Test - void testReplaceSomeNullIntegers() { + void testReplaceNullsScalarSomeNullIntegers() { try (ColumnVector input = ColumnVector.fromBoxedInts(1, 2, null, 4, null); ColumnVector expected = ColumnVector.fromBoxedInts(1, 2, 999, 4, 999); Scalar s = Scalar.fromInt(999); @@ -1418,7 +1418,7 @@ void testReplaceSomeNullIntegers() { } @Test - void testReplaceNullsFailsOnTypeMismatch() { + void testReplaceNullsScalarFailsOnTypeMismatch() { try (ColumnVector input = ColumnVector.fromBoxedInts(1, 2, null, 4, null); Scalar s = Scalar.fromBool(true)) { assertThrows(CudfException.class, () -> input.replaceNulls(s).close()); @@ -1434,6 +1434,44 @@ void testReplaceNullsWithNullScalar() { } } + @Test + void testReplaceNullsColumnEmptyColumn() { + try (ColumnVector input = ColumnVector.fromBoxedBooleans(); + ColumnVector r = ColumnVector.fromBoxedBooleans(); + ColumnVector expected = ColumnVector.fromBoxedBooleans(); + ColumnVector result = input.replaceNulls(r)) { + assertColumnsAreEqual(expected, result); + } + } + + @Test + void testReplaceNullsColumnBools() { + try (ColumnVector input = ColumnVector.fromBoxedBooleans(null, true, null, false); + ColumnVector r = ColumnVector.fromBoxedBooleans(false, null, true, true); + ColumnVector expected = ColumnVector.fromBoxedBooleans(false, true, true, false); + ColumnVector result = input.replaceNulls(r)) { + assertColumnsAreEqual(expected, result); + } + } + + @Test + void testReplaceNullsColumnIntegers() { + try (ColumnVector input = ColumnVector.fromBoxedInts(1, 2, null, 4, null); + ColumnVector r = ColumnVector.fromBoxedInts(996, 997, 998, 909, null); + ColumnVector expected = ColumnVector.fromBoxedInts(1, 2, 998, 4, null); + ColumnVector result = 
input.replaceNulls(r)) { + assertColumnsAreEqual(expected, result); + } + } + + @Test + void testReplaceNullsColumnFailsOnTypeMismatch() { + try (ColumnVector input = ColumnVector.fromBoxedInts(1, 2, null, 4, null); + ColumnVector r = ColumnVector.fromBoxedBooleans(true)) { + assertThrows(CudfException.class, () -> input.replaceNulls(r).close()); + } + } + static QuantileMethod[] methods = {LINEAR, LOWER, HIGHER, MIDPOINT, NEAREST}; static double[] quantiles = {0.0, 0.25, 0.33, 0.5, 1.0}; From 213b7f5d100d188d90e07e615da43e2e3baad10c Mon Sep 17 00:00:00 2001 From: Jason Lowe Date: Mon, 29 Mar 2021 13:29:24 -0500 Subject: [PATCH 11/20] Remove unused JVM array creation (#7748) The JNI method to build a column from Arrow creates a Java array but then doesn't use it. This removes the unnecessary JVM callback and object creation. Authors: - Jason Lowe (@jlowe) Approvers: - Robert (Bobby) Evans (@revans2) URL: https://github.com/rapidsai/cudf/pull/7748 --- java/src/main/native/src/ColumnVectorJni.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/java/src/main/native/src/ColumnVectorJni.cpp b/java/src/main/native/src/ColumnVectorJni.cpp index 737abea6f13..ba0e4f05714 100644 --- a/java/src/main/native/src/ColumnVectorJni.cpp +++ b/java/src/main/native/src/ColumnVectorJni.cpp @@ -85,7 +85,6 @@ JNIEXPORT jlong JNICALL Java_ai_rapids_cudf_ColumnVector_fromArrow(JNIEnv *env, auto null_buffer = arrow::Buffer::Wrap(static_cast(validity_address), static_cast(validity_length)); auto offsets_buffer = arrow::Buffer::Wrap(static_cast(offsets_address), static_cast(offsets_length)); - cudf::jni::native_jlongArray outcol_handles(env, 1); std::shared_ptr arrow_array; switch (n_type) { case cudf::type_id::DECIMAL32: From 42c3bf9b73b51f78d89d2e2d2616f992699fa144 Mon Sep 17 00:00:00 2001 From: GALI PREM SAGAR Date: Mon, 29 Mar 2021 13:35:05 -0500 Subject: [PATCH 12/20] Fix data corruption in string columns (#7746) Fixes: #7735 Minimal repro of the above issue is: ```python >>> import 
cudf >>> s = cudf.Series(['hi', 'hello', None]) >>> s 0 hi 1 hello 2 dtype: string >>> h = s[0:3] 0 hi 1 hello 2 dtype: string >>> s._column.null_count 1 >>> h._column.null_count 1 ``` Incorrect mask calculation in `Column.from_column_view` because of incorrect `base_size` calculation in `StringColumn`: ```python >>> s._column.mask.to_host_array() array([3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], dtype=uint8) >>> h._column.mask.to_host_array() array([], dtype=uint8) # Should have a mask similar to above one. >>> s._column.base_size 0 # Should be 3 >>> h._column.base_size 0 # Should be 3 ``` So in this PR I have fixed the calculation of `StringColumn.base_size` and introduced tests to have a check for the same. Authors: - GALI PREM SAGAR (@galipremsagar) Approvers: - Keith Kraus (@kkraus14) URL: https://github.com/rapidsai/cudf/pull/7746 --- python/cudf/cudf/core/column/string.py | 13 +++++------- python/cudf/cudf/tests/test_serialize.py | 21 +++++++++++++++++++ python/cudf/cudf/tests/test_string.py | 11 ++++++++++ .../dask_cudf/tests/test_distributed.py | 14 +++++++++++-- 4 files changed, 49 insertions(+), 10 deletions(-) diff --git a/python/cudf/cudf/core/column/string.py b/python/cudf/cudf/core/column/string.py index 11dd7556812..de2df9b50d7 100644 --- a/python/cudf/cudf/core/column/string.py +++ b/python/cudf/cudf/core/column/string.py @@ -75,10 +75,6 @@ is_space as cpp_isspace, is_upper as cpp_is_upper, ) -from cudf._lib.strings.convert.convert_integers import ( - is_integer as cpp_is_integer, -) -from cudf._lib.strings.convert.convert_floats import is_float as cpp_is_float from cudf._lib.strings.combine import ( concatenate as cpp_concatenate, join as cpp_join, @@ -91,6 +87,10 @@ from cudf._lib.strings.convert.convert_fixed_point import ( to_decimal as cpp_to_decimal, ) +from 
cudf._lib.strings.convert.convert_floats import is_float as cpp_is_float +from cudf._lib.strings.convert.convert_integers import ( + is_integer as cpp_is_integer, +) from cudf._lib.strings.convert.convert_urls import ( url_decode as cpp_url_decode, url_encode as cpp_url_encode, @@ -4760,10 +4760,7 @@ def base_size(self) -> int: if len(self.base_children) == 0: return 0 else: - return int( - (self.base_children[0].size - 1) - / self.base_children[0].dtype.itemsize - ) + return self.base_children[0].size - 1 @property def data_array_view(self) -> cuda.devicearray.DeviceNDArray: diff --git a/python/cudf/cudf/tests/test_serialize.py b/python/cudf/cudf/tests/test_serialize.py index 656b66bf793..0e9c61b634d 100644 --- a/python/cudf/cudf/tests/test_serialize.py +++ b/python/cudf/cudf/tests/test_serialize.py @@ -296,3 +296,24 @@ def test_deserialize_cudf_0_16(datadir): actual = pickle.load(open(fname, "rb")) assert_eq(expected, actual) + + +def test_serialize_sliced_string(): + # https://github.com/rapidsai/cudf/issues/7735 + data = ["hi", "hello", None] + pd_series = pd.Series(data, dtype=pd.StringDtype()) + gd_series = cudf.Series(data, dtype="str") + sliced = gd_series[0:3] + serialized_gd_series = gd_series.serialize() + serialized_sliced = sliced.serialize() + + # validate frames are equal or not + # because both should be identical + for i in range(3): + assert_eq( + serialized_gd_series[1][i].to_host_array(), + serialized_sliced[1][i].to_host_array(), + ) + + recreated = cudf.Series.deserialize(*sliced.serialize()) + assert_eq(recreated.to_pandas(nullable=True), pd_series) diff --git a/python/cudf/cudf/tests/test_string.py b/python/cudf/cudf/tests/test_string.py index 98b8bfb870d..8b1ad696f04 100644 --- a/python/cudf/cudf/tests/test_string.py +++ b/python/cudf/cudf/tests/test_string.py @@ -2922,3 +2922,14 @@ def test_string_std(): assert_exceptions_equal( lfunc=psr.std, rfunc=sr.std, compare_error_message=False ) + + +def test_string_slice_with_mask(): + actual = 
cudf.Series(["hi", "hello", None]) + expected = actual[0:3] + + assert actual._column.base_size == 3 + assert_eq(actual._column.base_size, expected._column.base_size) + assert_eq(actual._column.null_count, expected._column.null_count) + + assert_eq(actual, expected) diff --git a/python/dask_cudf/dask_cudf/tests/test_distributed.py b/python/dask_cudf/dask_cudf/tests/test_distributed.py index cb3c696adc3..85354704902 100644 --- a/python/dask_cudf/dask_cudf/tests/test_distributed.py +++ b/python/dask_cudf/dask_cudf/tests/test_distributed.py @@ -6,11 +6,11 @@ from dask.distributed import Client from distributed.utils_test import loop # noqa: F401 -import dask_cudf - import cudf from cudf.tests.utils import assert_eq +import dask_cudf + dask_cuda = pytest.importorskip("dask_cuda") @@ -65,3 +65,13 @@ def test_ucx_seriesgroupby(): dask_df_g = dask_df.groupby(["a"]).b.sum().compute() assert dask_df_g.name == "b" + + +def test_str_series_roundtrip(): + with dask_cuda.LocalCUDACluster(n_workers=1) as cluster: + with Client(cluster): + expected = cudf.Series(["hi", "hello", None]) + dask_series = dask_cudf.from_cudf(expected, npartitions=2) + + actual = dask_series.compute() + assert_eq(actual, expected) From 4dd75c4b96612d459d26194ab6c1129a0cd0fb95 Mon Sep 17 00:00:00 2001 From: Vukasin Milovanovic Date: Mon, 29 Mar 2021 15:48:51 -0700 Subject: [PATCH 13/20] Memory map the input file only when GDS compatiblity mode is not used (#7717) `mmap` is expensive on some systems and we can expect better performance with file reads when GDS is used, especially with compatibility mode. This PR adds a source type that does not use `mmap` for host reads. This type is used when GDS and its compatibility mode are enabled. `file_source` is now a base class for file-based input and only implements the device_read functions. `memory_mapped_source` class implements the host reads through the memory mapped file. 
`direct_read_source` is a newly implemented class that uses read for host reads, no `mmap`. Selection is done in `datasource::create` based on `cufile_config`. Authors: - Vukasin Milovanovic (@vuule) Approvers: - Devavret Makkar (@devavret) - David (@davidwendt) URL: https://github.com/rapidsai/cudf/pull/7717 --- cpp/include/cudf/io/datasource.hpp | 2 +- cpp/src/io/utilities/datasource.cpp | 160 +++++++++++++-------- cpp/src/io/utilities/file_io_utilities.cpp | 109 ++++++-------- cpp/src/io/utilities/file_io_utilities.hpp | 33 ++++- 4 files changed, 174 insertions(+), 130 deletions(-) diff --git a/cpp/include/cudf/io/datasource.hpp b/cpp/include/cudf/io/datasource.hpp index 8fcc045e6d2..ab7a3a6fa9b 100644 --- a/cpp/include/cudf/io/datasource.hpp +++ b/cpp/include/cudf/io/datasource.hpp @@ -123,7 +123,7 @@ class datasource { * @param[in] offset Bytes from the start * @param[in] size Bytes to read * - * @return The data buffer + * @return The data buffer (can be smaller than size) */ virtual std::unique_ptr host_read(size_t offset, size_t size) = 0; diff --git a/cpp/src/io/utilities/datasource.cpp b/cpp/src/io/utilities/datasource.cpp index 3f2884d5b7d..8f2a5389b4d 100644 --- a/cpp/src/io/utilities/datasource.cpp +++ b/cpp/src/io/utilities/datasource.cpp @@ -25,32 +25,69 @@ namespace cudf { namespace io { +namespace { /** - * @brief Implementation class for reading from a file or memory source using - * memory mapped access. - * - * Unlike Arrow's memory mapped IO class, this implementation allows memory - * mapping a subset of the file where the starting offset may not be zero. + * @brief Base class for file input. Only implements direct device reads. 
*/ -class memory_mapped_source : public datasource { - class memory_mapped_buffer : public buffer { - size_t _size = 0; - uint8_t *_data = nullptr; +class file_source : public datasource { + public: + explicit file_source(const char *filepath) + : _file(filepath, O_RDONLY), _cufile_in(detail::make_cufile_input(filepath)) + { + } + + virtual ~file_source() = default; + + bool supports_device_read() const override { return _cufile_in != nullptr; } + + bool is_device_read_preferred(size_t size) const + { + return _cufile_in != nullptr && _cufile_in->is_cufile_io_preferred(size); + } + + std::unique_ptr device_read(size_t offset, + size_t size, + rmm::cuda_stream_view stream) override + { + CUDF_EXPECTS(supports_device_read(), "Device reads are not supported for this file."); + + auto const read_size = std::min(size, _file.size() - offset); + return _cufile_in->read(offset, read_size, stream); + } + + size_t device_read(size_t offset, + size_t size, + uint8_t *dst, + rmm::cuda_stream_view stream) override + { + CUDF_EXPECTS(supports_device_read(), "Device reads are not supported for this file."); - public: - memory_mapped_buffer(uint8_t *data, size_t size) : _size(size), _data(data) {} - size_t size() const override { return _size; } - const uint8_t *data() const override { return _data; } - }; + auto const read_size = std::min(size, _file.size() - offset); + return _cufile_in->read(offset, read_size, dst, stream); + } + + size_t size() const override { return _file.size(); } + + protected: + detail::file_wrapper _file; + + private: + std::unique_ptr _cufile_in; +}; +/** + * @brief Implementation class for reading from a file using memory mapped access. + * + * Unlike Arrow's memory mapped IO class, this implementation allows memory mapping a subset of the + * file where the starting offset may not be zero. 
+ */ +class memory_mapped_source : public file_source { public: explicit memory_mapped_source(const char *filepath, size_t offset, size_t size) - : _cufile_in(detail::make_cufile_input(filepath)) + : file_source(filepath) { - auto const file = detail::file_wrapper(filepath, O_RDONLY); - _file_size = file.size(); - if (_file_size != 0) { map(file.desc(), offset, size); } + if (_file.size() != 0) map(_file.desc(), offset, size); } virtual ~memory_mapped_source() @@ -65,7 +102,7 @@ class memory_mapped_source : public datasource { // Clamp length to available data in the mapped region auto const read_size = std::min(size, _map_size - (offset - _map_offset)); - return std::make_unique( + return std::make_unique( static_cast(_map_addr) + (offset - _map_offset), read_size); } @@ -81,49 +118,15 @@ class memory_mapped_source : public datasource { return read_size; } - bool supports_device_read() const override { return _cufile_in != nullptr; } - - bool is_device_read_preferred(size_t size) const - { - return _cufile_in != nullptr && _cufile_in->is_cufile_io_preferred(size); - } - - std::unique_ptr device_read(size_t offset, - size_t size, - rmm::cuda_stream_view stream) override - { - if (!supports_device_read()) CUDF_FAIL("Device reads are not supported for this file."); - - auto const read_size = std::min(size, _map_size - (offset - _map_offset)); - return _cufile_in->read(offset, read_size, stream); - } - - size_t device_read(size_t offset, - size_t size, - uint8_t *dst, - rmm::cuda_stream_view stream) override - { - if (!supports_device_read()) CUDF_FAIL("Device reads are not supported for this file."); - auto const read_size = std::min(size, _map_size - (offset - _map_offset)); - return _cufile_in->read(offset, read_size, dst, stream); - } - - size_t size() const override { return _file_size; } - private: void map(int fd, size_t offset, size_t size) { - CUDF_EXPECTS(offset < _file_size, "Offset is past end of file"); + CUDF_EXPECTS(offset < _file.size(), "Offset is 
past end of file"); // Offset for `mmap()` must be page aligned _map_offset = offset & ~(sysconf(_SC_PAGESIZE) - 1); - // Clamp length to available data in the file - if (size == 0) { - size = _file_size - offset; - } else { - if ((offset + size) > _file_size) { size = _file_size - offset; } - } + if (size == 0 || (offset + size) > _file.size()) { size = _file.size() - offset; } // Size for `mmap()` needs to include the page padding _map_size = size + (offset - _map_offset); @@ -134,11 +137,44 @@ class memory_mapped_source : public datasource { } private: - size_t _file_size = 0; - void *_map_addr = nullptr; size_t _map_size = 0; size_t _map_offset = 0; - std::unique_ptr _cufile_in; + void *_map_addr = nullptr; +}; + +/** + * @brief Implementation class for reading from a file using `read` calls + * + * Potentially faster than `memory_mapped_source` when only a small portion of the file is read + * through the host. + */ +class direct_read_source : public file_source { + public: + explicit direct_read_source(const char *filepath) : file_source(filepath) {} + + std::unique_ptr host_read(size_t offset, size_t size) override + { + lseek(_file.desc(), offset, SEEK_SET); + + // Clamp length to available data + ssize_t const read_size = std::min(size, _file.size() - offset); + + std::vector v(read_size); + CUDF_EXPECTS(read(_file.desc(), v.data(), read_size) == read_size, "read failed"); + return buffer::create(std::move(v)); + } + + size_t host_read(size_t offset, size_t size, uint8_t *dst) override + { + lseek(_file.desc(), offset, SEEK_SET); + + // Clamp length to available data + auto const read_size = std::min(size, _file.size() - offset); + + CUDF_EXPECTS(read(_file.desc(), dst, read_size) == static_cast(read_size), + "read failed"); + return read_size; + } }; /** @@ -185,10 +221,18 @@ class user_datasource_wrapper : public datasource { datasource *const source; ///< A non-owning pointer to the user-implemented datasource }; +} // namespace + std::unique_ptr 
datasource::create(const std::string &filepath, size_t offset, size_t size) { +#ifdef CUFILE_FOUND + if (detail::cufile_config::instance()->is_required()) { + // avoid mmap as GDS is expected to be used for most reads + return std::make_unique(filepath.c_str()); + } +#endif // Use our own memory mapping implementation for direct file reads return std::make_unique(filepath.c_str(), offset, size); } diff --git a/cpp/src/io/utilities/file_io_utilities.cpp b/cpp/src/io/utilities/file_io_utilities.cpp index 22ff057cbc1..322296715fc 100644 --- a/cpp/src/io/utilities/file_io_utilities.cpp +++ b/cpp/src/io/utilities/file_io_utilities.cpp @@ -13,7 +13,6 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -#include #include #include @@ -26,93 +25,67 @@ namespace cudf { namespace io { namespace detail { +size_t get_file_size(int file_descriptor) +{ + struct stat st; + CUDF_EXPECTS(fstat(file_descriptor, &st) != -1, "Cannot query file size"); + return static_cast(st.st_size); +} + file_wrapper::file_wrapper(std::string const &filepath, int flags) - : fd(open(filepath.c_str(), flags)) + : fd(open(filepath.c_str(), flags)), _size{get_file_size(fd)} { CUDF_EXPECTS(fd != -1, "Cannot open file " + filepath); } file_wrapper::file_wrapper(std::string const &filepath, int flags, mode_t mode) - : fd(open(filepath.c_str(), flags, mode)) + : fd(open(filepath.c_str(), flags, mode)), _size{get_file_size(fd)} { CUDF_EXPECTS(fd != -1, "Cannot open file " + filepath); } file_wrapper::~file_wrapper() { close(fd); } -long file_wrapper::size() const +std::string getenv_or(std::string const &env_var_name, std::string const &default_val) { - if (_size < 0) { - struct stat st; - CUDF_EXPECTS(fstat(fd, &st) != -1, "Cannot query file size"); - _size = static_cast(st.st_size); - } - return _size; + auto const env_val = std::getenv(env_var_name.c_str()); + return (env_val == nullptr) ? 
default_val : std::string(env_val); } #ifdef CUFILE_FOUND -/** - * @brief Class that manages cuFile configuration. - */ -class cufile_config { - std::string const default_policy = "OFF"; - std::string const json_path_env_var = "CUFILE_ENV_PATH_JSON"; - - std::string const policy = default_policy; - temp_directory tmp_config_dir{"cudf_cufile_config"}; - - std::string getenv_or(std::string const &env_var_name, std::string const &default_val) - { - auto const env_val = std::getenv(env_var_name.c_str()); - return (env_val == nullptr) ? default_val : std::string(env_val); - } - - cufile_config() : policy{getenv_or("LIBCUDF_CUFILE_POLICY", default_policy)} - { - if (is_enabled()) { - // Modify the config file based on the policy - auto const config_file_path = getenv_or(json_path_env_var, "/etc/cufile.json"); - std::ifstream user_config_file(config_file_path); - // Modified config file is stored in a temporary directory - auto const cudf_config_path = tmp_config_dir.path() + "/cufile.json"; - std::ofstream cudf_config_file(cudf_config_path); - - std::string line; - while (std::getline(user_config_file, line)) { - std::string const tag = "\"allow_compat_mode\""; - if (line.find(tag) != std::string::npos) { - // TODO: only replace the true/false value - // Enable compatiblity mode when cuDF does not fall back to host path - cudf_config_file << tag << ": " << (is_required() ? 
"true" : "false") << ",\n"; - } else { - cudf_config_file << line << '\n'; - } - - // Point libcufile to the modified config file - CUDF_EXPECTS(setenv(json_path_env_var.c_str(), cudf_config_path.c_str(), 0) == 0, - "Failed to set the cuFile config file environment variable."); +cufile_config::cufile_config() : policy{getenv_or("LIBCUDF_CUFILE_POLICY", default_policy)} +{ + if (is_enabled()) { + // Modify the config file based on the policy + auto const config_file_path = getenv_or(json_path_env_var, "/etc/cufile.json"); + std::ifstream user_config_file(config_file_path); + // Modified config file is stored in a temporary directory + auto const cudf_config_path = tmp_config_dir.path() + "/cufile.json"; + std::ofstream cudf_config_file(cudf_config_path); + + std::string line; + while (std::getline(user_config_file, line)) { + std::string const tag = "\"allow_compat_mode\""; + if (line.find(tag) != std::string::npos) { + // TODO: only replace the true/false value + // Enable compatiblity mode when cuDF does not fall back to host path + cudf_config_file << tag << ": " << (is_required() ? "true" : "false") << ",\n"; + } else { + cudf_config_file << line << '\n'; } - } - } - - public: - /** - * @brief Returns true when cuFile use is enabled. - */ - bool is_enabled() const { return policy == "ALWAYS" or policy == "GDS"; } - - /** - * @brief Returns true when cuDF should not fall back to host IO. - */ - bool is_required() const { return policy == "ALWAYS"; } - static cufile_config const *instance() - { - static cufile_config _instance; - return &_instance; + // Point libcufile to the modified config file + CUDF_EXPECTS(setenv(json_path_env_var.c_str(), cudf_config_path.c_str(), 0) == 0, + "Failed to set the cuFile config file environment variable."); + } } -}; +} +cufile_config const *cufile_config::instance() +{ + static cufile_config _instance; + return &_instance; +} /** * @brief Class that dynamically loads the cuFile library and manages the cuFile driver. 
diff --git a/cpp/src/io/utilities/file_io_utilities.hpp b/cpp/src/io/utilities/file_io_utilities.hpp index 85399bdd44d..0119484aee5 100644 --- a/cpp/src/io/utilities/file_io_utilities.hpp +++ b/cpp/src/io/utilities/file_io_utilities.hpp @@ -24,6 +24,7 @@ #include #include +#include #include @@ -35,14 +36,14 @@ namespace detail { * @brief Class that provides RAII for file handling. */ class file_wrapper { - int const fd = -1; - long mutable _size = -1; + int fd = -1; + size_t _size; public: explicit file_wrapper(std::string const &filepath, int flags); explicit file_wrapper(std::string const &filepath, int flags, mode_t mode); ~file_wrapper(); - long size() const; + auto size() const { return _size; } auto desc() const { return fd; } }; @@ -128,6 +129,32 @@ class cufile_output : public cufile_io_base { class cufile_shim; +/** + * @brief Class that manages cuFile configuration. + */ +class cufile_config { + std::string const default_policy = "OFF"; + std::string const json_path_env_var = "CUFILE_ENV_PATH_JSON"; + + std::string const policy = default_policy; + temp_directory tmp_config_dir{"cudf_cufile_config"}; + + cufile_config(); + + public: + /** + * @brief Returns true when cuFile use is enabled. + */ + bool is_enabled() const { return policy == "ALWAYS" or policy == "GDS"; } + + /** + * @brief Returns true when cuDF should not fall back to host IO. + */ + bool is_required() const { return policy == "ALWAYS"; } + + static cufile_config const *instance(); +}; + /** * @brief Class that provides RAII for cuFile file registration. */ From 56976fa8ca47392299ff8bb6710d25894f741ec6 Mon Sep 17 00:00:00 2001 From: nvdbaranec <56695930+nvdbaranec@users.noreply.github.com> Date: Mon, 29 Mar 2021 19:03:40 -0500 Subject: [PATCH 14/20] cudf::row_bit_count() support. (#7534) Closes https://github.com/rapidsai/cudf/issues/7408 Some notes: - There are some limitations on what this computes, specifically regarding lists or strings embedded inside structs that have null masks. 
I've added some documentation for this. @jlowe @revans2 This could be made to handle that case properly but it would incur a fairly significant performance cost, and likely would require a large amount of temporary memory. - I made some modifications to the `test::print()` code for lists and structs to be a little more clear when displaying null masks. - The structure of `flatten_functor` and `flatten_hierarchy` will probably raise some eyebrows. These functions return 3 separate pieces of data and rather than trying to cram them awkwardly through as actual return values, they are passed by reference. Authors: - @nvdbaranec Approvers: - Jake Hemstad (@jrhemstad) - David (@davidwendt) - Jason Lowe (@jlowe) - Mark Harris (@harrism) URL: https://github.com/rapidsai/cudf/pull/7534 --- cpp/CMakeLists.txt | 1 + cpp/include/cudf/detail/transform.hpp | 12 +- cpp/include/cudf/lists/lists_column_view.hpp | 1 - cpp/include/cudf/transform.hpp | 31 +- cpp/include/cudf/types.hpp | 1 + cpp/src/jit/type.cpp | 1 + cpp/src/lists/drop_list_duplicates.cu | 2 +- cpp/src/transform/row_bit_count.cu | 542 +++++++++++++++++ cpp/tests/CMakeLists.txt | 3 +- cpp/tests/transform/row_bit_count_test.cu | 596 +++++++++++++++++++ cpp/tests/utilities/column_utilities.cu | 34 +- 11 files changed, 1206 insertions(+), 18 deletions(-) create mode 100644 cpp/src/transform/row_bit_count.cu create mode 100644 cpp/tests/transform/row_bit_count_test.cu diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index 48562476070..5cd82e52180 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -389,6 +389,7 @@ add_library(cudf src/transform/jit/code/kernel.cpp src/transform/mask_to_bools.cu src/transform/nans_to_nulls.cu + src/transform/row_bit_count.cu src/transform/transform.cpp src/transpose/transpose.cu src/unary/cast_ops.cu diff --git a/cpp/include/cudf/detail/transform.hpp b/cpp/include/cudf/detail/transform.hpp index bea480d85cd..b94223cdabe 100644 --- a/cpp/include/cudf/detail/transform.hpp +++ 
b/cpp/include/cudf/detail/transform.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2020, NVIDIA CORPORATION. + * Copyright (c) 2019-2021, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -77,5 +77,15 @@ std::unique_ptr mask_to_bools( rmm::cuda_stream_view stream = rmm::cuda_stream_default, rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); +/** + * @copydoc cudf::row_bit_count + * + * @param stream CUDA stream used for device memory operations and kernel launches. + */ +std::unique_ptr row_bit_count( + table_view const& t, + rmm::cuda_stream_view stream = rmm::cuda_stream_default, + rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + } // namespace detail } // namespace cudf diff --git a/cpp/include/cudf/lists/lists_column_view.hpp b/cpp/include/cudf/lists/lists_column_view.hpp index f8facb83975..768dde2c280 100644 --- a/cpp/include/cudf/lists/lists_column_view.hpp +++ b/cpp/include/cudf/lists/lists_column_view.hpp @@ -56,7 +56,6 @@ class lists_column_view : private column_view { using column_view::null_mask; using column_view::offset; using column_view::size; - using offset_type = int32_t; static_assert(std::is_same::value, "offset_type is expected to be the same as size_type."); using offset_iterator = offset_type const*; diff --git a/cpp/include/cudf/transform.hpp b/cpp/include/cudf/transform.hpp index 9b740d207e1..e99e0db21c5 100644 --- a/cpp/include/cudf/transform.hpp +++ b/cpp/include/cudf/transform.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2020, NVIDIA CORPORATION. + * Copyright (c) 2019-2021, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -142,5 +142,34 @@ std::unique_ptr mask_to_bools( size_type end_bit, rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); +/** + * @brief Returns an approximate cumulative size in bits of all columns in the `table_view` for + * each row. + * + * This function counts bits instead of bytes to account for the null mask which only has one + * bit per row. + * + * Each row in the returned column is the sum of the per-row size for each column in + * the table. + * + * In some cases, this is an inexact approximation. Specifically, columns of lists and strings + * require N+1 offsets to represent N rows. It is up to the caller to calculate the small + * additional overhead of the terminating offset for any group of rows being considered. + * + * This function returns the per-row sizes as the columns are currently formed. This can + * end up being larger than the number you would get by gathering the rows. Specifically, + * the push-down of struct column validity masks can nullify rows that contain data for + * string or list columns. In these cases, the size returned is conservative: + * + * row_bit_count(column(x)) >= row_bit_count(gather(column(x))) + * + * @param t The table view to perform the computation on. + * @param mr Device memory resource used to allocate the returned column's device memory + * @return A 32-bit integer column containing the per-row bit counts. 
+ */ +std::unique_ptr row_bit_count( + table_view const& t, + rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + /** @} */ // end of group } // namespace cudf diff --git a/cpp/include/cudf/types.hpp b/cpp/include/cudf/types.hpp index 727284194d8..1b8d83883b3 100644 --- a/cpp/include/cudf/types.hpp +++ b/cpp/include/cudf/types.hpp @@ -89,6 +89,7 @@ class mutable_table_view; using size_type = int32_t; using bitmask_type = uint32_t; using valid_type = uint8_t; +using offset_type = int32_t; /** * @brief Similar to `std::distance` but returns `cudf::size_type` and performs `static_cast` diff --git a/cpp/src/jit/type.cpp b/cpp/src/jit/type.cpp index d71e2eb4df8..6b1e8c57c3d 100644 --- a/cpp/src/jit/type.cpp +++ b/cpp/src/jit/type.cpp @@ -71,6 +71,7 @@ std::string get_type_name(data_type type) // TODO: Remove in JIT type utils PR switch (type.id()) { case type_id::LIST: return CUDF_STRINGIFY(List); + case type_id::STRUCT: return CUDF_STRINGIFY(Struct); case type_id::DECIMAL32: return CUDF_STRINGIFY(int32_t); case type_id::DECIMAL64: return CUDF_STRINGIFY(int64_t); diff --git a/cpp/src/lists/drop_list_duplicates.cu b/cpp/src/lists/drop_list_duplicates.cu index 529b7489c35..584b9791d19 100644 --- a/cpp/src/lists/drop_list_duplicates.cu +++ b/cpp/src/lists/drop_list_duplicates.cu @@ -34,7 +34,7 @@ namespace cudf { namespace lists { namespace detail { namespace { -using offset_type = lists_column_view::offset_type; + /** * @brief Copy list entries and entry list offsets ignoring duplicates * diff --git a/cpp/src/transform/row_bit_count.cu b/cpp/src/transform/row_bit_count.cu new file mode 100644 index 00000000000..e36fa36596f --- /dev/null +++ b/cpp/src/transform/row_bit_count.cu @@ -0,0 +1,542 @@ +/* + * Copyright (c) 2021, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +#include +#include +#include + +namespace cudf { +namespace detail { + +namespace { + +/** + * @brief Struct which contains per-column information necessary to + * traverse a column hierarchy on the gpu. + * + * When `row_bit_count` is called, the input column hierarchy is flattened into a + * vector of column_device_views. For each one of them, we store a column_info + * struct. The `depth` field represents the depth of the column in the original + * hierarchy. + * + * As we traverse the hierarchy for each input row, we maintain a span representing + * the start and end rows for the current nesting depth. At depth 0, this span is + * always just 1 row. As we cross list boundaries in the hierarchy, this span + * grows. So for each column we visit we always know how many rows of it are relevant + * and can compute its contribution to the overall size. + * + * An example using a list<list<int>> column, computing the size of row 1. 
+ * + * { {{1, 2}, {3, 4}, {5, 6}}, {{7}, {8, 9, 10}, {11, 12, 13, 14}} } + * + * L0 = List<List<int>>: + * Length : 2 + * Offsets : 0, 3, 6 + * L1 = List<int>: + * Length : 6 + * Offsets : 0, 2, 4, 6, 7, 10, 14 + * I = 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14 + * + * + * span0 = [1, 2] row 1 is represented by the span [1, 2] + * span1 = [L0.offsets[span0[0]], L0.offsets[span0[1]]] expand by the offsets of L0 + * span1 = [3, 6] span applied to children of L0 + * span2 = [L1.offsets[span1[0]], L1.offsets[span1[1]]] expand by the offsets of L1 + * span2 = [6, 14] span applied to children of L1 + * + * The total size of our row is computed as: + * (span0[1] - span0[0]) * sizeof(int) the cost of the offsets for L0 + * + + * (span1[1] - span1[0]) * sizeof(int) the cost of the offsets for L1 + * + + * (span2[1] - span2[0]) * sizeof(int) the cost of the integers in I + * + * `depth` represents our depth in the source column hierarchy. + * + * "branches" within the spans can occur when we have lists inside of structs. + * consider a case where we are entering a struct with a span of [4, 8]. + * The internal list column will change that span to something else, say [5, 9]. + * But when we finish processing the list column, the final float column wants to + * go back and use the original span [4, 8]. + * + * [4, 8] [5, 9] [4, 8] + * struct< list<> float> + * + * To accomplish this we maintain a stack of spans. Pushing the current span + * whenever we enter a branch, and popping a span whenever we leave a branch. + * + * `branch_depth_start` represents the branch depth as we reach a new column. + * if `branch_depth_start` is < the last branch depth we saw, we are returning + * from a branch and should pop off the stack. + * + * `branch_depth_end` represents the new branch depth caused by this column. + * if branch_depth_end > branch_depth_start, we are branching and need to + * push the current span on the stack. 
+ * + */ +struct column_info { + size_type depth; + size_type branch_depth_start; + size_type branch_depth_end; +}; + +/** + * @brief Struct which contains hierarchy information precomputed on the host. + * + * If the input data contains only fixed-width types, this preprocess step + * produces the value `simple_per_row_size` which is a constant for every + * row in the output. We can use this value and skip the more complicated + * processing for lists, structs and strings entirely if `complex_type_count` + * is 0. + * + */ +struct hierarchy_info { + hierarchy_info() : simple_per_row_size(0), complex_type_count(0), max_branch_depth(0) {} + + // These two fields act as an optimization. If we find that the entire table + // is just fixed-width types, we do not need to do the more expensive kernel call that + // traverses the individual columns. So if complex_type_count is 0, we can just + // return a column where every row contains the value simple_per_row_size + size_type simple_per_row_size; // in bits + size_type complex_type_count; + + // max depth of span branches present in the hierarchy. + size_type max_branch_depth; +}; + +/** + * @brief Function which flattens the incoming column hierarchy into a vector + * of column_views and produces accompanying column_info and hierarchy_info + * metadata. 
+ * + * @param begin: Beginning of a range of column views + * @param end: End of a range of column views + * @param out: (output) Flattened vector of output column_views + * @param info: (output) Additional per-output column_view metadata needed by the gpu + * @param h_info: (output) Information about the hierarchy + * @param cur_depth: Current absolute depth in the hierarchy + * @param cur_branch_depth: Current branch depth + * @param parent_index: Index into `out` representing our owning parent column + */ +template +void flatten_hierarchy(ColIter begin, + ColIter end, + std::vector& out, + std::vector& info, + hierarchy_info& h_info, + rmm::cuda_stream_view stream, + size_type cur_depth = 0, + size_type cur_branch_depth = 0, + thrust::optional parent_index = {}); + +/** + * @brief Type-dispatched functor called by flatten_hierarchy. + * + */ +struct flatten_functor { + rmm::cuda_stream_view stream; + + // fixed width + template ()>* = nullptr> + void operator()(column_view const& col, + std::vector& out, + std::vector& info, + hierarchy_info& h_info, + rmm::cuda_stream_view stream, + size_type cur_depth, + size_type cur_branch_depth, + thrust::optional parent_index) + { + out.push_back(col); + info.push_back({cur_depth, cur_branch_depth, cur_branch_depth}); + h_info.simple_per_row_size += + (sizeof(device_storage_type_t) * CHAR_BIT) + (col.nullable() ? 
1 : 0); + } + + // strings + template ::value>* = nullptr> + void operator()(column_view const& col, + std::vector& out, + std::vector& info, + hierarchy_info& h_info, + rmm::cuda_stream_view stream, + size_type cur_depth, + size_type cur_branch_depth, + thrust::optional parent_index) + { + out.push_back(col); + info.push_back({cur_depth, cur_branch_depth, cur_branch_depth}); + h_info.complex_type_count++; + } + + // lists + template ::value>* = nullptr> + void operator()(column_view const& col, + std::vector& out, + std::vector& info, + hierarchy_info& h_info, + rmm::cuda_stream_view stream, + size_type cur_depth, + size_type cur_branch_depth, + thrust::optional parent_index) + { + // track branch depth as we reach this list and after we pass it + size_type const branch_depth_start = cur_branch_depth; + auto const is_list_inside_struct = + parent_index && out[parent_index.value()].type().id() == type_id::STRUCT; + if (is_list_inside_struct) { + cur_branch_depth++; + h_info.max_branch_depth = max(h_info.max_branch_depth, cur_branch_depth); + } + size_type const branch_depth_end = cur_branch_depth; + + out.push_back(col); + info.push_back({cur_depth, branch_depth_start, branch_depth_end}); + + lists_column_view lcv(col); + auto iter = cudf::detail::make_counting_transform_iterator( + 0, [col = lcv.get_sliced_child(stream)](auto i) { return col; }); + h_info.complex_type_count++; + + flatten_hierarchy( + iter, iter + 1, out, info, h_info, stream, cur_depth + 1, cur_branch_depth, out.size() - 1); + } + + // structs + template ::value>* = nullptr> + void operator()(column_view const& col, + std::vector& out, + std::vector& info, + hierarchy_info& h_info, + rmm::cuda_stream_view stream, + size_type cur_depth, + size_type cur_branch_depth, + thrust::optional parent_index) + { + out.push_back(col); + info.push_back({cur_depth, cur_branch_depth, cur_branch_depth}); + + h_info.simple_per_row_size += col.nullable() ? 
1 : 0; + + structs_column_view scv(col); + auto iter = cudf::detail::make_counting_transform_iterator( + 0, [&scv](auto i) { return scv.get_sliced_child(i); }); + flatten_hierarchy(iter, + iter + scv.num_children(), + out, + info, + h_info, + stream, + cur_depth + 1, + cur_branch_depth, + out.size() - 1); + } + + // everything else + template () && !std::is_same::value && + !std::is_same::value && + !std::is_same::value>* = nullptr> + void operator()(column_view const& col, + std::vector& out, + std::vector& info, + hierarchy_info& h_info, + rmm::cuda_stream_view stream, + size_type cur_depth, + size_type cur_branch_depth, + thrust::optional parent_index) + { + CUDF_FAIL("Unsupported column type in row_bit_count"); + } +}; + +template +void flatten_hierarchy(ColIter begin, + ColIter end, + std::vector& out, + std::vector& info, + hierarchy_info& h_info, + rmm::cuda_stream_view stream, + size_type cur_depth, + size_type cur_branch_depth, + thrust::optional parent_index) +{ + std::for_each(begin, end, [&](column_view const& col) { + cudf::type_dispatcher(col.type(), + flatten_functor{stream}, + col, + out, + info, + h_info, + stream, + cur_depth, + cur_branch_depth, + parent_index); + }); +} + +/** + * @brief Struct representing a span of rows. + * + */ +struct row_span { + size_type row_start, row_end; +}; + +/** + * @brief Functor for computing the size, in bits, of a `row_span` of rows for a given + * `column_device_view` + * + */ +struct row_size_functor { + /** + * @brief Computes size in bits of a span of rows in a fixed-width column. + * + * Computed as : ((# of rows) * sizeof(data type) * 8) + * + + * 1 bit per row for validity if applicable. + */ + template + __device__ size_type operator()(column_device_view const& col, row_span const& span) + { + auto const num_rows{span.row_end - span.row_start}; + auto const element_size = sizeof(device_storage_type_t) * CHAR_BIT; + auto const validity_size = col.nullable() ? 
1 : 0; + return (element_size + validity_size) * num_rows; + } +}; + +/** + * @brief Computes size in bits of a span of rows in a strings column. + * + * Computed as : ((# of rows) * sizeof(offset) * 8) + (total # of characters * 8)) + * + + * 1 bit per row for validity if applicable. + */ +template <> +__device__ size_type row_size_functor::operator()(column_device_view const& col, + row_span const& span) +{ + column_device_view const& offsets = col.child(strings_column_view::offsets_column_index); + auto const num_rows{span.row_end - span.row_start}; + auto const row_start{span.row_start + col.offset()}; + auto const row_end{span.row_end + col.offset()}; + + auto const offsets_size = sizeof(offset_type) * CHAR_BIT; + auto const validity_size = col.nullable() ? 1 : 0; + auto const chars_size = + (offsets.data()[row_end] - offsets.data()[row_start]) * CHAR_BIT; + return ((offsets_size + validity_size) * num_rows) + chars_size; +} + +/** + * @brief Computes size in bits of a span of rows in a list column. + * + * Computed as : ((# of rows) * sizeof(offset) * 8) + * + + * 1 bit per row for validity if applicable. + */ +template <> +__device__ size_type row_size_functor::operator()(column_device_view const& col, + row_span const& span) +{ + column_device_view const& offsets = col.child(lists_column_view::offsets_column_index); + auto const num_rows{span.row_end - span.row_start}; + + auto const offsets_size = sizeof(offset_type) * CHAR_BIT; + auto const validity_size = col.nullable() ? 1 : 0; + return (offsets_size + validity_size) * num_rows; +} + +/** + * @brief Computes size in bits of a span of rows in a struct column. + * + * Computed as : 1 bit per row for validity if applicable. + */ +template <> +__device__ size_type row_size_functor::operator()(column_device_view const& col, + row_span const& span) +{ + auto const num_rows{span.row_end - span.row_start}; + return (col.nullable() ? 
1 : 0) * num_rows; // cost of validity +} + +/** + * @brief Kernel for computing per-row sizes in bits. + * + * @param cols A span of column_device_views representing a column hierarchy + * @param info A span of column_info structs corresponding to the elements in `cols` + * @param output Output span of size (# rows) where per-row bit sizes are stored + * @param max_branch_depth Maximum depth of the span stack needed per-thread + */ +__global__ void compute_row_sizes(device_span cols, + device_span info, + device_span output, + size_type max_branch_depth) +{ + extern __shared__ row_span thread_branch_stacks[]; + int const tid = threadIdx.x + blockIdx.x * blockDim.x; + + auto const num_rows = output.size(); + if (tid >= num_rows) { return; } + + // branch stack. points to the last list prior to branching. + row_span* my_branch_stack = thread_branch_stacks + (tid * max_branch_depth); + size_type branch_depth{0}; + + // current row span - always starts at 1 row. + row_span cur_span{tid, tid + 1}; + + // output size + size_type& size = output[tid]; + size = 0; + + size_type last_branch_depth{0}; + for (size_type idx = 0; idx < cols.size(); idx++) { + column_device_view const& col = cols[idx]; + + // if we've returned from a branch + if (info[idx].branch_depth_start < last_branch_depth) { + cur_span = my_branch_stack[--branch_depth]; + } + // if we're entering a new branch. 
+ // NOTE: this case can happen (a pop and a push by the same column) + // when we have a struct + if (info[idx].branch_depth_end > info[idx].branch_depth_start) { + my_branch_stack[branch_depth++] = cur_span; + } + + // if we're back at depth 0, this is a new top-level column, so reset + // span info + if (info[idx].depth == 0) { + branch_depth = 0; + last_branch_depth = 0; + cur_span = row_span{tid, tid + 1}; + } + + // add the contributing size of this row + size += cudf::type_dispatcher(col.type(), row_size_functor{}, col, cur_span); + + // if this is a list column, update the working span from our offsets + if (col.type().id() == type_id::LIST) { + column_device_view const& offsets = col.child(lists_column_view::offsets_column_index); + auto const base_offset = offsets.data()[col.offset()]; + cur_span.row_start = + offsets.data()[cur_span.row_start + col.offset()] - base_offset; + cur_span.row_end = offsets.data()[cur_span.row_end + col.offset()] - base_offset; + } + + last_branch_depth = info[idx].branch_depth_end; + } +} + +} // anonymous namespace + +/** + * @copydoc cudf::detail::row_bit_count + * + */ +std::unique_ptr row_bit_count(table_view const& t, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr) +{ + // no rows + if (t.num_rows() <= 0) { return cudf::make_empty_column(data_type{type_id::INT32}); } + + // flatten the hierarchy and determine some information about it. + std::vector cols; + std::vector info; + hierarchy_info h_info; + flatten_hierarchy(t.begin(), t.end(), cols, info, h_info, stream); + CUDF_EXPECTS(info.size() == cols.size(), "Size/info mismatch"); + + // create output buffer and view + auto output = cudf::make_fixed_width_column( + data_type{type_id::INT32}, t.num_rows(), mask_state::UNALLOCATED, stream, mr); + mutable_column_view mcv = output->mutable_view(); + + // simple case. 
if we have no complex types (lists, strings, etc), the per-row size is already + // trivially computed + if (h_info.complex_type_count <= 0) { + thrust::fill(rmm::exec_policy(stream), + mcv.begin(), + mcv.end(), + h_info.simple_per_row_size); + return output; + } + + // create a contiguous block of column_device_views + auto d_cols = contiguous_copy_column_device_views(cols, stream); + + // move stack info to the gpu + rmm::device_uvector d_info(info.size(), stream); + CUDA_TRY(cudaMemcpyAsync(d_info.data(), + info.data(), + sizeof(column_info) * info.size(), + cudaMemcpyHostToDevice, + stream.value())); + + // each thread needs to maintain a stack of row spans of size max_branch_depth. we will use + // shared memory to do this rather than allocating a potentially gigantic temporary buffer + // of memory of size (# input rows * sizeof(row_span) * max_branch_depth). + auto const shmem_per_thread = sizeof(row_span) * h_info.max_branch_depth; + int device_id; + CUDA_TRY(cudaGetDevice(&device_id)); + int shmem_limit_per_block; + CUDA_TRY( + cudaDeviceGetAttribute(&shmem_limit_per_block, cudaDevAttrMaxSharedMemoryPerBlock, device_id)); + constexpr int max_block_size = 256; + auto const block_size = + shmem_per_thread != 0 + ? std::min(max_block_size, shmem_limit_per_block / static_cast(shmem_per_thread)) + : max_block_size; + auto const shared_mem_size = shmem_per_thread * block_size; + // should we be aborting if we reach some extremely small block size, or just if we hit 0? 
+ CUDF_EXPECTS(block_size > 0, "Encountered a column hierarchy too complex for row_bit_count"); + + cudf::detail::grid_1d grid{t.num_rows(), block_size, 1}; + compute_row_sizes<<>>( + {std::get<1>(d_cols), cols.size()}, + {d_info.data(), info.size()}, + {mcv.data(), static_cast(t.num_rows())}, + h_info.max_branch_depth); + + return output; +} + +} // namespace detail + +/** + * @copydoc cudf::row_bit_count + * + */ +std::unique_ptr row_bit_count(table_view const& t, rmm::mr::device_memory_resource* mr) +{ + return detail::row_bit_count(t, rmm::cuda_stream_default, mr); +} + +} // namespace cudf diff --git a/cpp/tests/CMakeLists.txt b/cpp/tests/CMakeLists.txt index ab14c2577bb..082f039054e 100644 --- a/cpp/tests/CMakeLists.txt +++ b/cpp/tests/CMakeLists.txt @@ -162,7 +162,8 @@ ConfigureTest(TRANSFORM_TEST transform/integration/unary-transform-test.cpp transform/nans_to_null_test.cpp transform/mask_to_bools_test.cpp - transform/bools_to_mask_test.cpp) + transform/bools_to_mask_test.cpp + transform/row_bit_count_test.cu) ################################################################################################### # - interop tests ------------------------------------------------------------------------- diff --git a/cpp/tests/transform/row_bit_count_test.cu b/cpp/tests/transform/row_bit_count_test.cu new file mode 100644 index 00000000000..21e5c818197 --- /dev/null +++ b/cpp/tests/transform/row_bit_count_test.cu @@ -0,0 +1,596 @@ +/* + * Copyright (c) 2021, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +using namespace cudf; + +template +struct RowBitCountTyped : public cudf::test::BaseFixture { +}; + +TYPED_TEST_CASE(RowBitCountTyped, cudf::test::FixedWidthTypes); + +TYPED_TEST(RowBitCountTyped, SimpleTypes) +{ + using T = TypeParam; + + auto col = cudf::make_fixed_width_column(data_type{type_to_id()}, 16); + + table_view t({*col}); + auto result = cudf::row_bit_count(t); + + // expect size of the type per row + auto expected = make_fixed_width_column(data_type{type_id::INT32}, 16); + cudf::mutable_column_view mcv(*expected); + thrust::fill(rmm::exec_policy(0), + mcv.begin(), + mcv.end(), + sizeof(device_storage_type_t) * CHAR_BIT); + + CUDF_TEST_EXPECT_COLUMNS_EQUAL(*expected, *result); +} + +TYPED_TEST(RowBitCountTyped, SimpleTypesWithNulls) +{ + using T = TypeParam; + + auto iter = thrust::make_counting_iterator(0); + auto valids = cudf::detail::make_counting_transform_iterator( + 0, [](int i) { return i % 2 == 0 ? 
true : false; }); + cudf::test::fixed_width_column_wrapper col(iter, iter + 16, valids); + + table_view t({col}); + auto result = cudf::row_bit_count(t); + + // expect size of the type + 1 bit per row + auto expected = make_fixed_width_column(data_type{type_id::INT32}, 16); + cudf::mutable_column_view mcv(*expected); + thrust::fill(rmm::exec_policy(0), + mcv.begin(), + mcv.end(), + (sizeof(device_storage_type_t) * CHAR_BIT) + 1); + + CUDF_TEST_EXPECT_COLUMNS_EQUAL(*expected, *result); +} + +template +std::pair, std::unique_ptr> build_list_column() +{ + using LCW = cudf::test::lists_column_wrapper; + constexpr size_type type_size = sizeof(device_storage_type_t) * CHAR_BIT; + + // clang-format off + cudf::test::lists_column_wrapper col{ {{1, 2}, {3, 4, 5}}, + LCW{LCW{}}, + {LCW{10}}, + {{6, 7, 8}, {9}}, + {{-1, -2}, {-3, -4}}, + {{-5, -6, -7}, {-8, -9}} }; + // clang-format on + + // expected size = (num rows at level 1 + num_rows at level 2) + # values in the leaf + cudf::test::fixed_width_column_wrapper expected{ + ((4 + 8) * CHAR_BIT) + (type_size * 5), + ((4 + 0) * CHAR_BIT) + (type_size * 0), + ((4 + 4) * CHAR_BIT) + (type_size * 1), + ((4 + 8) * CHAR_BIT) + (type_size * 4), + ((4 + 8) * CHAR_BIT) + (type_size * 4), + ((4 + 8) * CHAR_BIT) + (type_size * 5)}; + + return {col.release(), expected.release()}; +} + +TYPED_TEST(RowBitCountTyped, Lists) +{ + using T = TypeParam; + + std::unique_ptr col; + std::unique_ptr expected_sizes; + std::tie(col, expected_sizes) = build_list_column(); + + table_view t({*col}); + auto result = cudf::row_bit_count(t); + + CUDF_TEST_EXPECT_COLUMNS_EQUAL(*expected_sizes, *result); +} + +TYPED_TEST(RowBitCountTyped, ListsWithNulls) +{ + using T = TypeParam; + using LCW = cudf::test::lists_column_wrapper; + constexpr size_type type_size = sizeof(device_storage_type_t) * CHAR_BIT; + + std::vector valids{true, false, true}; + std::vector valids2{false, true, false}; + std::vector valids3{true, false}; + + // clang-format off + 
cudf::test::lists_column_wrapper col{ {{1, 2}, {{3, 4, 5}, valids.begin()}}, + LCW{LCW{}}, + {LCW{10}}, + {{{{6, 7, 8}, valids2.begin()}, {9}}, valids3.begin()} }; + // clang-format on + + table_view t({col}); + auto result = cudf::row_bit_count(t); + + // expected size = (num rows at level 1 + num_rows at level 2) + # values in the leaf + validity + // where applicable + cudf::test::fixed_width_column_wrapper expected{ + ((4 + 8) * CHAR_BIT) + (type_size * 5) + 7, + ((4 + 0) * CHAR_BIT) + (type_size * 0), + ((4 + 4) * CHAR_BIT) + (type_size * 1) + 2, + ((4 + 8) * CHAR_BIT) + (type_size * 3) + 5}; + + CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, *result); +} + +struct RowBitCount : public cudf::test::BaseFixture { +}; + +TEST_F(RowBitCount, Strings) +{ + std::vector strings{"abc", "ï", "", "z", "bananas", "warp", "", "zing"}; + + cudf::test::strings_column_wrapper col(strings.begin(), strings.end()); + + table_view t({col}); + auto result = cudf::row_bit_count(t); + + // expect 1 offset (4 bytes) + length of string per row + auto size_iter = cudf::detail::make_counting_transform_iterator(0, [&strings](int i) { + return (static_cast(strings[i].size()) + sizeof(offset_type)) * CHAR_BIT; + }); + cudf::test::fixed_width_column_wrapper expected(size_iter, size_iter + strings.size()); + + CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, *result); +} + +TEST_F(RowBitCount, StringsWithNulls) +{ + // clang-format off + std::vector strings { "daïs", "def", "", "z", "bananas", "warp", "", "zing" }; + std::vector valids { 1, 0, 0, 1, 0, 1, 1, 1 }; + // clang-format on + + cudf::test::strings_column_wrapper col(strings.begin(), strings.end(), valids.begin()); + + table_view t({col}); + auto result = cudf::row_bit_count(t); + + // expect 1 offset (4 bytes) + (length of string, or 0 if null) + 1 validity bit per row + auto size_iter = cudf::detail::make_counting_transform_iterator(0, [&strings, &valids](int i) { + return ((static_cast(valids[i] ? 
strings[i].size() : 0) + sizeof(offset_type)) * + CHAR_BIT) + + 1; + }); + cudf::test::fixed_width_column_wrapper expected(size_iter, size_iter + strings.size()); + + CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, *result); +} + +std::pair, std::unique_ptr> build_struct_column() +{ + std::vector struct_validity{0, 1, 1, 1, 1, 0}; + std::vector strings{"abc", "def", "", "z", "bananas", "daïs"}; + + cudf::test::fixed_width_column_wrapper col0{0, 1, 2, 3, 4, 5}; + cudf::test::fixed_width_column_wrapper col1{{8, 9, 10, 11, 12, 13}, {1, 0, 1, 1, 1, 1}}; + cudf::test::strings_column_wrapper col2(strings.begin(), strings.end()); + + // creating a struct column will cause all child columns to be promoted to have validity + cudf::test::structs_column_wrapper struct_col({col0, col1, col2}, struct_validity); + + // expect (1 offset (4 bytes) + (length of string if row is valid) + 1 validity bit) + + // (1 float + 1 validity bit) + + // (1 int16_t + 1 validity bit) + + // (1 validity bit) + auto size_iter = + cudf::detail::make_counting_transform_iterator(0, [&strings, &struct_validity](int i) { + return (sizeof(float) * CHAR_BIT) + 1 + (sizeof(int16_t) * CHAR_BIT) + 1 + + (static_cast(strings[i].size()) * CHAR_BIT) + + (sizeof(offset_type) * CHAR_BIT) + 1 + 1; + }); + cudf::test::fixed_width_column_wrapper expected_sizes(size_iter, + size_iter + strings.size()); + + return {struct_col.release(), expected_sizes.release()}; +} + +TEST_F(RowBitCount, StructsNoNulls) +{ + std::vector strings{"abc", "daïs", "", "z", "bananas", "warp"}; + + cudf::test::fixed_width_column_wrapper col0{0, 1, 2, 3, 4, 5}; + cudf::test::fixed_width_column_wrapper col1{8, 9, 10, 11, 12, 13}; + cudf::test::strings_column_wrapper col2(strings.begin(), strings.end()); + + cudf::test::structs_column_wrapper struct_col({col0, col1, col2}); + + table_view t({struct_col}); + auto result = cudf::row_bit_count(t); + + // expect 1 offset (4 bytes) + (length of string) + 1 float + 1 int16_t + auto size_iter = 
cudf::detail::make_counting_transform_iterator(0, [&strings](int i) { + return ((sizeof(float) + sizeof(int16_t)) * CHAR_BIT) + + ((static_cast(strings[i].size()) + sizeof(offset_type)) * CHAR_BIT); + }); + cudf::test::fixed_width_column_wrapper expected(size_iter, size_iter + t.num_rows()); + + CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, *result); +} + +TEST_F(RowBitCount, StructsNulls) +{ + std::unique_ptr struct_col; + std::unique_ptr expected_sizes; + std::tie(struct_col, expected_sizes) = build_struct_column(); + table_view t({*struct_col}); + auto result = cudf::row_bit_count(t); + + CUDF_TEST_EXPECT_COLUMNS_EQUAL(*expected_sizes, *result); +} + +TEST_F(RowBitCount, StructsNested) +{ + // struct, int16> + cudf::test::fixed_width_column_wrapper col0{0, 1, 2, 3, 4, 5}; + cudf::test::structs_column_wrapper inner_struct({col0}); + + cudf::test::fixed_width_column_wrapper col1{8, 9, 10, 11, 12, 13}; + cudf::test::structs_column_wrapper struct_col({inner_struct, col1}); + + table_view t({struct_col}); + auto result = cudf::row_bit_count(t); + + // expect num_rows * (4 + 2) bytes + auto size_iter = + cudf::detail::make_counting_transform_iterator(0, [&](int i) { return (4 + 2) * CHAR_BIT; }); + cudf::test::fixed_width_column_wrapper expected(size_iter, size_iter + t.num_rows()); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, *result); +} + +std::pair, std::unique_ptr> build_nested_and_expected_column( + std::vector const& struct_validity) +{ + // tests the "branching" case -> list ...>>> + + // List, float, int16> + + // Inner list column + // clang-format off + cudf::test::lists_column_wrapper list{ + {1, 2, 3, 4, 5}, + {6, 7, 8}, + {33, 34, 35, 36, 37, 38, 39}, + {-1, -2}, + {-10, -11, -1, -20}, + {40, 41, 42}, + {100, 200, 300}, + {-100, -200, -300}}; + // clang-format on + + // floats + std::vector ages{5, 10, 15, 20, 4, 75, 16, -16}; + std::vector ages_validity = {1, 1, 1, 1, 0, 1, 0, 1}; + auto ages_column = + cudf::test::fixed_width_column_wrapper(ages.begin(), 
ages.end(), ages_validity.begin()); + + // int16 values + std::vector vals{-1, -2, -3, 1, 2, 3, 8, 9}; + auto i16_column = cudf::test::fixed_width_column_wrapper(vals.begin(), vals.end()); + + // Assemble struct column + auto struct_column = + cudf::test::structs_column_wrapper({list, ages_column, i16_column}, struct_validity); + + // wrap in a list + std::vector outer_offsets{0, 1, 1, 3, 6, 7, 8}; + cudf::test::fixed_width_column_wrapper outer_offsets_col(outer_offsets.begin(), + outer_offsets.end()); + auto const size = static_cast(outer_offsets_col).size() - 1; + + cudf::test::fixed_width_column_wrapper expected_sizes{276, 32, 520, 572, 212, 212}; + + return {cudf::make_lists_column(static_cast(size), + outer_offsets_col.release(), + struct_column.release(), + cudf::UNKNOWN_NULL_COUNT, + rmm::device_buffer{}), + expected_sizes.release()}; +} + +std::unique_ptr build_nested_column(std::vector const& struct_validity) +{ + // List>, Struct>> + + // Inner list column + // clang-format off + cudf::test::lists_column_wrapper list{ + {{1, 2, 3, 4, 5}, {2, 3}}, + {{6, 7, 8}, {8, 9}}, + {{1, 2}, {3, 4, 5}, {33, 34, 35, 36, 37, 38, 39}}}; + // clang-format on + + // Inner struct + std::vector vals{-1, -2, -3}; + auto i16_column = cudf::test::fixed_width_column_wrapper(vals.begin(), vals.end()); + auto inner_struct = cudf::test::structs_column_wrapper({i16_column}); + + // outer struct + auto outer_struct = cudf::test::structs_column_wrapper({list, inner_struct}, struct_validity); + + // wrap in a list + std::vector outer_offsets{0, 1, 1, 3}; + cudf::test::fixed_width_column_wrapper outer_offsets_col(outer_offsets.begin(), + outer_offsets.end()); + auto const size = static_cast(outer_offsets_col).size() - 1; + return make_lists_column(static_cast(size), + outer_offsets_col.release(), + outer_struct.release(), + cudf::UNKNOWN_NULL_COUNT, + rmm::device_buffer{}); +} + +TEST_F(RowBitCount, NestedTypes) +{ + // List, float, List, int16> + { + std::unique_ptr col_no_nulls; + 
std::unique_ptr expected_sizes; + std::tie(col_no_nulls, expected_sizes) = + build_nested_and_expected_column({1, 1, 1, 1, 1, 1, 1, 1}); + table_view no_nulls_t({*col_no_nulls}); + auto no_nulls_result = cudf::row_bit_count(no_nulls_t); + + auto col_nulls = build_nested_and_expected_column({0, 0, 1, 1, 1, 1, 1, 1}).first; + table_view nulls_t({*col_nulls}); + auto nulls_result = cudf::row_bit_count(nulls_t); + + // List, float, int16> + // + // this illustrates the difference between a row_bit_count + // returning a pre-gather result, or a post-gather result. + // + // in a post-gather situation, the nulls in the struct would result in the values + // nested in the list below to be dropped, resulting in smaller row sizes. + // + // however, for performance reasons, row_bit_count simply walks the data that is + // currently there. so list rows that are null, but have a real span of + // offsets (X, Y) instead of (X, X) will end up getting the child data for those + // rows included. + // + // if row_bit_count() is changed to return a post-gather result (which may be desirable), + // the nulls_result case below will start failing and will need to be changed. 
+ // + CUDF_TEST_EXPECT_COLUMNS_EQUAL(*expected_sizes, *no_nulls_result); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(*expected_sizes, *nulls_result); + } + + // List>, Struct>> + { + auto col_no_nulls = build_nested_column({1, 1, 1}); + table_view no_nulls_t({*col_no_nulls}); + auto no_nulls_result = cudf::row_bit_count(no_nulls_t); + + auto col_nulls = build_nested_column({1, 0, 1}); + table_view nulls_t({*col_nulls}); + auto nulls_result = cudf::row_bit_count(nulls_t); + + cudf::test::fixed_width_column_wrapper expected_sizes{372, 32, 840}; + + // same explanation as above + CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected_sizes, *no_nulls_result); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected_sizes, *nulls_result); + } + + // test pushing/popping multiple times within one struct, and branch depth > 1 + // + // Struct, float, List>, Struct, List, + // float>>, int8_t>> + { + cudf::test::lists_column_wrapper l0{{1, 2, 3}, {4, 5}, {6, 7, 8, 9}, {5}}; + cudf::test::lists_column_wrapper l1{ + {{-1, -2}, {3, 4}}, {{4, 5}, {6, 7, 8}}, {{-6, -7}, {2}}, {{-11, -11}, {-12, -12}, {3}}}; + cudf::test::lists_column_wrapper l2{{-1, -2}, {4, 5}, {-6, -7}, {1}}; + cudf::test::lists_column_wrapper l3{{-1, -2, 0}, {5}, {-1, -6, -7}, {1, 2}}; + + cudf::test::fixed_width_column_wrapper c0{1, 2, 3, 4}; + cudf::test::fixed_width_column_wrapper c1{1, 2, 3, 4}; + cudf::test::fixed_width_column_wrapper c2{1, 2, 3, 4}; + cudf::test::fixed_width_column_wrapper c3{11, 12, 13, 14}; + + // innermost List>> + auto innermost_struct = cudf::test::structs_column_wrapper({l3, c3}); + std::vector l4_offsets{0, 1, 2, 3, 4}; + cudf::test::fixed_width_column_wrapper l4_offsets_col(l4_offsets.begin(), + l4_offsets.end()); + auto const l4_size = l4_offsets.size() - 1; + auto l4 = cudf::make_lists_column(static_cast(l4_size), + l4_offsets_col.release(), + innermost_struct.release(), + cudf::UNKNOWN_NULL_COUNT, + rmm::device_buffer{}); + + // inner struct + std::vector> inner_struct_children; + 
inner_struct_children.push_back(l2.release()); + inner_struct_children.push_back(std::move(l4)); + auto inner_struct = cudf::test::structs_column_wrapper(std::move(inner_struct_children)); + + // outer struct + auto struct_col = cudf::test::structs_column_wrapper({c0, l0, c1, l1, inner_struct, c2}); + + table_view t({struct_col}); + auto result = cudf::row_bit_count(t); + + cudf::test::fixed_width_column_wrapper expected_sizes{648, 568, 664, 568}; + + CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected_sizes, *result); + } +} + +struct sum_functor { + size_type const* s0; + size_type const* s1; + size_type const* s2; + + size_type operator() __device__(int i) { return s0[i] + s1[i] + s2[i]; } +}; + +TEST_F(RowBitCount, Table) +{ + // complex nested column + std::unique_ptr col0; + std::unique_ptr col0_sizes; + std::tie(col0, col0_sizes) = build_nested_and_expected_column({1, 1, 1, 1, 1, 1, 1, 1}); + + // struct column + std::unique_ptr col1; + std::unique_ptr col1_sizes; + std::tie(col1, col1_sizes) = build_struct_column(); + + // list column + std::unique_ptr col2; + std::unique_ptr col2_sizes; + std::tie(col2, col2_sizes) = build_list_column(); + + table_view t({*col0, *col1, *col2}); + auto result = cudf::row_bit_count(t); + + // sum all column sizes + column_view cv0 = static_cast(*col0_sizes); + column_view cv1 = static_cast(*col1_sizes); + column_view cv2 = static_cast(*col2_sizes); + auto expected = cudf::make_fixed_width_column(data_type{type_id::INT32}, t.num_rows()); + cudf::mutable_column_view mcv(*expected); + thrust::transform( + rmm::exec_policy(0), + thrust::make_counting_iterator(0), + thrust::make_counting_iterator(0) + t.num_rows(), + mcv.begin(), + sum_functor{cv0.data(), cv1.data(), cv2.data()}); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(*expected, *result); +} + +TEST_F(RowBitCount, SlicedColumnsFixedWidth) +{ + auto const slice_size = 7; + cudf::test::fixed_width_column_wrapper c0_unsliced{1, 2, 3, 4, 5, 6, 7, 8, 9, 10}; + auto c0 = cudf::slice(c0_unsliced, {2, 2 
+ slice_size}); + + table_view t({c0}); + auto result = cudf::row_bit_count(t); + + cudf::test::fixed_width_column_wrapper expected{16, 16, 16, 16, 16, 16, 16}; + + CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, *result); +} + +TEST_F(RowBitCount, SlicedColumnsStrings) +{ + auto const slice_size = 7; + std::vector strings{ + "banana", "metric", "imperial", "abc", "daïs", "", "fire", "def", "cudf", "xyzw"}; + cudf::test::strings_column_wrapper c0_unsliced(strings.begin(), strings.end()); + auto c0 = cudf::slice(c0_unsliced, {3, 3 + slice_size}); + + table_view t({c0}); + auto result = cudf::row_bit_count(t); + + // expect 1 offset (4 bytes) + length of string per row + auto size_iter = cudf::detail::make_counting_transform_iterator(0, [&strings](int i) { + return (static_cast(strings[i].size()) + sizeof(offset_type)) * CHAR_BIT; + }); + cudf::test::fixed_width_column_wrapper expected(size_iter + 3, + size_iter + 3 + slice_size); + + CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, *result); +} + +TEST_F(RowBitCount, SlicedColumnsLists) +{ + auto const slice_size = 2; + cudf::test::lists_column_wrapper c0_unsliced{ + {{"banana", "v"}, {"cats"}}, + {{"dogs", "yay"}, {"xyz", ""}, {"daïs"}}, + {{"fast", "parrot"}, {"orange"}}, + {{"blue"}, {"red", "yellow"}, {"ultraviolet", "", "green"}}}; + auto c0 = cudf::slice(c0_unsliced, {1, 1 + slice_size}); + + table_view t({c0}); + auto result = cudf::row_bit_count(t); + + cudf::test::fixed_width_column_wrapper expected{408, 320}; + + CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, *result); +} + +TEST_F(RowBitCount, SlicedColumnsStructs) +{ + auto const slice_size = 7; + + cudf::test::fixed_width_column_wrapper c0{1, 2, 3, 4, 5, 6, 7, 8, 9, 10}; + std::vector strings{ + "banana", "metric", "imperial", "abc", "daïs", "", "fire", "def", "cudf", "xyzw"}; + cudf::test::strings_column_wrapper c1(strings.begin(), strings.end()); + + auto struct_col_unsliced = cudf::test::structs_column_wrapper({c0, c1}); + auto struct_col = 
cudf::slice(struct_col_unsliced, {3, 3 + slice_size}); + + table_view t({struct_col}); + auto result = cudf::row_bit_count(t); + + // expect 1 offset (4 bytes) + length of string per row + 1 int16_t per row + auto size_iter = cudf::detail::make_counting_transform_iterator(0, [&strings](int i) { + return (static_cast(strings[i].size()) + sizeof(offset_type) + sizeof(int16_t)) * + CHAR_BIT; + }); + cudf::test::fixed_width_column_wrapper expected(size_iter + 3, + size_iter + 3 + slice_size); + + CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, *result); +} + +TEST_F(RowBitCount, EmptyTable) +{ + { + cudf::table_view empty; + auto result = cudf::row_bit_count(empty); + CUDF_EXPECTS(result != nullptr && result->size() == 0, "Expected an empty column"); + } + + { + auto strings = cudf::strings::detail::make_empty_strings_column(0); + auto ints = cudf::make_empty_column(data_type{type_id::INT32}); + cudf::table_view empty({*strings, *ints}); + + auto result = cudf::row_bit_count(empty); + CUDF_EXPECTS(result != nullptr && result->size() == 0, "Expected an empty column"); + } +} \ No newline at end of file diff --git a/cpp/tests/utilities/column_utilities.cu b/cpp/tests/utilities/column_utilities.cu index cea66eced11..78a67464654 100644 --- a/cpp/tests/utilities/column_utilities.cu +++ b/cpp/tests/utilities/column_utilities.cu @@ -694,12 +694,13 @@ struct column_view_printer { get_nested_type_str(col) + (is_sliced ? "(sliced)" : "") + ":\n" + indent + "Length : " + std::to_string(lcv.size()) + "\n" + indent + "Offsets : " + (lcv.size() > 0 ? nested_offsets_to_string(lcv) : "") + "\n" + - (lcv.has_nulls() ? indent + "Null count: " + std::to_string(lcv.null_count()) + "\n" + - detail::to_string(bitmask_to_host(col), col.size(), indent) + "\n" - : "") + - indent + "Children :\n" + - (child.type().id() != type_id::LIST && child.has_nulls() - ? indent + detail::to_string(bitmask_to_host(child), child.size(), indent) + "\n" + (lcv.parent().nullable() + ? 
indent + "Null count: " + std::to_string(lcv.null_count()) + "\n" + + detail::to_string(bitmask_to_host(col), col.size(), indent) + "\n" + : "") + + // non-nested types don't typically display their null masks, so do it here for convenience. + (!is_nested(child.type()) && child.nullable() + ? " " + detail::to_string(bitmask_to_host(child), child.size(), indent) + "\n" : "") + (detail::to_string(child, ", ", indent + " ")) + "\n"; @@ -718,18 +719,25 @@ struct column_view_printer { out_stream << get_nested_type_str(col) << ":\n" << indent << "Length : " << view.size() << ":\n"; - if (view.has_nulls()) { + if (view.nullable()) { out_stream << indent << "Null count: " << view.null_count() << "\n" << detail::to_string(bitmask_to_host(col), col.size(), indent) << "\n"; } auto iter = thrust::make_counting_iterator(0); - std::transform(iter, - iter + view.num_children(), - std::ostream_iterator(out_stream, "\n"), - [&](size_type index) { - return detail::to_string(view.get_sliced_child(index), ", ", indent + " "); - }); + std::transform( + iter, + iter + view.num_children(), + std::ostream_iterator(out_stream, "\n"), + [&](size_type index) { + auto child = view.get_sliced_child(index); + + // non-nested types don't typically display their null masks, so do it here for convenience. + return (!is_nested(child.type()) && child.nullable() + ? 
" " + detail::to_string(bitmask_to_host(child), child.size(), indent) + "\n" + : "") + + detail::to_string(child, ", ", indent + " "); + }); out.push_back(out_stream.str()); } From 563edfa566e56073f57acc737f910da9c1de0246 Mon Sep 17 00:00:00 2001 From: Ashwin Srinath <3190405+shwina@users.noreply.github.com> Date: Mon, 29 Mar 2021 20:11:20 -0400 Subject: [PATCH 15/20] Join APIs that return gathermaps (#7454) Closes #6480 # C++ changes ## TL;DR * Adds join APIs that accept join keys and return gathermaps * Return type is a `unique_ptr>>` (rather than a `unique_ptr`), to accommodate join results that can be larger than `INT32_MAX` rows * Simplifies previous join APIs to not accept arguments relating to "common columns" -- instead, those APIs always return all the columns from the LHS/RHS. Users wanting finer control can use the gathermap-based APIs ## The problem The work in this PR was motivated by the need for simpler join APIs that give the user more flexibility in how they want to construct the result of a join. To explain the current problem, consider the `inner_join` API: ```c++ std::unique_ptr inner_join( cudf::table_view const& left, cudf::table_view const& right, std::vector const& left_on, std::vector const& right_on, std::vector> const& columns_in_common, null_equality compare_nulls = null_equality::EQUAL, rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); ``` In addition to the left and right tables (and corresponding keys), the API also accepts a `columns_in_common` argument. This is argument specifies pairs of columns from the LHS and RHS respectively, for which only a single column should appear in the result. That single column appears on the "left" side of the result. This makes the API somewhat complicated as well as inflexible. 
There is a "lower-level" join API that gives more control on which side the "common" columns should go, by providing an additional `common_columns_output_side` argument: ```c++ std::pair, std::unique_ptr> inner_join( cudf::table_view const& probe, std::vector const& probe_on, std::vector> const& columns_in_common, common_columns_output_side common_columns_output_side = common_columns_output_side::PROBE, null_equality compare_nulls = null_equality::EQUAL, rmm::cuda_stream_view stream = rmm::cuda_stream_default, rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()) const; ``` But even that offers only limited flexibility: for example, it doesn't allow the user to specify an arbitrary ordering of result columns, or omit columns altogether from the result. ## Proposed API The proposed API in this PR is: ```c++ std::pair>, std::unique_ptr>> inner_join(cudf::table_view const& left_keys, cudf::table_view const& right_keys, null_equality compare_nulls = null_equality::EQUAL, rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); ``` Note: * Rather than requiring the full left and right tables of the join, this API only needs the key columns from the left and right tables. * Rather than constructing the result of the join, this API returns the gathermaps which can be used to construct it. * For outer join, non-matches are represented by out-of-bound values in the gathermap. In conjunction with the `out_of_bounds_policy::NULLIFY` argument to `gather`, this will produce nulls in the appropriate locations of the result table. * The API returns a `std::unique_ptr>` rather than just `rmm::device_uvector` because of a Cython limitation that prevents wrapping functions whose return types do not provide a nullary (default) constructor. * The use of `rmm::device_uvector` allows the API to return results of size > `INT32_MAX`, which can occur easily in outer joins. 
# Python changes ## TL;DR * Add Cython bindings for the new C++ APIs * Rework join internals to interface with the new Cython APIs ## Changes/Improvements ### _Indexer One major change introduced in the join internals is the use of a new type `_Indexer` to represent a key column. Previously, join keys were represented by a numeric offset. This was for two reasons: * A join key could be either an index column or a data column, and the only way to refer to it unambiguously was by its offset -- a DataFrame can have an index column and a data column with the same name. * The C++ API required numeric offsets for the `left_on` and `right_on` arguments `_Indexer` provides a more convenient way to construct and represent join keys by allowing one to refer unambiguously to an index or data column of a `Frame`: ``` # >>> df # a # b # 4 1 # 5 2 # 6 3 # >>> _Indexer("a", column=True).get(df) # returns column "a" of df # >>> _Indexer("b", index=True).get(df) # returns index level "b" of df ``` ### Casting logic Some of the casting logic has been simplified since we no longer need to post-process (cast) the result returned by libcudf. Previously, we were accounting for `"right"` joins in our casting functions. But, since a right join is implemented in terms of a left join with the operands reversed, it turns out we never really needed to handle right joins separately. I have removed that and it simplifies casting logic further. ### Others * Renamed `casting_logic.py` to `_join_helpers.py` and included other join utilities there. * Added a subclass of `Merge` for handling semi/anti joins * Added an `assert_join_results_equal` helper to compare join results between Pandas and cuDF. libcudf can return join results with arbitrary row ordering, and we weren't accounting for that in some of our tests previously.
I'm a bit surprised we never ran into any test failures :) Authors: - Ashwin Srinath (@shwina) - Vyas Ramasubramani (@vyasr) Approvers: - Jake Hemstad (@jrhemstad) - Keith Kraus (@kkraus14) - Mike Wilson (@hyperbolic2346) - @brandon-b-miller - Mark Harris (@harrism) URL: https://github.com/rapidsai/cudf/pull/7454 --- cpp/benchmarks/join/join_benchmark.cu | 8 +- cpp/include/cudf/join.hpp | 440 ++++++---- cpp/include/cudf/table/table_view.hpp | 5 + cpp/src/copying/gather.cu | 4 +- cpp/src/join/hash_join.cu | 499 +++-------- cpp/src/join/hash_join.cuh | 143 ++-- cpp/src/join/join.cu | 339 +++++--- cpp/src/join/join_common_utils.hpp | 15 +- cpp/src/join/semi_join.cu | 194 +++-- cpp/tests/join/join_tests.cpp | 726 +++++++--------- cpp/tests/join/semi_join_tests.cpp | 807 +----------------- python/cudf/cudf/_lib/copying.pyx | 12 +- python/cudf/cudf/_lib/cpp/join.pxd | 56 +- .../cudf/cudf/_lib/cpp/table/table_view.pxd | 1 + python/cudf/cudf/_lib/join.pyx | 272 ++---- python/cudf/cudf/core/column/categorical.py | 3 + python/cudf/cudf/core/column/column.py | 16 +- python/cudf/cudf/core/column/numerical.py | 4 +- python/cudf/cudf/core/dataframe.py | 9 +- python/cudf/cudf/core/frame.py | 187 +--- python/cudf/cudf/core/index.py | 8 + python/cudf/cudf/core/join/__init__.py | 2 +- python/cudf/cudf/core/join/_join_helpers.py | 203 +++++ python/cudf/cudf/core/join/casting_logic.py | 207 ----- python/cudf/cudf/core/join/join.py | 638 +++++++------- python/cudf/cudf/core/multiindex.py | 22 +- python/cudf/cudf/core/series.py | 19 +- python/cudf/cudf/tests/test_joining.py | 189 ++-- python/cudf/cudf/tests/test_string.py | 40 +- 29 files changed, 1998 insertions(+), 3070 deletions(-) create mode 100644 python/cudf/cudf/core/join/_join_helpers.py delete mode 100644 python/cudf/cudf/core/join/casting_logic.py diff --git a/cpp/benchmarks/join/join_benchmark.cu b/cpp/benchmarks/join/join_benchmark.cu index bd013afc451..fa6afdd908c 100644 --- a/cpp/benchmarks/join/join_benchmark.cu +++ 
b/cpp/benchmarks/join/join_benchmark.cu @@ -105,12 +105,8 @@ static void BM_join(benchmark::State &state) for (auto _ : state) { cuda_event_timer raii(state, true, 0); - auto result = cudf::inner_join(probe_table, - build_table, - columns_to_join, - columns_to_join, - {{0, 0}}, - cudf::null_equality::UNEQUAL); + auto result = cudf::inner_join( + probe_table, build_table, columns_to_join, columns_to_join, cudf::null_equality::UNEQUAL); } } diff --git a/cpp/include/cudf/join.hpp b/cpp/include/cudf/join.hpp index b2c1296ccef..fcc0bcd444e 100644 --- a/cpp/include/cudf/join.hpp +++ b/cpp/include/cudf/join.hpp @@ -20,6 +20,7 @@ #include #include +#include #include @@ -30,6 +31,44 @@ namespace cudf { * @file */ +/** + * @brief Returns a pair of row index vectors corresponding to an + * inner join between the specified tables. + * + * The first returned vector contains the row indices from the left + * table that have a match in the right table (in unspecified order). + * The corresponding values in the second returned vector are + * the matched row indices from the right table. + * + * @code{.pseudo} + * Left: {{0, 1, 2}} + * Right: {{1, 2, 3}} + * Result: {{1, 2}, {0, 1}} + * + * Left: {{0, 1, 2}, {3, 4, 5}} + * Right: {{1, 2, 3}, {4, 6, 7}} + * Result: {{1}, {0}} + * + * @throw cudf::logic_error if number of elements in `left_keys` or `right_keys` + * mismatch. + * + * @param[in] left_keys The left table + * @param[in] right_keys The right table + * @param[in] compare_nulls controls whether null join-key values + * should match or not. + * @param mr Device memory resource used to allocate the returned table and columns' device memory + * + * @return A pair of vectors [`left_indices`, `right_indices`] that can be used to construct + * the result of performing an inner join between two tables with `left_keys` and `right_keys` + * as the join keys . 
+ */ +std::pair>, + std::unique_ptr>> +inner_join(cudf::table_view const& left_keys, + cudf::table_view const& right_keys, + null_equality compare_nulls = null_equality::EQUAL, + rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + /** * @brief Performs an inner join on the specified columns of two * tables (`left`, `right`) @@ -38,26 +77,13 @@ namespace cudf { * in the columns being joined on match. * * @code{.pseudo} - * Left a: {0, 1, 2} - * Right b: {1, 2, 3}, a: {1, 2, 5} + * Left: {{0, 1, 2}} + * Right: {{4, 9, 3}, {1, 2, 5}} * left_on: {0} * right_on: {1} - * columns_in_common: { {0, 1} } - * Result: { a: {1, 2}, b: {1, 2} } - * - * Left a: {0, 1, 2} - * Right b: {1, 2, 3}, c: {1, 2, 5} - * left_on: {0} - * right_on: {0} - * columns_in_common: { } - * Result: { a: {1, 2}, b: {1, 2}, c: {1, 2} } + * Result: {{1, 2}, {4, 9}, {1, 2}} * @endcode * - * @throw cudf::logic_error if `columns_in_common` contains a pair of indices - * (L, R) if L does not exist in `left_on` or R does not exist in `right_on`. - * @throw cudf::logic_error if `columns_in_common` contains a pair of indices - * (L, R) such that the location of `L` within `left_on` is not equal to - * location of R within `right_on` * @throw cudf::logic_error if number of elements in `left_on` or `right_on` * mismatch. * @throw cudf::logic_error if number of columns in either `left` or `right` @@ -73,59 +99,83 @@ namespace cudf { * @param[in] right_on The column indices from `right` to join on. * The column from `right` indicated by `right_on[i]` will be compared against the column * from `left` indicated by `left_on[i]`. - * @param[in] columns_in_common is a vector of pairs of column indices into - * `left` and `right`, respectively, that are "in common". For "common" - * columns, only a single output column will be produced, which is gathered - * from `left_on` columns. Else, for every column in `left_on` and `right_on`, - * an output column will be produced. 
For each of these pairs (L, R), L - * should exist in `left_on` and R should exist in `right_on`. * @param[in] compare_nulls controls whether null join-key values * should match or not. * @param mr Device memory resource used to allocate the returned table and columns' device memory * * @return Result of joining `left` and `right` tables on the columns - * specified by `left_on` and `right_on`. The resulting table will be joined columns of - * `left(including common columns)+right(excluding common columns)`. + * specified by `left_on` and `right_on`. */ std::unique_ptr inner_join( cudf::table_view const& left, cudf::table_view const& right, std::vector const& left_on, std::vector const& right_on, - std::vector> const& columns_in_common, null_equality compare_nulls = null_equality::EQUAL, rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); +/** + * @brief Returns a pair of row index vectors corresponding to a + * left join between the specified tables. + * + * The first returned vector contains all the row indices from the left + * table (in unspecified order). The corresponding value in the + * second returned vector is either (1) the row index of the matched row + * from the right table, if there is a match or (2) an unspecified + * out-of-bounds value. + * + * @code{.pseudo} + * Left: {{0, 1, 2}} + * Right: {{1, 2, 3}} + * Result: {{0, 1, 2}, {None, 0, 1}} + * + * Left: {{0, 1, 2}, {3, 4, 5}} + * Right: {{1, 2, 3}, {4, 6, 7}} + * Result: {{0, 1, 2}, {None, 0, None}} + * + * @throw cudf::logic_error if number of elements in `left_keys` or `right_keys` + * mismatch. + * + * @param[in] left_keys The left table + * @param[in] right_keys The right table + * @param[in] compare_nulls controls whether null join-key values + * should match or not. 
+ * @param mr Device memory resource used to allocate the returned table and columns' device memory + * + * @return A pair of vectors [`left_indices`, `right_indices`] that can be used to construct + * the result of performing a left join between two tables with `left_keys` and `right_keys` + * as the join keys . + */ +std::pair>, + std::unique_ptr>> +left_join(cudf::table_view const& left_keys, + cudf::table_view const& right_keys, + null_equality compare_nulls = null_equality::EQUAL, + rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + /** * @brief Performs a left join (also known as left outer join) on the * specified columns of two tables (`left`, `right`) * - * Left Join returns all the rows from the left table and those rows from the + * Left join returns all the rows from the left table and those rows from the * right table that match on the joined columns. * For rows from the right table that do not have a match, the corresponding * values in the left columns will be null. * * @code{.pseudo} - * Left a: {0, 1, 2} - * Right b: {1, 2, 3}, a: {1 ,2 ,5} + * Left: {{0, 1, 2}} + * Right: {{1, 2, 3}, {1, 2 ,5}} * left_on: {0} * right_on: {1} - * columns_in_common: { {0, 1} } - * Result: { a: {0, 1, 2}, b: {NULL, 1, 2} } + * Result: { {0, 1, 2}, {NULL, 1, 2}, {NULL, 1, 2} } * - * Left a: {0, 1, 2} - * Right b: {1, 2, 3}, c: {1, 2, 5} + * Left: {{0, 1, 2}} + * Right {{1, 2, 3}, {1, 2, 5}} * left_on: {0} * right_on: {0} - * columns_in_common: { } - * Result: { a: {0, 1, 2}, b: {NULL, 1, 2}, c: {NULL, 1, 2} } + * Result: { {0, 1, 2}, {NULL, 1, 2}, {NULL, 1, 2} } * @endcode * - * @throw cudf::logic_error if `columns_in_common` contains a pair of indices - * (L, R) if L does not exist in `left_on` or R does not exist in `right_on`. 
- * @throw cudf::logic_error if `columns_in_common` contains a pair of indices - * (L, R) such that the location of `L` within `left_on` is not equal to - * location of R within `right_on` * @throw cudf::logic_error if number of elements in `left_on` or `right_on` * mismatch. * @throw cudf::logic_error if number of columns in either `left` or `right` @@ -141,29 +191,59 @@ std::unique_ptr inner_join( * @param[in] right_on The column indices from `right` to join on. * The column from `right` indicated by `right_on[i]` will be compared against the column * from `left` indicated by `left_on[i]`. - * @param[in] columns_in_common is a vector of pairs of column indices into - * `left` and `right`, respectively, that are "in common". For "common" - * columns, only a single output column will be produced, which is gathered - * from `left_on` columns. Else, for every column in `left_on` and `right_on`, - * an output column will be produced. For each of these pairs (L, R), L - * should exist in `left_on` and R should exist in `right_on`. * @param[in] compare_nulls controls whether null join-key values * should match or not. * @param mr Device memory resource used to allocate the returned table and columns' device memory * * @return Result of joining `left` and `right` tables on the columns - * specified by `left_on` and `right_on`. The resulting table will be joined columns of - * `left(including common columns)+right(excluding common columns)`. + * specified by `left_on` and `right_on`. */ std::unique_ptr left_join( cudf::table_view const& left, cudf::table_view const& right, std::vector const& left_on, std::vector const& right_on, - std::vector> const& columns_in_common, null_equality compare_nulls = null_equality::EQUAL, rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); +/** + * @brief Returns a pair of row index vectors corresponding to a + * full join between the specified tables. 
+ * + * Taken pairwise, the values from the returned vectors are one of: + * (1) row indices corresponding to matching rows from the left and + * right tables, (2) a row index and an unspecified out-of-bounds value, + * representing a row from one table without a match in the other. + * + * @code{.pseudo} + * Left: {{0, 1, 2}} + * Right: {{1, 2, 3}} + * Result: {{0, 1, 2, None}, {None, 0, 1, 2}} + * + * Left: {{0, 1, 2}, {3, 4, 5}} + * Right: {{1, 2, 3}, {4, 6, 7}} + * Result: {{0, 1, 2, None, None}, {None, 0, None, 1, 2}} + * + * @throw cudf::logic_error if number of elements in `left_keys` or `right_keys` + * mismatch. + * + * @param[in] left_keys The left table + * @param[in] right_keys The right table + * @param[in] compare_nulls controls whether null join-key values + * should match or not. + * @param mr Device memory resource used to allocate the returned table and columns' device memory + * + * @return A pair of vectors [`left_indices`, `right_indices`] that can be used to construct + * the result of performing a full join between two tables with `left_keys` and `right_keys` + * as the join keys. + */ +std::pair>, + std::unique_ptr>> +full_join(cudf::table_view const& left_keys, + cudf::table_view const& right_keys, + null_equality compare_nulls = null_equality::EQUAL, + rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + /** * @brief Performs a full join (also known as full outer join) on the * specified columns of two tables (`left`, `right`) @@ -174,26 +254,19 @@ std::unique_ptr left_join( * values in the left columns will be null.
* * @code{.pseudo} - * Left a: {0, 1, 2} - * Right b: {1, 2, 3}, c: {1, 2, 5} + * Left: {{0, 1, 2}} + * Right: {{1, 2, 3}, {1, 2, 5}} * left_on: {0} * right_on: {1} - * columns_in_common: { {0, 1} } - * Result: { a: {0, 1, 2, NULL}, b: {NULL, 1, 2, 3}, c: {NULL, 1, 2, 5} } + * Result: { {0, 1, 2, NULL}, {NULL, 1, 2, 3}, {NULL, 1, 2, 5} } * - * Left a: {0, 1, 2} - * Right b: {1, 2, 3}, c: {1, 2, 5} + * Left: {{0, 1, 2}} + * Right: {{1, 2, 3}, {1, 2, 5}} * left_on: {0} * right_on: {0} - * columns_in_common: { } - * Result: { a: {0, 1, 2, NULL}, b: {NULL, 1, 2, 3}, c: {NULL, 1, 2, 5} } + * Result: { {0, 1, 2, NULL}, {NULL, 1, 2, 3}, {NULL, 1, 2, 5} } * @endcode * - * @throw cudf::logic_error if `columns_in_common` contains a pair of indices - * (L, R) if L does not exist in `left_on` or R does not exist in `right_on`. - * @throw cudf::logic_error if `columns_in_common` contains a pair of indices - * (L, R) such that the location of `L` within `left_on` is not equal to - * location of R within `right_on` * @throw cudf::logic_error if number of elements in `left_on` or `right_on` * mismatch. * @throw cudf::logic_error if number of columns in either `left` or `right` @@ -209,28 +282,54 @@ std::unique_ptr left_join( * @param[in] right_on The column indices from `right` to join on. * The column from `right` indicated by `right_on[i]` will be compared against the column * from `left` indicated by `left_on[i]`. - * @param[in] columns_in_common is a vector of pairs of column indices into - * `left` and `right`, respectively, that are "in common". For "common" - * columns, only a single output column will be produced, which is gathered - * from `left_on` columns. Else, for every column in `left_on` and `right_on`, - * an output column will be produced. For each of these pairs (L, R), L - * should exist in `left_on` and R should exist in `right_on`. * @param[in] compare_nulls controls whether null join-key values * should match or not. 
* @param mr Device memory resource used to allocate the returned table and columns' device memory * * @return Result of joining `left` and `right` tables on the columns - * specified by `left_on` and `right_on`. The resulting table will be joined columns of - * `left(including common columns)+right(excluding common columns)`. + * specified by `left_on` and `right_on`. */ std::unique_ptr full_join( cudf::table_view const& left, cudf::table_view const& right, std::vector const& left_on, std::vector const& right_on, - std::vector> const& columns_in_common, null_equality compare_nulls = null_equality::EQUAL, rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + +/** + * @brief Returns a vector of row indices corresponding to a left semi join + * between the specified tables. + * + * The returned vector contains the row indices from the left table + * for which there is a matching row in the right table. + * + * @code{.pseudo} + * TableA: {{0, 1, 2}} + * TableB: {{1, 2, 3}} + * right_on: {1} + * Result: {1, 2} + * @endcode + * + * @throw cudf::logic_error if number of columns in either + * `left_keys` or `right_keys` table is 0 or exceeds MAX_JOIN_SIZE + * + * @param[in] left_keys The left table + * @param[in] right_keys The right table + * @param[in] compare_nulls controls whether null join-key values + * should match or not. + * @param mr Device memory resource used to allocate the returned table and columns' device memory + * + * @return A vector `left_indices` that can be used to construct + * the result of performing a left semi join between two tables with + * `left_keys` and `right_keys` as the join keys . 
+ */ +std::unique_ptr> left_semi_join( + cudf::table_view const& left_keys, + cudf::table_view const& right_keys, + null_equality compare_nulls = null_equality::EQUAL, + rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + /** * @brief Performs a left semi join on the specified columns of two * tables (`left`, `right`) @@ -239,24 +338,20 @@ std::unique_ptr full_join( * returns rows that exist in the right table. * * @code{.pseudo} - * TableA a: {0, 1, 2} - * TableB b: {1, 2, 3}, a: {1, 2, 5} + * TableA: {{0, 1, 2}} + * TableB: {{1, 2, 3}, {1, 2, 5}} * left_on: {0} * right_on: {1} - * return_columns: { 0 } - * Result: { a: {1, 2} } + * Result: { {1, 2} } * - * TableA a: {0, 1, 2}, c: {1, 2, 5} - * TableB b: {1, 2, 3} + * TableA {{0, 1, 2}, {1, 2, 5}} + * TableB {{1, 2, 3}} * left_on: {0} * right_on: {0} - * return_columns: { 1 } - * Result: { c: {1, 2} } + * Result: { {1, 2}, {2, 5} } * @endcode * - * @throw cudf::logic_error if the number of columns in either `left` or `right` table is 0 - * @throw cudf::logic_error if the number of returned columns is 0 - * @throw cudf::logic_error if the number of elements in `left_on` and `right_on` are not equal + * @throw cudf::logic_error if the number of columns in either `left_keys` or `right_keys` is 0 * * @param[in] left The left table * @param[in] right The right table @@ -268,22 +363,49 @@ std::unique_ptr full_join( * The column from `right` indicated by `right_on[i]` * will be compared against the column from `left` * indicated by `left_on[i]`. - * @param[in] return_columns A vector of column indices from `left` to - * include in the returned table. * @param[in] compare_nulls Controls whether null join-key values should match or not. * @param[in] mr Device memory resource used to allocate the returned table's * device memory * * @return Result of joining `left` and `right` tables on the columns - * specified by `left_on` and `right_on`. 
The resulting table - * will contain `return_columns` from `left` that match in right. + * specified by `left_on` and `right_on`. */ std::unique_ptr left_semi_join( cudf::table_view const& left, cudf::table_view const& right, std::vector const& left_on, std::vector const& right_on, - std::vector const& return_columns, + null_equality compare_nulls = null_equality::EQUAL, + rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + +/** + * @brief Returns a vector of row indices corresponding to a left anti join + * between the specified tables. + * + * The returned vector contains the row indices from the left table + * for which there is no matching row in the right table. + * + * @code{.pseudo} + * TableA: {{0, 1, 2}} + * TableB: {{1, 2, 3}} + * Result: {0} + * @endcode + * + * @throw cudf::logic_error if the number of columns in either `left_keys` or `right_keys` is 0 + * + * @param[in] left_keys The left table + * @param[in] right_keys The right table + * @param[in] compare_nulls controls whether null join-key values + * should match or not. + * @param mr Device memory resource used to allocate the returned table and columns' device memory + * + * @return A column `left_indices` that can be used to construct + * the result of performing a left anti join between two tables with + * `left_keys` and `right_keys` as the join keys . + */ +std::unique_ptr> left_anti_join( + cudf::table_view const& left_keys, + cudf::table_view const& right_keys, null_equality compare_nulls = null_equality::EQUAL, rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); @@ -295,24 +417,23 @@ std::unique_ptr left_semi_join( * returns rows that do not exist in the right table. 
* * @code{.pseudo} - * TableA a: {0, 1, 2} - * TableB b: {1, 2, 3}, a: {1, 2, 5} + * TableA: {{0, 1, 2}} + * TableB: {{1, 2, 3}, {1, 2, 5}} * left_on: {0} * right_on: {1} - * return_columns: { 0 } - * Result: { a: {0} } + * Result: {{0}, {1}} * - * TableA a: {0, 1, 2}, c: {1, 2, 5} - * TableB b: {1, 2, 3} + * TableA: {{0, 1, 2}, {1, 2, 5}} + * TableB: {{1, 2, 3}} * left_on: {0} * right_on: {0} - * return_columns: { 1 } - * Result: { c: {1} } + * Result: { {0} {1} } * @endcode * - * @throw cudf::logic_error if the number of columns in either `left` or `right` table is 0 - * @throw cudf::logic_error if the number of returned columns is 0 - * @throw cudf::logic_error if the number of elements in `left_on` and `right_on` are not equal + * @throw cudf::logic_error if number of elements in `left_on` or `right_on` + * mismatch. + * @throw cudf::logic_error if number of columns in either `left` or `right` + * table is 0 or exceeds MAX_JOIN_SIZE * * @param[in] left The left table * @param[in] right The right table @@ -324,22 +445,18 @@ std::unique_ptr left_semi_join( * The column from `right` indicated by `right_on[i]` * will be compared against the column from `left` * indicated by `left_on[i]`. - * @param[in] return_columns A vector of column indices from `left` to - * include in the returned table. * @param[in] compare_nulls Controls whether null join-key values should match or not. * @param[in] mr Device memory resource used to allocate the returned table's * device memory * * @return Result of joining `left` and `right` tables on the columns - * specified by `left_on` and `right_on`. The resulting table - * will contain `return_columns` from `left` that match in right. + * specified by `left_on` and `right_on`. 
*/ std::unique_ptr left_anti_join( cudf::table_view const& left, cudf::table_view const& right, std::vector const& left_on, std::vector const& right_on, - std::vector const& return_columns, null_equality compare_nulls = null_equality::EQUAL, rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); @@ -393,128 +510,75 @@ class hash_join { * undefined. * * @param build The build table, from which the hash table is built. - * @param build_on The column indices from `build` to join on. * @param compare_nulls Controls whether null join-key values should match or not. * @param stream CUDA stream used for device memory operations and kernel launches */ hash_join(cudf::table_view const& build, - std::vector const& build_on, null_equality compare_nulls, rmm::cuda_stream_view stream = rmm::cuda_stream_default); /** - * @brief Controls where common columns will be output for a inner join. - */ - enum class common_columns_output_side { - PROBE, ///< Common columns is output in the probe portion of the table pair returned by - ///< `inner_join`. - BUILD ///< Common columns is output in the build portion of the table pair returned by - ///< `inner_join`. - }; - - /** - * @brief Performs an inner join by probing in the internal hash table. - * - * Given that it is sometimes desired to choose the small table to be the `build` side for an - * inner join,a (`probe`, `build`) table pair, which contains the probe and build portions of the - * logical joined table respectively, is returned so that caller can freely rearrange them to - * restore the logical `left` `right` order. This introduces some extra logic about where "common" - * columns should go, i.e. the legacy `cudf::inner_join()` API always outputs "common" columns in - * the `left` portion and the corresponding columns in the `right` portion are omitted. 
To better - * align with the legacy `cudf::inner_join()` API, a `common_columns_output_side` parameter is - * introduced to specify whether "common" columns should go in `probe` or `build` portion. - * - * More details please @see cudf::inner_join(). + * Returns the row indices that can be used to construct the result of performing + * an inner join between two tables. @see cudf::inner_join(). * * @param probe The probe table, from which the tuples are probed. - * @param probe_on The column indices from `probe` to join on. - * @param columns_in_common is a vector of pairs of column indices into - * `probe` and `build`, respectively, that are "in common". For "common" - * columns, only a single output column will be produced, which is gathered - * from `probe_on` columns or `build_on` columns if `probe_output_side` is LEFT or RIGHT. - * Else, for every column in `probe_on` and `build_on`, - * an output column will be produced. For each of these pairs (P, B), P - * should exist in `probe_on` and B should exist in `build_on`. - * @param common_columns_output_side @see `common_columns_output_side`. * @param compare_nulls Controls whether null join-key values should match or not. * @param mr Device memory resource used to allocate the returned table and columns' device * memory. * @param stream CUDA stream used for device memory operations and kernel launches * - * @return Table pair of (`probe`, `build`) of joining both tables on the columns - * specified by `probe_on` and `build_on`. The resulting table pair will be joined columns of - * (`probe(including common columns)`, `build(excluding common columns)`) if - * `common_columns_output_side` is `PROBE`, or (`probe(excluding common columns)`, - * `build(including common columns)`) if `common_columns_output_side` is `BUILD`. 
+ * @return A pair of columns [`left_indices`, `right_indices`] that can be used to construct + * the result of performing an inner join between two tables with `build` and `probe` + * as the the join keys . */ - std::pair, std::unique_ptr> inner_join( - cudf::table_view const& probe, - std::vector const& probe_on, - std::vector> const& columns_in_common, - common_columns_output_side common_columns_output_side = common_columns_output_side::PROBE, - null_equality compare_nulls = null_equality::EQUAL, - rmm::cuda_stream_view stream = rmm::cuda_stream_default, - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()) const; + std::pair>, + std::unique_ptr>> + inner_join(cudf::table_view const& probe, + null_equality compare_nulls = null_equality::EQUAL, + rmm::cuda_stream_view stream = rmm::cuda_stream_default, + rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()) const; /** - * @brief Performs a left join by probing in the internal hash table. - * - * More details please @see cudf::left_join(). + * Returns the row indices that can be used to construct the result of performing + * a left join between two tables. @see cudf::left_join(). * * @param probe The probe table, from which the tuples are probed. - * @param probe_on The column indices from `probe` to join on. - * @param columns_in_common is a vector of pairs of column indices into - * `probe` and `build`, respectively, that are "in common". For "common" - * columns, only a single output column will be produced, which is gathered - * from `probe_on` columns. Else, for every column in `probe_on` and `build_on`, - * an output column will be produced. For each of these pairs (P, B), P - * should exist in `probe_on` and B should exist in `build_on`. * @param compare_nulls Controls whether null join-key values should match or not. * @param mr Device memory resource used to allocate the returned table and columns' device * memory. 
* @param stream CUDA stream used for device memory operations and kernel launches * - * @return Result of joining `build` and `probe` tables on the columns - * specified by `build_on` and `probe_on`. The resulting table will be joined columns of - * `probe(including common columns)+build(excluding common columns)`. + * @return A pair of columns [`left_indices`, `right_indices`] that can be used to construct + * the result of performing a left join between two tables with `build` and `probe` + * as the the join keys . */ - std::unique_ptr left_join( - cudf::table_view const& probe, - std::vector const& probe_on, - std::vector> const& columns_in_common, - null_equality compare_nulls = null_equality::EQUAL, - rmm::cuda_stream_view stream = rmm::cuda_stream_default, - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()) const; + std::pair>, + std::unique_ptr>> + left_join(cudf::table_view const& probe, + null_equality compare_nulls = null_equality::EQUAL, + rmm::cuda_stream_view stream = rmm::cuda_stream_default, + rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()) const; /** - * @brief Performs a full join by probing in the internal hash table. - * - * More details please @see cudf::full_join(). + * Returns the row indices that can be used to construct the result of performing + * a full join between two tables. @see cudf::full_join(). * * @param probe The probe table, from which the tuples are probed. - * @param probe_on The column indices from `probe` to join on. - * @param columns_in_common is a vector of pairs of column indices into - * `probe` and `build`, respectively, that are "in common". For "common" - * columns, only a single output column will be produced, which is gathered - * from `probe_on` columns. Else, for every column in `probe_on` and `build_on`, - * an output column will be produced. For each of these pairs (P, B), P - * should exist in `probe_on` and B should exist in `build_on`. 
* @param compare_nulls Controls whether null join-key values should match or not. * @param mr Device memory resource used to allocate the returned table and columns' device * memory. * @param stream CUDA stream used for device memory operations and kernel launches * - * @return Result of joining `build` and `probe` tables on the columns - * specified by `build_on` and `probe_on`. The resulting table will be joined columns of - * `probe(including common columns)+build(excluding common columns)`. + * @return A pair of columns [`left_indices`, `right_indices`] that can be used to construct + * the result of performing a full join between two tables with `build` and `probe` + * as the the join keys . */ - std::unique_ptr full_join( - cudf::table_view const& probe, - std::vector const& probe_on, - std::vector> const& columns_in_common, - null_equality compare_nulls = null_equality::EQUAL, - rmm::cuda_stream_view stream = rmm::cuda_stream_default, - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()) const; + std::pair>, + std::unique_ptr>> + full_join(cudf::table_view const& probe, + null_equality compare_nulls = null_equality::EQUAL, + rmm::cuda_stream_view stream = rmm::cuda_stream_default, + rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()) const; private: struct hash_join_impl; diff --git a/cpp/include/cudf/table/table_view.hpp b/cpp/include/cudf/table/table_view.hpp index 5cdecab9115..a225e590f9a 100644 --- a/cpp/include/cudf/table/table_view.hpp +++ b/cpp/include/cudf/table/table_view.hpp @@ -126,6 +126,11 @@ class table_view_base { */ size_type num_rows() const noexcept { return _num_rows; } + /** + * @brief Returns true if `num_columns()` returns zero, or false otherwise + */ + size_type is_empty() const noexcept { return num_columns() == 0; } + table_view_base() = default; ~table_view_base() = default; diff --git a/cpp/src/copying/gather.cu b/cpp/src/copying/gather.cu index dc153e9395d..181752d18e8 100644 
--- a/cpp/src/copying/gather.cu +++ b/cpp/src/copying/gather.cu @@ -43,9 +43,7 @@ std::unique_ptr
gather(table_view const& source_table, if (neg_indices == negative_index_policy::ALLOWED) { cudf::size_type n_rows = source_table.num_rows(); - auto idx_converter = [n_rows] __device__(size_type in) { - return ((in % n_rows) + n_rows) % n_rows; - }; + auto idx_converter = [n_rows] __device__(size_type in) { return in < 0 ? in + n_rows : in; }; return gather(source_table, thrust::make_transform_iterator(map_begin, idx_converter), thrust::make_transform_iterator(map_end, idx_converter), diff --git a/cpp/src/join/hash_join.cu b/cpp/src/join/hash_join.cu index d827d03a6c0..5a6ad8892de 100644 --- a/cpp/src/join/hash_join.cu +++ b/cpp/src/join/hash_join.cu @@ -13,6 +13,7 @@ * See the License for the specific language governing permissions and * limitations under the License. */ +#include #include #include @@ -20,93 +21,44 @@ #include #include -#include +#include #include +#include #include namespace cudf { namespace detail { -/** - * @brief Returns a vector with non-common indices which is set difference - * between `[0, num_columns)` and index values in common_column_indices - * - * @param num_columns The number of columns, which represents column indices - * from `[0, num_columns)` in a table - * @param common_column_indices A vector of common indices which needs to be - * excluded from `[0, num_columns)` - * - * @return vector A vector containing only the indices which are not present in - * `common_column_indices` - */ -auto non_common_column_indices(size_type num_columns, - std::vector const &common_column_indices) -{ - CUDF_EXPECTS(common_column_indices.size() <= static_cast(num_columns), - "Too many columns in common"); - std::vector all_column_indices(num_columns); - std::iota(std::begin(all_column_indices), std::end(all_column_indices), 0); - std::vector sorted_common_column_indices{common_column_indices}; - std::sort(std::begin(sorted_common_column_indices), std::end(sorted_common_column_indices)); - std::vector non_common_column_indices(num_columns - 
common_column_indices.size()); - std::set_difference(std::cbegin(all_column_indices), - std::cend(all_column_indices), - std::cbegin(sorted_common_column_indices), - std::cend(sorted_common_column_indices), - std::begin(non_common_column_indices)); - return non_common_column_indices; -} - std::pair, std::unique_ptr
> get_empty_joined_table( - table_view const &probe, - table_view const &build, - std::vector> const &columns_in_common, - cudf::hash_join::common_columns_output_side common_columns_output_side) + table_view const &probe, table_view const &build) { - std::vector columns_to_exclude(columns_in_common.size()); - std::transform(columns_in_common.begin(), - columns_in_common.end(), - columns_to_exclude.begin(), - [common_columns_output_side](auto &col) { - return common_columns_output_side == hash_join::common_columns_output_side::PROBE - ? col.second - : col.first; - }); - std::vector non_common_indices = non_common_column_indices( - common_columns_output_side == hash_join::common_columns_output_side::PROBE - ? build.num_columns() - : probe.num_columns(), - columns_to_exclude); std::unique_ptr
empty_probe = empty_like(probe); std::unique_ptr
empty_build = empty_like(build); - if (common_columns_output_side == hash_join::common_columns_output_side::PROBE) { - table_view empty_build_view = empty_build->select(non_common_indices); - empty_build = std::make_unique
(empty_build_view); - } else { - table_view empty_probe_view = empty_probe->select(non_common_indices); - empty_probe = std::make_unique
(empty_probe_view); - } return std::make_pair(std::move(empty_probe), std::move(empty_build)); } -VectorPair concatenate_vector_pairs(VectorPair &a, VectorPair &b) +VectorPair concatenate_vector_pairs(VectorPair &a, VectorPair &b, rmm::cuda_stream_view stream) { - CUDF_EXPECTS((a.first.size() == a.second.size()), + CUDF_EXPECTS((a.first->size() == a.second->size()), "Mismatch between sizes of vectors in vector pair"); - CUDF_EXPECTS((b.first.size() == b.second.size()), + CUDF_EXPECTS((b.first->size() == b.second->size()), "Mismatch between sizes of vectors in vector pair"); - if (a.first.empty()) { - return b; - } else if (b.first.empty()) { - return a; + if (a.first->is_empty()) { + return std::move(b); + } else if (b.first->is_empty()) { + return std::move(a); } - auto original_size = a.first.size(); - a.first.resize(a.first.size() + b.first.size()); - a.second.resize(a.second.size() + b.second.size()); - thrust::copy(b.first.begin(), b.first.end(), a.first.begin() + original_size); - thrust::copy(b.second.begin(), b.second.end(), a.second.begin() + original_size); - return a; + auto original_size = a.first->size(); + a.first->resize(a.first->size() + b.first->size(), stream); + a.second->resize(a.second->size() + b.second->size(), stream); + thrust::copy( + rmm::exec_policy(stream), b.first->begin(), b.first->end(), a.first->begin() + original_size); + thrust::copy(rmm::exec_policy(stream), + b.second->begin(), + b.second->end(), + a.second->begin() + original_size); + return std::move(a); } template @@ -133,16 +85,20 @@ struct valid_range { * * @return Pair of vectors containing the left join indices complement */ -std::pair, rmm::device_vector> -get_left_join_indices_complement(rmm::device_vector &right_indices, - size_type left_table_row_count, - size_type right_table_row_count, - rmm::cuda_stream_view stream) +std::pair>, + std::unique_ptr>> +get_left_join_indices_complement( + std::unique_ptr> &right_indices, + size_type left_table_row_count, + size_type 
right_table_row_count, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource *mr = rmm::mr::get_current_device_resource()) { // Get array of indices that do not appear in right_indices // Vector allocated for unmatched result - rmm::device_vector right_indices_complement(right_table_row_count); + auto right_indices_complement = + std::make_unique>(right_table_row_count, stream); // If left table is empty in a full join call then all rows of the right table // should be represented in the joined indices. This is an optimization since @@ -151,12 +107,16 @@ get_left_join_indices_complement(rmm::device_vector &right_indices, // produce exactly the same result as the else path but will be faster. if (left_table_row_count == 0) { thrust::sequence(rmm::exec_policy(stream), - right_indices_complement.begin(), - right_indices_complement.end(), + right_indices_complement->begin(), + right_indices_complement->end(), 0); } else { // Assume all the indices in invalid_index_map are invalid - rmm::device_vector invalid_index_map(right_table_row_count, 1); + auto invalid_index_map = + std::make_unique>(right_table_row_count, stream); + thrust::uninitialized_fill( + rmm::exec_policy(stream), invalid_index_map->begin(), invalid_index_map->end(), int32_t{1}); + // Functor to check for index validity since left joins can create invalid indices valid_range valid(0, right_table_row_count); @@ -164,11 +124,11 @@ get_left_join_indices_complement(rmm::device_vector &right_indices, // Thus specifying that those locations are valid thrust::scatter_if(rmm::exec_policy(stream), thrust::make_constant_iterator(0), - thrust::make_constant_iterator(0) + right_indices.size(), - right_indices.begin(), // Index locations - right_indices.begin(), // Stencil - Check if index location is valid - invalid_index_map.begin(), // Output indices - valid); // Stencil Predicate + thrust::make_constant_iterator(0) + right_indices->size(), + right_indices->begin(), // Index locations + 
right_indices->begin(), // Stencil - Check if index location is valid + invalid_index_map->begin(), // Output indices + valid); // Stencil Predicate size_type begin_counter = static_cast(0); size_type end_counter = static_cast(right_table_row_count); @@ -176,15 +136,19 @@ get_left_join_indices_complement(rmm::device_vector &right_indices, size_type indices_count = thrust::copy_if(rmm::exec_policy(stream), thrust::make_counting_iterator(begin_counter), thrust::make_counting_iterator(end_counter), - invalid_index_map.begin(), - right_indices_complement.begin(), + invalid_index_map->begin(), + right_indices_complement->begin(), thrust::identity()) - - right_indices_complement.begin(); - right_indices_complement.resize(indices_count); + right_indices_complement->begin(); + right_indices_complement->resize(indices_count, stream); } - rmm::device_vector left_invalid_indices(right_indices_complement.size(), - JoinNoneValue); + auto left_invalid_indices = + std::make_unique>(right_indices_complement->size(), stream); + thrust::fill(rmm::exec_policy(stream), + left_invalid_indices->begin(), + left_invalid_indices->end(), + JoinNoneValue); return std::make_pair(std::move(left_invalid_indices), std::move(right_indices_complement)); } @@ -195,8 +159,6 @@ get_left_join_indices_complement(rmm::device_vector &right_indices, * @throw cudf::logic_error if the number of columns in `build` table is 0. * @throw cudf::logic_error if the number of rows in `build` table is 0. * @throw cudf::logic_error if insertion to the hash table fails. - * @throw std::out_of_range if elements of `build_on` exceed the number of columns in the `build` - * table. * * @param build Table of columns used to build join hash. * @param compare_nulls Controls whether null join-key values should match or not. @@ -256,19 +218,22 @@ std::unique_ptr> build_join_ * @return Join output indices vector pair. 
*/ template -std::pair, rmm::device_vector> probe_join_hash_table( - cudf::table_device_view build_table, - cudf::table_device_view probe_table, - multimap_type const &hash_table, - null_equality compare_nulls, - rmm::cuda_stream_view stream) +std::pair>, + std::unique_ptr>> +probe_join_hash_table(cudf::table_device_view build_table, + cudf::table_device_view probe_table, + multimap_type const &hash_table, + null_equality compare_nulls, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource *mr) { size_type estimated_size = estimate_join_output_size( build_table, probe_table, hash_table, compare_nulls, stream); // If the estimated output size is zero, return immediately if (estimated_size == 0) { - return std::make_pair(rmm::device_vector{}, rmm::device_vector{}); + return std::make_pair(std::make_unique>(0, stream, mr), + std::make_unique>(0, stream, mr)); } // Because we are approximating the number of joined elements, our approximation @@ -278,12 +243,13 @@ std::pair, rmm::device_vector> probe_jo rmm::device_scalar write_index(0, stream); size_type join_size{0}; - rmm::device_vector left_indices; - rmm::device_vector right_indices; + auto left_indices = std::make_unique>(0, stream, mr); + auto right_indices = std::make_unique>(0, stream, mr); + auto current_estimated_size = estimated_size; do { - left_indices.resize(estimated_size); - right_indices.resize(estimated_size); + left_indices->resize(estimated_size, stream); + right_indices->resize(estimated_size, stream); constexpr int block_size{DEFAULT_JOIN_BLOCK_SIZE}; detail::grid_1d config(probe_table.num_rows(), block_size); @@ -298,8 +264,8 @@ std::pair, rmm::device_vector> probe_jo probe_table, hash_probe, equality, - left_indices.data().get(), - right_indices.data().get(), + left_indices->data(), + right_indices->data(), write_index.data(), estimated_size); @@ -310,179 +276,11 @@ std::pair, rmm::device_vector> probe_jo estimated_size *= 2; } while ((current_estimated_size < join_size)); - 
left_indices.resize(join_size); - right_indices.resize(join_size); + left_indices->resize(join_size, stream); + right_indices->resize(join_size, stream); return std::make_pair(std::move(left_indices), std::move(right_indices)); } -/** - * @brief Combines the non common probe, common probe, non common build and common build - * columns in the correct order according to `common_columns_output_side` to form the joined - * (`probe`, `build`) table pair. - * - * @param probe_noncommon_cols Columns obtained by gathering non common probe columns. - * @param probe_noncommon_col_indices Output locations of non common probe columns in the probe - * portion. - * @param probe_common_col_indices Output locations of common probe columns in the probe portion. - * @param build_noncommon_cols Columns obtained by gathering non common build columns. - * @param build_noncommon_col_indices Output locations of non common build columns in the build - * portion. - * @param build_common_col_indices Output locations of common build columns in the build portion. - * @param common_cols Columns obtained by gathering common columns from `probe` and `build` tables - * in the build portion. - * @param common_columns_output_side @see cudf::hash_join::common_columns_output_side. - * - * @return Table pair of (`probe`, `build`). - */ -std::pair, std::unique_ptr
> combine_join_columns( - std::vector> &&probe_noncommon_cols, - std::vector const &probe_noncommon_col_indices, - std::vector const &probe_common_col_indices, - std::vector> &&build_noncommon_cols, - std::vector const &build_noncommon_col_indices, - std::vector const &build_common_col_indices, - std::vector> &&common_cols, - cudf::hash_join::common_columns_output_side common_columns_output_side) -{ - if (common_columns_output_side == cudf::hash_join::common_columns_output_side::PROBE) { - std::vector> probe_cols(probe_noncommon_cols.size() + - common_cols.size()); - for (size_t i = 0; i < probe_noncommon_cols.size(); ++i) { - probe_cols.at(probe_noncommon_col_indices.at(i)) = std::move(probe_noncommon_cols.at(i)); - } - for (size_t i = 0; i < common_cols.size(); ++i) { - probe_cols.at(probe_common_col_indices.at(i)) = std::move(common_cols.at(i)); - } - return std::make_pair(std::make_unique(std::move(probe_cols)), - std::make_unique(std::move(build_noncommon_cols))); - } else { - std::vector> build_cols(build_noncommon_cols.size() + - common_cols.size()); - for (size_t i = 0; i < build_noncommon_cols.size(); ++i) { - build_cols.at(build_noncommon_col_indices.at(i)) = std::move(build_noncommon_cols.at(i)); - } - for (size_t i = 0; i < common_cols.size(); ++i) { - build_cols.at(build_common_col_indices.at(i)) = std::move(common_cols.at(i)); - } - return std::make_pair(std::make_unique(std::move(probe_noncommon_cols)), - std::make_unique(std::move(build_cols))); - } -} - -/** - * @brief Gathers rows from `probe` and `build` table and returns a (`probe`, `build`) table pair, - * which contains the probe and build portions of the logical joined table respectively. - * - * @tparam JoinKind The type of join to be performed - * - * @param probe Probe side table - * @param build build side table - * @param joined_indices Pair of vectors containing row indices from which - * `probe` and `build` tables are gathered. 
If any row index is out of bounds, - * the contribution in the output `table` will be NULL. - * @param columns_in_common is a vector of pairs of column indices - * from tables `probe` and `build` respectively, that are "in common". - * For "common" columns, only a single output column will be produced. - * For an inner or left join, the result will be gathered from the column in - * `probe`. For a full join, the result will be gathered from both common - * columns in `probe` and `build` and concatenated to form a single column. - * @param common_columns_output_side @see cudf::hash_join::common_columns_output_side. - * - * @return Table pair of (`probe`, `build`) containing the rows from `probe` and - * `build` specified by `joined_indices`. - * Columns in `columns_in_common` will be included in either `probe` or `build` portion as - * `common_columns_output_side` indicates. Final form would look like - * (`probe(including common columns)`, `build(excluding common columns)`) if - * `common_columns_output_side` is `PROBE`, or (`probe(excluding common columns)`, - * `build(including common columns)`) if `common_columns_output_side` is `BUILD`. - */ -template -std::pair, std::unique_ptr
> construct_join_output_df( - table_view const &probe, - table_view const &build, - VectorPair &joined_indices, - std::vector> const &columns_in_common, - cudf::hash_join::common_columns_output_side common_columns_output_side, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource *mr) -{ - std::vector probe_common_col; - probe_common_col.reserve(columns_in_common.size()); - std::vector build_common_col; - build_common_col.reserve(columns_in_common.size()); - for (const auto &c : columns_in_common) { - probe_common_col.push_back(c.first); - build_common_col.push_back(c.second); - } - std::vector probe_noncommon_col = - non_common_column_indices(probe.num_columns(), probe_common_col); - std::vector build_noncommon_col = - non_common_column_indices(build.num_columns(), build_common_col); - - out_of_bounds_policy const bounds_policy = JoinKind != join_kind::INNER_JOIN - ? out_of_bounds_policy::NULLIFY - : out_of_bounds_policy::DONT_CHECK; - - std::unique_ptr
common_table = std::make_unique
(); - // Construct the joined columns - if (join_kind::FULL_JOIN == JoinKind) { - auto complement_indices = get_left_join_indices_complement( - joined_indices.second, probe.num_rows(), build.num_rows(), stream); - if (not columns_in_common.empty()) { - auto common_from_build = detail::gather(build.select(build_common_col), - complement_indices.second.begin(), - complement_indices.second.end(), - bounds_policy, - stream, - rmm::mr::get_current_device_resource()); - auto common_from_probe = detail::gather(probe.select(probe_common_col), - joined_indices.first.begin(), - joined_indices.first.end(), - bounds_policy, - stream, - rmm::mr::get_current_device_resource()); - common_table = cudf::detail::concatenate( - std::vector({common_from_build->view(), common_from_probe->view()}), - stream, - mr); - } - joined_indices = concatenate_vector_pairs(complement_indices, joined_indices); - } else { - if (not columns_in_common.empty()) { - common_table = detail::gather(probe.select(probe_common_col), - joined_indices.first.begin(), - joined_indices.first.end(), - bounds_policy, - stream, - mr); - } - } - - // Construct the probe non common columns - std::unique_ptr
probe_table = detail::gather(probe.select(probe_noncommon_col), - joined_indices.first.begin(), - joined_indices.first.end(), - bounds_policy, - stream, - mr); - - std::unique_ptr
build_table = detail::gather(build.select(build_noncommon_col), - joined_indices.second.begin(), - joined_indices.second.end(), - bounds_policy, - stream, - mr); - - return combine_join_columns(probe_table->release(), - probe_noncommon_col, - probe_common_col, - build_table->release(), - build_noncommon_col, - build_common_col, - common_table->release(), - common_columns_output_side); -} - std::unique_ptr combine_table_pair(std::unique_ptr &&left, std::unique_ptr &&right) { @@ -499,147 +297,112 @@ std::unique_ptr combine_table_pair(std::unique_ptr &&l hash_join::hash_join_impl::~hash_join_impl() = default; hash_join::hash_join_impl::hash_join_impl(cudf::table_view const &build, - std::vector const &build_on, null_equality compare_nulls, rmm::cuda_stream_view stream) - : _build(build), - _build_selected(build.select(build_on)), - _build_on(build_on), - _hash_table(nullptr) + : _build(build), _hash_table(nullptr) { CUDF_FUNC_RANGE(); CUDF_EXPECTS(0 != _build.num_columns(), "Hash join build table is empty"); CUDF_EXPECTS(_build.num_rows() < cudf::detail::MAX_JOIN_SIZE, "Build column size is too big for hash join"); - if (_build_on.empty() || 0 == build.num_rows()) { return; } + if (0 == build.num_rows()) { return; } - _hash_table = build_join_hash_table(_build_selected, compare_nulls, stream); + _hash_table = build_join_hash_table(_build, compare_nulls, stream); } -std::pair, std::unique_ptr> -hash_join::hash_join_impl::inner_join( - cudf::table_view const &probe, - std::vector const &probe_on, - std::vector> const &columns_in_common, - common_columns_output_side common_columns_output_side, - null_equality compare_nulls, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource *mr) const +std::pair>, + std::unique_ptr>> +hash_join::hash_join_impl::inner_join(cudf::table_view const &probe, + null_equality compare_nulls, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource *mr) const { CUDF_FUNC_RANGE(); - return compute_hash_join( - probe, 
probe_on, columns_in_common, common_columns_output_side, compare_nulls, stream, mr); + return compute_hash_join(probe, compare_nulls, stream, mr); } -std::unique_ptr hash_join::hash_join_impl::left_join( - cudf::table_view const &probe, - std::vector const &probe_on, - std::vector> const &columns_in_common, - null_equality compare_nulls, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource *mr) const +std::pair>, + std::unique_ptr>> +hash_join::hash_join_impl::left_join(cudf::table_view const &probe, + null_equality compare_nulls, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource *mr) const { CUDF_FUNC_RANGE(); - auto probe_build_pair = - compute_hash_join(probe, - probe_on, - columns_in_common, - common_columns_output_side::PROBE, - compare_nulls, - stream, - mr); - return cudf::detail::combine_table_pair(std::move(probe_build_pair.first), - std::move(probe_build_pair.second)); + return compute_hash_join(probe, compare_nulls, stream, mr); } -std::unique_ptr hash_join::hash_join_impl::full_join( - cudf::table_view const &probe, - std::vector const &probe_on, - std::vector> const &columns_in_common, - null_equality compare_nulls, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource *mr) const +std::pair>, + std::unique_ptr>> +hash_join::hash_join_impl::full_join(cudf::table_view const &probe, + null_equality compare_nulls, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource *mr) const { CUDF_FUNC_RANGE(); - auto probe_build_pair = - compute_hash_join(probe, - probe_on, - columns_in_common, - common_columns_output_side::PROBE, - compare_nulls, - stream, - mr); - return cudf::detail::combine_table_pair(std::move(probe_build_pair.first), - std::move(probe_build_pair.second)); + return compute_hash_join(probe, compare_nulls, stream, mr); } template -std::pair, std::unique_ptr> -hash_join::hash_join_impl::compute_hash_join( - cudf::table_view const &probe, - std::vector const &probe_on, - std::vector> const 
&columns_in_common, - common_columns_output_side common_columns_output_side, - null_equality compare_nulls, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource *mr) const +std::pair>, + std::unique_ptr>> +hash_join::hash_join_impl::compute_hash_join(cudf::table_view const &probe, + null_equality compare_nulls, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource *mr) const { CUDF_EXPECTS(0 != probe.num_columns(), "Hash join probe table is empty"); CUDF_EXPECTS(probe.num_rows() < cudf::detail::MAX_JOIN_SIZE, "Probe column size is too big for hash join"); - CUDF_EXPECTS(_build_on.size() == probe_on.size(), + CUDF_EXPECTS(_build.num_columns() == probe.num_columns(), "Mismatch in number of columns to be joined on"); - CUDF_EXPECTS(std::all_of(columns_in_common.begin(), - columns_in_common.end(), - [this, &probe_on](auto pair) { - size_t p = std::find(probe_on.begin(), probe_on.end(), pair.first) - - probe_on.begin(); - size_t b = std::find(_build_on.begin(), _build_on.end(), pair.second) - - _build_on.begin(); - return (p != probe_on.size()) && (b != _build_on.size()) && (p == b); - }), - "Invalid values passed to columns_in_common"); - - if (is_trivial_join(probe, _build, probe_on, _build_on, JoinKind)) { - return get_empty_joined_table(probe, _build, columns_in_common, common_columns_output_side); + if (is_trivial_join(probe, _build, JoinKind)) { + return std::make_pair(std::make_unique>(0, stream, mr), + std::make_unique>(0, stream, mr)); } - auto probe_selected = probe.select(probe_on); - CUDF_EXPECTS(std::equal(std::cbegin(_build_selected), - std::cend(_build_selected), - std::cbegin(probe_selected), - std::cend(probe_selected), + CUDF_EXPECTS(std::equal(std::cbegin(_build), + std::cend(_build), + std::cbegin(probe), + std::cend(probe), [](const auto &b, const auto &p) { return b.type() == p.type(); }), "Mismatch in joining column data types"); - constexpr cudf::detail::join_kind ProbeJoinKind = (JoinKind == 
cudf::detail::join_kind::FULL_JOIN) - ? cudf::detail::join_kind::LEFT_JOIN - : JoinKind; - auto joined_indices = probe_join_indices(probe_selected, compare_nulls, stream); - return cudf::detail::construct_join_output_df( - probe, _build, joined_indices, columns_in_common, common_columns_output_side, stream, mr); + return probe_join_indices(probe, compare_nulls, stream, mr); } template -std::enable_if_t, rmm::device_vector>> +std::pair>, + std::unique_ptr>> hash_join::hash_join_impl::probe_join_indices(cudf::table_view const &probe, null_equality compare_nulls, - rmm::cuda_stream_view stream) const + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource *mr) const { // Trivial left join case - exit early - if (!_hash_table && JoinKind == cudf::detail::join_kind::LEFT_JOIN) { - return get_trivial_left_join_indices(probe, stream); + if (!_hash_table && JoinKind != cudf::detail::join_kind::INNER_JOIN) { + return get_trivial_left_join_indices(probe, stream, mr); } CUDF_EXPECTS(_hash_table, "Hash table of hash join is null."); - auto build_table = cudf::table_device_view::create(_build_selected, stream); + auto build_table = cudf::table_device_view::create(_build, stream); auto probe_table = cudf::table_device_view::create(probe, stream); - return cudf::detail::probe_join_hash_table( - *build_table, *probe_table, *_hash_table, compare_nulls, stream); + + constexpr cudf::detail::join_kind ProbeJoinKind = (JoinKind == cudf::detail::join_kind::FULL_JOIN) + ? 
cudf::detail::join_kind::LEFT_JOIN + : JoinKind; + auto join_indices = cudf::detail::probe_join_hash_table( + *build_table, *probe_table, *_hash_table, compare_nulls, stream, mr); + + if (JoinKind == cudf::detail::join_kind::FULL_JOIN) { + auto complement_indices = detail::get_left_join_indices_complement( + join_indices.second, probe.num_rows(), _build.num_rows(), stream, mr); + join_indices = detail::concatenate_vector_pairs(join_indices, complement_indices, stream); + } + return join_indices; } } // namespace cudf diff --git a/cpp/src/join/hash_join.cuh b/cpp/src/join/hash_join.cuh index b37f228f6d3..aaa25e8f941 100644 --- a/cpp/src/join/hash_join.cuh +++ b/cpp/src/join/hash_join.cuh @@ -15,6 +15,9 @@ */ #pragma once +#include +#include +#include #include #include @@ -25,7 +28,7 @@ #include #include -#include +#include #include #include @@ -178,19 +181,29 @@ size_type estimate_join_output_size(table_device_view build_table, * * @param left Table of left columns to join * @param stream CUDA stream used for device memory operations and kernel launches + * @param mr Device memory resource used to allocate the result * * @return Join output indices vector pair */ -inline std::pair, rmm::device_vector> -get_trivial_left_join_indices(table_view const& left, rmm::cuda_stream_view stream) +inline std::pair>, + std::unique_ptr>> +get_trivial_left_join_indices( + table_view const& left, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()) { - rmm::device_vector left_indices(left.num_rows()); - thrust::sequence(rmm::exec_policy(stream), left_indices.begin(), left_indices.end(), 0); - rmm::device_vector right_indices(left.num_rows()); - thrust::fill(rmm::exec_policy(stream), right_indices.begin(), right_indices.end(), JoinNoneValue); + auto left_indices = std::make_unique>(left.num_rows(), stream, mr); + thrust::sequence(rmm::exec_policy(stream), left_indices->begin(), left_indices->end(), 0); + auto right_indices = 
+ std::make_unique>(left.num_rows(), stream, mr); + thrust::fill( + rmm::exec_policy(stream), right_indices->begin(), right_indices->end(), JoinNoneValue); return std::make_pair(std::move(left_indices), std::move(right_indices)); } +std::pair, std::unique_ptr
> get_empty_joined_table( + table_view const& probe, table_view const& build); + std::unique_ptr combine_table_pair(std::unique_ptr&& left, std::unique_ptr&& right); @@ -207,106 +220,52 @@ struct hash_join::hash_join_impl { private: cudf::table_view _build; - cudf::table_view _build_selected; - std::vector _build_on; std::unique_ptr> _hash_table; public: /** - * @brief Constructor that internally builds the hash table based on the given `build` table and - * column indices specified by `build_on` for subsequent probe calls. + * @brief Constructor that internally builds the hash table based on the given `build` table * * @throw cudf::logic_error if the number of columns in `build` table is 0. * @throw cudf::logic_error if the number of rows in `build` table exceeds MAX_JOIN_SIZE. - * @throw std::out_of_range if elements of `build_on` exceed the number of columns in the `build` - * table. * * @param build The build table, from which the hash table is built. - * @param build_on The column indices from `build` to join on. * @param compare_nulls Controls whether null join-key values should match or not. 
*/ hash_join_impl(cudf::table_view const& build, - std::vector const& build_on, null_equality compare_nulls, rmm::cuda_stream_view stream = rmm::cuda_stream_default); - std::pair, std::unique_ptr> inner_join( - cudf::table_view const& probe, - std::vector const& probe_on, - std::vector> const& columns_in_common, - common_columns_output_side common_columns_output_side, - null_equality compare_nulls, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) const; - - std::unique_ptr left_join( - cudf::table_view const& probe, - std::vector const& probe_on, - std::vector> const& columns_in_common, - null_equality compare_nulls, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) const; - - std::unique_ptr full_join( - cudf::table_view const& probe, - std::vector const& probe_on, - std::vector> const& columns_in_common, - null_equality compare_nulls, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) const; + std::pair>, + std::unique_ptr>> + inner_join(cudf::table_view const& probe, + null_equality compare_nulls, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr) const; + + std::pair>, + std::unique_ptr>> + left_join(cudf::table_view const& probe, + null_equality compare_nulls, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr) const; + + std::pair>, + std::unique_ptr>> + full_join(cudf::table_view const& probe, + null_equality compare_nulls, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr) const; private: - /** - * @brief Performs hash join by probing the columns provided in `probe` as per - * the joining indices given in `probe_on` and returns a (`probe`, `_build`) table pair, which - * contains the probe and build portions of the logical joined table respectively. - * - * @throw cudf::logic_error if `columns_in_common` contains a pair of indices - * (`P`, `B`) where `P` does not exist in `probe_on` or `B` does not exist in - * `_build_on`. 
- * @throw cudf::logic_error if `columns_in_common` contains a pair of indices - * (`P`, `B`) such that the location of `P` within `probe_on` is not equal to - * the location of `B` within `_build_on`. - * @throw cudf::logic_error if the number of elements in `probe_on` and - * `_build_on` are not equal. - * @throw cudf::logic_error if the number of columns in `probe` is 0. - * @throw cudf::logic_error if the number of rows in `probe` table exceeds MAX_JOIN_SIZE. - * @throw std::out_of_range if elements of `probe_on` exceed the number of columns in the `probe` - * table. - * @throw cudf::logic_error if types do not match between joining columns. - * - * @tparam JoinKind The type of join to be performed. - * - * @param probe The probe table. - * @param probe_on The column's indices from `probe` to join on. - * Column `i` from `probe_on` will be compared against column `i` of `_build_on`. - * @param columns_in_common is a vector of pairs of column indices into - * `probe` and `_build`, respectively, that are "in common". For "common" - * columns, only a single output column will be produced, which is gathered - * from `probe_on` columns. Else, for every column in `probe_on` and `_build_on`, - * an output column will be produced. For each of these pairs (P, B), P - * should exist in `probe_on` and B should exist in `_build_on`. - * @param common_columns_output_side @see cudf::hash_join::common_columns_output_side. - * @param compare_nulls Controls whether null join-key values should match or not. - * @param mr Device memory resource used to allocate the returned table's device memory. - * @param stream CUDA stream used for device memory operations and kernel launches. - * - * @return Table pair of (`probe`, `_build`) of joining both tables on the columns - * specified by `probe_on` and `_build_on`. 
The resulting table pair will be joined columns of - * (`probe(including common columns)`, `_build(excluding common columns)`) if - * `common_columns_output_side` is `PROBE`, or (`probe(excluding common columns)`, - * `_build(including common columns)`) if `common_columns_output_side` is `BUILD`. - */ template - std::pair, std::unique_ptr> compute_hash_join( - cudf::table_view const& probe, - std::vector const& probe_on, - std::vector> const& columns_in_common, - common_columns_output_side common_columns_output_side, - null_equality compare_nulls, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) const; + std::pair>, + std::unique_ptr>> + compute_hash_join(cudf::table_view const& probe, + null_equality compare_nulls, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr) const; /** * @brief Probes the `_hash_table` built from `_build` for tuples in `probe_table`, @@ -320,15 +279,17 @@ struct hash_join::hash_join_impl { * @param probe_table Table of probe side columns to join. * @param compare_nulls Controls whether null join-key values should match or not. * @param stream CUDA stream used for device memory operations and kernel launches. + * @param mr Device memory resource used to allocate the returned vectors. * * @return Join output indices vector pair. */ template - std::enable_if_t, rmm::device_vector>> + std::pair>, + std::unique_ptr>> probe_join_indices(cudf::table_view const& probe, null_equality compare_nulls, - rmm::cuda_stream_view stream) const; + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr) const; }; } // namespace cudf diff --git a/cpp/src/join/join.cu b/cpp/src/join/join.cu index ce27cfcd616..f2e4bab02c6 100644 --- a/cpp/src/join/join.cu +++ b/cpp/src/join/join.cu @@ -26,68 +26,102 @@ namespace cudf { namespace detail { -std::unique_ptr
inner_join( - table_view const& left_input, - table_view const& right_input, - std::vector const& left_on, - std::vector const& right_on, - std::vector> const& columns_in_common, - null_equality compare_nulls, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) +std::pair>, + std::unique_ptr>> +inner_join(table_view const& left_input, + table_view const& right_input, + null_equality compare_nulls, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr) { // Make sure any dictionary columns have matched key sets. // This will return any new dictionary columns created as well as updated table_views. auto matched = cudf::dictionary::detail::match_dictionaries( - {left_input.select(left_on), right_input.select(right_on)}, + {left_input, right_input}, stream, rmm::mr::get_current_device_resource()); // temporary objects returned // now rebuild the table views with the updated ones - auto const left = scatter_columns(matched.second.front(), left_on, left_input); - auto const right = scatter_columns(matched.second.back(), right_on, right_input); + auto const left = matched.second.front(); + auto const right = matched.second.back(); // For `inner_join`, we can freely choose either the `left` or `right` table to use for // building/probing the hash map. Because building is typically more expensive than probing, we // build the hash map from the smaller table. 
if (right.num_rows() > left.num_rows()) { - cudf::hash_join hj_obj(left, left_on, compare_nulls, stream); - auto actual_columns_in_common = columns_in_common; - std::for_each(actual_columns_in_common.begin(), actual_columns_in_common.end(), [](auto& pair) { - std::swap(pair.first, pair.second); - }); - auto probe_build_pair = hj_obj.inner_join(right, - right_on, - actual_columns_in_common, - cudf::hash_join::common_columns_output_side::BUILD, - compare_nulls, - stream, - mr); - return cudf::detail::combine_table_pair(std::move(probe_build_pair.second), - std::move(probe_build_pair.first)); + cudf::hash_join hj_obj(left, compare_nulls, stream); + auto result = hj_obj.inner_join(right, compare_nulls, stream, mr); + return std::make_pair(std::move(result.second), std::move(result.first)); } else { - cudf::hash_join hj_obj(right, right_on, compare_nulls, stream); - auto probe_build_pair = hj_obj.inner_join(left, - left_on, - columns_in_common, - cudf::hash_join::common_columns_output_side::PROBE, - compare_nulls, - stream, - mr); - return cudf::detail::combine_table_pair(std::move(probe_build_pair.first), - std::move(probe_build_pair.second)); + cudf::hash_join hj_obj(right, compare_nulls, stream); + return hj_obj.inner_join(left, compare_nulls, stream, mr); } } -std::unique_ptr
left_join( - table_view const& left_input, - table_view const& right_input, - std::vector const& left_on, - std::vector const& right_on, - std::vector> const& columns_in_common, - null_equality compare_nulls, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) +std::unique_ptr
inner_join(table_view const& left_input, + table_view const& right_input, + std::vector const& left_on, + std::vector const& right_on, + null_equality compare_nulls, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr) +{ + // Make sure any dictionary columns have matched key sets. + // This will return any new dictionary columns created as well as updated table_views. + auto matched = cudf::dictionary::detail::match_dictionaries( + {left_input.select(left_on), right_input.select(right_on)}, + stream, + rmm::mr::get_current_device_resource()); // temporary objects returned + + // now rebuild the table views with the updated ones + auto const left = scatter_columns(matched.second.front(), left_on, left_input); + auto const right = scatter_columns(matched.second.back(), right_on, right_input); + + auto join_indices = inner_join(left.select(left_on), right.select(right_on), compare_nulls, mr); + std::unique_ptr
left_result = detail::gather(left, + join_indices.first->begin(), + join_indices.first->end(), + out_of_bounds_policy::DONT_CHECK, + stream, + mr); + std::unique_ptr
right_result = detail::gather(right, + join_indices.second->begin(), + join_indices.second->end(), + out_of_bounds_policy::DONT_CHECK, + stream, + mr); + return combine_table_pair(std::move(left_result), std::move(right_result)); +} + +std::pair>, + std::unique_ptr>> +left_join(table_view const& left_input, + table_view const& right_input, + null_equality compare_nulls, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr) +{ + // Make sure any dictionary columns have matched key sets. + // This will return any new dictionary columns created as well as updated table_views. + auto matched = cudf::dictionary::detail::match_dictionaries( + {left_input, right_input}, // these should match + stream, + rmm::mr::get_current_device_resource()); // temporary objects returned + // now rebuild the table views with the updated ones + table_view const left = matched.second.front(); + table_view const right = matched.second.back(); + + cudf::hash_join hj_obj(right, compare_nulls, stream); + return hj_obj.left_join(left, compare_nulls, stream, mr); +} + +std::unique_ptr
left_join(table_view const& left_input, + table_view const& right_input, + std::vector const& left_on, + std::vector const& right_on, + null_equality compare_nulls, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr) { // Make sure any dictionary columns have matched key sets. // This will return any new dictionary columns created as well as updated table_views. @@ -99,19 +133,58 @@ std::unique_ptr
left_join( table_view const left = scatter_columns(matched.second.front(), left_on, left_input); table_view const right = scatter_columns(matched.second.back(), right_on, right_input); - cudf::hash_join hj_obj(right, right_on, compare_nulls, stream); - return hj_obj.left_join(left, left_on, columns_in_common, compare_nulls, stream, mr); + auto join_indices = left_join(left.select(left_on), right.select(right_on), compare_nulls); + + if ((left_on.empty() || right_on.empty()) || + is_trivial_join(left, right, cudf::detail::join_kind::LEFT_JOIN)) { + auto probe_build_pair = get_empty_joined_table(left, right); + return cudf::detail::combine_table_pair(std::move(probe_build_pair.first), + std::move(probe_build_pair.second)); + } + std::unique_ptr
left_result = detail::gather(left, + join_indices.first->begin(), + join_indices.first->end(), + out_of_bounds_policy::NULLIFY, + stream, + mr); + std::unique_ptr
right_result = detail::gather(right, + join_indices.second->begin(), + join_indices.second->end(), + out_of_bounds_policy::NULLIFY, + stream, + mr); + return combine_table_pair(std::move(left_result), std::move(right_result)); } -std::unique_ptr
full_join( - table_view const& left_input, - table_view const& right_input, - std::vector const& left_on, - std::vector const& right_on, - std::vector> const& columns_in_common, - null_equality compare_nulls, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) +std::pair>, + std::unique_ptr>> +full_join(table_view const& left_input, + table_view const& right_input, + null_equality compare_nulls, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr) +{ + // Make sure any dictionary columns have matched key sets. + // This will return any new dictionary columns created as well as updated table_views. + auto matched = cudf::dictionary::detail::match_dictionaries( + {left_input, right_input}, // these should match + stream, + rmm::mr::get_current_device_resource()); // temporary objects returned + // now rebuild the table views with the updated ones + table_view const left = matched.second.front(); + table_view const right = matched.second.back(); + + cudf::hash_join hj_obj(right, compare_nulls, stream); + return hj_obj.full_join(left, compare_nulls, stream, mr); +} + +std::unique_ptr
full_join(table_view const& left_input, + table_view const& right_input, + std::vector const& left_on, + std::vector const& right_on, + null_equality compare_nulls, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr) { // Make sure any dictionary columns have matched key sets. // This will return any new dictionary columns created as well as updated table_views. @@ -123,8 +196,27 @@ std::unique_ptr
full_join( table_view const left = scatter_columns(matched.second.front(), left_on, left_input); table_view const right = scatter_columns(matched.second.back(), right_on, right_input); - cudf::hash_join hj_obj(right, right_on, compare_nulls, stream); - return hj_obj.full_join(left, left_on, columns_in_common, compare_nulls, stream, mr); + auto join_indices = full_join(left.select(left_on), right.select(right_on), compare_nulls); + + if ((left_on.empty() || right_on.empty()) || + is_trivial_join(left, right, cudf::detail::join_kind::FULL_JOIN)) { + auto probe_build_pair = get_empty_joined_table(left, right); + return cudf::detail::combine_table_pair(std::move(probe_build_pair.first), + std::move(probe_build_pair.second)); + } + std::unique_ptr
left_result = detail::gather(left, + join_indices.first->begin(), + join_indices.first->end(), + out_of_bounds_policy::NULLIFY, + stream, + mr); + std::unique_ptr
right_result = detail::gather(right, + join_indices.second->begin(), + join_indices.second->end(), + out_of_bounds_policy::NULLIFY, + stream, + mr); + return combine_table_pair(std::move(left_result), std::move(right_result)); } } // namespace detail @@ -132,90 +224,111 @@ std::unique_ptr
full_join( hash_join::~hash_join() = default; hash_join::hash_join(cudf::table_view const& build, - std::vector const& build_on, null_equality compare_nulls, rmm::cuda_stream_view stream) - : impl{std::make_unique(build, build_on, compare_nulls, stream)} + : impl{std::make_unique(build, compare_nulls, stream)} { } -std::pair, std::unique_ptr> hash_join::inner_join( - cudf::table_view const& probe, - std::vector const& probe_on, - std::vector> const& columns_in_common, - common_columns_output_side common_columns_output_side, - null_equality compare_nulls, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) const +std::pair>, + std::unique_ptr>> +hash_join::inner_join(cudf::table_view const& probe, + null_equality compare_nulls, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr) const { - return impl->inner_join( - probe, probe_on, columns_in_common, common_columns_output_side, compare_nulls, stream, mr); + return impl->inner_join(probe, compare_nulls, stream, mr); } -std::unique_ptr hash_join::left_join( - cudf::table_view const& probe, - std::vector const& probe_on, - std::vector> const& columns_in_common, - null_equality compare_nulls, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) const +std::pair>, + std::unique_ptr>> +hash_join::left_join(cudf::table_view const& probe, + null_equality compare_nulls, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr) const { - return impl->left_join(probe, probe_on, columns_in_common, compare_nulls, stream, mr); + return impl->left_join(probe, compare_nulls, stream, mr); } -std::unique_ptr hash_join::full_join( - cudf::table_view const& probe, - std::vector const& probe_on, - std::vector> const& columns_in_common, - null_equality compare_nulls, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) const +std::pair>, + std::unique_ptr>> +hash_join::full_join(cudf::table_view const& probe, + null_equality compare_nulls, + 
rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr) const { - return impl->full_join(probe, probe_on, columns_in_common, compare_nulls, stream, mr); + return impl->full_join(probe, compare_nulls, stream, mr); } // external APIs -std::unique_ptr
inner_join( - table_view const& left, - table_view const& right, - std::vector const& left_on, - std::vector const& right_on, - std::vector> const& columns_in_common, - null_equality compare_nulls, - rmm::mr::device_memory_resource* mr) +std::pair>, + std::unique_ptr>> +inner_join(table_view const& left, + table_view const& right, + null_equality compare_nulls, + rmm::mr::device_memory_resource* mr) +{ + CUDF_FUNC_RANGE(); + return detail::inner_join(left, right, compare_nulls, rmm::cuda_stream_default, mr); +} + +std::unique_ptr
inner_join(table_view const& left, + table_view const& right, + std::vector const& left_on, + std::vector const& right_on, + null_equality compare_nulls, + rmm::mr::device_memory_resource* mr) { CUDF_FUNC_RANGE(); return detail::inner_join( - left, right, left_on, right_on, columns_in_common, compare_nulls, rmm::cuda_stream_default, mr); + left, right, left_on, right_on, compare_nulls, rmm::cuda_stream_default, mr); } -std::unique_ptr
left_join( - table_view const& left, - table_view const& right, - std::vector const& left_on, - std::vector const& right_on, - std::vector> const& columns_in_common, - null_equality compare_nulls, - rmm::mr::device_memory_resource* mr) +std::pair>, + std::unique_ptr>> +left_join(table_view const& left, + table_view const& right, + null_equality compare_nulls, + rmm::mr::device_memory_resource* mr) +{ + CUDF_FUNC_RANGE(); + return detail::left_join(left, right, compare_nulls, rmm::cuda_stream_default, mr); +} + +std::unique_ptr
left_join(table_view const& left, + table_view const& right, + std::vector const& left_on, + std::vector const& right_on, + null_equality compare_nulls, + rmm::mr::device_memory_resource* mr) { CUDF_FUNC_RANGE(); return detail::left_join( - left, right, left_on, right_on, columns_in_common, compare_nulls, rmm::cuda_stream_default, mr); + left, right, left_on, right_on, compare_nulls, rmm::cuda_stream_default, mr); +} + +std::pair>, + std::unique_ptr>> +full_join(table_view const& left, + table_view const& right, + null_equality compare_nulls, + rmm::mr::device_memory_resource* mr) +{ + CUDF_FUNC_RANGE(); + return detail::full_join(left, right, compare_nulls, rmm::cuda_stream_default, mr); } -std::unique_ptr
full_join( - table_view const& left, - table_view const& right, - std::vector const& left_on, - std::vector const& right_on, - std::vector> const& columns_in_common, - null_equality compare_nulls, - rmm::mr::device_memory_resource* mr) +std::unique_ptr
full_join(table_view const& left, + table_view const& right, + std::vector const& left_on, + std::vector const& right_on, + null_equality compare_nulls, + rmm::mr::device_memory_resource* mr) { CUDF_FUNC_RANGE(); return detail::full_join( - left, right, left_on, right_on, columns_in_common, compare_nulls, rmm::cuda_stream_default, mr); + left, right, left_on, right_on, compare_nulls, rmm::cuda_stream_default, mr); } } // namespace cudf diff --git a/cpp/src/join/join_common_utils.hpp b/cpp/src/join/join_common_utils.hpp index f0c158c1ef6..9312704f065 100644 --- a/cpp/src/join/join_common_utils.hpp +++ b/cpp/src/join/join_common_utils.hpp @@ -19,6 +19,8 @@ #include #include +#include + #include #include @@ -29,9 +31,10 @@ constexpr size_type MAX_JOIN_SIZE{std::numeric_limits::max()}; constexpr int DEFAULT_JOIN_BLOCK_SIZE = 128; constexpr int DEFAULT_JOIN_CACHE_SIZE = 128; -constexpr size_type JoinNoneValue = -1; +constexpr size_type JoinNoneValue = std::numeric_limits::min(); -using VectorPair = std::pair, rmm::device_vector>; +using VectorPair = std::pair>, + std::unique_ptr>>; using multimap_type = concurrent_unordered_multimap; enum class join_kind { INNER_JOIN, LEFT_JOIN, FULL_JOIN, LEFT_SEMI_JOIN, LEFT_ANTI_JOIN }; -inline bool is_trivial_join(table_view const& left, - table_view const& right, - std::vector const& left_on, - std::vector const& right_on, - join_kind join_type) +inline bool is_trivial_join(table_view const& left, table_view const& right, join_kind join_type) { // If there is nothing to join, then send empty table with all columns - if (left_on.empty() || right_on.empty()) { return true; } + if (left.is_empty() || right.is_empty()) { return true; } // If left join and the left table is empty, return immediately if ((join_kind::LEFT_JOIN == join_type) && (0 == left.num_rows())) { return true; } diff --git a/cpp/src/join/semi_join.cu b/cpp/src/join/semi_join.cu index 9d046f9983c..80a1ef9e204 100644 --- a/cpp/src/join/semi_join.cu +++ 
b/cpp/src/join/semi_join.cu @@ -17,20 +17,106 @@ #include #include +#include + #include #include #include +#include #include #include +#include #include #include #include +#include #include #include namespace cudf { namespace detail { + +template +std::unique_ptr> left_semi_anti_join( + cudf::table_view const& left_keys, + cudf::table_view const& right_keys, + null_equality compare_nulls, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()) +{ + CUDF_EXPECTS(0 != left_keys.num_columns(), "Left table is empty"); + CUDF_EXPECTS(0 != right_keys.num_columns(), "Right table is empty"); + + if (is_trivial_join(left_keys, right_keys, JoinKind)) { + return std::make_unique>(0, stream, mr); + } + if ((join_kind::LEFT_ANTI_JOIN == JoinKind) && (0 == right_keys.num_rows())) { + auto result = + std::make_unique>(left_keys.num_rows(), stream, mr); + thrust::sequence(thrust::cuda::par.on(stream.value()), result->begin(), result->end()); + return result; + } + + auto const left_num_rows = left_keys.num_rows(); + auto const right_num_rows = right_keys.num_rows(); + + // Only care about existence, so we'll use an unordered map (other joins need a multimap) + using hash_table_type = concurrent_unordered_map; + + // Create hash table containing all keys found in right table + auto right_rows_d = table_device_view::create(right_keys, stream); + size_t const hash_table_size = compute_hash_table_size(right_num_rows); + row_hash hash_build{*right_rows_d}; + row_equality equality_build{*right_rows_d, *right_rows_d, compare_nulls == null_equality::EQUAL}; + + // Going to join it with left table + auto left_rows_d = table_device_view::create(left_keys, stream); + row_hash hash_probe{*left_rows_d}; + row_equality equality_probe{*left_rows_d, *right_rows_d, compare_nulls == null_equality::EQUAL}; + + auto hash_table_ptr = hash_table_type::create(hash_table_size, + stream, + std::numeric_limits::max(), + std::numeric_limits::max(), + 
hash_build, + equality_build); + auto hash_table = *hash_table_ptr; + + thrust::for_each_n(rmm::exec_policy(stream), + thrust::make_counting_iterator(0), + right_num_rows, + [hash_table] __device__(size_type idx) mutable { + hash_table.insert(thrust::make_pair(idx, true)); + }); + + // + // Now we have a hash table, we need to iterate over the rows of the left table + // and check to see if they are contained in the hash table + // + + // For semi join we want contains to be true, for anti join we want contains to be false + bool join_type_boolean = (JoinKind == join_kind::LEFT_SEMI_JOIN); + + auto gather_map = + std::make_unique>(left_num_rows, stream, mr); + + // gather_map_end will be the end of valid data in gather_map + auto gather_map_end = thrust::copy_if( + rmm::exec_policy(stream), + thrust::make_counting_iterator(0), + thrust::make_counting_iterator(left_num_rows), + gather_map->begin(), + [hash_table, join_type_boolean, hash_probe, equality_probe] __device__(size_type idx) { + auto pos = hash_table.find(idx, hash_probe, equality_probe); + return (pos != hash_table.end()) == join_type_boolean; + }); + + auto join_size = thrust::distance(gather_map->begin(), gather_map_end); + gather_map->resize(join_size, stream); + return gather_map; +} + /** * @brief Performs a left semi or anti join on the specified columns of two * tables (left, right) @@ -57,8 +143,6 @@ namespace detail { * The column from `right` indicated by `right_on[i]` * will be compared against the column from `left` * indicated by `left_on[i]`. - * @param[in] return_columns A vector of column indices from `left` to - * include in the returned table. * @param[in] compare_nulls Controls whether null join-key values should match or not. 
* @param[in] mr Device memory resource to used to allocate the returned table's * device memory @@ -66,8 +150,7 @@ namespace detail { * @tparam join_kind Indicates whether to do LEFT_SEMI_JOIN or LEFT_ANTI_JOIN * * @returns Result of joining `left` and `right` tables on the columns - * specified by `left_on` and `right_on`. The resulting table - * will contain `return_columns` from `left` that match in right. + * specified by `left_on` and `right_on`. */ template std::unique_ptr left_semi_anti_join( @@ -75,27 +158,19 @@ std::unique_ptr left_semi_anti_join( cudf::table_view const& right, std::vector const& left_on, std::vector const& right_on, - std::vector const& return_columns, null_equality compare_nulls, rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()) { - CUDF_EXPECTS(0 != left.num_columns(), "Left table is empty"); - CUDF_EXPECTS(0 != right.num_columns(), "Right table is empty"); CUDF_EXPECTS(left_on.size() == right_on.size(), "Mismatch in number of columns to be joined on"); - if (0 == return_columns.size()) { return empty_like(left.select(return_columns)); } - - if (is_trivial_join(left, right, left_on, right_on, JoinKind)) { - return empty_like(left.select(return_columns)); + if ((left_on.empty() || right_on.empty()) || is_trivial_join(left, right, JoinKind)) { + return empty_like(left); } - auto const left_num_rows = left.num_rows(); - auto const right_num_rows = right.num_rows(); - - if ((join_kind::LEFT_ANTI_JOIN == JoinKind) && (0 == right_num_rows)) { + if ((join_kind::LEFT_ANTI_JOIN == JoinKind) && (0 == right.num_rows())) { // Everything matches, just copy the proper columns from the left table - return std::make_unique
(left.select(return_columns), stream, mr); + return std::make_unique
(left, stream, mr); } // Make sure any dictionary columns have matched key sets. @@ -108,91 +183,64 @@ std::unique_ptr left_semi_anti_join( auto const left_selected = matched.second.front(); auto const right_selected = matched.second.back(); - // Only care about existence, so we'll use an unordered map (other joins need a multimap) - using hash_table_type = concurrent_unordered_map; - - // Create hash table containing all keys found in right table - auto right_rows_d = table_device_view::create(right_selected, stream); - size_t const hash_table_size = compute_hash_table_size(right_num_rows); - row_hash hash_build{*right_rows_d}; - row_equality equality_build{*right_rows_d, *right_rows_d, compare_nulls == null_equality::EQUAL}; - - // Going to join it with left table - auto left_rows_d = table_device_view::create(left_selected, stream); - row_hash hash_probe{*left_rows_d}; - row_equality equality_probe{*left_rows_d, *right_rows_d, compare_nulls == null_equality::EQUAL}; - - auto hash_table_ptr = hash_table_type::create(hash_table_size, - stream, - std::numeric_limits::max(), - std::numeric_limits::max(), - hash_build, - equality_build); - auto hash_table = *hash_table_ptr; - - thrust::for_each_n(rmm::exec_policy(stream), - thrust::make_counting_iterator(0), - right_num_rows, - [hash_table] __device__(size_type idx) mutable { - hash_table.insert(thrust::make_pair(idx, true)); - }); - - // - // Now we have a hash table, we need to iterate over the rows of the left table - // and check to see if they are contained in the hash table - // + auto gather_map = + left_semi_anti_join(left_selected, right_selected, compare_nulls, stream); - // For semi join we want contains to be true, for anti join we want contains to be false - bool join_type_boolean = (JoinKind == join_kind::LEFT_SEMI_JOIN); - - rmm::device_vector gather_map(left_num_rows); - - // gather_map_end will be the end of valid data in gather_map - auto gather_map_end = thrust::copy_if( - rmm::exec_policy(stream), 
- thrust::make_counting_iterator(0), - thrust::make_counting_iterator(left_num_rows), - gather_map.begin(), - [hash_table, join_type_boolean, hash_probe, equality_probe] __device__(size_type idx) { - auto pos = hash_table.find(idx, hash_probe, equality_probe); - return (pos != hash_table.end()) == join_type_boolean; - }); - - // rebuild left table for call to gather auto const left_updated = scatter_columns(left_selected, left_on, left); - return cudf::detail::gather(left_updated.select(return_columns), - gather_map.begin(), - gather_map_end, + return cudf::detail::gather(left_updated, + gather_map->begin(), + gather_map->end(), out_of_bounds_policy::DONT_CHECK, stream, mr); } + } // namespace detail std::unique_ptr left_semi_join(cudf::table_view const& left, cudf::table_view const& right, std::vector const& left_on, std::vector const& right_on, - std::vector const& return_columns, null_equality compare_nulls, rmm::mr::device_memory_resource* mr) { CUDF_FUNC_RANGE(); return detail::left_semi_anti_join( - left, right, left_on, right_on, return_columns, compare_nulls, rmm::cuda_stream_default, mr); + left, right, left_on, right_on, compare_nulls, rmm::cuda_stream_default, mr); +} + +std::unique_ptr> left_semi_join( + cudf::table_view const& left, + cudf::table_view const& right, + null_equality compare_nulls, + rmm::mr::device_memory_resource* mr) +{ + CUDF_FUNC_RANGE(); + return detail::left_semi_anti_join( + left, right, compare_nulls, rmm::cuda_stream_default, mr); } std::unique_ptr left_anti_join(cudf::table_view const& left, cudf::table_view const& right, std::vector const& left_on, std::vector const& right_on, - std::vector const& return_columns, null_equality compare_nulls, rmm::mr::device_memory_resource* mr) { CUDF_FUNC_RANGE(); return detail::left_semi_anti_join( - left, right, left_on, right_on, return_columns, compare_nulls, rmm::cuda_stream_default, mr); + left, right, left_on, right_on, compare_nulls, rmm::cuda_stream_default, mr); +} + 
+std::unique_ptr> left_anti_join( + cudf::table_view const& left, + cudf::table_view const& right, + null_equality compare_nulls, + rmm::mr::device_memory_resource* mr) +{ + CUDF_FUNC_RANGE(); + return detail::left_semi_anti_join( + left, right, compare_nulls, rmm::cuda_stream_default, mr); } } // namespace cudf diff --git a/cpp/tests/join/join_tests.cpp b/cpp/tests/join/join_tests.cpp index efc5330ea7d..32192234c56 100644 --- a/cpp/tests/join/join_tests.cpp +++ b/cpp/tests/join/join_tests.cpp @@ -33,11 +33,15 @@ #include #include +#include + template using column_wrapper = cudf::test::fixed_width_column_wrapper; using strcol_wrapper = cudf::test::strings_column_wrapper; using CVector = std::vector>; using Table = cudf::table; +constexpr cudf::size_type NoneValue = + std::numeric_limits::min(); // TODO: how to test if this isn't public? struct JoinTest : public cudf::test::BaseFixture { }; @@ -58,58 +62,11 @@ TEST_F(JoinTest, EmptySentinelRepro) cudf::table_view left({left_first_col, left_second_col, left_third_col}); cudf::table_view right({right_first_col, right_second_col, right_third_col}); - auto result = cudf::inner_join(left, right, {0, 1, 2}, {0, 1, 2}, {{0, 0}, {1, 1}, {2, 2}}); + auto result = cudf::inner_join(left, right, {0, 1, 2}, {0, 1, 2}); EXPECT_EQ(result->num_rows(), 1); } -TEST_F(JoinTest, InvalidCommonColumnIndices) -{ - column_wrapper col0_0{{3, 1, 2, 0, 3}}; - column_wrapper col0_1{{0, 1, 2, 4, 1}}; - - column_wrapper col1_0{{2, 2, 0, 4, 3}}; - column_wrapper col1_1{{1, 0, 1, 2, 1}}; - - CVector cols0, cols1; - cols0.push_back(col0_0.release()); - cols0.push_back(col0_1.release()); - cols1.push_back(col1_0.release()); - cols1.push_back(col1_1.release()); - - Table t0(std::move(cols0)); - Table t1(std::move(cols1)); - - EXPECT_THROW(cudf::inner_join(t0, t1, {0, 1}, {0, 1}, {{0, 1}, {1, 0}}), cudf::logic_error); -} - -TEST_F(JoinTest, FullJoinNoCommon) -{ - column_wrapper col0_0{{0, 1}}; - column_wrapper col1_0{{0, 2}}; - CVector cols0, cols1; - 
cols0.push_back(col0_0.release()); - cols1.push_back(col1_0.release()); - - Table t0(std::move(cols0)); - Table t1(std::move(cols1)); - - column_wrapper exp_col0_0{{0, 1, -1}, {1, 1, 0}}; - column_wrapper exp_col0_1{{0, -1, 2}, {1, 0, 1}}; - CVector exp_cols; - exp_cols.push_back(exp_col0_0.release()); - exp_cols.push_back(exp_col0_1.release()); - Table gold(std::move(exp_cols)); - - auto result = cudf::full_join(t0, t1, {0}, {0}, {}); - auto result_sort_order = cudf::sorted_order(result->view()); - auto sorted_result = cudf::gather(result->view(), *result_sort_order); - - auto gold_sort_order = cudf::sorted_order(gold.view()); - auto sorted_gold = cudf::gather(gold.view(), *gold_sort_order); - CUDF_TEST_EXPECT_TABLES_EQUAL(*sorted_gold, *sorted_result); -} - TEST_F(JoinTest, LeftJoinNoNullsWithNoCommon) { column_wrapper col0_0{{3, 1, 2, 0, 3}}; @@ -131,7 +88,7 @@ TEST_F(JoinTest, LeftJoinNoNullsWithNoCommon) Table t0(std::move(cols0)); Table t1(std::move(cols1)); - auto result = cudf::left_join(t0, t1, {0}, {0}, {}); + auto result = cudf::left_join(t0, t1, {0}, {0}); auto result_sort_order = cudf::sorted_order(result->view()); auto sorted_result = cudf::gather(result->view(), *result_sort_order); @@ -153,7 +110,7 @@ TEST_F(JoinTest, LeftJoinNoNullsWithNoCommon) auto gold_sort_order = cudf::sorted_order(gold.view()); auto sorted_gold = cudf::gather(gold.view(), *gold_sort_order); - CUDF_TEST_EXPECT_TABLES_EQUAL(*sorted_gold, *sorted_result); + CUDF_TEST_EXPECT_TABLES_EQUIVALENT(*sorted_gold, *sorted_result); } TEST_F(JoinTest, FullJoinNoNulls) @@ -177,24 +134,32 @@ TEST_F(JoinTest, FullJoinNoNulls) Table t0(std::move(cols0)); Table t1(std::move(cols1)); - auto result = cudf::full_join(t0, t1, {0, 1}, {0, 1}, {{0, 0}, {1, 1}}); + auto result = cudf::full_join(t0, t1, {0, 1}, {0, 1}); auto result_sort_order = cudf::sorted_order(result->view()); auto sorted_result = cudf::gather(result->view(), *result_sort_order); - column_wrapper col_gold_0{{2, 2, 0, 4, 3, 3, 1, 2, 
0}}; - strcol_wrapper col_gold_1({"s1", "s0", "s1", "s2", "s1", "s0", "s1", "s2", "s4"}); - column_wrapper col_gold_2{{-1, -1, -1, -1, 1, 0, 1, 2, 4}, {0, 0, 0, 0, 1, 1, 1, 1, 1}}; - column_wrapper col_gold_3{{1, 0, 1, 2, 1, -1, -1, -1, -1}, {1, 1, 1, 1, 1, 0, 0, 0, 0}}; + column_wrapper col_gold_0{{3, 1, 2, 0, 3, -1, -1, -1, -1}, {1, 1, 1, 1, 1, 0, 0, 0, 0}}; + strcol_wrapper col_gold_1({"s0", "s1", "s2", "s4", "s1", "", "", "", ""}, + {1, 1, 1, 1, 1, 0, 0, 0, 0}); + column_wrapper col_gold_2{{0, 1, 2, 4, 1, -1, -1, -1, -1}, {1, 1, 1, 1, 1, 0, 0, 0, 0}}; + column_wrapper col_gold_3{{-1, -1, -1, -1, 3, 2, 2, 0, 4}, {0, 0, 0, 0, 1, 1, 1, 1, 1}}; + strcol_wrapper col_gold_4({"", "", "", "", "s1", "s1", "s0", "s1", "s2"}, + {0, 0, 0, 0, 1, 1, 1, 1, 1}); + column_wrapper col_gold_5{{-1, -1, -1, -1, 1, 1, 0, 1, 2}, {0, 0, 0, 0, 1, 1, 1, 1, 1}}; + CVector cols_gold; cols_gold.push_back(col_gold_0.release()); cols_gold.push_back(col_gold_1.release()); cols_gold.push_back(col_gold_2.release()); cols_gold.push_back(col_gold_3.release()); + cols_gold.push_back(col_gold_4.release()); + cols_gold.push_back(col_gold_5.release()); + Table gold(std::move(cols_gold)); auto gold_sort_order = cudf::sorted_order(gold.view()); auto sorted_gold = cudf::gather(gold.view(), *gold_sort_order); - CUDF_TEST_EXPECT_TABLES_EQUAL(*sorted_gold, *sorted_result); + CUDF_TEST_EXPECT_TABLES_EQUIVALENT(*sorted_gold, *sorted_result); } TEST_F(JoinTest, FullJoinWithNulls) @@ -218,24 +183,32 @@ TEST_F(JoinTest, FullJoinWithNulls) Table t0(std::move(cols0)); Table t1(std::move(cols1)); - auto result = cudf::full_join(t0, t1, {0, 1}, {0, 1}, {{0, 0}, {1, 1}}); + auto result = cudf::full_join(t0, t1, {0, 1}, {0, 1}); auto result_sort_order = cudf::sorted_order(result->view()); auto sorted_result = cudf::gather(result->view(), *result_sort_order); - column_wrapper col_gold_0{{2, 2, 0, -1, 3, 3, 1, 2, 0}, {1, 1, 1, 0, 1, 1, 1, 1, 1}}; - strcol_wrapper col_gold_1({"s1", "s0", "s1", "s2", "s1", "s0", "s1", 
"s2", "s4"}); - column_wrapper col_gold_2{{-1, -1, -1, -1, 1, 0, 1, 2, 4}, {0, 0, 0, 0, 1, 1, 1, 1, 1}}; - column_wrapper col_gold_3{{1, 0, 1, 2, 1, -1, -1, -1, -1}, {1, 1, 1, 1, 1, 0, 0, 0, 0}}; + column_wrapper col_gold_0{{3, 1, 2, 0, 3, -1, -1, -1, -1}, {1, 1, 1, 1, 1, 0, 0, 0, 0}}; + strcol_wrapper col_gold_1({"s0", "s1", "s2", "s4", "s1", "", "", "", ""}, + {1, 1, 1, 1, 1, 0, 0, 0, 0}); + column_wrapper col_gold_2{{0, 1, 2, 4, 1, -1, -1, -1, -1}, {1, 1, 1, 1, 1, 0, 0, 0, 0}}; + column_wrapper col_gold_3{{-1, -1, -1, -1, 3, 2, 2, 0, 4}, {0, 0, 0, 0, 1, 1, 1, 1, 0}}; + strcol_wrapper col_gold_4({"", "", "", "", "s1", "s1", "s0", "s1", "s2"}, + {0, 0, 0, 0, 1, 1, 1, 1, 1}); + column_wrapper col_gold_5{{-1, -1, -1, -1, 1, 1, 0, 1, 2}, {0, 0, 0, 0, 1, 1, 1, 1, 1}}; + CVector cols_gold; cols_gold.push_back(col_gold_0.release()); cols_gold.push_back(col_gold_1.release()); cols_gold.push_back(col_gold_2.release()); cols_gold.push_back(col_gold_3.release()); + cols_gold.push_back(col_gold_4.release()); + cols_gold.push_back(col_gold_5.release()); + Table gold(std::move(cols_gold)); auto gold_sort_order = cudf::sorted_order(gold.view()); auto sorted_gold = cudf::gather(gold.view(), *gold_sort_order); - CUDF_TEST_EXPECT_TABLES_EQUAL(*sorted_gold, *sorted_result); + CUDF_TEST_EXPECT_TABLES_EQUIVALENT(*sorted_gold, *sorted_result); } TEST_F(JoinTest, FullJoinOnNulls) @@ -262,7 +235,7 @@ TEST_F(JoinTest, FullJoinOnNulls) Table t0(std::move(cols0)); Table t1(std::move(cols1)); - auto result = cudf::full_join(t0, t1, {0, 1}, {0, 1}, {{0, 0}, {1, 1}}); + auto result = cudf::full_join(t0, t1, {0, 1}, {0, 1}); auto result_sort_order = cudf::sorted_order(result->view()); auto sorted_result = cudf::gather(result->view(), *result_sort_order); @@ -273,20 +246,26 @@ TEST_F(JoinTest, FullJoinOnNulls) cudf::test::print(sorted_result->get_column(2).view(), std::cout, ",\t\t"); cudf::test::print(sorted_result->get_column(3).view(), std::cout, ",\t\t"); #endif - - column_wrapper 
col_gold_0{{ 2, 5, 3, -1}, - { 1, 1, 1, 0}}; - strcol_wrapper col_gold_1({ "s1", "s0", "s0", "s1"}); - column_wrapper col_gold_2{{ -1, -1, 0, 1}, - { 0, 0, 1, 1}}; - column_wrapper col_gold_3{{ 1, 4, 2, 8}, - { 1, 1, 1, 1}}; + + column_wrapper col_gold_0{{ 3, -1, -1, -1}, + { 1, 0, 0, 0}}; + strcol_wrapper col_gold_1{{ "s0", "s1", "", ""}, + { 1, 1, 0, 0}}; + column_wrapper col_gold_2{{ 0, 1, -1, -1}, + { 1, 1, 0, 0}}; + column_wrapper col_gold_3{{ 3, -1, 2, 5}, + { 1, 0, 1, 1}}; + strcol_wrapper col_gold_4{{ "s0", "s1", "s1", "s0"}}; + column_wrapper col_gold_5{{ 2, 8, 1, 4}}; CVector cols_gold; cols_gold.push_back(col_gold_0.release()); cols_gold.push_back(col_gold_1.release()); cols_gold.push_back(col_gold_2.release()); cols_gold.push_back(col_gold_3.release()); + cols_gold.push_back(col_gold_4.release()); + cols_gold.push_back(col_gold_5.release()); + Table gold(std::move(cols_gold)); auto gold_sort_order = cudf::sorted_order(gold.view()); @@ -300,22 +279,27 @@ TEST_F(JoinTest, FullJoinOnNulls) cudf::test::print(sorted_gold->get_column(3).view(), std::cout, ",\t\t"); #endif - CUDF_TEST_EXPECT_TABLES_EQUAL(*sorted_gold, *sorted_result); + CUDF_TEST_EXPECT_TABLES_EQUIVALENT(*sorted_gold, *sorted_result); // Repeat test with compare_nulls_equal=false, // as per SQL standard. 
- result = cudf::full_join(t0, t1, {0, 1}, {0, 1}, {{0, 0}, {1, 1}}, cudf::null_equality::UNEQUAL); + result = cudf::full_join(t0, t1, {0, 1}, {0, 1}, cudf::null_equality::UNEQUAL); result_sort_order = cudf::sorted_order(result->view()); sorted_result = cudf::gather(result->view(), *result_sort_order); - col_gold_0 = {{ 2, 5, 3, -1, -1}, - { 1, 1, 1, 0, 0}}; - col_gold_1 = strcol_wrapper({ "s1", "s0", "s0", "s1", "s1"}); - col_gold_2 = {{ -1, -1, 0, -1, 1}, - { 0, 0, 1, 0, 1}}; - col_gold_3 = {{ 1, 4, 2, 8, -1}, - { 1, 1, 1, 1, 0}}; + col_gold_0 = {{ 3, -1, -1, -1, -1}, + { 1, 0, 0, 0, 0}}; + col_gold_1 = strcol_wrapper{{ "s0", "s1", "", "", ""}, + { 1, 1, 0, 0, 0}}; + col_gold_2 = {{ 0, 1, -1, -1, -1}, + { 1, 1, 0, 0, 0}}; + col_gold_3 = {{ 3, -1, 2, 5, -1}, + { 1, 0, 1, 1, 0}}; + col_gold_4 = strcol_wrapper{{ "s0", "", "s1", "s0", "s1"}, + { 1, 0, 1, 1, 1}}; + col_gold_5 = {{ 2, -1, 1, 4, 8}, + { 1, 0, 1, 1, 1}}; // clang-format on @@ -324,23 +308,26 @@ TEST_F(JoinTest, FullJoinOnNulls) cols_gold_nulls_unequal.push_back(col_gold_1.release()); cols_gold_nulls_unequal.push_back(col_gold_2.release()); cols_gold_nulls_unequal.push_back(col_gold_3.release()); + cols_gold_nulls_unequal.push_back(col_gold_4.release()); + cols_gold_nulls_unequal.push_back(col_gold_5.release()); + Table gold_nulls_unequal{std::move(cols_gold_nulls_unequal)}; gold_sort_order = cudf::sorted_order(gold_nulls_unequal.view()); sorted_gold = cudf::gather(gold_nulls_unequal.view(), *gold_sort_order); - CUDF_TEST_EXPECT_TABLES_EQUAL(*sorted_gold, *sorted_result); + CUDF_TEST_EXPECT_TABLES_EQUIVALENT(*sorted_gold, *sorted_result); } TEST_F(JoinTest, LeftJoinNoNulls) { - column_wrapper col0_0{{3, 1, 2, 0, 3}}; + column_wrapper col0_0({3, 1, 2, 0, 3}); strcol_wrapper col0_1({"s0", "s1", "s2", "s4", "s1"}); - column_wrapper col0_2{{0, 1, 2, 4, 1}}; + column_wrapper col0_2({0, 1, 2, 4, 1}); - column_wrapper col1_0{{2, 2, 0, 4, 3}}; + column_wrapper col1_0({2, 2, 0, 4, 3}); strcol_wrapper col1_1({"s1", 
"s0", "s1", "s2", "s1"}); - column_wrapper col1_2{{1, 0, 1, 2, 1}}; + column_wrapper col1_2({1, 0, 1, 2, 1}); CVector cols0, cols1; cols0.push_back(col0_0.release()); @@ -353,30 +340,34 @@ TEST_F(JoinTest, LeftJoinNoNulls) Table t0(std::move(cols0)); Table t1(std::move(cols1)); - auto result = cudf::left_join(t0, t1, {0, 1}, {0, 1}, {{0, 0}, {1, 1}}); + auto result = cudf::left_join(t0, t1, {0, 1}, {0, 1}); auto result_sort_order = cudf::sorted_order(result->view()); auto sorted_result = cudf::gather(result->view(), *result_sort_order); - column_wrapper col_gold_0{{3, 3, 1, 2, 0}, {1, 1, 1, 1, 1}}; - strcol_wrapper col_gold_1({"s1", "s0", "s1", "s2", "s4"}, {1, 1, 1, 1, 1, 1}); - column_wrapper col_gold_2{{1, 0, 1, 2, 4}, {1, 1, 1, 1, 1}}; - column_wrapper col_gold_3{{1, -1, -1, -1, -1}, {1, 0, 0, 0, 0}}; + column_wrapper col_gold_0({3, 1, 2, 0, 3}); + strcol_wrapper col_gold_1({"s0", "s1", "s2", "s4", "s1"}); + column_wrapper col_gold_2({0, 1, 2, 4, 1}); + column_wrapper col_gold_3{{-1, -1, -1, -1, 3}, {0, 0, 0, 0, 1}}; + strcol_wrapper col_gold_4{{"", "", "", "", "s1"}, {0, 0, 0, 0, 1}}; + column_wrapper col_gold_5{{-1, -1, -1, -1, 1}, {0, 0, 0, 0, 1}}; CVector cols_gold; cols_gold.push_back(col_gold_0.release()); cols_gold.push_back(col_gold_1.release()); cols_gold.push_back(col_gold_2.release()); cols_gold.push_back(col_gold_3.release()); + cols_gold.push_back(col_gold_4.release()); + cols_gold.push_back(col_gold_5.release()); Table gold(std::move(cols_gold)); auto gold_sort_order = cudf::sorted_order(gold.view()); auto sorted_gold = cudf::gather(gold.view(), *gold_sort_order); - CUDF_TEST_EXPECT_TABLES_EQUAL(*sorted_gold, *sorted_result); + CUDF_TEST_EXPECT_TABLES_EQUIVALENT(*sorted_gold, *sorted_result); } TEST_F(JoinTest, LeftJoinWithNulls) { column_wrapper col0_0{{3, 1, 2, 0, 2}}; - strcol_wrapper col0_1({"s1", "s1", "s0", "s4", "s0"}, {1, 1, 0, 1, 1}); + strcol_wrapper col0_1({"s1", "s1", "", "s4", "s0"}, {1, 1, 0, 1, 1}); column_wrapper col0_2{{0, 1, 2, 
4, 1}}; column_wrapper col1_0{{2, 2, 0, 4, 3}}; @@ -394,24 +385,29 @@ TEST_F(JoinTest, LeftJoinWithNulls) Table t0(std::move(cols0)); Table t1(std::move(cols1)); - auto result = cudf::left_join(t0, t1, {0, 1}, {0, 1}, {{0, 0}, {1, 1}}); + auto result = cudf::left_join(t0, t1, {0, 1}, {0, 1}); auto result_sort_order = cudf::sorted_order(result->view()); auto sorted_result = cudf::gather(result->view(), *result_sort_order); - column_wrapper col_gold_0{{3, 2, 1, 2, 0}, {1, 1, 1, 1, 1}}; - strcol_wrapper col_gold_1({"s1", "s0", "s1", "", "s4"}, {1, 1, 1, 0, 1}); - column_wrapper col_gold_2{{0, 1, 1, 2, 4}, {1, 1, 1, 1, 1}}; - column_wrapper col_gold_3{{1, -1, -1, -1, -1}, {1, 0, 0, 0, 0}}; + column_wrapper col_gold_0{{3, 1, 2, 0, 2}, {1, 1, 1, 1, 1}}; + strcol_wrapper col_gold_1({"s1", "s1", "", "s4", "s0"}, {1, 1, 0, 1, 1}); + column_wrapper col_gold_2{{0, 1, 2, 4, 1}, {1, 1, 1, 1, 1}}; + column_wrapper col_gold_3{{3, -1, -1, -1, 2}, {1, 0, 0, 0, 1}}; + strcol_wrapper col_gold_4{{"s1", "", "", "", "s0"}, {1, 0, 0, 0, 1}}; + column_wrapper col_gold_5{{1, -1, -1, -1, -1}, {1, 0, 0, 0, 0}}; + CVector cols_gold; cols_gold.push_back(col_gold_0.release()); cols_gold.push_back(col_gold_1.release()); cols_gold.push_back(col_gold_2.release()); cols_gold.push_back(col_gold_3.release()); + cols_gold.push_back(col_gold_4.release()); + cols_gold.push_back(col_gold_5.release()); Table gold(std::move(cols_gold)); auto gold_sort_order = cudf::sorted_order(gold.view()); auto sorted_gold = cudf::gather(gold.view(), *gold_sort_order); - CUDF_TEST_EXPECT_TABLES_EQUAL(*sorted_gold, *sorted_result); + CUDF_TEST_EXPECT_TABLES_EQUIVALENT(*sorted_gold, *sorted_result); } TEST_F(JoinTest, LeftJoinOnNulls) @@ -438,7 +434,7 @@ TEST_F(JoinTest, LeftJoinOnNulls) Table t0(std::move(cols0)); Table t1(std::move(cols1)); - auto result = cudf::left_join(t0, t1, {0, 1}, {0, 1}, {{0, 0}, {1, 1}}); + auto result = cudf::left_join(t0, t1, {0, 1}, {0, 1}); auto result_sort_order = 
cudf::sorted_order(result->view()); auto sorted_result = cudf::gather(result->view(), *result_sort_order); @@ -449,21 +445,27 @@ TEST_F(JoinTest, LeftJoinOnNulls) cudf::test::print(sorted_result->get_column(2).view(), std::cout, ",\t\t"); cudf::test::print(sorted_result->get_column(3).view(), std::cout, ",\t\t"); #endif - + column_wrapper col_gold_0{{ 3, -1, 2}, { 1, 0, 1}}; strcol_wrapper col_gold_1({ "s0", "s1", "s2"}, { 1, 1, 1}); - column_wrapper col_gold_2{{ 0, 1, 2}, + column_wrapper col_gold_2{{ 0, 1, 2}, { 1, 1, 1}}; - column_wrapper col_gold_3{{ 2, 8, -1}, + column_wrapper col_gold_3{{ 3, -1, -1}, + { 1, 0, 0}}; + strcol_wrapper col_gold_4({ "s0", "s1", ""}, + { 1, 1, 0}); + column_wrapper col_gold_5{{ 2, 8, -1}, { 1, 1, 0}}; - + CVector cols_gold; cols_gold.push_back(col_gold_0.release()); cols_gold.push_back(col_gold_1.release()); cols_gold.push_back(col_gold_2.release()); cols_gold.push_back(col_gold_3.release()); + cols_gold.push_back(col_gold_4.release()); + cols_gold.push_back(col_gold_5.release()); Table gold(std::move(cols_gold)); auto gold_sort_order = cudf::sorted_order(gold.view()); @@ -477,23 +479,28 @@ TEST_F(JoinTest, LeftJoinOnNulls) cudf::test::print(sorted_gold->get_column(3).view(), std::cout, ",\t\t"); #endif - CUDF_TEST_EXPECT_TABLES_EQUAL(*sorted_gold, *sorted_result); + CUDF_TEST_EXPECT_TABLES_EQUIVALENT(*sorted_gold, *sorted_result); // Repeat test with compare_nulls_equal=false, // as per SQL standard. 
- result = cudf::left_join(t0, t1, {0, 1}, {0, 1}, {{0, 0}, {1, 1}}, cudf::null_equality::UNEQUAL); + result = cudf::left_join(t0, t1, {0, 1}, {0, 1}, cudf::null_equality::UNEQUAL); result_sort_order = cudf::sorted_order(result->view()); sorted_result = cudf::gather(result->view(), *result_sort_order); - col_gold_0 = {{ 3, -1, 2}, - { 1, 0, 1}}; - col_gold_1 = strcol_wrapper({ "s0", "s1", "s2"}, - { 1, 1, 1}); - col_gold_2 = {{ 0, 1, 2}, - { 1, 1, 1}}; - col_gold_3 = {{ 2, -1, -1}, - { 1, 0, 0}}; + + col_gold_0 = {{ 3, -1, 2}, + { 1, 0, 1}}; + col_gold_1 = {{ "s0", "s1", "s2"}, + { 1, 1, 1}}; + col_gold_2 = {{ 0, 1, 2}, + { 1, 1, 1}}; + col_gold_3 = {{ 3, -1, -1}, + { 1, 0, 0}}; + col_gold_4 = {{ "s0", "", ""}, + { 1, 0, 0}}; + col_gold_5 = {{ 2, -1, -1}, + { 1, 0, 0}}; // clang-format on CVector cols_gold_nulls_unequal; @@ -506,7 +513,7 @@ TEST_F(JoinTest, LeftJoinOnNulls) gold_sort_order = cudf::sorted_order(gold_nulls_unequal.view()); sorted_gold = cudf::gather(gold_nulls_unequal.view(), *gold_sort_order); - CUDF_TEST_EXPECT_TABLES_EQUAL(*sorted_gold, *sorted_result); + CUDF_TEST_EXPECT_TABLES_EQUIVALENT(*sorted_gold, *sorted_result); } TEST_F(JoinTest, InnerJoinSizeOverflow) @@ -529,7 +536,7 @@ TEST_F(JoinTest, InnerJoinSizeOverflow) Table t0(std::move(cols0)); Table t1(std::move(cols1)); - EXPECT_THROW(cudf::inner_join(t0, t1, {0}, {0}, {{0, 0}}), cudf::logic_error); + EXPECT_THROW(cudf::inner_join(t0, t1, {0}, {0}), cudf::logic_error); } TEST_F(JoinTest, InnerJoinNoNulls) @@ -553,86 +560,28 @@ TEST_F(JoinTest, InnerJoinNoNulls) Table t0(std::move(cols0)); Table t1(std::move(cols1)); - auto result = cudf::inner_join(t0, t1, {0, 1}, {0, 1}, {{0, 0}, {1, 1}}); + auto result = cudf::inner_join(t0, t1, {0, 1}, {0, 1}); auto result_sort_order = cudf::sorted_order(result->view()); auto sorted_result = cudf::gather(result->view(), *result_sort_order); column_wrapper col_gold_0{{3, 2, 2}}; strcol_wrapper col_gold_1({"s1", "s0", "s0"}); column_wrapper col_gold_2{{0, 2, 
1}}; - column_wrapper col_gold_3{{1, 0, 0}}; + column_wrapper col_gold_3{{3, 2, 2}}; + strcol_wrapper col_gold_4({"s1", "s0", "s0"}); + column_wrapper col_gold_5{{1, 0, 0}}; CVector cols_gold; cols_gold.push_back(col_gold_0.release()); cols_gold.push_back(col_gold_1.release()); cols_gold.push_back(col_gold_2.release()); cols_gold.push_back(col_gold_3.release()); + cols_gold.push_back(col_gold_4.release()); + cols_gold.push_back(col_gold_5.release()); Table gold(std::move(cols_gold)); auto gold_sort_order = cudf::sorted_order(gold.view()); auto sorted_gold = cudf::gather(gold.view(), *gold_sort_order); - CUDF_TEST_EXPECT_TABLES_EQUAL(*sorted_gold, *sorted_result); -} - -TEST_F(JoinTest, InnerJoinNonAlignedCommon) -{ - CVector cols0, cols1; - cols0.emplace_back(column_wrapper{{3, 1, 2, 0, 2}}.release()); - cols0.emplace_back(column_wrapper{{3, 1, 2, 0, 2}}.release()); - cols0.emplace_back(strcol_wrapper({"s1", "s1", "s0", "s4", "s0"}).release()); - cols0.emplace_back(column_wrapper{{0, 1, 2, 4, 1}}.release()); - cols1.emplace_back(column_wrapper{{2, 2, 0, 4, 3}}.release()); - cols1.emplace_back(strcol_wrapper({"s1", "s0", "s1", "s2", "s1"}).release()); - cols1.emplace_back(column_wrapper{{1, 0, 1, 2, 1}}.release()); - - Table t0(std::move(cols0)); - Table t1(std::move(cols1)); - - auto result = cudf::inner_join(t0, t1, {1, 2}, {0, 1}, {{1, 0}, {2, 1}}); - auto result_sort_order = cudf::sorted_order(result->view()); - auto sorted_result = cudf::gather(result->view(), *result_sort_order); - - CVector cols_gold; - cols_gold.emplace_back(column_wrapper{{3, 2, 2}}.release()); - cols_gold.emplace_back(column_wrapper{{3, 2, 2}}.release()); - cols_gold.emplace_back(strcol_wrapper({"s1", "s0", "s0"}).release()); - cols_gold.emplace_back(column_wrapper{{0, 2, 1}}.release()); - cols_gold.emplace_back(column_wrapper{{1, 0, 0}}.release()); - Table gold(std::move(cols_gold)); - - auto gold_sort_order = cudf::sorted_order(gold.view()); - auto sorted_gold = cudf::gather(gold.view(), 
*gold_sort_order); - CUDF_TEST_EXPECT_TABLES_EQUAL(*sorted_gold, *sorted_result); -} - -TEST_F(JoinTest, InnerJoinNonAlignedCommonSwap) -{ - CVector cols0, cols1; - cols0.emplace_back(column_wrapper{{3, 1, 2, 0, 2}}.release()); - cols0.emplace_back(column_wrapper{{3, 1, 2, 0, 2}}.release()); - cols0.emplace_back(strcol_wrapper({"s1", "s1", "s0", "s4", "s0"}).release()); - cols0.emplace_back(column_wrapper{{0, 1, 2, 4, 1}}.release()); - cols1.emplace_back(column_wrapper{{2, 2, 0, 4, 3, 5}}.release()); - cols1.emplace_back(strcol_wrapper({"s1", "s0", "s1", "s2", "s1", "s0"}).release()); - cols1.emplace_back(column_wrapper{{1, 0, 1, 2, 1, 0}}.release()); - - Table t0(std::move(cols0)); - Table t1(std::move(cols1)); - - auto result = cudf::inner_join(t0, t1, {1, 2}, {0, 1}, {{1, 0}, {2, 1}}); - auto result_sort_order = cudf::sorted_order(result->view()); - auto sorted_result = cudf::gather(result->view(), *result_sort_order); - - CVector cols_gold; - cols_gold.emplace_back(column_wrapper{{3, 2, 2}}.release()); - cols_gold.emplace_back(column_wrapper{{3, 2, 2}}.release()); - cols_gold.emplace_back(strcol_wrapper({"s1", "s0", "s0"}).release()); - cols_gold.emplace_back(column_wrapper{{0, 2, 1}}.release()); - cols_gold.emplace_back(column_wrapper{{1, 0, 0}}.release()); - Table gold(std::move(cols_gold)); - - auto gold_sort_order = cudf::sorted_order(gold.view()); - auto sorted_gold = cudf::gather(gold.view(), *gold_sort_order); - CUDF_TEST_EXPECT_TABLES_EQUAL(*sorted_gold, *sorted_result); + CUDF_TEST_EXPECT_TABLES_EQUIVALENT(*sorted_gold, *sorted_result); } TEST_F(JoinTest, InnerJoinWithNulls) @@ -656,37 +605,41 @@ TEST_F(JoinTest, InnerJoinWithNulls) Table t0(std::move(cols0)); Table t1(std::move(cols1)); - auto result = cudf::inner_join(t0, t1, {0, 1}, {0, 1}, {{0, 0}, {1, 1}}); + auto result = cudf::inner_join(t0, t1, {0, 1}, {0, 1}); auto result_sort_order = cudf::sorted_order(result->view()); auto sorted_result = cudf::gather(result->view(), *result_sort_order); 
column_wrapper col_gold_0{{3, 2}}; strcol_wrapper col_gold_1({"s1", "s0"}, {1, 1}); column_wrapper col_gold_2{{0, 1}}; - column_wrapper col_gold_3{{1, -1}, {1, 0}}; + column_wrapper col_gold_3{{3, 2}}; + strcol_wrapper col_gold_4({"s1", "s0"}, {1, 1}); + column_wrapper col_gold_5{{1, -1}, {1, 0}}; CVector cols_gold; cols_gold.push_back(col_gold_0.release()); cols_gold.push_back(col_gold_1.release()); cols_gold.push_back(col_gold_2.release()); cols_gold.push_back(col_gold_3.release()); + cols_gold.push_back(col_gold_4.release()); + cols_gold.push_back(col_gold_5.release()); Table gold(std::move(cols_gold)); auto gold_sort_order = cudf::sorted_order(gold.view()); auto sorted_gold = cudf::gather(gold.view(), *gold_sort_order); - CUDF_TEST_EXPECT_TABLES_EQUAL(*sorted_gold, *sorted_result); + CUDF_TEST_EXPECT_TABLES_EQUIVALENT(*sorted_gold, *sorted_result); } -// Test to check join behaviour when join keys are null. +// // Test to check join behaviour when join keys are null. TEST_F(JoinTest, InnerJoinOnNulls) { // clang-format off column_wrapper col0_0{{ 3, 1, 2, 0, 2}}; - strcol_wrapper col0_1({"s1", "s1", "s8", "s4", "s0"}, + strcol_wrapper col0_1({"s1", "s1", "s8", "s4", "s0"}, { 1, 1, 0, 1, 1}); column_wrapper col0_2{{ 0, 1, 2, 4, 1}}; column_wrapper col1_0{{ 2, 2, 0, 4, 3}}; - strcol_wrapper col1_1({"s1", "s0", "s1", "s2", "s1"}, + strcol_wrapper col1_1({"s1", "s0", "s1", "s2", "s1"}, { 1, 0, 1, 1, 1}); column_wrapper col1_2{{ 1, 0, 1, 2, 1}}; @@ -701,38 +654,47 @@ TEST_F(JoinTest, InnerJoinOnNulls) Table t0(std::move(cols0)); Table t1(std::move(cols1)); - auto result = cudf::inner_join(t0, t1, {0, 1}, {0, 1}, {{0, 0}, {1, 1}}); + auto result = cudf::inner_join(t0, t1, {0, 1}, {0, 1}); auto result_sort_order = cudf::sorted_order(result->view()); auto sorted_result = cudf::gather(result->view(), *result_sort_order); column_wrapper col_gold_0 {{ 3, 2}}; - strcol_wrapper col_gold_1 ({"s1", "s0"}, + strcol_wrapper col_gold_1 ({"s1", "s0"}, { 1, 0}); column_wrapper 
col_gold_2{{ 0, 2}}; - column_wrapper col_gold_3{{ 1, 0}}; + column_wrapper col_gold_3 {{ 3, 2}}; + strcol_wrapper col_gold_4 ({"s1", "s0"}, + { 1, 0}); + column_wrapper col_gold_5{{ 1, 0}}; CVector cols_gold; cols_gold.push_back(col_gold_0.release()); cols_gold.push_back(col_gold_1.release()); cols_gold.push_back(col_gold_2.release()); cols_gold.push_back(col_gold_3.release()); + cols_gold.push_back(col_gold_4.release()); + cols_gold.push_back(col_gold_5.release()); + Table gold(std::move(cols_gold)); auto gold_sort_order = cudf::sorted_order(gold.view()); auto sorted_gold = cudf::gather(gold.view(), *gold_sort_order); - CUDF_TEST_EXPECT_TABLES_EQUAL(*sorted_gold, *sorted_result); - + CUDF_TEST_EXPECT_TABLES_EQUIVALENT(*sorted_gold, *sorted_result); + // Repeat test with compare_nulls_equal=false, // as per SQL standard. - result = cudf::inner_join(t0, t1, {0, 1}, {0, 1}, {{0, 0}, {1, 1}}, cudf::null_equality::UNEQUAL); + result = cudf::inner_join(t0, t1, {0, 1}, {0, 1}, cudf::null_equality::UNEQUAL); result_sort_order = cudf::sorted_order(result->view()); sorted_result = cudf::gather(result->view(), *result_sort_order); col_gold_0 = {{ 3}}; - col_gold_1 = strcol_wrapper({"s1"}, + col_gold_1 = strcol_wrapper({"s1"}, { 1}); col_gold_2 = {{ 0}}; - col_gold_3 = {{ 1}}; + col_gold_3 = {{ 3}}; + col_gold_4 = strcol_wrapper({"s1"}, + { 1}); + col_gold_5 = {{ 1}}; // clang-format on @@ -741,11 +703,13 @@ TEST_F(JoinTest, InnerJoinOnNulls) cols_gold_sql.push_back(col_gold_1.release()); cols_gold_sql.push_back(col_gold_2.release()); cols_gold_sql.push_back(col_gold_3.release()); + cols_gold_sql.push_back(col_gold_4.release()); + cols_gold_sql.push_back(col_gold_5.release()); Table gold_sql(std::move(cols_gold_sql)); gold_sort_order = cudf::sorted_order(gold_sql.view()); sorted_gold = cudf::gather(gold_sql.view(), *gold_sort_order); - CUDF_TEST_EXPECT_TABLES_EQUAL(*sorted_gold, *sorted_result); + CUDF_TEST_EXPECT_TABLES_EQUIVALENT(*sorted_gold, *sorted_result); } // Empty 
Left Table @@ -766,8 +730,8 @@ TEST_F(JoinTest, EmptyLeftTableInnerJoin) Table empty0(std::move(cols0)); Table t1(std::move(cols1)); - auto result = cudf::inner_join(empty0, t1, {0, 1}, {0, 1}, {{0, 0}, {1, 1}}); - CUDF_TEST_EXPECT_TABLES_EQUAL(empty0, *result); + auto result = cudf::inner_join(empty0, t1, {0, 1}, {0, 1}); + CUDF_TEST_EXPECT_TABLES_EQUIVALENT(empty0, *result); } TEST_F(JoinTest, EmptyLeftTableLeftJoin) @@ -787,36 +751,8 @@ TEST_F(JoinTest, EmptyLeftTableLeftJoin) Table empty0(std::move(cols0)); Table t1(std::move(cols1)); - auto result = cudf::left_join(empty0, t1, {0, 1}, {0, 1}, {{0, 0}, {1, 1}}); - CUDF_TEST_EXPECT_TABLES_EQUAL(empty0, *result); -} - -TEST_F(JoinTest, EmptyLeftTableLeftJoinNonAlignedCommon) -{ - column_wrapper col0_0; - - column_wrapper col1_0{{2, 2, 0, 4, 3}}; - column_wrapper col1_1{{1, 0, 1, 2, 1}, {1, 0, 1, 1, 1}}; - - CVector cols0, cols1; - cols0.emplace_back(col0_0.release()); - cols1.emplace_back(col1_0.release()); - cols1.emplace_back(col1_1.release()); - - Table t0(std::move(cols0)); - Table t1(std::move(cols1)); - - column_wrapper col_gold_0; - column_wrapper col_gold_1; - - CVector cols_gold; - cols_gold.emplace_back(col_gold_0.release()); - cols_gold.emplace_back(col_gold_1.release()); - - Table gold(std::move(cols_gold)); - - auto result = cudf::left_join(t0, t1, {0}, {1}, {{0, 1}}); - CUDF_TEST_EXPECT_TABLES_EQUAL(gold, *result); + auto result = cudf::left_join(empty0, t1, {0, 1}, {0, 1}); + CUDF_TEST_EXPECT_TABLES_EQUIVALENT(empty0, *result); } TEST_F(JoinTest, EmptyLeftTableFullJoin) @@ -833,11 +769,29 @@ TEST_F(JoinTest, EmptyLeftTableFullJoin) cols1.push_back(col1_0.release()); cols1.push_back(col1_1.release()); - Table empty0(std::move(cols0)); - Table t1(std::move(cols1)); + Table lhs(std::move(cols0)); + Table rhs(std::move(cols1)); + + auto result = cudf::full_join(lhs, rhs, {0, 1}, {0, 1}); + auto result_sort_order = cudf::sorted_order(result->view()); + auto sorted_result = cudf::gather(result->view(), 
*result_sort_order); + + column_wrapper col_gold_0{{-1, -1, -1, -1, -1}, {0, 0, 0, 0, 0}}; + column_wrapper col_gold_1{{-1, -1, -1, -1, -1}, {0, 0, 0, 0, 0}}; + column_wrapper col_gold_2{{2, 2, 0, 4, 3}}; + column_wrapper col_gold_3{{1, 0, 1, 2, 1}, {1, 0, 1, 1, 1}}; - auto result = cudf::full_join(empty0, t1, {0, 1}, {0, 1}, {{0, 0}, {1, 1}}); - CUDF_TEST_EXPECT_TABLES_EQUAL(t1, *result); + CVector cols_gold; + cols_gold.push_back(col_gold_0.release()); + cols_gold.push_back(col_gold_1.release()); + cols_gold.push_back(col_gold_2.release()); + cols_gold.push_back(col_gold_3.release()); + Table gold(std::move(cols_gold)); + + auto gold_sort_order = cudf::sorted_order(gold.view()); + auto sorted_gold = cudf::gather(gold.view(), *gold_sort_order); + + CUDF_TEST_EXPECT_TABLES_EQUIVALENT(*sorted_gold, *sorted_result); } // Empty Right Table @@ -858,36 +812,8 @@ TEST_F(JoinTest, EmptyRightTableInnerJoin) Table t0(std::move(cols0)); Table empty1(std::move(cols1)); - auto result = cudf::inner_join(t0, empty1, {0, 1}, {0, 1}, {{0, 0}, {1, 1}}); - CUDF_TEST_EXPECT_TABLES_EQUAL(empty1, *result); -} - -TEST_F(JoinTest, EmptyRightTableInnerJoinNonAlignedCommon) -{ - column_wrapper col0_0{{2, 2, 0, 4, 3}}; - column_wrapper col0_1{{1, 0, 1, 2, 1}, {1, 0, 1, 1, 1}}; - - column_wrapper col1_0; - - CVector cols0, cols1; - cols0.emplace_back(col0_0.release()); - cols0.emplace_back(col0_1.release()); - cols1.emplace_back(col1_0.release()); - - Table t0(std::move(cols0)); - Table t1(std::move(cols1)); - - column_wrapper col_gold_0; - column_wrapper col_gold_1; - - CVector cols_gold; - cols_gold.emplace_back(col_gold_0.release()); - cols_gold.emplace_back(col_gold_1.release()); - - Table gold(std::move(cols_gold)); - - auto result = cudf::inner_join(t0, t1, {1}, {0}, {{1, 0}}); - CUDF_TEST_EXPECT_TABLES_EQUAL(gold, *result); + auto result = cudf::inner_join(t0, empty1, {0, 1}, {0, 1}); + CUDF_TEST_EXPECT_TABLES_EQUIVALENT(empty1, *result); } TEST_F(JoinTest, EmptyRightTableLeftJoin) @@ 
-907,8 +833,8 @@ TEST_F(JoinTest, EmptyRightTableLeftJoin) Table t0(std::move(cols0)); Table empty1(std::move(cols1)); - auto result = cudf::left_join(t0, empty1, {0, 1}, {0, 1}, {{0, 0}, {1, 1}}); - CUDF_TEST_EXPECT_TABLES_EQUAL(t0, *result); + auto result = cudf::left_join(t0, empty1, {0, 1}, {0, 1}); + CUDF_TEST_EXPECT_TABLES_EQUIVALENT(t0, *result); } TEST_F(JoinTest, EmptyRightTableFullJoin) @@ -928,8 +854,8 @@ TEST_F(JoinTest, EmptyRightTableFullJoin) Table t0(std::move(cols0)); Table empty1(std::move(cols1)); - auto result = cudf::full_join(t0, empty1, {0, 1}, {0, 1}, {{0, 0}, {1, 1}}); - CUDF_TEST_EXPECT_TABLES_EQUAL(t0, *result); + auto result = cudf::full_join(t0, empty1, {0, 1}, {0, 1}); + CUDF_TEST_EXPECT_TABLES_EQUIVALENT(t0, *result); } // Both tables empty @@ -950,8 +876,8 @@ TEST_F(JoinTest, BothEmptyInnerJoin) Table t0(std::move(cols0)); Table empty1(std::move(cols1)); - auto result = cudf::inner_join(t0, empty1, {0, 1}, {0, 1}, {{0, 0}, {1, 1}}); - CUDF_TEST_EXPECT_TABLES_EQUAL(empty1, *result); + auto result = cudf::inner_join(t0, empty1, {0, 1}, {0, 1}); + CUDF_TEST_EXPECT_TABLES_EQUIVALENT(empty1, *result); } TEST_F(JoinTest, BothEmptyLeftJoin) @@ -971,8 +897,8 @@ TEST_F(JoinTest, BothEmptyLeftJoin) Table t0(std::move(cols0)); Table empty1(std::move(cols1)); - auto result = cudf::left_join(t0, empty1, {0, 1}, {0, 1}, {{0, 0}, {1, 1}}); - CUDF_TEST_EXPECT_TABLES_EQUAL(empty1, *result); + auto result = cudf::left_join(t0, empty1, {0, 1}, {0, 1}); + CUDF_TEST_EXPECT_TABLES_EQUIVALENT(empty1, *result); } TEST_F(JoinTest, BothEmptyFullJoin) @@ -992,11 +918,11 @@ TEST_F(JoinTest, BothEmptyFullJoin) Table t0(std::move(cols0)); Table empty1(std::move(cols1)); - auto result = cudf::full_join(t0, empty1, {0, 1}, {0, 1}, {{0, 0}, {1, 1}}); - CUDF_TEST_EXPECT_TABLES_EQUAL(empty1, *result); + auto result = cudf::full_join(t0, empty1, {0, 1}, {0, 1}); + CUDF_TEST_EXPECT_TABLES_EQUIVALENT(empty1, *result); } -// EqualValues X Inner,Left,Full +// // 
EqualValues X Inner,Left,Full TEST_F(JoinTest, EqualValuesInnerJoin) { @@ -1015,16 +941,22 @@ TEST_F(JoinTest, EqualValuesInnerJoin) Table t0(std::move(cols0)); Table t1(std::move(cols1)); - auto result = cudf::inner_join(t0, t1, {0, 1}, {0, 1}, {{0, 0}, {1, 1}}); + auto result = cudf::inner_join(t0, t1, {0, 1}, {0, 1}); column_wrapper col_gold_0{{0, 0, 0, 0}}; strcol_wrapper col_gold_1({"s0", "s0", "s0", "s0"}); + column_wrapper col_gold_2{{0, 0, 0, 0}}; + strcol_wrapper col_gold_3({"s0", "s0", "s0", "s0"}); + CVector cols_gold; cols_gold.push_back(col_gold_0.release()); cols_gold.push_back(col_gold_1.release()); + cols_gold.push_back(col_gold_2.release()); + cols_gold.push_back(col_gold_3.release()); + Table gold(std::move(cols_gold)); - CUDF_TEST_EXPECT_TABLES_EQUAL(gold, *result); + CUDF_TEST_EXPECT_TABLES_EQUIVALENT(gold, *result); } TEST_F(JoinTest, EqualValuesLeftJoin) @@ -1044,16 +976,21 @@ TEST_F(JoinTest, EqualValuesLeftJoin) Table t0(std::move(cols0)); Table t1(std::move(cols1)); - auto result = cudf::left_join(t0, t1, {0, 1}, {0, 1}, {{0, 0}, {1, 1}}); + auto result = cudf::left_join(t0, t1, {0, 1}, {0, 1}); column_wrapper col_gold_0{{0, 0, 0, 0}, {1, 1, 1, 1}}; strcol_wrapper col_gold_1({"s0", "s0", "s0", "s0"}, {1, 1, 1, 1}); + column_wrapper col_gold_2{{0, 0, 0, 0}, {1, 1, 1, 1}}; + strcol_wrapper col_gold_3({"s0", "s0", "s0", "s0"}, {1, 1, 1, 1}); + CVector cols_gold; cols_gold.push_back(col_gold_0.release()); cols_gold.push_back(col_gold_1.release()); + cols_gold.push_back(col_gold_2.release()); + cols_gold.push_back(col_gold_3.release()); Table gold(std::move(cols_gold)); - CUDF_TEST_EXPECT_TABLES_EQUAL(gold, *result); + CUDF_TEST_EXPECT_TABLES_EQUIVALENT(gold, *result); } TEST_F(JoinTest, EqualValuesFullJoin) @@ -1073,16 +1010,21 @@ TEST_F(JoinTest, EqualValuesFullJoin) Table t0(std::move(cols0)); Table t1(std::move(cols1)); - auto result = cudf::full_join(t0, t1, {0, 1}, {0, 1}, {{0, 0}, {1, 1}}); + auto result = cudf::full_join(t0, t1, {0, 1}, 
{0, 1}); column_wrapper col_gold_0{{0, 0, 0, 0}}; strcol_wrapper col_gold_1({"s0", "s0", "s0", "s0"}); + column_wrapper col_gold_2{{0, 0, 0, 0}}; + strcol_wrapper col_gold_3({"s0", "s0", "s0", "s0"}); + CVector cols_gold; cols_gold.push_back(col_gold_0.release()); cols_gold.push_back(col_gold_1.release()); + cols_gold.push_back(col_gold_2.release()); + cols_gold.push_back(col_gold_3.release()); Table gold(std::move(cols_gold)); - CUDF_TEST_EXPECT_TABLES_EQUAL(gold, *result); + CUDF_TEST_EXPECT_TABLES_EQUIVALENT(gold, *result); } TEST_F(JoinTest, InnerJoinCornerCase) @@ -1097,18 +1039,20 @@ TEST_F(JoinTest, InnerJoinCornerCase) Table t0(std::move(cols0)); Table t1(std::move(cols1)); - auto result = cudf::inner_join(t0, t1, {0}, {0}, {{0, 0}}); + auto result = cudf::inner_join(t0, t1, {0}, {0}); auto result_sort_order = cudf::sorted_order(result->view()); auto sorted_result = cudf::gather(result->view(), *result_sort_order); column_wrapper col_gold_0{{2, 2, 2, 2}}; + column_wrapper col_gold_1{{2, 2, 2, 2}}; CVector cols_gold; cols_gold.push_back(col_gold_0.release()); + cols_gold.push_back(col_gold_1.release()); Table gold(std::move(cols_gold)); auto gold_sort_order = cudf::sorted_order(gold.view()); auto sorted_gold = cudf::gather(gold.view(), *gold_sort_order); - CUDF_TEST_EXPECT_TABLES_EQUAL(*sorted_gold, *sorted_result); + CUDF_TEST_EXPECT_TABLES_EQUIVALENT(*sorted_gold, *sorted_result); } TEST_F(JoinTest, HashJoinSequentialProbes) @@ -1116,129 +1060,106 @@ TEST_F(JoinTest, HashJoinSequentialProbes) CVector cols1; cols1.emplace_back(column_wrapper{{2, 2, 0, 4, 3}}.release()); cols1.emplace_back(strcol_wrapper{{"s1", "s0", "s1", "s2", "s1"}}.release()); - cols1.emplace_back(column_wrapper{{1, 0, 1, 2, 1}}.release()); Table t1(std::move(cols1)); - cudf::hash_join hash_join(t1, {0, 1}, cudf::null_equality::EQUAL); + cudf::hash_join hash_join(t1, cudf::null_equality::EQUAL); { CVector cols0; cols0.emplace_back(column_wrapper{{3, 1, 2, 0, 3}}.release()); 
cols0.emplace_back(strcol_wrapper({"s0", "s1", "s2", "s4", "s1"}).release()); - cols0.emplace_back(column_wrapper{{0, 1, 2, 4, 1}}.release()); Table t0(std::move(cols0)); - auto result = hash_join.full_join(t0, {0, 1}, {{0, 0}, {1, 1}}); - auto result_sort_order = cudf::sorted_order(result->view()); - auto sorted_result = cudf::gather(result->view(), *result_sort_order); + auto result = hash_join.full_join(t0); + + auto result_table = + cudf::table_view({cudf::column_view{cudf::data_type{cudf::type_id::INT32}, + static_cast(result.first->size()), + result.first->data()}, + cudf::column_view{cudf::data_type{cudf::type_id::INT32}, + static_cast(result.second->size()), + result.second->data()}}); + auto result_sort_order = cudf::sorted_order(result_table); + auto sorted_result = cudf::gather(result_table, *result_sort_order); + + column_wrapper col_gold_0{{NoneValue, NoneValue, NoneValue, NoneValue, 4, 0, 1, 2, 3}}; + column_wrapper col_gold_1{{0, 1, 2, 3, 4, NoneValue, NoneValue, NoneValue, NoneValue}}; CVector cols_gold; - cols_gold.emplace_back(column_wrapper{{2, 2, 0, 4, 3, 3, 1, 2, 0}}.release()); - cols_gold.emplace_back( - strcol_wrapper({"s1", "s0", "s1", "s2", "s1", "s0", "s1", "s2", "s4"}).release()); - cols_gold.emplace_back( - column_wrapper{{-1, -1, -1, -1, 1, 0, 1, 2, 4}, {0, 0, 0, 0, 1, 1, 1, 1, 1}} - .release()); - cols_gold.emplace_back( - column_wrapper{{1, 0, 1, 2, 1, -1, -1, -1, -1}, {1, 1, 1, 1, 1, 0, 0, 0, 0}} - .release()); - Table gold(std::move(cols_gold)); + cols_gold.push_back(col_gold_0.release()); + cols_gold.push_back(col_gold_1.release()); + Table gold(std::move(cols_gold)); auto gold_sort_order = cudf::sorted_order(gold.view()); auto sorted_gold = cudf::gather(gold.view(), *gold_sort_order); - CUDF_TEST_EXPECT_TABLES_EQUAL(*sorted_gold, *sorted_result); + + CUDF_TEST_EXPECT_TABLES_EQUIVALENT(*sorted_gold, *sorted_result); } { CVector cols0; cols0.emplace_back(column_wrapper{{3, 1, 2, 0, 3}}.release()); 
cols0.emplace_back(strcol_wrapper({"s0", "s1", "s2", "s4", "s1"}).release()); - cols0.emplace_back(column_wrapper{{0, 1, 2, 4, 1}}.release()); Table t0(std::move(cols0)); - auto result = hash_join.left_join(t0, {0, 1}, {{0, 0}, {1, 1}}); - auto result_sort_order = cudf::sorted_order(result->view()); - auto sorted_result = cudf::gather(result->view(), *result_sort_order); - - CVector cols_gold; - cols_gold.emplace_back(column_wrapper{{3, 3, 1, 2, 0}, {1, 1, 1, 1, 1}}.release()); - cols_gold.emplace_back( - strcol_wrapper({"s1", "s0", "s1", "s2", "s4"}, {1, 1, 1, 1, 1, 1}).release()); - cols_gold.emplace_back(column_wrapper{{1, 0, 1, 2, 4}, {1, 1, 1, 1, 1}}.release()); - cols_gold.emplace_back(column_wrapper{{1, -1, -1, -1, -1}, {1, 0, 0, 0, 0}}.release()); - Table gold(std::move(cols_gold)); - - auto gold_sort_order = cudf::sorted_order(gold.view()); - auto sorted_gold = cudf::gather(gold.view(), *gold_sort_order); - CUDF_TEST_EXPECT_TABLES_EQUAL(*sorted_gold, *sorted_result); - } - - { - CVector cols0; - cols0.emplace_back(column_wrapper{{3, 1, 2, 0, 2}}.release()); - cols0.emplace_back(column_wrapper{{3, 1, 2, 0, 2}}.release()); - cols0.emplace_back(strcol_wrapper({"s1", "s1", "s0", "s4", "s0"}).release()); - cols0.emplace_back(column_wrapper{{0, 1, 2, 4, 1}}.release()); - - Table t0(std::move(cols0)); + auto result = hash_join.left_join(t0); + auto result_table = + cudf::table_view({cudf::column_view{cudf::data_type{cudf::type_id::INT32}, + static_cast(result.first->size()), + result.first->data()}, + cudf::column_view{cudf::data_type{cudf::type_id::INT32}, + static_cast(result.second->size()), + result.second->data()}}); + auto result_sort_order = cudf::sorted_order(result_table); + auto sorted_result = cudf::gather(result_table, *result_sort_order); - auto probe_build_pair = hash_join.inner_join(t0, {1, 2}, {{1, 0}, {2, 1}}); - auto joined_cols = probe_build_pair.first->release(); - auto build_cols = probe_build_pair.second->release(); - 
joined_cols.insert(joined_cols.end(), - std::make_move_iterator(build_cols.begin()), - std::make_move_iterator(build_cols.end())); - auto result = std::make_unique(std::move(joined_cols)); - auto result_sort_order = cudf::sorted_order(result->view()); - auto sorted_result = cudf::gather(result->view(), *result_sort_order); + column_wrapper col_gold_0{{0, 1, 2, 3, 4}}; + column_wrapper col_gold_1{{NoneValue, NoneValue, NoneValue, NoneValue, 4}}; CVector cols_gold; - cols_gold.emplace_back(column_wrapper{{3, 2, 2}}.release()); - cols_gold.emplace_back(column_wrapper{{3, 2, 2}}.release()); - cols_gold.emplace_back(strcol_wrapper({"s1", "s0", "s0"}).release()); - cols_gold.emplace_back(column_wrapper{{0, 2, 1}}.release()); - cols_gold.emplace_back(column_wrapper{{1, 0, 0}}.release()); - Table gold(std::move(cols_gold)); + cols_gold.push_back(col_gold_0.release()); + cols_gold.push_back(col_gold_1.release()); + Table gold(std::move(cols_gold)); auto gold_sort_order = cudf::sorted_order(gold.view()); auto sorted_gold = cudf::gather(gold.view(), *gold_sort_order); - CUDF_TEST_EXPECT_TABLES_EQUAL(*sorted_gold, *sorted_result); + + CUDF_TEST_EXPECT_TABLES_EQUIVALENT(*sorted_gold, *sorted_result); } { CVector cols0; cols0.emplace_back(column_wrapper{{3, 1, 2, 0, 2}}.release()); - cols0.emplace_back(column_wrapper{{3, 1, 2, 0, 2}}.release()); cols0.emplace_back(strcol_wrapper({"s1", "s1", "s0", "s4", "s0"}).release()); - cols0.emplace_back(column_wrapper{{0, 1, 2, 4, 1}}.release()); Table t0(std::move(cols0)); - auto probe_build_pair = hash_join.inner_join( - t0, {1, 2}, {{1, 0}, {2, 1}}, cudf::hash_join::common_columns_output_side::BUILD); - auto joined_cols = probe_build_pair.second->release(); - auto probe_cols = probe_build_pair.first->release(); - joined_cols.insert(joined_cols.end(), - std::make_move_iterator(probe_cols.begin()), - std::make_move_iterator(probe_cols.end())); - auto result = std::make_unique(std::move(joined_cols)); - auto result_sort_order = 
cudf::sorted_order(result->view()); - auto sorted_result = cudf::gather(result->view(), *result_sort_order); + auto result = hash_join.inner_join(t0); + auto result_table = + cudf::table_view({cudf::column_view{cudf::data_type{cudf::type_id::INT32}, + static_cast(result.first->size()), + result.first->data()}, + cudf::column_view{cudf::data_type{cudf::type_id::INT32}, + static_cast(result.second->size()), + result.second->data()}}); + auto result_sort_order = cudf::sorted_order(result_table); + auto sorted_result = cudf::gather(result_table, *result_sort_order); + + column_wrapper col_gold_0{{2, 4, 0}}; + column_wrapper col_gold_1{{1, 1, 4}}; CVector cols_gold; - cols_gold.emplace_back(column_wrapper{{3, 2, 2}}.release()); - cols_gold.emplace_back(strcol_wrapper({"s1", "s0", "s0"}).release()); - cols_gold.emplace_back(column_wrapper{{1, 0, 0}}.release()); - cols_gold.emplace_back(column_wrapper{{3, 2, 2}}.release()); - cols_gold.emplace_back(column_wrapper{{0, 2, 1}}.release()); - Table gold(std::move(cols_gold)); + cols_gold.push_back(col_gold_0.release()); + cols_gold.push_back(col_gold_1.release()); + Table gold(std::move(cols_gold)); auto gold_sort_order = cudf::sorted_order(gold.view()); auto sorted_gold = cudf::gather(gold.view(), *gold_sort_order); - CUDF_TEST_EXPECT_TABLES_EQUAL(*sorted_gold, *sorted_result); + + CUDF_TEST_EXPECT_TABLES_EQUIVALENT(*sorted_gold, *sorted_result); } } @@ -1262,7 +1183,7 @@ TEST_F(JoinDictionaryTest, LeftJoinNoNulls) auto g0 = cudf::table_view({col0_0, col0_1_w, col0_2}); auto g1 = cudf::table_view({col1_0, col1_1_w, col1_2}); { - auto result = cudf::left_join(t0, t1, {0}, {0}, {}); + auto result = cudf::left_join(t0, t1, {0}, {0}); auto result_view = result->view(); auto decoded1 = cudf::dictionary::decode(result_view.column(1)); auto decoded4 = cudf::dictionary::decode(result_view.column(4)); @@ -1273,18 +1194,8 @@ TEST_F(JoinDictionaryTest, LeftJoinNoNulls) decoded4->view(), result_view.column(5)}); - auto gold = 
cudf::left_join(g0, g1, {0}, {0}, {}); - CUDF_TEST_EXPECT_TABLES_EQUAL(*gold, cudf::table_view(result_decoded)); - } - { - auto result = cudf::left_join(t0, t1, {0, 1}, {0, 1}, {{0, 0}, {1, 1}}); - auto result_view = result->view(); - auto decoded1 = cudf::dictionary::decode(result_view.column(1)); - std::vector result_decoded( - {result_view.column(0), decoded1->view(), result_view.column(2), result_view.column(3)}); - - auto gold = cudf::left_join(g0, g1, {0, 1}, {0, 1}, {{0, 0}, {1, 1}}); - CUDF_TEST_EXPECT_TABLES_EQUAL(*gold, cudf::table_view(result_decoded)); + auto gold = cudf::left_join(g0, g1, {0}, {0}); + CUDF_TEST_EXPECT_TABLES_EQUIVALENT(*gold, cudf::table_view(result_decoded)); } } @@ -1303,17 +1214,21 @@ TEST_F(JoinDictionaryTest, LeftJoinWithNulls) auto t0 = cudf::table_view({col0_0, col0_1, col0_2->view()}); auto t1 = cudf::table_view({col1_0, col1_1, col1_2->view()}); - auto result = cudf::left_join(t0, t1, {0, 1}, {0, 1}, {{0, 0}, {1, 1}}); + auto result = cudf::left_join(t0, t1, {0, 1}, {0, 1}); auto result_view = result->view(); auto decoded2 = cudf::dictionary::decode(result_view.column(2)); - auto decoded3 = cudf::dictionary::decode(result_view.column(3)); - std::vector result_decoded( - {result_view.column(0), result_view.column(1), decoded2->view(), decoded3->view()}); + auto decoded5 = cudf::dictionary::decode(result_view.column(5)); + std::vector result_decoded({result_view.column(0), + result_view.column(1), + decoded2->view(), + result_view.column(3), + result_view.column(4), + decoded5->view()}); auto g0 = cudf::table_view({col0_0, col0_1, col0_2_w}); auto g1 = cudf::table_view({col1_0, col1_1, col1_2_w}); - auto gold = cudf::left_join(g0, g1, {0, 1}, {0, 1}, {{0, 0}, {1, 1}}); - CUDF_TEST_EXPECT_TABLES_EQUAL(*gold, cudf::table_view(result_decoded)); + auto gold = cudf::left_join(g0, g1, {0, 1}, {0, 1}); + CUDF_TEST_EXPECT_TABLES_EQUIVALENT(*gold, cudf::table_view(result_decoded)); } TEST_F(JoinDictionaryTest, InnerJoinNoNulls) @@ 
-1331,15 +1246,20 @@ TEST_F(JoinDictionaryTest, InnerJoinNoNulls) auto t0 = cudf::table_view({col0_0, col0_1->view(), col0_2}); auto t1 = cudf::table_view({col1_0, col1_1->view(), col1_2}); - auto result = cudf::inner_join(t0, t1, {0, 1}, {0, 1}, {{0, 0}, {1, 1}}); + auto result = cudf::inner_join(t0, t1, {0, 1}, {0, 1}); auto result_view = result->view(); auto decoded1 = cudf::dictionary::decode(result_view.column(1)); - std::vector result_decoded( - {result_view.column(0), decoded1->view(), result_view.column(2), result_view.column(3)}); + auto decoded4 = cudf::dictionary::decode(result_view.column(4)); + std::vector result_decoded({result_view.column(0), + decoded1->view(), + result_view.column(2), + result_view.column(3), + decoded4->view(), + result_view.column(5)}); auto g0 = cudf::table_view({col0_0, col0_1_w, col0_2}); auto g1 = cudf::table_view({col1_0, col1_1_w, col1_2}); - auto gold = cudf::inner_join(g0, g1, {0, 1}, {0, 1}, {{0, 0}, {1, 1}}); + auto gold = cudf::inner_join(g0, g1, {0, 1}, {0, 1}); CUDF_TEST_EXPECT_TABLES_EQUIVALENT(*gold, cudf::table_view(result_decoded)); } @@ -1358,16 +1278,20 @@ TEST_F(JoinDictionaryTest, InnerJoinWithNulls) auto t0 = cudf::table_view({col0_0, col0_1, col0_2->view()}); auto t1 = cudf::table_view({col1_0, col1_1, col1_2->view()}); - auto result = cudf::inner_join(t0, t1, {0, 1}, {0, 1}, {{0, 0}, {1, 1}}); + auto result = cudf::inner_join(t0, t1, {0, 1}, {0, 1}); auto result_view = result->view(); auto decoded2 = cudf::dictionary::decode(result_view.column(2)); - auto decoded3 = cudf::dictionary::decode(result_view.column(3)); - std::vector result_decoded( - {result_view.column(0), result_view.column(1), decoded2->view(), decoded3->view()}); + auto decoded5 = cudf::dictionary::decode(result_view.column(5)); + std::vector result_decoded({result_view.column(0), + result_view.column(1), + decoded2->view(), + result_view.column(3), + result_view.column(4), + decoded5->view()}); auto g0 = cudf::table_view({col0_0, col0_1, 
col0_2_w}); auto g1 = cudf::table_view({col1_0, col1_1, col1_2_w}); - auto gold = cudf::inner_join(g0, g1, {0, 1}, {0, 1}, {{0, 0}, {1, 1}}); + auto gold = cudf::inner_join(g0, g1, {0, 1}, {0, 1}); CUDF_TEST_EXPECT_TABLES_EQUIVALENT(*gold, cudf::table_view(result_decoded)); } @@ -1386,16 +1310,21 @@ TEST_F(JoinDictionaryTest, FullJoinNoNulls) auto t0 = cudf::table_view({col0_0, col0_1->view(), col0_2}); auto t1 = cudf::table_view({col1_0, col1_1->view(), col1_2}); - auto result = cudf::full_join(t0, t1, {0, 1}, {0, 1}, {{0, 0}, {1, 1}}); + auto result = cudf::full_join(t0, t1, {0, 1}, {0, 1}); auto result_view = result->view(); auto decoded1 = cudf::dictionary::decode(result_view.column(1)); - std::vector result_decoded( - {result_view.column(0), decoded1->view(), result_view.column(2), result_view.column(3)}); + auto decoded4 = cudf::dictionary::decode(result_view.column(4)); + std::vector result_decoded({result_view.column(0), + decoded1->view(), + result_view.column(2), + result_view.column(3), + decoded4->view(), + result_view.column(5)}); auto g0 = cudf::table_view({col0_0, col0_1_w, col0_2}); auto g1 = cudf::table_view({col1_0, col1_1_w, col1_2}); - auto gold = cudf::full_join(g0, g1, {0, 1}, {0, 1}, {{0, 0}, {1, 1}}); - CUDF_TEST_EXPECT_TABLES_EQUAL(*gold, cudf::table_view(result_decoded)); + auto gold = cudf::full_join(g0, g1, {0, 1}, {0, 1}); + CUDF_TEST_EXPECT_TABLES_EQUIVALENT(*gold, cudf::table_view(result_decoded)); } TEST_F(JoinDictionaryTest, FullJoinWithNulls) @@ -1413,16 +1342,21 @@ TEST_F(JoinDictionaryTest, FullJoinWithNulls) auto t0 = cudf::table_view({col0_0->view(), col0_1, col0_2}); auto t1 = cudf::table_view({col1_0->view(), col1_1, col1_2}); - auto result = cudf::full_join(t0, t1, {0, 1}, {0, 1}, {{0, 0}, {1, 1}}); + auto result = cudf::full_join(t0, t1, {0, 1}, {0, 1}); auto result_view = result->view(); auto decoded0 = cudf::dictionary::decode(result_view.column(0)); - std::vector result_decoded( - {decoded0->view(), 
result_view.column(1), result_view.column(2), result_view.column(3)}); + auto decoded3 = cudf::dictionary::decode(result_view.column(3)); + std::vector result_decoded({decoded0->view(), + result_view.column(1), + result_view.column(2), + decoded3->view(), + result_view.column(4), + result_view.column(5)}); auto g0 = cudf::table_view({col0_0_w, col0_1, col0_2}); auto g1 = cudf::table_view({col1_0_w, col1_1, col1_2}); - auto gold = cudf::full_join(g0, g1, {0, 1}, {0, 1}, {{0, 0}, {1, 1}}); - CUDF_TEST_EXPECT_TABLES_EQUAL(*gold, cudf::table_view(result_decoded)); + auto gold = cudf::full_join(g0, g1, {0, 1}, {0, 1}); + CUDF_TEST_EXPECT_TABLES_EQUIVALENT(*gold, cudf::table_view(result_decoded)); } CUDF_TEST_PROGRAM_MAIN() diff --git a/cpp/tests/join/semi_join_tests.cpp b/cpp/tests/join/semi_join_tests.cpp index 13c74616484..8de9610b07d 100644 --- a/cpp/tests/join/semi_join_tests.cpp +++ b/cpp/tests/join/semi_join_tests.cpp @@ -20,6 +20,7 @@ #include #include #include +#include #include #include @@ -33,809 +34,3 @@ using column_wrapper = cudf::test::fixed_width_column_wrapper; struct JoinTest : public cudf::test::BaseFixture { }; - -TEST_F(JoinTest, LeftSemiJoin) -{ - std::vector a_strings{ - "quick", "accénted", "turtlé", "composéd", "result", "", "words"}; - std::vector b_strings{"quick", "words", "result"}; - std::vector e_strings{"quick", "composéd", "result", ""}; - - column_wrapper a_0{10, 20, 20, 20, 20, 20, 50}; - column_wrapper a_1{5.0, .5, .5, .7, .7, .7, .7}; - column_wrapper a_2{90, 77, 78, 61, 62, 63, 41}; - - cudf::test::strings_column_wrapper a_3( - a_strings.begin(), - a_strings.end(), - thrust::make_transform_iterator(a_strings.begin(), [](auto str) { return str != nullptr; })); - - column_wrapper b_0{10, 20, 20}; - column_wrapper b_1{5.0, .7, .7}; - column_wrapper b_2{90, 75, 62}; - - cudf::test::strings_column_wrapper b_3( - b_strings.begin(), - b_strings.end(), - thrust::make_transform_iterator(b_strings.begin(), [](auto str) { return str != nullptr; 
})); - - column_wrapper expect_0{10, 20, 20, 20}; - column_wrapper expect_1{5.0, .7, .7, .7}; - column_wrapper expect_2{90, 61, 62, 63}; - - cudf::test::strings_column_wrapper expect_3( - e_strings.begin(), - e_strings.end(), - thrust::make_transform_iterator(e_strings.begin(), [](auto str) { return str != nullptr; })); - - std::vector> column_a; - column_a.push_back(a_0.release()); - column_a.push_back(a_1.release()); - column_a.push_back(a_2.release()); - column_a.push_back(a_3.release()); - - std::vector> column_b; - column_b.push_back(b_0.release()); - column_b.push_back(b_1.release()); - column_b.push_back(b_2.release()); - column_b.push_back(b_3.release()); - - cudf::table table_a(std::move(column_a)); - cudf::table table_b(std::move(column_b)); - - auto join_table = cudf::left_semi_join(table_a, table_b, {0, 1}, {0, 1}, {0, 1, 2, 3}); - - CUDF_TEST_EXPECT_COLUMNS_EQUAL(join_table->get_column(0), expect_0); - CUDF_TEST_EXPECT_COLUMNS_EQUAL(join_table->get_column(1), expect_1); - CUDF_TEST_EXPECT_COLUMNS_EQUAL(join_table->get_column(2), expect_2); - CUDF_TEST_EXPECT_COLUMNS_EQUAL(join_table->get_column(3), expect_3); -} - -TEST_F(JoinTest, LeftSemiJoin_with_a_string_key) -{ - std::vector a_strings{ - "quick", "accénted", "turtlé", "composéd", "result", "", "words"}; - std::vector b_strings{"quick", "words", "result"}; - std::vector e_strings{"quick", "result"}; - - column_wrapper a_0{10, 20, 20, 20, 20, 20, 50}; - column_wrapper a_1{5.0, .5, .5, .7, .7, .7, .7}; - column_wrapper a_2{90, 77, 78, 61, 62, 63, 41}; - - cudf::test::strings_column_wrapper a_3( - a_strings.begin(), - a_strings.end(), - thrust::make_transform_iterator(a_strings.begin(), [](auto str) { return str != nullptr; })); - - column_wrapper b_0{10, 20, 20}; - column_wrapper b_1{5.0, .7, .7}; - column_wrapper b_2{90, 75, 62}; - - cudf::test::strings_column_wrapper b_3( - b_strings.begin(), - b_strings.end(), - thrust::make_transform_iterator(b_strings.begin(), [](auto str) { return str != 
nullptr; })); - - column_wrapper expect_0{10, 20}; - column_wrapper expect_1{5.0, .7}; - column_wrapper expect_2{90, 62}; - - cudf::test::strings_column_wrapper expect_3( - e_strings.begin(), - e_strings.end(), - thrust::make_transform_iterator(e_strings.begin(), [](auto str) { return str != nullptr; })); - - std::vector> column_a; - column_a.push_back(a_0.release()); - column_a.push_back(a_1.release()); - column_a.push_back(a_2.release()); - column_a.push_back(a_3.release()); - - std::vector> column_b; - column_b.push_back(b_0.release()); - column_b.push_back(b_1.release()); - column_b.push_back(b_2.release()); - column_b.push_back(b_3.release()); - - cudf::table table_a(std::move(column_a)); - cudf::table table_b(std::move(column_b)); - - auto join_table = cudf::left_semi_join(table_a, table_b, {0, 1, 3}, {0, 1, 3}, {0, 1, 2, 3}); - - CUDF_TEST_EXPECT_COLUMNS_EQUAL(join_table->get_column(0), expect_0); - CUDF_TEST_EXPECT_COLUMNS_EQUAL(join_table->get_column(1), expect_1); - CUDF_TEST_EXPECT_COLUMNS_EQUAL(join_table->get_column(2), expect_2); - CUDF_TEST_EXPECT_COLUMNS_EQUAL(join_table->get_column(3), expect_3); -} - -TEST_F(JoinTest, LeftSemiJoin_with_null) -{ - std::vector a_strings{ - "quick", "accénted", "turtlé", "composéd", "result", "", "words"}; - std::vector b_strings{"quick", "words", "result", nullptr}; - std::vector e_strings{"quick", "result"}; - - column_wrapper a_0{10, 20, 20, 20, 20, 20, 50}; - column_wrapper a_1{5.0, .5, .5, .7, .7, .7, .7}; - column_wrapper a_2{90, 77, 78, 61, 62, 63, 41}; - - cudf::test::strings_column_wrapper a_3( - a_strings.begin(), - a_strings.end(), - thrust::make_transform_iterator(a_strings.begin(), [](auto str) { return str != nullptr; })); - - column_wrapper b_0{10, 20, 20, 50}; - column_wrapper b_1{5.0, .7, .7, .7}; - column_wrapper b_2{90, 75, 62, 41}; - - cudf::test::strings_column_wrapper b_3( - b_strings.begin(), - b_strings.end(), - thrust::make_transform_iterator(b_strings.begin(), [](auto str) { return str != 
nullptr; })); - - column_wrapper expect_0{10, 20}; - column_wrapper expect_1{5.0, .7}; - column_wrapper expect_2{90, 62}; - - cudf::test::strings_column_wrapper expect_3( - e_strings.begin(), - e_strings.end(), - thrust::make_transform_iterator(e_strings.begin(), [](auto str) { return str != nullptr; })); - - std::vector> column_a; - column_a.push_back(a_0.release()); - column_a.push_back(a_1.release()); - column_a.push_back(a_2.release()); - column_a.push_back(a_3.release()); - - std::vector> column_b; - column_b.push_back(b_0.release()); - column_b.push_back(b_1.release()); - column_b.push_back(b_2.release()); - column_b.push_back(b_3.release()); - - cudf::table table_a(std::move(column_a)); - cudf::table table_b(std::move(column_b)); - - auto join_table = cudf::left_semi_join(table_a, table_b, {0, 1, 3}, {0, 1, 3}, {0, 1, 2, 3}); - - CUDF_TEST_EXPECT_COLUMNS_EQUAL(join_table->get_column(0), expect_0); - CUDF_TEST_EXPECT_COLUMNS_EQUAL(join_table->get_column(1), expect_1); - CUDF_TEST_EXPECT_COLUMNS_EQUAL(join_table->get_column(2), expect_2); - CUDF_TEST_EXPECT_COLUMNS_EQUAL(join_table->get_column(3), expect_3); -} - -TEST_F(JoinTest, LeftAntiJoin) -{ - std::vector a_strings{ - "quick", "accénted", "turtlé", "composéd", "result", "", "words"}; - std::vector b_strings{"quick", "words", "result"}; - std::vector e_strings{"accénted", "turtlé", "words"}; - - column_wrapper a_0{10, 20, 20, 20, 20, 20, 50}; - column_wrapper a_1{5.0, .5, .5, .7, .7, .7, .7}; - column_wrapper a_2{90, 77, 78, 61, 62, 63, 41}; - - cudf::test::strings_column_wrapper a_3( - a_strings.begin(), - a_strings.end(), - thrust::make_transform_iterator(a_strings.begin(), [](auto str) { return str != nullptr; })); - - column_wrapper b_0{10, 20, 20}; - column_wrapper b_1{5.0, .7, .7}; - column_wrapper b_2{90, 75, 62}; - - cudf::test::strings_column_wrapper b_3( - b_strings.begin(), - b_strings.end(), - thrust::make_transform_iterator(b_strings.begin(), [](auto str) { return str != nullptr; })); - - 
column_wrapper expect_0{20, 20, 50}; - column_wrapper expect_1{.5, .5, .7}; - column_wrapper expect_2{77, 78, 41}; - - cudf::test::strings_column_wrapper expect_3( - e_strings.begin(), - e_strings.end(), - thrust::make_transform_iterator(e_strings.begin(), [](auto str) { return str != nullptr; })); - - std::vector> column_a; - column_a.push_back(a_0.release()); - column_a.push_back(a_1.release()); - column_a.push_back(a_2.release()); - column_a.push_back(a_3.release()); - - std::vector> column_b; - column_b.push_back(b_0.release()); - column_b.push_back(b_1.release()); - column_b.push_back(b_2.release()); - column_b.push_back(b_3.release()); - - cudf::table table_a(std::move(column_a)); - cudf::table table_b(std::move(column_b)); - - auto join_table = cudf::left_anti_join(table_a, table_b, {0, 1}, {0, 1}, {0, 1, 2, 3}); - - CUDF_TEST_EXPECT_COLUMNS_EQUAL(join_table->get_column(0), expect_0); - CUDF_TEST_EXPECT_COLUMNS_EQUAL(join_table->get_column(1), expect_1); - CUDF_TEST_EXPECT_COLUMNS_EQUAL(join_table->get_column(2), expect_2); - CUDF_TEST_EXPECT_COLUMNS_EQUAL(join_table->get_column(3), expect_3); -} - -TEST_F(JoinTest, LeftAntiJoin_with_a_string_key) -{ - std::vector a_strings{ - "quick", "accénted", "turtlé", "composéd", "result", "", "words"}; - std::vector b_strings{"quick", "words", "result"}; - std::vector e_strings{"accénted", "turtlé", "composéd", "", "words"}; - - column_wrapper a_0{10, 20, 20, 20, 20, 20, 50}; - column_wrapper a_1{5.0, .5, .5, .7, .7, .7, .7}; - column_wrapper a_2{90, 77, 78, 61, 62, 63, 41}; - - cudf::test::strings_column_wrapper a_3( - a_strings.begin(), - a_strings.end(), - thrust::make_transform_iterator(a_strings.begin(), [](auto str) { return str != nullptr; })); - - column_wrapper b_0{10, 20, 20}; - column_wrapper b_1{5.0, .7, .7}; - column_wrapper b_2{90, 75, 62}; - - cudf::test::strings_column_wrapper b_3( - b_strings.begin(), - b_strings.end(), - thrust::make_transform_iterator(b_strings.begin(), [](auto str) { return str != 
nullptr; })); - - column_wrapper expect_0{20, 20, 20, 20, 50}; - column_wrapper expect_1{.5, .5, .7, .7, .7}; - column_wrapper expect_2{77, 78, 61, 63, 41}; - - cudf::test::strings_column_wrapper expect_3( - e_strings.begin(), - e_strings.end(), - thrust::make_transform_iterator(e_strings.begin(), [](auto str) { return str != nullptr; })); - - std::vector> column_a; - column_a.push_back(a_0.release()); - column_a.push_back(a_1.release()); - column_a.push_back(a_2.release()); - column_a.push_back(a_3.release()); - - std::vector> column_b; - column_b.push_back(b_0.release()); - column_b.push_back(b_1.release()); - column_b.push_back(b_2.release()); - column_b.push_back(b_3.release()); - - cudf::table table_a(std::move(column_a)); - cudf::table table_b(std::move(column_b)); - - auto join_table = cudf::left_anti_join(table_a, table_b, {0, 1, 3}, {0, 1, 3}, {0, 1, 2, 3}); - - CUDF_TEST_EXPECT_COLUMNS_EQUAL(join_table->get_column(0), expect_0); - CUDF_TEST_EXPECT_COLUMNS_EQUAL(join_table->get_column(1), expect_1); - CUDF_TEST_EXPECT_COLUMNS_EQUAL(join_table->get_column(2), expect_2); - CUDF_TEST_EXPECT_COLUMNS_EQUAL(join_table->get_column(3), expect_3); -} - -TEST_F(JoinTest, LeftAntiJoin_with_null) -{ - std::vector a_strings{ - "quick", "accénted", "turtlé", "composéd", "result", "", "words"}; - std::vector b_strings{"quick", "words", "result", nullptr}; - std::vector e_strings{"accénted", "turtlé", "composéd", "", "words"}; - - column_wrapper a_0{10, 20, 20, 20, 20, 20, 50}; - column_wrapper a_1{5.0, .5, .5, .7, .7, .7, .7}; - column_wrapper a_2{90, 77, 78, 61, 62, 63, 41}; - - cudf::test::strings_column_wrapper a_3( - a_strings.begin(), - a_strings.end(), - thrust::make_transform_iterator(a_strings.begin(), [](auto str) { return str != nullptr; })); - - column_wrapper b_0{10, 20, 20, 50}; - column_wrapper b_1{5.0, .7, .7, .7}; - column_wrapper b_2{90, 75, 62, 41}; - - cudf::test::strings_column_wrapper b_3( - b_strings.begin(), - b_strings.end(), - 
thrust::make_transform_iterator(b_strings.begin(), [](auto str) { return str != nullptr; })); - - column_wrapper expect_0{20, 20, 20, 20, 50}; - column_wrapper expect_1{.5, .5, .7, .7, .7}; - column_wrapper expect_2{77, 78, 61, 63, 41}; - - cudf::test::strings_column_wrapper expect_3( - e_strings.begin(), - e_strings.end(), - thrust::make_transform_iterator(e_strings.begin(), [](auto str) { return str != nullptr; })); - - std::vector> column_a; - column_a.push_back(a_0.release()); - column_a.push_back(a_1.release()); - column_a.push_back(a_2.release()); - column_a.push_back(a_3.release()); - - std::vector> column_b; - column_b.push_back(b_0.release()); - column_b.push_back(b_1.release()); - column_b.push_back(b_2.release()); - column_b.push_back(b_3.release()); - - cudf::table table_a(std::move(column_a)); - cudf::table table_b(std::move(column_b)); - - auto join_table = cudf::left_anti_join(table_a, table_b, {0, 1, 3}, {0, 1, 3}, {0, 1, 2, 3}); - - CUDF_TEST_EXPECT_COLUMNS_EQUAL(join_table->get_column(0), expect_0); - CUDF_TEST_EXPECT_COLUMNS_EQUAL(join_table->get_column(1), expect_1); - CUDF_TEST_EXPECT_COLUMNS_EQUAL(join_table->get_column(2), expect_2); - CUDF_TEST_EXPECT_COLUMNS_EQUAL(join_table->get_column(3), expect_3); -} - -TEST_F(JoinTest, LeftSemiAntiJoin_exceptions) -{ - std::vector b_strings{"quick", "words", "result", nullptr}; - - column_wrapper b_0{10, 20, 20, 50}; - column_wrapper b_1{5.0, .7, .7, .7}; - column_wrapper b_2{90, 75, 62, 41}; - - cudf::test::strings_column_wrapper b_3( - b_strings.begin(), - b_strings.end(), - thrust::make_transform_iterator(b_strings.begin(), [](auto str) { return str != nullptr; })); - - std::vector> column_a; - - std::vector> column_b; - column_b.push_back(b_0.release()); - column_b.push_back(b_1.release()); - column_b.push_back(b_2.release()); - column_b.push_back(b_3.release()); - - cudf::table table_a(std::move(column_a)); - cudf::table table_b(std::move(column_b)); - - // - // table_a has no columns, table_b has 
columns - // Let's check different permutations of passing table - // with no columns to verify that exceptions are thrown - // - EXPECT_THROW(cudf::left_semi_join(table_a, table_b, {}, {}, {}), cudf::logic_error); - - EXPECT_THROW(cudf::left_anti_join(table_a, table_b, {}, {}, {}), cudf::logic_error); - - EXPECT_THROW(cudf::left_semi_join(table_b, table_a, {}, {}, {}), cudf::logic_error); - - EXPECT_THROW(cudf::left_anti_join(table_b, table_a, {}, {}, {}), cudf::logic_error); - - // - // table_b has columns, so we'll pass the column checks, but - // these should fail the exception check that the number of - // join columns must be the same for each table - // - EXPECT_THROW(cudf::left_semi_join(table_b, table_b, {0}, {}, {}), cudf::logic_error); - - EXPECT_THROW(cudf::left_anti_join(table_b, table_b, {0}, {}, {}), cudf::logic_error); - - EXPECT_THROW(cudf::left_semi_join(table_b, table_b, {}, {0}, {}), cudf::logic_error); - - EXPECT_THROW(cudf::left_anti_join(table_b, table_b, {}, {0}, {}), cudf::logic_error); -} - -TEST_F(JoinTest, LeftSemiJoin_empty_result) -{ - std::vector a_strings{ - "quick", "accénted", "turtlé", "composéd", "result", "", "words"}; - std::vector b_strings{"quick", "words", "result", nullptr}; - std::vector e_strings{}; - - column_wrapper a_0{10, 20, 20, 20, 20, 20, 50}; - column_wrapper a_1{5.0, .5, .5, .7, .7, .7, .7}; - column_wrapper a_2{90, 77, 78, 61, 62, 63, 41}; - - cudf::test::strings_column_wrapper a_3( - a_strings.begin(), - a_strings.end(), - thrust::make_transform_iterator(a_strings.begin(), [](auto str) { return str != nullptr; })); - - column_wrapper b_0{10, 20, 20, 50}; - column_wrapper b_1{5.0, .7, .7, .7}; - column_wrapper b_2{90, 75, 62, 41}; - - cudf::test::strings_column_wrapper b_3( - b_strings.begin(), - b_strings.end(), - thrust::make_transform_iterator(b_strings.begin(), [](auto str) { return str != nullptr; })); - - column_wrapper expect_0{}; - column_wrapper expect_1{}; - column_wrapper expect_2{}; - - 
cudf::test::strings_column_wrapper expect_3( - e_strings.begin(), - e_strings.end(), - thrust::make_transform_iterator(e_strings.begin(), [](auto str) { return str != nullptr; })); - - std::vector> column_a; - column_a.push_back(a_0.release()); - column_a.push_back(a_1.release()); - column_a.push_back(a_2.release()); - column_a.push_back(a_3.release()); - - std::vector> column_b; - column_b.push_back(b_0.release()); - column_b.push_back(b_1.release()); - column_b.push_back(b_2.release()); - column_b.push_back(b_3.release()); - - cudf::table table_a(std::move(column_a)); - cudf::table table_b(std::move(column_b)); - - auto join_table = cudf::left_semi_join(table_a, table_b, {0, 1, 3}, {0, 1, 3}, {}); - - EXPECT_EQ(join_table->num_columns(), 0); - EXPECT_EQ(join_table->num_rows(), 0); - - auto join_table2 = cudf::left_semi_join(table_a, table_b, {}, {}, {0, 1, 3}); - - EXPECT_EQ(join_table2->num_columns(), 3); - EXPECT_EQ(join_table2->num_rows(), 0); - - CUDF_TEST_EXPECT_COLUMNS_EQUAL(join_table2->get_column(0), expect_0); - CUDF_TEST_EXPECT_COLUMNS_EQUAL(join_table2->get_column(1), expect_1); - CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(join_table2->get_column(2), expect_3); -} - -TEST_F(JoinTest, LeftAntiJoin_empty_result) -{ - std::vector a_strings{ - "quick", "accénted", "turtlé", "composéd", "result", "", "words"}; - std::vector b_strings{"quick", "words", "result", nullptr}; - std::vector e_strings{}; - - column_wrapper a_0{10, 20, 20, 20, 20, 20, 50}; - column_wrapper a_1{5.0, .5, .5, .7, .7, .7, .7}; - column_wrapper a_2{90, 77, 78, 61, 62, 63, 41}; - - cudf::test::strings_column_wrapper a_3( - a_strings.begin(), - a_strings.end(), - thrust::make_transform_iterator(a_strings.begin(), [](auto str) { return str != nullptr; })); - - column_wrapper b_0{10, 20, 20, 50}; - column_wrapper b_1{5.0, .7, .7, .7}; - column_wrapper b_2{90, 75, 62, 41}; - - cudf::test::strings_column_wrapper b_3( - b_strings.begin(), - b_strings.end(), - 
thrust::make_transform_iterator(b_strings.begin(), [](auto str) { return str != nullptr; })); - - column_wrapper expect_0{}; - column_wrapper expect_1{}; - column_wrapper expect_2{}; - - cudf::test::strings_column_wrapper expect_3( - e_strings.begin(), - e_strings.end(), - thrust::make_transform_iterator(e_strings.begin(), [](auto str) { return str != nullptr; })); - - std::vector> column_a; - column_a.push_back(a_0.release()); - column_a.push_back(a_1.release()); - column_a.push_back(a_2.release()); - column_a.push_back(a_3.release()); - - std::vector> column_b; - column_b.push_back(b_0.release()); - column_b.push_back(b_1.release()); - column_b.push_back(b_2.release()); - column_b.push_back(b_3.release()); - - cudf::table table_a(std::move(column_a)); - cudf::table table_b(std::move(column_b)); - - auto join_table = cudf::left_anti_join(table_a, table_b, {0, 1, 3}, {0, 1, 3}, {}); - - EXPECT_EQ(join_table->num_columns(), 0); - EXPECT_EQ(join_table->num_rows(), 0); - - auto join_table2 = cudf::left_anti_join(table_a, table_b, {}, {}, {0, 1, 3}); - - EXPECT_EQ(join_table2->num_columns(), 3); - EXPECT_EQ(join_table2->num_rows(), 0); - - CUDF_TEST_EXPECT_COLUMNS_EQUAL(join_table2->get_column(0), expect_0); - CUDF_TEST_EXPECT_COLUMNS_EQUAL(join_table2->get_column(1), expect_1); - CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(join_table2->get_column(2), expect_3); -} - -TEST_F(JoinTest, LeftSemiAntiJoin_empty_table) -{ - std::vector a_strings{}; - std::vector b_strings{"quick", "words", "result", nullptr}; - std::vector e_strings{}; - - column_wrapper a_0{}; - column_wrapper a_1{}; - column_wrapper a_2{}; - - cudf::test::strings_column_wrapper a_3( - a_strings.begin(), - a_strings.end(), - thrust::make_transform_iterator(a_strings.begin(), [](auto str) { return str != nullptr; })); - - column_wrapper b_0{10, 20, 20, 50}; - column_wrapper b_1{5.0, .7, .7, .7}; - column_wrapper b_2{90, 75, 62, 41}; - - cudf::test::strings_column_wrapper b_3( - b_strings.begin(), - b_strings.end(), 
- thrust::make_transform_iterator(b_strings.begin(), [](auto str) { return str != nullptr; })); - - column_wrapper expect_0{}; - column_wrapper expect_1{}; - column_wrapper expect_2{}; - - cudf::test::strings_column_wrapper expect_3( - e_strings.begin(), - e_strings.end(), - thrust::make_transform_iterator(e_strings.begin(), [](auto str) { return str != nullptr; })); - - std::vector> column_a; - column_a.push_back(a_0.release()); - column_a.push_back(a_1.release()); - column_a.push_back(a_2.release()); - column_a.push_back(a_3.release()); - - std::vector> column_b; - column_b.push_back(b_0.release()); - column_b.push_back(b_1.release()); - column_b.push_back(b_2.release()); - column_b.push_back(b_3.release()); - - cudf::table table_a(std::move(column_a)); - cudf::table table_b(std::move(column_b)); - - auto join_table = cudf::left_semi_join(table_a, table_b, {0, 1, 3}, {0, 1, 3}, {0, 1, 2, 3}); - - CUDF_TEST_EXPECT_COLUMNS_EQUAL(join_table->get_column(0), expect_0); - CUDF_TEST_EXPECT_COLUMNS_EQUAL(join_table->get_column(1), expect_1); - CUDF_TEST_EXPECT_COLUMNS_EQUAL(join_table->get_column(2), expect_2); - CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(join_table->get_column(3), expect_3); - - auto join_table2 = cudf::left_semi_join(table_b, table_a, {0, 1, 3}, {0, 1, 3}, {0, 1, 2, 3}); - - CUDF_TEST_EXPECT_COLUMNS_EQUAL(join_table2->get_column(0), expect_0); - CUDF_TEST_EXPECT_COLUMNS_EQUAL(join_table2->get_column(1), expect_1); - CUDF_TEST_EXPECT_COLUMNS_EQUAL(join_table2->get_column(2), expect_2); - CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(join_table2->get_column(3), expect_3); - - auto join_table3 = cudf::left_anti_join(table_a, table_b, {0, 1, 3}, {0, 1, 3}, {0, 1, 2, 3}); - - CUDF_TEST_EXPECT_COLUMNS_EQUAL(join_table3->get_column(0), expect_0); - CUDF_TEST_EXPECT_COLUMNS_EQUAL(join_table3->get_column(1), expect_1); - CUDF_TEST_EXPECT_COLUMNS_EQUAL(join_table3->get_column(2), expect_2); - CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(join_table3->get_column(3), expect_3); - - auto 
join_table4 = cudf::left_anti_join(table_a, table_a, {0, 1, 3}, {0, 1, 3}, {0, 1, 2, 3}); - - CUDF_TEST_EXPECT_COLUMNS_EQUAL(join_table4->get_column(0), expect_0); - CUDF_TEST_EXPECT_COLUMNS_EQUAL(join_table4->get_column(1), expect_1); - CUDF_TEST_EXPECT_COLUMNS_EQUAL(join_table4->get_column(2), expect_2); - CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(join_table4->get_column(3), expect_3); - - auto join_table5 = cudf::left_anti_join(table_a, table_a, {0, 1, 3}, {0, 1, 3}, {0, 1, 2, 3}); - - CUDF_TEST_EXPECT_COLUMNS_EQUAL(join_table5->get_column(0), expect_0); - CUDF_TEST_EXPECT_COLUMNS_EQUAL(join_table5->get_column(1), expect_1); - CUDF_TEST_EXPECT_COLUMNS_EQUAL(join_table5->get_column(2), expect_2); - CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(join_table5->get_column(3), expect_3); -} - -TEST_F(JoinTest, LeftAntiJoin_empty_right_table) -{ - std::vector a_strings{"quick", "words", "result", nullptr}; - std::vector b_strings{}; - std::vector e_strings{"quick", "words", "result", nullptr}; - - column_wrapper a_0{10, 20, 20, 50}; - column_wrapper a_1{5.0, .7, .7, .7}; - column_wrapper a_2{90, 75, 62, 41}; - - cudf::test::strings_column_wrapper a_3( - a_strings.begin(), - a_strings.end(), - thrust::make_transform_iterator(a_strings.begin(), [](auto str) { return str != nullptr; })); - - column_wrapper b_0{}; - column_wrapper b_1{}; - column_wrapper b_2{}; - - cudf::test::strings_column_wrapper b_3( - b_strings.begin(), - b_strings.end(), - thrust::make_transform_iterator(b_strings.begin(), [](auto str) { return str != nullptr; })); - - column_wrapper expect_0{10, 20, 20, 50}; - column_wrapper expect_1{5.0, .7, .7, .7}; - column_wrapper expect_2{90, 75, 62, 41}; - - cudf::test::strings_column_wrapper expect_3( - e_strings.begin(), - e_strings.end(), - thrust::make_transform_iterator(e_strings.begin(), [](auto str) { return str != nullptr; })); - - std::vector> column_a; - column_a.push_back(a_0.release()); - column_a.push_back(a_1.release()); - column_a.push_back(a_2.release()); - 
column_a.push_back(a_3.release()); - - std::vector> column_b; - column_b.push_back(b_0.release()); - column_b.push_back(b_1.release()); - column_b.push_back(b_2.release()); - column_b.push_back(b_3.release()); - - cudf::table table_a(std::move(column_a)); - cudf::table table_b(std::move(column_b)); - - auto join_table = cudf::left_anti_join(table_a, table_b, {0, 1, 3}, {0, 1, 3}, {0, 1, 2, 3}); - - CUDF_TEST_EXPECT_COLUMNS_EQUAL(join_table->get_column(0), expect_0); - CUDF_TEST_EXPECT_COLUMNS_EQUAL(join_table->get_column(1), expect_1); - CUDF_TEST_EXPECT_COLUMNS_EQUAL(join_table->get_column(2), expect_2); - CUDF_TEST_EXPECT_COLUMNS_EQUAL(join_table->get_column(3), expect_3); -} - -struct JoinDictionaryTest : public cudf::test::BaseFixture { -}; - -TEST_F(JoinDictionaryTest, LeftSemiJoin) -{ - column_wrapper a_0{10, 20, 20, 20, 20, 20, 50}; - column_wrapper a_1{5.0, .5, .5, .7, .7, .7, .7}; - column_wrapper a_2{90, 77, 78, 61, 62, 63, 41}; - cudf::test::strings_column_wrapper a_3_w( - {"quick", "accénted", "turtlé", "composéd", "result", "", "words"}); - auto a_3 = cudf::dictionary::encode(a_3_w); - - column_wrapper b_0{10, 20, 20}; - column_wrapper b_1{5.0, .7, .7}; - column_wrapper b_2{90, 75, 62}; - cudf::test::strings_column_wrapper b_3_w({"quick", "words", "result"}); - auto b_3 = cudf::dictionary::encode(b_3_w); - - auto table_a = cudf::table_view({a_0, a_1, a_2, a_3->view()}); - auto table_b = cudf::table_view({b_0, b_1, b_2, b_3->view()}); - auto expect_a = cudf::table_view({a_0, a_1, a_2, a_3_w}); - auto expect_b = cudf::table_view({b_0, b_1, b_2, b_3_w}); - { - auto result = cudf::left_semi_join(table_a, table_b, {0, 1}, {0, 1}, {0, 1, 2, 3}); - auto result_view = result->view(); - auto decoded3 = cudf::dictionary::decode(result_view.column(3)); - std::vector result_decoded( - {result_view.column(0), result_view.column(1), result_view.column(2), decoded3->view()}); - - auto expected = cudf::left_semi_join(expect_a, expect_b, {0, 1}, {0, 1}, {0, 1, 2, 3}); 
- CUDF_TEST_EXPECT_TABLES_EQUAL(cudf::table_view(result_decoded), *expected); - } - { - auto result = cudf::left_semi_join(table_a, table_b, {0, 1, 3}, {0, 1, 3}, {0, 1, 2, 3}); - auto result_view = result->view(); - auto decoded3 = cudf::dictionary::decode(result_view.column(3)); - std::vector result_decoded( - {result_view.column(0), result_view.column(1), result_view.column(2), decoded3->view()}); - - auto expected = cudf::left_semi_join(expect_a, expect_b, {0, 1, 3}, {0, 1, 3}, {0, 1, 2, 3}); - CUDF_TEST_EXPECT_TABLES_EQUIVALENT(cudf::table_view(result_decoded), *expected); - } -} - -TEST_F(JoinDictionaryTest, LeftSemiJoinWithNulls) -{ - column_wrapper a_0{10, 20, 20, 20, 20, 20, 50}; - column_wrapper a_1{5.0, .5, .5, .7, .7, .7, .7}; - column_wrapper a_2{90, 77, 78, 61, 62, 63, 41}; - cudf::test::strings_column_wrapper a_3_w( - {"quick", "accénted", "turtlé", "composéd", "result", "", "words"}); - auto a_3 = cudf::dictionary::encode(a_3_w); - - column_wrapper b_0{10, 20, 20, 50}; - column_wrapper b_1{5.0, .7, .7, .7}; - column_wrapper b_2{90, 75, 62, 41}; - cudf::test::strings_column_wrapper b_3_w({"quick", "words", "result", ""}, {1, 1, 1, 0}); - auto b_3 = cudf::dictionary::encode(b_3_w); - - auto table_a = cudf::table_view({a_0, a_1, a_2, a_3->view()}); - auto table_b = cudf::table_view({b_0, b_1, b_2, b_3->view()}); - - auto result = cudf::left_semi_join(table_a, table_b, {0, 1, 3}, {0, 1, 3}, {0, 1, 2, 3}); - auto result_view = result->view(); - auto decoded3 = cudf::dictionary::decode(result_view.column(3)); - std::vector result_decoded( - {result_view.column(0), result_view.column(1), result_view.column(2), decoded3->view()}); - - auto expect_a = cudf::table_view({a_0, a_1, a_2, a_3_w}); - auto expect_b = cudf::table_view({b_0, b_1, b_2, b_3_w}); - auto expected = cudf::left_semi_join(expect_a, expect_b, {0, 1, 3}, {0, 1, 3}, {0, 1, 2, 3}); - CUDF_TEST_EXPECT_TABLES_EQUIVALENT(cudf::table_view(result_decoded), *expected); -} - 
-TEST_F(JoinDictionaryTest, LeftAntiJoin) -{ - column_wrapper a_0{10, 20, 20, 20, 20, 20, 50}; - column_wrapper a_1{5.0, .5, .5, .7, .7, .7, .7}; - column_wrapper a_2{90, 77, 78, 61, 62, 63, 41}; - cudf::test::strings_column_wrapper a_3_w( - {"quick", "accénted", "turtlé", "composéd", "result", "", "words"}); - auto a_3 = cudf::dictionary::encode(a_3_w); - - column_wrapper b_0{10, 20, 20}; - column_wrapper b_1{5.0, .7, .7}; - column_wrapper b_2{90, 75, 62}; - cudf::test::strings_column_wrapper b_3_w({"quick", "words", "result"}); - auto b_3 = cudf::dictionary::encode(b_3_w); - - auto table_a = cudf::table_view({a_0, a_1, a_2, a_3->view()}); - auto table_b = cudf::table_view({b_0, b_1, b_2, b_3->view()}); - auto expect_a = cudf::table_view({a_0, a_1, a_2, a_3_w}); - auto expect_b = cudf::table_view({b_0, b_1, b_2, b_3_w}); - { - auto result = cudf::left_anti_join(table_a, table_b, {0, 1}, {0, 1}, {0, 1, 2, 3}); - auto result_view = result->view(); - auto decoded3 = cudf::dictionary::decode(result_view.column(3)); - std::vector result_decoded( - {result_view.column(0), result_view.column(1), result_view.column(2), decoded3->view()}); - - auto expected = cudf::left_anti_join(expect_a, expect_b, {0, 1}, {0, 1}, {0, 1, 2, 3}); - CUDF_TEST_EXPECT_TABLES_EQUAL(cudf::table_view(result_decoded), *expected); - } - { - auto result = cudf::left_anti_join(table_a, table_b, {0, 1, 3}, {0, 1, 3}, {0, 1, 2, 3}); - auto result_view = result->view(); - auto decoded3 = cudf::dictionary::decode(result_view.column(3)); - std::vector result_decoded( - {result_view.column(0), result_view.column(1), result_view.column(2), decoded3->view()}); - - auto expected = cudf::left_anti_join(expect_a, expect_b, {0, 1, 3}, {0, 1, 3}, {0, 1, 2, 3}); - CUDF_TEST_EXPECT_TABLES_EQUIVALENT(cudf::table_view(result_decoded), *expected); - } -} - -TEST_F(JoinDictionaryTest, LeftAntiJoinWithNulls) -{ - column_wrapper a_0{10, 20, 20, 20, 20, 20, 50}; - column_wrapper a_1{5.0, .5, .5, .7, .7, .7, .7}; - 
column_wrapper a_2{90, 77, 78, 61, 62, 63, 41}; - cudf::test::strings_column_wrapper a_3_w( - {"quick", "accénted", "turtlé", "composéd", "result", "", "words"}); - auto a_3 = cudf::dictionary::encode(a_3_w); - - column_wrapper b_0{10, 20, 20, 50}; - column_wrapper b_1{5.0, .7, .7, .7}; - column_wrapper b_2{90, 75, 62, 41}; - cudf::test::strings_column_wrapper b_3_w({"quick", "words", "result", ""}, {1, 1, 1, 0}); - auto b_3 = cudf::dictionary::encode(b_3_w); - - auto table_a = cudf::table_view({a_0, a_1, a_2, a_3->view()}); - auto table_b = cudf::table_view({b_0, b_1, b_2, b_3->view()}); - - auto result = cudf::left_anti_join(table_a, table_b, {0, 1, 3}, {0, 1, 3}, {0, 1, 2, 3}); - auto result_view = result->view(); - auto decoded3 = cudf::dictionary::decode(result_view.column(3)); - std::vector result_decoded( - {result_view.column(0), result_view.column(1), result_view.column(2), decoded3->view()}); - - auto expect_a = cudf::table_view({a_0, a_1, a_2, a_3_w}); - auto expect_b = cudf::table_view({b_0, b_1, b_2, b_3_w}); - auto expected = cudf::left_anti_join(expect_a, expect_b, {0, 1, 3}, {0, 1, 3}, {0, 1, 2, 3}); - CUDF_TEST_EXPECT_TABLES_EQUIVALENT(cudf::table_view(result_decoded), *expected); -} diff --git a/python/cudf/cudf/_lib/copying.pyx b/python/cudf/cudf/_lib/copying.pyx index e5501428624..4c72ba2e055 100644 --- a/python/cudf/cudf/_lib/copying.pyx +++ b/python/cudf/cudf/_lib/copying.pyx @@ -134,11 +134,16 @@ def copy_range(Column input_column, input_begin, input_end, target_begin) -def gather(Table source_table, Column gather_map, bool keep_index=True): +def gather( + Table source_table, + Column gather_map, + bool keep_index=True, + bool nullify=False +): if not pd.api.types.is_integer_dtype(gather_map.dtype): raise ValueError("Gather map is not integer dtype.") - if len(gather_map) > 0: + if len(gather_map) > 0 and not nullify: gm_min, gm_max = minmax(gather_map) if gm_min < -len(source_table) or gm_max >= len(source_table): raise IndexError(f"Gather 
map index with min {gm_min}," @@ -154,7 +159,8 @@ def gather(Table source_table, Column gather_map, bool keep_index=True): source_table_view = source_table.data_view() cdef column_view gather_map_view = gather_map.view() cdef cpp_copying.out_of_bounds_policy policy = ( - cpp_copying.out_of_bounds_policy.DONT_CHECK + cpp_copying.out_of_bounds_policy.NULLIFY if nullify + else cpp_copying.out_of_bounds_policy.DONT_CHECK ) with nogil: diff --git a/python/cudf/cudf/_lib/cpp/join.pxd b/python/cudf/cudf/_lib/cpp/join.pxd index 10edf370f5d..c221fea926d 100644 --- a/python/cudf/cudf/_lib/cpp/join.pxd +++ b/python/cudf/cudf/_lib/cpp/join.pxd @@ -4,44 +4,40 @@ from libcpp.memory cimport unique_ptr from libcpp.vector cimport vector from libcpp.pair cimport pair from libcpp cimport bool +from libcpp.pair cimport pair +from libcpp.memory cimport unique_ptr +from cudf._lib.cpp.column.column cimport column from cudf._lib.cpp.table.table cimport table from cudf._lib.cpp.table.table_view cimport table_view +from cudf._lib.cpp.types cimport size_type +from rmm._lib.device_uvector cimport device_uvector +ctypedef unique_ptr[device_uvector[size_type]] gather_map_type + cdef extern from "cudf/join.hpp" namespace "cudf" nogil: - cdef unique_ptr[table] inner_join( - const table_view left, - const table_view right, - const vector[int] left_on, - const vector[int] right_on, - const vector[pair[int, int]] columns_in_common + cdef pair[gather_map_type, gather_map_type] inner_join( + const table_view left_keys, + const table_view right_keys, ) except + - cdef unique_ptr[table] left_join( - const table_view left, - const table_view right, - const vector[int] left_on, - const vector[int] right_on, - const vector[pair[int, int]] columns_in_common + + cdef pair[gather_map_type, gather_map_type] left_join( + const table_view left_keys, + const table_view right_keys, ) except + - cdef unique_ptr[table] full_join( - const table_view left, - const table_view right, - const vector[int] left_on, - const 
vector[int] right_on, - const vector[pair[int, int]] columns_in_common + + cdef pair[gather_map_type, gather_map_type] full_join( + const table_view left_keys, + const table_view right_keys, ) except + - cdef unique_ptr[table] left_semi_join( - const table_view left, - const table_view right, - const vector[int] left_on, - const vector[int] right_on, - const vector[int] return_columns + + cdef gather_map_type left_semi_join( + const table_view left_keys, + const table_view right_keys, ) except + - cdef unique_ptr[table] left_anti_join( - const table_view left, - const table_view right, - const vector[int] left_on, - const vector[int] right_on, - const vector[int] return_columns + + cdef gather_map_type left_anti_join( + const table_view left_keys, + const table_view right_keys, ) except + diff --git a/python/cudf/cudf/_lib/cpp/table/table_view.pxd b/python/cudf/cudf/_lib/cpp/table/table_view.pxd index 2f386d337cd..7bbfa69836c 100644 --- a/python/cudf/cudf/_lib/cpp/table/table_view.pxd +++ b/python/cudf/cudf/_lib/cpp/table/table_view.pxd @@ -15,6 +15,7 @@ cdef extern from "cudf/table/table_view.hpp" namespace "cudf" nogil: column_view column(size_type column_index) except + size_type num_columns() except + size_type num_rows() except + + table_view select(vector[size_type] column_indices) except + cdef cppclass mutable_table_view: mutable_table_view() except + diff --git a/python/cudf/cudf/_lib/join.pyx b/python/cudf/cudf/_lib/join.pyx index 38f13b9f994..69b8004cede 100644 --- a/python/cudf/cudf/_lib/join.pyx +++ b/python/cudf/cudf/_lib/join.pyx @@ -1,222 +1,88 @@ # Copyright (c) 2020, NVIDIA CORPORATION. 
+import cudf + from collections import OrderedDict from itertools import chain -from libcpp.memory cimport unique_ptr +from libcpp.memory cimport unique_ptr, make_unique from libcpp.utility cimport move from libcpp.vector cimport vector from libcpp.pair cimport pair from libcpp cimport bool +from cudf._lib.column cimport Column from cudf._lib.table cimport Table, columns_from_ptr +from cudf._lib.cpp.column.column cimport column +from cudf._lib.cpp.types cimport size_type, data_type, type_id from cudf._lib.cpp.table.table cimport table from cudf._lib.cpp.table.table_view cimport table_view cimport cudf._lib.cpp.join as cpp_join -cpdef join(Table lhs, - Table rhs, - object how, - object method, - object left_on=None, - object right_on=None, - bool left_index=False, - bool right_index=False - ): - """ - Call libcudf++ join for full outer, inner and left joins. - """ - - cdef Table c_lhs = lhs - cdef Table c_rhs = rhs - - # Views might or might not include index - cdef table_view lhs_view - cdef table_view rhs_view - - # Will hold the join column indices into L and R tables - cdef vector[int] left_on_ind - cdef vector[int] right_on_ind - - # If left/right index, will pass a full view - # must offset the data column indices by # of index columns - num_inds_left = len(left_on) + (lhs._num_indices * left_index) - num_inds_right = len(right_on) + (rhs._num_indices * right_index) - left_on_ind.reserve(num_inds_left) - right_on_ind.reserve(num_inds_right) - - # Only used for semi or anti joins - # The result columns are only the left hand columns - cdef vector[int] all_left_inds = range( - lhs._num_columns + (lhs._num_indices * left_index) - ) - cdef vector[int] all_right_inds = range( - rhs._num_columns + (rhs._num_indices * right_index) - ) - result_col_names = compute_result_col_names(lhs, rhs, how) - - columns_in_common = OrderedDict() - cdef vector[pair[int, int]] c_columns_in_common - - # keep track of where the desired index column will end up - result_index_pos = 
None - if left_index or right_index: - # If either true, we need to process both indices as columns - lhs_view = c_lhs.view() - rhs_view = c_rhs.view() - - left_join_cols = list(lhs._index_names) + list(lhs._data.keys()) - right_join_cols = list(rhs._index_names) + list(rhs._data.keys()) - if left_index and right_index: - # Index columns will be common, on the left, dropped from right - # Index name is from the left - # Both views, must take index column indices - left_on_indices = right_on_indices = range(lhs._num_indices) - result_idx_positions = range(lhs._num_indices) - result_index_names = lhs._index_names - - elif left_index: - # Joins left index columns with right 'on' columns - left_on_indices = range(lhs._num_indices) - right_on_indices = [ - right_join_cols.index(on_col) for on_col in right_on - ] - - # The left index columns 'become' the new RHS columns - # and the right index 'survives' - result_idx_positions = range( - len(left_join_cols), len(left_join_cols) + lhs._num_indices - ) - result_index_names = rhs._index_names - - # but since the common columns are gathered from the left - # the rhs 'on' cols are returned on the left of the result - # rearrange the names so account for this - common = [None] * rhs._num_indices - for i in range(rhs._num_indices): - common[i] = result_col_names.pop( - result_col_names.index(right_on[i]) - ) - result_col_names = common + result_col_names - elif right_index: - # Joins right index columns with left 'on' columns - right_on_indices = range(rhs._num_indices) - left_on_indices = [ - left_join_cols.index(on_col) for on_col in left_on - ] - - # The right index columns 'become' the new LHS columns - # and the left index survives - # since they are already gathered from the left, - # no rearranging has to be done - result_idx_positions = range(lhs._num_indices) - result_index_names = lhs._index_names - for i_l, i_r in zip(left_on_indices, right_on_indices): - left_on_ind.push_back(i_l) - right_on_ind.push_back(i_r) - 
columns_in_common[(i_l, i_r)] = None - else: - # cuDF's Python layer will create a new RangeIndex for this case - lhs_view = c_lhs.data_view() - rhs_view = c_rhs.data_view() - - left_join_cols = list(lhs._data.keys()) - right_join_cols = list(rhs._data.keys()) - - # If both left/right_index, joining on indices plus additional cols - # If neither, joining on just cols, not indices - # In both cases, must match up additional column indices in lhs/rhs - if left_index == right_index: - for name in left_on: - left_on_ind.push_back(left_join_cols.index(name)) - if name in right_on: - if (left_on.index(name) == right_on.index(name)): - columns_in_common[( - left_join_cols.index(name), - right_join_cols.index(name) - )] = None - for name in right_on: - right_on_ind.push_back(right_join_cols.index(name)) - c_columns_in_common = list(columns_in_common.keys()) - cdef unique_ptr[table] c_result - if how == 'inner': - with nogil: - c_result = move(cpp_join.inner_join( - lhs_view, - rhs_view, - left_on_ind, - right_on_ind, - c_columns_in_common - )) - elif how == 'left': - with nogil: - c_result = move(cpp_join.left_join( - lhs_view, - rhs_view, - left_on_ind, - right_on_ind, - c_columns_in_common - )) - elif how == 'outer': - with nogil: - c_result = move(cpp_join.full_join( - lhs_view, - rhs_view, - left_on_ind, - right_on_ind, - c_columns_in_common - )) - elif how == 'leftsemi': - with nogil: - c_result = move(cpp_join.left_semi_join( - lhs_view, - rhs_view, - left_on_ind, - right_on_ind, - all_left_inds - )) - elif how == 'leftanti': - with nogil: - c_result = move(cpp_join.left_anti_join( - lhs_view, - rhs_view, - left_on_ind, - right_on_ind, - all_left_inds - )) - - all_cols_py = columns_from_ptr(move(c_result)) - if left_index or right_index: - ind_cols = OrderedDict() - for name, pos in zip( - result_index_names[::-1], result_idx_positions[::-1] - ): - ind_cols[name] = all_cols_py.pop(pos) - index = OrderedDict() - for k, v in reversed(ind_cols.items()): - index[k] = v - 
index = Table(index) +# The functions below return the *gathermaps* that represent +# the join result when joining on the keys `lhs` and `rhs`. + +cpdef join(Table lhs, Table rhs, how=None): + cdef pair[cpp_join.gather_map_type, cpp_join.gather_map_type] c_result + cdef table_view c_lhs = lhs.view() + cdef table_view c_rhs = rhs.view() + + if how == "inner": + c_result = move(cpp_join.inner_join( + c_lhs, + c_rhs + )) + elif how == "left": + c_result = move(cpp_join.left_join( + c_lhs, + c_rhs + )) + elif how == "outer": + c_result = move(cpp_join.full_join( + c_lhs, + c_rhs + )) else: - index = None - data_ordered_dict = OrderedDict(zip(result_col_names, all_cols_py)) - return Table(data=data_ordered_dict, index=index) - - -def compute_result_col_names(lhs, rhs, how): - """ - Determine the names of the data columns in the result of - a libcudf join, based on the original left and right frames - as well as the type of join that was performed. - """ - if how in {"left", "inner", "outer", "leftsemi", "leftanti"}: - a = lhs._data.keys() - if how not in {"leftsemi", "leftanti"}: - return list(chain(a, (k for k in rhs._data.keys() - if k not in lhs._data.keys()))) - return list(a) + raise ValueError(f"Invalid join type {how}") + + cdef Column left_rows = _gather_map_as_column(move(c_result.first)) + cdef Column right_rows = _gather_map_as_column(move(c_result.second)) + return left_rows, right_rows + + +cpdef semi_join(Table lhs, Table rhs, how=None): + # left-semi and left-anti joins + cdef cpp_join.gather_map_type c_result + cdef table_view c_lhs = lhs.view() + cdef table_view c_rhs = rhs.view() + + if how == "leftsemi": + c_result = move(cpp_join.left_semi_join( + c_lhs, + c_rhs + )) + elif how == "leftanti": + c_result = move(cpp_join.left_anti_join( + c_lhs, + c_rhs + )) else: - raise NotImplementedError( - f"{how} merge not supported yet" - ) + raise ValueError(f"Invalid join type {how}") + + cdef Column left_rows = _gather_map_as_column(move(c_result)) + return ( 
+ left_rows, + None + ) + + +cdef Column _gather_map_as_column(cpp_join.gather_map_type gather_map): + # helple to convert a gather map to a Column + cdef size_type size = gather_map.get()[0].size() + cdef unique_ptr[column] c_col = make_unique[column]( + data_type(type_id.INT32), + size, + gather_map.get()[0].release()) + return Column.from_unique_ptr(move(c_col)) diff --git a/python/cudf/cudf/core/column/categorical.py b/python/cudf/cudf/core/column/categorical.py index 39c278d2abf..bb1bf3c5d5c 100644 --- a/python/cudf/cudf/core/column/categorical.py +++ b/python/cudf/cudf/core/column/categorical.py @@ -750,6 +750,9 @@ def _set_categories( ordered=ordered, ) + def _decategorize(self) -> ColumnBase: + return self._column._get_decategorized_column() + class CategoricalColumn(column.ColumnBase): """Implements operations for Columns of Categorical type diff --git a/python/cudf/cudf/core/column/column.py b/python/cudf/cudf/core/column/column.py index dd06d97d105..e59b395ec0f 100644 --- a/python/cudf/cudf/core/column/column.py +++ b/python/cudf/cudf/core/column/column.py @@ -827,7 +827,12 @@ def quantile( def median(self, skipna: bool = None) -> ScalarLike: raise TypeError(f"cannot perform median with type {self.dtype}") - def take(self: T, indices: ColumnBase, keep_index: bool = True) -> T: + def take( + self: T, + indices: ColumnBase, + keep_index: bool = True, + nullify: bool = False, + ) -> T: """Return Column by taking values from the corresponding *indices*. 
""" # Handle zero size @@ -836,7 +841,7 @@ def take(self: T, indices: ColumnBase, keep_index: bool = True) -> T: try: return ( self.as_frame() - ._gather(indices, keep_index=keep_index) + ._gather(indices, keep_index=keep_index, nullify=nullify) ._as_column() ) except RuntimeError as e: @@ -1004,7 +1009,9 @@ def sort_by_values( ascending: bool = True, na_position: builtins.str = "last", ) -> Tuple[ColumnBase, "cudf.core.column.NumericalColumn"]: - col_inds = self.as_frame()._get_sorted_inds(ascending, na_position) + col_inds = self.as_frame()._get_sorted_inds( + ascending=ascending, na_position=na_position + ) col_keys = self.take(col_inds) return col_keys, col_inds @@ -1016,6 +1023,9 @@ def distinct_count( raise NotImplementedError(msg) return cpp_distinct_count(self, ignore_nulls=dropna) + def can_cast_safely(self, to_dtype: Dtype) -> bool: + raise NotImplementedError() + def astype(self, dtype: Dtype, **kwargs) -> ColumnBase: if is_numerical_dtype(dtype): return self.as_numerical_column(dtype) diff --git a/python/cudf/cudf/core/column/numerical.py b/python/cudf/cudf/core/column/numerical.py index 7ad6eed65a8..da77517c75d 100644 --- a/python/cudf/cudf/core/column/numerical.py +++ b/python/cudf/cudf/core/column/numerical.py @@ -362,7 +362,9 @@ def _numeric_quantile( ) -> NumericalColumn: quant = [float(q)] if not isinstance(q, (Sequence, np.ndarray)) else q # get sorted indices and exclude nulls - sorted_indices = self.as_frame()._get_sorted_inds(True, "first") + sorted_indices = self.as_frame()._get_sorted_inds( + ascending=True, na_position="first" + ) sorted_indices = sorted_indices[self.null_count :] return cpp_quantile(self, quant, interpolation, sorted_indices, exact) diff --git a/python/cudf/cudf/core/dataframe.py b/python/cudf/cudf/core/dataframe.py index b5f57356698..01b96151485 100644 --- a/python/cudf/cudf/core/dataframe.py +++ b/python/cudf/cudf/core/dataframe.py @@ -4497,12 +4497,9 @@ def merge( else: lsuffix, rsuffix = suffixes - lhs = 
self.copy(deep=False) - rhs = right.copy(deep=False) - # Compute merge - gdf_result = super(DataFrame, lhs)._merge( - rhs, + gdf_result = super()._merge( + right, on=on, left_on=left_on, right_on=right_on, @@ -4510,8 +4507,6 @@ def merge( right_index=right_index, how=how, sort=sort, - lsuffix=lsuffix, - rsuffix=rsuffix, method=method, indicator=indicator, suffixes=suffixes, diff --git a/python/cudf/cudf/core/frame.py b/python/cudf/cudf/core/frame.py index ecff3dee573..fb746d6c794 100644 --- a/python/cudf/cudf/core/frame.py +++ b/python/cudf/cudf/core/frame.py @@ -20,6 +20,7 @@ from cudf import _lib as libcudf from cudf._typing import ColumnLike, DataFrameOrSeries from cudf.core.column import as_column, build_categorical_column, column_empty +from cudf.core.join import merge from cudf.utils.dtypes import ( is_categorical_dtype, is_column_like, @@ -595,7 +596,7 @@ def _explode(self, explode_column: Any, ignore_index: bool): res.index.names = self._index.names return res - def _get_columns_by_label(self, labels, downcast): + def _get_columns_by_label(self, labels, downcast=False): """ Returns columns of the Frame specified by `labels` @@ -612,15 +613,18 @@ def _get_columns_by_index(self, indices): data, columns=data.to_pandas_index(), index=self.index ) - def _gather(self, gather_map, keep_index=True): + def _gather(self, gather_map, keep_index=True, nullify=False): if not pd.api.types.is_integer_dtype(gather_map.dtype): gather_map = gather_map.astype("int32") result = self.__class__._from_table( libcudf.copying.gather( - self, as_column(gather_map), keep_index=keep_index + self, + as_column(gather_map), + keep_index=keep_index, + nullify=nullify, ) ) - result._copy_type_metadata(self) + result._copy_type_metadata(self, include_index=keep_index) if keep_index and self._index is not None: result._index.names = self._index.names return result @@ -2754,12 +2758,15 @@ def searchsorted( else: return result - def _get_sorted_inds(self, ascending=True, na_position="last"): + 
def _get_sorted_inds(self, by=None, ascending=True, na_position="last"): """ Sort by the values. Parameters ---------- + by: list, optional + Labels specifying columns to sort by. By default, + sort by all columns of `self` ascending : bool or list of bool, default True If True, sort values in ascending order, otherwise descending. na_position : {‘first’ or ‘last’}, default ‘last’ @@ -2794,11 +2801,17 @@ def _get_sorted_inds(self, ascending=True, na_position="last"): ) na_position = 0 + to_sort = ( + self + if by is None + else self._get_columns_by_label(by, downcast=False) + ) + # If given a scalar need to construct a sequence of length # of columns if np.isscalar(ascending): - ascending = [ascending] * self._num_columns + ascending = [ascending] * to_sort._num_columns - return libcudf.sort.order_by(self, ascending, na_position) + return libcudf.sort.order_by(to_sort, ascending, na_position) def sin(self): """ @@ -3329,77 +3342,6 @@ def sqrt(self): """ return self._unaryop("sqrt") - @staticmethod - def _validate_merge_cfg( - lhs, - rhs, - left_on, - right_on, - on, - how, - left_index=False, - right_index=False, - lsuffix=None, - rsuffix=None, - ): - """ - Error for various combinations of merge input parameters - """ - len_left_on = len(left_on) if left_on is not None else 0 - len_right_on = len(right_on) if right_on is not None else 0 - - # must actually support the requested merge type - if how not in ["left", "inner", "outer", "leftanti", "leftsemi"]: - raise NotImplementedError(f"{how} merge not supported yet") - - # Passing 'on' with 'left_on' or 'right_on' is potentially ambiguous - if on: - if left_on or right_on: - raise ValueError( - 'Can only pass argument "on" OR "left_on" ' - 'and "right_on", not a combination of both.' 
- ) - - # Require same total number of columns to join on in both operands - if not (len_left_on + left_index * len(lhs.index.names)) == ( - len_right_on + right_index * len(rhs.index.names) - ): - raise ValueError( - "Merge operands must have same number of join key columns" - ) - - # If nothing specified, must have common cols to use implicitly - same_named_columns = set(lhs._data.keys()) & set(rhs._data.keys()) - if not (left_index or right_index): - if not (left_on or right_on): - if len(same_named_columns) == 0: - raise ValueError("No common columns to perform merge on") - - for name in same_named_columns: - if not ( - name in left_on - and name in right_on - and (left_on.index(name) == right_on.index(name)) - ): - if not (lsuffix or rsuffix): - raise ValueError( - "there are overlapping columns but " - "lsuffix and rsuffix are not defined" - ) - - if on: - on_keys = [on] if not isinstance(on, list) else on - for key in on_keys: - if not (key in lhs._data.keys() and key in rhs._data.keys()): - raise KeyError(f"Key {on} not in both operands") - else: - for key in left_on: - if key not in lhs._data.keys(): - raise KeyError(f'Key "{key}" not in left operand') - for key in right_on: - if key not in rhs._data.keys(): - raise KeyError(f'Key "{key}" not in right operand') - def _merge( self, right, @@ -3410,84 +3352,33 @@ def _merge( right_index=False, how="inner", sort=False, - lsuffix=None, - rsuffix=None, method="hash", indicator=False, suffixes=("_x", "_y"), ): - # Merge doesn't support right, so just swap + lhs, rhs = self, right if how == "right": - return right._merge( - self, - on=on, - left_on=right_on, - right_on=left_on, - left_index=right_index, - right_index=left_index, - how="left", - sort=sort, - lsuffix=rsuffix, - rsuffix=lsuffix, - method=method, - indicator=indicator, - suffixes=suffixes, - ) - - lhs = self - rhs = right - - from cudf.core.join import Merge - - mergeop = Merge( + # Merge doesn't support right, so just swap + how = "left" + lhs, rhs 
= right, self + left_on, right_on = right_on, left_on + left_index, right_index = right_index, left_index + suffixes = (suffixes[1], suffixes[0]) + + return merge( lhs, rhs, - on, - left_on, - right_on, - left_index, - right_index, - how, - sort, - lsuffix, - rsuffix, - method, - indicator, - suffixes, + on=on, + left_on=left_on, + right_on=right_on, + left_index=left_index, + right_index=right_index, + how=how, + sort=sort, + method=method, + indicator=indicator, + suffixes=suffixes, ) - to_return = mergeop.perform_merge() - - # If sort=True, Pandas would sort on the key columns in the - # same order as given in 'on'. If the indices are used as - # keys, the index will be sorted. If one index is specified, - # the key column on the other side will be used to sort. - # If no index is specified, return a new RangeIndex - if sort: - to_sort = cudf.DataFrame() - if left_index and right_index: - by = list(to_return._index._data.columns) - if left_on and right_on: - by.extend(to_return[mergeop.left_on]._data.columns) - elif left_index: - by = list(to_return[mergeop.right_on]._data.columns) - elif right_index: - by = list(to_return[mergeop.left_on]._data.columns) - else: - # left_on == right_on, or different names but same columns - # in both cases we can sort by either - by = [to_return._data[name] for name in mergeop.left_on] - for i, col in enumerate(by): - to_sort[i] = col - inds = to_sort.argsort() - if isinstance(to_return, cudf.Index): - to_return = to_return.take(inds) - else: - to_return = to_return.take( - inds, keep_index=(left_index or right_index) - ) - return to_return - else: - return to_return def _is_sorted(self, ascending=None, null_position=None): """ diff --git a/python/cudf/cudf/core/index.py b/python/cudf/cudf/core/index.py index 2a5d2647e95..5104629eee0 100644 --- a/python/cudf/cudf/core/index.py +++ b/python/cudf/cudf/core/index.py @@ -13,6 +13,7 @@ from pandas._config import get_option import cudf +from cudf._typing import DtypeObj from 
cudf.core.abc import Serializable from cudf.core.column import ( CategoricalColumn, @@ -66,6 +67,9 @@ def _to_frame(this_index, index=True, name=None): class Index(Frame, Serializable): + + dtype: DtypeObj + def __new__( cls, data=None, @@ -1544,6 +1548,10 @@ def _from_table(cls, table): else: return as_index(table) + @classmethod + def _from_data(cls, data, index=None): + return cls._from_table(Frame(data=data)) + _accessors = set() # type: Set[Any] @property diff --git a/python/cudf/cudf/core/join/__init__.py b/python/cudf/cudf/core/join/__init__.py index 6d126c8af4d..0463b8f9df1 100644 --- a/python/cudf/cudf/core/join/__init__.py +++ b/python/cudf/cudf/core/join/__init__.py @@ -1,3 +1,3 @@ # Copyright (c) 2020, NVIDIA CORPORATION. -from cudf.core.join.join import Merge +from cudf.core.join.join import merge diff --git a/python/cudf/cudf/core/join/_join_helpers.py b/python/cudf/cudf/core/join/_join_helpers.py new file mode 100644 index 00000000000..3807f408369 --- /dev/null +++ b/python/cudf/cudf/core/join/_join_helpers.py @@ -0,0 +1,203 @@ +# Copyright (c) 2021, NVIDIA CORPORATION. +from __future__ import annotations + +import collections +import warnings +from typing import TYPE_CHECKING, Any, Iterable, Tuple + +import numpy as np +import pandas as pd + +import cudf +from cudf.core.dtypes import CategoricalDtype + +if TYPE_CHECKING: + from cudf.core.column import CategoricalColumn, ColumnBase + from cudf.core.frame import Frame + + +class _Indexer: + # Indexer into a column (either a data column or index level). 
+ # + # >>> df + # a + # b + # 4 1 + # 5 2 + # 6 3 + # >>> _Indexer("a", column=True).get(df) # returns column "a" of df + # >>> _Indexer("b", index=True).get(df) # returns index level "b" of df + + def __init__(self, name: Any, column=False, index=False): + if column and index: + raise ValueError("Cannot specify both column and index") + self.name = name + self.column, self.index = column, index + + def get(self, obj: Frame) -> ColumnBase: + # get the column from `obj` + if self.column: + return obj._data[self.name] + else: + if obj._index is not None: + return obj._index._data[self.name] + raise KeyError() + + def set(self, obj: Frame, value: ColumnBase, validate=False): + # set the colum in `obj` + if self.column: + obj._data.set_by_label(self.name, value, validate=validate) + else: + if obj._index is not None: + obj._index._data.set_by_label( + self.name, value, validate=validate + ) + else: + raise KeyError() + + +def _frame_select_by_indexers( + frame: Frame, indexers: Iterable[_Indexer] +) -> Frame: + # Select columns from the given `Frame` using `indexers`, + # and return a new `Frame`. + index_data = frame._data.__class__() + data = frame._data.__class__() + + for idx in indexers: + if idx.index: + index_data.set_by_label(idx.name, idx.get(frame), validate=False) + else: + data.set_by_label(idx.name, idx.get(frame), validate=False) + + result_index = cudf.Index._from_data(index_data) if index_data else None + result = cudf.core.frame.Frame(data=data, index=result_index) + return result + + +def _match_join_keys( + lcol: ColumnBase, rcol: ColumnBase, how: str +) -> Tuple[ColumnBase, ColumnBase]: + # returns the common dtype that lcol and rcol should be casted to, + # before they can be used as left and right join keys. 
+ # If no casting is necessary, returns None + + common_type = None + + # cast the keys lcol and rcol to a common dtype + ltype = lcol.dtype + rtype = rcol.dtype + + # if either side is categorical, different logic + if isinstance(ltype, CategoricalDtype) or isinstance( + rtype, CategoricalDtype + ): + return _match_categorical_dtypes(lcol, rcol, how) + + if pd.api.types.is_dtype_equal(ltype, rtype): + return lcol, rcol + + if (np.issubdtype(ltype, np.number)) and (np.issubdtype(rtype, np.number)): + common_type = ( + max(ltype, rtype) + if ltype.kind == rtype.kind + else np.find_common_type([], (ltype, rtype)) + ) + + elif np.issubdtype(ltype, np.datetime64) and np.issubdtype( + rtype, np.datetime64 + ): + common_type = max(ltype, rtype) + + if how == "left": + if rcol.fillna(0).can_cast_safely(ltype): + return lcol, rcol.astype(ltype) + else: + warnings.warn( + f"Can't safely cast column from {rtype} to {ltype}, " + "upcasting to {common_type}." + ) + + return lcol.astype(common_type), rcol.astype(common_type) + + +def _match_categorical_dtypes( + lcol: ColumnBase, rcol: ColumnBase, how: str +) -> Tuple[ColumnBase, ColumnBase]: + # cast the keys lcol and rcol to a common dtype + # when at least one of them is a categorical type + ltype, rtype = lcol.dtype, rcol.dtype + + if isinstance(lcol, cudf.core.column.CategoricalColumn) and isinstance( + rcol, cudf.core.column.CategoricalColumn + ): + # if both are categoricals, logic is complicated: + return _match_categorical_dtypes_both(lcol, rcol, how) + + if isinstance(ltype, CategoricalDtype): + if how in {"left", "leftsemi", "leftanti"}: + return lcol, rcol.astype(ltype) + common_type = ltype.categories.dtype + elif isinstance(rtype, CategoricalDtype): + common_type = rtype.categories.dtype + return lcol.astype(common_type), rcol.astype(common_type) + + +def _match_categorical_dtypes_both( + lcol: CategoricalColumn, rcol: CategoricalColumn, how: str +) -> Tuple[ColumnBase, ColumnBase]: + # The commontype depends on 
both `how` and the specifics of the + # categorical variables to be merged. + + ltype, rtype = lcol.dtype, rcol.dtype + + # when both are ordered and both have the same categories, + # no casting required: + if ltype == rtype: + return lcol, rcol + + # Merging categorical variables when only one side is ordered is + # ambiguous and not allowed. + if ltype.ordered != rtype.ordered: + raise TypeError( + "Merging on categorical variables with mismatched" + " ordering is ambiguous" + ) + + if ltype.ordered and rtype.ordered: + # if we get to here, categories must be what causes the + # dtype equality check to fail. And we can never merge + # two ordered categoricals with different categories + raise TypeError( + f"{how} merge between categoricals with " + "different categories is only valid when " + "neither side is ordered" + ) + + # the following should now always hold + assert not ltype.ordered and not rtype.ordered + + if how == "inner": + # cast to category types -- we must cast them back later + return _match_join_keys( + lcol.cat()._decategorize(), rcol.cat()._decategorize(), how, + ) + elif how in {"left", "leftanti", "leftsemi"}: + # always cast to left type + return lcol, rcol.astype(ltype) + else: + # merge categories + merged_categories = cudf.concat( + [ltype.categories, rtype.categories] + ).unique() + common_type = cudf.CategoricalDtype( + categories=merged_categories, ordered=False + ) + return lcol.astype(common_type), rcol.astype(common_type) + + +def _coerce_to_tuple(obj): + if isinstance(obj, collections.abc.Iterable) and not isinstance(obj, str): + return tuple(obj) + else: + return (obj,) diff --git a/python/cudf/cudf/core/join/casting_logic.py b/python/cudf/cudf/core/join/casting_logic.py deleted file mode 100644 index eb85cecd14d..00000000000 --- a/python/cudf/cudf/core/join/casting_logic.py +++ /dev/null @@ -1,207 +0,0 @@ -# Copyright (c) 2021, NVIDIA CORPORATION. 
- -import warnings - -import numpy as np -import pandas as pd - -import cudf -from cudf.core.dtypes import CategoricalDtype - - -def _input_to_libcudf_castrules_both_cat(lcol, rcol, how): - """ - Based off the left and right operands, determine the libcudf - merge dtype or error for corner cases where the merge cannot - proceed. This function handles categorical variables. - Categorical variable typecasting logic depends on both `how` - and the specifics of the categorical variables to be merged. - Merging categorical variables when only one side is ordered - is ambiguous and not allowed. Merging when both categoricals - are ordered is allowed, but only when the categories are - exactly equal and have equal ordering, and will result in the - common dtype. - When both sides are unordered, the result categorical depends - on the kind of join: - - For inner joins, the result will be the intersection of the - categories - - For left or right joins, the result will be the the left or - right dtype respectively. This extends to semi and anti joins. - - For outer joins, the result will be the union of categories - from both sides. - - """ - ltype = lcol.dtype - rtype = rcol.dtype - - # this function is only to be used to resolve the result when both - # sides are categorical - if not isinstance(ltype, CategoricalDtype) and isinstance( - rtype, CategoricalDtype - ): - raise TypeError("Both operands must be CategoricalDtype") - - # true for every configuration - if ltype == rtype: - return ltype - - # raise for any join where ordering doesn't match - if ltype.ordered != rtype.ordered: - raise TypeError( - "Merging on categorical variables with mismatched" - " ordering is ambiguous" - ) - elif ltype.ordered and rtype.ordered: - # if we get to here, categories must be what causes the - # dtype equality check to fail. 
And we can never merge - # two ordered categoricals with different categories - raise TypeError( - f"{how} merge between categoricals with " - "different categories is only valid when " - "neither side is ordered" - ) - - elif how == "inner": - # neither ordered, so categories must be different - # demote to underlying types - return _input_to_libcudf_castrules_any( - ltype.categories, rtype.categories, how - ) - - elif how == "left": - return ltype - elif how == "right": - return rtype - - elif how == "outer": - new_cats = cudf.concat([ltype.categories, rtype.categories]).unique() - return cudf.CategoricalDtype(categories=new_cats, ordered=False) - - -def _input_to_libcudf_castrules_any_cat(lcol, rcol, how): - - l_is_cat = isinstance(lcol.dtype, CategoricalDtype) - r_is_cat = isinstance(rcol.dtype, CategoricalDtype) - - if l_is_cat and r_is_cat: - return _input_to_libcudf_castrules_both_cat(lcol, rcol, how) - elif l_is_cat or r_is_cat: - if l_is_cat and how == "left": - return lcol.dtype - if r_is_cat and how == "right": - return rcol.dtype - return ( - lcol.dtype.categories.dtype - if l_is_cat - else rcol.dtype.categories.dtype - ) - else: - raise ValueError("Neither operand is categorical") - - -def _input_to_libcudf_castrules_any(lcol, rcol, how): - """ - Determine what dtype the left and right hand - input columns must be cast to for a libcudf - join to proceed. 
- """ - - cast_warn = ( - "can't safely cast column from {} with type" - " {} to {}, upcasting to {}" - ) - - ltype = lcol.dtype - rtype = rcol.dtype - - # if either side is categorical, different logic - if isinstance(ltype, CategoricalDtype) or isinstance( - rtype, CategoricalDtype - ): - return _input_to_libcudf_castrules_any_cat(lcol, rcol, how) - - libcudf_join_type = None - if pd.api.types.is_dtype_equal(ltype, rtype): - libcudf_join_type = ltype - elif how == "left": - check_col = rcol.fillna(0) - if not check_col.can_cast_safely(ltype): - libcudf_join_type = _input_to_libcudf_castrules_any( - lcol, rcol, "inner" - ) - warnings.warn( - cast_warn.format("right", rtype, ltype, libcudf_join_type) - ) - else: - libcudf_join_type = ltype - elif how == "right": - check_col = lcol.fillna(0) - if not check_col.can_cast_safely(rtype): - libcudf_join_type = _input_to_libcudf_castrules_any( - lcol, rcol, "inner" - ) - warnings.warn( - cast_warn.format("left", ltype, rtype, libcudf_join_type) - ) - else: - libcudf_join_type = rtype - elif how in {"inner", "outer"}: - if (np.issubdtype(ltype, np.number)) and ( - np.issubdtype(rtype, np.number) - ): - if ltype.kind == rtype.kind: - # both ints or both floats - libcudf_join_type = max(ltype, rtype) - else: - libcudf_join_type = np.find_common_type([], [ltype, rtype]) - elif np.issubdtype(ltype, np.datetime64) and np.issubdtype( - rtype, np.datetime64 - ): - libcudf_join_type = max(ltype, rtype) - return libcudf_join_type - - -def _libcudf_to_output_castrules(lcol, rcol, how): - """ - Determine what dtype an output merge key column should be - cast to after it has been processed by libcudf. Determine - if a column should be promoted to a categorical datatype. - For inner merges between unordered categoricals, we get a - new categorical variable containing the intersection of - the two source variables. 
For left or right joins, we get - the original categorical variable from whichever was the - major operand of the join, e.g. left for a left join or - right for a right join. In the case of an outer join, the - result will be a new categorical variable with both sets - of categories. - """ - merge_return_type = None - - ltype = lcol.dtype - rtype = rcol.dtype - - if pd.api.types.is_dtype_equal(ltype, rtype): - return ltype - - l_is_cat = isinstance(ltype, CategoricalDtype) - r_is_cat = isinstance(rtype, CategoricalDtype) - - # we currently only need to do this for categorical variables - if how == "inner": - if l_is_cat and r_is_cat: - merge_return_type = "category" - elif how == "left": - if l_is_cat: - merge_return_type = ltype - elif how == "right": - if r_is_cat: - merge_return_type = rtype - elif how == "outer": - if l_is_cat and r_is_cat: - new_cats = cudf.concat( - [ltype.categories, rtype.categories] - ).unique() - merge_return_type = cudf.CategoricalDtype( - categories=new_cats, ordered=ltype.ordered - ) - return merge_return_type diff --git a/python/cudf/cudf/core/join/join.py b/python/cudf/cudf/core/join/join.py index c6da3ee8dc4..1a4826d0570 100644 --- a/python/cudf/cudf/core/join/join.py +++ b/python/cudf/cudf/core/join/join.py @@ -1,22 +1,85 @@ # Copyright (c) 2020-2021, NVIDIA CORPORATION. 
-import itertools +from __future__ import annotations -import pandas as pd +import functools +from collections import namedtuple +from typing import TYPE_CHECKING, Callable, Tuple import cudf from cudf import _lib as libcudf -from cudf._lib.join import compute_result_col_names -from cudf.core.join.casting_logic import ( - _input_to_libcudf_castrules_any, - _libcudf_to_output_castrules, +from cudf.core.join._join_helpers import ( + _coerce_to_tuple, + _frame_select_by_indexers, + _Indexer, + _match_join_keys, ) +if TYPE_CHECKING: + from cudf.core.frame import Frame + + +def merge( + lhs, + rhs, + *, + on, + left_on, + right_on, + left_index, + right_index, + how, + sort, + method, + indicator, + suffixes, +): + if how in {"leftsemi", "leftanti"}: + merge_cls = MergeSemi + else: + merge_cls = Merge + mergeobj = merge_cls( + lhs, + rhs, + on=on, + left_on=left_on, + right_on=right_on, + left_index=left_index, + right_index=right_index, + how=how, + sort=sort, + method=method, + indicator=indicator, + suffixes=suffixes, + ) + return mergeobj.perform_merge() + + +_JoinKeys = namedtuple("JoinKeys", ["left", "right"]) + class Merge(object): + # A namedtuple of indexers representing the left and right keys + _keys: _JoinKeys + + # The joiner function must have the following signature: + # + # def joiner( + # lhs: Frame, + # rhs: Frame + # ) -> Tuple[Optional[Column], Optional[Column]]: + # ... + # + # where `lhs` and `rhs` are Frames composed of the left and right + # join key. The `joiner` returns a tuple of two Columns + # representing the rows to gather from the left- and right- side + # tables respectively. + _joiner: Callable + def __init__( self, lhs, rhs, + *, on, left_on, right_on, @@ -24,8 +87,6 @@ def __init__( right_index, how, sort, - lsuffix, - rsuffix, method, indicator, suffixes, @@ -60,140 +121,252 @@ def __init__( sort : bool Boolean flag indicating if the output Frame is to be sorted on the output's join keys, in left to right order. 
- lsuffix : string - The suffix to be appended to left hand column names that - are found to exist in the right frame, but are not specified - as join keys themselves. - rsuffix : string - The suffix to be appended to right hand column names that - are found to exist in the left frame, but are not specified - as join keys themselves. suffixes : list like Left and right suffixes specified together, unpacked into lsuffix and rsuffix. """ + self._validate_merge_params( + lhs, + rhs, + on=on, + left_on=left_on, + right_on=right_on, + left_index=left_index, + right_index=right_index, + how=how, + suffixes=suffixes, + ) + self._joiner = functools.partial(libcudf.join.join, how=how) + self.lhs = lhs self.rhs = rhs + self.on = on + self.left_on = left_on + self.right_on = right_on self.left_index = left_index self.right_index = right_index - self.method = method - self.sort = sort - - # check that the merge is valid - - self.validate_merge_cfg( - lhs, - rhs, - on, - left_on, - right_on, - left_index, - right_index, - how, - lsuffix, - rsuffix, - suffixes, - ) self.how = how - self.preprocess_merge_params( - on, left_on, right_on, lsuffix, rsuffix, suffixes - ) - - def perform_merge(self): - """ - Call libcudf to perform a merge between the operands. If - necessary, cast the input key columns to compatible types. - Potentially also cast the output back to categorical. 
- """ - output_dtypes = self.compute_output_dtypes() - self.typecast_input_to_libcudf() - libcudf_result = libcudf.join.join( - self.lhs, - self.rhs, - self.how, - self.method, - left_on=self.left_on, - right_on=self.right_on, - left_index=self.left_index, - right_index=self.right_index, - ) - result = self.out_class._from_table(libcudf_result) - result = self.typecast_libcudf_to_output(result, output_dtypes) - if isinstance(result, cudf.Index): - return result - else: - return result[ - compute_result_col_names(self.lhs, self.rhs, self.how) - ] + self.sort = sort + if suffixes: + self.lsuffix, self.rsuffix = suffixes + self._compute_join_keys() - def preprocess_merge_params( - self, on, left_on, right_on, lsuffix, rsuffix, suffixes - ): - """ - Translate a valid configuration of user input parameters into - the subset of input configurations handled by the cython layer. - Apply suffixes to columns. - """ + @property + def _out_class(self): + # type of the result + out_class = cudf.DataFrame - self.out_class = cudf.DataFrame if isinstance(self.lhs, cudf.MultiIndex) or isinstance( self.rhs, cudf.MultiIndex ): - self.out_class = cudf.MultiIndex + out_class = cudf.MultiIndex elif isinstance(self.lhs, cudf.Index): - self.out_class = self.lhs.__class__ + out_class = self.lhs.__class__ + return out_class - if on: - on = [on] if isinstance(on, str) else list(on) - left_on = right_on = on - else: - if left_on: - left_on = ( - [left_on] if isinstance(left_on, str) else list(left_on) - ) - if right_on: - right_on = ( - [right_on] if isinstance(right_on, str) else list(right_on) - ) + def perform_merge(self) -> Frame: + lhs, rhs = self._match_key_dtypes(self.lhs, self.rhs) - same_named_columns = set(self.lhs._data.keys()) & set( - self.rhs._data.keys() + left_table = _frame_select_by_indexers(lhs, self._keys.left) + right_table = _frame_select_by_indexers(rhs, self._keys.right) + + left_rows, right_rows = self._joiner( + left_table, right_table, how=self.how, ) - if not 
(left_on or right_on) and not ( - self.left_index and self.right_index - ): - left_on = right_on = list(same_named_columns) - - no_suffix_cols = [] - if left_on and right_on: - no_suffix_cols = [ - left_name - for left_name, right_name in zip(left_on, right_on) - if left_name == right_name and left_name in same_named_columns - ] + lhs, rhs = self._restore_categorical_keys(lhs, rhs) - if suffixes: - lsuffix, rsuffix = suffixes - for name in same_named_columns: - if name not in no_suffix_cols: - self.lhs.rename( - {name: f"{name}{lsuffix}"}, inplace=True, axis=1 + left_result = cudf.core.frame.Frame() + right_result = cudf.core.frame.Frame() + + gather_index = self.left_index or self.right_index + if left_rows is not None: + left_result = lhs._gather( + left_rows, nullify=True, keep_index=gather_index + ) + if right_rows is not None: + right_result = rhs._gather( + right_rows, nullify=True, keep_index=gather_index + ) + + result = self._merge_results(left_result, right_result) + + if self.sort: + result = self._sort_result(result) + return result + + def _compute_join_keys(self): + # Computes self._keys + if ( + self.left_index + or self.right_index + or self.left_on + or self.right_on + ): + left_keys = [] + right_keys = [] + if self.left_index: + left_keys.extend( + [ + _Indexer(name=on, index=True) + for on in self.lhs.index.names + ] ) - self.rhs.rename( - {name: f"{name}{rsuffix}"}, inplace=True, axis=1 + if self.left_on: + # TODO: require left_on or left_index to be specified + left_keys.extend( + [ + _Indexer(name=on, column=True) + for on in _coerce_to_tuple(self.left_on) + ] ) - if left_on and name in left_on: - left_on[left_on.index(name)] = f"{name}{lsuffix}" - if right_on and name in right_on: - right_on[right_on.index(name)] = f"{name}{rsuffix}" + if self.right_index: + right_keys.extend( + [ + _Indexer(name=on, index=True) + for on in self.rhs.index.names + ] + ) + if self.right_on: + # TODO: require right_on or right_index to be specified + 
right_keys.extend( + [ + _Indexer(name=on, column=True) + for on in _coerce_to_tuple(self.right_on) + ] + ) + else: + # Use `on` if provided. Otherwise, + # implicitly use identically named columns as the key columns: + on_names = ( + _coerce_to_tuple(self.on) + if self.on is not None + else set(self.lhs._data) & set(self.rhs._data) + ) + left_keys = [_Indexer(name=on, column=True) for on in on_names] + right_keys = [_Indexer(name=on, column=True) for on in on_names] + + if len(left_keys) != len(right_keys): + raise ValueError( + "Merge operands must have same number of join key columns" + ) + + self._keys = _JoinKeys(left=left_keys, right=right_keys) + + def _merge_results(self, left_result: Frame, right_result: Frame) -> Frame: + # Merge the Frames `left_result` and `right_result` into a single + # `Frame`, suffixing column names if necessary. + + # If two key columns have the same name, a single output column appears + # in the result. For all other join types, the key column from the rhs + # is simply dropped. For outer joins, the two key columns are combined + # by filling nulls in the left key column with corresponding values + # from the right key column: + if self.how == "outer": + for lkey, rkey in zip(*self._keys): + if lkey.name == rkey.name: + # fill nulls in lhs from values in the rhs + lkey.set( + left_result, + lkey.get(left_result).fillna(rkey.get(right_result)), + validate=False, + ) + + # Compute the result column names: + # left_names and right_names will be a mappings of input column names + # to the corresponding names in the final result. 
+ left_names = dict(zip(left_result._data, left_result._data)) + right_names = dict(zip(right_result._data, right_result._data)) + + # For any columns from left_result and right_result that have the same + # name: + # - if they are key columns, keep only the left column + # - if they are not key columns, use suffixes to differentiate them + # in the final result + common_names = set(left_names) & set(right_names) + + if self.on: + key_columns_with_same_name = self.on + else: + key_columns_with_same_name = [ + lkey.name + for lkey, rkey in zip(*self._keys) + if ( + (lkey.index, rkey.index) == (False, False) + and lkey.name == rkey.name + ) + ] + for name in common_names: + if name not in key_columns_with_same_name: + left_names[name] = f"{name}{self.lsuffix}" + right_names[name] = f"{name}{self.rsuffix}" + else: + del right_names[name] + + # Assemble the data columns of the result: + data = left_result._data.__class__() + + for lcol in left_names: + data.set_by_label( + left_names[lcol], left_result._data[lcol], validate=False + ) + for rcol in right_names: + data.set_by_label( + right_names[rcol], right_result._data[rcol], validate=False + ) + + # Index of the result: + if self.left_index and self.right_index: + index = left_result._index + elif self.left_index: + # left_index and right_on + index = right_result._index + elif self.right_index: + # right_index and left_on + index = left_result._index + else: + index = None - self.left_on = left_on if left_on is not None else [] - self.right_on = right_on if right_on is not None else [] - self.lsuffix = lsuffix - self.rsuffix = rsuffix + # Construct result from data and index: + result = self._out_class._from_data(data=data, index=index) + + return result + + def _sort_result(self, result: Frame) -> Frame: + # Pandas sorts on the key columns in the + # same order as given in 'on'. If the indices are used as + # keys, the index will be sorted. 
If one index is specified, + # the key columns on the other side will be used to sort. + if self.on: + if isinstance(result, cudf.Index): + sort_order = result._get_sorted_inds() + else: + # need a list instead of a tuple here because + # _get_sorted_inds calls down to ColumnAccessor.get_by_label + # which handles lists and tuples differently + sort_order = result._get_sorted_inds( + list(_coerce_to_tuple(self.on)) + ) + return result._gather(sort_order, keep_index=False) + by = [] + if self.left_index and self.right_index: + if result._index is not None: + by.extend(result._index._data.columns) + if self.left_on: + by.extend( + [result._data[col] for col in _coerce_to_tuple(self.left_on)] + ) + if self.right_on: + by.extend( + [result._data[col] for col in _coerce_to_tuple(self.right_on)] + ) + if by: + to_sort = cudf.DataFrame._from_columns(by) + sort_order = to_sort.argsort() + result = result._gather(sort_order) + return result @staticmethod - def validate_merge_cfg( + def _validate_merge_params( lhs, rhs, on, @@ -202,14 +375,11 @@ def validate_merge_cfg( left_index, right_index, how, - lsuffix, - rsuffix, suffixes, ): """ Error for various invalid combinations of merge input parameters """ - # must actually support the requested merge type if how not in {"left", "inner", "outer", "leftanti", "leftsemi"}: raise NotImplementedError(f"{how} merge not supported yet") @@ -227,52 +397,8 @@ def validate_merge_cfg( ): raise ValueError("Can not merge on unnamed Series") - # Keys need to be in their corresponding operands - if on: - if isinstance(on, str): - on_keys = [on] - elif isinstance(on, tuple): - on_keys = list(on) - else: - on_keys = on - for key in on_keys: - if not (key in lhs._data.keys() and key in rhs._data.keys()): - raise KeyError(f"on key {on} not in both operands") - elif left_on and right_on: - left_on_keys = ( - [left_on] if not isinstance(left_on, list) else left_on - ) - right_on_keys = ( - [right_on] if not isinstance(right_on, list) else right_on 
- ) - - for key in left_on_keys: - if key not in lhs._data.keys(): - raise KeyError(f'Key "{key}" not in left operand') - for key in right_on_keys: - if key not in rhs._data.keys(): - raise KeyError(f'Key "{key}" not in right operand') - - # Require same total number of columns to join on in both operands - len_left_on = 0 - len_right_on = 0 - if left_on: - len_left_on += ( - len(left_on) if pd.api.types.is_list_like(left_on) else 1 - ) - if right_on: - len_right_on += ( - len(right_on) if pd.api.types.is_list_like(right_on) else 1 - ) - if not (len_left_on + left_index * lhs._num_indices) == ( - len_right_on + right_index * rhs._num_indices - ): - raise ValueError( - "Merge operands must have same number of join key columns" - ) - # If nothing specified, must have common cols to use implicitly - same_named_columns = set(lhs._data.keys()) & set(rhs._data.keys()) + same_named_columns = set(lhs._data) & set(rhs._data) if ( not (left_index or right_index) and not (left_on or right_on) @@ -280,8 +406,7 @@ def validate_merge_cfg( ): raise ValueError("No common columns to perform merge on") - if suffixes: - lsuffix, rsuffix = suffixes + lsuffix, rsuffix = suffixes for name in same_named_columns: if name == left_on == right_on: continue @@ -297,134 +422,59 @@ def validate_merge_cfg( "lsuffix and rsuffix are not defined" ) - def typecast_input_to_libcudf(self): - """ - Check each pair of join keys in the left and right hand - operands and apply casting rules to match their types - before passing the result to libcudf. 
- """ - lhs_keys, rhs_keys, lhs_cols, rhs_cols = [], [], [], [] - if self.left_index: - lhs_keys.append(self.lhs.index._data.keys()) - lhs_cols.append(self.lhs.index) - if self.right_index: - rhs_keys.append(self.rhs.index._data.keys()) - rhs_cols.append(self.rhs.index) - if self.left_on: - lhs_keys.append(self.left_on) - lhs_cols.append(self.lhs) - if self.right_on: - rhs_keys.append(self.right_on) - rhs_cols.append(self.rhs) - - for l_key_grp, r_key_grp, l_col_grp, r_col_grp in zip( - lhs_keys, rhs_keys, lhs_cols, rhs_cols - ): - for l_key, r_key in zip(l_key_grp, r_key_grp): - to_dtype = _input_to_libcudf_castrules_any( - l_col_grp._data[l_key], r_col_grp._data[r_key], self.how - ) - l_col_grp._data[l_key] = l_col_grp._data[l_key].astype( - to_dtype - ) - r_col_grp._data[r_key] = r_col_grp._data[r_key].astype( - to_dtype - ) - - def compute_output_dtypes(self): - """ - Determine what datatypes should be applied to the result - of a libcudf join, baesd on the original left and right - frames. 
- """ - - index_dtypes = {} - l_data_join_cols = {} - r_data_join_cols = {} - - data_dtypes = { - name: col.dtype - for name, col in itertools.chain( - self.lhs._data.items(), self.rhs._data.items() + def _match_key_dtypes(self, lhs: Frame, rhs: Frame) -> Tuple[Frame, Frame]: + # Match the dtypes of the key columns from lhs and rhs + out_lhs = lhs.copy(deep=False) + out_rhs = rhs.copy(deep=False) + for left_key, right_key in zip(*self._keys): + lcol, rcol = left_key.get(lhs), right_key.get(rhs) + lcol_casted, rcol_casted = _match_join_keys( + lcol, rcol, how=self.how ) - } - - if self.left_index and self.right_index: - l_idx_join_cols = list(self.lhs.index._data.values()) - r_idx_join_cols = list(self.rhs.index._data.values()) - elif self.left_on and self.right_index: - # Keep the orignal dtypes in the LEFT index if possible - # should trigger a bunch of no-ops - l_idx_join_cols = list(self.lhs.index._data.values()) - r_idx_join_cols = list(self.lhs.index._data.values()) - for i, name in enumerate(self.left_on): - l_data_join_cols[name] = self.lhs._data[name] - r_data_join_cols[name] = list(self.rhs.index._data.values())[i] - - elif self.left_index and self.right_on: - # see above - l_idx_join_cols = list(self.rhs.index._data.values()) - r_idx_join_cols = list(self.rhs.index._data.values()) - for i, name in enumerate(self.right_on): - l_data_join_cols[name] = list(self.lhs.index._data.values())[i] - r_data_join_cols[name] = self.rhs._data[name] - - if self.left_on and self.right_on: - l_data_join_cols = self.lhs._data - r_data_join_cols = self.rhs._data - - if self.left_index or self.right_index: - for i in range(len(self.lhs.index._data.items())): - index_dtypes[i] = _libcudf_to_output_castrules( - l_idx_join_cols[i], r_idx_join_cols[i], self.how - ) - - for name in itertools.chain(self.left_on, self.right_on): - if name in self.left_on and name in self.right_on: - data_dtypes[name] = _libcudf_to_output_castrules( - l_data_join_cols[name], r_data_join_cols[name], 
self.how - ) - return (index_dtypes, data_dtypes) + if lcol is not lcol_casted: + left_key.set(out_lhs, lcol_casted, validate=False) + if rcol is not rcol_casted: + right_key.set(out_rhs, rcol_casted, validate=False) + return out_lhs, out_rhs + + def _restore_categorical_keys( + self, lhs: Frame, rhs: Frame + ) -> Tuple[Frame, Frame]: + # For inner joins, any categorical keys in `self.lhs` and `self.rhs` + # were casted to their category type to produce `lhs` and `rhs`. + # Here, we cast them back. + out_lhs = lhs.copy(deep=False) + out_rhs = rhs.copy(deep=False) + if self.how == "inner": + for left_key, right_key in zip(*self._keys): + if isinstance( + left_key.get(self.lhs).dtype, cudf.CategoricalDtype + ) and isinstance( + right_key.get(self.rhs).dtype, cudf.CategoricalDtype + ): + left_key.set( + out_lhs, + left_key.get(out_lhs).astype("category"), + validate=False, + ) + right_key.set( + out_rhs, + right_key.get(out_rhs).astype("category"), + validate=False, + ) + return out_lhs, out_rhs - def typecast_libcudf_to_output(self, output, output_dtypes): - """ - Apply precomputed output index and data column data types - to the output of a libcudf join. 
- """ - index_dtypes, data_dtypes = output_dtypes - if output._index and len(index_dtypes) > 0: - for index_dtype, index_col_lbl, index_col in zip( - index_dtypes.values(), - output._index._data.keys(), - output._index._data.values(), - ): - if index_dtype: - output._index._data[ - index_col_lbl - ] = self._build_output_col(index_col, index_dtype) - # reconstruct the Index object as the underlying data types - # have changed: - output._index = cudf.core.index.Index._from_table(output._index) - - for data_col_lbl, data_col in output._data.items(): - data_dtype = data_dtypes[data_col_lbl] - if data_dtype: - output._data[data_col_lbl] = self._build_output_col( - data_col, data_dtype - ) - return output +class MergeSemi(Merge): + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) + self._joiner = functools.partial( + libcudf.join.semi_join, how=kwargs["how"] + ) - def _build_output_col(self, col, dtype): - if isinstance( - dtype, (cudf.core.dtypes.CategoricalDtype, pd.CategoricalDtype) - ): - outcol = cudf.core.column.build_categorical_column( - categories=dtype.categories, - codes=col.set_mask(None), - mask=col.base_mask, - ordered=dtype.ordered, - ) + def _merge_results(self, lhs: Frame, rhs: Frame) -> Frame: + # semi-join result includes only lhs columns + if issubclass(self._out_class, cudf.Index): + return self._out_class._from_data(lhs._data) else: - outcol = col.astype(dtype) - return outcol + return self._out_class._from_data(lhs._data, index=lhs._index) diff --git a/python/cudf/cudf/core/multiindex.py b/python/cudf/cudf/core/multiindex.py index 82e89bb00f4..1c1e48e7372 100644 --- a/python/cudf/cudf/core/multiindex.py +++ b/python/cudf/cudf/core/multiindex.py @@ -1,4 +1,5 @@ -# Copyright (c) 2019-2021, NVIDIA CORPORATION. +# Copyright (c) 2019-2020, NVIDIA CORPORATION. 
+from __future__ import annotations import itertools import numbers @@ -18,6 +19,7 @@ from cudf._typing import DataFrameOrSeries from cudf.core._compat import PANDAS_GE_120 from cudf.core.column import column +from cudf.core.column_accessor import ColumnAccessor from cudf.core.frame import Frame from cudf.core.index import Index, as_index @@ -188,6 +190,19 @@ def names(self): def names(self, value): value = [None] * self.nlevels if value is None else value assert len(value) == self.nlevels + + if len(value) == len(set(value)): + # IMPORTANT: if the provided names are unique, + # we reconstruct self._data with the names as keys. + # If they are not unique, the keys of self._data + # and self._names will be different, which can lead + # to unexpected behaviour in some cases. This is + # definitely buggy, but we can't disallow non-unique + # names either... + self._data = self._data.__class__._create_unsafe( + dict(zip(value, self._data.values())), + level_names=self._data.level_names, + ) self._names = pd.core.indexes.frozen.FrozenList(value) def rename(self, names, inplace=False): @@ -234,7 +249,6 @@ def rename(self, names, inplace=False): ValueError: Length of names must match number of levels in MultiIndex. 
""" - return self.set_names(names, level=None, inplace=inplace) def set_names(self, names, level=None, inplace=False): @@ -278,6 +292,10 @@ def set_names(self, names, level=None, inplace=False): return self._set_names(names=names, inplace=inplace) + @classmethod + def _from_data(cls, data: ColumnAccessor, index=None) -> MultiIndex: + return cls.from_frame(cudf.DataFrame._from_data(data)) + @classmethod def _from_table(cls, table, names=None): df = cudf.DataFrame(table._data) diff --git a/python/cudf/cudf/core/series.py b/python/cudf/cudf/core/series.py index a664c4fb182..71a4a48a07a 100644 --- a/python/cudf/cudf/core/series.py +++ b/python/cudf/cudf/core/series.py @@ -6299,17 +6299,24 @@ def merge( method="hash", suffixes=("_x", "_y"), ): - if left_on not in (self.name, None): raise ValueError( "Series to other merge uses series name as key implicitly" ) - lhs = self.copy(deep=False) - rhs = other.copy(deep=False) + if lsuffix or rsuffix: + raise ValueError( + "The lsuffix and rsuffix keywords have been replaced with the " + "``suffixes=`` keyword. 
" + "Please provide the following instead: \n\n" + " suffixes=('%s', '%s')" + % (lsuffix or "_x", rsuffix or "_y") + ) + else: + lsuffix, rsuffix = suffixes - result = super(Series, lhs)._merge( - rhs, + result = super()._merge( + other, on=on, left_on=left_on, right_on=right_on, @@ -6317,8 +6324,6 @@ def merge( right_index=right_index, how=how, sort=sort, - lsuffix=lsuffix, - rsuffix=rsuffix, method=method, indicator=False, suffixes=suffixes, diff --git a/python/cudf/cudf/tests/test_joining.py b/python/cudf/cudf/tests/test_joining.py index 969cf1bf549..9164bfe98d1 100644 --- a/python/cudf/cudf/tests/test_joining.py +++ b/python/cudf/cudf/tests/test_joining.py @@ -14,11 +14,13 @@ assert_exceptions_equal, ) +_JOIN_TYPES = ("left", "inner", "outer", "right", "leftanti", "leftsemi") + def make_params(): np.random.seed(0) - hows = "left,inner,outer,right,leftanti,leftsemi".split(",") + hows = _JOIN_TYPES methods = "hash,sort".split(",") # Test specific cases (1) @@ -69,6 +71,37 @@ def pd_odd_joins(left, right, join_type): return left[left.index.isin(right.index)][left.columns] +def assert_join_results_equal(expect, got, how, **kwargs): + if how not in _JOIN_TYPES: + raise ValueError(f"Unrecognized join type {how}") + if how == "right": + got = got[expect.columns] + + if isinstance(expect, (pd.Series, cudf.Series)): + return assert_eq( + expect.sort_values().reset_index(drop=True), + got.sort_values().reset_index(drop=True), + **kwargs, + ) + elif isinstance(expect, (pd.DataFrame, cudf.DataFrame)): + if not len( + expect.columns + ): # can't sort_values() on a df without columns + return assert_eq(expect, got, **kwargs) + + return assert_eq( + expect.sort_values(expect.columns.to_list()).reset_index( + drop=True + ), + got.sort_values(got.columns.to_list()).reset_index(drop=True), + **kwargs, + ) + elif isinstance(expect, (pd.Index, cudf.Index)): + return assert_eq(expect.sort_values(), got.sort_values(), **kwargs) + else: + raise ValueError(f"Not a join result: 
{type(expect).__name__}") + + @pytest.mark.parametrize("aa,bb,how,method", make_params()) def test_dataframe_join_how(aa, bb, how, method): df = cudf.DataFrame() @@ -113,12 +146,7 @@ def work_gdf(df): # TODO: What is the less hacky way? expect.index.name = "bob" got.index.name = "mary" - assert_eq( - got.sort_values(got.columns.to_list()).reset_index(drop=True), - expect.sort_values(expect.columns.to_list()).reset_index( - drop=True - ), - ) + assert_join_results_equal(expect, got, how=how) # if(how=='right'): # _sorted_check_series(expect['a'], expect['b'], # got['a'], got['b']) @@ -187,10 +215,7 @@ def test_dataframe_join_cats(): expect = lhs.to_pandas().join(rhs.to_pandas()) # Note: pandas make an object Index after joining - assert_eq( - got.sort_values(by="b").sort_index().reset_index(drop=True), - expect.reset_index(drop=True), - ) + assert_join_results_equal(expect, got, how="inner") # Just do some rough checking here. assert list(got.columns) == ["b", "c"] @@ -264,7 +289,7 @@ def test_dataframe_join_mismatch_cats(how): expect.data_col_right = expect.data_col_right.astype(np.int64) expect.data_col_left = expect.data_col_left.astype(np.int64) - assert_eq(expect, got) + assert_join_results_equal(expect, got, how=how, check_categorical=False) @pytest.mark.parametrize("on", ["key1", ["key1", "key2"], None]) @@ -323,7 +348,7 @@ def test_dataframe_merge_on(on): list(pddf_joined.columns) ).reset_index(drop=True) - assert_eq(cdf_result, pdf_result, check_like=True) + assert_join_results_equal(cdf_result, pdf_result, how="left") merge_func_result_cdf = ( join_result_cudf.to_pandas() @@ -331,7 +356,7 @@ def test_dataframe_merge_on(on): .reset_index(drop=True) ) - assert_eq(merge_func_result_cdf, cdf_result, check_like=True) + assert_join_results_equal(merge_func_result_cdf, cdf_result, how="left") def test_dataframe_merge_on_unknown_column(): @@ -383,7 +408,7 @@ def test_dataframe_empty_merge(): expect = cudf.DataFrame({"a": [], "b": [], "c": []}) got = 
gdf1.merge(gdf2, how="left", on=["a"]) - assert_eq(expect, got) + assert_join_results_equal(expect, got, how="left") def test_dataframe_merge_order(): @@ -408,7 +433,7 @@ def test_dataframe_merge_order(): df2["a"] = [7, 8] df = df1.merge(df2, how="left", on=["id", "a"]) - assert_eq(gdf, df) + assert_join_results_equal(df, gdf, how="left") @pytest.mark.parametrize( @@ -550,7 +575,7 @@ def test_merge_left_index_zero(): pd_merge = left.merge(right, left_on="x", right_on="y") gd_merge = gleft.merge(gright, left_on="x", right_on="y") - assert_eq(pd_merge, gd_merge) + assert_join_results_equal(pd_merge, gd_merge, how="left") @pytest.mark.parametrize( @@ -571,7 +596,7 @@ def test_merge_left_right_index_left_right_on_zero_kwargs(kwargs): gright = cudf.from_pandas(right) pd_merge = left.merge(right, **kwargs) gd_merge = gleft.merge(gright, **kwargs) - assert_eq(pd_merge, gd_merge) + assert_join_results_equal(pd_merge, gd_merge, how="left") @pytest.mark.parametrize( @@ -592,7 +617,7 @@ def test_merge_left_right_index_left_right_on_kwargs(kwargs): gright = cudf.from_pandas(right) pd_merge = left.merge(right, **kwargs) gd_merge = gleft.merge(gright, **kwargs) - assert_eq(pd_merge, gd_merge) + assert_join_results_equal(pd_merge, gd_merge, how="left") def test_indicator(): @@ -608,9 +633,10 @@ def test_indicator(): def test_merge_suffixes(): pdf = cudf.DataFrame({"x": [1, 2, 1]}) gdf = cudf.DataFrame({"x": [1, 2, 1]}) - assert_eq( + assert_join_results_equal( gdf.merge(gdf, suffixes=("left", "right")), pdf.merge(pdf, suffixes=("left", "right")), + how="left", ) assert_exceptions_equal( @@ -628,11 +654,14 @@ def test_merge_left_on_right_on(): gleft = cudf.from_pandas(left) gright = cudf.from_pandas(right) - assert_eq(left.merge(right, on="xx"), gleft.merge(gright, on="xx")) + assert_join_results_equal( + left.merge(right, on="xx"), gleft.merge(gright, on="xx"), how="left" + ) - assert_eq( + assert_join_results_equal( left.merge(right, left_on="xx", right_on="xx"), 
gleft.merge(gright, left_on="xx", right_on="xx"), + how="left", ) @@ -708,7 +737,9 @@ def test_merge_sort(ons, hows): pd_merge = left.merge(right, **kwargs) # require the join keys themselves to be sorted correctly # the non-key columns will NOT match pandas ordering - assert_eq(pd_merge[kwargs["on"]], gd_merge[kwargs["on"]]) + assert_join_results_equal( + pd_merge[kwargs["on"]], gd_merge[kwargs["on"]], how="left" + ) pd_merge = pd_merge.drop(kwargs["on"], axis=1) gd_merge = gd_merge.drop(kwargs["on"], axis=1) if not pd_merge.empty: @@ -720,7 +751,7 @@ def test_merge_sort(ons, hows): drop=True ) - assert_eq(pd_merge, gd_merge) + assert_join_results_equal(pd_merge, gd_merge, how="left") @pytest.mark.parametrize( @@ -781,7 +812,7 @@ def test_join_datetimes_index(dtype): assert gdf["d"].dtype == np.dtype(dtype) - assert_eq(pdf, gdf) + assert_join_results_equal(pdf, gdf, how="inner") def test_join_with_different_names(): @@ -791,7 +822,7 @@ def test_join_with_different_names(): gright = cudf.from_pandas(right) pd_merge = left.merge(right, how="outer", left_on=["a"], right_on=["b"]) gd_merge = gleft.merge(gright, how="outer", left_on=["a"], right_on=["b"]) - assert_eq(pd_merge, gd_merge.sort_values(by=["a"]).reset_index(drop=True)) + assert_join_results_equal(pd_merge, gd_merge, how="outer") def test_join_same_name_different_order(): @@ -801,9 +832,7 @@ def test_join_same_name_different_order(): gright = cudf.from_pandas(right) pd_merge = left.merge(right, left_on=["a", "b"], right_on=["b", "a"]) gd_merge = gleft.merge(gright, left_on=["a", "b"], right_on=["b", "a"]) - assert_eq( - pd_merge, gd_merge.sort_values(by=["a_x"]).reset_index(drop=True) - ) + assert_join_results_equal(pd_merge, gd_merge, how="left") def test_join_empty_table_dtype(): @@ -874,10 +903,7 @@ def test_join_multi(how, column_a, column_b, column_c): gdf_result = gdf_result[columns] pdf_result = pdf_result[columns] - assert_eq( - gdf_result.reset_index(drop=True).fillna(-1), - 
pdf_result.sort_index().reset_index(drop=True).fillna(-1), - ) + assert_join_results_equal(pdf_result, gdf_result, how="inner") @pytest.mark.parametrize( @@ -967,7 +993,7 @@ def test_merge_multi(kwargs): expect.index = range(len(expect)) got.index = range(len(got)) - assert_eq(expect, got) + assert_join_results_equal(expect, got, how="left") @pytest.mark.parametrize("dtype_l", INTEGER_TYPES) @@ -997,7 +1023,7 @@ def test_typecast_on_join_int_to_int(dtype_l, dtype_r): got = gdf_l.merge(gdf_r, on="join_col", how="inner") - assert_eq(expect, got) + assert_join_results_equal(expect, got, how="inner") @pytest.mark.parametrize("dtype_l", ["float32", "float64"]) @@ -1032,7 +1058,7 @@ def test_typecast_on_join_float_to_float(dtype_l, dtype_r): got = gdf_l.merge(gdf_r, on="join_col", how="inner") - assert_eq(expect, got) + assert_join_results_equal(expect, got, how="inner") @pytest.mark.parametrize("dtype_l", NUMERIC_TYPES) @@ -1068,7 +1094,7 @@ def test_typecast_on_join_mixed_int_float(dtype_l, dtype_r): got = gdf_l.merge(gdf_r, on="join_col", how="inner") - assert_eq(expect, got) + assert_join_results_equal(expect, got, how="inner") def test_typecast_on_join_no_float_round(): @@ -1092,7 +1118,7 @@ def test_typecast_on_join_no_float_round(): got = gdf_l.merge(gdf_r, on="join_col", how="left") - assert_eq(expect, got) + assert_join_results_equal(expect, got, how="left") @pytest.mark.parametrize( @@ -1121,10 +1147,7 @@ def test_typecast_on_join_overflow_unsafe(dtypes): with pytest.warns( UserWarning, - match=( - f"can't safely cast column" - f" from right with type {dtype_r} to {dtype_l}" - ), + match=(f"Can't safely cast column" f" from {dtype_r} to {dtype_l}"), ): merged = lhs.merge(rhs, on="a", how="left") # noqa: F841 @@ -1165,7 +1188,7 @@ def test_typecast_on_join_dt_to_dt(dtype_l, dtype_r): got = gdf_l.merge(gdf_r, on="join_col", how="inner") - assert_eq(expect, got) + assert_join_results_equal(expect, got, how="inner") @pytest.mark.parametrize("dtype_l", ["category", 
"str", "int32", "float32"]) @@ -1200,7 +1223,7 @@ def test_typecast_on_join_categorical(dtype_l, dtype_r): ) got = gdf_l.merge(gdf_r, on="join_col", how="inner") - assert_eq(expect, got) + assert_join_results_equal(expect, got, how="inner") def make_categorical_dataframe(categories, ordered=False): @@ -1220,7 +1243,7 @@ def test_categorical_typecast_inner(): expect_dtype = CategoricalDtype(categories=[1, 2, 3], ordered=False) expect_data = cudf.Series([1, 2, 3], dtype=expect_dtype, name="key") - assert_eq(expect_data, result["key"]) + assert_eq(expect_data, result["key"], check_categorical=False) # Equal categories, unequal ordering -> error left = make_categorical_dataframe([1, 2, 3], ordered=False) @@ -1238,7 +1261,7 @@ def test_categorical_typecast_inner(): expect_dtype = cudf.CategoricalDtype(categories=[2, 3], ordered=False) expect_data = cudf.Series([2, 3], dtype=expect_dtype, name="key") - assert_eq(expect_data, result["key"]) + assert_eq(expect_data, result["key"], check_categorical=False) # One is ordered -> error left = make_categorical_dataframe([1, 2, 3], ordered=False) @@ -1427,20 +1450,10 @@ def test_index_join(lhs, rhs, how, level): g_lhs = l_df.set_index(lhs).index g_rhs = r_df.set_index(rhs).index - expected = ( - p_lhs.join(p_rhs, level=level, how=how) - .to_frame(index=False) - .sort_values(by=lhs) - .reset_index(drop=True) - ) - got = ( - g_lhs.join(g_rhs, level=level, how=how) - .to_frame(index=False) - .sort_values(by=lhs) - .reset_index(drop=True) - ) + expected = p_lhs.join(p_rhs, level=level, how=how).to_frame(index=False) + got = g_lhs.join(g_rhs, level=level, how=how).to_frame(index=False) - assert_eq(expected, got) + assert_join_results_equal(expected, got, how=how) def test_index_join_corner_cases(): @@ -1461,20 +1474,10 @@ def test_index_join_corner_cases(): p_rhs = r_pdf.set_index(rhs).index g_lhs = l_df.set_index(lhs).index g_rhs = r_df.set_index(rhs).index - expected = ( - p_lhs.join(p_rhs, level=level, how=how) - 
.to_frame(index=False) - .sort_values(by=lhs) - .reset_index(drop=True) - ) - got = ( - g_lhs.join(g_rhs, level=level, how=how) - .to_frame(index=False) - .sort_values(by=lhs) - .reset_index(drop=True) - ) + expected = p_lhs.join(p_rhs, level=level, how=how).to_frame(index=False) + got = g_lhs.join(g_rhs, level=level, how=how).to_frame(index=False) - assert_eq(expected, got) + assert_join_results_equal(expected, got, how=how) # sort is supported only in case of two non-MultiIndex join # Join when column name doesn't match with level @@ -1490,7 +1493,7 @@ def test_index_join_corner_cases(): expected = p_lhs.join(p_rhs, how=how, sort=True) got = g_lhs.join(g_rhs, how=how, sort=True) - assert_eq(expected, got) + assert_join_results_equal(expected, got, how=how) # Pandas Index.join on categorical column returns generic column # but cudf will be returning a categorical column itself. @@ -1504,22 +1507,12 @@ def test_index_join_corner_cases(): p_rhs = r_pdf.set_index(rhs).index g_lhs = l_df.set_index(lhs).index g_rhs = r_df.set_index(rhs).index - expected = ( - p_lhs.join(p_rhs, level=level, how=how) - .to_frame(index=False) - .sort_values(by=lhs) - .reset_index(drop=True) - ) - got = ( - g_lhs.join(g_rhs, level=level, how=how) - .to_frame(index=False) - .sort_values(by=lhs) - .reset_index(drop=True) - ) + expected = p_lhs.join(p_rhs, level=level, how=how).to_frame(index=False) + got = g_lhs.join(g_rhs, level=level, how=how).to_frame(index=False) got["a"] = got["a"].astype(expected["a"].dtype) - assert_eq(expected, got) + assert_join_results_equal(expected, got, how=how) def test_index_join_exception_cases(): @@ -1573,7 +1566,7 @@ def test_typecast_on_join_indexes(): got = gdf_l.join(gdf_r, how="inner", lsuffix="_x", rsuffix="_y") - assert_eq(expect, got) + assert_join_results_equal(expect, got, how="inner") def test_typecast_on_join_multiindices(): @@ -1624,7 +1617,7 @@ def test_typecast_on_join_multiindices(): expect = expect.set_index(["join_col_0", "join_col_1", 
"join_col_2"]) got = gdf_l.join(gdf_r, how="inner", lsuffix="_x", rsuffix="_y") - assert_eq(expect, got) + assert_join_results_equal(expect, got, how="inner") def test_typecast_on_join_indexes_matching_categorical(): @@ -1651,7 +1644,7 @@ def test_typecast_on_join_indexes_matching_categorical(): expect = expect.set_index("join_col") got = gdf_l.join(gdf_r, how="inner", lsuffix="_x", rsuffix="_y") - assert_eq(expect, got) + assert_join_results_equal(expect, got, how="inner") @pytest.mark.parametrize( @@ -1703,9 +1696,10 @@ def test_series_dataframe_mixed_merging(lhs, rhs, how, kwargs): expect = check_lhs.merge(check_rhs, how=how, **kwargs) got = lhs.merge(rhs, how=how, **kwargs) - assert_eq(expect, got) + assert_join_results_equal(expect, got, how=how) +@pytest.mark.xfail(reason="Cannot sort values of list dtype") @pytest.mark.parametrize( "how", ["left", "inner", "right", "leftanti", "leftsemi"] ) @@ -1730,4 +1724,17 @@ def test_merge_with_lists(how): expect = pd_left.merge(pd_right, on="a") got = gd_left.merge(gd_right, on="a") - assert_eq(expect, got) + assert_join_results_equal(expect, got, how=how) + + +def test_join_renamed_index(): + df = cudf.DataFrame( + {0: [1, 2, 3, 4, 5], 1: [1, 2, 3, 4, 5], "c": [1, 2, 3, 4, 5]} + ).set_index([0, 1]) + df.index.names = ["a", "b"] # doesn't actually change df._index._data + + expect = df.to_pandas().merge( + df.to_pandas(), left_index=True, right_index=True + ) + got = df.merge(df, left_index=True, right_index=True, how="inner") + assert_join_results_equal(expect, got, how="inner") diff --git a/python/cudf/cudf/tests/test_string.py b/python/cudf/cudf/tests/test_string.py index 8b1ad696f04..2ca6bc622be 100644 --- a/python/cudf/cudf/tests/test_string.py +++ b/python/cudf/cudf/tests/test_string.py @@ -17,6 +17,7 @@ from cudf.core._compat import PANDAS_GE_110 from cudf.core.column.string import StringColumn from cudf.core.index import StringIndex, as_index +from cudf.tests.test_joining import assert_join_results_equal from 
cudf.tests.utils import ( DATETIME_TYPES, NUMERIC_TYPES, @@ -919,16 +920,12 @@ def test_string_split(data, pat, n, expand): @pytest.mark.parametrize( - "str_data,str_data_raise", - [ - ([], 0), - (["a", "b", "c", "d", "e"], 0), - ([None, None, None, None, None], 1), - ], + "str_data", + [[], ["a", "b", "c", "d", "e"], [None, None, None, None, None]], ) @pytest.mark.parametrize("num_keys", [1, 2, 3]) @pytest.mark.parametrize("how", ["left", "right", "inner", "outer"]) -def test_string_join_key(str_data, str_data_raise, num_keys, how): +def test_string_join_key(str_data, num_keys, how): other_data = [1, 2, 3, 4, 5][: len(str_data)] pdf = pd.DataFrame() @@ -942,19 +939,17 @@ def test_string_join_key(str_data, str_data_raise, num_keys, how): pdf2 = pdf.copy() gdf2 = gdf.copy() - expectation = raise_builder( - [0 if how == "right" else str_data_raise], (AssertionError) - ) + expect = pdf.merge(pdf2, on=list(range(num_keys)), how=how) + got = gdf.merge(gdf2, on=list(range(num_keys)), how=how) - with expectation: - expect = pdf.merge(pdf2, on=list(range(num_keys)), how=how) - got = gdf.merge(gdf2, on=list(range(num_keys)), how=how) + if len(expect) == 0 and len(got) == 0: + expect = expect.reset_index(drop=True) + got = got[expect.columns] # reorder columns - if len(expect) == 0 and len(got) == 0: - expect = expect.reset_index(drop=True) - got = got[expect.columns] + if how == "right": + got = got[expect.columns] # reorder columns - assert_eq(expect, got) + assert_join_results_equal(expect, got, how=how) @pytest.mark.parametrize( @@ -998,7 +993,7 @@ def test_string_join_key_nulls(str_data_nulls): expect["vals_y"] = expect["vals_y"].fillna(-1).astype("int64") - assert_eq(expect, got) + assert_join_results_equal(expect, got, how="left") @pytest.mark.parametrize( @@ -1027,7 +1022,10 @@ def test_string_join_non_key(str_data, num_cols, how): expect = expect.reset_index(drop=True) got = got[expect.columns] - assert_eq(expect, got) + if how == "right": + got = 
got[expect.columns] # reorder columns + + assert_join_results_equal(expect, got, how=how) @pytest.mark.parametrize( @@ -1068,7 +1066,7 @@ def test_string_join_non_key_nulls(str_data_nulls): expect = expect.reset_index(drop=True) got = got[expect.columns] - assert_eq(expect, got) + assert_join_results_equal(expect, got, how="left") def test_string_join_values_nulls(): @@ -1108,7 +1106,7 @@ def test_string_join_values_nulls(): expect = expect.sort_values(by=["a", "b", "c"]).reset_index(drop=True) got = got.sort_values(by=["a", "b", "c"]).reset_index(drop=True) - assert_eq(expect, got) + assert_join_results_equal(expect, got, how="left") @pytest.mark.parametrize( From 8188ddbf2837caea731f7c4833945dfa9598b4b5 Mon Sep 17 00:00:00 2001 From: Jason Lowe Date: Mon, 29 Mar 2021 19:16:38 -0500 Subject: [PATCH 16/20] Add Java bindings to join gather map APIs (#7751) Adds Java bindings for the libcudf join APIs that return gather maps. Depends upon #7454. Authors: - Jason Lowe (@jlowe) Approvers: - Robert (Bobby) Evans (@revans2) URL: https://github.com/rapidsai/cudf/pull/7751 --- .../main/java/ai/rapids/cudf/ColumnView.java | 43 ++ .../main/java/ai/rapids/cudf/GatherMap.java | 85 ++++ java/src/main/java/ai/rapids/cudf/Table.java | 139 ++++++ java/src/main/native/include/jni_utils.hpp | 18 +- java/src/main/native/src/TableJni.cpp | 376 ++++++++++++----- .../java/ai/rapids/cudf/GatherMapTest.java | 100 +++++ .../test/java/ai/rapids/cudf/TableTest.java | 396 ++++++++++++++++++ 7 files changed, 1061 insertions(+), 96 deletions(-) create mode 100644 java/src/main/java/ai/rapids/cudf/GatherMap.java create mode 100644 java/src/test/java/ai/rapids/cudf/GatherMapTest.java diff --git a/java/src/main/java/ai/rapids/cudf/ColumnView.java b/java/src/main/java/ai/rapids/cudf/ColumnView.java index 90fe3553abc..5d869ab75fb 100644 --- a/java/src/main/java/ai/rapids/cudf/ColumnView.java +++ b/java/src/main/java/ai/rapids/cudf/ColumnView.java @@ -2523,6 +2523,49 @@ public static ColumnView 
makeStructView(ColumnView... columns) { return makeStructView(columns[0].rows, columns); } + /** + * Create a new column view from a raw device buffer. Note that this will NOT copy + * the contents of the buffer but only creates a view. The view MUST NOT outlive + * the underlying device buffer. The column view will be created without a validity + * vector, so it is not possible to create a view containing null elements. Additionally + * only fixed-width primitive types are supported. + * + * @param buffer device memory that will back the column view + * @param startOffset byte offset into the device buffer where the column data starts + * @param type type of data in the column view + * @param rows number of data elements in the column view + * @return new column view instance that must not outlive the backing device buffer + */ + public static ColumnView fromDeviceBuffer(BaseDeviceMemoryBuffer buffer, + long startOffset, + DType type, + int rows) { + if (buffer == null) { + throw new NullPointerException("buffer is null"); + } + int typeSize = type.getSizeInBytes(); + if (typeSize <= 0) { + throw new IllegalArgumentException("Unsupported type: " + type); + } + if (startOffset < 0) { + throw new IllegalArgumentException("Invalid start offset: " + startOffset); + } + if (rows < 0) { + throw new IllegalArgumentException("Invalid row count: " + rows); + } + long dataSize = (long) typeSize * rows; + if (startOffset + dataSize > buffer.length) { + throw new IllegalArgumentException("View extends beyond buffer range"); + } + long dataAddress = buffer.getAddress() + startOffset; + if (dataAddress % typeSize != 0) { + throw new IllegalArgumentException("Data address " + Long.toHexString(dataAddress) + + " is misaligned relative to type size of " + typeSize + " bytes"); + } + return new ColumnView(makeCudfColumnView(type.typeId.getNativeId(), type.getScale(), + dataAddress, dataSize, 0, 0, 0, rows, null)); + } + /** * Create a column of bool values indicating whether the specified
scalar * is an element of each row of a list column. diff --git a/java/src/main/java/ai/rapids/cudf/GatherMap.java b/java/src/main/java/ai/rapids/cudf/GatherMap.java new file mode 100644 index 00000000000..12ff741bb69 --- /dev/null +++ b/java/src/main/java/ai/rapids/cudf/GatherMap.java @@ -0,0 +1,85 @@ +/* + * Copyright (c) 2021, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package ai.rapids.cudf; + +/** + * This class tracks the data associated with a gather map, a buffer of INT32 elements that index + * a source table and can be passed to a table gather operation. + */ +public class GatherMap implements AutoCloseable { + private DeviceMemoryBuffer buffer; + + /** + * Construct a gather map instance from a device buffer. The buffer length must be a multiple of + * the {@link DType#INT32} size, as each row of the gather map is an INT32. + * @param buffer device buffer backing the gather map data + */ + public GatherMap(DeviceMemoryBuffer buffer) { + if (buffer.getLength() % DType.INT32.getSizeInBytes() != 0) { + throw new IllegalArgumentException("buffer length not a multiple of 4"); + } + this.buffer = buffer; + } + + /** Return the number of rows in the gather map */ + public long getRowCount() { + ensureOpen(); + return buffer.getLength() / 4; + } + + /** + * Create a column view that can be used to perform a gather operation. 
Note that the resulting + * column view MUST NOT outlive the underlying device buffer within this instance! + * @param startRow row offset where the resulting gather map will start + * @param numRows number of rows in the resulting gather map + * @return column view of gather map data + */ + public ColumnView toColumnView(long startRow, int numRows) { + ensureOpen(); + return ColumnView.fromDeviceBuffer(buffer, startRow * 4, DType.INT32, numRows); + } + + /** + * Release the underlying device buffer instance. After this is called, closing this instance + * will not close the underlying device buffer. It is the responsibility of the caller to close + * the returned device buffer. + * @return device buffer backing gather map data or null if the buffer has already been released + */ + public DeviceMemoryBuffer releaseBuffer() { + DeviceMemoryBuffer result = buffer; + buffer = null; + return result; + } + + /** Close the device buffer backing the gather map data. */ + @Override + public void close() { + if (buffer != null) { + buffer.close(); + buffer = null; + } + } + + private void ensureOpen() { + if (buffer == null) { + throw new IllegalStateException("instance is closed"); + } + if (buffer.closed) { + throw new IllegalStateException("buffer is closed"); + } + } +} diff --git a/java/src/main/java/ai/rapids/cudf/Table.java b/java/src/main/java/ai/rapids/cudf/Table.java index 6e0b7d3bb94..fc6ad55044a 100644 --- a/java/src/main/java/ai/rapids/cudf/Table.java +++ b/java/src/main/java/ai/rapids/cudf/Table.java @@ -482,18 +482,33 @@ private static native long[] merge(long[] tableHandles, int[] sortKeyIndexes, private static native long[] leftJoin(long leftTable, int[] leftJoinCols, long rightTable, int[] rightJoinCols, boolean compareNullsEqual) throws CudfException; + private static native long[] leftJoinGatherMaps(long leftKeys, long rightKeys, + boolean compareNullsEqual) throws CudfException; + private static native long[] innerJoin(long leftTable, int[] 
leftJoinCols, long rightTable, int[] rightJoinCols, boolean compareNullsEqual) throws CudfException; + private static native long[] innerJoinGatherMaps(long leftKeys, long rightKeys, + boolean compareNullsEqual) throws CudfException; + private static native long[] fullJoin(long leftTable, int[] leftJoinCols, long rightTable, int[] rightJoinCols, boolean compareNullsEqual) throws CudfException; + private static native long[] fullJoinGatherMaps(long leftKeys, long rightKeys, + boolean compareNullsEqual) throws CudfException; + private static native long[] leftSemiJoin(long leftTable, int[] leftJoinCols, long rightTable, int[] rightJoinCols, boolean compareNullsEqual) throws CudfException; + private static native long[] leftSemiJoinGatherMap(long leftKeys, long rightKeys, + boolean compareNullsEqual) throws CudfException; + private static native long[] leftAntiJoin(long leftTable, int[] leftJoinCols, long rightTable, int[] rightJoinCols, boolean compareNullsEqual) throws CudfException; + private static native long[] leftAntiJoinGatherMap(long leftKeys, long rightKeys, + boolean compareNullsEqual) throws CudfException; + private static native long[] crossJoin(long leftTable, long rightTable) throws CudfException; private static native long[] concatenate(long[] cudfTablePointers) throws CudfException; @@ -1925,6 +1940,130 @@ public Table gather(ColumnVector gatherMap, boolean checkBounds) { return new Table(gather(nativeHandle, gatherMap.getNativeView(), checkBounds)); } + private GatherMap[] buildJoinGatherMaps(long[] gatherMapData) { + long bufferSize = gatherMapData[0]; + long leftAddr = gatherMapData[1]; + long leftHandle = gatherMapData[2]; + long rightAddr = gatherMapData[3]; + long rightHandle = gatherMapData[4]; + GatherMap[] maps = new GatherMap[2]; + maps[0] = new GatherMap(DeviceMemoryBuffer.fromRmm(leftAddr, bufferSize, leftHandle)); + maps[1] = new GatherMap(DeviceMemoryBuffer.fromRmm(rightAddr, bufferSize, rightHandle)); + return maps; + } + + /** + * 
Computes the gather maps that can be used to manifest the result of a left equi-join between + * two tables. It is assumed this table instance holds the key columns from the left table, and + * the table argument represents the key columns from the right table. Two {@link GatherMap} + * instances will be returned that can be used to gather the left and right tables, + * respectively, to produce the result of the left join. + * It is the responsibility of the caller to close the resulting gather map instances. + * @param rightKeys join key columns from the right table + * @param compareNullsEqual true if null key values should match otherwise false + * @return left and right table gather maps + */ + public GatherMap[] leftJoinGatherMaps(Table rightKeys, boolean compareNullsEqual) { + if (getNumberOfColumns() != rightKeys.getNumberOfColumns()) { + throw new IllegalArgumentException("column count mismatch, this: " + getNumberOfColumns() + + " rightKeys: " + rightKeys.getNumberOfColumns()); + } + long[] gatherMapData = + leftJoinGatherMaps(getNativeView(), rightKeys.getNativeView(), compareNullsEqual); + return buildJoinGatherMaps(gatherMapData); + } + + /** + * Computes the gather maps that can be used to manifest the result of an inner equi-join between + * two tables. It is assumed this table instance holds the key columns from the left table, and + * the table argument represents the key columns from the right table. Two {@link GatherMap} + * instances will be returned that can be used to gather the left and right tables, + * respectively, to produce the result of the inner join. + * It is the responsibility of the caller to close the resulting gather map instances.
+ * @param rightKeys join key columns from the right table + * @param compareNullsEqual true if null key values should match otherwise false + * @return left and right table gather maps + */ + public GatherMap[] innerJoinGatherMaps(Table rightKeys, boolean compareNullsEqual) { + if (getNumberOfColumns() != rightKeys.getNumberOfColumns()) { + throw new IllegalArgumentException("column count mismatch, this: " + getNumberOfColumns() + + " rightKeys: " + rightKeys.getNumberOfColumns()); + } + long[] gatherMapData = + innerJoinGatherMaps(getNativeView(), rightKeys.getNativeView(), compareNullsEqual); + return buildJoinGatherMaps(gatherMapData); + } + + /** + * Computes the gather maps that can be used to manifest the result of a full equi-join between + * two tables. It is assumed this table instance holds the key columns from the left table, and + * the table argument represents the key columns from the right table. Two {@link GatherMap} + * instances will be returned that can be used to gather the left and right tables, + * respectively, to produce the result of the full join. + * It is the responsibility of the caller to close the resulting gather map instances.
+ * @param rightKeys join key columns from the right table + * @param compareNullsEqual true if null key values should match otherwise false + * @return left and right table gather maps + */ + public GatherMap[] fullJoinGatherMaps(Table rightKeys, boolean compareNullsEqual) { + if (getNumberOfColumns() != rightKeys.getNumberOfColumns()) { + throw new IllegalArgumentException("column count mismatch, this: " + getNumberOfColumns() + + " rightKeys: " + rightKeys.getNumberOfColumns()); + } + long[] gatherMapData = + fullJoinGatherMaps(getNativeView(), rightKeys.getNativeView(), compareNullsEqual); + return buildJoinGatherMaps(gatherMapData); + } + + private GatherMap buildSemiJoinGatherMap(long[] gatherMapData) { + long bufferSize = gatherMapData[0]; + long leftAddr = gatherMapData[1]; + long leftHandle = gatherMapData[2]; + return new GatherMap(DeviceMemoryBuffer.fromRmm(leftAddr, bufferSize, leftHandle)); + } + + /** + * Computes the gather map that can be used to manifest the result of a left semi-join between + * two tables. It is assumed this table instance holds the key columns from the left table, and + * the table argument represents the key columns from the right table. The {@link GatherMap} + * instance returned can be used to gather the left table to produce the result of the + * left semi-join. + * It is the responsibility of the caller to close the resulting gather map instance.
+ * @param rightKeys join key columns from the right table + * @param compareNullsEqual true if null key values should match otherwise false + * @return left table gather map + */ + public GatherMap leftSemiJoinGatherMap(Table rightKeys, boolean compareNullsEqual) { + if (getNumberOfColumns() != rightKeys.getNumberOfColumns()) { + throw new IllegalArgumentException("column count mismatch, this: " + getNumberOfColumns() + + " rightKeys: " + rightKeys.getNumberOfColumns()); + } + long[] gatherMapData = + leftSemiJoinGatherMap(getNativeView(), rightKeys.getNativeView(), compareNullsEqual); + return buildSemiJoinGatherMap(gatherMapData); + } + + /** + * Computes the gather map that can be used to manifest the result of a left anti-join between + * two tables. It is assumed this table instance holds the key columns from the left table, and + * the table argument represents the key columns from the right table. The {@link GatherMap} + * instance returned can be used to gather the left table to produce the result of the + * left anti-join. + * It is the responsibility of the caller to close the resulting gather map instance. + * @param rightKeys join key columns from the right table + * @param compareNullsEqual true if null key values should match otherwise false + * @return left table gather map + */ + public GatherMap leftAntiJoinGatherMap(Table rightKeys, boolean compareNullsEqual) { + if (getNumberOfColumns() != rightKeys.getNumberOfColumns()) { + throw new IllegalArgumentException("column count mismatch, this: " + getNumberOfColumns() + + " rightKeys: " + rightKeys.getNumberOfColumns()); + } + long[] gatherMapData = + leftAntiJoinGatherMap(getNativeView(), rightKeys.getNativeView(), compareNullsEqual); + return buildSemiJoinGatherMap(gatherMapData); + } + /** * Convert this table of columns into a row major format that is useful for interacting with other * systems that do row major processing of the data.
Currently only fixed-width column types are diff --git a/java/src/main/native/include/jni_utils.hpp b/java/src/main/native/include/jni_utils.hpp index 84694c177a1..3ce136dda19 100644 --- a/java/src/main/native/include/jni_utils.hpp +++ b/java/src/main/native/include/jni_utils.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2020, NVIDIA CORPORATION. + * Copyright (c) 2019-2021, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -243,6 +243,22 @@ template class nativ return data_ptr; } + const N_TYPE *const begin() const { + return data(); + } + + N_TYPE *begin() { + return data(); + } + + const N_TYPE *const end() const { + return data() + size(); + } + + N_TYPE *end() { + return data() + size(); + } + const J_ARRAY_TYPE get_jArray() const { return orig; } J_ARRAY_TYPE get_jArray() { return orig; } diff --git a/java/src/main/native/src/TableJni.cpp b/java/src/main/native/src/TableJni.cpp index 6beedf54f5a..0e66cde3ee1 100644 --- a/java/src/main/native/src/TableJni.cpp +++ b/java/src/main/native/src/TableJni.cpp @@ -30,6 +30,7 @@ #include #include #include +#include #include #include #include @@ -41,6 +42,8 @@ #include "dtype_utils.hpp" #include "row_conversion.hpp" +#include + namespace cudf { namespace jni { @@ -620,6 +623,116 @@ bool valid_window_parameters(native_jintArray const &values, values.size() == preceding.size() && values.size() == following.size(); } +// Generate gather maps needed to manifest the result of a join between two tables. 
+// The resulting Java long array contains the following at each index: +// 0: Size of each gather map in bytes +// 1: Device address of the gather map for the left table +// 2: Host address of the rmm::device_buffer instance that owns the left gather map data +// 3: Device address of the gather map for the right table +// 4: Host address of the rmm::device_buffer instance that owns the right gather map data +template +jlongArray join_gather_maps(JNIEnv *env, jlong j_left_keys, jlong j_right_keys, + jboolean compare_nulls_equal, T join_func) { + JNI_NULL_CHECK(env, j_left_keys, "left_table is null", NULL); + JNI_NULL_CHECK(env, j_right_keys, "right_table is null", NULL); + try { + cudf::jni::auto_set_device(env); + auto left_keys = reinterpret_cast(j_left_keys); + auto right_keys = reinterpret_cast(j_right_keys); + auto nulleq = compare_nulls_equal ? cudf::null_equality::EQUAL : cudf::null_equality::UNEQUAL; + std::pair>, + std::unique_ptr>> + join_maps = join_func(*left_keys, *right_keys, nulleq); + + // release the underlying device buffer to Java + auto left_map_buffer = std::make_unique(join_maps.first->release()); + auto right_map_buffer = std::make_unique(join_maps.second->release()); + cudf::jni::native_jlongArray result(env, 5); + result[0] = static_cast(left_map_buffer->size()); + result[1] = reinterpret_cast(left_map_buffer->data()); + result[2] = reinterpret_cast(left_map_buffer.release()); + result[3] = reinterpret_cast(right_map_buffer->data()); + result[4] = reinterpret_cast(right_map_buffer.release()); + return result.get_jArray(); + } + CATCH_STD(env, NULL); +} + +// Generate gather maps needed to manifest the result of a join between two tables. 
+// The resulting Java long array contains the following at each index: +// 0: Size of the gather map in bytes +// 1: Device address of the gather map +// 2: Host address of the rmm::device_buffer instance that owns the gather map data +template +jlongArray join_gather_single_map(JNIEnv *env, jlong j_left_keys, jlong j_right_keys, + jboolean compare_nulls_equal, T join_func) { + JNI_NULL_CHECK(env, j_left_keys, "left_table is null", NULL); + JNI_NULL_CHECK(env, j_right_keys, "right_table is null", NULL); + try { + cudf::jni::auto_set_device(env); + auto left_keys = reinterpret_cast(j_left_keys); + auto right_keys = reinterpret_cast(j_right_keys); + auto nulleq = compare_nulls_equal ? cudf::null_equality::EQUAL : cudf::null_equality::UNEQUAL; + std::unique_ptr> join_map = + join_func(*left_keys, *right_keys, nulleq); + + // release the underlying device buffer to Java + auto gather_map_buffer = std::make_unique(join_map->release()); + cudf::jni::native_jlongArray result(env, 3); + result[0] = static_cast(gather_map_buffer->size()); + result[1] = reinterpret_cast(gather_map_buffer->data()); + result[2] = reinterpret_cast(gather_map_buffer.release()); + return result.get_jArray(); + } + CATCH_STD(env, NULL); +} + +// Returns a table view containing only the columns at the specified indices +cudf::table_view const get_keys_table(cudf::table_view const *t, + native_jintArray const &key_indices) { + std::vector key_cols; + key_cols.reserve(key_indices.size()); + std::transform(key_indices.begin(), key_indices.end(), std::back_inserter(key_cols), + [t](int idx) { return t->column(idx); }); + return table_view(key_cols); +} + +// Returns a table view containing only the columns that are NOT at the specified indices +cudf::table_view const get_non_keys_table(cudf::table_view const *t, + native_jintArray const &key_indices) { + std::vector non_key_indices; + for (int i = 0; i < t->num_columns(); ++i) { + if (std::find(key_indices.begin(), key_indices.end(), i) == 
key_indices.end()) { + non_key_indices.push_back(i); + } + } + std::vector cols; + std::transform(non_key_indices.begin(), non_key_indices.end(), std::back_inserter(cols), + [&t](int idx) { return t->column(idx); }); + return table_view(cols); +} + +// Combine left and right join results into a column pointer array that can be returned to the JVM. +jlongArray combine_join_results(JNIEnv *env, std::vector> left_cols, + std::vector> right_cols) { + cudf::jni::native_jlongArray outcol_handles(env, left_cols.size() + right_cols.size()); + auto iter = std::transform( + left_cols.begin(), left_cols.end(), outcol_handles.begin(), + [](std::unique_ptr &col) { return reinterpret_cast(col.release()); }); + std::transform( + right_cols.begin(), right_cols.end(), iter, + [](std::unique_ptr &col) { return reinterpret_cast(col.release()); }); + return outcol_handles.get_jArray(); +} + +// Combine left and right join results into a column pointer array that can be returned to the JVM. +jlongArray combine_join_results(JNIEnv *env, cudf::table &left_results, + cudf::table &right_results) { + std::vector> left_cols = left_results.release(); + std::vector> right_cols = right_results.release(); + return combine_join_results(env, std::move(left_cols), std::move(right_cols)); +} + } // namespace } // namespace jni @@ -1455,109 +1568,143 @@ JNIEXPORT void JNICALL Java_ai_rapids_cudf_Table_readArrowIPCEnd(JNIEnv *env, jc } JNIEXPORT jlongArray JNICALL Java_ai_rapids_cudf_Table_leftJoin( - JNIEnv *env, jclass, jlong left_table, jintArray left_col_join_indices, jlong right_table, - jintArray right_col_join_indices, jboolean compare_nulls_equal) { - JNI_NULL_CHECK(env, left_table, "left_table is null", NULL); - JNI_NULL_CHECK(env, left_col_join_indices, "left_col_join_indices is null", NULL); - JNI_NULL_CHECK(env, right_table, "right_table is null", NULL); - JNI_NULL_CHECK(env, right_col_join_indices, "right_col_join_indices is null", NULL); + JNIEnv *env, jclass, jlong j_left_table, 
jintArray j_left_key_indices, jlong j_right_table, + jintArray j_right_key_indices, jboolean compare_nulls_equal) { + JNI_NULL_CHECK(env, j_left_table, "left_table is null", NULL); + JNI_NULL_CHECK(env, j_left_key_indices, "left_col_join_indices is null", NULL); + JNI_NULL_CHECK(env, j_right_table, "right_table is null", NULL); + JNI_NULL_CHECK(env, j_right_key_indices, "right_col_join_indices is null", NULL); try { cudf::jni::auto_set_device(env); - cudf::table_view *n_left_table = reinterpret_cast(left_table); - cudf::table_view *n_right_table = reinterpret_cast(right_table); - cudf::jni::native_jintArray left_join_cols_arr(env, left_col_join_indices); - std::vector left_join_cols( - left_join_cols_arr.data(), left_join_cols_arr.data() + left_join_cols_arr.size()); - cudf::jni::native_jintArray right_join_cols_arr(env, right_col_join_indices); - std::vector right_join_cols( - right_join_cols_arr.data(), right_join_cols_arr.data() + right_join_cols_arr.size()); - - int dedupe_size = left_join_cols.size(); - std::vector> dedupe(dedupe_size); - for (int i = 0; i < dedupe_size; i++) { - dedupe[i].first = left_join_cols[i]; - dedupe[i].second = right_join_cols[i]; - } - - std::unique_ptr result = - cudf::left_join(*n_left_table, *n_right_table, left_join_cols, right_join_cols, dedupe, - static_cast(compare_nulls_equal) ? cudf::null_equality::EQUAL : - cudf::null_equality::UNEQUAL); - - return cudf::jni::convert_table_for_return(env, result); + auto left_in_table = reinterpret_cast(j_left_table); + auto right_in_table = reinterpret_cast(j_right_table); + cudf::jni::native_jintArray left_key_indices(env, j_left_key_indices); + auto left_keys_table = cudf::jni::get_keys_table(left_in_table, left_key_indices); + left_key_indices.cancel(); + cudf::jni::native_jintArray right_key_indices(env, j_right_key_indices); + auto right_keys_table = cudf::jni::get_keys_table(right_in_table, right_key_indices); + auto nulleq = compare_nulls_equal ? 
cudf::null_equality::EQUAL : cudf::null_equality::UNEQUAL; + + // compute gather maps for the left and right tables that can produce the join result rows + auto join_maps = cudf::left_join(left_keys_table, right_keys_table, nulleq); + CUDF_EXPECTS(join_maps.first->size() <= std::numeric_limits::max(), + "join result exceeds maximum column length"); + auto num_join_rows = static_cast(join_maps.first->size()); + + // compute the join result rows for the left table columns + auto left_gather_col = cudf::column_view(cudf::data_type{cudf::type_id::INT32}, num_join_rows, + join_maps.first->data()); + auto left_out_table = + cudf::gather(*left_in_table, left_gather_col, cudf::out_of_bounds_policy::DONT_CHECK); + + // compute the join result rows for the right table columns + auto right_non_keys_table = cudf::jni::get_non_keys_table(right_in_table, right_key_indices); + right_key_indices.cancel(); + auto right_gather_col = cudf::column_view(cudf::data_type{cudf::type_id::INT32}, num_join_rows, + join_maps.second->data()); + auto right_out_table = + cudf::gather(right_non_keys_table, right_gather_col, cudf::out_of_bounds_policy::NULLIFY); + + return cudf::jni::combine_join_results(env, *left_out_table, *right_out_table); } CATCH_STD(env, NULL); } JNIEXPORT jlongArray JNICALL Java_ai_rapids_cudf_Table_innerJoin( - JNIEnv *env, jclass, jlong left_table, jintArray left_col_join_indices, jlong right_table, - jintArray right_col_join_indices, jboolean compare_nulls_equal) { - JNI_NULL_CHECK(env, left_table, "left_table is null", NULL); - JNI_NULL_CHECK(env, left_col_join_indices, "left_col_join_indices is null", NULL); - JNI_NULL_CHECK(env, right_table, "right_table is null", NULL); - JNI_NULL_CHECK(env, right_col_join_indices, "right_col_join_indices is null", NULL); + JNIEnv *env, jclass, jlong j_left_table, jintArray j_left_key_indices, jlong j_right_table, + jintArray j_right_key_indices, jboolean compare_nulls_equal) { + JNI_NULL_CHECK(env, j_left_table, "left_table is 
null", NULL); + JNI_NULL_CHECK(env, j_left_key_indices, "left_col_join_indices is null", NULL); + JNI_NULL_CHECK(env, j_right_table, "right_table is null", NULL); + JNI_NULL_CHECK(env, j_right_key_indices, "right_col_join_indices is null", NULL); try { cudf::jni::auto_set_device(env); - cudf::table_view *n_left_table = reinterpret_cast(left_table); - cudf::table_view *n_right_table = reinterpret_cast(right_table); - cudf::jni::native_jintArray left_join_cols_arr(env, left_col_join_indices); - std::vector left_join_cols( - left_join_cols_arr.data(), left_join_cols_arr.data() + left_join_cols_arr.size()); - cudf::jni::native_jintArray right_join_cols_arr(env, right_col_join_indices); - std::vector right_join_cols( - right_join_cols_arr.data(), right_join_cols_arr.data() + right_join_cols_arr.size()); - - int dedupe_size = left_join_cols.size(); - std::vector> dedupe(dedupe_size); - for (int i = 0; i < dedupe_size; i++) { - dedupe[i].first = left_join_cols[i]; - dedupe[i].second = right_join_cols[i]; - } - - std::unique_ptr result = - cudf::inner_join(*n_left_table, *n_right_table, left_join_cols, right_join_cols, dedupe, - static_cast(compare_nulls_equal) ? cudf::null_equality::EQUAL : - cudf::null_equality::UNEQUAL); - - return cudf::jni::convert_table_for_return(env, result); + auto left_in_table = reinterpret_cast(j_left_table); + auto right_in_table = reinterpret_cast(j_right_table); + cudf::jni::native_jintArray left_key_indices(env, j_left_key_indices); + auto left_keys_table = cudf::jni::get_keys_table(left_in_table, left_key_indices); + left_key_indices.cancel(); + cudf::jni::native_jintArray right_key_indices(env, j_right_key_indices); + auto right_keys_table = cudf::jni::get_keys_table(right_in_table, right_key_indices); + auto nulleq = compare_nulls_equal ? 
cudf::null_equality::EQUAL : cudf::null_equality::UNEQUAL; + + // compute gather maps for the left and right tables that can produce the join result rows + auto join_maps = cudf::inner_join(left_keys_table, right_keys_table, nulleq); + CUDF_EXPECTS(join_maps.first->size() <= std::numeric_limits::max(), + "join result exceeds maximum column length"); + auto num_join_rows = static_cast(join_maps.first->size()); + + // compute the join result rows for the left table columns + auto left_gather_col = cudf::column_view(cudf::data_type{cudf::type_id::INT32}, num_join_rows, + join_maps.first->data()); + auto left_out_table = + cudf::gather(*left_in_table, left_gather_col, cudf::out_of_bounds_policy::DONT_CHECK); + + // compute the join result rows for the right table columns + auto right_non_keys_table = cudf::jni::get_non_keys_table(right_in_table, right_key_indices); + right_key_indices.cancel(); + auto right_gather_col = cudf::column_view(cudf::data_type{cudf::type_id::INT32}, num_join_rows, + join_maps.second->data()); + auto right_out_table = cudf::gather(right_non_keys_table, right_gather_col, + cudf::out_of_bounds_policy::DONT_CHECK); + + return cudf::jni::combine_join_results(env, *left_out_table, *right_out_table); } CATCH_STD(env, NULL); } JNIEXPORT jlongArray JNICALL Java_ai_rapids_cudf_Table_fullJoin( - JNIEnv *env, jclass, jlong left_table, jintArray left_col_join_indices, jlong right_table, - jintArray right_col_join_indices, jboolean compare_nulls_equal) { - JNI_NULL_CHECK(env, left_table, "left_table is null", NULL); - JNI_NULL_CHECK(env, left_col_join_indices, "left_col_join_indices is null", NULL); - JNI_NULL_CHECK(env, right_table, "right_table is null", NULL); - JNI_NULL_CHECK(env, right_col_join_indices, "right_col_join_indices is null", NULL); + JNIEnv *env, jclass, jlong j_left_table, jintArray j_left_key_indices, jlong j_right_table, + jintArray j_right_key_indices, jboolean compare_nulls_equal) { + JNI_NULL_CHECK(env, j_left_table, "left_table is 
null", NULL); + JNI_NULL_CHECK(env, j_left_key_indices, "left_col_join_indices is null", NULL); + JNI_NULL_CHECK(env, j_right_table, "right_table is null", NULL); + JNI_NULL_CHECK(env, j_right_key_indices, "right_col_join_indices is null", NULL); try { cudf::jni::auto_set_device(env); - cudf::table_view *n_left_table = reinterpret_cast(left_table); - cudf::table_view *n_right_table = reinterpret_cast(right_table); - cudf::jni::native_jintArray left_join_cols_arr(env, left_col_join_indices); - std::vector left_join_cols( - left_join_cols_arr.data(), left_join_cols_arr.data() + left_join_cols_arr.size()); - cudf::jni::native_jintArray right_join_cols_arr(env, right_col_join_indices); - std::vector right_join_cols( - right_join_cols_arr.data(), right_join_cols_arr.data() + right_join_cols_arr.size()); - - int dedupe_size = left_join_cols.size(); - std::vector> dedupe(dedupe_size); - for (int i = 0; i < dedupe_size; i++) { - dedupe[i].first = left_join_cols[i]; - dedupe[i].second = right_join_cols[i]; - } - - std::unique_ptr result = - cudf::full_join(*n_left_table, *n_right_table, left_join_cols, right_join_cols, dedupe, - static_cast(compare_nulls_equal) ? cudf::null_equality::EQUAL : - cudf::null_equality::UNEQUAL); - - return cudf::jni::convert_table_for_return(env, result); + auto left_in_table = reinterpret_cast(j_left_table); + auto right_in_table = reinterpret_cast(j_right_table); + cudf::jni::native_jintArray left_key_indices(env, j_left_key_indices); + auto left_keys_table = cudf::jni::get_keys_table(left_in_table, left_key_indices); + cudf::jni::native_jintArray right_key_indices(env, j_right_key_indices); + auto right_keys_table = cudf::jni::get_keys_table(right_in_table, right_key_indices); + auto nulleq = compare_nulls_equal ? 
cudf::null_equality::EQUAL : cudf::null_equality::UNEQUAL; + + // compute gather maps for the left and right tables that can produce the join result rows + auto join_maps = cudf::full_join(left_keys_table, right_keys_table, nulleq); + CUDF_EXPECTS(join_maps.first->size() <= std::numeric_limits::max(), + "join result exceeds maximum column length"); + auto num_join_rows = static_cast(join_maps.first->size()); + + // compute the join result rows for the left table columns + auto left_gather_col = cudf::column_view(cudf::data_type{cudf::type_id::INT32}, num_join_rows, + join_maps.first->data()); + auto left_out_table = + cudf::gather(*left_in_table, left_gather_col, cudf::out_of_bounds_policy::NULLIFY); + // Replace any nulls in the left key column results with the right key column results. + std::vector> result_cols = left_out_table->release(); + auto right_gather_col = cudf::column_view(cudf::data_type{cudf::type_id::INT32}, num_join_rows, + join_maps.second->data()); + for (int i = 0; i < left_key_indices.size(); ++i) { + std::unique_ptr &colptr = result_cols[left_key_indices[i]]; + auto right_key_col = right_in_table->column(right_key_indices[i]); + auto gathered = cudf::gather(cudf::table_view{{right_key_col}}, right_gather_col, + cudf::out_of_bounds_policy::NULLIFY); + auto replaced_col = cudf::replace_nulls(*colptr, gathered->get_column(0)); + colptr.reset(replaced_col.release()); + } + left_key_indices.cancel(); + + // compute the join result rows for the right table columns + auto right_non_keys_table = cudf::jni::get_non_keys_table(right_in_table, right_key_indices); + right_key_indices.cancel(); + auto right_out_table = + cudf::gather(right_non_keys_table, right_gather_col, cudf::out_of_bounds_policy::NULLIFY); + + return cudf::jni::combine_join_results(env, std::move(result_cols), right_out_table->release()); } CATCH_STD(env, NULL); } @@ -1580,13 +1727,9 @@ JNIEXPORT jlongArray JNICALL Java_ai_rapids_cudf_Table_leftSemiJoin( cudf::jni::native_jintArray 
right_join_cols_arr(env, right_col_join_indices); std::vector right_join_cols( right_join_cols_arr.data(), right_join_cols_arr.data() + right_join_cols_arr.size()); - std::vector return_cols(n_left_table->num_columns()); - for (cudf::size_type i = 0; i < n_left_table->num_columns(); ++i) { - return_cols[i] = i; - } std::unique_ptr result = cudf::left_semi_join( - *n_left_table, *n_right_table, left_join_cols, right_join_cols, return_cols, + *n_left_table, *n_right_table, left_join_cols, right_join_cols, static_cast(compare_nulls_equal) ? cudf::null_equality::EQUAL : cudf::null_equality::UNEQUAL); @@ -1613,13 +1756,9 @@ JNIEXPORT jlongArray JNICALL Java_ai_rapids_cudf_Table_leftAntiJoin( cudf::jni::native_jintArray right_join_cols_arr(env, right_col_join_indices); std::vector right_join_cols( right_join_cols_arr.data(), right_join_cols_arr.data() + right_join_cols_arr.size()); - std::vector return_cols(n_left_table->num_columns()); - for (cudf::size_type i = 0; i < n_left_table->num_columns(); ++i) { - return_cols[i] = i; - } std::unique_ptr result = cudf::left_anti_join( - *n_left_table, *n_right_table, left_join_cols, right_join_cols, return_cols, + *n_left_table, *n_right_table, left_join_cols, right_join_cols, static_cast(compare_nulls_equal) ? 
cudf::null_equality::EQUAL : cudf::null_equality::UNEQUAL); @@ -1628,6 +1767,51 @@ JNIEXPORT jlongArray JNICALL Java_ai_rapids_cudf_Table_leftAntiJoin( CATCH_STD(env, NULL); } +JNIEXPORT jlongArray JNICALL Java_ai_rapids_cudf_Table_leftJoinGatherMaps( + JNIEnv *env, jclass, jlong j_left_keys, jlong j_right_keys, jboolean compare_nulls_equal) { + return cudf::jni::join_gather_maps( + env, j_left_keys, j_right_keys, compare_nulls_equal, + [](cudf::table_view const &left, cudf::table_view const &right, cudf::null_equality nulleq) { + return cudf::left_join(left, right, nulleq); + }); +} + +JNIEXPORT jlongArray JNICALL Java_ai_rapids_cudf_Table_innerJoinGatherMaps( + JNIEnv *env, jclass, jlong j_left_keys, jlong j_right_keys, jboolean compare_nulls_equal) { + return cudf::jni::join_gather_maps( + env, j_left_keys, j_right_keys, compare_nulls_equal, + [](cudf::table_view const &left, cudf::table_view const &right, cudf::null_equality nulleq) { + return cudf::inner_join(left, right, nulleq); + }); +} + +JNIEXPORT jlongArray JNICALL Java_ai_rapids_cudf_Table_fullJoinGatherMaps( + JNIEnv *env, jclass, jlong j_left_keys, jlong j_right_keys, jboolean compare_nulls_equal) { + return cudf::jni::join_gather_maps( + env, j_left_keys, j_right_keys, compare_nulls_equal, + [](cudf::table_view const &left, cudf::table_view const &right, cudf::null_equality nulleq) { + return cudf::full_join(left, right, nulleq); + }); +} + +JNIEXPORT jlongArray JNICALL Java_ai_rapids_cudf_Table_leftSemiJoinGatherMap( + JNIEnv *env, jclass, jlong j_left_keys, jlong j_right_keys, jboolean compare_nulls_equal) { + return cudf::jni::join_gather_single_map( + env, j_left_keys, j_right_keys, compare_nulls_equal, + [](cudf::table_view const &left, cudf::table_view const &right, cudf::null_equality nulleq) { + return cudf::left_semi_join(left, right, nulleq); + }); +} + +JNIEXPORT jlongArray JNICALL Java_ai_rapids_cudf_Table_leftAntiJoinGatherMap( + JNIEnv *env, jclass, jlong j_left_keys, jlong 
j_right_keys, jboolean compare_nulls_equal) { + return cudf::jni::join_gather_single_map( + env, j_left_keys, j_right_keys, compare_nulls_equal, + [](cudf::table_view const &left, cudf::table_view const &right, cudf::null_equality nulleq) { + return cudf::left_anti_join(left, right, nulleq); + }); +} + JNIEXPORT jlongArray JNICALL Java_ai_rapids_cudf_Table_crossJoin(JNIEnv *env, jclass, jlong left_table, jlong right_table) { @@ -1859,7 +2043,9 @@ JNIEXPORT jlongArray JNICALL Java_ai_rapids_cudf_Table_gather(JNIEnv *env, jclas cudf::jni::auto_set_device(env); cudf::table_view *input = reinterpret_cast(j_input); cudf::column_view *map = reinterpret_cast(j_map); - std::unique_ptr result = cudf::gather(*input, *map); + auto bounds_policy = + check_bounds ? cudf::out_of_bounds_policy::NULLIFY : cudf::out_of_bounds_policy::DONT_CHECK; + std::unique_ptr result = cudf::gather(*input, *map, bounds_policy); return cudf::jni::convert_table_for_return(env, result); } CATCH_STD(env, 0); diff --git a/java/src/test/java/ai/rapids/cudf/GatherMapTest.java b/java/src/test/java/ai/rapids/cudf/GatherMapTest.java new file mode 100644 index 00000000000..b0e78a2c2cd --- /dev/null +++ b/java/src/test/java/ai/rapids/cudf/GatherMapTest.java @@ -0,0 +1,100 @@ +/* + * Copyright (c) 2021, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package ai.rapids.cudf; + +import org.junit.jupiter.api.Test; +import org.mockito.Mockito; + +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertSame; +import static org.junit.jupiter.api.Assertions.assertThrows; + +public class GatherMapTest { + @Test + void testInvalidBuffer() { + try (DeviceMemoryBuffer buffer = DeviceMemoryBuffer.allocate(707)) { + assertThrows(IllegalArgumentException.class, () -> new GatherMap(buffer)); + } + } + + @Test + void testRowCount() { + try (GatherMap map = new GatherMap(DeviceMemoryBuffer.allocate(700))) { + assertEquals(175, map.getRowCount()); + } + } + + @Test + void testClose() { + DeviceMemoryBuffer mockBuffer = Mockito.mock(DeviceMemoryBuffer.class); + GatherMap map = new GatherMap(mockBuffer); + map.close(); + Mockito.verify(mockBuffer).close(); + } + + @Test + void testReleaseBuffer() { + DeviceMemoryBuffer mockBuffer = Mockito.mock(DeviceMemoryBuffer.class); + GatherMap map = new GatherMap(mockBuffer); + DeviceMemoryBuffer buffer = map.releaseBuffer(); + assertSame(mockBuffer, buffer); + map.close(); + Mockito.verify(mockBuffer, Mockito.never()).close(); + } + + @Test + void testInvalidColumnView() { + try (GatherMap map = new GatherMap(DeviceMemoryBuffer.allocate(1024))) { + assertThrows(IllegalArgumentException.class, () -> map.toColumnView(0, 257)); + assertThrows(IllegalArgumentException.class, () -> map.toColumnView(257, 0)); + assertThrows(IllegalArgumentException.class, () -> map.toColumnView(-4, 253)); + assertThrows(IllegalArgumentException.class, () -> map.toColumnView(4, -2)); + } + } + + @Test + void testToColumnView() { + try (HostMemoryBuffer hostBuffer = HostMemoryBuffer.allocate(8 * 4)) { + hostBuffer.setInts(0, new int[]{10, 11, 12, 13, 14, 15, 16, 17}, 0, 8); + try (DeviceMemoryBuffer devBuffer = DeviceMemoryBuffer.allocate(8*4)) { + devBuffer.copyFromHostBuffer(hostBuffer); + devBuffer.incRefCount(); + try (GatherMap map = new 
GatherMap(devBuffer)) { + ColumnView view = map.toColumnView(0, 8); + assertEquals(DType.INT32, view.getType()); + assertEquals(0, view.getNullCount()); + assertEquals(8, view.getRowCount()); + try (HostMemoryBuffer viewHostBuffer = HostMemoryBuffer.allocate(8 * 4)) { + viewHostBuffer.copyFromDeviceBuffer(view.getData()); + for (int i = 0; i < 8; i++) { + assertEquals(i + 10, viewHostBuffer.getInt(4*i)); + } + } + view = map.toColumnView(3, 2); + assertEquals(DType.INT32, view.getType()); + assertEquals(0, view.getNullCount()); + assertEquals(2, view.getRowCount()); + try (HostMemoryBuffer viewHostBuffer = HostMemoryBuffer.allocate(8)) { + viewHostBuffer.copyFromDeviceBuffer(view.getData()); + assertEquals(13, viewHostBuffer.getInt(0)); + assertEquals(14, viewHostBuffer.getInt(4)); + } + } + } + } + } +} diff --git a/java/src/test/java/ai/rapids/cudf/TableTest.java b/java/src/test/java/ai/rapids/cudf/TableTest.java index b6350a207c1..ac71f96d3c3 100644 --- a/java/src/test/java/ai/rapids/cudf/TableTest.java +++ b/java/src/test/java/ai/rapids/cudf/TableTest.java @@ -929,6 +929,51 @@ void testLeftJoin() { } } + @Test + void testLeftJoinLeftEmpty() { + final Integer[] emptyInts = new Integer[0]; + try (Table leftTable = new Table.TestBuilder() + .column(emptyInts) + .column(emptyInts) + .build(); + Table rightTable = new Table.TestBuilder() + .column(306, 301, 360, 109, 335, 254, 317, 361, 251, 326) + .column( 20, 21, 22, 23, 24, 25, 26, 27, 28, 29) + .build(); + Table joinedTable = leftTable.onColumns(0).leftJoin(rightTable.onColumns(0), true); + Table expected = new Table.TestBuilder() + .column(emptyInts) // common + .column(emptyInts) // left + .column(emptyInts) // right + .build()) { + assertTablesAreEqual(expected, joinedTable); + } + } + + @Test + void testLeftJoinRightEmpty() { + final Integer[] emptyInts = new Integer[0]; + final Integer[] nullInts = new Integer[10]; + Arrays.fill(nullInts, null); + try (Table leftTable = new Table.TestBuilder() + 
.column(360, 326, 254, 306, 109, 361, 251, 335, 301, 317) + .column( 10, 11, 12, 13, 14, 15, 16, 17, 18, 19) + .build(); + Table rightTable = new Table.TestBuilder() + .column(emptyInts) + .column(emptyInts) + .build(); + Table joinedTable = leftTable.onColumns(0).leftJoin(rightTable.onColumns(0), true); + Table orderedJoinedTable = joinedTable.orderBy(OrderByArg.asc(1, true)); + Table expected = new Table.TestBuilder() + .column(360, 326, 254, 306, 109, 361, 251, 335, 301, 317) // common + .column( 10, 11, 12, 13, 14, 15, 16, 17, 18, 19) // left + .column(nullInts) // right + .build()) { + assertTablesAreEqual(expected, orderedJoinedTable); + } + } + @Test void testFullJoinWithNonCommonKeys() { try (Table leftTable = new Table.TestBuilder() @@ -950,6 +995,46 @@ void testFullJoinWithNonCommonKeys() { } } + @Test + void testFullJoinLeftEmpty() { + final Integer[] emptyInts = new Integer[0]; + final Integer[] nullInts = new Integer[6]; + try (Table leftTable = new Table.TestBuilder().column(emptyInts).column(emptyInts).build(); + Table rightTable = new Table.TestBuilder() + .column( 6, 5, 9, 8, 10, 32) + .column(200, 201, 202, 203, 204, 205) + .build(); + Table expected = new Table.TestBuilder() + .column( 5, 6, 8, 9, 10, 32) // common + .column(nullInts) // left + .column( 201, 200, 203, 202, 204, 205) // right + .build(); + Table joinedTable = leftTable.onColumns(0).fullJoin(rightTable.onColumns(0), true); + Table orderedJoinedTable = joinedTable.orderBy(OrderByArg.asc(0, true))) { + assertTablesAreEqual(expected, orderedJoinedTable); + } + } + + @Test + void testFullJoinRightEmpty() { + final Integer[] emptyInts = new Integer[0]; + final Integer[] nullInts = new Integer[10]; + try (Table leftTable = new Table.TestBuilder() + .column( 2, 3, 9, 0, 1, 7, 4, 6, 5, 8) + .column(100, 101, 102, 103, 104, 105, 106, 107, 108, 109) + .build(); + Table rightTable = new Table.TestBuilder().column(emptyInts).column(emptyInts).build(); + Table expected = new Table.TestBuilder() 
+ .column( 0, 1, 2, 3, 4, 5, 6, 7, 8, 9) // common + .column( 103, 104, 100, 101, 106, 108, 107, 105, 109, 102) // left + .column(nullInts) // right + .build(); + Table joinedTable = leftTable.onColumns(0).fullJoin(rightTable.onColumns(0), true); + Table orderedJoinedTable = joinedTable.orderBy(OrderByArg.asc(0, true))) { + assertTablesAreEqual(expected, orderedJoinedTable); + } + } + @Test void testFullJoinOnNullKeys() { try (Table leftTable = new Table.TestBuilder() @@ -1028,6 +1113,36 @@ void testInnerJoinWithNonCommonKeys() { } } + @Test + void testInnerJoinLeftEmpty() { + final Integer[] emptyInts = new Integer[0]; + try (Table leftTable = new Table.TestBuilder() + .column( 2, 3, 9, 0, 1, 7, 4, 6, 5, 8) + .column(100, 101, 102, 103, 104, 105, 106, 107, 108, 109) + .build(); + Table rightTable = new Table.TestBuilder().column(emptyInts).column(emptyInts).build(); + Table expected = new Table.TestBuilder() + .column(emptyInts).column(emptyInts).column(emptyInts).build(); + Table joinedTable = leftTable.onColumns(0).innerJoin(rightTable.onColumns(0), true)) { + assertTablesAreEqual(expected, joinedTable); + } + } + + @Test + void testInnerJoinRightEmpty() { + final Integer[] emptyInts = new Integer[0]; + try (Table leftTable = new Table.TestBuilder() + .column( 2, 3, 9, 0, 1, 7, 4, 6, 5, 8) + .column(100, 101, 102, 103, 104, 105, 106, 107, 108, 109) + .build(); + Table rightTable = new Table.TestBuilder().column(emptyInts).column(emptyInts).build(); + Table expected = new Table.TestBuilder() + .column(emptyInts).column(emptyInts).column(emptyInts).build(); + Table joinedTable = leftTable.onColumns(0).innerJoin(rightTable.onColumns(0), true)) { + assertTablesAreEqual(expected, joinedTable); + } + } + @Test void testInnerJoinOnNullKeys() { try (Table leftTable = new Table.TestBuilder() @@ -1104,6 +1219,32 @@ void testLeftSemiJoin() { } } + @Test + void testLeftSemiJoinLeftEmpty() { + final Integer[] emptyInts = new Integer[0]; + try (Table leftTable = new 
Table.TestBuilder().column(emptyInts).column(emptyInts).build(); + Table rightTable = new Table.TestBuilder() + .column( 6, 5, 9, 8, 10, 32) + .column(201, 202, 203, 204, 205, 206) + .build(); + Table joinedTable = leftTable.onColumns(0).leftSemiJoin(rightTable.onColumns(0), true)) { + assertTablesAreEqual(leftTable, joinedTable); + } + } + + @Test + void testLeftSemiJoinRightEmpty() { + final Integer[] emptyInts = new Integer[0]; + try (Table leftTable = new Table.TestBuilder() + .column( 2, 3, 9, 0, 1, 7, 4, 6, 5, 8) + .column(100, 101, 102, 103, 104, 105, 106, 107, 108, 109) + .build(); + Table rightTable = new Table.TestBuilder().column(emptyInts).column(emptyInts).build(); + Table joinedTable = leftTable.onColumns(0).leftSemiJoin(rightTable.onColumns(0), true)) { + assertTablesAreEqual(rightTable, joinedTable); + } + } + @Test void testLeftSemiJoinWithNulls() { try (Table leftTable = new Table.TestBuilder() @@ -1179,6 +1320,32 @@ void testLeftAntiJoin() { } } + @Test + void testLeftAntiJoinLeftEmpty() { + final Integer[] emptyInts = new Integer[0]; + try (Table leftTable = new Table.TestBuilder().column(emptyInts).column(emptyInts).build(); + Table rightTable = new Table.TestBuilder() + .column( 6, 5, 9, 8, 10, 32) + .column(201, 202, 203, 204, 205, 206) + .build(); + Table joinedTable = leftTable.onColumns(0).leftAntiJoin(rightTable.onColumns(0), true)) { + assertTablesAreEqual(leftTable, joinedTable); + } + } + + @Test + void testLeftAntiJoinRightEmpty() { + final Integer[] emptyInts = new Integer[0]; + try (Table leftTable = new Table.TestBuilder() + .column( 2, 3, 9, 0, 1, 7, 4, 6, 5, 8) + .column(100, 101, 102, 103, 104, 105, 106, 107, 108, 109) + .build(); + Table rightTable = new Table.TestBuilder().column(emptyInts).column(emptyInts).build(); + Table joinedTable = leftTable.onColumns(0).leftAntiJoin(rightTable.onColumns(0), true)) { + assertTablesAreEqual(leftTable, joinedTable); + } + } + @Test void testLeftAntiJoinOnNullKeys() { try (Table leftTable 
= new Table.TestBuilder() @@ -1255,6 +1422,215 @@ void testCrossJoin() { } } + private void verifyJoinGatherMaps(GatherMap[] maps, Table expected) { + assertEquals(2, maps.length); + int numRows = (int) expected.getRowCount(); + assertEquals(numRows, maps[0].getRowCount()); + assertEquals(numRows, maps[1].getRowCount()); + try (ColumnVector leftMap = maps[0].toColumnView(0, numRows).copyToColumnVector(); + ColumnVector rightMap = maps[1].toColumnView(0, numRows).copyToColumnVector(); + Table result = new Table(leftMap, rightMap); + Table orderedResult = result.orderBy(OrderByArg.asc(0, true))) { + assertTablesAreEqual(expected, orderedResult); + } + } + + private void verifySemiJoinGatherMap(GatherMap map, Table expected) { + int numRows = (int) expected.getRowCount(); + assertEquals(numRows, map.getRowCount()); + try (ColumnVector leftMap = map.toColumnView(0, numRows).copyToColumnVector(); + Table result = new Table(leftMap); + Table orderedResult = result.orderBy(OrderByArg.asc(0, true))) { + assertTablesAreEqual(expected, orderedResult); + } + } + + @Test + void testLeftJoinGatherMaps() { + final int inv = Integer.MIN_VALUE; + try (Table leftKeys = new Table.TestBuilder().column(2, 3, 9, 0, 1, 7, 4, 6, 5, 8).build(); + Table rightKeys = new Table.TestBuilder().column(6, 5, 9, 8, 10, 32).build(); + Table expected = new Table.TestBuilder() + .column( 0, 1, 2, 3, 4, 5, 6, 7, 8, 9) + .column(inv, inv, 2, inv, inv, inv, inv, 0, 1, 3) + .build()) { + GatherMap[] maps = leftKeys.leftJoinGatherMaps(rightKeys, false); + try { + verifyJoinGatherMaps(maps, expected); + } finally { + for (GatherMap map : maps) { + map.close(); + } + } + } + } + + @Test + void testLeftJoinGatherMapsNulls() { + final int inv = Integer.MIN_VALUE; + try (Table leftKeys = new Table.TestBuilder() + .column(2, 3, 9, 0, 1, 7, 4, null, null, 8) + .build(); + Table rightKeys = new Table.TestBuilder() + .column(null, null, 9, 8, 10, 32) + .build(); + Table expected = new Table.TestBuilder() + 
.column( 0, 1, 2, 3, 4, 5, 6, 7, 7, 8, 8, 9) // left + .column(inv, inv, 2, inv, inv, inv, inv, 0, 1, 0, 1, 3) // right + .build()) { + GatherMap[] maps = leftKeys.leftJoinGatherMaps(rightKeys, true); + try { + verifyJoinGatherMaps(maps, expected); + } finally { + for (GatherMap map : maps) { + map.close(); + } + } + } + } + + @Test + void testInnerJoinGatherMaps() { + try (Table leftKeys = new Table.TestBuilder().column(2, 3, 9, 0, 1, 7, 4, 6, 5, 8).build(); + Table rightKeys = new Table.TestBuilder().column(6, 5, 9, 8, 10, 32).build(); + Table expected = new Table.TestBuilder() + .column(2, 7, 8, 9) // left + .column(2, 0, 1, 3) // right + .build()) { + GatherMap[] maps = leftKeys.innerJoinGatherMaps(rightKeys, false); + try { + verifyJoinGatherMaps(maps, expected); + } finally { + for (GatherMap map : maps) { + map.close(); + } + } + } + } + + @Test + void testInnerJoinGatherMapsNulls() { + try (Table leftKeys = new Table.TestBuilder() + .column(2, 3, 9, 0, 1, 7, 4, null, null, 8) + .build(); + Table rightKeys = new Table.TestBuilder() + .column(null, null, 9, 8, 10, 32) + .build(); + Table expected = new Table.TestBuilder() + .column(2, 7, 7, 8, 8, 9) // left + .column(2, 0, 1, 0, 1, 3) // right + .build()) { + GatherMap[] maps = leftKeys.innerJoinGatherMaps(rightKeys, true); + try { + verifyJoinGatherMaps(maps, expected); + } finally { + for (GatherMap map : maps) { + map.close(); + } + } + } + } + + @Test + void testFullJoinGatherMaps() { + final int inv = Integer.MIN_VALUE; + try (Table leftKeys = new Table.TestBuilder().column(2, 3, 9, null, 1, 7, 4, 6, 5, 8).build(); + Table rightKeys = new Table.TestBuilder().column(6, 5, 9, 8, 10, null).build(); + Table expected = new Table.TestBuilder() + .column(inv, inv, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9) // left + .column( 4, 5, inv, inv, 2, inv, inv, inv, inv, 0, 1, 3) // right + .build()) { + GatherMap[] maps = leftKeys.fullJoinGatherMaps(rightKeys, false); + try { + verifyJoinGatherMaps(maps, expected); + } finally { + 
for (GatherMap map : maps) { + map.close(); + } + } + } + } + + @Test + void testFullJoinGatherMapsNulls() { + final int inv = Integer.MIN_VALUE; + try (Table leftKeys = new Table.TestBuilder() + .column(2, 3, 9, 0, 1, 7, 4, null, null, 8) + .build(); + Table rightKeys = new Table.TestBuilder() + .column(null, null, 9, 8, 10, 32) + .build(); + Table expected = new Table.TestBuilder() + .column(inv, inv, 0, 1, 2, 3, 4, 5, 6, 7, 7, 8, 8, 9) // left + .column( 4, 5, inv, inv, 2, inv, inv, inv, inv, 0, 1, 0, 1, 3) // right + .build()) { + GatherMap[] maps = leftKeys.fullJoinGatherMaps(rightKeys, true); + try { + verifyJoinGatherMaps(maps, expected); + } finally { + for (GatherMap map : maps) { + map.close(); + } + } + } + } + + @Test + void testLeftSemiJoinGatherMap() { + try (Table leftKeys = new Table.TestBuilder().column(2, 3, 9, 0, 1, 7, 4, 6, 5, 8).build(); + Table rightKeys = new Table.TestBuilder().column(6, 5, 9, 8, 10, 32).build(); + Table expected = new Table.TestBuilder() + .column(2, 7, 8, 9) // left + .build(); + GatherMap map = leftKeys.leftSemiJoinGatherMap(rightKeys, false)) { + verifySemiJoinGatherMap(map, expected); + } + } + + @Test + void testLeftSemiJoinGatherMapNulls() { + try (Table leftKeys = new Table.TestBuilder() + .column(2, 3, 9, 0, 1, 7, 4, null, null, 8) + .build(); + Table rightKeys = new Table.TestBuilder() + .column(null, null, 9, 8, 10, 32) + .build(); + Table expected = new Table.TestBuilder() + .column(2, 7, 8, 9) // left + .build(); + GatherMap map = leftKeys.leftSemiJoinGatherMap(rightKeys, true)) { + verifySemiJoinGatherMap(map, expected); + } + } + + @Test + void testAntiSemiJoinGatherMap() { + try (Table leftKeys = new Table.TestBuilder().column(2, 3, 9, 0, 1, 7, 4, 6, 5, 8).build(); + Table rightKeys = new Table.TestBuilder().column(6, 5, 9, 8, 10, 32).build(); + Table expected = new Table.TestBuilder() + .column(0, 1, 3, 4, 5, 6) // left + .build(); + GatherMap map = leftKeys.leftAntiJoinGatherMap(rightKeys, false)) { + 
verifySemiJoinGatherMap(map, expected); + } + } + + @Test + void testAntiSemiJoinGatherMapNulls() { + try (Table leftKeys = new Table.TestBuilder() + .column(2, 3, 9, 0, 1, 7, 4, null, null, 8) + .build(); + Table rightKeys = new Table.TestBuilder() + .column(null, null, 9, 8, 10, 32) + .build(); + Table expected = new Table.TestBuilder() + .column(0, 1, 3, 4, 5, 6) // left + .build(); + GatherMap map = leftKeys.leftAntiJoinGatherMap(rightKeys, true)) { + verifySemiJoinGatherMap(map, expected); + } + } + @Test void testBoundsNulls() { boolean[] descFlags = new boolean[1]; @@ -3988,6 +4364,26 @@ void testSimpleGather() { } } + @Test + void testBoundsCheckedGather() { + try (Table testTable = new Table.TestBuilder() + .column(1, 2, 3, 4, 5) + .column("A", "AA", "AAA", "AAAA", "AAAAA") + .decimal32Column(-3, 1, 2, 3, 4, 5) + .decimal64Column(-8, 100001L, 200002L, 300003L, 400004L, 500005L) + .build(); + ColumnVector gatherMap = ColumnVector.fromInts(0, 100, 4, -2); + Table expected = new Table.TestBuilder() + .column(1, null, 5, 4) + .column("A", null, "AAAAA", "AAAA") + .decimal32Column(-3, 1, null, 5, 4) + .decimal64Column(-8, 100001L, null, 500005L, 400004L) + .build(); + Table found = testTable.gather(gatherMap)) { + assertTablesAreEqual(expected, found); + } + } + @Test void testMaskWithoutValidity() { try (ColumnVector mask = ColumnVector.fromBoxedBooleans(true, false, true, false, true); From 2d24a9b0060025ebbefaaa102c1fcb8e3ea6a978 Mon Sep 17 00:00:00 2001 From: Jake Hemstad Date: Mon, 29 Mar 2021 20:33:29 -0500 Subject: [PATCH 17/20] Disable column_view data accessors for unsupported types (#7725) Fixes https://github.com/rapidsai/cudf/issues/7712 `column_view` provides data accessors like `column_view::data` and `column_view::begin`. These accessors are only valid for fixed-width primitive types that can be constructed by simply casting the underlying `void*` to `T*`. 
However, the accessors never actually enforced this rule, e.g., `column_view::data` should fail to compile. This PR disables these accessors for invalid types. This uncovered a number of places that were erroneously instantiating `column_view` accessors, which would lead to silent failures (e.g., `scatter` was failing silently for `struct` columns). I added a few new things to aid me in this effort: - `CUDF_ENABLE_IF` macro to make it easier to SFINAE. - `is_rep_layout_compatible()` to identify types that are layout compatible with their rep (e.g., `duration_ns` is layout compatible with its `int64_t` rep. The `decimal32` type is _not_ layout compatible with its `int32_t` rep). - `column_device_view::has_element_accessor()` identifies if `column_device_view::element()` has a valid overload. Authors: - Jake Hemstad (@jrhemstad) Approvers: - Christopher Harris (@cwharris) - Conor Hoekstra (@codereport) - Vyas Ramasubramani (@vyasr) URL: https://github.com/rapidsai/cudf/pull/7725 --- .../type_dispatcher_benchmark.cu | 17 +- cpp/include/cudf/ast/detail/transform.cuh | 26 +- .../cudf/column/column_device_view.cuh | 362 +++++++++++------- cpp/include/cudf/column/column_view.hpp | 46 ++- cpp/include/cudf/detail/gather.cuh | 193 +++++----- cpp/include/cudf/detail/scatter.cuh | 56 +-- cpp/include/cudf/lists/detail/scatter.cuh | 36 +- cpp/include/cudf/table/row_operators.cuh | 40 +- cpp/include/cudf/utilities/traits.hpp | 30 ++ cpp/src/copying/copy.cu | 30 +- cpp/src/copying/copy_range.cu | 44 ++- cpp/src/filling/fill.cu | 12 +- cpp/src/interop/dlpack.cpp | 9 +- cpp/src/interop/from_arrow.cpp | 14 +- cpp/src/interop/to_arrow.cpp | 9 +- cpp/src/jit/type.cpp | 21 +- cpp/src/merge/merge.cu | 15 +- cpp/src/replace/nulls.cu | 5 +- cpp/tests/copying/copy_tests.cu | 5 +- 19 files changed, 617 insertions(+), 353 deletions(-) diff --git a/cpp/benchmarks/type_dispatcher/type_dispatcher_benchmark.cu b/cpp/benchmarks/type_dispatcher/type_dispatcher_benchmark.cu index
df3a373c576..18ef5a1168e 100644 --- a/cpp/benchmarks/type_dispatcher/type_dispatcher_benchmark.cu +++ b/cpp/benchmarks/type_dispatcher/type_dispatcher_benchmark.cu @@ -27,6 +27,7 @@ #include +#include #include #include #include "../fixture/benchmark_fixture.hpp" @@ -87,7 +88,7 @@ __global__ void host_dispatching_kernel(mutable_column_device_view source_column template struct ColumnHandle { - template + template ())> void operator()(mutable_column_device_view source_column, int work_per_thread) { cudf::detail::grid_1d grid_config{source_column.size(), block_size}; @@ -95,6 +96,12 @@ struct ColumnHandle { // Launch the kernel. host_dispatching_kernel<<>>(source_column); } + + template ())> + void operator()(mutable_column_device_view source_column, int work_per_thread) + { + CUDF_FAIL("Invalid type to benchmark."); + } }; // The following is for DEVICE_DISPATCHING: @@ -104,12 +111,18 @@ struct ColumnHandle { // n_rows * n_cols. template struct RowHandle { - template + template ())> __device__ void operator()(mutable_column_device_view source, cudf::size_type index) { using F = Functor; source.data()[index] = F::f(source.data()[index]); } + + template ())> + __device__ void operator()(mutable_column_device_view source, cudf::size_type index) + { + cudf_assert(false && "Unsupported type."); + } }; // This is for DEVICE_DISPATCHING diff --git a/cpp/include/cudf/ast/detail/transform.cuh b/cpp/include/cudf/ast/detail/transform.cuh index 2719a8b5077..da15ac07c63 100644 --- a/cpp/include/cudf/ast/detail/transform.cuh +++ b/cpp/include/cudf/ast/detail/transform.cuh @@ -20,10 +20,12 @@ #include #include #include +#include #include #include #include #include +#include #include @@ -55,10 +57,19 @@ struct row_output { * @param row_index Row index of data column. * @param result Value to assign to output. 
*/ - template + template ())> __device__ void resolve_output(detail::device_data_reference device_data_reference, cudf::size_type row_index, Element result) const; + // Definition below after row_evaluator is a complete type + + template ())> + __device__ void resolve_output(detail::device_data_reference device_data_reference, + cudf::size_type row_index, + Element result) const + { + cudf_assert(false && "Invalid type in resolve_output."); + } private: row_evaluator const& evaluator; @@ -167,7 +178,7 @@ struct row_evaluator { * @param row_index Row index of data column. * @return Element */ - template + template ())> __device__ Element resolve_input(detail::device_data_reference device_data_reference, cudf::size_type row_index) const { @@ -187,6 +198,15 @@ struct row_evaluator { } } + template ())> + __device__ Element resolve_input(detail::device_data_reference device_data_reference, + cudf::size_type row_index) const + { + cudf_assert(false && "Unsupported type in resolve_input."); + return {}; + } + /** * @brief Callable to perform a unary operation. * @@ -249,7 +269,7 @@ struct row_evaluator { mutable_column_device_view* output_column; }; -template +template ()>*> __device__ void row_output::resolve_output(detail::device_data_reference device_data_reference, cudf::size_type row_index, Element result) const diff --git a/cpp/include/cudf/column/column_device_view.cuh b/cpp/include/cudf/column/column_device_view.cuh index 14d44b77fad..a842e51c94a 100644 --- a/cpp/include/cudf/column/column_device_view.cuh +++ b/cpp/include/cudf/column/column_device_view.cuh @@ -71,10 +71,14 @@ class alignas(16) column_device_view_base { * a column, and instead, accessing the elements should be done via *`data()`. * + * This function will only participate in overload resolution if `is_rep_layout_compatible()` + * or `std::is_same::value` are true. 
+ * * @tparam The type to cast to * @return T const* Typed pointer to underlying data */ - template + template ::value or is_rep_layout_compatible())> __host__ __device__ T const* head() const noexcept { return static_cast(_data); @@ -89,10 +93,13 @@ class alignas(16) column_device_view_base { * For columns with children, the pointer returned is undefined * and should not be used. * + * This function does not participate in overload resolution if `is_rep_layout_compatible` is + * false. + * * @tparam T The type to cast to * @return T const* Typed pointer to underlying data, including the offset */ - template + template ())> __host__ __device__ T const* data() const noexcept { return head() + _offset; @@ -235,6 +242,18 @@ class alignas(16) column_device_view_base { : _type{type}, _size{size}, _data{data}, _null_mask{null_mask}, _offset{offset} { } + + template + struct has_element_accessor_impl : std::false_type { + }; + + template + struct has_element_accessor_impl< + C, + T, + void_t().template element(std::declval()))>> + : std::true_type { + }; }; // Forward declaration @@ -283,15 +302,145 @@ class alignas(16) column_device_view : public detail::column_device_view_base { * * This function accounts for the offset. * + * This function does not participate in overload resolution if `is_rep_layout_compatible` is + * false. Specializations of this function may exist for types `T` where + *`is_rep_layout_compatible` is false. + * * @tparam T The element type * @param element_index Position of the desired element */ - template - __device__ T const element(size_type element_index) const noexcept + template ())> + __device__ T element(size_type element_index) const noexcept { return data()[element_index]; } + /** + * @brief Returns `string_view` to the string element at the specified index. + * + * If the element at the specified index is NULL, i.e., `is_null(element_index) + * == true`, then any attempt to use the result will lead to undefined behavior. 
+ * + * This function accounts for the offset. + * + * @param element_index Position of the desired string element + * @return string_view instance representing this element at this index + */ + template ::value)> + __device__ T element(size_type element_index) const noexcept + { + size_type index = element_index + offset(); // account for this view's _offset + const int32_t* d_offsets = + d_children[strings_column_view::offsets_column_index].data(); + const char* d_strings = d_children[strings_column_view::chars_column_index].data(); + size_type offset = d_offsets[index]; + return string_view{d_strings + offset, d_offsets[index + 1] - offset}; + } + + private: + /** + * @brief Dispatch functor for resolving the index value for a dictionary element. + * + * The basic dictionary elements are the indices which can be any index type. + */ + struct index_element_fn { + template () and std::is_unsigned::value)> + __device__ size_type operator()(column_device_view const& indices, size_type index) + { + return static_cast(indices.element(index)); + } + + template () and + std::is_unsigned::value))> + __device__ size_type operator()(Args&&... args) + { + cudf_assert(false and "dictionary indices must be an unsigned integral type"); + return 0; + } + }; + + public: + /** + * @brief Returns `dictionary32` element at the specified index for a + * dictionary column. + * + * `dictionary32` is a strongly typed wrapper around an `int32_t` value that holds the + * offset into the dictionary keys for the specified element. + * + * For example, given a dictionary column `d` with: + * ```c++ + * keys: {"foo", "bar", "baz"} + * indices: {2, 0, 2, 1, 0} + * + * d.element(0) == dictionary32{2}; + * d.element(1) == dictionary32{0}; + * ``` + * + * If the element at the specified index is NULL, i.e., `is_null(element_index) == true`, + * then any attempt to use the result will lead to undefined behavior. + * + * This function accounts for the offset. 
+ * + * @param element_index Position of the desired element + * @return dictionary32 instance representing this element at this index + */ + template ::value)> + __device__ T element(size_type element_index) const noexcept + { + size_type index = element_index + offset(); // account for this view's _offset + auto const indices = d_children[0]; + return dictionary32{type_dispatcher(indices.type(), index_element_fn{}, indices, index)}; + } + + /** + * @brief Returns a `numeric::decimal32` element at the specified index for a `fixed_point` + * column. + * + * If the element at the specified index is NULL, i.e., `is_null(element_index) == true`, + * then any attempt to use the result will lead to undefined behavior. + * + * @param element_index Position of the desired element + * @return numeric::decimal32 representing the element at this index + */ + template ::value)> + __device__ T element(size_type element_index) const noexcept + { + using namespace numeric; + auto const scale = scale_type{_type.scale()}; + return decimal32{scaled_integer{data()[element_index], scale}}; + } + + /** + * @brief Returns a `numeric::decimal64` element at the specified index for a `fixed_point` + * column. + * + * If the element at the specified index is NULL, i.e., `is_null(element_index) == true`, + * then any attempt to use the result will lead to undefined behavior. + * + * @param element_index Position of the desired element + * @return numeric::decimal64 representing the element at this index + */ + template ::value)> + __device__ T element(size_type element_index) const noexcept + { + using namespace numeric; + auto const scale = scale_type{_type.scale()}; + return decimal64{scaled_integer{data()[element_index], scale}}; + } + + /** + * @brief For a given `T`, indicates if `column_device_view::element()` has a valid overload. 
+ * + */ + template + static constexpr bool has_element_accessor() + { + return has_element_accessor_impl::value; + } + /** * @brief Iterator for navigating this column */ @@ -306,9 +455,12 @@ class alignas(16) column_device_view : public detail::column_device_view_base { * with columns where `has_nulls() == true` will result in undefined behavior * when accessing null elements. * + * This function does not participate in overload resolution if + * `column_device_view::has_element_accessor()` is false. + * * For columns with null elements, use `make_null_replacement_iterator`. */ - template + template ())> const_iterator begin() const { return const_iterator{count_it{0}, detail::value_accessor{*this}}; @@ -321,9 +473,12 @@ class alignas(16) column_device_view : public detail::column_device_view_base { * with columns where `has_nulls() == true` will result in undefined behavior * when accessing null elements. * + * This function does not participate in overload resolution if + * `column_device_view::has_element_accessor()` is false. + * * For columns with null elements, use `make_null_replacement_iterator`. */ - template + template ())> const_iterator end() const { return const_iterator{count_it{size()}, detail::value_accessor{*this}}; @@ -357,11 +512,16 @@ class alignas(16) column_device_view : public detail::column_device_view_base { * Else, if the element at `i` is null, then the value of `p.first` is * undefined and `p.second == false`. * + * This function does not participate in overload resolution if + * `column_device_view::has_element_accessor()` is false. + * * @throws cudf::logic_error if tparam `has_nulls == true` and * `nullable() == false` * @throws cudf::logic_error if column datatype and Element type mismatch. 
*/ - template + template ())> const_pair_iterator pair_begin() const { return const_pair_iterator{count_it{0}, @@ -382,11 +542,16 @@ class alignas(16) column_device_view : public detail::column_device_view_base { * Else, if the element at `i` is null, then the value of `p.first` is * undefined and `p.second == false`. * + * This function does not participate in overload resolution if + * `column_device_view::has_element_accessor()` is false. + * * @throws cudf::logic_error if tparam `has_nulls == true` and * `nullable() == false` * @throws cudf::logic_error if column datatype and Element type mismatch. */ - template + template ())> const_pair_rep_iterator pair_rep_begin() const { return const_pair_rep_iterator{count_it{0}, @@ -397,11 +562,16 @@ class alignas(16) column_device_view : public detail::column_device_view_base { * @brief Return a pair iterator to the element following the last element of * the column. * + * This function does not participate in overload resolution if + * `column_device_view::has_element_accessor()` is false. + * * @throws cudf::logic_error if tparam `has_nulls == true` and * `nullable() == false` * @throws cudf::logic_error if column datatype and Element type mismatch. */ - template + template ())> const_pair_iterator pair_end() const { return const_pair_iterator{count_it{size()}, @@ -412,11 +582,16 @@ class alignas(16) column_device_view : public detail::column_device_view_base { * @brief Return a pair iterator to the element following the last element of * the column. * + * This function does not participate in overload resolution if + * `column_device_view::has_element_accessor()` is false. + * * @throws cudf::logic_error if tparam `has_nulls == true` and * `nullable() == false` * @throws cudf::logic_error if column datatype and Element type mismatch. 
*/ - template + template ())> const_pair_rep_iterator pair_rep_end() const { return const_pair_rep_iterator{count_it{size()}, @@ -549,6 +724,9 @@ class alignas(16) mutable_column_device_view : public detail::column_device_view * @brief Returns pointer to the base device memory allocation casted to * the specified type. * + * This function will only participate in overload resolution if `is_rep_layout_compatible()` + * or `std::is_same::value` are true. + * * @note If `offset() == 0`, then `head() == data()` * * @note It should be rare to need to access the `head()` allocation of @@ -558,7 +736,8 @@ class alignas(16) mutable_column_device_view : public detail::column_device_view * @tparam The type to cast to * @return T* Typed pointer to underlying data */ - template + template ::value or is_rep_layout_compatible())> __host__ __device__ T* head() const noexcept { return const_cast(detail::column_device_view_base::head()); @@ -568,14 +747,15 @@ class alignas(16) mutable_column_device_view : public detail::column_device_view * @brief Returns the underlying data casted to the specified type, plus the * offset. * - * @note If `offset() == 0`, then `head() == data()` + * This function does not participate in overload resolution if `is_rep_layout_compatible` is + * false. * - * This pointer is undefined for columns with children. + * @note If `offset() == 0`, then `head() == data()` * * @tparam T The type to cast to * @return T* Typed pointer to underlying data, including the offset */ - template + template ())> __host__ __device__ T* data() const noexcept { return const_cast(detail::column_device_view_base::data()); @@ -586,15 +766,31 @@ class alignas(16) mutable_column_device_view : public detail::column_device_view * * This function accounts for the offset. * + * This function does not participate in overload resolution if `is_rep_layout_compatible` is + * false. Specializations of this function may exist for types `T` where + *`is_rep_layout_compatible` is false. 
+ * + * * @tparam T The element type * @param element_index Position of the desired element */ - template - __device__ T& element(size_type element_index) noexcept + template ())> + __device__ T& element(size_type element_index) const noexcept { return data()[element_index]; } + /** + * @brief For a given `T`, indicates if `mutable_column_device_view::element()` has a valid + * overload. + * + */ + template + static constexpr bool has_element_accessor() + { + return has_element_accessor_impl::value; + } + /** * @brief Returns raw pointer to the underlying bitmask allocation. * @@ -618,11 +814,14 @@ class alignas(16) mutable_column_device_view : public detail::column_device_view * @brief Return first element (accounting for offset) after underlying data * is casted to the specified type. * + * This function does not participate in overload resolution if + * `mutable_column_device_view::has_element_accessor()` is false. + * * @tparam T The desired type * @return T* Pointer to the first element after casting */ - template - std::enable_if_t(), iterator> begin() + template ())> + iterator begin() { return iterator{count_it{0}, detail::mutable_value_accessor{*this}}; } @@ -631,11 +830,14 @@ class alignas(16) mutable_column_device_view : public detail::column_device_view * @brief Return one past the last element after underlying data is casted to * the specified type. * + * This function does not participate in overload resolution if + * `mutable_column_device_view::has_element_accessor()` is false. 
+ * * @tparam T The desired type * @return T const* Pointer to one past the last element after casting */ - template - std::enable_if_t(), iterator> end() + template ())> + iterator end() { return iterator{count_it{size()}, detail::mutable_value_accessor{*this}}; } @@ -740,121 +942,6 @@ class alignas(16) mutable_column_device_view : public detail::column_device_view mutable_column_device_view(mutable_column_view source); }; -/** - * @brief Returns `string_view` to the string element at the specified index. - * - * If the element at the specified index is NULL, i.e., `is_null(element_index) - * == true`, then any attempt to use the result will lead to undefined behavior. - * - * This function accounts for the offset. - * - * @param element_index Position of the desired string element - * @return string_view instance representing this element at this index - */ -template <> -__device__ inline string_view const column_device_view::element( - size_type element_index) const noexcept -{ - size_type index = element_index + offset(); // account for this view's _offset - const int32_t* d_offsets = d_children[strings_column_view::offsets_column_index].data(); - const char* d_strings = d_children[strings_column_view::chars_column_index].data(); - size_type offset = d_offsets[index]; - return string_view{d_strings + offset, d_offsets[index + 1] - offset}; -} - -/** - * @brief Dispatch functor for resolving the index value for a dictionary element. - * - * The basic dictionary elements are the indices which can be any index type. - */ -struct index_element_fn { - template < - typename IndexType, - std::enable_if_t() and std::is_unsigned::value>* = nullptr> - __device__ size_type operator()(column_device_view const& input, size_type index) - { - return static_cast(input.element(index)); - } - template () and - std::is_unsigned::value)>* = nullptr> - __device__ size_type operator()(Args&&... 
args) - { - cudf_assert(false and "dictionary indices must be an unsigned integral type"); - return 0; - } -}; - -/** - * @brief Returns `dictionary32` element at the specified index for a - * dictionary column. - * - * `dictionary32` is a strongly typed wrapper around an `int32_t` value that holds the - * offset into the dictionary keys for the specified element. - * - * For example, given a dictionary column `d` with: - * ```c++ - * keys: {"foo", "bar", "baz"} - * indices: {2, 0, 2, 1, 0} - * - * d.element(0) == dictionary32{2}; - * d.element(1) == dictionary32{0}; - * ``` - * - * If the element at the specified index is NULL, i.e., `is_null(element_index) == true`, - * then any attempt to use the result will lead to undefined behavior. - * - * This function accounts for the offset. - * - * @param element_index Position of the desired element - * @return dictionary32 instance representing this element at this index - */ -template <> -__device__ inline dictionary32 const column_device_view::element( - size_type element_index) const noexcept -{ - size_type index = element_index + offset(); // account for this view's _offset - auto const indices = d_children[0]; - return dictionary32{type_dispatcher(indices.type(), index_element_fn{}, indices, index)}; -} - -/** - * @brief Returns a `numeric::decimal32` element at the specified index for a `fixed_point` column. - * - * If the element at the specified index is NULL, i.e., `is_null(element_index) == true`, - * then any attempt to use the result will lead to undefined behavior. 
- * - * @param element_index Position of the desired element - * @return numeric::decimal32 representing the element at this index - */ -template <> -__device__ inline numeric::decimal32 const column_device_view::element( - size_type element_index) const noexcept -{ - using namespace numeric; - auto const scale = scale_type{_type.scale()}; - return decimal32{scaled_integer{data()[element_index], scale}}; -} - -/** - * @brief Returns a `numeric::decimal64` element at the specified index for a `fixed_point` column. - * - * If the element at the specified index is NULL, i.e., `is_null(element_index) == true`, - * then any attempt to use the result will lead to undefined behavior. - * - * @param element_index Position of the desired element - * @return numeric::decimal64 representing the element at this index - */ -template <> -__device__ inline numeric::decimal64 const column_device_view::element( - size_type element_index) const noexcept -{ - using namespace numeric; - auto const scale = scale_type{_type.scale()}; - return decimal64{scaled_integer{data()[element_index], scale}}; -} - namespace detail { #ifdef __CUDACC__ // because set_bit in bit.hpp is wrapped with __CUDACC__ @@ -896,7 +983,6 @@ __device__ inline bitmask_type get_mask_offset_word(bitmask_type const* __restri * * @tparam T The type of elements in the column */ - template struct value_accessor { column_device_view const col; ///< column view of column in device @@ -1023,8 +1109,8 @@ struct mutable_value_accessor { }; /** - * @brief Helper function for use by column_device_view and mutable_column_device_view constructors - * to build device_views from views. + * @brief Helper function for use by column_device_view and mutable_column_device_view + * constructors to build device_views from views. * * It is used to build the array of child columns in device memory. 
Since child columns can * also have child columns, this uses recursion to build up the flat device buffer to contain diff --git a/cpp/include/cudf/column/column_view.hpp b/cpp/include/cudf/column/column_view.hpp index 168db61f672..82326a21d7d 100644 --- a/cpp/include/cudf/column/column_view.hpp +++ b/cpp/include/cudf/column/column_view.hpp @@ -16,6 +16,8 @@ #pragma once #include +#include + #include /** @@ -55,10 +57,14 @@ class column_view_base { *a column, and instead, accessing the elements should be done via *`data()`. * + * This function will only participate in overload resolution if `is_rep_layout_compatible()` + * or `std::is_same::value` are true. + * * @tparam The type to cast to * @return T const* Typed pointer to underlying data */ - template + template ::value or is_rep_layout_compatible())> T const* head() const noexcept { return static_cast(_data); @@ -70,12 +76,13 @@ class column_view_base { * * @note If `offset() == 0`, then `head() == data()` * - * @TODO Clarify behavior for variable-width types. + * This function does not participate in overload resolution if `is_rep_layout_compatible` is + * false. * * @tparam T The type to cast to * @return T const* Typed pointer to underlying data, including the offset */ - template + template ())> T const* data() const noexcept { return head() + _offset; @@ -85,10 +92,13 @@ class column_view_base { * @brief Return first element (accounting for offset) after underlying data * is casted to the specified type. * + * This function does not participate in overload resolution if `is_rep_layout_compatible` is + * false. + * * @tparam T The desired type * @return T const* Pointer to the first element after casting */ - template + template ())> T const* begin() const noexcept { return data(); @@ -98,10 +108,13 @@ class column_view_base { * @brief Return one past the last element after underlying data is casted to * the specified type. 
* + * This function does not participate in overload resolution if `is_rep_layout_compatible` is + * false. + * * @tparam T The desired type * @return T const* Pointer to one past the last element after casting */ - template + template ())> T const* end() const noexcept { return begin() + size(); @@ -438,6 +451,9 @@ class mutable_column_view : public detail::column_view_base { * @brief Returns pointer to the base device memory allocation casted to * the specified type. * + * This function will only participate in overload resolution if `is_rep_layout_compatible()` + * or `std::is_same::value` are true. + * * @note If `offset() == 0`, then `head() == data()` * * @note It should be rare to need to access the `head()` allocation of a @@ -446,7 +462,8 @@ class mutable_column_view : public detail::column_view_base { * @tparam The type to cast to * @return T* Typed pointer to underlying data */ - template + template ::value or is_rep_layout_compatible())> T* head() const noexcept { return const_cast(detail::column_view_base::head()); @@ -456,14 +473,15 @@ class mutable_column_view : public detail::column_view_base { * @brief Returns the underlying data casted to the specified type, plus the * offset. * - * @note If `offset() == 0`, then `head() == data()` + * This function does not participate in overload resolution if `is_rep_layout_compatible` is + * false. * - * @TODO Clarify behavior for variable-width types. + * @note If `offset() == 0`, then `head() == data()` * * @tparam T The type to cast to * @return T* Typed pointer to underlying data, including the offset */ - template + template ())> T* data() const noexcept { return const_cast(detail::column_view_base::data()); @@ -473,10 +491,13 @@ class mutable_column_view : public detail::column_view_base { * @brief Return first element (accounting for offset) when underlying data is * casted to the specified type. * + * This function does not participate in overload resolution if `is_rep_layout_compatible` is + * false. 
+ * * @tparam T The desired type * @return T* Pointer to the first element after casting */ - template + template ())> T* begin() const noexcept { return const_cast(detail::column_view_base::begin()); @@ -486,10 +507,13 @@ class mutable_column_view : public detail::column_view_base { * @brief Return one past the last element after underlying data is casted to * the specified type. * + * This function does not participate in overload resolution if `is_rep_layout_compatible` is + * false. + * * @tparam T The desired type * @return T* Pointer to one past the last element after casting */ - template + template ())> T* end() const noexcept { return const_cast(detail::column_view_base::end()); diff --git a/cpp/include/cudf/detail/gather.cuh b/cpp/include/cudf/detail/gather.cuh index 73647ac2292..bf488621d52 100644 --- a/cpp/include/cudf/detail/gather.cuh +++ b/cpp/include/cudf/detail/gather.cuh @@ -139,6 +139,46 @@ void gather_helper(InputItr source_itr, } } +// Error case when no other overload or specialization is available +template +struct column_gatherer_impl { + std::unique_ptr operator()(...) { CUDF_FAIL("Unsupported type in gather."); } +}; + +/** + * @brief Function object for gathering a type-erased + * column. To be used with the cudf::type_dispatcher. + */ +struct column_gatherer { + /** + * @brief Type-dispatched function to gather from one column to another based + * on a `gather_map`. + * + * @tparam Element Dispatched type for the column being gathered + * @tparam MapIterator Iterator type for the gather map + * @param source_column View into the column to gather from + * @param gather_map_begin Beginning of iterator range of integral values representing the gather + * map + * @param gather_map_end End of iterator range of integral values representing the gather map + * @param nullify_out_of_bounds Nullify values in `gather_map` that are out of bounds + * @param stream CUDA stream used for device memory operations and kernel launches. 
+ * @param mr Device memory resource used to allocate the returned column's device memory + */ + template + std::unique_ptr operator()(column_view const& source_column, + MapIterator gather_map_begin, + MapIterator gather_map_end, + bool nullify_out_of_bounds, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr) + { + column_gatherer_impl gatherer{}; + + return gatherer( + source_column, gather_map_begin, gather_map_end, nullify_out_of_bounds, stream, mr); + } +}; + /** * @brief Function object for gathering a type-erased column. * @@ -148,8 +188,8 @@ void gather_helper(InputItr source_itr, * @tparam Element Dispatched type for the column being gathered * @tparam MapIterator Iterator type for the gather map */ -template -struct column_gatherer_impl { +template +struct column_gatherer_impl()>> { /** * @brief Type-dispatched function to gather from one column to another based * on a `gather_map`. @@ -164,6 +204,7 @@ struct column_gatherer_impl { * @param stream CUDA stream used for device memory operations and kernel launches. * @param mr Device memory resource used to allocate the returned column's device memory */ + template std::unique_ptr operator()(column_view const& source_column, MapIterator gather_map_begin, MapIterator gather_map_end, @@ -195,8 +236,8 @@ struct column_gatherer_impl { * * @tparam MapIterator Iterator type for the gather map */ -template -struct column_gatherer_impl { +template <> +struct column_gatherer_impl { /** * @brief Type-dispatched function to gather from one column to another based * on a `gather_map`. This handles string_view type column_views only. @@ -209,6 +250,7 @@ struct column_gatherer_impl { * @param stream CUDA stream used for device memory operations and kernel launches. 
* @param mr Device memory resource used to allocate the returned column's device memory */ + template std::unique_ptr operator()(column_view const& source_column, MapItType gather_map_begin, MapItType gather_map_end, @@ -234,8 +276,8 @@ struct column_gatherer_impl { * This functor is invoked only on the root column of a hierarchy of list * columns. Recursion is handled internally. */ -template -struct column_gatherer_impl { +template <> +struct column_gatherer_impl { /** * @brief Gather a list column from a hierarchy of list columns. * @@ -282,6 +324,7 @@ struct column_gatherer_impl { * @returns column with elements gathered based on the gather map * */ + template std::unique_ptr operator()(column_view const& column, MapItRoot gather_map_begin, MapItRoot gather_map_end, @@ -326,45 +369,11 @@ struct column_gatherer_impl { } }; -/** - * @brief Function object for gathering a type-erased - * column. To be used with the cudf::type_dispatcher. - */ -struct column_gatherer { - /** - * @brief Type-dispatched function to gather from one column to another based - * on a `gather_map`. - * - * @tparam Element Dispatched type for the column being gathered - * @tparam MapIterator Iterator type for the gather map - * @param source_column View into the column to gather from - * @param gather_map_begin Beginning of iterator range of integral values representing the gather - * map - * @param gather_map_end End of iterator range of integral values representing the gather map - * @param nullify_out_of_bounds Nullify values in `gather_map` that are out of bounds - * @param stream CUDA stream used for device memory operations and kernel launches. 
- * @param mr Device memory resource used to allocate the returned column's device memory - */ - template - std::unique_ptr operator()(column_view const& source_column, - MapIterator gather_map_begin, - MapIterator gather_map_end, - bool nullify_out_of_bounds, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) - { - column_gatherer_impl gatherer{}; - - return gatherer( - source_column, gather_map_begin, gather_map_end, nullify_out_of_bounds, stream, mr); - } -}; - /** * @brief Column gather specialization for dictionary column type. */ -template -struct column_gatherer_impl { +template <> +struct column_gatherer_impl { /** * @brief Type-dispatched function to gather from one column to another based * on a `gather_map`. @@ -378,6 +387,7 @@ struct column_gatherer_impl { * @param mr Device memory resource used to allocate the returned column's device memory * @return New dictionary column with gathered rows. */ + template std::unique_ptr operator()(column_view const& source_column, MapItType gather_map_begin, MapItType gather_map_end, @@ -426,6 +436,56 @@ struct column_gatherer_impl { } }; +template <> +struct column_gatherer_impl { + template + std::unique_ptr operator()(column_view const& column, + MapItRoot gather_map_begin, + MapItRoot gather_map_end, + bool nullify_out_of_bounds, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr) + { + structs_column_view structs_column(column); + auto gather_map_size{std::distance(gather_map_begin, gather_map_end)}; + if (gather_map_size == 0) { return empty_like(column); } + + std::vector> output_struct_members; + std::transform(structs_column.child_begin(), + structs_column.child_end(), + std::back_inserter(output_struct_members), + [&gather_map_begin, &gather_map_end, nullify_out_of_bounds, stream, mr]( + cudf::column_view const& col) { + return cudf::type_dispatcher(col.type(), + column_gatherer{}, + col, + gather_map_begin, + gather_map_end, + nullify_out_of_bounds, + stream, + mr); + 
}); + + gather_bitmask( + // Table view of struct column. + cudf::table_view{ + std::vector{structs_column.child_begin(), structs_column.child_end()}}, + gather_map_begin, + output_struct_members, + nullify_out_of_bounds ? gather_bitmask_op::NULLIFY : gather_bitmask_op::DONT_CHECK, + stream, + mr); + + return cudf::make_structs_column( + gather_map_size, + std::move(output_struct_members), + 0, + rmm::device_buffer{0, stream, mr}, // Null mask will be fixed up in cudf::gather(). + stream, + mr); + } +}; + /** * @brief Function object for applying a transformation on the gathermap * that converts negative indices to positive indices @@ -538,55 +598,6 @@ void gather_bitmask(table_view const& source, } } -template -struct column_gatherer_impl { - std::unique_ptr operator()(column_view const& column, - MapItRoot gather_map_begin, - MapItRoot gather_map_end, - bool nullify_out_of_bounds, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) - { - structs_column_view structs_column(column); - auto gather_map_size{std::distance(gather_map_begin, gather_map_end)}; - if (gather_map_size == 0) { return empty_like(column); } - - std::vector> output_struct_members; - std::transform(structs_column.child_begin(), - structs_column.child_end(), - std::back_inserter(output_struct_members), - [&gather_map_begin, &gather_map_end, nullify_out_of_bounds, stream, mr]( - cudf::column_view const& col) { - return cudf::type_dispatcher(col.type(), - column_gatherer{}, - col, - gather_map_begin, - gather_map_end, - nullify_out_of_bounds, - stream, - mr); - }); - - gather_bitmask( - // Table view of struct column. - cudf::table_view{ - std::vector{structs_column.child_begin(), structs_column.child_end()}}, - gather_map_begin, - output_struct_members, - nullify_out_of_bounds ? 
gather_bitmask_op::NULLIFY : gather_bitmask_op::DONT_CHECK, - stream, - mr); - - return cudf::make_structs_column( - gather_map_size, - std::move(output_struct_members), - 0, - rmm::device_buffer{0, stream, mr}, // Null mask will be fixed up in cudf::gather(). - stream, - mr); - } -}; - /** * @brief Gathers the specified rows of a set of columns according to a gather map. * diff --git a/cpp/include/cudf/detail/scatter.cuh b/cpp/include/cudf/detail/scatter.cuh index 2cb1cbffc68..30764b9b89f 100644 --- a/cpp/include/cudf/detail/scatter.cuh +++ b/cpp/include/cudf/detail/scatter.cuh @@ -79,8 +79,14 @@ auto scatter_to_gather(MapIterator scatter_map_begin, return gather_map; } -template +template struct column_scatterer_impl { + std::unique_ptr operator()(...) const { CUDF_FAIL("Unsupported type for scatter."); } +}; + +template +struct column_scatterer_impl()>> { + template std::unique_ptr operator()(column_view const& source, MapIterator scatter_map_begin, MapIterator scatter_map_end, @@ -103,8 +109,9 @@ struct column_scatterer_impl { } }; -template -struct column_scatterer_impl { +template <> +struct column_scatterer_impl { + template std::unique_ptr operator()(column_view const& source, MapIterator scatter_map_begin, MapIterator scatter_map_end, @@ -119,8 +126,9 @@ struct column_scatterer_impl { } }; -template -struct column_scatterer_impl { +template <> +struct column_scatterer_impl { + template std::unique_ptr operator()(column_view const& source, MapIterator scatter_map_begin, MapIterator scatter_map_end, @@ -133,23 +141,9 @@ struct column_scatterer_impl { } }; -template -struct column_scatterer { - template - std::unique_ptr operator()(column_view const& source, - MapIterator scatter_map_begin, - MapIterator scatter_map_end, - column_view const& target, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) const - { - column_scatterer_impl scatterer{}; - return scatterer(source, scatter_map_begin, scatter_map_end, target, stream, mr); - } -}; - 
-template -struct column_scatterer_impl { +template <> +struct column_scatterer_impl { + template std::unique_ptr operator()(column_view const& source_in, MapIterator scatter_map_begin, MapIterator scatter_map_end, @@ -206,6 +200,20 @@ struct column_scatterer_impl { } }; +struct column_scatterer { + template + std::unique_ptr operator()(column_view const& source, + MapIterator scatter_map_begin, + MapIterator scatter_map_end, + column_view const& target, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr) const + { + column_scatterer_impl scatterer{}; + return scatterer(source, scatter_map_begin, scatter_map_end, target, stream, mr); + } +}; + /** * @brief Scatters the rows of the source table into a copy of the target table * according to a scatter map. @@ -276,15 +284,13 @@ std::unique_ptr
scatter( auto result = std::vector>(target.num_columns()); - auto scatter_functor = column_scatterer{}; - std::transform(source.begin(), source.end(), target.begin(), result.begin(), [=](auto const& source_col, auto const& target_col) { return type_dispatcher(source_col.type(), - scatter_functor, + column_scatterer{}, source_col, updated_scatter_map_begin, updated_scatter_map_end, diff --git a/cpp/include/cudf/lists/detail/scatter.cuh b/cpp/include/cudf/lists/detail/scatter.cuh index 5dd3db1117c..8e2ecdf49a7 100644 --- a/cpp/include/cudf/lists/detail/scatter.cuh +++ b/cpp/include/cudf/lists/detail/scatter.cuh @@ -577,15 +577,15 @@ struct list_child_constructor { auto child_offsets = cudf::strings::detail::make_offsets_child_column( begin, begin + child_list_views.size(), stream, mr); - auto child_column = - cudf::type_dispatcher(source_lists_column_view.child().child(1).type(), - list_child_constructor{}, - child_list_views, - child_offsets->view(), - cudf::lists_column_view(source_lists_column_view.child()), - cudf::lists_column_view(target_lists_column_view.child()), - stream, - mr); + auto child_column = cudf::type_dispatcher( + source_lists_column_view.child().child(1).type(), + list_child_constructor{}, + child_list_views, + child_offsets->view(), + cudf::lists_column_view(source_lists_column_view.child()), + cudf::lists_column_view(target_lists_column_view.child()), + stream, + mr); auto child_null_mask = source_lists_column_view.child().nullable() || target_lists_column_view.child().nullable() @@ -672,7 +672,7 @@ struct list_child_constructor { iter_target_member_as_list, std::back_inserter(child_columns), [&](auto source_struct_member_as_list, auto target_struct_member_as_list) { - return cudf::type_dispatcher( + return cudf::type_dispatcher( source_struct_member_as_list->child(cudf::lists_column_view::child_column_index).type(), list_child_constructor{}, list_vector, @@ -780,14 +780,14 @@ std::unique_ptr scatter( auto offsets_column = 
cudf::strings::detail::make_offsets_child_column( list_size_begin, list_size_begin + target.size(), stream, mr); - auto child_column = cudf::type_dispatcher(child_column_type, - list_child_constructor{}, - target_vector, - offsets_column->view(), - source_lists_column_view, - target_lists_column_view, - stream, - mr); + auto child_column = cudf::type_dispatcher(child_column_type, + list_child_constructor{}, + target_vector, + offsets_column->view(), + source_lists_column_view, + target_lists_column_view, + stream, + mr); auto null_mask = target.has_nulls() ? copy_bitmask(target, stream, mr) : rmm::device_buffer{0, stream, mr}; diff --git a/cpp/include/cudf/table/row_operators.cuh b/cpp/include/cudf/table/row_operators.cuh index 5af3c29a3d9..decd2879f54 100644 --- a/cpp/include/cudf/table/row_operators.cuh +++ b/cpp/include/cudf/table/row_operators.cuh @@ -28,6 +28,8 @@ #include #include +#include + namespace cudf { /** @@ -407,39 +409,47 @@ class row_lexicographic_comparator { template