From df3c0f054e3a882eda60d35b9cceb4fbd1c445e6 Mon Sep 17 00:00:00 2001 From: GALI PREM SAGAR Date: Wed, 24 Mar 2021 15:49:46 -0500 Subject: [PATCH 01/24] Fix return type of `DataFrame.argsort` (#7706) Fixes: #7577 This PR fixes the return type of `DataFrame.argsort` to return `cudf.Series` instead of a `NumericalColumn`. Authors: - GALI PREM SAGAR (@galipremsagar) Approvers: - Keith Kraus (@kkraus14) URL: https://github.com/rapidsai/cudf/pull/7706 --- python/cudf/cudf/core/dataframe.py | 24 +++++++++++++++++++++++- python/cudf/cudf/tests/test_dataframe.py | 21 +++++++++++++++++++++ 2 files changed, 44 insertions(+), 1 deletion(-) diff --git a/python/cudf/cudf/core/dataframe.py b/python/cudf/cudf/core/dataframe.py index bd009a9ad84..b5f57356698 100644 --- a/python/cudf/cudf/core/dataframe.py +++ b/python/cudf/cudf/core/dataframe.py @@ -3841,10 +3841,32 @@ def argsort(self, ascending=True, na_position="last"): - Support axis='index' only. - Not supporting: inplace, kind - Ascending can be a list of bools to control per column + + Examples + -------- + >>> import cudf + >>> df = cudf.DataFrame({'a':[10, 0, 2], 'b':[-10, 10, 1]}) + >>> df + a b + 0 10 -10 + 1 0 10 + 2 2 1 + >>> inds = df.argsort() + >>> inds + 0 1 + 1 2 + 2 0 + dtype: int32 + >>> df.take(inds) + a b + 1 0 10 + 2 2 1 + 0 10 -10 """ - return self._get_sorted_inds( + inds_col = self._get_sorted_inds( ascending=ascending, na_position=na_position ) + return cudf.Series(inds_col) @annotate("SORT_INDEX", color="red", domain="cudf_python") def sort_index( diff --git a/python/cudf/cudf/tests/test_dataframe.py b/python/cudf/cudf/tests/test_dataframe.py index 76a02d5e74a..d72b88f1713 100644 --- a/python/cudf/cudf/tests/test_dataframe.py +++ b/python/cudf/cudf/tests/test_dataframe.py @@ -8495,3 +8495,24 @@ def test_explode(data, labels, ignore_index, p_index, label_to_explode): got = gdf.explode(label_to_explode, ignore_index) assert_eq(expect, got, check_dtype=False) + + +@pytest.mark.parametrize( + "df,ascending,expected", + [ + ( + cudf.DataFrame({"a": [10, 0, 2], "b": [-10, 10, 1]}), + True, + cudf.Series([1, 2, 0], dtype="int32"), + ), + ( + cudf.DataFrame({"a": [10, 0, 2], "b": [-10, 10, 1]}), + False, + cudf.Series([0, 2, 1], dtype="int32"), + ), + ], +) +def test_dataframe_argsort(df, ascending, expected): + actual = df.argsort(ascending=ascending) + + assert_eq(actual, expected) From 14172979ffa3ad7023f6eae7a311fa132b7ad8d1 Mon Sep 17 00:00:00 2001 From: GALI PREM SAGAR Date: Wed, 24 Mar 2021 17:13:58 -0500 Subject: [PATCH 02/24] Materialize `RangeIndex` when `index=True` in parquet writer (#7711) Resolves: #6873 This PR enables support to materialize a `RangeIndex` when `index=True`. Didn't add any tests as we already test for this in `test_parquet_index` but we were having the tests wrong due to a typo which was writing to the same file both pandas & cudf dataframes. This test is now fixed in this PR. 
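For illustration only, a minimal usage sketch of the behavior this change enables (not part of the diff below; it assumes a cudf build that includes this PR). With `index=True` the default `RangeIndex` is materialized into a real index column rather than being stored only as range metadata, so it survives a round trip through the file:

    # Hypothetical usage sketch; assumes a cudf build with this PR applied.
    from io import BytesIO

    import cudf

    df = cudf.DataFrame({"a": [10, 0, 2], "b": [-10, 10, 1]})

    buf = BytesIO()
    df.to_parquet(buf, index=True)  # RangeIndex is written as an index column

    result = cudf.read_parquet(buf)
    print(result.index)  # the written index is read back from the file
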
Authors: - GALI PREM SAGAR (@galipremsagar) Approvers: - Keith Kraus (@kkraus14) URL: https://github.com/rapidsai/cudf/pull/7711 --- python/cudf/cudf/_lib/parquet.pyx | 4 +++- python/cudf/cudf/_lib/utils.pyx | 32 +++++++++++++++++++------- python/cudf/cudf/tests/test_parquet.py | 25 ++++++++++---------- python/cudf/cudf/utils/ioutils.py | 7 ++++-- 4 files changed, 45 insertions(+), 23 deletions(-) diff --git a/python/cudf/cudf/_lib/parquet.pyx b/python/cudf/cudf/_lib/parquet.pyx index 0158df46cc4..d8b4fbbbe4b 100644 --- a/python/cudf/cudf/_lib/parquet.pyx +++ b/python/cudf/cudf/_lib/parquet.pyx @@ -294,7 +294,9 @@ cpdef write_parquet( cdef unique_ptr[cudf_io_types.data_sink] _data_sink cdef cudf_io_types.sink_info sink = make_sink_info(path, _data_sink) - if index is not False and not isinstance(table._index, cudf.RangeIndex): + if index is True or ( + index is None and not isinstance(table._index, cudf.RangeIndex) + ): tv = table.view() tbl_meta = make_unique[table_input_metadata](tv) for level, idx_name in enumerate(table._index.names): diff --git a/python/cudf/cudf/_lib/utils.pyx b/python/cudf/cudf/_lib/utils.pyx index 4fe795e57a9..13eedb34c18 100644 --- a/python/cudf/cudf/_lib/utils.pyx +++ b/python/cudf/cudf/_lib/utils.pyx @@ -99,15 +99,31 @@ cpdef generate_pandas_metadata(Table table, index): idx = table.index if isinstance(idx, cudf.core.index.RangeIndex): - descr = { - "kind": "range", - "name": table.index.name, - "start": table.index.start, - "stop": table.index.stop, - "step": table.index.step, - } + if index is None: + descr = { + "kind": "range", + "name": table.index.name, + "start": table.index.start, + "stop": table.index.stop, + "step": table.index.step, + } + else: + # When `index=True`, RangeIndex needs to be materialized. + materialized_idx = cudf.Index(idx._values, name=idx.name) + descr = \ + _index_level_name( + index_name=materialized_idx.name, + level=level, + column_names=col_names + ) + index_levels.append(materialized_idx) else: - descr = _index_level_name(idx.name, level, col_names) + descr = \ + _index_level_name( + index_name=idx.name, + level=level, + column_names=col_names + ) if is_categorical_dtype(idx): raise ValueError( "'category' column dtypes are currently not " diff --git a/python/cudf/cudf/tests/test_parquet.py b/python/cudf/cudf/tests/test_parquet.py index a7a11c95e30..fe418d1ade1 100644 --- a/python/cudf/cudf/tests/test_parquet.py +++ b/python/cudf/cudf/tests/test_parquet.py @@ -1,4 +1,5 @@ -# Copyright (c) 2019-2020, NVIDIA CORPORATION. +# Copyright (c) 2019-2021, NVIDIA CORPORATION. 
+ import datetime import math import os @@ -1718,24 +1719,24 @@ def test_parquet_nullable_boolean(tmpdir, engine): ], ) @pytest.mark.parametrize("index", [None, True, False]) -def test_parquet_index(tmpdir, pdf, index): - pandas_path = tmpdir.join("pandas_index.parquet") - cudf_path = tmpdir.join("pandas_index.parquet") +def test_parquet_index(pdf, index): + pandas_buffer = BytesIO() + cudf_buffer = BytesIO() gdf = cudf.from_pandas(pdf) - pdf.to_parquet(pandas_path, index=index) - gdf.to_parquet(cudf_path, index=index) + pdf.to_parquet(pandas_buffer, index=index) + gdf.to_parquet(cudf_buffer, index=index) - expected = pd.read_parquet(cudf_path) - actual = cudf.read_parquet(cudf_path) + expected = pd.read_parquet(cudf_buffer) + actual = cudf.read_parquet(pandas_buffer) - assert_eq(expected, actual) + assert_eq(expected, actual, check_index_type=True) - expected = pd.read_parquet(pandas_path) - actual = cudf.read_parquet(pandas_path) + expected = pd.read_parquet(pandas_buffer) + actual = cudf.read_parquet(cudf_buffer) - assert_eq(expected, actual) + assert_eq(expected, actual, check_index_type=True) @pytest.mark.parametrize("engine", ["cudf", "pyarrow"]) diff --git a/python/cudf/cudf/utils/ioutils.py b/python/cudf/cudf/utils/ioutils.py index 5d52d6c7da4..16511627aa2 100644 --- a/python/cudf/cudf/utils/ioutils.py +++ b/python/cudf/cudf/utils/ioutils.py @@ -1,4 +1,4 @@ -# Copyright (c) 2019-2020, NVIDIA CORPORATION. +# Copyright (c) 2019-2021, NVIDIA CORPORATION. import datetime import os @@ -193,7 +193,10 @@ index : bool, default None If ``True``, include the dataframe's index(es) in the file output. If ``False``, they will not be written to the file. If ``None``, the - engine's default behavior will be used. + engine's default behavior will be used. However, instead of being saved + as values, the ``RangeIndex`` will be stored as a range in the metadata + so it doesn’t require much space and is faster. Other indexes will + be included as columns in the file output. 
partition_cols : list, optional, default None Column names by which to partition the dataset Columns are partitioned in the order they are given From f38daf384a7cd78b681a3a7e6c854b5faadfc1dc Mon Sep 17 00:00:00 2001 From: ChrisJar Date: Wed, 24 Mar 2021 18:21:53 -0500 Subject: [PATCH 03/24] Implement scan operations for decimal columns (#7707) This adds support for `cummin`, `cummax`, and `cumsum` in cuDF for columns with type `decimal` Authors: - @ChrisJar Approvers: - GALI PREM SAGAR (@galipremsagar) URL: https://github.com/rapidsai/cudf/pull/7707 --- python/cudf/cudf/core/column/decimal.py | 3 ++ python/cudf/cudf/core/series.py | 10 ++++-- python/cudf/cudf/tests/test_scan.py | 46 +++++++++++++++++++++++++ 3 files changed, 57 insertions(+), 2 deletions(-) diff --git a/python/cudf/cudf/core/column/decimal.py b/python/cudf/cudf/core/column/decimal.py index 7fbe602f07a..4ba675516ae 100644 --- a/python/cudf/cudf/core/column/decimal.py +++ b/python/cudf/cudf/core/column/decimal.py @@ -72,6 +72,9 @@ def binary_operator(self, op, other, reflect=False): result.dtype.precision = _binop_precision(self.dtype, other.dtype, op) return result + def _apply_scan_op(self, op: str) -> ColumnBase: + return libcudf.reduce.scan(op, self, True) + def as_decimal_column( self, dtype: Dtype, **kwargs ) -> "cudf.core.column.DecimalColumn": diff --git a/python/cudf/cudf/core/series.py b/python/cudf/cudf/core/series.py index 9d4643da637..a664c4fb182 100644 --- a/python/cudf/cudf/core/series.py +++ b/python/cudf/cudf/core/series.py @@ -4725,8 +4725,9 @@ def cumsum(self, axis=0, skipna=True, *args, **kwargs): result_col[first_index:] = None # pandas always returns int64 dtype if original dtype is int or `bool` - if np.issubdtype(result_col.dtype, np.integer) or np.issubdtype( - result_col.dtype, np.bool_ + if not is_decimal_dtype(result_col.dtype) and ( + np.issubdtype(result_col.dtype, np.integer) + or np.issubdtype(result_col.dtype, np.bool_) ): return Series( result_col.astype(np.int64)._apply_scan_op("sum"), @@ -4774,6 +4775,11 @@ def cumprod(self, axis=0, skipna=True, *args, **kwargs): if axis not in (None, 0): raise NotImplementedError("axis parameter is not implemented yet") + if is_decimal_dtype(self.dtype): + raise NotImplementedError( + "cumprod does not currently support decimal types" + ) + skipna = True if skipna is None else skipna if skipna: diff --git a/python/cudf/cudf/tests/test_scan.py b/python/cudf/cudf/tests/test_scan.py index dce65947460..f7e8c5a8563 100644 --- a/python/cudf/cudf/tests/test_scan.py +++ b/python/cudf/cudf/tests/test_scan.py @@ -6,6 +6,7 @@ import cudf from cudf.tests.utils import INTEGER_TYPES, NUMERIC_TYPES, assert_eq, gen_rand +from cudf.core.dtypes import Decimal64Dtype params_sizes = [0, 1, 2, 5] @@ -61,6 +62,21 @@ def test_cumsum_masked(): assert_eq(got, expected) +@pytest.mark.parametrize( + "dtype", + [Decimal64Dtype(8, 4), Decimal64Dtype(10, 5), Decimal64Dtype(12, 7)], +) +def test_cumsum_decimal(dtype): + data = ["243.32", "48.245", "-7234.298", np.nan, "-467.2"] + gser = cudf.Series(data).astype(dtype) + pser = pd.Series(data, dtype="float64") + + got = gser.cumsum() + expected = cudf.Series.from_pandas(pser.cumsum()).astype(dtype) + + assert_eq(got, expected) + + @pytest.mark.parametrize("dtype,nelem", list(_gen_params())) def test_cummin(dtype, nelem): if dtype == np.int8: @@ -103,6 +119,21 @@ def test_cummin_masked(): assert_eq(gs.cummin(), expected) +@pytest.mark.parametrize( + "dtype", + [Decimal64Dtype(8, 4), Decimal64Dtype(11, 6), Decimal64Dtype(14, 7)], 
+) +def test_cummin_decimal(dtype): + data = ["8394.294", np.nan, "-9940.444", np.nan, "-23.928"] + gser = cudf.Series(data).astype(dtype) + pser = pd.Series(data, dtype="float64") + + got = gser.cummin() + expected = cudf.Series.from_pandas(pser.cummin()).astype(dtype) + + assert_eq(got, expected) + + @pytest.mark.parametrize("dtype,nelem", list(_gen_params())) def test_cummax(dtype, nelem): if dtype == np.int8: @@ -145,6 +176,21 @@ def test_cummax_masked(): assert_eq(gs.cummax(), expected) +@pytest.mark.parametrize( + "dtype", + [Decimal64Dtype(8, 4), Decimal64Dtype(11, 6), Decimal64Dtype(14, 7)], +) +def test_cummax_decimal(dtype): + data = [np.nan, "54.203", "8.222", "644.32", "-562.272"] + gser = cudf.Series(data).astype(dtype) + pser = pd.Series(data, dtype="float64") + + got = gser.cummax() + expected = cudf.Series.from_pandas(pser.cummax()).astype(dtype) + + assert_eq(got, expected) + + @pytest.mark.parametrize("dtype,nelem", list(_gen_params())) def test_cumprod(dtype, nelem): if dtype == np.int8: From 31361242612a2f1198f1defb64cd560ee4eecfa8 Mon Sep 17 00:00:00 2001 From: brandon-b-miller <53796099+brandon-b-miller@users.noreply.github.com> Date: Wed, 24 Mar 2021 18:35:41 -0500 Subject: [PATCH 04/24] Fix comparison between Datetime/Timedelta columns and NULL scalars (#7504) Fixes https://github.com/rapidsai/cudf/issues/6897 Authors: - @brandon-b-miller Approvers: - GALI PREM SAGAR (@galipremsagar) - Ram (Ramakrishna Prabhu) (@rgsl888prabhu) URL: https://github.com/rapidsai/cudf/pull/7504 --- python/cudf/cudf/core/column/datetime.py | 2 + python/cudf/cudf/core/column/timedelta.py | 2 + python/cudf/cudf/tests/test_binops.py | 45 +++++++++++++++++++++++ 3 files changed, 49 insertions(+) diff --git a/python/cudf/cudf/core/column/datetime.py b/python/cudf/cudf/core/column/datetime.py index a563248f4ab..0bacbe04356 100644 --- a/python/cudf/cudf/core/column/datetime.py +++ b/python/cudf/cudf/core/column/datetime.py @@ -178,6 +178,8 @@ def normalize_binop_value(self, other: DatetimeLikeScalar) -> ScalarLike: return cudf.Scalar(None, dtype=other.dtype) return cudf.Scalar(other) + elif other is None: + return cudf.Scalar(other, dtype=self.dtype) else: raise TypeError(f"cannot normalize {type(other)}") diff --git a/python/cudf/cudf/core/column/timedelta.py b/python/cudf/cudf/core/column/timedelta.py index e22b511db01..a39638106bb 100644 --- a/python/cudf/cudf/core/column/timedelta.py +++ b/python/cudf/cudf/core/column/timedelta.py @@ -275,6 +275,8 @@ def normalize_binop_value(self, other) -> BinaryOperand: return cudf.Scalar(other) elif np.isscalar(other): return cudf.Scalar(other) + elif other is None: + return cudf.Scalar(other, dtype=self.dtype) else: raise TypeError(f"cannot normalize {type(other)}") diff --git a/python/cudf/cudf/tests/test_binops.py b/python/cudf/cudf/tests/test_binops.py index 18f2d7e474b..eb8aaaadd51 100644 --- a/python/cudf/cudf/tests/test_binops.py +++ b/python/cudf/cudf/tests/test_binops.py @@ -1773,6 +1773,51 @@ def decimal_series(input, dtype): utils.assert_eq(expect, got) +@pytest.mark.parametrize( + "dtype", + [ + "uint8", + "uint16", + "uint32", + "uint64", + "int8", + "int16", + "int32", + "int64", + "float32", + "float64", + "str", + "datetime64[ns]", + "datetime64[us]", + "datetime64[ms]", + "datetime64[s]", + "timedelta64[ns]", + "timedelta64[us]", + "timedelta64[ms]", + "timedelta64[s]", + ], +) +@pytest.mark.parametrize("null_scalar", [None, cudf.NA, np.datetime64("NaT")]) +@pytest.mark.parametrize("cmpop", _cmpops) +def 
test_column_null_scalar_comparison(dtype, null_scalar, cmpop): + # This test is meant to validate that comparing + # a series of any dtype with a null scalar produces + # a new series where all the elements are . + + if isinstance(null_scalar, np.datetime64): + if np.dtype(dtype).kind not in "mM": + pytest.skip() + null_scalar = null_scalar.astype(dtype) + + dtype = np.dtype(dtype) + + data = [1, 2, 3, 4, 5] + sr = cudf.Series(data, dtype=dtype) + result = cmpop(sr, null_scalar) + + assert result.isnull().all() + + @pytest.mark.parametrize("fn", ["eq", "ne", "lt", "gt", "le", "ge"]) def test_equality_ops_index_mismatch(fn): a = cudf.Series( From b85459814e84ef783389dbaabdea345de9b93513 Mon Sep 17 00:00:00 2001 From: Mark Harris Date: Thu, 25 Mar 2021 11:28:02 +1100 Subject: [PATCH 05/24] Convert cudf::concatenate APIs to use spans and device_uvector (#7621) Contributes to #7287 This PR replaces `std::vector` with `host_span` in public and detail `cudf::contatenate` functions, and replaces `rmm::device_vector` with `rmm::device_uvector` in the concatenate implementations. It also strengthens the SFINAE restrictions on `cudf::host_span` and `cudf::device_span` so that they cannot be constructed from containers unless the container's value_type is the same as the span's value_type. This PR also - [x] Updates cython. - [x] benchmarks before and after Authors: - Mark Harris (@harrism) Approvers: - Jake Hemstad (@jrhemstad) - Vukasin Milovanovic (@vuule) - Ashwin Srinath (@shwina) URL: https://github.com/rapidsai/cudf/pull/7621 --- cpp/include/cudf/column/column_factories.hpp | 1 - cpp/include/cudf/concatenate.hpp | 18 +- cpp/include/cudf/detail/concatenate.cuh | 9 +- cpp/include/cudf/detail/concatenate.hpp | 11 +- .../cudf/dictionary/detail/concatenate.hpp | 5 +- cpp/include/cudf/lists/detail/concatenate.hpp | 5 +- .../cudf/strings/detail/concatenate.hpp | 5 +- .../cudf/structs/detail/concatenate.hpp | 5 +- cpp/include/cudf/utilities/span.hpp | 35 +++- cpp/src/copying/concatenate.cu | 61 +++---- cpp/src/dictionary/detail/concatenate.cu | 5 +- cpp/src/interop/from_arrow.cpp | 3 +- cpp/src/join/hash_join.cu | 4 +- cpp/src/lists/copying/concatenate.cu | 6 +- cpp/src/replace/replace.cu | 3 +- cpp/src/strings/copying/concatenate.cu | 50 +++-- cpp/src/structs/copying/concatenate.cu | 6 +- cpp/src/structs/utilities.cu | 3 +- cpp/src/structs/utilities.hpp | 5 +- cpp/tests/copying/concatenate_tests.cu | 172 +++++++++++------- cpp/tests/io/orc_test.cpp | 15 +- cpp/tests/io/parquet_test.cpp | 31 ++-- cpp/tests/merge/merge_test.cpp | 2 +- python/cudf/cudf/_lib/cpp/concatenate.pxd | 14 +- .../cudf/_lib/cpp/utilities/host_span.pxd | 8 + 25 files changed, 282 insertions(+), 200 deletions(-) create mode 100644 python/cudf/cudf/_lib/cpp/utilities/host_span.pxd diff --git a/cpp/include/cudf/column/column_factories.hpp b/cpp/include/cudf/column/column_factories.hpp index 31196824845..43c2407d629 100644 --- a/cpp/include/cudf/column/column_factories.hpp +++ b/cpp/include/cudf/column/column_factories.hpp @@ -21,7 +21,6 @@ #include #include -#include namespace cudf { /** diff --git a/cpp/include/cudf/concatenate.hpp b/cpp/include/cudf/concatenate.hpp index 8333cf41b77..182cbbdc3ec 100644 --- a/cpp/include/cudf/concatenate.hpp +++ b/cpp/include/cudf/concatenate.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020, NVIDIA CORPORATION. + * Copyright (c) 2020-2021, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -17,9 +17,9 @@ #include #include +#include #include -#include namespace cudf { /** @@ -36,13 +36,13 @@ namespace cudf { * * Returns empty `device_buffer` if the column is not nullable * - * @param views Vector of column views whose bitmask will to be concatenated + * @param views host_span of column views whose bitmask will to be concatenated * @param mr Device memory resource used for allocating the new device_buffer * @return rmm::device_buffer A `device_buffer` containing the bitmasks of all * the column views in the views vector */ rmm::device_buffer concatenate_masks( - std::vector const& views, + host_span views, rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); /** @@ -51,14 +51,13 @@ rmm::device_buffer concatenate_masks( * @throws cudf::logic_error * If types of the input columns mismatch * - * @param columns_to_concat The column views to be concatenated into a single - * column + * @param columns_to_concat host_span of column views to be concatenated into a single column * @param mr Device memory resource used to allocate the returned column's device memory. * @return Unique pointer to a single table having all the rows from the * elements of `columns_to_concat` respectively in the same order. */ std::unique_ptr concatenate( - std::vector const& columns_to_concat, + host_span columns_to_concat, rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); /** @@ -82,14 +81,13 @@ std::unique_ptr concatenate( * @throws cudf::logic_error * If number of columns mismatch * - * @param tables_to_concat The table views to be concatenated into a single - * table + * @param tables_to_concat host_span of table views to be concatenated into a single table * @param mr Device memory resource used to allocate the returned table's device memory. * @return Unique pointer to a single table having all the rows from the * elements of `tables_to_concat` respectively in the same order. */ std::unique_ptr concatenate( - std::vector const& tables_to_concat, + host_span tables_to_concat, rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); /** @} */ // end of group diff --git a/cpp/include/cudf/detail/concatenate.cuh b/cpp/include/cudf/detail/concatenate.cuh index a30ad6e853d..5f0399d6172 100644 --- a/cpp/include/cudf/detail/concatenate.cuh +++ b/cpp/include/cudf/detail/concatenate.cuh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020, NVIDIA CORPORATION. + * Copyright (c) 2020-2021, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -20,6 +20,7 @@ #include #include #include +#include #include @@ -34,8 +35,8 @@ namespace detail { * * @param stream CUDA stream used for device memory operations and kernel launches. */ -void concatenate_masks(rmm::device_vector const& d_views, - rmm::device_vector const& d_offsets, +void concatenate_masks(device_span d_views, + device_span d_offsets, bitmask_type* dest_mask, size_type output_size, rmm::cuda_stream_view stream); @@ -45,7 +46,7 @@ void concatenate_masks(rmm::device_vector const& d_views, * * @param stream CUDA stream used for device memory operations and kernel launches. 
*/ -void concatenate_masks(std::vector const& views, +void concatenate_masks(host_span views, bitmask_type* dest_mask, rmm::cuda_stream_view stream); diff --git a/cpp/include/cudf/detail/concatenate.hpp b/cpp/include/cudf/detail/concatenate.hpp index 43eb5203b37..f7f5567cd76 100644 --- a/cpp/include/cudf/detail/concatenate.hpp +++ b/cpp/include/cudf/detail/concatenate.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020, NVIDIA CORPORATION. + * Copyright (c) 2020-2021, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -18,6 +18,7 @@ #include #include #include +#include #include @@ -27,22 +28,22 @@ namespace cudf { //! Inner interfaces and implementations namespace detail { /** - * @copydoc cudf::concatenate(std::vector const&,rmm::mr::device_memory_resource*) + * @copydoc cudf::concatenate(host_span,rmm::mr::device_memory_resource*) * * @param stream CUDA stream used for device memory operations and kernel launches. */ std::unique_ptr concatenate( - std::vector const& columns_to_concat, + host_span columns_to_concat, rmm::cuda_stream_view stream = rmm::cuda_stream_default, rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); /** - * @copydoc cudf::concatenate(std::vector const&,rmm::mr::device_memory_resource*) + * @copydoc cudf::concatenate(host_span,rmm::mr::device_memory_resource*) * * @param stream CUDA stream used for device memory operations and kernel launches. */ std::unique_ptr
concatenate( - std::vector const& tables_to_concat, + host_span tables_to_concat, rmm::cuda_stream_view stream = rmm::cuda_stream_default, rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); diff --git a/cpp/include/cudf/dictionary/detail/concatenate.hpp b/cpp/include/cudf/dictionary/detail/concatenate.hpp index ae2e0f0ba38..c2fe2dce1fe 100644 --- a/cpp/include/cudf/dictionary/detail/concatenate.hpp +++ b/cpp/include/cudf/dictionary/detail/concatenate.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020, NVIDIA CORPORATION. + * Copyright (c) 2020-2021, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -17,6 +17,7 @@ #include #include +#include #include @@ -36,7 +37,7 @@ namespace detail { * @return New column with concatenated results. */ std::unique_ptr concatenate( - std::vector const& columns, + host_span columns, rmm::cuda_stream_view stream = rmm::cuda_stream_default, rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); diff --git a/cpp/include/cudf/lists/detail/concatenate.hpp b/cpp/include/cudf/lists/detail/concatenate.hpp index f9adc893b8e..30797443c35 100644 --- a/cpp/include/cudf/lists/detail/concatenate.hpp +++ b/cpp/include/cudf/lists/detail/concatenate.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020, NVIDIA CORPORATION. + * Copyright (c) 2020-2021, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -18,6 +18,7 @@ #include #include #include +#include #include @@ -42,7 +43,7 @@ namespace detail { * @return New column with concatenated results. */ std::unique_ptr concatenate( - std::vector const& columns, + host_span columns, rmm::cuda_stream_view stream = rmm::cuda_stream_default, rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); diff --git a/cpp/include/cudf/strings/detail/concatenate.hpp b/cpp/include/cudf/strings/detail/concatenate.hpp index 3e6fc6d67fc..0740039e896 100644 --- a/cpp/include/cudf/strings/detail/concatenate.hpp +++ b/cpp/include/cudf/strings/detail/concatenate.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2020, NVIDIA CORPORATION. + * Copyright (c) 2019-2021, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -18,6 +18,7 @@ #include #include #include +#include #include @@ -41,7 +42,7 @@ namespace detail { * @return New column with concatenated results. */ std::unique_ptr concatenate( - std::vector const& columns, + host_span columns, rmm::cuda_stream_view stream = rmm::cuda_stream_default, rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); diff --git a/cpp/include/cudf/structs/detail/concatenate.hpp b/cpp/include/cudf/structs/detail/concatenate.hpp index ef3da82cfeb..a098703e4b0 100644 --- a/cpp/include/cudf/structs/detail/concatenate.hpp +++ b/cpp/include/cudf/structs/detail/concatenate.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020, NVIDIA CORPORATION. + * Copyright (c) 2020-2021, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -18,6 +18,7 @@ #include #include #include +#include namespace cudf { namespace structs { @@ -48,7 +49,7 @@ namespace detail { * @return New column with concatenated results. 
*/ std::unique_ptr concatenate( - std::vector const& columns, + host_span columns, rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); diff --git a/cpp/include/cudf/utilities/span.hpp b/cpp/include/cudf/utilities/span.hpp index c13e5ce44ae..999306d4ee7 100644 --- a/cpp/include/cudf/utilities/span.hpp +++ b/cpp/include/cudf/utilities/span.hpp @@ -126,16 +126,31 @@ struct host_span : public cudf::detail::span_base::value>* = nullptr> + // Constructor from container + template < + typename C, + // Only supported containers of types convertible to T + std::enable_if_t::value && + std::is_convertible().data()))> (*)[], + T (*)[]>::value>* = nullptr> constexpr host_span(C& in) : base(in.data(), in.size()) { } - template ::value>* = nullptr> + // Constructor from const container + template < + typename C, + // Only supported containers of types convertible to T + std::enable_if_t::value && + std::is_convertible().data()))> (*)[], + T (*)[]>::value>* = nullptr> constexpr host_span(C const& in) : base(in.data(), in.size()) { } + // Copy construction to support const conversion template ::value>* = nullptr> + template < + typename C, + // Only supported containers of types convertible to T + std::enable_if_t::value && + std::is_convertible().data()))> (*)[], + T (*)[]>::value>* = nullptr> constexpr device_span(C& in) : base(thrust::raw_pointer_cast(in.data()), in.size()) { } - template ::value>* = nullptr> + template < + typename C, + // Only supported containers of types convertible to T + std::enable_if_t::value && + std::is_convertible().data()))> (*)[], + T (*)[]>::value>* = nullptr> constexpr device_span(C const& in) : base(thrust::raw_pointer_cast(in.data()), in.size()) { } diff --git a/cpp/src/copying/concatenate.cu b/cpp/src/copying/concatenate.cu index 8cf9db465f3..1b948083982 100644 --- a/cpp/src/copying/concatenate.cu +++ b/cpp/src/copying/concatenate.cu @@ -21,6 +21,7 @@ #include #include #include +#include #include #include #include @@ -29,7 +30,6 @@ #include #include -#include #include #include @@ -50,19 +50,18 @@ constexpr bool use_fused_kernel_heuristic(bool const has_nulls, size_t const num return has_nulls || num_columns > 4; } -auto create_device_views(std::vector const& views, rmm::cuda_stream_view stream) +auto create_device_views(host_span views, rmm::cuda_stream_view stream) { // Create device views for each input view using CDViewPtr = decltype( column_device_view::create(std::declval(), std::declval())); auto device_view_owners = std::vector(views.size()); - std::transform( - views.cbegin(), views.cend(), device_view_owners.begin(), [stream](auto const& col) { - // TODO creating this device view can invoke null count computation - // even though it isn't used. See this issue: - // https://github.com/rapidsai/cudf/issues/4368 - return column_device_view::create(col, stream); - }); + std::transform(views.begin(), views.end(), device_view_owners.begin(), [stream](auto const& col) { + // TODO creating this device view can invoke null count computation + // even though it isn't used. See this issue: + // https://github.com/rapidsai/cudf/issues/4368 + return column_device_view::create(col, stream); + }); // Assemble contiguous array of device views auto device_views = thrust::host_vector(); @@ -74,7 +73,7 @@ auto create_device_views(std::vector const& views, rmm::cuda_stream // TODO each of these device vector copies invoke stream synchronization // which appears to add unnecessary overhead. 
See this issue: // https://github.com/rapidsai/rmm/issues/120 - auto d_views = rmm::device_vector{device_views}; + auto d_views = make_device_uvector_async(device_views); // Compute the partition offsets auto offsets = thrust::host_vector(views.size() + 1); @@ -85,7 +84,7 @@ auto create_device_views(std::vector const& views, rmm::cuda_stream std::next(offsets.begin()), [](auto const& col) { return col.size(); }, thrust::plus{}); - auto const d_offsets = rmm::device_vector{offsets}; + auto d_offsets = make_device_uvector_async(offsets); auto const output_size = offsets.back(); return std::make_tuple( @@ -132,8 +131,8 @@ __global__ void concatenate_masks_kernel(column_device_view const* views, } } -void concatenate_masks(rmm::device_vector const& d_views, - rmm::device_vector const& d_offsets, +void concatenate_masks(device_span d_views, + device_span d_offsets, bitmask_type* dest_mask, size_type output_size, rmm::cuda_stream_view stream) @@ -141,14 +140,14 @@ void concatenate_masks(rmm::device_vector const& d_views, constexpr size_type block_size{256}; cudf::detail::grid_1d config(output_size, block_size); concatenate_masks_kernel<<>>( - d_views.data().get(), - d_offsets.data().get(), + d_views.data(), + d_offsets.data(), static_cast(d_views.size()), dest_mask, output_size); } -void concatenate_masks(std::vector const& views, +void concatenate_masks(host_span views, bitmask_type* dest_mask, rmm::cuda_stream_view stream) { @@ -214,7 +213,7 @@ __global__ void fused_concatenate_kernel(column_device_view const* input_views, } template -std::unique_ptr fused_concatenate(std::vector const& views, +std::unique_ptr fused_concatenate(host_span views, bool const has_nulls, rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) @@ -245,8 +244,8 @@ std::unique_ptr fused_concatenate(std::vector const& views, auto const kernel = has_nulls ? 
fused_concatenate_kernel : fused_concatenate_kernel; kernel<<>>( - d_views.data().get(), - d_offsets.data().get(), + d_views.data(), + d_offsets.data(), static_cast(d_views.size()), *d_out_view, d_valid_count.data()); @@ -257,7 +256,7 @@ std::unique_ptr fused_concatenate(std::vector const& views, } template -std::unique_ptr for_each_concatenate(std::vector const& views, +std::unique_ptr for_each_concatenate(host_span views, bool const has_nulls, rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) @@ -289,7 +288,7 @@ std::unique_ptr for_each_concatenate(std::vector const& vie } struct concatenate_dispatch { - std::vector const& views; + host_span views; rmm::cuda_stream_view stream; rmm::mr::device_memory_resource* mr; @@ -298,7 +297,7 @@ struct concatenate_dispatch { std::unique_ptr operator()() { bool const has_nulls = - std::any_of(views.cbegin(), views.cend(), [](auto const& col) { return col.has_nulls(); }); + std::any_of(views.begin(), views.end(), [](auto const& col) { return col.has_nulls(); }); // Use a heuristic to guess when the fused kernel will be faster if (use_fused_kernel_heuristic(has_nulls, views.size())) { @@ -392,7 +391,7 @@ void bounds_and_type_check(ColIter begin, ColIter end) } // anonymous namespace // Concatenates the elements from a vector of column_views -std::unique_ptr concatenate(std::vector const& columns_to_concat, +std::unique_ptr concatenate(host_span columns_to_concat, rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) { @@ -411,15 +410,15 @@ std::unique_ptr concatenate(std::vector const& columns_to_c columns_to_concat.front().type(), concatenate_dispatch{columns_to_concat, stream, mr}); } -std::unique_ptr
concatenate(std::vector<table_view> const& tables_to_concat, +std::unique_ptr<table>
concatenate(host_span<table_view const> tables_to_concat, rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) { if (tables_to_concat.empty()) { return std::make_unique<table>
(); } table_view const first_table = tables_to_concat.front(); - CUDF_EXPECTS(std::all_of(tables_to_concat.cbegin(), - tables_to_concat.cend(), + CUDF_EXPECTS(std::all_of(tables_to_concat.begin(), + tables_to_concat.end(), [&first_table](auto const& t) { return t.num_columns() == first_table.num_columns(); }), @@ -428,8 +427,8 @@ std::unique_ptr
concatenate(std::vector const& tables_to_conc std::vector> concat_columns; for (size_type i = 0; i < first_table.num_columns(); ++i) { std::vector cols; - std::transform(tables_to_concat.cbegin(), - tables_to_concat.cend(), + std::transform(tables_to_concat.begin(), + tables_to_concat.end(), std::back_inserter(cols), [i](auto const& t) { return t.column(i); }); @@ -442,7 +441,7 @@ std::unique_ptr
concatenate(std::vector const& tables_to_conc } // namespace detail -rmm::device_buffer concatenate_masks(std::vector const& views, +rmm::device_buffer concatenate_masks(host_span views, rmm::mr::device_memory_resource* mr) { bool const has_nulls = @@ -465,14 +464,14 @@ rmm::device_buffer concatenate_masks(std::vector const& views, } // Concatenates the elements from a vector of column_views -std::unique_ptr concatenate(std::vector const& columns_to_concat, +std::unique_ptr concatenate(host_span columns_to_concat, rmm::mr::device_memory_resource* mr) { CUDF_FUNC_RANGE(); return detail::concatenate(columns_to_concat, rmm::cuda_stream_default, mr); } -std::unique_ptr
concatenate(std::vector<table_view> const& tables_to_concat, +std::unique_ptr<table>
concatenate(host_span tables_to_concat, rmm::mr::device_memory_resource* mr) { CUDF_FUNC_RANGE(); diff --git a/cpp/src/dictionary/detail/concatenate.cu b/cpp/src/dictionary/detail/concatenate.cu index 05349a5f968..cdf086e3f4a 100644 --- a/cpp/src/dictionary/detail/concatenate.cu +++ b/cpp/src/dictionary/detail/concatenate.cu @@ -62,8 +62,7 @@ struct compute_children_offsets_fn { * * @param columns The input dictionary columns. */ - compute_children_offsets_fn(std::vector const& columns) - : columns_ptrs{columns.size()} + compute_children_offsets_fn(host_span columns) : columns_ptrs{columns.size()} { std::transform( columns.begin(), columns.end(), columns_ptrs.begin(), [](auto& cv) { return &cv; }); @@ -187,7 +186,7 @@ struct dispatch_compute_indices { } // namespace -std::unique_ptr concatenate(std::vector const& columns, +std::unique_ptr concatenate(host_span columns, rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) { diff --git a/cpp/src/interop/from_arrow.cpp b/cpp/src/interop/from_arrow.cpp index 729b98d85a8..612e2111b66 100644 --- a/cpp/src/interop/from_arrow.cpp +++ b/cpp/src/interop/from_arrow.cpp @@ -150,8 +150,7 @@ struct dispatch_to_cudf_column { std::unique_ptr get_empty_type_column(size_type size) { - return std::make_unique( - data_type(type_id::EMPTY), size, std::move(rmm::device_buffer(0))); + return std::make_unique(data_type(type_id::EMPTY), size, rmm::device_buffer(0)); } /** diff --git a/cpp/src/join/hash_join.cu b/cpp/src/join/hash_join.cu index b64e91c18bd..d827d03a6c0 100644 --- a/cpp/src/join/hash_join.cu +++ b/cpp/src/join/hash_join.cu @@ -442,7 +442,9 @@ std::pair, std::unique_ptr
> construct_join_output_ stream, rmm::mr::get_current_device_resource()); common_table = cudf::detail::concatenate( - {common_from_build->view(), common_from_probe->view()}, stream, mr); + std::vector({common_from_build->view(), common_from_probe->view()}), + stream, + mr); } joined_indices = concatenate_vector_pairs(complement_indices, joined_indices); } else { diff --git a/cpp/src/lists/copying/concatenate.cu b/cpp/src/lists/copying/concatenate.cu index c6ca56085c8..facf2827f56 100644 --- a/cpp/src/lists/copying/concatenate.cu +++ b/cpp/src/lists/copying/concatenate.cu @@ -48,7 +48,7 @@ namespace { * @param[in] mr Device memory resource used to allocate the * returned column's device memory. */ -std::unique_ptr merge_offsets(std::vector const& columns, +std::unique_ptr merge_offsets(host_span columns, size_type total_list_count, rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) @@ -90,7 +90,7 @@ std::unique_ptr merge_offsets(std::vector const& colu * @copydoc cudf::lists::detail::concatenate */ std::unique_ptr concatenate( - std::vector const& columns, + host_span columns, rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()) { @@ -119,7 +119,7 @@ std::unique_ptr concatenate( // if any of the input columns have nulls, construct the output mask bool const has_nulls = - std::any_of(columns.cbegin(), columns.cend(), [](auto const& col) { return col.has_nulls(); }); + std::any_of(columns.begin(), columns.end(), [](auto const& col) { return col.has_nulls(); }); rmm::device_buffer null_mask = create_null_mask( total_list_count, has_nulls ? mask_state::UNINITIALIZED : mask_state::UNALLOCATED); if (has_nulls) { diff --git a/cpp/src/replace/replace.cu b/cpp/src/replace/replace.cu index 783e0b4b1de..cb142c2c1e2 100644 --- a/cpp/src/replace/replace.cu +++ b/cpp/src/replace/replace.cu @@ -450,7 +450,8 @@ std::unique_ptr replace_kernel_forwarder::operator()({values.keys(), replacements.keys()}), stream); return cudf::dictionary::detail::add_keys(input, new_keys->view(), stream, mr); }(); auto matched_view = cudf::dictionary_column_view(matched_input->view()); diff --git a/cpp/src/strings/copying/concatenate.cu b/cpp/src/strings/copying/concatenate.cu index 65c6c8f2836..48358cb4a38 100644 --- a/cpp/src/strings/copying/concatenate.cu +++ b/cpp/src/strings/copying/concatenate.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2020, NVIDIA CORPORATION. + * Copyright (c) 2019-2021, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -20,6 +20,7 @@ #include #include #include +#include #include #include #include @@ -27,6 +28,7 @@ #include #include +#include "thrust/iterator/transform_iterator.h" #include #include @@ -65,8 +67,7 @@ struct chars_size_transform { } }; -auto create_strings_device_views(std::vector const& views, - rmm::cuda_stream_view stream) +auto create_strings_device_views(host_span views, rmm::cuda_stream_view stream) { CUDF_FUNC_RANGE(); // Assemble contiguous array of device views @@ -77,33 +78,30 @@ auto create_strings_device_views(std::vector const& views, // Compute the partition offsets and size of offset column // Note: Using 64-bit size_t so we can detect overflow of 32-bit size_type - auto input_offsets = thrust::host_vector(views.size() + 1); + auto input_offsets = std::vector(views.size() + 1); auto offset_it = std::next(input_offsets.begin()); thrust::transform( - thrust::host, views.cbegin(), views.cend(), offset_it, [](auto const& col) -> size_t { + thrust::host, views.begin(), views.end(), offset_it, [](auto const& col) -> size_t { return static_cast(col.size()); }); thrust::inclusive_scan(thrust::host, offset_it, input_offsets.end(), offset_it); - auto const d_input_offsets = rmm::device_vector{input_offsets}; - auto const output_size = input_offsets.back(); + auto d_input_offsets = cudf::detail::make_device_uvector_async(input_offsets, stream); + auto const output_size = input_offsets.back(); // Compute the partition offsets and size of chars column // Note: Using 64-bit size_t so we can detect overflow of 32-bit size_type - // Note: Using separate transform and inclusive_scan because - // transform_inclusive_scan fails to compile with: - // error: the default constructor of "cudf::column_device_view" cannot be - // referenced -- it is a deleted function - auto d_partition_offsets = rmm::device_vector(views.size() + 1); - thrust::transform(rmm::exec_policy(stream), - device_views_ptr, - device_views_ptr + views.size(), - std::next(d_partition_offsets.begin()), - chars_size_transform{}); - thrust::inclusive_scan(rmm::exec_policy(stream), - d_partition_offsets.cbegin(), - d_partition_offsets.cend(), - d_partition_offsets.begin()); - auto const output_chars_size = d_partition_offsets.back(); + auto d_partition_offsets = rmm::device_uvector(views.size() + 1, stream); + size_t zero{0}; + d_partition_offsets.set_element_async(0, zero, stream); // zero first element + + thrust::transform_inclusive_scan(rmm::exec_policy(stream), + device_views_ptr, + device_views_ptr + views.size(), + std::next(d_partition_offsets.begin()), + chars_size_transform{}, + thrust::plus{}); + auto const output_chars_size = d_partition_offsets.back_element(stream); + stream.synchronize(); // ensure copy of output_chars_size is complete before returning return std::make_tuple(std::move(device_view_owners), device_views_ptr, @@ -205,7 +203,7 @@ __global__ void fused_concatenate_string_chars_kernel(column_device_view const* } } -std::unique_ptr concatenate(std::vector const& columns, +std::unique_ptr concatenate(host_span columns, rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) { @@ -257,8 +255,8 @@ std::unique_ptr concatenate(std::vector const& columns, : fused_concatenate_string_offset_kernel; kernel<<>>( d_views, - d_input_offsets.data().get(), - d_partition_offsets.data().get(), + d_input_offsets.data(), + d_partition_offsets.data(), static_cast(columns.size()), strings_count, d_new_offsets, @@ -277,7 +275,7 @@ std::unique_ptr concatenate(std::vector const& columns, auto const kernel = 
fused_concatenate_string_chars_kernel; kernel<<>>( d_views, - d_partition_offsets.data().get(), + d_partition_offsets.data(), static_cast(columns.size()), total_bytes, d_new_chars); diff --git a/cpp/src/structs/copying/concatenate.cu b/cpp/src/structs/copying/concatenate.cu index b2f861c7c8d..6f18c4bcbd4 100644 --- a/cpp/src/structs/copying/concatenate.cu +++ b/cpp/src/structs/copying/concatenate.cu @@ -36,7 +36,7 @@ namespace detail { /** * @copydoc cudf::structs::detail::concatenate */ -std::unique_ptr concatenate(std::vector const& columns, +std::unique_ptr concatenate(host_span columns, rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) { @@ -49,7 +49,7 @@ std::unique_ptr concatenate(std::vector const& columns, std::transform(ordered_children.begin(), ordered_children.end(), std::back_inserter(children), - [mr, stream](std::vector const& cols) { + [mr, stream](host_span cols) { return cudf::detail::concatenate(cols, stream, mr); }); @@ -57,7 +57,7 @@ std::unique_ptr concatenate(std::vector const& columns, // if any of the input columns have nulls, construct the output mask bool const has_nulls = - std::any_of(columns.cbegin(), columns.cend(), [](auto const& col) { return col.has_nulls(); }); + std::any_of(columns.begin(), columns.end(), [](auto const& col) { return col.has_nulls(); }); rmm::device_buffer null_mask = create_null_mask(total_length, has_nulls ? mask_state::UNINITIALIZED : mask_state::UNALLOCATED); if (has_nulls) { diff --git a/cpp/src/structs/utilities.cu b/cpp/src/structs/utilities.cu index 09e6c5d949d..274a88d3a05 100644 --- a/cpp/src/structs/utilities.cu +++ b/cpp/src/structs/utilities.cu @@ -18,6 +18,7 @@ #include #include +#include namespace cudf { namespace structs { @@ -27,7 +28,7 @@ namespace detail { * @copydoc cudf::structs::detail::extract_ordered_struct_children */ std::vector> extract_ordered_struct_children( - std::vector const& struct_cols) + host_span struct_cols) { auto const num_children = struct_cols[0].num_children(); auto const num_cols = static_cast(struct_cols.size()); diff --git a/cpp/src/structs/utilities.hpp b/cpp/src/structs/utilities.hpp index 1e0511cfd83..613754fc765 100644 --- a/cpp/src/structs/utilities.hpp +++ b/cpp/src/structs/utilities.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020, NVIDIA CORPORATION. + * Copyright (c) 2020-2021, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -16,6 +16,7 @@ #pragma once #include +#include namespace cudf { namespace structs { @@ -45,7 +46,7 @@ namespace detail { * @return New column with concatenated results. 
*/ std::vector> extract_ordered_struct_children( - std::vector const& struct_cols); + host_span struct_cols); } // namespace detail } // namespace structs diff --git a/cpp/tests/copying/concatenate_tests.cu b/cpp/tests/copying/concatenate_tests.cu index e63cbac1e72..cea53326895 100644 --- a/cpp/tests/copying/concatenate_tests.cu +++ b/cpp/tests/copying/concatenate_tests.cu @@ -99,7 +99,7 @@ TYPED_TEST(TypedColumnTest, ConcatenateNoColumns) TYPED_TEST(TypedColumnTest, ConcatenateColumnView) { - cudf::column original{this->type(), this->num_elements(), this->data, this->mask}; + column original{this->type(), this->num_elements(), this->data, this->mask}; std::vector indices{0, this->num_elements() / 3, this->num_elements() / 3, @@ -223,7 +223,7 @@ TEST_F(TableTest, ConcatenateTables) cols_table2.push_back(col3_table2.release()); Table t2(std::move(cols_table2)); - auto concat_table = cudf::concatenate({t1.view(), t2.view()}); + auto concat_table = cudf::concatenate(std::vector({t1, t2})); CUDF_TEST_EXPECT_TABLES_EQUAL(*concat_table, gold_table); } @@ -341,7 +341,8 @@ TEST_F(TableTest, SizeOverflowTest) auto many_chars = cudf::make_fixed_width_column(cudf::data_type{cudf::type_id::INT8}, size); cudf::table_view tbl({*many_chars}); - EXPECT_THROW(cudf::concatenate({tbl, tbl, tbl, tbl, tbl, tbl}), cudf::logic_error); + EXPECT_THROW(cudf::concatenate(std::vector({tbl, tbl, tbl, tbl, tbl, tbl})), + cudf::logic_error); } // string column, overflow on chars @@ -356,7 +357,8 @@ TEST_F(TableTest, SizeOverflowTest) 1, offsets.release(), std::move(many_chars), 0, rmm::device_buffer{0}); cudf::table_view tbl({*col}); - EXPECT_THROW(cudf::concatenate({tbl, tbl, tbl, tbl, tbl, tbl}), cudf::logic_error); + EXPECT_THROW(cudf::concatenate(std::vector({tbl, tbl, tbl, tbl, tbl, tbl})), + cudf::logic_error); } // string column, overflow on offsets (rows) @@ -372,7 +374,8 @@ TEST_F(TableTest, SizeOverflowTest) size, std::move(many_offsets), chars.release(), 0, rmm::device_buffer{0}); cudf::table_view tbl({*col}); - EXPECT_THROW(cudf::concatenate({tbl, tbl, tbl, tbl, tbl, tbl}), cudf::logic_error); + EXPECT_THROW(cudf::concatenate(std::vector({tbl, tbl, tbl, tbl, tbl, tbl})), + cudf::logic_error); } // list, structs too long @@ -395,8 +398,8 @@ TEST_F(TableTest, SizeOverflowTest) 1, offsets.release(), std::move(struct_col), 0, rmm::device_buffer{0}); cudf::table_view tbl({*col}); - EXPECT_THROW(cudf::concatenate({tbl, tbl, tbl, tbl, tbl, tbl, tbl, tbl, tbl, tbl, tbl, tbl}), - cudf::logic_error); + auto tables = std::vector({tbl, tbl, tbl, tbl, tbl, tbl, tbl, tbl, tbl, tbl, tbl, tbl}); + EXPECT_THROW(cudf::concatenate(tables), cudf::logic_error); } // struct, list child too long @@ -419,8 +422,8 @@ TEST_F(TableTest, SizeOverflowTest) auto col = cudf::make_structs_column(size, std::move(children), 0, rmm::device_buffer{0}); cudf::table_view tbl({*col}); - EXPECT_THROW(cudf::concatenate({tbl, tbl, tbl, tbl, tbl, tbl, tbl, tbl, tbl, tbl, tbl, tbl}), - cudf::logic_error); + auto tables = std::vector({tbl, tbl, tbl, tbl, tbl, tbl, tbl, tbl, tbl, tbl, tbl, tbl}); + EXPECT_THROW(cudf::concatenate(tables), cudf::logic_error); } } @@ -463,12 +466,14 @@ TEST_F(StructsColumnTest, ConcatenateStructs) // build expected output std::vector> expected_children; - expected_children.push_back( - cudf::concatenate({name_cols[0], name_cols[1], name_cols[2], name_cols[3]})); - expected_children.push_back( - cudf::concatenate({age_cols[0], age_cols[1], age_cols[2], age_cols[3]})); - expected_children.push_back( - 
cudf::concatenate({is_human_cols[0], is_human_cols[1], is_human_cols[2], is_human_cols[3]})); + auto name_col_vec = + std::vector({name_cols[0], name_cols[1], name_cols[2], name_cols[3]}); + auto age_col_vec = std::vector({age_cols[0], age_cols[1], age_cols[2], age_cols[3]}); + auto is_human_col_vec = std::vector( + {is_human_cols[0], is_human_cols[1], is_human_cols[2], is_human_cols[3]}); + expected_children.push_back(cudf::concatenate(name_col_vec)); + expected_children.push_back(cudf::concatenate(age_col_vec)); + expected_children.push_back(cudf::concatenate(is_human_col_vec)); std::vector struct_validity({1, 0, 1, 1, 1, 0}); auto expected = make_structs_column( 6, @@ -484,7 +489,7 @@ TEST_F(StructsColumnTest, ConcatenateStructs) src.push_back(structs_column_wrapper({name_cols[3], age_cols[3], is_human_cols[3]}, {1, 0})); // concatenate - auto result = cudf::concatenate({src[0], src[1], src[2], src[3]}); + auto result = cudf::concatenate(std::vector({src[0], src[1], src[2], src[3]})); cudf::test::expect_columns_equivalent(*result, *expected); } @@ -536,9 +541,13 @@ TEST_F(StructsColumnTest, ConcatenateSplitStructs) // build expected output std::vector> expected_children; - expected_children.push_back(cudf::concatenate({split_names_cols[0], split_names_cols[1]})); - expected_children.push_back(cudf::concatenate({split_ages_cols[0], split_ages_cols[1]})); - expected_children.push_back(cudf::concatenate({split_is_human_cols[0], split_is_human_cols[1]})); + auto expected_names = std::vector({split_names_cols[0], split_names_cols[1]}); + auto expected_ages = std::vector({split_ages_cols[0], split_ages_cols[1]}); + auto expected_is_human = + std::vector({split_is_human_cols[0], split_is_human_cols[1]}); + expected_children.push_back(cudf::concatenate(expected_names)); + expected_children.push_back(cudf::concatenate(expected_ages)); + expected_children.push_back(cudf::concatenate(expected_is_human)); auto expected = make_structs_column(7, std::move(expected_children), 0, rmm::device_buffer{}); // concatenate as structs @@ -552,7 +561,8 @@ TEST_F(StructsColumnTest, ConcatenateSplitStructs) } // concatenate - auto result = cudf::concatenate({src[0], src[1]}); + + auto result = cudf::concatenate(std::vector({src[0], src[1]})); cudf::test::expect_columns_equivalent(*result, *expected); } @@ -607,8 +617,11 @@ TEST_F(StructsColumnTest, ConcatenateStructsNested) // build expected output std::vector> expected_children; - expected_children.push_back(cudf::concatenate({inner_structs[0], inner_structs[1]})); - expected_children.push_back(cudf::concatenate({inner_lists[0], inner_lists[1]})); + + expected_children.push_back( + cudf::concatenate(std::vector({inner_structs[0], inner_structs[1]}))); + expected_children.push_back( + cudf::concatenate(std::vector({inner_lists[0], inner_lists[1]}))); auto expected = make_structs_column(11, std::move(expected_children), 0, rmm::device_buffer{}); // concatenate as structs @@ -621,7 +634,7 @@ TEST_F(StructsColumnTest, ConcatenateStructsNested) } // concatenate - auto result = cudf::concatenate({src[0], src[1]}); + auto result = cudf::concatenate(std::vector({src[0], src[1]})); cudf::test::expect_columns_equivalent(*result, *expected); } @@ -635,7 +648,7 @@ TEST_F(ListsColumnTest, ConcatenateLists) cudf::test::lists_column_wrapper b{4, 5, 6, 7, 8, 9, 10}; cudf::test::lists_column_wrapper expected{{0, 1, 2, 3}, {4, 5, 6, 7, 8, 9, 10}}; - auto result = cudf::concatenate({a, b}); + auto result = cudf::concatenate(std::vector({a, b})); 
CUDF_TEST_EXPECT_COLUMNS_EQUAL(*result, expected); } @@ -646,7 +659,7 @@ TEST_F(ListsColumnTest, ConcatenateLists) cudf::test::lists_column_wrapper expected{ {0, 1, 1}, {2, 3}, {4, 5}, {6}, {8, 9, 9, 9}, {10, 11}}; - auto result = cudf::concatenate({a, b}); + auto result = cudf::concatenate(std::vector({a, b})); CUDF_TEST_EXPECT_COLUMNS_EQUAL(*result, expected); } @@ -657,7 +670,7 @@ TEST_F(ListsColumnTest, ConcatenateLists) cudf::test::lists_column_wrapper expected{ {0, 1}, {2, 3, 4, 5}, {6, 7, 8}, {9}, {10, 11}, {12, 13, 14, 15}}; - auto result = cudf::concatenate({a, b}); + auto result = cudf::concatenate(std::vector({a, b})); CUDF_TEST_EXPECT_COLUMNS_EQUAL(*result, expected); } @@ -674,7 +687,7 @@ TEST_F(ListsColumnTest, ConcatenateEmptyLists) cudf::test::lists_column_wrapper b{4, 5, 6, 7}; cudf::test::lists_column_wrapper expected{4, 5, 6, 7}; - auto result = cudf::concatenate({a, b}); + auto result = cudf::concatenate(std::vector({a, b})); CUDF_TEST_EXPECT_COLUMNS_EQUAL(*result, expected); } @@ -684,7 +697,7 @@ TEST_F(ListsColumnTest, ConcatenateEmptyLists) cudf::test::lists_column_wrapper d{4, 5, 6, 7}; cudf::test::lists_column_wrapper expected{4, 5, 6, 7}; - auto result = cudf::concatenate({a, b, c, d}); + auto result = cudf::concatenate(std::vector({a, b, c, d})); CUDF_TEST_EXPECT_COLUMNS_EQUAL(*result, expected); } @@ -694,7 +707,7 @@ TEST_F(ListsColumnTest, ConcatenateEmptyLists) cudf::test::lists_column_wrapper b{4, 5, 6, 7}; cudf::test::lists_column_wrapper expected{LCW{}, {4, 5, 6, 7}}; - auto result = cudf::concatenate({a, b}); + auto result = cudf::concatenate(std::vector({a, b})); CUDF_TEST_EXPECT_COLUMNS_EQUAL(*result, expected); } @@ -704,7 +717,7 @@ TEST_F(ListsColumnTest, ConcatenateEmptyLists) cudf::test::lists_column_wrapper d{4, 5, 6, 7}; cudf::test::lists_column_wrapper expected{LCW{}, LCW{}, LCW{}, {4, 5, 6, 7}}; - auto result = cudf::concatenate({a, b, c, d}); + auto result = cudf::concatenate(std::vector({a, b, c, d})); CUDF_TEST_EXPECT_COLUMNS_EQUAL(*result, expected); } @@ -715,7 +728,7 @@ TEST_F(ListsColumnTest, ConcatenateEmptyLists) cudf::test::lists_column_wrapper d{4, 5, 6, 7}; cudf::test::lists_column_wrapper expected{{1, 2}, LCW{}, LCW{}, {4, 5, 6, 7}}; - auto result = cudf::concatenate({a, b, c, d}); + auto result = cudf::concatenate(std::vector({a, b, c, d})); CUDF_TEST_EXPECT_COLUMNS_EQUAL(*result, expected); } @@ -732,7 +745,7 @@ TEST_F(ListsColumnTest, ConcatenateListsWithNulls) cudf::test::lists_column_wrapper b{{{4, 6, 7}, valids}}; cudf::test::lists_column_wrapper expected{{{0, 1, 2, 3}, valids}, {{4, 6, 7}, valids}}; - auto result = cudf::concatenate({a, b}); + auto result = cudf::concatenate(std::vector({a, b})); CUDF_TEST_EXPECT_COLUMNS_EQUAL(*result, expected); } @@ -746,7 +759,7 @@ TEST_F(ListsColumnTest, ConcatenateNestedLists) cudf::test::lists_column_wrapper expected{ {{0, 1}, {2}}, {{4, 5, 6, 7, 8, 9, 10}}, {{6, 7}}, {{8, 9, 10}, {11, 12}}}; - auto result = cudf::concatenate({a, b}); + auto result = cudf::concatenate(std::vector({a, b})); CUDF_TEST_EXPECT_COLUMNS_EQUAL(*result, expected); } @@ -770,7 +783,7 @@ TEST_F(ListsColumnTest, ConcatenateNestedLists) {{{31, 32}, {33, 34}}, {{35, 36}, {37, 38}}, {{39, 40}}}, {{{71, 72}, {74}}, {{75, 76, 77, 78}, {77, 78}}, {{79, 80, 81}}}}; - auto result = cudf::concatenate({a, b}); + auto result = cudf::concatenate(std::vector({a, b})); CUDF_TEST_EXPECT_COLUMNS_EQUAL(*result, expected); } @@ -789,7 +802,7 @@ TEST_F(ListsColumnTest, ConcatenateNestedEmptyLists) cudf::test::lists_column_wrapper 
expected{ {{LCW{}}}, {{0, 1}, {2, 3}}, {{6, 7}}, {LCW{}, {11, 12}}}; - auto result = cudf::concatenate({a, b}); + auto result = cudf::concatenate(std::vector({a, b})); CUDF_TEST_EXPECT_COLUMNS_EQUAL(*result, expected); } @@ -815,7 +828,7 @@ TEST_F(ListsColumnTest, ConcatenateNestedEmptyLists) {{{31, 32}, {33, 34}}, {{35, 36}, {37, 38}, {1, 2}}, {{39, 40}}}, {{{LCW{}}}}}; - auto result = cudf::concatenate({a, b}); + auto result = cudf::concatenate(std::vector({a, b})); CUDF_TEST_EXPECT_COLUMNS_EQUAL(*result, expected); } @@ -834,7 +847,7 @@ TEST_F(ListsColumnTest, ConcatenateNestedListsWithNulls) cudf::test::lists_column_wrapper expected{{{{0, 1}, {2, 3}}, valids}, {{{4}, {6, 7}}, valids}}; - auto result = cudf::concatenate({a, b}); + auto result = cudf::concatenate(std::vector({a, b})); CUDF_TEST_EXPECT_COLUMNS_EQUAL(*result, expected); } @@ -848,7 +861,7 @@ TEST_F(ListsColumnTest, ConcatenateNestedListsWithNulls) {{6, 7}}, {{{{8, 9, 10}, valids}, {11, 12}}, valids}}; - auto result = cudf::concatenate({a, b}); + auto result = cudf::concatenate(std::vector({a, b})); CUDF_TEST_EXPECT_COLUMNS_EQUAL(*result, expected); } @@ -864,7 +877,8 @@ TEST_F(ListsColumnTest, ConcatenateMismatchedHierarchies) cudf::test::lists_column_wrapper a{{{{LCW{}}}}}; cudf::test::lists_column_wrapper b{{{LCW{}}}}; cudf::test::lists_column_wrapper c{{LCW{}}}; - EXPECT_THROW(cudf::concatenate({a, b, c}), cudf::logic_error); + + EXPECT_THROW(cudf::concatenate(std::vector({a, b, c})), cudf::logic_error); } { @@ -872,20 +886,23 @@ TEST_F(ListsColumnTest, ConcatenateMismatchedHierarchies) cudf::test::lists_column_wrapper a{{{{{LCW{}}}}, valids.begin()}}; cudf::test::lists_column_wrapper b{{{LCW{}}}}; cudf::test::lists_column_wrapper c{{LCW{}}}; - EXPECT_THROW(cudf::concatenate({a, b, c}), cudf::logic_error); + + EXPECT_THROW(cudf::concatenate(std::vector({a, b, c})), cudf::logic_error); } { cudf::test::lists_column_wrapper a{{{{LCW{}}}}}; cudf::test::lists_column_wrapper b{1, 2, 3}; cudf::test::lists_column_wrapper c{{3, 4, 5}}; - EXPECT_THROW(cudf::concatenate({a, b, c}), cudf::logic_error); + + EXPECT_THROW(cudf::concatenate(std::vector({a, b, c})), cudf::logic_error); } { cudf::test::lists_column_wrapper a{{{1, 2, 3}}}; cudf::test::lists_column_wrapper b{{4, 5}}; - EXPECT_THROW(cudf::concatenate({a, b}), cudf::logic_error); + + EXPECT_THROW(cudf::concatenate(std::vector({a, b})), cudf::logic_error); } } @@ -910,14 +927,16 @@ TEST_F(ListsColumnTest, SlicedColumns) {{4, 4, 4}, {5, 5}, {6, 6}}, {{-1, -1, -1, -1}, {-2}}, {{-3, -3, -3, -3}, {-4}}}; - auto result0 = cudf::concatenate({split_a[0], split_b[0]}); + + auto result0 = cudf::concatenate(std::vector({split_a[0], split_b[0]})); cudf::test::expect_columns_equivalent(*result0, expected0); cudf::test::lists_column_wrapper expected1{{{1, 1, 1}, {2, 2}, {3, 3}}, {{4, 4, 4}, {5, 5}, {6, 6}}, {{-5, -5, -5, -5}, {-6}}, {{-7, -7, -7, -7}, {-8}}}; - auto result1 = cudf::concatenate({split_a[0], split_b[1]}); + + auto result1 = cudf::concatenate(std::vector({split_a[0], split_b[1]})); cudf::test::expect_columns_equivalent(*result1, expected1); cudf::test::lists_column_wrapper expected2{ @@ -926,14 +945,16 @@ TEST_F(ListsColumnTest, SlicedColumns) {{-1, -1, -1, -1}, {-2}}, {{-3, -3, -3, -3}, {-4}}, }; - auto result2 = cudf::concatenate({split_a[1], split_b[0]}); + + auto result2 = cudf::concatenate(std::vector({split_a[1], split_b[0]})); cudf::test::expect_columns_equivalent(*result2, expected2); cudf::test::lists_column_wrapper expected3{{{7, 7, 7}, {8, 8}, {9, 9}}, {{10, 10, 
10}, {11, 11}, {12, 12}}, {{-5, -5, -5, -5}, {-6}}, {{-7, -7, -7, -7}, {-8}}}; - auto result3 = cudf::concatenate({split_a[1], split_b[1]}); + + auto result3 = cudf::concatenate(std::vector({split_a[1], split_b[1]})); cudf::test::expect_columns_equivalent(*result3, expected3); } @@ -958,7 +979,9 @@ TEST_F(ListsColumnTest, SlicedColumns) {LCW{}, {LCW{}}, {{6, 6}, {2}}}, {{LCW{}}}, {LCW{}, {LCW{}}}}; - auto result0 = cudf::concatenate({split_a[0], split_b[0]}); + + auto result0 = cudf::concatenate(std::vector({split_a[0], split_b[0]})); + cudf::test::expect_columns_equivalent(*result0, expected0); cudf::test::lists_column_wrapper expected1{ @@ -967,7 +990,8 @@ TEST_F(ListsColumnTest, SlicedColumns) {{{1, 2, 9}, LCW{}}, {{5, 6, 7, 8, 9}, {0}, {15, 17}}}, {{LCW{}}}, }; - auto result1 = cudf::concatenate({split_a[0], split_b[1]}); + + auto result1 = cudf::concatenate(std::vector({split_a[0], split_b[1]})); cudf::test::expect_columns_equivalent(*result1, expected1); cudf::test::lists_column_wrapper expected2{ @@ -975,7 +999,8 @@ TEST_F(ListsColumnTest, SlicedColumns) {LCW{}, LCW{}, {{10, 10, 10}, {11, 11}, {12, 12}}, LCW{}}, {{LCW{}}}, {LCW{}, {LCW{}}}}; - auto result2 = cudf::concatenate({split_a[1], split_b[0]}); + + auto result2 = cudf::concatenate(std::vector({split_a[1], split_b[0]})); cudf::test::expect_columns_equivalent(*result2, expected2); cudf::test::lists_column_wrapper expected3{ @@ -984,7 +1009,8 @@ TEST_F(ListsColumnTest, SlicedColumns) {{{1, 2, 9}, LCW{}}, {{5, 6, 7, 8, 9}, {0}, {15, 17}}}, {{LCW{}}}, }; - auto result3 = cudf::concatenate({split_a[1], split_b[1]}); + + auto result3 = cudf::concatenate(std::vector({split_a[1], split_b[1]})); cudf::test::expect_columns_equivalent(*result3, expected3); } } @@ -1015,14 +1041,16 @@ TEST_F(ListsColumnTest, SlicedColumnsWithNulls) {{{{-1, -1, -1, -1}, valids}, {-2}}, valids}, {{{{-3, -3, -3, -3}, valids}, {-4}}, valids}, {{{{-5, -5, -5, -5}, valids}, {-6}}, valids}}; - auto result0 = cudf::concatenate({split_a[0], split_b[0]}); + + auto result0 = cudf::concatenate(std::vector({split_a[0], split_b[0]})); cudf::test::expect_columns_equivalent(*result0, expected0); cudf::test::lists_column_wrapper expected1{{{{1, 1, 1}, valids}, {2, 2}, {{3, 3}, valids}}, {{{4, 4, 4}, {{5, 5}, valids}, {6, 6}}, valids}, {{7, 7, 7}, {8, 8}, {9, 9}}, {{{{-7, -7, -7, -7}, valids}, {-8}}, valids}}; - auto result1 = cudf::concatenate({split_a[0], split_b[1]}); + + auto result1 = cudf::concatenate(std::vector({split_a[0], split_b[1]})); cudf::test::expect_columns_equivalent(*result1, expected1); cudf::test::lists_column_wrapper expected2{ @@ -1030,13 +1058,15 @@ TEST_F(ListsColumnTest, SlicedColumnsWithNulls) {{{{-1, -1, -1, -1}, valids}, {-2}}, valids}, {{{{-3, -3, -3, -3}, valids}, {-4}}, valids}, {{{{-5, -5, -5, -5}, valids}, {-6}}, valids}}; - auto result2 = cudf::concatenate({split_a[1], split_b[0]}); + + auto result2 = cudf::concatenate(std::vector({split_a[1], split_b[0]})); cudf::test::expect_columns_equivalent(*result2, expected2); cudf::test::lists_column_wrapper expected3{ {{{10, 10, 10}, {11, 11}, {{12, 12}, valids}}, valids}, {{{{-7, -7, -7, -7}, valids}, {-8}}, valids}}; - auto result3 = cudf::concatenate({split_a[1], split_b[1]}); + + auto result3 = cudf::concatenate(std::vector({split_a[1], split_b[1]})); cudf::test::expect_columns_equivalent(*result3, expected3); } @@ -1068,7 +1098,8 @@ TEST_F(ListsColumnTest, SlicedColumnsWithNulls) {{LCW{}, {{LCW{}}, valids}}, valids}, {{{{1, 2, 9}, LCW{}}, {{5, 6, 7, 8, 9}, {0}, {15, 17}}}, valids}, }; - auto 
result0 = cudf::concatenate({split_a[0], split_b[0]}); + + auto result0 = cudf::concatenate(std::vector({split_a[0], split_b[0]})); cudf::test::expect_columns_equivalent(*result0, expected0); cudf::test::lists_column_wrapper expected1{ @@ -1079,7 +1110,8 @@ TEST_F(ListsColumnTest, SlicedColumnsWithNulls) {{{LCW{}, LCW{}}, valids}}, {{LCW{}}}, }; - auto result1 = cudf::concatenate({split_a[0], split_b[1]}); + + auto result1 = cudf::concatenate(std::vector({split_a[0], split_b[1]})); cudf::test::expect_columns_equivalent(*result1, expected1); cudf::test::lists_column_wrapper expected2{ @@ -1088,14 +1120,16 @@ TEST_F(ListsColumnTest, SlicedColumnsWithNulls) {{LCW{}, {{LCW{}}, valids}}, valids}, {{{{1, 2, 9}, LCW{}}, {{5, 6, 7, 8, 9}, {0}, {15, 17}}}, valids}, }; - auto result2 = cudf::concatenate({split_a[1], split_b[0]}); + + auto result2 = cudf::concatenate(std::vector({split_a[1], split_b[0]})); cudf::test::expect_columns_equivalent(*result2, expected2); cudf::test::lists_column_wrapper expected3{ {LCW{}, LCW{}, {{{10, 10, 10}, {{11, 11}, valids}, {12, 12}}, valids}, LCW{}}, {{LCW{}}}, }; - auto result3 = cudf::concatenate({split_a[1], split_b[1]}); + + auto result3 = cudf::concatenate(std::vector({split_a[1], split_b[1]})); cudf::test::expect_columns_equivalent(*result3, expected3); } } @@ -1140,11 +1174,12 @@ TEST_F(ListsColumnTest, ListOfStructs) } // build expected output - auto expected_child = - cudf::concatenate({inner_structs[0], inner_structs[1], inner_structs[2], inner_structs[3]}); + auto struct_views = std::vector( + {inner_structs[0], inner_structs[1], inner_structs[2], inner_structs[3]}); + auto expected_child = cudf::concatenate(struct_views); fixed_width_column_wrapper offsets_w{0, 1, 1, 1, 1, 4, 6, 6, 6, 10, 11}; - auto expected = make_lists_column( - 10, std::move(offsets_w.release()), std::move(expected_child), 0, rmm::device_buffer{}); + auto expected = + make_lists_column(10, offsets_w.release(), std::move(expected_child), 0, rmm::device_buffer{}); // lists std::vector> offsets; @@ -1154,7 +1189,7 @@ TEST_F(ListsColumnTest, ListOfStructs) offsets.push_back({0, 0, 4, 5}); // concatenate as lists - std::vector> src; + std::vector> src; for (size_t idx = 0; idx < inner_structs.size(); idx++) { int size = static_cast(offsets[idx]).size() - 1; src.push_back(make_lists_column( @@ -1162,7 +1197,7 @@ TEST_F(ListsColumnTest, ListOfStructs) } // concatenate - auto result = cudf::concatenate({*src[0], *src[1], *src[2], *src[3]}); + auto result = cudf::concatenate(std::vector({*src[0], *src[1], *src[2], *src[3]})); cudf::test::expect_columns_equivalent(*result, *expected); } @@ -1189,8 +1224,7 @@ TYPED_TEST(FixedPointTestBothReps, FixedPointConcatentate) auto const b = fw_wrapper(vec.begin() + 300, vec.begin() + 700); auto const c = fw_wrapper(vec.begin() + 700, vec.end()); - auto const columns = std::vector{a, b, c}; - auto const results = cudf::concatenate(columns); + auto const results = cudf::concatenate(std::vector{a, b, c}); auto const expected = fw_wrapper(vec.begin(), vec.end()); CUDF_TEST_EXPECT_COLUMNS_EQUAL(*results, expected); @@ -1208,8 +1242,7 @@ TEST_F(FixedPointTest, FixedPointConcatentate) auto const b = fp_wrapper(vec.begin() + 300, vec.begin() + 700, scale_type{-2}); auto const c = fp_wrapper(vec.begin() + 700, vec.end(), /*****/ scale_type{-2}); - auto const columns = std::vector{a, b, c}; - auto const results = cudf::concatenate(columns); + auto const results = cudf::concatenate(std::vector{a, b, c}); auto const expected = fp_wrapper(vec.begin(), vec.end(), 
scale_type{-2}); CUDF_TEST_EXPECT_COLUMNS_EQUAL(*results, expected); @@ -1227,8 +1260,7 @@ TEST_F(FixedPointTest, FixedPointScaleMismatch) auto const b = fp_wrapper(vec.begin() + 300, vec.begin() + 700, scale_type{-2}); auto const c = fp_wrapper(vec.begin() + 700, vec.end(), /*****/ scale_type{-3}); - auto const columns = std::vector{a, b, c}; - EXPECT_THROW(cudf::concatenate(columns), cudf::logic_error); + EXPECT_THROW(cudf::concatenate(std::vector{a, b, c}), cudf::logic_error); } struct DictionaryConcatTest : public cudf::test::BaseFixture { diff --git a/cpp/tests/io/orc_test.cpp b/cpp/tests/io/orc_test.cpp index b0dc01ea001..108befa80a7 100644 --- a/cpp/tests/io/orc_test.cpp +++ b/cpp/tests/io/orc_test.cpp @@ -29,6 +29,7 @@ #include #include #include +#include #include @@ -395,7 +396,7 @@ TEST_F(OrcWriterTest, MultiColumnWithNulls) auto col3_mask = cudf::detail::make_counting_transform_iterator(0, [](auto i) { return (i == (num_rows - 1)); }); auto col4_mask = - cudf::detail::make_counting_transform_iterator(0, [](auto i) { return (i >= 40 || i <= 60); }); + cudf::detail::make_counting_transform_iterator(0, [](auto i) { return (i >= 40 && i <= 60); }); auto col5_mask = cudf::detail::make_counting_transform_iterator(0, [](auto i) { return (i > 80); }); @@ -657,7 +658,7 @@ TEST_F(OrcChunkedWriterTest, SimpleTable) auto table1 = create_random_fixed_table(5, 5, true); auto table2 = create_random_fixed_table(5, 5, true); - auto full_table = cudf::concatenate({*table1, *table2}); + auto full_table = cudf::concatenate(std::vector({*table1, *table2})); auto filepath = temp_env->get_temp_filepath("ChunkedSimple.orc"); cudf_io::chunked_orc_writer_options opts = @@ -677,7 +678,7 @@ TEST_F(OrcChunkedWriterTest, LargeTables) auto table1 = create_random_fixed_table(512, 4096, true); auto table2 = create_random_fixed_table(512, 8192, true); - auto full_table = cudf::concatenate({*table1, *table2}); + auto full_table = cudf::concatenate(std::vector({*table1, *table2})); auto filepath = temp_env->get_temp_filepath("ChunkedLarge.orc"); cudf_io::chunked_orc_writer_options opts = @@ -737,7 +738,7 @@ TEST_F(OrcChunkedWriterTest, Strings) cols.push_back(strings2.release()); cudf::table tbl2(std::move(cols)); - auto expected = cudf::concatenate({tbl1, tbl2}); + auto expected = cudf::concatenate(std::vector({tbl1, tbl2})); auto filepath = temp_env->get_temp_filepath("ChunkedStrings.orc"); cudf_io::chunked_orc_writer_options opts = @@ -799,7 +800,7 @@ TEST_F(OrcChunkedWriterTest, ReadStripes) auto table1 = create_random_fixed_table(5, 5, true); auto table2 = create_random_fixed_table(5, 5, true); - auto full_table = cudf::concatenate({*table2, *table1, *table2}); + auto full_table = cudf::concatenate(std::vector({*table2, *table1, *table2})); auto filepath = temp_env->get_temp_filepath("ChunkedStripes.orc"); cudf_io::chunked_orc_writer_options opts = @@ -863,7 +864,7 @@ TYPED_TEST(OrcChunkedWriterNumericTypeTest, UnalignedSize) cols.push_back(c2b_w.release()); cudf::table tbl2(std::move(cols)); - auto expected = cudf::concatenate({tbl1, tbl2}); + auto expected = cudf::concatenate(std::vector({tbl1, tbl2})); auto filepath = temp_env->get_temp_filepath("ChunkedUnalignedSize.orc"); cudf_io::chunked_orc_writer_options opts = @@ -910,7 +911,7 @@ TYPED_TEST(OrcChunkedWriterNumericTypeTest, UnalignedSize2) cols.push_back(c2b_w.release()); cudf::table tbl2(std::move(cols)); - auto expected = cudf::concatenate({tbl1, tbl2}); + auto expected = cudf::concatenate(std::vector({tbl1, tbl2})); auto filepath = 
temp_env->get_temp_filepath("ChunkedUnalignedSize2.orc"); cudf_io::chunked_orc_writer_options opts = diff --git a/cpp/tests/io/parquet_test.cpp b/cpp/tests/io/parquet_test.cpp index 013457d8ed6..880f11aaeb2 100644 --- a/cpp/tests/io/parquet_test.cpp +++ b/cpp/tests/io/parquet_test.cpp @@ -23,6 +23,7 @@ #include #include #include +#include #include #include #include @@ -472,7 +473,7 @@ TEST_F(ParquetWriterTest, MultiColumnWithNulls) auto col3_mask = cudf::detail::make_counting_transform_iterator(0, [](auto i) { return (i == (num_rows - 1)); }); auto col4_mask = - cudf::detail::make_counting_transform_iterator(0, [](auto i) { return (i >= 40 || i <= 60); }); + cudf::detail::make_counting_transform_iterator(0, [](auto i) { return (i >= 40 && i <= 60); }); auto col5_mask = cudf::detail::make_counting_transform_iterator(0, [](auto i) { return (i > 80); }); auto col6_mask = @@ -1218,7 +1219,7 @@ TEST_F(ParquetChunkedWriterTest, SimpleTable) auto table1 = create_random_fixed_table(5, 5, true); auto table2 = create_random_fixed_table(5, 5, true); - auto full_table = cudf::concatenate({*table1, *table2}); + auto full_table = cudf::concatenate(std::vector({*table1, *table2})); auto filepath = temp_env->get_temp_filepath("ChunkedSimple.parquet"); cudf_io::chunked_parquet_writer_options args = @@ -1238,7 +1239,7 @@ TEST_F(ParquetChunkedWriterTest, LargeTables) auto table1 = create_random_fixed_table(512, 4096, true); auto table2 = create_random_fixed_table(512, 8192, true); - auto full_table = cudf::concatenate({*table1, *table2}); + auto full_table = cudf::concatenate(std::vector({*table1, *table2})); auto filepath = temp_env->get_temp_filepath("ChunkedLarge.parquet"); cudf_io::chunked_parquet_writer_options args = @@ -1300,7 +1301,7 @@ TEST_F(ParquetChunkedWriterTest, Strings) cols.push_back(strings2.release()); cudf::table tbl2(std::move(cols)); - auto expected = cudf::concatenate({tbl1, tbl2}); + auto expected = cudf::concatenate(std::vector({tbl1, tbl2})); auto filepath = temp_env->get_temp_filepath("ChunkedStrings.parquet"); cudf_io::chunked_parquet_writer_options args = @@ -1359,7 +1360,7 @@ TEST_F(ParquetChunkedWriterTest, ListColumn) auto tbl0 = table_view({col0_tbl0, col1_tbl0, col2_tbl0}); auto tbl1 = table_view({col0_tbl1, col1_tbl1, col2_tbl1}); - auto expected = cudf::concatenate({tbl0, tbl1}); + auto expected = cudf::concatenate(std::vector({tbl0, tbl1})); auto filepath = temp_env->get_temp_filepath("ChunkedLists.parquet"); cudf_io::chunked_parquet_writer_options args = @@ -1413,7 +1414,7 @@ TEST_F(ParquetChunkedWriterTest, ListOfStruct) auto table_2 = table_view({*list_col_2}); - auto full_table = cudf::concatenate({table_1, table_2}); + auto full_table = cudf::concatenate(std::vector({table_1, table_2})); cudf_io::table_input_metadata expected_metadata(table_1); expected_metadata.column_metadata[0].set_name("family"); @@ -1504,7 +1505,7 @@ TEST_F(ParquetChunkedWriterTest, ListOfStructOfStructOfListOfList) auto table_2 = table_view({*list_col_2}); - auto full_table = cudf::concatenate({table_1, table_2}); + auto full_table = cudf::concatenate(std::vector({table_1, table_2})); cudf_io::table_input_metadata expected_metadata(table_1); expected_metadata.column_metadata[0].set_name("family"); @@ -1639,7 +1640,7 @@ TEST_F(ParquetChunkedWriterTest, DifferentNullability) auto table1 = create_random_fixed_table(5, 5, true); auto table2 = create_random_fixed_table(5, 5, false); - auto full_table = cudf::concatenate({*table1, *table2}); + auto full_table = cudf::concatenate(std::vector({*table1, 
*table2})); auto filepath = temp_env->get_temp_filepath("ChunkedNullable.parquet"); cudf_io::chunked_parquet_writer_options args = @@ -1678,7 +1679,7 @@ TEST_F(ParquetChunkedWriterTest, DifferentNullabilityStruct) auto struct_2_2 = cudf::test::structs_column_wrapper{{is_human_2, struct_1_2}}; auto table_2 = cudf::table_view({struct_2_2}); - auto full_table = cudf::concatenate({table_1, table_2}); + auto full_table = cudf::concatenate(std::vector({table_1, table_2})); cudf_io::table_input_metadata expected_metadata(table_1); expected_metadata.column_metadata[0].set_name("being"); @@ -1707,7 +1708,7 @@ TEST_F(ParquetChunkedWriterTest, ForcedNullability) auto table1 = create_random_fixed_table(5, 5, false); auto table2 = create_random_fixed_table(5, 5, false); - auto full_table = cudf::concatenate({*table1, *table2}); + auto full_table = cudf::concatenate(std::vector({*table1, *table2})); auto filepath = temp_env->get_temp_filepath("ChunkedNoNullable.parquet"); @@ -1764,7 +1765,7 @@ TEST_F(ParquetChunkedWriterTest, ForcedNullabilityList) auto table1 = table_view({col00, col10}); auto table2 = table_view({col01, col11}); - auto full_table = cudf::concatenate({table1, table2}); + auto full_table = cudf::concatenate(std::vector({table1, table2})); cudf_io::table_input_metadata metadata(table1); metadata.column_metadata[0].set_nullability(true); // List is nullable at first (root) level @@ -1809,7 +1810,7 @@ TEST_F(ParquetChunkedWriterTest, ForcedNullabilityStruct) auto struct_2_2 = cudf::test::structs_column_wrapper{{is_human_2, struct_1_2}}; auto table_2 = cudf::table_view({struct_2_2}); - auto full_table = cudf::concatenate({table_1, table_2}); + auto full_table = cudf::concatenate(std::vector({table_1, table_2})); cudf_io::table_input_metadata expected_metadata(table_1); expected_metadata.column_metadata[0].set_name("being").set_nullability(false); @@ -1838,7 +1839,7 @@ TEST_F(ParquetChunkedWriterTest, ReadRowGroups) auto table1 = create_random_fixed_table(5, 5, true); auto table2 = create_random_fixed_table(5, 5, true); - auto full_table = cudf::concatenate({*table2, *table1, *table2}); + auto full_table = cudf::concatenate(std::vector({*table2, *table1, *table2})); auto filepath = temp_env->get_temp_filepath("ChunkedRowGroups.parquet"); cudf_io::chunked_parquet_writer_options args = @@ -1951,7 +1952,7 @@ TYPED_TEST(ParquetChunkedWriterNumericTypeTest, UnalignedSize) cols.push_back(c2b_w.release()); cudf::table tbl2(std::move(cols)); - auto expected = cudf::concatenate({tbl1, tbl2}); + auto expected = cudf::concatenate(std::vector({tbl1, tbl2})); auto filepath = temp_env->get_temp_filepath("ChunkedUnalignedSize.parquet"); cudf_io::chunked_parquet_writer_options args = @@ -1998,7 +1999,7 @@ TYPED_TEST(ParquetChunkedWriterNumericTypeTest, UnalignedSize2) cols.push_back(c2b_w.release()); cudf::table tbl2(std::move(cols)); - auto expected = cudf::concatenate({tbl1, tbl2}); + auto expected = cudf::concatenate(std::vector({tbl1, tbl2})); auto filepath = temp_env->get_temp_filepath("ChunkedUnalignedSize2.parquet"); cudf_io::chunked_parquet_writer_options args = diff --git a/cpp/tests/merge/merge_test.cpp b/cpp/tests/merge/merge_test.cpp index 451fa82d5a3..b7d98704aff 100644 --- a/cpp/tests/merge/merge_test.cpp +++ b/cpp/tests/merge/merge_test.cpp @@ -705,7 +705,7 @@ TEST_F(MergeTest, KeysWithNulls) auto valids2 = cudf::detail::make_counting_transform_iterator( 0, [](auto row) { return (row % 15 == 0) ? 
false : true; }); cudf::test::fixed_width_column_wrapper data2(data_iter, data_iter + nrows, valids2); - auto all_data = cudf::concatenate({data1, data2}); + auto all_data = cudf::concatenate(std::vector{{data1, data2}}); std::vector column_orders{cudf::order::ASCENDING, cudf::order::DESCENDING}; std::vector null_precedences{cudf::null_order::AFTER, cudf::null_order::BEFORE}; diff --git a/python/cudf/cudf/_lib/cpp/concatenate.pxd b/python/cudf/cudf/_lib/cpp/concatenate.pxd index b5ec3bcb7d4..c776d23aa85 100644 --- a/python/cudf/cudf/_lib/cpp/concatenate.pxd +++ b/python/cudf/cudf/_lib/cpp/concatenate.pxd @@ -5,12 +5,22 @@ from libcpp.vector cimport vector from cudf._lib.cpp.column.column cimport column, column_view from cudf._lib.cpp.table.table cimport table, table_view -from rmm._lib.device_buffer cimport device_buffer +from cudf._lib.cpp.utilities.host_span cimport host_span +from rmm._lib.device_buffer cimport device_buffer cdef extern from "cudf/concatenate.hpp" namespace "cudf" nogil: + # The versions of concatenate taking vectors don't exist in libcudf + # C++, but passing a vector works because a host_span is implicitly + # constructable from a vector. In case they are needed in the future, + # host_span versions can be added, e.g: + # + # cdef device_buffer concatenate_masks "cudf::concatenate_masks"( + # host_span[column_view] views + # ) except + + cdef device_buffer concatenate_masks "cudf::concatenate_masks"( - const vector[column_view] columns + const vector[column_view] views ) except + cdef unique_ptr[column] concatenate_columns "cudf::concatenate"( const vector[column_view] columns diff --git a/python/cudf/cudf/_lib/cpp/utilities/host_span.pxd b/python/cudf/cudf/_lib/cpp/utilities/host_span.pxd new file mode 100644 index 00000000000..cbbe3710347 --- /dev/null +++ b/python/cudf/cudf/_lib/cpp/utilities/host_span.pxd @@ -0,0 +1,8 @@ +# Copyright (c) 2021, NVIDIA CORPORATION. + +from libcpp.vector cimport vector + +cdef extern from "cudf/utilities/span.hpp" namespace "cudf" nogil: + cdef cppclass host_span[T]: + host_span() except + + host_span(vector[T]) except + From f1f1d0fbeae24faec3a82c7a4a9dd6f3cedc9ef1 Mon Sep 17 00:00:00 2001 From: Kumar Aatish Date: Thu, 25 Mar 2021 01:14:10 -0400 Subject: [PATCH 06/24] Add column_device_view to orc writer (#7676) This PR adds column_device_view members to EncChunk, DictionaryChunk and StripeDictionary structures which are used in the ORC writer. The idea is to replace members in these structures which replicate the same information. Usage of nvstrdesc_s has also been eliminated in the ORC writer. 
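To illustrate the direction of this change, here is a minimal sketch (not part of the patch) of how device code can read string rows directly through a cudf::column_device_view rather than through a precomputed pointer/length descriptor array such as nvstrdesc_s. The helper names string_sizes/string_sizes_kernel, the launch configuration, and the output buffer are illustrative assumptions; only column_device_view, string_view, and column_device_view::create are taken from libcudf.

#include <cudf/column/column_device_view.cuh>
#include <cudf/column/column_view.hpp>
#include <cudf/strings/string_view.cuh>
#include <cudf/types.hpp>
#include <rmm/cuda_stream_view.hpp>
#include <rmm/device_uvector.hpp>

// Illustrative kernel: compute the byte length of every string row by reading the
// column through its device view; no host-built ptr/len table is required.
__global__ void string_sizes_kernel(cudf::column_device_view const col, cudf::size_type* sizes)
{
  auto const row = static_cast<cudf::size_type>(blockIdx.x * blockDim.x + threadIdx.x);
  if (row >= col.size()) return;
  sizes[row] = col.is_valid(row) ? col.element<cudf::string_view>(row).size_bytes() : 0;
}

void string_sizes(cudf::column_view const& strings_col,
                  rmm::device_uvector<cudf::size_type>& sizes,
                  rmm::cuda_stream_view stream)
{
  if (strings_col.size() == 0) return;
  // create() copies the column metadata to device memory so the view can be
  // dereferenced inside kernels.
  auto d_col = cudf::column_device_view::create(strings_col, stream);
  int const block = 256;
  int const grid  = (strings_col.size() + block - 1) / block;
  string_sizes_kernel<<<grid, block, 0, stream.value()>>>(*d_col, sizes.data());
}

Compared with the removed nvstrdesc_s path, a sketch like this avoids materializing a separate pointer/length array for each string column before launching the encoder kernels, which is the simplification the structures below (EncChunk, DictionaryChunk, StripeDictionary) rely on.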
Fixes #7347, Addresses #5682, Addresses #7334 Authors: - Kumar Aatish (@kaatish) Approvers: - Vukasin Milovanovic (@vuule) - Devavret Makkar (@devavret) URL: https://github.com/rapidsai/cudf/pull/7676 --- cpp/src/io/orc/dict_enc.cu | 151 +++++++++--------- cpp/src/io/orc/orc_gpu.h | 69 ++++++--- cpp/src/io/orc/stripe_enc.cu | 82 ++++++---- cpp/src/io/orc/writer_impl.cu | 213 ++++++++++---------------- cpp/src/io/orc/writer_impl.hpp | 10 +- cpp/src/io/parquet/page_dict.cu | 8 +- cpp/src/io/parquet/page_enc.cu | 10 +- cpp/src/io/parquet/writer_impl.cu | 2 +- cpp/src/io/statistics/column_stats.cu | 6 - cpp/src/io/statistics/column_stats.h | 5 +- 10 files changed, 283 insertions(+), 273 deletions(-) diff --git a/cpp/src/io/orc/dict_enc.cu b/cpp/src/io/orc/dict_enc.cu index 99157a23fcb..5695e882a95 100644 --- a/cpp/src/io/orc/dict_enc.cu +++ b/cpp/src/io/orc/dict_enc.cu @@ -17,6 +17,7 @@ #include "orc_common.h" #include "orc_gpu.h" +#include #include #include @@ -46,14 +47,16 @@ struct dictinit_state_s { }; /** - * @brief Return a 12-bit hash from a byte sequence + * @brief Return a 12-bit hash from a string */ -static inline __device__ uint32_t nvstr_init_hash(char const *ptr, uint32_t len) +static inline __device__ uint32_t hash_string(const string_view val) { - if (len != 0) { - return (ptr[0] + (ptr[len - 1] << 5) + (len << 10)) & ((1 << init_hash_bits) - 1); - } else { + if (val.empty()) { return 0; + } else { + char const *ptr = val.data(); + uint32_t len = val.size_bytes(); + return (ptr[0] + (ptr[len - 1] << 5) + (len << 10)) & ((1 << init_hash_bits) - 1); } } @@ -71,7 +74,8 @@ static __device__ void LoadNonNullIndices(volatile dictinit_state_s *s, { if (t == 0) { s->nnz = 0; } for (uint32_t i = 0; i < s->chunk.num_rows; i += block_size) { - const uint32_t *valid_map = s->chunk.valid_map_base; + const uint32_t *valid_map = s->chunk.leaf_column->null_mask(); + auto column_offset = s->chunk.leaf_column->offset(); uint32_t is_valid, nz_pos; if (t < block_size / 32) { if (!valid_map) { @@ -80,10 +84,10 @@ static __device__ void LoadNonNullIndices(volatile dictinit_state_s *s, uint32_t const row = s->chunk.start_row + i + t * 32; auto const chunk_end = s->chunk.start_row + s->chunk.num_rows; - auto const valid_map_idx = (row + s->chunk.column_offset) / 32; + auto const valid_map_idx = (row + column_offset) / 32; uint32_t valid = (row < chunk_end) ? 
valid_map[valid_map_idx] : 0; - auto const rows_in_next_word = (row + s->chunk.column_offset) & 0x1f; + auto const rows_in_next_word = (row + column_offset) & 0x1f; if (rows_in_next_word != 0) { auto const rows_in_current_word = 32 - rows_in_next_word; // Read next word if any rows are within the chunk @@ -111,12 +115,18 @@ static __device__ void LoadNonNullIndices(volatile dictinit_state_s *s, * @brief Gather all non-NULL string rows and compute total character data size * * @param[in] chunks DictionaryChunk device array [rowgroup][column] - * @param[in] num_columns Number of columns + * @param[in] num_columns Number of string columns */ // blockDim {block_size,1,1} template __global__ void __launch_bounds__(block_size, 2) - gpuInitDictionaryIndices(DictionaryChunk *chunks, uint32_t num_columns) + gpuInitDictionaryIndices(DictionaryChunk *chunks, + const table_device_view view, + uint32_t *dict_data, + uint32_t *dict_index, + size_t row_index_stride, + size_type *str_col_ids, + uint32_t num_columns) { __shared__ __align__(16) dictinit_state_s state_g; @@ -131,12 +141,21 @@ __global__ void __launch_bounds__(block_size, 2) dictinit_state_s *const s = &state_g; uint32_t col_id = blockIdx.x; uint32_t group_id = blockIdx.y; - const nvstrdesc_s *ck_data; - uint32_t *dict_data; uint32_t nnz, start_row, dict_char_count; int t = threadIdx.x; - if (t == 0) s->chunk = chunks[group_id * num_columns + col_id]; + if (t == 0) { + column_device_view *leaf_column_view = view.begin() + str_col_ids[col_id]; + s->chunk = chunks[group_id * num_columns + col_id]; + s->chunk.leaf_column = leaf_column_view; + s->chunk.dict_data = + dict_data + col_id * leaf_column_view->size() + group_id * row_index_stride; + s->chunk.dict_index = dict_index + col_id * leaf_column_view->size(); + s->chunk.start_row = group_id * row_index_stride; + s->chunk.num_rows = + min(row_index_stride, + max(static_cast(leaf_column_view->size() - s->chunk.start_row), size_t{0})); + } for (uint32_t i = 0; i < sizeof(s->map) / sizeof(uint32_t); i += block_size) { if (i + t < sizeof(s->map) / sizeof(uint32_t)) s->map.u32[i + t] = 0; } @@ -152,15 +171,15 @@ __global__ void __launch_bounds__(block_size, 2) nnz = s->nnz; dict_data = s->chunk.dict_data; start_row = s->chunk.start_row; - ck_data = static_cast(s->chunk.column_data_base) + start_row; for (uint32_t i = 0; i < nnz; i += block_size) { uint32_t ck_row = 0; uint32_t hash = 0; uint32_t len = 0; if (i + t < nnz) { - ck_row = s->dict[i + t]; - len = static_cast(ck_data[ck_row].count); - hash = nvstr_init_hash(ck_data[ck_row].ptr, len); + ck_row = s->dict[i + t]; + string_view string_val = s->chunk.leaf_column->element(ck_row + start_row); + len = static_cast(string_val.size_bytes()); + hash = hash_string(string_val); } len = block_reduce(temp_storage.reduce_storage).Sum(len); if (t == 0) s->chunk.string_char_count += len; @@ -200,10 +219,11 @@ __global__ void __launch_bounds__(block_size, 2) uint32_t ck_row = 0, pos = 0, hash = 0, pos_old, pos_new, sh, colliding_row; bool collision; if (i + t < nnz) { - ck_row = dict_data[i + t] - start_row; - hash = nvstr_init_hash(ck_data[ck_row].ptr, static_cast(ck_data[ck_row].count)); - sh = (hash & 1) ? 16 : 0; - pos_old = s->map.u16[hash]; + ck_row = dict_data[i + t] - start_row; + string_view string_val = s->chunk.leaf_column->element(ck_row + start_row); + hash = hash_string(string_val); + sh = (hash & 1) ? 
16 : 0; + pos_old = s->map.u16[hash]; } // The isolation of the atomicAdd, along with pos_old/pos_new is to guarantee deterministic // behavior for the first row in the hash map that will be used for early duplicate detection @@ -233,18 +253,16 @@ __global__ void __launch_bounds__(block_size, 2) for (uint32_t i = 0; i < nnz; i += block_size) { uint32_t ck_row = 0, ck_row_ref = 0, is_dupe = 0; if (i + t < nnz) { - const char *str1, *str2; - uint32_t len1, len2, hash; - ck_row = s->dict[i + t]; - str1 = ck_data[ck_row].ptr; - len1 = static_cast(ck_data[ck_row].count); - hash = nvstr_init_hash(str1, len1); - ck_row_ref = s->dict[(hash > 0) ? s->map.u16[hash - 1] : 0]; + ck_row = s->dict[i + t]; + string_view string_value = s->chunk.leaf_column->element(ck_row + start_row); + auto const string_length = static_cast(string_value.size_bytes()); + auto const hash = hash_string(string_value); + ck_row_ref = s->dict[(hash > 0) ? s->map.u16[hash - 1] : 0]; if (ck_row_ref != ck_row) { - str2 = ck_data[ck_row_ref].ptr; - len2 = static_cast(ck_data[ck_row_ref].count); - is_dupe = nvstr_is_equal(str1, len1, str2, len2); - dict_char_count += (is_dupe) ? 0 : len1; + string_view reference_string = + s->chunk.leaf_column->element(ck_row_ref + start_row); + is_dupe = (string_value == reference_string); + dict_char_count += (is_dupe) ? 0 : string_length; } } uint32_t dupes_in_block; @@ -269,6 +287,12 @@ __global__ void __launch_bounds__(block_size, 2) chunks[group_id * num_columns + col_id].string_char_count = s->chunk.string_char_count; chunks[group_id * num_columns + col_id].num_dict_strings = nnz - s->total_dupes; chunks[group_id * num_columns + col_id].dict_char_count = dict_char_count; + chunks[group_id * num_columns + col_id].leaf_column = s->chunk.leaf_column; + + chunks[group_id * num_columns + col_id].dict_data = s->chunk.dict_data; + chunks[group_id * num_columns + col_id].dict_index = s->chunk.dict_index; + chunks[group_id * num_columns + col_id].start_row = s->chunk.start_row; + chunks[group_id * num_columns + col_id].num_rows = s->chunk.num_rows; } } @@ -357,7 +381,6 @@ __global__ void __launch_bounds__(block_size) uint32_t num_strings; uint32_t *dict_data, *dict_index; uint32_t dict_char_count; - const nvstrdesc_s *str_data; int t = threadIdx.x; if (t == 0) s->stripe = stripes[stripe_id * num_columns + col_id]; @@ -366,21 +389,17 @@ __global__ void __launch_bounds__(block_size) num_strings = s->stripe.num_strings; dict_data = s->stripe.dict_data; if (!dict_data) return; - dict_index = s->stripe.dict_index; - str_data = static_cast(s->stripe.column_data_base); - dict_char_count = 0; + dict_index = s->stripe.dict_index; + string_view current_string = string_view::min(); + dict_char_count = 0; for (uint32_t i = 0; i < num_strings; i += block_size) { uint32_t cur = (i + t < num_strings) ? dict_data[i + t] : 0; uint32_t cur_len = 0; - const char *cur_ptr; - bool is_dupe = false; - if (i + t < num_strings) { - cur_ptr = str_data[cur].ptr; - cur_len = str_data[cur].count; - } + bool is_dupe = false; + if (i + t < num_strings) { current_string = s->stripe.leaf_column->element(cur); } if (i + t != 0 && i + t < num_strings) { uint32_t prev = dict_data[i + t - 1]; - is_dupe = nvstr_is_equal(cur_ptr, cur_len, str_data[prev].ptr, str_data[prev].count); + is_dupe = (current_string == (s->stripe.leaf_column->element(prev))); } dict_char_count += (is_dupe) ? 
0 : cur_len; uint32_t dupes_in_block; @@ -403,14 +422,14 @@ __global__ void __launch_bounds__(block_size) } /** - * @brief Launches kernel for initializing dictionary chunks - * - * @param[in] chunks DictionaryChunk device array [rowgroup][column] - * @param[in] num_columns Number of columns - * @param[in] num_rowgroups Number of row groups - * @param[in] stream CUDA stream to use, default `rmm::cuda_stream_default` + * @copydoc cudf::io::orc::gpu::InitDictionaryIndices */ -void InitDictionaryIndices(DictionaryChunk *chunks, +void InitDictionaryIndices(const table_device_view &view, + DictionaryChunk *chunks, + uint32_t *dict_data, + uint32_t *dict_index, + size_t row_index_stride, + size_type *str_col_ids, uint32_t num_columns, uint32_t num_rowgroups, rmm::cuda_stream_view stream) @@ -418,20 +437,12 @@ void InitDictionaryIndices(DictionaryChunk *chunks, static constexpr int block_size = 512; dim3 dim_block(block_size, 1); dim3 dim_grid(num_columns, num_rowgroups); - gpuInitDictionaryIndices - <<>>(chunks, num_columns); + gpuInitDictionaryIndices<<>>( + chunks, view, dict_data, dict_index, row_index_stride, str_col_ids, num_columns); } /** - * @brief Launches kernel for building stripe dictionaries - * - * @param[in] stripes StripeDictionary device array [stripe][column] - * @param[in] stripes_host StripeDictionary host array [stripe][column] - * @param[in] chunks DictionaryChunk device array [rowgroup][column] - * @param[in] num_stripes Number of stripes - * @param[in] num_rowgroups Number of row groups - * @param[in] num_columns Number of columns - * @param[in] stream CUDA stream to use, default `rmm::cuda_stream_default` + * @copydoc cudf::io::orc::gpu::BuildStripeDictionaries */ void BuildStripeDictionaries(StripeDictionary *stripes, StripeDictionary *stripes_host, @@ -447,18 +458,16 @@ void BuildStripeDictionaries(StripeDictionary *stripes, stripes, chunks, num_columns); for (uint32_t i = 0; i < num_stripes * num_columns; i++) { if (stripes_host[i].dict_data != nullptr) { - thrust::device_ptr p = thrust::device_pointer_cast(stripes_host[i].dict_data); - const nvstrdesc_s *str_data = - static_cast(stripes_host[i].column_data_base); + thrust::device_ptr dict_data_ptr = + thrust::device_pointer_cast(stripes_host[i].dict_data); + column_device_view *string_column = stripes_host[i].leaf_column; // NOTE: Requires the --expt-extended-lambda nvcc flag thrust::sort(rmm::exec_policy(stream), - p, - p + stripes_host[i].num_strings, - [str_data] __device__(const uint32_t &lhs, const uint32_t &rhs) { - return nvstr_is_lesser(str_data[lhs].ptr, - (uint32_t)str_data[lhs].count, - str_data[rhs].ptr, - (uint32_t)str_data[rhs].count); + dict_data_ptr, + dict_data_ptr + stripes_host[i].num_strings, + [string_column] __device__(const uint32_t &lhs, const uint32_t &rhs) { + return string_column->element(lhs) < + string_column->element(rhs); }); } } diff --git a/cpp/src/io/orc/orc_gpu.h b/cpp/src/io/orc/orc_gpu.h index 7ad92e40cb4..55df0adf95b 100644 --- a/cpp/src/io/orc/orc_gpu.h +++ b/cpp/src/io/orc/orc_gpu.h @@ -21,6 +21,7 @@ #include #include #include +#include #include #include @@ -124,16 +125,15 @@ struct RowGroup { * @brief Struct to describe an encoder data chunk */ struct EncChunk { - const uint32_t *valid_map_base; // base ptr of input valid bit map - size_type column_offset; // index of the first element relative to the base memory - const void *column_data_base; // base ptr of input column data - uint32_t start_row; // start row of this chunk - uint32_t num_rows; // number of rows in this chunk 
- uint32_t valid_rows; // max number of valid rows - uint8_t encoding_kind; // column encoding kind (orc::ColumnEncodingKind) - uint8_t type_kind; // column data type (orc::TypeKind) - uint8_t dtype_len; // data type length - uint8_t scale; // scale for decimals or timestamps + uint32_t start_row; // start row of this chunk + uint32_t num_rows; // number of rows in this chunk + uint8_t encoding_kind; // column encoding kind (orc::ColumnEncodingKind) + uint8_t type_kind; // column data type (orc::TypeKind) + uint8_t dtype_len; // data type length + uint8_t scale; // scale for decimals or timestamps + + uint32_t *dict_index; // dictionary index from row index + column_device_view *leaf_column; }; /** @@ -163,10 +163,7 @@ struct StripeStream { * @brief Struct to describe a dictionary chunk */ struct DictionaryChunk { - const uint32_t *valid_map_base; // base ptr of input valid bit map - size_type column_offset; // index of the first element relative to the base memory - const void *column_data_base; // base ptr of column data (ptr,len pair) - uint32_t *dict_data; // dictionary data (index of non-null rows) + uint32_t *dict_data; // dictionary data (index of non-null rows) uint32_t *dict_index; // row indices of corresponding string (row from dictionary index) uint32_t start_row; // start row of this chunk uint32_t num_rows; // num rows in this chunk @@ -175,20 +172,23 @@ struct DictionaryChunk { string_char_count; // total size of string data (NOTE: assumes less than 4G bytes per chunk) uint32_t num_dict_strings; // number of strings in dictionary uint32_t dict_char_count; // size of dictionary string data for this chunk + + column_device_view *leaf_column; //!< Pointer to string column }; /** * @brief Struct to describe a dictionary */ struct StripeDictionary { - const void *column_data_base; // base ptr of column data (ptr,len pair) - uint32_t *dict_data; // row indices of corresponding string (row from dictionary index) - uint32_t *dict_index; // dictionary index from row index - uint32_t column_id; // real column id - uint32_t start_chunk; // first chunk in stripe - uint32_t num_chunks; // number of chunks in the stripe - uint32_t num_strings; // number of unique strings in the dictionary - uint32_t dict_char_count; // total size of dictionary string data + uint32_t *dict_data; // row indices of corresponding string (row from dictionary index) + uint32_t *dict_index; // dictionary index from row index + uint32_t column_id; // real column id + uint32_t start_chunk; // first chunk in stripe + uint32_t num_chunks; // number of chunks in the stripe + uint32_t num_strings; // number of unique strings in the dictionary + uint32_t dict_char_count; // total size of dictionary string data + + column_device_view *leaf_column; //!< Pointer to string column }; /** @@ -313,6 +313,17 @@ void EncodeStripeDictionaries(StripeDictionary *stripes, detail::device_2dspan enc_streams, rmm::cuda_stream_view stream = rmm::cuda_stream_default); +/** + * @brief Set leaf column element of EncChunk + * + * @param[in] view table device view representing input table + * @param[in,out] chunks encoder chunk device array [column][rowgroup] + * @param[in] stream CUDA stream to use, default `rmm::cuda_stream_default` + */ +void set_chunk_columns(const table_device_view &view, + detail::device_2dspan chunks, + rmm::cuda_stream_view stream); + /** * @brief Launches kernel for compacting chunked column data prior to compression * @@ -350,15 +361,25 @@ void CompressOrcDataStreams(uint8_t *compressed_data, /** * @brief Launches 
kernel for initializing dictionary chunks * + * @param[in] view table device view representing input table * @param[in,out] chunks DictionaryChunk device array [rowgroup][column] + * @param[in] dict_data dictionary data (index of non-null rows) + * @param[in] dict_index row indices of corresponding string (row from dictionary index) + * @param[in] row_index_stride Rowgroup size in rows + * @param[in] str_col_ids List of columns that are strings type * @param[in] num_columns Number of columns * @param[in] num_rowgroups Number of row groups * @param[in] stream CUDA stream to use, default `rmm::cuda_stream_default` */ -void InitDictionaryIndices(DictionaryChunk *chunks, +void InitDictionaryIndices(const table_device_view &view, + DictionaryChunk *chunks, + uint32_t *dict_data, + uint32_t *dict_index, + size_t row_index_stride, + size_type *str_col_ids, uint32_t num_columns, uint32_t num_rowgroups, - rmm::cuda_stream_view stream = rmm::cuda_stream_default); + rmm::cuda_stream_view stream); /** * @brief Launches kernel for building stripe dictionaries diff --git a/cpp/src/io/orc/stripe_enc.cu b/cpp/src/io/orc/stripe_enc.cu index aef32efaf6e..10932d36309 100644 --- a/cpp/src/io/orc/stripe_enc.cu +++ b/cpp/src/io/orc/stripe_enc.cu @@ -669,19 +669,20 @@ __global__ void __launch_bounds__(block_size) if (t * 8 < nrows) { uint32_t row = s->chunk.start_row + present_rows + t * 8; uint8_t valid = 0; - if (row < s->chunk.valid_rows) { - if (s->chunk.valid_map_base) { - size_type current_valid_offset = row + s->chunk.column_offset; - size_type next_valid_offset = current_valid_offset + min(32, s->chunk.valid_rows); + if (row < s->chunk.leaf_column->size()) { + if (s->chunk.leaf_column->nullable()) { + size_type current_valid_offset = row + s->chunk.leaf_column->offset(); + size_type next_valid_offset = + current_valid_offset + min(32, s->chunk.leaf_column->size()); bitmask_type mask = cudf::detail::get_mask_offset_word( - s->chunk.valid_map_base, 0, current_valid_offset, next_valid_offset); + s->chunk.leaf_column->null_mask(), 0, current_valid_offset, next_valid_offset); valid = 0xff & mask; } else { valid = 0xff; } - if (row + 7 > s->chunk.valid_rows) { - valid = valid & ((1 << (s->chunk.valid_rows & 7)) - 1); + if (row + 7 > s->chunk.leaf_column->size()) { + valid = valid & ((1 << (s->chunk.leaf_column->size() & 7)) - 1); } } s->valid_buf[(row >> 3) & 0x1ff] = valid; @@ -729,19 +730,18 @@ __global__ void __launch_bounds__(block_size) lengths_to_positions(s->buf.u32, 512, t); __syncthreads(); if (valid) { - int nz_idx = (s->nnz + s->buf.u32[t] - 1) & (maxnumvals - 1); - void const *base = s->chunk.column_data_base; + int nz_idx = (s->nnz + s->buf.u32[t] - 1) & (maxnumvals - 1); switch (s->chunk.type_kind) { case INT: case DATE: - case FLOAT: s->vals.u32[nz_idx] = static_cast(base)[row]; break; + case FLOAT: s->vals.u32[nz_idx] = s->chunk.leaf_column->element(row); break; case DOUBLE: - case LONG: s->vals.u64[nz_idx] = static_cast(base)[row]; break; - case SHORT: s->vals.u32[nz_idx] = static_cast(base)[row]; break; + case LONG: s->vals.u64[nz_idx] = s->chunk.leaf_column->element(row); break; + case SHORT: s->vals.u32[nz_idx] = s->chunk.leaf_column->element(row); break; case BOOLEAN: - case BYTE: s->vals.u8[nz_idx] = static_cast(base)[row]; break; + case BYTE: s->vals.u8[nz_idx] = s->chunk.leaf_column->element(row); break; case TIMESTAMP: { - int64_t ts = static_cast(base)[row]; + int64_t ts = s->chunk.leaf_column->element(row); int32_t ts_scale = kTimeScale[min(s->chunk.scale, 9)]; int64_t seconds = ts / 
ts_scale; int64_t nanos = (ts - seconds * ts_scale); @@ -772,16 +772,13 @@ __global__ void __launch_bounds__(block_size) } case STRING: if (s->chunk.encoding_kind == DICTIONARY_V2) { - uint32_t dict_idx = static_cast(base)[row]; - if (dict_idx > 0x7fffffffu) - dict_idx = static_cast(base)[dict_idx & 0x7fffffffu]; + uint32_t dict_idx = s->chunk.dict_index[row]; + if (dict_idx > 0x7fffffffu) dict_idx = s->chunk.dict_index[dict_idx & 0x7fffffffu]; s->vals.u32[nz_idx] = dict_idx; } else { - const nvstrdesc_s *str_desc = static_cast(base) + row; - const char *ptr = str_desc->ptr; - uint32_t count = static_cast(str_desc->count); - s->u.strenc.str_data[s->buf.u32[t] - 1] = ptr; - s->lengths.u32[nz_idx] = count; + string_view value = s->chunk.leaf_column->element(row); + s->u.strenc.str_data[s->buf.u32[t] - 1] = value.data(); + s->lengths.u32[nz_idx] = value.size_bytes(); } break; default: break; @@ -899,8 +896,8 @@ __global__ void __launch_bounds__(block_size) streams[col_id][group_id].lengths[t] = s->strm_pos[t]; if (!s->stream.data_ptrs[t]) { streams[col_id][group_id].data_ptrs[t] = - static_cast(const_cast(s->chunk.column_data_base)) + - s->chunk.start_row * s->chunk.dtype_len; + static_cast(const_cast(s->chunk.leaf_column->head())) + + (s->chunk.leaf_column->offset() + s->chunk.start_row) * s->chunk.dtype_len; } } } @@ -939,8 +936,8 @@ __global__ void __launch_bounds__(block_size) s->nrows = s->u.dict_stripe.num_strings; s->cur_row = 0; } - auto const str_desc = static_cast(s->u.dict_stripe.column_data_base); - auto const dict_data = s->u.dict_stripe.dict_data; + column_device_view *string_column = s->u.dict_stripe.leaf_column; + auto const dict_data = s->u.dict_stripe.dict_data; __syncthreads(); if (s->chunk.encoding_kind != DICTIONARY_V2) { return; // This column isn't using dictionary encoding -> bail out @@ -951,8 +948,13 @@ __global__ void __launch_bounds__(block_size) uint32_t string_idx = (t < numvals) ? dict_data[s->cur_row + t] : 0; if (cid == CI_DICTIONARY) { // Encoding string contents - const char *ptr = (t < numvals) ? str_desc[string_idx].ptr : 0; - uint32_t count = (t < numvals) ? static_cast(str_desc[string_idx].count) : 0; + const char *ptr = 0; + uint32_t count = 0; + if (t < numvals) { + auto string_val = string_column->element(string_idx); + ptr = string_val.data(); + count = string_val.size_bytes(); + } s->u.strenc.str_data[t] = ptr; StoreStringData(s->stream.data_ptrs[CI_DICTIONARY] + s->strm_pos[CI_DICTIONARY], &s->u.strenc, @@ -961,7 +963,10 @@ __global__ void __launch_bounds__(block_size) if (!t) { s->strm_pos[CI_DICTIONARY] += s->u.strenc.char_count; } } else { // Encoding string lengths - uint32_t count = (t < numvals) ? static_cast(str_desc[string_idx].count) : 0; + uint32_t count = + (t < numvals) + ? 
static_cast(string_column->element(string_idx).size_bytes()) + : 0; uint32_t nz_idx = (s->cur_row + t) & 0x3ff; if (t < numvals) s->lengths.u32[nz_idx] = count; __syncthreads(); @@ -982,6 +987,15 @@ __global__ void __launch_bounds__(block_size) if (t == 0) { strm_ptr->lengths[cid] = s->strm_pos[cid]; } } +__global__ void __launch_bounds__(512) + gpu_set_chunk_columns(const table_device_view view, device_2dspan chunks) +{ + // Set leaf_column member of EncChunk + for (size_type i = threadIdx.x; i < chunks.size().second; i += blockDim.x) { + chunks[blockIdx.x][i].leaf_column = view.begin() + blockIdx.x; + } +} + /** * @brief Merge chunked column data into a single contiguous stream * @@ -1189,6 +1203,16 @@ void EncodeStripeDictionaries(StripeDictionary *stripes, <<>>(stripes, chunks, enc_streams); } +void set_chunk_columns(const table_device_view &view, + device_2dspan chunks, + rmm::cuda_stream_view stream) +{ + dim3 dim_block(512, 1); + dim3 dim_grid(chunks.size().first, 1); + + gpu_set_chunk_columns<<>>(view, chunks); +} + void CompactOrcDataStreams(device_2dspan strm_desc, device_2dspan enc_streams, rmm::cuda_stream_view stream) diff --git a/cpp/src/io/orc/writer_impl.cu b/cpp/src/io/orc/writer_impl.cu index eb5e90bbeec..cb75698fd8d 100644 --- a/cpp/src/io/orc/writer_impl.cu +++ b/cpp/src/io/orc/writer_impl.cu @@ -42,7 +42,6 @@ namespace detail { namespace orc { using namespace cudf::io::orc; using namespace cudf::io; -using cudf::io::orc::gpu::nvstrdesc_s; struct row_group_index_info { int32_t pos = -1; // Position @@ -111,39 +110,6 @@ constexpr T to_clockscale(cudf::type_id timestamp_id) } // namespace -/** - * @brief Helper kernel for converting string data/offsets into nvstrdesc - * REMOVEME: Once we eliminate the legacy readers/writers, the kernels could be - * made to use the native offset+data layout. - */ -__global__ void stringdata_to_nvstrdesc(gpu::nvstrdesc_s *dst, - const size_type *offsets, - const char *strdata, - const uint32_t *nulls, - const size_type column_offset, - size_type column_size) -{ - size_type row = blockIdx.x * blockDim.x + threadIdx.x; - if (row < column_size) { - uint32_t is_valid = (nulls != nullptr) - ? (nulls[(row + column_offset) / 32] >> ((row + column_offset) % 32)) & 1 - : 1; - size_t count; - const char *ptr; - if (is_valid) { - size_type cur = offsets[row]; - size_type next = offsets[row + 1]; - ptr = strdata + cur; - count = (next > cur) ? next - cur : 0; - } else { - ptr = nullptr; - count = 0; - } - dst[row].ptr = ptr; - dst[row].count = count; - } -} - /** * @brief Helper class that adds ORC-specific column info */ @@ -160,31 +126,14 @@ class orc_column_view { rmm::cuda_stream_view stream) : _id(id), _str_id(str_id), - _string_type(col.type().id() == type_id::STRING), - _type_width(_string_type ? 0 : cudf::size_of(col.type())), + _is_string_type(col.type().id() == type_id::STRING), + _type_width(_is_string_type ? 
0 : cudf::size_of(col.type())), _data_count(col.size()), _null_count(col.null_count()), - _data(col.head() + col.offset() * _type_width), _nulls(col.null_mask()), - _column_offset(col.offset()), _clockscale(to_clockscale(col.type().id())), _type_kind(to_orc_type(col.type().id())) { - if (_string_type && _data_count > 0) { - strings_column_view view{col}; - _indexes = rmm::device_buffer(_data_count * sizeof(gpu::nvstrdesc_s), stream); - - stringdata_to_nvstrdesc<<<((_data_count - 1) >> 8) + 1, 256, 0, stream.value()>>>( - static_cast(_indexes.data()), - view.offsets().data() + view.offset(), - view.chars().data(), - _nulls, - _column_offset, - _data_count); - _data = _indexes.data(); - - stream.synchronize(); - } // Generating default name if name isn't present in metadata if (metadata && _id < metadata->column_names.size()) { _name = metadata->column_names[_id]; @@ -193,7 +142,7 @@ class orc_column_view { } } - auto is_string() const noexcept { return _string_type; } + auto is_string() const noexcept { return _is_string_type; } void set_dict_stride(size_t stride) noexcept { dict_stride = stride; } auto get_dict_stride() const noexcept { return dict_stride; } @@ -207,7 +156,7 @@ class orc_column_view { } auto host_dict_chunk(size_t rowgroup) const { - assert(_string_type); + assert(_is_string_type); return &dict[rowgroup * dict_stride + _str_id]; } auto device_dict_chunk() const { return d_dict; } @@ -223,7 +172,7 @@ class orc_column_view { } auto host_stripe_dict(size_t stripe) const { - assert(_string_type); + assert(_is_string_type); return &stripe_dict[stripe * dict_stride + _str_id]; } auto device_stripe_dict() const { return d_stripe_dict; } @@ -233,9 +182,7 @@ class orc_column_view { size_t data_count() const noexcept { return _data_count; } size_t null_count() const noexcept { return _null_count; } bool nullable() const noexcept { return (_nulls != nullptr); } - void const *data() const noexcept { return _data; } uint32_t const *nulls() const noexcept { return _nulls; } - size_type column_offset() const noexcept { return _column_offset; } uint8_t clockscale() const noexcept { return _clockscale; } void set_orc_encoding(ColumnEncodingKind e) { _encoding_kind = e; } @@ -245,17 +192,15 @@ class orc_column_view { private: // Identifier within set of columns and string columns, respectively - size_t _id = 0; - size_t _str_id = 0; - bool _string_type = false; - - size_t _type_width = 0; - size_t _data_count = 0; - size_t _null_count = 0; - void const *_data = nullptr; - uint32_t const *_nulls = nullptr; - size_type _column_offset = 0; - uint8_t _clockscale = 0; + size_t _id = 0; + size_t _str_id = 0; + bool _is_string_type = false; + + size_t _type_width = 0; + size_t _data_count = 0; + size_t _null_count = 0; + uint32_t const *_nulls = nullptr; + uint8_t _clockscale = 0; // ORC-related members std::string _name{}; @@ -263,7 +208,6 @@ class orc_column_view { ColumnEncodingKind _encoding_kind; // String dictionary-related members - rmm::device_buffer _indexes; size_t dict_stride = 0; gpu::DictionaryChunk const *dict = nullptr; gpu::StripeDictionary const *stripe_dict = nullptr; @@ -308,8 +252,10 @@ std::vector writer::impl::gather_stripe_info( return infos; } -void writer::impl::init_dictionaries(orc_column_view *columns, +void writer::impl::init_dictionaries(const table_device_view &view, + orc_column_view *columns, std::vector const &str_col_ids, + device_span d_str_col_ids, uint32_t *dict_data, uint32_t *dict_index, hostdevice_vector *dict) @@ -321,26 +267,17 @@ void 
writer::impl::init_dictionaries(orc_column_view *columns, auto &str_column = columns[str_col_ids[i]]; str_column.set_dict_stride(str_col_ids.size()); str_column.attach_dict_chunk(dict->host_ptr(), dict->device_ptr()); - - for (size_t g = 0; g < num_rowgroups; g++) { - auto *ck = &(*dict)[g * str_col_ids.size() + i]; - ck->valid_map_base = str_column.nulls(); - ck->column_offset = str_column.column_offset(); - ck->column_data_base = str_column.data(); - ck->dict_data = dict_data + i * str_column.data_count() + g * row_index_stride_; - ck->dict_index = dict_index + i * str_column.data_count(); // Indexed by abs row - ck->start_row = g * row_index_stride_; - ck->num_rows = std::min(row_index_stride_, - std::max(str_column.data_count() - ck->start_row, 0)); - ck->num_strings = 0; - ck->string_char_count = 0; - ck->num_dict_strings = 0; - ck->dict_char_count = 0; - } } - dict->host_to_device(stream); - gpu::InitDictionaryIndices(dict->device_ptr(), str_col_ids.size(), num_rowgroups, stream); + gpu::InitDictionaryIndices(view, + dict->device_ptr(), + dict_data, + dict_index, + row_index_stride_, + d_str_col_ids.data(), + d_str_col_ids.size(), + num_rowgroups, + stream); dict->device_to_host(stream, true); } @@ -358,19 +295,19 @@ void writer::impl::build_dictionaries(orc_column_view *columns, str_column.attach_stripe_dict(stripe_dict.host_ptr(), stripe_dict.device_ptr()); for (auto const &stripe : stripe_bounds) { - auto &sd = stripe_dict[stripe.id * str_col_ids.size() + col_idx]; - sd.column_data_base = str_column.host_dict_chunk(0)->column_data_base; - sd.dict_data = str_column.host_dict_chunk(stripe.first)->dict_data; - sd.dict_index = dict_index + col_idx * str_column.data_count(); // Indexed by abs row - sd.column_id = str_col_ids[col_idx]; - sd.start_chunk = stripe.first; - sd.num_chunks = stripe.size; - sd.dict_char_count = 0; + auto &sd = stripe_dict[stripe.id * str_col_ids.size() + col_idx]; + sd.dict_data = str_column.host_dict_chunk(stripe.first)->dict_data; + sd.dict_index = dict_index + col_idx * str_column.data_count(); // Indexed by abs row + sd.column_id = str_col_ids[col_idx]; + sd.start_chunk = stripe.first; + sd.num_chunks = stripe.size; + sd.dict_char_count = 0; sd.num_strings = std::accumulate(stripe.cbegin(), stripe.cend(), 0, [&](auto dt_str_cnt, auto rg_idx) { const auto &dt = dict[rg_idx * str_col_ids.size() + col_idx]; return dt_str_cnt + dt.num_dict_strings; }); + sd.leaf_column = dict[col_idx].leaf_column; } if (enable_dictionary_) { @@ -593,15 +530,16 @@ struct segmented_valid_cnt_input { std::vector indices; }; -encoded_data writer::impl::encode_columns(host_span columns, +encoded_data writer::impl::encode_columns(const table_device_view &view, + host_span columns, std::vector const &str_col_ids, host_span stripe_bounds, orc_streams const &streams) { auto const num_columns = columns.size(); auto const num_rowgroups = stripes_size(stripe_bounds); - hostdevice_2dvector chunks(num_columns, num_rowgroups); - hostdevice_2dvector chunk_streams(num_columns, num_rowgroups); + hostdevice_2dvector chunks(num_columns, num_rowgroups, stream); + hostdevice_2dvector chunk_streams(num_columns, num_rowgroups, stream); auto const stream_offsets = streams.compute_offsets(columns, num_rowgroups); rmm::device_uvector encoded_data(stream_offsets.data_size(), stream); @@ -614,23 +552,17 @@ encoded_data writer::impl::encode_columns(host_span colum auto const rg_idx = *rg_idx_it; auto &ck = chunks[column.id()][rg_idx]; - ck.start_row = (rg_idx * row_index_stride_); - ck.num_rows = 
std::min(row_index_stride_, column.data_count() - ck.start_row); - ck.valid_rows = column.data_count(); + ck.start_row = (rg_idx * row_index_stride_); + ck.num_rows = std::min(row_index_stride_, column.data_count() - ck.start_row); ck.encoding_kind = column.orc_encoding(); ck.type_kind = column.orc_kind(); if (ck.type_kind == TypeKind::STRING) { - ck.valid_map_base = column.nulls(); - ck.column_offset = column.column_offset(); - ck.column_data_base = (ck.encoding_kind == DICTIONARY_V2) - ? column.host_stripe_dict(stripe.id)->dict_index - : column.data(); + ck.dict_index = (ck.encoding_kind == DICTIONARY_V2) + ? column.host_stripe_dict(stripe.id)->dict_index + : nullptr; ck.dtype_len = 1; } else { - ck.valid_map_base = column.nulls(); - ck.column_offset = column.column_offset(); - ck.column_data_base = column.data(); - ck.dtype_len = column.type_width(); + ck.dtype_len = column.type_width(); } ck.scale = column.clockscale(); // Only need to check row groups that end within the stripe @@ -730,6 +662,8 @@ encoded_data writer::impl::encode_columns(host_span colum chunks.host_to_device(stream); chunk_streams.host_to_device(stream); + gpu::set_chunk_columns(view, chunks, stream); + if (!str_col_ids.empty()) { auto d_stripe_dict = columns[str_col_ids[0]].device_stripe_dict(); gpu::EncodeStripeDictionaries( @@ -791,8 +725,8 @@ std::vector> writer::impl::gather_statistic_blobs( size_t num_chunks = num_rowgroups * columns.size(); std::vector> stat_blobs(num_stat_blobs); - hostdevice_vector stat_desc(columns.size()); - hostdevice_vector stat_merge(num_stat_blobs); + hostdevice_vector stat_desc(columns.size(), stream); + hostdevice_vector stat_merge(num_stat_blobs, stream); rmm::device_uvector stat_chunks(num_chunks + num_stat_blobs, stream); rmm::device_uvector stat_groups(num_chunks, stream); @@ -811,11 +745,8 @@ std::vector> writer::impl::gather_statistic_blobs( case TypeKind::STRING: desc->stats_dtype = dtype_string; break; default: desc->stats_dtype = dtype_none; break; } - desc->num_rows = column.data_count(); - desc->num_values = column.data_count(); - desc->valid_map_base = column.nulls(); - desc->column_offset = column.column_offset(); - desc->column_data_base = column.data(); + desc->num_rows = column.data_count(); + desc->num_values = column.data_count(); if (desc->stats_dtype == dtype_timestamp64) { // Timestamp statistics are in milliseconds switch (column.clockscale()) { @@ -869,8 +800,8 @@ std::vector> writer::impl::gather_statistic_blobs( stat_merge.device_ptr(), stat_chunks.data() + num_chunks, num_stat_blobs, stream); stat_merge.device_to_host(stream, true); - hostdevice_vector blobs(stat_merge[num_stat_blobs - 1].start_chunk + - stat_merge[num_stat_blobs - 1].num_chunks); + hostdevice_vector blobs( + stat_merge[num_stat_blobs - 1].start_chunk + stat_merge[num_stat_blobs - 1].num_chunks, stream); gpu::orc_encode_statistics(blobs.device_ptr(), stat_merge.device_ptr(), stat_chunks.data() + num_chunks, @@ -1061,6 +992,22 @@ void writer::impl::init_state() out_sink_->host_write(MAGIC, std::strlen(MAGIC)); } +rmm::device_uvector get_string_column_ids(const table_device_view &view, + rmm::cuda_stream_view stream) +{ + rmm::device_uvector string_column_ids(view.num_columns(), stream); + auto iter = thrust::make_counting_iterator(0); + auto end_iter = thrust::copy_if(rmm::exec_policy(stream), + iter, + iter + view.num_columns(), + string_column_ids.begin(), + [view] __device__(size_type index) { + return (view.column(index).type().id() == type_id::STRING); + }); + 
string_column_ids.resize(end_iter - string_column_ids.begin(), stream); + return string_column_ids; +} + void writer::impl::write(table_view const &table) { CUDF_EXPECTS(not closed, "Data has already been flushed to out and closed"); @@ -1074,6 +1021,9 @@ void writer::impl::write(table_view const &table) "be specified"); } + auto device_columns = table_device_view::create(table, stream); + auto string_column_ids = get_string_column_ids(*device_columns, stream); + // Wrapper around cudf columns to attach ORC-specific type info std::vector orc_columns; orc_columns.reserve(num_columns); @@ -1093,9 +1043,15 @@ void writer::impl::write(table_view const &table) // Build per-column dictionary indices const auto num_rowgroups = div_by_rowgroups(num_rows); const auto num_dict_chunks = num_rowgroups * str_col_ids.size(); - hostdevice_vector dict(num_dict_chunks); + hostdevice_vector dict(num_dict_chunks, stream); if (!str_col_ids.empty()) { - init_dictionaries(orc_columns.data(), str_col_ids, dict_data.data(), dict_index.data(), &dict); + init_dictionaries(*device_columns, + orc_columns.data(), + str_col_ids, + string_column_ids, + dict_data.data(), + dict_index.data(), + &dict); } // Decide stripe boundaries early on, based on uncompressed size @@ -1103,23 +1059,22 @@ void writer::impl::write(table_view const &table) // Build stripe-level dictionaries const auto num_stripe_dict = stripe_bounds.size() * str_col_ids.size(); - hostdevice_vector stripe_dict(num_stripe_dict); + hostdevice_vector stripe_dict(num_stripe_dict, stream); if (!str_col_ids.empty()) { build_dictionaries( orc_columns.data(), str_col_ids, stripe_bounds, dict, dict_index.data(), stripe_dict); } auto streams = create_streams(orc_columns, stripe_bounds); - auto enc_data = encode_columns(orc_columns, str_col_ids, stripe_bounds, streams); + auto enc_data = encode_columns(*device_columns, orc_columns, str_col_ids, stripe_bounds, streams); // Assemble individual disparate column chunks into contiguous data streams const auto num_index_streams = (num_columns + 1); const auto num_data_streams = streams.size() - num_index_streams; - hostdevice_2dvector strm_descs(stripe_bounds.size(), num_data_streams); + hostdevice_2dvector strm_descs(stripe_bounds.size(), num_data_streams, stream); auto stripes = gather_stripes(num_rows, num_index_streams, stripe_bounds, &enc_data.streams, &strm_descs); - auto device_columns = table_device_view::create(table); // Gather column statistics std::vector> column_stats; if (enable_statistics_ && num_columns > 0 && num_rows > 0) { @@ -1160,8 +1115,8 @@ void writer::impl::write(table_view const &table) // Compress the data streams rmm::device_buffer compressed_data(compressed_bfr_size, stream); - hostdevice_vector comp_out(num_compressed_blocks); - hostdevice_vector comp_in(num_compressed_blocks); + hostdevice_vector comp_out(num_compressed_blocks, stream); + hostdevice_vector comp_in(num_compressed_blocks, stream); if (compression_kind_ != NONE) { strm_descs.host_to_device(stream); gpu::CompressOrcDataStreams(static_cast(compressed_data.data()), diff --git a/cpp/src/io/orc/writer_impl.hpp b/cpp/src/io/orc/writer_impl.hpp index f0ec3a70cec..352cb11440f 100644 --- a/cpp/src/io/orc/writer_impl.hpp +++ b/cpp/src/io/orc/writer_impl.hpp @@ -186,14 +186,18 @@ class writer::impl { /** * @brief Builds up column dictionaries indices * + * @param view Table device view representing input table * @param columns List of columns * @param str_col_ids List of columns that are strings type + * @param d_str_col_ids List of 
columns that are strings type in device memory * @param dict_data Dictionary data memory * @param dict_index Dictionary index memory * @param dict List of dictionary chunks */ - void init_dictionaries(orc_column_view* columns, + void init_dictionaries(const table_device_view& view, + orc_column_view* columns, std::vector const& str_col_ids, + device_span d_str_col_ids, uint32_t* dict_data, uint32_t* dict_index, hostdevice_vector* dict); @@ -238,13 +242,15 @@ class writer::impl { /** * @brief Encodes the input columns into streams. * + * @param view Table device view representing input table * @param columns List of columns * @param str_col_ids List of columns that are strings type * @param stripe_bounds List of stripe boundaries * @param stream CUDA stream used for device memory operations and kernel launches * @return Encoded data and per-chunk stream descriptors */ - encoded_data encode_columns(host_span columns, + encoded_data encode_columns(const table_device_view& view, + host_span columns, std::vector const& str_col_ids, host_span stripe_bounds, orc_streams const& streams); diff --git a/cpp/src/io/parquet/page_dict.cu b/cpp/src/io/parquet/page_dict.cu index 46d471d5cf7..2676f30474d 100644 --- a/cpp/src/io/parquet/page_dict.cu +++ b/cpp/src/io/parquet/page_dict.cu @@ -52,8 +52,10 @@ inline __device__ uint32_t uint64_hash16(uint64_t v) return uint32_hash16((uint32_t)(v + (v >> 32))); } -inline __device__ uint32_t nvstr_hash16(const uint8_t *p, uint32_t len) +inline __device__ uint32_t hash_string(const string_view &val) { + const char *p = val.data(); + uint32_t len = val.size_bytes(); uint32_t hash = len; if (len > 0) { uint32_t align_p = 3 & reinterpret_cast(p); @@ -181,7 +183,7 @@ __global__ void __launch_bounds__(block_size, 1) } else if (dtype == INT96) { dtype_len_in = 8; } else { - dtype_len_in = (dtype == BYTE_ARRAY) ? sizeof(nvstrdesc_s) : dtype_len; + dtype_len_in = dtype_len; } __syncthreads(); while (s->row_cnt < s->ck.num_rows) { @@ -206,7 +208,7 @@ __global__ void __launch_bounds__(block_size, 1) if (dtype == BYTE_ARRAY) { auto str1 = s->col.leaf_column->element(row); len += str1.size_bytes(); - hash = nvstr_hash16(reinterpret_cast(str1.data()), str1.size_bytes()); + hash = hash_string(str1); // Walk the list of rows with the same hash next_addr = &s->hashmap[hash]; while ((next = atomicCAS(next_addr, 0, row + 1)) != 0) { diff --git a/cpp/src/io/parquet/page_enc.cu b/cpp/src/io/parquet/page_enc.cu index 3b29394686f..51ec0013f1a 100644 --- a/cpp/src/io/parquet/page_enc.cu +++ b/cpp/src/io/parquet/page_enc.cu @@ -79,8 +79,10 @@ struct page_enc_state_s { /** * @brief Return a 12-bit hash from a byte sequence */ -inline __device__ uint32_t nvstr_init_hash(const uint8_t *ptr, uint32_t len) +inline __device__ uint32_t hash_string(const string_view &val) { + char const *ptr = val.data(); + uint32_t len = val.size_bytes(); if (len != 0) { return (ptr[0] + (ptr[len - 1] << 5) + (len << 10)) & ((1 << init_hash_bits) - 1); } else { @@ -199,7 +201,7 @@ __global__ void __launch_bounds__(block_size) // dtype_len, which determines how much memory we need to allocate for the fragment. dtype_len_in = 8; } else { - dtype_len_in = (dtype == BYTE_ARRAY) ? 
sizeof(nvstrdesc_s) : dtype_len; + dtype_len_in = dtype_len; } __syncthreads(); @@ -218,7 +220,7 @@ __global__ void __launch_bounds__(block_size) if (dtype == BYTE_ARRAY) { auto str = s->col.leaf_column->element(val_idx); len += str.size_bytes(); - hash = nvstr_init_hash(reinterpret_cast(str.data()), str.size_bytes()); + hash = hash_string(str); } else if (dtype_len_in == 8) { hash = uint64_init_hash(s->col.leaf_column->element(val_idx)); } else { @@ -1059,7 +1061,7 @@ __global__ void __launch_bounds__(128, 8) gpuEncodePages(EncPage *pages, } else if (dtype == INT96) { dtype_len_in = 8; } else { - dtype_len_in = (dtype == BYTE_ARRAY) ? sizeof(nvstrdesc_s) : dtype_len_out; + dtype_len_in = dtype_len_out; } dict_bits = (dtype == BOOLEAN) ? 1 : (s->page.dict_bits_plus1 - 1); if (t == 0) { diff --git a/cpp/src/io/parquet/writer_impl.cu b/cpp/src/io/parquet/writer_impl.cu index 31baf419f45..1e8a6920ea4 100644 --- a/cpp/src/io/parquet/writer_impl.cu +++ b/cpp/src/io/parquet/writer_impl.cu @@ -969,7 +969,7 @@ void writer::impl::write(table_view const &table) } // Create table_device_view so that corresponding column_device_view data // can be written into col_desc members - auto parent_column_table_device_view = table_device_view::create(single_streams_table); + auto parent_column_table_device_view = table_device_view::create(single_streams_table, stream); rmm::device_uvector leaf_column_views(0, stream); // Initialize column description diff --git a/cpp/src/io/statistics/column_stats.cu b/cpp/src/io/statistics/column_stats.cu index 128bd905259..52f21f0a9ad 100644 --- a/cpp/src/io/statistics/column_stats.cu +++ b/cpp/src/io/statistics/column_stats.cu @@ -187,12 +187,6 @@ gatherFloatColumnStats(stats_state_s *s, statistics_dtype dtype, uint32_t t, Sto } } -// FIXME: Use native libcudf string type -struct nvstrdesc_s { - const char *ptr; - size_t count; -}; - /** * @brief Gather statistics for string columns * diff --git a/cpp/src/io/statistics/column_stats.h b/cpp/src/io/statistics/column_stats.h index d1d414aa7b4..d7895de50ce 100644 --- a/cpp/src/io/statistics/column_stats.h +++ b/cpp/src/io/statistics/column_stats.h @@ -45,10 +45,7 @@ struct stats_column_desc { uint32_t num_rows; //!< number of rows in column uint32_t num_values; //!< Number of data values in column. Different from num_rows in case of //!< nested columns - const uint32_t *valid_map_base; //!< base of valid bit map for this column (null if not present) - size_type column_offset; //! < index of the first element relative to the base memory - const void *column_data_base; //!< base ptr to column data - int32_t ts_scale; //!< timestamp scale (>0: multiply by scale, <0: divide by -scale) + int32_t ts_scale; //!< timestamp scale (>0: multiply by scale, <0: divide by -scale) column_device_view *leaf_column; //!< Pointer to leaf column column_device_view *parent_column; //!< Pointer to parent column. Is nullptr if not list type. 
From eb92145d524735f63bb74afa8ca97669991bac1b Mon Sep 17 00:00:00 2001 From: Robert Maynard Date: Thu, 25 Mar 2021 10:15:07 -0400 Subject: [PATCH 07/24] cudf_kafka now uses cuDF CMake export targets (CPM) (#7674) This integrates the changes from https://github.com/rapidsai/cudf/pull/7484 plus others required to get `cudf_kafka` to build cleanly after the CMake refactoring of `cudf` Authors: - Robert Maynard (@robertmaynard) - Jeremy Dyer (@jdye64) Approvers: - Mike Wendt (@mike-wendt) - Keith Kraus (@kkraus14) URL: https://github.com/rapidsai/cudf/pull/7674 --- build.sh | 27 ++-- ci/release/update-version.sh | 3 + cpp/cmake/cudf-config.cmake.in | 4 +- cpp/libcudf_kafka/CMakeLists.txt | 128 +++--------------- .../cmake/Modules/ConfigureGoogleTest.cmake | 46 ------- .../Templates/GoogleTest.CMakeLists.txt.cmake | 12 -- .../cmake/thirdparty/CUDF_KAFKA_GetCUDF.cmake | 52 +++++++ .../thirdparty/CUDF_KAFKA_GetRDKafka.cmake | 25 ++++ cpp/libcudf_kafka/tests/CMakeLists.txt | 103 +++----------- 9 files changed, 132 insertions(+), 268 deletions(-) delete mode 100644 cpp/libcudf_kafka/cmake/Modules/ConfigureGoogleTest.cmake delete mode 100644 cpp/libcudf_kafka/cmake/Templates/GoogleTest.CMakeLists.txt.cmake create mode 100644 cpp/libcudf_kafka/cmake/thirdparty/CUDF_KAFKA_GetCUDF.cmake create mode 100644 cpp/libcudf_kafka/cmake/thirdparty/CUDF_KAFKA_GetRDKafka.cmake diff --git a/build.sh b/build.sh index bc49b76d44e..70b93427d5c 100755 --- a/build.sh +++ b/build.sh @@ -134,18 +134,20 @@ if hasArg clean; then done fi -if (( ${BUILD_ALL_GPU_ARCH} == 0 )); then - CUDF_CMAKE_CUDA_ARCHITECTURES="-DCMAKE_CUDA_ARCHITECTURES=" - echo "Building for the architecture of the GPU in the system..." -else - CUDF_CMAKE_CUDA_ARCHITECTURES="" - echo "Building for *ALL* supported GPU architectures..." -fi ################################################################################ # Configure, build, and install libcudf if buildAll || hasArg libcudf; then + + if (( ${BUILD_ALL_GPU_ARCH} == 0 )); then + CUDF_CMAKE_CUDA_ARCHITECTURES="-DCMAKE_CUDA_ARCHITECTURES=" + echo "Building for the architecture of the GPU in the system..." + else + CUDF_CMAKE_CUDA_ARCHITECTURES="" + echo "Building for *ALL* supported GPU architectures..." + fi + cmake -S $REPODIR/cpp -B ${LIB_BUILD_DIR} \ -DCMAKE_INSTALL_PREFIX=${INSTALL_PREFIX} \ ${CUDF_CMAKE_CUDA_ARCHITECTURES} \ @@ -192,19 +194,16 @@ fi # Build libcudf_kafka library if hasArg libcudf_kafka; then cmake -S $REPODIR/cpp/libcudf_kafka -B ${KAFKA_LIB_BUILD_DIR} \ - ${CUDF_CMAKE_CUDA_ARCHITECTURES} \ -DCMAKE_INSTALL_PREFIX=${INSTALL_PREFIX} \ + -DBUILD_TESTS=${BUILD_TESTS} \ -DCMAKE_BUILD_TYPE=${BUILD_TYPE} + cd ${KAFKA_LIB_BUILD_DIR} + cmake --build . -j${PARALLEL_LEVEL} ${VERBOSE_FLAG} + if [[ ${INSTALL_TARGET} != "" ]]; then cmake --build . -j${PARALLEL_LEVEL} --target install ${VERBOSE_FLAG} - else - cmake --build . -j${PARALLEL_LEVEL} --target libcudf_kafka ${VERBOSE_FLAG} - fi - - if [[ ${BUILD_TESTS} == "ON" ]]; then - cmake --build . 
-j${PARALLEL_LEVEL} --target build_tests_libcudf_kafka ${VERBOSE_FLAG} fi fi diff --git a/ci/release/update-version.sh b/ci/release/update-version.sh index 61f551f4b6d..819a0dcf6bf 100755 --- a/ci/release/update-version.sh +++ b/ci/release/update-version.sh @@ -49,6 +49,9 @@ function sed_runner() { # cpp update sed_runner 's/'"CUDA_DATAFRAME VERSION .* LANGUAGES"'/'"CUDA_DATAFRAME VERSION ${NEXT_FULL_TAG} LANGUAGES"'/g' cpp/CMakeLists.txt +# cpp libcudf_kafka update +sed_runner 's/'"CUDA_KAFKA VERSION .* LANGUAGES"'/'"CUDA_KAFKA VERSION ${NEXT_FULL_TAG} LANGUAGES"'/g' cpp/libcudf_kafka/CMakeLists.txt + # doxyfile update sed_runner 's/PROJECT_NUMBER = .*/PROJECT_NUMBER = '${NEXT_FULL_TAG}'/g' cpp/doxygen/Doxyfile diff --git a/cpp/cmake/cudf-config.cmake.in b/cpp/cmake/cudf-config.cmake.in index 14f8a661c2f..6a280264d3c 100644 --- a/cpp/cmake/cudf-config.cmake.in +++ b/cpp/cmake/cudf-config.cmake.in @@ -23,7 +23,7 @@ targets: cudf::cudf - The main cudf library. This module offers an optional testing component which defines the -following IMPORTED GLOBAL targets: +following IMPORTED GLOBAL targets: cudf::cudftestutil - The main cudf testing library cudf::gmock @@ -37,7 +37,7 @@ Result Variables This module will set the following variables in your project:: - CUDF_FOUND + cudf_FOUND CUDF_VERSION CUDF_VERSION_MAJOR CUDF_VERSION_MINOR diff --git a/cpp/libcudf_kafka/CMakeLists.txt b/cpp/libcudf_kafka/CMakeLists.txt index a307bf9d3f0..e178f5a6280 100644 --- a/cpp/libcudf_kafka/CMakeLists.txt +++ b/cpp/libcudf_kafka/CMakeLists.txt @@ -13,112 +13,51 @@ # See the License for the specific language governing permissions and # limitations under the License. #============================================================================= -cmake_minimum_required(VERSION 3.14 FATAL_ERROR) +cmake_minimum_required(VERSION 3.18 FATAL_ERROR) -project(CUDA_KAFKA VERSION 0.15.0 LANGUAGES C CXX CUDA) - -# TODO: Since we have no actual CUDA code in cudf_kafka this should be removed in the future -# in favor of using FindCUDAToolkit to get the needed CUDA include headers -if(NOT CMAKE_CUDA_COMPILER) - message(SEND_ERROR "CMake cannot locate a CUDA compiler") -endif(NOT CMAKE_CUDA_COMPILER) - -################################################################################################### -# - build type ------------------------------------------------------------------------------------ - -# Set a default build type if none was specified -set(DEFAULT_BUILD_TYPE "Release") - -if(NOT CMAKE_BUILD_TYPE AND NOT CMAKE_CONFIGURATION_TYPES) - message(STATUS "Setting build type to '${DEFAULT_BUILD_TYPE}' since none specified.") - set(CMAKE_BUILD_TYPE "${DEFAULT_BUILD_TYPE}" CACHE - STRING "Choose the type of build." 
FORCE) - # Set the possible values of build type for cmake-gui - set_property(CACHE CMAKE_BUILD_TYPE PROPERTY STRINGS - "Debug" "Release" "MinSizeRel" "RelWithDebInfo") -endif(NOT CMAKE_BUILD_TYPE AND NOT CMAKE_CONFIGURATION_TYPES) +project(CUDA_KAFKA VERSION 0.19.0 LANGUAGES CXX) ################################################################################################### -# - compiler options ------------------------------------------------------------------------------ - -set(CMAKE_CXX_STANDARD 14) -set(CMAKE_CXX_STANDARD_REQUIRED ON) +# - Build options +option(BUILD_TESTS "Build tests for libcudf_kafka" ON) -# To apply RUNPATH to transitive dependencies (this is a temporary solution) -set(CMAKE_SHARED_LINKER_FLAGS "-Wl,--disable-new-dtags") -set(CMAKE_EXE_LINKER_FLAGS "-Wl,--disable-new-dtags") - -# Build options -option(BUILD_TESTS "Configure CMake to build tests" ON) +message(VERBOSE "CUDF_KAFKA: Build gtests: ${BUILD_TESTS}") ################################################################################################### -# - cmake modules --------------------------------------------------------------------------------- - -message(VERBOSE "CMAKE_CURRENT_SOURCE_DIR: ${CMAKE_CURRENT_SOURCE_DIR}") -set(CMAKE_MODULE_PATH "${CMAKE_CURRENT_SOURCE_DIR}/cmake/Modules/" ${CMAKE_MODULE_PATH}) +# - Dependencies -include(FeatureSummary) -include(CheckIncludeFiles) -include(CheckLibraryExists) +# CPM +include(../cmake/thirdparty/CUDF_GetCPM.cmake) -################################################################################################### -# - conda environment ----------------------------------------------------------------------------- +# libcudf +include(cmake/thirdparty/CUDF_KAFKA_GetCUDF.cmake) -if("$ENV{CONDA_BUILD}" STREQUAL "1") - set(CMAKE_SYSTEM_PREFIX_PATH "$ENV{BUILD_PREFIX};$ENV{PREFIX};${CMAKE_SYSTEM_PREFIX_PATH}") - set(CONDA_INCLUDE_DIRS "$ENV{BUILD_PREFIX}/include" "$ENV{PREFIX}/include") - set(CONDA_LINK_DIRS "$ENV{BUILD_PREFIX}/lib" "$ENV{PREFIX}/lib") - message(VERBOSE "Conda build detected, CMAKE_SYSTEM_PREFIX_PATH set to: ${CMAKE_SYSTEM_PREFIX_PATH}") -endif() +# librdkafka +include(cmake/thirdparty/CUDF_KAFKA_GetRDKafka.cmake) -################################################################################################### -# - add gtest ------------------------------------------------------------------------------------- +# # GTests if enabled +if (BUILD_TESTS) + # GoogleTest + include(../cmake/thirdparty/CUDF_GetGTest.cmake) -# TODO: This is currently using a nearly duplicate Google Test Module due to CMake source limitations. 
-# this should be standardized in the future to use the same Google Test Module as cudf -if(BUILD_TESTS) + # include CTest module -- automatically calls enable_testing() include(CTest) - include(ConfigureGoogleTest) - - if(GTEST_FOUND) - message(VERBOSE "Google C++ Testing Framework (Google Test) found in ${GTEST_ROOT}") - include_directories(${GTEST_INCLUDE_DIR}) - add_subdirectory(${CMAKE_SOURCE_DIR}/tests) - else() - message(AUTHOR_WARNING "Google C++ Testing Framework (Google Test) not found: automated tests are disabled.") - endif(GTEST_FOUND) -endif(BUILD_TESTS) - -message(VERBOSE "CUDF_KAFKA_TEST_LIST set to: ${CUDF_KAFKA_TEST_LIST}") + add_subdirectory(tests) +endif() ################################################################################################### # - include paths --------------------------------------------------------------------------------- -if(CMAKE_CUDA_TOOLKIT_INCLUDE_DIRECTORIES) - include_directories("${CMAKE_CUDA_TOOLKIT_INCLUDE_DIRECTORIES}") -endif(CMAKE_CUDA_TOOLKIT_INCLUDE_DIRECTORIES) - include_directories("${CMAKE_BINARY_DIR}/include" - "${CMAKE_BINARY_DIR}/include/jit" "${CMAKE_SOURCE_DIR}/include" "${CMAKE_SOURCE_DIR}/src") -if(CONDA_INCLUDE_DIRS) - include_directories("${CONDA_INCLUDE_DIRS}") -endif(CONDA_INCLUDE_DIRS) - ################################################################################################### # - library paths --------------------------------------------------------------------------------- link_directories("${CMAKE_CUDA_IMPLICIT_LINK_DIRECTORIES}" # CMAKE_CUDA_IMPLICIT_LINK_DIRECTORIES is an undocumented/unsupported variable containing the link directories for nvcc "${CMAKE_BINARY_DIR}/lib" - "${CMAKE_BINARY_DIR}" - "${GTEST_LIBRARY_DIR}" - "${RMM_LIBRARY}") - -if(CONDA_LINK_DIRS) - link_directories("${CONDA_LINK_DIRS}") -endif(CONDA_LINK_DIRS) + "${CMAKE_BINARY_DIR}") ################################################################################################### # - library target -------------------------------------------------------------------------------- @@ -127,37 +66,12 @@ add_library(cudf_kafka SHARED src/kafka_consumer.cpp ) -set_target_properties(cudf_kafka PROPERTIES BUILD_RPATH "\$ORIGIN") - -# Include paths -include_directories("${CMAKE_SOURCE_DIR}/include" - "${CMAKE_CURRENT_SOURCE_DIR}/include/cudf") - -################################################################################################### -# cudf_kafka - librdkafka ------------------------------------------------------------------------- - -find_path(RDKAFKA_INCLUDE "librdkafka" HINTS "$ENV{RDKAFKA_ROOT}/include") -find_library(RDKAFKA++_LIBRARY "rdkafka++" HINTS "$ENV{RDKAFKA_ROOT}/lib" "$ENV{RDKAFKA_ROOT}/build") - -message(VERBOSE "RDKAFKA: RDKAFKA++_LIBRARY set to ${RDKAFKA++_LIBRARY}") -message(VERBOSE "RDKAFKA: RDKAFKA_INCLUDE set to ${RDKAFKA_INCLUDE}") - -target_link_libraries(cudf_kafka ${RDKAFKA++_LIBRARY}) -include_directories("${RDKAFKA_INCLUDE}") - ################################################################################################### # - cudf_kafka Install ---------------------------------------------------------------------------- -target_link_libraries(cudf_kafka cudf) +target_link_libraries(cudf_kafka cudf::cudf RDKAFKA::RDKAFKA) install(TARGETS cudf_kafka DESTINATION lib) install(DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}/include DESTINATION include) - -add_custom_target(build_tests_libcudf_kafka - DEPENDS ${CUDF_KAFKA_TEST_LIST}) - -add_custom_target(test_libcudf_kafka - COMMAND ctest - DEPENDS 
build_tests_libcudf_kafka) diff --git a/cpp/libcudf_kafka/cmake/Modules/ConfigureGoogleTest.cmake b/cpp/libcudf_kafka/cmake/Modules/ConfigureGoogleTest.cmake deleted file mode 100644 index e2b3aff8546..00000000000 --- a/cpp/libcudf_kafka/cmake/Modules/ConfigureGoogleTest.cmake +++ /dev/null @@ -1,46 +0,0 @@ -set(GTEST_ROOT "${CMAKE_BINARY_DIR}/googletest") - -set(GTEST_CMAKE_ARGS "") - -configure_file("${CMAKE_SOURCE_DIR}/cmake/Templates/GoogleTest.CMakeLists.txt.cmake" - "${GTEST_ROOT}/CMakeLists.txt") - -file(MAKE_DIRECTORY "${GTEST_ROOT}/build") -file(MAKE_DIRECTORY "${GTEST_ROOT}/install") - -execute_process(COMMAND ${CMAKE_COMMAND} -G ${CMAKE_GENERATOR} . - RESULT_VARIABLE GTEST_CONFIG - WORKING_DIRECTORY ${GTEST_ROOT}) - -if(GTEST_CONFIG) - message(FATAL_ERROR "Configuring GoogleTest failed: " ${GTEST_CONFIG}) -endif(GTEST_CONFIG) - -set(PARALLEL_BUILD -j) -if($ENV{PARALLEL_LEVEL}) - set(NUM_JOBS $ENV{PARALLEL_LEVEL}) - set(PARALLEL_BUILD "${PARALLEL_BUILD}${NUM_JOBS}") -endif($ENV{PARALLEL_LEVEL}) - -if(${NUM_JOBS}) - if(${NUM_JOBS} EQUAL 1) - message(VERBOSE "GTEST BUILD: Enabling Sequential CMake build") - elseif(${NUM_JOBS} GREATER 1) - message(VERBOSE "GTEST BUILD: Enabling Parallel CMake build with ${NUM_JOBS} jobs") - endif(${NUM_JOBS} EQUAL 1) -else() - message(VERBOSE "GTEST BUILD: Enabling Parallel CMake build with all threads") -endif(${NUM_JOBS}) - -execute_process(COMMAND ${CMAKE_COMMAND} --build .. -- ${PARALLEL_BUILD} - RESULT_VARIABLE GTEST_BUILD - WORKING_DIRECTORY ${GTEST_ROOT}/build) - -if(GTEST_BUILD) - message(FATAL_ERROR "Building GoogleTest failed: " ${GTEST_BUILD}) -endif(GTEST_BUILD) - -message(VERBOSE "GoogleTest installed here: " ${GTEST_ROOT}/install) -set(GTEST_INCLUDE_DIR "${GTEST_ROOT}/install/include") -set(GTEST_LIBRARY_DIR "${GTEST_ROOT}/install/lib") -set(GTEST_FOUND TRUE) diff --git a/cpp/libcudf_kafka/cmake/Templates/GoogleTest.CMakeLists.txt.cmake b/cpp/libcudf_kafka/cmake/Templates/GoogleTest.CMakeLists.txt.cmake deleted file mode 100644 index 07692cd3d32..00000000000 --- a/cpp/libcudf_kafka/cmake/Templates/GoogleTest.CMakeLists.txt.cmake +++ /dev/null @@ -1,12 +0,0 @@ -cmake_minimum_required(VERSION 3.12) - -include(ExternalProject) - -ExternalProject_Add(GoogleTest - GIT_REPOSITORY https://github.com/google/googletest.git - GIT_TAG release-1.8.0 - GIT_SHALLOW true - SOURCE_DIR "${GTEST_ROOT}/googletest" - BINARY_DIR "${GTEST_ROOT}/build" - INSTALL_DIR "${GTEST_ROOT}/install" - CMAKE_ARGS ${GTEST_CMAKE_ARGS} -DCMAKE_INSTALL_PREFIX=${GTEST_ROOT}/install) diff --git a/cpp/libcudf_kafka/cmake/thirdparty/CUDF_KAFKA_GetCUDF.cmake b/cpp/libcudf_kafka/cmake/thirdparty/CUDF_KAFKA_GetCUDF.cmake new file mode 100644 index 00000000000..4796495413e --- /dev/null +++ b/cpp/libcudf_kafka/cmake/thirdparty/CUDF_KAFKA_GetCUDF.cmake @@ -0,0 +1,52 @@ +#============================================================================= +# Copyright (c) 2021, NVIDIA CORPORATION. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+#============================================================================= + +function(cudfkafka_save_if_enabled var) + if(CUDF_KAFKA_${var}) + unset(${var} PARENT_SCOPE) + unset(${var} CACHE) + endif() +endfunction() + +function(cudfkafka_restore_if_enabled var) + if(CUDF_KAFKA_${var}) + set(${var} ON CACHE INTERNAL "" FORCE) + endif() +endfunction() + +function(find_and_configure_cudf VERSION) + cudfkafka_save_if_enabled(BUILD_TESTS) + cudfkafka_save_if_enabled(BUILD_BENCHMARKS) + CPMFindPackage(NAME cudf + VERSION ${VERSION} + GIT_REPOSITORY https://github.com/rapidsai/cudf.git + GIT_TAG branch-${VERSION} + GIT_SHALLOW TRUE + SOURCE_SUBDIR cpp + OPTIONS "BUILD_TESTS OFF" + "BUILD_BENCHMARKS OFF") + cudfkafka_restore_if_enabled(BUILD_TESTS) + cudfkafka_restore_if_enabled(BUILD_BENCHMARKS) + + if(NOT cudf_BINARY_DIR IN_LIST CMAKE_PREFIX_PATH) + list(APPEND CMAKE_PREFIX_PATH "${cudf_BINARY_DIR}") + set(CMAKE_PREFIX_PATH ${CMAKE_PREFIX_PATH} PARENT_SCOPE) + endif() + +endfunction() + +set(CUDF_KAFKA_MIN_VERSION_cudf 0.19) +find_and_configure_cudf(${CUDF_KAFKA_MIN_VERSION_cudf}) diff --git a/cpp/libcudf_kafka/cmake/thirdparty/CUDF_KAFKA_GetRDKafka.cmake b/cpp/libcudf_kafka/cmake/thirdparty/CUDF_KAFKA_GetRDKafka.cmake new file mode 100644 index 00000000000..5c07db66668 --- /dev/null +++ b/cpp/libcudf_kafka/cmake/thirdparty/CUDF_KAFKA_GetRDKafka.cmake @@ -0,0 +1,25 @@ +#============================================================================= +# Copyright (c) 2021, NVIDIA CORPORATION. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +#============================================================================= + +find_path(RDKAFKA_INCLUDE "librdkafka" HINTS "$ENV{RDKAFKA_ROOT}/include") +find_library(RDKAFKA++_LIBRARY "rdkafka++" HINTS "$ENV{RDKAFKA_ROOT}/lib" "$ENV{RDKAFKA_ROOT}/build") + +if(RDKAFKA_INCLUDE AND RDKAFKA++_LIBRARY) + add_library(rdkafka INTERFACE) + target_link_libraries(rdkafka INTERFACE "${RDKAFKA++_LIBRARY}") + target_include_directories(rdkafka INTERFACE "${RDKAFKA_INCLUDE}") + add_library(RDKAFKA::RDKAFKA ALIAS rdkafka) +endif() \ No newline at end of file diff --git a/cpp/libcudf_kafka/tests/CMakeLists.txt b/cpp/libcudf_kafka/tests/CMakeLists.txt index af0ea1c8239..f556d36d9d2 100644 --- a/cpp/libcudf_kafka/tests/CMakeLists.txt +++ b/cpp/libcudf_kafka/tests/CMakeLists.txt @@ -1,5 +1,5 @@ #============================================================================= -# Copyright (c) 2018-2020, NVIDIA CORPORATION. +# Copyright (c) 2018-2021, NVIDIA CORPORATION. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -14,100 +14,29 @@ # limitations under the License. 
#============================================================================= -cmake_minimum_required(VERSION 3.14 FATAL_ERROR) - -project(KAFKA_TESTS VERSION 0.15.0 LANGUAGES C CXX CUDA) - -# TODO: Since we have no actual CUDA code in cudf_kafka this should be removed in the future -# in favor of using FindCUDAToolkit to get the needed CUDA include headers -if(NOT CMAKE_CUDA_COMPILER) - message(SEND_ERROR "CMake cannot locate a CUDA compiler") -endif(NOT CMAKE_CUDA_COMPILER) - -################################################################################################### -# - build type ------------------------------------------------------------------------------------ - -# Set a default build type if none was specified -set(DEFAULT_BUILD_TYPE "Release") - -if(NOT CMAKE_BUILD_TYPE AND NOT CMAKE_CONFIGURATION_TYPES) - message(STATUS "Setting build type to '${DEFAULT_BUILD_TYPE}' since none specified.") - set(CMAKE_BUILD_TYPE "${DEFAULT_BUILD_TYPE}" CACHE - STRING "Choose the type of build." FORCE) - # Set the possible values of build type for cmake-gui - set_property(CACHE CMAKE_BUILD_TYPE PROPERTY STRINGS - "Debug" "Release" "MinSizeRel" "RelWithDebInfo") -endif(NOT CMAKE_BUILD_TYPE AND NOT CMAKE_CONFIGURATION_TYPES) - -################################################################################################### -# - compiler options ------------------------------------------------------------------------------ - -set(CMAKE_CXX_STANDARD 14) -set(CMAKE_CXX_STANDARD_REQUIRED ON) - -# To apply RUNPATH to transitive dependencies (this is a temporary solution) -set(CMAKE_SHARED_LINKER_FLAGS "-Wl,--disable-new-dtags") -set(CMAKE_EXE_LINKER_FLAGS "-Wl,--disable-new-dtags") - -################################################################################################### -# - conda environment ----------------------------------------------------------------------------- - -if("$ENV{CONDA_BUILD}" STREQUAL "1") - set(CMAKE_SYSTEM_PREFIX_PATH "$ENV{BUILD_PREFIX};$ENV{PREFIX};${CMAKE_SYSTEM_PREFIX_PATH}") - set(CONDA_INCLUDE_DIRS "$ENV{BUILD_PREFIX}/include" "$ENV{PREFIX}/include") - set(CONDA_LINK_DIRS "$ENV{BUILD_PREFIX}/lib" "$ENV{PREFIX}/lib") - message(STATUS "Conda build detected, CMAKE_SYSTEM_PREFIX_PATH set to: ${CMAKE_SYSTEM_PREFIX_PATH}") -endif() - ################################################################################################### # - compiler function ----------------------------------------------------------------------------- -set(CUDF_KAFKA_TEST_LIST CACHE INTERNAL "CUDF_KAFKA_TEST_LIST") - -function(ConfigureTest CMAKE_TEST_NAME CMAKE_TEST_SRC) - add_executable(${CMAKE_TEST_NAME} - ${CMAKE_TEST_SRC}) - set_target_properties(${CMAKE_TEST_NAME} PROPERTIES POSITION_INDEPENDENT_CODE ON) - target_link_libraries(${CMAKE_TEST_NAME} gmock gtest gtest_main pthread cuda cudf_kafka) - set_target_properties(${CMAKE_TEST_NAME} PROPERTIES - RUNTIME_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}/gtests") +function(ConfigureTest CMAKE_TEST_NAME ) + add_executable(${CMAKE_TEST_NAME} ${ARGN}) + set_target_properties(${CMAKE_TEST_NAME} + PROPERTIES RUNTIME_OUTPUT_DIRECTORY "$") + if(TARGET cudf::gmock_main) + target_link_libraries(${CMAKE_TEST_NAME} PRIVATE cudf::gmock_main cudf::gtest_main cudf_kafka) + else() + target_link_libraries(${CMAKE_TEST_NAME} PRIVATE GTest::gmock_main GTest::gtest_main cudf_kafka) + endif() + target_include_directories(${CMAKE_TEST_NAME} PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}/../include) add_test(NAME ${CMAKE_TEST_NAME} COMMAND ${CMAKE_TEST_NAME}) - 
set(CUDF_KAFKA_TEST_LIST ${CUDF_KAFKA_TEST_LIST} ${CMAKE_TEST_NAME} CACHE INTERNAL "CUDF_KAFKA_TEST_LIST") -endfunction(ConfigureTest) +endfunction() ################################################################################################### -# - include paths --------------------------------------------------------------------------------- - -if(CMAKE_CUDA_TOOLKIT_INCLUDE_DIRECTORIES) - include_directories("${CMAKE_CUDA_TOOLKIT_INCLUDE_DIRECTORIES}") -endif(CMAKE_CUDA_TOOLKIT_INCLUDE_DIRECTORIES) - -include_directories("${CMAKE_BINARY_DIR}/include" - "${CMAKE_SOURCE_DIR}/include" - "${CMAKE_SOURCE_DIR}../../../tests" - "${CMAKE_SOURCE_DIR}" - "${CMAKE_SOURCE_DIR}/src" - "${GTEST_INCLUDE_DIR}") - -if(CONDA_INCLUDE_DIRS) - include_directories("${CONDA_INCLUDE_DIRS}") -endif(CONDA_INCLUDE_DIRS) +# - Kafka host tests ---------------------------------------------------------------------------------- +ConfigureTest(KAFKA_HOST_TEST + kafka_consumer_tests.cpp) ################################################################################################### -# - library paths --------------------------------------------------------------------------------- - -link_directories("${CMAKE_CUDA_IMPLICIT_LINK_DIRECTORIES}" # CMAKE_CUDA_IMPLICIT_LINK_DIRECTORIES is an undocumented/unsupported variable containing the link directories for nvcc - "${CMAKE_BINARY_DIR}/lib" - "${CMAKE_BINARY_DIR}" - "${GTEST_LIBRARY_DIR}" - "${RMM_LIBRARY}") - -if(CONDA_LINK_DIRS) - link_directories("${CONDA_LINK_DIRS}") -endif(CONDA_LINK_DIRS) - +### enable testing ################################################################################ ################################################################################################### -# - create tests ---------------------------------------------------------------------------------- -ConfigureTest(CUDF_KAFKA_HOST_READ kafka_consumer_tests.cpp) enable_testing() From 1a1bd66d1e06e74bd44c5fed3c8dab35e94599dc Mon Sep 17 00:00:00 2001 From: Vyas Ramasubramani Date: Thu, 25 Mar 2021 07:16:03 -0700 Subject: [PATCH 08/24] Don't identify decimals as strings. (#7710) As documented in [this pandas issue](https://github.com/pandas-dev/pandas/issues/15585), `is_string_dtype` for pandas is not strict and will characterize a whole bunch of things as strings that aren't. For our purposes, this is problematic because basically all subclasses of `ExtensionDtype` will be classified as strings by that function. This is definitely not appropriate, so I modified our version of `is_string_dtype` to explicitly reject all of our extension dtypes (previously it was only excluding categorical types). I'm not 100% confident that no other parts of the code base rely on the current (erroneous) behavior, but the cudf tests all passed for me locally, and all the calls of `utils.is_string_dtype` that I traced look to be places where the change gives more correct behavior, so I think our best bet is to just move forward with this change. Any problems that result from this change in the future due to other code relying on the current behavior should probably be characterized as bugs in the calling code and fixed there. The same goes for external codebases that relied on this behavior; this change is potentially breaking for them as well, but again is something that they should be addressing.
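To make that looseness concrete, here is a minimal illustrative sketch (not part of the original change; it assumes the pandas behavior described in the linked issue, and exact results can vary across pandas releases):

    >>> import pandas as pd
    >>> # NumPy object dtype is reported as a possible string dtype
    >>> pd.api.types.is_string_dtype(pd.api.types.pandas_dtype("object"))
    True
    >>> # CategoricalDtype has kind "O", so the permissive pandas check also says True
    >>> pd.api.types.is_string_dtype(pd.CategoricalDtype())
    True
    >>> # A numeric dtype is still rejected
    >>> pd.api.types.is_string_dtype(pd.api.types.pandas_dtype("int64"))
    False

cudf's decimal, list, struct, and interval extension dtypes land in the same permissive bucket, which is why the updated `is_string_dtype` below rejects each of them explicitly.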
Authors: - Vyas Ramasubramani (@vyasr) Approvers: - Keith Kraus (@kkraus14) URL: https://github.com/rapidsai/cudf/pull/7710 --- python/cudf/cudf/utils/dtypes.py | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/python/cudf/cudf/utils/dtypes.py b/python/cudf/cudf/utils/dtypes.py index 8875a36dba8..8af225ecb58 100644 --- a/python/cudf/cudf/utils/dtypes.py +++ b/python/cudf/cudf/utils/dtypes.py @@ -154,7 +154,15 @@ def is_numerical_dtype(obj): def is_string_dtype(obj): - return pd.api.types.is_string_dtype(obj) and not is_categorical_dtype(obj) + return ( + pd.api.types.is_string_dtype(obj) + # Reject all cudf extension types. + and not is_categorical_dtype(obj) + and not is_decimal_dtype(obj) + and not is_list_dtype(obj) + and not is_struct_dtype(obj) + and not is_interval_dtype(obj) + ) def is_datetime_dtype(obj): From a9b4705bea1d24417eab421215557b2462fc6452 Mon Sep 17 00:00:00 2001 From: Mark Harris Date: Fri, 26 Mar 2021 04:41:17 +1100 Subject: [PATCH 09/24] Replace device_vector with device_uvector in null_mask (#7715) Replaces remaining `device_vector` instances in null_mask.cu with `device_uvector`. Change the interface of `segmented_count_[un]set_bits` to take `host_span` instead of `std::vector`. Authors: - Mark Harris (@harrism) Approvers: - Jake Hemstad (@jrhemstad) - Vyas Ramasubramani (@vyasr) - Paul Taylor (@trxcllnt) URL: https://github.com/rapidsai/cudf/pull/7715 --- cpp/include/cudf/detail/null_mask.hpp | 4 +-- cpp/include/cudf/null_mask.hpp | 35 ++++++++++++--------------- cpp/src/bitmask/null_mask.cu | 26 ++++++++++---------- 3 files changed, 30 insertions(+), 35 deletions(-) diff --git a/cpp/include/cudf/detail/null_mask.hpp b/cpp/include/cudf/detail/null_mask.hpp index b0870ef8d9a..77cb321a12c 100644 --- a/cpp/include/cudf/detail/null_mask.hpp +++ b/cpp/include/cudf/detail/null_mask.hpp @@ -53,7 +53,7 @@ void set_null_mask(bitmask_type *bitmask, * @param[in] stream CUDA stream used for device memory operations and kernel launches. */ std::vector segmented_count_set_bits(bitmask_type const *bitmask, - std::vector const &indices, + host_span indices, rmm::cuda_stream_view stream); /** @@ -62,7 +62,7 @@ std::vector segmented_count_set_bits(bitmask_type const *bitmask, * @param[in] stream CUDA stream used for device memory operations and kernel launches. */ std::vector segmented_count_unset_bits(bitmask_type const *bitmask, - std::vector const &indices, + host_span indices, rmm::cuda_stream_view stream); /** diff --git a/cpp/include/cudf/null_mask.hpp b/cpp/include/cudf/null_mask.hpp index 0d4de1a9beb..ae6c0cfdbd7 100644 --- a/cpp/include/cudf/null_mask.hpp +++ b/cpp/include/cudf/null_mask.hpp @@ -16,6 +16,7 @@ #pragma once #include +#include #include @@ -136,38 +137,32 @@ cudf::size_type count_unset_bits(bitmask_type const* bitmask, size_type start, s * `[indices[2*i], indices[(2*i)+1])` (where 0 <= i < indices.size() / 2). * * Returns an empty vector if `bitmask == nullptr`. 
+ * * @throws cudf::logic_error if `indices.size() % 2 != 0` - * @throws cudf::logic_error if `indices[2*i] < 0 or - * indices[2*i] > indices[(2*i)+1]` - * - * @param[in] bitmask Bitmask residing in device memory whose bits will be - * counted - * @param[in] indices A vector of indices used to specify ranges to count the - * number of set bits - * @return std::vector A vector storing the number of non-zero bits - * in the specified ranges + * @throws cudf::logic_error if `indices[2*i] < 0 or indices[2*i] > indices[(2*i)+1]` + * + * @param[in] bitmask Bitmask residing in device memory whose bits will be counted + * @param[in] indices A host_span of indices specifying ranges to count the number of set bits + * @return A vector storing the number of non-zero bits in the specified ranges */ std::vector segmented_count_set_bits(bitmask_type const* bitmask, - std::vector const& indices); + host_span indices); /** * @brief Given a bitmask, counts the number of unset (0) bits in every range * `[indices[2*i], indices[(2*i)+1])` (where 0 <= i < indices.size() / 2). * * Returns an empty vector if `bitmask == nullptr`. + * * @throws cudf::logic_error if `indices.size() % 2 != 0` - * @throws cudf::logic_error if `indices[2*i] < 0 or - * indices[2*i] > indices[(2*i)+1]` - * - * @param[in] bitmask Bitmask residing in device memory whose bits will be - * counted - * @param[in] indices A vector of indices used to specify ranges to count the - * number of unset bits - * @return std::vector A vector storing the number of zero bits in - * the specified ranges + * @throws cudf::logic_error if `indices[2*i] < 0 or indices[2*i] > indices[(2*i)+1]` + * + * @param[in] bitmask Bitmask residing in device memory whose bits will be counted + * @param[in] indices A host_span of indices specifying ranges to count the number of unset bits + * @return A vector storing the number of zero bits in the specified ranges */ std::vector segmented_count_unset_bits(bitmask_type const* bitmask, - std::vector const& indices); + host_span indices); /** * @brief Creates a `device_buffer` from a slice of bitmask defined by a range diff --git a/cpp/src/bitmask/null_mask.cu b/cpp/src/bitmask/null_mask.cu index 845a5512c27..28d1411c30d 100644 --- a/cpp/src/bitmask/null_mask.cu +++ b/cpp/src/bitmask/null_mask.cu @@ -20,6 +20,7 @@ #include #include #include +#include #include #include #include @@ -30,7 +31,6 @@ #include #include #include -#include #include #include @@ -466,7 +466,7 @@ cudf::size_type count_unset_bits(bitmask_type const *bitmask, } std::vector segmented_count_set_bits(bitmask_type const *bitmask, - std::vector const &indices, + host_span indices, rmm::cuda_stream_view stream) { CUDF_EXPECTS(indices.size() % 2 == 0, @@ -489,8 +489,8 @@ std::vector segmented_count_set_bits(bitmask_type const *bitmask, } size_type num_ranges = indices.size() / 2; - thrust::host_vector h_first_indices(num_ranges); - thrust::host_vector h_last_indices(num_ranges); + std::vector h_first_indices(num_ranges); + std::vector h_last_indices(num_ranges); thrust::stable_partition_copy(thrust::seq, std::begin(indices), std::end(indices), @@ -499,9 +499,9 @@ std::vector segmented_count_set_bits(bitmask_type const *bitmask, h_last_indices.begin(), [](auto i) { return (i % 2) == 0; }); - rmm::device_vector d_first_indices = h_first_indices; - rmm::device_vector d_last_indices = h_last_indices; - rmm::device_vector d_null_counts(num_ranges, 0); + auto d_first_indices = make_device_uvector_async(h_first_indices, stream); + auto d_last_indices = 
make_device_uvector_async(h_last_indices, stream); + rmm::device_uvector d_null_counts(num_ranges, stream); auto word_num_set_bits = thrust::make_transform_iterator( thrust::make_counting_iterator(0), @@ -510,12 +510,12 @@ std::vector segmented_count_set_bits(bitmask_type const *bitmask, thrust::make_counting_iterator(0), // We cannot use lambda as cub::DeviceSegmentedReduce::Sum() requires // first_word_indices and last_word_indices to have the same type. - to_word_index(true, d_first_indices.data().get())); + to_word_index(true, d_first_indices.data())); auto last_word_indices = thrust::make_transform_iterator( thrust::make_counting_iterator(0), // We cannot use lambda as cub::DeviceSegmentedReduce::Sum() requires // first_word_indices and last_word_indices to have the same type. - to_word_index(false, d_last_indices.data().get())); + to_word_index(false, d_last_indices.data())); // first allocate temporary memroy @@ -560,7 +560,7 @@ std::vector segmented_count_set_bits(bitmask_type const *bitmask, std::vector ret(num_ranges); CUDA_TRY(cudaMemcpyAsync(ret.data(), - d_null_counts.data().get(), + d_null_counts.data(), num_ranges * sizeof(size_type), cudaMemcpyDeviceToHost, stream.value())); @@ -571,7 +571,7 @@ std::vector segmented_count_set_bits(bitmask_type const *bitmask, } std::vector segmented_count_unset_bits(bitmask_type const *bitmask, - std::vector const &indices, + host_span indices, rmm::cuda_stream_view stream) { if (indices.empty()) { @@ -669,7 +669,7 @@ cudf::size_type count_unset_bits(bitmask_type const *bitmask, size_type start, s // Count non-zero bits in the specified ranges std::vector segmented_count_set_bits(bitmask_type const *bitmask, - std::vector const &indices) + host_span indices) { CUDF_FUNC_RANGE(); return detail::segmented_count_set_bits(bitmask, indices, rmm::cuda_stream_default); @@ -677,7 +677,7 @@ std::vector segmented_count_set_bits(bitmask_type const *bitmask, // Count zero bits in the specified ranges std::vector segmented_count_unset_bits(bitmask_type const *bitmask, - std::vector const &indices) + host_span indices) { CUDF_FUNC_RANGE(); return detail::segmented_count_unset_bits(bitmask, indices, rmm::cuda_stream_default); From 000978e1771a8a2f34b2d29dfe4c1884a4c729b7 Mon Sep 17 00:00:00 2001 From: skirui-source <71867292+skirui-source@users.noreply.github.com> Date: Thu, 25 Mar 2021 13:22:49 -0700 Subject: [PATCH 10/24] Add Python bindings for `lists::contains` (#7547) Authors: - @skirui-source - Keith Kraus (@kkraus14) Approvers: - Keith Kraus (@kkraus14) - Michael Wang (@isVoid) URL: https://github.com/rapidsai/cudf/pull/7547 --- python/cudf/cudf/_lib/cpp/lists/contains.pxd | 15 +++++++ python/cudf/cudf/_lib/lists.pyx | 24 +++++++++++ python/cudf/cudf/core/column/lists.py | 45 +++++++++++++++++++- python/cudf/cudf/tests/test_list.py | 35 +++++++++++++++ 4 files changed, 118 insertions(+), 1 deletion(-) create mode 100644 python/cudf/cudf/_lib/cpp/lists/contains.pxd diff --git a/python/cudf/cudf/_lib/cpp/lists/contains.pxd b/python/cudf/cudf/_lib/cpp/lists/contains.pxd new file mode 100644 index 00000000000..ec2f61d08fa --- /dev/null +++ b/python/cudf/cudf/_lib/cpp/lists/contains.pxd @@ -0,0 +1,15 @@ +# Copyright (c) 2021, NVIDIA CORPORATION. 
+ +from libcpp.memory cimport unique_ptr +from cudf._lib.cpp.scalar.scalar cimport scalar + +from cudf._lib.cpp.column.column cimport column +from cudf._lib.cpp.lists.lists_column_view cimport lists_column_view + +from cudf._lib.cpp.column.column_view cimport column_view + +cdef extern from "cudf/lists/contains.hpp" namespace "cudf::lists" nogil: + cdef unique_ptr[column] contains( + lists_column_view lists, + scalar search_key, + ) except + diff --git a/python/cudf/cudf/_lib/lists.pyx b/python/cudf/cudf/_lib/lists.pyx index 2971aad8313..7f745e58c67 100644 --- a/python/cudf/cudf/_lib/lists.pyx +++ b/python/cudf/cudf/_lib/lists.pyx @@ -17,6 +17,9 @@ from cudf._lib.cpp.lists.lists_column_view cimport lists_column_view from cudf._lib.cpp.column.column_view cimport column_view from cudf._lib.cpp.column.column cimport column +from cudf._lib.scalar cimport DeviceScalar +from cudf._lib.cpp.scalar.scalar cimport scalar + from cudf._lib.cpp.table.table cimport table from cudf._lib.cpp.table.table_view cimport table_view from cudf._lib.cpp.types cimport size_type, order, null_order @@ -29,6 +32,8 @@ from cudf._lib.types cimport ( ) from cudf.core.dtypes import ListDtype +from cudf._lib.cpp.lists.contains cimport contains + from cudf._lib.cpp.lists.extract cimport extract_list_element @@ -93,6 +98,7 @@ def extract_element(Column col, size_type index): cdef shared_ptr[lists_column_view] list_view = ( make_shared[lists_column_view](col.view()) ) + cdef unique_ptr[column] c_result with nogil: @@ -100,3 +106,21 @@ def extract_element(Column col, size_type index): result = Column.from_unique_ptr(move(c_result)) return result + + +def contains_scalar(Column col, DeviceScalar search_key): + cdef shared_ptr[lists_column_view] list_view = ( + make_shared[lists_column_view](col.view()) + ) + cdef const scalar* search_key_value = search_key.get_raw_ptr() + + cdef unique_ptr[column] c_result + + with nogil: + c_result = move(contains( + list_view.get()[0], + search_key_value[0], + )) + + result = Column.from_unique_ptr(move(c_result)) + return result diff --git a/python/cudf/cudf/core/column/lists.py b/python/cudf/cudf/core/column/lists.py index 2204fbdea1f..b7f34e8c007 100644 --- a/python/cudf/cudf/core/column/lists.py +++ b/python/cudf/cudf/core/column/lists.py @@ -7,7 +7,12 @@ import cudf from cudf._lib.copying import segmented_gather -from cudf._lib.lists import count_elements, extract_element, sort_lists +from cudf._lib.lists import ( + contains_scalar, + count_elements, + extract_element, + sort_lists, +) from cudf.core.buffer import Buffer from cudf.core.column import ColumnBase, as_column, column from cudf.core.column.methods import ColumnMethodsMixin @@ -210,6 +215,44 @@ def get(self, index): else: raise IndexError("list index out of range") + def contains(self, search_key): + """ + Creates a column of bool values indicating whether the specified scalar + is an element of each row of a list column. 
+ + Parameters + ---------- + search_key : scalar + element being searched for in each row of the list column + + Returns + ------- + Column + + Examples + -------- + >>> s = cudf.Series([[1, 2, 3], [3, 4, 5], [4, 5, 6]]) + >>> s.list.contains(4) + Series([False, True, True]) + dtype: bool + """ + try: + res = self._return_or_inplace( + contains_scalar(self._column, search_key.device_value) + ) + except RuntimeError as e: + if ( + "Type/Scale of search key does not" + "match list column element type" in str(e) + ): + raise TypeError( + "Type/Scale of search key does not" + "match list column element type" + ) from e + raise + else: + return res + @property def leaves(self): """ diff --git a/python/cudf/cudf/tests/test_list.py b/python/cudf/cudf/tests/test_list.py index 2ab1382b34e..5645ce60596 100644 --- a/python/cudf/cudf/tests/test_list.py +++ b/python/cudf/cudf/tests/test_list.py @@ -246,3 +246,38 @@ def test_get_nulls(): with pytest.raises(IndexError, match="list index out of range"): sr = cudf.Series([[], [], []]) sr.list.get(100) + + +@pytest.mark.parametrize( + "data, scalar, expect", + [ + ([[1, 2, 3], []], 1, [True, False],), + ([[1, 2, 3], [], [3, 4, 5]], 6, [False, False, False],), + ([[1.0, 2.0, 3.0], None, []], 2.0, [True, None, False],), + ([[None, "b", "c"], [], ["b", "e", "f"]], "b", [True, False, True],), + ([[None, 2, 3], None, []], 1, [None, None, False]), + ([[None, "b", "c"], [], ["b", "e", "f"]], "d", [None, False, False],), + ], +) +def test_contains_scalar(data, scalar, expect): + sr = cudf.Series(data) + expect = cudf.Series(expect) + got = sr.list.contains(cudf.Scalar(scalar, sr.dtype.element_type)) + assert_eq(expect, got) + + +@pytest.mark.parametrize( + "data, expect", + [ + ([[1, 2, 3], []], [None, None],), + ([[1.0, 2.0, 3.0], None, []], [None, None, None],), + ([[None, 2, 3], [], None], [None, None, None],), + ([[1, 2, 3], [3, 4, 5]], [None, None],), + ([[], [], []], [None, None, None],), + ], +) +def test_contains_null_search_key(data, expect): + sr = cudf.Series(data) + expect = cudf.Series(expect, dtype="bool") + got = sr.list.contains(cudf.Scalar(cudf.NA, sr.dtype.element_type)) + assert_eq(expect, got) From b8f149ab76e81d813bc3f4672644828870d02c59 Mon Sep 17 00:00:00 2001 From: "Robert (Bobby) Evans" Date: Thu, 25 Mar 2021 15:35:37 -0500 Subject: [PATCH 11/24] Change JNI API to avoid loading native dependencies when creating sort order classes. (#7729) This is to address an issue we found in the Spark out of core sorting implementation. Authors: - Robert (Bobby) Evans (@revans2) Approvers: - Jason Lowe (@jlowe) URL: https://github.com/rapidsai/cudf/pull/7729 --- .../main/java/ai/rapids/cudf/OrderByArg.java | 59 ++++++++ java/src/main/java/ai/rapids/cudf/Table.java | 38 +---- .../test/java/ai/rapids/cudf/TableTest.java | 136 +++++++++--------- 3 files changed, 128 insertions(+), 105 deletions(-) create mode 100644 java/src/main/java/ai/rapids/cudf/OrderByArg.java diff --git a/java/src/main/java/ai/rapids/cudf/OrderByArg.java b/java/src/main/java/ai/rapids/cudf/OrderByArg.java new file mode 100644 index 00000000000..fbdd7035c76 --- /dev/null +++ b/java/src/main/java/ai/rapids/cudf/OrderByArg.java @@ -0,0 +1,59 @@ +/* + * + * Copyright (c) 2021, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + */ + +package ai.rapids.cudf; + +import java.io.Serializable; + +/** + * Provides the ordering for specific columns. + */ +public final class OrderByArg implements Serializable { + final int index; + final boolean isDescending; + final boolean isNullSmallest; + + OrderByArg(int index, boolean isDescending, boolean isNullSmallest) { + this.index = index; + this.isDescending = isDescending; + this.isNullSmallest = isNullSmallest; + } + + public static OrderByArg asc(final int index) { + return new OrderByArg(index, false, false); + } + + public static OrderByArg desc(final int index) { + return new OrderByArg(index, true, false); + } + + public static OrderByArg asc(final int index, final boolean isNullSmallest) { + return new OrderByArg(index, false, isNullSmallest); + } + + public static OrderByArg desc(final int index, final boolean isNullSmallest) { + return new OrderByArg(index, true, isNullSmallest); + } + + @Override + public String toString() { + return "ORDER BY " + index + + (isDescending ? " DESC " : " ASC ") + + (isNullSmallest ? "NULL SMALLEST" : "NULL LARGEST"); + } +} diff --git a/java/src/main/java/ai/rapids/cudf/Table.java b/java/src/main/java/ai/rapids/cudf/Table.java index 4da99d811f2..6e0b7d3bb94 100644 --- a/java/src/main/java/ai/rapids/cudf/Table.java +++ b/java/src/main/java/ai/rapids/cudf/Table.java @@ -25,7 +25,6 @@ import ai.rapids.cudf.HostColumnVector.StructType; import java.io.File; -import java.io.Serializable; import java.math.BigDecimal; import java.math.RoundingMode; import java.nio.ByteBuffer; @@ -1444,7 +1443,7 @@ public ColumnVector sortOrder(OrderByArg... args) { * responsible for cleaning up * the {@link ColumnVector} returned as part of the output {@link Table} *
- * Example usage: orderBy(true, Table.asc(0), Table.desc(3)...); + * Example usage: orderBy(true, OrderByArg.asc(0), OrderByArg.desc(3)...); * @param args Suppliers to initialize sortKeys. * @return Sorted Table */ @@ -1512,22 +1511,6 @@ public static Table merge(List
<Table>
tables, OrderByArg... args) { return merge(tables.toArray(new Table[tables.size()]), args); } - public static OrderByArg asc(final int index) { - return new OrderByArg(index, false, false); - } - - public static OrderByArg desc(final int index) { - return new OrderByArg(index, true, false); - } - - public static OrderByArg asc(final int index, final boolean isNullSmallest) { - return new OrderByArg(index, false, isNullSmallest); - } - - public static OrderByArg desc(final int index, final boolean isNullSmallest) { - return new OrderByArg(index, true, isNullSmallest); - } - /** * Returns count aggregation with only valid values. * Null values are skipped. @@ -2093,25 +2076,6 @@ public static Table fromPackedTable(ByteBuffer metadata, DeviceMemoryBuffer data // HELPER CLASSES ///////////////////////////////////////////////////////////////////////////// - public static final class OrderByArg implements Serializable { - final int index; - final boolean isDescending; - final boolean isNullSmallest; - - OrderByArg(int index, boolean isDescending, boolean isNullSmallest) { - this.index = index; - this.isDescending = isDescending; - this.isNullSmallest = isNullSmallest; - } - - @Override - public String toString() { - return "ORDER BY " + index + - (isDescending ? " DESC " : " ASC ") + - (isNullSmallest ? "NULL SMALLEST" : "NULL LARGEST"); - } - } - /** * class to encapsulate indices and table */ diff --git a/java/src/test/java/ai/rapids/cudf/TableTest.java b/java/src/test/java/ai/rapids/cudf/TableTest.java index 4eee3e97e6e..b6350a207c1 100644 --- a/java/src/test/java/ai/rapids/cudf/TableTest.java +++ b/java/src/test/java/ai/rapids/cudf/TableTest.java @@ -369,9 +369,9 @@ void testMergeSimple() { .column(3, 2, 1, 2, null, 3, 5, 2) .column(1, 9, 7, 3, 5, 3, 1, 10) .build(); - Table sortedTable1 = table1.orderBy(Table.asc(0), Table.desc(1)); - Table sortedTable2 = table2.orderBy(Table.asc(0), Table.desc(1)); - Table merged = Table.merge(Arrays.asList(sortedTable1, sortedTable2), Table.asc(0), Table.desc(1))) { + Table sortedTable1 = table1.orderBy(OrderByArg.asc(0), OrderByArg.desc(1)); + Table sortedTable2 = table2.orderBy(OrderByArg.asc(0), OrderByArg.desc(1)); + Table merged = Table.merge(Arrays.asList(sortedTable1, sortedTable2), OrderByArg.asc(0), OrderByArg.desc(1))) { assertTablesAreEqual(expected, merged); } } @@ -388,7 +388,7 @@ void testOrderByAD() { .column(2, 1, 4, 3, 5) .column(9, 7, 5, 3, 1) .build(); - Table sortedTable = table.orderBy(Table.asc(0), Table.desc(1))) { + Table sortedTable = table.orderBy(OrderByArg.asc(0), OrderByArg.desc(1))) { assertTablesAreEqual(expected, sortedTable); } } @@ -405,7 +405,7 @@ void testSortOrderSimple() { .column(2, 1, 4, 3, 5) .column(9, 7, 5, 3, 1) .build(); - ColumnVector gatherMap = table.sortOrder(Table.asc(0), Table.desc(1)); + ColumnVector gatherMap = table.sortOrder(OrderByArg.asc(0), OrderByArg.desc(1)); Table sortedTable = table.gather(gatherMap)) { assertTablesAreEqual(expected, sortedTable); } @@ -423,7 +423,7 @@ void testOrderByDD() { .column(5, 4, 3, 2, 1) .column(1, 5, 3, 9, 7) .build(); - Table sortedTable = table.orderBy(Table.desc(0), Table.desc(1))) { + Table sortedTable = table.orderBy(OrderByArg.desc(0), OrderByArg.desc(1))) { assertTablesAreEqual(expected, sortedTable); } } @@ -442,7 +442,7 @@ void testOrderByWithNulls() { .column("1", "0", "2", "4", "3") .column(7, 9, 5, 1, 3) .build(); - Table sortedTable = table.orderBy(Table.asc(0), Table.desc(1))) { + Table sortedTable = table.orderBy(OrderByArg.asc(0), OrderByArg.desc(1))) 
{ assertTablesAreEqual(expected, sortedTable); } } @@ -461,7 +461,7 @@ void testOrderByWithNullsAndStrings() { .column(null, null, 4, 3, 5) .column(9, 7, 5, 3, 1) .build(); - Table sortedTable = table.orderBy(Table.asc(0))) { + Table sortedTable = table.orderBy(OrderByArg.asc(0))) { assertTablesAreEqual(expected, sortedTable); } } @@ -867,7 +867,7 @@ void testLeftJoinWithNulls() { .column(null, null, 203, null, null, null, null, 201, 202, 204) // right .build(); Table joinedTable = leftTable.onColumns(0).leftJoin(rightTable.onColumns(0), true); - Table orderedJoinedTable = joinedTable.orderBy(Table.asc(1, true))) { + Table orderedJoinedTable = joinedTable.orderBy(OrderByArg.asc(1, true))) { assertTablesAreEqual(expected, orderedJoinedTable); } } @@ -891,7 +891,7 @@ void testLeftJoinOnNullKeys() { .build(); Table joinedTable = leftTable.onColumns(0).leftJoin(rightTable.onColumns(0)); - Table orderedJoinedTable = joinedTable.orderBy(Table.asc(1, true))) { + Table orderedJoinedTable = joinedTable.orderBy(OrderByArg.asc(1, true))) { assertTablesAreEqual(expectedResults, orderedJoinedTable); } @@ -902,7 +902,7 @@ void testLeftJoinOnNullKeys() { .build(); Table joinedTable = leftTable.onColumns(0).leftJoin(rightTable.onColumns(0), false); - Table orderedJoinedTable = joinedTable.orderBy(Table.asc(1, true))) { + Table orderedJoinedTable = joinedTable.orderBy(OrderByArg.asc(1, true))) { assertTablesAreEqual(expectedResults, orderedJoinedTable); } } @@ -919,7 +919,7 @@ void testLeftJoin() { .column( 20, 21, 22, 23, 24, 25, 26, 27, 28, 29) .build(); Table joinedTable = leftTable.onColumns(0).leftJoin(rightTable.onColumns(0), true); - Table orderedJoinedTable = joinedTable.orderBy(Table.asc(1, true)); + Table orderedJoinedTable = joinedTable.orderBy(OrderByArg.asc(1, true)); Table expected = new Table.TestBuilder() .column(360, 326, 254, 306, 109, 361, 251, 335, 301, 317) // common .column( 10, 11, 12, 13, 14, 15, 16, 17, 18, 19) // left @@ -945,7 +945,7 @@ void testFullJoinWithNonCommonKeys() { .column(null, null, null, null, null, 201, 200, null, 203, 202, 204, 205) // right .build(); Table joinedTable = leftTable.onColumns(0).fullJoin(rightTable.onColumns(0), true); - Table orderedJoinedTable = joinedTable.orderBy(Table.asc(0, true))) { + Table orderedJoinedTable = joinedTable.orderBy(OrderByArg.asc(0, true))) { assertTablesAreEqual(expected, orderedJoinedTable); } } @@ -968,7 +968,7 @@ void testFullJoinOnNullKeys() { .column( 200, 202, 200, 202, null, null, null, null, null, 201, null, 203, 204, 205) // right .build(); Table joinedTable = leftTable.onColumns(0).fullJoin(rightTable.onColumns(0)); - Table orderedJoinedTable = joinedTable.orderBy(Table.asc(0, true), Table.asc(1, true))) { + Table orderedJoinedTable = joinedTable.orderBy(OrderByArg.asc(0, true), OrderByArg.asc(1, true))) { assertTablesAreEqual(expectedResults, orderedJoinedTable); } @@ -980,7 +980,7 @@ void testFullJoinOnNullKeys() { .build(); Table joinedTable = leftTable.onColumns(0).fullJoin(rightTable.onColumns(0), false); Table orderedJoinedTable = joinedTable.orderBy( - Table.asc(0, true), Table.asc(1, true), Table.asc(2, true))) { + OrderByArg.asc(0, true), OrderByArg.asc(1, true), OrderByArg.asc(2, true))) { assertTablesAreEqual(expectedResults, orderedJoinedTable); } } @@ -997,7 +997,7 @@ void testFullJoinWithOnlyCommonKeys() { .column(200, 201, 202, 203, 204, 205, 206, 207, 208, 209) .build(); Table joinedTable = leftTable.onColumns(0).fullJoin(rightTable.onColumns(new int[]{0}), true); - Table orderedJoinedTable = 
joinedTable.orderBy(Table.asc(1, true)); + Table orderedJoinedTable = joinedTable.orderBy(OrderByArg.asc(1, true)); Table expected = new Table.TestBuilder() .column(360, 326, 254, 306, 109, 361, 251, 335, 301, 317) // common .column(100, 101, 102, 103, 104, 105, 106, 107, 108, 109) // left @@ -1023,7 +1023,7 @@ void testInnerJoinWithNonCommonKeys() { .column(202, 200, 201, 203) // right .build(); Table joinedTable = leftTable.onColumns(0).innerJoin(rightTable.onColumns(0), true); - Table orderedJoinedTable = joinedTable.orderBy(Table.asc(1, true))) { + Table orderedJoinedTable = joinedTable.orderBy(OrderByArg.asc(1, true))) { assertTablesAreEqual(expected, orderedJoinedTable); } } @@ -1046,7 +1046,7 @@ void testInnerJoinOnNullKeys() { .column(202, 200, 201, 203) // right .build(); Table joinedTable = leftTable.onColumns(0).innerJoin(rightTable.onColumns(0)); - Table orderedJoinedTable = joinedTable.orderBy(Table.asc(1, true))) { + Table orderedJoinedTable = joinedTable.orderBy(OrderByArg.asc(1, true))) { assertTablesAreEqual(expected, orderedJoinedTable); } @@ -1057,7 +1057,7 @@ void testInnerJoinOnNullKeys() { .column(202, 200, 203) // right .build(); Table joinedTable = leftTable.onColumns(0).innerJoin(rightTable.onColumns(0), false); - Table orderedJoinedTable = joinedTable.orderBy(Table.asc(1, true))){ + Table orderedJoinedTable = joinedTable.orderBy(OrderByArg.asc(1, true))){ assertTablesAreEqual(expected, orderedJoinedTable); } } @@ -1074,7 +1074,7 @@ void testInnerJoinWithOnlyCommonKeys() { .column(200, 201, 202, 203, 204, 205, 206, 207, 208, 209) .build(); Table joinedTable = leftTable.onColumns(0).innerJoin(rightTable.onColumns(new int[]{0}), true); - Table orderedJoinedTable = joinedTable.orderBy(Table.asc(1, true)); + Table orderedJoinedTable = joinedTable.orderBy(OrderByArg.asc(1, true)); Table expected = new Table.TestBuilder() .column(360, 326, 254, 306, 109, 361, 251, 335, 301, 317) // common .column(100, 101, 102, 103, 104, 105, 106, 107, 108, 109) // left @@ -1099,7 +1099,7 @@ void testLeftSemiJoin() { .column(102, 107, 108, 109) .build(); Table joinedTable = leftTable.onColumns(0).leftSemiJoin(rightTable.onColumns(0), true); - Table orderedJoinedTable = joinedTable.orderBy(Table.asc(1, true))) { + Table orderedJoinedTable = joinedTable.orderBy(OrderByArg.asc(1, true))) { assertTablesAreEqual(expected, orderedJoinedTable); } } @@ -1116,7 +1116,7 @@ void testLeftSemiJoinWithNulls() { .column("20", "21", "22", "23", "24", "25", "26", "27", "28", "29") .build(); Table joinedTable = leftTable.onColumns(0, 2).leftSemiJoin(rightTable.onColumns(0, 1), true); - Table orderedJoinedTable = joinedTable.orderBy(Table.asc(0, true)); + Table orderedJoinedTable = joinedTable.orderBy(OrderByArg.asc(0, true)); Table expected = new Table.TestBuilder() .column(254, 326, 361) .column(null, 11, 17) @@ -1143,7 +1143,7 @@ void testLeftSemiJoinOnNullKeys() { .column(102, 107, 108, 109) .build(); Table joinedTable = leftTable.onColumns(0).leftSemiJoin(rightTable.onColumns(0)); - Table orderedJoinedTable = joinedTable.orderBy(Table.asc(1, true))) { + Table orderedJoinedTable = joinedTable.orderBy(OrderByArg.asc(1, true))) { assertTablesAreEqual(expected, orderedJoinedTable); } @@ -1153,7 +1153,7 @@ void testLeftSemiJoinOnNullKeys() { .column(102, 107, 109) .build(); Table joinedTable = leftTable.onColumns(0).leftSemiJoin(rightTable.onColumns(0), false); - Table orderedJoinedTable = joinedTable.orderBy(Table.asc(1, true))) { + Table orderedJoinedTable = joinedTable.orderBy(OrderByArg.asc(1, true))) 
{ assertTablesAreEqual(expected, orderedJoinedTable); } } @@ -1174,7 +1174,7 @@ void testLeftAntiJoin() { .column(100, 101, 103, 104, 105, 106) .build(); Table joinedTable = leftTable.onColumns(0).leftAntiJoin(rightTable.onColumns(0), true); - Table orderedJoinedTable = joinedTable.orderBy(Table.asc(1, true))) { + Table orderedJoinedTable = joinedTable.orderBy(OrderByArg.asc(1, true))) { assertTablesAreEqual(expected, orderedJoinedTable); } } @@ -1196,7 +1196,7 @@ void testLeftAntiJoinOnNullKeys() { .column(100, 101, 103, 104, 105, 106) .build(); Table joinedTable = leftTable.onColumns(0).leftAntiJoin(rightTable.onColumns(0)); - Table orderedJoinedTable = joinedTable.orderBy(Table.asc(1, true))) { + Table orderedJoinedTable = joinedTable.orderBy(OrderByArg.asc(1, true))) { assertTablesAreEqual(expected, orderedJoinedTable); } @@ -1206,7 +1206,7 @@ void testLeftAntiJoinOnNullKeys() { .column(100, 101, 103, 104, 105, 106, 108) .build(); Table joinedTable = leftTable.onColumns(0).leftAntiJoin(rightTable.onColumns(0), false); - Table orderedJoinedTable = joinedTable.orderBy(Table.asc(1, true))) { + Table orderedJoinedTable = joinedTable.orderBy(OrderByArg.asc(1, true))) { assertTablesAreEqual(expected, orderedJoinedTable); } } @@ -1224,7 +1224,7 @@ void testLeftAntiJoinWithNulls() { .column("20", "21", "22", "23", "24", "25", "26", "27", "28", "29") .build(); Table joinedTable = leftTable.onColumns(0, 2).leftAntiJoin(rightTable.onColumns(0, 1), true); - Table orderedJoinedTable = joinedTable.orderBy(Table.asc(2, true)); + Table orderedJoinedTable = joinedTable.orderBy(OrderByArg.asc(2, true)); Table expected = new Table.TestBuilder() .column( 360, 326, null, 306, null, 251, 301, 317) .column( 10, 11, null, 13, 14, 16, 18, 19) @@ -1249,8 +1249,8 @@ void testCrossJoin() { Table joinedTable = leftTable.crossJoin(rightTable); Table orderedJoinedTable = joinedTable.orderBy( - Table.asc(0, true), - Table.asc(1, true))) { + OrderByArg.asc(0, true), + OrderByArg.asc(1, true))) { assertTablesAreEqual(expected, orderedJoinedTable); } } @@ -2297,7 +2297,7 @@ void testGroupByUniqueCount() { try (Table t3 = t1 .groupBy(0, 1) .aggregate(Aggregation.nunique().onColumn(0)); - Table sorted = t3.orderBy(Table.asc(0), Table.asc(1), Table.asc(2)); + Table sorted = t3.orderBy(OrderByArg.asc(0), OrderByArg.asc(1), OrderByArg.asc(2)); Table expected = new Table.TestBuilder() .column( "1", "1", "1", "1") .column( 0, 1, 3, 5) @@ -2318,7 +2318,7 @@ void testGroupByUniqueCountNulls() { try (Table t3 = t1 .groupBy(0, 1) .aggregate(Aggregation.nunique(true).onColumn(0)); - Table sorted = t3.orderBy(Table.asc(0), Table.asc(1), Table.asc(2)); + Table sorted = t3.orderBy(OrderByArg.asc(0), OrderByArg.asc(1), OrderByArg.asc(2)); Table expected = new Table.TestBuilder() .column( "1", "1", "1", "1") .column( 0, 1, 3, 5) @@ -2370,8 +2370,8 @@ void testWindowingCount() { .decimal32Column(-1, 1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3) // Decimal GBY Key .decimal64Column(1, 1L, 1L, 2L, 2L, 3L, 3L, 4L, 4L, 5L, 5L, 6L, 6L) // Decimal OBY Key .build()) { - try (Table sorted = unsorted.orderBy(Table.asc(0), Table.asc(1), Table.asc(2)); - Table decSorted = unsorted.orderBy(Table.asc(0), Table.asc(4), Table.asc(5)); + try (Table sorted = unsorted.orderBy(OrderByArg.asc(0), OrderByArg.asc(1), OrderByArg.asc(2)); + Table decSorted = unsorted.orderBy(OrderByArg.asc(0), OrderByArg.asc(4), OrderByArg.asc(5)); ColumnVector expectSortedAggColumn = ColumnVector.fromBoxedInts(7, 5, 1, 9, 7, 9, 8, 2, 8, 0, 6, 6)) { ColumnVector sortedAggColumn = 
sorted.getColumn(3); assertColumnsAreEqual(expectSortedAggColumn, sortedAggColumn); @@ -2406,8 +2406,8 @@ void testWindowingMin() { .decimal64Column(1, 1L, 1L, 2L, 2L, 3L, 3L, 4L, 4L, 5L, 5L, 6L, 6L) // Decimal OBY Key .decimal64Column(2, 7L, 5L, 1L, 9L, 7L, 9L, 8L, 2L, 8L, 0L, 6L, 6L) // Decimal Agg Column .build()) { - try (Table sorted = unsorted.orderBy(Table.asc(0), Table.asc(1), Table.asc(2)); - Table decSorted = unsorted.orderBy(Table.asc(0), Table.asc(4), Table.asc(5)); + try (Table sorted = unsorted.orderBy(OrderByArg.asc(0), OrderByArg.asc(1), OrderByArg.asc(2)); + Table decSorted = unsorted.orderBy(OrderByArg.asc(0), OrderByArg.asc(4), OrderByArg.asc(5)); ColumnVector expectSortedAggCol = ColumnVector.fromBoxedInts(7, 5, 1, 9, 7, 9, 8, 2, 8, 0, 6, 6); ColumnVector expectDecSortedAggCol = ColumnVector.decimalFromLongs(2, 7, 5, 1, 9, 7, 9, 8, 2, 8, 0, 6, 6)) { ColumnVector sortedAggColumn = sorted.getColumn(3); @@ -2444,8 +2444,8 @@ void testWindowingMax() { .decimal64Column(1, 1L, 1L, 2L, 2L, 3L, 3L, 4L, 4L, 5L, 5L, 6L, 6L) // Decimal OBY Key .decimal64Column(2, 7L, 5L, 1L, 9L, 7L, 9L, 8L, 2L, 8L, 0L, 6L, 6L) // Decimal Agg Column .build()) { - try (Table sorted = unsorted.orderBy(Table.asc(0), Table.asc(1), Table.asc(2)); - Table decSorted = unsorted.orderBy(Table.asc(0), Table.asc(4), Table.asc(5)); + try (Table sorted = unsorted.orderBy(OrderByArg.asc(0), OrderByArg.asc(1), OrderByArg.asc(2)); + Table decSorted = unsorted.orderBy(OrderByArg.asc(0), OrderByArg.asc(4), OrderByArg.asc(5)); ColumnVector expectSortedAggCol = ColumnVector.fromBoxedInts(7, 5, 1, 9, 7, 9, 8, 2, 8, 0, 6, 6); ColumnVector expectDecSortedAggCol = ColumnVector.decimalFromLongs(2, 7, 5, 1, 9, 7, 9, 8, 2, 8, 0, 6, 6)) { ColumnVector sortedAggColumn = sorted.getColumn(3); @@ -2479,7 +2479,7 @@ void testWindowingSum() { .column(1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6) // OBY Key .column(7, 5, 1, 9, 7, 9, 8, 2, 8, 0, 6, 6) // Agg Column .build()) { - try (Table sorted = unsorted.orderBy(Table.asc(0), Table.asc(1), Table.asc(2)); + try (Table sorted = unsorted.orderBy(OrderByArg.asc(0), OrderByArg.asc(1), OrderByArg.asc(2)); ColumnVector expectSortedAggColumn = ColumnVector.fromBoxedInts(7, 5, 1, 9, 7, 9, 8, 2, 8, 0, 6, 6)) { ColumnVector sortedAggColumn = sorted.getColumn(3); assertColumnsAreEqual(expectSortedAggColumn, sortedAggColumn); @@ -2509,8 +2509,8 @@ void testWindowingRowNumber() { .decimal64Column(1, 1L, 1L, 2L, 2L, 3L, 3L, 4L, 4L, 5L, 5L, 6L, 6L) // Decimal OBY Key .decimal64Column(2, 7L, 5L, 1L, 9L, 7L, 9L, 8L, 2L, 8L, 0L, 6L, 6L) // Decimal Agg Column .build()) { - try (Table sorted = unsorted.orderBy(Table.asc(0), Table.asc(1), Table.asc(2)); - Table decSorted = unsorted.orderBy(Table.asc(0), Table.asc(4), Table.asc(5)); + try (Table sorted = unsorted.orderBy(OrderByArg.asc(0), OrderByArg.asc(1), OrderByArg.asc(2)); + Table decSorted = unsorted.orderBy(OrderByArg.asc(0), OrderByArg.asc(4), OrderByArg.asc(5)); ColumnVector expectSortedAggColumn = ColumnVector.fromBoxedInts(7, 5, 1, 9, 7, 9, 8, 2, 8, 0, 6, 6); ColumnVector expectDecSortedAggColumn = ColumnVector.decimalFromLongs(2, 7, 5, 1, 9, 7, 9, 8, 2, 8, 0, 6, 6)) { ColumnVector sortedAggColumn = sorted.getColumn(3); @@ -2590,7 +2590,7 @@ void testWindowingCollect() { ).build(); ColumnVector expectSortedAggColumn = ColumnVector .fromBoxedInts(7, 5, 1, 9, 7, 9, 8, 2, null, 0, 6, null)) { - try (Table sorted = raw.orderBy(Table.asc(0), Table.asc(1), Table.asc(2))) { + try (Table sorted = raw.orderBy(OrderByArg.asc(0), OrderByArg.asc(1), 
OrderByArg.asc(2))) { ColumnVector sortedAggColumn = sorted.getColumn(3); assertColumnsAreEqual(expectSortedAggColumn, sortedAggColumn); @@ -2652,8 +2652,8 @@ void testWindowingLead() { .decimal64Column(-2, 7L, 5L, 1L, 9L, 7L, 9L, 8L, 2L, 8L, 0L, 6L, 6L) // Decimal Agg Column .build()) { - try (Table sorted = unsorted.orderBy(Table.asc(0), Table.asc(1), Table.asc(2)); - Table decSorted = unsorted.orderBy(Table.asc(0), Table.asc(4), Table.asc(5)); + try (Table sorted = unsorted.orderBy(OrderByArg.asc(0), OrderByArg.asc(1), OrderByArg.asc(2)); + Table decSorted = unsorted.orderBy(OrderByArg.asc(0), OrderByArg.asc(4), OrderByArg.asc(5)); ColumnVector expectSortedAggColumn = ColumnVector.fromBoxedInts(7, 5, 1, 9, 7, 9, 8, 2, 8, 0, 6, 6); ColumnVector expectDecSortedAggColumn = ColumnVector.decimalFromLongs(-2, 7, 5, 1, 9, 7, 9, 8, 2, 8, 0, 6, 6)) { ColumnVector sortedAggColumn = sorted.getColumn(3); @@ -2745,8 +2745,8 @@ void testWindowingLag() { .decimal64Column(-2, 7L, 5L, 1L, 9L, 7L, 9L, 8L, 2L, 8L, 0L, 6L, 6L) // Decimal Agg Column .build()) { - try (Table sorted = unsorted.orderBy(Table.asc(0), Table.asc(1), Table.asc(2)); - Table decSorted = unsorted.orderBy(Table.asc(0), Table.asc(4), Table.asc(5)); + try (Table sorted = unsorted.orderBy(OrderByArg.asc(0), OrderByArg.asc(1), OrderByArg.asc(2)); + Table decSorted = unsorted.orderBy(OrderByArg.asc(0), OrderByArg.asc(4), OrderByArg.asc(5)); ColumnVector expectSortedAggColumn = ColumnVector.fromBoxedInts(7, 5, 1, 9, 7, 9, 8, 2, 8, 0, 6, 6); ColumnVector decExpectSortedAggColumn = ColumnVector.decimalFromLongs(-2, 7, 5, 1, 9, 7, 9, 8, 2, 8, 0, 6, 6)) { ColumnVector sortedAggColumn = sorted.getColumn(3); @@ -2833,7 +2833,7 @@ void testWindowingMean() { .column( 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6) // OBY Key .column( 7, 5, 3, 7, 7, 9, 8, 4, 8, 0, 4, 8) // Agg Column .build()) { - try (Table sorted = unsorted.orderBy(Table.asc(0), Table.asc(1), Table.asc(2)); + try (Table sorted = unsorted.orderBy(OrderByArg.asc(0), OrderByArg.asc(1), OrderByArg.asc(2)); ColumnVector expectedSortedAggCol = ColumnVector.fromBoxedInts(7, 5, 3, 7, 7, 9, 8, 4, 8, 0, 4, 8)) { ColumnVector sortedAggColumn = sorted.getColumn(3); assertColumnsAreEqual(expectedSortedAggCol, sortedAggColumn); @@ -2859,7 +2859,7 @@ void testWindowingOnMultipleDifferentColumns() { .column( 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6) // OBY Key .column( 7, 5, 1, 9, 7, 9, 8, 2, 8, 0, 6, 6) // Agg Column .build()) { - try (Table sorted = unsorted.orderBy(Table.asc(0), Table.asc(1), Table.asc(2)); + try (Table sorted = unsorted.orderBy(OrderByArg.asc(0), OrderByArg.asc(1), OrderByArg.asc(2)); ColumnVector expectedSortedAggCol = ColumnVector.fromBoxedInts(7, 5, 1, 9, 7, 9, 8, 2, 8, 0, 6, 6)) { ColumnVector sortedAggColumn = sorted.getColumn(3); assertColumnsAreEqual(expectedSortedAggCol, sortedAggColumn); @@ -2909,7 +2909,7 @@ void testWindowingWithoutGroupByColumns() { .build(); ColumnVector expectSortedAggColumn = ColumnVector.fromBoxedInts(7, 5, 1, 9, 7, 9, 8, 2, 8, 0, 6, 6)) { - try (Table sorted = unsorted.orderBy(Table.asc(0))) { + try (Table sorted = unsorted.orderBy(OrderByArg.asc(0))) { ColumnVector sortedAggColumn = sorted.getColumn(1); assertColumnsAreEqual(expectSortedAggColumn, sortedAggColumn); @@ -2934,7 +2934,7 @@ void testTimeRangeWindowingCount() { .timestampDayColumn( 1, 1, 2, 3, 3, 3, 4, 4, 5, 5, 6, 6, 7) // Timestamp Key .column( 7, 5, 1, 9, 7, 9, 8, 2, 8, 0, 6, 6, 8) // Agg Column .build()) { - try (Table sorted = unsorted.orderBy(Table.asc(0), Table.asc(1), Table.asc(2)); + try 
(Table sorted = unsorted.orderBy(OrderByArg.asc(0), OrderByArg.asc(1), OrderByArg.asc(2)); ColumnVector expectSortedAggColumn = ColumnVector.fromBoxedInts(7, 5, 1, 9, 7, 9, 8, 2, 8, 0, 6, 6, 8)) { ColumnVector sortedAggColumn = sorted.getColumn(3); assertColumnsAreEqual(expectSortedAggColumn, sortedAggColumn); @@ -2962,7 +2962,7 @@ void testTimeRangeWindowingLead() { .timestampDayColumn( 1, 1, 2, 3, 3, 3, 4, 4, 5, 5, 6, 6, 7) // Timestamp Key .column( 7, 5, 1, 9, 7, 9, 8, 2, 8, 0, 6, 6, 8) // Agg Column .build()) { - try (Table sorted = unsorted.orderBy(Table.asc(0), Table.asc(1), Table.asc(2)); + try (Table sorted = unsorted.orderBy(OrderByArg.asc(0), OrderByArg.asc(1), OrderByArg.asc(2)); ColumnVector expectSortedAggColumn = ColumnVector.fromBoxedInts(7, 5, 1, 9, 7, 9, 8, 2, 8, 0, 6, 6, 8)) { ColumnVector sortedAggColumn = sorted.getColumn(3); assertColumnsAreEqual(expectSortedAggColumn, sortedAggColumn); @@ -2991,7 +2991,7 @@ void testTimeRangeWindowingMax() { .timestampDayColumn( 1, 1, 2, 3, 3, 3, 4, 4, 5, 5, 6, 6, 7) // Timestamp Key .column( 7, 5, 1, 9, 7, 9, 8, 2, 8, 0, 6, 6, 8) // Agg Column .build()) { - try (Table sorted = unsorted.orderBy(Table.asc(0), Table.asc(1), Table.asc(2)); + try (Table sorted = unsorted.orderBy(OrderByArg.asc(0), OrderByArg.asc(1), OrderByArg.asc(2)); ColumnVector expectSortedAggColumn = ColumnVector.fromBoxedInts(7, 5, 1, 9, 7, 9, 8, 2, 8, 0, 6, 6, 8)) { ColumnVector sortedAggColumn = sorted.getColumn(3); assertColumnsAreEqual(expectSortedAggColumn, sortedAggColumn); @@ -3029,7 +3029,7 @@ void testTimeRangeWindowingRowNumber() { .timestampDayColumn( 1, 1, 2, 3, 3, 3, 4, 4, 5, 5, 6, 6, 7) // Timestamp Key .column( 7, 5, 1, 9, 7, 9, 8, 2, 8, 0, 6, 6, 8) // Agg Column .build()) { - try (Table sorted = unsorted.orderBy(Table.asc(0), Table.asc(1), Table.asc(2)); + try (Table sorted = unsorted.orderBy(OrderByArg.asc(0), OrderByArg.asc(1), OrderByArg.asc(2)); ColumnVector expectSortedAggColumn = ColumnVector.fromBoxedInts(7, 5, 1, 9, 7, 9, 8, 2, 8, 0, 6, 6, 8)) { ColumnVector sortedAggColumn = sorted.getColumn(3); assertColumnsAreEqual(expectSortedAggColumn, sortedAggColumn); @@ -3056,7 +3056,7 @@ void testTimeRangeWindowingCountDescendingTimestamps() { .timestampDayColumn( 7, 6, 6, 5, 5, 4, 4, 3, 3, 3, 2, 1, 1) // Timestamp Key .column( 7, 5, 1, 9, 7, 9, 8, 2, 8, 0, 6, 6, 8) // Agg Column .build()) { - try (Table sorted = unsorted.orderBy(Table.asc(0), Table.asc(1), Table.desc(2)); + try (Table sorted = unsorted.orderBy(OrderByArg.asc(0), OrderByArg.asc(1), OrderByArg.desc(2)); ColumnVector expectSortedAggColumn = ColumnVector.fromBoxedInts(7, 5, 1, 9, 7, 9, 8, 2, 8, 0, 6, 6, 8)) { ColumnVector sortedAggColumn = sorted.getColumn(3); assertColumnsAreEqual(expectSortedAggColumn, sortedAggColumn); @@ -3093,7 +3093,7 @@ void testTimeRangeWindowingWithoutGroupByColumns() { try (Table unsorted = new Table.TestBuilder().timestampDayColumn( 1, 1, 2, 3, 3, 3, 4, 4, 5, 5, 6, 6, 7) // Timestamp Key .column( 7, 5, 1, 9, 7, 9, 8, 2, 8, 0, 6, 6, 8) // Agg Column .build()) { - try (Table sorted = unsorted.orderBy(Table.asc(0)); + try (Table sorted = unsorted.orderBy(OrderByArg.asc(0)); ColumnVector expectSortedAggColumn = ColumnVector.fromBoxedInts(7, 5, 1, 9, 7, 9, 8, 2, 8, 0, 6, 6, 8)) { ColumnVector sortedAggColumn = sorted.getColumn(1); assertColumnsAreEqual(expectSortedAggColumn, sortedAggColumn); @@ -3137,7 +3137,7 @@ void testTimeRangeWindowingCountUnboundedPreceding() { .timestampDayColumn( 1, 1, 2, 3, 3, 3, 4, 4, 5, 5, 6, 6, 7) // Timestamp Key .column( 7, 5, 1, 9, 
7, 9, 8, 2, 8, 0, 6, 6, 8) // Agg Column .build()) { - try (Table sorted = unsorted.orderBy(Table.asc(0), Table.asc(1), Table.asc(2)); + try (Table sorted = unsorted.orderBy(OrderByArg.asc(0), OrderByArg.asc(1), OrderByArg.asc(2)); ColumnVector expectSortedAggColumn = ColumnVector.fromBoxedInts(7, 5, 1, 9, 7, 9, 8, 2, 8, 0, 6, 6, 8)) { ColumnVector sortedAggColumn = sorted.getColumn(3); assertColumnsAreEqual(expectSortedAggColumn, sortedAggColumn); @@ -3167,7 +3167,7 @@ void testTimeRangeWindowingCountUnboundedASCWithNullsFirst() { .timestampDayColumn( X, X, X, 2, 3, 5, X, X, 1, 2, 4, 5, 7) // Timestamp Key .column( 7, 5, 1, 9, 7, 9, 8, 2, 8, 0, 6, 6, 8) // Agg Column .build()) { - try (Table sorted = unsorted.orderBy(Table.asc(0), Table.asc(1), Table.asc(2, true)); + try (Table sorted = unsorted.orderBy(OrderByArg.asc(0), OrderByArg.asc(1), OrderByArg.asc(2, true)); ColumnVector expectSortedAggColumn = ColumnVector.fromBoxedInts(7, 5, 1, 9, 7, 9, 8, 2, 8, 0, 6, 6, 8)) { ColumnVector sortedAggColumn = sorted.getColumn(3); assertColumnsAreEqual(expectSortedAggColumn, sortedAggColumn); @@ -3239,7 +3239,7 @@ void testTimeRangeWindowingCountUnboundedDESCWithNullsFirst() { .timestampDayColumn( X, X, X, 5, 3, 2, X, X, 7, 5, 4, 2, 1) // Timestamp Key .column( 7, 5, 1, 9, 7, 9, 8, 2, 8, 0, 6, 6, 8) // Agg Column .build()) { - try (Table sorted = unsorted.orderBy(Table.asc(0), Table.asc(1), Table.desc(2, false)); + try (Table sorted = unsorted.orderBy(OrderByArg.asc(0), OrderByArg.asc(1), OrderByArg.desc(2, false)); ColumnVector expectSortedAggColumn = ColumnVector.fromBoxedInts(7, 5, 1, 9, 7, 9, 8, 2, 8, 0, 6, 6, 8)) { ColumnVector sortedAggColumn = sorted.getColumn(3); assertColumnsAreEqual(expectSortedAggColumn, sortedAggColumn); @@ -3316,7 +3316,7 @@ void testTimeRangeWindowingCountUnboundedASCWithNullsLast() { .timestampDayColumn( 2, 3, 5, X, X, X, 1, 2, 4, 5, 7, X, X) // Timestamp Key .column( 7, 5, 1, 9, 7, 9, 8, 2, 8, 0, 6, 6, 8) // Agg Column .build()) { - try (Table sorted = unsorted.orderBy(Table.asc(0), Table.asc(1), Table.asc(2, false)); + try (Table sorted = unsorted.orderBy(OrderByArg.asc(0), OrderByArg.asc(1), OrderByArg.asc(2, false)); ColumnVector expectSortedAggColumn = ColumnVector.fromBoxedInts(7, 5, 1, 9, 7, 9, 8, 2, 8, 0, 6, 6, 8)) { ColumnVector sortedAggColumn = sorted.getColumn(3); assertColumnsAreEqual(expectSortedAggColumn, sortedAggColumn); @@ -3387,7 +3387,7 @@ void testTimeRangeWindowingCountUnboundedDESCWithNullsLast() { .timestampDayColumn( 5, 3, 2, X, X, X, 7, 5, 4, 2, 1, X, X) // Timestamp Key .column( 7, 5, 1, 9, 7, 9, 8, 2, 8, 0, 6, 6, 8) // Agg Column .build()) { - try (Table sorted = unsorted.orderBy(Table.asc(0), Table.asc(1), Table.desc(2, true)); + try (Table sorted = unsorted.orderBy(OrderByArg.asc(0), OrderByArg.asc(1), OrderByArg.desc(2, true)); ColumnVector expectSortedAggColumn = ColumnVector.fromBoxedInts(7, 5, 1, 9, 7, 9, 8, 2, 8, 0, 6, 6, 8)) { ColumnVector sortedAggColumn = sorted.getColumn(3); assertColumnsAreEqual(expectSortedAggColumn, sortedAggColumn); @@ -3463,7 +3463,7 @@ void testGroupByCountWithNulls() { .column( 1, 1, 1, null, 1, 1) .build()) { try (Table tmp = t1.groupBy(0).aggregate(count(1), count(2), count(3)); - Table t3 = tmp.orderBy(Table.asc(0, true)); + Table t3 = tmp.orderBy(OrderByArg.asc(0, true)); HostColumnVector groupCol = t3.getColumn(0).copyToHost(); HostColumnVector countCol = t3.getColumn(1).copyToHost(); HostColumnVector nullCountCol = t3.getColumn(2).copyToHost(); @@ -3500,7 +3500,7 @@ void 
testGroupByCountWithNullsIncluded() { .column( 1, 1, 1, null, 1, 1) .build()) { try (Table tmp = t1.groupBy(0).aggregate(count(1, true), count(2, true), count(3, true), count(3)); - Table t3 = tmp.orderBy(Table.asc(0, true)); + Table t3 = tmp.orderBy(OrderByArg.asc(0, true)); HostColumnVector groupCol = t3.getColumn(0).copyToHost(); HostColumnVector countCol = t3.getColumn(1).copyToHost(); HostColumnVector nullCountCol = t3.getColumn(2).copyToHost(); @@ -3547,7 +3547,7 @@ void testGroupByCountWithCollapsingNulls() { .build(); try (Table tmp = t1.groupBy(options, 0).aggregate(count(1), count(2), count(3)); - Table t3 = tmp.orderBy(Table.asc(0, true)); + Table t3 = tmp.orderBy(OrderByArg.asc(0, true)); HostColumnVector groupCol = t3.getColumn(0).copyToHost(); HostColumnVector countCol = t3.getColumn(1).copyToHost(); HostColumnVector nullCountCol = t3.getColumn(2).copyToHost(); @@ -3615,7 +3615,7 @@ void testGroupByArgMax() { try (Table t3 = t1.groupBy(0, 1) .aggregate(Aggregation.argMax().onColumn(2)); Table sorted = t3 - .orderBy(Table.asc(0), Table.asc(1), Table.asc(2)); + .orderBy(OrderByArg.asc(0), OrderByArg.asc(1), OrderByArg.asc(2)); Table expected = new Table.TestBuilder() .column(1, 1, 1, 1) .column(0, 1, 2, 3) @@ -3637,7 +3637,7 @@ void testGroupByArgMin() { try (Table t3 = t1.groupBy(0, 1) .aggregate(Aggregation.argMin().onColumn(2)); Table sorted = t3 - .orderBy(Table.asc(0), Table.asc(1), Table.asc(2)); + .orderBy(OrderByArg.asc(0), OrderByArg.asc(1), OrderByArg.asc(2)); Table expected = new Table.TestBuilder() .column(1, 1, 1, 1) .column(0, 1, 2, 3) @@ -3654,7 +3654,7 @@ void testGroupByMinBool() { .column(true, null, false, true, null, null) .column( 1, 1, 2, 2, 3, 3).build(); Table other = t1.groupBy(1).aggregate(min(0)); - Table ordered = other.orderBy(Table.asc(0)); + Table ordered = other.orderBy(OrderByArg.asc(0)); Table expected = new Table.TestBuilder() .column(1, 2, 3) .column (true, false, null) @@ -3669,7 +3669,7 @@ void testGroupByMaxBool() { .column(false, null, false, true, null, null) .column( 1, 1, 2, 2, 3, 3).build(); Table other = t1.groupBy(1).aggregate(max(0)); - Table ordered = other.orderBy(Table.asc(0)); + Table ordered = other.orderBy(OrderByArg.asc(0)); Table expected = new Table.TestBuilder() .column(1, 2, 3) .column (false, true, null) @@ -3695,7 +3695,7 @@ void testGroupByDuplicateAggregates() { .column( 1, 2, 2, 1).build()) { try (Table t3 = t1.groupBy(0, 1) .aggregate(max(2), min(2), min(2), max(2), min(2), count(1)); - Table t4 = t3.orderBy(Table.asc(2))) { + Table t4 = t3.orderBy(OrderByArg.asc(2))) { // verify t4 assertEquals(4, t4.getRowCount()); assertTablesAreEqual(t4, expected); From 20509d0b75cf33e0597325fe17426766ed366eb3 Mon Sep 17 00:00:00 2001 From: Karthikeyan <6488848+karthikeyann@users.noreply.github.com> Date: Fri, 26 Mar 2021 08:04:52 +0530 Subject: [PATCH 12/24] Add struct column support to cudf::sort and cudf::sorted_order (#7422) closes #7226 Add struct column support to `cudf::sort`, `cudf::sorted_order` struct is supported by flattening the struct into individual columns in table_view, null mask of struct is converted to boolean column with same null_mask. 
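For illustration only (not part of this patch), a minimal sketch of how the new struct support can be exercised through the public sort API, using the same test column wrappers that appear in the tests below; the column names and contents here are made up:

    #include <cudf/sorting.hpp>
    #include <cudf/table/table_view.hpp>
    #include <cudf_test/column_wrapper.hpp>

    // Sketch: sort a table whose only column is a STRUCT<INT32, STRING>.
    void sort_struct_sketch()
    {
      cudf::test::fixed_width_column_wrapper<int32_t> child0{{2, 1, 2}};
      cudf::test::strings_column_wrapper child1({"b", "a", "a"});
      auto struct_col = cudf::test::structs_column_wrapper{{child0, child1}}.release();

      cudf::table_view input{{struct_col->view()}};
      // The struct column is flattened to its constituent children internally,
      // so the existing row comparators drive the ordering.
      auto gather_map = cudf::sorted_order(input, {cudf::order::ASCENDING});
      auto sorted     = cudf::sort(input, {cudf::order::ASCENDING});
    }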
Authors: - Karthikeyan (@karthikeyann) Approvers: - Gera Shegalov (@gerashegalov) - David (@davidwendt) - Nghia Truong (@ttnghia) - Jake Hemstad (@jrhemstad) - Conor Hoekstra (@codereport) URL: https://github.com/rapidsai/cudf/pull/7422 --- .../cudf/column/column_device_view.cuh | 7 + cpp/include/cudf/table/row_operators.cuh | 46 ++- cpp/include/cudf/table/table_device_view.cuh | 8 +- cpp/include/cudf/table/table_view.hpp | 19 +- cpp/include/cudf/types.hpp | 6 +- cpp/src/sort/is_sorted.cu | 11 +- cpp/src/sort/sort_impl.cuh | 14 +- cpp/src/structs/utilities.cu | 101 ++++- cpp/src/structs/utilities.hpp | 20 + cpp/src/table/table_device_view.cu | 44 +- cpp/tests/interop/from_arrow_test.cpp | 4 +- cpp/tests/interop/to_arrow_test.cpp | 4 +- cpp/tests/sort/is_sorted_tests.cpp | 72 +++- cpp/tests/sort/sort_test.cpp | 382 +++++++++++++++++- cpp/tests/structs/structs_column_tests.cu | 14 +- 15 files changed, 707 insertions(+), 45 deletions(-) diff --git a/cpp/include/cudf/column/column_device_view.cuh b/cpp/include/cudf/column/column_device_view.cuh index 5a02f5bbe55..14d44b77fad 100644 --- a/cpp/include/cudf/column/column_device_view.cuh +++ b/cpp/include/cudf/column/column_device_view.cuh @@ -472,6 +472,13 @@ class alignas(16) column_device_view : public detail::column_device_view_base { return d_children[child_index]; } + /** + * @brief Returns the number of child columns + * + * @return The number of child columns + */ + __host__ __device__ size_type num_child_columns() const noexcept { return _num_children; } + protected: column_device_view* d_children{}; ///< Array of `column_device_view` ///< objects in device memory. diff --git a/cpp/include/cudf/table/row_operators.cuh b/cpp/include/cudf/table/row_operators.cuh index 04d215ff7cb..5af3c29a3d9 100644 --- a/cpp/include/cudf/table/row_operators.cuh +++ b/cpp/include/cudf/table/row_operators.cuh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2020, NVIDIA CORPORATION. + * Copyright (c) 2019-2021, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -91,6 +91,26 @@ __device__ weak_ordering relational_compare(Element lhs, Element rhs) return detail::compare_elements(lhs, rhs); } +/** + * @brief Compare the nulls according to null order. + * + * @param lhs_is_null boolean representing if lhs is null + * @param rhs_is_null boolean representing if lhs is null + * @param null_precedence null order + * @return Indicates the relationship between null in lhs and rhs columns. + */ +inline __device__ auto null_compare(bool lhs_is_null, bool rhs_is_null, null_order null_precedence) +{ + if (lhs_is_null and rhs_is_null) { // null (table_device_view const& lhs, + table_device_view const& rhs); +extern template bool is_relationally_comparable( + mutable_table_device_view const& lhs, mutable_table_device_view const& rhs); +} // namespace detail } // namespace cudf diff --git a/cpp/include/cudf/table/table_view.hpp b/cpp/include/cudf/table/table_view.hpp index 083366cc310..5cdecab9115 100644 --- a/cpp/include/cudf/table/table_view.hpp +++ b/cpp/include/cudf/table/table_view.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2020, NVIDIA CORPORATION. + * Copyright (c) 2019-2021, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -291,4 +291,21 @@ table_view scatter_columns(table_view const& source, std::vector const& map, table_view const& target); +namespace detail { +/** + * @brief Indicates whether respective columns in input tables are relationally comparable. + * + * @param lhs The first table + * @param rhs The second table (may be the same table as `lhs`) + * @return true all of respective columns on `lhs` and 'rhs` tables are comparable. + * @return false any of respective columns on `lhs` and 'rhs` tables are not comparable. + */ +template +bool is_relationally_comparable(TableView const& lhs, TableView const& rhs); + +extern template bool is_relationally_comparable(table_view const& lhs, + table_view const& rhs); +extern template bool is_relationally_comparable(mutable_table_view const& lhs, + mutable_table_view const& rhs); +} // namespace detail } // namespace cudf diff --git a/cpp/include/cudf/types.hpp b/cpp/include/cudf/types.hpp index 7a3316a0571..727284194d8 100644 --- a/cpp/include/cudf/types.hpp +++ b/cpp/include/cudf/types.hpp @@ -260,12 +260,12 @@ class data_type { /** * @brief Returns the type identifier */ - CUDA_HOST_DEVICE_CALLABLE type_id id() const noexcept { return _id; } + constexpr type_id id() const noexcept { return _id; } /** * @brief Returns the scale (for fixed_point types) */ - CUDA_HOST_DEVICE_CALLABLE int32_t scale() const noexcept { return _fixed_point_scale; } + constexpr int32_t scale() const noexcept { return _fixed_point_scale; } private: type_id _id{type_id::EMPTY}; @@ -287,7 +287,7 @@ class data_type { * @return true `lhs` is equal to `rhs` * @return false `lhs` is not equal to `rhs` */ -inline bool operator==(data_type const& lhs, data_type const& rhs) +constexpr bool operator==(data_type const& lhs, data_type const& rhs) { // use std::tie in the future, breaks JITIFY currently return lhs.id() == rhs.id() && lhs.scale() == rhs.scale(); diff --git a/cpp/src/sort/is_sorted.cu b/cpp/src/sort/is_sorted.cu index 5c31e565530..d1a1169dae4 100644 --- a/cpp/src/sort/is_sorted.cu +++ b/cpp/src/sort/is_sorted.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019, NVIDIA CORPORATION. + * Copyright (c) 2019-2021, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -20,6 +20,7 @@ #include #include #include +#include #include #include @@ -34,10 +35,12 @@ auto is_sorted(cudf::table_view const& in, std::vector const& null_precedence, rmm::cuda_stream_view stream) { - auto in_d = table_device_view::create(in); - rmm::device_vector d_column_order(column_order); + // 0-table_view, 1-column_order, 2-null_precedence, 3-validity_columns + auto flattened = structs::detail::flatten_nested_columns(in, column_order, null_precedence); + auto in_d = table_device_view::create(std::get<0>(flattened), stream); + rmm::device_vector d_column_order(std::get<1>(flattened)); rmm::device_vector const d_null_precedence = - (has_nulls) ? rmm::device_vector{null_precedence} + (has_nulls) ? 
rmm::device_vector{std::get<2>(flattened)} : rmm::device_vector{}; auto ineq_op = row_lexicographic_comparator( *in_d, *in_d, d_column_order.data().get(), d_null_precedence.data().get()); diff --git a/cpp/src/sort/sort_impl.cuh b/cpp/src/sort/sort_impl.cuh index 4fc83d343d5..506334c2a3d 100644 --- a/cpp/src/sort/sort_impl.cuh +++ b/cpp/src/sort/sort_impl.cuh @@ -21,6 +21,8 @@ #include #include #include +#include +#include #include #include @@ -112,7 +114,7 @@ std::unique_ptr sorted_order(table_view input, 0); // fast-path for single column sort - if (input.num_columns() == 1) { + if (input.num_columns() == 1 and not cudf::is_nested(input.column(0).type())) { auto const single_col = input.column(0); auto const col_order = column_order.empty() ? order::ASCENDING : column_order.front(); auto const null_prec = null_precedence.empty() ? null_order::BEFORE : null_precedence.front(); @@ -120,11 +122,13 @@ std::unique_ptr sorted_order(table_view input, : sorted_order(single_col, col_order, null_prec, stream, mr); } - auto device_table = table_device_view::create(input, stream); - rmm::device_vector d_column_order(column_order); + auto flattened = structs::detail::flatten_nested_columns(input, column_order, null_precedence); + auto& input_flattened = std::get<0>(flattened); + auto device_table = table_device_view::create(input_flattened, stream); + rmm::device_vector d_column_order(std::get<1>(flattened)); - if (has_nulls(input)) { - rmm::device_vector d_null_precedence(null_precedence); + if (has_nulls(input_flattened)) { + rmm::device_vector d_null_precedence(std::get<2>(flattened)); auto comparator = row_lexicographic_comparator( *device_table, *device_table, d_column_order.data().get(), d_null_precedence.data().get()); if (stable) { diff --git a/cpp/src/structs/utilities.cu b/cpp/src/structs/utilities.cu index 274a88d3a05..174e36a1628 100644 --- a/cpp/src/structs/utilities.cu +++ b/cpp/src/structs/utilities.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020, NVIDIA CORPORATION. + * Copyright (c) 2020-2021, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -17,6 +17,8 @@ #include #include +#include +#include #include #include @@ -57,6 +59,103 @@ std::vector> extract_ordered_struct_children( return result; } +/** + * @brief Flattens struct columns to constituent non-struct columns in the input table. + * + */ +struct flattened_table { + // reference variables + table_view const& input; + std::vector const& column_order; + std::vector const& null_precedence; + // output + std::vector> validity_as_column; + std::vector flat_columns; + std::vector flat_column_order; + std::vector flat_null_precedence; + + flattened_table(table_view const& input, + std::vector const& column_order, + std::vector const& null_precedence) + : input(input), column_order(column_order), null_precedence(null_precedence) + { + } + + // Convert null_mask to BOOL8 columns and flatten the struct children in order. + void flatten_struct_column(structs_column_view const& col, + order col_order, + null_order col_null_order) + { + if (col.nullable()) { + validity_as_column.push_back(cudf::is_valid(col)); + validity_as_column.back()->set_null_mask(copy_bitmask(col)); + flat_columns.push_back(validity_as_column.back()->view()); + if (not column_order.empty()) flat_column_order.push_back(col_order); // doesn't matter. 
+ if (not null_precedence.empty()) flat_null_precedence.push_back(col_null_order); + } + for (decltype(col.num_children()) i = 0; i < col.num_children(); ++i) { + auto const& child = col.get_sliced_child(i); + if (child.type().id() == type_id::STRUCT) { + flatten_struct_column(structs_column_view{child}, col_order, null_order::BEFORE); + // default spark behaviour is null_order::BEFORE + } else { + flat_columns.push_back(child); + if (not column_order.empty()) flat_column_order.push_back(col_order); + if (not null_precedence.empty()) flat_null_precedence.push_back(null_order::BEFORE); + // default spark behaviour is null_order::BEFORE + } + } + } + // Note: possibly expand for flattening list columns too. + + /** + * @copydoc flattened_table + * + * @return tuple with flattened table, flattened column order, flattened null precedence, + * vector of boolean columns (struct validity). + */ + auto operator()() + { + for (auto i = 0; i < input.num_columns(); ++i) { + auto const& col = input.column(i); + if (col.type().id() == type_id::STRUCT) { + flatten_struct_column(structs_column_view{col}, + (column_order.empty() ? order() : column_order[i]), + (null_precedence.empty() ? null_order() : null_precedence[i])); + } else { + flat_columns.push_back(col); + if (not column_order.empty()) flat_column_order.push_back(column_order[i]); + if (not null_precedence.empty()) flat_null_precedence.push_back(null_precedence[i]); + } + } + + return std::make_tuple(table_view{flat_columns}, + std::move(flat_column_order), + std::move(flat_null_precedence), + std::move(validity_as_column)); + } +}; + +/** + * @copydoc cudf::detail::flatten_nested_columns + */ +std::tuple, + std::vector, + std::vector>> +flatten_nested_columns(table_view const& input, + std::vector const& column_order, + std::vector const& null_precedence) +{ + std::vector> validity_as_column; + auto const has_struct = std::any_of( + input.begin(), input.end(), [](auto const& col) { return col.type().id() == type_id::STRUCT; }); + if (not has_struct) + return std::make_tuple(input, column_order, null_precedence, std::move(validity_as_column)); + + return flattened_table{input, column_order, null_precedence}(); +} + } // namespace detail } // namespace structs } // namespace cudf diff --git a/cpp/src/structs/utilities.hpp b/cpp/src/structs/utilities.hpp index 613754fc765..c0111d0bbde 100644 --- a/cpp/src/structs/utilities.hpp +++ b/cpp/src/structs/utilities.hpp @@ -16,6 +16,7 @@ #pragma once #include +#include #include namespace cudf { @@ -48,6 +49,25 @@ namespace detail { std::vector> extract_ordered_struct_children( host_span struct_cols); +/** + * @brief Flatten table with struct columns to table with constituent columns of struct columns. + * + * If a table does not have struct columns, same input arguments are returned. + * + * @param input input table to be flattened + * @param column_order column order for input table + * @param null_precedence null order for input table + * @return tuple with flattened table, flattened column order, flattened null precedence, + * vector of boolean columns (struct validity). 
+ */ +std::tuple, + std::vector, + std::vector>> +flatten_nested_columns(table_view const& input, + std::vector const& column_order, + std::vector const& null_precedence); + } // namespace detail } // namespace structs } // namespace cudf diff --git a/cpp/src/table/table_device_view.cu b/cpp/src/table/table_device_view.cu index bdce1c325c5..62daeed6d79 100644 --- a/cpp/src/table/table_device_view.cu +++ b/cpp/src/table/table_device_view.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2020, NVIDIA CORPORATION. + * Copyright (c) 2019-2021, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -21,6 +21,8 @@ #include +#include + namespace cudf { namespace detail { template @@ -52,5 +54,45 @@ template class table_device_view_base; // Explicit instantiation for a device table of mutable views template class table_device_view_base; +namespace { +struct is_relationally_comparable_impl { + template + constexpr bool operator()() + { + return cudf::is_relationally_comparable(); + } +}; +} // namespace + +template +bool is_relationally_comparable(TableView const& lhs, TableView const& rhs) +{ + return thrust::all_of(thrust::counting_iterator(0), + thrust::counting_iterator(lhs.num_columns()), + [lhs, rhs] __device__(auto const i) { + // Simplified this for compile time. (Ideally use double_type_dispatcher) + // TODO: possible to implement without double type dispatcher. + return lhs.column(i).type() == rhs.column(i).type() and + type_dispatcher(lhs.column(i).type(), + is_relationally_comparable_impl{}); + }); +} + +// Explicit extern template instantiation for a table of immutable views +extern template bool is_relationally_comparable(table_view const& lhs, + table_view const& rhs); + +// Explicit extern template instantiation for a table of mutable views +extern template bool is_relationally_comparable(mutable_table_view const& lhs, + mutable_table_view const& rhs); + +// Explicit extern template instantiation for a device table of immutable views +template bool is_relationally_comparable(table_device_view const& lhs, + table_device_view const& rhs); + +// Explicit extern template instantiation for a device table of mutable views +template bool is_relationally_comparable( + mutable_table_device_view const& lhs, mutable_table_device_view const& rhs); + } // namespace detail } // namespace cudf diff --git a/cpp/tests/interop/from_arrow_test.cpp b/cpp/tests/interop/from_arrow_test.cpp index 9f5bbe2dcb9..d79307dcbf6 100644 --- a/cpp/tests/interop/from_arrow_test.cpp +++ b/cpp/tests/interop/from_arrow_test.cpp @@ -168,7 +168,7 @@ TEST_F(FromArrowTest, StructColumn) std::vector>{{"string", "integral", "bool", "nested_list", "struct"}}; auto str_col = cudf::test::strings_column_wrapper{ - "Samuel Vimes", "Carrot Ironfoundersson", "Angua von Uberwald"} + "Samuel Vimes", "Carrot Ironfoundersson", "Angua von Ăśberwald"} .release(); auto str_col2 = cudf::test::strings_column_wrapper{{"CUDF", "ROCKS", "EVERYWHERE"}, {0, 1, 0}}.release(); @@ -198,7 +198,7 @@ TEST_F(FromArrowTest, StructColumn) cudf::table_view expected_cudf_table({struct_col->view()}); // Create Arrow table - std::vector str{"Samuel Vimes", "Carrot Ironfoundersson", "Angua von Uberwald"}; + std::vector str{"Samuel Vimes", "Carrot Ironfoundersson", "Angua von Ăśberwald"}; std::vector str2{"CUDF", "ROCKS", "EVERYWHERE"}; auto str_array = get_arrow_array(str); auto int_array = get_arrow_array({48, 27, 25}); diff --git 
a/cpp/tests/interop/to_arrow_test.cpp b/cpp/tests/interop/to_arrow_test.cpp index c8e56711135..57275433516 100644 --- a/cpp/tests/interop/to_arrow_test.cpp +++ b/cpp/tests/interop/to_arrow_test.cpp @@ -270,7 +270,7 @@ TEST_F(ToArrowTest, StructColumn) std::vector>{{"string", "integral", "bool", "nested_list", "struct"}}; auto str_col = cudf::test::strings_column_wrapper{ - "Samuel Vimes", "Carrot Ironfoundersson", "Angua von Uberwald"} + "Samuel Vimes", "Carrot Ironfoundersson", "Angua von Ăśberwald"} .release(); auto str_col2 = cudf::test::strings_column_wrapper{{"CUDF", "ROCKS", "EVERYWHERE"}, {0, 1, 0}}.release(); @@ -306,7 +306,7 @@ TEST_F(ToArrowTest, StructColumn) metadata.children_meta = {{"string"}, {"integral"}, {"bool"}, {"nested_list"}, sub_metadata}; // Create Arrow table - std::vector str{"Samuel Vimes", "Carrot Ironfoundersson", "Angua von Uberwald"}; + std::vector str{"Samuel Vimes", "Carrot Ironfoundersson", "Angua von Ăśberwald"}; std::vector str2{"CUDF", "ROCKS", "EVERYWHERE"}; auto str_array = get_arrow_array(str); auto int_array = get_arrow_array({48, 27, 25}); diff --git a/cpp/tests/sort/is_sorted_tests.cpp b/cpp/tests/sort/is_sorted_tests.cpp index 1e6bb2a70fb..abc9a9bfe9e 100644 --- a/cpp/tests/sort/is_sorted_tests.cpp +++ b/cpp/tests/sort/is_sorted_tests.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019, NVIDIA CORPORATION. + * Copyright (c) 2019-2021, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -162,6 +162,73 @@ auto nulls_before() return strings_column_wrapper({"identical", "identical"}, {0, 1}); } +// ----- struct_view {"nestedInt" : {"Int" : 0 }, "float" : 1} + +template +typename std::enable_if::value, structs_column_wrapper>::type +ascending() +{ + using T1 = int32_t; + auto int_col = fixed_width_column_wrapper({std::numeric_limits::lowest(), + T1(-100), + T1(-10), + T1(-10), + T1(0), + T1(10), + T1(10), + T1(100), + std::numeric_limits::max()}); + auto nestedInt_col = structs_column_wrapper{{int_col}}; + auto float_col = ascending(); + return structs_column_wrapper{{nestedInt_col, float_col}}; +} + +template +typename std::enable_if::value, structs_column_wrapper>::type +descending() +{ + using T1 = int32_t; + auto int_col = fixed_width_column_wrapper({std::numeric_limits::max(), + T1(100), + T1(10), + T1(10), + T1(0), + T1(-10), + T1(-10), + T1(-100), + std::numeric_limits::lowest()}); + auto nestedInt_col = structs_column_wrapper{{int_col}}; + auto float_col = descending(); + return structs_column_wrapper{{nestedInt_col, float_col}}; +} + +template <> +auto empty() +{ + auto int_col = fixed_width_column_wrapper(); + auto col1 = structs_column_wrapper{{int_col}}; + auto col2 = fixed_width_column_wrapper(); + return structs_column_wrapper{{col1, col2}}; +} + +template <> +auto nulls_after() +{ + auto int_col = fixed_width_column_wrapper({1, 1}); + auto col1 = structs_column_wrapper{{int_col}}; + auto col2 = fixed_width_column_wrapper({1, 1}); + return structs_column_wrapper{{col1, col2}, {1, 0}}; +} + +template <> +auto nulls_before() +{ + auto int_col = fixed_width_column_wrapper({1, 1}); + auto col1 = structs_column_wrapper{{int_col}}; + auto col2 = fixed_width_column_wrapper({1, 1}); + return structs_column_wrapper{{col1, col2}, {0, 1}}; +} + } // namespace testdata } // anonymous namespace @@ -172,7 +239,8 @@ template struct IsSortedTest : public BaseFixture { }; -TYPED_TEST_CASE(IsSortedTest, ComparableTypes); +using SupportedTypes = Concat>; 
+TYPED_TEST_CASE(IsSortedTest, SupportedTypes); TYPED_TEST(IsSortedTest, NoColumns) { diff --git a/cpp/tests/sort/sort_test.cpp b/cpp/tests/sort/sort_test.cpp index 5359014a831..9eb082c513c 100644 --- a/cpp/tests/sort/sort_test.cpp +++ b/cpp/tests/sort/sort_test.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019, NVIDIA CORPORATION. + * Copyright (c) 2019-2021, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -197,6 +197,386 @@ TYPED_TEST(Sort, WithAllValid) } } +TYPED_TEST(Sort, WithStructColumn) +{ + using T = TypeParam; + + std::initializer_list names = {"Samuel Vimes", + "Carrot Ironfoundersson", + "Angua von Ăśberwald", + "Cheery Littlebottom", + "Detritus", + "Mr Slant"}; + auto num_rows{std::distance(names.begin(), names.end())}; + auto names_col = cudf::test::strings_column_wrapper{names.begin(), names.end()}; + auto ages_col = cudf::test::fixed_width_column_wrapper{{48, 27, 25, 31, 351, 351}}; + + auto is_human_col = cudf::test::fixed_width_column_wrapper{ + {true, true, false, false, false, false}, {1, 1, 0, 1, 1, 0}}; + + auto struct_col = + cudf::test::structs_column_wrapper{{names_col, ages_col, is_human_col}}.release(); + auto struct_col_view{struct_col->view()}; + EXPECT_EQ(num_rows, struct_col->size()); + + fixed_width_column_wrapper col1{{5, 4, 3, 5, 8, 9}}; + strings_column_wrapper col2({"d", "e", "a", "d", "k", "a"}); + fixed_width_column_wrapper col3{{10, 40, 70, 5, 2, 20}}; + table_view input{{col1, col2, col3, struct_col_view}}; + + fixed_width_column_wrapper expected{{2, 1, 0, 3, 4, 5}}; + std::vector column_order{ + order::ASCENDING, order::ASCENDING, order::DESCENDING, order::ASCENDING}; + + auto got = sorted_order(input, column_order); + + // Skip validating bools order. Valid true bools are all + // equivalent, and yield random order after thrust::sort + if (!std::is_same::value) { + CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, got->view()); + + // Run test for sort and sort_by_key + run_sort_test(input, expected, column_order); + } else { + // Run test for sort and sort_by_key + fixed_width_column_wrapper expected_for_bool{{2, 5, 3, 0, 1, 4}}; + run_sort_test(input, expected_for_bool, column_order); + } +} + +TYPED_TEST(Sort, WithNestedStructColumn) +{ + using T = TypeParam; + + std::initializer_list names = {"Samuel Vimes", + "Carrot Ironfoundersson", + "Angua von Ăśberwald", + "Cheery Littlebottom", + "Detritus", + "Mr Slant"}; + std::vector v{1, 1, 0, 1, 1, 0}; + auto names_col = cudf::test::strings_column_wrapper{names.begin(), names.end()}; + auto ages_col = cudf::test::fixed_width_column_wrapper{{48, 27, 25, 31, 351, 351}}; + auto is_human_col = cudf::test::fixed_width_column_wrapper{ + {true, true, false, false, false, false}, {1, 1, 0, 1, 1, 0}}; + auto struct_col1 = cudf::test::structs_column_wrapper{{names_col, ages_col, is_human_col}, v}; + + auto ages_col2 = cudf::test::fixed_width_column_wrapper{{48, 27, 25, 31, 351, 351}}; + auto struct_col2 = cudf::test::structs_column_wrapper{{ages_col2, struct_col1}}.release(); + + auto struct_col_view{struct_col2->view()}; + + fixed_width_column_wrapper col1{{6, 6, 6, 6, 6, 6}}; + fixed_width_column_wrapper col2{{1, 1, 1, 2, 2, 2}}; + table_view input{{col1, col2, struct_col_view}}; + + fixed_width_column_wrapper expected{{3, 5, 4, 2, 1, 0}}; + std::vector column_order{order::ASCENDING, order::DESCENDING, order::ASCENDING}; + + auto got = sorted_order(input, column_order); + + // Skip validating bools order. 
Valid true bools are all + // equivalent, and yield random order after thrust::sort + if (!std::is_same::value) { + CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, got->view()); + + // Run test for sort and sort_by_key + run_sort_test(input, expected, column_order); + } else { + // Run test for sort and sort_by_key + fixed_width_column_wrapper expected_for_bool{{2, 5, 1, 3, 4, 0}}; + run_sort_test(input, expected_for_bool, column_order); + } +} + +TYPED_TEST(Sort, WithSingleStructColumn) +{ + using T = TypeParam; + + std::initializer_list names = {"Samuel Vimes", + "Carrot Ironfoundersson", + "Angua von Ăśberwald", + "Cheery Littlebottom", + "Detritus", + "Mr Slant"}; + std::vector v{1, 1, 0, 1, 1, 0}; + auto names_col = cudf::test::strings_column_wrapper{names.begin(), names.end()}; + auto ages_col = cudf::test::fixed_width_column_wrapper{{48, 27, 25, 31, 351, 351}}; + auto is_human_col = cudf::test::fixed_width_column_wrapper{ + {true, true, false, false, false, false}, {1, 1, 0, 1, 1, 0}}; + auto struct_col = + cudf::test::structs_column_wrapper{{names_col, ages_col, is_human_col}, v}.release(); + auto struct_col_view{struct_col->view()}; + table_view input{{struct_col_view}}; + + fixed_width_column_wrapper expected{{2, 5, 1, 3, 4, 0}}; + std::vector column_order{order::ASCENDING}; + + auto got = sorted_order(input, column_order); + + CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, got->view()); + + // Run test for sort and sort_by_key + run_sort_test(input, expected, column_order); +} + +TYPED_TEST(Sort, WithSlicedStructColumn) +{ + using T = TypeParam; + /* + /+-------------+ + | s| + +--------------+ + 0 | {"bbe", 1, 7}| + 1 | {"bbe", 1, 8}| + 2 | {"aaa", 0, 1}| + 3 | {"abc", 0, 1}| + 4 | {"ab", 0, 9}| + 5 | {"za", 2, 5}| + 6 | {"b", 1, 7}| + 7 | { @, 3, 3}| + +--------------+ + */ + // clang-format off + using FWCW = cudf::test::fixed_width_column_wrapper; + std::vector string_valids{ 1, 1, 1, 1, 1, 1, 1, 0}; + std::initializer_list names = {"bbe", "bbe", "aaa", "abc", "ab", "za", "b", "x"}; + auto col2 = FWCW{{ 1, 1, 0, 0, 0, 2, 1, 3}}; + auto col3 = FWCW{{ 7, 8, 1, 1, 9, 5, 7, 3}}; + auto col1 = cudf::test::strings_column_wrapper{names.begin(), names.end(), string_valids.begin()}; + auto struct_col = structs_column_wrapper{{col1, col2, col3}}.release(); + // clang-format on + auto struct_col_view{struct_col->view()}; + table_view input{{struct_col_view}}; + auto sliced_columns = cudf::split(struct_col_view, std::vector{3}); + auto sliced_tables = cudf::split(input, std::vector{3}); + std::vector column_order{order::ASCENDING}; + /* + asce_null_first sliced[3:] + /+-------------+ + | s| + +--------------+ + 7 | { @, 3, 3}| 7=4 + 2 | {"aaa", 0, 1}| + 4 | {"ab", 0, 9}| 4=1 + 3 | {"abc", 0, 1}| 3=0 + 6 | {"b", 1, 7}| 6=3 + 0 | {"bbe", 1, 7}| + 1 | {"bbe", 1, 8}| + 5 | {"za", 2, 5}| 5=2 + +--------------+ + */ + + // normal + fixed_width_column_wrapper expected{{7, 2, 4, 3, 6, 0, 1, 5}}; + auto got = sorted_order(input, column_order); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, got->view()); + // Run test for sort and sort_by_key + run_sort_test(input, expected, column_order); + + // table with sliced column + table_view input2{{sliced_columns[1]}}; + fixed_width_column_wrapper expected2{{4, 1, 0, 3, 2}}; + got = sorted_order(input2, column_order); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected2, got->view()); + // Run test for sort and sort_by_key + run_sort_test(input2, expected2, column_order); + + // sliced table[1] + fixed_width_column_wrapper expected3{{4, 1, 0, 3, 2}}; + got = 
sorted_order(sliced_tables[1], column_order); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected3, got->view()); + // Run test for sort and sort_by_key + run_sort_test(sliced_tables[1], expected3, column_order); + + // sliced table[0] + fixed_width_column_wrapper expected4{{2, 0, 1}}; + got = sorted_order(sliced_tables[0], column_order); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected4, got->view()); + // Run test for sort and sort_by_key + run_sort_test(sliced_tables[0], expected4, column_order); +} + +TYPED_TEST(Sort, SlicedColumns) +{ + using T = TypeParam; + using FWCW = cudf::test::fixed_width_column_wrapper; + + // clang-format off + std::vector string_valids{ 1, 1, 1, 1, 1, 1, 1, 0}; + std::initializer_list names = {"bbe", "bbe", "aaa", "abc", "ab", "za", "b", "x"}; + auto col2 = FWCW{{ 7, 8, 1, 1, 9, 5, 7, 3}}; + auto col1 = cudf::test::strings_column_wrapper{names.begin(), names.end(), string_valids.begin()}; + // clang-format on + table_view input{{col1, col2}}; + auto sliced_columns1 = cudf::split(col1, std::vector{3}); + auto sliced_columns2 = cudf::split(col1, std::vector{3}); + auto sliced_tables = cudf::split(input, std::vector{3}); + std::vector column_order{order::ASCENDING, order::ASCENDING}; + + // normal + // fixed_width_column_wrapper expected{{2, 3, 7, 5, 0, 6, 1, 4}}; + fixed_width_column_wrapper expected{{7, 2, 4, 3, 6, 0, 1, 5}}; + auto got = sorted_order(input, column_order); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, got->view()); + // Run test for sort and sort_by_key + run_sort_test(input, expected, column_order); + + // table with sliced column + table_view input2{{sliced_columns1[1], sliced_columns2[1]}}; + // fixed_width_column_wrapper expected2{{0, 4, 2, 3, 1}}; + fixed_width_column_wrapper expected2{{4, 1, 0, 3, 2}}; + got = sorted_order(input2, column_order); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected2, got->view()); + // Run test for sort and sort_by_key + run_sort_test(input2, expected2, column_order); +} + +TYPED_TEST(Sort, WithStructColumnCombinations) +{ + using T = TypeParam; + using FWCW = cudf::test::fixed_width_column_wrapper; + + // clang-format off + /* + +------------+ + | s| + +------------+ + 0 | {0, null}| + 1 | {1, null}| + 2 | null| + 3 |{null, null}| + 4 | null| + 5 |{null, null}| + 6 | {null, 1}| + 7 | {null, 0}| + +------------+ + */ + std::vector struct_valids{1, 1, 0, 1, 0, 1, 1, 1}; + auto col1 = FWCW{{ 0, 1, 9, -1, 9, -1, -1, -1}, {1, 1, 1, 0, 1, 0, 0, 0}}; + auto col2 = FWCW{{-1, -1, 9, -1, 9, -1, 1, 0}, {0, 0, 1, 0, 1, 0, 1, 1}}; + auto struct_col = cudf::test::structs_column_wrapper{{col1, col2}, struct_valids}.release(); + /* + desc_nulls_first desc_nulls_last asce_nulls_first asce_nulls_last + +------------+ +------------+ +------------+ +------------+ + | s| | s| | s| | s| + +------------+ +------------+ +------------+ +------------+ + 2 | null| 1 | {1, null}| 2 | null| 3 |{null, null}| + 4 | null| 0 | {0, null}| 4 | null| 5 |{null, null}| + 1 | {1, null}| 6 | {null, 1}| 3 |{null, null}| 7 | {null, 0}| + 0 | {0, null}| 7 | {null, 0}| 5 |{null, null}| 6 | {null, 1}| + 6 | {null, 1}| 3 |{null, null}| 7 | {null, 0}| 0 | {0, null}| + 7 | {null, 0}| 5 |{null, null}| 6 | {null, 1}| 1 | {1, null}| + 3 |{null, null}| 2 | null| 0 | {0, null}| 2 | null| + 5 |{null, null}| 4 | null| 1 | {1, null}| 4 | null| + +------------+ +------------+ +------------+ +------------+ + */ + // clang-format on + auto struct_col_view{struct_col->view()}; + table_view input{{struct_col_view}}; + std::vector column_order1{order::DESCENDING}; + + // desc_nulls_first + 
fixed_width_column_wrapper expected1{{2, 4, 1, 0, 6, 7, 3, 5}}; + auto got = sorted_order(input, column_order1, {null_order::AFTER}); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected1, got->view()); + // Run test for sort and sort_by_key + run_sort_test(input, expected1, column_order1, {null_order::AFTER}); + + // desc_nulls_last + fixed_width_column_wrapper expected2{{1, 0, 6, 7, 3, 5, 2, 4}}; + got = sorted_order(input, column_order1, {null_order::BEFORE}); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected2, got->view()); + // Run test for sort and sort_by_key + run_sort_test(input, expected2, column_order1, {null_order::BEFORE}); + + // asce_nulls_first + std::vector column_order2{order::ASCENDING}; + fixed_width_column_wrapper expected3{{2, 4, 3, 5, 7, 6, 0, 1}}; + got = sorted_order(input, column_order2, {null_order::BEFORE}); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected3, got->view()); + // Run test for sort and sort_by_key + run_sort_test(input, expected3, column_order2, {null_order::BEFORE}); + + // asce_nulls_last + fixed_width_column_wrapper expected4{{3, 5, 7, 6, 0, 1, 2, 4}}; + got = sorted_order(input, column_order2, {null_order::AFTER}); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected4, got->view()); + // Run test for sort and sort_by_key + run_sort_test(input, expected4, column_order2, {null_order::AFTER}); +} + +TYPED_TEST(Sort, WithStructColumnCombinationsWithoutNulls) +{ + using T = TypeParam; + using FWCW = cudf::test::fixed_width_column_wrapper; + + // clang-format off + /* + +------------+ + | s| + +------------+ + 0 | {0, null}| + 1 | {1, null}| + 2 | {9, 9}| + 3 |{null, null}| + 4 | {9, 9}| + 5 |{null, null}| + 6 | {null, 1}| + 7 | {null, 0}| + +------------+ + */ + auto col1 = FWCW{{ 0, 1, 9, -1, 9, -1, -1, -1}, {1, 1, 1, 0, 1, 0, 0, 0}}; + auto col2 = FWCW{{-1, -1, 9, -1, 9, -1, 1, 0}, {0, 0, 1, 0, 1, 0, 1, 1}}; + auto struct_col = cudf::test::structs_column_wrapper{{col1, col2}}.release(); + /* (nested columns are always nulls_first, spark requirement) + desc_nulls_* asce_nulls_* + +------------+ +------------+ + | s| | s| + +------------+ +------------+ + 2 | {9, 9}| 3 |{null, null}| + 4 | {9, 9}| 5 |{null, null}| + 1 | {1, null}| 7 | {null, 0}| + 0 | {0, null}| 6 | {null, 1}| + 6 | {null, 1}| 0 | {0, null}| + 7 | {null, 0}| 1 | {1, null}| + 3 |{null, null}| 2 | {9, 9}| + 5 |{null, null}| 4 | {9, 9}| + +------------+ +------------+ + */ + // clang-format on + auto struct_col_view{struct_col->view()}; + table_view input{{struct_col_view}}; + std::vector column_order{order::DESCENDING}; + + // desc_nulls_first + fixed_width_column_wrapper expected1{{2, 4, 1, 0, 6, 7, 3, 5}}; + auto got = sorted_order(input, column_order, {null_order::AFTER}); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected1, got->view()); + // Run test for sort and sort_by_key + run_sort_test(input, expected1, column_order, {null_order::AFTER}); + + // desc_nulls_last + got = sorted_order(input, column_order, {null_order::BEFORE}); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected1, got->view()); + // Run test for sort and sort_by_key + run_sort_test(input, expected1, column_order, {null_order::BEFORE}); + + // asce_nulls_first + std::vector column_order2{order::ASCENDING}; + fixed_width_column_wrapper expected2{{3, 5, 7, 6, 0, 1, 2, 4}}; + got = sorted_order(input, column_order2, {null_order::BEFORE}); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected2, got->view()); + // Run test for sort and sort_by_key + run_sort_test(input, expected2, column_order2, {null_order::BEFORE}); + + // asce_nulls_last + got = sorted_order(input, column_order2, 
{null_order::AFTER}); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected2, got->view()); + // Run test for sort and sort_by_key + run_sort_test(input, expected2, column_order2, {null_order::AFTER}); +} + TYPED_TEST(Sort, Stable) { using T = TypeParam; diff --git a/cpp/tests/structs/structs_column_tests.cu b/cpp/tests/structs/structs_column_tests.cu index 2a0856133ba..e1438c33044 100644 --- a/cpp/tests/structs/structs_column_tests.cu +++ b/cpp/tests/structs/structs_column_tests.cu @@ -68,7 +68,7 @@ TYPED_TEST(TypedStructColumnWrapperTest, TestColumnFactoryConstruction) { auto names_col = cudf::test::strings_column_wrapper{ - "Samuel Vimes", "Carrot Ironfoundersson", "Angua von Uberwald"} + "Samuel Vimes", "Carrot Ironfoundersson", "Angua von Ăśberwald"} .release(); int num_rows{names_col->size()}; @@ -95,7 +95,7 @@ TYPED_TEST(TypedStructColumnWrapperTest, TestColumnFactoryConstruction) // Check child columns for exactly correct values. vector_of_columns expected_children; expected_children.emplace_back(cudf::test::strings_column_wrapper{ - "Samuel Vimes", "Carrot Ironfoundersson", "Angua von Uberwald"} + "Samuel Vimes", "Carrot Ironfoundersson", "Angua von Ăśberwald"} .release()); expected_children.emplace_back( cudf::test::fixed_width_column_wrapper{48, 27, 25}.release()); @@ -116,7 +116,7 @@ TYPED_TEST(TypedStructColumnWrapperTest, TestColumnWrapperConstruction) { std::initializer_list names = {"Samuel Vimes", "Carrot Ironfoundersson", - "Angua von Uberwald", + "Angua von Ăśberwald", "Cheery Littlebottom", "Detritus", "Mr Slant"}; @@ -174,7 +174,7 @@ TYPED_TEST(TypedStructColumnWrapperTest, TestStructsContainingLists) std::initializer_list names = {"Samuel Vimes", "Carrot Ironfoundersson", - "Angua von Uberwald", + "Angua von Ăśberwald", "Cheery Littlebottom", "Detritus", "Mr Slant"}; @@ -234,7 +234,7 @@ TYPED_TEST(TypedStructColumnWrapperTest, StructOfStructs) auto names = {"Samuel Vimes", "Carrot Ironfoundersson", - "Angua von Uberwald", + "Angua von Ăśberwald", "Cheery Littlebottom", "Detritus", "Mr Slant"}; @@ -300,7 +300,7 @@ TYPED_TEST(TypedStructColumnWrapperTest, TestNullMaskPropagationForNonNullStruct auto names = {"Samuel Vimes", "Carrot Ironfoundersson", - "Angua von Uberwald", + "Angua von Ăśberwald", "Cheery Littlebottom", "Detritus", "Mr Slant"}; @@ -393,7 +393,7 @@ TYPED_TEST(TypedStructColumnWrapperTest, TestListsOfStructs) std::initializer_list names = {"Samuel Vimes", "Carrot Ironfoundersson", - "Angua von Uberwald", + "Angua von Ăśberwald", "Cheery Littlebottom", "Detritus", "Mr Slant"}; From e2693e054566b7bd3c0892a8dfa07d7414967bfc Mon Sep 17 00:00:00 2001 From: Vyas Ramasubramani Date: Thu, 25 Mar 2021 22:06:29 -0700 Subject: [PATCH 13/24] Expose libcudf's label_bins function to cudf (#7724) This PR is a follow-up to #7554 and exposes the feature implemented there via Cython for consumption in cudf's Python API. Authors: - Vyas Ramasubramani (@vyasr) Approvers: - Keith Kraus (@kkraus14) URL: https://github.com/rapidsai/cudf/pull/7724 --- python/cudf/cudf/_lib/cpp/labeling.pxd | 19 +++++++++++ python/cudf/cudf/_lib/labeling.pyx | 47 ++++++++++++++++++++++++++ 2 files changed, 66 insertions(+) create mode 100644 python/cudf/cudf/_lib/cpp/labeling.pxd create mode 100644 python/cudf/cudf/_lib/labeling.pyx diff --git a/python/cudf/cudf/_lib/cpp/labeling.pxd b/python/cudf/cudf/_lib/cpp/labeling.pxd new file mode 100644 index 00000000000..996ae4f9e38 --- /dev/null +++ b/python/cudf/cudf/_lib/cpp/labeling.pxd @@ -0,0 +1,19 @@ +# Copyright (c) 2021, NVIDIA CORPORATION. 
+ +from libcpp.memory cimport unique_ptr + +from cudf._lib.cpp.column.column cimport column +from cudf._lib.cpp.column.column_view cimport column_view + +cdef extern from "cudf/labeling/label_bins.hpp" namespace "cudf" nogil: + ctypedef enum inclusive: + YES "cudf::inclusive::YES" + NO "cudf::inclusive::NO" + + cdef unique_ptr[column] label_bins ( + const column_view &input, + const column_view &left_edges, + inclusive left_inclusive, + const column_view &right_edges, + inclusive right_inclusive + ) except + diff --git a/python/cudf/cudf/_lib/labeling.pyx b/python/cudf/cudf/_lib/labeling.pyx new file mode 100644 index 00000000000..1b553024347 --- /dev/null +++ b/python/cudf/cudf/_lib/labeling.pyx @@ -0,0 +1,47 @@ +# Copyright (c) 2021, NVIDIA CORPORATION. + +import numpy as np +from enum import IntEnum + +from libc.stdint cimport uint32_t +from libcpp cimport bool as cbool +from libcpp.memory cimport unique_ptr +from libcpp.utility cimport move + +from cudf._lib.column cimport Column +from cudf._lib.replace import replace_nulls + +from cudf._lib.cpp.labeling cimport inclusive +from cudf._lib.cpp.labeling cimport label_bins as cpp_label_bins +from cudf._lib.cpp.column.column cimport column +from cudf._lib.cpp.column.column_view cimport column_view + + +# Note that the parameter input shadows a Python built-in in the local scope, +# but I'm not too concerned about that since there's no use-case for actual +# input in this context. +def label_bins(Column input, Column left_edges, cbool left_inclusive, + Column right_edges, cbool right_inclusive): + cdef inclusive c_left_inclusive = \ + inclusive.YES if left_inclusive else inclusive.NO + cdef inclusive c_right_inclusive = \ + inclusive.YES if right_inclusive else inclusive.NO + + cdef column_view input_view = input.view() + cdef column_view left_edges_view = left_edges.view() + cdef column_view right_edges_view = right_edges.view() + + cdef unique_ptr[column] c_result + + with nogil: + c_result = move( + cpp_label_bins( + input_view, + left_edges_view, + c_left_inclusive, + right_edges_view, + c_right_inclusive, + ) + ) + + return Column.from_unique_ptr(move(c_result)) From b0e350b205bf50a405889b6fc0207f146f96ac81 Mon Sep 17 00:00:00 2001 From: Karthikeyan <6488848+karthikeyann@users.noreply.github.com> Date: Fri, 26 Mar 2021 13:15:08 +0530 Subject: [PATCH 14/24] Cleanup groupby to use host_span, device_span, device_uvector (#7698) addresses part of https://github.com/rapidsai/cudf/issues/7287 - replaced `std::vector const&` with `host_span` - replaced `rmm::device_[u]vector const&` with `device_span` - replaced `rmm::device_vector` with `rmm::device_uvector` Authors: - Karthikeyan (@karthikeyann) Approvers: - Jake Hemstad (@jrhemstad) - Nghia Truong (@ttnghia) - Mark Harris (@harrism) URL: https://github.com/rapidsai/cudf/pull/7698 --- cpp/include/cudf/detail/groupby.hpp | 7 +++--- cpp/include/cudf/groupby.hpp | 11 +++++---- cpp/src/groupby/common/utils.hpp | 3 ++- cpp/src/groupby/groupby.cu | 10 ++++---- cpp/src/groupby/hash/groupby.cu | 28 +++++++++++------------ cpp/src/groupby/sort/aggregate.cpp | 2 +- cpp/src/groupby/sort/group_nth_element.cu | 10 ++++---- cpp/src/groupby/sort/group_quantiles.cu | 8 +++---- cpp/src/groupby/sort/scan.cpp | 2 +- 9 files changed, 42 insertions(+), 39 deletions(-) diff --git a/cpp/include/cudf/detail/groupby.hpp b/cpp/include/cudf/detail/groupby.hpp index ce5fdb92bd1..36a76c7b6de 100644 --- a/cpp/include/cudf/detail/groupby.hpp +++ b/cpp/include/cudf/detail/groupby.hpp @@ -1,5 +1,5 @@ /* - * 
Copyright (c) 2019-2020, NVIDIA CORPORATION. + * Copyright (c) 2019-2021, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -16,6 +16,7 @@ #include #include +#include #include @@ -36,12 +37,12 @@ namespace hash { * @return true A hash-based groupby can be used * @return false A hash-based groupby cannot be used */ -bool can_use_hash_groupby(table_view const& keys, std::vector const& requests); +bool can_use_hash_groupby(table_view const& keys, host_span requests); // Hash-based groupby std::pair, std::vector> groupby( table_view const& keys, - std::vector const& requests, + host_span requests, null_policy include_null_keys, rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr); diff --git a/cpp/include/cudf/groupby.hpp b/cpp/include/cudf/groupby.hpp index 1dfacd53e0d..19f87873873 100644 --- a/cpp/include/cudf/groupby.hpp +++ b/cpp/include/cudf/groupby.hpp @@ -19,6 +19,7 @@ #include #include #include +#include #include @@ -163,7 +164,7 @@ class groupby { * specified in `requests`. */ std::pair, std::vector> aggregate( - std::vector const& requests, + host_span requests, rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); /** @@ -218,7 +219,7 @@ class groupby { * specified in `requests`. */ std::pair, std::vector> scan( - std::vector const& requests, + host_span requests, rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); /** @@ -277,18 +278,18 @@ class groupby { * aggregation requests. */ std::pair, std::vector> dispatch_aggregation( - std::vector const& requests, + host_span requests, rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr); // Sort-based groupby std::pair, std::vector> sort_aggregate( - std::vector const& requests, + host_span requests, rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr); std::pair, std::vector> sort_scan( - std::vector const& requests, + host_span requests, rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr); }; diff --git a/cpp/src/groupby/common/utils.hpp b/cpp/src/groupby/common/utils.hpp index 40bc96c6103..e8d5c60f81a 100644 --- a/cpp/src/groupby/common/utils.hpp +++ b/cpp/src/groupby/common/utils.hpp @@ -18,13 +18,14 @@ #include #include +#include #include namespace cudf { namespace groupby { namespace detail { inline std::vector extract_results( - std::vector const& requests, cudf::detail::result_cache& cache) + host_span requests, cudf::detail::result_cache& cache) { std::vector results(requests.size()); diff --git a/cpp/src/groupby/groupby.cu b/cpp/src/groupby/groupby.cu index cdd8ceb0a6c..0312d17a37c 100644 --- a/cpp/src/groupby/groupby.cu +++ b/cpp/src/groupby/groupby.cu @@ -55,7 +55,7 @@ groupby::groupby(table_view const& keys, // Select hash vs. 
sort groupby implementation std::pair, std::vector> groupby::dispatch_aggregation( - std::vector const& requests, + host_span requests, rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) { @@ -79,7 +79,7 @@ groupby::~groupby() = default; namespace { /// Make an empty table with appropriate types for requested aggs -auto empty_results(std::vector const& requests) +auto empty_results(host_span requests) { std::vector empty_results; @@ -102,7 +102,7 @@ auto empty_results(std::vector const& requests) } /// Verifies the agg requested on the request's values is valid -void verify_valid_requests(std::vector const& requests) +void verify_valid_requests(host_span requests) { CUDF_EXPECTS( std::all_of( @@ -143,7 +143,7 @@ void verify_valid_requests(std::vector const& requests) // Compute aggregation requests std::pair, std::vector> groupby::aggregate( - std::vector const& requests, rmm::mr::device_memory_resource* mr) + host_span requests, rmm::mr::device_memory_resource* mr) { CUDF_FUNC_RANGE(); CUDF_EXPECTS( @@ -161,7 +161,7 @@ std::pair, std::vector> groupby::aggr // Compute scan requests std::pair, std::vector> groupby::scan( - std::vector const& requests, rmm::mr::device_memory_resource* mr) + host_span requests, rmm::mr::device_memory_resource* mr) { CUDF_FUNC_RANGE(); CUDF_EXPECTS( diff --git a/cpp/src/groupby/hash/groupby.cu b/cpp/src/groupby/hash/groupby.cu index c54ecee9ccb..38aacbe59a7 100644 --- a/cpp/src/groupby/hash/groupby.cu +++ b/cpp/src/groupby/hash/groupby.cu @@ -110,7 +110,7 @@ class hash_compound_agg_finalizer final : public cudf::detail::aggregation_final data_type result_type; cudf::detail::result_cache* sparse_results; cudf::detail::result_cache* dense_results; - rmm::device_vector const& gather_map; + device_span gather_map; size_type const map_size; Map const& map; bitmask_type const* __restrict__ row_bitmask; @@ -122,7 +122,7 @@ class hash_compound_agg_finalizer final : public cudf::detail::aggregation_final column_view col, cudf::detail::result_cache* sparse_results, cudf::detail::result_cache* dense_results, - rmm::device_vector const& gather_map, + device_span gather_map, size_type map_size, Map const& map, bitmask_type const* row_bitmask, @@ -272,7 +272,7 @@ class hash_compound_agg_finalizer final : public cudf::detail::aggregation_final // flatten aggs to filter in single pass aggs std::tuple, std::vector> -flatten_single_pass_aggs(std::vector const& requests) +flatten_single_pass_aggs(host_span requests) { std::vector columns; std::vector agg_kinds; @@ -311,10 +311,10 @@ flatten_single_pass_aggs(std::vector const& requests) */ template void sparse_to_dense_results(table_view const& keys, - std::vector const& requests, + host_span requests, cudf::detail::result_cache* sparse_results, cudf::detail::result_cache* dense_results, - rmm::device_vector const& gather_map, + device_span gather_map, size_type map_size, Map const& map, bool keys_have_nulls, @@ -421,7 +421,7 @@ auto create_sparse_results_table(table_view const& flattened_values, */ template void compute_single_pass_aggs(table_view const& keys, - std::vector const& requests, + host_span requests, cudf::detail::result_cache* sparse_results, Map& map, null_policy include_null_keys, @@ -469,10 +469,10 @@ void compute_single_pass_aggs(table_view const& keys, * `map`. 
*/ template -std::pair, size_type> extract_populated_keys( +std::pair, size_type> extract_populated_keys( Map map, size_type num_keys, rmm::cuda_stream_view stream) { - rmm::device_vector populated_keys(num_keys); + rmm::device_uvector populated_keys(num_keys, stream); auto get_key = [] __device__(auto const& element) { size_type key, value; @@ -520,7 +520,7 @@ std::pair, size_type> extract_populated_keys( */ template std::unique_ptr
groupby_null_templated(table_view const& keys, - std::vector const& requests, + host_span requests, cudf::detail::result_cache* cache, null_policy include_null_keys, rmm::cuda_stream_view stream, @@ -539,9 +539,9 @@ std::unique_ptr
groupby_null_templated(table_view const& keys, // Extract the populated indices from the hash map and create a gather map. // Gathering using this map from sparse results will give dense results. - rmm::device_vector gather_map; - size_type map_size; - std::tie(gather_map, map_size) = extract_populated_keys(*map, keys.num_rows(), stream); + auto map_and_size = extract_populated_keys(*map, keys.num_rows(), stream); + rmm::device_uvector gather_map{std::move(map_and_size.first)}; + size_type const map_size = map_and_size.second; // Compact all results from sparse_results and insert into cache sparse_to_dense_results(keys, @@ -576,7 +576,7 @@ std::unique_ptr
groupby_null_templated(table_view const& keys, * @return true A hash-based groupby should be used * @return false A hash-based groupby should not be used */ -bool can_use_hash_groupby(table_view const& keys, std::vector const& requests) +bool can_use_hash_groupby(table_view const& keys, host_span requests) { return std::all_of(requests.begin(), requests.end(), [](aggregation_request const& r) { return std::all_of(r.aggregations.begin(), r.aggregations.end(), [](auto const& a) { @@ -588,7 +588,7 @@ bool can_use_hash_groupby(table_view const& keys, std::vector, std::vector> groupby( table_view const& keys, - std::vector const& requests, + host_span requests, null_policy include_null_keys, rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) diff --git a/cpp/src/groupby/sort/aggregate.cpp b/cpp/src/groupby/sort/aggregate.cpp index b171b19413b..86e2837967e 100644 --- a/cpp/src/groupby/sort/aggregate.cpp +++ b/cpp/src/groupby/sort/aggregate.cpp @@ -385,7 +385,7 @@ void aggregrate_result_functor::operator()(aggregation // Sort-based groupby std::pair, std::vector> groupby::sort_aggregate( - std::vector const& requests, + host_span requests, rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) { diff --git a/cpp/src/groupby/sort/group_nth_element.cu b/cpp/src/groupby/sort/group_nth_element.cu index 5c8e8b790d4..e6c10aa1056 100644 --- a/cpp/src/groupby/sort/group_nth_element.cu +++ b/cpp/src/groupby/sort/group_nth_element.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020, NVIDIA CORPORATION. + * Copyright (c) 2020-2021, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -69,7 +69,7 @@ std::unique_ptr group_nth_element(column_view const &values, auto bitmask_iterator = thrust::make_transform_iterator(cudf::detail::make_validity_iterator(*values_view), [] __device__(auto b) { return static_cast(b); }); - rmm::device_vector intra_group_index(values.size()); + rmm::device_uvector intra_group_index(values.size(), stream); // intra group index for valids only. thrust::exclusive_scan_by_key(rmm::exec_policy(stream), group_labels.begin(), @@ -77,9 +77,9 @@ std::unique_ptr group_nth_element(column_view const &values, bitmask_iterator, intra_group_index.begin()); // group_size to recalculate n if n<0 - rmm::device_vector group_count = [&] { + rmm::device_uvector group_count = [&] { if (n < 0) { - rmm::device_vector group_count(num_groups); + rmm::device_uvector group_count(num_groups, stream); thrust::reduce_by_key(rmm::exec_policy(stream), group_labels.begin(), group_labels.end(), @@ -88,7 +88,7 @@ std::unique_ptr group_nth_element(column_view const &values, group_count.begin()); return group_count; } else { - return rmm::device_vector(); + return rmm::device_uvector(0, stream); } }(); // gather the valid index == n diff --git a/cpp/src/groupby/sort/group_quantiles.cu b/cpp/src/groupby/sort/group_quantiles.cu index fcadb2e71fb..c9f9e3cad9e 100644 --- a/cpp/src/groupby/sort/group_quantiles.cu +++ b/cpp/src/groupby/sort/group_quantiles.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2020, NVIDIA CORPORATION. + * Copyright (c) 2019-2021, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -80,7 +80,7 @@ struct quantiles_functor { column_view const& group_sizes, cudf::device_span group_offsets, size_type const num_groups, - rmm::device_vector const& quantile, + device_span quantile, interpolation interpolation, rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) @@ -112,7 +112,7 @@ struct quantiles_functor { *group_size_view, *result_view, group_offsets.data(), - quantile.data().get(), + quantile.data(), static_cast(quantile.size()), interpolation}); } else { @@ -125,7 +125,7 @@ struct quantiles_functor { *group_size_view, *result_view, group_offsets.data(), - quantile.data().get(), + quantile.data(), static_cast(quantile.size()), interpolation}); } diff --git a/cpp/src/groupby/sort/scan.cpp b/cpp/src/groupby/sort/scan.cpp index 63de4ea8684..3d7ccf18242 100644 --- a/cpp/src/groupby/sort/scan.cpp +++ b/cpp/src/groupby/sort/scan.cpp @@ -107,7 +107,7 @@ void scan_result_functor::operator()(aggregation const& // Sort-based groupby std::pair, std::vector> groupby::sort_scan( - std::vector const& requests, + host_span requests, rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) { From ad5452d7eb417527ad6bd0b6a29a544466b38429 Mon Sep 17 00:00:00 2001 From: David <45795991+davidwendt@users.noreply.github.com> Date: Fri, 26 Mar 2021 10:24:00 -0400 Subject: [PATCH 15/24] Add gbenchmark for nvtext replace-tokens function (#7708) Reference #5696 Creates gbenchmarks for the `nvtext::replace_tokens()` function. The benchmarks measure various string lengths and numbers of rows with the default whitespace delimiter and 4 hardcoded tokens. This API already uses the `make_strings_children` utility. Authors: - David (@davidwendt) Approvers: - Karthikeyan (@karthikeyann) - Nghia Truong (@ttnghia) - @nvdbaranec - Keith Kraus (@kkraus14) URL: https://github.com/rapidsai/cudf/pull/7708 --- cpp/benchmarks/CMakeLists.txt | 5 +- cpp/benchmarks/text/replace_benchmark.cpp | 85 +++++++++++++++++++++++ 2 files changed, 88 insertions(+), 2 deletions(-) create mode 100644 cpp/benchmarks/text/replace_benchmark.cpp diff --git a/cpp/benchmarks/CMakeLists.txt b/cpp/benchmarks/CMakeLists.txt index 7fd84b508ac..43ca6de11b4 100644 --- a/cpp/benchmarks/CMakeLists.txt +++ b/cpp/benchmarks/CMakeLists.txt @@ -177,8 +177,9 @@ ConfigureBench(BINARYOP_BENCH binaryop/binaryop_benchmark.cu) ConfigureBench(TEXT_BENCH text/normalize_benchmark.cpp text/normalize_spaces_benchmark.cpp - text/tokenize_benchmark.cpp - text/subword_benchmark.cpp) + text/replace_benchmark.cpp + text/subword_benchmark.cpp + text/tokenize_benchmark.cpp) ################################################################################################### # - strings benchmark ------------------------------------------------------------------- diff --git a/cpp/benchmarks/text/replace_benchmark.cpp b/cpp/benchmarks/text/replace_benchmark.cpp new file mode 100644 index 00000000000..f5428aee225 --- /dev/null +++ b/cpp/benchmarks/text/replace_benchmark.cpp @@ -0,0 +1,85 @@ +/* + * Copyright (c) 2021, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include +#include +#include +#include + +#include +#include +#include + +#include + +class TextReplace : public cudf::benchmark { +}; + +static void BM_replace(benchmark::State& state) +{ + auto const n_rows = static_cast(state.range(0)); + auto const n_length = static_cast(state.range(1)); + + std::vector words{" ", "one ", "two ", "three ", "four ", + "five ", "six ", "sevén ", "eight ", "nine ", + "ten ", "eleven ", "twelve ", "thirteen ", "fourteen ", + "fifteen ", "sixteen ", "seventeen ", "eighteen ", "nineteen "}; + + std::default_random_engine generator; + std::uniform_int_distribution tokens_dist(0, words.size() - 1); + std::string row; // build a row of random tokens + while (static_cast(row.size()) < n_length) row += words[tokens_dist(generator)]; + + std::uniform_int_distribution position_dist(0, 16); + + auto elements = cudf::detail::make_counting_transform_iterator( + 0, [&](auto idx) { return row.c_str() + position_dist(generator); }); + cudf::test::strings_column_wrapper input(elements, elements + n_rows); + cudf::strings_column_view view(input); + + cudf::test::strings_column_wrapper targets({"one", "two", "sevén", "zero"}); + cudf::test::strings_column_wrapper replacements({"1", "2", "7", "0"}); + + for (auto _ : state) { + cuda_event_timer raii(state, true, 0); + nvtext::replace_tokens( + view, cudf::strings_column_view(targets), cudf::strings_column_view(replacements)); + } + + state.SetBytesProcessed(state.iterations() * view.chars_size()); +} + +static void generate_bench_args(benchmark::internal::Benchmark* b) +{ + int const min_rows = 1 << 12; + int const max_rows = 1 << 24; + int const row_multiplier = 8; + int const min_row_length = 1 << 5; + int const max_row_length = 1 << 13; + int const length_multiplier = 4; + generate_string_bench_args( + b, min_rows, max_rows, row_multiplier, min_row_length, max_row_length, length_multiplier); +} + +#define NVTEXT_BENCHMARK_DEFINE(name) \ + BENCHMARK_DEFINE_F(TextReplace, name) \ + (::benchmark::State & st) { BM_replace(st); } \ + BENCHMARK_REGISTER_F(TextReplace, name) \ + ->Apply(generate_bench_args) \ + ->UseManualTime() \ + ->Unit(benchmark::kMillisecond); + +NVTEXT_BENCHMARK_DEFINE(replace) From bf2e96c70c9c7097ecf64ad413550be2f75374b8 Mon Sep 17 00:00:00 2001 From: Ashwin Srinath <3190405+shwina@users.noreply.github.com> Date: Fri, 26 Mar 2021 11:42:37 -0400 Subject: [PATCH 16/24] Add support for `unique` groupby aggregation (#7726) Adds support for `SeriesGroupBy.unique()`. Also adds support for `DataFrameGroupBy.unique()` but that's not tested, as Pandas doesn't support it (yet?). 
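For illustration, a minimal usage sketch of the new aggregation (it mirrors the test added in this PR and is shown here only as an example, not as exhaustive documentation of the behavior):

```python
import cudf

gdf = cudf.DataFrame({"by": [1, 1, 2, 2], "data": [0, 0, 1, 1]})

# Collect the distinct values within each group, following pandas'
# SeriesGroupBy.unique() semantics.
print(gdf.groupby("by")["data"].unique())
```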
Resolves https://github.com/rapidsai/cudf/issues/2973 Authors: - Ashwin Srinath (@shwina) Approvers: - Keith Kraus (@kkraus14) URL: https://github.com/rapidsai/cudf/pull/7726 --- docs/cudf/source/groupby.md | 1 + python/cudf/cudf/_lib/aggregation.pyx | 7 +++--- python/cudf/cudf/_lib/groupby.pyx | 31 +++++++++++++++++++------- python/cudf/cudf/tests/test_groupby.py | 31 +++++++++++++++++++++++++- 4 files changed, 57 insertions(+), 13 deletions(-) diff --git a/docs/cudf/source/groupby.md b/docs/cudf/source/groupby.md index 7e96d4fe38c..5376df261e7 100644 --- a/docs/cudf/source/groupby.md +++ b/docs/cudf/source/groupby.md @@ -137,6 +137,7 @@ The following table summarizes the available aggregations and the types that sup | nunique | ✅ | ✅ | ✅ | ✅ | | | | nth | ✅ | ✅ | ✅ | | | | | collect | ✅ | ✅ | ✅ | | ✅ | | +| unique | ✅ | ✅ | ✅ | ✅ | | | ## GroupBy apply diff --git a/python/cudf/cudf/_lib/aggregation.pyx b/python/cudf/cudf/_lib/aggregation.pyx index 840f0c98987..7138bb49743 100644 --- a/python/cudf/cudf/_lib/aggregation.pyx +++ b/python/cudf/cudf/_lib/aggregation.pyx @@ -41,7 +41,7 @@ class AggregationKind(Enum): ALL = libcudf_aggregation.aggregation.Kind.ALL SUM_OF_SQUARES = libcudf_aggregation.aggregation.Kind.SUM_OF_SQUARES MEAN = libcudf_aggregation.aggregation.Kind.MEAN - VARIANCE = libcudf_aggregation.aggregation.Kind.VARIANCE + VAR = libcudf_aggregation.aggregation.Kind.VARIANCE STD = libcudf_aggregation.aggregation.Kind.STD MEDIAN = libcudf_aggregation.aggregation.Kind.MEDIAN QUANTILE = libcudf_aggregation.aggregation.Kind.QUANTILE @@ -50,13 +50,12 @@ class AggregationKind(Enum): NUNIQUE = libcudf_aggregation.aggregation.Kind.NUNIQUE NTH = libcudf_aggregation.aggregation.Kind.NTH_ELEMENT COLLECT = libcudf_aggregation.aggregation.Kind.COLLECT - COLLECT_SET = libcudf_aggregation.aggregation.Kind.COLLECT_SET + UNIQUE = libcudf_aggregation.aggregation.Kind.COLLECT_SET PTX = libcudf_aggregation.aggregation.Kind.PTX CUDA = libcudf_aggregation.aggregation.Kind.CUDA cdef class Aggregation: - def __init__(self, op, **kwargs): self.c_obj = move(make_aggregation(op, kwargs)) @@ -246,7 +245,7 @@ cdef class _AggregationFactory: return agg @classmethod - def collect_set(cls): + def unique(cls): cdef Aggregation agg = Aggregation.__new__(Aggregation) agg.c_obj = move(libcudf_aggregation.make_collect_set_aggregation()) return agg diff --git a/python/cudf/cudf/_lib/groupby.pyx b/python/cudf/cudf/_lib/groupby.pyx index 0f5cdc73d3b..713a2274a77 100644 --- a/python/cudf/cudf/_lib/groupby.pyx +++ b/python/cudf/cudf/_lib/groupby.pyx @@ -35,13 +35,15 @@ _GROUPBY_AGGS = { "median", "nunique", "nth", - "collect" + "collect", + "unique", } _CATEGORICAL_AGGS = { "count", "size", "nunique", + "unique", } _STRING_AGGS = { @@ -51,13 +53,15 @@ _STRING_AGGS = { "min", "nunique", "nth", - "collect" + "collect", + "unique", } _LIST_AGGS = { - "collect" + "collect", } + cdef class GroupBy: cdef unique_ptr[libcudf_groupby.groupby] c_obj cdef dict __dict__ @@ -145,12 +149,23 @@ cdef class GroupBy: vector[libcudf_groupby.aggregation_result] ] c_result - with nogil: - c_result = move( - self.c_obj.get()[0].aggregate( - c_agg_requests + try: + with nogil: + c_result = move( + self.c_obj.get()[0].aggregate( + c_agg_requests + ) ) - ) + except RuntimeError as e: + # TODO: remove this try..except after + # https://github.com/rapidsai/cudf/issues/7611 + # is resolved + if ("make_empty_column") in str(e): + raise NotImplementedError( + "Aggregation not supported for empty columns" + ) from e + else: + 
raise grouped_keys = Table.from_unique_ptr( move(c_result.first), diff --git a/python/cudf/cudf/tests/test_groupby.py b/python/cudf/cudf/tests/test_groupby.py index 8011510d340..a96db59dee3 100644 --- a/python/cudf/cudf/tests/test_groupby.py +++ b/python/cudf/cudf/tests/test_groupby.py @@ -12,7 +12,13 @@ import cudf from cudf.core import DataFrame, Series from cudf.core._compat import PANDAS_GE_110 -from cudf.tests.utils import assert_eq, assert_exceptions_equal +from cudf.tests.utils import ( + DATETIME_TYPES, + SIGNED_TYPES, + TIMEDELTA_TYPES, + assert_eq, + assert_exceptions_equal, +) _now = np.datetime64("now") _tomorrow = _now + np.timedelta64(1, "D") @@ -1532,3 +1538,26 @@ def test_groupby_nonempty_no_keys(pdf): lambda: gdf.groupby([]), compare_error_message=False, ) + + +@pytest.mark.parametrize( + "by,data", + [ + # ([], []), # error? + ([1, 1, 2, 2], [0, 0, 1, 1]), + ([1, 2, 3, 4], [0, 0, 0, 0]), + ([1, 2, 1, 2], [0, 1, 1, 1]), + ], +) +@pytest.mark.parametrize( + "dtype", + SIGNED_TYPES + DATETIME_TYPES + TIMEDELTA_TYPES + ["string", "category"], +) +def test_groupby_unique(by, data, dtype): + pdf = pd.DataFrame({"by": by, "data": data}) + pdf["data"] = pdf["data"].astype(dtype) + gdf = cudf.from_pandas(pdf) + + expect = pdf.groupby("by")["data"].unique() + got = gdf.groupby("by")["data"].unique() + assert_eq(expect, got) From b0586c4e8988b836d8bcdeddfd5d384b7011af6f Mon Sep 17 00:00:00 2001 From: "Robert (Bobby) Evans" Date: Fri, 26 Mar 2021 14:23:06 -0500 Subject: [PATCH 17/24] Added JNI support for new is_integer (#7739) Adds JNI bindings for improved is_integer with bounds checks Authors: - Robert (Bobby) Evans (@revans2) Approvers: - Jason Lowe (@jlowe) URL: https://github.com/rapidsai/cudf/pull/7739 --- .../main/java/ai/rapids/cudf/ColumnView.java | 21 ++++++- java/src/main/native/src/ColumnViewJni.cpp | 17 +++++ .../java/ai/rapids/cudf/ColumnVectorTest.java | 63 +++++++++++++++++++ 3 files changed, 99 insertions(+), 2 deletions(-) diff --git a/java/src/main/java/ai/rapids/cudf/ColumnView.java b/java/src/main/java/ai/rapids/cudf/ColumnView.java index e50a9e86ead..b29b873092d 100644 --- a/java/src/main/java/ai/rapids/cudf/ColumnView.java +++ b/java/src/main/java/ai/rapids/cudf/ColumnView.java @@ -288,19 +288,34 @@ public final ColumnVector isNull() { /** * Returns a Boolean vector with the same number of rows as this instance, that has * TRUE for any entry that is an integer, and FALSE if its not an integer. A null will be returned - * for null entries + * for null entries. * * NOTE: Integer doesn't mean a 32-bit integer. It means a number that is not a fraction. * i.e. If this method returns true for a value it could still result in an overflow or underflow * if you convert it to a Java integral type * - * @return - Boolean vector + * @return Boolean vector */ public final ColumnVector isInteger() { assert type.equals(DType.STRING); return new ColumnVector(isInteger(getNativeView())); } + /** + * Returns a Boolean vector with the same number of rows as this instance, that has + * TRUE for any entry that is an integer, and FALSE if its not an integer. A null will be returned + * for null entries. + * + * @param intType the data type that should be used for bounds checking. Note that only + * integer types are allowed. 
+ * @return Boolean vector + */ + public final ColumnVector isInteger(DType intType) { + assert type.equals(DType.STRING); + return new ColumnVector(isIntegerWithType(getNativeView(), + intType.getTypeId().getNativeId(), intType.getScale())); + } + /** * Returns a Boolean vector with the same number of rows as this instance, that has * TRUE for any entry that is a float, and FALSE if its not a float. A null will be returned @@ -2845,6 +2860,8 @@ private static native long rollingWindow( private static native long isInteger(long viewHandle); + private static native long isIntegerWithType(long viewHandle, int typeId, int typeScale); + private static native long isNotNanNative(long viewHandle); private static native long isNotNullNative(long viewHandle); diff --git a/java/src/main/native/src/ColumnViewJni.cpp b/java/src/main/native/src/ColumnViewJni.cpp index 4132016d85c..3928794b55c 100644 --- a/java/src/main/native/src/ColumnViewJni.cpp +++ b/java/src/main/native/src/ColumnViewJni.cpp @@ -1788,6 +1788,23 @@ JNIEXPORT jlong JNICALL Java_ai_rapids_cudf_ColumnView_isInteger(JNIEnv *env, jo CATCH_STD(env, 0) } +JNIEXPORT jlong JNICALL Java_ai_rapids_cudf_ColumnView_isIntegerWithType(JNIEnv *env, jobject, + jlong handle, + jint j_dtype, + jint scale) { + + JNI_NULL_CHECK(env, handle, "native view handle is null", 0) + + try { + cudf::jni::auto_set_device(env); + cudf::column_view *view = reinterpret_cast(handle); + cudf::data_type int_dtype = cudf::jni::make_data_type(j_dtype, scale); + std::unique_ptr result = cudf::strings::is_integer(*view, int_dtype); + return reinterpret_cast(result.release()); + } + CATCH_STD(env, 0) +} + JNIEXPORT jlong JNICALL Java_ai_rapids_cudf_ColumnView_copyColumnViewToCV(JNIEnv *env, jobject j_object, jlong handle) { diff --git a/java/src/test/java/ai/rapids/cudf/ColumnVectorTest.java b/java/src/test/java/ai/rapids/cudf/ColumnVectorTest.java index 02fbe56431b..5a9404f5760 100644 --- a/java/src/test/java/ai/rapids/cudf/ColumnVectorTest.java +++ b/java/src/test/java/ai/rapids/cudf/ColumnVectorTest.java @@ -3339,6 +3339,69 @@ void testNansToNulls() { } } + @Test + void testIsIntegerWithBounds() { + String[] intStrings = {"A", "nan", "Inf", "-Inf", "3.5", + String.valueOf(Byte.MIN_VALUE), + String.valueOf(Byte.MIN_VALUE + 1L), + String.valueOf(Byte.MIN_VALUE - 1L), + String.valueOf(Byte.MAX_VALUE), + String.valueOf(Byte.MAX_VALUE + 1L), + String.valueOf(Byte.MAX_VALUE - 1L), + String.valueOf(Short.MIN_VALUE), + String.valueOf(Short.MIN_VALUE + 1L), + String.valueOf(Short.MIN_VALUE - 1L), + String.valueOf(Short.MAX_VALUE), + String.valueOf(Short.MAX_VALUE + 1L), + String.valueOf(Short.MAX_VALUE - 1L), + String.valueOf(Integer.MIN_VALUE), + String.valueOf(Integer.MIN_VALUE + 1L), + String.valueOf(Integer.MIN_VALUE - 1L), + String.valueOf(Integer.MAX_VALUE), + String.valueOf(Integer.MAX_VALUE + 1L), + String.valueOf(Integer.MAX_VALUE - 1L), + String.valueOf(Long.MIN_VALUE), + String.valueOf(Long.MIN_VALUE + 1L), + "-9223372036854775809", + String.valueOf(Long.MAX_VALUE), + "9223372036854775808", + String.valueOf(Long.MAX_VALUE - 1L)}; + try (ColumnVector intStringCV = ColumnVector.fromStrings(intStrings); + ColumnVector isByte = intStringCV.isInteger(DType.INT8); + ColumnVector expectedByte = ColumnVector.fromBoxedBooleans( + false, false, false, false, false, + true, true, false, true, false, true, + false, false, false, false, false, false, + false, false, false, false, false, false, + false, false, false, false, false, false); + ColumnVector isShort = 
intStringCV.isInteger(DType.INT16); + ColumnVector expectedShort = ColumnVector.fromBoxedBooleans( + false, false, false, false, false, + true, true, true, true, true, true, + true, true, false, true, false, true, + false, false, false, false, false, false, + false, false, false, false, false, false); + ColumnVector isInt = intStringCV.isInteger(DType.INT32); + ColumnVector expectedInt = ColumnVector.fromBoxedBooleans( + false, false, false, false, false, + true, true, true, true, true, true, + true, true, true, true, true, true, + true, true, false, true, false, true, + false, false, false, false, false, false); + ColumnVector isLong = intStringCV.isInteger(DType.INT64); + ColumnVector expectedLong = ColumnVector.fromBoxedBooleans( + false, false, false, false, false, + true, true, true, true, true, true, + true, true, true, true, true, true, + true, true, true, true, true, true, + true, true, false, true, false, true)) { + assertColumnsAreEqual(expectedByte, isByte); + assertColumnsAreEqual(expectedShort, isShort); + assertColumnsAreEqual(expectedInt, isInt); + assertColumnsAreEqual(expectedLong, isLong); + } + } + @Test void testIsInteger() { String[] intStrings = {"A", "nan", "Inf", "-Inf", "Infinity", "infinity", "2147483647", From add4b4535999dcc200b7fdf83298b90d0495af96 Mon Sep 17 00:00:00 2001 From: Kumar Aatish Date: Fri, 26 Mar 2021 22:26:02 -0400 Subject: [PATCH 18/24] Fix string length in stripe dictionary building (#7744) In PR #7676 the length of the current string being referred to while building stripe dictionaries was always set to 0 while incrementing the dictionary character count of a StripeDictionary. This led to corrupted strings when the dictionary encoding was used as noted in issue #7741. This has been fixed in this PR. Fixes #7741 Authors: - Kumar Aatish (@kaatish) Approvers: - Vukasin Milovanovic (@vuule) - Nghia Truong (@ttnghia) URL: https://github.com/rapidsai/cudf/pull/7744 --- cpp/src/io/orc/dict_enc.cu | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/cpp/src/io/orc/dict_enc.cu b/cpp/src/io/orc/dict_enc.cu index 5695e882a95..e69a61bde66 100644 --- a/cpp/src/io/orc/dict_enc.cu +++ b/cpp/src/io/orc/dict_enc.cu @@ -396,7 +396,10 @@ __global__ void __launch_bounds__(block_size) uint32_t cur = (i + t < num_strings) ? dict_data[i + t] : 0; uint32_t cur_len = 0; bool is_dupe = false; - if (i + t < num_strings) { current_string = s->stripe.leaf_column->element(cur); } + if (i + t < num_strings) { + current_string = s->stripe.leaf_column->element(cur); + cur_len = current_string.size_bytes(); + } if (i + t != 0 && i + t < num_strings) { uint32_t prev = dict_data[i + t - 1]; is_dupe = (current_string == (s->stripe.leaf_column->element(prev))); From 44adf97fc49e5569b83b31ad5c7f05f6b64c20bd Mon Sep 17 00:00:00 2001 From: Vukasin Milovanovic Date: Fri, 26 Mar 2021 22:48:53 -0700 Subject: [PATCH 19/24] Fix dictionary size computation in ORC writer (#7737) Fixes #7661 Corrects the field order in `std::accumulate` that computes the string column size w.r.t encoding. 
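To make the field-order pitfall concrete, below is a minimal, self-contained sketch; `column_cost` and `dict_stats` are hypothetical stand-ins for the writer's internal types and numbers, not the real definitions in `writer_impl.cu`:

```cpp
#include <cstddef>
#include <iostream>
#include <numeric>
#include <vector>

// Hypothetical stand-in for the per-column cost accumulated by the ORC writer.
struct column_cost {
  std::size_t direct{};      // bytes if the strings are written directly
  std::size_t dictionary{};  // bytes (chars + indices) if dictionary-encoded
};

// Hypothetical per-rowgroup dictionary statistics.
struct dict_stats {
  std::size_t string_char_count;
  std::size_t dict_char_count;
  std::size_t num_dict_strings;
};

int main()
{
  std::vector<dict_stats> rowgroups{{100, 30, 10}, {200, 40, 12}};

  // Aggregate initialization `{a, b}` fills members in declaration order, so the
  // accumulator must list the direct cost first; listing the dictionary cost first
  // silently swaps the two totals and therefore the encoding decision.
  auto const cost =
    std::accumulate(rowgroups.begin(), rowgroups.end(), column_cost{},
                    [](column_cost c, dict_stats const& dt) -> column_cost {
                      return {c.direct + dt.string_char_count,
                              c.dictionary + dt.dict_char_count + dt.num_dict_strings};
                    });

  // Dictionary encoding is kept only when it is cheaper than the direct encoding.
  std::cout << "direct=" << cost.direct << " dictionary=" << cost.dictionary << '\n';
  return 0;
}
```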
Authors: - Vukasin Milovanovic (@vuule) Approvers: - Kumar Aatish (@kaatish) - Ram (Ramakrishna Prabhu) (@rgsl888prabhu) URL: https://github.com/rapidsai/cudf/pull/7737 --- cpp/src/io/orc/writer_impl.cu | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/cpp/src/io/orc/writer_impl.cu b/cpp/src/io/orc/writer_impl.cu index cb75698fd8d..10050806552 100644 --- a/cpp/src/io/orc/writer_impl.cu +++ b/cpp/src/io/orc/writer_impl.cu @@ -321,8 +321,8 @@ void writer::impl::build_dictionaries(orc_column_view *columns, string_column_cost{}, [&](auto cost, auto rg_idx) -> string_column_cost { const auto &dt = dict[rg_idx * str_col_ids.size() + col_idx]; - return {cost.dictionary + dt.dict_char_count + dt.num_dict_strings, - cost.direct + dt.string_char_count}; + return {cost.direct + dt.string_char_count, + cost.dictionary + dt.dict_char_count + dt.num_dict_strings}; }); // Disable dictionary if it does not reduce the output size if (col_cost.dictionary >= col_cost.direct) { From ccc28d55202f6f6bb14718ed9022881ef0176b6e Mon Sep 17 00:00:00 2001 From: Karthikeyan <6488848+karthikeyann@users.noreply.github.com> Date: Sat, 27 Mar 2021 13:56:20 +0530 Subject: [PATCH 20/24] Use stream in groupby calls (#7705) **sort_groupby_helper::** - [x] sorted_values() - [x] grouped_values() - unique_keys() - sorted_keys() - [x] num_groups() - num_keys() - [x] key_sort_order() - [x] group_offsets() - [x] group_labels() - [x] unsorted_keys_labels() - [x] keys_bitmask_column() **groupby::** - [x] - dispatch_aggregation() Authors: - Karthikeyan (@karthikeyann) Approvers: - David (@davidwendt) - Ram (Ramakrishna Prabhu) (@rgsl888prabhu) URL: https://github.com/rapidsai/cudf/pull/7705 --- .../cudf/detail/groupby/sort_helper.hpp | 26 ++++---- cpp/src/groupby/groupby.cu | 4 +- cpp/src/groupby/sort/aggregate.cpp | 60 ++++++++++--------- cpp/src/groupby/sort/functors.hpp | 4 +- cpp/src/groupby/sort/scan.cpp | 13 ++-- cpp/src/groupby/sort/sort_helper.cu | 26 ++++---- cpp/src/rolling/grouped_rolling.cu | 4 +- 7 files changed, 72 insertions(+), 65 deletions(-) diff --git a/cpp/include/cudf/detail/groupby/sort_helper.hpp b/cpp/include/cudf/detail/groupby/sort_helper.hpp index a68d649b8c8..bfc9673d3cb 100644 --- a/cpp/include/cudf/detail/groupby/sort_helper.hpp +++ b/cpp/include/cudf/detail/groupby/sort_helper.hpp @@ -93,7 +93,7 @@ struct sort_groupby_helper { */ std::unique_ptr sorted_values( column_view const& values, - rmm::cuda_stream_view stream = rmm::cuda_stream_default, + rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); /** @@ -108,7 +108,7 @@ struct sort_groupby_helper { */ std::unique_ptr grouped_values( column_view const& values, - rmm::cuda_stream_view stream = rmm::cuda_stream_default, + rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); /** @@ -117,7 +117,7 @@ struct sort_groupby_helper { * @return a new table in which each row is a unique row in the sorted key table. */ std::unique_ptr
unique_keys( - rmm::cuda_stream_view stream = rmm::cuda_stream_default, + rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); /** @@ -126,13 +126,13 @@ struct sort_groupby_helper { * @return a new table containing the sorted keys. */ std::unique_ptr
sorted_keys( - rmm::cuda_stream_view stream = rmm::cuda_stream_default, + rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); /** * @brief Get the number of groups in `keys` */ - size_type num_groups() { return group_offsets().size() - 1; } + size_type num_groups(rmm::cuda_stream_view stream) { return group_offsets(stream).size() - 1; } /** * @brief Return the effective number of keys @@ -141,7 +141,7 @@ struct sort_groupby_helper { * When include_null_keys = NO, returned value is the number of rows in `keys` * in which no element is null */ - size_type num_keys(rmm::cuda_stream_view stream = rmm::cuda_stream_default); + size_type num_keys(rmm::cuda_stream_view stream); /** * @brief Get the sorted order of `keys`. @@ -156,7 +156,7 @@ struct sort_groupby_helper { * * @return the sort order indices for `keys`. */ - column_view key_sort_order(rmm::cuda_stream_view stream = rmm::cuda_stream_default); + column_view key_sort_order(rmm::cuda_stream_view stream); /** * @brief Get each group's offset into the sorted order of `keys`. @@ -169,13 +169,13 @@ struct sort_groupby_helper { * @return vector of offsets of the starting point of each group in the sorted * key table */ - index_vector const& group_offsets(rmm::cuda_stream_view stream = rmm::cuda_stream_default); + index_vector const& group_offsets(rmm::cuda_stream_view stream); /** * @brief Get the group labels corresponding to the sorted order of `keys`. * * Each group is assigned a unique numerical "label" in - * `[0, 1, 2, ... , num_groups() - 1, num_groups())`. + * `[0, 1, 2, ... , num_groups() - 1, num_groups(stream))`. * For a row in sorted `keys`, its corresponding group label indicates which * group it belongs to. * @@ -184,7 +184,7 @@ struct sort_groupby_helper { * * @return vector of group labels for each row in the sorted key column */ - index_vector const& group_labels(rmm::cuda_stream_view stream = rmm::cuda_stream_default); + index_vector const& group_labels(rmm::cuda_stream_view stream); private: /** @@ -192,7 +192,7 @@ struct sort_groupby_helper { * * Returns the group label for every row in the original `keys` table. For a * given unique key row, its group label is equivalent to what is returned by - * `group_labels()`. However, if a row contains a null value, and + * `group_labels(stream)`. However, if a row contains a null value, and * `include_null_keys == NO`, then its label is NULL. * * Computes and stores unsorted labels on first invocation and returns stored @@ -201,7 +201,7 @@ struct sort_groupby_helper { * @return A nullable column of `INT32` containing group labels in the order * of the unsorted key table */ - column_view unsorted_keys_labels(rmm::cuda_stream_view stream = rmm::cuda_stream_default); + column_view unsorted_keys_labels(rmm::cuda_stream_view stream); /** * @brief Get the column representing the row bitmask for the `keys` @@ -215,7 +215,7 @@ struct sort_groupby_helper { * Computes and stores bitmask on first invocation and returns stored column * on subsequent calls. 
*/ - column_view keys_bitmask_column(rmm::cuda_stream_view stream = rmm::cuda_stream_default); + column_view keys_bitmask_column(rmm::cuda_stream_view stream); private: column_ptr _key_sorted_order; ///< Indices to produce _keys in sorted order diff --git a/cpp/src/groupby/groupby.cu b/cpp/src/groupby/groupby.cu index 0312d17a37c..34c57996af3 100644 --- a/cpp/src/groupby/groupby.cu +++ b/cpp/src/groupby/groupby.cu @@ -156,7 +156,7 @@ std::pair, std::vector> groupby::aggr if (_keys.num_rows() == 0) { return std::make_pair(empty_like(_keys), empty_results(requests)); } - return dispatch_aggregation(requests, 0, mr); + return dispatch_aggregation(requests, rmm::cuda_stream_default, mr); } // Compute scan requests @@ -190,7 +190,7 @@ groupby::groups groupby::get_groups(table_view values, rmm::mr::device_memory_re if (values.num_columns()) { auto grouped_values = cudf::detail::gather(values, - helper().key_sort_order(), + helper().key_sort_order(rmm::cuda_stream_default), cudf::out_of_bounds_policy::DONT_CHECK, cudf::detail::negative_index_policy::NOT_ALLOWED, rmm::cuda_stream_default, diff --git a/cpp/src/groupby/sort/aggregate.cpp b/cpp/src/groupby/sort/aggregate.cpp index 86e2837967e..4e2303c8b9b 100644 --- a/cpp/src/groupby/sort/aggregate.cpp +++ b/cpp/src/groupby/sort/aggregate.cpp @@ -70,8 +70,9 @@ void aggregrate_result_functor::operator()(aggregation agg, get_grouped_values().nullable() ? detail::group_count_valid( - get_grouped_values(), helper.group_labels(), helper.num_groups(), stream, mr) - : detail::group_count_all(helper.group_offsets(), helper.num_groups(), stream, mr)); + get_grouped_values(), helper.group_labels(stream), helper.num_groups(stream), stream, mr) + : detail::group_count_all( + helper.group_offsets(stream), helper.num_groups(stream), stream, mr)); } template <> @@ -80,7 +81,9 @@ void aggregrate_result_functor::operator()(aggregation c if (cache.has_result(col_idx, agg)) return; cache.add_result( - col_idx, agg, detail::group_count_all(helper.group_offsets(), helper.num_groups(), stream, mr)); + col_idx, + agg, + detail::group_count_all(helper.group_offsets(stream), helper.num_groups(stream), stream, mr)); } template <> @@ -88,10 +91,11 @@ void aggregrate_result_functor::operator()(aggregation const& { if (cache.has_result(col_idx, agg)) return; - cache.add_result(col_idx, - agg, - detail::group_sum( - get_grouped_values(), helper.num_groups(), helper.group_labels(), stream, mr)); + cache.add_result( + col_idx, + agg, + detail::group_sum( + get_grouped_values(), helper.num_groups(stream), helper.group_labels(stream), stream, mr)); }; template <> @@ -102,9 +106,9 @@ void aggregrate_result_functor::operator()(aggregation cons cache.add_result(col_idx, agg, detail::group_argmax(get_grouped_values(), - helper.num_groups(), - helper.group_labels(), - helper.key_sort_order(), + helper.num_groups(stream), + helper.group_labels(stream), + helper.key_sort_order(stream), stream, mr)); }; @@ -117,9 +121,9 @@ void aggregrate_result_functor::operator()(aggregation cons cache.add_result(col_idx, agg, detail::group_argmin(get_grouped_values(), - helper.num_groups(), - helper.group_labels(), - helper.key_sort_order(), + helper.num_groups(stream), + helper.group_labels(stream), + helper.key_sort_order(stream), stream, mr)); }; @@ -132,7 +136,7 @@ void aggregrate_result_functor::operator()(aggregation const& auto result = [&]() { if (cudf::is_fixed_width(values.type())) { return detail::group_min( - get_grouped_values(), helper.num_groups(), helper.group_labels(), stream, mr); + 
get_grouped_values(), helper.num_groups(stream), helper.group_labels(stream), stream, mr); } else { auto argmin_agg = make_argmin_aggregation(); operator()(*argmin_agg); @@ -169,7 +173,7 @@ void aggregrate_result_functor::operator()(aggregation const& auto result = [&]() { if (cudf::is_fixed_width(values.type())) { return detail::group_max( - get_grouped_values(), helper.num_groups(), helper.group_labels(), stream, mr); + get_grouped_values(), helper.num_groups(stream), helper.group_labels(stream), stream, mr); } else { auto argmax_agg = make_argmax_aggregation(); operator()(*argmax_agg); @@ -238,7 +242,7 @@ void aggregrate_result_functor::operator()(aggregation co auto result = detail::group_var(get_grouped_values(), mean_result, group_sizes, - helper.group_labels(), + helper.group_labels(stream), var_agg._ddof, stream, mr); @@ -271,8 +275,8 @@ void aggregrate_result_functor::operator()(aggregation co auto result = detail::group_quantiles(get_sorted_values(), group_sizes, - helper.group_offsets(), - helper.num_groups(), + helper.group_offsets(stream), + helper.num_groups(stream), quantile_agg._quantiles, quantile_agg._interpolation, stream, @@ -291,8 +295,8 @@ void aggregrate_result_functor::operator()(aggregation cons auto result = detail::group_quantiles(get_sorted_values(), group_sizes, - helper.group_offsets(), - helper.num_groups(), + helper.group_offsets(stream), + helper.num_groups(stream), {0.5}, interpolation::LINEAR, stream, @@ -308,9 +312,9 @@ void aggregrate_result_functor::operator()(aggregation con auto nunique_agg = static_cast(agg); auto result = detail::group_nunique(get_sorted_values(), - helper.group_labels(), - helper.num_groups(), - helper.group_offsets(), + helper.group_labels(stream), + helper.num_groups(stream), + helper.group_offsets(stream), nunique_agg._null_handling, stream, mr); @@ -337,9 +341,9 @@ void aggregrate_result_functor::operator()(aggregation agg, detail::group_nth_element(get_grouped_values(), group_sizes, - helper.group_labels(), - helper.group_offsets(), - helper.num_groups(), + helper.group_labels(stream), + helper.group_offsets(stream), + helper.num_groups(stream), nth_element_agg._n, nth_element_agg._null_handling, stream, @@ -357,7 +361,7 @@ void aggregrate_result_functor::operator()(aggregatio if (cache.has_result(col_idx, agg)) return; auto result = detail::group_collect( - get_grouped_values(), helper.group_offsets(), helper.num_groups(), stream, mr); + get_grouped_values(), helper.group_offsets(stream), helper.num_groups(stream), stream, mr); cache.add_result(col_idx, agg, std::move(result)); }; @@ -373,7 +377,7 @@ void aggregrate_result_functor::operator()(aggregation if (cache.has_result(col_idx, agg)) { return; } auto const collect_result = detail::group_collect( - get_grouped_values(), helper.group_offsets(), helper.num_groups(), stream, mr); + get_grouped_values(), helper.group_offsets(stream), helper.num_groups(stream), stream, mr); auto const nulls_equal = static_cast(agg)._null_equal; cache.add_result(col_idx, diff --git a/cpp/src/groupby/sort/functors.hpp b/cpp/src/groupby/sort/functors.hpp index 565320fbe80..afb92f8e141 100644 --- a/cpp/src/groupby/sort/functors.hpp +++ b/cpp/src/groupby/sort/functors.hpp @@ -64,7 +64,7 @@ struct store_result_functor { // It's overridden in scan implementation. 
return sorted_values->view(); else - return (grouped_values = helper.grouped_values(values))->view(); + return (grouped_values = helper.grouped_values(values, stream))->view(); }; /** @@ -76,7 +76,7 @@ struct store_result_functor { column_view get_sorted_values() { return sorted_values ? sorted_values->view() - : (sorted_values = helper.sorted_values(values))->view(); + : (sorted_values = helper.sorted_values(values, stream))->view(); }; protected: diff --git a/cpp/src/groupby/sort/scan.cpp b/cpp/src/groupby/sort/scan.cpp index 3d7ccf18242..336a6777ffa 100644 --- a/cpp/src/groupby/sort/scan.cpp +++ b/cpp/src/groupby/sort/scan.cpp @@ -59,7 +59,7 @@ struct scan_result_functor final : store_result_functor { if (grouped_values) return grouped_values->view(); else - return (grouped_values = helper.grouped_values(values))->view(); + return (grouped_values = helper.grouped_values(values, stream))->view(); }; }; @@ -71,7 +71,8 @@ void scan_result_functor::operator()(aggregation const& agg) cache.add_result( col_idx, agg, - detail::sum_scan(get_grouped_values(), helper.num_groups(), helper.group_labels(), stream, mr)); + detail::sum_scan( + get_grouped_values(), helper.num_groups(stream), helper.group_labels(stream), stream, mr)); } template <> @@ -82,7 +83,8 @@ void scan_result_functor::operator()(aggregation const& agg) cache.add_result( col_idx, agg, - detail::min_scan(get_grouped_values(), helper.num_groups(), helper.group_labels(), stream, mr)); + detail::min_scan( + get_grouped_values(), helper.num_groups(stream), helper.group_labels(stream), stream, mr)); } template <> @@ -93,7 +95,8 @@ void scan_result_functor::operator()(aggregation const& agg) cache.add_result( col_idx, agg, - detail::max_scan(get_grouped_values(), helper.num_groups(), helper.group_labels(), stream, mr)); + detail::max_scan( + get_grouped_values(), helper.num_groups(stream), helper.group_labels(stream), stream, mr)); } template <> @@ -101,7 +104,7 @@ void scan_result_functor::operator()(aggregation const& { if (cache.has_result(col_idx, agg)) return; - cache.add_result(col_idx, agg, detail::count_scan(helper.group_labels(), stream, mr)); + cache.add_result(col_idx, agg, detail::count_scan(helper.group_labels(stream), stream, mr)); } } // namespace detail diff --git a/cpp/src/groupby/sort/sort_helper.cu b/cpp/src/groupby/sort/sort_helper.cu index 6a9da36e21b..5e944f75712 100644 --- a/cpp/src/groupby/sort/sort_helper.cu +++ b/cpp/src/groupby/sort/sort_helper.cu @@ -141,7 +141,7 @@ column_view sort_groupby_helper::key_sort_order(rmm::cuda_stream_view stream) // presence of a null value within a row. This allows moving all rows that // contain a null value to the end of the sorted order. 
- auto augmented_keys = table_view({table_view({keys_bitmask_column()}), _keys}); + auto augmented_keys = table_view({table_view({keys_bitmask_column(stream)}), _keys}); _key_sorted_order = cudf::detail::stable_sorted_order( augmented_keys, @@ -164,7 +164,7 @@ sort_groupby_helper::index_vector const& sort_groupby_helper::group_offsets( _group_offsets = std::make_unique(num_keys(stream) + 1, stream); auto device_input_table = table_device_view::create(_keys, stream); - auto sorted_order = key_sort_order().data(); + auto sorted_order = key_sort_order(stream).data(); decltype(_group_offsets->begin()) result_end; if (has_nulls(_keys)) { @@ -207,9 +207,9 @@ sort_groupby_helper::index_vector const& sort_groupby_helper::group_labels( group_labels.end(), index_vector::value_type{0}); thrust::scatter(rmm::exec_policy(stream), - thrust::make_constant_iterator(1, decltype(num_groups())(1)), - thrust::make_constant_iterator(1, num_groups()), - group_offsets().begin() + 1, + thrust::make_constant_iterator(1, decltype(num_groups(stream))(1)), + thrust::make_constant_iterator(1, num_groups(stream)), + group_offsets(stream).begin() + 1, group_labels.begin()); thrust::inclusive_scan( @@ -226,9 +226,9 @@ column_view sort_groupby_helper::unsorted_keys_labels(rmm::cuda_stream_view stre data_type(type_to_id()), _keys.num_rows(), mask_state::ALL_NULL, stream); auto group_labels_view = cudf::column_view( - data_type(type_to_id()), group_labels().size(), group_labels().data()); + data_type(type_to_id()), group_labels(stream).size(), group_labels(stream).data()); - auto scatter_map = key_sort_order(); + auto scatter_map = key_sort_order(stream); std::unique_ptr
t_unsorted_keys_labels = cudf::detail::scatter(table_view({group_labels_view}), @@ -267,7 +267,7 @@ sort_groupby_helper::column_ptr sort_groupby_helper::sorted_values( column_view const& values, rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) { column_ptr values_sort_order = - cudf::detail::stable_sorted_order(table_view({unsorted_keys_labels(), values}), + cudf::detail::stable_sorted_order(table_view({unsorted_keys_labels(stream), values}), {}, std::vector(2, null_order::AFTER), stream, @@ -289,7 +289,7 @@ sort_groupby_helper::column_ptr sort_groupby_helper::sorted_values( sort_groupby_helper::column_ptr sort_groupby_helper::grouped_values( column_view const& values, rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) { - auto gather_map = key_sort_order(); + auto gather_map = key_sort_order(stream); auto grouped_values_table = cudf::detail::gather(table_view({values}), gather_map, @@ -304,14 +304,14 @@ sort_groupby_helper::column_ptr sort_groupby_helper::grouped_values( std::unique_ptr
sort_groupby_helper::unique_keys(rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) { - auto idx_data = key_sort_order().data(); + auto idx_data = key_sort_order(stream).data(); auto gather_map_it = thrust::make_transform_iterator( - group_offsets().begin(), [idx_data] __device__(size_type i) { return idx_data[i]; }); + group_offsets(stream).begin(), [idx_data] __device__(size_type i) { return idx_data[i]; }); return cudf::detail::gather(_keys, gather_map_it, - gather_map_it + num_groups(), + gather_map_it + num_groups(stream), out_of_bounds_policy::DONT_CHECK, stream, mr); @@ -321,7 +321,7 @@ std::unique_ptr
sort_groupby_helper::sorted_keys(rmm::cuda_stream_view st rmm::mr::device_memory_resource* mr) { return cudf::detail::gather(_keys, - key_sort_order(), + key_sort_order(stream), cudf::out_of_bounds_policy::DONT_CHECK, cudf::detail::negative_index_policy::NOT_ALLOWED, stream, diff --git a/cpp/src/rolling/grouped_rolling.cu b/cpp/src/rolling/grouped_rolling.cu index b8cb5e45fec..34d6d5fa194 100644 --- a/cpp/src/rolling/grouped_rolling.cu +++ b/cpp/src/rolling/grouped_rolling.cu @@ -838,8 +838,8 @@ std::unique_ptr grouped_time_range_rolling_window(table_view const& grou index_vector group_offsets(0, stream), group_labels(0, stream); if (group_keys.num_columns() > 0) { sort_groupby_helper helper{group_keys, cudf::null_policy::INCLUDE, cudf::sorted::YES}; - group_offsets = index_vector(helper.group_offsets(), stream); - group_labels = index_vector(helper.group_labels(), stream); + group_offsets = index_vector(helper.group_offsets(stream), stream); + group_labels = index_vector(helper.group_labels(stream), stream); } // Assumes that `timestamp_column` is actually of a timestamp type. From fe7ec857c01a410521cffbb215527742510c642c Mon Sep 17 00:00:00 2001 From: Conor Hoekstra <36027403+codereport@users.noreply.github.com> Date: Mon, 29 Mar 2021 05:35:28 -0400 Subject: [PATCH 21/24] Fix `cudf::cast` overflow for `decimal64` to `int32_t` or smaller in certain cases (#7733) @galipremsagar found an issue with `cudf::cast` for `decimal64`. His test case was when you have a value un-representable in `int32_t`. The cast operation would cast to early and therefore overflow. This PR fixes that issue. Resolves https://github.com/rapidsai/cudf/issues/7689 Authors: - Conor Hoekstra (@codereport) Approvers: - Mike Wilson (@hyperbolic2346) - Ram (Ramakrishna Prabhu) (@rgsl888prabhu) URL: https://github.com/rapidsai/cudf/pull/7733 --- cpp/include/cudf/fixed_point/fixed_point.hpp | 51 ++++++++++++++++---- cpp/tests/unary/cast_tests.cpp | 27 +++++++++++ 2 files changed, 69 insertions(+), 9 deletions(-) diff --git a/cpp/include/cudf/fixed_point/fixed_point.hpp b/cpp/include/cudf/fixed_point/fixed_point.hpp index eb752a8a0ea..952075b1703 100644 --- a/cpp/include/cudf/fixed_point/fixed_point.hpp +++ b/cpp/include/cudf/fixed_point/fixed_point.hpp @@ -218,14 +218,15 @@ class fixed_point { using rep = Rep; /** - * @brief Constructor that will perform shifting to store value appropriately + * @brief Constructor that will perform shifting to store value appropriately (from floating point + * types) * - * @tparam T The type that you are constructing from (integral or floating) + * @tparam T The floating point type that you are constructing from * @param value The value that will be constructed from * @param scale The exponent that is applied to Rad to perform shifting */ template () && + typename cuda::std::enable_if_t() && is_supported_representation_type()>* = nullptr> CUDA_HOST_DEVICE_CALLABLE explicit fixed_point(T const& value, scale_type const& scale) : _value{static_cast(detail::shift(value, scale))}, _scale{scale} @@ -233,8 +234,25 @@ class fixed_point { } /** - * @brief Constructor that will not perform shifting (assumes value already - * shifted) + * @brief Constructor that will perform shifting to store value appropriately (from integral + * types) + * + * @tparam T The integral type that you are constructing from + * @param value The value that will be constructed from + * @param scale The exponent that is applied to Rad to perform shifting + */ + template () && + 
is_supported_representation_type()>* = nullptr> + CUDA_HOST_DEVICE_CALLABLE explicit fixed_point(T const& value, scale_type const& scale) + // `value` is cast to `Rep` to avoid overflow in cases where + // constructing to `Rep` that is wider than `T` + : _value{detail::shift(static_cast(value), scale)}, _scale{scale} + { + } + + /** + * @brief Constructor that will not perform shifting (assumes value already shifted) * * @param s scaled_integer that contains scale and already shifted value */ @@ -260,18 +278,33 @@ class fixed_point { fixed_point() : _value{0}, _scale{scale_type{0}} {} /** - * @brief Explicit conversion operator + * @brief Explicit conversion operator for casting to floating point types * - * @tparam U The type that is being explicitly converted to (integral or floating) + * @tparam U The floating point type that is being explicitly converted to * @return The `fixed_point` number in base 10 (aka human readable format) */ template ()>* = nullptr> - CUDA_HOST_DEVICE_CALLABLE explicit constexpr operator U() const + typename cuda::std::enable_if_t::value>* = nullptr> + explicit constexpr operator U() const { return detail::shift(static_cast(_value), detail::negate(_scale)); } + /** + * @brief Explicit conversion operator for casting to integral types + * + * @tparam U The integral type that is being explicitly converted to + * @return The `fixed_point` number in base 10 (aka human readable format) + */ + template ::value>* = nullptr> + explicit constexpr operator U() const + { + // Don't cast to U until converting to Rep because in certain cases casting to U before shifting + // will result in integer overflow (i.e. if U = int32_t, Rep = int64_t and _value > 2 billion) + return static_cast(detail::shift(_value, detail::negate(_scale))); + } + CUDA_HOST_DEVICE_CALLABLE operator scaled_integer() const { return scaled_integer{_value, _scale}; diff --git a/cpp/tests/unary/cast_tests.cpp b/cpp/tests/unary/cast_tests.cpp index e8953ab9a30..15d014f9d9c 100644 --- a/cpp/tests/unary/cast_tests.cpp +++ b/cpp/tests/unary/cast_tests.cpp @@ -537,6 +537,9 @@ inline auto make_fixed_point_data_type(int32_t scale) return cudf::data_type{cudf::type_to_id(), scale}; } +struct FixedPointTestSingleType : public cudf::test::BaseFixture { +}; + template struct FixedPointTests : public cudf::test::BaseFixture { }; @@ -592,6 +595,18 @@ TYPED_TEST(FixedPointTests, CastToInt32) CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, result->view()); } +TEST_F(FixedPointTestSingleType, CastDecimal64ToInt32) +{ + using fp_wrapper = cudf::test::fixed_point_column_wrapper; + using fw_wrapper = cudf::test::fixed_width_column_wrapper; + + auto const input = fp_wrapper{{7246212000}, numeric::scale_type{-5}}; + auto const expected = fw_wrapper{72462}; + auto const result = cudf::cast(input, make_data_type()); + + CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, result->view()); +} + TYPED_TEST(FixedPointTests, CastToIntLarge) { using namespace numeric; @@ -659,6 +674,18 @@ TYPED_TEST(FixedPointTests, CastFromInt) CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, result->view()); } +TEST_F(FixedPointTestSingleType, CastInt32ToDecimal64) +{ + using fp_wrapper = cudf::test::fixed_point_column_wrapper; + using fw_wrapper = cudf::test::fixed_width_column_wrapper; + + auto const input = fw_wrapper{-48938}; + auto const expected = fp_wrapper{{-4893800000LL}, numeric::scale_type{-5}}; + auto const result = cudf::cast(input, make_fixed_point_data_type(-5)); + + CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, result->view()); +} + TYPED_TEST(FixedPointTests, 
CastFromIntLarge) { using namespace numeric; From d9103c4b7998610abc05aa9d85a5a89f3b347251 Mon Sep 17 00:00:00 2001 From: David <45795991+davidwendt@users.noreply.github.com> Date: Mon, 29 Mar 2021 11:50:06 -0400 Subject: [PATCH 22/24] Add gbenchmark for nvtext ngrams functions (#7693) Reference #5696 Creates a gbenchmark for `nvtext::generate_ngrams()` and `nvtext::generate_character_ngrams()` functions. The benchmarks measures various string lengths and number of rows. The `nvtext::generate_ngrams()` was refactored to use the more efficient `make_strings_children` which improved its performance by about 50%. Authors: - David (@davidwendt) Approvers: - Nghia Truong (@ttnghia) - Mark Harris (@harrism) URL: https://github.com/rapidsai/cudf/pull/7693 --- cpp/benchmarks/CMakeLists.txt | 1 + cpp/benchmarks/text/ngrams_benchmark.cpp | 76 ++++++++++++++++++++++++ cpp/src/text/generate_ngrams.cu | 37 ++++-------- 3 files changed, 87 insertions(+), 27 deletions(-) create mode 100644 cpp/benchmarks/text/ngrams_benchmark.cpp diff --git a/cpp/benchmarks/CMakeLists.txt b/cpp/benchmarks/CMakeLists.txt index 43ca6de11b4..5aa7e0132f8 100644 --- a/cpp/benchmarks/CMakeLists.txt +++ b/cpp/benchmarks/CMakeLists.txt @@ -175,6 +175,7 @@ ConfigureBench(BINARYOP_BENCH binaryop/binaryop_benchmark.cu) ################################################################################################### # - nvtext benchmark ------------------------------------------------------------------- ConfigureBench(TEXT_BENCH + text/ngrams_benchmark.cpp text/normalize_benchmark.cpp text/normalize_spaces_benchmark.cpp text/replace_benchmark.cpp diff --git a/cpp/benchmarks/text/ngrams_benchmark.cpp b/cpp/benchmarks/text/ngrams_benchmark.cpp new file mode 100644 index 00000000000..1fe8e3b7f2e --- /dev/null +++ b/cpp/benchmarks/text/ngrams_benchmark.cpp @@ -0,0 +1,76 @@ +/* + * Copyright (c) 2021, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include +#include +#include +#include +#include + +#include +#include +#include + +#include + +class TextNGrams : public cudf::benchmark { +}; + +enum class ngrams_type { tokens, characters }; + +static void BM_ngrams(benchmark::State& state, ngrams_type nt) +{ + auto const n_rows = static_cast(state.range(0)); + auto const max_str_length = static_cast(state.range(1)); + data_profile table_profile; + table_profile.set_distribution_params( + cudf::type_id::STRING, distribution_id::NORMAL, 0, max_str_length); + auto const table = + create_random_table({cudf::type_id::STRING}, 1, row_count{n_rows}, table_profile); + cudf::strings_column_view input(table->view().column(0)); + + for (auto _ : state) { + cuda_event_timer raii(state, true, 0); + switch (nt) { + case ngrams_type::tokens: nvtext::generate_ngrams(input); break; + case ngrams_type::characters: nvtext::generate_character_ngrams(input); break; + } + } + + state.SetBytesProcessed(state.iterations() * input.chars_size()); +} + +static void generate_bench_args(benchmark::internal::Benchmark* b) +{ + int const min_rows = 1 << 12; + int const max_rows = 1 << 24; + int const row_mult = 8; + int const min_rowlen = 5; + int const max_rowlen = 40; + int const len_mult = 2; + generate_string_bench_args(b, min_rows, max_rows, row_mult, min_rowlen, max_rowlen, len_mult); +} + +#define NVTEXT_BENCHMARK_DEFINE(name) \ + BENCHMARK_DEFINE_F(TextNGrams, name) \ + (::benchmark::State & st) { BM_ngrams(st, ngrams_type::name); } \ + BENCHMARK_REGISTER_F(TextNGrams, name) \ + ->Apply(generate_bench_args) \ + ->UseManualTime() \ + ->Unit(benchmark::kMillisecond); + +NVTEXT_BENCHMARK_DEFINE(tokens) +NVTEXT_BENCHMARK_DEFINE(characters) diff --git a/cpp/src/text/generate_ngrams.cu b/cpp/src/text/generate_ngrams.cu index 3c583622ed8..4a41dacbd30 100644 --- a/cpp/src/text/generate_ngrams.cu +++ b/cpp/src/text/generate_ngrams.cu @@ -50,7 +50,7 @@ struct ngram_generator_fn { cudf::column_device_view const d_strings; cudf::size_type ngrams; cudf::string_view const d_separator; - int32_t const* d_offsets{}; + int32_t* d_offsets{}; char* d_chars{}; /** @@ -62,7 +62,7 @@ struct ngram_generator_fn { * @param idx Index of the kernel thread. * @return Number of bytes required for the string for this thread. */ - __device__ cudf::size_type operator()(cudf::size_type idx) + __device__ void operator()(cudf::size_type idx) { char* out_ptr = d_chars ? 
d_chars + d_offsets[idx] : nullptr; cudf::size_type bytes = 0; @@ -74,7 +74,7 @@ struct ngram_generator_fn { bytes += d_separator.size_bytes(); if (out_ptr) out_ptr = cudf::strings::detail::copy_string(out_ptr, d_separator); } - return bytes; + if (!d_chars) d_offsets[idx] = bytes; } }; @@ -109,11 +109,11 @@ std::unique_ptr generate_ngrams( if (d_strings.is_null(idx)) return false; return !d_strings.element(idx).empty(); }, - stream, - mr) + stream) ->release(); strings_count = table_offsets.front()->size() - 1; - return std::move(table_offsets.front()); + auto result = std::move(table_offsets.front()); + return result; }(); // this allows freeing the temporary table_offsets CUDF_EXPECTS(strings_count >= ngrams, "Insufficient number of strings to generate ngrams"); @@ -131,30 +131,13 @@ std::unique_ptr generate_ngrams( // compute the number of strings of ngrams auto const ngrams_count = strings_count - ngrams + 1; - // build output offsets by computing the output bytes for each generated ngram - auto offsets_transformer_itr = cudf::detail::make_counting_transform_iterator( - 0, ngram_generator_fn{d_strings, ngrams, d_separator}); - auto offsets_column = cudf::strings::detail::make_offsets_child_column( - offsets_transformer_itr, offsets_transformer_itr + ngrams_count, stream, mr); - auto d_offsets = offsets_column->view().data(); - - // build the chars column - // generate the ngrams from the input strings and copy them into the chars data buffer - cudf::size_type const total_bytes = thrust::device_pointer_cast(d_offsets)[ngrams_count]; - auto chars_column = - cudf::strings::detail::create_chars_child_column(ngrams_count, 0, total_bytes, stream, mr); - char* const d_chars = chars_column->mutable_view().data(); - - thrust::for_each_n(rmm::exec_policy(stream), - thrust::make_counting_iterator(0), - ngrams_count, - ngram_generator_fn{d_strings, ngrams, d_separator, d_offsets, d_chars}); - chars_column->set_null_count(0); + auto children = cudf::strings::detail::make_strings_children( + ngram_generator_fn{d_strings, ngrams, d_separator}, ngrams_count, 0, stream, mr); // make the output strings column from the offsets and chars column return cudf::make_strings_column(ngrams_count, - std::move(offsets_column), - std::move(chars_column), + std::move(children.first), + std::move(children.second), 0, rmm::device_buffer{0, stream, mr}, stream, From 54dfaaa9e99a15e6e8f76106adba842f424fb160 Mon Sep 17 00:00:00 2001 From: Paul Taylor Date: Mon, 29 Mar 2021 12:13:04 -0500 Subject: [PATCH 23/24] Create and promote library aliases in libcudf installations (#7734) This PR ensures all `cudf::*` library aliases are created and promoted to `IMPORTED_GLOBAL` when `find_package(cudf)` finds cudf in a local build directory. ~This PR shouldn't affect CI or the targets you'd see when `libcudf` is installed (e.g. by conda), only local source builds.~ edit: This now fixes `cudf::*` alias targets for the `libcudf` installations too, needed by https://github.com/rapidsai/cuspatial/pull/365. 
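As a point of reference, here is a minimal sketch of a downstream CMake project that relies on the promoted aliases; the project name, the `cudf_ROOT` hint, and the use of the `testing` component are illustrative assumptions, not part of this PR:

```cmake
# Hypothetical consumer of a local libcudf build or install tree.
cmake_minimum_required(VERSION 3.18)
project(cudf_consumer LANGUAGES CXX CUDA)

# Point find_package at a local build, e.g. -Dcudf_ROOT=$CUDF_HOME/cpp/build
find_package(cudf 0.19 REQUIRED COMPONENTS testing)

add_executable(consumer main.cpp)
# Because cudf::cudf and cudf::cudftestutil are promoted to IMPORTED_GLOBAL,
# they can be linked from any directory scope of the consuming project.
target_link_libraries(consumer PRIVATE cudf::cudf cudf::cudftestutil GTest::gtest_main)
```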
Validation method: ```shell $ docker run --rm -it \ -w /tmp/findpackagecudf \ -v "/tmp/findpackagecudf:/tmp/findpackagecudf" \ gpuci/miniconda-cuda:10.2-devel-ubuntu18.04 bash # Set up mamba environment conda install -y -n base -c conda-forge mamba mamba update -y -n base -c defaults conda && mamba update -y -n base -c conda-forge mamba mamba install -y -n base -c conda-forge -c rapidsai-nightly \ git gtest gmock ninja cmake=3.18 gdal=3.0.2 boost-cpp=1.72.0 cudatoolkit=10.2 libcudf=0.19 # Copy changes in this PR (from the host) to container's /opt/conda/lib/cmake/cudf # cmake --install $CUDF_ROOT --prefix $CUDF_ROOT/local-install # docker cp $CUDF_ROOT/local-install/lib/cmake/cudf frosty_agnesi:/opt/conda/lib/cmake/ # Clone cuspatial git clone https://github.com/trxcllnt/cuspatial.git && cd cuspatial && git checkout fix/cmake-exports # Configure cuspatial rm -rf cpp/build && mkdir -p cpp/build \ && cmake -GNinja -B cpp/build -S cpp \ -DBUILD_TESTS=ON -DBUILD_BENCHMARKS=ON -DCMAKE_CUDA_ARCHITECTURES= ``` Authors: - Paul Taylor (@trxcllnt) - Robert Maynard (@robertmaynard) Approvers: - Robert Maynard (@robertmaynard) - Keith Kraus (@kkraus14) - Ray Douglass (@raydouglass) URL: https://github.com/rapidsai/cudf/pull/7734 --- conda/recipes/libcudf/meta.yaml | 2 +- cpp/CMakeLists.txt | 15 ++++--- cpp/cmake/cudf-build-config.cmake.in | 44 ++++++++++++++++--- cpp/cmake/cudf-config.cmake.in | 28 ++++++------ cpp/cmake/thirdparty/CUDF_GetGTest.cmake | 10 +---- cpp/cmake/thirdparty/CUDF_GetRMM.cmake | 5 --- .../cmake/thirdparty/CUDF_KAFKA_GetCUDF.cmake | 6 --- cpp/libcudf_kafka/tests/CMakeLists.txt | 6 +-- 8 files changed, 62 insertions(+), 54 deletions(-) diff --git a/conda/recipes/libcudf/meta.yaml b/conda/recipes/libcudf/meta.yaml index 1be8a6b450a..39587b4bd05 100644 --- a/conda/recipes/libcudf/meta.yaml +++ b/conda/recipes/libcudf/meta.yaml @@ -33,7 +33,7 @@ build: requirements: build: - - cmake >=3.17.0 + - cmake >=3.18 host: - librmm {{ minor_version }}.* - cudatoolkit {{ cuda_version }}.* diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index fc439ebfa7f..48562476070 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -554,12 +554,6 @@ if(CUDF_BUILD_BENCHMARKS) GIT_SHALLOW TRUE OPTIONS "BENCHMARK_ENABLE_TESTING OFF" "BENCHMARK_ENABLE_INSTALL OFF") - if(benchmark_ADDED) - install(TARGETS benchmark - benchmark_main - DESTINATION lib - EXPORT cudf-targets) - endif() add_subdirectory(benchmarks) endif() @@ -636,6 +630,15 @@ elseif(TARGET arrow_static) endif() endif() +if(TARGET gtest) + get_target_property(gtest_is_imported gtest IMPORTED) + if(NOT gtest_is_imported) + export(TARGETS gtest gmock gtest_main gmock_main + FILE ${CUDF_BINARY_DIR}/cudf-gtesting-targets.cmake + NAMESPACE GTest::) + endif() +endif() + export(EXPORT cudf-targets FILE ${CUDF_BINARY_DIR}/cudf-targets.cmake NAMESPACE cudf::) diff --git a/cpp/cmake/cudf-build-config.cmake.in b/cpp/cmake/cudf-build-config.cmake.in index d0c5a608e45..ed1926f20f0 100644 --- a/cpp/cmake/cudf-build-config.cmake.in +++ b/cpp/cmake/cudf-build-config.cmake.in @@ -2,6 +2,22 @@ cmake_minimum_required(VERSION 3.18) +set(_possible_targets_to_promote + cudf::cudf + GTest::gmock + GTest::gmock_main + GTest::gtest + GTest::gtest_main + cudf::cudftestutil + rmm::rmm + arrow_shared + arrow_cuda_shared ) +foreach(target IN LISTS _possible_targets_to_promote) + if(NOT TARGET ${target}) + list(APPEND _targets_to_promote ${target}) + endif() +endforeach() + set(CUDF_VERSION @CUDF_VERSION@) set(CUDF_VERSION_MAJOR @CUDF_VERSION_MAJOR@) 
set(CUDF_VERSION_MINOR @CUDF_VERSION_MINOR@) @@ -36,21 +52,29 @@ include(@CUDF_SOURCE_DIR@/cmake/thirdparty/CUDF_GetThrust.cmake) # find rmm set(CUDF_MIN_VERSION_rmm "${CUDF_VERSION_MAJOR}.${CUDF_VERSION_MINOR}") include(@CUDF_SOURCE_DIR@/cmake/thirdparty/CUDF_GetRMM.cmake) -# find gtest -include(@CUDF_SOURCE_DIR@/cmake/thirdparty/CUDF_GetGTest.cmake) # find arrow -if(NOT EXISTS "${CMAKE_CURRENT_LIST_DIR}/cudf-arrow-targets.cmake") +if(EXISTS "${CMAKE_CURRENT_LIST_DIR}/cudf-arrow-targets.cmake") + include("${CMAKE_CURRENT_LIST_DIR}/cudf-arrow-targets.cmake") +else() + if(NOT DEFINED CUDF_USE_ARROW_STATIC) + set(CUDF_USE_ARROW_STATIC OFF) + endif() include(@CUDF_SOURCE_DIR@/cmake/thirdparty/CUDF_GetArrow.cmake) endif() +# find GTest +if(EXISTS "${CMAKE_CURRENT_LIST_DIR}/cudf-gtesting-targets.cmake") + include("${CMAKE_CURRENT_LIST_DIR}/cudf-gtesting-targets.cmake") +else() + # find gtest + include(@CUDF_SOURCE_DIR@/cmake/thirdparty/CUDF_GetGTest.cmake) +endif() + list(POP_FRONT CMAKE_MODULE_PATH) -if(EXISTS "${CMAKE_CURRENT_LIST_DIR}/cudf-arrow-targets.cmake") - include("${CMAKE_CURRENT_LIST_DIR}/cudf-arrow-targets.cmake") -endif() -include("${CMAKE_CURRENT_LIST_DIR}/cudf-targets.cmake") +include("${CMAKE_CURRENT_LIST_DIR}/cudf-targets.cmake") if(EXISTS "${CMAKE_CURRENT_LIST_DIR}/cudf-testing-targets.cmake") include("${CMAKE_CURRENT_LIST_DIR}/cudf-testing-targets.cmake") endif() @@ -59,6 +83,12 @@ include("${CMAKE_CURRENT_LIST_DIR}/cudf-config-version.cmake") check_required_components(cudf) +foreach(target IN LISTS _targets_to_promote) + if(TARGET ${target}) + fix_cmake_global_defaults(${target}) + endif() +endforeach() + set(${CMAKE_FIND_PACKAGE_NAME}_CONFIG "${CMAKE_CURRENT_LIST_FILE}") include(FindPackageHandleStandardArgs) diff --git a/cpp/cmake/cudf-config.cmake.in b/cpp/cmake/cudf-config.cmake.in index 6a280264d3c..66c669851fa 100644 --- a/cpp/cmake/cudf-config.cmake.in +++ b/cpp/cmake/cudf-config.cmake.in @@ -26,11 +26,6 @@ This module offers an optional testing component which defines the following IMPORTED GLOBAL targets: cudf::cudftestutil - The main cudf testing library - cudf::gmock - cudf::gmock_main - cudf::gtest - cudf::gtest_main - Result Variables ^^^^^^^^^^^^^^^^ @@ -49,13 +44,11 @@ cmake_minimum_required(VERSION 3.18) set(_possible_targets_to_promote cudf::cudf - cudf::benchmark - cudf::benchmark_main - cudf::gmock - cudf::gtest - cudf::gmock_main - cudf::gtest_main cudf::cudftestutil + GTest::gmock + GTest::gmock_main + GTest::gtest + GTest::gtest_main rmm::rmm arrow_shared arrow_cuda_shared ) @@ -101,17 +94,22 @@ include("${CMAKE_CURRENT_LIST_DIR}/cudf-targets.cmake") if(testing IN_LIST cudf_FIND_COMPONENTS) enable_language(CUDA) - find_dependency(GTest @CUDF_MIN_VERSION_GTest@) + find_dependency(GTest @CUDF_MIN_VERSION_GTest@ CONFIG) + include("${CMAKE_CURRENT_LIST_DIR}/cudf-testing-targets.cmake") + endif() include("${CMAKE_CURRENT_LIST_DIR}/cudf-config-version.cmake") check_required_components(cudf) -foreach(t IN LISTS _targets_to_promote) - if(TARGET ${t}) - set_target_properties(${t} PROPERTIES IMPORTED_GLOBAL TRUE) +foreach(target IN LISTS _targets_to_promote) + if(TARGET ${target}) + get_target_property(_already_global ${target} IMPORTED_GLOBAL) + if(NOT _already_global) + set_target_properties(${target} PROPERTIES IMPORTED_GLOBAL TRUE) + endif() endif() endforeach() set(${CMAKE_FIND_PACKAGE_NAME}_CONFIG "${CMAKE_CURRENT_LIST_FILE}") diff --git a/cpp/cmake/thirdparty/CUDF_GetGTest.cmake b/cpp/cmake/thirdparty/CUDF_GetGTest.cmake index 666ba0fbb2c..9e4f3c137b1 
100644 --- a/cpp/cmake/thirdparty/CUDF_GetGTest.cmake +++ b/cpp/cmake/thirdparty/CUDF_GetGTest.cmake @@ -26,7 +26,7 @@ function(find_and_configure_gtest VERSION) GIT_REPOSITORY https://github.com/google/googletest.git GIT_TAG release-${VERSION} GIT_SHALLOW TRUE - OPTIONS "INSTALL_GTEST OFF" + OPTIONS "INSTALL_GTEST ON" # googletest >= 1.10.0 provides a cmake config file -- use it if it exists FIND_PACKAGE_ARGUMENTS "CONFIG") # Add GTest aliases if they don't already exist. @@ -43,14 +43,6 @@ function(find_and_configure_gtest VERSION) fix_cmake_global_defaults(GTest::gmock) fix_cmake_global_defaults(GTest::gtest_main) fix_cmake_global_defaults(GTest::gmock_main) - if(GTest_ADDED) - install(TARGETS gmock - gtest - gmock_main - gtest_main - DESTINATION lib - EXPORT cudf-testing-targets) - endif() endfunction() set(CUDF_MIN_VERSION_GTest 1.10.0) diff --git a/cpp/cmake/thirdparty/CUDF_GetRMM.cmake b/cpp/cmake/thirdparty/CUDF_GetRMM.cmake index e5d1f2f07a9..136947674f9 100644 --- a/cpp/cmake/thirdparty/CUDF_GetRMM.cmake +++ b/cpp/cmake/thirdparty/CUDF_GetRMM.cmake @@ -55,11 +55,6 @@ function(find_and_configure_rmm VERSION) # Make sure consumers of cudf can also see rmm::rmm fix_cmake_global_defaults(rmm::rmm) - - if(NOT rmm_BINARY_DIR IN_LIST CMAKE_PREFIX_PATH) - list(APPEND CMAKE_PREFIX_PATH "${rmm_BINARY_DIR}") - set(CMAKE_PREFIX_PATH ${CMAKE_PREFIX_PATH} PARENT_SCOPE) - endif() endfunction() set(CUDF_MIN_VERSION_rmm "${CUDF_VERSION_MAJOR}.${CUDF_VERSION_MINOR}") diff --git a/cpp/libcudf_kafka/cmake/thirdparty/CUDF_KAFKA_GetCUDF.cmake b/cpp/libcudf_kafka/cmake/thirdparty/CUDF_KAFKA_GetCUDF.cmake index 4796495413e..1f7c15d4f75 100644 --- a/cpp/libcudf_kafka/cmake/thirdparty/CUDF_KAFKA_GetCUDF.cmake +++ b/cpp/libcudf_kafka/cmake/thirdparty/CUDF_KAFKA_GetCUDF.cmake @@ -40,12 +40,6 @@ function(find_and_configure_cudf VERSION) "BUILD_BENCHMARKS OFF") cudfkafka_restore_if_enabled(BUILD_TESTS) cudfkafka_restore_if_enabled(BUILD_BENCHMARKS) - - if(NOT cudf_BINARY_DIR IN_LIST CMAKE_PREFIX_PATH) - list(APPEND CMAKE_PREFIX_PATH "${cudf_BINARY_DIR}") - set(CMAKE_PREFIX_PATH ${CMAKE_PREFIX_PATH} PARENT_SCOPE) - endif() - endfunction() set(CUDF_KAFKA_MIN_VERSION_cudf 0.19) diff --git a/cpp/libcudf_kafka/tests/CMakeLists.txt b/cpp/libcudf_kafka/tests/CMakeLists.txt index f556d36d9d2..e813ed5439e 100644 --- a/cpp/libcudf_kafka/tests/CMakeLists.txt +++ b/cpp/libcudf_kafka/tests/CMakeLists.txt @@ -21,11 +21,7 @@ function(ConfigureTest CMAKE_TEST_NAME ) add_executable(${CMAKE_TEST_NAME} ${ARGN}) set_target_properties(${CMAKE_TEST_NAME} PROPERTIES RUNTIME_OUTPUT_DIRECTORY "$") - if(TARGET cudf::gmock_main) - target_link_libraries(${CMAKE_TEST_NAME} PRIVATE cudf::gmock_main cudf::gtest_main cudf_kafka) - else() - target_link_libraries(${CMAKE_TEST_NAME} PRIVATE GTest::gmock_main GTest::gtest_main cudf_kafka) - endif() + target_link_libraries(${CMAKE_TEST_NAME} PRIVATE GTest::gmock_main GTest::gtest_main cudf_kafka) target_include_directories(${CMAKE_TEST_NAME} PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}/../include) add_test(NAME ${CMAKE_TEST_NAME} COMMAND ${CMAKE_TEST_NAME}) endfunction() From cddafd9b1dd3ab815020a513626a611cd8a50de0 Mon Sep 17 00:00:00 2001 From: Jason Lowe Date: Mon, 29 Mar 2021 12:35:27 -0500 Subject: [PATCH 24/24] Add replacements column support for Java replaceNulls (#7750) Adds Java bindings for `cudf::replace_nulls` with a columnar replacement parameter. 
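A minimal usage sketch of the new column-based overload (the column contents are illustrative and mirror the unit tests added below; each null row in the input picks up the value at the same row of the replacement column):

```java
import ai.rapids.cudf.ColumnVector;

public class ReplaceNullsColumnExample {
  public static void main(String[] args) {
    // Both columns must have the same type and number of rows.
    try (ColumnVector input = ColumnVector.fromBoxedInts(1, 2, null, 4, null);
         ColumnVector replacements = ColumnVector.fromBoxedInts(96, 97, 98, 99, 100);
         ColumnVector result = input.replaceNulls(replacements)) {
      // result now holds [1, 2, 98, 4, 100]; closing the vectors frees device memory.
    }
  }
}
```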
Authors: - Jason Lowe (@jlowe) Approvers: - Robert (Bobby) Evans (@revans2) URL: https://github.com/rapidsai/cudf/pull/7750 --- .../main/java/ai/rapids/cudf/ColumnView.java | 18 ++++++- java/src/main/native/src/ColumnViewJni.cpp | 20 +++++++- .../java/ai/rapids/cudf/ColumnVectorTest.java | 50 ++++++++++++++++--- 3 files changed, 78 insertions(+), 10 deletions(-) diff --git a/java/src/main/java/ai/rapids/cudf/ColumnView.java b/java/src/main/java/ai/rapids/cudf/ColumnView.java index b29b873092d..90fe3553abc 100644 --- a/java/src/main/java/ai/rapids/cudf/ColumnView.java +++ b/java/src/main/java/ai/rapids/cudf/ColumnView.java @@ -388,7 +388,19 @@ public final ColumnVector findAndReplaceAll(ColumnView oldValues, ColumnView new * @return - ColumnVector with nulls replaced by scalar */ public final ColumnVector replaceNulls(Scalar scalar) { - return new ColumnVector(replaceNulls(getNativeView(), scalar.getScalarHandle())); + return new ColumnVector(replaceNullsScalar(getNativeView(), scalar.getScalarHandle())); + } + + /** + * Returns a ColumnVector with any null values replaced with the corresponding row in the + * specified replacement column. + * This column and the replacement column must have the same type and number of rows. + * + * @param replacements column of replacement values + * @return column with nulls replaced by corresponding row of replacements column + */ + public final ColumnVector replaceNulls(ColumnView replacements) { + return new ColumnVector(replaceNullsColumn(getNativeView(), replacements.getNativeView())); } /** @@ -2840,7 +2852,9 @@ private static native long rollingWindow( private static native long charLengths(long viewHandle) throws CudfException; - private static native long replaceNulls(long viewHandle, long scalarHandle) throws CudfException; + private static native long replaceNullsScalar(long viewHandle, long scalarHandle) throws CudfException; + + private static native long replaceNullsColumn(long viewHandle, long replaceViewHandle) throws CudfException; private static native long ifElseVV(long predVec, long trueVec, long falseVec) throws CudfException; diff --git a/java/src/main/native/src/ColumnViewJni.cpp b/java/src/main/native/src/ColumnViewJni.cpp index 3928794b55c..dc1acc50b5f 100644 --- a/java/src/main/native/src/ColumnViewJni.cpp +++ b/java/src/main/native/src/ColumnViewJni.cpp @@ -121,8 +121,9 @@ JNIEXPORT jlong JNICALL Java_ai_rapids_cudf_ColumnView_lowerStrings(JNIEnv *env, CATCH_STD(env, 0); } -JNIEXPORT jlong JNICALL Java_ai_rapids_cudf_ColumnView_replaceNulls(JNIEnv *env, jclass, - jlong j_col, jlong j_scalar) { +JNIEXPORT jlong JNICALL Java_ai_rapids_cudf_ColumnView_replaceNullsScalar(JNIEnv *env, jclass, + jlong j_col, + jlong j_scalar) { JNI_NULL_CHECK(env, j_col, "column is null", 0); JNI_NULL_CHECK(env, j_scalar, "scalar is null", 0); try { @@ -135,6 +136,21 @@ JNIEXPORT jlong JNICALL Java_ai_rapids_cudf_ColumnView_replaceNulls(JNIEnv *env, CATCH_STD(env, 0); } +JNIEXPORT jlong JNICALL Java_ai_rapids_cudf_ColumnView_replaceNullsColumn(JNIEnv *env, jclass, + jlong j_col, + jlong j_replace_col) { + JNI_NULL_CHECK(env, j_col, "column is null", 0); + JNI_NULL_CHECK(env, j_replace_col, "replacement column is null", 0); + try { + cudf::jni::auto_set_device(env); + auto col = reinterpret_cast(j_col); + auto replacements = reinterpret_cast(j_replace_col); + std::unique_ptr result = cudf::replace_nulls(*col, *replacements); + return reinterpret_cast(result.release()); + } + CATCH_STD(env, 0); +} + JNIEXPORT jlong JNICALL 
Java_ai_rapids_cudf_ColumnView_ifElseVV(JNIEnv *env, jclass, jlong j_pred_vec, jlong j_true_vec, diff --git a/java/src/test/java/ai/rapids/cudf/ColumnVectorTest.java b/java/src/test/java/ai/rapids/cudf/ColumnVectorTest.java index 5a9404f5760..fe1cba5ceb1 100644 --- a/java/src/test/java/ai/rapids/cudf/ColumnVectorTest.java +++ b/java/src/test/java/ai/rapids/cudf/ColumnVectorTest.java @@ -1368,7 +1368,7 @@ void testFromScalarNullByte() { } @Test - void testReplaceEmptyColumn() { + void testReplaceNullsScalarEmptyColumn() { try (ColumnVector input = ColumnVector.fromBoxedBooleans(); ColumnVector expected = ColumnVector.fromBoxedBooleans(); Scalar s = Scalar.fromBool(false); @@ -1378,7 +1378,7 @@ void testReplaceEmptyColumn() { } @Test - void testReplaceNullBoolsWithAllNulls() { + void testReplaceNullsScalarBoolsWithAllNulls() { try (ColumnVector input = ColumnVector.fromBoxedBooleans(null, null, null, null); ColumnVector expected = ColumnVector.fromBoxedBooleans(false, false, false, false); Scalar s = Scalar.fromBool(false); @@ -1388,7 +1388,7 @@ void testReplaceNullBoolsWithAllNulls() { } @Test - void testReplaceSomeNullBools() { + void testReplaceNullsScalarSomeNullBools() { try (ColumnVector input = ColumnVector.fromBoxedBooleans(false, null, null, false); ColumnVector expected = ColumnVector.fromBoxedBooleans(false, true, true, false); Scalar s = Scalar.fromBool(true); @@ -1398,7 +1398,7 @@ void testReplaceSomeNullBools() { } @Test - void testReplaceNullIntegersWithAllNulls() { + void testReplaceNullsScalarIntegersWithAllNulls() { try (ColumnVector input = ColumnVector.fromBoxedInts(null, null, null, null); ColumnVector expected = ColumnVector.fromBoxedInts(0, 0, 0, 0); Scalar s = Scalar.fromInt(0); @@ -1408,7 +1408,7 @@ void testReplaceNullIntegersWithAllNulls() { } @Test - void testReplaceSomeNullIntegers() { + void testReplaceNullsScalarSomeNullIntegers() { try (ColumnVector input = ColumnVector.fromBoxedInts(1, 2, null, 4, null); ColumnVector expected = ColumnVector.fromBoxedInts(1, 2, 999, 4, 999); Scalar s = Scalar.fromInt(999); @@ -1418,7 +1418,7 @@ void testReplaceSomeNullIntegers() { } @Test - void testReplaceNullsFailsOnTypeMismatch() { + void testReplaceNullsScalarFailsOnTypeMismatch() { try (ColumnVector input = ColumnVector.fromBoxedInts(1, 2, null, 4, null); Scalar s = Scalar.fromBool(true)) { assertThrows(CudfException.class, () -> input.replaceNulls(s).close()); @@ -1434,6 +1434,44 @@ void testReplaceNullsWithNullScalar() { } } + @Test + void testReplaceNullsColumnEmptyColumn() { + try (ColumnVector input = ColumnVector.fromBoxedBooleans(); + ColumnVector r = ColumnVector.fromBoxedBooleans(); + ColumnVector expected = ColumnVector.fromBoxedBooleans(); + ColumnVector result = input.replaceNulls(r)) { + assertColumnsAreEqual(expected, result); + } + } + + @Test + void testReplaceNullsColumnBools() { + try (ColumnVector input = ColumnVector.fromBoxedBooleans(null, true, null, false); + ColumnVector r = ColumnVector.fromBoxedBooleans(false, null, true, true); + ColumnVector expected = ColumnVector.fromBoxedBooleans(false, true, true, false); + ColumnVector result = input.replaceNulls(r)) { + assertColumnsAreEqual(expected, result); + } + } + + @Test + void testReplaceNullsColumnIntegers() { + try (ColumnVector input = ColumnVector.fromBoxedInts(1, 2, null, 4, null); + ColumnVector r = ColumnVector.fromBoxedInts(996, 997, 998, 909, null); + ColumnVector expected = ColumnVector.fromBoxedInts(1, 2, 998, 4, null); + ColumnVector result = input.replaceNulls(r)) { + 
assertColumnsAreEqual(expected, result); + } + } + + @Test + void testReplaceNullsColumnFailsOnTypeMismatch() { + try (ColumnVector input = ColumnVector.fromBoxedInts(1, 2, null, 4, null); + ColumnVector r = ColumnVector.fromBoxedBooleans(true)) { + assertThrows(CudfException.class, () -> input.replaceNulls(r).close()); + } + } + static QuantileMethod[] methods = {LINEAR, LOWER, HIGHER, MIDPOINT, NEAREST}; static double[] quantiles = {0.0, 0.25, 0.33, 0.5, 1.0};