From 0a00579350e37e9fb9c159f22c4def8c60c6f8f5 Mon Sep 17 00:00:00 2001 From: Paul Taylor Date: Wed, 25 Aug 2021 13:15:27 -0500 Subject: [PATCH 1/8] Remove -g from cython compile commands (#9074) Removes `-g` from the compile commands generated by distutils to compile Cython files. This will make our container images, conda packages, and python wheels smaller. --- ci/gpu/build.sh | 4 +- conda/environments/cudf_dev_cuda11.0.yml | 4 +- conda/environments/cudf_dev_cuda11.2.yml | 4 +- python/cudf/setup.py | 81 ++++++++++++++++-------- 4 files changed, 62 insertions(+), 31 deletions(-) diff --git a/ci/gpu/build.sh b/ci/gpu/build.sh index 8ebc85e5736..d825de2ad04 100755 --- a/ci/gpu/build.sh +++ b/ci/gpu/build.sh @@ -101,8 +101,8 @@ function install_dask { # Install the main version of dask, distributed, and streamz gpuci_logger "Install the main version of dask, distributed, and streamz" set -x - pip install "git+https://github.com/dask/distributed.git@main" --upgrade --no-deps - pip install "git+https://github.com/dask/dask.git@main" --upgrade --no-deps + pip install "git+https://github.com/dask/distributed.git@2021.07.1" --upgrade --no-deps + pip install "git+https://github.com/dask/dask.git@2021.07.1" --upgrade --no-deps # Need to uninstall streamz that is already in the env. pip uninstall -y streamz pip install "git+https://github.com/python-streamz/streamz.git@master" --upgrade --no-deps diff --git a/conda/environments/cudf_dev_cuda11.0.yml b/conda/environments/cudf_dev_cuda11.0.yml index 62b59c3f081..d8635b09f8b 100644 --- a/conda/environments/cudf_dev_cuda11.0.yml +++ b/conda/environments/cudf_dev_cuda11.0.yml @@ -58,7 +58,7 @@ dependencies: - cachetools - transformers - pip: - - git+https://github.com/dask/dask.git@main - - git+https://github.com/dask/distributed.git@main + - git+https://github.com/dask/dask.git@2021.07.1 + - git+https://github.com/dask/distributed.git@2021.07.1 - git+https://github.com/python-streamz/streamz.git@master - pyorc diff --git a/conda/environments/cudf_dev_cuda11.2.yml b/conda/environments/cudf_dev_cuda11.2.yml index 94c7116802b..61af2f8aef1 100644 --- a/conda/environments/cudf_dev_cuda11.2.yml +++ b/conda/environments/cudf_dev_cuda11.2.yml @@ -58,7 +58,7 @@ dependencies: - cachetools - transformers - pip: - - git+https://github.com/dask/dask.git@main - - git+https://github.com/dask/distributed.git@main + - git+https://github.com/dask/dask.git@2021.07.1 + - git+https://github.com/dask/distributed.git@2021.07.1 - git+https://github.com/python-streamz/streamz.git@master - pyorc diff --git a/python/cudf/setup.py b/python/cudf/setup.py index 54921396b6f..e9fd3ae9d1f 100644 --- a/python/cudf/setup.py +++ b/python/cudf/setup.py @@ -6,13 +6,24 @@ import subprocess import sys import sysconfig + +# Must import in this order: +# setuptools -> Cython.Distutils.build_ext -> setuptools.command.build_ext +# Otherwise, setuptools.command.build_ext ends up inheriting from +# Cython.Distutils.old_build_ext which we do not want +import setuptools + +try: + from Cython.Distutils.build_ext import new_build_ext as _build_ext +except ImportError: + from setuptools.command.build_ext import build_ext as _build_ext + from distutils.spawn import find_executable from distutils.sysconfig import get_python_lib import numpy as np import pyarrow as pa -from Cython.Build import cythonize -from Cython.Distutils import build_ext +import setuptools.command.build_ext from 
setuptools import find_packages, setup from setuptools.extension import Extension @@ -105,22 +116,46 @@ def get_cuda_version_from_header(cuda_include_dir, delimeter=""): ), ) -try: - nthreads = int(os.environ.get("PARALLEL_LEVEL", "0") or "0") -except Exception: - nthreads = 0 -cmdclass = versioneer.get_cmdclass() +class build_ext_and_proto_no_debug(_build_ext): + def build_extensions(self): + def remove_flags(compiler, *flags): + for flag in flags: + try: + compiler.compiler_so = list( + filter((flag).__ne__, compiler.compiler_so) + ) + except Exception: + pass + # Full optimization + self.compiler.compiler_so.append("-O3") + # Silence '-Wunknown-pragmas' warning + self.compiler.compiler_so.append("-Wno-unknown-pragmas") + # No debug symbols, full optimization, no '-Wstrict-prototypes' warning + remove_flags( + self.compiler, "-g", "-G", "-O1", "-O2", "-Wstrict-prototypes" + ) + super().build_extensions() -class build_ext_and_proto(build_ext): - def build_extensions(self): - try: - # Silence the '-Wstrict-prototypes' warning - self.compiler.compiler_so.remove("-Wstrict-prototypes") - except Exception: - pass - build_ext.build_extensions(self) + def finalize_options(self): + if self.distribution.ext_modules: + # Delay import this to allow for Cython-less installs + from Cython.Build.Dependencies import cythonize + + nthreads = getattr(self, "parallel", None) # -j option in Py3.5+ + nthreads = int(nthreads) if nthreads else None + self.distribution.ext_modules = cythonize( + self.distribution.ext_modules, + nthreads=nthreads, + force=self.force, + gdb_debug=False, + compiler_directives=dict( + profile=False, language_level=3, embedsignature=True + ), + ) + # Skip calling super() and jump straight to setuptools + setuptools.command.build_ext.build_ext.finalize_options(self) def run(self): # Get protoc @@ -158,11 +193,9 @@ def run(self): src.write(new_src_content) # Run original Cython build_ext command - build_ext.run(self) + _build_ext.run(self) -cmdclass["build_ext"] = build_ext_and_proto - extensions = [ Extension( "*", @@ -196,6 +229,10 @@ def run(self): ) ] +cmdclass = versioneer.get_cmdclass() +cmdclass["build_ext"] = build_ext_and_proto_no_debug + + setup( name="cudf", version=versioneer.get_version(), @@ -214,13 +251,7 @@ def run(self): ], # Include the separately-compiled shared library setup_requires=["cython", "protobuf"], - ext_modules=cythonize( - extensions, - nthreads=nthreads, - compiler_directives=dict( - profile=False, language_level=3, embedsignature=True - ), - ), + ext_modules=extensions, packages=find_packages(include=["cudf", "cudf.*"]), package_data=dict.fromkeys( find_packages(include=["cudf._lib*"]), ["*.pxd"], From 9da7c01bf394243ae37319277e83a8edda3b4c70 Mon Sep 17 00:00:00 2001 From: David Wendt <45795991+davidwendt@users.noreply.github.com> Date: Wed, 22 Sep 2021 12:37:56 -0400 Subject: [PATCH 2/8] Fix call to thrust::reduce_by_key in argmin/argmax libcudf groupby (#9263) Closes #9156 This PR simplifies the parameters when calling thrust::reduce_by_key for the argmin/argmax aggregations in cudf::groupby. The illegalMemoryAccess found in #9156 was due to invalid data being passed from thrust::reduce_by_key through to the BinaryPredicate function as documented in NVIDIA/thrust#1525 The invalid data being passed is only a real issue for strings columns where the device pointer was neither nullptr nor a valid address. 
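In outline, the fix reduces over row indices and validates each index inside the comparator before dereferencing. A simplified sketch, condensed from the diff below (illustrative only; `d_col` is a `cudf::column_device_view` of the grouped values):

```cpp
// Bounds-checked ArgMin comparator over row indices (sketch of the operator in this PR).
template <typename T>
struct ArgMin {
  cudf::column_device_view const d_col;
  __device__ cudf::size_type operator()(cudf::size_type lhs, cudf::size_type rhs) const
  {
    // Out-of-bounds or null indices are simply ignored, so any garbage value
    // thrust::reduce_by_key feeds in cannot cause an invalid memory access.
    if (lhs < 0 || lhs >= d_col.size() || d_col.is_null(lhs)) { return rhs; }
    if (rhs < 0 || rhs >= d_col.size() || d_col.is_null(rhs)) { return lhs; }
    return d_col.element<T>(lhs) < d_col.element<T>(rhs) ? lhs : rhs;
  }
};
```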
The new logic provides only size_type values to thrust::reduce_by_key, so invalid values can only be out-of-bounds indices for the input column, which is easily checked before retrieving the string_view objects within the ArgMin and ArgMax operators. This is the same as #9244, but based on 21.10. Authors: - David Wendt (https://github.com/davidwendt) Approvers: - Devavret Makkar (https://github.com/devavret) - Nghia Truong (https://github.com/ttnghia) - Robert Maynard (https://github.com/robertmaynard) URL: https://github.com/rapidsai/cudf/pull/9263 --- .../sort/group_single_pass_reduction_util.cuh | 96 ++++++------------- 1 file changed, 30 insertions(+), 66 deletions(-) diff --git a/cpp/src/groupby/sort/group_single_pass_reduction_util.cuh b/cpp/src/groupby/sort/group_single_pass_reduction_util.cuh index 8eccadd653e..db2ae5b5d8e 100644 --- a/cpp/src/groupby/sort/group_single_pass_reduction_util.cuh +++ b/cpp/src/groupby/sort/group_single_pass_reduction_util.cuh @@ -31,77 +31,50 @@ #include #include #include -#include -#include #include namespace cudf { namespace groupby { namespace detail { -// ArgMin binary operator with tuple of (value, index) +/** + * @brief ArgMin binary operator with index values into input column. + * + * @tparam T Type of the underlying column. Must support '<' operator. + */ template <typename T> struct ArgMin { - CUDA_HOST_DEVICE_CALLABLE auto operator()(thrust::tuple<T, size_type> const& lhs, - thrust::tuple<T, size_type> const& rhs) const - { - if (thrust::get<1>(lhs) == cudf::detail::ARGMIN_SENTINEL) - return rhs; - else if (thrust::get<1>(rhs) == cudf::detail::ARGMIN_SENTINEL) - return lhs; - else - return thrust::get<0>(lhs) < thrust::get<0>(rhs) ? lhs : rhs; - } -}; - -// ArgMax binary operator with tuple of (value, index) template <typename T> -struct ArgMax { - CUDA_HOST_DEVICE_CALLABLE auto operator()(thrust::tuple<T, size_type> const& lhs, - thrust::tuple<T, size_type> const& rhs) const - { - if (thrust::get<1>(lhs) == cudf::detail::ARGMIN_SENTINEL) - return rhs; - else if (thrust::get<1>(rhs) == cudf::detail::ARGMIN_SENTINEL) - return lhs; - else - return thrust::get<0>(lhs) > thrust::get<0>(rhs) ? lhs : rhs; - } -}; - -struct get_tuple_second_element { - template <typename T> - __device__ size_type operator()(thrust::tuple<T, size_type> const& rhs) const + column_device_view const d_col; + CUDA_DEVICE_CALLABLE auto operator()(size_type const& lhs, size_type const& rhs) const { - return thrust::get<1>(rhs); + // The extra bounds checking is due to issue github.com/rapidsai/cudf/9156 and + // github.com/NVIDIA/thrust/issues/1525 + // where invalid random values may be passed here by thrust::reduce_by_key + if (lhs < 0 || lhs >= d_col.size() || d_col.is_null(lhs)) { return rhs; } + if (rhs < 0 || rhs >= d_col.size() || d_col.is_null(rhs)) { return lhs; } + return d_col.element<T>(lhs) < d_col.element<T>(rhs) ? lhs : rhs; } }; /** - * @brief Functor to store the boolean value to null mask. + * @brief ArgMax binary operator with index values into input column. + * + * @tparam T Type of the underlying column. Must support '<' operator.
*/ -struct bool_to_nullmask { - mutable_column_device_view d_result; - __device__ void operator()(size_type i, bool rhs) +template <typename T> +struct ArgMax { + column_device_view const d_col; + CUDA_DEVICE_CALLABLE auto operator()(size_type const& lhs, size_type const& rhs) const { - if (rhs) { - d_result.set_valid(i); - } else { - d_result.set_null(i); - } + // The extra bounds checking is due to issue github.com/rapidsai/cudf/9156 and + // github.com/NVIDIA/thrust/issues/1525 + // where invalid random values may be passed here by thrust::reduce_by_key + if (lhs < 0 || lhs >= d_col.size() || d_col.is_null(lhs)) { return rhs; } + if (rhs < 0 || rhs >= d_col.size() || d_col.is_null(rhs)) { return lhs; } + return d_col.element<T>(rhs) < d_col.element<T>(lhs) ? lhs : rhs; } }; -/** - * @brief Returns index for non-null element, and SENTINEL for null element in a column. - * - */ -struct null_as_sentinel { - column_device_view const col; - size_type const SENTINEL; - __device__ size_type operator()(size_type i) const { return col.is_null(i) ? SENTINEL : i; } -}; - /** * @brief Value accessor for column which supports dictionary column too. * @@ -191,25 +164,16 @@ struct reduce_functor { auto resultview = mutable_column_device_view::create(result->mutable_view(), stream); auto valuesview = column_device_view::create(values, stream); if constexpr (K == aggregation::ARGMAX || K == aggregation::ARGMIN) { - constexpr auto SENTINEL = - (K == aggregation::ARGMAX ? cudf::detail::ARGMAX_SENTINEL : cudf::detail::ARGMIN_SENTINEL); - auto idx_begin = - cudf::detail::make_counting_transform_iterator(0, null_as_sentinel{*valuesview, SENTINEL}); - // dictionary keys are sorted, so dictionary32 index comparison is enough. - auto column_begin = valuesview->begin<T>(); - auto begin = thrust::make_zip_iterator(thrust::make_tuple(column_begin, idx_begin)); - auto result_begin = thrust::make_transform_output_iterator(resultview->begin<size_type>(), - get_tuple_second_element{}); using OpType = std::conditional_t<(K == aggregation::ARGMAX), ArgMax<T>, ArgMin<T>>; thrust::reduce_by_key(rmm::exec_policy(stream), group_labels.data(), group_labels.data() + group_labels.size(), - begin, + thrust::make_counting_iterator(0), thrust::make_discard_iterator(), - result_begin, - thrust::equal_to<size_type>{}, - OpType{}); + resultview->begin<size_type>(), + thrust::equal_to<size_type>{}, + OpType{*valuesview}); } else { auto init = OpType::template identity(); auto begin = cudf::detail::make_counting_transform_iterator( From 20713dff41fd6668e6e631c148e86424597b4934 Mon Sep 17 00:00:00 2001 From: Michael Wang Date: Wed, 22 Sep 2021 09:55:34 -0700 Subject: [PATCH 3/8] Explicit about bitwidth difference between cudf boolean and arrow boolean (#9192) Currently, we map boolean type to `pa.int8` because the bitwidth of cudf boolean mismatches that in arrow. However, the implications of this mapping are subtle and may cause unwanted results such as: ```python >>> cudf.StructDtype({ "a": np.bool_, "b": np.int8, }) StructDtype({'a': dtype('int8'), 'b': dtype('int8')}) ``` This PR changes the mapping back to `pa.bool_`, and uses explicit type handling when we are dealing with type conversion to arrow. Authors: - Michael Wang (https://github.com/isVoid) Approvers: - https://github.com/brandon-b-miller - H.
Thomson Comer (https://github.com/thomcom) URL: https://github.com/rapidsai/cudf/pull/9192 --- python/cudf/cudf/_lib/utils.pyx | 20 ++++++++++++++++++-- python/cudf/cudf/core/column/column.py | 5 +---- python/cudf/cudf/tests/test_dtypes.py | 9 +++++++++ python/cudf/cudf/utils/dtypes.py | 7 ++++++- 4 files changed, 34 insertions(+), 7 deletions(-) diff --git a/python/cudf/cudf/_lib/utils.pyx b/python/cudf/cudf/_lib/utils.pyx index dd12c92a15a..810cdd51df5 100644 --- a/python/cudf/cudf/_lib/utils.pyx +++ b/python/cudf/cudf/_lib/utils.pyx @@ -1,5 +1,6 @@ # Copyright (c) 2020-2021, NVIDIA CORPORATION. +import numpy as np import pyarrow as pa import cudf @@ -81,7 +82,14 @@ cpdef generate_pandas_metadata(Table table, index): ): types.append(col.dtype.to_arrow()) else: - types.append(np_to_pa_dtype(col.dtype)) + # A boolean element takes 8 bits in cudf and 1 bit in + # pyarrow. To make sure the cudf format is interoperable + # in arrow, we use `int8` type when converting from a + # cudf boolean array. + if col.dtype.type == np.bool_: + types.append(pa.int8()) + else: + types.append(np_to_pa_dtype(col.dtype)) # Indexes if index is not False: @@ -125,7 +133,15 @@ cpdef generate_pandas_metadata(Table table, index): elif is_list_dtype(idx): types.append(col.dtype.to_arrow()) else: - types.append(np_to_pa_dtype(idx.dtype)) + # A boolean element takes 8 bits in cudf and 1 bit in + # pyarrow. To make sure the cudf format is interoperable + # in arrow, we use `int8` type when converting from a + # cudf boolean array. + if idx.dtype.type == np.bool_: + types.append(pa.int8()) + else: + types.append(np_to_pa_dtype(idx.dtype)) + index_levels.append(idx) col_names.append(name) index_descriptors.append(descr) diff --git a/python/cudf/cudf/core/column/column.py b/python/cudf/cudf/core/column/column.py index 8f18d83eb31..de278db919d 100644 --- a/python/cudf/cudf/core/column/column.py +++ b/python/cudf/cudf/core/column/column.py @@ -2089,10 +2089,7 @@ def as_column( data ) np_type = np.dtype(dtype).type - if np_type == np.bool_: - pa_type = pa.bool_() - else: - pa_type = np_to_pa_dtype(np.dtype(dtype)) + pa_type = np_to_pa_dtype(np.dtype(dtype)) data = as_column( pa.array( arbitrary, diff --git a/python/cudf/cudf/tests/test_dtypes.py b/python/cudf/cudf/tests/test_dtypes.py index d98ab0504cc..877cec24afa 100644 --- a/python/cudf/cudf/tests/test_dtypes.py +++ b/python/cudf/cudf/tests/test_dtypes.py @@ -324,3 +324,12 @@ def test_dtype(in_dtype, expect): def test_dtype_raise(in_dtype): with pytest.raises(TypeError): cudf.dtype(in_dtype) + + +def test_dtype_np_bool_to_pa_bool(): + """This test case captures that utility np_to_pa_dtype + should map np.bool_ to pa.bool_; nuances of the bit width + difference should be handled elsewhere. + """ + + assert np_to_pa_dtype(np.dtype("bool")) == pa.bool_() diff --git a/python/cudf/cudf/utils/dtypes.py b/python/cudf/cudf/utils/dtypes.py index 5100f1a9c49..bdaf5e144a5 100644 --- a/python/cudf/cudf/utils/dtypes.py +++ b/python/cudf/cudf/utils/dtypes.py @@ -14,6 +14,11 @@ from cudf.core._compat import PANDAS_GE_120 _NA_REP = "<NA>" + +"""Map numpy dtype to pyarrow types. +Note that np.bool_ bitwidth (8) is different from pa.bool_ (1). Special +handling is required when converting a Boolean column into arrow.
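+For example (an illustrative doctest; the exact pyarrow repr may vary by version):
+
+    >>> np_to_pa_dtype(np.dtype(np.bool_))
+    DataType(bool)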
+""" _np_pa_dtypes = { np.float64: pa.float64(), np.float32: pa.float32(), @@ -22,7 +27,7 @@ np.int32: pa.int32(), np.int16: pa.int16(), np.int8: pa.int8(), - np.bool_: pa.int8(), + np.bool_: pa.bool_(), np.uint64: pa.uint64(), np.uint32: pa.uint32(), np.uint16: pa.uint16(), From b0c8bbbab0c819881fab1323fb68f70971a224d7 Mon Sep 17 00:00:00 2001 From: Jason Lowe Date: Wed, 22 Sep 2021 12:21:31 -0500 Subject: [PATCH 4/8] Fix Java column leak in testParquetWriteMap (#9271) Fixes a Java column vector leak in TableTest#testParquetWriteMap. Authors: - Jason Lowe (https://github.com/jlowe) Approvers: - Robert (Bobby) Evans (https://github.com/revans2) URL: https://github.com/rapidsai/cudf/pull/9271 --- java/src/test/java/ai/rapids/cudf/TableTest.java | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/java/src/test/java/ai/rapids/cudf/TableTest.java b/java/src/test/java/ai/rapids/cudf/TableTest.java index cd1e433d07b..b69dce57180 100644 --- a/java/src/test/java/ai/rapids/cudf/TableTest.java +++ b/java/src/test/java/ai/rapids/cudf/TableTest.java @@ -6669,8 +6669,9 @@ void testParquetWriteMap() throws IOException { HostColumnVector.StructType structType = new HostColumnVector.StructType(true, Arrays.asList(new HostColumnVector.BasicType(true, DType.STRING), new HostColumnVector.BasicType(true, DType.STRING))); - try (Table t0 = new Table(ColumnVector.fromLists(new HostColumnVector.ListType(true, - structType), list1, list2, list3))) { + try (ColumnVector listColumn = ColumnVector.fromLists(new HostColumnVector.ListType(true, + structType), list1, list2, list3); + Table t0 = new Table(listColumn)) { try (TableWriter writer = Table.writeParquetChunked(options, f)) { writer.write(t0); } From ef5ba4cee31a5f335314b5ceec9d0db473aef7a0 Mon Sep 17 00:00:00 2001 From: Mike Wilson Date: Wed, 22 Sep 2021 14:31:11 -0400 Subject: [PATCH 5/8] Fixing empty input to getMapValue crashing (#9262) This changes the calls in java/cudf to check for an empty input and return an empty result instead of crashing. Fixes #9253 Authors: - Mike Wilson (https://github.com/hyperbolic2346) Approvers: - Jason Lowe (https://github.com/jlowe) URL: https://github.com/rapidsai/cudf/pull/9262 --- java/src/main/native/src/map_lookup.cu | 6 +++++- .../test/java/ai/rapids/cudf/ColumnVectorTest.java | 11 +++++++++++ 2 files changed, 16 insertions(+), 1 deletion(-) diff --git a/java/src/main/native/src/map_lookup.cu b/java/src/main/native/src/map_lookup.cu index ad791747713..683651799e7 100644 --- a/java/src/main/native/src/map_lookup.cu +++ b/java/src/main/native/src/map_lookup.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020, NVIDIA CORPORATION. + * Copyright (c) 2020-2021, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -183,6 +183,10 @@ std::unique_ptr map_lookup(column_view const &map_column, string_scalar // Defensive checks. map_input_check(map_column, stream); + if (map_column.size() == 0) { + return make_empty_column(cudf::data_type{cudf::type_id::STRING}); + } + lists_column_view lcv{map_column}; column_view structs_column = lcv.get_sliced_child(stream); // Two-pass plan: construct gather map, and then gather() on structs_column.child(1). Plan A. 
diff --git a/java/src/test/java/ai/rapids/cudf/ColumnVectorTest.java b/java/src/test/java/ai/rapids/cudf/ColumnVectorTest.java index 0643776a546..d1af0d9a2f6 100644 --- a/java/src/test/java/ai/rapids/cudf/ColumnVectorTest.java +++ b/java/src/test/java/ai/rapids/cudf/ColumnVectorTest.java @@ -5412,6 +5412,17 @@ void testGetMapValue() { } } + @Test + void testGetMapValueEmptyInput() { + HostColumnVector.StructType structType = new HostColumnVector.StructType(true, Arrays.asList(new HostColumnVector.BasicType(true, DType.STRING), + new HostColumnVector.BasicType(true, DType.STRING))); + try (ColumnVector cv = ColumnVector.fromLists(new HostColumnVector.ListType(true, structType)); + ColumnVector res = cv.getMapValue(Scalar.fromString("a")); + ColumnVector expected = ColumnVector.fromStrings()) { + assertColumnsAreEqual(expected, res); + } + } + @Test void testGetMapKeyExistence() { List list1 = Arrays.asList(new HostColumnVector.StructData("a", "b")); From 08cbbcdcea2c9fb18e5614f5e29ba99b5443d38f Mon Sep 17 00:00:00 2001 From: Devavret Makkar Date: Thu, 23 Sep 2021 00:28:03 +0530 Subject: [PATCH 6/8] Use nvcomp's snappy compressor in ORC writer (#9242) Issue #9205 depends on #9235 Authors: - Devavret Makkar (https://github.com/devavret) Approvers: - Vukasin Milovanovic (https://github.com/vuule) - Elias Stehle (https://github.com/elstehle) - https://github.com/nvdbaranec - Mike Wilson (https://github.com/hyperbolic2346) URL: https://github.com/rapidsai/cudf/pull/9242 --- cpp/src/io/orc/orc_common.h | 3 +- cpp/src/io/orc/orc_gpu.h | 6 ++- cpp/src/io/orc/stripe_enc.cu | 96 ++++++++++++++++++++++++++++++----- cpp/src/io/orc/stripe_init.cu | 16 +++--- cpp/src/io/orc/writer_impl.cu | 51 ++++++++++--------- 5 files changed, 126 insertions(+), 46 deletions(-) diff --git a/cpp/src/io/orc/orc_common.h b/cpp/src/io/orc/orc_common.h index ab6788d01f1..eedaa9d4fc2 100644 --- a/cpp/src/io/orc/orc_common.h +++ b/cpp/src/io/orc/orc_common.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019, NVIDIA CORPORATION. + * Copyright (c) 2019-2021, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -24,6 +24,7 @@ namespace orc { // ORC rows are divided into groups and assigned indexes for faster seeking static constexpr uint32_t default_row_index_stride = 10000; +static constexpr uint32_t BLOCK_HEADER_SIZE = 3; enum CompressionKind : uint8_t { NONE = 0, diff --git a/cpp/src/io/orc/orc_gpu.h b/cpp/src/io/orc/orc_gpu.h index 30687331c15..88d7e26b3b6 100644 --- a/cpp/src/io/orc/orc_gpu.h +++ b/cpp/src/io/orc/orc_gpu.h @@ -355,6 +355,7 @@ void CompactOrcDataStreams(device_2dspan strm_desc, * @param[in] num_compressed_blocks Total number of compressed blocks * @param[in] compression Type of compression * @param[in] comp_blk_size Compression block size + * @param[in] max_comp_blk_size Max size of any block after compression * @param[in,out] strm_desc StripeStream device array [stripe][stream] * @param[in,out] enc_streams chunk streams device array [column][rowgroup] * @param[out] comp_in Per-block compression input parameters @@ -365,10 +366,11 @@ void CompressOrcDataStreams(uint8_t* compressed_data, uint32_t num_compressed_blocks, CompressionKind compression, uint32_t comp_blk_size, + uint32_t max_comp_blk_size, device_2dspan strm_desc, device_2dspan enc_streams, - gpu_inflate_input_s* comp_in, - gpu_inflate_status_s* comp_out, + device_span comp_in, + device_span comp_out, rmm::cuda_stream_view stream); /** diff --git a/cpp/src/io/orc/stripe_enc.cu b/cpp/src/io/orc/stripe_enc.cu index d50d3898c3b..9348d817dad 100644 --- a/cpp/src/io/orc/stripe_enc.cu +++ b/cpp/src/io/orc/stripe_enc.cu @@ -24,6 +24,9 @@ #include #include +#include + +#include namespace cudf { namespace io { @@ -1102,15 +1105,17 @@ __global__ void __launch_bounds__(1024) * @param[out] comp_out Per-block compression status * @param[in] compressed_bfr Compression output buffer * @param[in] comp_blk_size Compression block size + * @param[in] max_comp_blk_size Max size of any block after compression */ // blockDim {256,1,1} __global__ void __launch_bounds__(256) gpuInitCompressionBlocks(device_2dspan strm_desc, device_2dspan streams, // const? 
- gpu_inflate_input_s* comp_in, - gpu_inflate_status_s* comp_out, + device_span comp_in, + device_span comp_out, uint8_t* compressed_bfr, - uint32_t comp_blk_size) + uint32_t comp_blk_size, + uint32_t max_comp_blk_size) { __shared__ __align__(16) StripeStream ss; __shared__ uint8_t* volatile uncomp_base_g; @@ -1135,8 +1140,8 @@ __global__ void __launch_bounds__(256) uint32_t blk_size = min(comp_blk_size, ss.stream_size - min(b * comp_blk_size, ss.stream_size)); blk_in->srcDevice = src + b * comp_blk_size; blk_in->srcSize = blk_size; - blk_in->dstDevice = dst + b * (3 + comp_blk_size) + 3; // reserve 3 bytes for block header - blk_in->dstSize = blk_size; + blk_in->dstDevice = dst + b * (BLOCK_HEADER_SIZE + max_comp_blk_size) + BLOCK_HEADER_SIZE; + blk_in->dstSize = max_comp_blk_size; blk_out->bytes_written = blk_size; blk_out->status = 1; blk_out->reserved = 0; @@ -1153,14 +1158,16 @@ __global__ void __launch_bounds__(256) * @param[in] comp_out Per-block compression status * @param[in] compressed_bfr Compression output buffer * @param[in] comp_blk_size Compression block size + * @param[in] max_comp_blk_size Max size of any block after compression */ // blockDim {1024,1,1} __global__ void __launch_bounds__(1024) gpuCompactCompressedBlocks(device_2dspan strm_desc, - gpu_inflate_input_s* comp_in, - gpu_inflate_status_s* comp_out, + device_span comp_in, + device_span comp_out, uint8_t* compressed_bfr, - uint32_t comp_blk_size) + uint32_t comp_blk_size, + uint32_t max_comp_blk_size) { __shared__ __align__(16) StripeStream ss; __shared__ const uint8_t* volatile comp_src_g; @@ -1271,20 +1278,83 @@ void CompressOrcDataStreams(uint8_t* compressed_data, uint32_t num_compressed_blocks, CompressionKind compression, uint32_t comp_blk_size, + uint32_t max_comp_blk_size, device_2dspan strm_desc, device_2dspan enc_streams, - gpu_inflate_input_s* comp_in, - gpu_inflate_status_s* comp_out, + device_span comp_in, + device_span comp_out, rmm::cuda_stream_view stream) { dim3 dim_block_init(256, 1); dim3 dim_grid(strm_desc.size().first, strm_desc.size().second); gpuInitCompressionBlocks<<>>( - strm_desc, enc_streams, comp_in, comp_out, compressed_data, comp_blk_size); - if (compression == SNAPPY) { gpu_snap(comp_in, comp_out, num_compressed_blocks, stream); } + strm_desc, enc_streams, comp_in, comp_out, compressed_data, comp_blk_size, max_comp_blk_size); + if (compression == SNAPPY) { + auto env_use_nvcomp = std::getenv("LIBCUDF_USE_NVCOMP"); + bool use_nvcomp = env_use_nvcomp != nullptr ? 
std::atoi(env_use_nvcomp) : 0; + if (use_nvcomp) { + try { + size_t temp_size; + nvcompStatus_t nvcomp_status = nvcompBatchedSnappyCompressGetTempSize( + num_compressed_blocks, comp_blk_size, nvcompBatchedSnappyDefaultOpts, &temp_size); + + CUDF_EXPECTS(nvcomp_status == nvcompStatus_t::nvcompSuccess, + "Error in getting snappy compression scratch size"); + + rmm::device_buffer scratch(temp_size, stream); + rmm::device_uvector uncompressed_data_ptrs(num_compressed_blocks, stream); + rmm::device_uvector uncompressed_data_sizes(num_compressed_blocks, stream); + rmm::device_uvector compressed_data_ptrs(num_compressed_blocks, stream); + rmm::device_uvector compressed_bytes_written(num_compressed_blocks, stream); + + auto comp_it = thrust::make_zip_iterator(uncompressed_data_ptrs.begin(), + uncompressed_data_sizes.begin(), + compressed_data_ptrs.begin()); + thrust::transform(rmm::exec_policy(stream), + comp_in.begin(), + comp_in.end(), + comp_it, + [] __device__(gpu_inflate_input_s in) { + return thrust::make_tuple(in.srcDevice, in.srcSize, in.dstDevice); + }); + nvcomp_status = nvcompBatchedSnappyCompressAsync(uncompressed_data_ptrs.data(), + uncompressed_data_sizes.data(), + max_comp_blk_size, + num_compressed_blocks, + scratch.data(), + scratch.size(), + compressed_data_ptrs.data(), + compressed_bytes_written.data(), + nvcompBatchedSnappyDefaultOpts, + stream.value()); + + CUDF_EXPECTS(nvcomp_status == nvcompStatus_t::nvcompSuccess, "Error in snappy compression"); + + thrust::transform(rmm::exec_policy(stream), + compressed_bytes_written.begin(), + compressed_bytes_written.end(), + comp_out.begin(), + [] __device__(size_t size) { + gpu_inflate_status_s status{}; + status.bytes_written = size; + return status; + }); + } catch (...) { + // If we reach this then there was an error in compressing so set an error status for each + // block + thrust::for_each(rmm::exec_policy(stream), + comp_out.begin(), + comp_out.end(), + [] __device__(gpu_inflate_status_s & stat) { stat.status = 1; }); + }; + + } else { + gpu_snap(comp_in.data(), comp_out.data(), num_compressed_blocks, stream); + } + } dim3 dim_block_compact(1024, 1); gpuCompactCompressedBlocks<<>>( - strm_desc, comp_in, comp_out, compressed_data, comp_blk_size); + strm_desc, comp_in, comp_out, compressed_data, comp_blk_size, max_comp_blk_size); } } // namespace gpu diff --git a/cpp/src/io/orc/stripe_init.cu b/cpp/src/io/orc/stripe_init.cu index 94d8de6561b..d6dbdbe6403 100644 --- a/cpp/src/io/orc/stripe_init.cu +++ b/cpp/src/io/orc/stripe_init.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2020, NVIDIA CORPORATION. + * Copyright (c) 2019-2021, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -52,13 +52,13 @@ extern "C" __global__ void __launch_bounds__(128, 8) gpuParseCompressedStripeDat uint32_t max_uncompressed_block_size = 0; uint32_t num_compressed_blocks = 0; uint32_t num_uncompressed_blocks = 0; - while (cur + 3 < end) { + while (cur + BLOCK_HEADER_SIZE < end) { uint32_t block_len = shuffle((lane_id == 0) ? 
cur[0] | (cur[1] << 8) | (cur[2] << 16) : 0); uint32_t is_uncompressed = block_len & 1; uint32_t uncompressed_size; gpu_inflate_input_s* init_ctl = nullptr; block_len >>= 1; - cur += 3; + cur += BLOCK_HEADER_SIZE; if (block_len > block_size || cur + block_len > end) { // Fatal num_compressed_blocks = 0; @@ -145,12 +145,12 @@ extern "C" __global__ void __launch_bounds__(128, 8) uint32_t num_compressed_blocks = 0; uint32_t max_compressed_blocks = s->info.num_compressed_blocks; - while (cur + 3 < end) { + while (cur + BLOCK_HEADER_SIZE < end) { uint32_t block_len = shuffle((lane_id == 0) ? cur[0] | (cur[1] << 8) | (cur[2] << 16) : 0); uint32_t is_uncompressed = block_len & 1; uint32_t uncompressed_size_est, uncompressed_size_actual; block_len >>= 1; - cur += 3; + cur += BLOCK_HEADER_SIZE; if (cur + block_len > end) { break; } if (is_uncompressed) { uncompressed_size_est = block_len; @@ -367,9 +367,11 @@ static __device__ void gpuMapRowIndexToUncompressed(rowindex_state_s* s, for (;;) { uint32_t block_len, is_uncompressed; - if (cur + 3 > end || cur + 3 >= start + compressed_offset) { break; } + if (cur + BLOCK_HEADER_SIZE > end || cur + BLOCK_HEADER_SIZE >= start + compressed_offset) { + break; + } block_len = cur[0] | (cur[1] << 8) | (cur[2] << 16); - cur += 3; + cur += BLOCK_HEADER_SIZE; is_uncompressed = block_len & 1; block_len >>= 1; cur += block_len; diff --git a/cpp/src/io/orc/writer_impl.cu b/cpp/src/io/orc/writer_impl.cu index e0018ed7166..8a0112deb76 100644 --- a/cpp/src/io/orc/writer_impl.cu +++ b/cpp/src/io/orc/writer_impl.cu @@ -36,6 +36,8 @@ #include #include +#include + #include #include #include @@ -999,10 +1001,10 @@ void writer::impl::write_index_stream(int32_t stripe_id, record.pos += stream.lengths[type]; while ((record.pos >= 0) && (record.blk_pos >= 0) && (static_cast(record.pos) >= compression_blocksize_) && - (record.comp_pos + 3 + comp_out[record.blk_pos].bytes_written < + (record.comp_pos + BLOCK_HEADER_SIZE + comp_out[record.blk_pos].bytes_written < static_cast(record.comp_size))) { record.pos -= compression_blocksize_; - record.comp_pos += 3 + comp_out[record.blk_pos].bytes_written; + record.comp_pos += BLOCK_HEADER_SIZE + comp_out[record.blk_pos].bytes_written; record.blk_pos += 1; } } @@ -1472,29 +1474,31 @@ void writer::impl::write(table_view const& table) } // Allocate intermediate output stream buffer - size_t compressed_bfr_size = 0; - size_t num_compressed_blocks = 0; - auto stream_output = [&]() { + size_t compressed_bfr_size = 0; + size_t num_compressed_blocks = 0; + size_t max_compressed_block_size = 0; + if (compression_kind_ != NONE) { + nvcompBatchedSnappyCompressGetMaxOutputChunkSize( + compression_blocksize_, nvcompBatchedSnappyDefaultOpts, &max_compressed_block_size); + } + auto stream_output = [&]() { size_t max_stream_size = 0; bool all_device_write = true; - for (size_t stripe_id = 0; stripe_id < segmentation.num_stripes(); stripe_id++) { - for (size_t i = 0; i < num_data_streams; i++) { // TODO range for (at least) - gpu::StripeStream* ss = &strm_descs[stripe_id][i]; - if (!out_sink_->is_device_write_preferred(ss->stream_size)) { all_device_write = false; } - size_t stream_size = ss->stream_size; - if (compression_kind_ != NONE) { - ss->first_block = num_compressed_blocks; - ss->bfr_offset = compressed_bfr_size; - - auto num_blocks = std::max( - (stream_size + compression_blocksize_ - 1) / compression_blocksize_, 1); - stream_size += num_blocks * 3; - num_compressed_blocks += num_blocks; - compressed_bfr_size += stream_size; - } - 
max_stream_size = std::max(max_stream_size, stream_size); + for (auto& ss : strm_descs.host_view().flat_view()) { + if (!out_sink_->is_device_write_preferred(ss.stream_size)) { all_device_write = false; } + size_t stream_size = ss.stream_size; + if (compression_kind_ != NONE) { + ss.first_block = num_compressed_blocks; + ss.bfr_offset = compressed_bfr_size; + + auto num_blocks = std::max( + (stream_size + compression_blocksize_ - 1) / compression_blocksize_, 1); + stream_size += num_blocks * BLOCK_HEADER_SIZE; + num_compressed_blocks += num_blocks; + compressed_bfr_size += (max_compressed_block_size + BLOCK_HEADER_SIZE) * num_blocks; } + max_stream_size = std::max(max_stream_size, stream_size); } if (all_device_write) { @@ -1519,10 +1523,11 @@ void writer::impl::write(table_view const& table) num_compressed_blocks, compression_kind_, compression_blocksize_, + max_compressed_block_size, strm_descs, enc_data.streams, - comp_in.device_ptr(), - comp_out.device_ptr(), + comp_in, + comp_out, stream); strm_descs.device_to_host(stream); comp_out.device_to_host(stream, true); From 10fd071dc12e35f02192d7bdd14af03221bb2ae9 Mon Sep 17 00:00:00 2001 From: Michael Wang Date: Wed, 22 Sep 2021 12:05:14 -0700 Subject: [PATCH 7/8] Add `dseries.struct.explode` (#9086) Closes #8660 Per discussions in thread #8872 , this PR adds a struct-accessor member function to provide a lateral view to a struct type series. Example: ```python >>> import cudf, dask_cudf as dgd >>> ds = dgd.from_cudf(cudf.Series( ... [{'a': 42, 'b': 'str1', 'c': [-1]}, ... {'a': 0, 'b': 'str2', 'c': [400, 500]}, ... {'a': 7, 'b': '', 'c': []}]), npartitions=2) >>> ds.struct.explode().compute() a b c 0 42 str1 [-1] 1 0 str2 [400, 500] 2 7 [] ``` Authors: - Michael Wang (https://github.com/isVoid) Approvers: - Richard (Rick) Zamora (https://github.com/rjzamora) URL: https://github.com/rapidsai/cudf/pull/9086 --- python/dask_cudf/dask_cudf/accessors.py | 26 +++++++++++++++++++ .../dask_cudf/tests/test_accessor.py | 15 +++++++++++ 2 files changed, 41 insertions(+) diff --git a/python/dask_cudf/dask_cudf/accessors.py b/python/dask_cudf/dask_cudf/accessors.py index 77973ee34ff..1c21fca51c8 100644 --- a/python/dask_cudf/dask_cudf/accessors.py +++ b/python/dask_cudf/dask_cudf/accessors.py @@ -37,6 +37,32 @@ def field(self, key): meta=self.d_series._meta._constructor([], dtype=typ), ) + def explode(self): + """ + Creates a dataframe view of the struct column, one column per field. + + Returns + ------- + DataFrame + + Examples + -------- + >>> import cudf, dask_cudf + >>> ds = dask_cudf.from_cudf(cudf.Series( + ... [{'a': 42, 'b': 'str1', 'c': [-1]}, + ... {'a': 0, 'b': 'str2', 'c': [400, 500]}, + ... 
{'a': 7, 'b': '', 'c': []}]), npartitions=2) >>> ds.struct.explode().compute() a b c 0 42 str1 [-1] 1 0 str2 [400, 500] 2 7 [] """ return self.d_series.map_partitions( lambda s: s.struct.explode(), meta=self.d_series._meta.struct.explode(), ) + class ListMethods: def __init__(self, d_series): diff --git a/python/dask_cudf/dask_cudf/tests/test_accessor.py b/python/dask_cudf/dask_cudf/tests/test_accessor.py index 805927dd474..1521ce41806 100644 --- a/python/dask_cudf/dask_cudf/tests/test_accessor.py +++ b/python/dask_cudf/dask_cudf/tests/test_accessor.py @@ -499,3 +499,18 @@ def test_dask_struct_field_Int_Error(data): with pytest.raises(IndexError): got.struct.field(1000).compute() + + +@pytest.mark.parametrize( "data", [ [{}, {}, {}], [{"a": 100, "b": "abc"}, {"a": 42, "b": "def"}, {"a": -87, "b": ""}], [{"a": [1, 2, 3], "b": {"c": 101}}, {"a": [4, 5], "b": {"c": 102}}], ], +) +def test_struct_explode(data): + expect = Series(data).struct.explode() + got = dgd.from_cudf(Series(data), 2).struct.explode() + + assert_eq(expect, got.compute()) From 1cb527f01aac631f4d44866b5474e503501d58cd Mon Sep 17 00:00:00 2001 From: Karthikeyan <6488848+karthikeyann@users.noreply.github.com> Date: Thu, 23 Sep 2021 01:26:59 +0530 Subject: [PATCH 8/8] Add shallow hash function and shallow equality comparison for column_view (#9185) Fixes #9140. Added `shallow_hash(column_view)` and unit tests. It computes hash values based on the shallow states of `column_view`: type, size, data pointer, null_mask pointer, offset, and the hash value of the children. `null_count` is not used since it is a cached value and it may vary based on contents of `null_mask`, and may be pre-computed or not. Fixes #9139. Added `is_shallow_equivalent(column_view, column_view)` (renamed from `shallow_equal`) and unit tests. It compares two column_views based on the shallow states of column_view: type, size, data pointer, null_mask pointer, offset, and the column_views of the children. null_count is not used since it is a cached value and it may vary based on contents of null_mask, and may be pre-computed or not. Authors: - Karthikeyan (https://github.com/karthikeyann) Approvers: - Mark Harris (https://github.com/harrism) - Vyas Ramasubramani (https://github.com/vyasr) - Jake Hemstad (https://github.com/jrhemstad) - David Wendt (https://github.com/davidwendt) URL: https://github.com/rapidsai/cudf/pull/9185 --- cpp/include/cudf/column/column_view.hpp | 41 ++ cpp/include/cudf/detail/hashing.hpp | 36 ++ .../cudf/detail/utilities/hash_functions.cuh | 12 + cpp/include/cudf_test/type_lists.hpp | 12 + cpp/src/column/column_view.cpp | 55 +++ cpp/tests/CMakeLists.txt | 1 + cpp/tests/column/column_view_shallow_test.cpp | 442 ++++++++++++++++++ 7 files changed, 599 insertions(+) create mode 100644 cpp/tests/column/column_view_shallow_test.cpp diff --git a/cpp/include/cudf/column/column_view.hpp b/cpp/include/cudf/column/column_view.hpp index 7feaeafbad0..cd490c3c832 100644 --- a/cpp/include/cudf/column/column_view.hpp +++ b/cpp/include/cudf/column/column_view.hpp @@ -633,4 +633,45 @@ column_view bit_cast(column_view const& input, data_type type); */ mutable_column_view bit_cast(mutable_column_view const& input, data_type type); +namespace detail { +/** + * @brief Computes a hash value from the shallow state of the specified column + * + * For any two columns, if `is_shallow_equivalent(c0,c1)` then `shallow_hash(c0) == + * shallow_hash(c1)`.
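+ *
+ * Example (illustrative; `col` is any owning cudf::column):
+ * @code
+ * auto v1 = cudf::column_view{col};
+ * auto v2 = cudf::column_view{col};  // a second view of the same physical column
+ * assert(cudf::detail::shallow_hash(v1) == cudf::detail::shallow_hash(v2));
+ * @endcode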
+ * + * The complexity of computing the hash value of `input` is `O( count_descendants(input) )`, i.e., + * it is independent of the number of elements in the column. + * + * This function does _not_ inspect the elements of `input` nor access any device memory or launch + * any kernels. + * + * @param input The `column_view` to compute hash + * @return The hash value derived from the shallow state of `input`. + */ +std::size_t shallow_hash(column_view const& input); + +/** + * @brief Uses only shallow state to determine if two `column_view`s view equivalent columns + * + * Two columns are equivalent if for any operation `F` then: + * ``` + * is_shallow_equivalent(c0, c1) ==> The results of F(c0) and F(c1) are equivalent + * ``` + * For any two non-empty columns, `is_shallow_equivalent(c0,c1)` is true only if they view the exact + * same physical column. In other words, two physically independent columns may have exactly + * equivalent elements but their shallow state would not be equivalent. + * + * The complexity of this function is `O( min(count_descendants(lhs), count_descendants(rhs)) )`, + * i.e., it is independent of the number of elements in either column. + * + * This function does _not_ inspect the elements of `lhs` or `rhs` nor access any device memory nor + * launch any kernels. + * + * @param lhs The left `column_view` to compare + * @param rhs The right `column_view` to compare + * @return If `lhs` and `rhs` have equivalent shallow state + */ +bool is_shallow_equivalent(column_view const& lhs, column_view const& rhs); +} // namespace detail } // namespace cudf diff --git a/cpp/include/cudf/detail/hashing.hpp b/cpp/include/cudf/detail/hashing.hpp index 83d6be14709..bd5c8a42a51 100644 --- a/cpp/include/cudf/detail/hashing.hpp +++ b/cpp/include/cudf/detail/hashing.hpp @@ -19,6 +19,9 @@ #include +#include +#include + namespace cudf { namespace detail { @@ -53,5 +56,38 @@ std::unique_ptr serial_murmur_hash3_32( rmm::cuda_stream_view stream = rmm::cuda_stream_default, rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); +/* Copyright 2005-2014 Daniel James. + * + * Use, modification and distribution is subject to the Boost Software + * License, Version 1.0. (See accompanying file LICENSE_1_0.txt or copy at + * http://www.boost.org/LICENSE_1_0.txt) + */ +/** + * @brief Combines two hashed values into a single hashed value. + * + * Adapted from Boost hash_combine function, modified for 64-bit + * https://www.boost.org/doc/libs/1_35_0/doc/html/boost/hash_combine_id241013.html + * + * @param lhs The first hashed value + * @param rhs The second hashed value + * @return Combined hash value + */ +constexpr std::size_t hash_combine(std::size_t lhs, std::size_t rhs) +{ + lhs ^= rhs + 0x9e3779b97f4a7c15 + (lhs << 6) + (lhs >> 2); + return lhs; +} } // namespace detail } // namespace cudf + +// specialization of std::hash for cudf::data_type +namespace std { +template <> +struct hash { + std::size_t operator()(cudf::data_type const& type) const noexcept + { + return cudf::detail::hash_combine(std::hash{}(static_cast(type.id())), + std::hash{}(type.scale())); + } +}; +} // namespace std diff --git a/cpp/include/cudf/detail/utilities/hash_functions.cuh b/cpp/include/cudf/detail/utilities/hash_functions.cuh index 6eab13ae9af..65deadd6cd0 100644 --- a/cpp/include/cudf/detail/utilities/hash_functions.cuh +++ b/cpp/include/cudf/detail/utilities/hash_functions.cuh @@ -395,6 +395,12 @@ struct MurmurHash3_32 { return h; } + /* Copyright 2005-2014 Daniel James. 
+ * + * Use, modification and distribution is subject to the Boost Software + * License, Version 1.0. (See accompanying file LICENSE_1_0.txt or copy at + * http://www.boost.org/LICENSE_1_0.txt) + */ /** * @brief Combines two hash values into a new single hash value. Called * repeatedly to create a hash value from several variables. @@ -795,6 +801,12 @@ struct IdentityHash { IdentityHash() = default; constexpr IdentityHash(uint32_t seed) : m_seed(seed) {} + /* Copyright 2005-2014 Daniel James. + * + * Use, modification and distribution is subject to the Boost Software + * License, Version 1.0. (See accompanying file LICENSE_1_0.txt or copy at + * http://www.boost.org/LICENSE_1_0.txt) + */ /** * @brief Combines two hash values into a new single hash value. Called * repeatedly to create a hash value from several variables. diff --git a/cpp/include/cudf_test/type_lists.hpp b/cpp/include/cudf_test/type_lists.hpp index 74688b7f133..982c94ac402 100644 --- a/cpp/include/cudf_test/type_lists.hpp +++ b/cpp/include/cudf_test/type_lists.hpp @@ -315,6 +315,18 @@ using FixedWidthTypesWithoutChrono = Concat; */ using ComparableTypes = Concat; +/** + * @brief Provides a list of all compound types for use in GTest typed tests. + * + * Example: + * ``` + * // Invokes all typed fixture tests for all compound types in libcudf + * TYPED_TEST_CASE(MyTypedFixture, cudf::test::CompoundTypes); + * ``` + */ +using CompoundTypes = + cudf::test::Types; + /** * @brief Provides a list of all types supported in libcudf for use in a GTest * typed test. diff --git a/cpp/src/column/column_view.cpp b/cpp/src/column/column_view.cpp index 186669ae697..5749cb48c0e 100644 --- a/cpp/src/column/column_view.cpp +++ b/cpp/src/column/column_view.cpp @@ -15,6 +15,7 @@ */ #include +#include #include #include #include @@ -22,6 +23,7 @@ #include +#include #include #include #include @@ -76,6 +78,59 @@ size_type column_view_base::null_count(size_type begin, size_type end) const ? 0 : cudf::count_unset_bits(null_mask(), offset() + begin, offset() + end); } + +// Struct to use custom hash combine and fold expression +struct HashValue { + std::size_t hash; + explicit HashValue(std::size_t h) : hash{h} {} + HashValue operator^(HashValue const& other) const + { + return HashValue{hash_combine(hash, other.hash)}; + } +}; + +template +constexpr auto hash(Ts&&... ts) +{ + return (... ^ HashValue(std::hash{}(ts))).hash; +} + +std::size_t shallow_hash_impl(column_view const& c, bool is_parent_empty = false) +{ + std::size_t const init = (is_parent_empty or c.is_empty()) + ? 
hash(c.type(), 0) + : hash(c.type(), c.size(), c.head(), c.null_mask(), c.offset()); + return std::accumulate(c.child_begin(), + c.child_end(), + init, + [&c, is_parent_empty](std::size_t hash, auto const& child) { + return hash_combine( + hash, shallow_hash_impl(child, c.is_empty() or is_parent_empty)); + }); +} + +std::size_t shallow_hash(column_view const& input) { return shallow_hash_impl(input); } + +bool shallow_equivalent_impl(column_view const& lhs, + column_view const& rhs, + bool is_parent_empty = false) +{ + bool const is_empty = (lhs.is_empty() and rhs.is_empty()) or is_parent_empty; + return (lhs.type() == rhs.type()) and + (is_empty or ((lhs.size() == rhs.size()) and (lhs.head() == rhs.head()) and + (lhs.null_mask() == rhs.null_mask()) and (lhs.offset() == rhs.offset()))) and + std::equal(lhs.child_begin(), + lhs.child_end(), + rhs.child_begin(), + rhs.child_end(), + [is_empty](auto const& lhs_child, auto const& rhs_child) { + return shallow_equivalent_impl(lhs_child, rhs_child, is_empty); + }); +} +bool is_shallow_equivalent(column_view const& lhs, column_view const& rhs) +{ + return shallow_equivalent_impl(lhs, rhs); +} } // namespace detail // Immutable view constructor diff --git a/cpp/tests/CMakeLists.txt b/cpp/tests/CMakeLists.txt index 03f7967cee0..cde170fb598 100644 --- a/cpp/tests/CMakeLists.txt +++ b/cpp/tests/CMakeLists.txt @@ -33,6 +33,7 @@ endfunction() # - column tests ---------------------------------------------------------------------------------- ConfigureTest(COLUMN_TEST column/bit_cast_test.cpp + column/column_view_shallow_test.cpp column/column_test.cu column/column_device_view_test.cu column/compound_test.cu) diff --git a/cpp/tests/column/column_view_shallow_test.cpp b/cpp/tests/column/column_view_shallow_test.cpp new file mode 100644 index 00000000000..f76f682bb2f --- /dev/null +++ b/cpp/tests/column/column_view_shallow_test.cpp @@ -0,0 +1,442 @@ +/* + * Copyright (c) 2021, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include +#include +#include +#include + +#include +#include +#include +#include +#include + +#include + +#include +#include + +// fixed_width, dict, string, list, struct +template ()>* = nullptr> +std::unique_ptr example_column() +{ + auto begin = thrust::make_counting_iterator(1); + auto end = thrust::make_counting_iterator(16); + return cudf::test::fixed_width_column_wrapper(begin, end).release(); +} + +template ()>* = nullptr> +std::unique_ptr example_column() +{ + return cudf::test::dictionary_column_wrapper( + {"fff", "aaa", "ddd", "bbb", "ccc", "ccc", "ccc", "", ""}, {1, 1, 1, 1, 1, 1, 1, 1, 0}) + .release(); +} + +template or + std::is_same_v>* = nullptr> +std::unique_ptr example_column() + +{ + return cudf::test::strings_column_wrapper( + {"fff", "aaa", "ddd", "bbb", "ccc", "ccc", "ccc", "", ""}) + .release(); +} + +template >* = nullptr> +std::unique_ptr example_column() +{ + return cudf::test::lists_column_wrapper({{1, 2, 3}, {4, 5}, {}, {6, 7, 8}}).release(); +} + +template >* = nullptr> +std::unique_ptr example_column() +{ + auto begin = thrust::make_counting_iterator(1); + auto end = thrust::make_counting_iterator(16); + auto member_0 = cudf::test::fixed_width_column_wrapper(begin, end); + auto member_1 = cudf::test::fixed_width_column_wrapper(begin + 10, end + 10); + return cudf::test::structs_column_wrapper({member_0, member_1}).release(); +} + +template +struct ColumnViewShallowTests : public cudf::test::BaseFixture { +}; + +using AllTypes = cudf::test::Concat; +TYPED_TEST_CASE(ColumnViewShallowTests, AllTypes); + +// Test for fixed_width, dict, string, list, struct +// column_view, column_view = same hash. +// column_view, make a copy = same hash. +// new column_view from column = same hash +// column_view, copy column = diff hash +// column_view, diff column = diff hash. +// +// column_view old, update data + new column_view = same hash. +// column_view old, add null_mask + new column_view = diff hash. +// column_view old, update nulls + new column_view = same hash. +// column_view old, set_null_count + new column_view = same hash. +// +// column_view, sliced[0, size) = same hash (for split too) +// column_view, sliced[n:) = diff hash (for split too) +// column_view, bit_cast = diff hash +// +// mutable_column_view, column_view = same hash +// mutable_column_view, modified mutable_column_view = same hash +// +// update the children column data = same hash +// update the children column_views = diff hash + +TYPED_TEST(ColumnViewShallowTests, shallow_hash_basic) +{ + using namespace cudf::detail; + auto col = example_column(); + auto col_view = cudf::column_view{*col}; + // same = same hash + { + EXPECT_EQ(shallow_hash(col_view), shallow_hash(col_view)); + } + // copy column_view = same hash + { + auto col_view_copy = col_view; + EXPECT_EQ(shallow_hash(col_view), shallow_hash(col_view_copy)); + } + + // new column_view from column = same hash + { + auto col_view_new = cudf::column_view{*col}; + EXPECT_EQ(shallow_hash(col_view), shallow_hash(col_view_new)); + } + + // copy column = diff hash + { + auto col_new = std::make_unique(*col); + auto col_view_copy = col_new->view(); + EXPECT_NE(shallow_hash(col_view), shallow_hash(col_view_copy)); + } + + // column_view, diff column = diff hash.
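+  // (a copied column owns its own buffers, so two element-wise equal columns
+  // still have different shallow states and therefore different hashes)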
+ { + auto col_diff = example_column(); + auto col_view_diff = cudf::column_view{*col_diff}; + EXPECT_NE(shallow_hash(col_view), shallow_hash(col_view_diff)); + } +} +TYPED_TEST(ColumnViewShallowTests, shallow_hash_update_data) +{ + using namespace cudf::detail; + auto col = example_column(); + auto col_view = cudf::column_view{*col}; + // update data + new column_view = same hash. + { + // update data by modifying some bits: fixed_width, string, dict, list, struct + if constexpr (cudf::is_fixed_width()) { + // Update data + auto data = reinterpret_cast(col->mutable_view().head()); + cudf::set_null_mask(data, 2, 64, true); + } else { + // Update child(0).data + auto data = reinterpret_cast(col->child(0).mutable_view().head()); + cudf::set_null_mask(data, 2, 64, true); + } + auto col_view_new = cudf::column_view{*col}; + EXPECT_EQ(shallow_hash(col_view), shallow_hash(col_view_new)); + } + // add null_mask + new column_view = diff hash. + { + col->set_null_mask(cudf::create_null_mask(col->size(), cudf::mask_state::ALL_VALID)); + auto col_view_new = cudf::column_view{*col}; + EXPECT_NE(shallow_hash(col_view), shallow_hash(col_view_new)); + col_view_new.null_count(); + EXPECT_NE(shallow_hash(col_view), shallow_hash(col_view_new)); + auto col_view_new2 = cudf::column_view{*col}; + EXPECT_EQ(shallow_hash(col_view_new), shallow_hash(col_view_new2)); + } + col_view = cudf::column_view{*col}; // updating after adding null_mask + // update nulls + new column_view = same hash. + { + cudf::set_null_mask(col->mutable_view().null_mask(), 2, 4, false); + auto col_view_new = cudf::column_view{*col}; + EXPECT_EQ(shallow_hash(col_view), shallow_hash(col_view_new)); + } + // set_null_count + new column_view = same hash. set_null_count(UNKNOWN_NULL_COUNT) + { + col->set_null_count(cudf::UNKNOWN_NULL_COUNT); + auto col_view_new = cudf::column_view{*col}; + EXPECT_EQ(shallow_hash(col_view), shallow_hash(col_view_new)); + col->set_null_count(col->size()); + auto col_view_new2 = cudf::column_view{*col}; + EXPECT_EQ(shallow_hash(col_view), shallow_hash(col_view_new2)); + } +} + +TYPED_TEST(ColumnViewShallowTests, shallow_hash_slice) +{ + using namespace cudf::detail; + auto col = example_column(); + auto col_view = cudf::column_view{*col}; + // column_view, sliced[0, size) = same hash (for split too) + { + auto col_sliced = cudf::slice(col_view, {0, col_view.size()}); + EXPECT_EQ(shallow_hash(col_view), shallow_hash(col_sliced[0])); + auto col_split = cudf::split(col_view, {0}); + EXPECT_NE(shallow_hash(col_view), shallow_hash(col_split[0])); + EXPECT_EQ(shallow_hash(col_view), shallow_hash(col_split[1])); + } + // column_view, sliced[n:] = diff hash (for split too) + { + auto col_sliced = cudf::slice(col_view, {1, col_view.size()}); + EXPECT_NE(shallow_hash(col_view), shallow_hash(col_sliced[0])); + auto col_split = cudf::split(col_view, {1}); + EXPECT_NE(shallow_hash(col_view), shallow_hash(col_split[0])); + EXPECT_NE(shallow_hash(col_view), shallow_hash(col_split[1])); + } + // column_view, col copy sliced[0, 0) = same hash (empty column) + { + auto col_new = std::make_unique(*col); + auto col_new_view = col_new->view(); + auto col_sliced = cudf::slice(col_view, {0, 0, 1, 1, col_view.size(), col_view.size()}); + auto col_new_sliced = cudf::slice(col_new_view, {0, 0, 1, 1, col_view.size(), col_view.size()}); + + EXPECT_EQ(shallow_hash(col_sliced[0]), shallow_hash(col_sliced[1])); + EXPECT_EQ(shallow_hash(col_sliced[1]), shallow_hash(col_sliced[2])); + EXPECT_EQ(shallow_hash(col_sliced[0]), 
shallow_hash(col_new_sliced[0])); + EXPECT_EQ(shallow_hash(col_sliced[1]), shallow_hash(col_new_sliced[1])); + EXPECT_EQ(shallow_hash(col_sliced[2]), shallow_hash(col_new_sliced[2])); + } + + // column_view, bit_cast = diff hash + { + if constexpr (std::is_integral_v and not std::is_same_v) { + using newType = std::conditional_t, + std::make_unsigned_t, + std::make_signed_t>; + auto new_type = cudf::data_type(cudf::type_to_id()); + auto col_bitcast = cudf::bit_cast(col_view, new_type); + EXPECT_NE(shallow_hash(col_view), shallow_hash(col_bitcast)); + } + } +} + +TYPED_TEST(ColumnViewShallowTests, shallow_hash_mutable) +{ + using namespace cudf::detail; + auto col = example_column(); + auto col_view = cudf::column_view{*col}; + // mutable_column_view, column_view = same hash + { + auto col_mutable = cudf::mutable_column_view{*col}; + EXPECT_EQ(shallow_hash(col_mutable), shallow_hash(col_view)); + } + // mutable_column_view, modified mutable_column_view = same hash + // update the children column data = same hash + { + auto col_mutable = cudf::mutable_column_view{*col}; + if constexpr (cudf::is_fixed_width()) { + // Update data + auto data = reinterpret_cast(col->mutable_view().head()); + cudf::set_null_mask(data, 1, 32, false); + } else { + // Update child(0).data + auto data = reinterpret_cast(col->child(0).mutable_view().head()); + cudf::set_null_mask(data, 1, 32, false); + } + EXPECT_EQ(shallow_hash(col_view), shallow_hash(col_mutable)); + auto col_mutable_new = cudf::mutable_column_view{*col}; + EXPECT_EQ(shallow_hash(col_mutable), shallow_hash(col_mutable_new)); + } + // update the children column_views = diff hash + { + if constexpr (cudf::is_nested()) { + col->child(0).set_null_mask( + cudf::create_null_mask(col->child(0).size(), cudf::mask_state::ALL_NULL)); + auto col_child_updated = cudf::mutable_column_view{*col}; + EXPECT_NE(shallow_hash(col_view), shallow_hash(col_child_updated)); + } + } +} + +TYPED_TEST(ColumnViewShallowTests, is_shallow_equivalent_basic) +{ + using namespace cudf::detail; + auto col = example_column(); + auto col_view = cudf::column_view{*col}; + // same = same hash + { + EXPECT_TRUE(is_shallow_equivalent(col_view, col_view)); + } + // copy column_view = same hash + { + auto col_view_copy = col_view; + EXPECT_TRUE(is_shallow_equivalent(col_view, col_view_copy)); + } + + // new column_view from column = same hash + { + auto col_view_new = cudf::column_view{*col}; + EXPECT_TRUE(is_shallow_equivalent(col_view, col_view_new)); + } + + // copy column = diff hash + { + auto col_new = std::make_unique(*col); + auto col_view_copy = col_new->view(); + EXPECT_FALSE(is_shallow_equivalent(col_view, col_view_copy)); + } + + // column_view, diff column = diff hash. + { + auto col_diff = example_column(); + auto col_view_diff = cudf::column_view{*col_diff}; + EXPECT_FALSE(is_shallow_equivalent(col_view, col_view_diff)); + } +} +TYPED_TEST(ColumnViewShallowTests, is_shallow_equivalent_update_data) +{ + using namespace cudf::detail; + auto col = example_column(); + auto col_view = cudf::column_view{*col}; + // update data + new column_view = same hash. 
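+  // (rewriting the bytes behind the data pointer leaves the pointer itself,
+  // and hence the shallow state, unchanged)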
+ { + // update data by modifying some bits: fixed_width, string, dict, list, struct + if constexpr (cudf::is_fixed_width()) { + // Update data + auto data = reinterpret_cast(col->mutable_view().head()); + cudf::set_null_mask(data, 2, 64, true); + } else { + // Update child(0).data + auto data = reinterpret_cast(col->child(0).mutable_view().head()); + cudf::set_null_mask(data, 2, 64, true); + } + auto col_view_new = cudf::column_view{*col}; + EXPECT_TRUE(is_shallow_equivalent(col_view, col_view_new)); + } + // add null_mask + new column_view = diff hash. + { + col->set_null_mask(cudf::create_null_mask(col->size(), cudf::mask_state::ALL_VALID)); + auto col_view_new = cudf::column_view{*col}; + EXPECT_FALSE(is_shallow_equivalent(col_view, col_view_new)); + col_view_new.null_count(); + EXPECT_FALSE(is_shallow_equivalent(col_view, col_view_new)); + auto col_view_new2 = cudf::column_view{*col}; + EXPECT_TRUE(is_shallow_equivalent(col_view_new, col_view_new2)); + } + col_view = cudf::column_view{*col}; // updating after adding null_mask + // update nulls + new column_view = same hash. + { + cudf::set_null_mask(col->mutable_view().null_mask(), 2, 4, false); + auto col_view_new = cudf::column_view{*col}; + EXPECT_TRUE(is_shallow_equivalent(col_view, col_view_new)); + } + // set_null_count + new column_view = same hash. set_null_count(UNKNOWN_NULL_COUNT) + { + col->set_null_count(cudf::UNKNOWN_NULL_COUNT); + auto col_view_new = cudf::column_view{*col}; + EXPECT_TRUE(is_shallow_equivalent(col_view, col_view_new)); + col->set_null_count(col->size()); + auto col_view_new2 = cudf::column_view{*col}; + EXPECT_TRUE(is_shallow_equivalent(col_view, col_view_new2)); + } +} + +TYPED_TEST(ColumnViewShallowTests, is_shallow_equivalent_slice) +{ + using namespace cudf::detail; + auto col = example_column(); + auto col_view = cudf::column_view{*col}; + // column_view, sliced[0, size) = same hash (for split too) + { + auto col_sliced = cudf::slice(col_view, {0, col_view.size()}); + EXPECT_TRUE(is_shallow_equivalent(col_view, col_sliced[0])); + auto col_split = cudf::split(col_view, {0}); + EXPECT_FALSE(is_shallow_equivalent(col_view, col_split[0])); + EXPECT_TRUE(is_shallow_equivalent(col_view, col_split[1])); + } + // column_view, sliced[n:] = diff hash (for split too) + { + auto col_sliced = cudf::slice(col_view, {1, col_view.size()}); + EXPECT_FALSE(is_shallow_equivalent(col_view, col_sliced[0])); + auto col_split = cudf::split(col_view, {1}); + EXPECT_FALSE(is_shallow_equivalent(col_view, col_split[0])); + EXPECT_FALSE(is_shallow_equivalent(col_view, col_split[1])); + } + // column_view, col copy sliced[0, 0) = same hash (empty column) + { + auto col_new = std::make_unique(*col); + auto col_new_view = col_new->view(); + auto col_sliced = cudf::slice(col_view, {0, 0, 1, 1, col_view.size(), col_view.size()}); + auto col_new_sliced = cudf::slice(col_new_view, {0, 0, 1, 1, col_view.size(), col_view.size()}); + + EXPECT_TRUE(is_shallow_equivalent(col_sliced[0], col_sliced[1])); + EXPECT_TRUE(is_shallow_equivalent(col_sliced[1], col_sliced[2])); + EXPECT_TRUE(is_shallow_equivalent(col_sliced[0], col_new_sliced[0])); + EXPECT_TRUE(is_shallow_equivalent(col_sliced[1], col_new_sliced[1])); + EXPECT_TRUE(is_shallow_equivalent(col_sliced[2], col_new_sliced[2])); + } + + // column_view, bit_cast = diff hash + { + if constexpr (std::is_integral_v and not std::is_same_v) { + using newType = std::conditional_t, + std::make_unsigned_t, + std::make_signed_t>; + auto new_type = cudf::data_type(cudf::type_to_id()); + auto 
col_bitcast = cudf::bit_cast(col_view, new_type); + EXPECT_FALSE(is_shallow_equivalent(col_view, col_bitcast)); + } + } +} + +TYPED_TEST(ColumnViewShallowTests, is_shallow_equivalent_mutable) +{ + using namespace cudf::detail; + auto col = example_column(); + auto col_view = cudf::column_view{*col}; + // mutable_column_view, column_view = same hash + { + auto col_mutable = cudf::mutable_column_view{*col}; + EXPECT_TRUE(is_shallow_equivalent(col_mutable, col_view)); + } + // mutable_column_view, modified mutable_column_view = same hash + // update the children column data = same hash + { + auto col_mutable = cudf::mutable_column_view{*col}; + if constexpr (cudf::is_fixed_width()) { + // Update data + auto data = reinterpret_cast(col->mutable_view().head()); + cudf::set_null_mask(data, 1, 32, false); + } else { + // Update child(0).data + auto data = reinterpret_cast(col->child(0).mutable_view().head()); + cudf::set_null_mask(data, 1, 32, false); + } + EXPECT_TRUE(is_shallow_equivalent(col_view, col_mutable)); + auto col_mutable_new = cudf::mutable_column_view{*col}; + EXPECT_TRUE(is_shallow_equivalent(col_mutable, col_mutable_new)); + } + // update the children column_views = diff hash + { + if constexpr (cudf::is_nested()) { + col->child(0).set_null_mask( + cudf::create_null_mask(col->child(0).size(), cudf::mask_state::ALL_NULL)); + auto col_child_updated = cudf::mutable_column_view{*col}; + EXPECT_FALSE(is_shallow_equivalent(col_view, col_child_updated)); + } + } +}