From a021d78ec455917adcad5af87faa61cbfeb7b8d4 Mon Sep 17 00:00:00 2001 From: Matthew Murray Date: Fri, 28 Jun 2024 09:57:50 -0700 Subject: [PATCH] Fix merge conflicts --- .github/workflows/pr.yaml | 12 + CONTRIBUTING.md | 13 +- ci/test_cudf_polars.sh | 68 ++ cpp/CMakeLists.txt | 1 + .../io/parquet/parquet_reader_input.cpp | 50 +- .../developer_guide/DEVELOPER_GUIDE.md | 60 +- cpp/include/cudf/binaryop.hpp | 50 + cpp/include/cudf/detail/copy_if.cuh | 6 +- cpp/include/cudf/detail/gather.cuh | 13 +- cpp/include/cudf/detail/stream_compaction.hpp | 2 - .../cudf/detail/utilities/cuda_memcpy.hpp | 53 ++ cpp/include/cudf/lists/lists_column_view.hpp | 3 +- cpp/include/cudf/stream_compaction.hpp | 2 + .../cudf/strings/detail/strings_children.cuh | 7 +- cpp/include/cudf/strings/utilities.hpp | 62 ++ cpp/include/cudf/utilities/pinned_memory.hpp | 16 + cpp/include/cudf_test/column_wrapper.hpp | 7 +- cpp/src/binaryop/compiled/binary_ops.cuh | 14 +- cpp/src/binaryop/compiled/util.cpp | 12 +- cpp/src/copying/sample.cu | 1 + cpp/src/io/parquet/decode_fixed.cu | 896 ++++++++++-------- cpp/src/io/parquet/page_hdr.cu | 16 +- cpp/src/io/parquet/parquet_gpu.hpp | 46 +- cpp/src/io/parquet/reader_impl.cpp | 57 +- cpp/src/io/parquet/reader_impl_chunking.cu | 13 +- cpp/src/io/parquet/reader_impl_preprocess.cu | 3 +- cpp/src/io/utilities/hostdevice_vector.hpp | 13 +- cpp/src/join/conditional_join.cu | 13 +- cpp/src/lists/copying/segmented_gather.cu | 1 + cpp/src/stream_compaction/distinct.cu | 146 ++- cpp/src/stream_compaction/distinct_count.cu | 3 +- cpp/src/stream_compaction/distinct_helpers.cu | 189 ++-- .../stream_compaction/distinct_helpers.hpp | 58 +- cpp/src/stream_compaction/stable_distinct.cu | 4 +- .../stream_compaction_common.cuh | 5 +- .../stream_compaction_common.hpp | 35 - cpp/src/stream_compaction/unique.cu | 1 - cpp/src/strings/utilities.cu | 22 +- cpp/src/utilities/cuda_memcpy.cu | 71 ++ cpp/src/utilities/pinned_memory.cpp | 14 + cpp/tests/CMakeLists.txt | 1 + .../binop-compiled-fixed_point-test.cpp | 58 ++ cpp/tests/column/factories_test.cpp | 4 +- cpp/tests/copying/concatenate_tests.cpp | 8 +- cpp/tests/io/parquet_writer_test.cpp | 97 +- cpp/tests/join/conditional_join_tests.cu | 92 +- cpp/tests/streams/stream_compaction_test.cpp | 235 +++++ cpp/tests/strings/array_tests.cpp | 4 +- cpp/tests/strings/repeat_strings_tests.cpp | 4 +- .../_static/cudf-pandas-line-profile.png | Bin 0 -> 15125 bytes docs/cudf/source/cudf_pandas/faq.md | 16 + docs/cudf/source/cudf_pandas/usage.md | 34 +- .../cudf/source/user_guide/api_docs/index.rst | 3 + .../api_docs/performance_tracking.rst | 12 + .../user_guide/api_docs/pylibcudf/index.rst | 1 + .../user_guide/api_docs/pylibcudf/interop.rst | 6 + .../api_docs/pylibcudf/strings/index.rst | 1 + .../api_docs/pylibcudf/strings/slice.rst | 6 + docs/cudf/source/user_guide/index.md | 1 + .../source/user_guide/memory-profiling.md | 44 + python/cudf/cudf/_lib/lists.pyx | 72 +- python/cudf/cudf/_lib/parquet.pyx | 36 +- python/cudf/cudf/_lib/pylibcudf/column.pxd | 4 + python/cudf/cudf/_lib/pylibcudf/column.pyx | 9 + python/cudf/cudf/_lib/pylibcudf/join.pxd | 2 + python/cudf/cudf/_lib/pylibcudf/join.pyx | 30 +- .../cudf/cudf/_lib/pylibcudf/libcudf/join.pxd | 5 + .../_lib/pylibcudf/libcudf/lists/contains.pxd | 29 +- .../libcudf/lists/lists_column_view.pxd | 1 + .../libcudf/scalar/scalar_factories.pxd | 1 + python/cudf/cudf/_lib/pylibcudf/lists.pxd | 10 + python/cudf/cudf/_lib/pylibcudf/lists.pyx | 150 ++- .../_lib/pylibcudf/strings/CMakeLists.txt | 2 +- 
.../cudf/_lib/pylibcudf/strings/__init__.pxd | 1 + .../cudf/_lib/pylibcudf/strings/__init__.py | 1 + .../cudf/_lib/pylibcudf/strings/slice.pxd | 15 + .../cudf/_lib/pylibcudf/strings/slice.pyx | 102 ++ python/cudf/cudf/_lib/pylibcudf/table.pyx | 2 + python/cudf/cudf/_lib/strings/substring.pyx | 88 +- python/cudf/cudf/core/_base_index.py | 6 +- python/cudf/cudf/core/algorithms.py | 4 +- python/cudf/cudf/core/buffer/spill_manager.py | 4 +- .../cudf/cudf/core/buffer/spillable_buffer.py | 7 +- python/cudf/cudf/core/column/column.py | 4 +- python/cudf/cudf/core/column_accessor.py | 4 +- python/cudf/cudf/core/cut.py | 2 +- python/cudf/cudf/core/dataframe.py | 257 ++--- python/cudf/cudf/core/frame.py | 110 +-- python/cudf/cudf/core/groupby/groupby.py | 60 +- python/cudf/cudf/core/index.py | 241 ++--- python/cudf/cudf/core/indexed_frame.py | 155 ++- python/cudf/cudf/core/multiindex.py | 221 ++--- python/cudf/cudf/core/reshape.py | 62 +- python/cudf/cudf/core/series.py | 244 ++--- python/cudf/cudf/core/single_column_frame.py | 42 +- python/cudf/cudf/core/udf/groupby_utils.py | 4 +- python/cudf/cudf/core/udf/utils.py | 6 +- python/cudf/cudf/io/csv.py | 6 +- python/cudf/cudf/io/parquet.py | 48 +- python/cudf/cudf/io/text.py | 6 +- python/cudf/cudf/options.py | 14 + python/cudf/cudf/pandas/_wrappers/numpy.py | 13 + python/cudf/cudf/pandas/_wrappers/pandas.py | 16 + .../cudf/pandas/scripts/run-pandas-tests.sh | 3 +- python/cudf/cudf/pylibcudf_tests/test_join.py | 29 + .../cudf/cudf/pylibcudf_tests/test_lists.py | 107 ++- .../cudf/pylibcudf_tests/test_string_slice.py | 116 +++ .../cudf/cudf/pylibcudf_tests/test_table.py | 22 + python/cudf/cudf/tests/test_dataframe.py | 32 + python/cudf/cudf/tests/test_monotonic.py | 19 + python/cudf/cudf/tests/test_parquet.py | 11 + .../cudf/tests/test_performance_tracking.py | 41 + python/cudf/cudf/tests/test_repr.py | 10 +- python/cudf/cudf/utils/nvtx_annotation.py | 30 - .../cudf/cudf/utils/performance_tracking.py | 82 ++ python/cudf/cudf/utils/utils.py | 5 +- .../cudf_pandas_tests/test_cudf_pandas.py | 33 + .../strings/src/strings/udf/udf_apis.cu | 4 +- python/cudf_polars/cudf_polars/dsl/expr.py | 96 +- python/cudf_polars/cudf_polars/dsl/ir.py | 91 +- .../cudf_polars/cudf_polars/dsl/translate.py | 20 + .../cudf_polars/typing/__init__.py | 74 +- .../cudf_polars/tests/expressions/test_agg.py | 4 +- .../tests/expressions/test_booleanfunction.py | 58 +- .../tests/expressions/test_rolling.py | 12 +- .../tests/expressions/test_stringfunction.py | 16 +- .../tests/expressions/test_when_then.py | 27 + python/cudf_polars/tests/test_groupby.py | 11 +- python/cudf_polars/tests/test_join.py | 40 +- python/cudf_polars/tests/test_mapfunction.py | 32 +- python/cudf_polars/tests/test_python_scan.py | 7 +- python/cudf_polars/tests/test_union.py | 12 +- python/dask_cudf/dask_cudf/backends.py | 40 +- python/dask_cudf/dask_cudf/core.py | 62 +- python/dask_cudf/dask_cudf/groupby.py | 72 +- python/dask_cudf/dask_cudf/sorting.py | 16 +- 136 files changed, 4000 insertions(+), 2015 deletions(-) create mode 100755 ci/test_cudf_polars.sh create mode 100644 cpp/include/cudf/detail/utilities/cuda_memcpy.hpp create mode 100644 cpp/include/cudf/strings/utilities.hpp delete mode 100644 cpp/src/stream_compaction/stream_compaction_common.hpp create mode 100644 cpp/src/utilities/cuda_memcpy.cu create mode 100644 cpp/tests/streams/stream_compaction_test.cpp create mode 100644 docs/cudf/source/_static/cudf-pandas-line-profile.png create mode 100644 
docs/cudf/source/user_guide/api_docs/performance_tracking.rst create mode 100644 docs/cudf/source/user_guide/api_docs/pylibcudf/interop.rst create mode 100644 docs/cudf/source/user_guide/api_docs/pylibcudf/strings/slice.rst create mode 100644 docs/cudf/source/user_guide/memory-profiling.md create mode 100644 python/cudf/cudf/_lib/pylibcudf/strings/slice.pxd create mode 100644 python/cudf/cudf/_lib/pylibcudf/strings/slice.pyx create mode 100644 python/cudf/cudf/pylibcudf_tests/test_join.py create mode 100644 python/cudf/cudf/pylibcudf_tests/test_string_slice.py create mode 100644 python/cudf/cudf/pylibcudf_tests/test_table.py create mode 100644 python/cudf/cudf/tests/test_performance_tracking.py delete mode 100644 python/cudf/cudf/utils/nvtx_annotation.py create mode 100644 python/cudf/cudf/utils/performance_tracking.py create mode 100644 python/cudf_polars/tests/expressions/test_when_then.py diff --git a/.github/workflows/pr.yaml b/.github/workflows/pr.yaml index cb582df21e0..a35802f2ab0 100644 --- a/.github/workflows/pr.yaml +++ b/.github/workflows/pr.yaml @@ -25,6 +25,7 @@ jobs: - docs-build - wheel-build-cudf - wheel-tests-cudf + - test-cudf-polars - wheel-build-dask-cudf - wheel-tests-dask-cudf - devcontainer @@ -132,6 +133,17 @@ jobs: with: build_type: pull-request script: ci/test_wheel_cudf.sh + test-cudf-polars: + needs: wheel-build-cudf + secrets: inherit + uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@branch-24.08 + with: + # This selects "ARCH=amd64 + the latest supported Python + CUDA". + matrix_filter: map(select(.ARCH == "amd64")) | group_by(.CUDA_VER|split(".")|map(tonumber)|.[0]) | map(max_by([(.PY_VER|split(".")|map(tonumber)), (.CUDA_VER|split(".")|map(tonumber))])) + build_type: pull-request + # This always runs, but only fails if this PR touches code in + # pylibcudf or cudf_polars + script: "ci/test_cudf_polars.sh" wheel-build-dask-cudf: needs: wheel-build-cudf secrets: inherit diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 98c2ec0a22e..4fbc28fa6e1 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -71,15 +71,14 @@ for a minimal build of libcudf without using conda are also listed below. Compilers: -* `gcc` version 9.3+ -* `nvcc` version 11.5+ -* `cmake` version 3.26.4+ +* `gcc` version 11.4+ +* `nvcc` version 11.8+ +* `cmake` version 3.29.6+ -CUDA/GPU: +CUDA/GPU Runtime: -* CUDA 11.5+ -* NVIDIA driver 450.80.02+ -* Volta architecture or better (Compute Capability >=7.0) +* CUDA 11.4+ +* Volta architecture or better ([Compute Capability](https://docs.nvidia.com/deploy/cuda-compatibility/) >=7.0) You can obtain CUDA from [https://developer.nvidia.com/cuda-downloads](https://developer.nvidia.com/cuda-downloads). diff --git a/ci/test_cudf_polars.sh b/ci/test_cudf_polars.sh new file mode 100755 index 00000000000..669e049ab26 --- /dev/null +++ b/ci/test_cudf_polars.sh @@ -0,0 +1,68 @@ +#!/bin/bash +# Copyright (c) 2024, NVIDIA CORPORATION. + +set -eou pipefail + +# We will only fail these tests if the PR touches code in pylibcudf +# or cudf_polars itself. +# Note, the three dots mean we are doing diff between the merge-base +# of upstream and HEAD. So this is asking, "does _this branch_ touch +# files in cudf_polars/pylibcudf", rather than "are there changes +# between upstream and this branch which touch cudf_polars/pylibcudf" +# TODO: is the target branch exposed anywhere in an environment variable? 
+if [ -n "$(git diff --name-only origin/branch-24.08...HEAD -- python/cudf_polars/ python/cudf/cudf/_lib/pylibcudf/)" ]; +then + HAS_CHANGES=1 +else + HAS_CHANGES=0 +fi + +RAPIDS_PY_CUDA_SUFFIX="$(rapids-wheel-ctk-name-gen ${RAPIDS_CUDA_VERSION})" +RAPIDS_PY_WHEEL_NAME="cudf_${RAPIDS_PY_CUDA_SUFFIX}" rapids-download-wheels-from-s3 ./dist + +RESULTS_DIR=${RAPIDS_TESTS_DIR:-"$(mktemp -d)"} +RAPIDS_TESTS_DIR=${RAPIDS_TESTS_DIR:-"${RESULTS_DIR}/test-results"}/ +mkdir -p "${RAPIDS_TESTS_DIR}" + +rapids-logger "Install cudf wheel" +# echo to expand wildcard before adding `[extra]` requires for pip +python -m pip install $(echo ./dist/cudf*.whl)[test] + +rapids-logger "Install polars (allow pre-release versions)" +python -m pip install 'polars>=1.0.0a0' + +rapids-logger "Install cudf_polars" +python -m pip install --no-deps python/cudf_polars + +rapids-logger "Run cudf_polars tests" + +function set_exitcode() +{ + EXITCODE=$? +} +EXITCODE=0 +trap set_exitcode ERR +set +e + +python -m pytest \ + --cache-clear \ + --cov cudf_polars \ + --cov-fail-under=100 \ + --cov-config=python/cudf_polars/pyproject.toml \ + --junitxml="${RAPIDS_TESTS_DIR}/junit-cudf_polars.xml" \ + python/cudf_polars/tests + +trap ERR +set -e + +if [ ${EXITCODE} != 0 ]; then + rapids-logger "Testing FAILED: exitcode ${EXITCODE}" +else + rapids-logger "Testing PASSED" +fi + +if [ ${HAS_CHANGES} == 1 ]; then + exit ${EXITCODE} +else + exit 0 +fi diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index 5fd68bfb26c..35cf90411f2 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -662,6 +662,7 @@ add_library( src/unary/math_ops.cu src/unary/nan_ops.cu src/unary/null_ops.cu + src/utilities/cuda_memcpy.cu src/utilities/default_stream.cpp src/utilities/linked_column.cpp src/utilities/logger.cpp diff --git a/cpp/benchmarks/io/parquet/parquet_reader_input.cpp b/cpp/benchmarks/io/parquet/parquet_reader_input.cpp index 019e0f30fe9..7563c823454 100644 --- a/cpp/benchmarks/io/parquet/parquet_reader_input.cpp +++ b/cpp/benchmarks/io/parquet/parquet_reader_input.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022-2023, NVIDIA CORPORATION. + * Copyright (c) 2022-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -59,20 +59,18 @@ void parquet_read_common(cudf::size_type num_rows_to_read, } template -void BM_parquet_read_data(nvbench::state& state, nvbench::type_list>) +void BM_parquet_read_data_common(nvbench::state& state, + data_profile const& profile, + nvbench::type_list>) { auto const d_type = get_type_or_group(static_cast(DataType)); - auto const cardinality = static_cast(state.get_int64("cardinality")); - auto const run_length = static_cast(state.get_int64("run_length")); auto const source_type = retrieve_io_type_enum(state.get_string("io_type")); auto const compression = cudf::io::compression_type::SNAPPY; cuio_source_sink_pair source_sink(source_type); auto const num_rows_written = [&]() { - auto const tbl = create_random_table( - cycle_dtypes(d_type, num_cols), - table_size_bytes{data_size}, - data_profile_builder().cardinality(cardinality).avg_run_length(run_length)); + auto const tbl = + create_random_table(cycle_dtypes(d_type, num_cols), table_size_bytes{data_size}, profile); auto const view = tbl->view(); cudf::io::parquet_writer_options write_opts = @@ -85,6 +83,32 @@ void BM_parquet_read_data(nvbench::state& state, nvbench::type_list +void BM_parquet_read_data(nvbench::state& state, + nvbench::type_list> type_list) +{ + auto const cardinality = static_cast(state.get_int64("cardinality")); + auto const run_length = static_cast(state.get_int64("run_length")); + BM_parquet_read_data_common( + state, data_profile_builder().cardinality(cardinality).avg_run_length(run_length), type_list); +} + +template +void BM_parquet_read_fixed_width_struct(nvbench::state& state, + nvbench::type_list> type_list) +{ + auto const cardinality = static_cast(state.get_int64("cardinality")); + auto const run_length = static_cast(state.get_int64("run_length")); + std::vector s_types{ + cudf::type_id::INT32, cudf::type_id::FLOAT32, cudf::type_id::INT64}; + BM_parquet_read_data_common(state, + data_profile_builder() + .cardinality(cardinality) + .avg_run_length(run_length) + .struct_types(s_types), + type_list); +} + void BM_parquet_read_io_compression(nvbench::state& state) { auto const d_type = get_type_or_group({static_cast(data_type::INTEGRAL), @@ -247,3 +271,13 @@ NVBENCH_BENCH(BM_parquet_read_io_small_mixed) .add_int64_axis("cardinality", {0, 1000}) .add_int64_axis("run_length", {1, 32}) .add_int64_axis("num_string_cols", {1, 2, 3}); + +// a benchmark for structs that only contain fixed-width types +using d_type_list_struct_only = nvbench::enum_type_list; +NVBENCH_BENCH_TYPES(BM_parquet_read_fixed_width_struct, NVBENCH_TYPE_AXES(d_type_list_struct_only)) + .set_name("parquet_read_fixed_width_struct") + .set_type_axes_names({"data_type"}) + .add_string_axis("io_type", {"DEVICE_BUFFER"}) + .set_min_samples(4) + .add_int64_axis("cardinality", {0, 1000}) + .add_int64_axis("run_length", {1, 32}); diff --git a/cpp/doxygen/developer_guide/DEVELOPER_GUIDE.md b/cpp/doxygen/developer_guide/DEVELOPER_GUIDE.md index ff80c2daab8..0d097541692 100644 --- a/cpp/doxygen/developer_guide/DEVELOPER_GUIDE.md +++ b/cpp/doxygen/developer_guide/DEVELOPER_GUIDE.md @@ -1,4 +1,4 @@ -# libcudf C++ Developer Guide +# libcudf C++ Developer Guide {#DEVELOPER_GUIDE} This document serves as a guide for contributors to libcudf C++ code. Developers should also refer to these additional files for further documentation of libcudf best practices. 
@@ -469,7 +469,7 @@ libcudf throws under different circumstances, see the [section on error handling # libcudf API and Implementation -## Streams +## Streams {#streams} libcudf is in the process of adding support for asynchronous execution using CUDA streams. In order to facilitate the usage of streams, all new libcudf APIs @@ -486,33 +486,37 @@ use only asynchronous versions of CUDA APIs with the stream parameter. In order to make the `detail` API callable from other libcudf functions, it should be exposed in a header placed in the `cudf/cpp/include/detail/` directory. +The declaration is not necessary if no other libcudf functions call the `detail` function. For example: ```c++ // cpp/include/cudf/header.hpp -void external_function(...); +void external_function(..., + rmm::cuda_stream_view stream = cudf::get_default_stream(), + rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); // cpp/include/cudf/detail/header.hpp namespace detail{ -void external_function(..., rmm::cuda_stream_view stream) +void external_function(..., rmm::cuda_stream_view stream, rmm::device_async_resource_ref mr) } // namespace detail // cudf/src/implementation.cpp namespace detail{ - // Use the stream parameter in the detail implementation. - void external_function(..., rmm::cuda_stream_view stream){ - // Implementation uses the stream with async APIs. - rmm::device_buffer buff(...,stream); - CUDF_CUDA_TRY(cudaMemcpyAsync(...,stream.value())); - kernel<<<..., stream>>>(...); - thrust::algorithm(rmm::exec_policy(stream), ...); - } +// Use the stream parameter in the detail implementation. +void external_function(..., rmm::cuda_stream_view stream, rmm::device_async_resource_ref mr){ + // Implementation uses the stream with async APIs. + rmm::device_buffer buff(..., stream, mr); + CUDF_CUDA_TRY(cudaMemcpyAsync(...,stream.value())); + kernel<<<..., stream>>>(...); + thrust::algorithm(rmm::exec_policy(stream), ...); +} } // namespace detail -void external_function(...){ - CUDF_FUNC_RANGE(); // Generates an NVTX range for the lifetime of this function. - detail::external_function(..., cudf::get_default_stream()); +void external_function(..., rmm::cuda_stream_view stream, rmm::device_async_resource_ref mr) +{ + CUDF_FUNC_RANGE(); // Generates an NVTX range for the lifetime of this function. + detail::external_function(..., stream, mr); } ``` @@ -703,28 +707,28 @@ The preferred style for how inputs are passed in and outputs are returned is the - `column_view const&` - Tables: - `table_view const&` - - Scalar: - - `scalar const&` - - Everything else: - - Trivial or inexpensively copied types - - Pass by value - - Non-trivial or expensive to copy types - - Pass by `const&` + - Scalar: + - `scalar const&` + - Everything else: + - Trivial or inexpensively copied types + - Pass by value + - Non-trivial or expensive to copy types + - Pass by `const&` - In/Outs - Columns: - `mutable_column_view&` - Tables: - `mutable_table_view&` - - Everything else: - - Pass by via raw pointer + - Everything else: + - Pass by via raw pointer - Outputs - Outputs should be *returned*, i.e., no output parameters - Columns: - `std::unique_ptr` - Tables: - `std::unique_ptr` - - Scalars: - - `std::unique_ptr` + - Scalars: + - `std::unique_ptr` ### Multiple Return Values @@ -908,6 +912,10 @@ functions that are specific to columns of Strings. These functions reside in the namespace. Similarly, functionality used exclusively for unit testing is in the `cudf::test::` namespace. 
+The public function is expected to contain a call to `CUDF_FUNC_RANGE()` followed by a call to +a `detail` function with same name and parameters as the public function. +See the [Streams](#streams) section for an example of this pattern. + ### Internal Many functions are not meant for public use, so place them in either the `detail` or an *anonymous* diff --git a/cpp/include/cudf/binaryop.hpp b/cpp/include/cudf/binaryop.hpp index 5e41a871f32..22dad11e109 100644 --- a/cpp/include/cudf/binaryop.hpp +++ b/cpp/include/cudf/binaryop.hpp @@ -91,6 +91,56 @@ enum class binary_operator : int32_t { ///< (null, false) is null, and (valid, valid) == LOGICAL_OR(valid, valid) INVALID_BINARY ///< invalid operation }; + +/// Binary operation common type default +template +struct binary_op_common_type {}; + +/// Binary operation common type specialization +template +struct binary_op_common_type>> { + /// The common type of the template parameters + using type = std::common_type_t; +}; + +/// Binary operation common type specialization +template +struct binary_op_common_type< + L, + R, + std::enable_if_t() && cuda::std::is_floating_point_v>> { + /// The common type of the template parameters + using type = L; +}; + +/// Binary operation common type specialization +template +struct binary_op_common_type< + L, + R, + std::enable_if_t() && cuda::std::is_floating_point_v>> { + /// The common type of the template parameters + using type = R; +}; + +/// Binary operation common type helper +template +using binary_op_common_type_t = typename binary_op_common_type::type; + +namespace detail { +template +struct binary_op_has_common_type_impl : std::false_type {}; + +template +struct binary_op_has_common_type_impl>, L, R> + : std::true_type {}; +} // namespace detail + +/// Checks if binary operation types have a common type +template +constexpr inline bool binary_op_has_common_type_v = + detail::binary_op_has_common_type_impl::value; + /** * @brief Performs a binary operation between a scalar and a column. 
* diff --git a/cpp/include/cudf/detail/copy_if.cuh b/cpp/include/cudf/detail/copy_if.cuh index c98057d077a..b6310e6cd2f 100644 --- a/cpp/include/cudf/detail/copy_if.cuh +++ b/cpp/include/cudf/detail/copy_if.cuh @@ -18,7 +18,7 @@ #include #include -#include +#include #include #include #include @@ -242,8 +242,8 @@ struct scatter_gather_functor { rmm::cuda_stream_view stream, rmm::device_async_resource_ref mr) { - auto output_column = cudf::detail::allocate_like( - input, output_size, cudf::mask_allocation_policy::RETAIN, stream, mr); + auto output_column = + cudf::allocate_like(input, output_size, cudf::mask_allocation_policy::RETAIN, stream, mr); auto output = output_column->mutable_view(); bool has_valid = input.nullable(); diff --git a/cpp/include/cudf/detail/gather.cuh b/cpp/include/cudf/detail/gather.cuh index c9d350ce983..5977c7341c1 100644 --- a/cpp/include/cudf/detail/gather.cuh +++ b/cpp/include/cudf/detail/gather.cuh @@ -15,7 +15,7 @@ */ #pragma once -#include +#include #include #include #include @@ -217,10 +217,9 @@ struct column_gatherer_impl(), source_column.size(), @@ -413,8 +412,8 @@ struct column_gatherer_impl { auto keys_copy = std::make_unique(dictionary.keys(), stream, mr); // Perform gather on just the indices column_view indices = dictionary.get_indices_annotated(); - auto new_indices = cudf::detail::allocate_like( - indices, output_count, cudf::mask_allocation_policy::NEVER, stream, mr); + auto new_indices = + cudf::allocate_like(indices, output_count, cudf::mask_allocation_policy::NEVER, stream, mr); gather_helper( cudf::detail::indexalator_factory::make_input_iterator(indices), indices.size(), diff --git a/cpp/include/cudf/detail/stream_compaction.hpp b/cpp/include/cudf/detail/stream_compaction.hpp index e2974789ea1..e3ef4190fd2 100644 --- a/cpp/include/cudf/detail/stream_compaction.hpp +++ b/cpp/include/cudf/detail/stream_compaction.hpp @@ -88,8 +88,6 @@ std::unique_ptr
distinct(table_view const& input, /** * @copydoc cudf::stable_distinct - * - * @param stream CUDA stream used for device memory operations and kernel launches. */ std::unique_ptr
stable_distinct(table_view const& input, std::vector const& keys, diff --git a/cpp/include/cudf/detail/utilities/cuda_memcpy.hpp b/cpp/include/cudf/detail/utilities/cuda_memcpy.hpp new file mode 100644 index 00000000000..b66c461ab12 --- /dev/null +++ b/cpp/include/cudf/detail/utilities/cuda_memcpy.hpp @@ -0,0 +1,53 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include + +namespace cudf::detail { + +enum class host_memory_kind : uint8_t { PINNED, PAGEABLE }; + +/** + * @brief Asynchronously copies data between the host and device. + * + * Implementation may use different strategies depending on the size and type of host data. + * + * @param dst Destination memory address + * @param src Source memory address + * @param size Number of bytes to copy + * @param kind Type of host memory + * @param stream CUDA stream used for the copy + */ +void cuda_memcpy_async( + void* dst, void const* src, size_t size, host_memory_kind kind, rmm::cuda_stream_view stream); + +/** + * @brief Synchronously copies data between the host and device. + * + * Implementation may use different strategies depending on the size and type of host data. + * + * @param dst Destination memory address + * @param src Source memory address + * @param size Number of bytes to copy + * @param kind Type of host memory + * @param stream CUDA stream used for the copy + */ +void cuda_memcpy( + void* dst, void const* src, size_t size, host_memory_kind kind, rmm::cuda_stream_view stream); + +} // namespace cudf::detail diff --git a/cpp/include/cudf/lists/lists_column_view.hpp b/cpp/include/cudf/lists/lists_column_view.hpp index 57a4f724c2d..3397cb0ca1d 100644 --- a/cpp/include/cudf/lists/lists_column_view.hpp +++ b/cpp/include/cudf/lists/lists_column_view.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020-2023, NVIDIA CORPORATION. + * Copyright (c) 2020-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -38,6 +38,7 @@ namespace cudf { */ class lists_column_view : private column_view { public: + lists_column_view() = default; /** * @brief Construct a new lists column view object from a column view. * diff --git a/cpp/include/cudf/stream_compaction.hpp b/cpp/include/cudf/stream_compaction.hpp index c386b3a22b4..181af11adb8 100644 --- a/cpp/include/cudf/stream_compaction.hpp +++ b/cpp/include/cudf/stream_compaction.hpp @@ -320,6 +320,7 @@ std::unique_ptr distinct_indices( * @param keep Copy any, first, last, or none of the found duplicates * @param nulls_equal Flag to specify whether null elements should be considered as equal * @param nans_equal Flag to specify whether NaN elements should be considered as equal + * @param stream CUDA stream used for device memory operations and kernel launches. 
* @param mr Device memory resource used to allocate the returned table * @return Table with distinct rows, preserving input order */ @@ -329,6 +330,7 @@ std::unique_ptr
stable_distinct( duplicate_keep_option keep = duplicate_keep_option::KEEP_ANY, null_equality nulls_equal = null_equality::EQUAL, nan_equality nans_equal = nan_equality::ALL_EQUAL, + rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); /** diff --git a/cpp/include/cudf/strings/detail/strings_children.cuh b/cpp/include/cudf/strings/detail/strings_children.cuh index f105a6dc546..f5f3982a5d6 100644 --- a/cpp/include/cudf/strings/detail/strings_children.cuh +++ b/cpp/include/cudf/strings/detail/strings_children.cuh @@ -21,6 +21,7 @@ #include #include #include +#include #include #include @@ -81,11 +82,11 @@ std::pair, int64_t> make_offsets_child_column( auto const total_bytes = cudf::detail::sizes_to_offsets(input_itr, input_itr + strings_count + 1, d_offsets, stream); - auto const threshold = get_offset64_threshold(); - CUDF_EXPECTS(is_large_strings_enabled() || (total_bytes < threshold), + auto const threshold = cudf::strings::get_offset64_threshold(); + CUDF_EXPECTS(cudf::strings::is_large_strings_enabled() || (total_bytes < threshold), "Size of output exceeds the column size limit", std::overflow_error); - if (total_bytes >= get_offset64_threshold()) { + if (total_bytes >= cudf::strings::get_offset64_threshold()) { // recompute as int64 offsets when above the threshold offsets_column = make_numeric_column( data_type{type_id::INT64}, strings_count + 1, mask_state::UNALLOCATED, stream, mr); diff --git a/cpp/include/cudf/strings/utilities.hpp b/cpp/include/cudf/strings/utilities.hpp new file mode 100644 index 00000000000..ae445282382 --- /dev/null +++ b/cpp/include/cudf/strings/utilities.hpp @@ -0,0 +1,62 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#pragma once + +#include +#include + +#include +#include + +namespace CUDF_EXPORT cudf { +namespace strings { + +/** + * @brief Creates a string_view vector from a strings column. + * + * @param strings Strings column instance. + * @param stream CUDA stream used for device memory operations and kernel launches. + * @param mr Device memory resource used to allocate the returned vector's device memory. + * @return Device vector of string_views + */ +rmm::device_uvector create_string_vector_from_column( + cudf::strings_column_view const strings, + rmm::cuda_stream_view stream = cudf::get_default_stream(), + rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); + +/** + * @brief Return the threshold size for a strings column to use int64 offsets + * + * A computed size above this threshold should use int64 offsets, otherwise + * int32 offsets. By default this function will return std::numeric_limits::max(). + * This value can be overridden at runtime using the environment variable + * LIBCUDF_LARGE_STRINGS_THRESHOLD.
+ * + * @return size in bytes + */ +int64_t get_offset64_threshold(); + +/** + * @brief Checks if large strings are enabled + * + * This checks the setting in the environment variable LIBCUDF_LARGE_STRINGS_ENABLED. + * + * @return true if large strings are supported + */ +bool is_large_strings_enabled(); + +} // namespace strings +} // namespace CUDF_EXPORT cudf diff --git a/cpp/include/cudf/utilities/pinned_memory.hpp b/cpp/include/cudf/utilities/pinned_memory.hpp index b423eab6d38..3e2fa43cb50 100644 --- a/cpp/include/cudf/utilities/pinned_memory.hpp +++ b/cpp/include/cudf/utilities/pinned_memory.hpp @@ -55,4 +55,20 @@ struct pinned_mr_options { */ bool config_default_pinned_memory_resource(pinned_mr_options const& opts); +/** + * @brief Set the threshold size for using kernels for pinned memory copies. + * + * @param threshold The threshold size in bytes. If the size of the copy is less than this + * threshold, the copy will be done using kernels. If the size is greater than or equal to this + * threshold, the copy will be done using cudaMemcpyAsync. + */ +void set_kernel_pinned_copy_threshold(size_t threshold); + +/** + * @brief Get the threshold size for using kernels for pinned memory copies. + * + * @return The threshold size in bytes. + */ +size_t get_kernel_pinned_copy_threshold(); + } // namespace cudf diff --git a/cpp/include/cudf_test/column_wrapper.hpp b/cpp/include/cudf_test/column_wrapper.hpp index 47d17988775..7363f965af8 100644 --- a/cpp/include/cudf_test/column_wrapper.hpp +++ b/cpp/include/cudf_test/column_wrapper.hpp @@ -314,7 +314,12 @@ auto make_chars_and_offsets(StringsIterator begin, StringsIterator end, Validity for (auto str = begin; str < end; ++str) { std::string tmp = (*v++) ? std::string(*str) : std::string{}; chars.insert(chars.end(), std::cbegin(tmp), std::cend(tmp)); - offsets.push_back(offsets.back() + tmp.length()); + auto const last_offset = static_cast(offsets.back()); + auto const next_offset = last_offset + tmp.length(); + CUDF_EXPECTS( + next_offset < static_cast(std::numeric_limits::max()), + "Cannot use strings_column_wrapper to build a large strings column"); + offsets.push_back(static_cast(next_offset)); } return std::pair(std::move(chars), std::move(offsets)); }; diff --git a/cpp/src/binaryop/compiled/binary_ops.cuh b/cpp/src/binaryop/compiled/binary_ops.cuh index 5177e7d4bda..c6af0c3c58a 100644 --- a/cpp/src/binaryop/compiled/binary_ops.cuh +++ b/cpp/src/binaryop/compiled/binary_ops.cuh @@ -49,9 +49,16 @@ struct type_casted_accessor { column_device_view const& col, bool is_scalar) const { - if constexpr (column_device_view::has_element_accessor() and - std::is_convertible_v) - return static_cast(col.element(is_scalar ? 0 : i)); + if constexpr (column_device_view::has_element_accessor()) { + auto const element = col.element(is_scalar ? 0 : i); + if constexpr (std::is_convertible_v) { + return static_cast(element); + } else if constexpr (is_fixed_point() && cuda::std::is_floating_point_v) { + return convert_fixed_to_floating(element); + } else if constexpr (is_fixed_point() && cuda::std::is_floating_point_v) { + return convert_floating_to_fixed(element, numeric::scale_type{0}); + } + } return {}; } }; @@ -159,6 +166,7 @@ struct ops2_wrapper { TypeRhs y = rhs.element(is_rhs_scalar ?
0 : i); auto result = [&]() { if constexpr (std::is_same_v or + std::is_same_v or std::is_same_v or std::is_same_v or std::is_same_v or diff --git a/cpp/src/binaryop/compiled/util.cpp b/cpp/src/binaryop/compiled/util.cpp index 2b6a4f58895..b62c5f1f4e1 100644 --- a/cpp/src/binaryop/compiled/util.cpp +++ b/cpp/src/binaryop/compiled/util.cpp @@ -31,8 +31,8 @@ struct common_type_functor { template std::optional operator()() const { - if constexpr (cudf::has_common_type_v) { - using TypeCommon = std::common_type_t; + if constexpr (binary_op_has_common_type_v) { + using TypeCommon = binary_op_common_type_t; return data_type{type_to_id()}; } @@ -85,8 +85,8 @@ struct is_binary_operation_supported { { if constexpr (column_device_view::has_element_accessor() and column_device_view::has_element_accessor()) { - if constexpr (has_common_type_v) { - using common_t = std::common_type_t; + if constexpr (binary_op_has_common_type_v) { + using common_t = binary_op_common_type_t; return std::is_invocable_v; } else { return std::is_invocable_v; @@ -102,8 +102,8 @@ struct is_binary_operation_supported { if constexpr (column_device_view::has_element_accessor() and column_device_view::has_element_accessor()) { if (has_mutable_element_accessor(out_type) or is_fixed_point(out_type)) { - if constexpr (has_common_type_v) { - using common_t = std::common_type_t; + if constexpr (binary_op_has_common_type_v) { + using common_t = binary_op_common_type_t; if constexpr (std::is_invocable_v) { using ReturnType = std::invoke_result_t; return is_constructible(out_type) or diff --git a/cpp/src/copying/sample.cu b/cpp/src/copying/sample.cu index f8e3a9a83e3..ba00527f6b6 100644 --- a/cpp/src/copying/sample.cu +++ b/cpp/src/copying/sample.cu @@ -16,6 +16,7 @@ #include #include +#include #include #include #include diff --git a/cpp/src/io/parquet/decode_fixed.cu b/cpp/src/io/parquet/decode_fixed.cu index bfd89200786..ea80ae73c2f 100644 --- a/cpp/src/io/parquet/decode_fixed.cu +++ b/cpp/src/io/parquet/decode_fixed.cu @@ -24,136 +24,11 @@ namespace cudf::io::parquet::detail { namespace { -constexpr int decode_block_size = 128; -constexpr int rolling_buf_size = decode_block_size * 2; -// the required number of runs in shared memory we will need to provide the -// rle_stream object -constexpr int rle_run_buffer_size = rle_stream_required_run_buffer_size(); - -template -static __device__ int gpuUpdateValidityOffsetsAndRowIndicesFlat( - int32_t target_value_count, page_state_s* s, state_buf* sb, level_t const* const def, int t) -{ - constexpr int num_warps = decode_block_size / cudf::detail::warp_size; - constexpr int max_batch_size = num_warps * cudf::detail::warp_size; - - auto& ni = s->nesting_info[0]; - - // how many (input) values we've processed in the page so far - int value_count = s->input_value_count; - int valid_count = ni.valid_count; - - // cap by last row so that we don't process any rows past what we want to output. - int const first_row = s->first_row; - int const last_row = first_row + s->num_rows; - int const capped_target_value_count = min(target_value_count, last_row); - - int const valid_map_offset = ni.valid_map_offset; - int const row_index_lower_bound = s->row_index_lower_bound; - - __syncthreads(); - - while (value_count < capped_target_value_count) { - int const batch_size = min(max_batch_size, capped_target_value_count - value_count); - - // definition level. only need to process for nullable columns - int d = 0; - if constexpr (nullable) { - d = t < batch_size - ? 
static_cast(def[rolling_index(value_count + t)]) - : -1; - } - - int const thread_value_count = t + 1; - int const block_value_count = batch_size; - - // compute our row index, whether we're in row bounds, and validity - int const row_index = (thread_value_count + value_count) - 1; - int const in_row_bounds = (row_index >= row_index_lower_bound) && (row_index < last_row); - int is_valid; - if constexpr (nullable) { - is_valid = ((d > 0) && in_row_bounds) ? 1 : 0; - } else { - is_valid = in_row_bounds; - } - - // thread and block validity count - int thread_valid_count, block_valid_count; - if constexpr (nullable) { - using block_scan = cub::BlockScan; - __shared__ typename block_scan::TempStorage scan_storage; - block_scan(scan_storage).InclusiveSum(is_valid, thread_valid_count, block_valid_count); - __syncthreads(); - - // validity is processed per-warp - // - // nested schemas always read and write to the same bounds (that is, read and write - // positions are already pre-bounded by first_row/num_rows). flat schemas will start reading - // at the first value, even if that is before first_row, because we cannot trivially jump to - // the correct position to start reading. since we are about to write the validity vector - // here we need to adjust our computed mask to take into account the write row bounds. - int const in_write_row_bounds = ballot(row_index >= first_row && row_index < last_row); - int const write_start = __ffs(in_write_row_bounds) - 1; // first bit in the warp to store - int warp_null_count = 0; - if (write_start >= 0) { - uint32_t const warp_validity_mask = ballot(is_valid); - // lane 0 from each warp writes out validity - if ((t % cudf::detail::warp_size) == 0) { - int const vindex = (value_count + thread_value_count) - 1; // absolute input value index - int const bit_offset = (valid_map_offset + vindex + write_start) - - first_row; // absolute bit offset into the output validity map - int const write_end = - cudf::detail::warp_size - __clz(in_write_row_bounds); // last bit in the warp to store - int const bit_count = write_end - write_start; - warp_null_count = bit_count - __popc(warp_validity_mask >> write_start); - - store_validity(bit_offset, ni.valid_map, warp_validity_mask >> write_start, bit_count); - } - } - - // sum null counts. we have to do it this way instead of just incrementing by (value_count - - // valid_count) because valid_count also includes rows that potentially start before our row - // bounds. if we could come up with a way to clean that up, we could remove this and just - // compute it directly at the end of the kernel. 
- size_type const block_null_count = - cudf::detail::single_lane_block_sum_reduce(warp_null_count); - if (t == 0) { ni.null_count += block_null_count; } - } - // trivial for non-nullable columns - else { - thread_valid_count = thread_value_count; - block_valid_count = block_value_count; - } - - // output offset - if (is_valid) { - int const dst_pos = (value_count + thread_value_count) - 1; - int const src_pos = (valid_count + thread_valid_count) - 1; - sb->nz_idx[rolling_index(src_pos)] = dst_pos; - } - - // update stuff - value_count += block_value_count; - valid_count += block_valid_count; - } - - if (t == 0) { - // update valid value count for decoding and total # of values we've processed - ni.valid_count = valid_count; - ni.value_count = value_count; - s->nz_count = valid_count; - s->input_value_count = value_count; - s->input_row_count = value_count; - } - - return valid_count; -} - -template -__device__ inline void gpuDecodeValues( +template +__device__ inline void gpuDecodeFixedWidthValues( page_state_s* s, state_buf* const sb, int start, int end, int t) { - constexpr int num_warps = decode_block_size / cudf::detail::warp_size; + constexpr int num_warps = block_size / cudf::detail::warp_size; constexpr int max_batch_size = num_warps * cudf::detail::warp_size; PageNestingDecodeInfo* nesting_info_base = s->nesting_info; @@ -217,18 +92,22 @@ __device__ inline void gpuDecodeValues( } } -template -__device__ inline void gpuDecodeSplitValues(page_state_s* s, - state_buf* const sb, - int start, - int end) +template +struct decode_fixed_width_values_func { + __device__ inline void operator()(page_state_s* s, state_buf* const sb, int start, int end, int t) + { + gpuDecodeFixedWidthValues(s, sb, start, end, t); + } +}; + +template +__device__ inline void gpuDecodeFixedWidthSplitValues( + page_state_s* s, state_buf* const sb, int start, int end, int t) { using cudf::detail::warp_size; - constexpr int num_warps = decode_block_size / warp_size; + constexpr int num_warps = block_size / warp_size; constexpr int max_batch_size = num_warps * warp_size; - auto const t = threadIdx.x; - PageNestingDecodeInfo* nesting_info_base = s->nesting_info; int const dtype = s->col.physical_type; auto const data_len = thrust::distance(s->data_start, s->data_end); @@ -307,266 +186,293 @@ __device__ inline void gpuDecodeSplitValues(page_state_s* s, } } -// is the page marked nullable or not -__device__ inline bool is_nullable(page_state_s* s) -{ - auto const lvl = level_type::DEFINITION; - auto const max_def_level = s->col.max_level[lvl]; - return max_def_level > 0; -} +template +struct decode_fixed_width_split_values_func { + __device__ inline void operator()(page_state_s* s, state_buf* const sb, int start, int end, int t) + { + gpuDecodeFixedWidthSplitValues(s, sb, start, end, t); + } +}; -// for a nullable page, check to see if it could have nulls -__device__ inline bool has_nulls(page_state_s* s) +template +static __device__ int gpuUpdateValidityAndRowIndicesNested( + int32_t target_value_count, page_state_s* s, state_buf* sb, level_t const* const def, int t) { - auto const lvl = level_type::DEFINITION; - auto const init_run = s->initial_rle_run[lvl]; - // literal runs, lets assume they could hold nulls - if (is_literal_run(init_run)) { return true; } - - // repeated run with number of items in the run not equal - // to the rows in the page, assume that means we could have nulls - if (s->page.num_input_values != (init_run >> 1)) { return true; } - - auto const lvl_bits = s->col.level_bits[lvl]; - auto const 
run_val = lvl_bits == 0 ? 0 : s->initial_rle_value[lvl]; - - // the encoded repeated value isn't valid, we have (all) nulls - return run_val != s->col.max_level[lvl]; -} + constexpr int num_warps = decode_block_size / cudf::detail::warp_size; + constexpr int max_batch_size = num_warps * cudf::detail::warp_size; -/** - * @brief Kernel for computing fixed width non dictionary column data stored in the pages - * - * This function will write the page data and the page data's validity to the - * output specified in the page's column chunk. If necessary, additional - * conversion will be performed to translate from the Parquet datatype to - * desired output datatype. - * - * @param pages List of pages - * @param chunks List of column chunks - * @param min_row Row index to start reading at - * @param num_rows Maximum number of rows to read - * @param error_code Error code to set if an error is encountered - */ -template -CUDF_KERNEL void __launch_bounds__(decode_block_size) - gpuDecodePageDataFixed(PageInfo* pages, - device_span chunks, - size_t min_row, - size_t num_rows, - kernel_error::pointer error_code) -{ - __shared__ __align__(16) page_state_s state_g; - __shared__ __align__(16) page_state_buffers_s // unused in this kernel - state_buffers; + // how many (input) values we've processed in the page so far + int value_count = s->input_value_count; - page_state_s* const s = &state_g; - auto* const sb = &state_buffers; - int const page_idx = blockIdx.x; - int const t = threadIdx.x; - PageInfo* pp = &pages[page_idx]; + // cap by last row so that we don't process any rows past what we want to output. + int const first_row = s->first_row; + int const last_row = first_row + s->num_rows; + int const capped_target_value_count = min(target_value_count, last_row); - if (!(BitAnd(pages[page_idx].kernel_mask, decode_kernel_mask::FIXED_WIDTH_NO_DICT))) { return; } + int const row_index_lower_bound = s->row_index_lower_bound; - // must come after the kernel mask check - [[maybe_unused]] null_count_back_copier _{s, t}; + int const max_depth = s->col.max_nesting_depth - 1; + __syncthreads(); - if (!setupLocalPageInfo(s, - pp, - chunks, - min_row, - num_rows, - mask_filter{decode_kernel_mask::FIXED_WIDTH_NO_DICT}, - page_processing_stage::DECODE)) { - return; - } + while (value_count < capped_target_value_count) { + int const batch_size = min(max_batch_size, capped_target_value_count - value_count); - // the level stream decoders - __shared__ rle_run def_runs[rle_run_buffer_size]; - rle_stream def_decoder{def_runs}; + // definition level. only need to process for nullable columns + int d = 0; + if constexpr (nullable) { + if (def) { + d = t < batch_size + ? static_cast(def[rolling_index(value_count + t)]) + : -1; + } else { + d = t < batch_size ? 1 : -1; + } + } - // if we have no work to do (eg, in a skip_rows/num_rows case) in this page. 
- if (s->num_rows == 0) { return; } + int const thread_value_count = t + 1; + int const block_value_count = batch_size; - bool const nullable = is_nullable(s); - bool const nullable_with_nulls = nullable && has_nulls(s); + // compute our row index, whether we're in row bounds, and validity + int const row_index = (thread_value_count + value_count) - 1; + int const in_row_bounds = (row_index >= row_index_lower_bound) && (row_index < last_row); + int const in_write_row_bounds = ballot(row_index >= first_row && row_index < last_row); + int const write_start = __ffs(in_write_row_bounds) - 1; // first bit in the warp to store + + // iterate by depth + for (int d_idx = 0; d_idx <= max_depth; d_idx++) { + auto& ni = s->nesting_info[d_idx]; + + int is_valid; + if constexpr (nullable) { + is_valid = ((d >= ni.max_def_level) && in_row_bounds) ? 1 : 0; + } else { + is_valid = in_row_bounds; + } - // initialize the stream decoders (requires values computed in setupLocalPageInfo) - level_t* const def = reinterpret_cast(pp->lvl_decode_buf[level_type::DEFINITION]); - if (nullable_with_nulls) { - def_decoder.init(s->col.level_bits[level_type::DEFINITION], - s->abs_lvl_start[level_type::DEFINITION], - s->abs_lvl_end[level_type::DEFINITION], - def, - s->page.num_input_values); - } - __syncthreads(); + // thread and block validity count + int thread_valid_count, block_valid_count; + if constexpr (nullable) { + using block_scan = cub::BlockScan; + __shared__ typename block_scan::TempStorage scan_storage; + block_scan(scan_storage).InclusiveSum(is_valid, thread_valid_count, block_valid_count); + __syncthreads(); + + // validity is processed per-warp + // + // nested schemas always read and write to the same bounds (that is, read and write + // positions are already pre-bounded by first_row/num_rows). flat schemas will start reading + // at the first value, even if that is before first_row, because we cannot trivially jump to + // the correct position to start reading. since we are about to write the validity vector + // here we need to adjust our computed mask to take into account the write row bounds. + int warp_null_count = 0; + if (write_start >= 0 && ni.valid_map != nullptr) { + int const valid_map_offset = ni.valid_map_offset; + uint32_t const warp_validity_mask = ballot(is_valid); + // lane 0 from each warp writes out validity + if ((t % cudf::detail::warp_size) == 0) { + int const vindex = + (value_count + thread_value_count) - 1; // absolute input value index + int const bit_offset = (valid_map_offset + vindex + write_start) - + first_row; // absolute bit offset into the output validity map + int const write_end = cudf::detail::warp_size - + __clz(in_write_row_bounds); // last bit in the warp to store + int const bit_count = write_end - write_start; + warp_null_count = bit_count - __popc(warp_validity_mask >> write_start); + + store_validity(bit_offset, ni.valid_map, warp_validity_mask >> write_start, bit_count); + } + } - // We use two counters in the loop below: processed_count and valid_count. - // - processed_count: number of rows out of num_input_values that we have decoded so far. - // the definition stream returns the number of total rows it has processed in each call - // to decode_next and we accumulate in process_count. - // - valid_count: number of non-null rows we have decoded so far. In each iteration of the - // loop below, we look at the number of valid items (which could be all for non-nullable), - // and valid_count is that running count. 
- int processed_count = 0; - int valid_count = 0; - // the core loop. decode batches of level stream data using rle_stream objects - // and pass the results to gpuDecodeValues - while (s->error == 0 && processed_count < s->page.num_input_values) { - int next_valid_count; + // sum null counts. we have to do it this way instead of just incrementing by (value_count - + // valid_count) because valid_count also includes rows that potentially start before our row + // bounds. if we could come up with a way to clean that up, we could remove this and just + // compute it directly at the end of the kernel. + size_type const block_null_count = + cudf::detail::single_lane_block_sum_reduce(warp_null_count); + if (t == 0) { ni.null_count += block_null_count; } + } + // trivial for non-nullable columns + else { + thread_valid_count = thread_value_count; + block_valid_count = block_value_count; + } - // only need to process definition levels if the column has nulls - if (nullable_with_nulls) { - processed_count += def_decoder.decode_next(t); - __syncthreads(); + // if this is valid and we're at the leaf, output dst_pos + __syncthreads(); // handle modification of ni.value_count from below + if (is_valid && d_idx == max_depth) { + // for non-list types, the value count is always the same across + int const dst_pos = (value_count + thread_value_count) - 1; + int const src_pos = (ni.valid_count + thread_valid_count) - 1; + sb->nz_idx[rolling_index(src_pos)] = dst_pos; + } + __syncthreads(); // handle modification of ni.value_count from below - next_valid_count = - gpuUpdateValidityOffsetsAndRowIndicesFlat(processed_count, s, sb, def, t); + // update stuff + if (t == 0) { ni.valid_count += block_valid_count; } } - // if we wanted to split off the skip_rows/num_rows case into a separate kernel, we could skip - // this function call entirely since all it will ever generate is a mapping of (i -> i) for - // nz_idx. gpuDecodeValues would be the only work that happens. - else { - processed_count += min(rolling_buf_size, s->page.num_input_values - processed_count); - next_valid_count = gpuUpdateValidityOffsetsAndRowIndicesFlat( - processed_count, s, sb, nullptr, t); - } - __syncthreads(); - // decode the values themselves - gpuDecodeValues(s, sb, valid_count, next_valid_count, t); - __syncthreads(); + value_count += block_value_count; + } - valid_count = next_valid_count; + if (t == 0) { + // update valid value count for decoding and total # of values we've processed + s->nz_count = s->nesting_info[max_depth].valid_count; + s->input_value_count = value_count; + s->input_row_count = value_count; } - if (t == 0 and s->error != 0) { set_error(s->error, error_code); } + + __syncthreads(); + return s->nesting_info[max_depth].valid_count; } -/** - * @brief Kernel for computing fixed width dictionary column data stored in the pages - * - * This function will write the page data and the page data's validity to the - * output specified in the page's column chunk. If necessary, additional - * conversion will be performed to translate from the Parquet datatype to - * desired output datatype. 
- * - * @param pages List of pages - * @param chunks List of column chunks - * @param min_row Row index to start reading at - * @param num_rows Maximum number of rows to read - * @param error_code Error code to set if an error is encountered - */ -template -CUDF_KERNEL void __launch_bounds__(decode_block_size) - gpuDecodePageDataFixedDict(PageInfo* pages, - device_span chunks, - size_t min_row, - size_t num_rows, - kernel_error::pointer error_code) +template +static __device__ int gpuUpdateValidityAndRowIndicesFlat( + int32_t target_value_count, page_state_s* s, state_buf* sb, level_t const* const def, int t) { - __shared__ __align__(16) page_state_s state_g; - __shared__ __align__(16) page_state_buffers_s // unused in this kernel - state_buffers; - - page_state_s* const s = &state_g; - auto* const sb = &state_buffers; - int const page_idx = blockIdx.x; - int const t = threadIdx.x; - PageInfo* pp = &pages[page_idx]; + constexpr int num_warps = decode_block_size / cudf::detail::warp_size; + constexpr int max_batch_size = num_warps * cudf::detail::warp_size; - if (!(BitAnd(pages[page_idx].kernel_mask, decode_kernel_mask::FIXED_WIDTH_DICT))) { return; } + auto& ni = s->nesting_info[0]; - // must come after the kernel mask check - [[maybe_unused]] null_count_back_copier _{s, t}; + // how many (input) values we've processed in the page so far + int value_count = s->input_value_count; + int valid_count = ni.valid_count; - if (!setupLocalPageInfo(s, - pp, - chunks, - min_row, - num_rows, - mask_filter{decode_kernel_mask::FIXED_WIDTH_DICT}, - page_processing_stage::DECODE)) { - return; - } + // cap by last row so that we don't process any rows past what we want to output. + int const first_row = s->first_row; + int const last_row = first_row + s->num_rows; + int const capped_target_value_count = min(target_value_count, last_row); - __shared__ rle_run def_runs[rle_run_buffer_size]; - rle_stream def_decoder{def_runs}; + int const valid_map_offset = ni.valid_map_offset; + int const row_index_lower_bound = s->row_index_lower_bound; - __shared__ rle_run dict_runs[rle_run_buffer_size]; - rle_stream dict_stream{dict_runs}; + __syncthreads(); - // if we have no work to do (eg, in a skip_rows/num_rows case) in this page. - if (s->num_rows == 0) { return; } + while (value_count < capped_target_value_count) { + int const batch_size = min(max_batch_size, capped_target_value_count - value_count); - bool const nullable = is_nullable(s); - bool const nullable_with_nulls = nullable && has_nulls(s); + // definition level. only need to process for nullable columns + int d = 0; + if constexpr (nullable) { + if (def) { + d = t < batch_size + ? static_cast(def[rolling_index(value_count + t)]) + : -1; + } else { + d = t < batch_size ? 
1 : -1; + } + } - // initialize the stream decoders (requires values computed in setupLocalPageInfo) - level_t* const def = reinterpret_cast(pp->lvl_decode_buf[level_type::DEFINITION]); - if (nullable_with_nulls) { - def_decoder.init(s->col.level_bits[level_type::DEFINITION], - s->abs_lvl_start[level_type::DEFINITION], - s->abs_lvl_end[level_type::DEFINITION], - def, - s->page.num_input_values); - } + int const thread_value_count = t + 1; + int const block_value_count = batch_size; - dict_stream.init( - s->dict_bits, s->data_start, s->data_end, sb->dict_idx, s->page.num_input_values); - __syncthreads(); + // compute our row index, whether we're in row bounds, and validity + int const row_index = (thread_value_count + value_count) - 1; + int const in_row_bounds = (row_index >= row_index_lower_bound) && (row_index < last_row); + int is_valid; + if constexpr (nullable) { + is_valid = ((d > 0) && in_row_bounds) ? 1 : 0; + } else { + is_valid = in_row_bounds; + } - // We use two counters in the loop below: processed_count and valid_count. - // - processed_count: number of rows out of num_input_values that we have decoded so far. - // the definition stream returns the number of total rows it has processed in each call - // to decode_next and we accumulate in process_count. - // - valid_count: number of non-null rows we have decoded so far. In each iteration of the - // loop below, we look at the number of valid items (which could be all for non-nullable), - // and valid_count is that running count. - int processed_count = 0; - int valid_count = 0; + // thread and block validity count + int thread_valid_count, block_valid_count; + if constexpr (nullable) { + using block_scan = cub::BlockScan; + __shared__ typename block_scan::TempStorage scan_storage; + block_scan(scan_storage).InclusiveSum(is_valid, thread_valid_count, block_valid_count); + __syncthreads(); - // the core loop. decode batches of level stream data using rle_stream objects - // and pass the results to gpuDecodeValues - while (s->error == 0 && processed_count < s->page.num_input_values) { - int next_valid_count; + // validity is processed per-warp + // + // nested schemas always read and write to the same bounds (that is, read and write + // positions are already pre-bounded by first_row/num_rows). flat schemas will start reading + // at the first value, even if that is before first_row, because we cannot trivially jump to + // the correct position to start reading. since we are about to write the validity vector + // here we need to adjust our computed mask to take into account the write row bounds. 
+ int const in_write_row_bounds = ballot(row_index >= first_row && row_index < last_row); + int const write_start = __ffs(in_write_row_bounds) - 1; // first bit in the warp to store + int warp_null_count = 0; + if (write_start >= 0) { + uint32_t const warp_validity_mask = ballot(is_valid); + // lane 0 from each warp writes out validity + if ((t % cudf::detail::warp_size) == 0) { + int const vindex = (value_count + thread_value_count) - 1; // absolute input value index + int const bit_offset = (valid_map_offset + vindex + write_start) - + first_row; // absolute bit offset into the output validity map + int const write_end = + cudf::detail::warp_size - __clz(in_write_row_bounds); // last bit in the warp to store + int const bit_count = write_end - write_start; + warp_null_count = bit_count - __popc(warp_validity_mask >> write_start); - // only need to process definition levels if the column has nulls - if (nullable_with_nulls) { - processed_count += def_decoder.decode_next(t); - __syncthreads(); + store_validity(bit_offset, ni.valid_map, warp_validity_mask >> write_start, bit_count); + } + } - // count of valid items in this batch - next_valid_count = - gpuUpdateValidityOffsetsAndRowIndicesFlat(processed_count, s, sb, def, t); + // sum null counts. we have to do it this way instead of just incrementing by (value_count - + // valid_count) because valid_count also includes rows that potentially start before our row + // bounds. if we could come up with a way to clean that up, we could remove this and just + // compute it directly at the end of the kernel. + size_type const block_null_count = + cudf::detail::single_lane_block_sum_reduce(warp_null_count); + if (t == 0) { ni.null_count += block_null_count; } } - // if we wanted to split off the skip_rows/num_rows case into a separate kernel, we could skip - // this function call entirely since all it will ever generate is a mapping of (i -> i) for - // nz_idx. gpuDecodeValues would be the only work that happens. + // trivial for non-nullable columns else { - processed_count += min(rolling_buf_size, s->page.num_input_values - processed_count); - next_valid_count = gpuUpdateValidityOffsetsAndRowIndicesFlat( - processed_count, s, sb, nullptr, t); + thread_valid_count = thread_value_count; + block_valid_count = block_value_count; } - __syncthreads(); - // We want to limit the number of dictionary items we decode, that correspond to - // the rows we have processed in this iteration that are valid. - // We know the number of valid rows to process with: next_valid_count - valid_count. - dict_stream.decode_next(t, next_valid_count - valid_count); - __syncthreads(); + // output offset + if (is_valid) { + int const dst_pos = (value_count + thread_value_count) - 1; + int const src_pos = (valid_count + thread_valid_count) - 1; + sb->nz_idx[rolling_index(src_pos)] = dst_pos; + } - // decode the values themselves - gpuDecodeValues(s, sb, valid_count, next_valid_count, t); - __syncthreads(); + // update stuff + value_count += block_value_count; + valid_count += block_valid_count; + } - valid_count = next_valid_count; + if (t == 0) { + // update valid value count for decoding and total # of values we've processed + ni.valid_count = valid_count; + ni.value_count = value_count; // TODO: remove? 
this is unused in the non-list path + s->nz_count = valid_count; + s->input_value_count = value_count; + s->input_row_count = value_count; } - if (t == 0 and s->error != 0) { set_error(s->error, error_code); } + + return valid_count; +} + +// is the page marked nullable or not +__device__ inline bool is_nullable(page_state_s* s) +{ + auto const lvl = level_type::DEFINITION; + auto const max_def_level = s->col.max_level[lvl]; + return max_def_level > 0; +} + +// for a nullable page, check to see if it could have nulls +__device__ inline bool maybe_has_nulls(page_state_s* s) +{ + auto const lvl = level_type::DEFINITION; + auto const init_run = s->initial_rle_run[lvl]; + // literal runs, lets assume they could hold nulls + if (is_literal_run(init_run)) { return true; } + + // repeated run with number of items in the run not equal + // to the rows in the page, assume that means we could have nulls + if (s->page.num_input_values != (init_run >> 1)) { return true; } + + auto const lvl_bits = s->col.level_bits[lvl]; + auto const run_val = lvl_bits == 0 ? 0 : s->initial_rle_value[lvl]; + + // the encoded repeated value isn't valid, we have (all) nulls + return run_val != s->col.max_level[lvl]; } /** @@ -583,19 +489,28 @@ CUDF_KERNEL void __launch_bounds__(decode_block_size) * @param num_rows Maximum number of rows to read * @param error_code Error code to set if an error is encountered */ -template -CUDF_KERNEL void __launch_bounds__(decode_block_size) - gpuDecodeSplitPageDataFlat(PageInfo* pages, - device_span chunks, - size_t min_row, - size_t num_rows, - kernel_error::pointer error_code) +template + typename DecodeValuesFunc> +CUDF_KERNEL void __launch_bounds__(decode_block_size_t) + gpuDecodePageDataGeneric(PageInfo* pages, + device_span chunks, + size_t min_row, + size_t num_rows, + kernel_error::pointer error_code) { + constexpr int rolling_buf_size = decode_block_size_t * 2; + constexpr int rle_run_buffer_size = rle_stream_required_run_buffer_size(); + __shared__ __align__(16) page_state_s state_g; - __shared__ __align__(16) page_state_buffers_s // unused in this kernel - state_buffers; + using state_buf_t = page_state_buffers_s; + __shared__ __align__(16) state_buf_t state_buffers; page_state_s* const s = &state_g; auto* const sb = &state_buffers; @@ -603,9 +518,7 @@ CUDF_KERNEL void __launch_bounds__(decode_block_size) int const t = threadIdx.x; PageInfo* pp = &pages[page_idx]; - if (!(BitAnd(pages[page_idx].kernel_mask, decode_kernel_mask::BYTE_STREAM_SPLIT_FLAT))) { - return; - } + if (!(BitAnd(pages[page_idx].kernel_mask, kernel_mask_t))) { return; } // must come after the kernel mask check [[maybe_unused]] null_count_back_copier _{s, t}; @@ -615,30 +528,70 @@ CUDF_KERNEL void __launch_bounds__(decode_block_size) chunks, min_row, num_rows, - mask_filter{decode_kernel_mask::BYTE_STREAM_SPLIT_FLAT}, + mask_filter{kernel_mask_t}, page_processing_stage::DECODE)) { return; } - // the level stream decoders - __shared__ rle_run def_runs[rle_run_buffer_size]; - rle_stream def_decoder{def_runs}; - // if we have no work to do (eg, in a skip_rows/num_rows case) in this page. if (s->num_rows == 0) { return; } - bool const nullable = is_nullable(s); - bool const nullable_with_nulls = nullable && has_nulls(s); + DecodeValuesFunc decode_values; + + bool const nullable = is_nullable(s); + bool const should_process_nulls = nullable && maybe_has_nulls(s); + + // shared buffer. all shared memory is suballocated out of here + // constexpr int shared_rep_size = has_lists_t ? 
cudf::util::round_up_unsafe(rle_run_buffer_size * + // sizeof(rle_run), size_t{16}) : 0; + constexpr int shared_dict_size = + has_dict_t + ? cudf::util::round_up_unsafe(rle_run_buffer_size * sizeof(rle_run), size_t{16}) + : 0; + constexpr int shared_def_size = + cudf::util::round_up_unsafe(rle_run_buffer_size * sizeof(rle_run), size_t{16}); + constexpr int shared_buf_size = /*shared_rep_size +*/ shared_dict_size + shared_def_size; + __shared__ __align__(16) uint8_t shared_buf[shared_buf_size]; + + // setup all shared memory buffers + int shared_offset = 0; + /* + rle_run *rep_runs = reinterpret_cast*>(shared_buf + shared_offset); + if constexpr (has_lists_t){ + shared_offset += shared_rep_size; + } + */ + rle_run* dict_runs = reinterpret_cast*>(shared_buf + shared_offset); + if constexpr (has_dict_t) { shared_offset += shared_dict_size; } + rle_run* def_runs = reinterpret_cast*>(shared_buf + shared_offset); // initialize the stream decoders (requires values computed in setupLocalPageInfo) + rle_stream def_decoder{def_runs}; level_t* const def = reinterpret_cast(pp->lvl_decode_buf[level_type::DEFINITION]); - if (nullable_with_nulls) { + if (should_process_nulls) { def_decoder.init(s->col.level_bits[level_type::DEFINITION], s->abs_lvl_start[level_type::DEFINITION], s->abs_lvl_end[level_type::DEFINITION], def, s->page.num_input_values); } + /* + rle_stream rep_decoder{rep_runs}; + level_t* const rep = reinterpret_cast(pp->lvl_decode_buf[level_type::REPETITION]); + if constexpr(has_lists_t){ + rep_decoder.init(s->col.level_bits[level_type::REPETITION], + s->abs_lvl_start[level_type::REPETITION], + s->abs_lvl_end[level_type::REPETITION], + rep, + s->page.num_input_values); + } + */ + + rle_stream dict_stream{dict_runs}; + if constexpr (has_dict_t) { + dict_stream.init( + s->dict_bits, s->data_start, s->data_end, sb->dict_idx, s->page.num_input_values); + } __syncthreads(); // We use two counters in the loop below: processed_count and valid_count. @@ -655,26 +608,47 @@ CUDF_KERNEL void __launch_bounds__(decode_block_size) while (s->error == 0 && processed_count < s->page.num_input_values) { int next_valid_count; - // only need to process definition levels if the column has nulls - if (nullable_with_nulls) { + // only need to process definition levels if this is a nullable column + if (should_process_nulls) { processed_count += def_decoder.decode_next(t); __syncthreads(); - next_valid_count = - gpuUpdateValidityOffsetsAndRowIndicesFlat(processed_count, s, sb, def, t); + if constexpr (has_nesting_t) { + next_valid_count = gpuUpdateValidityAndRowIndicesNested( + processed_count, s, sb, def, t); + } else { + next_valid_count = gpuUpdateValidityAndRowIndicesFlat( + processed_count, s, sb, def, t); + } } // if we wanted to split off the skip_rows/num_rows case into a separate kernel, we could skip // this function call entirely since all it will ever generate is a mapping of (i -> i) for - // nz_idx. gpuDecodeValues would be the only work that happens. + // nz_idx. gpuDecodeFixedWidthValues would be the only work that happens. 
else { processed_count += min(rolling_buf_size, s->page.num_input_values - processed_count); - next_valid_count = gpuUpdateValidityOffsetsAndRowIndicesFlat( - processed_count, s, sb, nullptr, t); + + if constexpr (has_nesting_t) { + next_valid_count = + gpuUpdateValidityAndRowIndicesNested( + processed_count, s, sb, nullptr, t); + } else { + next_valid_count = gpuUpdateValidityAndRowIndicesFlat( + processed_count, s, sb, nullptr, t); + } } __syncthreads(); + // if we have dictionary data + if constexpr (has_dict_t) { + // We want to limit the number of dictionary items we decode, that correspond to + // the rows we have processed in this iteration that are valid. + // We know the number of valid rows to process with: next_valid_count - valid_count. + dict_stream.decode_next(t, next_valid_count - valid_count); + __syncthreads(); + } + // decode the values themselves - gpuDecodeSplitValues(s, sb, valid_count, next_valid_count); + decode_values(s, sb, valid_count, next_valid_count, t); __syncthreads(); valid_count = next_valid_count; @@ -689,18 +663,55 @@ void __host__ DecodePageDataFixed(cudf::detail::hostdevice_span pages, size_t num_rows, size_t min_row, int level_type_size, + bool has_nesting, kernel_error::pointer error_code, rmm::cuda_stream_view stream) { + constexpr int decode_block_size = 128; + dim3 dim_block(decode_block_size, 1); dim3 dim_grid(pages.size(), 1); // 1 threadblock per page if (level_type_size == 1) { - gpuDecodePageDataFixed<<>>( - pages.device_ptr(), chunks, min_row, num_rows, error_code); + if (has_nesting) { + gpuDecodePageDataGeneric + <<>>( + pages.device_ptr(), chunks, min_row, num_rows, error_code); + } else { + gpuDecodePageDataGeneric + <<>>( + pages.device_ptr(), chunks, min_row, num_rows, error_code); + } } else { - gpuDecodePageDataFixed<<>>( - pages.device_ptr(), chunks, min_row, num_rows, error_code); + if (has_nesting) { + gpuDecodePageDataGeneric + <<>>( + pages.device_ptr(), chunks, min_row, num_rows, error_code); + } else { + gpuDecodePageDataGeneric + <<>>( + pages.device_ptr(), chunks, min_row, num_rows, error_code); + } } } @@ -709,40 +720,113 @@ void __host__ DecodePageDataFixedDict(cudf::detail::hostdevice_span pa size_t num_rows, size_t min_row, int level_type_size, + bool has_nesting, kernel_error::pointer error_code, rmm::cuda_stream_view stream) { - // dim3 dim_block(decode_block_size, 1); // decode_block_size = 128 threads per block - // 1 full warp, and 1 warp of 1 thread + constexpr int decode_block_size = 128; + dim3 dim_block(decode_block_size, 1); // decode_block_size = 128 threads per block dim3 dim_grid(pages.size(), 1); // 1 thread block per page => # blocks if (level_type_size == 1) { - gpuDecodePageDataFixedDict<<>>( - pages.device_ptr(), chunks, min_row, num_rows, error_code); + if (has_nesting) { + gpuDecodePageDataGeneric + <<>>( + pages.device_ptr(), chunks, min_row, num_rows, error_code); + } else { + gpuDecodePageDataGeneric + <<>>( + pages.device_ptr(), chunks, min_row, num_rows, error_code); + } } else { - gpuDecodePageDataFixedDict<<>>( - pages.device_ptr(), chunks, min_row, num_rows, error_code); + if (has_nesting) { + gpuDecodePageDataGeneric + <<>>( + pages.device_ptr(), chunks, min_row, num_rows, error_code); + } else { + gpuDecodePageDataGeneric + <<>>( + pages.device_ptr(), chunks, min_row, num_rows, error_code); + } } } -void __host__ DecodeSplitPageDataFlat(cudf::detail::hostdevice_span pages, - cudf::detail::hostdevice_span chunks, - size_t num_rows, - size_t min_row, - int level_type_size, - 
kernel_error::pointer error_code, - rmm::cuda_stream_view stream) +void __host__ +DecodeSplitPageFixedWidthData(cudf::detail::hostdevice_span pages, + cudf::detail::hostdevice_span chunks, + size_t num_rows, + size_t min_row, + int level_type_size, + bool has_nesting, + kernel_error::pointer error_code, + rmm::cuda_stream_view stream) { + constexpr int decode_block_size = 128; + dim3 dim_block(decode_block_size, 1); // decode_block_size = 128 threads per block dim3 dim_grid(pages.size(), 1); // 1 thread block per page => # blocks if (level_type_size == 1) { - gpuDecodeSplitPageDataFlat<<>>( - pages.device_ptr(), chunks, min_row, num_rows, error_code); + if (has_nesting) { + gpuDecodePageDataGeneric + <<>>( + pages.device_ptr(), chunks, min_row, num_rows, error_code); + } else { + gpuDecodePageDataGeneric + <<>>( + pages.device_ptr(), chunks, min_row, num_rows, error_code); + } } else { - gpuDecodeSplitPageDataFlat<<>>( - pages.device_ptr(), chunks, min_row, num_rows, error_code); + if (has_nesting) { + gpuDecodePageDataGeneric + <<>>( + pages.device_ptr(), chunks, min_row, num_rows, error_code); + } else { + gpuDecodePageDataGeneric + <<>>( + pages.device_ptr(), chunks, min_row, num_rows, error_code); + } } } diff --git a/cpp/src/io/parquet/page_hdr.cu b/cpp/src/io/parquet/page_hdr.cu index cf0dd85e490..d604642be54 100644 --- a/cpp/src/io/parquet/page_hdr.cu +++ b/cpp/src/io/parquet/page_hdr.cu @@ -145,6 +145,11 @@ __device__ inline bool is_nested(ColumnChunkDesc const& chunk) return chunk.max_nesting_depth > 1; } +__device__ inline bool is_list(ColumnChunkDesc const& chunk) +{ + return chunk.max_level[level_type::REPETITION] > 0; +} + __device__ inline bool is_byte_array(ColumnChunkDesc const& chunk) { return chunk.physical_type == BYTE_ARRAY; @@ -178,14 +183,17 @@ __device__ decode_kernel_mask kernel_mask_for_page(PageInfo const& page, return decode_kernel_mask::STRING; } - if (!is_nested(chunk) && !is_byte_array(chunk) && !is_boolean(chunk)) { + if (!is_list(chunk) && !is_byte_array(chunk) && !is_boolean(chunk)) { if (page.encoding == Encoding::PLAIN) { - return decode_kernel_mask::FIXED_WIDTH_NO_DICT; + return is_nested(chunk) ? decode_kernel_mask::FIXED_WIDTH_NO_DICT_NESTED + : decode_kernel_mask::FIXED_WIDTH_NO_DICT; } else if (page.encoding == Encoding::PLAIN_DICTIONARY || page.encoding == Encoding::RLE_DICTIONARY) { - return decode_kernel_mask::FIXED_WIDTH_DICT; + return is_nested(chunk) ? decode_kernel_mask::FIXED_WIDTH_DICT_NESTED + : decode_kernel_mask::FIXED_WIDTH_DICT; } else if (page.encoding == Encoding::BYTE_STREAM_SPLIT) { - return decode_kernel_mask::BYTE_STREAM_SPLIT_FLAT; + return is_nested(chunk) ? decode_kernel_mask::BYTE_STREAM_SPLIT_FIXED_WIDTH_NESTED + : decode_kernel_mask::BYTE_STREAM_SPLIT_FIXED_WIDTH_FLAT; } } diff --git a/cpp/src/io/parquet/parquet_gpu.hpp b/cpp/src/io/parquet/parquet_gpu.hpp index d82c6f0de59..efc1f5ebab1 100644 --- a/cpp/src/io/parquet/parquet_gpu.hpp +++ b/cpp/src/io/parquet/parquet_gpu.hpp @@ -207,16 +207,20 @@ enum level_type { * Used to control which decode kernels to run. 
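+ * Values are distinct bit flags so that the masks of all pages in a subpass can be
+ * OR'd together, and each decode kernel is launched only when its bit is set in the
+ * combined mask (tested with BitAnd).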
*/
enum class decode_kernel_mask {
-  NONE                   = 0,
-  GENERAL                = (1 << 0),  // Run catch-all decode kernel
-  STRING                 = (1 << 1),  // Run decode kernel for string data
-  DELTA_BINARY           = (1 << 2),  // Run decode kernel for DELTA_BINARY_PACKED data
-  DELTA_BYTE_ARRAY       = (1 << 3),  // Run decode kernel for DELTA_BYTE_ARRAY encoded data
-  DELTA_LENGTH_BA        = (1 << 4),  // Run decode kernel for DELTA_LENGTH_BYTE_ARRAY encoded data
-  FIXED_WIDTH_NO_DICT    = (1 << 5),  // Run decode kernel for fixed width non-dictionary pages
-  FIXED_WIDTH_DICT       = (1 << 6),  // Run decode kernel for fixed width dictionary pages
-  BYTE_STREAM_SPLIT      = (1 << 7),  // Run decode kernel for BYTE_STREAM_SPLIT encoded data
-  BYTE_STREAM_SPLIT_FLAT = (1 << 8),  // Same as above but with a flat schema
+  NONE                = 0,
+  GENERAL             = (1 << 0),  // Run catch-all decode kernel
+  STRING              = (1 << 1),  // Run decode kernel for string data
+  DELTA_BINARY        = (1 << 2),  // Run decode kernel for DELTA_BINARY_PACKED data
+  DELTA_BYTE_ARRAY    = (1 << 3),  // Run decode kernel for DELTA_BYTE_ARRAY encoded data
+  DELTA_LENGTH_BA     = (1 << 4),  // Run decode kernel for DELTA_LENGTH_BYTE_ARRAY encoded data
+  FIXED_WIDTH_NO_DICT = (1 << 5),  // Run decode kernel for fixed width non-dictionary pages
+  FIXED_WIDTH_DICT    = (1 << 6),  // Run decode kernel for fixed width dictionary pages
+  BYTE_STREAM_SPLIT   = (1 << 7),  // Run decode kernel for BYTE_STREAM_SPLIT encoded data
+  BYTE_STREAM_SPLIT_FIXED_WIDTH_FLAT = (1 << 8),  // Same as above but for flat, fixed-width data
+  BYTE_STREAM_SPLIT_FIXED_WIDTH_NESTED =
+    (1 << 9),  // Same as above but for nested, fixed-width data
+  FIXED_WIDTH_NO_DICT_NESTED = (1 << 10),  // Run decode kernel for nested fixed width non-dictionary pages
+  FIXED_WIDTH_DICT_NESTED    = (1 << 11),  // Run decode kernel for nested fixed width dictionary pages
};

// mask representing all the ways in which a string can be encoded
@@ -888,6 +892,7 @@ void DecodeDeltaLengthByteArray(cudf::detail::hostdevice_span pages,
 * @param[in] num_rows Total number of rows to read
 * @param[in] min_row Minimum number of rows to read
 * @param[in] level_type_size Size in bytes of the type for level decoding
+ * @param[in] has_nesting Whether or not the data contains nested (but not list) data.
 * @param[out] error_code Error code for kernel failures
 * @param[in] stream CUDA stream to use
 */
@@ -896,6 +901,7 @@ void DecodePageDataFixed(cudf::detail::hostdevice_span pages,
                         std::size_t num_rows,
                         size_t min_row,
                         int level_type_size,
+                        bool has_nesting,
                         kernel_error::pointer error_code,
                         rmm::cuda_stream_view stream);

@@ -910,6 +916,7 @@ void DecodePageDataFixed(cudf::detail::hostdevice_span pages,
 * @param[in] num_rows Total number of rows to read
 * @param[in] min_row Minimum number of rows to read
 * @param[in] level_type_size Size in bytes of the type for level decoding
+ * @param[in] has_nesting Whether or not the data contains nested (but not list) data.
 * @param[out] error_code Error code for kernel failures
 * @param[in] stream CUDA stream to use
 */
@@ -918,11 +925,12 @@ void DecodePageDataFixedDict(cudf::detail::hostdevice_span pages,
                             std::size_t num_rows,
                             size_t min_row,
                             int level_type_size,
+                            bool has_nesting,
                             kernel_error::pointer error_code,
                             rmm::cuda_stream_view stream);

/**
- * @brief Launches kernel for reading dictionary fixed width column data stored in the pages
+ * @brief Launches kernel for reading fixed width column data stored in the pages
 *
 * The page data will be written to the output pointed to in the page's
 * associated column chunk.
@@ -932,16 +940,18 @@ void DecodePageDataFixedDict(cudf::detail::hostdevice_span pages, * @param[in] num_rows Total number of rows to read * @param[in] min_row Minimum number of rows to read * @param[in] level_type_size Size in bytes of the type for level decoding + * @param[in] has_nesting Whether or not the data contains nested (but not list) data. * @param[out] error_code Error code for kernel failures * @param[in] stream CUDA stream to use */ -void DecodeSplitPageDataFlat(cudf::detail::hostdevice_span pages, - cudf::detail::hostdevice_span chunks, - std::size_t num_rows, - size_t min_row, - int level_type_size, - kernel_error::pointer error_code, - rmm::cuda_stream_view stream); +void DecodeSplitPageFixedWidthData(cudf::detail::hostdevice_span pages, + cudf::detail::hostdevice_span chunks, + std::size_t num_rows, + size_t min_row, + int level_type_size, + bool has_nesting, + kernel_error::pointer error_code, + rmm::cuda_stream_view stream); /** * @brief Launches kernel for initializing encoder row group fragments diff --git a/cpp/src/io/parquet/reader_impl.cpp b/cpp/src/io/parquet/reader_impl.cpp index 1bd2fae281c..f705f6626e7 100644 --- a/cpp/src/io/parquet/reader_impl.cpp +++ b/cpp/src/io/parquet/reader_impl.cpp @@ -267,14 +267,27 @@ void reader::impl::decode_page_data(read_mode mode, size_t skip_rows, size_t num } // launch byte stream split decoder - if (BitAnd(kernel_mask, decode_kernel_mask::BYTE_STREAM_SPLIT_FLAT) != 0) { - DecodeSplitPageDataFlat(subpass.pages, - pass.chunks, - num_rows, - skip_rows, - level_type_size, - error_code.data(), - streams[s_idx++]); + if (BitAnd(kernel_mask, decode_kernel_mask::BYTE_STREAM_SPLIT_FIXED_WIDTH_FLAT) != 0) { + DecodeSplitPageFixedWidthData(subpass.pages, + pass.chunks, + num_rows, + skip_rows, + level_type_size, + false, + error_code.data(), + streams[s_idx++]); + } + + // launch byte stream split decoder, for nested columns + if (BitAnd(kernel_mask, decode_kernel_mask::BYTE_STREAM_SPLIT_FIXED_WIDTH_NESTED) != 0) { + DecodeSplitPageFixedWidthData(subpass.pages, + pass.chunks, + num_rows, + skip_rows, + level_type_size, + true, + error_code.data(), + streams[s_idx++]); } // launch byte stream split decoder @@ -288,22 +301,50 @@ void reader::impl::decode_page_data(read_mode mode, size_t skip_rows, size_t num streams[s_idx++]); } + // launch fixed width type decoder if (BitAnd(kernel_mask, decode_kernel_mask::FIXED_WIDTH_NO_DICT) != 0) { DecodePageDataFixed(subpass.pages, pass.chunks, num_rows, skip_rows, level_type_size, + false, + error_code.data(), + streams[s_idx++]); + } + + // launch fixed width type decoder, for nested columns + if (BitAnd(kernel_mask, decode_kernel_mask::FIXED_WIDTH_NO_DICT_NESTED) != 0) { + DecodePageDataFixed(subpass.pages, + pass.chunks, + num_rows, + skip_rows, + level_type_size, + true, error_code.data(), streams[s_idx++]); } + // launch fixed width type decoder with dictionaries if (BitAnd(kernel_mask, decode_kernel_mask::FIXED_WIDTH_DICT) != 0) { DecodePageDataFixedDict(subpass.pages, pass.chunks, num_rows, skip_rows, level_type_size, + false, + error_code.data(), + streams[s_idx++]); + } + + // launch fixed width type decoder with dictionaries, for nested columns + if (BitAnd(kernel_mask, decode_kernel_mask::FIXED_WIDTH_DICT_NESTED) != 0) { + DecodePageDataFixedDict(subpass.pages, + pass.chunks, + num_rows, + skip_rows, + level_type_size, + true, error_code.data(), streams[s_idx++]); } diff --git a/cpp/src/io/parquet/reader_impl_chunking.cu b/cpp/src/io/parquet/reader_impl_chunking.cu index 
9ad5a2d6e8d..d371ef5de93 100644
--- a/cpp/src/io/parquet/reader_impl_chunking.cu
+++ b/cpp/src/io/parquet/reader_impl_chunking.cu
@@ -337,7 +337,8 @@ int64_t find_next_split(int64_t cur_pos,
                        size_t cur_row_index,
                        size_t cur_cumulative_size,
                        cudf::host_span sizes,
-                       size_t size_limit)
+                       size_t size_limit,
+                       size_t min_row_count)
 {
   auto const start = thrust::make_transform_iterator(
     sizes.begin(),
@@ -357,7 +358,7 @@ int64_t find_next_split(int64_t cur_pos,
   // this guarantees that even if we cannot fit the set of rows represented by where our cur_pos
   // is, we will still move forward instead of failing.
   while (split_pos < (static_cast(sizes.size()) - 1) &&
-         (sizes[split_pos].end_row_index == cur_row_index)) {
+         (sizes[split_pos].end_row_index - cur_row_index < min_row_count)) {
     split_pos++;
   }

@@ -657,8 +658,10 @@ std::tuple, size_t, size_t> compute_next_subpass(
   auto const start_index = find_start_index(h_aggregated_info, start_row);
   auto const cumulative_size =
     start_row == 0 || start_index == 0 ? 0 : h_aggregated_info[start_index - 1].size_bytes;
+  // when choosing subpasses, we need to guarantee at least 2 rows in the included pages so that all
+  // list columns have a clear start and end.
   auto const end_index =
-    find_next_split(start_index, start_row, cumulative_size, h_aggregated_info, size_limit);
+    find_next_split(start_index, start_row, cumulative_size, h_aggregated_info, size_limit, 2);
   auto const end_row = h_aggregated_info[end_index].end_row_index;

   // for each column, collect the set of pages that spans start_row / end_row
@@ -703,8 +706,8 @@ std::vector compute_page_splits_by_row(device_span 0;
     if (is_list && max_col_row < last_pass_row) {
-      size_t const min_col_row = static_cast(chunk.start_row + last_page.chunk_row);
+      auto const& first_page   = subpass.pages[page_index];
+      size_t const min_col_row = static_cast(chunk.start_row + first_page.chunk_row);
       CUDF_EXPECTS((max_col_row - min_col_row) > 1, "Unexpected short subpass");
       max_col_row--;
     }
diff --git a/cpp/src/io/utilities/hostdevice_vector.hpp b/cpp/src/io/utilities/hostdevice_vector.hpp
index 9acd6a1e3a9..aed745c42dd 100644
--- a/cpp/src/io/utilities/hostdevice_vector.hpp
+++ b/cpp/src/io/utilities/hostdevice_vector.hpp
@@ -18,6 +18,7 @@

 #include "hostdevice_span.hpp"

+#include 
 #include 
 #include 
 #include 
@@ -124,26 +125,22 @@ class hostdevice_vector {

   void host_to_device_async(rmm::cuda_stream_view stream)
   {
-    CUDF_CUDA_TRY(
-      cudaMemcpyAsync(device_ptr(), host_ptr(), size_bytes(), cudaMemcpyDefault, stream.value()));
+    cuda_memcpy_async(device_ptr(), host_ptr(), size_bytes(), host_memory_kind::PINNED, stream);
   }

   void host_to_device_sync(rmm::cuda_stream_view stream)
   {
-    host_to_device_async(stream);
-    stream.synchronize();
+    cuda_memcpy(device_ptr(), host_ptr(), size_bytes(), host_memory_kind::PINNED, stream);
   }

   void device_to_host_async(rmm::cuda_stream_view stream)
   {
-    CUDF_CUDA_TRY(
-      cudaMemcpyAsync(host_ptr(), device_ptr(), size_bytes(), cudaMemcpyDefault, stream.value()));
+    cuda_memcpy_async(host_ptr(), device_ptr(), size_bytes(), host_memory_kind::PINNED, stream);
   }

   void device_to_host_sync(rmm::cuda_stream_view stream)
   {
-    device_to_host_async(stream);
-    stream.synchronize();
+    cuda_memcpy(host_ptr(), device_ptr(), size_bytes(), host_memory_kind::PINNED, stream);
   }

   /**
diff --git a/cpp/src/join/conditional_join.cu b/cpp/src/join/conditional_join.cu
index f02dee5f7f5..97a06d5a923 100644
--- a/cpp/src/join/conditional_join.cu
+++ b/cpp/src/join/conditional_join.cu
@@ -48,8 +48,7 @@ std::unique_ptr>
conditional_join_anti_semi( { if (right.num_rows() == 0) { switch (join_type) { - case join_kind::LEFT_ANTI_JOIN: - return std::make_unique>(left.num_rows(), stream, mr); + case join_kind::LEFT_ANTI_JOIN: return get_trivial_left_join_indices(left, stream, mr).first; case join_kind::LEFT_SEMI_JOIN: return std::make_unique>(0, stream, mr); default: CUDF_FAIL("Invalid join kind."); break; @@ -96,10 +95,6 @@ std::unique_ptr> conditional_join_anti_semi( join_size = size.value(stream); } - if (left.num_rows() == 0) { - return std::make_unique>(0, stream, mr); - } - rmm::device_scalar write_index(0, stream); auto left_indices = std::make_unique>(join_size, stream, mr); @@ -149,8 +144,7 @@ conditional_join(table_view const& left, // with a corresponding NULL from the right. case join_kind::LEFT_JOIN: case join_kind::LEFT_ANTI_JOIN: - case join_kind::FULL_JOIN: - return get_trivial_left_join_indices(left, stream, rmm::mr::get_current_device_resource()); + case join_kind::FULL_JOIN: return get_trivial_left_join_indices(left, stream, mr); // Inner and left semi joins return empty output because no matches can exist. case join_kind::INNER_JOIN: case join_kind::LEFT_SEMI_JOIN: @@ -169,8 +163,7 @@ conditional_join(table_view const& left, std::make_unique>(0, stream, mr)); // Full joins need to return the trivial complement. case join_kind::FULL_JOIN: { - auto ret_flipped = - get_trivial_left_join_indices(right, stream, rmm::mr::get_current_device_resource()); + auto ret_flipped = get_trivial_left_join_indices(right, stream, mr); return std::pair(std::move(ret_flipped.second), std::move(ret_flipped.first)); } default: CUDF_FAIL("Invalid join kind."); break; diff --git a/cpp/src/lists/copying/segmented_gather.cu b/cpp/src/lists/copying/segmented_gather.cu index 89b1a126fc5..779eca438db 100644 --- a/cpp/src/lists/copying/segmented_gather.cu +++ b/cpp/src/lists/copying/segmented_gather.cu @@ -13,6 +13,7 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/
+#include 
 #include 
 #include 
 #include 
diff --git a/cpp/src/stream_compaction/distinct.cu b/cpp/src/stream_compaction/distinct.cu
index a6f15cc49ec..e5cf29f3ebf 100644
--- a/cpp/src/stream_compaction/distinct.cu
+++ b/cpp/src/stream_compaction/distinct.cu
@@ -17,28 +17,62 @@
 #include "distinct_helpers.hpp"

 #include 
+#include 
 #include 
 #include 
 #include 
 #include 
+#include 
 #include 
 #include 
 #include 
+#include 
+#include 
 #include 
 #include 
-#include 
-#include 
-#include 
-#include 
-#include 
-
 #include 
 #include 

 namespace cudf {
 namespace detail {
+namespace {
+/**
+ * @brief Invokes the given `func` with the desired row equality
+ *
+ * @tparam HasNested Flag indicating whether there are nested columns in the input
+ * @tparam Func Type of the helper function doing the `distinct` check
+ *
+ * @param compare_nulls Control whether nulls should be compared as equal or not
+ * @param compare_nans Control whether floating-point NaN values should be compared as equal or not
+ * @param has_nulls Flag indicating whether the input has nulls or not
+ * @param row_equal Self table comparator
+ * @param func The input functor to invoke
+ */
+template <bool HasNested, typename Func>
+rmm::device_uvector<cudf::size_type> dispatch_row_equal(
+  null_equality compare_nulls,
+  nan_equality compare_nans,
+  bool has_nulls,
+  cudf::experimental::row::equality::self_comparator row_equal,
+  Func&& func)
+{
+  if (compare_nans == nan_equality::ALL_EQUAL) {
+    auto const d_equal = row_equal.equal_to(
+      nullate::DYNAMIC{has_nulls},
+      compare_nulls,
+      cudf::experimental::row::equality::nan_equal_physical_equality_comparator{});
+    return func(d_equal);
+  } else {
+    auto const d_equal = row_equal.equal_to(
+      nullate::DYNAMIC{has_nulls},
+      compare_nulls,
+      cudf::experimental::row::equality::physical_equality_comparator{});
+    return func(d_equal);
+  }
+}
+}  // namespace

 rmm::device_uvector distinct_indices(table_view const& input,
                                      duplicate_keep_option keep,
@@ -47,97 +81,39 @@ rmm::device_uvector distinct_indices(table_view const& input,
                                      rmm::cuda_stream_view stream,
                                      rmm::device_async_resource_ref mr)
 {
-  if (input.num_rows() == 0 or input.num_columns() == 0) {
+  auto const num_rows = input.num_rows();
+
+  if (num_rows == 0 or input.num_columns() == 0) {
     return rmm::device_uvector(0, stream, mr);
   }

-  auto map = hash_map_type{compute_hash_table_size(input.num_rows()),
-                           cuco::empty_key{-1},
-                           cuco::empty_value{std::numeric_limits::min()},
-                           cudf::detail::cuco_allocator{stream},
-                           stream.value()};
-
   auto const preprocessed_input =
     cudf::experimental::row::hash::preprocessed_table::create(input, stream);
   auto const has_nulls          = nullate::DYNAMIC{cudf::has_nested_nulls(input)};
   auto const has_nested_columns = cudf::detail::has_nested_columns(input);

-  auto const row_hasher = cudf::experimental::row::hash::row_hasher(preprocessed_input);
-  auto const key_hasher = row_hasher.device_hasher(has_nulls);
-
-  auto const row_comp = cudf::experimental::row::equality::self_comparator(preprocessed_input);
-
-  auto const pair_iter = cudf::detail::make_counting_transform_iterator(
-    size_type{0},
-    cuda::proclaim_return_type>(
-      [] __device__(size_type const i) { return cuco::make_pair(i, i); }));
-
-  auto const insert_keys = [&](auto const value_comp) {
-    if (has_nested_columns) {
-      auto const key_equal = row_comp.equal_to(has_nulls, nulls_equal, value_comp);
-      map.insert(pair_iter, pair_iter + input.num_rows(), key_hasher, key_equal, stream.value());
-    } else {
-      auto const key_equal = row_comp.equal_to(has_nulls, nulls_equal, value_comp);
-      map.insert(pair_iter, pair_iter + input.num_rows(),
key_hasher, key_equal, stream.value());
-    }
+  auto const row_hash  = cudf::experimental::row::hash::row_hasher(preprocessed_input);
+  auto const row_equal = cudf::experimental::row::equality::self_comparator(preprocessed_input);
+
+  auto const helper_func = [&](auto const& d_equal) {
+    using RowHasher = std::decay_t<decltype(d_equal)>;
+    auto set        = hash_set_type<RowHasher>{num_rows,
+                                               0.5,  // desired load factor
+                                               cuco::empty_key{cudf::detail::CUDF_SIZE_TYPE_SENTINEL},
+                                               d_equal,
+                                               {row_hash.device_hasher(has_nulls)},
+                                               {},
+                                               {},
+                                               cudf::detail::cuco_allocator{stream},
+                                               stream.value()};
+    return detail::reduce_by_row(set, num_rows, keep, stream, mr);
   };

-  if (nans_equal == nan_equality::ALL_EQUAL) {
-    using nan_equal_comparator =
-      cudf::experimental::row::equality::nan_equal_physical_equality_comparator;
-    insert_keys(nan_equal_comparator{});
+  if (has_nested_columns) {
+    return dispatch_row_equal<true>(nulls_equal, nans_equal, has_nulls, row_equal, helper_func);
   } else {
-    using nan_unequal_comparator = cudf::experimental::row::equality::physical_equality_comparator;
-    insert_keys(nan_unequal_comparator{});
+    return dispatch_row_equal<false>(nulls_equal, nans_equal, has_nulls, row_equal, helper_func);
   }
-
-  auto output_indices = rmm::device_uvector(map.get_size(), stream, mr);
-
-  // If we don't care about order, just gather indices of distinct keys taken from map.
-  if (keep == duplicate_keep_option::KEEP_ANY) {
-    map.retrieve_all(output_indices.begin(), thrust::make_discard_iterator(), stream.value());
-    return output_indices;
-  }
-
-  // For other keep options, reduce by row on rows that compare equal.
-  auto const reduction_results = reduce_by_row(map,
-                                               std::move(preprocessed_input),
-                                               input.num_rows(),
-                                               has_nulls,
-                                               has_nested_columns,
-                                               keep,
-                                               nulls_equal,
-                                               nans_equal,
-                                               stream,
-                                               rmm::mr::get_current_device_resource());
-
-  // Extract the desired output indices from reduction results.
-  auto const map_end = [&] {
-    if (keep == duplicate_keep_option::KEEP_NONE) {
-      // Reduction results with `KEEP_NONE` are either group sizes of equal rows, or `0`.
-      // Thus, we only output index of the rows in the groups having group size of `1`.
-      return thrust::copy_if(rmm::exec_policy(stream),
-                             thrust::make_counting_iterator(0),
-                             thrust::make_counting_iterator(input.num_rows()),
-                             output_indices.begin(),
-                             [reduction_results = reduction_results.begin()] __device__(
-                               auto const idx) { return reduction_results[idx] == size_type{1}; });
-    }
-
-    // Reduction results with `KEEP_FIRST` and `KEEP_LAST` are row indices of the first/last row in
-    // each group of equal rows (which are the desired output indices), or the value given by
-    // `reduction_init_value()`.
-    return thrust::copy_if(rmm::exec_policy(stream),
-                           reduction_results.begin(),
-                           reduction_results.end(),
-                           output_indices.begin(),
-                           [init_value = reduction_init_value(keep)] __device__(auto const idx) {
-                             return idx != init_value;
-                           });
-  }();
-
-  output_indices.resize(thrust::distance(output_indices.begin(), map_end), stream);
-  return output_indices;
 }

 std::unique_ptr<table>
distinct(table_view const& input, diff --git a/cpp/src/stream_compaction/distinct_count.cu b/cpp/src/stream_compaction/distinct_count.cu index 99ca89cc021..9843bb889f4 100644 --- a/cpp/src/stream_compaction/distinct_count.cu +++ b/cpp/src/stream_compaction/distinct_count.cu @@ -15,16 +15,17 @@ */ #include "stream_compaction_common.cuh" -#include "stream_compaction_common.hpp" #include #include #include +#include #include #include #include #include #include +#include #include #include #include diff --git a/cpp/src/stream_compaction/distinct_helpers.cu b/cpp/src/stream_compaction/distinct_helpers.cu index 13e89b15bb7..c3a004b7f28 100644 --- a/cpp/src/stream_compaction/distinct_helpers.cu +++ b/cpp/src/stream_compaction/distinct_helpers.cu @@ -16,96 +16,127 @@ #include "distinct_helpers.hpp" -#include - -#include +#include +#include namespace cudf::detail { -namespace { -/** - * @brief The functor to find the first/last/all duplicate row for rows that compared equal. - */ -template -struct reduce_fn : reduce_by_row_fn_base { - duplicate_keep_option const keep; - - reduce_fn(MapView const& d_map, - KeyHasher const& d_hasher, - KeyEqual const& d_equal, - duplicate_keep_option const keep, - size_type* const d_output) - : reduce_by_row_fn_base{d_map, - d_hasher, - d_equal, - d_output}, - keep{keep} - { +template +rmm::device_uvector reduce_by_row(hash_set_type& set, + size_type num_rows, + duplicate_keep_option keep, + rmm::cuda_stream_view stream, + rmm::device_async_resource_ref mr) +{ + auto output_indices = rmm::device_uvector(num_rows, stream, mr); + + // If we don't care about order, just gather indices of distinct keys taken from set. + if (keep == duplicate_keep_option::KEEP_ANY) { + auto const iter = thrust::counting_iterator{0}; + set.insert_async(iter, iter + num_rows, stream.value()); + auto const output_end = set.retrieve_all(output_indices.begin(), stream.value()); + output_indices.resize(thrust::distance(output_indices.begin(), output_end), stream); + return output_indices; } - __device__ void operator()(size_type const idx) const - { - auto const out_ptr = this->get_output_ptr(idx); - - if (keep == duplicate_keep_option::KEEP_FIRST) { - // Store the smallest index of all rows that are equal. - atomicMin(out_ptr, idx); - } else if (keep == duplicate_keep_option::KEEP_LAST) { - // Store the greatest index of all rows that are equal. - atomicMax(out_ptr, idx); - } else { - // Count the number of rows in each group of rows that are compared equal. - atomicAdd(out_ptr, size_type{1}); + auto reduction_results = rmm::device_uvector(num_rows, stream, mr); + thrust::uninitialized_fill(rmm::exec_policy_nosync(stream), + reduction_results.begin(), + reduction_results.end(), + reduction_init_value(keep)); + + auto set_ref = set.ref(cuco::op::insert_and_find); + + thrust::for_each(rmm::exec_policy_nosync(stream), + thrust::make_counting_iterator(0), + thrust::make_counting_iterator(num_rows), + [set_ref, keep, reduction_results = reduction_results.begin()] __device__( + size_type const idx) mutable { + auto const [inserted_idx_ptr, _] = set_ref.insert_and_find(idx); + + auto ref = cuda::atomic_ref{ + reduction_results[*inserted_idx_ptr]}; + if (keep == duplicate_keep_option::KEEP_FIRST) { + // Store the smallest index of all rows that are equal. + ref.fetch_min(idx, cuda::memory_order_relaxed); + } else if (keep == duplicate_keep_option::KEEP_LAST) { + // Store the greatest index of all rows that are equal. 
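+                     // (memory_order_relaxed is sufficient here: each result slot is only
+                     // ever combined through these commutative atomic RMWs and is first
+                     // read by later kernels ordered on the same stream.)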
+ ref.fetch_max(idx, cuda::memory_order_relaxed); + } else { + // Count the number of rows in each group of rows that are compared equal. + ref.fetch_add(size_type{1}, cuda::memory_order_relaxed); + } + }); + + auto const map_end = [&] { + if (keep == duplicate_keep_option::KEEP_NONE) { + // Reduction results with `KEEP_NONE` are either group sizes of equal rows, or `0`. + // Thus, we only output index of the rows in the groups having group size of `1`. + return thrust::copy_if( + rmm::exec_policy(stream), + thrust::make_counting_iterator(0), + thrust::make_counting_iterator(num_rows), + output_indices.begin(), + cuda::proclaim_return_type( + [reduction_results = reduction_results.begin()] __device__(auto const idx) { + return reduction_results[idx] == size_type{1}; + })); } - } -}; -/** - * @brief The builder to construct an instance of `reduce_fn` functor base on the given - * value of the `duplicate_keep_option` member variable. - */ -struct reduce_func_builder { - duplicate_keep_option const keep; - - template - auto build(MapView const& d_map, - KeyHasher const& d_hasher, - KeyEqual const& d_equal, - size_type* const d_output) - { - return reduce_fn{d_map, d_hasher, d_equal, keep, d_output}; - } -}; + // Reduction results with `KEEP_FIRST` and `KEEP_LAST` are row indices of the first/last row in + // each group of equal rows (which are the desired output indices), or the value given by + // `reduction_init_value()`. + return thrust::copy_if( + rmm::exec_policy(stream), + reduction_results.begin(), + reduction_results.end(), + output_indices.begin(), + cuda::proclaim_return_type([init_value = reduction_init_value(keep)] __device__( + auto const idx) { return idx != init_value; })); + }(); -} // namespace + output_indices.resize(thrust::distance(output_indices.begin(), map_end), stream); + return output_indices; +} -// This function is split from `distinct.cu` to improve compile time. 
-rmm::device_uvector reduce_by_row( - hash_map_type const& map, - std::shared_ptr const preprocessed_input, +template rmm::device_uvector reduce_by_row( + hash_set_type>& set, size_type num_rows, - cudf::nullate::DYNAMIC has_nulls, - bool has_nested_columns, duplicate_keep_option keep, - null_equality nulls_equal, - nan_equality nans_equal, rmm::cuda_stream_view stream, - rmm::device_async_resource_ref mr) -{ - CUDF_EXPECTS(keep != duplicate_keep_option::KEEP_ANY, - "This function should not be called with KEEP_ANY"); - - return hash_reduce_by_row(map, - preprocessed_input, - num_rows, - has_nulls, - has_nested_columns, - nulls_equal, - nans_equal, - reduce_func_builder{keep}, - reduction_init_value(keep), - stream, - mr); -} + rmm::device_async_resource_ref mr); + +template rmm::device_uvector reduce_by_row( + hash_set_type>& set, + size_type num_rows, + duplicate_keep_option keep, + rmm::cuda_stream_view stream, + rmm::device_async_resource_ref mr); + +template rmm::device_uvector reduce_by_row( + hash_set_type>& set, + size_type num_rows, + duplicate_keep_option keep, + rmm::cuda_stream_view stream, + rmm::device_async_resource_ref mr); + +template rmm::device_uvector reduce_by_row( + hash_set_type>& set, + size_type num_rows, + duplicate_keep_option keep, + rmm::cuda_stream_view stream, + rmm::device_async_resource_ref mr); } // namespace cudf::detail diff --git a/cpp/src/stream_compaction/distinct_helpers.hpp b/cpp/src/stream_compaction/distinct_helpers.hpp index 40f97e00ce5..fca67c98873 100644 --- a/cpp/src/stream_compaction/distinct_helpers.hpp +++ b/cpp/src/stream_compaction/distinct_helpers.hpp @@ -14,8 +14,7 @@ * limitations under the License. */ -#include "stream_compaction_common.hpp" - +#include #include #include #include @@ -24,6 +23,12 @@ #include #include +#include +#include +#include +#include +#include + namespace cudf::detail { /** @@ -42,13 +47,28 @@ auto constexpr reduction_init_value(duplicate_keep_option keep) } } +template +using hash_set_type = + cuco::static_set, + cuda::thread_scope_device, + RowHasher, + cuco::linear_probing<1, + cudf::experimental::row::hash::device_row_hasher< + cudf::hashing::detail::default_hash, + cudf::nullate::DYNAMIC>>, + cudf::detail::cuco_allocator, + cuco::storage<1>>; + /** - * @brief Perform a reduction on groups of rows that are compared equal. + * @brief Perform a reduction on groups of rows that are compared equal and returns output indices + * of the occurrences of the distinct elements based on `keep` parameter. * * This is essentially a reduce-by-key operation with keys are non-contiguous rows and are compared - * equal. A hash table is used to find groups of equal rows. + * equal. A hash set is used to find groups of equal rows. * * Depending on the `keep` parameter, the reduction operation for each row group is: + * - If `keep == KEEP_ANY` : order does not matter. * - If `keep == KEEP_FIRST`: min of row indices in the group. * - If `keep == KEEP_LAST`: max of row indices in the group. * - If `keep == KEEP_NONE`: count of equivalent rows (group size). @@ -59,30 +79,18 @@ auto constexpr reduction_init_value(duplicate_keep_option keep) * the `reduction_init_value()` function. Then, the reduction result for each row group is written * into the output array at the index of an unspecified row in the group. 
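+ * For example (with made-up row numbers): if rows 2, 5 and 7 compare equal, KEEP_FIRST
+ * stores 2, KEEP_LAST stores 7, and KEEP_NONE stores the group size 3 into the slot
+ * owned by that group.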
 *
- * @param map The auxiliary map to perform reduction
- * @param preprocessed_input The preprocessed of the input rows for computing row hashing and row
- * comparisons
+ * @param set The auxiliary set used to perform the reduction
 * @param num_rows The number of all input rows
- * @param has_nulls Indicate whether the input rows has any nulls at any nested levels
- * @param has_nested_columns Indicates whether the input table has any nested columns
 * @param keep The parameter to determine what type of reduction to perform
- * @param nulls_equal Flag to specify whether null elements should be considered as equal
- * @param nans_equal Flag to specify whether NaN values in floating point column should be
- * considered equal.
 * @param stream CUDA stream used for device memory operations and kernel launches
 * @param mr Device memory resource used to allocate the returned vector
- * @return A device_uvector containing the reduction results
+ * @return A device_uvector containing the output indices
 */
-rmm::device_uvector reduce_by_row(
-  hash_map_type const& map,
-  std::shared_ptr const preprocessed_input,
-  size_type num_rows,
-  cudf::nullate::DYNAMIC has_nulls,
-  bool has_nested_columns,
-  duplicate_keep_option keep,
-  null_equality nulls_equal,
-  nan_equality nans_equal,
-  rmm::cuda_stream_view stream,
-  rmm::device_async_resource_ref mr);
-
+template 
+rmm::device_uvector reduce_by_row(hash_set_type& set,
+                                  size_type num_rows,
+                                  duplicate_keep_option keep,
+                                  rmm::cuda_stream_view stream,
+                                  rmm::device_async_resource_ref mr);
 }  // namespace cudf::detail
diff --git a/cpp/src/stream_compaction/stable_distinct.cu b/cpp/src/stream_compaction/stable_distinct.cu
index 27b5a92ab69..074d4fd7d1a 100644
--- a/cpp/src/stream_compaction/stable_distinct.cu
+++ b/cpp/src/stream_compaction/stable_distinct.cu
@@ -79,11 +79,11 @@ std::unique_ptr<table>
stable_distinct(table_view const& input, duplicate_keep_option keep, null_equality nulls_equal, nan_equality nans_equal, + rmm::cuda_stream_view stream, rmm::device_async_resource_ref mr) { CUDF_FUNC_RANGE(); - return detail::stable_distinct( - input, keys, keep, nulls_equal, nans_equal, cudf::get_default_stream(), mr); + return detail::stable_distinct(input, keys, keep, nulls_equal, nans_equal, stream, mr); } } // namespace cudf diff --git a/cpp/src/stream_compaction/stream_compaction_common.cuh b/cpp/src/stream_compaction/stream_compaction_common.cuh index 839672d6a56..0f9bc18e258 100644 --- a/cpp/src/stream_compaction/stream_compaction_common.cuh +++ b/cpp/src/stream_compaction/stream_compaction_common.cuh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022-2023, NVIDIA CORPORATION. + * Copyright (c) 2022-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -15,9 +15,8 @@ */ #pragma once -#include "stream_compaction_common.hpp" - #include +#include #include #include diff --git a/cpp/src/stream_compaction/stream_compaction_common.hpp b/cpp/src/stream_compaction/stream_compaction_common.hpp deleted file mode 100644 index 13795f49781..00000000000 --- a/cpp/src/stream_compaction/stream_compaction_common.hpp +++ /dev/null @@ -1,35 +0,0 @@ -/* - * Copyright (c) 2022-2024, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -#pragma once - -#include -#include -#include -#include - -#include -#include - -#include - -namespace cudf { -namespace detail { - -using hash_map_type = cuco::legacy:: - static_map; - -} // namespace detail -} // namespace cudf diff --git a/cpp/src/stream_compaction/unique.cu b/cpp/src/stream_compaction/unique.cu index c1f8b17938c..edb47984d13 100644 --- a/cpp/src/stream_compaction/unique.cu +++ b/cpp/src/stream_compaction/unique.cu @@ -15,7 +15,6 @@ */ #include "stream_compaction_common.cuh" -#include "stream_compaction_common.hpp" #include #include diff --git a/cpp/src/strings/utilities.cu b/cpp/src/strings/utilities.cu index 18e726a6d7d..101004a5d06 100644 --- a/cpp/src/strings/utilities.cu +++ b/cpp/src/strings/utilities.cu @@ -13,16 +13,17 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ - #include "strings/char_types/char_cases.h" #include "strings/char_types/char_flags.h" #include #include #include +#include #include #include #include +#include #include #include @@ -36,8 +37,7 @@ #include #include -namespace cudf { -namespace strings { +namespace cudf::strings { namespace detail { /** @@ -175,5 +175,17 @@ int64_t get_offset_value(cudf::column_view const& offsets, } } // namespace detail -} // namespace strings -} // namespace cudf + +rmm::device_uvector create_string_vector_from_column( + cudf::strings_column_view const strings, + rmm::cuda_stream_view stream, + rmm::device_async_resource_ref mr) +{ + CUDF_FUNC_RANGE(); + return detail::create_string_vector_from_column(strings, stream, mr); +} + +int64_t get_offset64_threshold() { return detail::get_offset64_threshold(); } +bool is_large_strings_enabled() { return detail::is_large_strings_enabled(); } + +} // namespace cudf::strings diff --git a/cpp/src/utilities/cuda_memcpy.cu b/cpp/src/utilities/cuda_memcpy.cu new file mode 100644 index 00000000000..3d0822d8545 --- /dev/null +++ b/cpp/src/utilities/cuda_memcpy.cu @@ -0,0 +1,71 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include +#include +#include + +#include + +#include + +namespace cudf::detail { + +namespace { + +void copy_pinned(void* dst, void const* src, std::size_t size, rmm::cuda_stream_view stream) +{ + if (size == 0) return; + + if (size < get_kernel_pinned_copy_threshold()) { + thrust::copy_n(rmm::exec_policy_nosync(stream), + static_cast(src), + size, + static_cast(dst)); + } else { + CUDF_CUDA_TRY(cudaMemcpyAsync(dst, src, size, cudaMemcpyDefault, stream)); + } +} + +void copy_pageable(void* dst, void const* src, std::size_t size, rmm::cuda_stream_view stream) +{ + if (size == 0) return; + + CUDF_CUDA_TRY(cudaMemcpyAsync(dst, src, size, cudaMemcpyDefault, stream)); +} + +}; // namespace + +void cuda_memcpy_async( + void* dst, void const* src, size_t size, host_memory_kind kind, rmm::cuda_stream_view stream) +{ + if (kind == host_memory_kind::PINNED) { + copy_pinned(dst, src, size, stream); + } else if (kind == host_memory_kind::PAGEABLE) { + copy_pageable(dst, src, size, stream); + } else { + CUDF_FAIL("Unsupported host memory kind"); + } +} + +void cuda_memcpy( + void* dst, void const* src, size_t size, host_memory_kind kind, rmm::cuda_stream_view stream) +{ + cuda_memcpy_async(dst, src, size, kind, stream); + stream.synchronize(); +} + +} // namespace cudf::detail diff --git a/cpp/src/utilities/pinned_memory.cpp b/cpp/src/utilities/pinned_memory.cpp index e90b7969b4d..3ea4293fc60 100644 --- a/cpp/src/utilities/pinned_memory.cpp +++ b/cpp/src/utilities/pinned_memory.cpp @@ -211,4 +211,18 @@ bool config_default_pinned_memory_resource(pinned_mr_options const& opts) return did_configure; } +CUDF_EXPORT auto& kernel_pinned_copy_threshold() +{ + // use cudaMemcpyAsync for all pinned copies + static std::atomic threshold = 0; + return threshold; +} + +void 
set_kernel_pinned_copy_threshold(size_t threshold) +{ + kernel_pinned_copy_threshold() = threshold; +} + +size_t get_kernel_pinned_copy_threshold() { return kernel_pinned_copy_threshold(); } + } // namespace cudf diff --git a/cpp/tests/CMakeLists.txt b/cpp/tests/CMakeLists.txt index 9f14455f42d..eef09954647 100644 --- a/cpp/tests/CMakeLists.txt +++ b/cpp/tests/CMakeLists.txt @@ -700,6 +700,7 @@ ConfigureTest(STREAM_REPLACE_TEST streams/replace_test.cpp STREAM_MODE testing) ConfigureTest(STREAM_ROLLING_TEST streams/rolling_test.cpp STREAM_MODE testing) ConfigureTest(STREAM_SEARCH_TEST streams/search_test.cpp STREAM_MODE testing) ConfigureTest(STREAM_SORTING_TEST streams/sorting_test.cpp STREAM_MODE testing) +ConfigureTest(STREAM_STREAM_COMPACTION_TEST streams/stream_compaction_test.cpp STREAM_MODE testing) ConfigureTest( STREAM_STRINGS_TEST streams/strings/case_test.cpp diff --git a/cpp/tests/binaryop/binop-compiled-fixed_point-test.cpp b/cpp/tests/binaryop/binop-compiled-fixed_point-test.cpp index 6d097b2ff12..89824eb6511 100644 --- a/cpp/tests/binaryop/binop-compiled-fixed_point-test.cpp +++ b/cpp/tests/binaryop/binop-compiled-fixed_point-test.cpp @@ -843,3 +843,61 @@ TYPED_TEST(FixedPointTest_64_128_Reps, FixedPoint_64_128_ComparisonTests) CUDF_TEST_EXPECT_COLUMNS_EQUAL(h->view(), falses); } } + +template +void test_fixed_floating(cudf::binary_operator op, + double floating_value, + int decimal_value, + int decimal_scale, + ResultType expected) +{ + auto const scale = numeric::scale_type{decimal_scale}; + auto const result_type = cudf::data_type(cudf::type_to_id()); + auto const nullable = + (op == cudf::binary_operator::NULL_EQUALS || op == cudf::binary_operator::NULL_NOT_EQUALS || + op == cudf::binary_operator::NULL_MIN || op == cudf::binary_operator::NULL_MAX); + + cudf::test::fixed_width_column_wrapper floating_col({floating_value}); + cudf::test::fixed_point_column_wrapper decimal_col({decimal_value}, scale); + + auto result = binary_operation(floating_col, decimal_col, op, result_type); + + if constexpr (cudf::is_fixed_point()) { + using wrapper_type = cudf::test::fixed_point_column_wrapper; + auto const expected_col = nullable ? wrapper_type({expected.value()}, {true}, expected.scale()) + : wrapper_type({expected.value()}, expected.scale()); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected_col, *result.get()); + } else { + using wrapper_type = cudf::test::fixed_width_column_wrapper; + auto const expected_col = + nullable ? 
wrapper_type({expected}, {true}) : wrapper_type({expected});
+    CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected_col, *result.get());
+  }
+}
+
+TYPED_TEST(FixedPointCompiledTest, FixedPointWithFloating)
+{
+  using namespace numeric;
+
+  // BOOLEAN
+  test_fixed_floating(cudf::binary_operator::EQUAL, 1.0, 10, -1, true);
+  test_fixed_floating(cudf::binary_operator::NOT_EQUAL, 1.0, 10, -1, false);
+  test_fixed_floating(cudf::binary_operator::LESS, 2.0, 10, -1, false);
+  test_fixed_floating(cudf::binary_operator::GREATER, 2.0, 10, -1, true);
+  test_fixed_floating(cudf::binary_operator::LESS_EQUAL, 2.0, 20, -1, true);
+  test_fixed_floating(cudf::binary_operator::GREATER_EQUAL, 2.0, 30, -1, false);
+  test_fixed_floating(cudf::binary_operator::NULL_EQUALS, 1.0, 10, -1, true);
+  test_fixed_floating(cudf::binary_operator::NULL_NOT_EQUALS, 1.0, 10, -1, false);
+
+  // PRIMARY ARITHMETIC
+  auto const decimal_result = numeric::decimal32(4, numeric::scale_type{0});
+  test_fixed_floating(cudf::binary_operator::ADD, 1.0, 30, -1, decimal_result);
+  test_fixed_floating(cudf::binary_operator::SUB, 6.0, 20, -1, decimal_result);
+  test_fixed_floating(cudf::binary_operator::MUL, 2.0, 20, -1, decimal_result);
+  test_fixed_floating(cudf::binary_operator::DIV, 8.0, 2, 0, decimal_result);
+  test_fixed_floating(cudf::binary_operator::MOD, 9.0, 50, -1, decimal_result);
+
+  // OTHER ARITHMETIC
+  test_fixed_floating(cudf::binary_operator::NULL_MAX, 4.0, 20, -1, decimal_result);
+  test_fixed_floating(cudf::binary_operator::NULL_MIN, 4.0, 200, -1, decimal_result);
+}
diff --git a/cpp/tests/column/factories_test.cpp b/cpp/tests/column/factories_test.cpp
index dca36eaa4e7..603187f0330 100644
--- a/cpp/tests/column/factories_test.cpp
+++ b/cpp/tests/column/factories_test.cpp
@@ -24,7 +24,7 @@
 #include
 #include
 #include
-#include <cudf/strings/detail/utilities.hpp>
+#include <cudf/strings/utilities.hpp>
 #include
 #include
 #include
@@ -762,7 +762,7 @@ TEST_F(ColumnFactoryTest, FromStructScalarNull) { struct_from_scalar(false); }
 
 TEST_F(ColumnFactoryTest, FromScalarErrors)
 {
-  if (cudf::strings::detail::is_large_strings_enabled()) { return; }
+  if (cudf::strings::is_large_strings_enabled()) { return; }
   cudf::string_scalar ss("hello world");
   EXPECT_THROW(cudf::make_column_from_scalar(ss, 214748365), std::overflow_error);
diff --git a/cpp/tests/copying/concatenate_tests.cpp b/cpp/tests/copying/concatenate_tests.cpp
index 078e0ef9bae..054441788d0 100644
--- a/cpp/tests/copying/concatenate_tests.cpp
+++ b/cpp/tests/copying/concatenate_tests.cpp
@@ -29,7 +29,7 @@
 #include
 #include
 #include
-#include <cudf/strings/detail/utilities.hpp>
+#include <cudf/strings/utilities.hpp>
 #include
 #include
 #include
@@ -189,7 +189,7 @@ TEST_F(StringColumnTest, ConcatenateManyColumns)
 
 TEST_F(StringColumnTest, ConcatenateTooLarge)
 {
-  if (cudf::strings::detail::is_large_strings_enabled()) { return; }
+  if (cudf::strings::is_large_strings_enabled()) { return; }
   std::string big_str(1000000, 'a');  // 1 million bytes x 5 = 5 million bytes
   cudf::test::strings_column_wrapper input{big_str, big_str, big_str, big_str, big_str};
@@ -379,7 +379,7 @@ TEST_F(OverflowTest, OverflowTest)
   }
 
   // string column, overflow on chars
-  if (!cudf::strings::detail::is_large_strings_enabled()) {
+  if (!cudf::strings::is_large_strings_enabled()) {
     constexpr auto size = static_cast(static_cast(1024) * 1024 * 1024);
 
     // try and concatenate 6 string columns, each with 1 billion chars
@@ -502,7 +502,7 @@ TEST_F(OverflowTest, Presliced)
   }
 
   // strings, overflow on chars
-  if (!cudf::strings::detail::is_large_strings_enabled()) {
+  if (!cudf::strings::is_large_strings_enabled()) {
     constexpr cudf::size_type total_chars_size = 1024 * 1024 * 1024;
     constexpr cudf::size_type string_size      = 64;
     constexpr cudf::size_type num_rows         = total_chars_size / string_size;
diff --git a/cpp/tests/io/parquet_writer_test.cpp b/cpp/tests/io/parquet_writer_test.cpp
index 84ab83e33d0..a1f4c7b81d8 100644
--- a/cpp/tests/io/parquet_writer_test.cpp
+++ b/cpp/tests/io/parquet_writer_test.cpp
@@ -1785,7 +1785,8 @@ TEST_F(ParquetWriterTest, DeltaBinaryStartsWithNulls)
   CUDF_TEST_EXPECT_TABLES_EQUAL(expected, result.tbl->view());
 }
 
-TEST_F(ParquetWriterTest, ByteStreamSplit)
+std::pair<std::unique_ptr<cudf::table>, cudf::io::table_input_metadata>
+make_byte_stream_split_table(bool as_struct)
 {
   constexpr auto num_rows = 100;
   std::mt19937 engine{31337};
@@ -1802,24 +1803,73 @@
   // throw in a list to make sure both decoders are working
   auto col4 = make_parquet_list_col(engine, num_rows, 5, true);
 
-  auto expected = table_view{{col0, col1, col2, col3, *col4}};
+  std::vector<std::unique_ptr<cudf::column>> columns;
+  columns.reserve(5);
+  columns.push_back(col0.release());
+  columns.push_back(col1.release());
+  columns.push_back(col2.release());
+  columns.push_back(col3.release());
+  columns.push_back(std::move(col4));
+
+  return [&]() -> std::pair<std::unique_ptr<cudf::table>, cudf::io::table_input_metadata> {
+    auto const encoding = cudf::io::column_encoding::BYTE_STREAM_SPLIT;
+
+    // make as a nested struct
+    if (as_struct) {
+      auto valids =
+        cudf::detail::make_counting_transform_iterator(0, [](int i) { return i % 2 == 0; });
+      auto [null_mask, null_count] = cudf::test::detail::make_null_mask(valids, valids + num_rows);
+
+      std::vector<std::unique_ptr<cudf::column>> table_cols;
+      table_cols.push_back(
+        cudf::make_structs_column(num_rows, std::move(columns), null_count, std::move(null_mask)));
+
+      auto tbl      = std::make_unique<cudf::table>(std::move(table_cols));
+      auto expected = table_view{*tbl};
+
+      cudf::io::table_input_metadata expected_metadata(expected);
+      expected_metadata.column_metadata[0].set_name("struct");
+      expected_metadata.column_metadata[0].set_encoding(encoding);
+
+      expected_metadata.column_metadata[0].child(0).set_name("int32s");
+      expected_metadata.column_metadata[0].child(1).set_name("int64s");
+      expected_metadata.column_metadata[0].child(2).set_name("floats");
+      expected_metadata.column_metadata[0].child(3).set_name("doubles");
+      expected_metadata.column_metadata[0].child(4).set_name("int32list");
+      for (int idx = 0; idx <= 3; idx++) {
+        expected_metadata.column_metadata[0].child(idx).set_encoding(encoding);
+      }
+      expected_metadata.column_metadata[0].child(4).child(1).set_encoding(encoding);
 
-  cudf::io::table_input_metadata expected_metadata(expected);
-  expected_metadata.column_metadata[0].set_name("int32s");
-  expected_metadata.column_metadata[1].set_name("int64s");
-  expected_metadata.column_metadata[2].set_name("floats");
-  expected_metadata.column_metadata[3].set_name("doubles");
-  expected_metadata.column_metadata[4].set_name("int32list");
-  auto const encoding = cudf::io::column_encoding::BYTE_STREAM_SPLIT;
-  for (int i = 0; i <= 3; i++) {
-    expected_metadata.column_metadata[i].set_encoding(encoding);
-  }
+      return {std::move(tbl), expected_metadata};
+    }
+
+    // make flat
+    auto tbl      = std::make_unique<cudf::table>(std::move(columns));
+    auto expected = table_view{*tbl};
 
-  expected_metadata.column_metadata[4].child(1).set_encoding(encoding);
+    cudf::io::table_input_metadata expected_metadata(expected);
+    expected_metadata.column_metadata[0].set_name("int32s");
+    expected_metadata.column_metadata[1].set_name("int64s");
+    expected_metadata.column_metadata[2].set_name("floats");
+    expected_metadata.column_metadata[3].set_name("doubles");
+    expected_metadata.column_metadata[4].set_name("int32list");
+    for (int idx = 0; idx <= 3; idx++) {
+      expected_metadata.column_metadata[idx].set_encoding(encoding);
+    }
+
+    expected_metadata.column_metadata[4].child(1).set_encoding(encoding);
+    return {std::move(tbl), expected_metadata};
+  }();
+}
+
+TEST_F(ParquetWriterTest, ByteStreamSplit)
+{
+  auto [expected, expected_metadata] = make_byte_stream_split_table(false);
 
   auto const filepath = temp_env->get_temp_filepath("ByteStreamSplit.parquet");
   cudf::io::parquet_writer_options out_opts =
-    cudf::io::parquet_writer_options::builder(cudf::io::sink_info{filepath}, expected)
+    cudf::io::parquet_writer_options::builder(cudf::io::sink_info{filepath}, *expected)
       .metadata(expected_metadata);
   cudf::io::write_parquet(out_opts);
 
@@ -1827,7 +1877,24 @@
     cudf::io::parquet_reader_options::builder(cudf::io::source_info{filepath});
   auto result = cudf::io::read_parquet(in_opts);
 
-  CUDF_TEST_EXPECT_TABLES_EQUAL(expected, result.tbl->view());
+  CUDF_TEST_EXPECT_TABLES_EQUAL(*expected, result.tbl->view());
+}
+
+TEST_F(ParquetWriterTest, ByteStreamSplitStruct)
+{
+  auto [expected, expected_metadata] = make_byte_stream_split_table(true);
+
+  auto const filepath = temp_env->get_temp_filepath("ByteStreamSplitStruct.parquet");
+  cudf::io::parquet_writer_options out_opts =
+    cudf::io::parquet_writer_options::builder(cudf::io::sink_info{filepath}, *expected)
+      .metadata(expected_metadata);
+  cudf::io::write_parquet(out_opts);
+
+  cudf::io::parquet_reader_options in_opts =
+    cudf::io::parquet_reader_options::builder(cudf::io::source_info{filepath});
+  auto result = cudf::io::read_parquet(in_opts);
+
+  CUDF_TEST_EXPECT_TABLES_EQUAL(*expected, result.tbl->view());
 }
 
 TEST_F(ParquetWriterTest, DecimalByteStreamSplit)
diff --git a/cpp/tests/join/conditional_join_tests.cu b/cpp/tests/join/conditional_join_tests.cu
index 79968bcd7f4..7ab4a2ea465 100644
--- a/cpp/tests/join/conditional_join_tests.cu
+++ b/cpp/tests/join/conditional_join_tests.cu
@@ -20,6 +20,7 @@
 #include
 #include
+#include <cudf/detail/utilities/vector_factories.hpp>
 #include
 #include
 #include
@@ -222,21 +223,25 @@ struct ConditionalJoinPairReturnTest : public ConditionalJoinTest {
     std::vector<std::pair<cudf::size_type, cudf::size_type>> expected_outputs)
   {
     auto result_size = this->join_size(left, right, predicate);
-    EXPECT_TRUE(result_size == expected_outputs.size());
-
-    auto result = this->join(left, right, predicate);
-    std::vector<std::pair<cudf::size_type, cudf::size_type>> result_pairs;
-    for (size_t i = 0; i < result.first->size(); ++i) {
-      // Note: Not trying to be terribly efficient here since these tests are
-      // small, otherwise a batch copy to host before constructing the tuples
-      // would be important.
-      result_pairs.push_back({result.first->element(i, cudf::get_default_stream()),
-                              result.second->element(i, cudf::get_default_stream())});
-    }
+    EXPECT_EQ(result_size, expected_outputs.size());
+
+    auto result     = this->join(left, right, predicate);
+    auto lhs_result = cudf::detail::make_std_vector_sync(*result.first, cudf::get_default_stream());
+    auto rhs_result =
+      cudf::detail::make_std_vector_sync(*result.second, cudf::get_default_stream());
+    std::vector<std::pair<cudf::size_type, cudf::size_type>> result_pairs(lhs_result.size());
+    std::transform(lhs_result.begin(),
+                   lhs_result.end(),
+                   rhs_result.begin(),
+                   result_pairs.begin(),
+                   [](cudf::size_type lhs, cudf::size_type rhs) {
+                     return std::pair{lhs, rhs};
+                   });
     std::sort(result_pairs.begin(), result_pairs.end());
     std::sort(expected_outputs.begin(), expected_outputs.end());
 
-    EXPECT_TRUE(std::equal(expected_outputs.begin(), expected_outputs.end(), result_pairs.begin()));
+    EXPECT_TRUE(std::equal(
+      expected_outputs.begin(), expected_outputs.end(), result_pairs.begin(), result_pairs.end()));
   }
 
   /*
@@ -411,6 +416,11 @@ TYPED_TEST(ConditionalInnerJoinTest, TestOneColumnLeftEmpty)
   this->test({{}}, {{3, 4, 5}}, left_zero_eq_right_zero, {});
 };
 
+TYPED_TEST(ConditionalInnerJoinTest, TestOneColumnRightEmpty)
+{
+  this->test({{3, 4, 5}}, {{}}, left_zero_eq_right_zero, {});
+};
+
 TYPED_TEST(ConditionalInnerJoinTest, TestOneColumnTwoRowAllEqual)
 {
   this->test({{0, 1}}, {{0, 0}}, left_zero_eq_right_zero, {{0, 0}, {0, 1}});
@@ -600,6 +610,14 @@ TYPED_TEST(ConditionalLeftJoinTest, TestOneColumnLeftEmpty)
   this->test({{}}, {{3, 4, 5}}, left_zero_eq_right_zero, {});
 };
 
+TYPED_TEST(ConditionalLeftJoinTest, TestOneColumnRightEmpty)
+{
+  this->test({{3, 4, 5}},
+             {{}},
+             left_zero_eq_right_zero,
+             {{0, JoinNoneValue}, {1, JoinNoneValue}, {2, JoinNoneValue}});
+};
+
 TYPED_TEST(ConditionalLeftJoinTest, TestCompareRandomToHash)
 {
   auto [left, right] = gen_random_repeated_columns();
@@ -666,6 +684,14 @@ TYPED_TEST(ConditionalFullJoinTest, TestOneColumnLeftEmpty)
   {{JoinNoneValue, 0}, {JoinNoneValue, 1}, {JoinNoneValue, 2}});
 };
 
+TYPED_TEST(ConditionalFullJoinTest, TestOneColumnRightEmpty)
+{
+  this->test({{3, 4, 5}},
+             {{}},
+             left_zero_eq_right_zero,
+             {{0, JoinNoneValue}, {1, JoinNoneValue}, {2, JoinNoneValue}});
+};
+
 TYPED_TEST(ConditionalFullJoinTest, TestTwoColumnThreeRowSomeEqual)
 {
   this->test({{0, 1, 2}, {10, 20, 30}},
@@ -705,20 +731,16 @@ struct ConditionalJoinSingleReturnTest : public ConditionalJoinTest {
     auto [left_wrappers, right_wrappers, left_columns, right_columns, left, right] =
       this->parse_input(left_data, right_data);
     auto result_size = this->join_size(left, right, predicate);
-    EXPECT_TRUE(result_size == expected_outputs.size());
-
-    auto result = this->join(left, right, predicate);
-    std::vector<cudf::size_type> resulting_indices;
-    for (size_t i = 0; i < result->size(); ++i) {
-      // Note: Not trying to be terribly efficient here since these tests are
-      // small, otherwise a batch copy to host before constructing the tuples
-      // would be important.
-      resulting_indices.push_back(result->element(i, cudf::get_default_stream()));
-    }
-    std::sort(resulting_indices.begin(), resulting_indices.end());
+    EXPECT_EQ(result_size, expected_outputs.size());
+
+    auto result         = this->join(left, right, predicate);
+    auto result_indices = cudf::detail::make_std_vector_sync(*result, cudf::get_default_stream());
+    std::sort(result_indices.begin(), result_indices.end());
     std::sort(expected_outputs.begin(), expected_outputs.end());
-    EXPECT_TRUE(
-      std::equal(resulting_indices.begin(), resulting_indices.end(), expected_outputs.begin()));
+    EXPECT_TRUE(std::equal(result_indices.begin(),
+                           result_indices.end(),
+                           expected_outputs.begin(),
+                           expected_outputs.end()));
   }
 
   void _compare_to_hash_join(std::unique_ptr<rmm::device_uvector<cudf::size_type>> const& result,
@@ -826,6 +848,16 @@ struct ConditionalLeftSemiJoinTest : public ConditionalJoinSingleReturnTest {
 
 TYPED_TEST_SUITE(ConditionalLeftSemiJoinTest, cudf::test::IntegralTypesNotBool);
 
+TYPED_TEST(ConditionalLeftSemiJoinTest, TestOneColumnLeftEmpty)
+{
+  this->test({{}}, {{3, 4, 5}}, left_zero_eq_right_zero, {});
+};
+
+TYPED_TEST(ConditionalLeftSemiJoinTest, TestOneColumnRightEmpty)
+{
+  this->test({{3, 4, 5}}, {{}}, left_zero_eq_right_zero, {});
+};
+
 TYPED_TEST(ConditionalLeftSemiJoinTest, TestTwoColumnThreeRowSomeEqual)
 {
   this->test({{0, 1, 2}, {10, 20, 30}}, {{0, 1, 3}, {30, 40, 50}}, left_zero_eq_right_zero, {0, 1});
@@ -873,6 +905,16 @@ struct ConditionalLeftAntiJoinTest : public ConditionalJoinSingleReturnTest {
 
 TYPED_TEST_SUITE(ConditionalLeftAntiJoinTest, cudf::test::IntegralTypesNotBool);
 
+TYPED_TEST(ConditionalLeftAntiJoinTest, TestOneColumnLeftEmpty)
+{
+  this->test({{}}, {{3, 4, 5}}, left_zero_eq_right_zero, {});
+};
+
+TYPED_TEST(ConditionalLeftAntiJoinTest, TestOneColumnRightEmpty)
+{
+  this->test({{3, 4, 5}}, {{}}, left_zero_eq_right_zero, {0, 1, 2});
+};
+
 TYPED_TEST(ConditionalLeftAntiJoinTest, TestTwoColumnThreeRowSomeEqual)
 {
   this->test({{0, 1, 2}, {10, 20, 30}}, {{0, 1, 3}, {30, 40, 50}}, left_zero_eq_right_zero, {2});
diff --git a/cpp/tests/streams/stream_compaction_test.cpp b/cpp/tests/streams/stream_compaction_test.cpp
new file mode 100644
index 00000000000..56443870602
--- /dev/null
+++ b/cpp/tests/streams/stream_compaction_test.cpp
@@ -0,0 +1,235 @@
+/*
+ * Copyright (c) 2024, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include
+#include
+#include
+#include
+#include
+
+#include
+#include
+#include
+#include
+#include
+#include
+
+#include
+
+auto constexpr null{0};  // null at current level
+auto constexpr XXX{0};   // null pushed down from parent level
+auto constexpr NaN          = std::numeric_limits<double>::quiet_NaN();
+auto constexpr KEEP_ANY     = cudf::duplicate_keep_option::KEEP_ANY;
+auto constexpr KEEP_FIRST   = cudf::duplicate_keep_option::KEEP_FIRST;
+auto constexpr KEEP_LAST    = cudf::duplicate_keep_option::KEEP_LAST;
+auto constexpr KEEP_NONE    = cudf::duplicate_keep_option::KEEP_NONE;
+auto constexpr NULL_EQUAL   = cudf::null_equality::EQUAL;
+auto constexpr NULL_UNEQUAL = cudf::null_equality::UNEQUAL;
+auto constexpr NAN_EQUAL    = cudf::nan_equality::ALL_EQUAL;
+auto constexpr NAN_UNEQUAL  = cudf::nan_equality::UNEQUAL;
+
+using int32s_col = cudf::test::fixed_width_column_wrapper<int32_t>;
+using floats_col = cudf::test::fixed_width_column_wrapper<float>;
+
+using cudf::nan_policy;
+using cudf::null_equality;
+using cudf::null_policy;
+using cudf::test::iterators::no_nulls;
+using cudf::test::iterators::null_at;
+using cudf::test::iterators::nulls_at;
+
+struct StableDistinctKeepAny : public cudf::test::BaseFixture {};
+
+struct StableDistinctKeepFirstLastNone : public cudf::test::BaseFixture {};
+
+TEST_F(StableDistinctKeepAny, NoNullsTableWithNaNs)
+{
+  // Column(s) used to test KEEP_ANY need to have the same rows in contiguous
+  // groups for equivalent keys because KEEP_ANY is nondeterministic.
+  auto const col1  = int32s_col{6, 6, 6, 1, 1, 1, 3, 5, 8, 5};
+  auto const col2  = floats_col{6, 6, 6, 1, 1, 1, 3, 4, 9, 4};
+  auto const keys1 = int32s_col{20, 20, 20, 15, 15, 15, 20, 19, 21, 9};
+  auto const keys2 = floats_col{19., 19., 19., NaN, NaN, NaN, 20., 20., 9., 21.};
+
+  auto const input   = cudf::table_view{{col1, col2, keys1, keys2}};
+  auto const key_idx = std::vector<cudf::size_type>{2, 3};
+
+  // NaNs are unequal.
+  {
+    auto const exp_col1  = int32s_col{6, 1, 1, 1, 3, 5, 8, 5};
+    auto const exp_col2  = floats_col{6, 1, 1, 1, 3, 4, 9, 4};
+    auto const exp_keys1 = int32s_col{20, 15, 15, 15, 20, 19, 21, 9};
+    auto const exp_keys2 = floats_col{19., NaN, NaN, NaN, 20., 20., 9., 21.};
+    auto const expected  = cudf::table_view{{exp_col1, exp_col2, exp_keys1, exp_keys2}};
+
+    auto const result = cudf::stable_distinct(
+      input, key_idx, KEEP_ANY, NULL_EQUAL, NAN_UNEQUAL, cudf::test::get_default_stream());
+    CUDF_TEST_EXPECT_TABLES_EQUAL(expected, *result);
+  }
+
+  // NaNs are equal.
+  {
+    auto const exp_col1  = int32s_col{6, 1, 3, 5, 8, 5};
+    auto const exp_col2  = floats_col{6, 1, 3, 4, 9, 4};
+    auto const exp_keys1 = int32s_col{20, 15, 20, 19, 21, 9};
+    auto const exp_keys2 = floats_col{19., NaN, 20., 20., 9., 21.};
+    auto const expected  = cudf::table_view{{exp_col1, exp_col2, exp_keys1, exp_keys2}};
+
+    auto const result = cudf::stable_distinct(
+      input, key_idx, KEEP_ANY, NULL_EQUAL, NAN_EQUAL, cudf::test::get_default_stream());
+    CUDF_TEST_EXPECT_TABLES_EQUAL(expected, *result);
+  }
+}
+
+TEST_F(StableDistinctKeepAny, InputWithNullsAndNaNs)
+{
+  auto constexpr null{0.0};  // shadow the global `null` variable of type int
+
+  // Column(s) used to test KEEP_ANY need to have the same rows in contiguous
+  // groups for equivalent keys because KEEP_ANY is nondeterministic.
+  auto const col     = int32s_col{5, 4, 4, 1, 1, 1, 8, 8, 1};
+  auto const keys    = floats_col{{20., null, null, NaN, NaN, NaN, 19., 19., 21.}, nulls_at({1, 2})};
+  auto const input   = cudf::table_view{{col, keys}};
+  auto const key_idx = std::vector<cudf::size_type>{1};
+
+  // Nulls are equal, NaNs are unequal.
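+  // (With NULL_EQUAL the three null keys collapse into a single output row,
+  //  while NAN_UNEQUAL keeps every NaN key row.)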
+  {
+    auto const exp_col  = int32s_col{5, 4, 1, 1, 1, 8, 1};
+    auto const exp_keys = floats_col{{20., null, NaN, NaN, NaN, 19., 21.}, null_at(1)};
+    auto const expected = cudf::table_view{{exp_col, exp_keys}};
+
+    auto const result = cudf::stable_distinct(
+      input, key_idx, KEEP_ANY, NULL_EQUAL, NAN_UNEQUAL, cudf::test::get_default_stream());
+    CUDF_TEST_EXPECT_TABLES_EQUAL(expected, *result);
+  }
+
+  // Nulls are equal, NaNs are equal.
+  {
+    auto const exp_col  = int32s_col{5, 4, 1, 8, 1};
+    auto const exp_keys = floats_col{{20., null, NaN, 19., 21.}, null_at(1)};
+    auto const expected = cudf::table_view{{exp_col, exp_keys}};
+
+    auto const result = cudf::stable_distinct(
+      input, key_idx, KEEP_ANY, NULL_EQUAL, NAN_EQUAL, cudf::test::get_default_stream());
+    CUDF_TEST_EXPECT_TABLES_EQUAL(expected, *result);
+  }
+
+  // Nulls are unequal, NaNs are unequal.
+  {
+    auto const exp_col  = int32s_col{5, 4, 4, 1, 1, 1, 8, 1};
+    auto const exp_keys = floats_col{{20., null, null, NaN, NaN, NaN, 19., 21.}, nulls_at({1, 2})};
+    auto const expected = cudf::table_view{{exp_col, exp_keys}};
+
+    auto const result = cudf::stable_distinct(
+      input, key_idx, KEEP_ANY, NULL_UNEQUAL, NAN_UNEQUAL, cudf::test::get_default_stream());
+    CUDF_TEST_EXPECT_TABLES_EQUAL(expected, *result);
+  }
+
+  // Nulls are unequal, NaNs are equal.
+  {
+    auto const exp_col  = int32s_col{5, 4, 4, 1, 8, 1};
+    auto const exp_keys = floats_col{{20., null, null, NaN, 19., 21.}, nulls_at({1, 2})};
+    auto const expected = cudf::table_view{{exp_col, exp_keys}};
+
+    auto const result = cudf::stable_distinct(
+      input, key_idx, KEEP_ANY, NULL_UNEQUAL, NAN_EQUAL, cudf::test::get_default_stream());
+    CUDF_TEST_EXPECT_TABLES_EQUAL(expected, *result);
+  }
+}
+
+TEST_F(StableDistinctKeepFirstLastNone, InputWithNaNsEqual)
+{
+  // Column(s) used to test need to have different rows for the same keys.
+  auto const col     = int32s_col{0, 1, 2, 3, 4, 5, 6};
+  auto const keys    = floats_col{20., NaN, NaN, 19., 21., 19., 22.};
+  auto const input   = cudf::table_view{{col, keys}};
+  auto const key_idx = std::vector<cudf::size_type>{1};
+
+  // KEEP_FIRST
+  {
+    auto const exp_col  = int32s_col{0, 1, 3, 4, 6};
+    auto const exp_keys = floats_col{20., NaN, 19., 21., 22.};
+    auto const expected = cudf::table_view{{exp_col, exp_keys}};
+
+    auto const result = cudf::stable_distinct(
+      input, key_idx, KEEP_FIRST, NULL_EQUAL, NAN_EQUAL, cudf::test::get_default_stream());
+    CUDF_TEST_EXPECT_TABLES_EQUAL(expected, *result);
+  }
+
+  // KEEP_LAST
+  {
+    auto const exp_col  = int32s_col{0, 2, 4, 5, 6};
+    auto const exp_keys = floats_col{20., NaN, 21., 19., 22.};
+    auto const expected = cudf::table_view{{exp_col, exp_keys}};
+
+    auto const result = cudf::stable_distinct(
+      input, key_idx, KEEP_LAST, NULL_EQUAL, NAN_EQUAL, cudf::test::get_default_stream());
+    CUDF_TEST_EXPECT_TABLES_EQUAL(expected, *result);
+  }
+
+  // KEEP_NONE
+  {
+    auto const exp_col  = int32s_col{0, 4, 6};
+    auto const exp_keys = floats_col{20., 21., 22.};
+    auto const expected = cudf::table_view{{exp_col, exp_keys}};
+
+    auto const result = cudf::stable_distinct(
+      input, key_idx, KEEP_NONE, NULL_EQUAL, NAN_EQUAL, cudf::test::get_default_stream());
+    CUDF_TEST_EXPECT_TABLES_EQUAL(expected, *result);
+  }
+}
+
+TEST_F(StableDistinctKeepFirstLastNone, InputWithNaNsUnequal)
+{
+  // Column(s) used to test need to have different rows for the same keys.
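+  // (Otherwise the KEEP_FIRST, KEEP_LAST, and KEEP_NONE results could not be
+  //  distinguished by the payload column.)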
+  auto const col     = int32s_col{0, 1, 2, 3, 4, 5, 6, 7};
+  auto const keys    = floats_col{20., NaN, NaN, 19., 21., 19., 22., 20.};
+  auto const input   = cudf::table_view{{col, keys}};
+  auto const key_idx = std::vector<cudf::size_type>{1};
+
+  // KEEP_FIRST
+  {
+    auto const exp_col  = int32s_col{0, 1, 2, 3, 4, 6};
+    auto const exp_keys = floats_col{20., NaN, NaN, 19., 21., 22.};
+    auto const expected = cudf::table_view{{exp_col, exp_keys}};
+
+    auto const result = cudf::stable_distinct(
+      input, key_idx, KEEP_FIRST, NULL_UNEQUAL, NAN_UNEQUAL, cudf::test::get_default_stream());
+    CUDF_TEST_EXPECT_TABLES_EQUAL(expected, *result);
+  }
+
+  // KEEP_LAST
+  {
+    auto const exp_col  = int32s_col{1, 2, 4, 5, 6, 7};
+    auto const exp_keys = floats_col{NaN, NaN, 21., 19., 22., 20.};
+    auto const expected = cudf::table_view{{exp_col, exp_keys}};
+
+    auto const result = cudf::stable_distinct(
+      input, key_idx, KEEP_LAST, NULL_UNEQUAL, NAN_UNEQUAL, cudf::test::get_default_stream());
+    CUDF_TEST_EXPECT_TABLES_EQUAL(expected, *result);
+  }
+
+  // KEEP_NONE
+  {
+    auto const exp_col  = int32s_col{1, 2, 4, 6};
+    auto const exp_keys = floats_col{NaN, NaN, 21., 22.};
+    auto const expected = cudf::table_view{{exp_col, exp_keys}};
+
+    auto const result = cudf::stable_distinct(
+      input, key_idx, KEEP_NONE, NULL_UNEQUAL, NAN_UNEQUAL, cudf::test::get_default_stream());
+    CUDF_TEST_EXPECT_TABLES_EQUAL(expected, *result);
+  }
+}
diff --git a/cpp/tests/strings/array_tests.cpp b/cpp/tests/strings/array_tests.cpp
index a1bb87a43fb..9c0ecaa52c0 100644
--- a/cpp/tests/strings/array_tests.cpp
+++ b/cpp/tests/strings/array_tests.cpp
@@ -23,8 +23,8 @@
 #include
 #include
 #include
-#include <cudf/strings/detail/utilities.hpp>
 #include
+#include <cudf/strings/utilities.hpp>
 #include
 #include
 
@@ -153,7 +153,7 @@ TEST_F(StringsColumnTest, GatherZeroSizeStringsColumn)
 
 TEST_F(StringsColumnTest, GatherTooBig)
 {
-  if (cudf::strings::detail::is_large_strings_enabled()) { return; }
+  if (cudf::strings::is_large_strings_enabled()) { return; }
   std::vector<char> h_chars(3000000);
   cudf::test::fixed_width_column_wrapper<char> chars(h_chars.begin(), h_chars.end());
diff --git a/cpp/tests/strings/repeat_strings_tests.cpp b/cpp/tests/strings/repeat_strings_tests.cpp
index 0539895c5f4..aa4d9320d7c 100644
--- a/cpp/tests/strings/repeat_strings_tests.cpp
+++ b/cpp/tests/strings/repeat_strings_tests.cpp
@@ -20,9 +20,9 @@
 #include
 #include
-#include <cudf/strings/detail/utilities.hpp>
 #include
 #include
+#include <cudf/strings/utilities.hpp>
 
 using namespace cudf::test::iterators;
 
@@ -221,7 +221,7 @@ TEST_F(RepeatStringsTest, StringsColumnWithColumnRepeatTimesInvalidInput)
 
 TEST_F(RepeatStringsTest, StringsColumnWithColumnRepeatTimesOverflowOutput)
 {
-  if (cudf::strings::detail::is_large_strings_enabled()) { return; }
+  if (cudf::strings::is_large_strings_enabled()) { return; }
   auto const strs    = strs_col{"1", "12", "123", "1234", "12345", "123456", "1234567"};
   auto const strs_cv = cudf::strings_column_view(strs);
diff --git a/docs/cudf/source/_static/cudf-pandas-line-profile.png b/docs/cudf/source/_static/cudf-pandas-line-profile.png
new file mode 100644
index 0000000000000000000000000000000000000000..1d5a07c72eb06ab9270ab13e06aa31d55d4981ca
GIT binary patch
literal 15125
[... base85-encoded PNG image data omitted ...]
z#D|n0y#<0>l&(IV8W{R(X=ScFjkL4Z*q>biwx(b2@N0n%A+flJm-2^xik}t2R4xzcSv#OlM1zE9+%8c;0j>kcdaQTL z_N7FEq~TpHVW*t8oY0JL&NzB;wR3myq8-vy&lV$vT%?zu5wA+O3b3I%+@pj)l<;4T zo{l8W7a~y&)A*73O4)v2es_ubMz8vnMJB|HEwZGHX!d|~3ex>qN+*9K2UQ@Gde}XQ z4Zo%5DDz@FTQ=YWxA>ShXEraS%hfXeto^~4b-%LjkzL}diw3@Ra!1Ik%)bi8Dvv{F zX|*XQO*Er0cH3i^EA4^PeCOc$K>qHP>>XaYBDM5yca6Uys=Ql4OW{tRnl8=rv}F4I zFel!IP#hbhKyJ-kk^8wNj8`-l-T7pF-UD#dm#AGPS6ThkdKx~JHjb|zlSZ7B!biBF zr~3MAG8=wJ%~7%O6Xr&evS6h$4$~wXQ6R9-<6n+VS3mHpJlxysMm9p^Nm(nQh)0HB za_+6({PJsT29Z)OWTE|LjbX1K@y~*j^48bPE&E?d-d%I~a^`8CJ-7scyU`AjHl6(e76Pcu%`Hku6O}6%SmComUPw!*b z=*Ez0Iwg1@XMd4}DCka}xoH>j9z|8F?EbqGNgjRY#a^GHJZ8R*J+g!MAtjDNYq9}@ zaVr6E1iM8-5XY88e17GK!@B|citxhTTcJiRd(U8Am(&c|Im~I#0&n*4ssu4rgf;)`CljcQ=CE5-qq1BU{A3r`UqT$i}B2S@H8v)poJd6L}HdDWD*cwV0QT#?a!tHi_g;dUG z4D8%{yNz zy9?BB6c->MlCB(w$+gh>)<{^Mu8;ec96HbUF-;Uc^~Z1R@N(SwF6?(j^&Nf}z*$M+ zPsJePW7ie`9Bill+^%1iX5J`YSx>&{AYIYe-z69r}F84?%+7%foOb!)=7Zu zu?J~VVqSA8GJ))mN+wuc)Sjd09{r)^{=J98F`M`wJ@@~11BV=#@-R~NGysmHNa;V8 ztMTZSI+>sxGJtS5(0XC98x)&U<+X0d&Sf4v{Gl~|$EyST?t}gmq;!Aet$mjD86bcI zo34FN4no)7eaCO1%gV=qzvcAjQv87X;|YSybS{gZ3z-*ixa@Giav(r7$&2c62V(#; zN}F+Z9~iNO@5lR;`Yp_Kn|QicM8ue%XTS)35SnxpYeO?C1y?8#qWpH^8gX$fmMKtK&@vr(*P2D(9breFjB?zr1`tM0wrrudn}>chS~>$7J7 z_gu*u*mssW49VxOPT}<_dctb;&;zf(<*rt+HE=LoOI?hZB?H(PvQsH;WT8^ z4=3usF_=G-AP{g83eh*RZHer2fV;Qr3?I!cqxWStv!*ArE1Sc?rAx=-lccQ`_D#Pa zi6-#rtsUWkFf)@jn5K*R2Z+_+2Y;LcQ|fo;*$?*sE`!eLD2x&}bkg9!pA^=(o@(gJ zxFrlW3OBeP3mY@2)cOWLWfAuAYir8!*i(Y#Hx|}S&*Ov144y9Cwx$kW`LN(O*gE&Q z%k4bVFHC3EZ@hTB0h|XNg=V@8>-1Dsn`a|rz2@;sX(U=^dtW`nb9*qEHULm(XRy9s za#=UXldaylsH6es>b6n>&Z39f7P9#~U>g+S99yIP>HXe?yn7+OQx9$^uBQafMXELR z8jF9G+053b6Q0&onbrV`uM}3F7_~JP+Ay~#zQRS5wIs;YBG*$lC{WXv=U2>c zx<$to(+P0ffw}#IpLWJ5_F5CB*=0IH>a|!$;#pAJ2RH%u7Yd6t%yBRGbXD}J^ccKz zs#+dECN_bK=&ZJGV7)S_;pmW0K{H`?YJ`YrDOzTnPos8vzI7%&4nQTT# zNCz2i!F@*EeP8~`or%u=j7ibBFaWJZzqt^~TKu?`PeR#$Z;+W}GC~fl5Nl{hw) zwx^A6xuDUEU01F%ygj3j$p{q&rl{ErYWz%RRzMIzI(#@-p4#n?Mf2xJDxxp*;7Sjfmq(-ue(dQ2f z^;1>n1%4{irY;t=i@UzCwva-}Pfdwg(m-9udNbfR3*m)FzSv(c_+Kpd{j8++8rOVb zZvCS>2Z8{+hC;RJ^FfYeOO;w7?9?v_2In`bbY%^9l zQ$mC>tfkVEXHHG$7vC=cumva)PP# zXss&pNic!@$nB;I}ZNHkP;J3@xLM#d>r$xp1~1+p#99fP3cMwWcB znvKZ4CiRrvYBf7yO_#fPzc6zjbe|D(rNZPK+iAm%fcqyw!Bi_pZj8YFHV;k@lBNrc!;LM^EH5V?q1Lv7Mz&Md z&+8pz-VnF#`Ca)?_H+WNq4ZduutYS%rAS zTUC-igOMqC5gL7uFj_kA0isv_sX}*1mY1$BuJM5w+GifjyVZ{%cO54G%9;N6hDrS9 zPdRY3NBfxEM)fJo_?=V=XbQMDrqM5A$;ZAKR6+Nz3iBV8c6=&;7zo2ialj@=z}n_M zmjVwY1A2RBzJ>+~G2n0d&n>I?8thpWj_A*7GzQe;aUu0vB+Wk+cfz4CJYoNRC=6fT z`QN*s{r{V-0AC4v!AVFgZ(vAYXc9_X%q&4SV+m=;f0a0V`;T*fe ``` +## Which functions will run on the GPU? + +Generally, `cudf.pandas` will accelerate all the features in the +{ref}`cuDF API ` on the GPU. There are some exceptions. For +example, some functions are GPU-accelerated by cuDF but do not support +every combination of keyword arguments. In cases like unsupported +keyword arguments, cuDF is not able to provide GPU acceleration and +`cudf.pandas` will fall back to the CPU. + +The most accurate way to assess which functions run on the GPU is to try +running the code while using the `cudf.pandas` profiling features. The +profiler will indicate which functions ran on GPU / CPU. To improve +performance, try to use only functionality that can run entirely on GPU. +This helps reduce the number of memory transfers needed to fallback to +CPU. + ## Does it work with third-party libraries? `cudf.pandas` is tested with numerous popular third-party libraries. 
diff --git a/docs/cudf/source/cudf_pandas/usage.md b/docs/cudf/source/cudf_pandas/usage.md
index 376784439aa..0398a8d7086 100644
--- a/docs/cudf/source/cudf_pandas/usage.md
+++ b/docs/cudf/source/cudf_pandas/usage.md
@@ -63,16 +63,22 @@ back to CPU for certain operations.
 Running your code with the `cudf.pandas.profile` magic generates a report
 showing which operations used the GPU and which used the CPU. This can
 help you identify parts of your code that could be rewritten to be more
-GPU-friendly:
+GPU-friendly.
+
+### Using the Function Profiler
+
+First, enable `cudf.pandas`:
 
 ```python
 %load_ext cudf.pandas
 import pandas as pd
 ```
 
+Next, use the IPython/Jupyter magic `cudf.pandas.profile`:
+
 ```python
 %%cudf.pandas.profile
-df = pd.DataFrame({'a': [0, 1, 2], 'b': [3,4,3]})
+df = pd.DataFrame({'a': [0, 1, 2], 'b': [3, 4, 3]})
 
 df.min(axis=1)
 out = df.groupby('a').filter(
@@ -80,13 +86,35 @@ out = df.groupby('a').filter(
 )
 ```
 
+After the cell runs, the profiler prints a report like the one shown below.
+
 ![cudf-pandas-profile](../_static/cudf-pandas-profile.png)
 
 When an operation falls back to using the CPU, it's typically because
 that operation isn't implemented by cuDF. The profiler generates a
 handy link to report the missing functionality to the cuDF team.
 
-To profile a script being run from the command-line, pass the
+### Using the Line Profiler
+
+A line profiler can be activated with the IPython/Jupyter magic `cudf.pandas.line_profile`:
+
+```python
+%%cudf.pandas.line_profile
+df = pd.DataFrame({'a': [0, 1, 2], 'b': [3, 4, 3]})
+
+df.min(axis=1)
+out = df.groupby('a').filter(
+    lambda group: len(group) > 1
+)
+```
+
+The output of the line profiler shows the source code along with how much
+time each line spent executing on the GPU and the CPU.
+
+![cudf-pandas-line-profile](../_static/cudf-pandas-line-profile.png)
+
+### Profiling from the command line
+
+To profile a script being run from the command line, pass the
 `--profile` argument:
 
 ```bash
diff --git a/docs/cudf/source/user_guide/api_docs/index.rst b/docs/cudf/source/user_guide/api_docs/index.rst
index b3442908531..d05501f4a4a 100644
--- a/docs/cudf/source/user_guide/api_docs/index.rst
+++ b/docs/cudf/source/user_guide/api_docs/index.rst
@@ -1,3 +1,5 @@
+.. _cudf-api:
+
 =============
 API reference
 =============
@@ -24,3 +26,4 @@ This page provides a list of all publicly accessible modules, methods and classes
    options
    extension_dtypes
    pylibcudf/index.rst
+   performance_tracking
diff --git a/docs/cudf/source/user_guide/api_docs/performance_tracking.rst b/docs/cudf/source/user_guide/api_docs/performance_tracking.rst
new file mode 100644
index 00000000000..9da79e69fb2
--- /dev/null
+++ b/docs/cudf/source/user_guide/api_docs/performance_tracking.rst
@@ -0,0 +1,12 @@
+.. _api.performance_tracking:
+
+====================
+Performance Tracking
+====================
+
+.. currentmodule:: cudf.utils.performance_tracking
+.. autosummary::
+   :toctree: api/
+
+   get_memory_records
+   print_memory_report
diff --git a/docs/cudf/source/user_guide/api_docs/pylibcudf/index.rst b/docs/cudf/source/user_guide/api_docs/pylibcudf/index.rst
index f98298ff052..e9dad705cbf 100644
--- a/docs/cudf/source/user_guide/api_docs/pylibcudf/index.rst
+++ b/docs/cudf/source/user_guide/api_docs/pylibcudf/index.rst
@@ -19,6 +19,7 @@ This page provides API documentation for pylibcudf.
    gpumemoryview
    groupby
    io/index.rst
+   interop
    join
    lists
    merge
diff --git a/docs/cudf/source/user_guide/api_docs/pylibcudf/interop.rst b/docs/cudf/source/user_guide/api_docs/pylibcudf/interop.rst
new file mode 100644
index 00000000000..881ab8d7be4
--- /dev/null
+++ b/docs/cudf/source/user_guide/api_docs/pylibcudf/interop.rst
@@ -0,0 +1,6 @@
+=======
+interop
+=======
+
+.. automodule:: cudf._lib.pylibcudf.interop
+   :members:
diff --git a/docs/cudf/source/user_guide/api_docs/pylibcudf/strings/index.rst b/docs/cudf/source/user_guide/api_docs/pylibcudf/strings/index.rst
index bfaef732555..cecf1ccc9bb 100644
--- a/docs/cudf/source/user_guide/api_docs/pylibcudf/strings/index.rst
+++ b/docs/cudf/source/user_guide/api_docs/pylibcudf/strings/index.rst
@@ -6,3 +6,4 @@ strings
 
    contains
    replace
+   slice
diff --git a/docs/cudf/source/user_guide/api_docs/pylibcudf/strings/slice.rst b/docs/cudf/source/user_guide/api_docs/pylibcudf/strings/slice.rst
new file mode 100644
index 00000000000..0ee5af71c03
--- /dev/null
+++ b/docs/cudf/source/user_guide/api_docs/pylibcudf/strings/slice.rst
@@ -0,0 +1,6 @@
+=====
+slice
+=====
+
+.. automodule:: cudf._lib.pylibcudf.strings.slice
+   :members:
diff --git a/docs/cudf/source/user_guide/index.md b/docs/cudf/source/user_guide/index.md
index 486368c3b8b..df4e4795a08 100644
--- a/docs/cudf/source/user_guide/index.md
+++ b/docs/cudf/source/user_guide/index.md
@@ -16,5 +16,6 @@ options
 performance-comparisons/index
 PandasCompat
 copy-on-write
+memory-profiling
 pandas-2.0-breaking-changes
 ```
diff --git a/docs/cudf/source/user_guide/memory-profiling.md b/docs/cudf/source/user_guide/memory-profiling.md
new file mode 100644
index 00000000000..ab5433685e6
--- /dev/null
+++ b/docs/cudf/source/user_guide/memory-profiling.md
@@ -0,0 +1,44 @@
+(memory-profiling-user-doc)=
+
+# Memory Profiling
+
+Peak memory usage is a common concern in GPU programming because GPU memory is typically smaller than available CPU memory. To easily identify memory hotspots, cuDF provides a memory profiler. Profiling adds overhead, so avoid using it in performance-sensitive code.
+
+## Enabling Memory Profiling
+
+First, enable memory profiling in RMM by calling {py:func}`rmm.statistics.enable_statistics()`. This adds a statistics resource adaptor to the current RMM memory resource, which enables cuDF to access memory profiling information. See the [RMM documentation](https://docs.rapids.ai/api/rmm/stable/guide/#memory-statistics-and-profiling) for more details.
+
+Second, enable memory profiling in cuDF by setting the `memory_profiling` option to `True`. Use {py:func}`cudf.set_option` or set the environment variable ``CUDF_MEMORY_PROFILING=1`` prior to the launch of the Python interpreter.
+
+To get the result of the profiling, use {py:func}`cudf.utils.performance_tracking.print_memory_report` or access the raw profiling data using {py:func}`cudf.utils.performance_tracking.get_memory_records`.
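+
+For example, a minimal whole-script invocation using the environment variable (a sketch assuming a hypothetical `my_script.py` that itself calls {py:func}`rmm.statistics.enable_statistics()`): `CUDF_MEMORY_PROFILING=1 python my_script.py`.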
+ +### Example +In the following, we enable profiling, do some work, and then print the profiling results: + +```python +>>> import cudf +>>> from cudf.utils.performance_tracking import print_memory_report +>>> from rmm.statistics import enable_statistics +>>> enable_statistics() +>>> cudf.set_option("memory_profiling", True) +>>> cudf.DataFrame({"a": [1, 2, 3]}) # Some work + a +0 1 +1 2 +2 3 +>>> print_memory_report() # Pretty print the result of the profiling +Memory Profiling +================ + +Legends: +ncalls - number of times the function or code block was called +memory_peak - peak memory allocated in function or code block (in bytes) +memory_total - total memory allocated in function or code block (in bytes) + +Ordered by: memory_peak + +ncalls memory_peak memory_total filename:lineno(function) + 1 32 32 cudf/core/dataframe.py:690(DataFrame.__init__) + 2 0 0 cudf/core/index.py:214(RangeIndex.__init__) + 6 0 0 cudf/core/index.py:424(RangeIndex.__len__) +``` diff --git a/python/cudf/cudf/_lib/lists.pyx b/python/cudf/cudf/_lib/lists.pyx index 413eef1d762..047ed27329a 100644 --- a/python/cudf/cudf/_lib/lists.pyx +++ b/python/cudf/cudf/_lib/lists.pyx @@ -9,10 +9,6 @@ from libcpp.utility cimport move from cudf._lib.column cimport Column from cudf._lib.pylibcudf.libcudf.column.column cimport column from cudf._lib.pylibcudf.libcudf.column.column_view cimport column_view -from cudf._lib.pylibcudf.libcudf.lists.contains cimport ( - contains, - index_of as cpp_index_of, -) from cudf._lib.pylibcudf.libcudf.lists.extract cimport extract_list_element from cudf._lib.pylibcudf.libcudf.lists.lists_column_view cimport ( lists_column_view, @@ -23,7 +19,6 @@ from cudf._lib.pylibcudf.libcudf.lists.sorting cimport ( from cudf._lib.pylibcudf.libcudf.lists.stream_compaction cimport ( distinct as cpp_distinct, ) -from cudf._lib.pylibcudf.libcudf.scalar.scalar cimport scalar from cudf._lib.pylibcudf.libcudf.types cimport ( nan_equality, null_equality, @@ -31,11 +26,12 @@ from cudf._lib.pylibcudf.libcudf.types cimport ( order, size_type, ) -from cudf._lib.scalar cimport DeviceScalar from cudf._lib.utils cimport columns_from_pylibcudf_table from cudf._lib import pylibcudf +from cudf._lib.pylibcudf cimport Scalar + @acquire_spill_lock() def count_elements(Column col): @@ -141,64 +137,36 @@ def extract_element_column(Column col, Column index): @acquire_spill_lock() -def contains_scalar(Column col, object py_search_key): - - cdef DeviceScalar search_key = py_search_key.device_value - - cdef shared_ptr[lists_column_view] list_view = ( - make_shared[lists_column_view](col.view()) +def contains_scalar(Column col, py_search_key): + return Column.from_pylibcudf( + pylibcudf.lists.contains( + col.to_pylibcudf(mode="read"), + py_search_key.device_value.c_value, + ) ) - cdef const scalar* search_key_value = search_key.get_raw_ptr() - - cdef unique_ptr[column] c_result - - with nogil: - c_result = move(contains( - list_view.get()[0], - search_key_value[0], - )) - result = Column.from_unique_ptr(move(c_result)) - return result @acquire_spill_lock() def index_of_scalar(Column col, object py_search_key): - - cdef DeviceScalar search_key = py_search_key.device_value - - cdef shared_ptr[lists_column_view] list_view = ( - make_shared[lists_column_view](col.view()) + return Column.from_pylibcudf( + pylibcudf.lists.index_of( + col.to_pylibcudf(mode="read"), + py_search_key.device_value.c_value, + True, + ) ) - cdef const scalar* search_key_value = search_key.get_raw_ptr() - - cdef unique_ptr[column] c_result - - with nogil: - 
c_result = move(cpp_index_of( - list_view.get()[0], - search_key_value[0], - )) - return Column.from_unique_ptr(move(c_result)) @acquire_spill_lock() def index_of_column(Column col, Column search_keys): - - cdef column_view keys_view = search_keys.view() - - cdef shared_ptr[lists_column_view] list_view = ( - make_shared[lists_column_view](col.view()) + return Column.from_pylibcudf( + pylibcudf.lists.index_of( + col.to_pylibcudf(mode="read"), + search_keys.to_pylibcudf(mode="read"), + True, + ) ) - cdef unique_ptr[column] c_result - - with nogil: - c_result = move(cpp_index_of( - list_view.get()[0], - keys_view, - )) - return Column.from_unique_ptr(move(c_result)) - @acquire_spill_lock() def concatenate_rows(list source_columns): diff --git a/python/cudf/cudf/_lib/parquet.pyx b/python/cudf/cudf/_lib/parquet.pyx index 7914ed7e9d9..d1ec5be9e62 100644 --- a/python/cudf/cudf/_lib/parquet.pyx +++ b/python/cudf/cudf/_lib/parquet.pyx @@ -20,6 +20,7 @@ from cudf.api.types import is_list_like from cudf._lib.utils cimport data_from_unique_ptr +from cudf._lib import pylibcudf from cudf._lib.utils import _index_level_name, generate_pandas_metadata from libc.stdint cimport uint8_t @@ -70,8 +71,11 @@ from cudf._lib.utils cimport table_view_from_table from pyarrow.lib import NativeFile +from cudf._lib.concat import concat_columns from cudf.utils.ioutils import _ROW_GROUP_SIZE_BYTES_DEFAULT +from cudf._lib.utils cimport data_from_pylibcudf_table + cdef class BufferArrayFromVector: cdef Py_ssize_t length @@ -878,14 +882,32 @@ cdef class ParquetReader: return df def read(self): - dfs = [] + dfs = self._read_chunk() + column_names = dfs._column_names + concatenated_columns = list(dfs._columns) + del dfs while self._has_next(): - dfs.append(self._read_chunk()) - df = cudf.concat(dfs) - df = _process_metadata(df, self.result_meta, self.names, self.row_groups, - self.filepaths_or_buffers, self.pa_buffers, - self.allow_range_index, self.cpp_use_pandas_metadata) - return df + new_chunk = list(self._read_chunk()._columns) + for i in range(len(column_names)): + concatenated_columns[i] = concat_columns( + [concatenated_columns[i], new_chunk[i]] + ) + # Must drop any residual GPU columns to save memory + new_chunk[i] = None + + dfs = cudf.DataFrame._from_data( + *data_from_pylibcudf_table( + pylibcudf.Table( + [col.to_pylibcudf(mode="read") for col in concatenated_columns] + ), + column_names=column_names, + index_names=None + ) + ) + + return _process_metadata(dfs, self.result_meta, self.names, self.row_groups, + self.filepaths_or_buffers, self.pa_buffers, + self.allow_range_index, self.cpp_use_pandas_metadata) cpdef merge_filemetadata(object filemetadata_list): """ diff --git a/python/cudf/cudf/_lib/pylibcudf/column.pxd b/python/cudf/cudf/_lib/pylibcudf/column.pxd index e121e856865..d13791d95cf 100644 --- a/python/cudf/cudf/_lib/pylibcudf/column.pxd +++ b/python/cudf/cudf/_lib/pylibcudf/column.pxd @@ -8,6 +8,9 @@ from cudf._lib.pylibcudf.libcudf.column.column_view cimport ( column_view, mutable_column_view, ) +from cudf._lib.pylibcudf.libcudf.lists.lists_column_view cimport ( + lists_column_view, +) from cudf._lib.pylibcudf.libcudf.types cimport bitmask_type, size_type from .gpumemoryview cimport gpumemoryview @@ -56,3 +59,4 @@ cdef class ListColumnView: cdef Column _column cpdef child(self) cpdef offsets(self) + cdef lists_column_view view(self) nogil diff --git a/python/cudf/cudf/_lib/pylibcudf/column.pyx b/python/cudf/cudf/_lib/pylibcudf/column.pyx index e726eca154f..e0cf8b7ee32 100644 --- 
a/python/cudf/cudf/_lib/pylibcudf/column.pyx
+++ b/python/cudf/cudf/_lib/pylibcudf/column.pyx
@@ -348,6 +348,15 @@ cdef class ListColumnView:
         """The offsets column of the underlying list column."""
         return self._column.child(1)
 
+    cdef lists_column_view view(self) nogil:
+        """Generate a libcudf lists_column_view to pass to libcudf algorithms.
+
+        This method is for pylibcudf's functions to use to generate inputs when
+        calling libcudf algorithms, and should generally not be needed by users
+        (even direct pylibcudf Cython users).
+        """
+        return lists_column_view(self._column.view())
+
 
 @functools.cache
 def _datatype_from_dtype_desc(desc):
diff --git a/python/cudf/cudf/_lib/pylibcudf/join.pxd b/python/cudf/cudf/_lib/pylibcudf/join.pxd
index f560eeef06d..83b4776c16e 100644
--- a/python/cudf/cudf/_lib/pylibcudf/join.pxd
+++ b/python/cudf/cudf/_lib/pylibcudf/join.pxd
@@ -35,3 +35,5 @@ cpdef Column left_anti_join(
     Table right_keys,
     null_equality nulls_equal
 )
+
+cpdef Table cross_join(Table left, Table right)
diff --git a/python/cudf/cudf/_lib/pylibcudf/join.pyx b/python/cudf/cudf/_lib/pylibcudf/join.pyx
index cf2a6a8187f..308b1b39291 100644
--- a/python/cudf/cudf/_lib/pylibcudf/join.pyx
+++ b/python/cudf/cudf/_lib/pylibcudf/join.pyx
@@ -2,13 +2,14 @@
 
 from cython.operator import dereference
 
-from libcpp.memory cimport make_unique
+from libcpp.memory cimport make_unique, unique_ptr
 from libcpp.utility cimport move
 
 from rmm._lib.device_buffer cimport device_buffer
 
 from cudf._lib.pylibcudf.libcudf cimport join as cpp_join
 from cudf._lib.pylibcudf.libcudf.column.column cimport column
+from cudf._lib.pylibcudf.libcudf.table.table cimport table
 from cudf._lib.pylibcudf.libcudf.types cimport (
     data_type,
     null_equality,
@@ -88,7 +89,6 @@ cpdef tuple left_join(
     nulls_equal : NullEquality
         Should nulls compare equal?
 
-
     Returns
     -------
     Tuple[Column, Column]
@@ -122,7 +122,6 @@ cpdef tuple full_join(
     nulls_equal : NullEquality
         Should nulls compare equal?
 
-
     Returns
     -------
     Tuple[Column, Column]
@@ -156,7 +155,6 @@ cpdef Column left_semi_join(
     nulls_equal : NullEquality
         Should nulls compare equal?
 
-
     Returns
     -------
     Column
@@ -190,7 +188,6 @@ cpdef Column left_anti_join(
     nulls_equal : NullEquality
         Should nulls compare equal?
 
-
     Returns
     -------
     Column
@@ -204,3 +201,26 @@ cpdef Column left_anti_join(
         nulls_equal
     )
     return _column_from_gather_map(move(c_result))
+
+
+cpdef Table cross_join(Table left, Table right):
+    """Perform a cross join on two tables.
+
+    For details see :cpp:func:`cross_join`.
+
+    Parameters
+    ----------
+    left : Table
+        The left table to join.
+    right : Table
+        The right table to join.
+
+    Returns
+    -------
+    Table
+        The result of cross joining the two inputs.
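+
+    Examples
+    --------
+    A minimal sketch; assumes ``left`` and ``right`` are existing pylibcudf
+    ``Table`` objects built elsewhere:
+
+    >>> joined = cross_join(left, right)
+    >>> # every row of ``left`` is paired with every row of ``right``
+    >>> assert joined.num_rows() == left.num_rows() * right.num_rows()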
+ """ + cdef unique_ptr[table] result + with nogil: + result = move(cpp_join.cross_join(left.view(), right.view())) + return Table.from_libcudf(move(result)) diff --git a/python/cudf/cudf/_lib/pylibcudf/libcudf/join.pxd b/python/cudf/cudf/_lib/pylibcudf/libcudf/join.pxd index 89a30f0f255..32cd17f7c11 100644 --- a/python/cudf/cudf/_lib/pylibcudf/libcudf/join.pxd +++ b/python/cudf/cudf/_lib/pylibcudf/libcudf/join.pxd @@ -70,3 +70,8 @@ cdef extern from "cudf/join.hpp" namespace "cudf" nogil: const table_view right_keys, null_equality nulls_equal, ) except + + + cdef unique_ptr[table] cross_join( + const table_view left, + const table_view right, + ) except + diff --git a/python/cudf/cudf/_lib/pylibcudf/libcudf/lists/contains.pxd b/python/cudf/cudf/_lib/pylibcudf/libcudf/lists/contains.pxd index 721679f35c7..82aed7d70a0 100644 --- a/python/cudf/cudf/_lib/pylibcudf/libcudf/lists/contains.pxd +++ b/python/cudf/cudf/_lib/pylibcudf/libcudf/lists/contains.pxd @@ -1,5 +1,6 @@ # Copyright (c) 2021-2024, NVIDIA CORPORATION. +from libc.stdint cimport int32_t from libcpp.memory cimport unique_ptr from cudf._lib.exception_handler cimport cudf_exception_handler @@ -12,17 +13,33 @@ from cudf._lib.pylibcudf.libcudf.scalar.scalar cimport scalar cdef extern from "cudf/lists/contains.hpp" namespace "cudf::lists" nogil: + + cpdef enum class duplicate_find_option(int32_t): + FIND_FIRST + FIND_LAST + cdef unique_ptr[column] contains( - lists_column_view lists, - scalar search_key, + const lists_column_view& lists, + const scalar& search_key, + ) except +cudf_exception_handler + + cdef unique_ptr[column] contains( + const lists_column_view& lists, + const column_view& search_keys, + ) except +cudf_exception_handler + + cdef unique_ptr[column] contains_nulls( + const lists_column_view& lists, ) except +cudf_exception_handler cdef unique_ptr[column] index_of( - lists_column_view lists, - scalar search_key, + const lists_column_view& lists, + const scalar& search_key, + duplicate_find_option find_option, ) except +cudf_exception_handler cdef unique_ptr[column] index_of( - lists_column_view lists, - column_view search_keys, + const lists_column_view& lists, + const column_view& search_keys, + duplicate_find_option find_option, ) except +cudf_exception_handler diff --git a/python/cudf/cudf/_lib/pylibcudf/libcudf/lists/lists_column_view.pxd b/python/cudf/cudf/_lib/pylibcudf/libcudf/lists/lists_column_view.pxd index dbafc415e45..fd21e7b334b 100644 --- a/python/cudf/cudf/_lib/pylibcudf/libcudf/lists/lists_column_view.pxd +++ b/python/cudf/cudf/_lib/pylibcudf/libcudf/lists/lists_column_view.pxd @@ -9,6 +9,7 @@ from cudf._lib.pylibcudf.libcudf.types cimport size_type cdef extern from "cudf/lists/lists_column_view.hpp" namespace "cudf" nogil: cdef cppclass lists_column_view(column_view): + lists_column_view() except + lists_column_view(const column_view& lists_column) except + column_view parent() except + column_view offsets() except + diff --git a/python/cudf/cudf/_lib/pylibcudf/libcudf/scalar/scalar_factories.pxd b/python/cudf/cudf/_lib/pylibcudf/libcudf/scalar/scalar_factories.pxd index 5c4e5bf346f..c8220df8938 100644 --- a/python/cudf/cudf/_lib/pylibcudf/libcudf/scalar/scalar_factories.pxd +++ b/python/cudf/cudf/_lib/pylibcudf/libcudf/scalar/scalar_factories.pxd @@ -8,3 +8,4 @@ from cudf._lib.pylibcudf.libcudf.scalar.scalar cimport scalar cdef extern from "cudf/scalar/scalar_factories.hpp" namespace "cudf" nogil: cdef unique_ptr[scalar] make_string_scalar(const string & _string) except + + cdef unique_ptr[scalar] 
make_fixed_width_scalar[T](T value) except +
diff --git a/python/cudf/cudf/_lib/pylibcudf/lists.pxd b/python/cudf/cudf/_lib/pylibcudf/lists.pxd
index 74fdc6e2b2f..2ea83cc4c41 100644
--- a/python/cudf/cudf/_lib/pylibcudf/lists.pxd
+++ b/python/cudf/cudf/_lib/pylibcudf/lists.pxd
@@ -5,8 +5,12 @@ from libcpp cimport bool
 from cudf._lib.pylibcudf.libcudf.types cimport size_type
 
 from .column cimport Column
+from .scalar cimport Scalar
 from .table cimport Table
 
+ctypedef fused ColumnOrScalar:
+    Column
+    Scalar
 
 cpdef Table explode_outer(Table, size_type explode_column_idx)
 
@@ -14,4 +18,10 @@ cpdef Column concatenate_rows(Table)
 
 cpdef Column concatenate_list_elements(Column, bool dropna)
 
+cpdef Column contains(Column, ColumnOrScalar)
+
+cpdef Column contains_nulls(Column)
+
+cpdef Column index_of(Column, ColumnOrScalar, bool)
+
 cpdef Column count_elements(Column)
diff --git a/python/cudf/cudf/_lib/pylibcudf/lists.pyx b/python/cudf/cudf/_lib/pylibcudf/lists.pyx
index 598bbda0a4a..92fc0d6aa33 100644
--- a/python/cudf/cudf/_lib/pylibcudf/lists.pyx
+++ b/python/cudf/cudf/_lib/pylibcudf/lists.pyx
@@ -1,11 +1,15 @@
 # Copyright (c) 2024, NVIDIA CORPORATION.
 
+from cython.operator cimport dereference
 from libcpp cimport bool
-from libcpp.memory cimport make_shared, shared_ptr, unique_ptr
+from libcpp.memory cimport unique_ptr
 from libcpp.utility cimport move
 
 from cudf._lib.pylibcudf.libcudf.column.column cimport column
-from cudf._lib.pylibcudf.libcudf.lists cimport explode as cpp_explode
+from cudf._lib.pylibcudf.libcudf.lists cimport (
+    contains as cpp_contains,
+    explode as cpp_explode,
+)
 from cudf._lib.pylibcudf.libcudf.lists.combine cimport (
     concatenate_list_elements as cpp_concatenate_list_elements,
     concatenate_null_policy,
@@ -14,13 +18,12 @@ from cudf._lib.pylibcudf.libcudf.lists.combine cimport (
 from cudf._lib.pylibcudf.libcudf.lists.count_elements cimport (
     count_elements as cpp_count_elements,
 )
-from cudf._lib.pylibcudf.libcudf.lists.lists_column_view cimport (
-    lists_column_view,
-)
 from cudf._lib.pylibcudf.libcudf.table.table cimport table
 from cudf._lib.pylibcudf.libcudf.types cimport size_type
+from cudf._lib.pylibcudf.lists cimport ColumnOrScalar
 
-from .column cimport Column
+from .column cimport Column, ListColumnView
+from .scalar cimport Scalar
 from .table cimport Table
 
 
@@ -77,15 +80,15 @@ cpdef Column concatenate_list_elements(Column input, bool dropna):
     ----------
     input : Column
         The input column
+    dropna : bool
+        If true, null list elements will be ignored during
+        concatenation. Otherwise any input null values will result in
+        the corresponding output row being set to null.
 
     Returns
     -------
     Column
         A new Column of concatenated list elements
-    dropna : bool
-        If true, null list elements will be ignored
-        from concatenation. Otherwise any input null values will result in
-        the corresponding output row being set to null.
     """
     cdef concatenate_null_policy null_policy = (
         concatenate_null_policy.IGNORE if dropna
@@ -102,15 +105,130 @@ cpdef Column concatenate_list_elements(Column input, bool dropna):
     return Column.from_libcudf(move(c_result))
 
 
-cpdef Column count_elements(Column input):
-    # shared_ptr required because lists_column_view has no default
-    # ctor
-    cdef shared_ptr[lists_column_view] list_view = (
-        make_shared[lists_column_view](col.view())
+cpdef Column contains(Column input, ColumnOrScalar search_key):
+    """Create a column of bool values indicating whether
+    the search_key is contained in the input.
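+    If ``search_key`` is a Column, each of its rows is searched for in the
+    corresponding list row of ``input``.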
+
+    ``search_key`` may be a
+    :py:class:`~cudf._lib.pylibcudf.column.Column` or a
+    :py:class:`~cudf._lib.pylibcudf.scalar.Scalar`.
+
+    For details, see :cpp:func:`contains`.
+
+    Parameters
+    ----------
+    input : Column
+        The input column.
+    search_key : Union[Column, Scalar]
+        The search key.
+
+    Returns
+    -------
+    Column
+        A new Column of bools indicating if the search_key was
+        found in the list column.
+    """
+    cdef unique_ptr[column] c_result
+    cdef ListColumnView list_view = input.list_view()
+
+    if not isinstance(search_key, (Column, Scalar)):
+        raise TypeError("Must pass a Column or Scalar")
+
+    with nogil:
+        c_result = move(cpp_contains.contains(
+            list_view.view(),
+            search_key.view() if ColumnOrScalar is Column else dereference(
+                search_key.get()
+            ),
+        ))
+    return Column.from_libcudf(move(c_result))
+
+
+cpdef Column contains_nulls(Column input):
+    """Create a column of bool values indicating whether
+    each row in the lists column contains a null value.
+
+    Parameters
+    ----------
+    input : Column
+        The input column.
+
+    Returns
+    -------
+    Column
+        A new Column of bools indicating if the list column
+        contains a null value.
+    """
+    cdef unique_ptr[column] c_result
+    cdef ListColumnView list_view = input.list_view()
+    with nogil:
+        c_result = move(cpp_contains.contains_nulls(list_view.view()))
+    return Column.from_libcudf(move(c_result))
+
+
+cpdef Column index_of(Column input, ColumnOrScalar search_key, bool find_first_option):
+    """Create a column of index values indicating the position of a search
+    key row within the corresponding list row in the lists column.
+
+    ``search_key`` may be a
+    :py:class:`~cudf._lib.pylibcudf.column.Column` or a
+    :py:class:`~cudf._lib.pylibcudf.scalar.Scalar`.
+
+    For details, see :cpp:func:`index_of`.
+
+    Parameters
+    ----------
+    input : Column
+        The input column.
+    search_key : Union[Column, Scalar]
+        The search key.
+    find_first_option : bool
+        If true, index_of returns the first match.
+        Otherwise the last match is returned.
+
+    Returns
+    -------
+    Column
+        A new Column of index values that indicate where in the
+        list column the search_key was found. An index value
+        of -1 indicates that the search_key was not found.
+    """
+    cdef unique_ptr[column] c_result
+    cdef ListColumnView list_view = input.list_view()
+    cdef cpp_contains.duplicate_find_option find_option = (
+        cpp_contains.duplicate_find_option.FIND_FIRST if find_first_option
+        else cpp_contains.duplicate_find_option.FIND_LAST
+    )
+
+    with nogil:
+        c_result = move(cpp_contains.index_of(
+            list_view.view(),
+            search_key.view() if ColumnOrScalar is Column else dereference(
+                search_key.get()
+            ),
+            find_option,
+        ))
+    return Column.from_libcudf(move(c_result))
+
+
+cpdef Column count_elements(Column input):
+    """Count the number of rows in each
+    list element in the given lists column.
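+
+    For details, see :cpp:func:`count_elements`.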
+ + Parameters + ---------- + input : Column + The input column + + Returns + ------- + Column + A new Column of the lengths of each list element + """ + cdef ListColumnView list_view = input.list_view() cdef unique_ptr[column] c_result with nogil: - c_result = move(cpp_count_elements(list_view.get()[0])) + c_result = move(cpp_count_elements(list_view.view())) return Column.from_libcudf(move(c_result)) diff --git a/python/cudf/cudf/_lib/pylibcudf/strings/CMakeLists.txt b/python/cudf/cudf/_lib/pylibcudf/strings/CMakeLists.txt index cb7f71b1912..b499a127541 100644 --- a/python/cudf/cudf/_lib/pylibcudf/strings/CMakeLists.txt +++ b/python/cudf/cudf/_lib/pylibcudf/strings/CMakeLists.txt @@ -13,7 +13,7 @@ # ============================================================================= set(cython_sources capitalize.pyx case.pyx char_types.pyx contains.pyx find.pyx regex_flags.pyx - regex_program.pyx replace.pyx + regex_program.pyx replace.pyx slice.pyx ) set(linked_libraries cudf::cudf) diff --git a/python/cudf/cudf/_lib/pylibcudf/strings/__init__.pxd b/python/cudf/cudf/_lib/pylibcudf/strings/__init__.pxd index 959aa94737d..d1f632d6d8e 100644 --- a/python/cudf/cudf/_lib/pylibcudf/strings/__init__.pxd +++ b/python/cudf/cudf/_lib/pylibcudf/strings/__init__.pxd @@ -9,4 +9,5 @@ from . cimport ( regex_flags, regex_program, replace, + slice, ) diff --git a/python/cudf/cudf/_lib/pylibcudf/strings/__init__.py b/python/cudf/cudf/_lib/pylibcudf/strings/__init__.py index b7384913286..ef102aff2af 100644 --- a/python/cudf/cudf/_lib/pylibcudf/strings/__init__.py +++ b/python/cudf/cudf/_lib/pylibcudf/strings/__init__.py @@ -9,4 +9,5 @@ regex_flags, regex_program, replace, + slice, ) diff --git a/python/cudf/cudf/_lib/pylibcudf/strings/slice.pxd b/python/cudf/cudf/_lib/pylibcudf/strings/slice.pxd new file mode 100644 index 00000000000..7d8d0006ef4 --- /dev/null +++ b/python/cudf/cudf/_lib/pylibcudf/strings/slice.pxd @@ -0,0 +1,15 @@ +# Copyright (c) 2024, NVIDIA CORPORATION. + +from cudf._lib.pylibcudf.column cimport Column +from cudf._lib.pylibcudf.scalar cimport Scalar + +ctypedef fused ColumnOrScalar: + Column + Scalar + +cpdef Column slice_strings( + Column input, + ColumnOrScalar start=*, + ColumnOrScalar stop=*, + Scalar step=* +) diff --git a/python/cudf/cudf/_lib/pylibcudf/strings/slice.pyx b/python/cudf/cudf/_lib/pylibcudf/strings/slice.pyx new file mode 100644 index 00000000000..df75134fb71 --- /dev/null +++ b/python/cudf/cudf/_lib/pylibcudf/strings/slice.pyx @@ -0,0 +1,102 @@ +# Copyright (c) 2024, NVIDIA CORPORATION. + +from libcpp.memory cimport unique_ptr +from libcpp.utility cimport move + +from cudf._lib.pylibcudf.column cimport Column +from cudf._lib.pylibcudf.libcudf.column.column cimport column +from cudf._lib.pylibcudf.libcudf.scalar.scalar cimport numeric_scalar +from cudf._lib.pylibcudf.libcudf.scalar.scalar_factories cimport ( + make_fixed_width_scalar as cpp_make_fixed_width_scalar, +) +from cudf._lib.pylibcudf.libcudf.strings cimport substring as cpp_slice +from cudf._lib.pylibcudf.libcudf.types cimport size_type +from cudf._lib.pylibcudf.scalar cimport Scalar + +from cython.operator import dereference + + +cpdef Column slice_strings( + Column input, + ColumnOrScalar start=None, + ColumnOrScalar stop=None, + Scalar step=None +): + """Perform a slice operation on a strings column. + + ``start`` and ``stop`` may be a + :py:class:`~cudf._lib.pylibcudf.column.Column` or a + :py:class:`~cudf._lib.pylibcudf.scalar.Scalar`. But ``step`` must be a + :py:class:`~cudf._lib.pylibcudf.scalar.Scalar`. 
+ + For details, see :cpp:func:`cudf::strings::slice_strings`. + + Parameters + ---------- + input : Column + Strings column for this operation. + start : Union[Column, Scalar] + The start character position or positions. + stop : Union[Column, Scalar] + The end character position or positions. + step : Scalar + Distance between input characters retrieved. + + Returns + ------- + pylibcudf.Column + The result of the slice operation. + """ + cdef unique_ptr[column] c_result + cdef numeric_scalar[size_type]* cpp_start + cdef numeric_scalar[size_type]* cpp_stop + cdef numeric_scalar[size_type]* cpp_step + + if input is None: + raise ValueError("input cannot be None") + + if ColumnOrScalar is Column: + if step is not None: + raise ValueError("Column-wise slice does not support step") + + if start is None or stop is None: + raise ValueError( + "start and stop must be provided for Column-wise slice" + ) + + with nogil: + c_result = cpp_slice.slice_strings( + input.view(), + start.view(), + stop.view() + ) + + elif ColumnOrScalar is Scalar: + if start is None: + start = Scalar.from_libcudf( + cpp_make_fixed_width_scalar(0) + ) + if stop is None: + stop = Scalar.from_libcudf( + cpp_make_fixed_width_scalar(0) + ) + if step is None: + step = Scalar.from_libcudf( + cpp_make_fixed_width_scalar(1) + ) + + cpp_start = start.c_obj.get() + cpp_stop = stop.c_obj.get() + cpp_step = step.c_obj.get() + + with nogil: + c_result = cpp_slice.slice_strings( + input.view(), + dereference(cpp_start), + dereference(cpp_stop), + dereference(cpp_step) + ) + else: + raise ValueError("start, stop, and step must be either Column or Scalar") + + return Column.from_libcudf(move(c_result)) diff --git a/python/cudf/cudf/_lib/pylibcudf/table.pyx b/python/cudf/cudf/_lib/pylibcudf/table.pyx index d93ac78721b..d91fa0474b0 100644 --- a/python/cudf/cudf/_lib/pylibcudf/table.pyx +++ b/python/cudf/cudf/_lib/pylibcudf/table.pyx @@ -83,6 +83,8 @@ cdef class Table: cpdef int num_rows(self): """The number of rows in this table.""" + if self.num_columns() == 0: + return 0 return self._columns[0].size() cpdef list columns(self): diff --git a/python/cudf/cudf/_lib/strings/substring.pyx b/python/cudf/cudf/_lib/strings/substring.pyx index 170c1016b89..706c21c0634 100644 --- a/python/cudf/cudf/_lib/strings/substring.pyx +++ b/python/cudf/cudf/_lib/strings/substring.pyx @@ -2,24 +2,16 @@ import numpy as np -from libcpp.memory cimport unique_ptr -from libcpp.utility cimport move - from cudf.core.buffer import acquire_spill_lock from cudf._lib.column cimport Column -from cudf._lib.pylibcudf.libcudf.column.column cimport column -from cudf._lib.pylibcudf.libcudf.column.column_view cimport column_view -from cudf._lib.pylibcudf.libcudf.strings.substring cimport ( - slice_strings as cpp_slice_strings, -) -from cudf._lib.pylibcudf.libcudf.types cimport size_type from cudf._lib.scalar import as_device_scalar -from cudf._lib.pylibcudf.libcudf.scalar.scalar cimport numeric_scalar from cudf._lib.scalar cimport DeviceScalar +import cudf._lib.pylibcudf as plc + @acquire_spill_lock() def slice_strings(Column source_strings, @@ -32,30 +24,18 @@ def slice_strings(Column source_strings, performed in steps by skipping `step` number of characters in a string.
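A short sketch of the scalar path through the new slice.pyx above (assumes the pylibcudf interop helpers; the scalars must be int32 to match libcudf's size_type, as the `numeric_scalar[size_type]` pointers above require):

    import pyarrow as pa
    import cudf._lib.pylibcudf as plc

    s = plc.interop.from_arrow(pa.array(["hello", "world"]))
    start = plc.interop.from_arrow(pa.scalar(1, type=pa.int32()))
    stop = plc.interop.from_arrow(pa.scalar(4, type=pa.int32()))
    step = plc.interop.from_arrow(pa.scalar(1, type=pa.int32()))

    plc.strings.slice.slice_strings(s, start, stop, step)  # ["ell", "orl"]

    # The Column overload takes per-row start/stop columns and no step:
    starts = plc.interop.from_arrow(pa.array([0, 1], type=pa.int32()))
    stops = plc.interop.from_arrow(pa.array([2, 3], type=pa.int32()))
    plc.strings.slice.slice_strings(s, starts, stops)      # ["he", "or"]

The table.pyx hunk above separately guards Table.num_rows() so that a zero-column table reports 0 rows instead of indexing into an empty column list.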
""" - cdef unique_ptr[column] c_result - cdef column_view source_view = source_strings.view() - cdef DeviceScalar start_scalar = as_device_scalar(start, np.int32) cdef DeviceScalar end_scalar = as_device_scalar(end, np.int32) cdef DeviceScalar step_scalar = as_device_scalar(step, np.int32) - cdef numeric_scalar[size_type]* start_numeric_scalar = \ - ( - start_scalar.get_raw_ptr()) - cdef numeric_scalar[size_type]* end_numeric_scalar = \ - (end_scalar.get_raw_ptr()) - cdef numeric_scalar[size_type]* step_numeric_scalar = \ - (step_scalar.get_raw_ptr()) - - with nogil: - c_result = move(cpp_slice_strings( - source_view, - start_numeric_scalar[0], - end_numeric_scalar[0], - step_numeric_scalar[0] - )) - - return Column.from_unique_ptr(move(c_result)) + return Column.from_pylibcudf( + plc.strings.slice.slice_strings( + source_strings.to_pylibcudf(mode="read"), + start_scalar.c_value, + end_scalar.c_value, + step_scalar.c_value + ) + ) @acquire_spill_lock() @@ -67,19 +47,13 @@ def slice_from(Column source_strings, at given starts and stops positions. `starts` and `stops` here are positions per element in the string-column. """ - cdef unique_ptr[column] c_result - cdef column_view source_view = source_strings.view() - cdef column_view starts_view = starts.view() - cdef column_view stops_view = stops.view() - - with nogil: - c_result = move(cpp_slice_strings( - source_view, - starts_view, - stops_view - )) - - return Column.from_unique_ptr(move(c_result)) + return Column.from_pylibcudf( + plc.strings.slice.slice_strings( + source_strings.to_pylibcudf(mode="read"), + starts.to_pylibcudf(mode="read"), + stops.to_pylibcudf(mode="read") + ) + ) @acquire_spill_lock() @@ -90,8 +64,7 @@ def get(Column source_strings, character from each input string. The index of characters required can be controlled by passing `index`. """ - cdef unique_ptr[column] c_result - cdef column_view source_view = source_strings.view() + if index < 0: next_index = index - 1 step = -1 @@ -102,20 +75,11 @@ def get(Column source_strings, cdef DeviceScalar end_scalar = as_device_scalar(next_index, np.int32) cdef DeviceScalar step_scalar = as_device_scalar(step, np.int32) - cdef numeric_scalar[size_type]* start_numeric_scalar = \ - ( - start_scalar.get_raw_ptr()) - cdef numeric_scalar[size_type]* end_numeric_scalar = \ - (end_scalar.get_raw_ptr()) - cdef numeric_scalar[size_type]* step_numeric_scalar = \ - (step_scalar.get_raw_ptr()) - - with nogil: - c_result = move(cpp_slice_strings( - source_view, - start_numeric_scalar[0], - end_numeric_scalar[0], - step_numeric_scalar[0] - )) - - return Column.from_unique_ptr(move(c_result)) + return Column.from_pylibcudf( + plc.strings.slice.slice_strings( + source_strings.to_pylibcudf(mode="read"), + start_scalar.c_value, + end_scalar.c_value, + step_scalar.c_value + ) + ) diff --git a/python/cudf/cudf/core/_base_index.py b/python/cudf/cudf/core/_base_index.py index caf07b286cd..e160fa697ee 100644 --- a/python/cudf/cudf/core/_base_index.py +++ b/python/cudf/cudf/core/_base_index.py @@ -1104,7 +1104,11 @@ def difference(self, other, sort=None): f"of [None, False, True]; {sort} was passed." 
) - other = cudf.Index(other, name=getattr(other, "name", self.name)) + if not isinstance(other, BaseIndex): + other = cudf.Index( + other, + name=getattr(other, "name", self.name), + ) if not len(other): res = self._get_reconciled_name_object(other).unique() diff --git a/python/cudf/cudf/core/algorithms.py b/python/cudf/cudf/core/algorithms.py index 51a32e29886..e8b82ff60c2 100644 --- a/python/cudf/cudf/core/algorithms.py +++ b/python/cudf/cudf/core/algorithms.py @@ -6,7 +6,7 @@ from cudf.core.column import as_column from cudf.core.copy_types import BooleanMask -from cudf.core.index import Index, RangeIndex +from cudf.core.index import RangeIndex, ensure_index from cudf.core.indexed_frame import IndexedFrame from cudf.core.scalar import Scalar from cudf.options import get_option @@ -107,7 +107,7 @@ def factorize(values, sort=False, use_na_sentinel=True, size_hint=None): dtype="int64" if get_option("mode.pandas_compatible") else None, ).values - return labels, cats.values if return_cupy_array else Index(cats) + return labels, cats.values if return_cupy_array else ensure_index(cats) def _linear_interpolation(column, index=None): diff --git a/python/cudf/cudf/core/buffer/spill_manager.py b/python/cudf/cudf/core/buffer/spill_manager.py index 762cd7f9e86..ed351a6b107 100644 --- a/python/cudf/cudf/core/buffer/spill_manager.py +++ b/python/cudf/cudf/core/buffer/spill_manager.py @@ -18,14 +18,14 @@ import rmm.mr from cudf.options import get_option -from cudf.utils.nvtx_annotation import _cudf_nvtx_annotate +from cudf.utils.performance_tracking import _performance_tracking from cudf.utils.string import format_bytes if TYPE_CHECKING: from cudf.core.buffer.spillable_buffer import SpillableBufferOwner _spill_cudf_nvtx_annotate = partial( - _cudf_nvtx_annotate, domain="cudf_python-spill" + _performance_tracking, domain="cudf_python-spill" ) diff --git a/python/cudf/cudf/core/buffer/spillable_buffer.py b/python/cudf/cudf/core/buffer/spillable_buffer.py index eb57a371965..4c9e524ee05 100644 --- a/python/cudf/cudf/core/buffer/spillable_buffer.py +++ b/python/cudf/cudf/core/buffer/spillable_buffer.py @@ -10,6 +10,7 @@ from typing import TYPE_CHECKING, Any, Literal import numpy +import nvtx from typing_extensions import Self import rmm @@ -21,7 +22,7 @@ host_memory_allocation, ) from cudf.core.buffer.exposure_tracked_buffer import ExposureTrackedBuffer -from cudf.utils.nvtx_annotation import _get_color_for_nvtx, annotate +from cudf.utils.performance_tracking import _get_color_for_nvtx from cudf.utils.string import format_bytes if TYPE_CHECKING: @@ -200,7 +201,7 @@ def spill(self, target: str = "cpu") -> None: ) if (ptr_type, target) == ("gpu", "cpu"): - with annotate( + with nvtx.annotate( message="SpillDtoH", color=_get_color_for_nvtx("SpillDtoH"), domain="cudf_python-spill", @@ -218,7 +219,7 @@ def spill(self, target: str = "cpu") -> None: # trigger a new call to this buffer's `spill()`. # Therefore, it is important that spilling-on-demand doesn't # try to unspill an already locked buffer! 
- with annotate( + with nvtx.annotate( message="SpillHtoD", color=_get_color_for_nvtx("SpillHtoD"), domain="cudf_python-spill", diff --git a/python/cudf/cudf/core/column/column.py b/python/cudf/cudf/core/column/column.py index dfcdfbb9d91..5db6fd904a9 100644 --- a/python/cudf/cudf/core/column/column.py +++ b/python/cudf/cudf/core/column/column.py @@ -927,13 +927,13 @@ def is_unique(self) -> bool: @property def is_monotonic_increasing(self) -> bool: - return not self.has_nulls() and libcudf.sort.is_sorted( + return not self.has_nulls(include_nan=True) and libcudf.sort.is_sorted( [self], [True], None ) @property def is_monotonic_decreasing(self) -> bool: - return not self.has_nulls() and libcudf.sort.is_sorted( + return not self.has_nulls(include_nan=True) and libcudf.sort.is_sorted( [self], [False], None ) diff --git a/python/cudf/cudf/core/column_accessor.py b/python/cudf/cudf/core/column_accessor.py index 1bf9a393566..f30a557efb0 100644 --- a/python/cudf/cudf/core/column_accessor.py +++ b/python/cudf/cudf/core/column_accessor.py @@ -472,6 +472,7 @@ def swaplevel(self, i=-2, j=-1): new_keys[n][i], new_keys[n][j] = row[j], row[i] new_dict.update({row: tuple(new_keys[n])}) + # TODO: Change to deep=False when copy-on-write is default new_data = {new_dict[k]: v.copy(deep=True) for k, v in self.items()} # swap level_names for i and j @@ -669,10 +670,11 @@ def rename_column(x): raise ValueError("Duplicate column names are not allowed") data = dict(zip(new_col_names, self.values())) - return self.__class__( + return type(self)( data=data, level_names=self.level_names, multiindex=self.multiindex, + label_dtype=self.label_dtype, verify=False, ) diff --git a/python/cudf/cudf/core/cut.py b/python/cudf/cudf/core/cut.py index 54c5e829e8a..d9f62f51f92 100644 --- a/python/cudf/cudf/core/cut.py +++ b/python/cudf/cudf/core/cut.py @@ -292,7 +292,7 @@ def cut( ) # we return a categorical index, as we don't have a Categorical method - categorical_index = cudf.Index(col) + categorical_index = cudf.CategoricalIndex._from_data({None: col}) if isinstance(orig_x, (pd.Series, cudf.Series)): # if we have a series input we return a series output diff --git a/python/cudf/cudf/core/dataframe.py b/python/cudf/cudf/core/dataframe.py index f0d8157011d..4dfeb68b7ba 100644 --- a/python/cudf/cudf/core/dataframe.py +++ b/python/cudf/cudf/core/dataframe.py @@ -58,7 +58,12 @@ from cudf.core.column_accessor import ColumnAccessor from cudf.core.copy_types import BooleanMask from cudf.core.groupby.groupby import DataFrameGroupBy, groupby_doc_template -from cudf.core.index import BaseIndex, RangeIndex, _index_from_data, as_index +from cudf.core.index import ( + BaseIndex, + RangeIndex, + _index_from_data, + ensure_index, +) from cudf.core.indexed_frame import ( IndexedFrame, _FrameIndexer, @@ -83,7 +88,7 @@ min_scalar_type, numeric_normalize_types, ) -from cudf.utils.nvtx_annotation import _cudf_nvtx_annotate +from cudf.utils.performance_tracking import _performance_tracking from cudf.utils.utils import GetAttrGetItemMixin, _external_only_api if TYPE_CHECKING: @@ -145,7 +150,7 @@ def __setitem__(self, key, value): key = (key, slice(None)) return self._setitem_tuple_arg(key, value) - @_cudf_nvtx_annotate + @_performance_tracking def _can_downcast_to_series(self, df, arg): """ This method encapsulates the logic used @@ -188,7 +193,7 @@ def _can_downcast_to_series(self, df, arg): return True return False - @_cudf_nvtx_annotate + @_performance_tracking def _downcast_to_series(self, df, arg): """ "Downcast" from a DataFrame to a Series 
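A small illustration of the `is_monotonic_*` change above: with `has_nulls(include_nan=True)`, a NaN kept as a value (rather than converted to a null) now disqualifies the column from being reported as sorted. A sketch, assuming construction with `nan_as_null=False`:

    import cudf

    s = cudf.Series([1.0, float("nan"), 3.0], nan_as_null=False)
    s.is_monotonic_increasing  # False: the NaN now counts as missing for this check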
@@ -233,11 +238,11 @@ class _DataFrameLocIndexer(_DataFrameIndexer): For selection by label. """ - @_cudf_nvtx_annotate + @_performance_tracking def _getitem_scalar(self, arg): return self._frame[arg[1]].loc[arg[0]] - @_cudf_nvtx_annotate + @_performance_tracking def _getitem_tuple_arg(self, arg): from uuid import uuid4 @@ -338,7 +343,7 @@ def _getitem_tuple_arg(self, arg): range(len(tmp_arg[0])) ) }, - index=as_index(tmp_arg[0]), + index=cudf.Index(tmp_arg[0]), ) columns_df[cantor_name] = column.as_column( range(len(columns_df)) @@ -363,7 +368,7 @@ def _getitem_tuple_arg(self, arg): return self._downcast_to_series(df, arg) return df - @_cudf_nvtx_annotate + @_performance_tracking def _setitem_tuple_arg(self, key, value): if ( isinstance(self._frame.index, MultiIndex) @@ -532,7 +537,7 @@ def __getitem__(self, arg): return frame._empty_like(keep_index=True) assert_never(row_spec) - @_cudf_nvtx_annotate + @_performance_tracking def _setitem_tuple_arg(self, key, value): columns_df = self._frame._from_data( self._frame._data.select_by_index(key[1]), self._frame.index @@ -677,7 +682,7 @@ class DataFrame(IndexedFrame, Serializable, GetAttrGetItemMixin): _groupby = DataFrameGroupBy _resampler = DataFrameResampler - @_cudf_nvtx_annotate + @_performance_tracking def __init__( self, data=None, @@ -702,7 +707,7 @@ def __init__( data = data.reindex(index) index = data.index else: - index = cudf.Index(index) + index = ensure_index(index) else: index = data.index @@ -751,7 +756,7 @@ def __init__( if index is None: self._index = RangeIndex(0) else: - self._index = cudf.Index(index) + self._index = ensure_index(index) if columns is not None: rangeindex = isinstance( columns, (range, pd.RangeIndex, cudf.RangeIndex) @@ -859,7 +864,7 @@ def __init__( columns, pd.MultiIndex ) - @_cudf_nvtx_annotate + @_performance_tracking def _init_from_series_list(self, data, columns, index): if index is None: # When `index` is `None`, the final index of @@ -909,7 +914,7 @@ def _init_from_series_list(self, data, columns, index): f"not match length of index ({index_length})" ) - final_index = cudf.Index(index) + final_index = ensure_index(index) series_lengths = list(map(len, data)) data = numeric_normalize_types(*data) @@ -972,14 +977,14 @@ def _init_from_series_list(self, data, columns, index): else: self._data.rangeindex = True - @_cudf_nvtx_annotate + @_performance_tracking def _init_from_list_like(self, data, index=None, columns=None): if index is None: index = RangeIndex(start=0, stop=len(data)) else: - index = cudf.Index(index) + index = ensure_index(index) - self._index = cudf.Index(index) + self._index = index # list-of-dicts case if len(data) > 0 and isinstance(data[0], dict): data = DataFrame.from_pandas(pd.DataFrame(data)) @@ -1030,7 +1035,7 @@ def _init_from_list_like(self, data, index=None, columns=None): ) self._data.label_dtype = getattr(columns, "dtype", None) - @_cudf_nvtx_annotate + @_performance_tracking def _init_from_dict_like( self, data, index=None, columns=None, nan_as_null=None ): @@ -1085,7 +1090,7 @@ def _init_from_dict_like( self._index = RangeIndex(0, num_rows) else: - self._index = cudf.Index(index) + self._index = ensure_index(index) if len(data): self._data.multiindex = True @@ -1119,10 +1124,8 @@ def _from_data( return out @staticmethod - @_cudf_nvtx_annotate + @_performance_tracking def _align_input_series_indices(data, index): - data = data.copy() - input_series = [ Series(val) for val in data.values() @@ -1142,6 +1145,7 @@ def _align_input_series_indices(data, index): ) index = 
aligned_input_series[0].index + data = data.copy() for name, val in data.items(): if isinstance(val, (pd.Series, Series, dict)): data[name] = aligned_input_series.pop(0) @@ -1188,7 +1192,7 @@ def deserialize(cls, header, frames): return obj @property - @_cudf_nvtx_annotate + @_performance_tracking def shape(self): """Returns a tuple representing the dimensionality of the DataFrame.""" return self._num_rows, self._num_columns @@ -1271,7 +1275,7 @@ def __setattr__(self, key, col): else: super().__setattr__(key, col) - @_cudf_nvtx_annotate + @_performance_tracking def __getitem__(self, arg): """ If *arg* is a ``str`` or ``int`` type, return the column Series. @@ -1365,7 +1369,7 @@ def __getitem__(self, arg): f"__getitem__ on type {type(arg)} is not supported" ) - @_cudf_nvtx_annotate + @_performance_tracking def __setitem__(self, arg, value): """Add/set column by *arg or DataFrame*""" if isinstance(arg, DataFrame): @@ -1483,7 +1487,7 @@ def __setitem__(self, arg, value): def __delitem__(self, name): self._drop_column(name) - @_cudf_nvtx_annotate + @_performance_tracking def memory_usage(self, index=True, deep=False): mem_usage = [col.memory_usage for col in self._data.columns] names = [str(name) for name in self._data.names] @@ -1492,10 +1496,10 @@ def memory_usage(self, index=True, deep=False): names.append("Index") return Series._from_data( data={None: as_column(mem_usage)}, - index=as_index(names), + index=cudf.Index(names), ) - @_cudf_nvtx_annotate + @_performance_tracking def __array_function__(self, func, types, args, kwargs): if "out" in kwargs or not all( issubclass(t, (Series, DataFrame)) for t in types @@ -1529,7 +1533,7 @@ def __array_function__(self, func, types, args, kwargs): return NotImplemented # The _get_numeric_data method is necessary for dask compatibility. - @_cudf_nvtx_annotate + @_performance_tracking def _get_numeric_data(self): """Return a dataframe with only numeric data types""" columns = [ @@ -1539,7 +1543,7 @@ def _get_numeric_data(self): ] return self[columns] - @_cudf_nvtx_annotate + @_performance_tracking def assign(self, **kwargs: Callable[[Self], Any] | Any): """ Assign columns to DataFrame from keyword arguments. @@ -1572,7 +1576,7 @@ def assign(self, **kwargs: Callable[[Self], Any] | Any): return new_df @classmethod - @_cudf_nvtx_annotate + @_performance_tracking def _concat( cls, objs, axis=0, join="outer", ignore_index=False, sort=False ): @@ -1964,12 +1968,12 @@ def _get_renderable_dataframe(self): return output - @_cudf_nvtx_annotate + @_performance_tracking def __repr__(self): output = self._get_renderable_dataframe() return self._clean_renderable_dataframe(output) - @_cudf_nvtx_annotate + @_performance_tracking def _repr_html_(self): lines = ( self._get_renderable_dataframe() @@ -1985,7 +1989,7 @@ def _repr_html_(self): lines.append("") return "\n".join(lines) - @_cudf_nvtx_annotate + @_performance_tracking def _repr_latex_(self): return self._get_renderable_dataframe().to_pandas()._repr_latex_() @@ -2099,7 +2103,7 @@ def _make_operands_and_index_for_binop( return operands, index, can_use_self_column_name @classmethod - @_cudf_nvtx_annotate + @_performance_tracking def from_dict( cls, data: dict, @@ -2234,7 +2238,7 @@ def from_dict( f"parameter. 
Got '{orient}' instead" ) - @_cudf_nvtx_annotate + @_performance_tracking def to_dict( self, orient: str = "dict", @@ -2355,7 +2359,7 @@ def to_dict( return self.to_pandas().to_dict(orient=orient, into=into) - @_cudf_nvtx_annotate + @_performance_tracking def scatter_by_map( self, map_index, map_size=None, keep_index=True, debug: bool = False ): @@ -2448,7 +2452,7 @@ def scatter_by_map( return result - @_cudf_nvtx_annotate + @_performance_tracking def update( self, other, @@ -2543,23 +2547,23 @@ def update( self._mimic_inplace(source_df, inplace=True) - @_cudf_nvtx_annotate + @_performance_tracking def __iter__(self): return iter(self._column_names) - @_cudf_nvtx_annotate + @_performance_tracking def __contains__(self, item): # This must check against containment in the pandas Index and not # self._column_names to handle NA, None, nan, etc. correctly. return item in self._data.to_pandas_index() - @_cudf_nvtx_annotate + @_performance_tracking def items(self): """Iterate over column names and series pairs""" for k in self: yield (k, self[k]) - @_cudf_nvtx_annotate + @_performance_tracking def equals(self, other) -> bool: ret = super().equals(other) # If all other checks matched, validate names. @@ -2592,13 +2596,13 @@ def at(self): "index is absolutely necessary. For checking if the columns are a " "MultiIndex, use _data.multiindex." ) - @_cudf_nvtx_annotate + @_performance_tracking def columns(self): """Returns a tuple of columns""" return self._data.to_pandas_index() @columns.setter # type: ignore - @_cudf_nvtx_annotate + @_performance_tracking def columns(self, columns): multiindex = False rangeindex = False @@ -2666,7 +2670,7 @@ def _set_columns_like(self, other: ColumnAccessor) -> None: verify=False, ) - @_cudf_nvtx_annotate + @_performance_tracking def reindex( self, labels=None, @@ -2814,7 +2818,7 @@ def reindex( fill_value=fill_value, ) - @_cudf_nvtx_annotate + @_performance_tracking def set_index( self, keys, @@ -2969,6 +2973,7 @@ def set_index( idx = MultiIndex._from_data(dict(enumerate(data_to_add))) idx.names = names + # TODO: Change to deep=False when copy-on-write is default df = self if inplace else self.copy(deep=True) if verify_integrity and not idx.is_unique: @@ -2980,7 +2985,7 @@ def set_index( df.index = idx return df if not inplace else None - @_cudf_nvtx_annotate + @_performance_tracking def fillna( self, value=None, method=None, axis=None, inplace=False, limit=None ): # noqa: D102 @@ -3006,7 +3011,7 @@ def fillna( value=value, method=method, axis=axis, inplace=inplace, limit=limit ) - @_cudf_nvtx_annotate + @_performance_tracking def where(self, cond, other=None, inplace=False): from cudf.core._internals.where import ( _check_and_cast_columns_with_other, @@ -3163,7 +3168,7 @@ def reset_index( inplace=inplace, ) - @_cudf_nvtx_annotate + @_performance_tracking def insert(self, loc, name, value, nan_as_null=no_default): """Add a column to DataFrame at the index specified by loc. @@ -3189,7 +3194,7 @@ def insert(self, loc, name, value, nan_as_null=no_default): ignore_index=False, ) - @_cudf_nvtx_annotate + @_performance_tracking def _insert(self, loc, name, value, nan_as_null=None, ignore_index=True): """ Same as `insert`, with additional `ignore_index` param. @@ -3271,7 +3276,7 @@ def _insert(self, loc, name, value, nan_as_null=None, ignore_index=True): self._data.insert(name, value, loc=loc) @property # type:ignore - @_cudf_nvtx_annotate + @_performance_tracking def axes(self): """ Return a list representing the axes of the DataFrame. 
@@ -3363,7 +3368,7 @@ def diff(self, periods=1, axis=0): return self - self.shift(periods=periods) - @_cudf_nvtx_annotate + @_performance_tracking def drop_duplicates( self, subset=None, @@ -3451,14 +3456,14 @@ def drop_duplicates( return self._mimic_inplace(outdf, inplace=inplace) - @_cudf_nvtx_annotate + @_performance_tracking def pop(self, item): """Return a column and drop it from the DataFrame.""" popped = self[item] del self[item] return popped - @_cudf_nvtx_annotate + @_performance_tracking def rename( self, mapper=None, @@ -3565,6 +3570,9 @@ def rename( mapper if columns is None and axis in (1, "columns") else columns ) + result = self if inplace else self.copy(deep=copy) + + out_index = None if index: if ( any(isinstance(item, str) for item in index.values()) @@ -3586,51 +3594,52 @@ def rename( ) out_index._data[level] = column.as_column(level_values) out_index._compute_levels_and_codes() - out = DataFrame(index=out_index) else: to_replace = list(index.keys()) vals = list(index.values()) is_all_na = vals.count(None) == len(vals) try: - index_data = { - name: col.find_and_replace(to_replace, vals, is_all_na) - for name, col in self.index._data.items() - } + out_index = _index_from_data( + { + name: col.find_and_replace( + to_replace, vals, is_all_na + ) + for name, col in self.index._data.items() + } + ) except OverflowError: - index_data = self.index._data.copy(deep=True) + pass - out = DataFrame(index=_index_from_data(index_data)) - else: - out = DataFrame(index=self.index) + if out_index is not None: + result.index = out_index if columns: - out._data = self._data.rename_levels(mapper=columns, level=level) - else: - out._data = self._data.copy(deep=copy) + result._data = result._data.rename_levels( + mapper=columns, level=level + ) - if inplace: - self._data = out._data - else: - return out.copy(deep=copy) + return result - @_cudf_nvtx_annotate + @_performance_tracking def add_prefix(self, prefix): + # TODO: Change to deep=False when copy-on-write is default out = self.copy(deep=True) out.columns = [ prefix + col_name for col_name in list(self._data.keys()) ] return out - @_cudf_nvtx_annotate + @_performance_tracking def add_suffix(self, suffix): + # TODO: Change to deep=False when copy-on-write is default out = self.copy(deep=True) out.columns = [ col_name + suffix for col_name in list(self._data.keys()) ] return out - @_cudf_nvtx_annotate + @_performance_tracking def agg(self, aggs, axis=None): """ Aggregate using one or more operations over the specified axis. @@ -3766,7 +3775,7 @@ def agg(self, aggs, axis=None): else: raise ValueError("argument must be a string, list or dict") - @_cudf_nvtx_annotate + @_performance_tracking def nlargest(self, n, columns, keep="first"): """Return the first *n* rows ordered by *columns* in descending order. @@ -3906,7 +3915,7 @@ def nsmallest(self, n, columns, keep="first"): """ return self._n_largest_or_smallest(False, n, columns, keep) - @_cudf_nvtx_annotate + @_performance_tracking def swaplevel(self, i=-2, j=-1, axis=0): """ Swap level i with level j. @@ -3956,7 +3965,8 @@ def swaplevel(self, i=-2, j=-1, axis=0): weight 1.0 0.8 length 0.3 0.2 """ - result = self.copy() + # TODO: Change to deep=False when copy-on-write is default + result = self.copy(deep=True) # To get axis number axis = self._get_axis_from_axis_arg(axis) @@ -3972,7 +3982,7 @@ def swaplevel(self, i=-2, j=-1, axis=0): return result - @_cudf_nvtx_annotate + @_performance_tracking def transpose(self): """Transpose index and columns. 
@@ -4027,8 +4037,8 @@ def transpose(self): # Set the old column names as the new index result = self.__class__._from_data( - {i: col for i, col in enumerate(result_columns)}, - index=as_index(index), + ColumnAccessor(dict(enumerate(result_columns)), verify=False), + index=cudf.Index(index), ) # Set the old index as the new column names result.columns = columns @@ -4036,7 +4046,7 @@ def transpose(self): T = property(transpose, doc=transpose.__doc__) - @_cudf_nvtx_annotate + @_performance_tracking def melt(self, **kwargs): """Unpivots a DataFrame from wide format to long format, optionally leaving identifier variables set. @@ -4066,7 +4076,7 @@ def melt(self, **kwargs): return melt(self, **kwargs) - @_cudf_nvtx_annotate + @_performance_tracking def merge( self, right, @@ -4219,7 +4229,7 @@ def merge( suffixes=suffixes, ).perform_merge() - @_cudf_nvtx_annotate + @_performance_tracking def join( self, other, @@ -4268,7 +4278,7 @@ def join( ) return df - @_cudf_nvtx_annotate + @_performance_tracking @docutils.doc_apply( groupby_doc_template.format( ret=textwrap.dedent( @@ -4402,7 +4412,7 @@ def query(self, expr, local_dict=None): BooleanMask.from_column_unchecked(boolmask) ) - @_cudf_nvtx_annotate + @_performance_tracking def apply( self, func, axis=1, raw=False, result_type=None, args=(), **kwargs ): @@ -4686,7 +4696,7 @@ def _func(x): # pragma: no cover return DataFrame._from_data(result, index=self.index) - @_cudf_nvtx_annotate + @_performance_tracking @applyutils.doc_apply() def apply_rows( self, @@ -4765,7 +4775,7 @@ def apply_rows( cache_key=cache_key, ) - @_cudf_nvtx_annotate + @_performance_tracking @applyutils.doc_applychunks() def apply_chunks( self, @@ -4832,7 +4842,7 @@ def apply_chunks( tpb=tpb, ) - @_cudf_nvtx_annotate + @_performance_tracking def partition_by_hash(self, columns, nparts, keep_index=True): """Partition the dataframe by the hashed value of data in *columns*. @@ -5176,7 +5186,7 @@ def _sizeof_fmt(num, size_qualifier): cudf.utils.ioutils.buffer_write_lines(buf, lines) - @_cudf_nvtx_annotate + @_performance_tracking @docutils.doc_describe() def describe( self, @@ -5238,7 +5248,7 @@ def describe( ) return res - @_cudf_nvtx_annotate + @_performance_tracking def to_pandas( self, *, nullable: bool = False, arrow_type: bool = False ) -> pd.DataFrame: @@ -5328,7 +5338,7 @@ def to_pandas( return out_df @classmethod - @_cudf_nvtx_annotate + @_performance_tracking def from_pandas(cls, dataframe, nan_as_null=no_default): """ Convert from a Pandas DataFrame. @@ -5401,7 +5411,7 @@ def from_pandas(cls, dataframe, nan_as_null=no_default): ) @classmethod - @_cudf_nvtx_annotate + @_performance_tracking def from_arrow(cls, table): """ Convert from PyArrow Table to DataFrame. @@ -5487,7 +5497,7 @@ def from_arrow(cls, table): return out - @_cudf_nvtx_annotate + @_performance_tracking def to_arrow(self, preserve_index=None): """ Convert to a PyArrow Table. 
@@ -5528,7 +5538,7 @@ def to_arrow(self, preserve_index=None): b: [[4,5,6]] """ - data = self.copy(deep=False) + data = self index_descr = [] write_index = preserve_index is not False keep_range_index = write_index and preserve_index is None @@ -5556,6 +5566,7 @@ def to_arrow(self, preserve_index=None): index_descr = ( index.names if index.name is not None else ("index",) ) + data = data.copy(deep=False) for gen_name, col_name in zip(index_descr, index._data.names): data._insert( data.shape[1], @@ -5576,7 +5587,7 @@ def to_arrow(self, preserve_index=None): return out.replace_schema_metadata(metadata) - @_cudf_nvtx_annotate + @_performance_tracking def to_records(self, index=True): """Convert to a numpy recarray @@ -5600,7 +5611,7 @@ def to_records(self, index=True): return ret @classmethod - @_cudf_nvtx_annotate + @_performance_tracking def from_records(cls, data, index=None, columns=None, nan_as_null=False): """ Convert structured or record ndarray to DataFrame. @@ -5651,7 +5662,7 @@ def from_records(cls, data, index=None, columns=None, nan_as_null=False): } if not is_scalar(index): - new_index = cudf.Index(index) + new_index = ensure_index(index) else: new_index = None @@ -5679,7 +5690,7 @@ def from_records(cls, data, index=None, columns=None, nan_as_null=False): return df @classmethod - @_cudf_nvtx_annotate + @_performance_tracking def _from_arrays(cls, data, index=None, columns=None, nan_as_null=False): """Convert a numpy/cupy array to DataFrame. @@ -5735,7 +5746,7 @@ def _from_arrays(cls, data, index=None, columns=None, nan_as_null=False): } if index is not None: - index = cudf.Index(index) + index = ensure_index(index) if isinstance(columns, (pd.Index, cudf.Index)): level_names = tuple(columns.names) @@ -5757,7 +5768,7 @@ def _from_arrays(cls, data, index=None, columns=None, nan_as_null=False): index=index, ) - @_cudf_nvtx_annotate + @_performance_tracking def interpolate( self, method="linear", @@ -5787,7 +5798,7 @@ def interpolate( **kwargs, ) - @_cudf_nvtx_annotate + @_performance_tracking def quantile( self, q=0.5, @@ -5930,7 +5941,7 @@ def quantile( result.index = cudf.Index(list(map(float, qs)), dtype="float64") return result - @_cudf_nvtx_annotate + @_performance_tracking def isin(self, values): """ Whether each element in the DataFrame is contained in values. @@ -6074,7 +6085,7 @@ def make_false_column_like_self(): # # Stats # - @_cudf_nvtx_annotate + @_performance_tracking def _prepare_for_rowwise_op(self, method, skipna, numeric_only): """Prepare a DataFrame for CuPy-based row-wise operations.""" @@ -6126,7 +6137,7 @@ def _prepare_for_rowwise_op(self, method, skipna, numeric_only): coerced = coerced.astype("int64", copy=False) return coerced, mask, common_dtype - @_cudf_nvtx_annotate + @_performance_tracking def count(self, axis=0, numeric_only=False): """ Count ``non-NA`` cells for each column or row. @@ -6178,7 +6189,7 @@ def count(self, axis=0, numeric_only=False): "columns": 1, } - @_cudf_nvtx_annotate + @_performance_tracking def _reduce( self, op, @@ -6302,7 +6313,7 @@ def _reduce( else: raise ValueError(f"Invalid value of {axis=} received for {op}") - @_cudf_nvtx_annotate + @_performance_tracking def _scan( self, op, @@ -6319,7 +6330,7 @@ def _scan( elif axis == 1: return self._apply_cupy_method_axis_1(op, **kwargs) - @_cudf_nvtx_annotate + @_performance_tracking def mode(self, axis=0, numeric_only=False, dropna=True): """ Get the mode(s) of each element along the selected axis. 
@@ -6426,17 +6437,17 @@ def mode(self, axis=0, numeric_only=False, dropna=True): return df - @_cudf_nvtx_annotate + @_performance_tracking def all(self, axis=0, bool_only=None, skipna=True, **kwargs): obj = self.select_dtypes(include="bool") if bool_only else self return super(DataFrame, obj).all(axis, skipna, **kwargs) - @_cudf_nvtx_annotate + @_performance_tracking def any(self, axis=0, bool_only=None, skipna=True, **kwargs): obj = self.select_dtypes(include="bool") if bool_only else self return super(DataFrame, obj).any(axis, skipna, **kwargs) - @_cudf_nvtx_annotate + @_performance_tracking def _apply_cupy_method_axis_1(self, method, *args, **kwargs): # This method uses cupy to perform scans and reductions along rows of a # DataFrame. Since cuDF is designed around columnar storage and @@ -6536,7 +6547,7 @@ def _apply_cupy_method_axis_1(self, method, *args, **kwargs): result_df._set_columns_like(prepared._data) return result_df - @_cudf_nvtx_annotate + @_performance_tracking def _columns_view(self, columns): """ Return a subset of the DataFrame's columns as a view. @@ -6545,7 +6556,7 @@ def _columns_view(self, columns): {col: self._data[col] for col in columns}, index=self.index ) - @_cudf_nvtx_annotate + @_performance_tracking def select_dtypes(self, include=None, exclude=None): """Return a subset of the DataFrame's columns based on the column dtypes. @@ -6810,7 +6821,7 @@ def to_orc( index=index, ) - @_cudf_nvtx_annotate + @_performance_tracking def stack(self, level=-1, dropna=no_default, future_stack=False): """Stack the prescribed level(s) from columns to index @@ -7155,7 +7166,7 @@ def unnamed_group_generator(): else: return result - @_cudf_nvtx_annotate + @_performance_tracking def cov(self, **kwargs): """Compute the covariance matrix of a DataFrame. @@ -7210,7 +7221,7 @@ def corr(self, method="pearson", min_periods=None): df._set_columns_like(self._data) return df - @_cudf_nvtx_annotate + @_performance_tracking def to_struct(self, name=None): """ Return a struct Series composed of the columns of the DataFrame. @@ -7244,7 +7255,7 @@ def to_struct(self, name=None): name=name, ) - @_cudf_nvtx_annotate + @_performance_tracking def keys(self): """ Get the columns. @@ -7304,14 +7315,14 @@ def iterrows(self): "if you wish to iterate over each row." ) - @_cudf_nvtx_annotate + @_performance_tracking @copy_docstring(reshape.pivot) def pivot(self, *, columns, index=no_default, values=no_default): return cudf.core.reshape.pivot( self, index=index, columns=columns, values=values ) - @_cudf_nvtx_annotate + @_performance_tracking @copy_docstring(reshape.pivot_table) def pivot_table( self, @@ -7340,14 +7351,14 @@ def pivot_table( sort=sort, ) - @_cudf_nvtx_annotate + @_performance_tracking @copy_docstring(reshape.unstack) def unstack(self, level=-1, fill_value=None): return cudf.core.reshape.unstack( self, level=level, fill_value=fill_value ) - @_cudf_nvtx_annotate + @_performance_tracking def explode(self, column, ignore_index=False): """ Transform each element of a list-like to a row, replicating index @@ -7543,7 +7554,7 @@ def _from_columns_like_self( result._set_columns_like(self._data) return result - @_cudf_nvtx_annotate + @_performance_tracking def interleave_columns(self): """ Interleave Series columns of a table into a single column. 
@@ -7591,7 +7602,7 @@ def interleave_columns(self): {None: libcudf.reshape.interleave_columns([*self._columns])} ) - @_cudf_nvtx_annotate + @_performance_tracking def eval(self, expr: str, inplace: bool = False, **kwargs): """Evaluate a string describing operations on DataFrame columns. @@ -7947,7 +7958,7 @@ def func(left, right, output): ) -@_cudf_nvtx_annotate +@_performance_tracking def from_pandas(obj, nan_as_null=no_default): """ Convert certain Pandas objects into the cudf equivalent. @@ -8074,7 +8085,7 @@ def from_pandas(obj, nan_as_null=no_default): ) -@_cudf_nvtx_annotate +@_performance_tracking def merge(left, right, *args, **kwargs): if isinstance(left, Series): left = left.to_frame() diff --git a/python/cudf/cudf/core/frame.py b/python/cudf/cudf/core/frame.py index 8ca71180c00..9bac75dc6ac 100644 --- a/python/cudf/cudf/core/frame.py +++ b/python/cudf/cudf/core/frame.py @@ -32,7 +32,7 @@ from cudf.core.mixins import BinaryOperand, Scannable from cudf.utils import ioutils from cudf.utils.dtypes import find_common_type -from cudf.utils.nvtx_annotation import _cudf_nvtx_annotate +from cudf.utils.performance_tracking import _performance_tracking from cudf.utils.utils import _array_ufunc, _warn_no_dask_cudf if TYPE_CHECKING: @@ -86,7 +86,7 @@ def _dtypes(self) -> abc.Iterable: def ndim(self) -> int: raise NotImplementedError() - @_cudf_nvtx_annotate + @_performance_tracking def serialize(self): # TODO: See if self._data can be serialized outright header = { @@ -101,7 +101,7 @@ def serialize(self): return header, frames @classmethod - @_cudf_nvtx_annotate + @_performance_tracking def deserialize(cls, header, frames): cls_deserialize = pickle.loads(header["type-serialized"]) column_names = pickle.loads(header["column_names"]) @@ -122,7 +122,7 @@ def deserialize(cls, header, frames): return cls_deserialize._from_data(col_accessor) @classmethod - @_cudf_nvtx_annotate + @_performance_tracking def _from_data(cls, data: MutableMapping) -> Self: """ Construct cls from a ColumnAccessor-like mapping. @@ -131,7 +131,7 @@ def _from_data(cls, data: MutableMapping) -> Self: Frame.__init__(obj, data) return obj - @_cudf_nvtx_annotate + @_performance_tracking def _from_data_like_self(self, data: MutableMapping) -> Self: """ Return type(self) from a ColumnAccessor-like mapping but @@ -139,7 +139,7 @@ def _from_data_like_self(self, data: MutableMapping) -> Self: """ return self._from_data(data) - @_cudf_nvtx_annotate + @_performance_tracking def _from_columns_like_self( self, columns: list[ColumnBase], @@ -155,7 +155,7 @@ def _from_columns_like_self( frame = self.__class__._from_data(data) return frame._copy_type_metadata(self) - @_cudf_nvtx_annotate + @_performance_tracking def _mimic_inplace( self, result: Self, inplace: bool = False ) -> Self | None: @@ -171,7 +171,7 @@ def _mimic_inplace( return result @property - @_cudf_nvtx_annotate + @_performance_tracking def size(self) -> int: """ Return the number of elements in the underlying data. 
@@ -263,11 +263,11 @@ def memory_usage(self, deep=False): """ raise NotImplementedError - @_cudf_nvtx_annotate + @_performance_tracking def __len__(self) -> int: return self._num_rows - @_cudf_nvtx_annotate + @_performance_tracking def astype(self, dtype: dict[Any, Dtype], copy: bool = False) -> Self: casted = ( col.astype(dtype.get(col_name, col.dtype), copy=copy) @@ -276,7 +276,7 @@ def astype(self, dtype: dict[Any, Dtype], copy: bool = False) -> Self: ca = self._data._from_columns_like_self(casted, verify=False) return self._from_data_like_self(ca) - @_cudf_nvtx_annotate + @_performance_tracking def equals(self, other) -> bool: """ Test whether two objects contain the same elements. @@ -347,7 +347,7 @@ def equals(self, other) -> bool: ) ) - @_cudf_nvtx_annotate + @_performance_tracking def _get_columns_by_label(self, labels) -> Self: """ Returns columns of the Frame specified by `labels`. @@ -357,7 +357,7 @@ def _get_columns_by_label(self, labels) -> Self: return self._from_data_like_self(self._data.select_by_label(labels)) @property - @_cudf_nvtx_annotate + @_performance_tracking def values(self) -> cupy.ndarray: """ Return a CuPy representation of the DataFrame. @@ -373,7 +373,7 @@ def values(self) -> cupy.ndarray: return self.to_cupy() @property - @_cudf_nvtx_annotate + @_performance_tracking def values_host(self) -> np.ndarray: """ Return a NumPy representation of the data. @@ -388,7 +388,7 @@ def values_host(self) -> np.ndarray: """ return self.to_numpy() - @_cudf_nvtx_annotate + @_performance_tracking def __array__(self, dtype=None): raise TypeError( "Implicit conversion to a host NumPy array via __array__ is not " @@ -397,14 +397,14 @@ def __array__(self, dtype=None): "using .to_numpy()." ) - @_cudf_nvtx_annotate + @_performance_tracking def __arrow_array__(self, type=None): raise TypeError( "Implicit conversion to a host PyArrow object via __arrow_array__ " "is not allowed. Consider using .to_arrow()" ) - @_cudf_nvtx_annotate + @_performance_tracking def _to_array( self, get_array: Callable, @@ -468,7 +468,7 @@ def to_array( # particular, we need to benchmark how much of the overhead is coming from # (potentially unavoidable) local copies in to_cupy and how much comes from # inefficiencies in the implementation. - @_cudf_nvtx_annotate + @_performance_tracking def to_cupy( self, dtype: Dtype | None = None, @@ -502,7 +502,7 @@ def to_cupy( na_value, ) - @_cudf_nvtx_annotate + @_performance_tracking def to_numpy( self, dtype: Dtype | None = None, @@ -537,7 +537,7 @@ def to_numpy( lambda col: col.values_host, numpy, copy, dtype, na_value ) - @_cudf_nvtx_annotate + @_performance_tracking def where(self, cond, other=None, inplace: bool = False) -> Self | None: """ Replace values where the condition is False. 
@@ -610,7 +610,7 @@ def where(self, cond, other=None, inplace: bool = False) -> Self | None: """ raise NotImplementedError - @_cudf_nvtx_annotate + @_performance_tracking def fillna( self, value: None | ScalarLike | cudf.Series = None, @@ -767,14 +767,14 @@ def fillna( inplace=inplace, ) - @_cudf_nvtx_annotate + @_performance_tracking def _drop_column(self, name): """Drop a column by *name*""" if name not in self._data: raise KeyError(f"column '{name}' does not exist") del self._data[name] - @_cudf_nvtx_annotate + @_performance_tracking def _quantile_table( self, q: float, @@ -808,7 +808,7 @@ def _quantile_table( ) @classmethod - @_cudf_nvtx_annotate + @_performance_tracking def from_arrow(cls, data: pa.Table) -> Self: """Convert from PyArrow Table to Frame @@ -968,7 +968,7 @@ def from_arrow(cls, data: pa.Table) -> Self: return cls._from_data({name: result[name] for name in column_names}) - @_cudf_nvtx_annotate + @_performance_tracking def to_arrow(self): """ Convert to arrow Table @@ -992,7 +992,7 @@ def to_arrow(self): {str(name): col.to_arrow() for name, col in self._data.items()} ) - @_cudf_nvtx_annotate + @_performance_tracking def _positions_from_column_names(self, column_names) -> list[int]: """Map each column name into their positions in the frame. @@ -1005,7 +1005,7 @@ def _positions_from_column_names(self, column_names) -> list[int]: if name in set(column_names) ] - @_cudf_nvtx_annotate + @_performance_tracking def _copy_type_metadata(self: Self, other: Self) -> Self: """ Copy type metadata from each column of `other` to the corresponding @@ -1020,7 +1020,7 @@ def _copy_type_metadata(self: Self, other: Self) -> Self: return self - @_cudf_nvtx_annotate + @_performance_tracking def isna(self): """ Identify missing values. @@ -1101,7 +1101,7 @@ def isna(self): # Alias for isna isnull = isna - @_cudf_nvtx_annotate + @_performance_tracking def notna(self): """ Identify non-missing values. @@ -1182,7 +1182,7 @@ def notna(self): # Alias for notna notnull = notna - @_cudf_nvtx_annotate + @_performance_tracking def searchsorted( self, values, @@ -1296,7 +1296,7 @@ def searchsorted( else: return result - @_cudf_nvtx_annotate + @_performance_tracking def argsort( self, by=None, @@ -1383,7 +1383,7 @@ def argsort( by=by, ascending=ascending, na_position=na_position ).values - @_cudf_nvtx_annotate + @_performance_tracking def _get_sorted_inds( self, by=None, @@ -1411,7 +1411,7 @@ def _get_sorted_inds( stable=True, ) - @_cudf_nvtx_annotate + @_performance_tracking def _split(self, splits): """Split a frame with split points in ``splits``. Returns a list of Frames of length `len(splits) + 1`. 
@@ -1426,13 +1426,13 @@ def _split(self, splits): for split_idx in range(len(splits) + 1) ] - @_cudf_nvtx_annotate + @_performance_tracking def _encode(self): columns, indices = libcudf.transform.table_encode([*self._columns]) keys = self._from_columns_like_self(columns) return keys, indices - @_cudf_nvtx_annotate + @_performance_tracking def _unaryop(self, op): data_columns = (col.unary_operator(op) for col in self._columns) return self._from_data_like_self( @@ -1440,7 +1440,7 @@ def _unaryop(self, op): ) @classmethod - @_cudf_nvtx_annotate + @_performance_tracking def _colwise_binop( cls, operands: dict[str | None, tuple[ColumnBase, Any, bool, Any]], @@ -1519,11 +1519,11 @@ def _colwise_binop( return output - @_cudf_nvtx_annotate + @_performance_tracking def __array_ufunc__(self, ufunc, method, *inputs, **kwargs): return _array_ufunc(self, ufunc, method, inputs, kwargs) - @_cudf_nvtx_annotate + @_performance_tracking @acquire_spill_lock() def _apply_cupy_ufunc_to_operands( self, ufunc, cupy_func, operands, **kwargs @@ -1565,7 +1565,7 @@ def _apply_cupy_ufunc_to_operands( return data # Unary logical operators - @_cudf_nvtx_annotate + @_performance_tracking def __neg__(self): """Negate for integral dtypes, logical NOT for bools.""" return self._from_data_like_self( @@ -1579,30 +1579,30 @@ def __neg__(self): ) ) - @_cudf_nvtx_annotate + @_performance_tracking def __pos__(self): return self.copy(deep=True) - @_cudf_nvtx_annotate + @_performance_tracking def __abs__(self): return self._unaryop("abs") # Reductions @classmethod - @_cudf_nvtx_annotate + @_performance_tracking def _get_axis_from_axis_arg(cls, axis): try: return cls._SUPPORT_AXIS_LOOKUP[axis] except KeyError: raise ValueError(f"No axis named {axis} for object type {cls}") - @_cudf_nvtx_annotate + @_performance_tracking def _reduce(self, *args, **kwargs): raise NotImplementedError( f"Reductions are not supported for objects of type {type(self)}." ) - @_cudf_nvtx_annotate + @_performance_tracking def min( self, axis=0, @@ -1653,7 +1653,7 @@ def min( **kwargs, ) - @_cudf_nvtx_annotate + @_performance_tracking def max( self, axis=0, @@ -1701,7 +1701,7 @@ def max( **kwargs, ) - @_cudf_nvtx_annotate + @_performance_tracking def all(self, axis=0, skipna=True, **kwargs): """ Return whether all elements are True in DataFrame. @@ -1754,7 +1754,7 @@ def all(self, axis=0, skipna=True, **kwargs): **kwargs, ) - @_cudf_nvtx_annotate + @_performance_tracking def any(self, axis=0, skipna=True, **kwargs): """ Return whether any elements is True in DataFrame. 
@@ -1807,26 +1807,26 @@ def any(self, axis=0, skipna=True, **kwargs): **kwargs, ) - @_cudf_nvtx_annotate + @_performance_tracking @ioutils.doc_to_dlpack() def to_dlpack(self): """{docstring}""" return cudf.io.dlpack.to_dlpack(self) - @_cudf_nvtx_annotate + @_performance_tracking def __str__(self): return repr(self) - @_cudf_nvtx_annotate + @_performance_tracking def __deepcopy__(self, memo): return self.copy(deep=True) - @_cudf_nvtx_annotate + @_performance_tracking def __copy__(self): return self.copy(deep=False) - @_cudf_nvtx_annotate + @_performance_tracking def __invert__(self): """Bitwise invert (~) for integral dtypes, logical NOT for bools.""" return self._from_data_like_self( @@ -1835,7 +1835,7 @@ def __invert__(self): ) ) - @_cudf_nvtx_annotate + @_performance_tracking def nunique(self, dropna: bool = True): """ Returns a per column mapping with counts of unique values for @@ -1856,7 +1856,7 @@ def nunique(self, dropna: bool = True): ) @staticmethod - @_cudf_nvtx_annotate + @_performance_tracking def _repeat( columns: list[ColumnBase], repeats, axis=None ) -> list[ColumnBase]: @@ -1870,7 +1870,7 @@ def _repeat( return libcudf.filling.repeat(columns, repeats) - @_cudf_nvtx_annotate + @_performance_tracking @_warn_no_dask_cudf def __dask_tokenize__(self): from dask.base import normalize_token diff --git a/python/cudf/cudf/core/groupby/groupby.py b/python/cudf/cudf/core/groupby/groupby.py index 77b54a583d3..eccb3acabf6 100644 --- a/python/cudf/cudf/core/groupby/groupby.py +++ b/python/cudf/cudf/core/groupby/groupby.py @@ -31,7 +31,7 @@ from cudf.core.mixins import Reducible, Scannable from cudf.core.multiindex import MultiIndex from cudf.core.udf.groupby_utils import _can_be_jitted, jit_groupby_apply -from cudf.utils.nvtx_annotation import _cudf_nvtx_annotate +from cudf.utils.performance_tracking import _performance_tracking from cudf.utils.utils import GetAttrGetItemMixin if TYPE_CHECKING: @@ -392,7 +392,7 @@ def indices(self): zip(index.to_pandas(), cp.split(indices.values, offsets[1:-1])) ) - @_cudf_nvtx_annotate + @_performance_tracking def get_group(self, name, obj=None): """ Construct DataFrame from group with provided name. @@ -436,7 +436,7 @@ def get_group(self, name, obj=None): ) return obj.iloc[self.indices[name]] - @_cudf_nvtx_annotate + @_performance_tracking def size(self): """ Return the size of each group. @@ -451,7 +451,7 @@ def size(self): .agg("size") ) - @_cudf_nvtx_annotate + @_performance_tracking def cumcount(self): """ Return the cumulative count of keys in each group. @@ -467,7 +467,7 @@ def cumcount(self): .agg("cumcount") ) - @_cudf_nvtx_annotate + @_performance_tracking def rank( self, method="average", @@ -521,7 +521,7 @@ def _groupby(self): [*self.grouping.keys._columns], dropna=self._dropna ) - @_cudf_nvtx_annotate + @_performance_tracking def agg(self, func): """ Apply aggregation(s) to the groups. 
@@ -821,7 +821,7 @@ def _head_tail(self, n, *, take_head: bool, preserve_order: bool): else: return result - @_cudf_nvtx_annotate + @_performance_tracking def head(self, n: int = 5, *, preserve_order: bool = True): """Return first n rows of each group @@ -874,7 +874,7 @@ def head(self, n: int = 5, *, preserve_order: bool = True): n, take_head=True, preserve_order=preserve_order ) - @_cudf_nvtx_annotate + @_performance_tracking def tail(self, n: int = 5, *, preserve_order: bool = True): """Return last n rows of each group @@ -928,7 +928,7 @@ def tail(self, n: int = 5, *, preserve_order: bool = True): n, take_head=False, preserve_order=preserve_order ) - @_cudf_nvtx_annotate + @_performance_tracking def nth(self, n): """ Return the nth row from each group. @@ -949,7 +949,7 @@ def nth(self, n): del self.obj._data["__groupbynth_order__"] return result - @_cudf_nvtx_annotate + @_performance_tracking def ngroup(self, ascending=True): """ Number each group from 0 to the number of groups - 1. @@ -1261,7 +1261,7 @@ def _normalize_aggs( ] return column_names, columns, normalized_aggs - @_cudf_nvtx_annotate + @_performance_tracking def pipe(self, func, *args, **kwargs): """ Apply a function `func` with arguments to this GroupBy @@ -1316,7 +1316,7 @@ def pipe(self, func, *args, **kwargs): """ return cudf.core.common.pipe(self, func, *args, **kwargs) - @_cudf_nvtx_annotate + @_performance_tracking def _jit_groupby_apply( self, function, group_names, offsets, group_keys, grouped_values, *args ): @@ -1327,7 +1327,7 @@ def _jit_groupby_apply( chunk_results, group_names, group_keys, grouped_values ) - @_cudf_nvtx_annotate + @_performance_tracking def _iterative_groupby_apply( self, function, group_names, offsets, group_keys, grouped_values, *args ): @@ -1415,7 +1415,7 @@ def _post_process_chunk_results( result.index = cudf.MultiIndex._from_data(index_data) return result - @_cudf_nvtx_annotate + @_performance_tracking def apply( self, function, *args, engine="auto", include_groups: bool = True ): @@ -1573,7 +1573,7 @@ def mult(df): result = result.reset_index() return result - @_cudf_nvtx_annotate + @_performance_tracking def apply_grouped(self, function, **kwargs): """Apply a transformation function over the grouped chunk. @@ -1712,7 +1712,7 @@ def rolling_avg(val, avg): kwargs.update({"chunks": offsets}) return grouped_values.apply_chunks(function, **kwargs) - @_cudf_nvtx_annotate + @_performance_tracking def _broadcast(self, values): """ Broadcast the results of an aggregation to the group @@ -1736,7 +1736,7 @@ def _broadcast(self, values): values.index = self.obj.index return values - @_cudf_nvtx_annotate + @_performance_tracking def transform(self, function): """Apply an aggregation, then broadcast the result to the group size. @@ -1801,7 +1801,7 @@ def rolling(self, *args, **kwargs): """ return cudf.core.window.rolling.RollingGroupby(self, *args, **kwargs) - @_cudf_nvtx_annotate + @_performance_tracking def count(self, dropna=True): """Compute the number of values in each column. @@ -1816,7 +1816,7 @@ def func(x): return self.agg(func) - @_cudf_nvtx_annotate + @_performance_tracking def describe(self, include=None, exclude=None): """ Generate descriptive statistics that summarizes the central tendency, @@ -1888,7 +1888,7 @@ def describe(self, include=None, exclude=None): ) return res - @_cudf_nvtx_annotate + @_performance_tracking def corr(self, method="pearson", min_periods=1): """ Compute pairwise correlation of columns, excluding NA/null values. 
@@ -1950,7 +1950,7 @@ def corr(self, method="pearson", min_periods=1): lambda x: x.corr(method, min_periods), "Correlation" ) - @_cudf_nvtx_annotate + @_performance_tracking def cov(self, min_periods=0, ddof=1): """ Compute the pairwise covariance among the columns of a DataFrame, @@ -2129,7 +2129,7 @@ def _cov_or_corr(self, func, method_name): return res - @_cudf_nvtx_annotate + @_performance_tracking def var(self, ddof=1): """Compute the column-wise variance of the values in each group. @@ -2145,7 +2145,7 @@ def func(x): return self.agg(func) - @_cudf_nvtx_annotate + @_performance_tracking def std(self, ddof=1): """Compute the column-wise std of the values in each group. @@ -2161,7 +2161,7 @@ def func(x): return self.agg(func) - @_cudf_nvtx_annotate + @_performance_tracking def quantile(self, q=0.5, interpolation="linear"): """Compute the column-wise quantiles of the values in each group. @@ -2179,18 +2179,18 @@ def func(x): return self.agg(func) - @_cudf_nvtx_annotate + @_performance_tracking def collect(self): """Get a list of all the values for each column in each group.""" _deprecate_collect() return self.agg(list) - @_cudf_nvtx_annotate + @_performance_tracking def unique(self): """Get a list of the unique values for each column in each group.""" return self.agg("unique") - @_cudf_nvtx_annotate + @_performance_tracking def diff(self, periods=1, axis=0): """Get the difference between the values in each group. @@ -2258,7 +2258,7 @@ def bfill(self, limit=None): return self._scan_fill("bfill", limit) - @_cudf_nvtx_annotate + @_performance_tracking def fillna( self, value=None, @@ -2325,7 +2325,7 @@ def fillna( value=value, inplace=inplace, axis=axis, limit=limit ) - @_cudf_nvtx_annotate + @_performance_tracking def shift(self, periods=1, freq=None, axis=0, fill_value=None): """ Shift each group by ``periods`` positions. @@ -2388,7 +2388,7 @@ def shift(self, periods=1, freq=None, axis=0, fill_value=None): result = self._mimic_pandas_order(result) return result._copy_type_metadata(values) - @_cudf_nvtx_annotate + @_performance_tracking def pct_change( self, periods=1, diff --git a/python/cudf/cudf/core/index.py b/python/cudf/cudf/core/index.py index 71658695b80..b398ee2343e 100644 --- a/python/cudf/cudf/core/index.py +++ b/python/cudf/cudf/core/index.py @@ -58,13 +58,24 @@ is_mixed_with_object_dtype, numeric_normalize_types, ) -from cudf.utils.nvtx_annotation import _cudf_nvtx_annotate +from cudf.utils.performance_tracking import _performance_tracking from cudf.utils.utils import _warn_no_dask_cudf, search_range if TYPE_CHECKING: from collections.abc import Generator, Iterable +def ensure_index(index_like: Any) -> BaseIndex: + """ + Ensure an Index is returned. + + Avoids a shallow copy compared to calling cudf.Index(...) 
+ """ + if not isinstance(index_like, BaseIndex): + return cudf.Index(index_like) + return index_like + + class IndexMeta(type): """Custom metaclass for Index that overrides instance/subclass tests.""" @@ -204,7 +215,7 @@ class RangeIndex(BaseIndex, BinaryOperand): _range: range - @_cudf_nvtx_annotate + @_performance_tracking def __init__( self, start, stop=None, step=1, dtype=None, copy=False, name=None ): @@ -259,17 +270,17 @@ def factorize(self, sort: bool = False, use_na_sentinel: bool = True): return codes, uniques @property # type: ignore - @_cudf_nvtx_annotate + @_performance_tracking def name(self): return self._name @name.setter # type: ignore - @_cudf_nvtx_annotate + @_performance_tracking def name(self, value): self._name = value @property # type: ignore - @_cudf_nvtx_annotate + @_performance_tracking def start(self) -> int: """ The value of the `start` parameter (0 if this was not supplied). @@ -277,7 +288,7 @@ def start(self) -> int: return self._range.start @property # type: ignore - @_cudf_nvtx_annotate + @_performance_tracking def stop(self) -> int: """ The value of the stop parameter. @@ -285,7 +296,7 @@ def stop(self) -> int: return self._range.stop @property # type: ignore - @_cudf_nvtx_annotate + @_performance_tracking def step(self) -> int: """ The value of the step parameter. @@ -293,12 +304,12 @@ def step(self) -> int: return self._range.step @property # type: ignore - @_cudf_nvtx_annotate + @_performance_tracking def _num_rows(self) -> int: return len(self) @cached_property # type: ignore - @_cudf_nvtx_annotate + @_performance_tracking def _values(self): if len(self) > 0: return column.as_column(self._range, dtype=self.dtype) @@ -330,18 +341,18 @@ def _is_interval(self) -> bool: return False @property # type: ignore - @_cudf_nvtx_annotate + @_performance_tracking def hasnans(self) -> bool: return False @property # type: ignore - @_cudf_nvtx_annotate + @_performance_tracking def _data(self): return cudf.core.column_accessor.ColumnAccessor( {self.name: self._values} ) - @_cudf_nvtx_annotate + @_performance_tracking def __contains__(self, item): hash(item) if isinstance(item, bool) or not isinstance( @@ -357,7 +368,7 @@ def __contains__(self, item): except (ValueError, OverflowError): return False - @_cudf_nvtx_annotate + @_performance_tracking def copy(self, name=None, deep=False): """ Make a copy of this object. 
@@ -377,7 +388,7 @@ def copy(self, name=None, deep=False): return RangeIndex(self._range, name=name) - @_cudf_nvtx_annotate + @_performance_tracking def astype(self, dtype, copy: bool = True): if is_dtype_equal(dtype, self.dtype): return self @@ -386,15 +397,15 @@ def astype(self, dtype, copy: bool = True): def fillna(self, value, downcast=None): return self.copy() - @_cudf_nvtx_annotate + @_performance_tracking def drop_duplicates(self, keep="first"): return self - @_cudf_nvtx_annotate + @_performance_tracking def duplicated(self, keep="first") -> cupy.ndarray: return cupy.zeros(len(self), dtype=bool) - @_cudf_nvtx_annotate + @_performance_tracking def __repr__(self): return ( f"{self.__class__.__name__}(start={self.start}, stop={self.stop}" @@ -408,15 +419,15 @@ def __repr__(self): ) @property - @_cudf_nvtx_annotate + @_performance_tracking def size(self) -> int: return len(self) - @_cudf_nvtx_annotate + @_performance_tracking def __len__(self): return len(self._range) - @_cudf_nvtx_annotate + @_performance_tracking def __getitem__(self, index): if isinstance(index, slice): sl_start, sl_stop, sl_step = index.indices(len(self)) @@ -435,13 +446,13 @@ def __getitem__(self, index): return self.start + index * self.step return self._as_int_index()[index] - @_cudf_nvtx_annotate + @_performance_tracking def equals(self, other) -> bool: if isinstance(other, RangeIndex): return self._range == other._range return self._as_int_index().equals(other) - @_cudf_nvtx_annotate + @_performance_tracking def serialize(self): header = {} header["index_column"] = {} @@ -462,7 +473,7 @@ def serialize(self): return header, frames @classmethod - @_cudf_nvtx_annotate + @_performance_tracking def deserialize(cls, header, frames): h = header["index_column"] name = pickle.loads(header["name"]) @@ -472,7 +483,7 @@ def deserialize(cls, header, frames): return RangeIndex(start=start, stop=stop, step=step, name=name) @property # type: ignore - @_cudf_nvtx_annotate + @_performance_tracking def dtype(self): """ `dtype` of the range of values in RangeIndex. @@ -487,7 +498,7 @@ def dtype(self): def _dtypes(self) -> Iterable: return [(self.name, self.dtype)] - @_cudf_nvtx_annotate + @_performance_tracking def to_pandas( self, *, nullable: bool = False, arrow_type: bool = False ) -> pd.RangeIndex: @@ -508,16 +519,16 @@ def is_unique(self) -> bool: return True @cached_property # type: ignore - @_cudf_nvtx_annotate + @_performance_tracking def is_monotonic_increasing(self) -> bool: return self.step > 0 or len(self) <= 1 @cached_property # type: ignore - @_cudf_nvtx_annotate + @_performance_tracking def is_monotonic_decreasing(self): return self.step < 0 or len(self) <= 1 - @_cudf_nvtx_annotate + @_performance_tracking def memory_usage(self, deep: bool = False) -> int: if deep: warnings.warn( @@ -530,7 +541,7 @@ def unique(self) -> Self: # RangeIndex always has unique values return self.copy() - @_cudf_nvtx_annotate + @_performance_tracking def __mul__(self, other): # Multiplication by raw ints must return a RangeIndex to match pandas. if isinstance(other, cudf.Scalar) and other.dtype.kind in "iu": @@ -547,24 +558,24 @@ def __mul__(self, other): ) return self._as_int_index().__mul__(other) - @_cudf_nvtx_annotate + @_performance_tracking def __rmul__(self, other): # Multiplication is commutative. return self.__mul__(other) - @_cudf_nvtx_annotate + @_performance_tracking def _as_int_index(self): # Convert self to an integer index. This method is used to perform ops # that are not defined directly on RangeIndex. 
return cudf.Index._from_data(self._data) - @_cudf_nvtx_annotate + @_performance_tracking def __array_ufunc__(self, ufunc, method, *inputs, **kwargs): return self._as_int_index().__array_ufunc__( ufunc, method, *inputs, **kwargs ) - @_cudf_nvtx_annotate + @_performance_tracking def get_indexer(self, target, limit=None, method=None, tolerance=None): target_col = cudf.core.column.as_column(target) if method is not None or not isinstance( @@ -594,7 +605,7 @@ def get_indexer(self, target, limit=None, method=None, tolerance=None): locs[valid] = len(self) - 1 - locs[valid] return locs - @_cudf_nvtx_annotate + @_performance_tracking def get_loc(self, key): if not is_scalar(key): raise TypeError("Should be a scalar-like") @@ -608,7 +619,7 @@ def get_loc(self, key): raise KeyError(key) return idx_int - @_cudf_nvtx_annotate + @_performance_tracking def _union(self, other, sort=None): if isinstance(other, RangeIndex): # Variable suffixes are of the @@ -685,7 +696,7 @@ def _union(self, other, sort=None): self._as_int_index()._union(other, sort=sort) ) - @_cudf_nvtx_annotate + @_performance_tracking def _intersection(self, other, sort=None): if not isinstance(other, RangeIndex): return self._try_reconstruct_range_index( @@ -733,7 +744,7 @@ def _intersection(self, other, sort=None): return self._try_reconstruct_range_index(new_index) - @_cudf_nvtx_annotate + @_performance_tracking def difference(self, other, sort=None): if isinstance(other, RangeIndex) and self.equals(other): return self[:0]._get_reconciled_name_object(other) @@ -785,14 +796,14 @@ def sort_values( else: return sorted_index - @_cudf_nvtx_annotate + @_performance_tracking def _gather(self, gather_map, nullify=False, check_bounds=True): gather_map = cudf.core.column.as_column(gather_map) return cudf.Index._from_data( {self.name: self._values.take(gather_map, nullify, check_bounds)} ) - @_cudf_nvtx_annotate + @_performance_tracking def _apply_boolean_mask(self, boolean_mask): return cudf.Index._from_data( {self.name: self._values.apply_boolean_mask(boolean_mask)} @@ -838,21 +849,21 @@ def join( ) @property # type: ignore - @_cudf_nvtx_annotate + @_performance_tracking def _column(self): return self._as_int_index()._column @property # type: ignore - @_cudf_nvtx_annotate + @_performance_tracking def _columns(self): return self._as_int_index()._columns @property # type: ignore - @_cudf_nvtx_annotate + @_performance_tracking def values_host(self) -> np.ndarray: return np.arange(start=self.start, stop=self.stop, step=self.step) - @_cudf_nvtx_annotate + @_performance_tracking def argsort( self, ascending=True, @@ -865,19 +876,19 @@ def argsort( else: return cupy.arange(len(self)) - @_cudf_nvtx_annotate + @_performance_tracking def where(self, cond, other=None, inplace=False): return self._as_int_index().where(cond, other, inplace) - @_cudf_nvtx_annotate + @_performance_tracking def to_numpy(self) -> np.ndarray: return self.values_host - @_cudf_nvtx_annotate + @_performance_tracking def to_cupy(self) -> cupy.ndarray: return self.values - @_cudf_nvtx_annotate + @_performance_tracking def to_arrow(self) -> pa.Array: return pa.array(self._range, type=pa.from_numpy_dtype(self.dtype)) @@ -889,23 +900,23 @@ def __array__(self, dtype=None): "using .to_numpy()." 
) - @_cudf_nvtx_annotate + @_performance_tracking def nunique(self, dropna: bool = True) -> int: return len(self) - @_cudf_nvtx_annotate + @_performance_tracking def isna(self) -> cupy.ndarray: return cupy.zeros(len(self), dtype=bool) isnull = isna - @_cudf_nvtx_annotate + @_performance_tracking def notna(self) -> cupy.ndarray: return cupy.ones(len(self), dtype=bool) notnull = isna - @_cudf_nvtx_annotate + @_performance_tracking def _minmax(self, meth: str): no_steps = len(self) - 1 if no_steps == -1: @@ -1004,12 +1015,12 @@ class Index(SingleColumnFrame, BaseIndex, metaclass=IndexMeta): Column's, the data Column will be cloned to adopt this name. """ - @_cudf_nvtx_annotate + @_performance_tracking def __init__(self, data, **kwargs): name = _getdefault_name(data, name=kwargs.get("name")) super().__init__({name: data}) - @_cudf_nvtx_annotate + @_performance_tracking def __array_ufunc__(self, ufunc, method, *inputs, **kwargs): ret = super().__array_ufunc__(ufunc, method, *inputs, **kwargs) @@ -1046,7 +1057,7 @@ def __array_ufunc__(self, ufunc, method, *inputs, **kwargs): return NotImplemented @classmethod - @_cudf_nvtx_annotate + @_performance_tracking def _from_data(cls, data: MutableMapping, name: Any = no_default) -> Self: out = super()._from_data(data=data) if name is not no_default: @@ -1054,7 +1065,7 @@ def _from_data(cls, data: MutableMapping, name: Any = no_default) -> Self: return out @classmethod - @_cudf_nvtx_annotate + @_performance_tracking def _from_data_like_self( cls, data: MutableMapping, name: Any = no_default ) -> Self: @@ -1064,7 +1075,7 @@ def _from_data_like_self( return out @classmethod - @_cudf_nvtx_annotate + @_performance_tracking def from_arrow(cls, obj): try: return cls(ColumnBase.from_arrow(obj)) @@ -1118,12 +1129,12 @@ def _binaryop( return ret @property # type: ignore - @_cudf_nvtx_annotate + @_performance_tracking def _values(self): return self._column @classmethod - @_cudf_nvtx_annotate + @_performance_tracking def _concat(cls, objs): non_empties = [index for index in objs if len(index)] if len(objs) != len(non_empties): @@ -1166,16 +1177,16 @@ def _concat(cls, objs): result.name = name return result - @_cudf_nvtx_annotate + @_performance_tracking def memory_usage(self, deep=False): return self._column.memory_usage @cached_property # type: ignore - @_cudf_nvtx_annotate + @_performance_tracking def is_unique(self): return self._column.is_unique - @_cudf_nvtx_annotate + @_performance_tracking def equals(self, other) -> bool: if not isinstance(other, BaseIndex) or len(self) != len(other): return False @@ -1198,7 +1209,7 @@ def equals(self, other) -> bool: except TypeError: return False - @_cudf_nvtx_annotate + @_performance_tracking def copy(self, name=None, deep=False): """ Make a copy of this object. 
@@ -1221,11 +1232,11 @@ def copy(self, name=None, deep=False): {name: self._values.copy(True) if deep else self._values} ) - @_cudf_nvtx_annotate + @_performance_tracking def astype(self, dtype, copy: bool = True): return super().astype({self.name: dtype}, copy) - @_cudf_nvtx_annotate + @_performance_tracking def get_indexer(self, target, method=None, limit=None, tolerance=None): if is_scalar(target): raise TypeError("Should be a sequence") @@ -1297,7 +1308,7 @@ def get_indexer(self, target, method=None, limit=None, tolerance=None): return _return_get_indexer_result(result_series.to_cupy()) - @_cudf_nvtx_annotate + @_performance_tracking def get_loc(self, key): if not is_scalar(key): raise TypeError("Should be a scalar-like") @@ -1333,7 +1344,7 @@ def get_loc(self, key): mask[true_inds] = True return mask - @_cudf_nvtx_annotate + @_performance_tracking def __repr__(self): max_seq_items = pd.get_option("max_seq_items") or len(self) mr = 0 @@ -1419,7 +1430,7 @@ def __repr__(self): lines.append(f"{prior_to_dtype} {keywords})") return "\n".join(lines) - @_cudf_nvtx_annotate + @_performance_tracking def __getitem__(self, index): res = self._get_elements_from_column(index) if isinstance(res, ColumnBase): @@ -1427,20 +1438,20 @@ def __getitem__(self, index): return res @property # type: ignore - @_cudf_nvtx_annotate + @_performance_tracking def dtype(self): """ `dtype` of the underlying values in Index. """ return self._values.dtype - @_cudf_nvtx_annotate + @_performance_tracking def isna(self): return self._column.isnull().values isnull = isna - @_cudf_nvtx_annotate + @_performance_tracking def notna(self): return self._column.notnull().values @@ -1470,11 +1481,11 @@ def _is_interval(self): return False @property # type: ignore - @_cudf_nvtx_annotate + @_performance_tracking def hasnans(self): return self._column.has_nulls(include_nan=True) - @_cudf_nvtx_annotate + @_performance_tracking def argsort( self, axis=0, @@ -1518,7 +1529,7 @@ def repeat(self, repeats, axis=None): Frame._repeat([*self._columns], repeats, axis), self._column_names ) - @_cudf_nvtx_annotate + @_performance_tracking def where(self, cond, other=None, inplace=False): result_col = super().where(cond, other, inplace) return self._mimic_inplace( @@ -1569,7 +1580,7 @@ def append(self, other): to_concat.append(obj) else: this = self - other = cudf.Index(other) + other = ensure_index(other) if len(this) == 0 or len(other) == 0: # we'll filter out empties later in ._concat @@ -1615,7 +1626,7 @@ def _indices_of(self, value): @copy_docstring(StringMethods) # type: ignore @property - @_cudf_nvtx_annotate + @_performance_tracking def str(self): if is_string_dtype(self.dtype): return StringMethods(parent=self) @@ -1698,7 +1709,7 @@ class DatetimeIndex(Index): dtype='datetime64[ns]', name='a') """ - @_cudf_nvtx_annotate + @_performance_tracking def __init__( self, data=None, @@ -1761,7 +1772,7 @@ def __init__( ): raise ValueError("No unique frequency found") - @_cudf_nvtx_annotate + @_performance_tracking def _copy_type_metadata(self: Self, other: Self) -> Self: super()._copy_type_metadata(other) self._freq = _validate_freq(other._freq) @@ -1783,7 +1794,7 @@ def __getitem__(self, index): return pd.Timestamp(value) return value - @_cudf_nvtx_annotate + @_performance_tracking def copy(self, name=None, deep=False): idx_copy = super().copy(name=name, deep=deep) return idx_copy._copy_type_metadata(self) @@ -1801,7 +1812,7 @@ def searchsorted( ) @property # type: ignore - @_cudf_nvtx_annotate + @_performance_tracking def year(self): """ The 
year of the datetime. @@ -1820,7 +1831,7 @@ def year(self): return self._get_dt_field("year") @property # type: ignore - @_cudf_nvtx_annotate + @_performance_tracking def month(self): """ The month as January=1, December=12. @@ -1839,7 +1850,7 @@ def month(self): return self._get_dt_field("month") @property # type: ignore - @_cudf_nvtx_annotate + @_performance_tracking def day(self): """ The day of the datetime. @@ -1858,7 +1869,7 @@ def day(self): return self._get_dt_field("day") @property # type: ignore - @_cudf_nvtx_annotate + @_performance_tracking def hour(self): """ The hours of the datetime. @@ -1879,7 +1890,7 @@ def hour(self): return self._get_dt_field("hour") @property # type: ignore - @_cudf_nvtx_annotate + @_performance_tracking def minute(self): """ The minutes of the datetime. @@ -1900,7 +1911,7 @@ def minute(self): return self._get_dt_field("minute") @property # type: ignore - @_cudf_nvtx_annotate + @_performance_tracking def second(self): """ The seconds of the datetime. @@ -1921,7 +1932,7 @@ def second(self): return self._get_dt_field("second") @property # type: ignore - @_cudf_nvtx_annotate + @_performance_tracking def microsecond(self): """ The microseconds of the datetime. @@ -1952,7 +1963,7 @@ def microsecond(self): ) @property # type: ignore - @_cudf_nvtx_annotate + @_performance_tracking def nanosecond(self): """ The nanoseconds of the datetime. @@ -1974,7 +1985,7 @@ def nanosecond(self): return self._get_dt_field("nanosecond") @property # type: ignore - @_cudf_nvtx_annotate + @_performance_tracking def weekday(self): """ The day of the week with Monday=0, Sunday=6. @@ -1996,7 +2007,7 @@ def weekday(self): return self._get_dt_field("weekday") @property # type: ignore - @_cudf_nvtx_annotate + @_performance_tracking def dayofweek(self): """ The day of the week with Monday=0, Sunday=6. @@ -2018,7 +2029,7 @@ def dayofweek(self): return self._get_dt_field("weekday") @property # type: ignore - @_cudf_nvtx_annotate + @_performance_tracking def dayofyear(self): """ The day of the year, from 1-365 in non-leap years and @@ -2041,7 +2052,7 @@ def dayofyear(self): return self._get_dt_field("day_of_year") @property # type: ignore - @_cudf_nvtx_annotate + @_performance_tracking def day_of_year(self): """ The day of the year, from 1-365 in non-leap years and @@ -2064,7 +2075,7 @@ def day_of_year(self): return self._get_dt_field("day_of_year") @property # type: ignore - @_cudf_nvtx_annotate + @_performance_tracking def is_leap_year(self): """ Boolean indicator if the date belongs to a leap year. @@ -2083,7 +2094,7 @@ def is_leap_year(self): return cupy.asarray(res) @property # type: ignore - @_cudf_nvtx_annotate + @_performance_tracking def quarter(self): """ Integer indicator for which quarter of the year the date belongs in. @@ -2108,7 +2119,7 @@ def quarter(self): res = extract_quarter(self._values) return Index(res, dtype="int8") - @_cudf_nvtx_annotate + @_performance_tracking def day_name(self, locale: str | None = None) -> Index: """ Return the day names. Currently supports English locale only. @@ -2128,7 +2139,7 @@ def day_name(self, locale: str | None = None) -> Index: day_names = self._column.get_day_names(locale) return Index._from_data({self.name: day_names}) - @_cudf_nvtx_annotate + @_performance_tracking def month_name(self, locale: str | None = None) -> Index: """ Return the month names. Currently supports English locale only. 
@@ -2147,7 +2158,7 @@ def month_name(self, locale: str | None = None) -> Index: month_names = self._column.get_month_names(locale) return Index._from_data({self.name: month_names}) - @_cudf_nvtx_annotate + @_performance_tracking def isocalendar(self) -> cudf.DataFrame: """ Returns a DataFrame with the year, week, and day @@ -2172,7 +2183,7 @@ def isocalendar(self) -> cudf.DataFrame: ) return cudf.DataFrame._from_data(ca, index=self) - @_cudf_nvtx_annotate + @_performance_tracking def to_pandas( self, *, nullable: bool = False, arrow_type: bool = False ) -> pd.DatetimeIndex: @@ -2181,7 +2192,7 @@ def to_pandas( result.freq = self._freq._maybe_as_fast_pandas_offset() return result - @_cudf_nvtx_annotate + @_performance_tracking def _get_dt_field(self, field): out_column = self._values.get_dt_field(field) # column.column_empty_like always returns a Column object @@ -2198,7 +2209,7 @@ def _get_dt_field(self, field): def _is_boolean(self): return False - @_cudf_nvtx_annotate + @_performance_tracking def ceil(self, freq): """ Perform ceil operation on the data to the specified freq. @@ -2231,7 +2242,7 @@ def ceil(self, freq): return self.__class__._from_data({self.name: out_column}) - @_cudf_nvtx_annotate + @_performance_tracking def floor(self, freq): """ Perform floor operation on the data to the specified freq. @@ -2264,7 +2275,7 @@ def floor(self, freq): return self.__class__._from_data({self.name: out_column}) - @_cudf_nvtx_annotate + @_performance_tracking def round(self, freq): """ Perform round operation on the data to the specified freq. @@ -2452,7 +2463,7 @@ class TimedeltaIndex(Index): dtype='timedelta64[s]', name='delta-index') """ - @_cudf_nvtx_annotate + @_performance_tracking def __init__( self, data=None, @@ -2500,7 +2511,7 @@ def __getitem__(self, index): return value @property # type: ignore - @_cudf_nvtx_annotate + @_performance_tracking def days(self): """ Number of days for each element. @@ -2509,7 +2520,7 @@ def days(self): return Index(self._values.days, name=self.name, dtype="int64") @property # type: ignore - @_cudf_nvtx_annotate + @_performance_tracking def seconds(self): """ Number of seconds (>= 0 and less than 1 day) for each element. @@ -2517,7 +2528,7 @@ def seconds(self): return Index(self._values.seconds, name=self.name, dtype="int32") @property # type: ignore - @_cudf_nvtx_annotate + @_performance_tracking def microseconds(self): """ Number of microseconds (>= 0 and less than 1 second) for each element. @@ -2525,7 +2536,7 @@ def microseconds(self): return Index(self._values.microseconds, name=self.name, dtype="int32") @property # type: ignore - @_cudf_nvtx_annotate + @_performance_tracking def nanoseconds(self): """ Number of nanoseconds (>= 0 and less than 1 microsecond) for each @@ -2534,7 +2545,7 @@ def nanoseconds(self): return Index(self._values.nanoseconds, name=self.name, dtype="int32") @property # type: ignore - @_cudf_nvtx_annotate + @_performance_tracking def components(self): """ Return a dataframe of the components (days, hours, minutes, @@ -2612,7 +2623,7 @@ class CategoricalIndex(Index): CategoricalIndex([1, 2, 3, ], categories=[1, 2, 3], ordered=False, dtype='category', name='a') """ # noqa: E501 - @_cudf_nvtx_annotate + @_performance_tracking def __init__( self, data=None, @@ -2667,7 +2678,7 @@ def __init__( super().__init__(data, name=name) @property # type: ignore - @_cudf_nvtx_annotate + @_performance_tracking def codes(self): """ The category codes of this categorical. 
@@ -2675,7 +2686,7 @@ def codes(self): return Index(self._values.codes) @property # type: ignore - @_cudf_nvtx_annotate + @_performance_tracking def categories(self): """ The categories of this categorical. @@ -2689,7 +2700,7 @@ def _is_categorical(self): return True -@_cudf_nvtx_annotate +@_performance_tracking def interval_range( start=None, end=None, @@ -2841,7 +2852,7 @@ class IntervalIndex(Index): IntervalIndex """ - @_cudf_nvtx_annotate + @_performance_tracking def __init__( self, data, @@ -2900,7 +2911,7 @@ def closed(self): return self.dtype.closed @classmethod - @_cudf_nvtx_annotate + @_performance_tracking def from_breaks( cls, breaks, @@ -2975,7 +2986,7 @@ def _clean_nulls_from_index(self): return self -@_cudf_nvtx_annotate +@_performance_tracking def as_index( arbitrary, nan_as_null=no_default, copy=False, name=no_default, dtype=None ) -> BaseIndex: @@ -3090,7 +3101,7 @@ def _getdefault_name(values, name): return name -@_cudf_nvtx_annotate +@_performance_tracking def _concat_range_index(indexes: list[RangeIndex]) -> BaseIndex: """ An internal Utility function to concat RangeIndex objects. @@ -3131,7 +3142,7 @@ def _concat_range_index(indexes: list[RangeIndex]) -> BaseIndex: return RangeIndex(start, stop, step) -@_cudf_nvtx_annotate +@_performance_tracking def _extended_gcd(a: int, b: int) -> tuple[int, int, int]: """ Extended Euclidean algorithms to solve Bezout's identity: diff --git a/python/cudf/cudf/core/indexed_frame.py b/python/cudf/cudf/core/indexed_frame.py index 280a6e92eab..ff10051c52d 100644 --- a/python/cudf/cudf/core/indexed_frame.py +++ b/python/cudf/cudf/core/indexed_frame.py @@ -33,7 +33,6 @@ is_list_like, is_scalar, ) -from cudf.core._base_index import BaseIndex from cudf.core._compat import PANDAS_LT_300 from cudf.core.buffer import acquire_spill_lock from cudf.core.column import ColumnBase, as_column @@ -42,7 +41,7 @@ from cudf.core.dtypes import ListDtype from cudf.core.frame import Frame from cudf.core.groupby.groupby import GroupBy -from cudf.core.index import Index, RangeIndex, _index_from_data +from cudf.core.index import RangeIndex, _index_from_data, ensure_index from cudf.core.missing import NA from cudf.core.multiindex import MultiIndex from cudf.core.resample import _Resampler @@ -56,7 +55,7 @@ from cudf.utils import docutils, ioutils from cudf.utils._numba import _CUDFNumbaConfig from cudf.utils.docutils import copy_docstring -from cudf.utils.nvtx_annotation import _cudf_nvtx_annotate +from cudf.utils.performance_tracking import _performance_tracking from cudf.utils.utils import _warn_no_dask_cudf if TYPE_CHECKING: @@ -66,6 +65,8 @@ Dtype, NotImplementedType, ) + from cudf.core._base_index import BaseIndex + doc_reset_index_template = """ Reset the index of the {klass}, or a level of it. @@ -301,13 +302,13 @@ def _from_data( out._index = RangeIndex(out._data.nrows) if index is None else index return out - @_cudf_nvtx_annotate + @_performance_tracking def _from_data_like_self(self, data: MutableMapping): out = super()._from_data_like_self(data) out.index = self.index return out - @_cudf_nvtx_annotate + @_performance_tracking def _from_columns_like_self( self, columns: list[ColumnBase], @@ -363,7 +364,7 @@ def _mimic_inplace( self._index = result.index return super()._mimic_inplace(result, inplace) - @_cudf_nvtx_annotate + @_performance_tracking def _scan(self, op, axis=None, skipna=True): """ Return {op_name} of the {cls}. 
@@ -439,7 +440,7 @@ def _check_data_index_length_match(self) -> None: ) @property - @_cudf_nvtx_annotate + @_performance_tracking def empty(self): """ Indicator whether DataFrame or Series is empty. @@ -501,7 +502,7 @@ def empty(self): """ return self.size == 0 - @_cudf_nvtx_annotate + @_performance_tracking @ioutils.doc_to_json() def to_json(self, path_or_buf=None, *args, **kwargs): """{docstring}""" @@ -510,14 +511,14 @@ def to_json(self, path_or_buf=None, *args, **kwargs): self, path_or_buf=path_or_buf, *args, **kwargs ) - @_cudf_nvtx_annotate + @_performance_tracking @ioutils.doc_to_hdf() def to_hdf(self, path_or_buf, key, *args, **kwargs): """{docstring}""" cudf.io.hdf.to_hdf(path_or_buf, key, self, *args, **kwargs) - @_cudf_nvtx_annotate + @_performance_tracking def to_string(self): r""" Convert to string @@ -606,7 +607,7 @@ def copy(self, deep: bool = True) -> Self: self.index.copy(deep=False), ) - @_cudf_nvtx_annotate + @_performance_tracking def equals(self, other) -> bool: # noqa: D102 return super().equals(other) and self.index.equals(other.index) @@ -627,12 +628,10 @@ def index(self, value): f"new values have {len(value)} elements" ) # avoid unnecessary cast to Index - if not isinstance(value, BaseIndex): - value = Index(value) - + value = ensure_index(value) self._index = value - @_cudf_nvtx_annotate + @_performance_tracking def replace( self, to_replace=None, @@ -900,7 +899,7 @@ def replace( return self._mimic_inplace(result, inplace=inplace) - @_cudf_nvtx_annotate + @_performance_tracking def clip(self, lower=None, upper=None, inplace=False, axis=1): """ Trim values at input threshold(s). @@ -1026,7 +1025,7 @@ def clip(self, lower=None, upper=None, inplace=False, axis=1): ) return self._mimic_inplace(output, inplace=inplace) - @_cudf_nvtx_annotate + @_performance_tracking def abs(self): """ Return a Series/DataFrame with absolute numeric value of each element. @@ -1052,7 +1051,7 @@ def abs(self): """ return self._unaryop("abs") - @_cudf_nvtx_annotate + @_performance_tracking def dot(self, other, reflect=False): """ Get dot product of frame and other, (binary operator `dot`). @@ -1159,15 +1158,15 @@ def dot(self, other, reflect=False): ) return result.item() - @_cudf_nvtx_annotate + @_performance_tracking def __matmul__(self, other): return self.dot(other) - @_cudf_nvtx_annotate + @_performance_tracking def __rmatmul__(self, other): return self.dot(other, reflect=True) - @_cudf_nvtx_annotate + @_performance_tracking def head(self, n=5): """ Return the first `n` rows. @@ -1246,7 +1245,7 @@ def head(self, n=5): """ return self.iloc[:n] - @_cudf_nvtx_annotate + @_performance_tracking def tail(self, n=5): """ Returns the last n rows as a new DataFrame or Series @@ -1277,7 +1276,7 @@ def tail(self, n=5): return self.iloc[-n:] - @_cudf_nvtx_annotate + @_performance_tracking def pipe(self, func, *args, **kwargs): """ Apply ``func(self, *args, **kwargs)``. @@ -1324,7 +1323,7 @@ def pipe(self, func, *args, **kwargs): """ return cudf.core.common.pipe(self, func, *args, **kwargs) - @_cudf_nvtx_annotate + @_performance_tracking def sum( self, axis=no_default, @@ -1385,7 +1384,7 @@ def sum( **kwargs, ) - @_cudf_nvtx_annotate + @_performance_tracking def product( self, axis=no_default, @@ -1452,7 +1451,7 @@ def product( # Alias for pandas compatibility. prod = product - @_cudf_nvtx_annotate + @_performance_tracking def mean(self, axis=0, skipna=True, numeric_only=False, **kwargs): """ Return the mean of the values for the requested axis. 
@@ -1541,7 +1540,7 @@ def median( **kwargs, ) - @_cudf_nvtx_annotate + @_performance_tracking def std( self, axis=no_default, @@ -1600,7 +1599,7 @@ def std( **kwargs, ) - @_cudf_nvtx_annotate + @_performance_tracking def var( self, axis=no_default, @@ -1658,7 +1657,7 @@ def var( **kwargs, ) - @_cudf_nvtx_annotate + @_performance_tracking def kurtosis(self, axis=0, skipna=True, numeric_only=False, **kwargs): """ Return Fisher's unbiased kurtosis of a sample. @@ -1718,7 +1717,7 @@ def kurtosis(self, axis=0, skipna=True, numeric_only=False, **kwargs): # Alias for kurtosis. kurt = kurtosis - @_cudf_nvtx_annotate + @_performance_tracking def skew(self, axis=0, skipna=True, numeric_only=False, **kwargs): """ Return unbiased Fisher-Pearson skew of a sample. @@ -1777,7 +1776,7 @@ def skew(self, axis=0, skipna=True, numeric_only=False, **kwargs): **kwargs, ) - @_cudf_nvtx_annotate + @_performance_tracking def mask(self, cond, other=None, inplace: bool = False) -> Self | None: """ Replace values where the condition is True. @@ -1839,7 +1838,7 @@ def mask(self, cond, other=None, inplace: bool = False) -> Self | None: return self.where(cond=~cond, other=other, inplace=inplace) - @_cudf_nvtx_annotate + @_performance_tracking @copy_docstring(Rolling) def rolling( self, window, min_periods=None, center=False, axis=0, win_type=None @@ -1879,7 +1878,7 @@ def ewm( times=times, ) - @_cudf_nvtx_annotate + @_performance_tracking def nans_to_nulls(self): """ Convert nans (if any) to nulls @@ -1935,7 +1934,7 @@ def nans_to_nulls(self): self._data._from_columns_like_self(result) ) - @_cudf_nvtx_annotate + @_performance_tracking def interpolate( self, method="linear", @@ -2034,7 +2033,7 @@ def interpolate( ) ) - @_cudf_nvtx_annotate + @_performance_tracking def shift(self, periods=1, freq=None, axis=0, fill_value=None): """Shift values by `periods` positions.""" axis = self._get_axis_from_axis_arg(axis) @@ -2050,7 +2049,7 @@ def shift(self, periods=1, freq=None, axis=0, fill_value=None): self._data._from_columns_like_self(data_columns) ) - @_cudf_nvtx_annotate + @_performance_tracking def truncate(self, before=None, after=None, axis=0, copy=True): """ Truncate a Series or DataFrame before and after some index value. @@ -2398,7 +2397,7 @@ def iloc(self): return self._iloc_indexer_type(self) @property # type:ignore - @_cudf_nvtx_annotate + @_performance_tracking def axes(self): """ Return a list representing the axes of the Series. @@ -2530,7 +2529,7 @@ def squeeze(self, axis: Literal["index", "columns", 0, 1, None] = None): ) return self.iloc[indexer] - @_cudf_nvtx_annotate + @_performance_tracking def scale(self): """ Scale values to [0, 1] in float64 @@ -2565,7 +2564,7 @@ def scale(self): scaled.index = self.index.copy(deep=False) return scaled - @_cudf_nvtx_annotate + @_performance_tracking def sort_index( self, axis=0, @@ -3070,7 +3069,7 @@ def drop_duplicates( self.index.names if not ignore_index else None, ) - @_cudf_nvtx_annotate + @_performance_tracking def duplicated(self, subset=None, keep="first"): """ Return boolean Series denoting duplicate rows. 
@@ -3180,7 +3179,7 @@ def duplicated(self, subset=None, keep="first"): ) return cudf.Series(result, index=self.index) - @_cudf_nvtx_annotate + @_performance_tracking def _empty_like(self, keep_index=True) -> Self: result = self._from_columns_like_self( libcudf.copying.columns_empty_like( @@ -3217,7 +3216,7 @@ def _split(self, splits, keep_index=True): for i in range(len(splits) + 1) ] - @_cudf_nvtx_annotate + @_performance_tracking def bfill(self, value=None, axis=None, inplace=None, limit=None): """ Synonym for :meth:`Series.fillna` with ``method='bfill'``. @@ -3236,7 +3235,7 @@ def bfill(self, value=None, axis=None, inplace=None, limit=None): limit=limit, ) - @_cudf_nvtx_annotate + @_performance_tracking def backfill(self, value=None, axis=None, inplace=None, limit=None): """ Synonym for :meth:`Series.fillna` with ``method='bfill'``. @@ -3256,7 +3255,7 @@ def backfill(self, value=None, axis=None, inplace=None, limit=None): ) return self.bfill(value=value, axis=axis, inplace=inplace, limit=limit) - @_cudf_nvtx_annotate + @_performance_tracking def ffill(self, value=None, axis=None, inplace=None, limit=None): """ Synonym for :meth:`Series.fillna` with ``method='ffill'``. @@ -3275,7 +3274,7 @@ def ffill(self, value=None, axis=None, inplace=None, limit=None): limit=limit, ) - @_cudf_nvtx_annotate + @_performance_tracking def pad(self, value=None, axis=None, inplace=None, limit=None): """ Synonym for :meth:`Series.fillna` with ``method='ffill'``. @@ -3415,7 +3414,7 @@ def add_suffix(self, suffix): raise NotImplementedError @acquire_spill_lock() - @_cudf_nvtx_annotate + @_performance_tracking def _apply(self, func, kernel_getter, *args, **kwargs): """Apply `func` across the rows of the frame.""" if kwargs: @@ -3595,7 +3594,7 @@ def _align_to_index( sort: bool = True, allow_non_unique: bool = False, ) -> Self: - index = cudf.Index(index) + index = ensure_index(index) if self.index.equals(index): return self @@ -3626,7 +3625,7 @@ def _align_to_index( out.index.names = self.index.names return out - @_cudf_nvtx_annotate + @_performance_tracking def _reindex( self, column_names, @@ -4154,7 +4153,7 @@ def dropna( return self._mimic_inplace(result, inplace=inplace) - @_cudf_nvtx_annotate + @_performance_tracking def _drop_na_columns(self, how="any", subset=None, thresh=None): """ Drop columns containing nulls @@ -4471,7 +4470,7 @@ def last(self, offset): slice_func=lambda i: self.iloc[i:], ) - @_cudf_nvtx_annotate + @_performance_tracking def sample( self, n=None, @@ -4751,7 +4750,7 @@ def __array_ufunc__(self, ufunc, method, *inputs, **kwargs): return NotImplemented - @_cudf_nvtx_annotate + @_performance_tracking def repeat(self, repeats, axis=None): """Repeats elements consecutively. @@ -4949,7 +4948,7 @@ def astype( raise e return self - @_cudf_nvtx_annotate + @_performance_tracking def drop( self, labels=None, @@ -5161,7 +5160,7 @@ def drop( if not inplace: return out - @_cudf_nvtx_annotate + @_performance_tracking def _explode(self, explode_column: Any, ignore_index: bool): # Helper function for `explode` in `Series` and `Dataframe`, explodes a # specified nested column. Other columns' corresponding rows are @@ -5200,7 +5199,7 @@ def _explode(self, explode_column: Any, ignore_index: bool): self.index.names if not ignore_index else None, ) - @_cudf_nvtx_annotate + @_performance_tracking def tile(self, count): """Repeats the rows `count` times to form a new Frame. 
@@ -5233,7 +5232,7 @@ def tile(self, count): index_names=self._index_names, ) - @_cudf_nvtx_annotate + @_performance_tracking def groupby( self, by=None, @@ -5283,7 +5282,7 @@ def groupby( ) ) - @_cudf_nvtx_annotate + @_performance_tracking @docutils.doc_apply( doc_binop_template.format( operation="Addition", @@ -5324,7 +5323,7 @@ def add(self, other, axis, level=None, fill_value=None): # noqa: D102 return self._binaryop(other, "__add__", fill_value) - @_cudf_nvtx_annotate + @_performance_tracking @docutils.doc_apply( doc_binop_template.format( operation="Addition", @@ -5365,7 +5364,7 @@ def radd(self, other, axis, level=None, fill_value=None): # noqa: D102 return self._binaryop(other, "__radd__", fill_value) - @_cudf_nvtx_annotate + @_performance_tracking @docutils.doc_apply( doc_binop_template.format( operation="Subtraction", @@ -5408,7 +5407,7 @@ def subtract(self, other, axis, level=None, fill_value=None): # noqa: D102 sub = subtract - @_cudf_nvtx_annotate + @_performance_tracking @docutils.doc_apply( doc_binop_template.format( operation="Subtraction", @@ -5449,7 +5448,7 @@ def rsub(self, other, axis, level=None, fill_value=None): # noqa: D102 return self._binaryop(other, "__rsub__", fill_value) - @_cudf_nvtx_annotate + @_performance_tracking @docutils.doc_apply( doc_binop_template.format( operation="Multiplication", @@ -5492,7 +5491,7 @@ def multiply(self, other, axis, level=None, fill_value=None): # noqa: D102 mul = multiply - @_cudf_nvtx_annotate + @_performance_tracking @docutils.doc_apply( doc_binop_template.format( operation="Multiplication", @@ -5533,7 +5532,7 @@ def rmul(self, other, axis, level=None, fill_value=None): # noqa: D102 return self._binaryop(other, "__rmul__", fill_value) - @_cudf_nvtx_annotate + @_performance_tracking @docutils.doc_apply( doc_binop_template.format( operation="Modulo", @@ -5574,7 +5573,7 @@ def mod(self, other, axis, level=None, fill_value=None): # noqa: D102 return self._binaryop(other, "__mod__", fill_value) - @_cudf_nvtx_annotate + @_performance_tracking @docutils.doc_apply( doc_binop_template.format( operation="Modulo", @@ -5615,7 +5614,7 @@ def rmod(self, other, axis, level=None, fill_value=None): # noqa: D102 return self._binaryop(other, "__rmod__", fill_value) - @_cudf_nvtx_annotate + @_performance_tracking @docutils.doc_apply( doc_binop_template.format( operation="Exponential", @@ -5656,7 +5655,7 @@ def pow(self, other, axis, level=None, fill_value=None): # noqa: D102 return self._binaryop(other, "__pow__", fill_value) - @_cudf_nvtx_annotate + @_performance_tracking @docutils.doc_apply( doc_binop_template.format( operation="Exponential", @@ -5697,7 +5696,7 @@ def rpow(self, other, axis, level=None, fill_value=None): # noqa: D102 return self._binaryop(other, "__rpow__", fill_value) - @_cudf_nvtx_annotate + @_performance_tracking @docutils.doc_apply( doc_binop_template.format( operation="Integer division", @@ -5738,7 +5737,7 @@ def floordiv(self, other, axis, level=None, fill_value=None): # noqa: D102 return self._binaryop(other, "__floordiv__", fill_value) - @_cudf_nvtx_annotate + @_performance_tracking @docutils.doc_apply( doc_binop_template.format( operation="Integer division", @@ -5779,7 +5778,7 @@ def rfloordiv(self, other, axis, level=None, fill_value=None): # noqa: D102 return self._binaryop(other, "__rfloordiv__", fill_value) - @_cudf_nvtx_annotate + @_performance_tracking @docutils.doc_apply( doc_binop_template.format( operation="Floating division", @@ -5824,7 +5823,7 @@ def truediv(self, other, axis, level=None, fill_value=None): # 
noqa: D102 div = truediv divide = truediv - @_cudf_nvtx_annotate + @_performance_tracking @docutils.doc_apply( doc_binop_template.format( operation="Floating division", @@ -5868,7 +5867,7 @@ def rtruediv(self, other, axis, level=None, fill_value=None): # noqa: D102 # Alias for rtruediv rdiv = rtruediv - @_cudf_nvtx_annotate + @_performance_tracking @docutils.doc_apply( doc_binop_template.format( operation="Equal to", @@ -5908,7 +5907,7 @@ def eq(self, other, axis="columns", level=None, fill_value=None): # noqa: D102 other=other, op="__eq__", fill_value=fill_value, can_reindex=True ) - @_cudf_nvtx_annotate + @_performance_tracking @docutils.doc_apply( doc_binop_template.format( operation="Not equal to", @@ -5948,7 +5947,7 @@ def ne(self, other, axis="columns", level=None, fill_value=None): # noqa: D102 other=other, op="__ne__", fill_value=fill_value, can_reindex=True ) - @_cudf_nvtx_annotate + @_performance_tracking @docutils.doc_apply( doc_binop_template.format( operation="Less than", @@ -5988,7 +5987,7 @@ def lt(self, other, axis="columns", level=None, fill_value=None): # noqa: D102 other=other, op="__lt__", fill_value=fill_value, can_reindex=True ) - @_cudf_nvtx_annotate + @_performance_tracking @docutils.doc_apply( doc_binop_template.format( operation="Less than or equal to", @@ -6028,7 +6027,7 @@ def le(self, other, axis="columns", level=None, fill_value=None): # noqa: D102 other=other, op="__le__", fill_value=fill_value, can_reindex=True ) - @_cudf_nvtx_annotate + @_performance_tracking @docutils.doc_apply( doc_binop_template.format( operation="Greater than", @@ -6068,7 +6067,7 @@ def gt(self, other, axis="columns", level=None, fill_value=None): # noqa: D102 other=other, op="__gt__", fill_value=fill_value, can_reindex=True ) - @_cudf_nvtx_annotate + @_performance_tracking @docutils.doc_apply( doc_binop_template.format( operation="Greater than or equal to", @@ -6123,7 +6122,7 @@ def _preprocess_subset(self, subset): raise KeyError(f"columns {diff} do not exist") return subset - @_cudf_nvtx_annotate + @_performance_tracking def rank( self, axis=0, @@ -6291,7 +6290,7 @@ def _check_duplicate_level_names(specified, level_names): ) -@_cudf_nvtx_annotate +@_performance_tracking def _get_replacement_values_for_columns( to_replace: Any, value: Any, columns_dtype_map: dict[Any, Any] ) -> tuple[dict[Any, bool], dict[Any, Any], dict[Any, Any]]: @@ -6458,7 +6457,7 @@ def _is_series(obj): return isinstance(obj, Frame) and obj.ndim == 1 and obj.index is not None -@_cudf_nvtx_annotate +@_performance_tracking def _drop_rows_by_labels( obj: DataFrameOrSeries, labels: ColumnLike | abc.Iterable | str, diff --git a/python/cudf/cudf/core/multiindex.py b/python/cudf/cudf/core/multiindex.py index a01242d957d..9cbe863142b 100644 --- a/python/cudf/cudf/core/multiindex.py +++ b/python/cudf/cudf/core/multiindex.py @@ -23,15 +23,17 @@ from cudf.api.types import is_integer, is_list_like, is_object_dtype from cudf.core import column from cudf.core._base_index import _return_get_indexer_result +from cudf.core.column_accessor import ColumnAccessor from cudf.core.frame import Frame from cudf.core.index import ( BaseIndex, _get_indexer_basic, _lexsorted_equal_range, + ensure_index, ) from cudf.core.join._join_helpers import _match_join_keys from cudf.utils.dtypes import is_column_like -from cudf.utils.nvtx_annotation import _cudf_nvtx_annotate +from cudf.utils.performance_tracking import _performance_tracking from cudf.utils.utils import NotIterable, _external_only_api, _is_same_name if TYPE_CHECKING: @@ -125,7 +127,7 
@@ class MultiIndex(Frame, BaseIndex, NotIterable): ) """ - @_cudf_nvtx_annotate + @_performance_tracking def __init__( self, levels=None, @@ -172,7 +174,7 @@ def __init__( "codes and is inconsistent!" ) - levels = [cudf.Index(level) for level in levels] + levels = [ensure_index(level) for level in levels] if len(levels) != len(codes._data): raise ValueError( @@ -210,12 +212,12 @@ def __init__( self.names = names @property # type: ignore - @_cudf_nvtx_annotate + @_performance_tracking def names(self): return self._names @names.setter # type: ignore - @_cudf_nvtx_annotate + @_performance_tracking def names(self, value): if value is None: value = [None] * self.nlevels @@ -241,13 +243,13 @@ def names(self, value): ) self._names = pd.core.indexes.frozen.FrozenList(value) - @_cudf_nvtx_annotate + @_performance_tracking def to_series(self, index=None, name=None): raise NotImplementedError( "MultiIndex.to_series isn't implemented yet." ) - @_cudf_nvtx_annotate + @_performance_tracking def astype(self, dtype, copy: bool = True): if not is_object_dtype(dtype): raise TypeError( @@ -256,7 +258,7 @@ def astype(self, dtype, copy: bool = True): ) return self - @_cudf_nvtx_annotate + @_performance_tracking def rename(self, names, inplace=False): """ Alter MultiIndex level names @@ -303,7 +305,7 @@ def rename(self, names, inplace=False): """ return self.set_names(names, level=None, inplace=inplace) - @_cudf_nvtx_annotate + @_performance_tracking def set_names(self, names, level=None, inplace=False): names_is_list_like = is_list_like(names) level_is_list_like = is_list_like(level) @@ -341,7 +343,7 @@ def set_names(self, names, level=None, inplace=False): return self._set_names(names=names, inplace=inplace) @classmethod - @_cudf_nvtx_annotate + @_performance_tracking def _from_data( cls, data: MutableMapping, @@ -353,16 +355,16 @@ def _from_data( return obj @property # type: ignore - @_cudf_nvtx_annotate + @_performance_tracking def name(self): return self._name @name.setter # type: ignore - @_cudf_nvtx_annotate + @_performance_tracking def name(self, value): self._name = value - @_cudf_nvtx_annotate + @_performance_tracking def copy( self, names=None, @@ -431,7 +433,7 @@ def copy( return mi - @_cudf_nvtx_annotate + @_performance_tracking def __repr__(self): max_seq_items = pd.get_option("display.max_seq_items") or len(self) @@ -446,45 +448,26 @@ def __repr__(self): ) preprocess = self.take(indices) else: - preprocess = self.copy(deep=False) - - if any(col.has_nulls() for col in preprocess._data.columns): - preprocess_df = preprocess.to_frame(index=False) - for name, col in preprocess._data.items(): - if isinstance( - col, - ( - column.datetime.DatetimeColumn, - column.timedelta.TimeDeltaColumn, - ), - ): - preprocess_df[name] = col.astype("str").fillna( - str(cudf.NaT) - ) + preprocess = self - tuples_list = list( - zip( - *list( - map(lambda val: pd.NA if val is None else val, col) - for col in preprocess_df.to_arrow() - .to_pydict() - .values() - ) - ) - ) + arrays = [] + for name, col in zip(self.names, preprocess._columns): + try: + pd_idx = col.to_pandas(nullable=True) + except NotImplementedError: + pd_idx = col.to_pandas(nullable=False) + pd_idx.name = name + arrays.append(pd_idx) - preprocess = preprocess.to_pandas(nullable=True) - preprocess.values[:] = tuples_list - else: - preprocess = preprocess.to_pandas(nullable=True) + preprocess_pd = pd.MultiIndex.from_arrays(arrays) - output = repr(preprocess) + output = repr(preprocess_pd) output_prefix = self.__class__.__name__ + "(" output = 
output.lstrip(output_prefix) lines = output.split("\n") if len(lines) > 1: - if "length=" in lines[-1] and len(self) != len(preprocess): + if "length=" in lines[-1] and len(self) != len(preprocess_pd): last_line = lines[-1] length_index = last_line.index("length=") last_line = last_line[:length_index] + f"length={len(self)})" @@ -502,7 +485,7 @@ def _codes_frame(self): @property # type: ignore @_external_only_api("Use ._codes_frame instead") - @_cudf_nvtx_annotate + @_performance_tracking def codes(self): """ Returns the codes of the underlying MultiIndex. @@ -528,13 +511,13 @@ def get_slice_bound(self, label, side, kind=None): raise NotImplementedError() @property # type: ignore - @_cudf_nvtx_annotate + @_performance_tracking def nlevels(self): """Integer number of levels in this MultiIndex.""" return self._num_columns @property # type: ignore - @_cudf_nvtx_annotate + @_performance_tracking def levels(self): """ Returns list of levels in the MultiIndex @@ -566,12 +549,12 @@ def levels(self): return self._levels @property # type: ignore - @_cudf_nvtx_annotate + @_performance_tracking def ndim(self) -> int: """Dimension of the data. For MultiIndex ndim is always 2.""" return 2 - @_cudf_nvtx_annotate + @_performance_tracking def _get_level_label(self, level): """Get name of the level. @@ -588,7 +571,7 @@ def _get_level_label(self, level): else: return self._data.names[level] - @_cudf_nvtx_annotate + @_performance_tracking def isin(self, values, level=None): """Return a boolean array where the index values are in values. @@ -687,7 +670,7 @@ def where(self, cond, other=None, inplace=False): ".where is not supported for MultiIndex operations" ) - @_cudf_nvtx_annotate + @_performance_tracking def _compute_levels_and_codes(self): levels = [] @@ -701,7 +684,7 @@ def _compute_levels_and_codes(self): self._levels = levels self._codes = cudf.DataFrame._from_data(codes) - @_cudf_nvtx_annotate + @_performance_tracking def _compute_validity_mask(self, index, row_tuple, max_length): """Computes the valid set of indices of values in the lookup""" lookup = cudf.DataFrame() @@ -749,7 +732,7 @@ def _compute_validity_mask(self, index, row_tuple, max_length): raise KeyError(row) return result - @_cudf_nvtx_annotate + @_performance_tracking def _get_valid_indices_by_tuple(self, index, row_tuple, max_length): # Instructions for Slicing # if tuple, get first and last elements of tuple @@ -779,7 +762,7 @@ def _get_valid_indices_by_tuple(self, index, row_tuple, max_length): return row_tuple return self._compute_validity_mask(index, row_tuple, max_length) - @_cudf_nvtx_annotate + @_performance_tracking def _index_and_downcast(self, result, index, index_key): if isinstance(index_key, (numbers.Number, slice)): index_key = [index_key] @@ -847,7 +830,7 @@ def _index_and_downcast(self, result, index, index_key): result.index = index return result - @_cudf_nvtx_annotate + @_performance_tracking def _get_row_major( self, df: DataFrameOrSeries, @@ -874,7 +857,7 @@ def _get_row_major( final = self._index_and_downcast(result, result.index, row_tuple) return final - @_cudf_nvtx_annotate + @_performance_tracking def _validate_indexer( self, indexer: numbers.Number @@ -902,7 +885,7 @@ def _validate_indexer( for i in indexer: self._validate_indexer(i) - @_cudf_nvtx_annotate + @_performance_tracking def __eq__(self, other): if isinstance(other, MultiIndex): return np.array( @@ -916,12 +899,12 @@ def __eq__(self, other): return NotImplemented @property # type: ignore - @_cudf_nvtx_annotate + @_performance_tracking def size(self): # 
The size of a MultiIndex is only dependent on the number of rows. return self._num_rows - @_cudf_nvtx_annotate + @_performance_tracking def take(self, indices): if isinstance(indices, cudf.Series) and indices.has_nulls: raise ValueError("Column must have no nulls.") @@ -929,7 +912,7 @@ def take(self, indices): obj.names = self.names return obj - @_cudf_nvtx_annotate + @_performance_tracking def serialize(self): header, frames = super().serialize() # Overwrite the names in _data with the true names. @@ -937,7 +920,7 @@ def serialize(self): return header, frames @classmethod - @_cudf_nvtx_annotate + @_performance_tracking def deserialize(cls, header, frames): # Spoof the column names to construct the frame, then set manually. column_names = pickle.loads(header["column_names"]) @@ -945,7 +928,7 @@ def deserialize(cls, header, frames): obj = super().deserialize(header, frames) return obj._set_names(column_names) - @_cudf_nvtx_annotate + @_performance_tracking def __getitem__(self, index): flatten = isinstance(index, int) @@ -972,7 +955,7 @@ def __getitem__(self, index): result._levels = self._levels return result - @_cudf_nvtx_annotate + @_performance_tracking def to_frame(self, index=True, name=no_default, allow_duplicates=False): """ Create a DataFrame with the levels of the MultiIndex as columns. @@ -1022,44 +1005,34 @@ def to_frame(self, index=True, name=no_default, allow_duplicates=False): a c a c b d b d """ - # TODO: Currently this function makes a shallow copy, which is - # incorrect. We want to make a deep copy, otherwise further - # modifications of the resulting DataFrame will affect the MultiIndex. if name is no_default: column_names = [ level if name is None else name for level, name in enumerate(self.names) ] + elif not is_list_like(name): + raise TypeError( + "'name' must be a list / sequence of column names." + ) + elif len(name) != len(self.levels): + raise ValueError( + "'name' should have the same length as " + "number of levels on index." + ) else: - if not is_list_like(name): - raise TypeError( - "'name' must be a list / sequence of column names." - ) - if len(name) != len(self.levels): - raise ValueError( - "'name' should have the same length as " - "number of levels on index." - ) column_names = name - all_none_names = None - if not ( - all_none_names := all(x is None for x in column_names) - ) and len(column_names) != len(set(column_names)): + if len(column_names) != len(set(column_names)): raise ValueError("Duplicate column names are not allowed") - df = cudf.DataFrame._from_data( - data=self._data, - columns=column_names - if name is not no_default and not all_none_names - else None, + ca = ColumnAccessor( + dict(zip(column_names, (col.copy() for col in self._columns))), + verify=False, + ) + return cudf.DataFrame._from_data( + data=ca, index=self if index else None ) - if index: - df = df.set_index(self) - - return df - - @_cudf_nvtx_annotate + @_performance_tracking def get_level_values(self, level): """ Return the values at the requested level @@ -1115,7 +1088,7 @@ def _is_interval(self): return False @classmethod - @_cudf_nvtx_annotate + @_performance_tracking def _concat(cls, objs): source_data = [o.to_frame(index=False) for o in objs] @@ -1135,7 +1108,7 @@ def _concat(cls, objs): return cudf.MultiIndex.from_frame(source_data, names=names) @classmethod - @_cudf_nvtx_annotate + @_performance_tracking def from_tuples(cls, tuples, names=None): """ Convert list of tuples to MultiIndex. 
@@ -1173,12 +1146,12 @@ def from_tuples(cls, tuples, names=None): pdi = pd.MultiIndex.from_tuples(tuples, names=names) return cls.from_pandas(pdi) - @_cudf_nvtx_annotate + @_performance_tracking def to_numpy(self): return self.values_host @property # type: ignore - @_cudf_nvtx_annotate + @_performance_tracking def values_host(self): """ Return a numpy representation of the MultiIndex. @@ -1206,7 +1179,7 @@ def values_host(self): return self.to_pandas().values @property # type: ignore - @_cudf_nvtx_annotate + @_performance_tracking def values(self): """ Return a CuPy representation of the MultiIndex. @@ -1242,8 +1215,8 @@ def values(self): return self.to_frame(index=False).values @classmethod - @_cudf_nvtx_annotate - def from_frame(cls, df, names=None): + @_performance_tracking + def from_frame(cls, df: pd.DataFrame | cudf.DataFrame, names=None): """ Make a MultiIndex from a DataFrame. @@ -1317,7 +1290,7 @@ def from_frame(cls, df, names=None): return obj @classmethod - @_cudf_nvtx_annotate + @_performance_tracking def from_product(cls, arrays, names=None): """ Make a MultiIndex from the cartesian product of multiple iterables. @@ -1359,7 +1332,7 @@ def from_product(cls, arrays, names=None): return cls.from_pandas(pdi) @classmethod - @_cudf_nvtx_annotate + @_performance_tracking def from_arrays( cls, arrays, @@ -1418,7 +1391,7 @@ def from_arrays( codes=codes, levels=levels, sortorder=sortorder, names=names ) - @_cudf_nvtx_annotate + @_performance_tracking def _poplevels(self, level): """ Remove and return the specified levels from self. @@ -1469,7 +1442,7 @@ def _poplevels(self, level): return popped - @_cudf_nvtx_annotate + @_performance_tracking def swaplevel(self, i=-2, j=-1): """ Swap level i with level j. @@ -1520,7 +1493,7 @@ def swaplevel(self, i=-2, j=-1): midx = midx.set_names(self.names) return midx - @_cudf_nvtx_annotate + @_performance_tracking def droplevel(self, level=-1): """ Removes the specified levels from the MultiIndex. 
@@ -1583,7 +1556,7 @@ def droplevel(self, level=-1): else: return mi - @_cudf_nvtx_annotate + @_performance_tracking def to_pandas( self, *, nullable: bool = False, arrow_type: bool = False ) -> pd.MultiIndex: @@ -1600,7 +1573,7 @@ def to_pandas( ) @classmethod - @_cudf_nvtx_annotate + @_performance_tracking def from_pandas(cls, multiindex: pd.MultiIndex, nan_as_null=no_default): """ Convert from a Pandas MultiIndex @@ -1635,7 +1608,7 @@ def from_pandas(cls, multiindex: pd.MultiIndex, nan_as_null=no_default): ) @cached_property # type: ignore - @_cudf_nvtx_annotate + @_performance_tracking def is_unique(self): return len(self) == len(self.unique()) @@ -1643,7 +1616,7 @@ def is_unique(self): def dtype(self): return np.dtype("O") - @_cudf_nvtx_annotate + @_performance_tracking def _is_sorted(self, ascending=None, null_position=None) -> bool: """ Returns a boolean indicating whether the data of the MultiIndex are sorted @@ -1689,7 +1662,7 @@ def _is_sorted(self, ascending=None, null_position=None) -> bool: ) @cached_property # type: ignore - @_cudf_nvtx_annotate + @_performance_tracking def is_monotonic_increasing(self) -> bool: """ Return if the index is monotonic increasing @@ -1698,7 +1671,7 @@ def is_monotonic_increasing(self) -> bool: return self._is_sorted(ascending=None, null_position=None) @cached_property # type: ignore - @_cudf_nvtx_annotate + @_performance_tracking def is_monotonic_decreasing(self) -> bool: """ Return if the index is monotonic decreasing @@ -1708,7 +1681,7 @@ def is_monotonic_decreasing(self) -> bool: ascending=[False] * len(self.levels), null_position=None ) - @_cudf_nvtx_annotate + @_performance_tracking def fillna(self, value): """ Fill null values with the specified value. @@ -1749,11 +1722,11 @@ def fillna(self, value): return super().fillna(value=value) - @_cudf_nvtx_annotate + @_performance_tracking def unique(self): return self.drop_duplicates(keep="first") - @_cudf_nvtx_annotate + @_performance_tracking def nunique(self, dropna: bool = True) -> int: mi = self.dropna(how="all") if dropna else self return len(mi.unique()) @@ -1768,7 +1741,7 @@ def _clean_nulls_from_index(self): index_df._clean_nulls_from_dataframe(index_df), names=self.names ) - @_cudf_nvtx_annotate + @_performance_tracking def memory_usage(self, deep=False): usage = sum(col.memory_usage for col in self._data.columns) if self.levels: @@ -1779,13 +1752,13 @@ def memory_usage(self, deep=False): usage += col.memory_usage return usage - @_cudf_nvtx_annotate + @_performance_tracking def difference(self, other, sort=None): if hasattr(other, "to_pandas"): other = other.to_pandas() return cudf.from_pandas(self.to_pandas().difference(other, sort)) - @_cudf_nvtx_annotate + @_performance_tracking def append(self, other): """ Append a collection of MultiIndex objects together @@ -1848,7 +1821,7 @@ def append(self, other): return MultiIndex._concat(to_concat) - @_cudf_nvtx_annotate + @_performance_tracking def __array_function__(self, func, types, args, kwargs): cudf_df_module = MultiIndex @@ -1895,7 +1868,7 @@ def _level_index_from_level(self, level): ) from None return level - @_cudf_nvtx_annotate + @_performance_tracking def get_indexer(self, target, method=None, limit=None, tolerance=None): if tolerance is not None: raise NotImplementedError( @@ -1954,7 +1927,7 @@ def get_indexer(self, target, method=None, limit=None, tolerance=None): return _return_get_indexer_result(result_series.to_cupy()) - @_cudf_nvtx_annotate + @_performance_tracking def get_loc(self, key): is_sorted = ( 
self.is_monotonic_increasing or self.is_monotonic_decreasing @@ -2028,7 +2001,7 @@ def _maybe_match_names(self, other): for self_name, other_name in zip(self.names, other.names) ] - @_cudf_nvtx_annotate + @_performance_tracking def union(self, other, sort=None): if not isinstance(other, MultiIndex): msg = "other must be a MultiIndex or a list of tuples" @@ -2052,7 +2025,7 @@ def union(self, other, sort=None): return self._union(other, sort=sort) - @_cudf_nvtx_annotate + @_performance_tracking def _union(self, other, sort=None): # TODO: When to_frame is refactored to return a # deep copy in future, we should push most of the common @@ -2078,7 +2051,7 @@ def _union(self, other, sort=None): return midx.sort_values() return midx - @_cudf_nvtx_annotate + @_performance_tracking def _intersection(self, other, sort=None): if self.names != other.names: deep = True @@ -2101,14 +2074,14 @@ def _intersection(self, other, sort=None): return midx.sort_values() return midx - @_cudf_nvtx_annotate + @_performance_tracking def _copy_type_metadata(self: Self, other: Self) -> Self: res = super()._copy_type_metadata(other) if isinstance(other, MultiIndex): res._names = other._names return res - @_cudf_nvtx_annotate + @_performance_tracking def _split_columns_by_levels( self, levels: tuple, *, in_levels: bool ) -> Generator[tuple[Any, column.ColumnBase], None, None]: @@ -2127,7 +2100,7 @@ def _split_columns_by_levels( elif not in_levels and i not in level_indices: yield name, col - @_cudf_nvtx_annotate + @_performance_tracking def _new_index_for_reset_index( self, levels: tuple | None, name ) -> None | BaseIndex: diff --git a/python/cudf/cudf/core/reshape.py b/python/cudf/cudf/core/reshape.py index 903c4fe7df5..1120642947b 100644 --- a/python/cudf/cudf/core/reshape.py +++ b/python/cudf/cudf/core/reshape.py @@ -300,51 +300,31 @@ def concat(objs, axis=0, join="outer", ignore_index=False, sort=None): obj = objs[0] if ignore_index: if axis == 1: - result = cudf.DataFrame._from_data( - data=obj._data.copy(deep=True), - index=obj.index.copy(deep=True), - ) - # The DataFrame constructor for dict-like data (such as the - # ColumnAccessor given by obj._data here) will drop any columns - # in the data that are not in `columns`, so we have to rename - # after construction. - result.columns = pd.RangeIndex(len(obj._data.names)) - else: if isinstance(obj, cudf.Series): - result = cudf.Series._from_data( - data=obj._data.copy(deep=True), - index=cudf.RangeIndex(len(obj)), - ) - elif isinstance(obj, pd.Series): - result = cudf.Series( - data=obj, - index=cudf.RangeIndex(len(obj)), - ) + result = obj.to_frame() else: - result = cudf.DataFrame._from_data( - data=obj._data.copy(deep=True), - index=cudf.RangeIndex(len(obj)), - ) + result = obj.copy(deep=True) + result.columns = pd.RangeIndex(len(result._data)) + else: + result = type(obj)._from_data( + data=obj._data.copy(deep=True), + index=cudf.RangeIndex(len(obj)), + ) + elif axis == 0: + result = obj.copy(deep=True) else: - if axis == 0: - result = obj.copy() + if isinstance(obj, cudf.Series): + result = obj.to_frame() else: - data = obj._data.copy(deep=True) - if isinstance(obj, cudf.Series) and obj.name is None: - # If the Series has no name, pandas renames it to 0. 
- data[0] = data.pop(None) - result = cudf.DataFrame._from_data( - data, index=obj.index.copy(deep=True) + result = obj.copy(deep=True) + if keys is not None and isinstance(result, cudf.DataFrame): + k = keys[0] + result.columns = cudf.MultiIndex.from_tuples( + [ + (k, *c) if isinstance(c, tuple) else (k, c) + for c in result._column_names + ] ) - if keys is not None: - if isinstance(result, cudf.DataFrame): - k = keys[0] - result.columns = cudf.MultiIndex.from_tuples( - [ - (k, *c) if isinstance(c, tuple) else (k, c) - for c in result._column_names - ] - ) if isinstance(result, cudf.Series) and axis == 0: # sort has no effect for series concatted along axis 0 @@ -1179,7 +1159,6 @@ def unstack(df, level, fill_value=None): if pd.api.types.is_list_like(level): if not level: return df - df = df.copy(deep=False) if not isinstance(df.index, cudf.MultiIndex): dtype = df._columns[0].dtype for col in df._columns: @@ -1195,6 +1174,7 @@ def unstack(df, level, fill_value=None): ) return res else: + df = df.copy(deep=False) columns = df.index._poplevels(level) index = df.index result = _pivot(df, index, columns) diff --git a/python/cudf/cudf/core/series.py b/python/cudf/cudf/core/series.py index 15ad0813601..97b6bbec2d4 100644 --- a/python/cudf/cudf/core/series.py +++ b/python/cudf/cudf/core/series.py @@ -48,7 +48,7 @@ from cudf.core.column.struct import StructMethods from cudf.core.column_accessor import ColumnAccessor from cudf.core.groupby.groupby import SeriesGroupBy, groupby_doc_template -from cudf.core.index import BaseIndex, DatetimeIndex, RangeIndex, as_index +from cudf.core.index import BaseIndex, DatetimeIndex, RangeIndex, ensure_index from cudf.core.indexed_frame import ( IndexedFrame, _FrameIndexer, @@ -68,7 +68,7 @@ is_mixed_with_object_dtype, to_cudf_compatible_scalar, ) -from cudf.utils.nvtx_annotation import _cudf_nvtx_annotate +from cudf.utils.performance_tracking import _performance_tracking if TYPE_CHECKING: from cudf._typing import ( @@ -179,7 +179,7 @@ class _SeriesIlocIndexer(_FrameIndexer): _frame: cudf.Series - @_cudf_nvtx_annotate + @_performance_tracking def __getitem__(self, arg): indexing_spec = indexing_utils.parse_row_iloc_indexer( indexing_utils.destructure_series_iloc_indexer(arg, self._frame), @@ -187,7 +187,7 @@ def __getitem__(self, arg): ) return self._frame._getitem_preprocessed(indexing_spec) - @_cudf_nvtx_annotate + @_performance_tracking def __setitem__(self, key, value): if isinstance(key, tuple): key = list(key) @@ -274,7 +274,7 @@ class _SeriesLocIndexer(_FrameIndexer): Label-based selection """ - @_cudf_nvtx_annotate + @_performance_tracking def __getitem__(self, arg: Any) -> ScalarLike | DataFrameOrSeries: if isinstance(arg, pd.MultiIndex): arg = cudf.from_pandas(arg) @@ -301,7 +301,7 @@ def __getitem__(self, arg: Any) -> ScalarLike | DataFrameOrSeries: return self._frame.iloc[arg] - @_cudf_nvtx_annotate + @_performance_tracking def __setitem__(self, key, value): try: key = self._loc_to_iloc(key) @@ -476,7 +476,7 @@ def _constructor_expanddim(self): return cudf.DataFrame @classmethod - @_cudf_nvtx_annotate + @_performance_tracking def from_categorical(cls, categorical, codes=None): """Creates from a pandas.Categorical @@ -517,7 +517,7 @@ def from_categorical(cls, categorical, codes=None): return Series(data=col) @classmethod - @_cudf_nvtx_annotate + @_performance_tracking def from_masked_array(cls, data, mask, null_count=None): """Create a Series with null-mask. 
This is equivalent to: @@ -566,7 +566,7 @@ def from_masked_array(cls, data, mask, null_count=None): col = as_column(data).set_mask(mask) return cls(data=col) - @_cudf_nvtx_annotate + @_performance_tracking def __init__( self, data=None, @@ -584,14 +584,12 @@ def __init__( data = {} if isinstance(data, (pd.Series, pd.Index, BaseIndex, Series)): - if copy: + if copy and not isinstance(data, (pd.Series, pd.Index)): data = data.copy(deep=True) name_from_data = data.name column = as_column(data, nan_as_null=nan_as_null, dtype=dtype) - if isinstance(data, pd.Series): - index_from_data = cudf.Index(data.index) - elif isinstance(data, Series): - index_from_data = data.index + if isinstance(data, (pd.Series, Series)): + index_from_data = ensure_index(data.index) elif isinstance(data, ColumnAccessor): raise TypeError( "Use cudf.Series._from_data for constructing a Series from " @@ -642,7 +640,7 @@ def __init__( name = name_from_data if index is not None: - index = cudf.Index(index) + index = ensure_index(index) if index_from_data is not None: first_index = index_from_data @@ -663,7 +661,7 @@ def __init__( self._check_data_index_length_match() @classmethod - @_cudf_nvtx_annotate + @_performance_tracking def _from_data( cls, data: MutableMapping, @@ -675,18 +673,18 @@ def _from_data( out.name = name return out - @_cudf_nvtx_annotate + @_performance_tracking def _from_data_like_self(self, data: MutableMapping): out = super()._from_data_like_self(data) out.name = self.name return out - @_cudf_nvtx_annotate + @_performance_tracking def __contains__(self, item): return item in self.index @classmethod - @_cudf_nvtx_annotate + @_performance_tracking def from_pandas(cls, s: pd.Series, nan_as_null=no_default): """ Convert from a Pandas Series. @@ -735,7 +733,7 @@ def from_pandas(cls, s: pd.Series, nan_as_null=no_default): return result @property # type: ignore - @_cudf_nvtx_annotate + @_performance_tracking def is_unique(self): """Return boolean if values in the object are unique. @@ -746,7 +744,7 @@ def is_unique(self): return self._column.is_unique @property # type: ignore - @_cudf_nvtx_annotate + @_performance_tracking def dt(self): """ Accessor object for datetime-like properties of the Series values. @@ -788,7 +786,7 @@ def dt(self): ) @property # type: ignore - @_cudf_nvtx_annotate + @_performance_tracking def hasnans(self): """ Return True if there are any NaNs or nulls. @@ -829,7 +827,7 @@ def hasnans(self): """ return self._column.has_nulls(include_nan=True) - @_cudf_nvtx_annotate + @_performance_tracking def serialize(self): header, frames = super().serialize() @@ -842,7 +840,7 @@ def serialize(self): return header, frames @classmethod - @_cudf_nvtx_annotate + @_performance_tracking def deserialize(cls, header, frames): index_nframes = header["index_frame_count"] obj = super().deserialize( @@ -855,7 +853,7 @@ def deserialize(cls, header, frames): return obj - @_cudf_nvtx_annotate + @_performance_tracking def drop( self, labels=None, @@ -884,7 +882,7 @@ def tolist(self): # noqa: D102 to_list = tolist - @_cudf_nvtx_annotate + @_performance_tracking def to_dict(self, into: type[dict] = dict) -> dict: """ Convert Series to {label -> value} dict or dict-like object. @@ -923,7 +921,7 @@ def to_dict(self, into: type[dict] = dict) -> dict: """ return self.to_pandas().to_dict(into=into) - @_cudf_nvtx_annotate + @_performance_tracking def reindex(self, *args, **kwargs): """ Conform Series to new index. 
@@ -996,7 +994,7 @@ def reindex(self, *args, **kwargs): series.name = self.name return series - @_cudf_nvtx_annotate + @_performance_tracking @docutils.doc_apply( doc_reset_index_template.format( klass="Series", @@ -1081,7 +1079,7 @@ def reset_index( inplace=inplace, ) - @_cudf_nvtx_annotate + @_performance_tracking def to_frame(self, name=None): """Convert Series into a DataFrame @@ -1124,13 +1122,13 @@ def to_frame(self, name=None): return cudf.DataFrame({col: self._column}, index=self.index) - @_cudf_nvtx_annotate + @_performance_tracking def memory_usage(self, index=True, deep=False): return self._column.memory_usage + ( self.index.memory_usage() if index else 0 ) - @_cudf_nvtx_annotate + @_performance_tracking def __array_function__(self, func, types, args, kwargs): if "out" in kwargs or not all(issubclass(t, Series) for t in types): return NotImplemented @@ -1191,7 +1189,7 @@ def __array_function__(self, func, types, args, kwargs): return NotImplemented - @_cudf_nvtx_annotate + @_performance_tracking def map(self, arg, na_action=None) -> "Series": """ Map values of Series according to input correspondence. @@ -1333,7 +1331,7 @@ def _getitem_preprocessed( return self._empty_like(keep_index=True) assert_never(spec) - @_cudf_nvtx_annotate + @_performance_tracking def __getitem__(self, arg): if isinstance(arg, slice): return self.iloc[arg] @@ -1344,7 +1342,7 @@ def __getitem__(self, arg): items = SingleColumnFrame.__iter__ - @_cudf_nvtx_annotate + @_performance_tracking def __setitem__(self, key, value): if isinstance(key, slice): self.iloc[key] = value @@ -1495,36 +1493,36 @@ def _make_operands_and_index_for_binop( @copy_docstring(CategoricalAccessor) # type: ignore @property - @_cudf_nvtx_annotate + @_performance_tracking def cat(self): return CategoricalAccessor(parent=self) @copy_docstring(StringMethods) # type: ignore @property - @_cudf_nvtx_annotate + @_performance_tracking def str(self): return StringMethods(parent=self) @copy_docstring(ListMethods) # type: ignore @property - @_cudf_nvtx_annotate + @_performance_tracking def list(self): return ListMethods(parent=self) @copy_docstring(StructMethods) # type: ignore @property - @_cudf_nvtx_annotate + @_performance_tracking def struct(self): return StructMethods(parent=self) @property # type: ignore - @_cudf_nvtx_annotate + @_performance_tracking def dtype(self): """The dtype of the Series.""" return self._column.dtype @classmethod - @_cudf_nvtx_annotate + @_performance_tracking def _concat(cls, objs, axis=0, index=True): # Concatenate index if not provided if index is True: @@ -1590,25 +1588,25 @@ def _concat(cls, objs, axis=0, index=True): return cls(data=col, index=index, name=name) @property # type: ignore - @_cudf_nvtx_annotate + @_performance_tracking def valid_count(self): """Number of non-null values""" return len(self) - self._column.null_count @property # type: ignore - @_cudf_nvtx_annotate + @_performance_tracking def null_count(self): """Number of null values""" return self._column.null_count @property # type: ignore - @_cudf_nvtx_annotate + @_performance_tracking def nullable(self): """A boolean indicating whether a null-mask is needed""" return self._column.nullable @property # type: ignore - @_cudf_nvtx_annotate + @_performance_tracking def has_nulls(self): """ Indicator whether Series contains null values. 
@@ -1637,7 +1635,7 @@ def has_nulls(self): """ return self._column.has_nulls() - @_cudf_nvtx_annotate + @_performance_tracking def dropna(self, axis=0, inplace=False, how=None): """ Return a Series with null values removed. @@ -1717,7 +1715,7 @@ def dropna(self, axis=0, inplace=False, how=None): return self._mimic_inplace(result, inplace=inplace) - @_cudf_nvtx_annotate + @_performance_tracking def drop_duplicates(self, keep="first", inplace=False, ignore_index=False): """ Return Series with duplicate values removed. @@ -1791,7 +1789,7 @@ def drop_duplicates(self, keep="first", inplace=False, ignore_index=False): return self._mimic_inplace(result, inplace=inplace) - @_cudf_nvtx_annotate + @_performance_tracking def fillna( self, value=None, method=None, axis=None, inplace=False, limit=None ): @@ -1896,7 +1894,7 @@ def between(self, left, right, inclusive="both") -> Series: ) return self._from_data({self.name: lmask & rmask}, self.index) - @_cudf_nvtx_annotate + @_performance_tracking def all(self, axis=0, bool_only=None, skipna=True, **kwargs): if bool_only not in (None, True): raise NotImplementedError( @@ -1904,7 +1902,7 @@ def all(self, axis=0, bool_only=None, skipna=True, **kwargs): ) return super().all(axis, skipna, **kwargs) - @_cudf_nvtx_annotate + @_performance_tracking def any(self, axis=0, bool_only=None, skipna=True, **kwargs): if bool_only not in (None, True): raise NotImplementedError( @@ -1912,7 +1910,7 @@ def any(self, axis=0, bool_only=None, skipna=True, **kwargs): ) return super().any(axis, skipna, **kwargs) - @_cudf_nvtx_annotate + @_performance_tracking def to_pandas( self, *, @@ -2004,7 +2002,7 @@ def to_pandas( ) @property # type: ignore - @_cudf_nvtx_annotate + @_performance_tracking def data(self): """The gpu buffer for the data @@ -2029,12 +2027,12 @@ def data(self): return self._column.data @property # type: ignore - @_cudf_nvtx_annotate + @_performance_tracking def nullmask(self): """The gpu buffer for the null-mask""" return cudf.Series(self._column.nullmask) - @_cudf_nvtx_annotate + @_performance_tracking def astype( self, dtype, @@ -2051,13 +2049,13 @@ def astype( dtype = {self.name: dtype} return super().astype(dtype, copy, errors) - @_cudf_nvtx_annotate + @_performance_tracking def sort_index(self, axis=0, *args, **kwargs): if axis not in (0, "index"): raise ValueError("Only axis=0 is valid for Series.") return super().sort_index(axis=axis, *args, **kwargs) - @_cudf_nvtx_annotate + @_performance_tracking def sort_values( self, axis=0, @@ -2112,7 +2110,7 @@ def sort_values( ignore_index=ignore_index, ) - @_cudf_nvtx_annotate + @_performance_tracking def nlargest(self, n=5, keep="first"): """Returns a new Series of the *n* largest element. @@ -2175,7 +2173,7 @@ def nlargest(self, n=5, keep="first"): """ return self._n_largest_or_smallest(True, n, [self.name], keep) - @_cudf_nvtx_annotate + @_performance_tracking def nsmallest(self, n=5, keep="first"): """ Returns a new Series of the *n* smallest element. 
@@ -2251,7 +2249,7 @@ def nsmallest(self, n=5, keep="first"): """ return self._n_largest_or_smallest(False, n, [self.name], keep) - @_cudf_nvtx_annotate + @_performance_tracking def argsort( self, axis=0, @@ -2274,7 +2272,7 @@ def argsort( obj.name = self.name return obj - @_cudf_nvtx_annotate + @_performance_tracking def replace(self, to_replace=None, value=no_default, *args, **kwargs): if is_dict_like(to_replace) and value not in {None, no_default}: raise ValueError( @@ -2284,7 +2282,7 @@ def replace(self, to_replace=None, value=no_default, *args, **kwargs): return super().replace(to_replace, value, *args, **kwargs) - @_cudf_nvtx_annotate + @_performance_tracking def update(self, other): """ Modify Series in place using values from passed Series. @@ -2390,7 +2388,7 @@ def update(self, other): self.mask(mask, other, inplace=True) # UDF related - @_cudf_nvtx_annotate + @_performance_tracking def apply(self, func, convert_dtype=True, args=(), **kwargs): """ Apply a scalar function to the values of a Series. @@ -2535,7 +2533,7 @@ def apply(self, func, convert_dtype=True, args=(), **kwargs): # # Stats # - @_cudf_nvtx_annotate + @_performance_tracking def count(self): """ Return number of non-NA/null observations in the Series @@ -2559,7 +2557,7 @@ def count(self): """ return self.valid_count - @_cudf_nvtx_annotate + @_performance_tracking def mode(self, dropna=True): """ Return the mode(s) of the dataset. @@ -2630,7 +2628,7 @@ def mode(self, dropna=True): {self.name: val_counts.index.sort_values()}, name=self.name ) - @_cudf_nvtx_annotate + @_performance_tracking def round(self, decimals=0, how="half_even"): if not is_integer(decimals): raise ValueError( @@ -2639,7 +2637,7 @@ def round(self, decimals=0, how="half_even"): decimals = int(decimals) return super().round(decimals, how) - @_cudf_nvtx_annotate + @_performance_tracking def cov(self, other, min_periods=None): """ Compute covariance with Series, excluding missing values. @@ -2690,7 +2688,7 @@ def cov(self, other, min_periods=None): f"{other.dtype}" ) - @_cudf_nvtx_annotate + @_performance_tracking def transpose(self): """Return the transpose, which is by definition self.""" @@ -2698,7 +2696,7 @@ def transpose(self): T = property(transpose, doc=transpose.__doc__) - @_cudf_nvtx_annotate + @_performance_tracking def duplicated(self, keep="first"): """ Indicate duplicate Series values. @@ -2778,7 +2776,7 @@ def duplicated(self, keep="first"): """ return super().duplicated(keep=keep) - @_cudf_nvtx_annotate + @_performance_tracking def corr(self, other, method="pearson", min_periods=None): """Calculates the sample correlation between two Series, excluding missing values. @@ -2830,7 +2828,7 @@ def corr(self, other, method="pearson", min_periods=None): f"cannot perform corr with types {self.dtype}, {other.dtype}" ) - @_cudf_nvtx_annotate + @_performance_tracking def autocorr(self, lag=1): """Compute the lag-N autocorrelation. This method computes the Pearson correlation between the Series and its shifted self. @@ -2856,7 +2854,7 @@ def autocorr(self, lag=1): """ return self.corr(self.shift(lag)) - @_cudf_nvtx_annotate + @_performance_tracking def isin(self, values): """Check whether values are contained in Series. @@ -2926,7 +2924,7 @@ def isin(self, values): {self.name: self._column.isin(values)}, index=self.index ) - @_cudf_nvtx_annotate + @_performance_tracking def unique(self): """ Returns unique values of this Series. 
@@ -2961,7 +2959,7 @@ def unique(self): return res.values return Series(res, name=self.name) - @_cudf_nvtx_annotate + @_performance_tracking def value_counts( self, normalize=False, @@ -3116,7 +3114,7 @@ def value_counts( res.name = result_name return res - @_cudf_nvtx_annotate + @_performance_tracking def quantile( self, q=0.5, interpolation="linear", exact=True, quant_index=True ): @@ -3191,11 +3189,11 @@ def quantile( return Series._from_data( data={self.name: result}, - index=as_index(np_array_q) if quant_index else None, + index=cudf.Index(np_array_q) if quant_index else None, ) @docutils.doc_describe() - @_cudf_nvtx_annotate + @_performance_tracking def describe( self, percentiles=None, @@ -3240,7 +3238,7 @@ def describe( name=self.name, ) - @_cudf_nvtx_annotate + @_performance_tracking def digitize(self, bins, right=False): """Return the indices of the bins to which each value belongs. @@ -3276,7 +3274,7 @@ def digitize(self, bins, right=False): cudf.core.column.numerical.digitize(self._column, bins, right) ) - @_cudf_nvtx_annotate + @_performance_tracking def diff(self, periods=1): """First discrete difference of element. @@ -3347,7 +3345,7 @@ def diff(self, periods=1): return self - self.shift(periods=periods) - @_cudf_nvtx_annotate + @_performance_tracking @docutils.doc_apply( groupby_doc_template.format( ret=textwrap.dedent( @@ -3385,7 +3383,7 @@ def groupby( dropna, ) - @_cudf_nvtx_annotate + @_performance_tracking def rename(self, index=None, copy=True): """ Alter Series name @@ -3431,21 +3429,23 @@ def rename(self, index=None, copy=True): out_data = self._data.copy(deep=copy) return Series._from_data(out_data, self.index, name=index) - @_cudf_nvtx_annotate + @_performance_tracking def add_prefix(self, prefix): return Series._from_data( + # TODO: Change to deep=False when copy-on-write is default data=self._data.copy(deep=True), index=prefix + self.index.astype(str), ) - @_cudf_nvtx_annotate + @_performance_tracking def add_suffix(self, suffix): return Series._from_data( + # TODO: Change to deep=False when copy-on-write is default data=self._data.copy(deep=True), index=self.index.astype(str) + suffix, ) - @_cudf_nvtx_annotate + @_performance_tracking def keys(self): """ Return alias for index. @@ -3489,7 +3489,7 @@ def keys(self): """ return self.index - @_cudf_nvtx_annotate + @_performance_tracking def explode(self, ignore_index=False): """ Transform each element of a list-like to a row, replicating index @@ -3526,7 +3526,7 @@ def explode(self, ignore_index=False): """ return super()._explode(self.name, ignore_index) - @_cudf_nvtx_annotate + @_performance_tracking def pct_change( self, periods=1, fill_method=no_default, limit=no_default, freq=None ): @@ -3600,7 +3600,7 @@ def pct_change( change = diff / data.shift(periods=periods, freq=freq) return change - @_cudf_nvtx_annotate + @_performance_tracking def where(self, cond, other=None, inplace=False): result_col = super().where(cond, other, inplace) return self._mimic_inplace( @@ -3734,7 +3734,7 @@ class DatetimeProperties(BaseDatelikeProperties): """ @property # type: ignore - @_cudf_nvtx_annotate + @_performance_tracking def year(self) -> Series: """ The year of the datetime. @@ -3759,7 +3759,7 @@ def year(self) -> Series: return self._get_dt_field("year") @property # type: ignore - @_cudf_nvtx_annotate + @_performance_tracking def month(self) -> Series: """ The month as January=1, December=12. 
@@ -3784,7 +3784,7 @@ def month(self) -> Series: return self._get_dt_field("month") @property # type: ignore - @_cudf_nvtx_annotate + @_performance_tracking def day(self) -> Series: """ The day of the datetime. @@ -3809,7 +3809,7 @@ def day(self) -> Series: return self._get_dt_field("day") @property # type: ignore - @_cudf_nvtx_annotate + @_performance_tracking def hour(self) -> Series: """ The hours of the datetime. @@ -3834,7 +3834,7 @@ def hour(self) -> Series: return self._get_dt_field("hour") @property # type: ignore - @_cudf_nvtx_annotate + @_performance_tracking def minute(self) -> Series: """ The minutes of the datetime. @@ -3859,7 +3859,7 @@ def minute(self) -> Series: return self._get_dt_field("minute") @property # type: ignore - @_cudf_nvtx_annotate + @_performance_tracking def second(self) -> Series: """ The seconds of the datetime. @@ -3884,7 +3884,7 @@ def second(self) -> Series: return self._get_dt_field("second") @property # type: ignore - @_cudf_nvtx_annotate + @_performance_tracking def microsecond(self) -> Series: """ The microseconds of the datetime. @@ -3916,7 +3916,7 @@ def microsecond(self) -> Series: return self._return_result_like_self(micro + extra) @property # type: ignore - @_cudf_nvtx_annotate + @_performance_tracking def nanosecond(self) -> Series: """ The nanoseconds of the datetime. @@ -3941,7 +3941,7 @@ def nanosecond(self) -> Series: return self._get_dt_field("nanosecond") @property # type: ignore - @_cudf_nvtx_annotate + @_performance_tracking def weekday(self) -> Series: """ The day of the week with Monday=0, Sunday=6. @@ -3978,7 +3978,7 @@ def weekday(self) -> Series: return self._get_dt_field("weekday") @property # type: ignore - @_cudf_nvtx_annotate + @_performance_tracking def dayofweek(self) -> Series: """ The day of the week with Monday=0, Sunday=6. @@ -4015,7 +4015,7 @@ def dayofweek(self) -> Series: return self._get_dt_field("weekday") @property # type: ignore - @_cudf_nvtx_annotate + @_performance_tracking def dayofyear(self) -> Series: """ The day of the year, from 1-365 in non-leap years and @@ -4053,7 +4053,7 @@ def dayofyear(self) -> Series: return self._get_dt_field("day_of_year") @property # type: ignore - @_cudf_nvtx_annotate + @_performance_tracking def day_of_year(self) -> Series: """ The day of the year, from 1-365 in non-leap years and @@ -4091,7 +4091,7 @@ def day_of_year(self) -> Series: return self._get_dt_field("day_of_year") @property # type: ignore - @_cudf_nvtx_annotate + @_performance_tracking def is_leap_year(self) -> Series: """ Boolean indicator if the date belongs to a leap year. @@ -4146,7 +4146,7 @@ def is_leap_year(self) -> Series: return self._return_result_like_self(res) @property # type: ignore - @_cudf_nvtx_annotate + @_performance_tracking def quarter(self) -> Series: """ Integer indicator for which quarter of the year the date belongs in. @@ -4175,7 +4175,7 @@ def quarter(self) -> Series: ) return self._return_result_like_self(res) - @_cudf_nvtx_annotate + @_performance_tracking def day_name(self, locale: str | None = None) -> Series: """ Return the day names. Currently supports English locale only. @@ -4211,7 +4211,7 @@ def day_name(self, locale: str | None = None) -> Series: self.series._column.get_day_names(locale) ) - @_cudf_nvtx_annotate + @_performance_tracking def month_name(self, locale: str | None = None) -> Series: """ Return the month names. Currently supports English locale only. 
@@ -4241,7 +4241,7 @@ def month_name(self, locale: str | None = None) -> Series: self.series._column.get_month_names(locale) ) - @_cudf_nvtx_annotate + @_performance_tracking def isocalendar(self) -> cudf.DataFrame: """ Returns a DataFrame with the year, week, and day @@ -4289,7 +4289,7 @@ def isocalendar(self) -> cudf.DataFrame: ) @property # type: ignore - @_cudf_nvtx_annotate + @_performance_tracking def is_month_start(self) -> Series: """ Booleans indicating if dates are the first day of the month. @@ -4297,7 +4297,7 @@ def is_month_start(self) -> Series: return (self.day == 1).fillna(False) @property # type: ignore - @_cudf_nvtx_annotate + @_performance_tracking def days_in_month(self) -> Series: """ Get the total number of days in the month that the date falls on. @@ -4346,7 +4346,7 @@ def days_in_month(self) -> Series: ) @property # type: ignore - @_cudf_nvtx_annotate + @_performance_tracking def is_month_end(self) -> Series: """ Boolean indicator if the date is the last day of the month. @@ -4389,7 +4389,7 @@ def is_month_end(self) -> Series: return (self.day == last_day.dt.day).fillna(False) @property # type: ignore - @_cudf_nvtx_annotate + @_performance_tracking def is_quarter_start(self) -> Series: """ Boolean indicator if the date is the first day of a quarter. @@ -4434,7 +4434,7 @@ def is_quarter_start(self) -> Series: return self._return_result_like_self(result) @property # type: ignore - @_cudf_nvtx_annotate + @_performance_tracking def is_quarter_end(self) -> Series: """ Boolean indicator if the date is the last day of a quarter. @@ -4481,7 +4481,7 @@ def is_quarter_end(self) -> Series: return self._return_result_like_self(result) @property # type: ignore - @_cudf_nvtx_annotate + @_performance_tracking def is_year_start(self) -> Series: """ Boolean indicator if the date is the first day of the year. @@ -4512,7 +4512,7 @@ def is_year_start(self) -> Series: return self._return_result_like_self(outcol.fillna(False)) @property # type: ignore - @_cudf_nvtx_annotate + @_performance_tracking def is_year_end(self) -> Series: """ Boolean indicator if the date is the last day of the year. @@ -4545,13 +4545,13 @@ def is_year_end(self) -> Series: result = cudf._lib.copying.copy_if_else(leap, non_leap, leap_dates) return self._return_result_like_self(result.fillna(False)) - @_cudf_nvtx_annotate + @_performance_tracking def _get_dt_field(self, field: str) -> Series: return self._return_result_like_self( self.series._column.get_dt_field(field) ) - @_cudf_nvtx_annotate + @_performance_tracking def ceil(self, freq: str) -> Series: """ Perform ceil operation on the data to the specified freq. @@ -4584,7 +4584,7 @@ def ceil(self, freq: str) -> Series: """ return self._return_result_like_self(self.series._column.ceil(freq)) - @_cudf_nvtx_annotate + @_performance_tracking def floor(self, freq: str) -> Series: """ Perform floor operation on the data to the specified freq. @@ -4617,7 +4617,7 @@ def floor(self, freq: str) -> Series: """ return self._return_result_like_self(self.series._column.floor(freq)) - @_cudf_nvtx_annotate + @_performance_tracking def round(self, freq: str) -> Series: """ Perform round operation on the data to the specified freq. @@ -4653,7 +4653,7 @@ def round(self, freq: str) -> Series: """ return self._return_result_like_self(self.series._column.round(freq)) - @_cudf_nvtx_annotate + @_performance_tracking def strftime(self, date_format: str, *args, **kwargs) -> Series: """ Convert to Series using specified ``date_format``. 
@@ -4830,7 +4830,7 @@ class TimedeltaProperties(BaseDatelikeProperties): """ @property # type: ignore - @_cudf_nvtx_annotate + @_performance_tracking def days(self) -> Series: """ Number of days. @@ -4862,7 +4862,7 @@ def days(self) -> Series: return self._get_td_field("days") @property # type: ignore - @_cudf_nvtx_annotate + @_performance_tracking def seconds(self) -> Series: """ Number of seconds (>= 0 and less than 1 day). @@ -4901,7 +4901,7 @@ def seconds(self) -> Series: return self._get_td_field("seconds") @property # type: ignore - @_cudf_nvtx_annotate + @_performance_tracking def microseconds(self) -> Series: """ Number of microseconds (>= 0 and less than 1 second). @@ -4933,7 +4933,7 @@ def microseconds(self) -> Series: return self._get_td_field("microseconds") @property # type: ignore - @_cudf_nvtx_annotate + @_performance_tracking def nanoseconds(self) -> Series: """ Return the number of nanoseconds (n), where 0 <= n < 1 microsecond. @@ -4965,7 +4965,7 @@ def nanoseconds(self) -> Series: return self._get_td_field("nanoseconds") @property # type: ignore - @_cudf_nvtx_annotate + @_performance_tracking def components(self) -> cudf.DataFrame: """ Return a Dataframe of the components of the Timedeltas. @@ -4997,14 +4997,14 @@ def components(self) -> cudf.DataFrame: ca, index=self.series.index ) - @_cudf_nvtx_annotate + @_performance_tracking def _get_td_field(self, field: str) -> Series: return self._return_result_like_self( getattr(self.series._column, field) ) -@_cudf_nvtx_annotate +@_performance_tracking def _align_indices(series_list, how="outer", allow_non_unique=False): """ Internal util to align the indices of a list of Series objects @@ -5067,7 +5067,7 @@ def _align_indices(series_list, how="outer", allow_non_unique=False): @acquire_spill_lock() -@_cudf_nvtx_annotate +@_performance_tracking def isclose(a, b, rtol=1e-05, atol=1e-08, equal_nan=False): r"""Returns a boolean array where two arrays are equal within a tolerance. 
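The `isclose` hunk above is truncated at the start of its docstring. As a hedged usage sketch (not part of the patch), assuming `cudf.isclose` follows `numpy.isclose` semantics, i.e. elementwise `|a - b| <= atol + rtol * |b|`, with `equal_nan` controlling whether NaN pairs compare equal:

import cudf

# Illustrative only; output values assume numpy.isclose-style semantics.
a = cudf.Series([1.0, 1.0001, float("nan")])
b = cudf.Series([1.0, 1.0, float("nan")])

# |1.0001 - 1.0| = 1e-4, which is within 1e-8 + 1e-3 * 1.0
print(cudf.isclose(a, b, rtol=1e-3))                  # [True, True, False]
print(cudf.isclose(a, b, rtol=1e-3, equal_nan=True))  # [True, True, True]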
diff --git a/python/cudf/cudf/core/single_column_frame.py b/python/cudf/cudf/core/single_column_frame.py index 23a2c828a04..f9555aee6a2 100644 --- a/python/cudf/cudf/core/single_column_frame.py +++ b/python/cudf/cudf/core/single_column_frame.py @@ -18,7 +18,7 @@ ) from cudf.core.column import ColumnBase, as_column from cudf.core.frame import Frame -from cudf.utils.nvtx_annotation import _cudf_nvtx_annotate +from cudf.utils.performance_tracking import _performance_tracking from cudf.utils.utils import NotIterable if TYPE_CHECKING: @@ -41,7 +41,7 @@ class SingleColumnFrame(Frame, NotIterable): "index": 0, } - @_cudf_nvtx_annotate + @_performance_tracking def _reduce( self, op, @@ -62,7 +62,7 @@ def _reduce( except AttributeError: raise TypeError(f"cannot perform {op} with type {self.dtype}") - @_cudf_nvtx_annotate + @_performance_tracking def _scan(self, op, axis=None, *args, **kwargs): if axis not in (None, 0): raise NotImplementedError("axis parameter is not implemented yet") @@ -70,24 +70,24 @@ def _scan(self, op, axis=None, *args, **kwargs): return super()._scan(op, axis=axis, *args, **kwargs) @property # type: ignore - @_cudf_nvtx_annotate + @_performance_tracking def name(self): """Get the name of this object.""" return next(iter(self._column_names)) @name.setter # type: ignore - @_cudf_nvtx_annotate + @_performance_tracking def name(self, value): self._data[value] = self._data.pop(self.name) @property # type: ignore - @_cudf_nvtx_annotate + @_performance_tracking def ndim(self) -> int: # noqa: D401 """Number of dimensions of the underlying data, by definition 1.""" return 1 @property # type: ignore - @_cudf_nvtx_annotate + @_performance_tracking def shape(self) -> tuple[int]: """Get a tuple representing the dimensionality of the Index.""" return (len(self),) @@ -99,27 +99,27 @@ def __bool__(self): ) @property # type: ignore - @_cudf_nvtx_annotate + @_performance_tracking def _num_columns(self) -> int: return 1 @property # type: ignore - @_cudf_nvtx_annotate + @_performance_tracking def _column(self) -> ColumnBase: return next(iter(self._columns)) @property # type: ignore - @_cudf_nvtx_annotate + @_performance_tracking def values(self) -> cupy.ndarray: # noqa: D102 return self._column.values @property # type: ignore - @_cudf_nvtx_annotate + @_performance_tracking def values_host(self) -> numpy.ndarray: # noqa: D102 return self._column.values_host @classmethod - @_cudf_nvtx_annotate + @_performance_tracking def from_arrow(cls, array) -> Self: """Create from PyArrow Array/ChunkedArray. @@ -150,7 +150,7 @@ def from_arrow(cls, array) -> Self: """ return cls(ColumnBase.from_arrow(array)) - @_cudf_nvtx_annotate + @_performance_tracking def to_arrow(self) -> pa.Array: """ Convert to a PyArrow Array. @@ -182,7 +182,7 @@ def to_arrow(self) -> pa.Array: return self._column.to_arrow() @property # type: ignore - @_cudf_nvtx_annotate + @_performance_tracking def is_unique(self) -> bool: """Return boolean if values in the object are unique. @@ -193,7 +193,7 @@ def is_unique(self) -> bool: return self._column.is_unique @property # type: ignore - @_cudf_nvtx_annotate + @_performance_tracking def is_monotonic_increasing(self) -> bool: """Return boolean if values in the object are monotonically increasing. @@ -204,7 +204,7 @@ def is_monotonic_increasing(self) -> bool: return self._column.is_monotonic_increasing @property # type: ignore - @_cudf_nvtx_annotate + @_performance_tracking def is_monotonic_decreasing(self) -> bool: """Return boolean if values in the object are monotonically decreasing. 
@@ -215,7 +215,7 @@ def is_monotonic_decreasing(self) -> bool: return self._column.is_monotonic_decreasing @property # type: ignore - @_cudf_nvtx_annotate + @_performance_tracking def __cuda_array_interface__(self): # While the parent column class has a `__cuda_array_interface__` method # defined, it is not implemented for all column types. When it is not @@ -229,7 +229,7 @@ def __cuda_array_interface__(self): "'__cuda_array_interface__'" ) - @_cudf_nvtx_annotate + @_performance_tracking def factorize( self, sort: bool = False, use_na_sentinel: bool = True ) -> tuple[cupy.ndarray, cudf.Index]: @@ -268,7 +268,7 @@ def factorize( use_na_sentinel=use_na_sentinel, ) - @_cudf_nvtx_annotate + @_performance_tracking def _make_operands_for_binop( self, other: Any, @@ -323,7 +323,7 @@ def _make_operands_for_binop( return {result_name: (self._column, other, reflect, fill_value)} - @_cudf_nvtx_annotate + @_performance_tracking def nunique(self, dropna: bool = True) -> int: """ Return count of unique values for the column. @@ -369,7 +369,7 @@ def _get_elements_from_column(self, arg) -> ScalarLike | ColumnBase: return self._column.apply_boolean_mask(arg) raise NotImplementedError(f"Unknown indexer {type(arg)}") - @_cudf_nvtx_annotate + @_performance_tracking def where(self, cond, other=None, inplace=False): from cudf.core._internals.where import ( _check_and_cast_columns_with_other, diff --git a/python/cudf/cudf/core/udf/groupby_utils.py b/python/cudf/cudf/core/udf/groupby_utils.py index 06d9296ca0f..265b87350ae 100644 --- a/python/cudf/cudf/core/udf/groupby_utils.py +++ b/python/cudf/cudf/core/udf/groupby_utils.py @@ -30,7 +30,7 @@ _supported_dtypes_from_frame, ) from cudf.utils._numba import _CUDFNumbaConfig -from cudf.utils.nvtx_annotation import _cudf_nvtx_annotate +from cudf.utils.performance_tracking import _performance_tracking def _get_frame_groupby_type(dtype, index_dtype): @@ -126,7 +126,7 @@ def _get_groupby_apply_kernel(frame, func, args): return kernel, return_type -@_cudf_nvtx_annotate +@_performance_tracking def jit_groupby_apply(offsets, grouped_values, function, *args): """ Main entrypoint for JIT Groupby.apply via Numba. diff --git a/python/cudf/cudf/core/udf/utils.py b/python/cudf/cudf/core/udf/utils.py index f1704e4ea78..d616761cb3b 100644 --- a/python/cudf/cudf/core/udf/utils.py +++ b/python/cudf/cudf/core/udf/utils.py @@ -38,7 +38,7 @@ STRING_TYPES, TIMEDELTA_TYPES, ) -from cudf.utils.nvtx_annotation import _cudf_nvtx_annotate +from cudf.utils.performance_tracking import _performance_tracking from cudf.utils.utils import initfunc # Maximum size of a string column is 2 GiB @@ -71,7 +71,7 @@ def _ptx_file(): ) -@_cudf_nvtx_annotate +@_performance_tracking def _get_udf_return_type(argty, func: Callable, args=()): """ Get the return type of a masked UDF for a given set of argument dtypes. 
It @@ -236,7 +236,7 @@ def _generate_cache_key(frame, func: Callable, args, suffix="__APPLY_UDF"): ) -@_cudf_nvtx_annotate +@_performance_tracking def _compile_or_get( frame, func, args, kernel_getter=None, suffix="__APPLY_UDF" ): diff --git a/python/cudf/cudf/io/csv.py b/python/cudf/cudf/io/csv.py index f07764e2ce4..e909d96309e 100644 --- a/python/cudf/cudf/io/csv.py +++ b/python/cudf/cudf/io/csv.py @@ -12,10 +12,10 @@ from cudf.api.types import is_scalar from cudf.utils import ioutils from cudf.utils.dtypes import _maybe_convert_to_default_type -from cudf.utils.nvtx_annotation import _cudf_nvtx_annotate +from cudf.utils.performance_tracking import _performance_tracking -@_cudf_nvtx_annotate +@_performance_tracking @ioutils.doc_read_csv() def read_csv( filepath_or_buffer, @@ -151,7 +151,7 @@ def read_csv( return df -@_cudf_nvtx_annotate +@_performance_tracking @ioutils.doc_to_csv() def to_csv( df, diff --git a/python/cudf/cudf/io/parquet.py b/python/cudf/cudf/io/parquet.py index 58b104b84e9..7733e770d99 100644 --- a/python/cudf/cudf/io/parquet.py +++ b/python/cudf/cudf/io/parquet.py @@ -22,7 +22,7 @@ from cudf.api.types import is_list_like from cudf.core.column import as_column, build_categorical_column, column_empty from cudf.utils import ioutils -from cudf.utils.nvtx_annotation import _cudf_nvtx_annotate +from cudf.utils.performance_tracking import _performance_tracking BYTE_SIZES = { "kb": 1000, @@ -50,7 +50,7 @@ } -@_cudf_nvtx_annotate +@_performance_tracking def _write_parquet( df, paths, @@ -130,7 +130,7 @@ def _write_parquet( # Logic chosen to match: https://arrow.apache.org/ # docs/_modules/pyarrow/parquet.html#write_to_dataset -@_cudf_nvtx_annotate +@_performance_tracking def write_to_dataset( df, root_path, @@ -318,7 +318,7 @@ def write_to_dataset( @ioutils.doc_read_parquet_metadata() -@_cudf_nvtx_annotate +@_performance_tracking def read_parquet_metadata(filepath_or_buffer): """{docstring}""" # Multiple sources are passed as a list. 
If a single source is passed, @@ -360,7 +360,7 @@ def read_parquet_metadata(filepath_or_buffer): return libparquet.read_parquet_metadata(filepaths_or_buffers) -@_cudf_nvtx_annotate +@_performance_tracking def _process_dataset( paths, fs, @@ -515,7 +515,7 @@ def _process_dataset( @ioutils.doc_read_parquet() -@_cudf_nvtx_annotate +@_performance_tracking def read_parquet( filepath_or_buffer, engine="cudf", @@ -785,7 +785,7 @@ def _handle_is(column: cudf.Series, value, *, negate) -> cudf.Series: return df -@_cudf_nvtx_annotate +@_performance_tracking def _parquet_to_frame( paths_or_buffers, *args, @@ -885,7 +885,7 @@ def _parquet_to_frame( return dfs[0] -@_cudf_nvtx_annotate +@_performance_tracking def _read_parquet( filepaths_or_buffers, engine, @@ -908,12 +908,20 @@ def _read_parquet( "cudf engine doesn't support the " f"following positional arguments: {list(args)}" ) - return libparquet.read_parquet( - filepaths_or_buffers, - columns=columns, - row_groups=row_groups, - use_pandas_metadata=use_pandas_metadata, - ) + if cudf.get_option("mode.pandas_compatible"): + return libparquet.ParquetReader( + filepaths_or_buffers, + columns=columns, + row_groups=row_groups, + use_pandas_metadata=use_pandas_metadata, + ).read() + else: + return libparquet.read_parquet( + filepaths_or_buffers, + columns=columns, + row_groups=row_groups, + use_pandas_metadata=use_pandas_metadata, + ) else: if ( isinstance(filepaths_or_buffers, list) @@ -933,7 +941,7 @@ def _read_parquet( @ioutils.doc_to_parquet() -@_cudf_nvtx_annotate +@_performance_tracking def to_parquet( df, path, @@ -1099,7 +1107,7 @@ def _get_estimated_file_size(df): return file_size -@_cudf_nvtx_annotate +@_performance_tracking def _get_partitioned( df, root_path, @@ -1137,7 +1145,7 @@ def _get_partitioned( return full_paths, metadata_file_paths, grouped_df, part_offsets, filename -@_cudf_nvtx_annotate +@_performance_tracking def _get_groups_and_offsets( df, partition_cols, @@ -1297,7 +1305,7 @@ class ParquetDatasetWriter: """ - @_cudf_nvtx_annotate + @_performance_tracking def __init__( self, path, @@ -1347,7 +1355,7 @@ def __init__( self._file_sizes: dict[str, int] = {} - @_cudf_nvtx_annotate + @_performance_tracking def write_table(self, df): """ Write a dataframe to the file/dataset @@ -1478,7 +1486,7 @@ def write_table(self, df): self.path_cw_map.update({k: new_cw_idx for k in new_paths}) self._chunked_writers[-1][0].write_table(grouped_df, part_info) - @_cudf_nvtx_annotate + @_performance_tracking def close(self, return_metadata=False): """ Close all open files and optionally return footer metadata as a binary diff --git a/python/cudf/cudf/io/text.py b/python/cudf/cudf/io/text.py index 0e19972f6e0..4329480bb2c 100644 --- a/python/cudf/cudf/io/text.py +++ b/python/cudf/cudf/io/text.py @@ -1,14 +1,14 @@ -# Copyright (c) 2018-2023, NVIDIA CORPORATION. +# Copyright (c) 2018-2024, NVIDIA CORPORATION. 
from io import BytesIO, StringIO import cudf from cudf._lib import text as libtext from cudf.utils import ioutils -from cudf.utils.nvtx_annotation import _cudf_nvtx_annotate +from cudf.utils.performance_tracking import _performance_tracking -@_cudf_nvtx_annotate +@_performance_tracking @ioutils.doc_read_text() def read_text( filepath_or_buffer, diff --git a/python/cudf/cudf/options.py b/python/cudf/cudf/options.py index fb5a963f008..1f539e7f266 100644 --- a/python/cudf/cudf/options.py +++ b/python/cudf/cudf/options.py @@ -311,6 +311,20 @@ def _integer_and_none_validator(val): _make_contains_validator([False, True]), ) +_register_option( + "memory_profiling", + _env_get_bool("CUDF_MEMORY_PROFILING", False), + textwrap.dedent( + """ + If set to `False`, disables memory profiling. + If set to `True`, enables memory profiling. + Read more at: :ref:`memory-profiling-user-doc` + \tValid values are True or False. Default is False. + """ + ), + _make_contains_validator([False, True]), +) + class option_context(ContextDecorator): """ diff --git a/python/cudf/cudf/pandas/_wrappers/numpy.py b/python/cudf/cudf/pandas/_wrappers/numpy.py index c445be46f58..3b012169676 100644 --- a/python/cudf/cudf/pandas/_wrappers/numpy.py +++ b/python/cudf/cudf/pandas/_wrappers/numpy.py @@ -129,6 +129,19 @@ def wrap_ndarray(cls, arr: cupy.ndarray | numpy.ndarray, constructor): }, ) + +flatiter = make_final_proxy_type( + "flatiter", + cupy.flatiter, + numpy.flatiter, + fast_to_slow=lambda fast: cupy.asnumpy(fast.base).flat, + slow_to_fast=lambda slow: cupy.asarray(slow).flat, + additional_attributes={ + "__array__": array_method, + }, +) + + # Mapping flags between slow and fast types _ndarray_flags = make_intermediate_proxy_type( "_ndarray_flags", diff --git a/python/cudf/cudf/pandas/_wrappers/pandas.py b/python/cudf/cudf/pandas/_wrappers/pandas.py index 0ba432d6d0e..a64bf7772fe 100644 --- a/python/cudf/cudf/pandas/_wrappers/pandas.py +++ b/python/cudf/cudf/pandas/_wrappers/pandas.py @@ -522,6 +522,22 @@ def Index__new__(cls, *args, **kwargs): }, ) +ArrowStringArrayNumpySemantics = make_final_proxy_type( + "ArrowStringArrayNumpySemantics", + _Unusable, + pd.core.arrays.string_arrow.ArrowStringArrayNumpySemantics, + fast_to_slow=_Unusable(), + slow_to_fast=_Unusable(), +) + +ArrowStringArray = make_final_proxy_type( + "ArrowStringArray", + _Unusable, + pd.core.arrays.string_arrow.ArrowStringArray, + fast_to_slow=_Unusable(), + slow_to_fast=_Unusable(), +) + StringDtype = make_final_proxy_type( "StringDtype", _Unusable, diff --git a/python/cudf/cudf/pandas/scripts/run-pandas-tests.sh b/python/cudf/cudf/pandas/scripts/run-pandas-tests.sh index cd9f90d50fe..a66f63c09b3 100755 --- a/python/cudf/cudf/pandas/scripts/run-pandas-tests.sh +++ b/python/cudf/cudf/pandas/scripts/run-pandas-tests.sh @@ -133,7 +133,8 @@ and not test_s3_roundtrip" TEST_THAT_CRASH_PYTEST_WORKERS="not test_bitmasks_pyarrow \ and not test_large_string_pyarrow \ and not test_interchange_from_corrected_buffer_dtypes \ -and not test_eof_states" +and not test_eof_states \ +and not test_array_tz" # TODO: Remove "not db" once a postgres & mysql container is set up on the CI PANDAS_CI="1" timeout 30m python -m pytest -p cudf.pandas \ diff --git a/python/cudf/cudf/pylibcudf_tests/test_join.py b/python/cudf/cudf/pylibcudf_tests/test_join.py new file mode 100644 index 00000000000..eb25ed915b1 --- /dev/null +++ b/python/cudf/cudf/pylibcudf_tests/test_join.py @@ -0,0 +1,29 @@ +# Copyright (c) 2024, NVIDIA CORPORATION. 
+ +import numpy as np +import pyarrow as pa +from utils import assert_table_eq + +from cudf._lib import pylibcudf as plc + + +def test_cross_join(): + left = pa.Table.from_arrays([[0, 1, 2], [3, 4, 5]], names=["a", "b"]) + right = pa.Table.from_arrays( + [[6, 7, 8, 9], [10, 11, 12, 13]], names=["c", "d"] + ) + + pleft = plc.interop.from_arrow(left) + pright = plc.interop.from_arrow(right) + + expect = pa.Table.from_arrays( + [ + *(np.repeat(c.to_numpy(), len(right)) for c in left.columns), + *(np.tile(c.to_numpy(), len(left)) for c in right.columns), + ], + names=["a", "b", "c", "d"], + ) + + got = plc.join.cross_join(pleft, pright) + + assert_table_eq(expect, got) diff --git a/python/cudf/cudf/pylibcudf_tests/test_lists.py b/python/cudf/cudf/pylibcudf_tests/test_lists.py index ef4ab8b86fe..13570a48cc2 100644 --- a/python/cudf/cudf/pylibcudf_tests/test_lists.py +++ b/python/cudf/cudf/pylibcudf_tests/test_lists.py @@ -7,15 +7,28 @@ from cudf._lib import pylibcudf as plc -def test_concatenate_rows(): - test_data = [[[0, 1], [2], [5], [6, 7]], [[8], [9], [], [13, 14, 15]]] +@pytest.fixture +def test_data(): + return [[[[0, 1], [2], [5], [6, 7]], [[8], [9], [], [13, 14, 15]]]] - arrow_tbl = pa.Table.from_arrays(test_data, names=["a", "b"]) + +@pytest.fixture +def scalar(): + return pa.scalar(1) + + +@pytest.fixture +def column(): + return pa.array([3, 2, 5, 6]), pa.array([-1, 0, 0, 0], type=pa.int32()) + + +def test_concatenate_rows(test_data): + arrow_tbl = pa.Table.from_arrays(test_data[0], names=["a", "b"]) plc_tbl = plc.interop.from_arrow(arrow_tbl) res = plc.lists.concatenate_rows(plc_tbl) - expect = pa.array([pair[0] + pair[1] for pair in zip(*test_data)]) + expect = pa.array([pair[0] + pair[1] for pair in zip(*test_data[0])]) assert_column_eq(expect, res) @@ -46,12 +59,88 @@ def test_concatenate_list_elements(test_data, dropna, expected): assert_column_eq(expect, res) -def test_count_elements(): - test_data = [[1, 2, 3], [4], [5, 6]] - arr = pa.array(test_data) +def test_contains_scalar(test_data, scalar): + list_column = test_data[0][0] + arr = pa.array(list_column) + + plc_column = plc.interop.from_arrow(arr) + plc_scalar = plc.interop.from_arrow(scalar) + res = plc.lists.contains(plc_column, plc_scalar) + + expect = pa.array([True, False, False, False]) + + assert_column_eq(expect, res) + + +def test_contains_list_column(test_data): + list_column1 = test_data[0][0] + list_column2 = [1, 3, 5, 1] + arr1 = pa.array(list_column1) + arr2 = pa.array(list_column2) + + plc_column1 = plc.interop.from_arrow(arr1) + plc_column2 = plc.interop.from_arrow(arr2) + res = plc.lists.contains(plc_column1, plc_column2) + + expect = pa.array([True, False, True, False]) + + assert_column_eq(expect, res) + + +@pytest.mark.parametrize( + "list_column, expected", + [ + ( + [[1, None], [1, 3, 4], [5, None]], + [True, False, True], + ), + ( + [[1, None], None, [5]], + [True, None, False], + ), + ], +) +def test_contains_nulls(list_column, expected): + arr = pa.array(list_column) + plc_column = plc.interop.from_arrow(arr) + res = plc.lists.contains_nulls(plc_column) + + expect = pa.array(expected) + + assert_column_eq(expect, res) + + +def test_index_of_scalar(test_data, scalar): + list_column = test_data[0][0] + arr = pa.array(list_column) + + plc_column = plc.interop.from_arrow(arr) + plc_scalar = plc.interop.from_arrow(scalar) + res = plc.lists.index_of(plc_column, plc_scalar, True) + + expect = pa.array([1, -1, -1, -1], type=pa.int32()) + + assert_column_eq(expect, res) + + +def 
test_index_of_list_column(test_data, column): + list_column = test_data[0][0] + arr1 = pa.array(list_column) + arr2, expect = column + plc_column1 = plc.interop.from_arrow(arr1) + plc_column2 = plc.interop.from_arrow(arr2) + res = plc.lists.index_of(plc_column1, plc_column2, True) + + assert_column_eq(expect, res) + + +def test_count_elements(test_data): + arr = pa.array(test_data[0][1]) plc_column = plc.interop.from_arrow(arr) - res = plc.lists.contains(plc_column) + res = plc.lists.count_elements(plc_column) - expect = pa.array([3, 1, 2]) + expect = pa.array([1, 1, 0, 3], type=pa.int32()) assert_column_eq(expect, res) diff --git a/python/cudf/cudf/pylibcudf_tests/test_string_slice.py b/python/cudf/cudf/pylibcudf_tests/test_string_slice.py new file mode 100644 index 00000000000..bd63987b30f --- /dev/null +++ b/python/cudf/cudf/pylibcudf_tests/test_string_slice.py @@ -0,0 +1,116 @@ +# Copyright (c) 2024, NVIDIA CORPORATION. + +import pyarrow as pa +import pytest +from utils import assert_column_eq + +import cudf._lib.pylibcudf as plc + + +@pytest.fixture(scope="module") +def pa_col(): + return pa.array(["AbC", "123abc", "", " ", None]) + + +@pytest.fixture(scope="module") +def plc_col(pa_col): + return plc.interop.from_arrow(pa_col) + + +@pytest.fixture( + scope="module", + params=[(1, 3, 1), (0, 3, -1), (3, 2, 1), (1, 5, 5), (1, 100, 2)], +) +def pa_start_stop_step(request): + return tuple(pa.scalar(x, type=pa.int32()) for x in request.param) + + +@pytest.fixture(scope="module") +def plc_start_stop_step(pa_start_stop_step): + return tuple(plc.interop.from_arrow(x) for x in pa_start_stop_step) + + +@pytest.fixture(scope="module") +def pa_starts_col(): + return pa.array([0, 1, 3, -1, 100]) + + +@pytest.fixture(scope="module") +def plc_starts_col(pa_starts_col): + return plc.interop.from_arrow(pa_starts_col) + + +@pytest.fixture(scope="module") +def pa_stops_col(): + return pa.array([1, 3, 4, -1, 100]) + + +@pytest.fixture(scope="module") +def plc_stops_col(pa_stops_col): + return plc.interop.from_arrow(pa_stops_col) + + +def test_slice(pa_col, plc_col, pa_start_stop_step, plc_start_stop_step): + pa_start, pa_stop, pa_step = pa_start_stop_step + plc_start, plc_stop, plc_step = plc_start_stop_step + + def slice_string(st, start, stop, step): + return st[start:stop:step] if st is not None else None + + expected = pa.array( + [ + slice_string(x, pa_start.as_py(), pa_stop.as_py(), pa_step.as_py()) + for x in pa_col.to_pylist() + ], + type=pa.string(), + ) + + got = plc.strings.slice.slice_strings( + plc_col, start=plc_start, stop=plc_stop, step=plc_step + ) + + assert_column_eq(expected, got) + + +def test_slice_column( + pa_col, plc_col, pa_starts_col, plc_starts_col, pa_stops_col, plc_stops_col ): + def slice_string(st, start, stop): + if st is None: + return None + if stop < 0: + stop = len(st) + return st[start:stop] + + expected = pa.array( + [ + slice_string(x, start, stop) + for x, start, stop in zip( + pa_col.to_pylist(), + pa_starts_col.to_pylist(), + pa_stops_col.to_pylist(), + ) + ], + type=pa.string(), + ) + + got = plc.strings.slice.slice_strings( + plc_col, plc_starts_col, plc_stops_col + ) + + assert_column_eq(expected, got) + + +def test_slice_invalid(plc_col, plc_starts_col, plc_stops_col): + with pytest.raises(TypeError): + # no matching signature + plc.strings.slice.slice_strings(None, plc_starts_col, plc_stops_col) + with pytest.raises(ValueError): + # signature found but wrong value passed + 
plc.strings.slice.slice_strings(plc_col, plc_starts_col, None)
+    with pytest.raises(TypeError):
+        # no matching signature (2nd arg)
+        plc.strings.slice.slice_strings(plc_col, None, plc_stops_col)
+    with pytest.raises(TypeError):
+        # can't provide step for columnwise api
+        plc.strings.slice.slice_strings(
+            plc_col, plc_starts_col, plc_stops_col, plc_starts_col
+        )
diff --git a/python/cudf/cudf/pylibcudf_tests/test_table.py b/python/cudf/cudf/pylibcudf_tests/test_table.py
new file mode 100644
index 00000000000..cf1d51f6491
--- /dev/null
+++ b/python/cudf/cudf/pylibcudf_tests/test_table.py
@@ -0,0 +1,22 @@
+# Copyright (c) 2024, NVIDIA CORPORATION.
+
+import pyarrow as pa
+import pytest
+
+import cudf._lib.pylibcudf as plc
+
+
+@pytest.mark.parametrize(
+    "arrow_tbl",
+    [
+        pa.table([]),
+        pa.table({"a": [1, 2, 3], "b": [4, 5, 6], "c": [7, 8, 9]}),
+        pa.table({"a": [1, 2, 3]}),
+        pa.table({"a": [1], "b": [2], "c": [3]}),
+    ],
+)
+def test_table_shape(arrow_tbl):
+    plc_tbl = plc.interop.from_arrow(arrow_tbl)
+
+    plc_tbl_shape = (plc_tbl.num_rows(), plc_tbl.num_columns())
+    assert plc_tbl_shape == arrow_tbl.shape
diff --git a/python/cudf/cudf/tests/test_dataframe.py b/python/cudf/cudf/tests/test_dataframe.py
index 05ee8346afa..f40106a30f4 100644
--- a/python/cudf/cudf/tests/test_dataframe.py
+++ b/python/cudf/cudf/tests/test_dataframe.py
@@ -10024,6 +10024,14 @@ def test_dataframe_rename_duplicate_column():
         gdf.rename(columns={"a": "b"}, inplace=True)
 
 
+def test_dataframe_rename_columns_keep_type():
+    gdf = cudf.DataFrame([[1, 2, 3]])
+    gdf.columns = cudf.Index([4, 5, 6], dtype=np.int8)
+    result = gdf.rename({4: 50}, axis="columns").columns
+    expected = pd.Index([50, 5, 6], dtype=np.int8)
+    assert_eq(result, expected)
+
+
 @pytest_unmark_spilling
 @pytest.mark.skipif(
     PANDAS_VERSION < PANDAS_CURRENT_SUPPORTED_VERSION,
@@ -11070,3 +11078,27 @@ def test_dataframe_loc_int_float(dtype1, dtype2):
     expected = pdf.loc[pidx]
 
     assert_eq(actual, expected, check_index_type=True, check_dtype=True)
+
+
+@pytest.mark.parametrize(
+    "data",
+    [
+        cudf.DataFrame(range(2)),
+        None,
+        [cudf.Series(range(2))],
+        [[0], [1]],
+        {1: range(2)},
+        cupy.arange(2),
+    ],
+)
+def test_init_with_index_no_shallow_copy(data):
+    idx = cudf.RangeIndex(2)
+    df = cudf.DataFrame(data, index=idx)
+    assert df.index is idx
+
+
+def test_from_records_with_index_no_shallow_copy():
+    idx = cudf.RangeIndex(2)
+    data = np.array([(1.0, 2), (3.0, 4)], dtype=[("x", "<f8"), ("y", "<i8")])
+
+    out = StringIO()
+    print_memory_report(file=out)
+    assert "DataFrame.merge" in out.getvalue()
diff --git a/python/cudf/cudf/tests/test_repr.py b/python/cudf/cudf/tests/test_repr.py
index 8f65bd26bd1..193d64a9e7f 100644
--- a/python/cudf/cudf/tests/test_repr.py
+++ b/python/cudf/cudf/tests/test_repr.py
@@ -1210,7 +1210,7 @@ def test_multiindex_repr(pmi, max_seq_items):
         .index,
         textwrap.dedent(
             """
-            MultiIndex([('abc', 'NaT', 0.345),
+            MultiIndex([('abc', NaT, 0.345),
                         (<NA>, '0 days 00:00:00.000000001', <NA>),
                         ('xyz', '0 days 00:00:00.000000002', 100.0),
                         (<NA>, '0 days 00:00:00.000000003', 10.0)],
@@ -1252,10 +1252,10 @@ def test_multiindex_repr(pmi, max_seq_items):
         .index,
         textwrap.dedent(
             """
-            MultiIndex([('NaT', <NA>),
-                        ('NaT', <NA>),
-                        ('NaT', <NA>),
-                        ('NaT', <NA>)],
+            MultiIndex([(NaT, <NA>),
+                        (NaT, <NA>),
+                        (NaT, <NA>),
+                        (NaT, <NA>)],
                        names=['b', 'a'])
             """
         ),
diff --git a/python/cudf/cudf/utils/nvtx_annotation.py b/python/cudf/cudf/utils/nvtx_annotation.py
deleted file mode 100644
index a4404e51232..00000000000
--- a/python/cudf/cudf/utils/nvtx_annotation.py
+++ /dev/null
@@ -1,30 +0,0 @@
-# Copyright (c)
2023, NVIDIA CORPORATION. - -import hashlib -from functools import partial - -from nvtx import annotate - -_NVTX_COLORS = ["green", "blue", "purple", "rapids"] - - -def _get_color_for_nvtx(name): - m = hashlib.sha256() - m.update(name.encode()) - hash_value = int(m.hexdigest(), 16) - idx = hash_value % len(_NVTX_COLORS) - return _NVTX_COLORS[idx] - - -def _cudf_nvtx_annotate(func, domain="cudf_python"): - """Decorator for applying nvtx annotations to methods in cudf.""" - return annotate( - message=func.__qualname__, - color=_get_color_for_nvtx(func.__qualname__), - domain=domain, - )(func) - - -_dask_cudf_nvtx_annotate = partial( - _cudf_nvtx_annotate, domain="dask_cudf_python" -) diff --git a/python/cudf/cudf/utils/performance_tracking.py b/python/cudf/cudf/utils/performance_tracking.py new file mode 100644 index 00000000000..30c891d0d5a --- /dev/null +++ b/python/cudf/cudf/utils/performance_tracking.py @@ -0,0 +1,82 @@ +# Copyright (c) 2024, NVIDIA CORPORATION. + +from __future__ import annotations + +import contextlib +import functools +import hashlib +import sys + +import nvtx + +import rmm.statistics + +from cudf.options import get_option + +_NVTX_COLORS = ["green", "blue", "purple", "rapids"] + + +def _get_color_for_nvtx(name): + m = hashlib.sha256() + m.update(name.encode()) + hash_value = int(m.hexdigest(), 16) + idx = hash_value % len(_NVTX_COLORS) + return _NVTX_COLORS[idx] + + +def _performance_tracking(func, domain="cudf_python"): + """Decorator for applying performance tracking (if enabled).""" + + @functools.wraps(func) + def wrapper(*args, **kwargs): + with contextlib.ExitStack() as stack: + if get_option("memory_profiling"): + # NB: the user still needs to call `rmm.statistics.enable_statistics()` + # to enable memory profiling. + stack.enter_context( + rmm.statistics.profiler( + name=rmm.statistics._get_descriptive_name_of_object( + func + ) + ) + ) + if nvtx.enabled(): + stack.enter_context( + nvtx.annotate( + message=func.__qualname__, + color=_get_color_for_nvtx(func.__qualname__), + domain=domain, + ) + ) + return func(*args, **kwargs) + + return wrapper + + +_dask_cudf_performance_tracking = functools.partial( + _performance_tracking, domain="dask_cudf_python" +) + + +def get_memory_records() -> ( + dict[str, rmm.statistics.ProfilerRecords.MemoryRecord] +): + """Get the memory records from the memory profiling + + Returns + ------- + Dict that maps function names to memory records. Empty if + memory profiling is disabled + """ + return rmm.statistics.default_profiler_records.records + + +def print_memory_report(file=sys.stdout) -> None: + """Pretty print the result of the memory profiling + + Parameters + ---------- + file + The output stream + """ + print(rmm.statistics.default_profiler_records.report(), file=file) diff --git a/python/cudf/cudf/utils/utils.py b/python/cudf/cudf/utils/utils.py index 2e4dfc4bb14..7347ec7866a 100644 --- a/python/cudf/cudf/utils/utils.py +++ b/python/cudf/cudf/utils/utils.py @@ -159,8 +159,9 @@ def _external_only_api(func, alternative=""): @functools.wraps(func) def wrapper(*args, **kwargs): # Check the immediately preceding frame to see if it's in cudf. - frame, lineno = next(traceback.walk_stack(None)) - fn = frame.f_code.co_filename + pre_frame = traceback.extract_stack(limit=2)[0] + fn = pre_frame.filename + lineno = pre_frame.lineno if _cudf_root in fn and _tests_root not in fn: raise RuntimeError( f"External-only API called in {fn} at line {lineno}. 
" diff --git a/python/cudf/cudf_pandas_tests/test_cudf_pandas.py b/python/cudf/cudf_pandas_tests/test_cudf_pandas.py index eed5037cbea..f51ce103677 100644 --- a/python/cudf/cudf_pandas_tests/test_cudf_pandas.py +++ b/python/cudf/cudf_pandas_tests/test_cudf_pandas.py @@ -1533,3 +1533,36 @@ def test_is_proxy_object(): assert is_proxy_object(np_arr_proxy) assert is_proxy_object(s1) assert not is_proxy_object(s2) + + +def test_numpy_cupy_flatiter(series): + cp = pytest.importorskip("cupy") + + _, s = series + arr = s.values + + assert type(arr.flat._fsproxy_fast) == cp.flatiter + assert type(arr.flat._fsproxy_slow) == np.flatiter + + +def test_arrow_string_arrays(): + cu_s = xpd.Series(["a", "b", "c"]) + pd_s = pd.Series(["a", "b", "c"]) + + cu_arr = xpd.arrays.ArrowStringArray._from_sequence( + cu_s, dtype=xpd.StringDtype("pyarrow") + ) + pd_arr = pd.arrays.ArrowStringArray._from_sequence( + pd_s, dtype=pd.StringDtype("pyarrow") + ) + + tm.assert_equal(cu_arr, pd_arr) + + cu_arr = xpd.core.arrays.string_arrow.ArrowStringArray._from_sequence( + cu_s, dtype=xpd.StringDtype("pyarrow_numpy") + ) + pd_arr = pd.core.arrays.string_arrow.ArrowStringArray._from_sequence( + pd_s, dtype=pd.StringDtype("pyarrow_numpy") + ) + + tm.assert_equal(cu_arr, pd_arr) diff --git a/python/cudf/udf_cpp/strings/src/strings/udf/udf_apis.cu b/python/cudf/udf_cpp/strings/src/strings/udf/udf_apis.cu index 941e61e6787..b924995cf4b 100644 --- a/python/cudf/udf_cpp/strings/src/strings/udf/udf_apis.cu +++ b/python/cudf/udf_cpp/strings/src/strings/udf/udf_apis.cu @@ -15,10 +15,10 @@ */ #include -#include #include #include #include +#include #include #include @@ -57,7 +57,7 @@ std::unique_ptr to_string_view_array(cudf::column_view const rmm::cuda_stream_view stream) { return std::make_unique( - std::move(cudf::strings::detail::create_string_vector_from_column( + std::move(cudf::strings::create_string_vector_from_column( cudf::strings_column_view(input), stream, rmm::mr::get_current_device_resource()) .release())); } diff --git a/python/cudf_polars/cudf_polars/dsl/expr.py b/python/cudf_polars/cudf_polars/dsl/expr.py index 871134665af..17d7d15e4e5 100644 --- a/python/cudf_polars/cudf_polars/dsl/expr.py +++ b/python/cudf_polars/cudf_polars/dsl/expr.py @@ -51,6 +51,7 @@ "GroupedRollingWindow", "Cast", "Agg", + "Ternary", "BinOp", ] @@ -443,12 +444,12 @@ def __init__( ): # With ignore_nulls == False, polars uses Kleene logic raise NotImplementedError(f"Kleene logic for {self.name}") - if self.name in ( - pl_expr.BooleanFunction.IsFinite, - pl_expr.BooleanFunction.IsInfinite, - pl_expr.BooleanFunction.IsIn, + if self.name == pl_expr.BooleanFunction.IsIn and not all( + c.dtype == self.children[0].dtype for c in self.children ): - raise NotImplementedError(f"{self.name}") + # TODO: If polars IR doesn't put the casts in, we need to + # mimic the supertype promotion rules. + raise NotImplementedError("IsIn doesn't support supertype casting") @staticmethod def _distinct( @@ -506,6 +507,33 @@ def do_evaluate( mapping: Mapping[Expr, Column] | None = None, ) -> Column: """Evaluate this expression given a dataframe for context.""" + if self.name in ( + pl_expr.BooleanFunction.IsFinite, + pl_expr.BooleanFunction.IsInfinite, + ): + # Avoid evaluating the child if the dtype tells us it's unnecessary. 
+            (child,) = self.children
+            is_finite = self.name == pl_expr.BooleanFunction.IsFinite
+            if child.dtype.id() not in (plc.TypeId.FLOAT32, plc.TypeId.FLOAT64):
+                value = plc.interop.from_arrow(
+                    pa.scalar(value=is_finite, type=plc.interop.to_arrow(self.dtype))
+                )
+                return Column(plc.Column.from_scalar(value, df.num_rows))
+            needles = child.evaluate(df, context=context, mapping=mapping)
+            to_search = [-float("inf"), float("inf")]
+            if is_finite:
+                # NaN is neither finite nor infinite
+                to_search.append(float("nan"))
+            haystack = plc.interop.from_arrow(
+                pa.array(
+                    to_search,
+                    type=plc.interop.to_arrow(needles.obj.type()),
+                )
+            )
+            result = plc.search.contains(haystack, needles.obj)
+            if is_finite:
+                result = plc.unary.unary_operation(result, plc.unary.UnaryOperator.NOT)
+            return Column(result)
         columns = [
             child.evaluate(df, context=context, mapping=mapping)
             for child in self.children
@@ -612,31 +640,13 @@ def do_evaluate(
                 (c.obj for c in columns),
             )
         )
-        elif self.name == pl_expr.BooleanFunction.IsBetween:
-            column, lo, hi = columns
-            (closed,) = self.options
-            lop, rop = self._BETWEEN_OPS[closed]
-            lo_obj = (
-                lo.obj_scalar
-                if lo.is_scalar and lo.obj.size() != column.obj.size()
-                else lo.obj
-            )
-            hi_obj = (
-                hi.obj_scalar
-                if hi.is_scalar and hi.obj.size() != column.obj.size()
-                else hi.obj
-            )
+        elif self.name == pl_expr.BooleanFunction.IsIn:
+            needles, haystack = columns
+            return Column(plc.search.contains(haystack.obj, needles.obj))
+        elif self.name == pl_expr.BooleanFunction.Not:
+            (column,) = columns
             return Column(
-                plc.binaryop.binary_operation(
-                    plc.binaryop.binary_operation(
-                        column.obj, lo_obj, lop, output_type=self.dtype
-                    ),
-                    plc.binaryop.binary_operation(
-                        column.obj, hi_obj, rop, output_type=self.dtype
-                    ),
-                    plc.binaryop.BinaryOperator.LOGICAL_AND,
-                    self.dtype,
-                )
+                plc.unary.unary_operation(column.obj, plc.unary.UnaryOperator.NOT)
             )
         else:
             raise NotImplementedError(
@@ -1103,6 +1113,34 @@ def do_evaluate(
         return self.op(child.evaluate(df, context=context, mapping=mapping))
 
 
+class Ternary(Expr):
+    __slots__ = ("children",)
+    _non_child = ("dtype",)
+    children: tuple[Expr, Expr, Expr]
+
+    def __init__(
+        self, dtype: plc.DataType, when: Expr, then: Expr, otherwise: Expr
+    ) -> None:
+        super().__init__(dtype)
+        self.children = (when, then, otherwise)
+
+    def do_evaluate(
+        self,
+        df: DataFrame,
+        *,
+        context: ExecutionContext = ExecutionContext.FRAME,
+        mapping: Mapping[Expr, Column] | None = None,
+    ) -> Column:
+        """Evaluate this expression given a dataframe for context."""
+        when, then, otherwise = (
+            child.evaluate(df, context=context, mapping=mapping)
+            for child in self.children
+        )
+        then_obj = then.obj_scalar if then.is_scalar else then.obj
+        otherwise_obj = otherwise.obj_scalar if otherwise.is_scalar else otherwise.obj
+        return Column(plc.copying.copy_if_else(then_obj, otherwise_obj, when.obj))
+
+
 class BinOp(Expr):
     __slots__ = ("op", "children")
     _non_child = ("dtype", "op")
diff --git a/python/cudf_polars/cudf_polars/dsl/ir.py b/python/cudf_polars/cudf_polars/dsl/ir.py
index b3dd6ae7cc3..3f5f3c74050 100644
--- a/python/cudf_polars/cudf_polars/dsl/ir.py
+++ b/python/cudf_polars/cudf_polars/dsl/ir.py
@@ -123,7 +123,7 @@ def broadcast(
     ]
 
 
-@dataclasses.dataclass(slots=True)
+@dataclasses.dataclass
 class IR:
     """Abstract plan node, representing an unevaluated dataframe."""
 
@@ -157,7 +157,7 @@ def evaluate(self, *, cache: MutableMapping[int, DataFrame]) -> DataFrame:
         )  # pragma: no cover
 
 
-@dataclasses.dataclass(slots=True)
+@dataclasses.dataclass
 class
PythonScan(IR): """Representation of input from a python function.""" @@ -171,7 +171,7 @@ def __post_init__(self): raise NotImplementedError("PythonScan not implemented") -@dataclasses.dataclass(slots=True) +@dataclasses.dataclass class Scan(IR): """Input from files.""" @@ -248,7 +248,7 @@ def evaluate(self, *, cache: MutableMapping[int, DataFrame]) -> DataFrame: return df.filter(mask) -@dataclasses.dataclass(slots=True) +@dataclasses.dataclass class Cache(IR): """ Return a cached plan node. @@ -269,7 +269,7 @@ def evaluate(self, *, cache: MutableMapping[int, DataFrame]) -> DataFrame: return cache.setdefault(self.key, self.value.evaluate(cache=cache)) -@dataclasses.dataclass(slots=True) +@dataclasses.dataclass class DataFrameScan(IR): """ Input from an existing polars DataFrame. @@ -315,7 +315,7 @@ def evaluate(self, *, cache: MutableMapping[int, DataFrame]) -> DataFrame: return df -@dataclasses.dataclass(slots=True) +@dataclasses.dataclass class Select(IR): """Produce a new dataframe selecting given expressions from an input.""" @@ -336,7 +336,7 @@ def evaluate(self, *, cache: MutableMapping[int, DataFrame]) -> DataFrame: return DataFrame(columns) -@dataclasses.dataclass(slots=True) +@dataclasses.dataclass class Reduce(IR): """ Produce a new dataframe selecting given expressions from an input. @@ -389,7 +389,7 @@ def placeholder_column(n: int) -> plc.Column: ) -@dataclasses.dataclass(slots=False) +@dataclasses.dataclass class GroupBy(IR): """Perform a groupby.""" @@ -490,7 +490,7 @@ def evaluate(self, *, cache: MutableMapping[int, DataFrame]) -> DataFrame: return DataFrame([*result_keys, *results]).slice(self.options.slice) -@dataclasses.dataclass(slots=True) +@dataclasses.dataclass class Join(IR): """A join of two dataframes.""" @@ -503,7 +503,7 @@ class Join(IR): right_on: list[expr.NamedExpr] """List of expressions used as keys in the right frame.""" options: tuple[ - Literal["inner", "left", "full", "leftsemi", "leftanti"], + Literal["inner", "left", "full", "leftsemi", "leftanti", "cross"], bool, tuple[int, int] | None, str | None, @@ -520,11 +520,14 @@ class Join(IR): def __post_init__(self) -> None: """Validate preconditions.""" - if self.options[0] == "cross": - raise NotImplementedError("cross join not implemented") + if any( + isinstance(e.value, expr.Literal) + for e in itertools.chain(self.left_on, self.right_on) + ): + raise NotImplementedError("Join with literal as join key.") - @cache @staticmethod + @cache def _joiners( how: Literal["inner", "left", "full", "leftsemi", "leftanti"], ) -> tuple[ @@ -567,35 +570,42 @@ def evaluate(self, *, cache: MutableMapping[int, DataFrame]) -> DataFrame: """Evaluate and return a dataframe.""" left = self.left.evaluate(cache=cache) right = self.right.evaluate(cache=cache) - left_on = DataFrame( - broadcast( - *(e.evaluate(left) for e in self.left_on), target_length=left.num_rows - ) - ) - right_on = DataFrame( - broadcast( - *(e.evaluate(right) for e in self.right_on), - target_length=right.num_rows, - ) - ) how, join_nulls, zlice, suffix, coalesce = self.options + suffix = "_right" if suffix is None else suffix + if how == "cross": + # Separate implementation, since cross_join returns the + # result, not the gather maps + columns = plc.join.cross_join(left.table, right.table).columns() + left_cols = [ + NamedColumn(new, old.name).sorted_like(old) + for new, old in zip(columns[: left.num_columns], left.columns) + ] + right_cols = [ + NamedColumn( + new, + old.name + if old.name not in left.column_names_set + else f"{old.name}{suffix}", + ) 
+ for new, old in zip(columns[left.num_columns :], right.columns) + ] + return DataFrame([*left_cols, *right_cols]) + # TODO: Waiting on clarity based on https://github.com/pola-rs/polars/issues/17184 + left_on = DataFrame(broadcast(*(e.evaluate(left) for e in self.left_on))) + right_on = DataFrame(broadcast(*(e.evaluate(right) for e in self.right_on))) null_equality = ( plc.types.NullEquality.EQUAL if join_nulls else plc.types.NullEquality.UNEQUAL ) - suffix = "_right" if suffix is None else suffix join_fn, left_policy, right_policy = Join._joiners(how) if right_policy is None: # Semi join lg = join_fn(left_on.table, right_on.table, null_equality) - left = left.replace_columns(*left_on.columns) table = plc.copying.gather(left.table, lg, left_policy) result = DataFrame.from_table(table, left.column_names) else: lg, rg = join_fn(left_on.table, right_on.table, null_equality) - left = left.replace_columns(*left_on.columns) - right = right.replace_columns(*right_on.columns) if coalesce and how == "inner": right = right.discard_columns(right_on.column_names_set) left = DataFrame.from_table( @@ -629,7 +639,7 @@ def evaluate(self, *, cache: MutableMapping[int, DataFrame]) -> DataFrame: return result.slice(zlice) -@dataclasses.dataclass(slots=True) +@dataclasses.dataclass class HStack(IR): """Add new columns to a dataframe.""" @@ -658,7 +668,7 @@ def evaluate(self, *, cache: MutableMapping[int, DataFrame]) -> DataFrame: return df.with_columns(columns) -@dataclasses.dataclass(slots=True) +@dataclasses.dataclass class Distinct(IR): """Produce a new dataframe with distinct rows.""" @@ -728,7 +738,7 @@ def evaluate(self, *, cache: MutableMapping[int, DataFrame]) -> DataFrame: return result.slice(self.zlice) -@dataclasses.dataclass(slots=True) +@dataclasses.dataclass class Sort(IR): """Sort a dataframe.""" @@ -797,7 +807,7 @@ def evaluate(self, *, cache: MutableMapping[int, DataFrame]) -> DataFrame: return DataFrame(columns).slice(self.zlice) -@dataclasses.dataclass(slots=True) +@dataclasses.dataclass class Slice(IR): """Slice a dataframe.""" @@ -814,7 +824,7 @@ def evaluate(self, *, cache: MutableMapping[int, DataFrame]) -> DataFrame: return df.slice((self.offset, self.length)) -@dataclasses.dataclass(slots=True) +@dataclasses.dataclass class Filter(IR): """Filter a dataframe with a boolean mask.""" @@ -830,7 +840,7 @@ def evaluate(self, *, cache: MutableMapping[int, DataFrame]) -> DataFrame: return df.filter(mask) -@dataclasses.dataclass(slots=True) +@dataclasses.dataclass class Projection(IR): """Select a subset of columns from a dataframe.""" @@ -847,7 +857,7 @@ def evaluate(self, *, cache: MutableMapping[int, DataFrame]) -> DataFrame: return DataFrame(columns) -@dataclasses.dataclass(slots=True) +@dataclasses.dataclass class MapFunction(IR): """Apply some function to a dataframe.""" @@ -881,6 +891,13 @@ def __post_init__(self) -> None: # polars requires that all to-explode columns have the # same sub-shapes raise NotImplementedError("Explode with more than one column") + elif self.name == "rename": + old, new, _ = self.options + # TODO: perhaps polars should validate renaming in the IR? 
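+            # Only simple renames are supported: two old names must not
+            # collapse onto one new name, and a new name must not collide
+            # with an existing column that is not itself renamed away.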
+            if len(new) != len(set(new)) or (
+                set(new) & (set(self.df.schema.keys()) - set(old))
+            ):
+                raise NotImplementedError("Duplicate new names in rename.")
 
     def evaluate(self, *, cache: MutableMapping[int, DataFrame]) -> DataFrame:
         """Evaluate and return a dataframe."""
@@ -906,7 +923,7 @@ def evaluate(self, *, cache: MutableMapping[int, DataFrame]) -> DataFrame:
         raise AssertionError("Should never be reached")  # pragma: no cover
 
 
-@dataclasses.dataclass(slots=True)
+@dataclasses.dataclass
 class Union(IR):
     """Concatenate dataframes vertically."""
 
@@ -930,7 +947,7 @@ def evaluate(self, *, cache: MutableMapping[int, DataFrame]) -> DataFrame:
         ).slice(self.zlice)
 
 
-@dataclasses.dataclass(slots=True)
+@dataclasses.dataclass
 class HConcat(IR):
     """Concatenate dataframes horizontally."""
 
diff --git a/python/cudf_polars/cudf_polars/dsl/translate.py b/python/cudf_polars/cudf_polars/dsl/translate.py
index 5d289885f47..953ff636cce 100644
--- a/python/cudf_polars/cudf_polars/dsl/translate.py
+++ b/python/cudf_polars/cudf_polars/dsl/translate.py
@@ -342,6 +342,16 @@ def _(node: pl_expr.Function, visitor: NodeTraverser, dtype: plc.DataType) -> ex
             *(translate_expr(visitor, n=n) for n in node.input),
         )
     elif isinstance(name, pl_expr.BooleanFunction):
+        if name == pl_expr.BooleanFunction.IsBetween:
+            column, lo, hi = (translate_expr(visitor, n=n) for n in node.input)
+            (closed,) = options
+            lop, rop = expr.BooleanFunction._BETWEEN_OPS[closed]
+            return expr.BinOp(
+                dtype,
+                plc.binaryop.BinaryOperator.LOGICAL_AND,
+                expr.BinOp(dtype, lop, column, lo),
+                expr.BinOp(dtype, rop, column, hi),
+            )
         return expr.BooleanFunction(
             dtype,
             name,
@@ -436,6 +446,16 @@ def _(node: pl_expr.Agg, visitor: NodeTraverser, dtype: plc.DataType) -> expr.Ex
     )
 
 
+@_translate_expr.register
+def _(node: pl_expr.Ternary, visitor: NodeTraverser, dtype: plc.DataType) -> expr.Expr:
+    return expr.Ternary(
+        dtype,
+        translate_expr(visitor, n=node.predicate),
+        translate_expr(visitor, n=node.truthy),
+        translate_expr(visitor, n=node.falsy),
+    )
+
+
 @_translate_expr.register
 def _(
     node: pl_expr.BinaryExpr, visitor: NodeTraverser, dtype: plc.DataType
diff --git a/python/cudf_polars/cudf_polars/typing/__init__.py b/python/cudf_polars/cudf_polars/typing/__init__.py
index 6d597a91724..c04eac41bb7 100644
--- a/python/cudf_polars/cudf_polars/typing/__init__.py
+++ b/python/cudf_polars/cudf_polars/typing/__init__.py
@@ -6,7 +6,7 @@
 from __future__ import annotations
 
 from collections.abc import Mapping
-from typing import TYPE_CHECKING, Literal, Protocol, TypeAlias
+from typing import TYPE_CHECKING, Literal, Protocol, Union
 
 from polars.polars import _expr_nodes as pl_expr, _ir_nodes as pl_ir
 
@@ -15,43 +15,45 @@
 if TYPE_CHECKING:
     from typing import Callable
 
+    from typing_extensions import TypeAlias
+
     import polars as pl
 
-IR: TypeAlias = (
-    pl_ir.PythonScan
-    | pl_ir.Scan
-    | pl_ir.Cache
-    | pl_ir.DataFrameScan
-    | pl_ir.Select
-    | pl_ir.GroupBy
-    | pl_ir.Join
-    | pl_ir.HStack
-    | pl_ir.Distinct
-    | pl_ir.Sort
-    | pl_ir.Slice
-    | pl_ir.Filter
-    | pl_ir.SimpleProjection
-    | pl_ir.MapFunction
-    | pl_ir.Union
-    | pl_ir.HConcat
-    | pl_ir.ExtContext
-)
-
-Expr: TypeAlias = (
-    pl_expr.Function
-    | pl_expr.Window
-    | pl_expr.Literal
-    | pl_expr.Sort
-    | pl_expr.SortBy
-    | pl_expr.Gather
-    | pl_expr.Filter
-    | pl_expr.Cast
-    | pl_expr.Column
-    | pl_expr.Agg
-    | pl_expr.BinaryExpr
-    | pl_expr.Len
-    | pl_expr.PyExprIR
-)
+IR: TypeAlias = Union[
+    pl_ir.PythonScan,
+    pl_ir.Scan,
+    pl_ir.Cache,
+    pl_ir.DataFrameScan,
+    pl_ir.Select,
+    pl_ir.GroupBy,
+    pl_ir.Join,
+
pl_ir.HStack, + pl_ir.Distinct, + pl_ir.Sort, + pl_ir.Slice, + pl_ir.Filter, + pl_ir.SimpleProjection, + pl_ir.MapFunction, + pl_ir.Union, + pl_ir.HConcat, + pl_ir.ExtContext, +] + +Expr: TypeAlias = Union[ + pl_expr.Function, + pl_expr.Window, + pl_expr.Literal, + pl_expr.Sort, + pl_expr.SortBy, + pl_expr.Gather, + pl_expr.Filter, + pl_expr.Cast, + pl_expr.Column, + pl_expr.Agg, + pl_expr.BinaryExpr, + pl_expr.Len, + pl_expr.PyExprIR, +] Schema: TypeAlias = Mapping[str, plc.DataType] diff --git a/python/cudf_polars/tests/expressions/test_agg.py b/python/cudf_polars/tests/expressions/test_agg.py index 2ffa1c4af6d..267d0a99692 100644 --- a/python/cudf_polars/tests/expressions/test_agg.py +++ b/python/cudf_polars/tests/expressions/test_agg.py @@ -52,7 +52,7 @@ def test_agg(df, agg): # https://github.com/rapidsai/cudf/issues/15852 check_dtypes = agg not in {"n_unique", "median"} - if not check_dtypes and q.schema["a"] != pl.Float64: + if not check_dtypes and q.collect_schema()["a"] != pl.Float64: with pytest.raises(AssertionError): assert_gpu_result_equal(q) assert_gpu_result_equal(q, check_dtypes=check_dtypes, check_exact=False) @@ -65,7 +65,7 @@ def test_agg(df, agg): ) @pytest.mark.parametrize("op", ["min", "max"]) def test_agg_float_with_nans(propagate_nans, op): - df = pl.LazyFrame({"a": [1, 2, float("nan")]}) + df = pl.LazyFrame({"a": pl.Series([1, 2, float("nan")], dtype=pl.Float64())}) op = getattr(pl.Expr, f"nan_{op}" if propagate_nans else op) q = df.select(op(pl.col("a"))) diff --git a/python/cudf_polars/tests/expressions/test_booleanfunction.py b/python/cudf_polars/tests/expressions/test_booleanfunction.py index 951b749e670..97421008669 100644 --- a/python/cudf_polars/tests/expressions/test_booleanfunction.py +++ b/python/cudf_polars/tests/expressions/test_booleanfunction.py @@ -6,7 +6,10 @@ import polars as pl -from cudf_polars.testing.asserts import assert_gpu_result_equal +from cudf_polars.testing.asserts import ( + assert_gpu_result_equal, + assert_ir_translation_raises, +) @pytest.fixture(params=[False, True], ids=["no_nulls", "nulls"]) @@ -26,7 +29,7 @@ def has_nulls(request): def test_booleanfunction_reduction(ignore_nulls): ldf = pl.LazyFrame( { - "a": [1, 2, 3.0, 2, 5], + "a": pl.Series([1, 2, 3.0, 2, 5], dtype=pl.Float64()), "b": [0, 3, 1, -1, None], "c": [1, 6, 5, 3, 2], } @@ -67,22 +70,27 @@ def test_boolean_function_unary(request, expr, has_nans, has_nulls): df = pl.LazyFrame({"a": pl.Series(values, dtype=pl.Float32())}) - q = df.select(expr(pl.col("a"))) + q = df.select(expr(pl.col("a")), expr(pl.col("a")).not_().alias("b")) assert_gpu_result_equal(q) -@pytest.mark.xfail(reason="Evaluation handlers not yet implemented") @pytest.mark.parametrize( "expr", [ pl.col("a").is_finite(), pl.col("a").is_infinite(), - pl.col("a").is_in(pl.col("b")), + [pl.col("a").is_infinite(), pl.col("b").is_finite()], ], ) -def test_unsupported_boolean_function(expr): - df = pl.LazyFrame({"a": [1, float("nan"), 2, 4], "b": [1, 2, 3, 4]}) +def test_boolean_finite(expr): + df = pl.LazyFrame( + { + "a": pl.Series([1, float("nan"), 2, float("inf")], dtype=pl.Float64()), + "b": [1, 2, 3, 4], + "c": pl.Series([1, 2, 3, 4], dtype=pl.Float64()), + } + ) q = df.select(expr) @@ -95,7 +103,11 @@ def test_unsupported_boolean_function(expr): ) def test_boolean_isbetween(closed, bounds): df = pl.LazyFrame( - {"a": [1, float("nan"), 2, 4], "lo": [1, 2, 2, 3], "hi": [10, 4, 2, 4]} + { + "a": pl.Series([1, float("nan"), 2, 4], dtype=pl.Float32()), + "lo": [1, 2, 2, 3], + "hi": [10, 4, 2, 4], + } ) q 
= df.select(pl.col("a").is_between(*bounds, closed=closed)) @@ -127,3 +139,33 @@ def test_boolean_horizontal(request, expr, has_nulls, wide): q = ldf.select(expr) assert_gpu_result_equal(q) + + +@pytest.mark.parametrize( + "expr", + [ + pl.col("a").is_in(pl.col("b")), + pl.col("a").is_in(pl.col("c")), + pl.col("c").is_in(pl.col("d")), + ], +) +def test_boolean_is_in(expr): + ldf = pl.LazyFrame( + { + "a": pl.Series([1, 2, 3], dtype=pl.Int64()), + "b": pl.Series([3, 4, 2], dtype=pl.Int64()), + "c": pl.Series([1, None, 3], dtype=pl.Int64()), + "d": pl.Series([10, None, 11], dtype=pl.Int64()), + } + ) + + q = ldf.select(expr) + + assert_gpu_result_equal(q) + + +def test_boolean_is_in_raises_unsupported(): + ldf = pl.LazyFrame({"a": pl.Series([1, 2, 3], dtype=pl.Int64)}) + q = ldf.select(pl.col("a").is_in(pl.lit(1, dtype=pl.Int32()))) + + assert_ir_translation_raises(q, NotImplementedError) diff --git a/python/cudf_polars/tests/expressions/test_rolling.py b/python/cudf_polars/tests/expressions/test_rolling.py index d4920d35f14..992efe0ba79 100644 --- a/python/cudf_polars/tests/expressions/test_rolling.py +++ b/python/cudf_polars/tests/expressions/test_rolling.py @@ -3,11 +3,9 @@ from __future__ import annotations -import pytest - import polars as pl -from cudf_polars import translate_ir +from cudf_polars.testing.asserts import assert_ir_translation_raises def test_rolling(): @@ -29,13 +27,13 @@ def test_rolling(): min_a=pl.min("a").rolling(index_column="dt", period="2d"), max_a=pl.max("a").rolling(index_column="dt", period="2d"), ) - with pytest.raises(NotImplementedError): - _ = translate_ir(q._ldf.visit()) + + assert_ir_translation_raises(q, NotImplementedError) def test_grouped_rolling(): df = pl.LazyFrame({"a": [1, 2, 3, 4, 5, 6], "b": [1, 2, 1, 3, 1, 2]}) q = df.select(pl.col("a").min().over("b")) - with pytest.raises(NotImplementedError): - _ = translate_ir(q._ldf.visit()) + + assert_ir_translation_raises(q, NotImplementedError) diff --git a/python/cudf_polars/tests/expressions/test_stringfunction.py b/python/cudf_polars/tests/expressions/test_stringfunction.py index 3c498fe7286..9729e765948 100644 --- a/python/cudf_polars/tests/expressions/test_stringfunction.py +++ b/python/cudf_polars/tests/expressions/test_stringfunction.py @@ -8,8 +8,11 @@ import polars as pl -from cudf_polars import execute_with_cudf, translate_ir -from cudf_polars.testing.asserts import assert_gpu_result_equal +from cudf_polars import execute_with_cudf +from cudf_polars.testing.asserts import ( + assert_gpu_result_equal, + assert_ir_translation_raises, +) @pytest.fixture @@ -47,22 +50,19 @@ def test_supported_stringfunction_expression(ldf): def test_unsupported_stringfunction(ldf): q = ldf.select(pl.col("a").str.count_matches("e", literal=True)) - with pytest.raises(NotImplementedError): - _ = translate_ir(q._ldf.visit()) + assert_ir_translation_raises(q, NotImplementedError) def test_contains_re_non_strict_raises(ldf): q = ldf.select(pl.col("a").str.contains(".", strict=False)) - with pytest.raises(NotImplementedError): - _ = translate_ir(q._ldf.visit()) + assert_ir_translation_raises(q, NotImplementedError) def test_contains_re_non_literal_raises(ldf): q = ldf.select(pl.col("a").str.contains(pl.col("b"), literal=False)) - with pytest.raises(NotImplementedError): - _ = translate_ir(q._ldf.visit()) + assert_ir_translation_raises(q, NotImplementedError) @pytest.mark.parametrize( diff --git a/python/cudf_polars/tests/expressions/test_when_then.py b/python/cudf_polars/tests/expressions/test_when_then.py new file mode 
100644 index 00000000000..cf1c0fe7fce --- /dev/null +++ b/python/cudf_polars/tests/expressions/test_when_then.py @@ -0,0 +1,27 @@ +# SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. +# SPDX-License-Identifier: Apache-2.0 +from __future__ import annotations + +import pytest + +import polars as pl + +from cudf_polars.testing.asserts import assert_gpu_result_equal + + +@pytest.mark.parametrize("then_scalar", [False, True]) +@pytest.mark.parametrize("otherwise_scalar", [False, True]) +@pytest.mark.parametrize("expr", [pl.col("c"), pl.col("c").is_not_null()]) +def test_when_then(then_scalar, otherwise_scalar, expr): + ldf = pl.LazyFrame( + { + "a": [1, 2, 3, 4, 5, 6, 7], + "b": [10, 13, 11, 15, 16, 11, 10], + "c": [None, True, False, False, True, True, False], + } + ) + + then = pl.lit(10) if then_scalar else pl.col("a") + otherwise = pl.lit(-2) if otherwise_scalar else pl.col("b") + q = ldf.select(pl.when(expr).then(then).otherwise(otherwise)) + assert_gpu_result_equal(q) diff --git a/python/cudf_polars/tests/test_groupby.py b/python/cudf_polars/tests/test_groupby.py index e70f923b097..aefad59eb91 100644 --- a/python/cudf_polars/tests/test_groupby.py +++ b/python/cudf_polars/tests/test_groupby.py @@ -6,8 +6,10 @@ import polars as pl -from cudf_polars import translate_ir -from cudf_polars.testing.asserts import assert_gpu_result_equal +from cudf_polars.testing.asserts import ( + assert_gpu_result_equal, + assert_ir_translation_raises, +) @pytest.fixture @@ -72,7 +74,7 @@ def test_groupby(df: pl.LazyFrame, maintain_order, keys, exprs): q = df.group_by(*keys, maintain_order=maintain_order).agg(*exprs) if not maintain_order: - sort_keys = list(q.schema.keys())[: len(keys)] + sort_keys = list(q.collect_schema().keys())[: len(keys)] q = q.sort(*sort_keys) assert_gpu_result_equal(q, check_exact=False) @@ -97,5 +99,4 @@ def test_groupby_len(df, keys): def test_groupby_unsupported(df, expr): q = df.group_by("key1").agg(expr) - with pytest.raises(NotImplementedError): - _ = translate_ir(q._ldf.visit()) + assert_ir_translation_raises(q, NotImplementedError) diff --git a/python/cudf_polars/tests/test_join.py b/python/cudf_polars/tests/test_join.py index f4a4704f3cc..89f6fd3455b 100644 --- a/python/cudf_polars/tests/test_join.py +++ b/python/cudf_polars/tests/test_join.py @@ -6,7 +6,10 @@ import polars as pl -from cudf_polars.testing.asserts import assert_gpu_result_equal +from cudf_polars.testing.asserts import ( + assert_gpu_result_equal, + assert_ir_translation_raises, +) @pytest.mark.parametrize( @@ -16,10 +19,6 @@ "left", "semi", "anti", - pytest.param( - "cross", - marks=pytest.mark.xfail(reason="cross join not implemented"), - ), "full", ], ) @@ -55,3 +54,34 @@ def test_join(how, coalesce, join_nulls, join_expr): right, on=join_expr, how=how, join_nulls=join_nulls, coalesce=coalesce ) assert_gpu_result_equal(query, check_row_order=False) + + +def test_cross_join(): + left = pl.DataFrame( + { + "a": [1, 2, 3, 1, None], + "b": [1, 2, 3, 4, 5], + "c": [2, 3, 4, 5, 6], + } + ).lazy() + right = pl.DataFrame( + { + "a": [1, 4, 3, 7, None, None], + "c": [2, 3, 4, 5, 6, 7], + } + ).lazy() + + q = left.join(right, how="cross") + + assert_gpu_result_equal(q) + + +@pytest.mark.parametrize( + "left_on,right_on", [(pl.col("a"), pl.lit(2)), (pl.lit(2), pl.col("a"))] +) +def test_join_literal_key_unsupported(left_on, right_on): + left = pl.LazyFrame({"a": [1, 2, 3], "b": [3, 4, 5]}) + right = pl.LazyFrame({"a": [1, 2, 3], "b": [5, 6, 7]}) + q = left.join(right, left_on=left_on, 
right_on=right_on, how="inner") + + assert_ir_translation_raises(q, NotImplementedError) diff --git a/python/cudf_polars/tests/test_mapfunction.py b/python/cudf_polars/tests/test_mapfunction.py index ec6b3f3fc0a..77032108e6f 100644 --- a/python/cudf_polars/tests/test_mapfunction.py +++ b/python/cudf_polars/tests/test_mapfunction.py @@ -6,8 +6,10 @@ import polars as pl -from cudf_polars import translate_ir -from cudf_polars.testing.asserts import assert_gpu_result_equal +from cudf_polars.testing.asserts import ( + assert_gpu_result_equal, + assert_ir_translation_raises, +) def test_merge_sorted_raises(): @@ -17,16 +19,14 @@ def test_merge_sorted_raises(): q = df1.merge_sorted(df2, key="a").merge_sorted(df3, key="a") - with pytest.raises(NotImplementedError): - _ = translate_ir(q._ldf.visit()) + assert_ir_translation_raises(q, NotImplementedError) def test_explode_multiple_raises(): df = pl.LazyFrame({"a": [[1, 2], [3, 4]], "b": [[5, 6], [7, 8]]}) q = df.explode("a", "b") - with pytest.raises(NotImplementedError): - _ = translate_ir(q._ldf.visit()) + assert_ir_translation_raises(q, NotImplementedError) @pytest.mark.parametrize("column", ["a", "b"]) @@ -41,3 +41,23 @@ def test_explode_single(column): q = df.explode(column) assert_gpu_result_equal(q) + + +@pytest.mark.parametrize("mapping", [{"b": "a"}, {"a": "c", "b": "c"}]) +def test_rename_duplicate_raises(mapping): + df = pl.LazyFrame({"a": [1, 2, 3], "b": [3, 4, 5]}) + + q = df.rename(mapping) + + assert_ir_translation_raises(q, NotImplementedError) + + +@pytest.mark.parametrize( + "mapping", [{}, {"b": "c"}, {"b": "a", "a": "b"}, {"a": "c", "b": "d"}] +) +def test_rename_columns(mapping): + df = pl.LazyFrame({"a": [1, 2, 3], "b": [3, 4, 5]}) + + q = df.rename(mapping) + + assert_gpu_result_equal(q) diff --git a/python/cudf_polars/tests/test_python_scan.py b/python/cudf_polars/tests/test_python_scan.py index c03474e3dc8..fd8453b77c4 100644 --- a/python/cudf_polars/tests/test_python_scan.py +++ b/python/cudf_polars/tests/test_python_scan.py @@ -2,11 +2,9 @@ # SPDX-License-Identifier: Apache-2.0 from __future__ import annotations -import pytest - import polars as pl -from cudf_polars import translate_ir +from cudf_polars.testing.asserts import assert_ir_translation_raises def test_python_scan(): @@ -14,7 +12,6 @@ def source(with_columns, predicate, nrows): return pl.DataFrame({"a": pl.Series([1, 2, 3], dtype=pl.Int8())}) q = pl.LazyFrame._scan_python_function({"a": pl.Int8}, source, pyarrow=False) - with pytest.raises(NotImplementedError): - _ = translate_ir(q._ldf.visit()) + assert_ir_translation_raises(q, NotImplementedError) assert q.collect().equals(source(None, None, None)) diff --git a/python/cudf_polars/tests/test_union.py b/python/cudf_polars/tests/test_union.py index 6c9122bc260..b021d832910 100644 --- a/python/cudf_polars/tests/test_union.py +++ b/python/cudf_polars/tests/test_union.py @@ -2,12 +2,12 @@ # SPDX-License-Identifier: Apache-2.0 from __future__ import annotations -import pytest - import polars as pl -from cudf_polars import translate_ir -from cudf_polars.testing.asserts import assert_gpu_result_equal +from cudf_polars.testing.asserts import ( + assert_gpu_result_equal, + assert_ir_translation_raises, +) def test_union(): @@ -31,8 +31,8 @@ def test_union_schema_mismatch_raises(): ).lazy() ldf2 = ldf.select(pl.col("a").cast(pl.Float32)) query = pl.concat([ldf, ldf2], how="diagonal") - with pytest.raises(NotImplementedError): - _ = translate_ir(query._ldf.visit()) + + assert_ir_translation_raises(query, 
NotImplementedError) def test_concat_vertical(): diff --git a/python/dask_cudf/dask_cudf/backends.py b/python/dask_cudf/dask_cudf/backends.py index d250589e389..1f55a59ea55 100644 --- a/python/dask_cudf/dask_cudf/backends.py +++ b/python/dask_cudf/dask_cudf/backends.py @@ -43,7 +43,7 @@ import cudf from cudf.api.types import is_string_dtype -from cudf.utils.nvtx_annotation import _dask_cudf_nvtx_annotate +from cudf.utils.performance_tracking import _dask_cudf_performance_tracking from .core import DataFrame, Index, Series @@ -53,7 +53,7 @@ @meta_nonempty.register(cudf.BaseIndex) -@_dask_cudf_nvtx_annotate +@_dask_cudf_performance_tracking def _nonempty_index(idx): if isinstance(idx, cudf.core.index.RangeIndex): return cudf.core.index.RangeIndex(2, name=idx.name) @@ -100,7 +100,7 @@ def _nest_list_data(data, leaf_type): return data -@_dask_cudf_nvtx_annotate +@_dask_cudf_performance_tracking def _get_non_empty_data(s): if isinstance(s, cudf.core.column.CategoricalColumn): categories = ( @@ -147,7 +147,7 @@ def _get_non_empty_data(s): @meta_nonempty.register(cudf.Series) -@_dask_cudf_nvtx_annotate +@_dask_cudf_performance_tracking def _nonempty_series(s, idx=None): if idx is None: idx = _nonempty_index(s.index) @@ -157,7 +157,7 @@ def _nonempty_series(s, idx=None): @meta_nonempty.register(cudf.DataFrame) -@_dask_cudf_nvtx_annotate +@_dask_cudf_performance_tracking def meta_nonempty_cudf(x): idx = meta_nonempty(x.index) columns_with_dtype = dict() @@ -182,18 +182,18 @@ def meta_nonempty_cudf(x): @make_meta_dispatch.register((cudf.Series, cudf.DataFrame)) -@_dask_cudf_nvtx_annotate +@_dask_cudf_performance_tracking def make_meta_cudf(x, index=None): return x.head(0) @make_meta_dispatch.register(cudf.BaseIndex) -@_dask_cudf_nvtx_annotate +@_dask_cudf_performance_tracking def make_meta_cudf_index(x, index=None): return x[:0] -@_dask_cudf_nvtx_annotate +@_dask_cudf_performance_tracking def _empty_series(name, dtype, index=None): if isinstance(dtype, str) and dtype == "category": return cudf.Series( @@ -203,7 +203,7 @@ def _empty_series(name, dtype, index=None): @make_meta_obj.register(object) -@_dask_cudf_nvtx_annotate +@_dask_cudf_performance_tracking def make_meta_object_cudf(x, index=None): """Create an empty cudf object containing the desired metadata. 
@@ -274,7 +274,7 @@ def make_meta_object_cudf(x, index=None):
 
 
 @concat_dispatch.register((cudf.DataFrame, cudf.Series, cudf.BaseIndex))
-@_dask_cudf_nvtx_annotate
+@_dask_cudf_performance_tracking
 def concat_cudf(
     dfs,
     axis=0,
@@ -299,13 +299,13 @@ def concat_cudf(
 @categorical_dtype_dispatch.register(
     (cudf.DataFrame, cudf.Series, cudf.BaseIndex)
 )
-@_dask_cudf_nvtx_annotate
+@_dask_cudf_performance_tracking
 def categorical_dtype_cudf(categories=None, ordered=False):
     return cudf.CategoricalDtype(categories=categories, ordered=ordered)
 
 
 @tolist_dispatch.register((cudf.Series, cudf.BaseIndex))
-@_dask_cudf_nvtx_annotate
+@_dask_cudf_performance_tracking
 def tolist_cudf(obj):
     return obj.to_pandas().tolist()
 
@@ -313,7 +313,7 @@ def tolist_cudf(obj):
 @is_categorical_dtype_dispatch.register(
     (cudf.Series, cudf.BaseIndex, cudf.CategoricalDtype, Series)
 )
-@_dask_cudf_nvtx_annotate
+@_dask_cudf_performance_tracking
 def is_categorical_dtype_cudf(obj):
     return cudf.api.types._is_categorical_dtype(obj)
 
@@ -324,7 +324,7 @@ def get_grouper_cudf(obj):
 
 
 @percentile_lookup.register((cudf.Series, cp.ndarray, cudf.BaseIndex))
-@_dask_cudf_nvtx_annotate
+@_dask_cudf_performance_tracking
 def percentile_cudf(a, q, interpolation="linear"):
     # Cudf dispatch to the equivalent of `np.percentile`:
     # https://numpy.org/doc/stable/reference/generated/numpy.percentile.html
@@ -400,7 +400,7 @@ def _table_to_cudf(obj, table, self_destruct=None, **kwargs):
 
 
 @union_categoricals_dispatch.register((cudf.Series, cudf.BaseIndex))
-@_dask_cudf_nvtx_annotate
+@_dask_cudf_performance_tracking
 def union_categoricals_cudf(
     to_union, sort_categories=False, ignore_order=False
 ):
@@ -410,7 +410,7 @@ def union_categoricals_cudf(
 
 
 @hash_object_dispatch.register((cudf.DataFrame, cudf.Series))
-@_dask_cudf_nvtx_annotate
+@_dask_cudf_performance_tracking
 def hash_object_cudf(frame, index=True):
     if index:
         frame = frame.reset_index()
@@ -418,7 +418,7 @@ def hash_object_cudf(frame, index=True):
 
 
 @hash_object_dispatch.register(cudf.BaseIndex)
-@_dask_cudf_nvtx_annotate
+@_dask_cudf_performance_tracking
 def hash_object_cudf_index(ind, index=None):
     if isinstance(ind, cudf.MultiIndex):
         return ind.to_frame(index=False).hash_values()
@@ -428,7 +428,7 @@ def hash_object_cudf_index(ind, index=None):
 
 
 @group_split_dispatch.register((cudf.Series, cudf.DataFrame))
-@_dask_cudf_nvtx_annotate
+@_dask_cudf_performance_tracking
 def group_split_cudf(df, c, k, ignore_index=False):
     return dict(
         zip(
@@ -443,7 +443,7 @@ def group_split_cudf(df, c, k, ignore_index=False):
 
 
 @sizeof_dispatch.register(cudf.DataFrame)
-@_dask_cudf_nvtx_annotate
+@_dask_cudf_performance_tracking
 def sizeof_cudf_dataframe(df):
     return int(
         sum(col.memory_usage for col in df._data.columns)
@@ -452,7 +452,7 @@ def sizeof_cudf_dataframe(df):
 
 
 @sizeof_dispatch.register((cudf.Series, cudf.BaseIndex))
-@_dask_cudf_nvtx_annotate
+@_dask_cudf_performance_tracking
 def sizeof_cudf_series_index(obj):
     return obj.memory_usage()
 
diff --git a/python/dask_cudf/dask_cudf/core.py b/python/dask_cudf/dask_cudf/core.py
index 3bd455a3a57..aab56e3a1b0 100644
--- a/python/dask_cudf/dask_cudf/core.py
+++ b/python/dask_cudf/dask_cudf/core.py
@@ -22,7 +22,7 @@
 import cudf
 from cudf import _lib as libcudf
-from cudf.utils.nvtx_annotation import _dask_cudf_nvtx_annotate
+from cudf.utils.performance_tracking import _dask_cudf_performance_tracking
 
 from dask_cudf import sorting
 from dask_cudf.accessors import ListMethods, StructMethods
@@ -53,7 +53,7 @@ def __repr__(self):
         s = "<dask_cudf.%s | %d tasks | %d npartitions>"
         return s % (type(self).__name__,
len(self.dask), self.npartitions) - @_dask_cudf_nvtx_annotate + @_dask_cudf_performance_tracking def to_dask_dataframe(self, **kwargs): """Create a dask.dataframe object from a dask_cudf object @@ -92,7 +92,7 @@ class DataFrame(_Frame, dd.core.DataFrame): _partition_type = cudf.DataFrame - @_dask_cudf_nvtx_annotate + @_dask_cudf_performance_tracking def _assign_column(self, k, v): def assigner(df, k, v): out = df.copy() @@ -102,7 +102,7 @@ def assigner(df, k, v): meta = assigner(self._meta, k, dask_make_meta(v)) return self.map_partitions(assigner, k, v, meta=meta) - @_dask_cudf_nvtx_annotate + @_dask_cudf_performance_tracking def apply_rows(self, func, incols, outcols, kwargs=None, cache_key=None): import uuid @@ -123,7 +123,7 @@ def do_apply_rows(df, func, incols, outcols, kwargs): ) @_deprecate_shuffle_kwarg - @_dask_cudf_nvtx_annotate + @_dask_cudf_performance_tracking def merge(self, other, shuffle_method=None, **kwargs): on = kwargs.pop("on", None) if isinstance(on, tuple): @@ -136,7 +136,7 @@ def merge(self, other, shuffle_method=None, **kwargs): ) @_deprecate_shuffle_kwarg - @_dask_cudf_nvtx_annotate + @_dask_cudf_performance_tracking def join(self, other, shuffle_method=None, **kwargs): # CuDF doesn't support "right" join yet how = kwargs.pop("how", "left") @@ -155,7 +155,7 @@ def join(self, other, shuffle_method=None, **kwargs): ) @_deprecate_shuffle_kwarg - @_dask_cudf_nvtx_annotate + @_dask_cudf_performance_tracking def set_index( self, other, @@ -237,7 +237,7 @@ def set_index( ) @_deprecate_shuffle_kwarg - @_dask_cudf_nvtx_annotate + @_dask_cudf_performance_tracking def sort_values( self, by, @@ -275,14 +275,14 @@ def sort_values( return df.reset_index(drop=True) return df - @_dask_cudf_nvtx_annotate + @_dask_cudf_performance_tracking def to_parquet(self, path, *args, **kwargs): """Calls dask.dataframe.io.to_parquet with CudfEngine backend""" from dask_cudf.io import to_parquet return to_parquet(self, path, *args, **kwargs) - @_dask_cudf_nvtx_annotate + @_dask_cudf_performance_tracking def to_orc(self, path, **kwargs): """Calls dask_cudf.io.to_orc""" from dask_cudf.io import to_orc @@ -290,7 +290,7 @@ def to_orc(self, path, **kwargs): return to_orc(self, path, **kwargs) @derived_from(pd.DataFrame) - @_dask_cudf_nvtx_annotate + @_dask_cudf_performance_tracking def var( self, axis=None, @@ -324,28 +324,28 @@ def var( return _parallel_var(self, meta, skipna, split_every, out) @_deprecate_shuffle_kwarg - @_dask_cudf_nvtx_annotate + @_dask_cudf_performance_tracking def shuffle(self, *args, shuffle_method=None, **kwargs): """Wraps dask.dataframe DataFrame.shuffle method""" return super().shuffle( *args, shuffle_method=_get_shuffle_method(shuffle_method), **kwargs ) - @_dask_cudf_nvtx_annotate + @_dask_cudf_performance_tracking def groupby(self, by=None, **kwargs): from .groupby import CudfDataFrameGroupBy return CudfDataFrameGroupBy(self, by=by, **kwargs) -@_dask_cudf_nvtx_annotate +@_dask_cudf_performance_tracking def sum_of_squares(x): x = x.astype("f8")._column outcol = libcudf.reduce.reduce("sum_of_squares", x) return cudf.Series(outcol) -@_dask_cudf_nvtx_annotate +@_dask_cudf_performance_tracking def var_aggregate(x2, x, n, ddof): try: with warnings.catch_warnings(record=True): @@ -358,12 +358,12 @@ def var_aggregate(x2, x, n, ddof): return np.float64(np.nan) -@_dask_cudf_nvtx_annotate +@_dask_cudf_performance_tracking def nlargest_agg(x, **kwargs): return cudf.concat(x).nlargest(**kwargs) -@_dask_cudf_nvtx_annotate +@_dask_cudf_performance_tracking def nsmallest_agg(x, 
**kwargs): return cudf.concat(x).nsmallest(**kwargs) @@ -371,7 +371,7 @@ def nsmallest_agg(x, **kwargs): class Series(_Frame, dd.core.Series): _partition_type = cudf.Series - @_dask_cudf_nvtx_annotate + @_dask_cudf_performance_tracking def count(self, split_every=False): return reduction( [self], @@ -381,14 +381,14 @@ def count(self, split_every=False): meta="i8", ) - @_dask_cudf_nvtx_annotate + @_dask_cudf_performance_tracking def mean(self, split_every=False): sum = self.sum(split_every=split_every) n = self.count(split_every=split_every) return sum / n @derived_from(pd.DataFrame) - @_dask_cudf_nvtx_annotate + @_dask_cudf_performance_tracking def var( self, axis=None, @@ -417,19 +417,19 @@ def var( else: return _parallel_var(self, meta, skipna, split_every, out) - @_dask_cudf_nvtx_annotate + @_dask_cudf_performance_tracking def groupby(self, *args, **kwargs): from .groupby import CudfSeriesGroupBy return CudfSeriesGroupBy(self, *args, **kwargs) @property # type: ignore - @_dask_cudf_nvtx_annotate + @_dask_cudf_performance_tracking def list(self): return ListMethods(self) @property # type: ignore - @_dask_cudf_nvtx_annotate + @_dask_cudf_performance_tracking def struct(self): return StructMethods(self) @@ -438,7 +438,7 @@ class Index(Series, dd.core.Index): _partition_type = cudf.Index # type: ignore -@_dask_cudf_nvtx_annotate +@_dask_cudf_performance_tracking def _naive_var(ddf, meta, skipna, ddof, split_every, out): num = ddf._get_numeric_data() x = 1.0 * num.sum(skipna=skipna, split_every=split_every) @@ -453,7 +453,7 @@ def _naive_var(ddf, meta, skipna, ddof, split_every, out): return handle_out(out, result) -@_dask_cudf_nvtx_annotate +@_dask_cudf_performance_tracking def _parallel_var(ddf, meta, skipna, split_every, out): def _local_var(x, skipna): if skipna: @@ -520,7 +520,7 @@ def _finalize_var(vals): return handle_out(out, result) -@_dask_cudf_nvtx_annotate +@_dask_cudf_performance_tracking def _extract_meta(x): """ Extract internal cache data (``_meta``) from dask_cudf objects @@ -536,7 +536,7 @@ def _extract_meta(x): return x -@_dask_cudf_nvtx_annotate +@_dask_cudf_performance_tracking def _emulate(func, *args, **kwargs): """ Apply a function using args / kwargs. If arguments contain dd.DataFrame / @@ -546,7 +546,7 @@ def _emulate(func, *args, **kwargs): return func(*_extract_meta(args), **_extract_meta(kwargs)) -@_dask_cudf_nvtx_annotate +@_dask_cudf_performance_tracking def align_partitions(args): """Align partitions between dask_cudf objects. 
@@ -563,7 +563,7 @@ def align_partitions(args): return args -@_dask_cudf_nvtx_annotate +@_dask_cudf_performance_tracking def reduction( args, chunk=None, @@ -702,7 +702,7 @@ def reduction( return dd.core.new_dd_object(graph, b, meta, (None, None)) -@_dask_cudf_nvtx_annotate +@_dask_cudf_performance_tracking def from_cudf(data, npartitions=None, chunksize=None, sort=True, name=None): from dask_cudf import QUERY_PLANNING_ON @@ -746,7 +746,7 @@ def from_cudf(data, npartitions=None, chunksize=None, sort=True, name=None): ) -@_dask_cudf_nvtx_annotate +@_dask_cudf_performance_tracking def from_dask_dataframe(df): """ Convert a Dask :class:`dask.dataframe.DataFrame` to a Dask-cuDF diff --git a/python/dask_cudf/dask_cudf/groupby.py b/python/dask_cudf/dask_cudf/groupby.py index 2e72461b43d..bbbcde17b51 100644 --- a/python/dask_cudf/dask_cudf/groupby.py +++ b/python/dask_cudf/dask_cudf/groupby.py @@ -16,7 +16,7 @@ import cudf from cudf.core.groupby.groupby import _deprecate_collect -from cudf.utils.nvtx_annotation import _dask_cudf_nvtx_annotate +from cudf.utils.performance_tracking import _dask_cudf_performance_tracking from dask_cudf.sorting import _deprecate_shuffle_kwarg @@ -56,13 +56,13 @@ def wrapper(*args, **kwargs): class CudfDataFrameGroupBy(DataFrameGroupBy): - @_dask_cudf_nvtx_annotate + @_dask_cudf_performance_tracking def __init__(self, *args, sort=None, **kwargs): self.sep = kwargs.pop("sep", "___") self.as_index = kwargs.pop("as_index", True) super().__init__(*args, sort=sort, **kwargs) - @_dask_cudf_nvtx_annotate + @_dask_cudf_performance_tracking def __getitem__(self, key): if isinstance(key, list): g = CudfDataFrameGroupBy( @@ -84,7 +84,7 @@ def __getitem__(self, key): g._meta = g._meta[key] return g - @_dask_cudf_nvtx_annotate + @_dask_cudf_performance_tracking def _make_groupby_method_aggs(self, agg_name): """Create aggs dictionary for aggregation methods""" @@ -92,7 +92,7 @@ def _make_groupby_method_aggs(self, agg_name): return {c: agg_name for c in self.obj.columns if c not in self.by} return {c: agg_name for c in self.obj.columns if c != self.by} - @_dask_cudf_nvtx_annotate + @_dask_cudf_performance_tracking @_check_groupby_optimized def count(self, split_every=None, split_out=1): return _make_groupby_agg_call( @@ -102,7 +102,7 @@ def count(self, split_every=None, split_out=1): split_out, ) - @_dask_cudf_nvtx_annotate + @_dask_cudf_performance_tracking @_check_groupby_optimized def mean(self, split_every=None, split_out=1): return _make_groupby_agg_call( @@ -112,7 +112,7 @@ def mean(self, split_every=None, split_out=1): split_out, ) - @_dask_cudf_nvtx_annotate + @_dask_cudf_performance_tracking @_check_groupby_optimized def std(self, split_every=None, split_out=1): return _make_groupby_agg_call( @@ -122,7 +122,7 @@ def std(self, split_every=None, split_out=1): split_out, ) - @_dask_cudf_nvtx_annotate + @_dask_cudf_performance_tracking @_check_groupby_optimized def var(self, split_every=None, split_out=1): return _make_groupby_agg_call( @@ -132,7 +132,7 @@ def var(self, split_every=None, split_out=1): split_out, ) - @_dask_cudf_nvtx_annotate + @_dask_cudf_performance_tracking @_check_groupby_optimized def sum(self, split_every=None, split_out=1): return _make_groupby_agg_call( @@ -142,7 +142,7 @@ def sum(self, split_every=None, split_out=1): split_out, ) - @_dask_cudf_nvtx_annotate + @_dask_cudf_performance_tracking @_check_groupby_optimized def min(self, split_every=None, split_out=1): return _make_groupby_agg_call( @@ -152,7 +152,7 @@ def min(self, split_every=None, 
split_out=1): split_out, ) - @_dask_cudf_nvtx_annotate + @_dask_cudf_performance_tracking @_check_groupby_optimized def max(self, split_every=None, split_out=1): return _make_groupby_agg_call( @@ -162,7 +162,7 @@ def max(self, split_every=None, split_out=1): split_out, ) - @_dask_cudf_nvtx_annotate + @_dask_cudf_performance_tracking @_check_groupby_optimized def collect(self, split_every=None, split_out=1): _deprecate_collect() @@ -173,7 +173,7 @@ def collect(self, split_every=None, split_out=1): split_out, ) - @_dask_cudf_nvtx_annotate + @_dask_cudf_performance_tracking @_check_groupby_optimized def first(self, split_every=None, split_out=1): return _make_groupby_agg_call( @@ -183,7 +183,7 @@ def first(self, split_every=None, split_out=1): split_out, ) - @_dask_cudf_nvtx_annotate + @_dask_cudf_performance_tracking @_check_groupby_optimized def last(self, split_every=None, split_out=1): return _make_groupby_agg_call( @@ -194,7 +194,7 @@ def last(self, split_every=None, split_out=1): ) @_deprecate_shuffle_kwarg - @_dask_cudf_nvtx_annotate + @_dask_cudf_performance_tracking def aggregate( self, arg, split_every=None, split_out=1, shuffle_method=None ): @@ -231,13 +231,13 @@ def aggregate( class CudfSeriesGroupBy(SeriesGroupBy): - @_dask_cudf_nvtx_annotate + @_dask_cudf_performance_tracking def __init__(self, *args, sort=None, **kwargs): self.sep = kwargs.pop("sep", "___") self.as_index = kwargs.pop("as_index", True) super().__init__(*args, sort=sort, **kwargs) - @_dask_cudf_nvtx_annotate + @_dask_cudf_performance_tracking @_check_groupby_optimized def count(self, split_every=None, split_out=1): return _make_groupby_agg_call( @@ -247,7 +247,7 @@ def count(self, split_every=None, split_out=1): split_out, )[self._slice] - @_dask_cudf_nvtx_annotate + @_dask_cudf_performance_tracking @_check_groupby_optimized def mean(self, split_every=None, split_out=1): return _make_groupby_agg_call( @@ -257,7 +257,7 @@ def mean(self, split_every=None, split_out=1): split_out, )[self._slice] - @_dask_cudf_nvtx_annotate + @_dask_cudf_performance_tracking @_check_groupby_optimized def std(self, split_every=None, split_out=1): return _make_groupby_agg_call( @@ -267,7 +267,7 @@ def std(self, split_every=None, split_out=1): split_out, )[self._slice] - @_dask_cudf_nvtx_annotate + @_dask_cudf_performance_tracking @_check_groupby_optimized def var(self, split_every=None, split_out=1): return _make_groupby_agg_call( @@ -277,7 +277,7 @@ def var(self, split_every=None, split_out=1): split_out, )[self._slice] - @_dask_cudf_nvtx_annotate + @_dask_cudf_performance_tracking @_check_groupby_optimized def sum(self, split_every=None, split_out=1): return _make_groupby_agg_call( @@ -287,7 +287,7 @@ def sum(self, split_every=None, split_out=1): split_out, )[self._slice] - @_dask_cudf_nvtx_annotate + @_dask_cudf_performance_tracking @_check_groupby_optimized def min(self, split_every=None, split_out=1): return _make_groupby_agg_call( @@ -297,7 +297,7 @@ def min(self, split_every=None, split_out=1): split_out, )[self._slice] - @_dask_cudf_nvtx_annotate + @_dask_cudf_performance_tracking @_check_groupby_optimized def max(self, split_every=None, split_out=1): return _make_groupby_agg_call( @@ -307,7 +307,7 @@ def max(self, split_every=None, split_out=1): split_out, )[self._slice] - @_dask_cudf_nvtx_annotate + @_dask_cudf_performance_tracking @_check_groupby_optimized def collect(self, split_every=None, split_out=1): _deprecate_collect() @@ -318,7 +318,7 @@ def collect(self, split_every=None, split_out=1): split_out, )[self._slice] - 
-    @_dask_cudf_nvtx_annotate
+    @_dask_cudf_performance_tracking
     @_check_groupby_optimized
     def first(self, split_every=None, split_out=1):
         return _make_groupby_agg_call(
@@ -328,7 +328,7 @@ def first(self, split_every=None, split_out=1):
             split_out,
         )[self._slice]
 
-    @_dask_cudf_nvtx_annotate
+    @_dask_cudf_performance_tracking
     @_check_groupby_optimized
     def last(self, split_every=None, split_out=1):
         return _make_groupby_agg_call(
@@ -339,7 +339,7 @@ def last(self, split_every=None, split_out=1):
         )[self._slice]
 
     @_deprecate_shuffle_kwarg
-    @_dask_cudf_nvtx_annotate
+    @_dask_cudf_performance_tracking
     def aggregate(
         self, arg, split_every=None, split_out=1, shuffle_method=None
     ):
@@ -429,7 +429,7 @@ def _shuffle_aggregate(
     return result
 
 
-@_dask_cudf_nvtx_annotate
+@_dask_cudf_performance_tracking
 def groupby_agg(
     ddf,
     gb_cols,
@@ -641,7 +641,7 @@ def groupby_agg(
     )
 
 
-@_dask_cudf_nvtx_annotate
+@_dask_cudf_performance_tracking
 def _make_groupby_agg_call(
     gb, aggs, split_every, split_out, shuffle_method=None
 ):
@@ -663,7 +663,7 @@ def _make_groupby_agg_call(
     )
 
 
-@_dask_cudf_nvtx_annotate
+@_dask_cudf_performance_tracking
 def _redirect_aggs(arg):
     """Redirect aggregations to their corresponding name in cuDF"""
     redirects = {
@@ -690,7 +690,7 @@ def _redirect_aggs(arg):
     return redirects.get(arg, arg)
 
 
-@_dask_cudf_nvtx_annotate
+@_dask_cudf_performance_tracking
 def _aggs_optimized(arg, supported: set):
     """Check that aggregations in `arg` are a subset of `supported`"""
     if isinstance(arg, (list, dict)):
@@ -712,7 +712,7 @@ def _aggs_optimized(arg, supported: set):
     return False
 
 
-@_dask_cudf_nvtx_annotate
+@_dask_cudf_performance_tracking
 def _groupby_optimized(gb):
     """Check that groupby input can use dask-cudf optimized codepath"""
     return isinstance(gb.obj, DaskDataFrame) and (
@@ -730,7 +730,7 @@ def _make_name(col_name, sep="_"):
     return sep.join(name for name in col_name if name != "")
 
 
-@_dask_cudf_nvtx_annotate
+@_dask_cudf_performance_tracking
 def _groupby_partition_agg(df, gb_cols, aggs, columns, dropna, sort, sep):
     """Initial partition-level aggregation task.
 
@@ -768,7 +768,7 @@ def _groupby_partition_agg(df, gb_cols, aggs, columns, dropna, sort, sep):
     return gb[sorted(output_columns)]
 
 
-@_dask_cudf_nvtx_annotate
+@_dask_cudf_performance_tracking
 def _tree_node_agg(df, gb_cols, dropna, sort, sep):
     """Node in groupby-aggregation reduction tree.
 
@@ -807,7 +807,7 @@ def _tree_node_agg(df, gb_cols, dropna, sort, sep):
     return gb[sorted(output_columns)]
 
 
-@_dask_cudf_nvtx_annotate
+@_dask_cudf_performance_tracking
 def _var_agg(df, col, count_name, sum_name, pow2_sum_name, ddof=1):
     """Calculate variance (given count, sum, and sum-squared columns)."""
 
@@ -829,7 +829,7 @@ def _var_agg(df, col, count_name, sum_name, pow2_sum_name, ddof=1):
     return var
 
 
-@_dask_cudf_nvtx_annotate
+@_dask_cudf_performance_tracking
 def _finalize_gb_agg(
     gb_in,
     gb_cols,
diff --git a/python/dask_cudf/dask_cudf/sorting.py b/python/dask_cudf/dask_cudf/sorting.py
index f3774e20d32..a2ba4d1878e 100644
--- a/python/dask_cudf/dask_cudf/sorting.py
+++ b/python/dask_cudf/dask_cudf/sorting.py
@@ -18,7 +18,7 @@
 
 import cudf
 from cudf.api.types import _is_categorical_dtype
-from cudf.utils.nvtx_annotation import _dask_cudf_nvtx_annotate
+from cudf.utils.performance_tracking import _dask_cudf_performance_tracking
 
 _SHUFFLE_SUPPORT = ("tasks", "p2p")  # "disk" not supported
 
@@ -48,14 +48,14 @@ def wrapper(*args, **kwargs):
     return wrapper
 
 
-@_dask_cudf_nvtx_annotate
+@_dask_cudf_performance_tracking
 def set_index_post(df, index_name, drop, column_dtype):
     df2 = df.set_index(index_name, drop=drop)
     df2.columns = df2.columns.astype(column_dtype)
     return df2
 
 
-@_dask_cudf_nvtx_annotate
+@_dask_cudf_performance_tracking
 def _set_partitions_pre(s, divisions, ascending=True, na_position="last"):
     if ascending:
         partitions = divisions.searchsorted(s, side="right") - 1
@@ -72,7 +72,7 @@ def _set_partitions_pre(s, divisions, ascending=True, na_position="last"):
     return partitions
 
 
-@_dask_cudf_nvtx_annotate
+@_dask_cudf_performance_tracking
 def _quantile(a, q):
     n = len(a)
     if not len(a):
@@ -83,7 +83,7 @@ def _quantile(a, q):
     )
 
 
-@_dask_cudf_nvtx_annotate
+@_dask_cudf_performance_tracking
 def merge_quantiles(finalq, qs, vals):
     """Combine several quantile calculations of different data.
     [NOTE: Same logic as dask.array merge_percentiles]
@@ -146,7 +146,7 @@ def _append_counts(val, count):
     return rv.reset_index(drop=True)
 
 
-@_dask_cudf_nvtx_annotate
+@_dask_cudf_performance_tracking
 def _approximate_quantile(df, q):
     """Approximate quantiles of DataFrame or Series.
     [NOTE: Same logic as dask.dataframe Series quantile]
@@ -220,7 +220,7 @@ def set_quantile_index(df):
         return df
 
 
-@_dask_cudf_nvtx_annotate
+@_dask_cudf_performance_tracking
 def quantile_divisions(df, by, npartitions):
     qn = np.linspace(0.0, 1.0, npartitions + 1).tolist()
     divisions = _approximate_quantile(df[by], qn).compute()
@@ -257,7 +257,7 @@ def quantile_divisions(df, by, npartitions):
 
 
 @_deprecate_shuffle_kwarg
-@_dask_cudf_nvtx_annotate
+@_dask_cudf_performance_tracking
 def sort_values(
     df,
     by,
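
Note for reviewers (not part of the applied patch): every hunk above is the same mechanical rename, swapping the NVTX-based `_dask_cudf_nvtx_annotate` decorator for `_dask_cudf_performance_tracking`, now imported from `cudf.utils.performance_tracking` rather than `cudf.utils.nvtx_annotation`. For context, below is a minimal sketch of how a decorator of this shape can be built on the `nvtx` PyPI package; `_performance_tracking_sketch` and `_demo` are illustrative names, not the cudf implementation (the real decorator also integrates with cudf's memory-profiling option).

    import functools

    import nvtx  # assumes the `nvtx` PyPI package is installed


    def _performance_tracking_sketch(func, domain="dask_cudf_python"):
        # Wrap `func` in an NVTX range named after the function, so each
        # call shows up in an Nsight Systems timeline under `domain`.
        @functools.wraps(func)
        def wrapper(*args, **kwargs):
            with nvtx.annotate(message=func.__qualname__, domain=domain):
                return func(*args, **kwargs)

        return wrapper


    @_performance_tracking_sketch
    def _demo(x):
        # Hypothetical stand-in for any decorated dask_cudf function.
        return x + 1


    print(_demo(1))  # prints 2; the call is bracketed by an NVTX range when profiled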