diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 98c2ec0a22e..4fbc28fa6e1 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -71,15 +71,14 @@ for a minimal build of libcudf without using conda are also listed below. Compilers: -* `gcc` version 9.3+ -* `nvcc` version 11.5+ -* `cmake` version 3.26.4+ +* `gcc` version 11.4+ +* `nvcc` version 11.8+ +* `cmake` version 3.29.6+ -CUDA/GPU: +CUDA/GPU Runtime: -* CUDA 11.5+ -* NVIDIA driver 450.80.02+ -* Volta architecture or better (Compute Capability >=7.0) +* CUDA 11.4+ +* Volta architecture or better ([Compute Capability](https://docs.nvidia.com/deploy/cuda-compatibility/) >=7.0) You can obtain CUDA from [https://developer.nvidia.com/cuda-downloads](https://developer.nvidia.com/cuda-downloads). diff --git a/cpp/benchmarks/io/parquet/parquet_reader_input.cpp b/cpp/benchmarks/io/parquet/parquet_reader_input.cpp index 019e0f30fe9..7563c823454 100644 --- a/cpp/benchmarks/io/parquet/parquet_reader_input.cpp +++ b/cpp/benchmarks/io/parquet/parquet_reader_input.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022-2023, NVIDIA CORPORATION. + * Copyright (c) 2022-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -59,20 +59,18 @@ void parquet_read_common(cudf::size_type num_rows_to_read, } template -void BM_parquet_read_data(nvbench::state& state, nvbench::type_list>) +void BM_parquet_read_data_common(nvbench::state& state, + data_profile const& profile, + nvbench::type_list>) { auto const d_type = get_type_or_group(static_cast(DataType)); - auto const cardinality = static_cast(state.get_int64("cardinality")); - auto const run_length = static_cast(state.get_int64("run_length")); auto const source_type = retrieve_io_type_enum(state.get_string("io_type")); auto const compression = cudf::io::compression_type::SNAPPY; cuio_source_sink_pair source_sink(source_type); auto const num_rows_written = [&]() { - auto const tbl = create_random_table( - cycle_dtypes(d_type, num_cols), - table_size_bytes{data_size}, - data_profile_builder().cardinality(cardinality).avg_run_length(run_length)); + auto const tbl = + create_random_table(cycle_dtypes(d_type, num_cols), table_size_bytes{data_size}, profile); auto const view = tbl->view(); cudf::io::parquet_writer_options write_opts = @@ -85,6 +83,32 @@ void BM_parquet_read_data(nvbench::state& state, nvbench::type_list +void BM_parquet_read_data(nvbench::state& state, + nvbench::type_list> type_list) +{ + auto const cardinality = static_cast(state.get_int64("cardinality")); + auto const run_length = static_cast(state.get_int64("run_length")); + BM_parquet_read_data_common( + state, data_profile_builder().cardinality(cardinality).avg_run_length(run_length), type_list); +} + +template +void BM_parquet_read_fixed_width_struct(nvbench::state& state, + nvbench::type_list> type_list) +{ + auto const cardinality = static_cast(state.get_int64("cardinality")); + auto const run_length = static_cast(state.get_int64("run_length")); + std::vector s_types{ + cudf::type_id::INT32, cudf::type_id::FLOAT32, cudf::type_id::INT64}; + BM_parquet_read_data_common(state, + data_profile_builder() + .cardinality(cardinality) + .avg_run_length(run_length) + .struct_types(s_types), + type_list); +} + void BM_parquet_read_io_compression(nvbench::state& state) { auto const d_type = get_type_or_group({static_cast(data_type::INTEGRAL), @@ -247,3 +271,13 @@ NVBENCH_BENCH(BM_parquet_read_io_small_mixed) .add_int64_axis("cardinality", {0, 1000}) 
.add_int64_axis("run_length", {1, 32}) .add_int64_axis("num_string_cols", {1, 2, 3}); + +// a benchmark for structs that only contain fixed-width types +using d_type_list_struct_only = nvbench::enum_type_list; +NVBENCH_BENCH_TYPES(BM_parquet_read_fixed_width_struct, NVBENCH_TYPE_AXES(d_type_list_struct_only)) + .set_name("parquet_read_fixed_width_struct") + .set_type_axes_names({"data_type"}) + .add_string_axis("io_type", {"DEVICE_BUFFER"}) + .set_min_samples(4) + .add_int64_axis("cardinality", {0, 1000}) + .add_int64_axis("run_length", {1, 32}); diff --git a/cpp/cmake/thirdparty/get_cucollections.cmake b/cpp/cmake/thirdparty/get_cucollections.cmake index 9758958b44f..6ec35ddcaf1 100644 --- a/cpp/cmake/thirdparty/get_cucollections.cmake +++ b/cpp/cmake/thirdparty/get_cucollections.cmake @@ -1,5 +1,5 @@ # ============================================================================= -# Copyright (c) 2021-2022, NVIDIA CORPORATION. +# Copyright (c) 2021-2024, NVIDIA CORPORATION. # # Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except # in compliance with the License. You may obtain a copy of the License at @@ -15,6 +15,11 @@ # This function finds cuCollections and performs any additional configuration. function(find_and_configure_cucollections) include(${rapids-cmake-dir}/cpm/cuco.cmake) + include(${rapids-cmake-dir}/cpm/package_override.cmake) + + set(cudf_patch_dir "${CMAKE_CURRENT_FUNCTION_LIST_DIR}/patches") + rapids_cpm_package_override("${cudf_patch_dir}/cuco_override.json") + if(BUILD_SHARED_LIBS) rapids_cpm_cuco(BUILD_EXPORT_SET cudf-exports) else() diff --git a/cpp/cmake/thirdparty/patches/cuco_noexcept.diff b/cpp/cmake/thirdparty/patches/cuco_noexcept.diff new file mode 100644 index 00000000000..0f334c0e81f --- /dev/null +++ b/cpp/cmake/thirdparty/patches/cuco_noexcept.diff @@ -0,0 +1,227 @@ +diff --git a/include/cuco/aow_storage.cuh b/include/cuco/aow_storage.cuh +index 7f9de01..5228193 100644 +--- a/include/cuco/aow_storage.cuh ++++ b/include/cuco/aow_storage.cuh +@@ -81,7 +81,7 @@ class aow_storage : public detail::aow_storage_base { + * @param size Number of windows to (de)allocate + * @param allocator Allocator used for (de)allocating device storage + */ +- explicit constexpr aow_storage(Extent size, Allocator const& allocator = {}) noexcept; ++ explicit constexpr aow_storage(Extent size, Allocator const& allocator = {}); + + aow_storage(aow_storage&&) = default; ///< Move constructor + /** +@@ -122,7 +122,7 @@ class aow_storage : public detail::aow_storage_base { + * @param key Key to which all keys in `slots` are initialized + * @param stream Stream used for executing the kernel + */ +- void initialize(value_type key, cuda_stream_ref stream = {}) noexcept; ++ void initialize(value_type key, cuda_stream_ref stream = {}); + + /** + * @brief Asynchronously initializes each slot in the AoW storage to contain `key`. 
+diff --git a/include/cuco/detail/open_addressing/open_addressing_impl.cuh b/include/cuco/detail/open_addressing/open_addressing_impl.cuh +index c2c9c14..8ac4236 100644 +--- a/include/cuco/detail/open_addressing/open_addressing_impl.cuh ++++ b/include/cuco/detail/open_addressing/open_addressing_impl.cuh +@@ -125,7 +125,7 @@ class open_addressing_impl { + KeyEqual const& pred, + ProbingScheme const& probing_scheme, + Allocator const& alloc, +- cuda_stream_ref stream) noexcept ++ cuda_stream_ref stream) + : empty_slot_sentinel_{empty_slot_sentinel}, + erased_key_sentinel_{this->extract_key(empty_slot_sentinel)}, + predicate_{pred}, +@@ -233,7 +233,7 @@ class open_addressing_impl { + * + * @param stream CUDA stream this operation is executed in + */ +- void clear(cuda_stream_ref stream) noexcept { storage_.initialize(empty_slot_sentinel_, stream); } ++ void clear(cuda_stream_ref stream) { storage_.initialize(empty_slot_sentinel_, stream); } + + /** + * @brief Asynchronously erases all elements from the container. After this call, `size()` returns +@@ -599,7 +599,7 @@ class open_addressing_impl { + * + * @return The number of elements in the container + */ +- [[nodiscard]] size_type size(cuda_stream_ref stream) const noexcept ++ [[nodiscard]] size_type size(cuda_stream_ref stream) const + { + auto counter = + detail::counter_storage{this->allocator()}; +diff --git a/include/cuco/detail/static_map/static_map.inl b/include/cuco/detail/static_map/static_map.inl +index e17a145..3fa1d02 100644 +--- a/include/cuco/detail/static_map/static_map.inl ++++ b/include/cuco/detail/static_map/static_map.inl +@@ -123,7 +123,7 @@ template + void static_map::clear( +- cuda_stream_ref stream) noexcept ++ cuda_stream_ref stream) + { + impl_->clear(stream); + } +@@ -215,7 +215,7 @@ template + template + void static_map:: +- insert_or_assign(InputIt first, InputIt last, cuda_stream_ref stream) noexcept ++ insert_or_assign(InputIt first, InputIt last, cuda_stream_ref stream) + { + return this->insert_or_assign_async(first, last, stream); + stream.synchronize(); +@@ -465,7 +465,7 @@ template + static_map::size_type + static_map::size( +- cuda_stream_ref stream) const noexcept ++ cuda_stream_ref stream) const + { + return impl_->size(stream); + } +diff --git a/include/cuco/detail/static_multiset/static_multiset.inl b/include/cuco/detail/static_multiset/static_multiset.inl +index 174f9bc..582926b 100644 +--- a/include/cuco/detail/static_multiset/static_multiset.inl ++++ b/include/cuco/detail/static_multiset/static_multiset.inl +@@ -97,7 +97,7 @@ template + void static_multiset::clear( +- cuda_stream_ref stream) noexcept ++ cuda_stream_ref stream) + { + impl_->clear(stream); + } +@@ -183,7 +183,7 @@ template + static_multiset::size_type + static_multiset::size( +- cuda_stream_ref stream) const noexcept ++ cuda_stream_ref stream) const + { + return impl_->size(stream); + } +diff --git a/include/cuco/detail/static_set/static_set.inl b/include/cuco/detail/static_set/static_set.inl +index 645013f..d3cece0 100644 +--- a/include/cuco/detail/static_set/static_set.inl ++++ b/include/cuco/detail/static_set/static_set.inl +@@ -98,7 +98,7 @@ template + void static_set::clear( +- cuda_stream_ref stream) noexcept ++ cuda_stream_ref stream) + { + impl_->clear(stream); + } +@@ -429,7 +429,7 @@ template + static_set::size_type + static_set::size( +- cuda_stream_ref stream) const noexcept ++ cuda_stream_ref stream) const + { + return impl_->size(stream); + } +diff --git a/include/cuco/detail/storage/aow_storage.inl 
b/include/cuco/detail/storage/aow_storage.inl +index 3547f4c..94b7f98 100644 +--- a/include/cuco/detail/storage/aow_storage.inl ++++ b/include/cuco/detail/storage/aow_storage.inl +@@ -32,8 +32,8 @@ + namespace cuco { + + template +-constexpr aow_storage::aow_storage( +- Extent size, Allocator const& allocator) noexcept ++constexpr aow_storage::aow_storage(Extent size, ++ Allocator const& allocator) + : detail::aow_storage_base{size}, + allocator_{allocator}, + window_deleter_{capacity(), allocator_}, +@@ -64,7 +64,7 @@ aow_storage::ref() const noexcept + + template + void aow_storage::initialize(value_type key, +- cuda_stream_ref stream) noexcept ++ cuda_stream_ref stream) + { + this->initialize_async(key, stream); + stream.synchronize(); +diff --git a/include/cuco/static_map.cuh b/include/cuco/static_map.cuh +index c86e90c..95da423 100644 +--- a/include/cuco/static_map.cuh ++++ b/include/cuco/static_map.cuh +@@ -269,7 +269,7 @@ class static_map { + * + * @param stream CUDA stream this operation is executed in + */ +- void clear(cuda_stream_ref stream = {}) noexcept; ++ void clear(cuda_stream_ref stream = {}); + + /** + * @brief Asynchronously erases all elements from the container. After this call, `size()` returns +@@ -387,7 +387,7 @@ class static_map { + * @param stream CUDA stream used for insert + */ + template +- void insert_or_assign(InputIt first, InputIt last, cuda_stream_ref stream = {}) noexcept; ++ void insert_or_assign(InputIt first, InputIt last, cuda_stream_ref stream = {}); + + /** + * @brief For any key-value pair `{k, v}` in the range `[first, last)`, if a key equivalent to `k` +@@ -690,7 +690,7 @@ class static_map { + * @param stream CUDA stream used to get the number of inserted elements + * @return The number of elements in the container + */ +- [[nodiscard]] size_type size(cuda_stream_ref stream = {}) const noexcept; ++ [[nodiscard]] size_type size(cuda_stream_ref stream = {}) const; + + /** + * @brief Gets the maximum number of elements the hash map can hold. +diff --git a/include/cuco/static_multiset.cuh b/include/cuco/static_multiset.cuh +index 0daf103..fbcbc9c 100644 +--- a/include/cuco/static_multiset.cuh ++++ b/include/cuco/static_multiset.cuh +@@ -235,7 +235,7 @@ class static_multiset { + * + * @param stream CUDA stream this operation is executed in + */ +- void clear(cuda_stream_ref stream = {}) noexcept; ++ void clear(cuda_stream_ref stream = {}); + + /** + * @brief Asynchronously erases all elements from the container. After this call, `size()` returns +@@ -339,7 +339,7 @@ class static_multiset { + * @param stream CUDA stream used to get the number of inserted elements + * @return The number of elements in the container + */ +- [[nodiscard]] size_type size(cuda_stream_ref stream = {}) const noexcept; ++ [[nodiscard]] size_type size(cuda_stream_ref stream = {}) const; + + /** + * @brief Gets the maximum number of elements the multiset can hold. +diff --git a/include/cuco/static_set.cuh b/include/cuco/static_set.cuh +index a069939..3517f84 100644 +--- a/include/cuco/static_set.cuh ++++ b/include/cuco/static_set.cuh +@@ -240,7 +240,7 @@ class static_set { + * + * @param stream CUDA stream this operation is executed in + */ +- void clear(cuda_stream_ref stream = {}) noexcept; ++ void clear(cuda_stream_ref stream = {}); + + /** + * @brief Asynchronously erases all elements from the container. 
After this call, `size()` returns +@@ -687,7 +687,7 @@ class static_set { + * @param stream CUDA stream used to get the number of inserted elements + * @return The number of elements in the container + */ +- [[nodiscard]] size_type size(cuda_stream_ref stream = {}) const noexcept; ++ [[nodiscard]] size_type size(cuda_stream_ref stream = {}) const; + + /** + * @brief Gets the maximum number of elements the hash set can hold. diff --git a/cpp/cmake/thirdparty/patches/cuco_override.json b/cpp/cmake/thirdparty/patches/cuco_override.json new file mode 100644 index 00000000000..ae0a9a4b4f0 --- /dev/null +++ b/cpp/cmake/thirdparty/patches/cuco_override.json @@ -0,0 +1,14 @@ + +{ + "packages" : { + "cuco" : { + "patches" : [ + { + "file" : "${current_json_dir}/cuco_noexcept.diff", + "issue" : "Remove erroneous noexcept clauses on cuco functions that may throw [https://github.com/rapidsai/cudf/issues/16059]", + "fixed_in" : "" + } + ] + } + } +} diff --git a/cpp/doxygen/developer_guide/DEVELOPER_GUIDE.md b/cpp/doxygen/developer_guide/DEVELOPER_GUIDE.md index ff80c2daab8..0d097541692 100644 --- a/cpp/doxygen/developer_guide/DEVELOPER_GUIDE.md +++ b/cpp/doxygen/developer_guide/DEVELOPER_GUIDE.md @@ -1,4 +1,4 @@ -# libcudf C++ Developer Guide +# libcudf C++ Developer Guide {#DEVELOPER_GUIDE} This document serves as a guide for contributors to libcudf C++ code. Developers should also refer to these additional files for further documentation of libcudf best practices. @@ -469,7 +469,7 @@ libcudf throws under different circumstances, see the [section on error handling # libcudf API and Implementation -## Streams +## Streams {#streams} libcudf is in the process of adding support for asynchronous execution using CUDA streams. In order to facilitate the usage of streams, all new libcudf APIs @@ -486,33 +486,37 @@ use only asynchronous versions of CUDA APIs with the stream parameter. In order to make the `detail` API callable from other libcudf functions, it should be exposed in a header placed in the `cudf/cpp/include/detail/` directory. +The declaration is not necessary if no other libcudf functions call the `detail` function. For example: ```c++ // cpp/include/cudf/header.hpp -void external_function(...); +void external_function(..., + rmm::cuda_stream_view stream = cudf::get_default_stream(), + rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); // cpp/include/cudf/detail/header.hpp namespace detail{ -void external_function(..., rmm::cuda_stream_view stream) +void external_function(..., rmm::cuda_stream_view stream, rmm::device_async_resource_ref mr) } // namespace detail // cudf/src/implementation.cpp namespace detail{ - // Use the stream parameter in the detail implementation. - void external_function(..., rmm::cuda_stream_view stream){ - // Implementation uses the stream with async APIs. - rmm::device_buffer buff(...,stream); - CUDF_CUDA_TRY(cudaMemcpyAsync(...,stream.value())); - kernel<<<..., stream>>>(...); - thrust::algorithm(rmm::exec_policy(stream), ...); - } +// Use the stream parameter in the detail implementation. +void external_function(..., rmm::cuda_stream_view stream, rmm::device_async_resource_ref mr){ + // Implementation uses the stream with async APIs. 
+ rmm::device_buffer buff(..., stream, mr); + CUDF_CUDA_TRY(cudaMemcpyAsync(...,stream.value())); + kernel<<<..., stream>>>(...); + thrust::algorithm(rmm::exec_policy(stream), ...); +} } // namespace detail -void external_function(...){ - CUDF_FUNC_RANGE(); // Generates an NVTX range for the lifetime of this function. - detail::external_function(..., cudf::get_default_stream()); +void external_function(..., rmm::cuda_stream_view stream, rmm::device_async_resource_ref mr) +{ + CUDF_FUNC_RANGE(); // Generates an NVTX range for the lifetime of this function. + detail::external_function(..., stream, mr); } ``` @@ -703,28 +707,28 @@ The preferred style for how inputs are passed in and outputs are returned is the - `column_view const&` - Tables: - `table_view const&` - - Scalar: - - `scalar const&` - - Everything else: - - Trivial or inexpensively copied types - - Pass by value - - Non-trivial or expensive to copy types - - Pass by `const&` + - Scalar: + - `scalar const&` + - Everything else: + - Trivial or inexpensively copied types + - Pass by value + - Non-trivial or expensive to copy types + - Pass by `const&` - In/Outs - Columns: - `mutable_column_view&` - Tables: - `mutable_table_view&` - - Everything else: - - Pass by via raw pointer + - Everything else: + - Pass by via raw pointer - Outputs - Outputs should be *returned*, i.e., no output parameters - Columns: - `std::unique_ptr` - Tables: - `std::unique_ptr` - - Scalars: - - `std::unique_ptr` + - Scalars: + - `std::unique_ptr` ### Multiple Return Values @@ -908,6 +912,10 @@ functions that are specific to columns of Strings. These functions reside in the namespace. Similarly, functionality used exclusively for unit testing is in the `cudf::test::` namespace. +The public function is expected to contain a call to `CUDF_FUNC_RANGE()` followed by a call to +a `detail` function with same name and parameters as the public function. +See the [Streams](#streams) section for an example of this pattern. 
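As a minimal sketch of the pattern described above (the `make_widget` name and its `detail` counterpart are hypothetical, used only for illustration; they are not part of this diff):

```c++
#include <cudf/column/column.hpp>
#include <cudf/column/column_view.hpp>
#include <cudf/detail/nvtx/ranges.hpp>

#include <rmm/cuda_stream_view.hpp>
#include <rmm/resource_ref.hpp>

namespace cudf {
namespace detail {
// Stream-aware implementation declared in a detail header (definition elsewhere).
std::unique_ptr<column> make_widget(column_view const& input,
                                    rmm::cuda_stream_view stream,
                                    rmm::device_async_resource_ref mr);
}  // namespace detail

// Public API: opens an NVTX range, then forwards to the identically-named detail function.
std::unique_ptr<column> make_widget(column_view const& input,
                                    rmm::cuda_stream_view stream,
                                    rmm::device_async_resource_ref mr)
{
  CUDF_FUNC_RANGE();  // Generates an NVTX range for the lifetime of this function.
  return detail::make_widget(input, stream, mr);  // same name and parameters
}
}  // namespace cudf
```

The public symbol stays a thin, instrumented shim; all stream-aware logic lives in `detail` so other libcudf functions can call it without incurring an extra NVTX range.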
+ ### Internal Many functions are not meant for public use, so place them in either the `detail` or an *anonymous* diff --git a/cpp/include/cudf/binaryop.hpp b/cpp/include/cudf/binaryop.hpp index 5e41a871f32..22dad11e109 100644 --- a/cpp/include/cudf/binaryop.hpp +++ b/cpp/include/cudf/binaryop.hpp @@ -91,6 +91,56 @@ enum class binary_operator : int32_t { ///< (null, false) is null, and (valid, valid) == LOGICAL_OR(valid, valid) INVALID_BINARY ///< invalid operation }; + +/// Binary operation common type default +template +struct binary_op_common_type {}; + +/// Binary operation common type specialization +template +struct binary_op_common_type>> { + /// The common type of the template parameters + using type = std::common_type_t; +}; + +/// Binary operation common type specialization +template +struct binary_op_common_type< + L, + R, + std::enable_if_t() && cuda::std::is_floating_point_v>> { + /// The common type of the template parameters + using type = L; +}; + +/// Binary operation common type specialization +template +struct binary_op_common_type< + L, + R, + std::enable_if_t() && cuda::std::is_floating_point_v>> { + /// The common type of the template parameters + using type = R; +}; + +/// Binary operation common type helper +template +using binary_op_common_type_t = typename binary_op_common_type::type; + +namespace detail { +template +struct binary_op_has_common_type_impl : std::false_type {}; + +template +struct binary_op_has_common_type_impl>, L, R> + : std::true_type {}; +} // namespace detail + +/// Checks if binary operation types have a common type +template +constexpr inline bool binary_op_has_common_type_v = + detail::binary_op_has_common_type_impl::value; + /** * @brief Performs a binary operation between a scalar and a column. * diff --git a/cpp/include/cudf/detail/copy_if.cuh b/cpp/include/cudf/detail/copy_if.cuh index c98057d077a..b6310e6cd2f 100644 --- a/cpp/include/cudf/detail/copy_if.cuh +++ b/cpp/include/cudf/detail/copy_if.cuh @@ -18,7 +18,7 @@ #include #include -#include +#include #include #include #include @@ -242,8 +242,8 @@ struct scatter_gather_functor { rmm::cuda_stream_view stream, rmm::device_async_resource_ref mr) { - auto output_column = cudf::detail::allocate_like( - input, output_size, cudf::mask_allocation_policy::RETAIN, stream, mr); + auto output_column = + cudf::allocate_like(input, output_size, cudf::mask_allocation_policy::RETAIN, stream, mr); auto output = output_column->mutable_view(); bool has_valid = input.nullable(); diff --git a/cpp/include/cudf/detail/distinct_hash_join.cuh b/cpp/include/cudf/detail/distinct_hash_join.cuh index de3d23e9470..1ef8b3b120a 100644 --- a/cpp/include/cudf/detail/distinct_hash_join.cuh +++ b/cpp/include/cudf/detail/distinct_hash_join.cuh @@ -42,6 +42,9 @@ template struct comparator_adapter { comparator_adapter(Equal const& d_equal) : _d_equal{d_equal} {} + // suppress "function was declared but never referenced warning" +#pragma nv_diagnostic push +#pragma nv_diag_suppress 177 __device__ constexpr auto operator()( cuco::pair const&, cuco::pair const&) const noexcept @@ -50,6 +53,14 @@ struct comparator_adapter { return false; } + __device__ constexpr auto operator()( + cuco::pair const&, + cuco::pair const&) const noexcept + { + // All build table keys are distinct thus `false` no matter what + return false; + } + __device__ constexpr auto operator()( cuco::pair const& lhs, cuco::pair const& rhs) const noexcept @@ -58,6 +69,15 @@ struct comparator_adapter { return _d_equal(lhs.second, rhs.second); } + __device__ 
constexpr auto operator()( + cuco::pair const& lhs, + cuco::pair const& rhs) const noexcept + { + if (lhs.first != rhs.first) { return false; } + return _d_equal(lhs.second, rhs.second); + } +#pragma nv_diagnostic pop + private: Equal _d_equal; }; @@ -94,7 +114,7 @@ struct distinct_hash_join { using cuco_storage_type = cuco::storage<1>; /// Hash table type - using hash_table_type = cuco::static_set, + using hash_table_type = cuco::static_set, cuco::extent, cuda::thread_scope_device, comparator_adapter, diff --git a/cpp/include/cudf/detail/gather.cuh b/cpp/include/cudf/detail/gather.cuh index c9d350ce983..5977c7341c1 100644 --- a/cpp/include/cudf/detail/gather.cuh +++ b/cpp/include/cudf/detail/gather.cuh @@ -15,7 +15,7 @@ */ #pragma once -#include +#include #include #include #include @@ -217,10 +217,9 @@ struct column_gatherer_impl(), source_column.size(), @@ -413,8 +412,8 @@ struct column_gatherer_impl { auto keys_copy = std::make_unique(dictionary.keys(), stream, mr); // Perform gather on just the indices column_view indices = dictionary.get_indices_annotated(); - auto new_indices = cudf::detail::allocate_like( - indices, output_count, cudf::mask_allocation_policy::NEVER, stream, mr); + auto new_indices = + cudf::allocate_like(indices, output_count, cudf::mask_allocation_policy::NEVER, stream, mr); gather_helper( cudf::detail::indexalator_factory::make_input_iterator(indices), indices.size(), diff --git a/cpp/include/cudf/detail/stream_compaction.hpp b/cpp/include/cudf/detail/stream_compaction.hpp index e2974789ea1..e3ef4190fd2 100644 --- a/cpp/include/cudf/detail/stream_compaction.hpp +++ b/cpp/include/cudf/detail/stream_compaction.hpp @@ -88,8 +88,6 @@ std::unique_ptr
distinct(table_view const& input, /** * @copydoc cudf::stable_distinct - * - * @param stream CUDA stream used for device memory operations and kernel launches. */ std::unique_ptr<table>
stable_distinct(table_view const& input, std::vector<size_type> const& keys, diff --git a/cpp/include/cudf/lists/lists_column_view.hpp b/cpp/include/cudf/lists/lists_column_view.hpp index 57a4f724c2d..3397cb0ca1d 100644 --- a/cpp/include/cudf/lists/lists_column_view.hpp +++ b/cpp/include/cudf/lists/lists_column_view.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020-2023, NVIDIA CORPORATION. + * Copyright (c) 2020-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -38,6 +38,7 @@ namespace cudf { */ class lists_column_view : private column_view { public: + lists_column_view() = default; /** * @brief Construct a new lists column view object from a column view. * diff --git a/cpp/include/cudf/stream_compaction.hpp b/cpp/include/cudf/stream_compaction.hpp index c386b3a22b4..181af11adb8 100644 --- a/cpp/include/cudf/stream_compaction.hpp +++ b/cpp/include/cudf/stream_compaction.hpp @@ -320,6 +320,7 @@ std::unique_ptr<rmm::device_uvector<size_type>> distinct_indices( * @param keep Copy any, first, last, or none of the found duplicates * @param nulls_equal Flag to specify whether null elements should be considered as equal * @param nans_equal Flag to specify whether NaN elements should be considered as equal + * @param stream CUDA stream used for device memory operations and kernel launches. * @param mr Device memory resource used to allocate the returned table * @return Table with distinct rows, preserving input order */ @@ -329,6 +330,7 @@ std::unique_ptr<table>
stable_distinct( duplicate_keep_option keep = duplicate_keep_option::KEEP_ANY, null_equality nulls_equal = null_equality::EQUAL, nan_equality nans_equal = nan_equality::ALL_EQUAL, + rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); /** diff --git a/cpp/include/cudf/strings/detail/strings_children.cuh b/cpp/include/cudf/strings/detail/strings_children.cuh index f105a6dc546..f5f3982a5d6 100644 --- a/cpp/include/cudf/strings/detail/strings_children.cuh +++ b/cpp/include/cudf/strings/detail/strings_children.cuh @@ -21,6 +21,7 @@ #include #include #include +#include #include #include @@ -81,11 +82,11 @@ std::pair<std::unique_ptr<column>, int64_t> make_offsets_child_column( auto const total_bytes = cudf::detail::sizes_to_offsets(input_itr, input_itr + strings_count + 1, d_offsets, stream); - auto const threshold = get_offset64_threshold(); - CUDF_EXPECTS(is_large_strings_enabled() || (total_bytes < threshold), + auto const threshold = cudf::strings::get_offset64_threshold(); + CUDF_EXPECTS(cudf::strings::is_large_strings_enabled() || (total_bytes < threshold), "Size of output exceeds the column size limit", std::overflow_error); - if (total_bytes >= get_offset64_threshold()) { + if (total_bytes >= cudf::strings::get_offset64_threshold()) { // recompute as int64 offsets when above the threshold offsets_column = make_numeric_column( data_type{type_id::INT64}, strings_count + 1, mask_state::UNALLOCATED, stream, mr); diff --git a/cpp/include/cudf/strings/utilities.hpp b/cpp/include/cudf/strings/utilities.hpp new file mode 100644 index 00000000000..ae445282382 --- /dev/null +++ b/cpp/include/cudf/strings/utilities.hpp @@ -0,0 +1,62 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#pragma once + +#include +#include + +#include +#include + +namespace CUDF_EXPORT cudf { +namespace strings { + +/** + * @brief Creates a string_view vector from a strings column. + * + * @param strings Strings column instance. + * @param stream CUDA stream used for device memory operations and kernel launches. + * @param mr Device memory resource used to allocate the returned vector's device memory. + * @return Device vector of string_views + */ +rmm::device_uvector<string_view> create_string_vector_from_column( + cudf::strings_column_view const strings, + rmm::cuda_stream_view stream = cudf::get_default_stream(), + rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); + +/** + * @brief Return the threshold size for a strings column to use int64 offsets + * + * A computed size above this threshold should use int64 offsets, otherwise + * int32 offsets. By default this function will return std::numeric_limits<int32_t>::max(). + * This value can be overridden at runtime using the environment variable + * LIBCUDF_LARGE_STRINGS_THRESHOLD.
+ * + * @return size in bytes + */ +int64_t get_offset64_threshold(); + +/** + * @brief Checks if large strings is enabled + * + * This checks the setting in the environment variable LIBCUDF_LARGE_STRINGS_ENABLED. + * + * @return true if large strings are supported + */ +bool is_large_strings_enabled(); + +} // namespace strings +} // namespace CUDF_EXPORT cudf diff --git a/cpp/src/binaryop/compiled/binary_ops.cuh b/cpp/src/binaryop/compiled/binary_ops.cuh index 5177e7d4bda..c6af0c3c58a 100644 --- a/cpp/src/binaryop/compiled/binary_ops.cuh +++ b/cpp/src/binaryop/compiled/binary_ops.cuh @@ -49,9 +49,16 @@ struct type_casted_accessor { column_device_view const& col, bool is_scalar) const { - if constexpr (column_device_view::has_element_accessor() and - std::is_convertible_v) - return static_cast(col.element(is_scalar ? 0 : i)); + if constexpr (column_device_view::has_element_accessor()) { + auto const element = col.element(is_scalar ? 0 : i); + if constexpr (std::is_convertible_v) { + return static_cast(element); + } else if constexpr (is_fixed_point() && cuda::std::is_floating_point_v) { + return convert_fixed_to_floating(element); + } else if constexpr (is_fixed_point() && cuda::std::is_floating_point_v) { + return convert_floating_to_fixed(element, numeric::scale_type{0}); + } + } return {}; } }; @@ -159,6 +166,7 @@ struct ops2_wrapper { TypeRhs y = rhs.element(is_rhs_scalar ? 0 : i); auto result = [&]() { if constexpr (std::is_same_v or + std::is_same_v or std::is_same_v or std::is_same_v or std::is_same_v or diff --git a/cpp/src/binaryop/compiled/util.cpp b/cpp/src/binaryop/compiled/util.cpp index 2b6a4f58895..b62c5f1f4e1 100644 --- a/cpp/src/binaryop/compiled/util.cpp +++ b/cpp/src/binaryop/compiled/util.cpp @@ -31,8 +31,8 @@ struct common_type_functor { template std::optional operator()() const { - if constexpr (cudf::has_common_type_v) { - using TypeCommon = std::common_type_t; + if constexpr (binary_op_has_common_type_v) { + using TypeCommon = binary_op_common_type_t; return data_type{type_to_id()}; } @@ -85,8 +85,8 @@ struct is_binary_operation_supported { { if constexpr (column_device_view::has_element_accessor() and column_device_view::has_element_accessor()) { - if constexpr (has_common_type_v) { - using common_t = std::common_type_t; + if constexpr (binary_op_has_common_type_v) { + using common_t = binary_op_common_type_t; return std::is_invocable_v; } else { return std::is_invocable_v; @@ -102,8 +102,8 @@ struct is_binary_operation_supported { if constexpr (column_device_view::has_element_accessor() and column_device_view::has_element_accessor()) { if (has_mutable_element_accessor(out_type) or is_fixed_point(out_type)) { - if constexpr (has_common_type_v) { - using common_t = std::common_type_t; + if constexpr (binary_op_has_common_type_v) { + using common_t = binary_op_common_type_t; if constexpr (std::is_invocable_v) { using ReturnType = std::invoke_result_t; return is_constructible(out_type) or diff --git a/cpp/src/copying/sample.cu b/cpp/src/copying/sample.cu index f8e3a9a83e3..ba00527f6b6 100644 --- a/cpp/src/copying/sample.cu +++ b/cpp/src/copying/sample.cu @@ -16,6 +16,7 @@ #include #include +#include #include #include #include diff --git a/cpp/src/io/parquet/decode_fixed.cu b/cpp/src/io/parquet/decode_fixed.cu index bfd89200786..ea80ae73c2f 100644 --- a/cpp/src/io/parquet/decode_fixed.cu +++ b/cpp/src/io/parquet/decode_fixed.cu @@ -24,136 +24,11 @@ namespace cudf::io::parquet::detail { namespace { -constexpr int decode_block_size = 128; -constexpr int rolling_buf_size 
= decode_block_size * 2; -// the required number of runs in shared memory we will need to provide the -// rle_stream object -constexpr int rle_run_buffer_size = rle_stream_required_run_buffer_size(); - -template -static __device__ int gpuUpdateValidityOffsetsAndRowIndicesFlat( - int32_t target_value_count, page_state_s* s, state_buf* sb, level_t const* const def, int t) -{ - constexpr int num_warps = decode_block_size / cudf::detail::warp_size; - constexpr int max_batch_size = num_warps * cudf::detail::warp_size; - - auto& ni = s->nesting_info[0]; - - // how many (input) values we've processed in the page so far - int value_count = s->input_value_count; - int valid_count = ni.valid_count; - - // cap by last row so that we don't process any rows past what we want to output. - int const first_row = s->first_row; - int const last_row = first_row + s->num_rows; - int const capped_target_value_count = min(target_value_count, last_row); - - int const valid_map_offset = ni.valid_map_offset; - int const row_index_lower_bound = s->row_index_lower_bound; - - __syncthreads(); - - while (value_count < capped_target_value_count) { - int const batch_size = min(max_batch_size, capped_target_value_count - value_count); - - // definition level. only need to process for nullable columns - int d = 0; - if constexpr (nullable) { - d = t < batch_size - ? static_cast(def[rolling_index(value_count + t)]) - : -1; - } - - int const thread_value_count = t + 1; - int const block_value_count = batch_size; - - // compute our row index, whether we're in row bounds, and validity - int const row_index = (thread_value_count + value_count) - 1; - int const in_row_bounds = (row_index >= row_index_lower_bound) && (row_index < last_row); - int is_valid; - if constexpr (nullable) { - is_valid = ((d > 0) && in_row_bounds) ? 1 : 0; - } else { - is_valid = in_row_bounds; - } - - // thread and block validity count - int thread_valid_count, block_valid_count; - if constexpr (nullable) { - using block_scan = cub::BlockScan; - __shared__ typename block_scan::TempStorage scan_storage; - block_scan(scan_storage).InclusiveSum(is_valid, thread_valid_count, block_valid_count); - __syncthreads(); - - // validity is processed per-warp - // - // nested schemas always read and write to the same bounds (that is, read and write - // positions are already pre-bounded by first_row/num_rows). flat schemas will start reading - // at the first value, even if that is before first_row, because we cannot trivially jump to - // the correct position to start reading. since we are about to write the validity vector - // here we need to adjust our computed mask to take into account the write row bounds. 
- int const in_write_row_bounds = ballot(row_index >= first_row && row_index < last_row); - int const write_start = __ffs(in_write_row_bounds) - 1; // first bit in the warp to store - int warp_null_count = 0; - if (write_start >= 0) { - uint32_t const warp_validity_mask = ballot(is_valid); - // lane 0 from each warp writes out validity - if ((t % cudf::detail::warp_size) == 0) { - int const vindex = (value_count + thread_value_count) - 1; // absolute input value index - int const bit_offset = (valid_map_offset + vindex + write_start) - - first_row; // absolute bit offset into the output validity map - int const write_end = - cudf::detail::warp_size - __clz(in_write_row_bounds); // last bit in the warp to store - int const bit_count = write_end - write_start; - warp_null_count = bit_count - __popc(warp_validity_mask >> write_start); - - store_validity(bit_offset, ni.valid_map, warp_validity_mask >> write_start, bit_count); - } - } - - // sum null counts. we have to do it this way instead of just incrementing by (value_count - - // valid_count) because valid_count also includes rows that potentially start before our row - // bounds. if we could come up with a way to clean that up, we could remove this and just - // compute it directly at the end of the kernel. - size_type const block_null_count = - cudf::detail::single_lane_block_sum_reduce(warp_null_count); - if (t == 0) { ni.null_count += block_null_count; } - } - // trivial for non-nullable columns - else { - thread_valid_count = thread_value_count; - block_valid_count = block_value_count; - } - - // output offset - if (is_valid) { - int const dst_pos = (value_count + thread_value_count) - 1; - int const src_pos = (valid_count + thread_valid_count) - 1; - sb->nz_idx[rolling_index(src_pos)] = dst_pos; - } - - // update stuff - value_count += block_value_count; - valid_count += block_valid_count; - } - - if (t == 0) { - // update valid value count for decoding and total # of values we've processed - ni.valid_count = valid_count; - ni.value_count = value_count; - s->nz_count = valid_count; - s->input_value_count = value_count; - s->input_row_count = value_count; - } - - return valid_count; -} - -template -__device__ inline void gpuDecodeValues( +template +__device__ inline void gpuDecodeFixedWidthValues( page_state_s* s, state_buf* const sb, int start, int end, int t) { - constexpr int num_warps = decode_block_size / cudf::detail::warp_size; + constexpr int num_warps = block_size / cudf::detail::warp_size; constexpr int max_batch_size = num_warps * cudf::detail::warp_size; PageNestingDecodeInfo* nesting_info_base = s->nesting_info; @@ -217,18 +92,22 @@ __device__ inline void gpuDecodeValues( } } -template -__device__ inline void gpuDecodeSplitValues(page_state_s* s, - state_buf* const sb, - int start, - int end) +template +struct decode_fixed_width_values_func { + __device__ inline void operator()(page_state_s* s, state_buf* const sb, int start, int end, int t) + { + gpuDecodeFixedWidthValues(s, sb, start, end, t); + } +}; + +template +__device__ inline void gpuDecodeFixedWidthSplitValues( + page_state_s* s, state_buf* const sb, int start, int end, int t) { using cudf::detail::warp_size; - constexpr int num_warps = decode_block_size / warp_size; + constexpr int num_warps = block_size / warp_size; constexpr int max_batch_size = num_warps * warp_size; - auto const t = threadIdx.x; - PageNestingDecodeInfo* nesting_info_base = s->nesting_info; int const dtype = s->col.physical_type; auto const data_len = thrust::distance(s->data_start, 
s->data_end); @@ -307,266 +186,293 @@ __device__ inline void gpuDecodeSplitValues(page_state_s* s, } } -// is the page marked nullable or not -__device__ inline bool is_nullable(page_state_s* s) -{ - auto const lvl = level_type::DEFINITION; - auto const max_def_level = s->col.max_level[lvl]; - return max_def_level > 0; -} +template +struct decode_fixed_width_split_values_func { + __device__ inline void operator()(page_state_s* s, state_buf* const sb, int start, int end, int t) + { + gpuDecodeFixedWidthSplitValues(s, sb, start, end, t); + } +}; -// for a nullable page, check to see if it could have nulls -__device__ inline bool has_nulls(page_state_s* s) +template +static __device__ int gpuUpdateValidityAndRowIndicesNested( + int32_t target_value_count, page_state_s* s, state_buf* sb, level_t const* const def, int t) { - auto const lvl = level_type::DEFINITION; - auto const init_run = s->initial_rle_run[lvl]; - // literal runs, lets assume they could hold nulls - if (is_literal_run(init_run)) { return true; } - - // repeated run with number of items in the run not equal - // to the rows in the page, assume that means we could have nulls - if (s->page.num_input_values != (init_run >> 1)) { return true; } - - auto const lvl_bits = s->col.level_bits[lvl]; - auto const run_val = lvl_bits == 0 ? 0 : s->initial_rle_value[lvl]; - - // the encoded repeated value isn't valid, we have (all) nulls - return run_val != s->col.max_level[lvl]; -} + constexpr int num_warps = decode_block_size / cudf::detail::warp_size; + constexpr int max_batch_size = num_warps * cudf::detail::warp_size; -/** - * @brief Kernel for computing fixed width non dictionary column data stored in the pages - * - * This function will write the page data and the page data's validity to the - * output specified in the page's column chunk. If necessary, additional - * conversion will be performed to translate from the Parquet datatype to - * desired output datatype. - * - * @param pages List of pages - * @param chunks List of column chunks - * @param min_row Row index to start reading at - * @param num_rows Maximum number of rows to read - * @param error_code Error code to set if an error is encountered - */ -template -CUDF_KERNEL void __launch_bounds__(decode_block_size) - gpuDecodePageDataFixed(PageInfo* pages, - device_span chunks, - size_t min_row, - size_t num_rows, - kernel_error::pointer error_code) -{ - __shared__ __align__(16) page_state_s state_g; - __shared__ __align__(16) page_state_buffers_s // unused in this kernel - state_buffers; + // how many (input) values we've processed in the page so far + int value_count = s->input_value_count; - page_state_s* const s = &state_g; - auto* const sb = &state_buffers; - int const page_idx = blockIdx.x; - int const t = threadIdx.x; - PageInfo* pp = &pages[page_idx]; + // cap by last row so that we don't process any rows past what we want to output. 
+ int const first_row = s->first_row; + int const last_row = first_row + s->num_rows; + int const capped_target_value_count = min(target_value_count, last_row); - if (!(BitAnd(pages[page_idx].kernel_mask, decode_kernel_mask::FIXED_WIDTH_NO_DICT))) { return; } + int const row_index_lower_bound = s->row_index_lower_bound; - // must come after the kernel mask check - [[maybe_unused]] null_count_back_copier _{s, t}; + int const max_depth = s->col.max_nesting_depth - 1; + __syncthreads(); - if (!setupLocalPageInfo(s, - pp, - chunks, - min_row, - num_rows, - mask_filter{decode_kernel_mask::FIXED_WIDTH_NO_DICT}, - page_processing_stage::DECODE)) { - return; - } + while (value_count < capped_target_value_count) { + int const batch_size = min(max_batch_size, capped_target_value_count - value_count); - // the level stream decoders - __shared__ rle_run def_runs[rle_run_buffer_size]; - rle_stream def_decoder{def_runs}; + // definition level. only need to process for nullable columns + int d = 0; + if constexpr (nullable) { + if (def) { + d = t < batch_size + ? static_cast(def[rolling_index(value_count + t)]) + : -1; + } else { + d = t < batch_size ? 1 : -1; + } + } - // if we have no work to do (eg, in a skip_rows/num_rows case) in this page. - if (s->num_rows == 0) { return; } + int const thread_value_count = t + 1; + int const block_value_count = batch_size; - bool const nullable = is_nullable(s); - bool const nullable_with_nulls = nullable && has_nulls(s); + // compute our row index, whether we're in row bounds, and validity + int const row_index = (thread_value_count + value_count) - 1; + int const in_row_bounds = (row_index >= row_index_lower_bound) && (row_index < last_row); + int const in_write_row_bounds = ballot(row_index >= first_row && row_index < last_row); + int const write_start = __ffs(in_write_row_bounds) - 1; // first bit in the warp to store + + // iterate by depth + for (int d_idx = 0; d_idx <= max_depth; d_idx++) { + auto& ni = s->nesting_info[d_idx]; + + int is_valid; + if constexpr (nullable) { + is_valid = ((d >= ni.max_def_level) && in_row_bounds) ? 1 : 0; + } else { + is_valid = in_row_bounds; + } - // initialize the stream decoders (requires values computed in setupLocalPageInfo) - level_t* const def = reinterpret_cast(pp->lvl_decode_buf[level_type::DEFINITION]); - if (nullable_with_nulls) { - def_decoder.init(s->col.level_bits[level_type::DEFINITION], - s->abs_lvl_start[level_type::DEFINITION], - s->abs_lvl_end[level_type::DEFINITION], - def, - s->page.num_input_values); - } - __syncthreads(); + // thread and block validity count + int thread_valid_count, block_valid_count; + if constexpr (nullable) { + using block_scan = cub::BlockScan; + __shared__ typename block_scan::TempStorage scan_storage; + block_scan(scan_storage).InclusiveSum(is_valid, thread_valid_count, block_valid_count); + __syncthreads(); + + // validity is processed per-warp + // + // nested schemas always read and write to the same bounds (that is, read and write + // positions are already pre-bounded by first_row/num_rows). flat schemas will start reading + // at the first value, even if that is before first_row, because we cannot trivially jump to + // the correct position to start reading. since we are about to write the validity vector + // here we need to adjust our computed mask to take into account the write row bounds. 
+ int warp_null_count = 0; + if (write_start >= 0 && ni.valid_map != nullptr) { + int const valid_map_offset = ni.valid_map_offset; + uint32_t const warp_validity_mask = ballot(is_valid); + // lane 0 from each warp writes out validity + if ((t % cudf::detail::warp_size) == 0) { + int const vindex = + (value_count + thread_value_count) - 1; // absolute input value index + int const bit_offset = (valid_map_offset + vindex + write_start) - + first_row; // absolute bit offset into the output validity map + int const write_end = cudf::detail::warp_size - + __clz(in_write_row_bounds); // last bit in the warp to store + int const bit_count = write_end - write_start; + warp_null_count = bit_count - __popc(warp_validity_mask >> write_start); + + store_validity(bit_offset, ni.valid_map, warp_validity_mask >> write_start, bit_count); + } + } - // We use two counters in the loop below: processed_count and valid_count. - // - processed_count: number of rows out of num_input_values that we have decoded so far. - // the definition stream returns the number of total rows it has processed in each call - // to decode_next and we accumulate in process_count. - // - valid_count: number of non-null rows we have decoded so far. In each iteration of the - // loop below, we look at the number of valid items (which could be all for non-nullable), - // and valid_count is that running count. - int processed_count = 0; - int valid_count = 0; - // the core loop. decode batches of level stream data using rle_stream objects - // and pass the results to gpuDecodeValues - while (s->error == 0 && processed_count < s->page.num_input_values) { - int next_valid_count; + // sum null counts. we have to do it this way instead of just incrementing by (value_count - + // valid_count) because valid_count also includes rows that potentially start before our row + // bounds. if we could come up with a way to clean that up, we could remove this and just + // compute it directly at the end of the kernel. + size_type const block_null_count = + cudf::detail::single_lane_block_sum_reduce(warp_null_count); + if (t == 0) { ni.null_count += block_null_count; } + } + // trivial for non-nullable columns + else { + thread_valid_count = thread_value_count; + block_valid_count = block_value_count; + } - // only need to process definition levels if the column has nulls - if (nullable_with_nulls) { - processed_count += def_decoder.decode_next(t); - __syncthreads(); + // if this is valid and we're at the leaf, output dst_pos + __syncthreads(); // handle modification of ni.value_count from below + if (is_valid && d_idx == max_depth) { + // for non-list types, the value count is always the same across + int const dst_pos = (value_count + thread_value_count) - 1; + int const src_pos = (ni.valid_count + thread_valid_count) - 1; + sb->nz_idx[rolling_index(src_pos)] = dst_pos; + } + __syncthreads(); // handle modification of ni.value_count from below - next_valid_count = - gpuUpdateValidityOffsetsAndRowIndicesFlat(processed_count, s, sb, def, t); + // update stuff + if (t == 0) { ni.valid_count += block_valid_count; } } - // if we wanted to split off the skip_rows/num_rows case into a separate kernel, we could skip - // this function call entirely since all it will ever generate is a mapping of (i -> i) for - // nz_idx. gpuDecodeValues would be the only work that happens. 
- else { - processed_count += min(rolling_buf_size, s->page.num_input_values - processed_count); - next_valid_count = gpuUpdateValidityOffsetsAndRowIndicesFlat( - processed_count, s, sb, nullptr, t); - } - __syncthreads(); - // decode the values themselves - gpuDecodeValues(s, sb, valid_count, next_valid_count, t); - __syncthreads(); + value_count += block_value_count; + } - valid_count = next_valid_count; + if (t == 0) { + // update valid value count for decoding and total # of values we've processed + s->nz_count = s->nesting_info[max_depth].valid_count; + s->input_value_count = value_count; + s->input_row_count = value_count; } - if (t == 0 and s->error != 0) { set_error(s->error, error_code); } + + __syncthreads(); + return s->nesting_info[max_depth].valid_count; } -/** - * @brief Kernel for computing fixed width dictionary column data stored in the pages - * - * This function will write the page data and the page data's validity to the - * output specified in the page's column chunk. If necessary, additional - * conversion will be performed to translate from the Parquet datatype to - * desired output datatype. - * - * @param pages List of pages - * @param chunks List of column chunks - * @param min_row Row index to start reading at - * @param num_rows Maximum number of rows to read - * @param error_code Error code to set if an error is encountered - */ -template -CUDF_KERNEL void __launch_bounds__(decode_block_size) - gpuDecodePageDataFixedDict(PageInfo* pages, - device_span chunks, - size_t min_row, - size_t num_rows, - kernel_error::pointer error_code) +template +static __device__ int gpuUpdateValidityAndRowIndicesFlat( + int32_t target_value_count, page_state_s* s, state_buf* sb, level_t const* const def, int t) { - __shared__ __align__(16) page_state_s state_g; - __shared__ __align__(16) page_state_buffers_s // unused in this kernel - state_buffers; - - page_state_s* const s = &state_g; - auto* const sb = &state_buffers; - int const page_idx = blockIdx.x; - int const t = threadIdx.x; - PageInfo* pp = &pages[page_idx]; + constexpr int num_warps = decode_block_size / cudf::detail::warp_size; + constexpr int max_batch_size = num_warps * cudf::detail::warp_size; - if (!(BitAnd(pages[page_idx].kernel_mask, decode_kernel_mask::FIXED_WIDTH_DICT))) { return; } + auto& ni = s->nesting_info[0]; - // must come after the kernel mask check - [[maybe_unused]] null_count_back_copier _{s, t}; + // how many (input) values we've processed in the page so far + int value_count = s->input_value_count; + int valid_count = ni.valid_count; - if (!setupLocalPageInfo(s, - pp, - chunks, - min_row, - num_rows, - mask_filter{decode_kernel_mask::FIXED_WIDTH_DICT}, - page_processing_stage::DECODE)) { - return; - } + // cap by last row so that we don't process any rows past what we want to output. + int const first_row = s->first_row; + int const last_row = first_row + s->num_rows; + int const capped_target_value_count = min(target_value_count, last_row); - __shared__ rle_run def_runs[rle_run_buffer_size]; - rle_stream def_decoder{def_runs}; + int const valid_map_offset = ni.valid_map_offset; + int const row_index_lower_bound = s->row_index_lower_bound; - __shared__ rle_run dict_runs[rle_run_buffer_size]; - rle_stream dict_stream{dict_runs}; + __syncthreads(); - // if we have no work to do (eg, in a skip_rows/num_rows case) in this page. 
- if (s->num_rows == 0) { return; } + while (value_count < capped_target_value_count) { + int const batch_size = min(max_batch_size, capped_target_value_count - value_count); - bool const nullable = is_nullable(s); - bool const nullable_with_nulls = nullable && has_nulls(s); + // definition level. only need to process for nullable columns + int d = 0; + if constexpr (nullable) { + if (def) { + d = t < batch_size + ? static_cast(def[rolling_index(value_count + t)]) + : -1; + } else { + d = t < batch_size ? 1 : -1; + } + } - // initialize the stream decoders (requires values computed in setupLocalPageInfo) - level_t* const def = reinterpret_cast(pp->lvl_decode_buf[level_type::DEFINITION]); - if (nullable_with_nulls) { - def_decoder.init(s->col.level_bits[level_type::DEFINITION], - s->abs_lvl_start[level_type::DEFINITION], - s->abs_lvl_end[level_type::DEFINITION], - def, - s->page.num_input_values); - } + int const thread_value_count = t + 1; + int const block_value_count = batch_size; - dict_stream.init( - s->dict_bits, s->data_start, s->data_end, sb->dict_idx, s->page.num_input_values); - __syncthreads(); + // compute our row index, whether we're in row bounds, and validity + int const row_index = (thread_value_count + value_count) - 1; + int const in_row_bounds = (row_index >= row_index_lower_bound) && (row_index < last_row); + int is_valid; + if constexpr (nullable) { + is_valid = ((d > 0) && in_row_bounds) ? 1 : 0; + } else { + is_valid = in_row_bounds; + } - // We use two counters in the loop below: processed_count and valid_count. - // - processed_count: number of rows out of num_input_values that we have decoded so far. - // the definition stream returns the number of total rows it has processed in each call - // to decode_next and we accumulate in process_count. - // - valid_count: number of non-null rows we have decoded so far. In each iteration of the - // loop below, we look at the number of valid items (which could be all for non-nullable), - // and valid_count is that running count. - int processed_count = 0; - int valid_count = 0; + // thread and block validity count + int thread_valid_count, block_valid_count; + if constexpr (nullable) { + using block_scan = cub::BlockScan; + __shared__ typename block_scan::TempStorage scan_storage; + block_scan(scan_storage).InclusiveSum(is_valid, thread_valid_count, block_valid_count); + __syncthreads(); - // the core loop. decode batches of level stream data using rle_stream objects - // and pass the results to gpuDecodeValues - while (s->error == 0 && processed_count < s->page.num_input_values) { - int next_valid_count; + // validity is processed per-warp + // + // nested schemas always read and write to the same bounds (that is, read and write + // positions are already pre-bounded by first_row/num_rows). flat schemas will start reading + // at the first value, even if that is before first_row, because we cannot trivially jump to + // the correct position to start reading. since we are about to write the validity vector + // here we need to adjust our computed mask to take into account the write row bounds. 
+ int const in_write_row_bounds = ballot(row_index >= first_row && row_index < last_row); + int const write_start = __ffs(in_write_row_bounds) - 1; // first bit in the warp to store + int warp_null_count = 0; + if (write_start >= 0) { + uint32_t const warp_validity_mask = ballot(is_valid); + // lane 0 from each warp writes out validity + if ((t % cudf::detail::warp_size) == 0) { + int const vindex = (value_count + thread_value_count) - 1; // absolute input value index + int const bit_offset = (valid_map_offset + vindex + write_start) - + first_row; // absolute bit offset into the output validity map + int const write_end = + cudf::detail::warp_size - __clz(in_write_row_bounds); // last bit in the warp to store + int const bit_count = write_end - write_start; + warp_null_count = bit_count - __popc(warp_validity_mask >> write_start); - // only need to process definition levels if the column has nulls - if (nullable_with_nulls) { - processed_count += def_decoder.decode_next(t); - __syncthreads(); + store_validity(bit_offset, ni.valid_map, warp_validity_mask >> write_start, bit_count); + } + } - // count of valid items in this batch - next_valid_count = - gpuUpdateValidityOffsetsAndRowIndicesFlat(processed_count, s, sb, def, t); + // sum null counts. we have to do it this way instead of just incrementing by (value_count - + // valid_count) because valid_count also includes rows that potentially start before our row + // bounds. if we could come up with a way to clean that up, we could remove this and just + // compute it directly at the end of the kernel. + size_type const block_null_count = + cudf::detail::single_lane_block_sum_reduce(warp_null_count); + if (t == 0) { ni.null_count += block_null_count; } } - // if we wanted to split off the skip_rows/num_rows case into a separate kernel, we could skip - // this function call entirely since all it will ever generate is a mapping of (i -> i) for - // nz_idx. gpuDecodeValues would be the only work that happens. + // trivial for non-nullable columns else { - processed_count += min(rolling_buf_size, s->page.num_input_values - processed_count); - next_valid_count = gpuUpdateValidityOffsetsAndRowIndicesFlat( - processed_count, s, sb, nullptr, t); + thread_valid_count = thread_value_count; + block_valid_count = block_value_count; } - __syncthreads(); - // We want to limit the number of dictionary items we decode, that correspond to - // the rows we have processed in this iteration that are valid. - // We know the number of valid rows to process with: next_valid_count - valid_count. - dict_stream.decode_next(t, next_valid_count - valid_count); - __syncthreads(); + // output offset + if (is_valid) { + int const dst_pos = (value_count + thread_value_count) - 1; + int const src_pos = (valid_count + thread_valid_count) - 1; + sb->nz_idx[rolling_index(src_pos)] = dst_pos; + } - // decode the values themselves - gpuDecodeValues(s, sb, valid_count, next_valid_count, t); - __syncthreads(); + // update stuff + value_count += block_value_count; + valid_count += block_valid_count; + } - valid_count = next_valid_count; + if (t == 0) { + // update valid value count for decoding and total # of values we've processed + ni.valid_count = valid_count; + ni.value_count = value_count; // TODO: remove? 
this is unused in the non-list path + s->nz_count = valid_count; + s->input_value_count = value_count; + s->input_row_count = value_count; } - if (t == 0 and s->error != 0) { set_error(s->error, error_code); } + + return valid_count; +} + +// is the page marked nullable or not +__device__ inline bool is_nullable(page_state_s* s) +{ + auto const lvl = level_type::DEFINITION; + auto const max_def_level = s->col.max_level[lvl]; + return max_def_level > 0; +} + +// for a nullable page, check to see if it could have nulls +__device__ inline bool maybe_has_nulls(page_state_s* s) +{ + auto const lvl = level_type::DEFINITION; + auto const init_run = s->initial_rle_run[lvl]; + // literal runs, lets assume they could hold nulls + if (is_literal_run(init_run)) { return true; } + + // repeated run with number of items in the run not equal + // to the rows in the page, assume that means we could have nulls + if (s->page.num_input_values != (init_run >> 1)) { return true; } + + auto const lvl_bits = s->col.level_bits[lvl]; + auto const run_val = lvl_bits == 0 ? 0 : s->initial_rle_value[lvl]; + + // the encoded repeated value isn't valid, we have (all) nulls + return run_val != s->col.max_level[lvl]; } /** @@ -583,19 +489,28 @@ CUDF_KERNEL void __launch_bounds__(decode_block_size) * @param num_rows Maximum number of rows to read * @param error_code Error code to set if an error is encountered */ -template -CUDF_KERNEL void __launch_bounds__(decode_block_size) - gpuDecodeSplitPageDataFlat(PageInfo* pages, - device_span chunks, - size_t min_row, - size_t num_rows, - kernel_error::pointer error_code) +template + typename DecodeValuesFunc> +CUDF_KERNEL void __launch_bounds__(decode_block_size_t) + gpuDecodePageDataGeneric(PageInfo* pages, + device_span chunks, + size_t min_row, + size_t num_rows, + kernel_error::pointer error_code) { + constexpr int rolling_buf_size = decode_block_size_t * 2; + constexpr int rle_run_buffer_size = rle_stream_required_run_buffer_size(); + __shared__ __align__(16) page_state_s state_g; - __shared__ __align__(16) page_state_buffers_s // unused in this kernel - state_buffers; + using state_buf_t = page_state_buffers_s; + __shared__ __align__(16) state_buf_t state_buffers; page_state_s* const s = &state_g; auto* const sb = &state_buffers; @@ -603,9 +518,7 @@ CUDF_KERNEL void __launch_bounds__(decode_block_size) int const t = threadIdx.x; PageInfo* pp = &pages[page_idx]; - if (!(BitAnd(pages[page_idx].kernel_mask, decode_kernel_mask::BYTE_STREAM_SPLIT_FLAT))) { - return; - } + if (!(BitAnd(pages[page_idx].kernel_mask, kernel_mask_t))) { return; } // must come after the kernel mask check [[maybe_unused]] null_count_back_copier _{s, t}; @@ -615,30 +528,70 @@ CUDF_KERNEL void __launch_bounds__(decode_block_size) chunks, min_row, num_rows, - mask_filter{decode_kernel_mask::BYTE_STREAM_SPLIT_FLAT}, + mask_filter{kernel_mask_t}, page_processing_stage::DECODE)) { return; } - // the level stream decoders - __shared__ rle_run def_runs[rle_run_buffer_size]; - rle_stream def_decoder{def_runs}; - // if we have no work to do (eg, in a skip_rows/num_rows case) in this page. if (s->num_rows == 0) { return; } - bool const nullable = is_nullable(s); - bool const nullable_with_nulls = nullable && has_nulls(s); + DecodeValuesFunc decode_values; + + bool const nullable = is_nullable(s); + bool const should_process_nulls = nullable && maybe_has_nulls(s); + + // shared buffer. all shared memory is suballocated out of here + // constexpr int shared_rep_size = has_lists_t ? 
cudf::util::round_up_unsafe(rle_run_buffer_size * + // sizeof(rle_run), size_t{16}) : 0; + constexpr int shared_dict_size = + has_dict_t + ? cudf::util::round_up_unsafe(rle_run_buffer_size * sizeof(rle_run), size_t{16}) + : 0; + constexpr int shared_def_size = + cudf::util::round_up_unsafe(rle_run_buffer_size * sizeof(rle_run), size_t{16}); + constexpr int shared_buf_size = /*shared_rep_size +*/ shared_dict_size + shared_def_size; + __shared__ __align__(16) uint8_t shared_buf[shared_buf_size]; + + // setup all shared memory buffers + int shared_offset = 0; + /* + rle_run *rep_runs = reinterpret_cast*>(shared_buf + shared_offset); + if constexpr (has_lists_t){ + shared_offset += shared_rep_size; + } + */ + rle_run* dict_runs = reinterpret_cast*>(shared_buf + shared_offset); + if constexpr (has_dict_t) { shared_offset += shared_dict_size; } + rle_run* def_runs = reinterpret_cast*>(shared_buf + shared_offset); // initialize the stream decoders (requires values computed in setupLocalPageInfo) + rle_stream def_decoder{def_runs}; level_t* const def = reinterpret_cast(pp->lvl_decode_buf[level_type::DEFINITION]); - if (nullable_with_nulls) { + if (should_process_nulls) { def_decoder.init(s->col.level_bits[level_type::DEFINITION], s->abs_lvl_start[level_type::DEFINITION], s->abs_lvl_end[level_type::DEFINITION], def, s->page.num_input_values); } + /* + rle_stream rep_decoder{rep_runs}; + level_t* const rep = reinterpret_cast(pp->lvl_decode_buf[level_type::REPETITION]); + if constexpr(has_lists_t){ + rep_decoder.init(s->col.level_bits[level_type::REPETITION], + s->abs_lvl_start[level_type::REPETITION], + s->abs_lvl_end[level_type::REPETITION], + rep, + s->page.num_input_values); + } + */ + + rle_stream dict_stream{dict_runs}; + if constexpr (has_dict_t) { + dict_stream.init( + s->dict_bits, s->data_start, s->data_end, sb->dict_idx, s->page.num_input_values); + } __syncthreads(); // We use two counters in the loop below: processed_count and valid_count. @@ -655,26 +608,47 @@ CUDF_KERNEL void __launch_bounds__(decode_block_size) while (s->error == 0 && processed_count < s->page.num_input_values) { int next_valid_count; - // only need to process definition levels if the column has nulls - if (nullable_with_nulls) { + // only need to process definition levels if this is a nullable column + if (should_process_nulls) { processed_count += def_decoder.decode_next(t); __syncthreads(); - next_valid_count = - gpuUpdateValidityOffsetsAndRowIndicesFlat(processed_count, s, sb, def, t); + if constexpr (has_nesting_t) { + next_valid_count = gpuUpdateValidityAndRowIndicesNested( + processed_count, s, sb, def, t); + } else { + next_valid_count = gpuUpdateValidityAndRowIndicesFlat( + processed_count, s, sb, def, t); + } } // if we wanted to split off the skip_rows/num_rows case into a separate kernel, we could skip // this function call entirely since all it will ever generate is a mapping of (i -> i) for - // nz_idx. gpuDecodeValues would be the only work that happens. + // nz_idx. gpuDecodeFixedWidthValues would be the only work that happens. 
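// Illustrative note (not part of the patch): in that degenerate case the per-thread work in this
// branch would reduce to roughly
//   for (int i = t; i < processed_count; i += blockDim.x) { sb->nz_idx[rolling_index(i)] = i; }
// i.e. every input position maps straight to the same output position.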
else { processed_count += min(rolling_buf_size, s->page.num_input_values - processed_count); - next_valid_count = gpuUpdateValidityOffsetsAndRowIndicesFlat( - processed_count, s, sb, nullptr, t); + + if constexpr (has_nesting_t) { + next_valid_count = + gpuUpdateValidityAndRowIndicesNested( + processed_count, s, sb, nullptr, t); + } else { + next_valid_count = gpuUpdateValidityAndRowIndicesFlat( + processed_count, s, sb, nullptr, t); + } } __syncthreads(); + // if we have dictionary data + if constexpr (has_dict_t) { + // We want to limit the number of dictionary items we decode, that correspond to + // the rows we have processed in this iteration that are valid. + // We know the number of valid rows to process with: next_valid_count - valid_count. + dict_stream.decode_next(t, next_valid_count - valid_count); + __syncthreads(); + } + // decode the values themselves - gpuDecodeSplitValues(s, sb, valid_count, next_valid_count); + decode_values(s, sb, valid_count, next_valid_count, t); __syncthreads(); valid_count = next_valid_count; @@ -689,18 +663,55 @@ void __host__ DecodePageDataFixed(cudf::detail::hostdevice_span pages, size_t num_rows, size_t min_row, int level_type_size, + bool has_nesting, kernel_error::pointer error_code, rmm::cuda_stream_view stream) { + constexpr int decode_block_size = 128; + dim3 dim_block(decode_block_size, 1); dim3 dim_grid(pages.size(), 1); // 1 threadblock per page if (level_type_size == 1) { - gpuDecodePageDataFixed<<>>( - pages.device_ptr(), chunks, min_row, num_rows, error_code); + if (has_nesting) { + gpuDecodePageDataGeneric + <<>>( + pages.device_ptr(), chunks, min_row, num_rows, error_code); + } else { + gpuDecodePageDataGeneric + <<>>( + pages.device_ptr(), chunks, min_row, num_rows, error_code); + } } else { - gpuDecodePageDataFixed<<>>( - pages.device_ptr(), chunks, min_row, num_rows, error_code); + if (has_nesting) { + gpuDecodePageDataGeneric + <<>>( + pages.device_ptr(), chunks, min_row, num_rows, error_code); + } else { + gpuDecodePageDataGeneric + <<>>( + pages.device_ptr(), chunks, min_row, num_rows, error_code); + } } } @@ -709,40 +720,113 @@ void __host__ DecodePageDataFixedDict(cudf::detail::hostdevice_span pa size_t num_rows, size_t min_row, int level_type_size, + bool has_nesting, kernel_error::pointer error_code, rmm::cuda_stream_view stream) { - // dim3 dim_block(decode_block_size, 1); // decode_block_size = 128 threads per block - // 1 full warp, and 1 warp of 1 thread + constexpr int decode_block_size = 128; + dim3 dim_block(decode_block_size, 1); // decode_block_size = 128 threads per block dim3 dim_grid(pages.size(), 1); // 1 thread block per page => # blocks if (level_type_size == 1) { - gpuDecodePageDataFixedDict<<>>( - pages.device_ptr(), chunks, min_row, num_rows, error_code); + if (has_nesting) { + gpuDecodePageDataGeneric + <<>>( + pages.device_ptr(), chunks, min_row, num_rows, error_code); + } else { + gpuDecodePageDataGeneric + <<>>( + pages.device_ptr(), chunks, min_row, num_rows, error_code); + } } else { - gpuDecodePageDataFixedDict<<>>( - pages.device_ptr(), chunks, min_row, num_rows, error_code); + if (has_nesting) { + gpuDecodePageDataGeneric + <<>>( + pages.device_ptr(), chunks, min_row, num_rows, error_code); + } else { + gpuDecodePageDataGeneric + <<>>( + pages.device_ptr(), chunks, min_row, num_rows, error_code); + } } } -void __host__ DecodeSplitPageDataFlat(cudf::detail::hostdevice_span pages, - cudf::detail::hostdevice_span chunks, - size_t num_rows, - size_t min_row, - int level_type_size, - 
kernel_error::pointer error_code, - rmm::cuda_stream_view stream) +void __host__ +DecodeSplitPageFixedWidthData(cudf::detail::hostdevice_span pages, + cudf::detail::hostdevice_span chunks, + size_t num_rows, + size_t min_row, + int level_type_size, + bool has_nesting, + kernel_error::pointer error_code, + rmm::cuda_stream_view stream) { + constexpr int decode_block_size = 128; + dim3 dim_block(decode_block_size, 1); // decode_block_size = 128 threads per block dim3 dim_grid(pages.size(), 1); // 1 thread block per page => # blocks if (level_type_size == 1) { - gpuDecodeSplitPageDataFlat<<>>( - pages.device_ptr(), chunks, min_row, num_rows, error_code); + if (has_nesting) { + gpuDecodePageDataGeneric + <<>>( + pages.device_ptr(), chunks, min_row, num_rows, error_code); + } else { + gpuDecodePageDataGeneric + <<>>( + pages.device_ptr(), chunks, min_row, num_rows, error_code); + } } else { - gpuDecodeSplitPageDataFlat<<>>( - pages.device_ptr(), chunks, min_row, num_rows, error_code); + if (has_nesting) { + gpuDecodePageDataGeneric + <<>>( + pages.device_ptr(), chunks, min_row, num_rows, error_code); + } else { + gpuDecodePageDataGeneric + <<>>( + pages.device_ptr(), chunks, min_row, num_rows, error_code); + } } } diff --git a/cpp/src/io/parquet/page_hdr.cu b/cpp/src/io/parquet/page_hdr.cu index cf0dd85e490..d604642be54 100644 --- a/cpp/src/io/parquet/page_hdr.cu +++ b/cpp/src/io/parquet/page_hdr.cu @@ -145,6 +145,11 @@ __device__ inline bool is_nested(ColumnChunkDesc const& chunk) return chunk.max_nesting_depth > 1; } +__device__ inline bool is_list(ColumnChunkDesc const& chunk) +{ + return chunk.max_level[level_type::REPETITION] > 0; +} + __device__ inline bool is_byte_array(ColumnChunkDesc const& chunk) { return chunk.physical_type == BYTE_ARRAY; @@ -178,14 +183,17 @@ __device__ decode_kernel_mask kernel_mask_for_page(PageInfo const& page, return decode_kernel_mask::STRING; } - if (!is_nested(chunk) && !is_byte_array(chunk) && !is_boolean(chunk)) { + if (!is_list(chunk) && !is_byte_array(chunk) && !is_boolean(chunk)) { if (page.encoding == Encoding::PLAIN) { - return decode_kernel_mask::FIXED_WIDTH_NO_DICT; + return is_nested(chunk) ? decode_kernel_mask::FIXED_WIDTH_NO_DICT_NESTED + : decode_kernel_mask::FIXED_WIDTH_NO_DICT; } else if (page.encoding == Encoding::PLAIN_DICTIONARY || page.encoding == Encoding::RLE_DICTIONARY) { - return decode_kernel_mask::FIXED_WIDTH_DICT; + return is_nested(chunk) ? decode_kernel_mask::FIXED_WIDTH_DICT_NESTED + : decode_kernel_mask::FIXED_WIDTH_DICT; } else if (page.encoding == Encoding::BYTE_STREAM_SPLIT) { - return decode_kernel_mask::BYTE_STREAM_SPLIT_FLAT; + return is_nested(chunk) ? decode_kernel_mask::BYTE_STREAM_SPLIT_FIXED_WIDTH_NESTED + : decode_kernel_mask::BYTE_STREAM_SPLIT_FIXED_WIDTH_FLAT; } } diff --git a/cpp/src/io/parquet/parquet_gpu.hpp b/cpp/src/io/parquet/parquet_gpu.hpp index d82c6f0de59..efc1f5ebab1 100644 --- a/cpp/src/io/parquet/parquet_gpu.hpp +++ b/cpp/src/io/parquet/parquet_gpu.hpp @@ -207,16 +207,20 @@ enum level_type { * Used to control which decode kernels to run. 
*/ enum class decode_kernel_mask { - NONE = 0, - GENERAL = (1 << 0), // Run catch-all decode kernel - STRING = (1 << 1), // Run decode kernel for string data - DELTA_BINARY = (1 << 2), // Run decode kernel for DELTA_BINARY_PACKED data - DELTA_BYTE_ARRAY = (1 << 3), // Run decode kernel for DELTA_BYTE_ARRAY encoded data - DELTA_LENGTH_BA = (1 << 4), // Run decode kernel for DELTA_LENGTH_BYTE_ARRAY encoded data - FIXED_WIDTH_NO_DICT = (1 << 5), // Run decode kernel for fixed width non-dictionary pages - FIXED_WIDTH_DICT = (1 << 6), // Run decode kernel for fixed width dictionary pages - BYTE_STREAM_SPLIT = (1 << 7), // Run decode kernel for BYTE_STREAM_SPLIT encoded data - BYTE_STREAM_SPLIT_FLAT = (1 << 8), // Same as above but with a flat schema + NONE = 0, + GENERAL = (1 << 0), // Run catch-all decode kernel + STRING = (1 << 1), // Run decode kernel for string data + DELTA_BINARY = (1 << 2), // Run decode kernel for DELTA_BINARY_PACKED data + DELTA_BYTE_ARRAY = (1 << 3), // Run decode kernel for DELTA_BYTE_ARRAY encoded data + DELTA_LENGTH_BA = (1 << 4), // Run decode kernel for DELTA_LENGTH_BYTE_ARRAY encoded data + FIXED_WIDTH_NO_DICT = (1 << 5), // Run decode kernel for fixed width non-dictionary pages + FIXED_WIDTH_DICT = (1 << 6), // Run decode kernel for fixed width dictionary pages + BYTE_STREAM_SPLIT = (1 << 7), // Run decode kernel for BYTE_STREAM_SPLIT encoded data + BYTE_STREAM_SPLIT_FIXED_WIDTH_FLAT = (1 << 8), // Same as above but for flat, fixed-width data + BYTE_STREAM_SPLIT_FIXED_WIDTH_NESTED = + (1 << 9), // Same as above but for nested, fixed-width data + FIXED_WIDTH_NO_DICT_NESTED = (1 << 10), // Run decode kernel for fixed width non-dictionary pages + FIXED_WIDTH_DICT_NESTED = (1 << 11), // Run decode kernel for fixed width dictionary pages }; // mask representing all the ways in which a string can be encoded @@ -888,6 +892,7 @@ void DecodeDeltaLengthByteArray(cudf::detail::hostdevice_span pages, * @param[in] num_rows Total number of rows to read * @param[in] min_row Minimum number of rows to read * @param[in] level_type_size Size in bytes of the type for level decoding + * @param[in] has_nesting Whether or not the data contains nested (but not list) data. * @param[out] error_code Error code for kernel failures * @param[in] stream CUDA stream to use */ @@ -896,6 +901,7 @@ void DecodePageDataFixed(cudf::detail::hostdevice_span pages, std::size_t num_rows, size_t min_row, int level_type_size, + bool has_nesting, kernel_error::pointer error_code, rmm::cuda_stream_view stream); @@ -910,6 +916,7 @@ void DecodePageDataFixed(cudf::detail::hostdevice_span pages, * @param[in] num_rows Total number of rows to read * @param[in] min_row Minimum number of rows to read * @param[in] level_type_size Size in bytes of the type for level decoding + * @param[in] has_nesting Whether or not the data contains nested (but not list) data. * @param[out] error_code Error code for kernel failures * @param[in] stream CUDA stream to use */ @@ -918,11 +925,12 @@ void DecodePageDataFixedDict(cudf::detail::hostdevice_span pages, std::size_t num_rows, size_t min_row, int level_type_size, + bool has_nesting, kernel_error::pointer error_code, rmm::cuda_stream_view stream); /** - * @brief Launches kernel for reading dictionary fixed width column data stored in the pages + * @brief Launches kernel for reading fixed width column data stored in the pages * * The page data will be written to the output pointed to in the page's * associated column chunk. 
@@ -932,16 +940,18 @@ void DecodePageDataFixedDict(cudf::detail::hostdevice_span pages, * @param[in] num_rows Total number of rows to read * @param[in] min_row Minimum number of rows to read * @param[in] level_type_size Size in bytes of the type for level decoding + * @param[in] has_nesting Whether or not the data contains nested (but not list) data. * @param[out] error_code Error code for kernel failures * @param[in] stream CUDA stream to use */ -void DecodeSplitPageDataFlat(cudf::detail::hostdevice_span pages, - cudf::detail::hostdevice_span chunks, - std::size_t num_rows, - size_t min_row, - int level_type_size, - kernel_error::pointer error_code, - rmm::cuda_stream_view stream); +void DecodeSplitPageFixedWidthData(cudf::detail::hostdevice_span pages, + cudf::detail::hostdevice_span chunks, + std::size_t num_rows, + size_t min_row, + int level_type_size, + bool has_nesting, + kernel_error::pointer error_code, + rmm::cuda_stream_view stream); /** * @brief Launches kernel for initializing encoder row group fragments diff --git a/cpp/src/io/parquet/reader_impl.cpp b/cpp/src/io/parquet/reader_impl.cpp index 1bd2fae281c..f705f6626e7 100644 --- a/cpp/src/io/parquet/reader_impl.cpp +++ b/cpp/src/io/parquet/reader_impl.cpp @@ -267,14 +267,27 @@ void reader::impl::decode_page_data(read_mode mode, size_t skip_rows, size_t num } // launch byte stream split decoder - if (BitAnd(kernel_mask, decode_kernel_mask::BYTE_STREAM_SPLIT_FLAT) != 0) { - DecodeSplitPageDataFlat(subpass.pages, - pass.chunks, - num_rows, - skip_rows, - level_type_size, - error_code.data(), - streams[s_idx++]); + if (BitAnd(kernel_mask, decode_kernel_mask::BYTE_STREAM_SPLIT_FIXED_WIDTH_FLAT) != 0) { + DecodeSplitPageFixedWidthData(subpass.pages, + pass.chunks, + num_rows, + skip_rows, + level_type_size, + false, + error_code.data(), + streams[s_idx++]); + } + + // launch byte stream split decoder, for nested columns + if (BitAnd(kernel_mask, decode_kernel_mask::BYTE_STREAM_SPLIT_FIXED_WIDTH_NESTED) != 0) { + DecodeSplitPageFixedWidthData(subpass.pages, + pass.chunks, + num_rows, + skip_rows, + level_type_size, + true, + error_code.data(), + streams[s_idx++]); } // launch byte stream split decoder @@ -288,22 +301,50 @@ void reader::impl::decode_page_data(read_mode mode, size_t skip_rows, size_t num streams[s_idx++]); } + // launch fixed width type decoder if (BitAnd(kernel_mask, decode_kernel_mask::FIXED_WIDTH_NO_DICT) != 0) { DecodePageDataFixed(subpass.pages, pass.chunks, num_rows, skip_rows, level_type_size, + false, + error_code.data(), + streams[s_idx++]); + } + + // launch fixed width type decoder, for nested columns + if (BitAnd(kernel_mask, decode_kernel_mask::FIXED_WIDTH_NO_DICT_NESTED) != 0) { + DecodePageDataFixed(subpass.pages, + pass.chunks, + num_rows, + skip_rows, + level_type_size, + true, error_code.data(), streams[s_idx++]); } + // launch fixed width type decoder with dictionaries if (BitAnd(kernel_mask, decode_kernel_mask::FIXED_WIDTH_DICT) != 0) { DecodePageDataFixedDict(subpass.pages, pass.chunks, num_rows, skip_rows, level_type_size, + false, + error_code.data(), + streams[s_idx++]); + } + + // launch fixed width type decoder with dictionaries, for nested columns + if (BitAnd(kernel_mask, decode_kernel_mask::FIXED_WIDTH_DICT_NESTED) != 0) { + DecodePageDataFixedDict(subpass.pages, + pass.chunks, + num_rows, + skip_rows, + level_type_size, + true, error_code.data(), streams[s_idx++]); } diff --git a/cpp/src/join/conditional_join.cu b/cpp/src/join/conditional_join.cu index 97a06d5a923..d4ef2747c9d 100644 --- 
a/cpp/src/join/conditional_join.cu +++ b/cpp/src/join/conditional_join.cu @@ -95,7 +95,7 @@ std::unique_ptr> conditional_join_anti_semi( join_size = size.value(stream); } - rmm::device_scalar write_index(0, stream); + rmm::device_scalar write_index(0, stream); auto left_indices = std::make_unique>(join_size, stream, mr); @@ -232,13 +232,14 @@ conditional_join(table_view const& left, std::make_unique>(0, stream, mr)); } - rmm::device_scalar write_index(0, stream); + rmm::device_scalar write_index(0, stream); auto left_indices = std::make_unique>(join_size, stream, mr); auto right_indices = std::make_unique>(join_size, stream, mr); auto const& join_output_l = left_indices->data(); auto const& join_output_r = right_indices->data(); + if (has_nulls) { conditional_join <<>>( diff --git a/cpp/src/join/conditional_join_kernels.cuh b/cpp/src/join/conditional_join_kernels.cuh index 1e16c451f5a..62769862f54 100644 --- a/cpp/src/join/conditional_join_kernels.cuh +++ b/cpp/src/join/conditional_join_kernels.cuh @@ -29,6 +29,110 @@ namespace cudf { namespace detail { +/** + * @brief Adds a pair of indices to the shared memory cache + * + * @param[in] first The first index in the pair + * @param[in] second The second index in the pair + * @param[in,out] current_idx_shared Pointer to shared index that determines + * where in the shared memory cache the pair will be written + * @param[in] warp_id The ID of the warp of the calling the thread + * @param[out] joined_shared_l Pointer to the shared memory cache for left indices + * @param[out] joined_shared_r Pointer to the shared memory cache for right indices + */ +__inline__ __device__ void add_pair_to_cache(size_type const first, + size_type const second, + std::size_t* current_idx_shared, + int const warp_id, + size_type* joined_shared_l, + size_type* joined_shared_r) +{ + cuda::atomic_ref ref{*(current_idx_shared + warp_id)}; + std::size_t my_current_idx = ref.fetch_add(1, cuda::memory_order_relaxed); + // It's guaranteed to fit into the shared cache + joined_shared_l[my_current_idx] = first; + joined_shared_r[my_current_idx] = second; +} + +__inline__ __device__ void add_left_to_cache(size_type const first, + std::size_t* current_idx_shared, + int const warp_id, + size_type* joined_shared_l) +{ + cuda::atomic_ref ref{*(current_idx_shared + warp_id)}; + std::size_t my_current_idx = ref.fetch_add(1, cuda::memory_order_relaxed); + joined_shared_l[my_current_idx] = first; +} + +template +__device__ void flush_output_cache(unsigned int const activemask, + std::size_t const max_size, + int const warp_id, + int const lane_id, + std::size_t* current_idx, + std::size_t current_idx_shared[num_warps], + size_type join_shared_l[num_warps][output_cache_size], + size_type join_shared_r[num_warps][output_cache_size], + size_type* join_output_l, + size_type* join_output_r) +{ + // count how many active threads participating here which could be less than warp_size + int const num_threads = __popc(activemask); + std::size_t output_offset = 0; + + if (0 == lane_id) { + cuda::atomic_ref ref{*current_idx}; + output_offset = ref.fetch_add(current_idx_shared[warp_id], cuda::memory_order_relaxed); + } + + // No warp sync is necessary here because we are assuming that ShuffleIndex + // is internally using post-CUDA 9.0 synchronization-safe primitives + // (__shfl_sync instead of __shfl). __shfl is technically not guaranteed to + // be safe by the compiler because it is not required by the standard to + // converge divergent branches before executing. 
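// Illustrative note (not part of the patch): the broadcast below hands lane 0's reserved
// output_offset to every other lane in the warp. With a fully active warp the raw-intrinsic
// equivalent would be roughly
//   output_offset = __shfl_sync(0xffffffffu, output_offset, 0);
// cub::ShuffleIndex is used so that a partially active warp (activemask) is also handled.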
+ output_offset = cub::ShuffleIndex(output_offset, 0, activemask); + + for (std::size_t shared_out_idx = static_cast(lane_id); + shared_out_idx < current_idx_shared[warp_id]; + shared_out_idx += num_threads) { + std::size_t thread_offset = output_offset + shared_out_idx; + if (thread_offset < max_size) { + join_output_l[thread_offset] = join_shared_l[warp_id][shared_out_idx]; + join_output_r[thread_offset] = join_shared_r[warp_id][shared_out_idx]; + } + } +} + +template +__device__ void flush_output_cache(unsigned int const activemask, + std::size_t const max_size, + int const warp_id, + int const lane_id, + std::size_t* current_idx, + std::size_t current_idx_shared[num_warps], + size_type join_shared_l[num_warps][output_cache_size], + size_type* join_output_l) +{ + int const num_threads = __popc(activemask); + std::size_t output_offset = 0; + + if (0 == lane_id) { + cuda::atomic_ref ref{*current_idx}; + output_offset = ref.fetch_add(current_idx_shared[warp_id], cuda::memory_order_relaxed); + } + + output_offset = cub::ShuffleIndex(output_offset, 0, activemask); + + for (std::size_t shared_out_idx = static_cast(lane_id); + shared_out_idx < current_idx_shared[warp_id]; + shared_out_idx += num_threads) { + std::size_t thread_offset = output_offset + shared_out_idx; + if (thread_offset < max_size) { + join_output_l[thread_offset] = join_shared_l[warp_id][shared_out_idx]; + } + } +} + /** * @brief Computes the output size of joining the left table to the right table. * @@ -103,14 +207,14 @@ CUDF_KERNEL void compute_conditional_join_output_size( } } - using BlockReduce = cub::BlockReduce; + using BlockReduce = cub::BlockReduce; __shared__ typename BlockReduce::TempStorage temp_storage; std::size_t block_counter = BlockReduce(temp_storage).Sum(thread_counter); // Add block counter to global counter if (threadIdx.x == 0) { cuda::atomic_ref ref{*output_size}; - ref.fetch_add(block_counter, cuda::std::memory_order_relaxed); + ref.fetch_add(block_counter, cuda::memory_order_relaxed); } } @@ -143,13 +247,13 @@ CUDF_KERNEL void conditional_join(table_device_view left_table, join_kind join_type, cudf::size_type* join_output_l, cudf::size_type* join_output_r, - cudf::size_type* current_idx, + std::size_t* current_idx, cudf::ast::detail::expression_device_view device_expression_data, - cudf::size_type const max_size, + std::size_t const max_size, bool const swap_tables) { constexpr int num_warps = block_size / detail::warp_size; - __shared__ cudf::size_type current_idx_shared[num_warps]; + __shared__ std::size_t current_idx_shared[num_warps]; __shared__ cudf::size_type join_shared_l[num_warps][output_cache_size]; __shared__ cudf::size_type join_shared_r[num_warps][output_cache_size]; @@ -183,7 +287,7 @@ CUDF_KERNEL void conditional_join(table_device_view left_table, if (outer_row_index < outer_num_rows) { bool found_match = false; - for (thread_index_type inner_row_index(0); inner_row_index < inner_num_rows; + for (cudf::thread_index_type inner_row_index(0); inner_row_index < inner_num_rows; ++inner_row_index) { auto output_dest = cudf::ast::detail::value_expression_result(); auto const left_row_index = swap_tables ? 
inner_row_index : outer_row_index; @@ -277,12 +381,12 @@ CUDF_KERNEL void conditional_join_anti_semi( table_device_view right_table, join_kind join_type, cudf::size_type* join_output_l, - cudf::size_type* current_idx, + std::size_t* current_idx, cudf::ast::detail::expression_device_view device_expression_data, - cudf::size_type const max_size) + std::size_t const max_size) { constexpr int num_warps = block_size / detail::warp_size; - __shared__ cudf::size_type current_idx_shared[num_warps]; + __shared__ std::size_t current_idx_shared[num_warps]; __shared__ cudf::size_type join_shared_l[num_warps][output_cache_size]; extern __shared__ char raw_intermediate_storage[]; @@ -310,7 +414,7 @@ CUDF_KERNEL void conditional_join_anti_semi( for (cudf::thread_index_type outer_row_index = start_idx; outer_row_index < outer_num_rows; outer_row_index += stride) { bool found_match = false; - for (thread_index_type inner_row_index(0); inner_row_index < inner_num_rows; + for (cudf::thread_index_type inner_row_index(0); inner_row_index < inner_num_rows; ++inner_row_index) { auto output_dest = cudf::ast::detail::value_expression_result(); diff --git a/cpp/src/join/distinct_hash_join.cu b/cpp/src/join/distinct_hash_join.cu index 5048da25e86..daa1bf17c0d 100644 --- a/cpp/src/join/distinct_hash_join.cu +++ b/cpp/src/join/distinct_hash_join.cu @@ -54,7 +54,7 @@ auto prepare_device_equal( cudf::null_equality compare_nulls) { auto const two_table_equal = - cudf::experimental::row::equality::two_table_comparator(build, probe); + cudf::experimental::row::equality::two_table_comparator(probe, build); return comparator_adapter{two_table_equal.equal_to( nullate::DYNAMIC{has_nulls}, compare_nulls)}; } @@ -113,7 +113,7 @@ distinct_hash_join::distinct_hash_join(cudf::table_view const& build, _hash_table{build.num_rows(), CUCO_DESIRED_LOAD_FACTOR, cuco::empty_key{cuco::pair{std::numeric_limits::max(), - lhs_index_type{JoinNoneValue}}}, + rhs_index_type{JoinNoneValue}}}, prepare_device_equal( _preprocessed_build, _preprocessed_probe, has_nulls, compare_nulls), {}, @@ -131,7 +131,7 @@ distinct_hash_join::distinct_hash_join(cudf::table_view const& build, auto const d_hasher = row_hasher.device_hasher(nullate::DYNAMIC{this->_has_nulls}); auto const iter = cudf::detail::make_counting_transform_iterator( - 0, build_keys_fn{d_hasher}); + 0, build_keys_fn{d_hasher}); size_type const build_table_num_rows{build.num_rows()}; if (this->_nulls_equal == cudf::null_equality::EQUAL or (not cudf::nullable(this->_build))) { @@ -174,7 +174,7 @@ distinct_hash_join::inner_join(rmm::cuda_stream_view stream, cudf::experimental::row::hash::row_hasher{this->_preprocessed_probe}; auto const d_probe_hasher = probe_row_hasher.device_hasher(nullate::DYNAMIC{this->_has_nulls}); auto const iter = cudf::detail::make_counting_transform_iterator( - 0, build_keys_fn{d_probe_hasher}); + 0, build_keys_fn{d_probe_hasher}); auto const build_indices_begin = thrust::make_transform_output_iterator(build_indices->begin(), output_fn{}); @@ -216,7 +216,7 @@ std::unique_ptr> distinct_hash_join::l cudf::experimental::row::hash::row_hasher{this->_preprocessed_probe}; auto const d_probe_hasher = probe_row_hasher.device_hasher(nullate::DYNAMIC{this->_has_nulls}); auto const iter = cudf::detail::make_counting_transform_iterator( - 0, build_keys_fn{d_probe_hasher}); + 0, build_keys_fn{d_probe_hasher}); auto const output_begin = thrust::make_transform_output_iterator(build_indices->begin(), output_fn{}); diff --git a/cpp/src/join/join_common_utils.cuh 
b/cpp/src/join/join_common_utils.cuh index 31f267d5cfb..3d0f3e4340d 100644 --- a/cpp/src/join/join_common_utils.cuh +++ b/cpp/src/join/join_common_utils.cuh @@ -262,101 +262,6 @@ struct valid_range { } }; -/** - * @brief Adds a pair of indices to the shared memory cache - * - * @param[in] first The first index in the pair - * @param[in] second The second index in the pair - * @param[in,out] current_idx_shared Pointer to shared index that determines - * where in the shared memory cache the pair will be written - * @param[in] warp_id The ID of the warp of the calling the thread - * @param[out] joined_shared_l Pointer to the shared memory cache for left indices - * @param[out] joined_shared_r Pointer to the shared memory cache for right indices - */ -__inline__ __device__ void add_pair_to_cache(size_type const first, - size_type const second, - size_type* current_idx_shared, - int const warp_id, - size_type* joined_shared_l, - size_type* joined_shared_r) -{ - size_type my_current_idx{atomicAdd(current_idx_shared + warp_id, size_type(1))}; - // its guaranteed to fit into the shared cache - joined_shared_l[my_current_idx] = first; - joined_shared_r[my_current_idx] = second; -} - -__inline__ __device__ void add_left_to_cache(size_type const first, - size_type* current_idx_shared, - int const warp_id, - size_type* joined_shared_l) -{ - size_type my_current_idx{atomicAdd(current_idx_shared + warp_id, size_type(1))}; - - joined_shared_l[my_current_idx] = first; -} - -template -__device__ void flush_output_cache(unsigned int const activemask, - cudf::size_type const max_size, - int const warp_id, - int const lane_id, - cudf::size_type* current_idx, - cudf::size_type current_idx_shared[num_warps], - size_type join_shared_l[num_warps][output_cache_size], - size_type join_shared_r[num_warps][output_cache_size], - size_type* join_output_l, - size_type* join_output_r) -{ - // count how many active threads participating here which could be less than warp_size - int const num_threads = __popc(activemask); - cudf::size_type output_offset = 0; - - if (0 == lane_id) { output_offset = atomicAdd(current_idx, current_idx_shared[warp_id]); } - - // No warp sync is necessary here because we are assuming that ShuffleIndex - // is internally using post-CUDA 9.0 synchronization-safe primitives - // (__shfl_sync instead of __shfl). __shfl is technically not guaranteed to - // be safe by the compiler because it is not required by the standard to - // converge divergent branches before executing. 
- output_offset = cub::ShuffleIndex(output_offset, 0, activemask); - - for (int shared_out_idx = lane_id; shared_out_idx < current_idx_shared[warp_id]; - shared_out_idx += num_threads) { - cudf::size_type thread_offset = output_offset + shared_out_idx; - if (thread_offset < max_size) { - join_output_l[thread_offset] = join_shared_l[warp_id][shared_out_idx]; - join_output_r[thread_offset] = join_shared_r[warp_id][shared_out_idx]; - } - } -} - -template -__device__ void flush_output_cache(unsigned int const activemask, - cudf::size_type const max_size, - int const warp_id, - int const lane_id, - cudf::size_type* current_idx, - cudf::size_type current_idx_shared[num_warps], - size_type join_shared_l[num_warps][output_cache_size], - size_type* join_output_l) -{ - int const num_threads = __popc(activemask); - cudf::size_type output_offset = 0; - - if (0 == lane_id) { output_offset = atomicAdd(current_idx, current_idx_shared[warp_id]); } - - output_offset = cub::ShuffleIndex(output_offset, 0, activemask); - - for (int shared_out_idx = lane_id; shared_out_idx < current_idx_shared[warp_id]; - shared_out_idx += num_threads) { - cudf::size_type thread_offset = output_offset + shared_out_idx; - if (thread_offset < max_size) { - join_output_l[thread_offset] = join_shared_l[warp_id][shared_out_idx]; - } - } -} - } // namespace detail } // namespace cudf diff --git a/cpp/src/lists/copying/segmented_gather.cu b/cpp/src/lists/copying/segmented_gather.cu index 89b1a126fc5..779eca438db 100644 --- a/cpp/src/lists/copying/segmented_gather.cu +++ b/cpp/src/lists/copying/segmented_gather.cu @@ -13,6 +13,7 @@ * See the License for the specific language governing permissions and * limitations under the License. */ +#include #include #include #include diff --git a/cpp/src/search/contains_table.cu b/cpp/src/search/contains_table.cu index 466f9093194..fbb0f6cb0f5 100644 --- a/cpp/src/search/contains_table.cu +++ b/cpp/src/search/contains_table.cu @@ -53,12 +53,12 @@ struct hasher_adapter { __device__ constexpr auto operator()(lhs_index_type idx) const noexcept { - return _haystack_hasher(static_cast(idx)); + return _needle_hasher(static_cast(idx)); } __device__ constexpr auto operator()(rhs_index_type idx) const noexcept { - return _needle_hasher(static_cast(idx)); + return _haystack_hasher(static_cast(idx)); } private: @@ -76,6 +76,9 @@ struct comparator_adapter { { } + // suppress "function was declared but never referenced warning" +#pragma nv_diagnostic push +#pragma nv_diag_suppress 177 __device__ constexpr auto operator()(lhs_index_type lhs_index, lhs_index_type rhs_index) const noexcept { @@ -85,12 +88,28 @@ struct comparator_adapter { return _self_equal(lhs, rhs); } + __device__ constexpr auto operator()(rhs_index_type lhs_index, + rhs_index_type rhs_index) const noexcept + { + auto const lhs = static_cast(lhs_index); + auto const rhs = static_cast(rhs_index); + + return _self_equal(lhs, rhs); + } + __device__ constexpr auto operator()(lhs_index_type lhs_index, rhs_index_type rhs_index) const noexcept { return _two_table_equal(lhs_index, rhs_index); } + __device__ constexpr auto operator()(rhs_index_type lhs_index, + lhs_index_type rhs_index) const noexcept + { + return _two_table_equal(lhs_index, rhs_index); + } +#pragma nv_diagnostic pop + private: SelfEqual const _self_equal; TwoTableEqual const _two_table_equal; @@ -210,26 +229,26 @@ rmm::device_uvector contains(table_view const& haystack, auto const self_equal = cudf::experimental::row::equality::self_comparator(preprocessed_haystack); auto const 
two_table_equal = cudf::experimental::row::equality::two_table_comparator( - preprocessed_haystack, preprocessed_needles); + preprocessed_needles, preprocessed_haystack); // The output vector. auto contained = rmm::device_uvector(needles.num_rows(), stream, mr); auto const haystack_iter = cudf::detail::make_counting_transform_iterator( - size_type{0}, cuda::proclaim_return_type([] __device__(auto idx) { - return lhs_index_type{idx}; - })); - auto const needles_iter = cudf::detail::make_counting_transform_iterator( size_type{0}, cuda::proclaim_return_type([] __device__(auto idx) { return rhs_index_type{idx}; })); + auto const needles_iter = cudf::detail::make_counting_transform_iterator( + size_type{0}, cuda::proclaim_return_type([] __device__(auto idx) { + return lhs_index_type{idx}; + })); auto const helper_func = [&](auto const& d_self_equal, auto const& d_two_table_equal, auto const& probing_scheme) { auto const d_equal = comparator_adapter{d_self_equal, d_two_table_equal}; auto set = cuco::static_set{cuco::extent{compute_hash_table_size(haystack.num_rows())}, - cuco::empty_key{lhs_index_type{-1}}, + cuco::empty_key{rhs_index_type{-1}}, d_equal, probing_scheme, {}, diff --git a/cpp/src/stream_compaction/stable_distinct.cu b/cpp/src/stream_compaction/stable_distinct.cu index 27b5a92ab69..074d4fd7d1a 100644 --- a/cpp/src/stream_compaction/stable_distinct.cu +++ b/cpp/src/stream_compaction/stable_distinct.cu @@ -79,11 +79,11 @@ std::unique_ptr
stable_distinct(table_view const& input, duplicate_keep_option keep, null_equality nulls_equal, nan_equality nans_equal, + rmm::cuda_stream_view stream, rmm::device_async_resource_ref mr) { CUDF_FUNC_RANGE(); - return detail::stable_distinct( - input, keys, keep, nulls_equal, nans_equal, cudf::get_default_stream(), mr); + return detail::stable_distinct(input, keys, keep, nulls_equal, nans_equal, stream, mr); } } // namespace cudf diff --git a/cpp/src/strings/utilities.cu b/cpp/src/strings/utilities.cu index 18e726a6d7d..101004a5d06 100644 --- a/cpp/src/strings/utilities.cu +++ b/cpp/src/strings/utilities.cu @@ -13,16 +13,17 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - #include "strings/char_types/char_cases.h" #include "strings/char_types/char_flags.h" #include #include #include +#include #include #include #include +#include #include #include @@ -36,8 +37,7 @@ #include #include -namespace cudf { -namespace strings { +namespace cudf::strings { namespace detail { /** @@ -175,5 +175,17 @@ int64_t get_offset_value(cudf::column_view const& offsets, } } // namespace detail -} // namespace strings -} // namespace cudf + +rmm::device_uvector create_string_vector_from_column( + cudf::strings_column_view const strings, + rmm::cuda_stream_view stream, + rmm::device_async_resource_ref mr) +{ + CUDF_FUNC_RANGE(); + return detail::create_string_vector_from_column(strings, stream, mr); +} + +int64_t get_offset64_threshold() { return detail::get_offset64_threshold(); } +bool is_large_strings_enabled() { return detail::is_large_strings_enabled(); } + +} // namespace cudf::strings diff --git a/cpp/src/text/bpe/byte_pair_encoding.cuh b/cpp/src/text/bpe/byte_pair_encoding.cuh index 2ad22fd4e46..3bb574748b6 100644 --- a/cpp/src/text/bpe/byte_pair_encoding.cuh +++ b/cpp/src/text/bpe/byte_pair_encoding.cuh @@ -96,6 +96,14 @@ struct bpe_equal { auto const right = d_strings.element(lhs + 1); return (left == rhs.first) && (right == rhs.second); } + // used by find + __device__ bool operator()(merge_pair_type const& lhs, cudf::size_type rhs) const noexcept + { + rhs *= 2; + auto const left = d_strings.element(rhs); + auto const right = d_strings.element(rhs + 1); + return (left == lhs.first) && (right == lhs.second); + } }; using bpe_probe_scheme = cuco::linear_probing<1, bpe_hasher>; @@ -154,6 +162,11 @@ struct mp_equal { auto const left = d_strings.element(lhs); return left == rhs; } + __device__ bool operator()(cudf::string_view const& lhs, cudf::size_type rhs) const noexcept + { + auto const right = d_strings.element(rhs); + return lhs == right; + } }; using mp_probe_scheme = cuco::linear_probing<1, mp_hasher>; diff --git a/cpp/src/text/vocabulary_tokenize.cu b/cpp/src/text/vocabulary_tokenize.cu index f012f7ce09a..ea09f5d17af 100644 --- a/cpp/src/text/vocabulary_tokenize.cu +++ b/cpp/src/text/vocabulary_tokenize.cu @@ -86,10 +86,18 @@ struct vocab_equal { return lhs == rhs; // all rows are expected to be unique } // used by find + // suppress "function was declared but never referenced warning" +#pragma nv_diagnostic push +#pragma nv_diag_suppress 177 __device__ bool operator()(cudf::size_type lhs, cudf::string_view const& rhs) const noexcept { return d_strings.element(lhs) == rhs; } + __device__ bool operator()(cudf::string_view const& lhs, cudf::size_type rhs) const noexcept + { + return d_strings.element(rhs) == lhs; + } +#pragma nv_diagnostic pop }; using probe_scheme = cuco::linear_probing<1, vocab_hasher>; diff --git 
a/cpp/tests/CMakeLists.txt b/cpp/tests/CMakeLists.txt index 9f14455f42d..eef09954647 100644 --- a/cpp/tests/CMakeLists.txt +++ b/cpp/tests/CMakeLists.txt @@ -700,6 +700,7 @@ ConfigureTest(STREAM_REPLACE_TEST streams/replace_test.cpp STREAM_MODE testing) ConfigureTest(STREAM_ROLLING_TEST streams/rolling_test.cpp STREAM_MODE testing) ConfigureTest(STREAM_SEARCH_TEST streams/search_test.cpp STREAM_MODE testing) ConfigureTest(STREAM_SORTING_TEST streams/sorting_test.cpp STREAM_MODE testing) +ConfigureTest(STREAM_STREAM_COMPACTION_TEST streams/stream_compaction_test.cpp STREAM_MODE testing) ConfigureTest( STREAM_STRINGS_TEST streams/strings/case_test.cpp diff --git a/cpp/tests/binaryop/binop-compiled-fixed_point-test.cpp b/cpp/tests/binaryop/binop-compiled-fixed_point-test.cpp index 6d097b2ff12..89824eb6511 100644 --- a/cpp/tests/binaryop/binop-compiled-fixed_point-test.cpp +++ b/cpp/tests/binaryop/binop-compiled-fixed_point-test.cpp @@ -843,3 +843,61 @@ TYPED_TEST(FixedPointTest_64_128_Reps, FixedPoint_64_128_ComparisonTests) CUDF_TEST_EXPECT_COLUMNS_EQUAL(h->view(), falses); } } + +template +void test_fixed_floating(cudf::binary_operator op, + double floating_value, + int decimal_value, + int decimal_scale, + ResultType expected) +{ + auto const scale = numeric::scale_type{decimal_scale}; + auto const result_type = cudf::data_type(cudf::type_to_id()); + auto const nullable = + (op == cudf::binary_operator::NULL_EQUALS || op == cudf::binary_operator::NULL_NOT_EQUALS || + op == cudf::binary_operator::NULL_MIN || op == cudf::binary_operator::NULL_MAX); + + cudf::test::fixed_width_column_wrapper floating_col({floating_value}); + cudf::test::fixed_point_column_wrapper decimal_col({decimal_value}, scale); + + auto result = binary_operation(floating_col, decimal_col, op, result_type); + + if constexpr (cudf::is_fixed_point()) { + using wrapper_type = cudf::test::fixed_point_column_wrapper; + auto const expected_col = nullable ? wrapper_type({expected.value()}, {true}, expected.scale()) + : wrapper_type({expected.value()}, expected.scale()); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected_col, *result.get()); + } else { + using wrapper_type = cudf::test::fixed_width_column_wrapper; + auto const expected_col = + nullable ? 
wrapper_type({expected}, {true}) : wrapper_type({expected}); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected_col, *result.get()); + } +} + +TYPED_TEST(FixedPointCompiledTest, FixedPointWithFloating) +{ + using namespace numeric; + + // BOOLEAN + test_fixed_floating(cudf::binary_operator::EQUAL, 1.0, 10, -1, true); + test_fixed_floating(cudf::binary_operator::NOT_EQUAL, 1.0, 10, -1, false); + test_fixed_floating(cudf::binary_operator::LESS, 2.0, 10, -1, false); + test_fixed_floating(cudf::binary_operator::GREATER, 2.0, 10, -1, true); + test_fixed_floating(cudf::binary_operator::LESS_EQUAL, 2.0, 20, -1, true); + test_fixed_floating(cudf::binary_operator::GREATER_EQUAL, 2.0, 30, -1, false); + test_fixed_floating(cudf::binary_operator::NULL_EQUALS, 1.0, 10, -1, true); + test_fixed_floating(cudf::binary_operator::NULL_NOT_EQUALS, 1.0, 10, -1, false); + + // PRIMARY ARITHMETIC + auto const decimal_result = numeric::decimal32(4, numeric::scale_type{0}); + test_fixed_floating(cudf::binary_operator::ADD, 1.0, 30, -1, decimal_result); + test_fixed_floating(cudf::binary_operator::SUB, 6.0, 20, -1, decimal_result); + test_fixed_floating(cudf::binary_operator::MUL, 2.0, 20, -1, decimal_result); + test_fixed_floating(cudf::binary_operator::DIV, 8.0, 2, 0, decimal_result); + test_fixed_floating(cudf::binary_operator::MOD, 9.0, 50, -1, decimal_result); + + // OTHER ARITHMETIC + test_fixed_floating(cudf::binary_operator::NULL_MAX, 4.0, 20, -1, decimal_result); + test_fixed_floating(cudf::binary_operator::NULL_MIN, 4.0, 200, -1, decimal_result); +} diff --git a/cpp/tests/column/factories_test.cpp b/cpp/tests/column/factories_test.cpp index dca36eaa4e7..603187f0330 100644 --- a/cpp/tests/column/factories_test.cpp +++ b/cpp/tests/column/factories_test.cpp @@ -24,7 +24,7 @@ #include #include #include -#include +#include #include #include #include @@ -762,7 +762,7 @@ TEST_F(ColumnFactoryTest, FromStructScalarNull) { struct_from_scalar(false); } TEST_F(ColumnFactoryTest, FromScalarErrors) { - if (cudf::strings::detail::is_large_strings_enabled()) { return; } + if (cudf::strings::is_large_strings_enabled()) { return; } cudf::string_scalar ss("hello world"); EXPECT_THROW(cudf::make_column_from_scalar(ss, 214748365), std::overflow_error); diff --git a/cpp/tests/copying/concatenate_tests.cpp b/cpp/tests/copying/concatenate_tests.cpp index 078e0ef9bae..054441788d0 100644 --- a/cpp/tests/copying/concatenate_tests.cpp +++ b/cpp/tests/copying/concatenate_tests.cpp @@ -29,7 +29,7 @@ #include #include #include -#include +#include #include #include #include @@ -189,7 +189,7 @@ TEST_F(StringColumnTest, ConcatenateManyColumns) TEST_F(StringColumnTest, ConcatenateTooLarge) { - if (cudf::strings::detail::is_large_strings_enabled()) { return; } + if (cudf::strings::is_large_strings_enabled()) { return; } std::string big_str(1000000, 'a'); // 1 million bytes x 5 = 5 million bytes cudf::test::strings_column_wrapper input{big_str, big_str, big_str, big_str, big_str}; @@ -379,7 +379,7 @@ TEST_F(OverflowTest, OverflowTest) } // string column, overflow on chars - if (!cudf::strings::detail::is_large_strings_enabled()) { + if (!cudf::strings::is_large_strings_enabled()) { constexpr auto size = static_cast(static_cast(1024) * 1024 * 1024); // try and concatenate 6 string columns of with 1 billion chars in each @@ -502,7 +502,7 @@ TEST_F(OverflowTest, Presliced) } // strings, overflow on chars - if (!cudf::strings::detail::is_large_strings_enabled()) { + if (!cudf::strings::is_large_strings_enabled()) { constexpr cudf::size_type 
total_chars_size = 1024 * 1024 * 1024; constexpr cudf::size_type string_size = 64; constexpr cudf::size_type num_rows = total_chars_size / string_size; diff --git a/cpp/tests/io/parquet_writer_test.cpp b/cpp/tests/io/parquet_writer_test.cpp index 84ab83e33d0..a1f4c7b81d8 100644 --- a/cpp/tests/io/parquet_writer_test.cpp +++ b/cpp/tests/io/parquet_writer_test.cpp @@ -1785,7 +1785,8 @@ TEST_F(ParquetWriterTest, DeltaBinaryStartsWithNulls) CUDF_TEST_EXPECT_TABLES_EQUAL(expected, result.tbl->view()); } -TEST_F(ParquetWriterTest, ByteStreamSplit) +std::pair, cudf::io::table_input_metadata> +make_byte_stream_split_table(bool as_struct) { constexpr auto num_rows = 100; std::mt19937 engine{31337}; @@ -1802,24 +1803,73 @@ TEST_F(ParquetWriterTest, ByteStreamSplit) // throw in a list to make sure both decoders are working auto col4 = make_parquet_list_col(engine, num_rows, 5, true); - auto expected = table_view{{col0, col1, col2, col3, *col4}}; + std::vector> columns; + columns.reserve(5); + columns.push_back(col0.release()); + columns.push_back(col1.release()); + columns.push_back(col2.release()); + columns.push_back(col3.release()); + columns.push_back(std::move(col4)); + + return [&]() -> std::pair, cudf::io::table_input_metadata> { + auto const encoding = cudf::io::column_encoding::BYTE_STREAM_SPLIT; + + // make as a nested struct + if (as_struct) { + auto valids = + cudf::detail::make_counting_transform_iterator(0, [](int i) { return i % 2 == 0; }); + auto [null_mask, null_count] = cudf::test::detail::make_null_mask(valids, valids + num_rows); + + std::vector> table_cols; + table_cols.push_back( + cudf::make_structs_column(num_rows, std::move(columns), null_count, std::move(null_mask))); + + auto tbl = std::make_unique(std::move(table_cols)); + auto expected = table_view{*tbl}; + + cudf::io::table_input_metadata expected_metadata(expected); + expected_metadata.column_metadata[0].set_name("struct"); + expected_metadata.column_metadata[0].set_encoding(encoding); + + expected_metadata.column_metadata[0].child(0).set_name("int32s"); + expected_metadata.column_metadata[0].child(1).set_name("int64s"); + expected_metadata.column_metadata[0].child(2).set_name("floats"); + expected_metadata.column_metadata[0].child(3).set_name("doubles"); + expected_metadata.column_metadata[0].child(4).set_name("int32list"); + for (int idx = 0; idx <= 3; idx++) { + expected_metadata.column_metadata[0].child(idx).set_encoding(encoding); + } + expected_metadata.column_metadata[0].child(4).child(1).set_encoding(encoding); - cudf::io::table_input_metadata expected_metadata(expected); - expected_metadata.column_metadata[0].set_name("int32s"); - expected_metadata.column_metadata[1].set_name("int64s"); - expected_metadata.column_metadata[2].set_name("floats"); - expected_metadata.column_metadata[3].set_name("doubles"); - expected_metadata.column_metadata[4].set_name("int32list"); - auto const encoding = cudf::io::column_encoding::BYTE_STREAM_SPLIT; - for (int i = 0; i <= 3; i++) { - expected_metadata.column_metadata[i].set_encoding(encoding); - } + return {std::move(tbl), expected_metadata}; + } + + // make flat + auto tbl = std::make_unique(std::move(columns)); + auto expected = table_view{*tbl}; - expected_metadata.column_metadata[4].child(1).set_encoding(encoding); + cudf::io::table_input_metadata expected_metadata(expected); + expected_metadata.column_metadata[0].set_name("int32s"); + expected_metadata.column_metadata[1].set_name("int64s"); + expected_metadata.column_metadata[2].set_name("floats"); + 
expected_metadata.column_metadata[3].set_name("doubles"); + expected_metadata.column_metadata[4].set_name("int32list"); + for (int idx = 0; idx <= 3; idx++) { + expected_metadata.column_metadata[idx].set_encoding(encoding); + } + + expected_metadata.column_metadata[4].child(1).set_encoding(encoding); + return {std::move(tbl), expected_metadata}; + }(); +} + +TEST_F(ParquetWriterTest, ByteStreamSplit) +{ + auto [expected, expected_metadata] = make_byte_stream_split_table(false); auto const filepath = temp_env->get_temp_filepath("ByteStreamSplit.parquet"); cudf::io::parquet_writer_options out_opts = - cudf::io::parquet_writer_options::builder(cudf::io::sink_info{filepath}, expected) + cudf::io::parquet_writer_options::builder(cudf::io::sink_info{filepath}, *expected) .metadata(expected_metadata); cudf::io::write_parquet(out_opts); @@ -1827,7 +1877,24 @@ TEST_F(ParquetWriterTest, ByteStreamSplit) cudf::io::parquet_reader_options::builder(cudf::io::source_info{filepath}); auto result = cudf::io::read_parquet(in_opts); - CUDF_TEST_EXPECT_TABLES_EQUAL(expected, result.tbl->view()); + CUDF_TEST_EXPECT_TABLES_EQUAL(*expected, result.tbl->view()); +} + +TEST_F(ParquetWriterTest, ByteStreamSplitStruct) +{ + auto [expected, expected_metadata] = make_byte_stream_split_table(true); + + auto const filepath = temp_env->get_temp_filepath("ByteStreamSplitStruct.parquet"); + cudf::io::parquet_writer_options out_opts = + cudf::io::parquet_writer_options::builder(cudf::io::sink_info{filepath}, *expected) + .metadata(expected_metadata); + cudf::io::write_parquet(out_opts); + + cudf::io::parquet_reader_options in_opts = + cudf::io::parquet_reader_options::builder(cudf::io::source_info{filepath}); + auto result = cudf::io::read_parquet(in_opts); + + CUDF_TEST_EXPECT_TABLES_EQUAL(*expected, result.tbl->view()); } TEST_F(ParquetWriterTest, DecimalByteStreamSplit) diff --git a/cpp/tests/streams/stream_compaction_test.cpp b/cpp/tests/streams/stream_compaction_test.cpp new file mode 100644 index 00000000000..56443870602 --- /dev/null +++ b/cpp/tests/streams/stream_compaction_test.cpp @@ -0,0 +1,235 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include + +#include + +auto constexpr null{0}; // null at current level +auto constexpr XXX{0}; // null pushed down from parent level +auto constexpr NaN = std::numeric_limits::quiet_NaN(); +auto constexpr KEEP_ANY = cudf::duplicate_keep_option::KEEP_ANY; +auto constexpr KEEP_FIRST = cudf::duplicate_keep_option::KEEP_FIRST; +auto constexpr KEEP_LAST = cudf::duplicate_keep_option::KEEP_LAST; +auto constexpr KEEP_NONE = cudf::duplicate_keep_option::KEEP_NONE; +auto constexpr NULL_EQUAL = cudf::null_equality::EQUAL; +auto constexpr NULL_UNEQUAL = cudf::null_equality::UNEQUAL; +auto constexpr NAN_EQUAL = cudf::nan_equality::ALL_EQUAL; +auto constexpr NAN_UNEQUAL = cudf::nan_equality::UNEQUAL; + +using int32s_col = cudf::test::fixed_width_column_wrapper; +using floats_col = cudf::test::fixed_width_column_wrapper; + +using cudf::nan_policy; +using cudf::null_equality; +using cudf::null_policy; +using cudf::test::iterators::no_nulls; +using cudf::test::iterators::null_at; +using cudf::test::iterators::nulls_at; + +struct StableDistinctKeepAny : public cudf::test::BaseFixture {}; + +struct StableDistinctKeepFirstLastNone : public cudf::test::BaseFixture {}; + +TEST_F(StableDistinctKeepAny, NoNullsTableWithNaNs) +{ + // Column(s) used to test KEEP_ANY needs to have same rows in contiguous + // groups for equivalent keys because KEEP_ANY is nondeterministic. + auto const col1 = int32s_col{6, 6, 6, 1, 1, 1, 3, 5, 8, 5}; + auto const col2 = floats_col{6, 6, 6, 1, 1, 1, 3, 4, 9, 4}; + auto const keys1 = int32s_col{20, 20, 20, 15, 15, 15, 20, 19, 21, 9}; + auto const keys2 = floats_col{19., 19., 19., NaN, NaN, NaN, 20., 20., 9., 21.}; + + auto const input = cudf::table_view{{col1, col2, keys1, keys2}}; + auto const key_idx = std::vector{2, 3}; + + // NaNs are unequal. + { + auto const exp_col1 = int32s_col{6, 1, 1, 1, 3, 5, 8, 5}; + auto const exp_col2 = floats_col{6, 1, 1, 1, 3, 4, 9, 4}; + auto const exp_keys1 = int32s_col{20, 15, 15, 15, 20, 19, 21, 9}; + auto const exp_keys2 = floats_col{19., NaN, NaN, NaN, 20., 20., 9., 21.}; + auto const expected = cudf::table_view{{exp_col1, exp_col2, exp_keys1, exp_keys2}}; + + auto const result = cudf::stable_distinct( + input, key_idx, KEEP_ANY, NULL_EQUAL, NAN_UNEQUAL, cudf::test::get_default_stream()); + CUDF_TEST_EXPECT_TABLES_EQUAL(expected, *result); + } + + // NaNs are equal. + { + auto const exp_col1 = int32s_col{6, 1, 3, 5, 8, 5}; + auto const exp_col2 = floats_col{6, 1, 3, 4, 9, 4}; + auto const exp_keys1 = int32s_col{20, 15, 20, 19, 21, 9}; + auto const exp_keys2 = floats_col{19., NaN, 20., 20., 9., 21.}; + auto const expected = cudf::table_view{{exp_col1, exp_col2, exp_keys1, exp_keys2}}; + + auto const result = cudf::stable_distinct( + input, key_idx, KEEP_ANY, NULL_EQUAL, NAN_EQUAL, cudf::test::get_default_stream()); + CUDF_TEST_EXPECT_TABLES_EQUAL(expected, *result); + } +} + +TEST_F(StableDistinctKeepAny, InputWithNullsAndNaNs) +{ + auto constexpr null{0.0}; // shadow the global `null` variable of type int + + // Column(s) used to test KEEP_ANY needs to have same rows in contiguous + // groups for equivalent keys because KEEP_ANY is nondeterministic. + auto const col = int32s_col{5, 4, 4, 1, 1, 1, 8, 8, 1}; + auto const keys = floats_col{{20., null, null, NaN, NaN, NaN, 19., 19., 21.}, nulls_at({1, 2})}; + auto const input = cudf::table_view{{col, keys}}; + auto const key_idx = std::vector{1}; + + // Nulls are equal, NaNs are unequal. 
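// With nulls compared equal but NaNs compared unequal, the two null keys collapse into a single
// output row while each of the three NaN keys is kept as its own distinct row.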
+ { + auto const exp_col = int32s_col{5, 4, 1, 1, 1, 8, 1}; + auto const exp_keys = floats_col{{20., null, NaN, NaN, NaN, 19., 21.}, null_at(1)}; + auto const expected = cudf::table_view{{exp_col, exp_keys}}; + + auto const result = cudf::stable_distinct( + input, key_idx, KEEP_ANY, NULL_EQUAL, NAN_UNEQUAL, cudf::test::get_default_stream()); + CUDF_TEST_EXPECT_TABLES_EQUAL(expected, *result); + } + + // Nulls are equal, NaNs are equal. + { + auto const exp_col = int32s_col{5, 4, 1, 8, 1}; + auto const exp_keys = floats_col{{20., null, NaN, 19., 21.}, null_at(1)}; + auto const expected = cudf::table_view{{exp_col, exp_keys}}; + + auto const result = cudf::stable_distinct( + input, key_idx, KEEP_ANY, NULL_EQUAL, NAN_EQUAL, cudf::test::get_default_stream()); + CUDF_TEST_EXPECT_TABLES_EQUAL(expected, *result); + } + + // Nulls are unequal, NaNs are unequal. + { + auto const exp_col = int32s_col{5, 4, 4, 1, 1, 1, 8, 1}; + auto const exp_keys = floats_col{{20., null, null, NaN, NaN, NaN, 19., 21.}, nulls_at({1, 2})}; + auto const expected = cudf::table_view{{exp_col, exp_keys}}; + + auto const result = cudf::stable_distinct( + input, key_idx, KEEP_ANY, NULL_UNEQUAL, NAN_UNEQUAL, cudf::test::get_default_stream()); + CUDF_TEST_EXPECT_TABLES_EQUAL(expected, *result); + } + + // Nulls are unequal, NaNs are equal. + { + auto const exp_col = int32s_col{5, 4, 4, 1, 8, 1}; + auto const exp_keys = floats_col{{20., null, null, NaN, 19., 21.}, nulls_at({1, 2})}; + auto const expected = cudf::table_view{{exp_col, exp_keys}}; + + auto const result = cudf::stable_distinct( + input, key_idx, KEEP_ANY, NULL_UNEQUAL, NAN_EQUAL, cudf::test::get_default_stream()); + CUDF_TEST_EXPECT_TABLES_EQUAL(expected, *result); + } +} + +TEST_F(StableDistinctKeepFirstLastNone, InputWithNaNsEqual) +{ + // Column(s) used to test needs to have different rows for the same keys. + auto const col = int32s_col{0, 1, 2, 3, 4, 5, 6}; + auto const keys = floats_col{20., NaN, NaN, 19., 21., 19., 22.}; + auto const input = cudf::table_view{{col, keys}}; + auto const key_idx = std::vector{1}; + + // KEEP_FIRST + { + auto const exp_col = int32s_col{0, 1, 3, 4, 6}; + auto const exp_keys = floats_col{20., NaN, 19., 21., 22.}; + auto const expected = cudf::table_view{{exp_col, exp_keys}}; + + auto const result = cudf::stable_distinct( + input, key_idx, KEEP_FIRST, NULL_EQUAL, NAN_EQUAL, cudf::test::get_default_stream()); + CUDF_TEST_EXPECT_TABLES_EQUAL(expected, *result); + } + + // KEEP_LAST + { + auto const exp_col = int32s_col{0, 2, 4, 5, 6}; + auto const exp_keys = floats_col{20., NaN, 21., 19., 22.}; + auto const expected = cudf::table_view{{exp_col, exp_keys}}; + + auto const result = cudf::stable_distinct( + input, key_idx, KEEP_LAST, NULL_EQUAL, NAN_EQUAL, cudf::test::get_default_stream()); + CUDF_TEST_EXPECT_TABLES_EQUAL(expected, *result); + } + + // KEEP_NONE + { + auto const exp_col = int32s_col{0, 4, 6}; + auto const exp_keys = floats_col{20., 21., 22.}; + auto const expected = cudf::table_view{{exp_col, exp_keys}}; + + auto const result = cudf::stable_distinct( + input, key_idx, KEEP_NONE, NULL_EQUAL, NAN_EQUAL, cudf::test::get_default_stream()); + CUDF_TEST_EXPECT_TABLES_EQUAL(expected, *result); + } +} + +TEST_F(StableDistinctKeepFirstLastNone, InputWithNaNsUnequal) +{ + // Column(s) used to test needs to have different rows for the same keys. 
+ auto const col = int32s_col{0, 1, 2, 3, 4, 5, 6, 7}; + auto const keys = floats_col{20., NaN, NaN, 19., 21., 19., 22., 20.}; + auto const input = cudf::table_view{{col, keys}}; + auto const key_idx = std::vector{1}; + + // KEEP_FIRST + { + auto const exp_col = int32s_col{0, 1, 2, 3, 4, 6}; + auto const exp_keys = floats_col{20., NaN, NaN, 19., 21., 22.}; + auto const expected = cudf::table_view{{exp_col, exp_keys}}; + + auto const result = cudf::stable_distinct( + input, key_idx, KEEP_FIRST, NULL_UNEQUAL, NAN_UNEQUAL, cudf::test::get_default_stream()); + CUDF_TEST_EXPECT_TABLES_EQUAL(expected, *result); + } + + // KEEP_LAST + { + auto const exp_col = int32s_col{1, 2, 4, 5, 6, 7}; + auto const exp_keys = floats_col{NaN, NaN, 21., 19., 22., 20.}; + auto const expected = cudf::table_view{{exp_col, exp_keys}}; + + auto const result = cudf::stable_distinct( + input, key_idx, KEEP_LAST, NULL_UNEQUAL, NAN_UNEQUAL, cudf::test::get_default_stream()); + CUDF_TEST_EXPECT_TABLES_EQUAL(expected, *result); + } + + // KEEP_NONE + { + auto const exp_col = int32s_col{1, 2, 4, 6}; + auto const exp_keys = floats_col{NaN, NaN, 21., 22.}; + auto const expected = cudf::table_view{{exp_col, exp_keys}}; + + auto const result = cudf::stable_distinct( + input, key_idx, KEEP_NONE, NULL_UNEQUAL, NAN_UNEQUAL, cudf::test::get_default_stream()); + CUDF_TEST_EXPECT_TABLES_EQUAL(expected, *result); + } +} diff --git a/cpp/tests/strings/array_tests.cpp b/cpp/tests/strings/array_tests.cpp index a1bb87a43fb..9c0ecaa52c0 100644 --- a/cpp/tests/strings/array_tests.cpp +++ b/cpp/tests/strings/array_tests.cpp @@ -23,8 +23,8 @@ #include #include #include -#include #include +#include #include #include @@ -153,7 +153,7 @@ TEST_F(StringsColumnTest, GatherZeroSizeStringsColumn) TEST_F(StringsColumnTest, GatherTooBig) { - if (cudf::strings::detail::is_large_strings_enabled()) { return; } + if (cudf::strings::is_large_strings_enabled()) { return; } std::vector h_chars(3000000); cudf::test::fixed_width_column_wrapper chars(h_chars.begin(), h_chars.end()); diff --git a/cpp/tests/strings/repeat_strings_tests.cpp b/cpp/tests/strings/repeat_strings_tests.cpp index 0539895c5f4..aa4d9320d7c 100644 --- a/cpp/tests/strings/repeat_strings_tests.cpp +++ b/cpp/tests/strings/repeat_strings_tests.cpp @@ -20,9 +20,9 @@ #include #include -#include #include #include +#include using namespace cudf::test::iterators; @@ -221,7 +221,7 @@ TEST_F(RepeatStringsTest, StringsColumnWithColumnRepeatTimesInvalidInput) TEST_F(RepeatStringsTest, StringsColumnWithColumnRepeatTimesOverflowOutput) { - if (cudf::strings::detail::is_large_strings_enabled()) { return; } + if (cudf::strings::is_large_strings_enabled()) { return; } auto const strs = strs_col{"1", "12", "123", "1234", "12345", "123456", "1234567"}; auto const strs_cv = cudf::strings_column_view(strs); diff --git a/docs/cudf/source/user_guide/api_docs/index.rst b/docs/cudf/source/user_guide/api_docs/index.rst index 5f26a921012..d05501f4a4a 100644 --- a/docs/cudf/source/user_guide/api_docs/index.rst +++ b/docs/cudf/source/user_guide/api_docs/index.rst @@ -26,3 +26,4 @@ This page provides a list of all publicly accessible modules, methods and classe options extension_dtypes pylibcudf/index.rst + performance_tracking diff --git a/docs/cudf/source/user_guide/api_docs/performance_tracking.rst b/docs/cudf/source/user_guide/api_docs/performance_tracking.rst new file mode 100644 index 00000000000..9da79e69fb2 --- /dev/null +++ b/docs/cudf/source/user_guide/api_docs/performance_tracking.rst @@ -0,0 +1,12 @@ +.. 
_api.performance_tracking: + +==================== +Performance Tracking +==================== + +.. currentmodule:: cudf.utils.performance_tracking +.. autosummary:: + :toctree: api/ + + get_memory_records + print_memory_report diff --git a/docs/cudf/source/user_guide/api_docs/pylibcudf/io/index.rst b/docs/cudf/source/user_guide/api_docs/pylibcudf/io/index.rst index 0d53ac92db9..bde6d8094ce 100644 --- a/docs/cudf/source/user_guide/api_docs/pylibcudf/io/index.rst +++ b/docs/cudf/source/user_guide/api_docs/pylibcudf/io/index.rst @@ -16,3 +16,4 @@ I/O Functions :maxdepth: 1 avro + json diff --git a/docs/cudf/source/user_guide/api_docs/pylibcudf/io/json.rst b/docs/cudf/source/user_guide/api_docs/pylibcudf/io/json.rst new file mode 100644 index 00000000000..6aeae1f322a --- /dev/null +++ b/docs/cudf/source/user_guide/api_docs/pylibcudf/io/json.rst @@ -0,0 +1,6 @@ +==== +JSON +==== + +.. automodule:: cudf._lib.pylibcudf.io.json + :members: diff --git a/docs/cudf/source/user_guide/index.md b/docs/cudf/source/user_guide/index.md index 486368c3b8b..df4e4795a08 100644 --- a/docs/cudf/source/user_guide/index.md +++ b/docs/cudf/source/user_guide/index.md @@ -16,5 +16,6 @@ options performance-comparisons/index PandasCompat copy-on-write +memory-profiling pandas-2.0-breaking-changes ``` diff --git a/docs/cudf/source/user_guide/memory-profiling.md b/docs/cudf/source/user_guide/memory-profiling.md new file mode 100644 index 00000000000..ab5433685e6 --- /dev/null +++ b/docs/cudf/source/user_guide/memory-profiling.md @@ -0,0 +1,44 @@ +(memory-profiling-user-doc)= + +# Memory Profiling + +Peak memory usage is a common concern in GPU programming because GPU memory is typically smaller than available CPU memory. To easily identify memory hotspots, cuDF provides a memory profiler. It comes with an overhead so avoid using it in performance-sensitive code. + +## Enabling Memory Profiling + +First, enable memory profiling in RMM by calling {py:func}`rmm.statistics.enable_statistics()`. This adds a statistics resource adaptor to the current RMM memory resource, which enables cuDF to access memory profiling information. See the [RMM documentation](https://docs.rapids.ai/api/rmm/stable/guide/#memory-statistics-and-profiling) for more details. + +Second, enable memory profiling in cuDF by setting the `memory_profiling` option to `True`. Use {py:func}`cudf.set_option` or set the environment variable ``CUDF_MEMORY_PROFILING=1`` prior to the launch of the Python interpreter. + +To get the result of the profiling, use {py:func}`cudf.utils.performance_tracking.print_memory_report` or access the raw profiling data by using: {py:func}`cudf.utils.performance_tracking.get_memory_records`. 
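
If you only need the raw numbers, for example to log them or to assert on them in a test, the records can also be inspected programmatically. The following is a minimal sketch, assuming `get_memory_records()` returns a mapping from tracked function names to RMM statistics objects:

```python
>>> from cudf.utils.performance_tracking import get_memory_records
>>> records = get_memory_records()  # one entry per tracked function or code block
>>> for name, stats in records.items():  # print whatever statistics RMM collected
...     print(name, stats)
```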
+ +### Example +In the following, we enable profiling, do some work, and then print the profiling results: + +```python +>>> import cudf +>>> from cudf.utils.performance_tracking import print_memory_report +>>> from rmm.statistics import enable_statistics +>>> enable_statistics() +>>> cudf.set_option("memory_profiling", True) +>>> cudf.DataFrame({"a": [1, 2, 3]}) # Some work + a +0 1 +1 2 +2 3 +>>> print_memory_report() # Pretty print the result of the profiling +Memory Profiling +================ + +Legends: +ncalls - number of times the function or code block was called +memory_peak - peak memory allocated in function or code block (in bytes) +memory_total - total memory allocated in function or code block (in bytes) + +Ordered by: memory_peak + +ncalls memory_peak memory_total filename:lineno(function) + 1 32 32 cudf/core/dataframe.py:690(DataFrame.__init__) + 2 0 0 cudf/core/index.py:214(RangeIndex.__init__) + 6 0 0 cudf/core/index.py:424(RangeIndex.__len__) +``` diff --git a/python/cudf/cudf/_lib/json.pyx b/python/cudf/cudf/_lib/json.pyx index 4847e7fa5bb..9c646e3357b 100644 --- a/python/cudf/cudf/_lib/json.pyx +++ b/python/cudf/cudf/_lib/json.pyx @@ -8,30 +8,16 @@ import cudf from cudf.core.buffer import acquire_spill_lock from libcpp cimport bool -from libcpp.memory cimport unique_ptr -from libcpp.string cimport string -from libcpp.utility cimport move cimport cudf._lib.pylibcudf.libcudf.io.types as cudf_io_types -from cudf._lib.column cimport Column -from cudf._lib.io.utils cimport add_df_col_struct_names, make_sink_info +from cudf._lib.io.utils cimport add_df_col_struct_names from cudf._lib.pylibcudf.io.types cimport compression_type -from cudf._lib.pylibcudf.libcudf.io.data_sink cimport data_sink -from cudf._lib.pylibcudf.libcudf.io.json cimport ( - json_recovery_mode_t, - json_writer_options, - write_json as libcudf_write_json, -) -from cudf._lib.pylibcudf.libcudf.io.types cimport ( - column_name_info, - sink_info, - table_metadata, -) -from cudf._lib.pylibcudf.libcudf.table.table_view cimport table_view +from cudf._lib.pylibcudf.libcudf.io.json cimport json_recovery_mode_t +from cudf._lib.pylibcudf.libcudf.io.types cimport compression_type from cudf._lib.pylibcudf.libcudf.types cimport data_type, type_id from cudf._lib.pylibcudf.types cimport DataType from cudf._lib.types cimport dtype_to_data_type -from cudf._lib.utils cimport data_from_pylibcudf_io, table_view_from_table +from cudf._lib.utils cimport data_from_pylibcudf_io import cudf._lib.pylibcudf as plc @@ -153,45 +139,27 @@ def write_json( -------- cudf.to_json """ - cdef table_view input_table_view = table_view_from_table( - table, ignore_index=True - ) - - cdef unique_ptr[data_sink] data_sink_c - cdef sink_info sink_info_c = make_sink_info(path_or_buf, data_sink_c) - cdef string na_c = na_rep.encode() - cdef bool include_nulls_c = include_nulls - cdef bool lines_c = lines - cdef int rows_per_chunk_c = rows_per_chunk - cdef string true_value_c = 'true'.encode() - cdef string false_value_c = 'false'.encode() - cdef table_metadata tbl_meta - - num_index_cols_meta = 0 - cdef column_name_info child_info - for i, name in enumerate(table._column_names, num_index_cols_meta): - child_info.name = name.encode() - tbl_meta.schema_info.push_back(child_info) - _set_col_children_metadata( - table[name]._column, - tbl_meta.schema_info[i] - ) + cdef list colnames = [] - cdef json_writer_options options = move( - json_writer_options.builder(sink_info_c, input_table_view) - .metadata(tbl_meta) - .na_rep(na_c) - 
.include_nulls(include_nulls_c) - .lines(lines_c) - .rows_per_chunk(rows_per_chunk_c) - .true_value(true_value_c) - .false_value(false_value_c) - .build() - ) + for name in table._column_names: + colnames.append((name, _dtype_to_names_list(table[name]._column))) try: - with nogil: - libcudf_write_json(options) + plc.io.json.write_json( + plc.io.SinkInfo([path_or_buf]), + plc.io.TableWithMetadata( + plc.Table([ + c.to_pylibcudf(mode="read") for c in table._columns + ]), + colnames + ), + na_rep, + include_nulls, + lines, + rows_per_chunk, + true_value="true", + false_value="false" + ) except OverflowError: raise OverflowError( f"Writing JSON file with rows_per_chunk={rows_per_chunk} failed. " @@ -236,23 +204,12 @@ cdef data_type _get_cudf_data_type_from_dtype(object dtype) except *: ) return dtype_to_data_type(dtype) -cdef _set_col_children_metadata(Column col, - column_name_info& col_meta): - cdef column_name_info child_info + +def _dtype_to_names_list(col): if isinstance(col.dtype, cudf.StructDtype): - for i, (child_col, name) in enumerate( - zip(col.children, list(col.dtype.fields)) - ): - child_info.name = name.encode() - col_meta.children.push_back(child_info) - _set_col_children_metadata( - child_col, col_meta.children[i] - ) + return [(name, _dtype_to_names_list(child)) + for name, child in zip(col.dtype.fields, col.children)] elif isinstance(col.dtype, cudf.ListDtype): - for i, child_col in enumerate(col.children): - col_meta.children.push_back(child_info) - _set_col_children_metadata( - child_col, col_meta.children[i] - ) - else: - return + return [("", _dtype_to_names_list(child)) + for child in col.children] + return [] diff --git a/python/cudf/cudf/_lib/lists.pyx b/python/cudf/cudf/_lib/lists.pyx index 5d406f5c85f..0ad09dba717 100644 --- a/python/cudf/cudf/_lib/lists.pyx +++ b/python/cudf/cudf/_lib/lists.pyx @@ -9,10 +9,6 @@ from libcpp.utility cimport move from cudf._lib.column cimport Column from cudf._lib.pylibcudf.libcudf.column.column cimport column from cudf._lib.pylibcudf.libcudf.column.column_view cimport column_view -from cudf._lib.pylibcudf.libcudf.lists.contains cimport ( - contains, - index_of as cpp_index_of, -) from cudf._lib.pylibcudf.libcudf.lists.count_elements cimport ( count_elements as cpp_count_elements, ) @@ -26,7 +22,6 @@ from cudf._lib.pylibcudf.libcudf.lists.sorting cimport ( from cudf._lib.pylibcudf.libcudf.lists.stream_compaction cimport ( distinct as cpp_distinct, ) -from cudf._lib.pylibcudf.libcudf.scalar.scalar cimport scalar from cudf._lib.pylibcudf.libcudf.types cimport ( nan_equality, null_equality, @@ -34,11 +29,12 @@ from cudf._lib.pylibcudf.libcudf.types cimport ( order, size_type, ) -from cudf._lib.scalar cimport DeviceScalar from cudf._lib.utils cimport columns_from_pylibcudf_table from cudf._lib import pylibcudf +from cudf._lib.pylibcudf cimport Scalar + @acquire_spill_lock() def count_elements(Column col): @@ -153,64 +149,36 @@ def extract_element_column(Column col, Column index): @acquire_spill_lock() -def contains_scalar(Column col, object py_search_key): - - cdef DeviceScalar search_key = py_search_key.device_value - - cdef shared_ptr[lists_column_view] list_view = ( - make_shared[lists_column_view](col.view()) +def contains_scalar(Column col, py_search_key): + return Column.from_pylibcudf( + pylibcudf.lists.contains( + col.to_pylibcudf(mode="read"), + py_search_key.device_value.c_value, + ) ) - cdef const scalar* search_key_value = search_key.get_raw_ptr() - - cdef unique_ptr[column] c_result - - with nogil: - c_result = move(contains( 
- list_view.get()[0], - search_key_value[0], - )) - result = Column.from_unique_ptr(move(c_result)) - return result @acquire_spill_lock() def index_of_scalar(Column col, object py_search_key): - - cdef DeviceScalar search_key = py_search_key.device_value - - cdef shared_ptr[lists_column_view] list_view = ( - make_shared[lists_column_view](col.view()) + return Column.from_pylibcudf( + pylibcudf.lists.index_of( + col.to_pylibcudf(mode="read"), + py_search_key.device_value.c_value, + True, + ) ) - cdef const scalar* search_key_value = search_key.get_raw_ptr() - - cdef unique_ptr[column] c_result - - with nogil: - c_result = move(cpp_index_of( - list_view.get()[0], - search_key_value[0], - )) - return Column.from_unique_ptr(move(c_result)) @acquire_spill_lock() def index_of_column(Column col, Column search_keys): - - cdef column_view keys_view = search_keys.view() - - cdef shared_ptr[lists_column_view] list_view = ( - make_shared[lists_column_view](col.view()) + return Column.from_pylibcudf( + pylibcudf.lists.index_of( + col.to_pylibcudf(mode="read"), + search_keys.to_pylibcudf(mode="read"), + True, + ) ) - cdef unique_ptr[column] c_result - - with nogil: - c_result = move(cpp_index_of( - list_view.get()[0], - keys_view, - )) - return Column.from_unique_ptr(move(c_result)) - @acquire_spill_lock() def concatenate_rows(list source_columns): diff --git a/python/cudf/cudf/_lib/pylibcudf/column.pxd b/python/cudf/cudf/_lib/pylibcudf/column.pxd index e121e856865..d13791d95cf 100644 --- a/python/cudf/cudf/_lib/pylibcudf/column.pxd +++ b/python/cudf/cudf/_lib/pylibcudf/column.pxd @@ -8,6 +8,9 @@ from cudf._lib.pylibcudf.libcudf.column.column_view cimport ( column_view, mutable_column_view, ) +from cudf._lib.pylibcudf.libcudf.lists.lists_column_view cimport ( + lists_column_view, +) from cudf._lib.pylibcudf.libcudf.types cimport bitmask_type, size_type from .gpumemoryview cimport gpumemoryview @@ -56,3 +59,4 @@ cdef class ListColumnView: cdef Column _column cpdef child(self) cpdef offsets(self) + cdef lists_column_view view(self) nogil diff --git a/python/cudf/cudf/_lib/pylibcudf/column.pyx b/python/cudf/cudf/_lib/pylibcudf/column.pyx index e726eca154f..e0cf8b7ee32 100644 --- a/python/cudf/cudf/_lib/pylibcudf/column.pyx +++ b/python/cudf/cudf/_lib/pylibcudf/column.pyx @@ -348,6 +348,15 @@ cdef class ListColumnView: """The offsets column of the underlying list column.""" return self._column.child(1) + cdef lists_column_view view(self) nogil: + """Generate a libcudf lists_column_view to pass to libcudf algorithms. + + This method is for pylibcudf's functions to use to generate inputs when + calling libcudf algorithms, and should generally not be needed by users + (even direct pylibcudf Cython users). + """ + return lists_column_view(self._column.view()) + @functools.cache def _datatype_from_dtype_desc(desc): diff --git a/python/cudf/cudf/_lib/pylibcudf/io/__init__.py b/python/cudf/cudf/_lib/pylibcudf/io/__init__.py index 4a71e839db2..fb4e4c7e4bb 100644 --- a/python/cudf/cudf/_lib/pylibcudf/io/__init__.py +++ b/python/cudf/cudf/_lib/pylibcudf/io/__init__.py @@ -1,4 +1,4 @@ # Copyright (c) 2024, NVIDIA CORPORATION. from . 
import avro, datasource, json, types -from .types import SourceInfo, TableWithMetadata +from .types import SinkInfo, SourceInfo, TableWithMetadata diff --git a/python/cudf/cudf/_lib/pylibcudf/io/avro.pyx b/python/cudf/cudf/_lib/pylibcudf/io/avro.pyx index 946e0896fc8..538bd8aa322 100644 --- a/python/cudf/cudf/_lib/pylibcudf/io/avro.pyx +++ b/python/cudf/cudf/_lib/pylibcudf/io/avro.pyx @@ -19,7 +19,7 @@ cpdef TableWithMetadata read_avro( size_type num_rows = -1 ): """ - Reads an Avro dataset into a set of columns. + Reads an Avro dataset into a :py:class:`~.types.TableWithMetadata`. Parameters ---------- @@ -36,7 +36,7 @@ cpdef TableWithMetadata read_avro( Returns ------- TableWithMetadata - The Table and its corresponding metadata that was read in. + The Table and its corresponding metadata (column names) that were read in. """ cdef vector[string] c_columns if columns is not None and len(columns) > 0: diff --git a/python/cudf/cudf/_lib/pylibcudf/io/json.pxd b/python/cudf/cudf/_lib/pylibcudf/io/json.pxd index 36f90e6de30..f7f733a493d 100644 --- a/python/cudf/cudf/_lib/pylibcudf/io/json.pxd +++ b/python/cudf/cudf/_lib/pylibcudf/io/json.pxd @@ -2,6 +2,7 @@ from libcpp cimport bool from cudf._lib.pylibcudf.io.types cimport ( + SinkInfo, SourceInfo, TableWithMetadata, compression_type, @@ -22,3 +23,15 @@ cpdef TableWithMetadata read_json( bool prune_columns = *, json_recovery_mode_t recovery_mode = *, ) + + +cpdef void write_json( + SinkInfo sink_info, + TableWithMetadata tbl, + str na_rep = *, + bool include_nulls = *, + bool lines = *, + size_type rows_per_chunk = *, + str true_value = *, + str false_value = * +) diff --git a/python/cudf/cudf/_lib/pylibcudf/io/json.pyx b/python/cudf/cudf/_lib/pylibcudf/io/json.pyx index 2a5970ad6d6..354cb4981de 100644 --- a/python/cudf/cudf/_lib/pylibcudf/io/json.pyx +++ b/python/cudf/cudf/_lib/pylibcudf/io/json.pyx @@ -1,18 +1,29 @@ # Copyright (c) 2024, NVIDIA CORPORATION. from libcpp cimport bool +from libcpp.limits cimport numeric_limits from libcpp.map cimport map from libcpp.string cimport string from libcpp.utility cimport move from libcpp.vector cimport vector -from cudf._lib.pylibcudf.io.types cimport SourceInfo, TableWithMetadata +from cudf._lib.pylibcudf.io.types cimport ( + SinkInfo, + SourceInfo, + TableWithMetadata, +) from cudf._lib.pylibcudf.libcudf.io.json cimport ( json_reader_options, json_recovery_mode_t, + json_writer_options, read_json as cpp_read_json, schema_element, + write_json as cpp_write_json, +) +from cudf._lib.pylibcudf.libcudf.io.types cimport ( + compression_type, + table_metadata, + table_with_metadata, ) -from cudf._lib.pylibcudf.libcudf.io.types cimport table_with_metadata from cudf._lib.pylibcudf.libcudf.types cimport data_type, size_type from cudf._lib.pylibcudf.types cimport DataType @@ -114,3 +125,58 @@ cpdef TableWithMetadata read_json( c_result = move(cpp_read_json(opts)) return TableWithMetadata.from_libcudf(c_result) + + +cpdef void write_json( + SinkInfo sink_info, + TableWithMetadata table_w_meta, + str na_rep = "", + bool include_nulls = False, + bool lines = False, + size_type rows_per_chunk = numeric_limits[size_type].max(), + str true_value = "true", + str false_value = "false" +): + """ + Writes a :py:class:`~cudf._lib.pylibcudf.table.Table` to JSON format. + + Parameters + ---------- + sink_info: SinkInfo + The SinkInfo object to write the JSON to. 
+ table_w_meta: TableWithMetadata + The TableWithMetadata object containing the Table to write + na_rep: str, default "" + The string representation for null values. + include_nulls: bool, default False + Enables/Disables output of nulls as 'null'. + lines: bool, default False + If `True`, write output in the JSON lines format. + rows_per_chunk: size_type, defaults to length of the input table + The maximum number of rows to write at a time. + true_value: str, default "true" + The string representation for values != 0 in INT8 types. + false_value: str, default "false" + The string representation for values == 0 in INT8 types. + """ + cdef table_metadata tbl_meta = table_w_meta.metadata + cdef string na_rep_c = na_rep.encode() + + cdef json_writer_options options = ( + json_writer_options.builder(sink_info.c_obj, table_w_meta.tbl.view()) + .metadata(tbl_meta) + .na_rep(na_rep_c) + .include_nulls(include_nulls) + .lines(lines) + .build() + ) + + if rows_per_chunk != numeric_limits[size_type].max(): + options.set_rows_per_chunk(rows_per_chunk) + if true_value != "true": + options.set_true_value(true_value.encode()) + if false_value != "false": + options.set_false_value(false_value.encode()) + + with nogil: + cpp_write_json(options) diff --git a/python/cudf/cudf/_lib/pylibcudf/io/types.pxd b/python/cudf/cudf/_lib/pylibcudf/io/types.pxd index a9d12ecfc38..787462fd1fe 100644 --- a/python/cudf/cudf/_lib/pylibcudf/io/types.pxd +++ b/python/cudf/cudf/_lib/pylibcudf/io/types.pxd @@ -1,7 +1,8 @@ # Copyright (c) 2024, NVIDIA CORPORATION. - +from libcpp.memory cimport unique_ptr from libcpp.vector cimport vector +from cudf._lib.pylibcudf.libcudf.io.data_sink cimport data_sink from cudf._lib.pylibcudf.libcudf.io.types cimport ( column_encoding, column_in_metadata, @@ -35,3 +36,8 @@ cdef class TableWithMetadata: cdef class SourceInfo: cdef source_info c_obj + +cdef class SinkInfo: + # This vector just exists to keep the unique_ptrs to the sinks alive + cdef vector[unique_ptr[data_sink]] sink_storage + cdef sink_info c_obj diff --git a/python/cudf/cudf/_lib/pylibcudf/io/types.pyx b/python/cudf/cudf/_lib/pylibcudf/io/types.pyx index 4fe1732cd0c..eb1017c0663 100644 --- a/python/cudf/cudf/_lib/pylibcudf/io/types.pyx +++ b/python/cudf/cudf/_lib/pylibcudf/io/types.pyx @@ -1,10 +1,14 @@ # Copyright (c) 2024, NVIDIA CORPORATION. 
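
For orientation, the snippet below sketches how the new `write_json` binding and `SinkInfo` are meant to be driven from Python. It is illustrative only and not part of the diff; it assumes `cudf._lib.pylibcudf` is importable as `plc` and reuses the internal accessors (`_columns`, `_column_names`, `to_pylibcudf`) the same way the updated `cudf/_lib/json.pyx` above does.

```python
import io

import cudf
import cudf._lib.pylibcudf as plc

df = cudf.DataFrame({"a": [1, 2, 3], "b": ["x", "y", "z"]})
sink = io.BytesIO()

# Build a pylibcudf Table plus (name, children) metadata, mirroring write_json in json.pyx.
tbl_meta = plc.io.TableWithMetadata(
    plc.Table([col.to_pylibcudf(mode="read") for col in df._columns]),
    [(name, []) for name in df._column_names],
)

# SinkInfo wraps the BytesIO through the iobase_data_sink adapter defined in types.pyx.
plc.io.json.write_json(plc.io.SinkInfo([sink]), tbl_meta, lines=True)
print(sink.getvalue().decode())
```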
+from cpython.buffer cimport PyBUF_READ
+from cpython.memoryview cimport PyMemoryView_FromMemory
+from libcpp.memory cimport unique_ptr
 from libcpp.string cimport string
 from libcpp.utility cimport move
 from libcpp.vector cimport vector
 
 from cudf._lib.pylibcudf.io.datasource cimport Datasource
+from cudf._lib.pylibcudf.libcudf.io.data_sink cimport data_sink
 from cudf._lib.pylibcudf.libcudf.io.datasource cimport datasource
 from cudf._lib.pylibcudf.libcudf.io.types cimport (
     column_name_info,
@@ -13,6 +17,7 @@ from cudf._lib.pylibcudf.libcudf.io.types cimport (
     table_with_metadata,
 )
 
+import codecs
 import errno
 import io
 import os
@@ -43,7 +48,7 @@ cdef class TableWithMetadata:
 
     def __init__(self, Table tbl, list column_names):
         self.tbl = tbl
-        self.metadata.schema_info = move(self._make_column_info(column_names))
+        self.metadata.schema_info = self._make_column_info(column_names)
 
     cdef vector[column_name_info] _make_column_info(self, list column_names):
         cdef vector[column_name_info] col_name_infos
@@ -55,8 +60,8 @@ cdef class TableWithMetadata:
             if not isinstance(name, str):
                 raise ValueError("Column name must be a string!")
 
-            info.name = move( name.encode())
-            info.children = move(self._make_column_info(child_names))
+            info.name = name.encode()
+            info.children = self._make_column_info(child_names)
 
             col_name_infos.push_back(info)
@@ -104,6 +109,7 @@ cdef class TableWithMetadata:
         out.metadata = tbl_with_meta.metadata
         return out
 
+
 cdef class SourceInfo:
     """A class containing details on a source to read from.
@@ -184,4 +190,87 @@ cdef class SourceInfo:
         if empty_buffer is True:
             c_host_buffers.push_back(host_buffer(NULL, 0))
 
-        self.c_obj = move(source_info(c_host_buffers))
+        self.c_obj = source_info(c_host_buffers)
+
+
+# Adapts a python io.IOBase object as a libcudf IO data_sink. This lets you
+# write from cudf to any python file-like object (File/BytesIO/SocketIO etc)
+cdef cppclass iobase_data_sink(data_sink):
+    object buf
+
+    iobase_data_sink(object buf_):
+        this.buf = buf_
+
+    void host_write(const void * data, size_t size) with gil:
+        if isinstance(buf, io.TextIOBase):
+            buf.write(PyMemoryView_FromMemory(data, size, PyBUF_READ)
+                      .tobytes().decode())
+        else:
+            buf.write(PyMemoryView_FromMemory(data, size, PyBUF_READ))
+
+    void flush() with gil:
+        buf.flush()
+
+    size_t bytes_written() with gil:
+        return buf.tell()
+
+
+cdef class SinkInfo:
+    """A class containing details on the sinks to write to.
+
+    For details, see :cpp:class:`cudf::io::sink_info`.
+
+    Parameters
+    ----------
+    sinks : list of str, PathLike, BytesIO, StringIO
+
+        A homogeneous list of sinks (this can be a string filename,
+        a path-like object, or one of the Python I/O classes) to write to.
+
+        Mixing different types of sinks will raise a `ValueError`.
+ """ + + def __init__(self, list sinks): + cdef vector[data_sink *] data_sinks + cdef vector[string] paths + + if not sinks: + raise ValueError("Need to pass at least one sink") + + if isinstance(sinks[0], os.PathLike): + sinks = [os.path.expanduser(s) for s in sinks] + + cdef object initial_sink_cls = type(sinks[0]) + + if not all(isinstance(s, initial_sink_cls) for s in sinks): + raise ValueError("All sinks must be of the same type!") + + if initial_sink_cls in {io.StringIO, io.BytesIO, io.TextIOBase}: + data_sinks.reserve(len(sinks)) + if isinstance(sinks[0], (io.StringIO, io.BytesIO)): + for s in sinks: + self.sink_storage.push_back( + unique_ptr[data_sink](new iobase_data_sink(s)) + ) + elif isinstance(sinks[0], io.TextIOBase): + for s in sinks: + if codecs.lookup(s).name not in ('utf-8', 'ascii'): + raise NotImplementedError(f"Unsupported encoding {s.encoding}") + self.sink_storage.push_back( + unique_ptr[data_sink](new iobase_data_sink(s.buffer)) + ) + data_sinks.push_back(self.sink_storage.back().get()) + elif isinstance(sinks[0], str): + paths.reserve(len(sinks)) + for s in sinks: + paths.push_back( s.encode()) + else: + raise TypeError( + "Unrecognized input type: {}".format(type(sinks[0])) + ) + + if data_sinks.size() > 0: + self.c_obj = sink_info(data_sinks) + else: + # we don't have sinks so we must have paths to sinks + self.c_obj = sink_info(paths) diff --git a/python/cudf/cudf/_lib/pylibcudf/libcudf/lists/contains.pxd b/python/cudf/cudf/_lib/pylibcudf/libcudf/lists/contains.pxd index 721679f35c7..82aed7d70a0 100644 --- a/python/cudf/cudf/_lib/pylibcudf/libcudf/lists/contains.pxd +++ b/python/cudf/cudf/_lib/pylibcudf/libcudf/lists/contains.pxd @@ -1,5 +1,6 @@ # Copyright (c) 2021-2024, NVIDIA CORPORATION. +from libc.stdint cimport int32_t from libcpp.memory cimport unique_ptr from cudf._lib.exception_handler cimport cudf_exception_handler @@ -12,17 +13,33 @@ from cudf._lib.pylibcudf.libcudf.scalar.scalar cimport scalar cdef extern from "cudf/lists/contains.hpp" namespace "cudf::lists" nogil: + + cpdef enum class duplicate_find_option(int32_t): + FIND_FIRST + FIND_LAST + cdef unique_ptr[column] contains( - lists_column_view lists, - scalar search_key, + const lists_column_view& lists, + const scalar& search_key, + ) except +cudf_exception_handler + + cdef unique_ptr[column] contains( + const lists_column_view& lists, + const column_view& search_keys, + ) except +cudf_exception_handler + + cdef unique_ptr[column] contains_nulls( + const lists_column_view& lists, ) except +cudf_exception_handler cdef unique_ptr[column] index_of( - lists_column_view lists, - scalar search_key, + const lists_column_view& lists, + const scalar& search_key, + duplicate_find_option find_option, ) except +cudf_exception_handler cdef unique_ptr[column] index_of( - lists_column_view lists, - column_view search_keys, + const lists_column_view& lists, + const column_view& search_keys, + duplicate_find_option find_option, ) except +cudf_exception_handler diff --git a/python/cudf/cudf/_lib/pylibcudf/libcudf/lists/lists_column_view.pxd b/python/cudf/cudf/_lib/pylibcudf/libcudf/lists/lists_column_view.pxd index dbafc415e45..fd21e7b334b 100644 --- a/python/cudf/cudf/_lib/pylibcudf/libcudf/lists/lists_column_view.pxd +++ b/python/cudf/cudf/_lib/pylibcudf/libcudf/lists/lists_column_view.pxd @@ -9,6 +9,7 @@ from cudf._lib.pylibcudf.libcudf.types cimport size_type cdef extern from "cudf/lists/lists_column_view.hpp" namespace "cudf" nogil: cdef cppclass lists_column_view(column_view): + lists_column_view() except 
+ lists_column_view(const column_view& lists_column) except + column_view parent() except + column_view offsets() except + diff --git a/python/cudf/cudf/_lib/pylibcudf/lists.pxd b/python/cudf/cudf/_lib/pylibcudf/lists.pxd index 2d2a5b2a9ea..2ccf0139e90 100644 --- a/python/cudf/cudf/_lib/pylibcudf/lists.pxd +++ b/python/cudf/cudf/_lib/pylibcudf/lists.pxd @@ -5,11 +5,21 @@ from libcpp cimport bool from cudf._lib.pylibcudf.libcudf.types cimport size_type from .column cimport Column +from .scalar cimport Scalar from .table cimport Table +ctypedef fused ColumnOrScalar: + Column + Scalar cpdef Table explode_outer(Table, size_type explode_column_idx) cpdef Column concatenate_rows(Table) cpdef Column concatenate_list_elements(Column, bool dropna) + +cpdef Column contains(Column, ColumnOrScalar) + +cpdef Column contains_nulls(Column) + +cpdef Column index_of(Column, ColumnOrScalar, bool) diff --git a/python/cudf/cudf/_lib/pylibcudf/lists.pyx b/python/cudf/cudf/_lib/pylibcudf/lists.pyx index 069c9da31c2..a94d940accd 100644 --- a/python/cudf/cudf/_lib/pylibcudf/lists.pyx +++ b/python/cudf/cudf/_lib/pylibcudf/lists.pyx @@ -1,11 +1,15 @@ # Copyright (c) 2024, NVIDIA CORPORATION. +from cython.operator cimport dereference from libcpp cimport bool from libcpp.memory cimport unique_ptr from libcpp.utility cimport move from cudf._lib.pylibcudf.libcudf.column.column cimport column -from cudf._lib.pylibcudf.libcudf.lists cimport explode as cpp_explode +from cudf._lib.pylibcudf.libcudf.lists cimport ( + contains as cpp_contains, + explode as cpp_explode, +) from cudf._lib.pylibcudf.libcudf.lists.combine cimport ( concatenate_list_elements as cpp_concatenate_list_elements, concatenate_null_policy, @@ -13,8 +17,10 @@ from cudf._lib.pylibcudf.libcudf.lists.combine cimport ( ) from cudf._lib.pylibcudf.libcudf.table.table cimport table from cudf._lib.pylibcudf.libcudf.types cimport size_type +from cudf._lib.pylibcudf.lists cimport ColumnOrScalar -from .column cimport Column +from .column cimport Column, ListColumnView +from .scalar cimport Scalar from .table cimport Table @@ -71,15 +77,15 @@ cpdef Column concatenate_list_elements(Column input, bool dropna): ---------- input : Column The input column + dropna : bool + If true, null list elements will be ignored + from concatenation. Otherwise any input null values will result in + the corresponding output row being set to null. Returns ------- Column A new Column of concatenated list elements - dropna : bool - If true, null list elements will be ignored - from concatenation. Otherwise any input null values will result in - the corresponding output row being set to null. """ cdef concatenate_null_policy null_policy = ( concatenate_null_policy.IGNORE if dropna @@ -94,3 +100,109 @@ cpdef Column concatenate_list_elements(Column input, bool dropna): )) return Column.from_libcudf(move(c_result)) + + +cpdef Column contains(Column input, ColumnOrScalar search_key): + """Create a column of bool values indicating whether + the search_key is contained in the input. + + ``search_key`` may be a + :py:class:`~cudf._lib.pylibcudf.column.Column` or a + :py:class:`~cudf._lib.pylibcudf.scalar.Scalar`. + + For details, see :cpp:func:`contains`. + + Parameters + ---------- + input : Column + The input column. + search_key : Union[Column, Scalar] + The search key. + + Returns + ------- + Column + A new Column of bools indicating if the search_key was + found in the list column. 
+    """
+    cdef unique_ptr[column] c_result
+    cdef ListColumnView list_view = input.list_view()
+
+    if not isinstance(search_key, (Column, Scalar)):
+        raise TypeError("Must pass a Column or Scalar")
+
+    with nogil:
+        c_result = move(cpp_contains.contains(
+            list_view.view(),
+            search_key.view() if ColumnOrScalar is Column else dereference(
+                search_key.get()
+            ),
+        ))
+    return Column.from_libcudf(move(c_result))
+
+
+cpdef Column contains_nulls(Column input):
+    """Create a column of bool values indicating whether
+    each row in the lists column contains a null value.
+
+    Parameters
+    ----------
+    input : Column
+        The input column.
+
+    Returns
+    -------
+    Column
+        A new Column of bools indicating if the list column
+        contains a null value.
+    """
+    cdef unique_ptr[column] c_result
+    cdef ListColumnView list_view = input.list_view()
+    with nogil:
+        c_result = move(cpp_contains.contains_nulls(list_view.view()))
+    return Column.from_libcudf(move(c_result))
+
+
+cpdef Column index_of(Column input, ColumnOrScalar search_key, bool find_first_option):
+    """Create a column of index values indicating the position of a search
+    key row within the corresponding list row in the lists column.
+
+    ``search_key`` may be a
+    :py:class:`~cudf._lib.pylibcudf.column.Column` or a
+    :py:class:`~cudf._lib.pylibcudf.scalar.Scalar`.
+
+    For details, see :cpp:func:`index_of`.
+
+    Parameters
+    ----------
+    input : Column
+        The input column.
+    search_key : Union[Column, Scalar]
+        The search key.
+    find_first_option : bool
+        If true, index_of returns the first match.
+        Otherwise the last match is returned.
+
+    Returns
+    -------
+    Column
+        A new Column of index values that indicate where in the
+        list column the search_key was found. An index value
+        of -1 indicates that the search_key was not found.
+    """
+    cdef unique_ptr[column] c_result
+    cdef ListColumnView list_view = input.list_view()
+    cdef cpp_contains.duplicate_find_option find_option = (
+        cpp_contains.duplicate_find_option.FIND_FIRST if find_first_option
+        else cpp_contains.duplicate_find_option.FIND_LAST
+    )
+
+    with nogil:
+        c_result = move(cpp_contains.index_of(
+            list_view.view(),
+            search_key.view() if ColumnOrScalar is Column else dereference(
+                search_key.get()
+            ),
+            find_option,
+        ))
+    return Column.from_libcudf(move(c_result))
diff --git a/python/cudf/cudf/core/_base_index.py b/python/cudf/cudf/core/_base_index.py
index caf07b286cd..e160fa697ee 100644
--- a/python/cudf/cudf/core/_base_index.py
+++ b/python/cudf/cudf/core/_base_index.py
@@ -1104,7 +1104,11 @@ def difference(self, other, sort=None):
                 f"of [None, False, True]; {sort} was passed."
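
The `contains`/`index_of` bindings above are exercised by the rewritten `cudf/_lib/lists.pyx` earlier in this diff. As an illustrative sketch only (not part of the change), they can also be driven directly from Python in the same way, assuming the internal accessors (`to_pylibcudf`, `device_value.c_value`, `Column.from_pylibcudf`) behave as they do there:

```python
import cudf
import cudf._lib.pylibcudf as plc
from cudf._lib.column import Column

s = cudf.Series([[1, 2], [3, 4], [5]])
lists_col = s._column.to_pylibcudf(mode="read")
# pylibcudf Scalar, obtained the same way contains_scalar/index_of_scalar do above
key = cudf.Scalar(3, dtype="int64").device_value.c_value

hits = plc.lists.contains(lists_col, key)         # bool per row: is the key in that list?
first = plc.lists.index_of(lists_col, key, True)  # index of first match per row, -1 if absent

print(Column.from_pylibcudf(hits).values_host)
print(Column.from_pylibcudf(first).values_host)
```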
) - other = cudf.Index(other, name=getattr(other, "name", self.name)) + if not isinstance(other, BaseIndex): + other = cudf.Index( + other, + name=getattr(other, "name", self.name), + ) if not len(other): res = self._get_reconciled_name_object(other).unique() diff --git a/python/cudf/cudf/core/algorithms.py b/python/cudf/cudf/core/algorithms.py index 51a32e29886..e8b82ff60c2 100644 --- a/python/cudf/cudf/core/algorithms.py +++ b/python/cudf/cudf/core/algorithms.py @@ -6,7 +6,7 @@ from cudf.core.column import as_column from cudf.core.copy_types import BooleanMask -from cudf.core.index import Index, RangeIndex +from cudf.core.index import RangeIndex, ensure_index from cudf.core.indexed_frame import IndexedFrame from cudf.core.scalar import Scalar from cudf.options import get_option @@ -107,7 +107,7 @@ def factorize(values, sort=False, use_na_sentinel=True, size_hint=None): dtype="int64" if get_option("mode.pandas_compatible") else None, ).values - return labels, cats.values if return_cupy_array else Index(cats) + return labels, cats.values if return_cupy_array else ensure_index(cats) def _linear_interpolation(column, index=None): diff --git a/python/cudf/cudf/core/buffer/spill_manager.py b/python/cudf/cudf/core/buffer/spill_manager.py index 762cd7f9e86..ed351a6b107 100644 --- a/python/cudf/cudf/core/buffer/spill_manager.py +++ b/python/cudf/cudf/core/buffer/spill_manager.py @@ -18,14 +18,14 @@ import rmm.mr from cudf.options import get_option -from cudf.utils.nvtx_annotation import _cudf_nvtx_annotate +from cudf.utils.performance_tracking import _performance_tracking from cudf.utils.string import format_bytes if TYPE_CHECKING: from cudf.core.buffer.spillable_buffer import SpillableBufferOwner _spill_cudf_nvtx_annotate = partial( - _cudf_nvtx_annotate, domain="cudf_python-spill" + _performance_tracking, domain="cudf_python-spill" ) diff --git a/python/cudf/cudf/core/buffer/spillable_buffer.py b/python/cudf/cudf/core/buffer/spillable_buffer.py index eb57a371965..4c9e524ee05 100644 --- a/python/cudf/cudf/core/buffer/spillable_buffer.py +++ b/python/cudf/cudf/core/buffer/spillable_buffer.py @@ -10,6 +10,7 @@ from typing import TYPE_CHECKING, Any, Literal import numpy +import nvtx from typing_extensions import Self import rmm @@ -21,7 +22,7 @@ host_memory_allocation, ) from cudf.core.buffer.exposure_tracked_buffer import ExposureTrackedBuffer -from cudf.utils.nvtx_annotation import _get_color_for_nvtx, annotate +from cudf.utils.performance_tracking import _get_color_for_nvtx from cudf.utils.string import format_bytes if TYPE_CHECKING: @@ -200,7 +201,7 @@ def spill(self, target: str = "cpu") -> None: ) if (ptr_type, target) == ("gpu", "cpu"): - with annotate( + with nvtx.annotate( message="SpillDtoH", color=_get_color_for_nvtx("SpillDtoH"), domain="cudf_python-spill", @@ -218,7 +219,7 @@ def spill(self, target: str = "cpu") -> None: # trigger a new call to this buffer's `spill()`. # Therefore, it is important that spilling-on-demand doesn't # try to unspill an already locked buffer! 
- with annotate( + with nvtx.annotate( message="SpillHtoD", color=_get_color_for_nvtx("SpillHtoD"), domain="cudf_python-spill", diff --git a/python/cudf/cudf/core/cut.py b/python/cudf/cudf/core/cut.py index 54c5e829e8a..d9f62f51f92 100644 --- a/python/cudf/cudf/core/cut.py +++ b/python/cudf/cudf/core/cut.py @@ -292,7 +292,7 @@ def cut( ) # we return a categorical index, as we don't have a Categorical method - categorical_index = cudf.Index(col) + categorical_index = cudf.CategoricalIndex._from_data({None: col}) if isinstance(orig_x, (pd.Series, cudf.Series)): # if we have a series input we return a series output diff --git a/python/cudf/cudf/core/dataframe.py b/python/cudf/cudf/core/dataframe.py index f7f5ef792d6..4dfeb68b7ba 100644 --- a/python/cudf/cudf/core/dataframe.py +++ b/python/cudf/cudf/core/dataframe.py @@ -58,7 +58,12 @@ from cudf.core.column_accessor import ColumnAccessor from cudf.core.copy_types import BooleanMask from cudf.core.groupby.groupby import DataFrameGroupBy, groupby_doc_template -from cudf.core.index import BaseIndex, RangeIndex, _index_from_data, as_index +from cudf.core.index import ( + BaseIndex, + RangeIndex, + _index_from_data, + ensure_index, +) from cudf.core.indexed_frame import ( IndexedFrame, _FrameIndexer, @@ -83,7 +88,7 @@ min_scalar_type, numeric_normalize_types, ) -from cudf.utils.nvtx_annotation import _cudf_nvtx_annotate +from cudf.utils.performance_tracking import _performance_tracking from cudf.utils.utils import GetAttrGetItemMixin, _external_only_api if TYPE_CHECKING: @@ -145,7 +150,7 @@ def __setitem__(self, key, value): key = (key, slice(None)) return self._setitem_tuple_arg(key, value) - @_cudf_nvtx_annotate + @_performance_tracking def _can_downcast_to_series(self, df, arg): """ This method encapsulates the logic used @@ -188,7 +193,7 @@ def _can_downcast_to_series(self, df, arg): return True return False - @_cudf_nvtx_annotate + @_performance_tracking def _downcast_to_series(self, df, arg): """ "Downcast" from a DataFrame to a Series @@ -233,11 +238,11 @@ class _DataFrameLocIndexer(_DataFrameIndexer): For selection by label. 
""" - @_cudf_nvtx_annotate + @_performance_tracking def _getitem_scalar(self, arg): return self._frame[arg[1]].loc[arg[0]] - @_cudf_nvtx_annotate + @_performance_tracking def _getitem_tuple_arg(self, arg): from uuid import uuid4 @@ -338,7 +343,7 @@ def _getitem_tuple_arg(self, arg): range(len(tmp_arg[0])) ) }, - index=as_index(tmp_arg[0]), + index=cudf.Index(tmp_arg[0]), ) columns_df[cantor_name] = column.as_column( range(len(columns_df)) @@ -363,7 +368,7 @@ def _getitem_tuple_arg(self, arg): return self._downcast_to_series(df, arg) return df - @_cudf_nvtx_annotate + @_performance_tracking def _setitem_tuple_arg(self, key, value): if ( isinstance(self._frame.index, MultiIndex) @@ -532,7 +537,7 @@ def __getitem__(self, arg): return frame._empty_like(keep_index=True) assert_never(row_spec) - @_cudf_nvtx_annotate + @_performance_tracking def _setitem_tuple_arg(self, key, value): columns_df = self._frame._from_data( self._frame._data.select_by_index(key[1]), self._frame.index @@ -677,7 +682,7 @@ class DataFrame(IndexedFrame, Serializable, GetAttrGetItemMixin): _groupby = DataFrameGroupBy _resampler = DataFrameResampler - @_cudf_nvtx_annotate + @_performance_tracking def __init__( self, data=None, @@ -702,7 +707,7 @@ def __init__( data = data.reindex(index) index = data.index else: - index = cudf.Index(index) + index = ensure_index(index) else: index = data.index @@ -751,7 +756,7 @@ def __init__( if index is None: self._index = RangeIndex(0) else: - self._index = cudf.Index(index) + self._index = ensure_index(index) if columns is not None: rangeindex = isinstance( columns, (range, pd.RangeIndex, cudf.RangeIndex) @@ -859,7 +864,7 @@ def __init__( columns, pd.MultiIndex ) - @_cudf_nvtx_annotate + @_performance_tracking def _init_from_series_list(self, data, columns, index): if index is None: # When `index` is `None`, the final index of @@ -909,7 +914,7 @@ def _init_from_series_list(self, data, columns, index): f"not match length of index ({index_length})" ) - final_index = cudf.Index(index) + final_index = ensure_index(index) series_lengths = list(map(len, data)) data = numeric_normalize_types(*data) @@ -972,14 +977,14 @@ def _init_from_series_list(self, data, columns, index): else: self._data.rangeindex = True - @_cudf_nvtx_annotate + @_performance_tracking def _init_from_list_like(self, data, index=None, columns=None): if index is None: index = RangeIndex(start=0, stop=len(data)) else: - index = cudf.Index(index) + index = ensure_index(index) - self._index = cudf.Index(index) + self._index = index # list-of-dicts case if len(data) > 0 and isinstance(data[0], dict): data = DataFrame.from_pandas(pd.DataFrame(data)) @@ -1030,7 +1035,7 @@ def _init_from_list_like(self, data, index=None, columns=None): ) self._data.label_dtype = getattr(columns, "dtype", None) - @_cudf_nvtx_annotate + @_performance_tracking def _init_from_dict_like( self, data, index=None, columns=None, nan_as_null=None ): @@ -1085,7 +1090,7 @@ def _init_from_dict_like( self._index = RangeIndex(0, num_rows) else: - self._index = cudf.Index(index) + self._index = ensure_index(index) if len(data): self._data.multiindex = True @@ -1119,7 +1124,7 @@ def _from_data( return out @staticmethod - @_cudf_nvtx_annotate + @_performance_tracking def _align_input_series_indices(data, index): input_series = [ Series(val) @@ -1187,7 +1192,7 @@ def deserialize(cls, header, frames): return obj @property - @_cudf_nvtx_annotate + @_performance_tracking def shape(self): """Returns a tuple representing the dimensionality of the DataFrame.""" return 
self._num_rows, self._num_columns @@ -1270,7 +1275,7 @@ def __setattr__(self, key, col): else: super().__setattr__(key, col) - @_cudf_nvtx_annotate + @_performance_tracking def __getitem__(self, arg): """ If *arg* is a ``str`` or ``int`` type, return the column Series. @@ -1364,7 +1369,7 @@ def __getitem__(self, arg): f"__getitem__ on type {type(arg)} is not supported" ) - @_cudf_nvtx_annotate + @_performance_tracking def __setitem__(self, arg, value): """Add/set column by *arg or DataFrame*""" if isinstance(arg, DataFrame): @@ -1482,7 +1487,7 @@ def __setitem__(self, arg, value): def __delitem__(self, name): self._drop_column(name) - @_cudf_nvtx_annotate + @_performance_tracking def memory_usage(self, index=True, deep=False): mem_usage = [col.memory_usage for col in self._data.columns] names = [str(name) for name in self._data.names] @@ -1491,10 +1496,10 @@ def memory_usage(self, index=True, deep=False): names.append("Index") return Series._from_data( data={None: as_column(mem_usage)}, - index=as_index(names), + index=cudf.Index(names), ) - @_cudf_nvtx_annotate + @_performance_tracking def __array_function__(self, func, types, args, kwargs): if "out" in kwargs or not all( issubclass(t, (Series, DataFrame)) for t in types @@ -1528,7 +1533,7 @@ def __array_function__(self, func, types, args, kwargs): return NotImplemented # The _get_numeric_data method is necessary for dask compatibility. - @_cudf_nvtx_annotate + @_performance_tracking def _get_numeric_data(self): """Return a dataframe with only numeric data types""" columns = [ @@ -1538,7 +1543,7 @@ def _get_numeric_data(self): ] return self[columns] - @_cudf_nvtx_annotate + @_performance_tracking def assign(self, **kwargs: Callable[[Self], Any] | Any): """ Assign columns to DataFrame from keyword arguments. @@ -1571,7 +1576,7 @@ def assign(self, **kwargs: Callable[[Self], Any] | Any): return new_df @classmethod - @_cudf_nvtx_annotate + @_performance_tracking def _concat( cls, objs, axis=0, join="outer", ignore_index=False, sort=False ): @@ -1963,12 +1968,12 @@ def _get_renderable_dataframe(self): return output - @_cudf_nvtx_annotate + @_performance_tracking def __repr__(self): output = self._get_renderable_dataframe() return self._clean_renderable_dataframe(output) - @_cudf_nvtx_annotate + @_performance_tracking def _repr_html_(self): lines = ( self._get_renderable_dataframe() @@ -1984,7 +1989,7 @@ def _repr_html_(self): lines.append("") return "\n".join(lines) - @_cudf_nvtx_annotate + @_performance_tracking def _repr_latex_(self): return self._get_renderable_dataframe().to_pandas()._repr_latex_() @@ -2098,7 +2103,7 @@ def _make_operands_and_index_for_binop( return operands, index, can_use_self_column_name @classmethod - @_cudf_nvtx_annotate + @_performance_tracking def from_dict( cls, data: dict, @@ -2233,7 +2238,7 @@ def from_dict( f"parameter. 
Got '{orient}' instead" ) - @_cudf_nvtx_annotate + @_performance_tracking def to_dict( self, orient: str = "dict", @@ -2354,7 +2359,7 @@ def to_dict( return self.to_pandas().to_dict(orient=orient, into=into) - @_cudf_nvtx_annotate + @_performance_tracking def scatter_by_map( self, map_index, map_size=None, keep_index=True, debug: bool = False ): @@ -2447,7 +2452,7 @@ def scatter_by_map( return result - @_cudf_nvtx_annotate + @_performance_tracking def update( self, other, @@ -2542,23 +2547,23 @@ def update( self._mimic_inplace(source_df, inplace=True) - @_cudf_nvtx_annotate + @_performance_tracking def __iter__(self): return iter(self._column_names) - @_cudf_nvtx_annotate + @_performance_tracking def __contains__(self, item): # This must check against containment in the pandas Index and not # self._column_names to handle NA, None, nan, etc. correctly. return item in self._data.to_pandas_index() - @_cudf_nvtx_annotate + @_performance_tracking def items(self): """Iterate over column names and series pairs""" for k in self: yield (k, self[k]) - @_cudf_nvtx_annotate + @_performance_tracking def equals(self, other) -> bool: ret = super().equals(other) # If all other checks matched, validate names. @@ -2591,13 +2596,13 @@ def at(self): "index is absolutely necessary. For checking if the columns are a " "MultiIndex, use _data.multiindex." ) - @_cudf_nvtx_annotate + @_performance_tracking def columns(self): """Returns a tuple of columns""" return self._data.to_pandas_index() @columns.setter # type: ignore - @_cudf_nvtx_annotate + @_performance_tracking def columns(self, columns): multiindex = False rangeindex = False @@ -2665,7 +2670,7 @@ def _set_columns_like(self, other: ColumnAccessor) -> None: verify=False, ) - @_cudf_nvtx_annotate + @_performance_tracking def reindex( self, labels=None, @@ -2813,7 +2818,7 @@ def reindex( fill_value=fill_value, ) - @_cudf_nvtx_annotate + @_performance_tracking def set_index( self, keys, @@ -2980,7 +2985,7 @@ def set_index( df.index = idx return df if not inplace else None - @_cudf_nvtx_annotate + @_performance_tracking def fillna( self, value=None, method=None, axis=None, inplace=False, limit=None ): # noqa: D102 @@ -3006,7 +3011,7 @@ def fillna( value=value, method=method, axis=axis, inplace=inplace, limit=limit ) - @_cudf_nvtx_annotate + @_performance_tracking def where(self, cond, other=None, inplace=False): from cudf.core._internals.where import ( _check_and_cast_columns_with_other, @@ -3163,7 +3168,7 @@ def reset_index( inplace=inplace, ) - @_cudf_nvtx_annotate + @_performance_tracking def insert(self, loc, name, value, nan_as_null=no_default): """Add a column to DataFrame at the index specified by loc. @@ -3189,7 +3194,7 @@ def insert(self, loc, name, value, nan_as_null=no_default): ignore_index=False, ) - @_cudf_nvtx_annotate + @_performance_tracking def _insert(self, loc, name, value, nan_as_null=None, ignore_index=True): """ Same as `insert`, with additional `ignore_index` param. @@ -3271,7 +3276,7 @@ def _insert(self, loc, name, value, nan_as_null=None, ignore_index=True): self._data.insert(name, value, loc=loc) @property # type:ignore - @_cudf_nvtx_annotate + @_performance_tracking def axes(self): """ Return a list representing the axes of the DataFrame. 
@@ -3363,7 +3368,7 @@ def diff(self, periods=1, axis=0): return self - self.shift(periods=periods) - @_cudf_nvtx_annotate + @_performance_tracking def drop_duplicates( self, subset=None, @@ -3451,14 +3456,14 @@ def drop_duplicates( return self._mimic_inplace(outdf, inplace=inplace) - @_cudf_nvtx_annotate + @_performance_tracking def pop(self, item): """Return a column and drop it from the DataFrame.""" popped = self[item] del self[item] return popped - @_cudf_nvtx_annotate + @_performance_tracking def rename( self, mapper=None, @@ -3616,7 +3621,7 @@ def rename( return result - @_cudf_nvtx_annotate + @_performance_tracking def add_prefix(self, prefix): # TODO: Change to deep=False when copy-on-write is default out = self.copy(deep=True) @@ -3625,7 +3630,7 @@ def add_prefix(self, prefix): ] return out - @_cudf_nvtx_annotate + @_performance_tracking def add_suffix(self, suffix): # TODO: Change to deep=False when copy-on-write is default out = self.copy(deep=True) @@ -3634,7 +3639,7 @@ def add_suffix(self, suffix): ] return out - @_cudf_nvtx_annotate + @_performance_tracking def agg(self, aggs, axis=None): """ Aggregate using one or more operations over the specified axis. @@ -3770,7 +3775,7 @@ def agg(self, aggs, axis=None): else: raise ValueError("argument must be a string, list or dict") - @_cudf_nvtx_annotate + @_performance_tracking def nlargest(self, n, columns, keep="first"): """Return the first *n* rows ordered by *columns* in descending order. @@ -3910,7 +3915,7 @@ def nsmallest(self, n, columns, keep="first"): """ return self._n_largest_or_smallest(False, n, columns, keep) - @_cudf_nvtx_annotate + @_performance_tracking def swaplevel(self, i=-2, j=-1, axis=0): """ Swap level i with level j. @@ -3977,7 +3982,7 @@ def swaplevel(self, i=-2, j=-1, axis=0): return result - @_cudf_nvtx_annotate + @_performance_tracking def transpose(self): """Transpose index and columns. @@ -4033,7 +4038,7 @@ def transpose(self): # Set the old column names as the new index result = self.__class__._from_data( ColumnAccessor(dict(enumerate(result_columns)), verify=False), - index=as_index(index), + index=cudf.Index(index), ) # Set the old index as the new column names result.columns = columns @@ -4041,7 +4046,7 @@ def transpose(self): T = property(transpose, doc=transpose.__doc__) - @_cudf_nvtx_annotate + @_performance_tracking def melt(self, **kwargs): """Unpivots a DataFrame from wide format to long format, optionally leaving identifier variables set. 
@@ -4071,7 +4076,7 @@ def melt(self, **kwargs): return melt(self, **kwargs) - @_cudf_nvtx_annotate + @_performance_tracking def merge( self, right, @@ -4224,7 +4229,7 @@ def merge( suffixes=suffixes, ).perform_merge() - @_cudf_nvtx_annotate + @_performance_tracking def join( self, other, @@ -4273,7 +4278,7 @@ def join( ) return df - @_cudf_nvtx_annotate + @_performance_tracking @docutils.doc_apply( groupby_doc_template.format( ret=textwrap.dedent( @@ -4407,7 +4412,7 @@ def query(self, expr, local_dict=None): BooleanMask.from_column_unchecked(boolmask) ) - @_cudf_nvtx_annotate + @_performance_tracking def apply( self, func, axis=1, raw=False, result_type=None, args=(), **kwargs ): @@ -4691,7 +4696,7 @@ def _func(x): # pragma: no cover return DataFrame._from_data(result, index=self.index) - @_cudf_nvtx_annotate + @_performance_tracking @applyutils.doc_apply() def apply_rows( self, @@ -4770,7 +4775,7 @@ def apply_rows( cache_key=cache_key, ) - @_cudf_nvtx_annotate + @_performance_tracking @applyutils.doc_applychunks() def apply_chunks( self, @@ -4837,7 +4842,7 @@ def apply_chunks( tpb=tpb, ) - @_cudf_nvtx_annotate + @_performance_tracking def partition_by_hash(self, columns, nparts, keep_index=True): """Partition the dataframe by the hashed value of data in *columns*. @@ -5181,7 +5186,7 @@ def _sizeof_fmt(num, size_qualifier): cudf.utils.ioutils.buffer_write_lines(buf, lines) - @_cudf_nvtx_annotate + @_performance_tracking @docutils.doc_describe() def describe( self, @@ -5243,7 +5248,7 @@ def describe( ) return res - @_cudf_nvtx_annotate + @_performance_tracking def to_pandas( self, *, nullable: bool = False, arrow_type: bool = False ) -> pd.DataFrame: @@ -5333,7 +5338,7 @@ def to_pandas( return out_df @classmethod - @_cudf_nvtx_annotate + @_performance_tracking def from_pandas(cls, dataframe, nan_as_null=no_default): """ Convert from a Pandas DataFrame. @@ -5406,7 +5411,7 @@ def from_pandas(cls, dataframe, nan_as_null=no_default): ) @classmethod - @_cudf_nvtx_annotate + @_performance_tracking def from_arrow(cls, table): """ Convert from PyArrow Table to DataFrame. @@ -5492,7 +5497,7 @@ def from_arrow(cls, table): return out - @_cudf_nvtx_annotate + @_performance_tracking def to_arrow(self, preserve_index=None): """ Convert to a PyArrow Table. @@ -5582,7 +5587,7 @@ def to_arrow(self, preserve_index=None): return out.replace_schema_metadata(metadata) - @_cudf_nvtx_annotate + @_performance_tracking def to_records(self, index=True): """Convert to a numpy recarray @@ -5606,7 +5611,7 @@ def to_records(self, index=True): return ret @classmethod - @_cudf_nvtx_annotate + @_performance_tracking def from_records(cls, data, index=None, columns=None, nan_as_null=False): """ Convert structured or record ndarray to DataFrame. @@ -5657,7 +5662,7 @@ def from_records(cls, data, index=None, columns=None, nan_as_null=False): } if not is_scalar(index): - new_index = cudf.Index(index) + new_index = ensure_index(index) else: new_index = None @@ -5685,7 +5690,7 @@ def from_records(cls, data, index=None, columns=None, nan_as_null=False): return df @classmethod - @_cudf_nvtx_annotate + @_performance_tracking def _from_arrays(cls, data, index=None, columns=None, nan_as_null=False): """Convert a numpy/cupy array to DataFrame. 
@@ -5741,7 +5746,7 @@ def _from_arrays(cls, data, index=None, columns=None, nan_as_null=False): } if index is not None: - index = cudf.Index(index) + index = ensure_index(index) if isinstance(columns, (pd.Index, cudf.Index)): level_names = tuple(columns.names) @@ -5763,7 +5768,7 @@ def _from_arrays(cls, data, index=None, columns=None, nan_as_null=False): index=index, ) - @_cudf_nvtx_annotate + @_performance_tracking def interpolate( self, method="linear", @@ -5793,7 +5798,7 @@ def interpolate( **kwargs, ) - @_cudf_nvtx_annotate + @_performance_tracking def quantile( self, q=0.5, @@ -5936,7 +5941,7 @@ def quantile( result.index = cudf.Index(list(map(float, qs)), dtype="float64") return result - @_cudf_nvtx_annotate + @_performance_tracking def isin(self, values): """ Whether each element in the DataFrame is contained in values. @@ -6080,7 +6085,7 @@ def make_false_column_like_self(): # # Stats # - @_cudf_nvtx_annotate + @_performance_tracking def _prepare_for_rowwise_op(self, method, skipna, numeric_only): """Prepare a DataFrame for CuPy-based row-wise operations.""" @@ -6132,7 +6137,7 @@ def _prepare_for_rowwise_op(self, method, skipna, numeric_only): coerced = coerced.astype("int64", copy=False) return coerced, mask, common_dtype - @_cudf_nvtx_annotate + @_performance_tracking def count(self, axis=0, numeric_only=False): """ Count ``non-NA`` cells for each column or row. @@ -6184,7 +6189,7 @@ def count(self, axis=0, numeric_only=False): "columns": 1, } - @_cudf_nvtx_annotate + @_performance_tracking def _reduce( self, op, @@ -6308,7 +6313,7 @@ def _reduce( else: raise ValueError(f"Invalid value of {axis=} received for {op}") - @_cudf_nvtx_annotate + @_performance_tracking def _scan( self, op, @@ -6325,7 +6330,7 @@ def _scan( elif axis == 1: return self._apply_cupy_method_axis_1(op, **kwargs) - @_cudf_nvtx_annotate + @_performance_tracking def mode(self, axis=0, numeric_only=False, dropna=True): """ Get the mode(s) of each element along the selected axis. @@ -6432,17 +6437,17 @@ def mode(self, axis=0, numeric_only=False, dropna=True): return df - @_cudf_nvtx_annotate + @_performance_tracking def all(self, axis=0, bool_only=None, skipna=True, **kwargs): obj = self.select_dtypes(include="bool") if bool_only else self return super(DataFrame, obj).all(axis, skipna, **kwargs) - @_cudf_nvtx_annotate + @_performance_tracking def any(self, axis=0, bool_only=None, skipna=True, **kwargs): obj = self.select_dtypes(include="bool") if bool_only else self return super(DataFrame, obj).any(axis, skipna, **kwargs) - @_cudf_nvtx_annotate + @_performance_tracking def _apply_cupy_method_axis_1(self, method, *args, **kwargs): # This method uses cupy to perform scans and reductions along rows of a # DataFrame. Since cuDF is designed around columnar storage and @@ -6542,7 +6547,7 @@ def _apply_cupy_method_axis_1(self, method, *args, **kwargs): result_df._set_columns_like(prepared._data) return result_df - @_cudf_nvtx_annotate + @_performance_tracking def _columns_view(self, columns): """ Return a subset of the DataFrame's columns as a view. @@ -6551,7 +6556,7 @@ def _columns_view(self, columns): {col: self._data[col] for col in columns}, index=self.index ) - @_cudf_nvtx_annotate + @_performance_tracking def select_dtypes(self, include=None, exclude=None): """Return a subset of the DataFrame's columns based on the column dtypes. 
@@ -6816,7 +6821,7 @@ def to_orc( index=index, ) - @_cudf_nvtx_annotate + @_performance_tracking def stack(self, level=-1, dropna=no_default, future_stack=False): """Stack the prescribed level(s) from columns to index @@ -7161,7 +7166,7 @@ def unnamed_group_generator(): else: return result - @_cudf_nvtx_annotate + @_performance_tracking def cov(self, **kwargs): """Compute the covariance matrix of a DataFrame. @@ -7216,7 +7221,7 @@ def corr(self, method="pearson", min_periods=None): df._set_columns_like(self._data) return df - @_cudf_nvtx_annotate + @_performance_tracking def to_struct(self, name=None): """ Return a struct Series composed of the columns of the DataFrame. @@ -7250,7 +7255,7 @@ def to_struct(self, name=None): name=name, ) - @_cudf_nvtx_annotate + @_performance_tracking def keys(self): """ Get the columns. @@ -7310,14 +7315,14 @@ def iterrows(self): "if you wish to iterate over each row." ) - @_cudf_nvtx_annotate + @_performance_tracking @copy_docstring(reshape.pivot) def pivot(self, *, columns, index=no_default, values=no_default): return cudf.core.reshape.pivot( self, index=index, columns=columns, values=values ) - @_cudf_nvtx_annotate + @_performance_tracking @copy_docstring(reshape.pivot_table) def pivot_table( self, @@ -7346,14 +7351,14 @@ def pivot_table( sort=sort, ) - @_cudf_nvtx_annotate + @_performance_tracking @copy_docstring(reshape.unstack) def unstack(self, level=-1, fill_value=None): return cudf.core.reshape.unstack( self, level=level, fill_value=fill_value ) - @_cudf_nvtx_annotate + @_performance_tracking def explode(self, column, ignore_index=False): """ Transform each element of a list-like to a row, replicating index @@ -7549,7 +7554,7 @@ def _from_columns_like_self( result._set_columns_like(self._data) return result - @_cudf_nvtx_annotate + @_performance_tracking def interleave_columns(self): """ Interleave Series columns of a table into a single column. @@ -7597,7 +7602,7 @@ def interleave_columns(self): {None: libcudf.reshape.interleave_columns([*self._columns])} ) - @_cudf_nvtx_annotate + @_performance_tracking def eval(self, expr: str, inplace: bool = False, **kwargs): """Evaluate a string describing operations on DataFrame columns. @@ -7953,7 +7958,7 @@ def func(left, right, output): ) -@_cudf_nvtx_annotate +@_performance_tracking def from_pandas(obj, nan_as_null=no_default): """ Convert certain Pandas objects into the cudf equivalent. 
@@ -8080,7 +8085,7 @@ def from_pandas(obj, nan_as_null=no_default): ) -@_cudf_nvtx_annotate +@_performance_tracking def merge(left, right, *args, **kwargs): if isinstance(left, Series): left = left.to_frame() diff --git a/python/cudf/cudf/core/frame.py b/python/cudf/cudf/core/frame.py index 8ca71180c00..9bac75dc6ac 100644 --- a/python/cudf/cudf/core/frame.py +++ b/python/cudf/cudf/core/frame.py @@ -32,7 +32,7 @@ from cudf.core.mixins import BinaryOperand, Scannable from cudf.utils import ioutils from cudf.utils.dtypes import find_common_type -from cudf.utils.nvtx_annotation import _cudf_nvtx_annotate +from cudf.utils.performance_tracking import _performance_tracking from cudf.utils.utils import _array_ufunc, _warn_no_dask_cudf if TYPE_CHECKING: @@ -86,7 +86,7 @@ def _dtypes(self) -> abc.Iterable: def ndim(self) -> int: raise NotImplementedError() - @_cudf_nvtx_annotate + @_performance_tracking def serialize(self): # TODO: See if self._data can be serialized outright header = { @@ -101,7 +101,7 @@ def serialize(self): return header, frames @classmethod - @_cudf_nvtx_annotate + @_performance_tracking def deserialize(cls, header, frames): cls_deserialize = pickle.loads(header["type-serialized"]) column_names = pickle.loads(header["column_names"]) @@ -122,7 +122,7 @@ def deserialize(cls, header, frames): return cls_deserialize._from_data(col_accessor) @classmethod - @_cudf_nvtx_annotate + @_performance_tracking def _from_data(cls, data: MutableMapping) -> Self: """ Construct cls from a ColumnAccessor-like mapping. @@ -131,7 +131,7 @@ def _from_data(cls, data: MutableMapping) -> Self: Frame.__init__(obj, data) return obj - @_cudf_nvtx_annotate + @_performance_tracking def _from_data_like_self(self, data: MutableMapping) -> Self: """ Return type(self) from a ColumnAccessor-like mapping but @@ -139,7 +139,7 @@ def _from_data_like_self(self, data: MutableMapping) -> Self: """ return self._from_data(data) - @_cudf_nvtx_annotate + @_performance_tracking def _from_columns_like_self( self, columns: list[ColumnBase], @@ -155,7 +155,7 @@ def _from_columns_like_self( frame = self.__class__._from_data(data) return frame._copy_type_metadata(self) - @_cudf_nvtx_annotate + @_performance_tracking def _mimic_inplace( self, result: Self, inplace: bool = False ) -> Self | None: @@ -171,7 +171,7 @@ def _mimic_inplace( return result @property - @_cudf_nvtx_annotate + @_performance_tracking def size(self) -> int: """ Return the number of elements in the underlying data. @@ -263,11 +263,11 @@ def memory_usage(self, deep=False): """ raise NotImplementedError - @_cudf_nvtx_annotate + @_performance_tracking def __len__(self) -> int: return self._num_rows - @_cudf_nvtx_annotate + @_performance_tracking def astype(self, dtype: dict[Any, Dtype], copy: bool = False) -> Self: casted = ( col.astype(dtype.get(col_name, col.dtype), copy=copy) @@ -276,7 +276,7 @@ def astype(self, dtype: dict[Any, Dtype], copy: bool = False) -> Self: ca = self._data._from_columns_like_self(casted, verify=False) return self._from_data_like_self(ca) - @_cudf_nvtx_annotate + @_performance_tracking def equals(self, other) -> bool: """ Test whether two objects contain the same elements. @@ -347,7 +347,7 @@ def equals(self, other) -> bool: ) ) - @_cudf_nvtx_annotate + @_performance_tracking def _get_columns_by_label(self, labels) -> Self: """ Returns columns of the Frame specified by `labels`. 
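# --- Editor's note (not part of the patch): the hunks above and below swap the
# `_cudf_nvtx_annotate` decorator for `_performance_tracking`, now imported from
# `cudf.utils.performance_tracking`. A minimal sketch of a decorator with this
# shape follows; it simply wraps the call in an NVTX range so the function shows
# up on a profiler timeline. The body and the `domain` default are assumptions
# for illustration only, not the actual cudf implementation.
import functools

import nvtx  # https://pypi.org/project/nvtx/


def _performance_tracking(func, domain="cudf_python"):
    """Wrap ``func`` in an NVTX range named after the function (sketch)."""

    @functools.wraps(func)
    def wrapper(*args, **kwargs):
        # Open an NVTX range for the duration of the call.
        with nvtx.annotate(message=func.__qualname__, domain=domain):
            return func(*args, **kwargs)

    return wrapper
# --- end editor's note ---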
@@ -357,7 +357,7 @@ def _get_columns_by_label(self, labels) -> Self: return self._from_data_like_self(self._data.select_by_label(labels)) @property - @_cudf_nvtx_annotate + @_performance_tracking def values(self) -> cupy.ndarray: """ Return a CuPy representation of the DataFrame. @@ -373,7 +373,7 @@ def values(self) -> cupy.ndarray: return self.to_cupy() @property - @_cudf_nvtx_annotate + @_performance_tracking def values_host(self) -> np.ndarray: """ Return a NumPy representation of the data. @@ -388,7 +388,7 @@ def values_host(self) -> np.ndarray: """ return self.to_numpy() - @_cudf_nvtx_annotate + @_performance_tracking def __array__(self, dtype=None): raise TypeError( "Implicit conversion to a host NumPy array via __array__ is not " @@ -397,14 +397,14 @@ def __array__(self, dtype=None): "using .to_numpy()." ) - @_cudf_nvtx_annotate + @_performance_tracking def __arrow_array__(self, type=None): raise TypeError( "Implicit conversion to a host PyArrow object via __arrow_array__ " "is not allowed. Consider using .to_arrow()" ) - @_cudf_nvtx_annotate + @_performance_tracking def _to_array( self, get_array: Callable, @@ -468,7 +468,7 @@ def to_array( # particular, we need to benchmark how much of the overhead is coming from # (potentially unavoidable) local copies in to_cupy and how much comes from # inefficiencies in the implementation. - @_cudf_nvtx_annotate + @_performance_tracking def to_cupy( self, dtype: Dtype | None = None, @@ -502,7 +502,7 @@ def to_cupy( na_value, ) - @_cudf_nvtx_annotate + @_performance_tracking def to_numpy( self, dtype: Dtype | None = None, @@ -537,7 +537,7 @@ def to_numpy( lambda col: col.values_host, numpy, copy, dtype, na_value ) - @_cudf_nvtx_annotate + @_performance_tracking def where(self, cond, other=None, inplace: bool = False) -> Self | None: """ Replace values where the condition is False. @@ -610,7 +610,7 @@ def where(self, cond, other=None, inplace: bool = False) -> Self | None: """ raise NotImplementedError - @_cudf_nvtx_annotate + @_performance_tracking def fillna( self, value: None | ScalarLike | cudf.Series = None, @@ -767,14 +767,14 @@ def fillna( inplace=inplace, ) - @_cudf_nvtx_annotate + @_performance_tracking def _drop_column(self, name): """Drop a column by *name*""" if name not in self._data: raise KeyError(f"column '{name}' does not exist") del self._data[name] - @_cudf_nvtx_annotate + @_performance_tracking def _quantile_table( self, q: float, @@ -808,7 +808,7 @@ def _quantile_table( ) @classmethod - @_cudf_nvtx_annotate + @_performance_tracking def from_arrow(cls, data: pa.Table) -> Self: """Convert from PyArrow Table to Frame @@ -968,7 +968,7 @@ def from_arrow(cls, data: pa.Table) -> Self: return cls._from_data({name: result[name] for name in column_names}) - @_cudf_nvtx_annotate + @_performance_tracking def to_arrow(self): """ Convert to arrow Table @@ -992,7 +992,7 @@ def to_arrow(self): {str(name): col.to_arrow() for name, col in self._data.items()} ) - @_cudf_nvtx_annotate + @_performance_tracking def _positions_from_column_names(self, column_names) -> list[int]: """Map each column name into their positions in the frame. 
@@ -1005,7 +1005,7 @@ def _positions_from_column_names(self, column_names) -> list[int]: if name in set(column_names) ] - @_cudf_nvtx_annotate + @_performance_tracking def _copy_type_metadata(self: Self, other: Self) -> Self: """ Copy type metadata from each column of `other` to the corresponding @@ -1020,7 +1020,7 @@ def _copy_type_metadata(self: Self, other: Self) -> Self: return self - @_cudf_nvtx_annotate + @_performance_tracking def isna(self): """ Identify missing values. @@ -1101,7 +1101,7 @@ def isna(self): # Alias for isna isnull = isna - @_cudf_nvtx_annotate + @_performance_tracking def notna(self): """ Identify non-missing values. @@ -1182,7 +1182,7 @@ def notna(self): # Alias for notna notnull = notna - @_cudf_nvtx_annotate + @_performance_tracking def searchsorted( self, values, @@ -1296,7 +1296,7 @@ def searchsorted( else: return result - @_cudf_nvtx_annotate + @_performance_tracking def argsort( self, by=None, @@ -1383,7 +1383,7 @@ def argsort( by=by, ascending=ascending, na_position=na_position ).values - @_cudf_nvtx_annotate + @_performance_tracking def _get_sorted_inds( self, by=None, @@ -1411,7 +1411,7 @@ def _get_sorted_inds( stable=True, ) - @_cudf_nvtx_annotate + @_performance_tracking def _split(self, splits): """Split a frame with split points in ``splits``. Returns a list of Frames of length `len(splits) + 1`. @@ -1426,13 +1426,13 @@ def _split(self, splits): for split_idx in range(len(splits) + 1) ] - @_cudf_nvtx_annotate + @_performance_tracking def _encode(self): columns, indices = libcudf.transform.table_encode([*self._columns]) keys = self._from_columns_like_self(columns) return keys, indices - @_cudf_nvtx_annotate + @_performance_tracking def _unaryop(self, op): data_columns = (col.unary_operator(op) for col in self._columns) return self._from_data_like_self( @@ -1440,7 +1440,7 @@ def _unaryop(self, op): ) @classmethod - @_cudf_nvtx_annotate + @_performance_tracking def _colwise_binop( cls, operands: dict[str | None, tuple[ColumnBase, Any, bool, Any]], @@ -1519,11 +1519,11 @@ def _colwise_binop( return output - @_cudf_nvtx_annotate + @_performance_tracking def __array_ufunc__(self, ufunc, method, *inputs, **kwargs): return _array_ufunc(self, ufunc, method, inputs, kwargs) - @_cudf_nvtx_annotate + @_performance_tracking @acquire_spill_lock() def _apply_cupy_ufunc_to_operands( self, ufunc, cupy_func, operands, **kwargs @@ -1565,7 +1565,7 @@ def _apply_cupy_ufunc_to_operands( return data # Unary logical operators - @_cudf_nvtx_annotate + @_performance_tracking def __neg__(self): """Negate for integral dtypes, logical NOT for bools.""" return self._from_data_like_self( @@ -1579,30 +1579,30 @@ def __neg__(self): ) ) - @_cudf_nvtx_annotate + @_performance_tracking def __pos__(self): return self.copy(deep=True) - @_cudf_nvtx_annotate + @_performance_tracking def __abs__(self): return self._unaryop("abs") # Reductions @classmethod - @_cudf_nvtx_annotate + @_performance_tracking def _get_axis_from_axis_arg(cls, axis): try: return cls._SUPPORT_AXIS_LOOKUP[axis] except KeyError: raise ValueError(f"No axis named {axis} for object type {cls}") - @_cudf_nvtx_annotate + @_performance_tracking def _reduce(self, *args, **kwargs): raise NotImplementedError( f"Reductions are not supported for objects of type {type(self)}." 
) - @_cudf_nvtx_annotate + @_performance_tracking def min( self, axis=0, @@ -1653,7 +1653,7 @@ def min( **kwargs, ) - @_cudf_nvtx_annotate + @_performance_tracking def max( self, axis=0, @@ -1701,7 +1701,7 @@ def max( **kwargs, ) - @_cudf_nvtx_annotate + @_performance_tracking def all(self, axis=0, skipna=True, **kwargs): """ Return whether all elements are True in DataFrame. @@ -1754,7 +1754,7 @@ def all(self, axis=0, skipna=True, **kwargs): **kwargs, ) - @_cudf_nvtx_annotate + @_performance_tracking def any(self, axis=0, skipna=True, **kwargs): """ Return whether any elements is True in DataFrame. @@ -1807,26 +1807,26 @@ def any(self, axis=0, skipna=True, **kwargs): **kwargs, ) - @_cudf_nvtx_annotate + @_performance_tracking @ioutils.doc_to_dlpack() def to_dlpack(self): """{docstring}""" return cudf.io.dlpack.to_dlpack(self) - @_cudf_nvtx_annotate + @_performance_tracking def __str__(self): return repr(self) - @_cudf_nvtx_annotate + @_performance_tracking def __deepcopy__(self, memo): return self.copy(deep=True) - @_cudf_nvtx_annotate + @_performance_tracking def __copy__(self): return self.copy(deep=False) - @_cudf_nvtx_annotate + @_performance_tracking def __invert__(self): """Bitwise invert (~) for integral dtypes, logical NOT for bools.""" return self._from_data_like_self( @@ -1835,7 +1835,7 @@ def __invert__(self): ) ) - @_cudf_nvtx_annotate + @_performance_tracking def nunique(self, dropna: bool = True): """ Returns a per column mapping with counts of unique values for @@ -1856,7 +1856,7 @@ def nunique(self, dropna: bool = True): ) @staticmethod - @_cudf_nvtx_annotate + @_performance_tracking def _repeat( columns: list[ColumnBase], repeats, axis=None ) -> list[ColumnBase]: @@ -1870,7 +1870,7 @@ def _repeat( return libcudf.filling.repeat(columns, repeats) - @_cudf_nvtx_annotate + @_performance_tracking @_warn_no_dask_cudf def __dask_tokenize__(self): from dask.base import normalize_token diff --git a/python/cudf/cudf/core/groupby/groupby.py b/python/cudf/cudf/core/groupby/groupby.py index 77b54a583d3..eccb3acabf6 100644 --- a/python/cudf/cudf/core/groupby/groupby.py +++ b/python/cudf/cudf/core/groupby/groupby.py @@ -31,7 +31,7 @@ from cudf.core.mixins import Reducible, Scannable from cudf.core.multiindex import MultiIndex from cudf.core.udf.groupby_utils import _can_be_jitted, jit_groupby_apply -from cudf.utils.nvtx_annotation import _cudf_nvtx_annotate +from cudf.utils.performance_tracking import _performance_tracking from cudf.utils.utils import GetAttrGetItemMixin if TYPE_CHECKING: @@ -392,7 +392,7 @@ def indices(self): zip(index.to_pandas(), cp.split(indices.values, offsets[1:-1])) ) - @_cudf_nvtx_annotate + @_performance_tracking def get_group(self, name, obj=None): """ Construct DataFrame from group with provided name. @@ -436,7 +436,7 @@ def get_group(self, name, obj=None): ) return obj.iloc[self.indices[name]] - @_cudf_nvtx_annotate + @_performance_tracking def size(self): """ Return the size of each group. @@ -451,7 +451,7 @@ def size(self): .agg("size") ) - @_cudf_nvtx_annotate + @_performance_tracking def cumcount(self): """ Return the cumulative count of keys in each group. @@ -467,7 +467,7 @@ def cumcount(self): .agg("cumcount") ) - @_cudf_nvtx_annotate + @_performance_tracking def rank( self, method="average", @@ -521,7 +521,7 @@ def _groupby(self): [*self.grouping.keys._columns], dropna=self._dropna ) - @_cudf_nvtx_annotate + @_performance_tracking def agg(self, func): """ Apply aggregation(s) to the groups. 
@@ -821,7 +821,7 @@ def _head_tail(self, n, *, take_head: bool, preserve_order: bool): else: return result - @_cudf_nvtx_annotate + @_performance_tracking def head(self, n: int = 5, *, preserve_order: bool = True): """Return first n rows of each group @@ -874,7 +874,7 @@ def head(self, n: int = 5, *, preserve_order: bool = True): n, take_head=True, preserve_order=preserve_order ) - @_cudf_nvtx_annotate + @_performance_tracking def tail(self, n: int = 5, *, preserve_order: bool = True): """Return last n rows of each group @@ -928,7 +928,7 @@ def tail(self, n: int = 5, *, preserve_order: bool = True): n, take_head=False, preserve_order=preserve_order ) - @_cudf_nvtx_annotate + @_performance_tracking def nth(self, n): """ Return the nth row from each group. @@ -949,7 +949,7 @@ def nth(self, n): del self.obj._data["__groupbynth_order__"] return result - @_cudf_nvtx_annotate + @_performance_tracking def ngroup(self, ascending=True): """ Number each group from 0 to the number of groups - 1. @@ -1261,7 +1261,7 @@ def _normalize_aggs( ] return column_names, columns, normalized_aggs - @_cudf_nvtx_annotate + @_performance_tracking def pipe(self, func, *args, **kwargs): """ Apply a function `func` with arguments to this GroupBy @@ -1316,7 +1316,7 @@ def pipe(self, func, *args, **kwargs): """ return cudf.core.common.pipe(self, func, *args, **kwargs) - @_cudf_nvtx_annotate + @_performance_tracking def _jit_groupby_apply( self, function, group_names, offsets, group_keys, grouped_values, *args ): @@ -1327,7 +1327,7 @@ def _jit_groupby_apply( chunk_results, group_names, group_keys, grouped_values ) - @_cudf_nvtx_annotate + @_performance_tracking def _iterative_groupby_apply( self, function, group_names, offsets, group_keys, grouped_values, *args ): @@ -1415,7 +1415,7 @@ def _post_process_chunk_results( result.index = cudf.MultiIndex._from_data(index_data) return result - @_cudf_nvtx_annotate + @_performance_tracking def apply( self, function, *args, engine="auto", include_groups: bool = True ): @@ -1573,7 +1573,7 @@ def mult(df): result = result.reset_index() return result - @_cudf_nvtx_annotate + @_performance_tracking def apply_grouped(self, function, **kwargs): """Apply a transformation function over the grouped chunk. @@ -1712,7 +1712,7 @@ def rolling_avg(val, avg): kwargs.update({"chunks": offsets}) return grouped_values.apply_chunks(function, **kwargs) - @_cudf_nvtx_annotate + @_performance_tracking def _broadcast(self, values): """ Broadcast the results of an aggregation to the group @@ -1736,7 +1736,7 @@ def _broadcast(self, values): values.index = self.obj.index return values - @_cudf_nvtx_annotate + @_performance_tracking def transform(self, function): """Apply an aggregation, then broadcast the result to the group size. @@ -1801,7 +1801,7 @@ def rolling(self, *args, **kwargs): """ return cudf.core.window.rolling.RollingGroupby(self, *args, **kwargs) - @_cudf_nvtx_annotate + @_performance_tracking def count(self, dropna=True): """Compute the number of values in each column. @@ -1816,7 +1816,7 @@ def func(x): return self.agg(func) - @_cudf_nvtx_annotate + @_performance_tracking def describe(self, include=None, exclude=None): """ Generate descriptive statistics that summarizes the central tendency, @@ -1888,7 +1888,7 @@ def describe(self, include=None, exclude=None): ) return res - @_cudf_nvtx_annotate + @_performance_tracking def corr(self, method="pearson", min_periods=1): """ Compute pairwise correlation of columns, excluding NA/null values. 
@@ -1950,7 +1950,7 @@ def corr(self, method="pearson", min_periods=1): lambda x: x.corr(method, min_periods), "Correlation" ) - @_cudf_nvtx_annotate + @_performance_tracking def cov(self, min_periods=0, ddof=1): """ Compute the pairwise covariance among the columns of a DataFrame, @@ -2129,7 +2129,7 @@ def _cov_or_corr(self, func, method_name): return res - @_cudf_nvtx_annotate + @_performance_tracking def var(self, ddof=1): """Compute the column-wise variance of the values in each group. @@ -2145,7 +2145,7 @@ def func(x): return self.agg(func) - @_cudf_nvtx_annotate + @_performance_tracking def std(self, ddof=1): """Compute the column-wise std of the values in each group. @@ -2161,7 +2161,7 @@ def func(x): return self.agg(func) - @_cudf_nvtx_annotate + @_performance_tracking def quantile(self, q=0.5, interpolation="linear"): """Compute the column-wise quantiles of the values in each group. @@ -2179,18 +2179,18 @@ def func(x): return self.agg(func) - @_cudf_nvtx_annotate + @_performance_tracking def collect(self): """Get a list of all the values for each column in each group.""" _deprecate_collect() return self.agg(list) - @_cudf_nvtx_annotate + @_performance_tracking def unique(self): """Get a list of the unique values for each column in each group.""" return self.agg("unique") - @_cudf_nvtx_annotate + @_performance_tracking def diff(self, periods=1, axis=0): """Get the difference between the values in each group. @@ -2258,7 +2258,7 @@ def bfill(self, limit=None): return self._scan_fill("bfill", limit) - @_cudf_nvtx_annotate + @_performance_tracking def fillna( self, value=None, @@ -2325,7 +2325,7 @@ def fillna( value=value, inplace=inplace, axis=axis, limit=limit ) - @_cudf_nvtx_annotate + @_performance_tracking def shift(self, periods=1, freq=None, axis=0, fill_value=None): """ Shift each group by ``periods`` positions. @@ -2388,7 +2388,7 @@ def shift(self, periods=1, freq=None, axis=0, fill_value=None): result = self._mimic_pandas_order(result) return result._copy_type_metadata(values) - @_cudf_nvtx_annotate + @_performance_tracking def pct_change( self, periods=1, diff --git a/python/cudf/cudf/core/index.py b/python/cudf/cudf/core/index.py index 71658695b80..b398ee2343e 100644 --- a/python/cudf/cudf/core/index.py +++ b/python/cudf/cudf/core/index.py @@ -58,13 +58,24 @@ is_mixed_with_object_dtype, numeric_normalize_types, ) -from cudf.utils.nvtx_annotation import _cudf_nvtx_annotate +from cudf.utils.performance_tracking import _performance_tracking from cudf.utils.utils import _warn_no_dask_cudf, search_range if TYPE_CHECKING: from collections.abc import Generator, Iterable +def ensure_index(index_like: Any) -> BaseIndex: + """ + Ensure an Index is returned. + + Avoids a shallow copy compared to calling cudf.Index(...) 
+ """ + if not isinstance(index_like, BaseIndex): + return cudf.Index(index_like) + return index_like + + class IndexMeta(type): """Custom metaclass for Index that overrides instance/subclass tests.""" @@ -204,7 +215,7 @@ class RangeIndex(BaseIndex, BinaryOperand): _range: range - @_cudf_nvtx_annotate + @_performance_tracking def __init__( self, start, stop=None, step=1, dtype=None, copy=False, name=None ): @@ -259,17 +270,17 @@ def factorize(self, sort: bool = False, use_na_sentinel: bool = True): return codes, uniques @property # type: ignore - @_cudf_nvtx_annotate + @_performance_tracking def name(self): return self._name @name.setter # type: ignore - @_cudf_nvtx_annotate + @_performance_tracking def name(self, value): self._name = value @property # type: ignore - @_cudf_nvtx_annotate + @_performance_tracking def start(self) -> int: """ The value of the `start` parameter (0 if this was not supplied). @@ -277,7 +288,7 @@ def start(self) -> int: return self._range.start @property # type: ignore - @_cudf_nvtx_annotate + @_performance_tracking def stop(self) -> int: """ The value of the stop parameter. @@ -285,7 +296,7 @@ def stop(self) -> int: return self._range.stop @property # type: ignore - @_cudf_nvtx_annotate + @_performance_tracking def step(self) -> int: """ The value of the step parameter. @@ -293,12 +304,12 @@ def step(self) -> int: return self._range.step @property # type: ignore - @_cudf_nvtx_annotate + @_performance_tracking def _num_rows(self) -> int: return len(self) @cached_property # type: ignore - @_cudf_nvtx_annotate + @_performance_tracking def _values(self): if len(self) > 0: return column.as_column(self._range, dtype=self.dtype) @@ -330,18 +341,18 @@ def _is_interval(self) -> bool: return False @property # type: ignore - @_cudf_nvtx_annotate + @_performance_tracking def hasnans(self) -> bool: return False @property # type: ignore - @_cudf_nvtx_annotate + @_performance_tracking def _data(self): return cudf.core.column_accessor.ColumnAccessor( {self.name: self._values} ) - @_cudf_nvtx_annotate + @_performance_tracking def __contains__(self, item): hash(item) if isinstance(item, bool) or not isinstance( @@ -357,7 +368,7 @@ def __contains__(self, item): except (ValueError, OverflowError): return False - @_cudf_nvtx_annotate + @_performance_tracking def copy(self, name=None, deep=False): """ Make a copy of this object. 
@@ -377,7 +388,7 @@ def copy(self, name=None, deep=False): return RangeIndex(self._range, name=name) - @_cudf_nvtx_annotate + @_performance_tracking def astype(self, dtype, copy: bool = True): if is_dtype_equal(dtype, self.dtype): return self @@ -386,15 +397,15 @@ def astype(self, dtype, copy: bool = True): def fillna(self, value, downcast=None): return self.copy() - @_cudf_nvtx_annotate + @_performance_tracking def drop_duplicates(self, keep="first"): return self - @_cudf_nvtx_annotate + @_performance_tracking def duplicated(self, keep="first") -> cupy.ndarray: return cupy.zeros(len(self), dtype=bool) - @_cudf_nvtx_annotate + @_performance_tracking def __repr__(self): return ( f"{self.__class__.__name__}(start={self.start}, stop={self.stop}" @@ -408,15 +419,15 @@ def __repr__(self): ) @property - @_cudf_nvtx_annotate + @_performance_tracking def size(self) -> int: return len(self) - @_cudf_nvtx_annotate + @_performance_tracking def __len__(self): return len(self._range) - @_cudf_nvtx_annotate + @_performance_tracking def __getitem__(self, index): if isinstance(index, slice): sl_start, sl_stop, sl_step = index.indices(len(self)) @@ -435,13 +446,13 @@ def __getitem__(self, index): return self.start + index * self.step return self._as_int_index()[index] - @_cudf_nvtx_annotate + @_performance_tracking def equals(self, other) -> bool: if isinstance(other, RangeIndex): return self._range == other._range return self._as_int_index().equals(other) - @_cudf_nvtx_annotate + @_performance_tracking def serialize(self): header = {} header["index_column"] = {} @@ -462,7 +473,7 @@ def serialize(self): return header, frames @classmethod - @_cudf_nvtx_annotate + @_performance_tracking def deserialize(cls, header, frames): h = header["index_column"] name = pickle.loads(header["name"]) @@ -472,7 +483,7 @@ def deserialize(cls, header, frames): return RangeIndex(start=start, stop=stop, step=step, name=name) @property # type: ignore - @_cudf_nvtx_annotate + @_performance_tracking def dtype(self): """ `dtype` of the range of values in RangeIndex. @@ -487,7 +498,7 @@ def dtype(self): def _dtypes(self) -> Iterable: return [(self.name, self.dtype)] - @_cudf_nvtx_annotate + @_performance_tracking def to_pandas( self, *, nullable: bool = False, arrow_type: bool = False ) -> pd.RangeIndex: @@ -508,16 +519,16 @@ def is_unique(self) -> bool: return True @cached_property # type: ignore - @_cudf_nvtx_annotate + @_performance_tracking def is_monotonic_increasing(self) -> bool: return self.step > 0 or len(self) <= 1 @cached_property # type: ignore - @_cudf_nvtx_annotate + @_performance_tracking def is_monotonic_decreasing(self): return self.step < 0 or len(self) <= 1 - @_cudf_nvtx_annotate + @_performance_tracking def memory_usage(self, deep: bool = False) -> int: if deep: warnings.warn( @@ -530,7 +541,7 @@ def unique(self) -> Self: # RangeIndex always has unique values return self.copy() - @_cudf_nvtx_annotate + @_performance_tracking def __mul__(self, other): # Multiplication by raw ints must return a RangeIndex to match pandas. if isinstance(other, cudf.Scalar) and other.dtype.kind in "iu": @@ -547,24 +558,24 @@ def __mul__(self, other): ) return self._as_int_index().__mul__(other) - @_cudf_nvtx_annotate + @_performance_tracking def __rmul__(self, other): # Multiplication is commutative. return self.__mul__(other) - @_cudf_nvtx_annotate + @_performance_tracking def _as_int_index(self): # Convert self to an integer index. This method is used to perform ops # that are not defined directly on RangeIndex. 
return cudf.Index._from_data(self._data) - @_cudf_nvtx_annotate + @_performance_tracking def __array_ufunc__(self, ufunc, method, *inputs, **kwargs): return self._as_int_index().__array_ufunc__( ufunc, method, *inputs, **kwargs ) - @_cudf_nvtx_annotate + @_performance_tracking def get_indexer(self, target, limit=None, method=None, tolerance=None): target_col = cudf.core.column.as_column(target) if method is not None or not isinstance( @@ -594,7 +605,7 @@ def get_indexer(self, target, limit=None, method=None, tolerance=None): locs[valid] = len(self) - 1 - locs[valid] return locs - @_cudf_nvtx_annotate + @_performance_tracking def get_loc(self, key): if not is_scalar(key): raise TypeError("Should be a scalar-like") @@ -608,7 +619,7 @@ def get_loc(self, key): raise KeyError(key) return idx_int - @_cudf_nvtx_annotate + @_performance_tracking def _union(self, other, sort=None): if isinstance(other, RangeIndex): # Variable suffixes are of the @@ -685,7 +696,7 @@ def _union(self, other, sort=None): self._as_int_index()._union(other, sort=sort) ) - @_cudf_nvtx_annotate + @_performance_tracking def _intersection(self, other, sort=None): if not isinstance(other, RangeIndex): return self._try_reconstruct_range_index( @@ -733,7 +744,7 @@ def _intersection(self, other, sort=None): return self._try_reconstruct_range_index(new_index) - @_cudf_nvtx_annotate + @_performance_tracking def difference(self, other, sort=None): if isinstance(other, RangeIndex) and self.equals(other): return self[:0]._get_reconciled_name_object(other) @@ -785,14 +796,14 @@ def sort_values( else: return sorted_index - @_cudf_nvtx_annotate + @_performance_tracking def _gather(self, gather_map, nullify=False, check_bounds=True): gather_map = cudf.core.column.as_column(gather_map) return cudf.Index._from_data( {self.name: self._values.take(gather_map, nullify, check_bounds)} ) - @_cudf_nvtx_annotate + @_performance_tracking def _apply_boolean_mask(self, boolean_mask): return cudf.Index._from_data( {self.name: self._values.apply_boolean_mask(boolean_mask)} @@ -838,21 +849,21 @@ def join( ) @property # type: ignore - @_cudf_nvtx_annotate + @_performance_tracking def _column(self): return self._as_int_index()._column @property # type: ignore - @_cudf_nvtx_annotate + @_performance_tracking def _columns(self): return self._as_int_index()._columns @property # type: ignore - @_cudf_nvtx_annotate + @_performance_tracking def values_host(self) -> np.ndarray: return np.arange(start=self.start, stop=self.stop, step=self.step) - @_cudf_nvtx_annotate + @_performance_tracking def argsort( self, ascending=True, @@ -865,19 +876,19 @@ def argsort( else: return cupy.arange(len(self)) - @_cudf_nvtx_annotate + @_performance_tracking def where(self, cond, other=None, inplace=False): return self._as_int_index().where(cond, other, inplace) - @_cudf_nvtx_annotate + @_performance_tracking def to_numpy(self) -> np.ndarray: return self.values_host - @_cudf_nvtx_annotate + @_performance_tracking def to_cupy(self) -> cupy.ndarray: return self.values - @_cudf_nvtx_annotate + @_performance_tracking def to_arrow(self) -> pa.Array: return pa.array(self._range, type=pa.from_numpy_dtype(self.dtype)) @@ -889,23 +900,23 @@ def __array__(self, dtype=None): "using .to_numpy()." 
) - @_cudf_nvtx_annotate + @_performance_tracking def nunique(self, dropna: bool = True) -> int: return len(self) - @_cudf_nvtx_annotate + @_performance_tracking def isna(self) -> cupy.ndarray: return cupy.zeros(len(self), dtype=bool) isnull = isna - @_cudf_nvtx_annotate + @_performance_tracking def notna(self) -> cupy.ndarray: return cupy.ones(len(self), dtype=bool) notnull = isna - @_cudf_nvtx_annotate + @_performance_tracking def _minmax(self, meth: str): no_steps = len(self) - 1 if no_steps == -1: @@ -1004,12 +1015,12 @@ class Index(SingleColumnFrame, BaseIndex, metaclass=IndexMeta): Column's, the data Column will be cloned to adopt this name. """ - @_cudf_nvtx_annotate + @_performance_tracking def __init__(self, data, **kwargs): name = _getdefault_name(data, name=kwargs.get("name")) super().__init__({name: data}) - @_cudf_nvtx_annotate + @_performance_tracking def __array_ufunc__(self, ufunc, method, *inputs, **kwargs): ret = super().__array_ufunc__(ufunc, method, *inputs, **kwargs) @@ -1046,7 +1057,7 @@ def __array_ufunc__(self, ufunc, method, *inputs, **kwargs): return NotImplemented @classmethod - @_cudf_nvtx_annotate + @_performance_tracking def _from_data(cls, data: MutableMapping, name: Any = no_default) -> Self: out = super()._from_data(data=data) if name is not no_default: @@ -1054,7 +1065,7 @@ def _from_data(cls, data: MutableMapping, name: Any = no_default) -> Self: return out @classmethod - @_cudf_nvtx_annotate + @_performance_tracking def _from_data_like_self( cls, data: MutableMapping, name: Any = no_default ) -> Self: @@ -1064,7 +1075,7 @@ def _from_data_like_self( return out @classmethod - @_cudf_nvtx_annotate + @_performance_tracking def from_arrow(cls, obj): try: return cls(ColumnBase.from_arrow(obj)) @@ -1118,12 +1129,12 @@ def _binaryop( return ret @property # type: ignore - @_cudf_nvtx_annotate + @_performance_tracking def _values(self): return self._column @classmethod - @_cudf_nvtx_annotate + @_performance_tracking def _concat(cls, objs): non_empties = [index for index in objs if len(index)] if len(objs) != len(non_empties): @@ -1166,16 +1177,16 @@ def _concat(cls, objs): result.name = name return result - @_cudf_nvtx_annotate + @_performance_tracking def memory_usage(self, deep=False): return self._column.memory_usage @cached_property # type: ignore - @_cudf_nvtx_annotate + @_performance_tracking def is_unique(self): return self._column.is_unique - @_cudf_nvtx_annotate + @_performance_tracking def equals(self, other) -> bool: if not isinstance(other, BaseIndex) or len(self) != len(other): return False @@ -1198,7 +1209,7 @@ def equals(self, other) -> bool: except TypeError: return False - @_cudf_nvtx_annotate + @_performance_tracking def copy(self, name=None, deep=False): """ Make a copy of this object. 
@@ -1221,11 +1232,11 @@ def copy(self, name=None, deep=False): {name: self._values.copy(True) if deep else self._values} ) - @_cudf_nvtx_annotate + @_performance_tracking def astype(self, dtype, copy: bool = True): return super().astype({self.name: dtype}, copy) - @_cudf_nvtx_annotate + @_performance_tracking def get_indexer(self, target, method=None, limit=None, tolerance=None): if is_scalar(target): raise TypeError("Should be a sequence") @@ -1297,7 +1308,7 @@ def get_indexer(self, target, method=None, limit=None, tolerance=None): return _return_get_indexer_result(result_series.to_cupy()) - @_cudf_nvtx_annotate + @_performance_tracking def get_loc(self, key): if not is_scalar(key): raise TypeError("Should be a scalar-like") @@ -1333,7 +1344,7 @@ def get_loc(self, key): mask[true_inds] = True return mask - @_cudf_nvtx_annotate + @_performance_tracking def __repr__(self): max_seq_items = pd.get_option("max_seq_items") or len(self) mr = 0 @@ -1419,7 +1430,7 @@ def __repr__(self): lines.append(f"{prior_to_dtype} {keywords})") return "\n".join(lines) - @_cudf_nvtx_annotate + @_performance_tracking def __getitem__(self, index): res = self._get_elements_from_column(index) if isinstance(res, ColumnBase): @@ -1427,20 +1438,20 @@ def __getitem__(self, index): return res @property # type: ignore - @_cudf_nvtx_annotate + @_performance_tracking def dtype(self): """ `dtype` of the underlying values in Index. """ return self._values.dtype - @_cudf_nvtx_annotate + @_performance_tracking def isna(self): return self._column.isnull().values isnull = isna - @_cudf_nvtx_annotate + @_performance_tracking def notna(self): return self._column.notnull().values @@ -1470,11 +1481,11 @@ def _is_interval(self): return False @property # type: ignore - @_cudf_nvtx_annotate + @_performance_tracking def hasnans(self): return self._column.has_nulls(include_nan=True) - @_cudf_nvtx_annotate + @_performance_tracking def argsort( self, axis=0, @@ -1518,7 +1529,7 @@ def repeat(self, repeats, axis=None): Frame._repeat([*self._columns], repeats, axis), self._column_names ) - @_cudf_nvtx_annotate + @_performance_tracking def where(self, cond, other=None, inplace=False): result_col = super().where(cond, other, inplace) return self._mimic_inplace( @@ -1569,7 +1580,7 @@ def append(self, other): to_concat.append(obj) else: this = self - other = cudf.Index(other) + other = ensure_index(other) if len(this) == 0 or len(other) == 0: # we'll filter out empties later in ._concat @@ -1615,7 +1626,7 @@ def _indices_of(self, value): @copy_docstring(StringMethods) # type: ignore @property - @_cudf_nvtx_annotate + @_performance_tracking def str(self): if is_string_dtype(self.dtype): return StringMethods(parent=self) @@ -1698,7 +1709,7 @@ class DatetimeIndex(Index): dtype='datetime64[ns]', name='a') """ - @_cudf_nvtx_annotate + @_performance_tracking def __init__( self, data=None, @@ -1761,7 +1772,7 @@ def __init__( ): raise ValueError("No unique frequency found") - @_cudf_nvtx_annotate + @_performance_tracking def _copy_type_metadata(self: Self, other: Self) -> Self: super()._copy_type_metadata(other) self._freq = _validate_freq(other._freq) @@ -1783,7 +1794,7 @@ def __getitem__(self, index): return pd.Timestamp(value) return value - @_cudf_nvtx_annotate + @_performance_tracking def copy(self, name=None, deep=False): idx_copy = super().copy(name=name, deep=deep) return idx_copy._copy_type_metadata(self) @@ -1801,7 +1812,7 @@ def searchsorted( ) @property # type: ignore - @_cudf_nvtx_annotate + @_performance_tracking def year(self): """ The 
year of the datetime. @@ -1820,7 +1831,7 @@ def year(self): return self._get_dt_field("year") @property # type: ignore - @_cudf_nvtx_annotate + @_performance_tracking def month(self): """ The month as January=1, December=12. @@ -1839,7 +1850,7 @@ def month(self): return self._get_dt_field("month") @property # type: ignore - @_cudf_nvtx_annotate + @_performance_tracking def day(self): """ The day of the datetime. @@ -1858,7 +1869,7 @@ def day(self): return self._get_dt_field("day") @property # type: ignore - @_cudf_nvtx_annotate + @_performance_tracking def hour(self): """ The hours of the datetime. @@ -1879,7 +1890,7 @@ def hour(self): return self._get_dt_field("hour") @property # type: ignore - @_cudf_nvtx_annotate + @_performance_tracking def minute(self): """ The minutes of the datetime. @@ -1900,7 +1911,7 @@ def minute(self): return self._get_dt_field("minute") @property # type: ignore - @_cudf_nvtx_annotate + @_performance_tracking def second(self): """ The seconds of the datetime. @@ -1921,7 +1932,7 @@ def second(self): return self._get_dt_field("second") @property # type: ignore - @_cudf_nvtx_annotate + @_performance_tracking def microsecond(self): """ The microseconds of the datetime. @@ -1952,7 +1963,7 @@ def microsecond(self): ) @property # type: ignore - @_cudf_nvtx_annotate + @_performance_tracking def nanosecond(self): """ The nanoseconds of the datetime. @@ -1974,7 +1985,7 @@ def nanosecond(self): return self._get_dt_field("nanosecond") @property # type: ignore - @_cudf_nvtx_annotate + @_performance_tracking def weekday(self): """ The day of the week with Monday=0, Sunday=6. @@ -1996,7 +2007,7 @@ def weekday(self): return self._get_dt_field("weekday") @property # type: ignore - @_cudf_nvtx_annotate + @_performance_tracking def dayofweek(self): """ The day of the week with Monday=0, Sunday=6. @@ -2018,7 +2029,7 @@ def dayofweek(self): return self._get_dt_field("weekday") @property # type: ignore - @_cudf_nvtx_annotate + @_performance_tracking def dayofyear(self): """ The day of the year, from 1-365 in non-leap years and @@ -2041,7 +2052,7 @@ def dayofyear(self): return self._get_dt_field("day_of_year") @property # type: ignore - @_cudf_nvtx_annotate + @_performance_tracking def day_of_year(self): """ The day of the year, from 1-365 in non-leap years and @@ -2064,7 +2075,7 @@ def day_of_year(self): return self._get_dt_field("day_of_year") @property # type: ignore - @_cudf_nvtx_annotate + @_performance_tracking def is_leap_year(self): """ Boolean indicator if the date belongs to a leap year. @@ -2083,7 +2094,7 @@ def is_leap_year(self): return cupy.asarray(res) @property # type: ignore - @_cudf_nvtx_annotate + @_performance_tracking def quarter(self): """ Integer indicator for which quarter of the year the date belongs in. @@ -2108,7 +2119,7 @@ def quarter(self): res = extract_quarter(self._values) return Index(res, dtype="int8") - @_cudf_nvtx_annotate + @_performance_tracking def day_name(self, locale: str | None = None) -> Index: """ Return the day names. Currently supports English locale only. @@ -2128,7 +2139,7 @@ def day_name(self, locale: str | None = None) -> Index: day_names = self._column.get_day_names(locale) return Index._from_data({self.name: day_names}) - @_cudf_nvtx_annotate + @_performance_tracking def month_name(self, locale: str | None = None) -> Index: """ Return the month names. Currently supports English locale only. 
@@ -2147,7 +2158,7 @@ def month_name(self, locale: str | None = None) -> Index: month_names = self._column.get_month_names(locale) return Index._from_data({self.name: month_names}) - @_cudf_nvtx_annotate + @_performance_tracking def isocalendar(self) -> cudf.DataFrame: """ Returns a DataFrame with the year, week, and day @@ -2172,7 +2183,7 @@ def isocalendar(self) -> cudf.DataFrame: ) return cudf.DataFrame._from_data(ca, index=self) - @_cudf_nvtx_annotate + @_performance_tracking def to_pandas( self, *, nullable: bool = False, arrow_type: bool = False ) -> pd.DatetimeIndex: @@ -2181,7 +2192,7 @@ def to_pandas( result.freq = self._freq._maybe_as_fast_pandas_offset() return result - @_cudf_nvtx_annotate + @_performance_tracking def _get_dt_field(self, field): out_column = self._values.get_dt_field(field) # column.column_empty_like always returns a Column object @@ -2198,7 +2209,7 @@ def _get_dt_field(self, field): def _is_boolean(self): return False - @_cudf_nvtx_annotate + @_performance_tracking def ceil(self, freq): """ Perform ceil operation on the data to the specified freq. @@ -2231,7 +2242,7 @@ def ceil(self, freq): return self.__class__._from_data({self.name: out_column}) - @_cudf_nvtx_annotate + @_performance_tracking def floor(self, freq): """ Perform floor operation on the data to the specified freq. @@ -2264,7 +2275,7 @@ def floor(self, freq): return self.__class__._from_data({self.name: out_column}) - @_cudf_nvtx_annotate + @_performance_tracking def round(self, freq): """ Perform round operation on the data to the specified freq. @@ -2452,7 +2463,7 @@ class TimedeltaIndex(Index): dtype='timedelta64[s]', name='delta-index') """ - @_cudf_nvtx_annotate + @_performance_tracking def __init__( self, data=None, @@ -2500,7 +2511,7 @@ def __getitem__(self, index): return value @property # type: ignore - @_cudf_nvtx_annotate + @_performance_tracking def days(self): """ Number of days for each element. @@ -2509,7 +2520,7 @@ def days(self): return Index(self._values.days, name=self.name, dtype="int64") @property # type: ignore - @_cudf_nvtx_annotate + @_performance_tracking def seconds(self): """ Number of seconds (>= 0 and less than 1 day) for each element. @@ -2517,7 +2528,7 @@ def seconds(self): return Index(self._values.seconds, name=self.name, dtype="int32") @property # type: ignore - @_cudf_nvtx_annotate + @_performance_tracking def microseconds(self): """ Number of microseconds (>= 0 and less than 1 second) for each element. @@ -2525,7 +2536,7 @@ def microseconds(self): return Index(self._values.microseconds, name=self.name, dtype="int32") @property # type: ignore - @_cudf_nvtx_annotate + @_performance_tracking def nanoseconds(self): """ Number of nanoseconds (>= 0 and less than 1 microsecond) for each @@ -2534,7 +2545,7 @@ def nanoseconds(self): return Index(self._values.nanoseconds, name=self.name, dtype="int32") @property # type: ignore - @_cudf_nvtx_annotate + @_performance_tracking def components(self): """ Return a dataframe of the components (days, hours, minutes, @@ -2612,7 +2623,7 @@ class CategoricalIndex(Index): CategoricalIndex([1, 2, 3, ], categories=[1, 2, 3], ordered=False, dtype='category', name='a') """ # noqa: E501 - @_cudf_nvtx_annotate + @_performance_tracking def __init__( self, data=None, @@ -2667,7 +2678,7 @@ def __init__( super().__init__(data, name=name) @property # type: ignore - @_cudf_nvtx_annotate + @_performance_tracking def codes(self): """ The category codes of this categorical. 
@@ -2675,7 +2686,7 @@ def codes(self): return Index(self._values.codes) @property # type: ignore - @_cudf_nvtx_annotate + @_performance_tracking def categories(self): """ The categories of this categorical. @@ -2689,7 +2700,7 @@ def _is_categorical(self): return True -@_cudf_nvtx_annotate +@_performance_tracking def interval_range( start=None, end=None, @@ -2841,7 +2852,7 @@ class IntervalIndex(Index): IntervalIndex """ - @_cudf_nvtx_annotate + @_performance_tracking def __init__( self, data, @@ -2900,7 +2911,7 @@ def closed(self): return self.dtype.closed @classmethod - @_cudf_nvtx_annotate + @_performance_tracking def from_breaks( cls, breaks, @@ -2975,7 +2986,7 @@ def _clean_nulls_from_index(self): return self -@_cudf_nvtx_annotate +@_performance_tracking def as_index( arbitrary, nan_as_null=no_default, copy=False, name=no_default, dtype=None ) -> BaseIndex: @@ -3090,7 +3101,7 @@ def _getdefault_name(values, name): return name -@_cudf_nvtx_annotate +@_performance_tracking def _concat_range_index(indexes: list[RangeIndex]) -> BaseIndex: """ An internal Utility function to concat RangeIndex objects. @@ -3131,7 +3142,7 @@ def _concat_range_index(indexes: list[RangeIndex]) -> BaseIndex: return RangeIndex(start, stop, step) -@_cudf_nvtx_annotate +@_performance_tracking def _extended_gcd(a: int, b: int) -> tuple[int, int, int]: """ Extended Euclidean algorithms to solve Bezout's identity: diff --git a/python/cudf/cudf/core/indexed_frame.py b/python/cudf/cudf/core/indexed_frame.py index 280a6e92eab..ff10051c52d 100644 --- a/python/cudf/cudf/core/indexed_frame.py +++ b/python/cudf/cudf/core/indexed_frame.py @@ -33,7 +33,6 @@ is_list_like, is_scalar, ) -from cudf.core._base_index import BaseIndex from cudf.core._compat import PANDAS_LT_300 from cudf.core.buffer import acquire_spill_lock from cudf.core.column import ColumnBase, as_column @@ -42,7 +41,7 @@ from cudf.core.dtypes import ListDtype from cudf.core.frame import Frame from cudf.core.groupby.groupby import GroupBy -from cudf.core.index import Index, RangeIndex, _index_from_data +from cudf.core.index import RangeIndex, _index_from_data, ensure_index from cudf.core.missing import NA from cudf.core.multiindex import MultiIndex from cudf.core.resample import _Resampler @@ -56,7 +55,7 @@ from cudf.utils import docutils, ioutils from cudf.utils._numba import _CUDFNumbaConfig from cudf.utils.docutils import copy_docstring -from cudf.utils.nvtx_annotation import _cudf_nvtx_annotate +from cudf.utils.performance_tracking import _performance_tracking from cudf.utils.utils import _warn_no_dask_cudf if TYPE_CHECKING: @@ -66,6 +65,8 @@ Dtype, NotImplementedType, ) + from cudf.core._base_index import BaseIndex + doc_reset_index_template = """ Reset the index of the {klass}, or a level of it. @@ -301,13 +302,13 @@ def _from_data( out._index = RangeIndex(out._data.nrows) if index is None else index return out - @_cudf_nvtx_annotate + @_performance_tracking def _from_data_like_self(self, data: MutableMapping): out = super()._from_data_like_self(data) out.index = self.index return out - @_cudf_nvtx_annotate + @_performance_tracking def _from_columns_like_self( self, columns: list[ColumnBase], @@ -363,7 +364,7 @@ def _mimic_inplace( self._index = result.index return super()._mimic_inplace(result, inplace) - @_cudf_nvtx_annotate + @_performance_tracking def _scan(self, op, axis=None, skipna=True): """ Return {op_name} of the {cls}. 
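# --- Editor's note (not part of the patch): the indexed_frame.py hunk above
# moves the `BaseIndex` import under `if TYPE_CHECKING:` because, after the
# switch to `ensure_index`, the name is only used in type annotations. The
# general pattern is sketched below; `_first_index` is a hypothetical helper
# used purely for illustration.
from __future__ import annotations

from typing import TYPE_CHECKING

if TYPE_CHECKING:
    # Imported for static type checkers only; skipped at runtime.
    from cudf.core._base_index import BaseIndex


def _first_index(objs: list[BaseIndex]) -> BaseIndex:  # hypothetical helper
    return objs[0]
# --- end editor's note ---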
@@ -439,7 +440,7 @@ def _check_data_index_length_match(self) -> None: ) @property - @_cudf_nvtx_annotate + @_performance_tracking def empty(self): """ Indicator whether DataFrame or Series is empty. @@ -501,7 +502,7 @@ def empty(self): """ return self.size == 0 - @_cudf_nvtx_annotate + @_performance_tracking @ioutils.doc_to_json() def to_json(self, path_or_buf=None, *args, **kwargs): """{docstring}""" @@ -510,14 +511,14 @@ def to_json(self, path_or_buf=None, *args, **kwargs): self, path_or_buf=path_or_buf, *args, **kwargs ) - @_cudf_nvtx_annotate + @_performance_tracking @ioutils.doc_to_hdf() def to_hdf(self, path_or_buf, key, *args, **kwargs): """{docstring}""" cudf.io.hdf.to_hdf(path_or_buf, key, self, *args, **kwargs) - @_cudf_nvtx_annotate + @_performance_tracking def to_string(self): r""" Convert to string @@ -606,7 +607,7 @@ def copy(self, deep: bool = True) -> Self: self.index.copy(deep=False), ) - @_cudf_nvtx_annotate + @_performance_tracking def equals(self, other) -> bool: # noqa: D102 return super().equals(other) and self.index.equals(other.index) @@ -627,12 +628,10 @@ def index(self, value): f"new values have {len(value)} elements" ) # avoid unnecessary cast to Index - if not isinstance(value, BaseIndex): - value = Index(value) - + value = ensure_index(value) self._index = value - @_cudf_nvtx_annotate + @_performance_tracking def replace( self, to_replace=None, @@ -900,7 +899,7 @@ def replace( return self._mimic_inplace(result, inplace=inplace) - @_cudf_nvtx_annotate + @_performance_tracking def clip(self, lower=None, upper=None, inplace=False, axis=1): """ Trim values at input threshold(s). @@ -1026,7 +1025,7 @@ def clip(self, lower=None, upper=None, inplace=False, axis=1): ) return self._mimic_inplace(output, inplace=inplace) - @_cudf_nvtx_annotate + @_performance_tracking def abs(self): """ Return a Series/DataFrame with absolute numeric value of each element. @@ -1052,7 +1051,7 @@ def abs(self): """ return self._unaryop("abs") - @_cudf_nvtx_annotate + @_performance_tracking def dot(self, other, reflect=False): """ Get dot product of frame and other, (binary operator `dot`). @@ -1159,15 +1158,15 @@ def dot(self, other, reflect=False): ) return result.item() - @_cudf_nvtx_annotate + @_performance_tracking def __matmul__(self, other): return self.dot(other) - @_cudf_nvtx_annotate + @_performance_tracking def __rmatmul__(self, other): return self.dot(other, reflect=True) - @_cudf_nvtx_annotate + @_performance_tracking def head(self, n=5): """ Return the first `n` rows. @@ -1246,7 +1245,7 @@ def head(self, n=5): """ return self.iloc[:n] - @_cudf_nvtx_annotate + @_performance_tracking def tail(self, n=5): """ Returns the last n rows as a new DataFrame or Series @@ -1277,7 +1276,7 @@ def tail(self, n=5): return self.iloc[-n:] - @_cudf_nvtx_annotate + @_performance_tracking def pipe(self, func, *args, **kwargs): """ Apply ``func(self, *args, **kwargs)``. @@ -1324,7 +1323,7 @@ def pipe(self, func, *args, **kwargs): """ return cudf.core.common.pipe(self, func, *args, **kwargs) - @_cudf_nvtx_annotate + @_performance_tracking def sum( self, axis=no_default, @@ -1385,7 +1384,7 @@ def sum( **kwargs, ) - @_cudf_nvtx_annotate + @_performance_tracking def product( self, axis=no_default, @@ -1452,7 +1451,7 @@ def product( # Alias for pandas compatibility. prod = product - @_cudf_nvtx_annotate + @_performance_tracking def mean(self, axis=0, skipna=True, numeric_only=False, **kwargs): """ Return the mean of the values for the requested axis. 
@@ -1541,7 +1540,7 @@ def median( **kwargs, ) - @_cudf_nvtx_annotate + @_performance_tracking def std( self, axis=no_default, @@ -1600,7 +1599,7 @@ def std( **kwargs, ) - @_cudf_nvtx_annotate + @_performance_tracking def var( self, axis=no_default, @@ -1658,7 +1657,7 @@ def var( **kwargs, ) - @_cudf_nvtx_annotate + @_performance_tracking def kurtosis(self, axis=0, skipna=True, numeric_only=False, **kwargs): """ Return Fisher's unbiased kurtosis of a sample. @@ -1718,7 +1717,7 @@ def kurtosis(self, axis=0, skipna=True, numeric_only=False, **kwargs): # Alias for kurtosis. kurt = kurtosis - @_cudf_nvtx_annotate + @_performance_tracking def skew(self, axis=0, skipna=True, numeric_only=False, **kwargs): """ Return unbiased Fisher-Pearson skew of a sample. @@ -1777,7 +1776,7 @@ def skew(self, axis=0, skipna=True, numeric_only=False, **kwargs): **kwargs, ) - @_cudf_nvtx_annotate + @_performance_tracking def mask(self, cond, other=None, inplace: bool = False) -> Self | None: """ Replace values where the condition is True. @@ -1839,7 +1838,7 @@ def mask(self, cond, other=None, inplace: bool = False) -> Self | None: return self.where(cond=~cond, other=other, inplace=inplace) - @_cudf_nvtx_annotate + @_performance_tracking @copy_docstring(Rolling) def rolling( self, window, min_periods=None, center=False, axis=0, win_type=None @@ -1879,7 +1878,7 @@ def ewm( times=times, ) - @_cudf_nvtx_annotate + @_performance_tracking def nans_to_nulls(self): """ Convert nans (if any) to nulls @@ -1935,7 +1934,7 @@ def nans_to_nulls(self): self._data._from_columns_like_self(result) ) - @_cudf_nvtx_annotate + @_performance_tracking def interpolate( self, method="linear", @@ -2034,7 +2033,7 @@ def interpolate( ) ) - @_cudf_nvtx_annotate + @_performance_tracking def shift(self, periods=1, freq=None, axis=0, fill_value=None): """Shift values by `periods` positions.""" axis = self._get_axis_from_axis_arg(axis) @@ -2050,7 +2049,7 @@ def shift(self, periods=1, freq=None, axis=0, fill_value=None): self._data._from_columns_like_self(data_columns) ) - @_cudf_nvtx_annotate + @_performance_tracking def truncate(self, before=None, after=None, axis=0, copy=True): """ Truncate a Series or DataFrame before and after some index value. @@ -2398,7 +2397,7 @@ def iloc(self): return self._iloc_indexer_type(self) @property # type:ignore - @_cudf_nvtx_annotate + @_performance_tracking def axes(self): """ Return a list representing the axes of the Series. @@ -2530,7 +2529,7 @@ def squeeze(self, axis: Literal["index", "columns", 0, 1, None] = None): ) return self.iloc[indexer] - @_cudf_nvtx_annotate + @_performance_tracking def scale(self): """ Scale values to [0, 1] in float64 @@ -2565,7 +2564,7 @@ def scale(self): scaled.index = self.index.copy(deep=False) return scaled - @_cudf_nvtx_annotate + @_performance_tracking def sort_index( self, axis=0, @@ -3070,7 +3069,7 @@ def drop_duplicates( self.index.names if not ignore_index else None, ) - @_cudf_nvtx_annotate + @_performance_tracking def duplicated(self, subset=None, keep="first"): """ Return boolean Series denoting duplicate rows. 
@@ -3180,7 +3179,7 @@ def duplicated(self, subset=None, keep="first"): ) return cudf.Series(result, index=self.index) - @_cudf_nvtx_annotate + @_performance_tracking def _empty_like(self, keep_index=True) -> Self: result = self._from_columns_like_self( libcudf.copying.columns_empty_like( @@ -3217,7 +3216,7 @@ def _split(self, splits, keep_index=True): for i in range(len(splits) + 1) ] - @_cudf_nvtx_annotate + @_performance_tracking def bfill(self, value=None, axis=None, inplace=None, limit=None): """ Synonym for :meth:`Series.fillna` with ``method='bfill'``. @@ -3236,7 +3235,7 @@ def bfill(self, value=None, axis=None, inplace=None, limit=None): limit=limit, ) - @_cudf_nvtx_annotate + @_performance_tracking def backfill(self, value=None, axis=None, inplace=None, limit=None): """ Synonym for :meth:`Series.fillna` with ``method='bfill'``. @@ -3256,7 +3255,7 @@ def backfill(self, value=None, axis=None, inplace=None, limit=None): ) return self.bfill(value=value, axis=axis, inplace=inplace, limit=limit) - @_cudf_nvtx_annotate + @_performance_tracking def ffill(self, value=None, axis=None, inplace=None, limit=None): """ Synonym for :meth:`Series.fillna` with ``method='ffill'``. @@ -3275,7 +3274,7 @@ def ffill(self, value=None, axis=None, inplace=None, limit=None): limit=limit, ) - @_cudf_nvtx_annotate + @_performance_tracking def pad(self, value=None, axis=None, inplace=None, limit=None): """ Synonym for :meth:`Series.fillna` with ``method='ffill'``. @@ -3415,7 +3414,7 @@ def add_suffix(self, suffix): raise NotImplementedError @acquire_spill_lock() - @_cudf_nvtx_annotate + @_performance_tracking def _apply(self, func, kernel_getter, *args, **kwargs): """Apply `func` across the rows of the frame.""" if kwargs: @@ -3595,7 +3594,7 @@ def _align_to_index( sort: bool = True, allow_non_unique: bool = False, ) -> Self: - index = cudf.Index(index) + index = ensure_index(index) if self.index.equals(index): return self @@ -3626,7 +3625,7 @@ def _align_to_index( out.index.names = self.index.names return out - @_cudf_nvtx_annotate + @_performance_tracking def _reindex( self, column_names, @@ -4154,7 +4153,7 @@ def dropna( return self._mimic_inplace(result, inplace=inplace) - @_cudf_nvtx_annotate + @_performance_tracking def _drop_na_columns(self, how="any", subset=None, thresh=None): """ Drop columns containing nulls @@ -4471,7 +4470,7 @@ def last(self, offset): slice_func=lambda i: self.iloc[i:], ) - @_cudf_nvtx_annotate + @_performance_tracking def sample( self, n=None, @@ -4751,7 +4750,7 @@ def __array_ufunc__(self, ufunc, method, *inputs, **kwargs): return NotImplemented - @_cudf_nvtx_annotate + @_performance_tracking def repeat(self, repeats, axis=None): """Repeats elements consecutively. @@ -4949,7 +4948,7 @@ def astype( raise e return self - @_cudf_nvtx_annotate + @_performance_tracking def drop( self, labels=None, @@ -5161,7 +5160,7 @@ def drop( if not inplace: return out - @_cudf_nvtx_annotate + @_performance_tracking def _explode(self, explode_column: Any, ignore_index: bool): # Helper function for `explode` in `Series` and `Dataframe`, explodes a # specified nested column. Other columns' corresponding rows are @@ -5200,7 +5199,7 @@ def _explode(self, explode_column: Any, ignore_index: bool): self.index.names if not ignore_index else None, ) - @_cudf_nvtx_annotate + @_performance_tracking def tile(self, count): """Repeats the rows `count` times to form a new Frame. 
@@ -5233,7 +5232,7 @@ def tile(self, count): index_names=self._index_names, ) - @_cudf_nvtx_annotate + @_performance_tracking def groupby( self, by=None, @@ -5283,7 +5282,7 @@ def groupby( ) ) - @_cudf_nvtx_annotate + @_performance_tracking @docutils.doc_apply( doc_binop_template.format( operation="Addition", @@ -5324,7 +5323,7 @@ def add(self, other, axis, level=None, fill_value=None): # noqa: D102 return self._binaryop(other, "__add__", fill_value) - @_cudf_nvtx_annotate + @_performance_tracking @docutils.doc_apply( doc_binop_template.format( operation="Addition", @@ -5365,7 +5364,7 @@ def radd(self, other, axis, level=None, fill_value=None): # noqa: D102 return self._binaryop(other, "__radd__", fill_value) - @_cudf_nvtx_annotate + @_performance_tracking @docutils.doc_apply( doc_binop_template.format( operation="Subtraction", @@ -5408,7 +5407,7 @@ def subtract(self, other, axis, level=None, fill_value=None): # noqa: D102 sub = subtract - @_cudf_nvtx_annotate + @_performance_tracking @docutils.doc_apply( doc_binop_template.format( operation="Subtraction", @@ -5449,7 +5448,7 @@ def rsub(self, other, axis, level=None, fill_value=None): # noqa: D102 return self._binaryop(other, "__rsub__", fill_value) - @_cudf_nvtx_annotate + @_performance_tracking @docutils.doc_apply( doc_binop_template.format( operation="Multiplication", @@ -5492,7 +5491,7 @@ def multiply(self, other, axis, level=None, fill_value=None): # noqa: D102 mul = multiply - @_cudf_nvtx_annotate + @_performance_tracking @docutils.doc_apply( doc_binop_template.format( operation="Multiplication", @@ -5533,7 +5532,7 @@ def rmul(self, other, axis, level=None, fill_value=None): # noqa: D102 return self._binaryop(other, "__rmul__", fill_value) - @_cudf_nvtx_annotate + @_performance_tracking @docutils.doc_apply( doc_binop_template.format( operation="Modulo", @@ -5574,7 +5573,7 @@ def mod(self, other, axis, level=None, fill_value=None): # noqa: D102 return self._binaryop(other, "__mod__", fill_value) - @_cudf_nvtx_annotate + @_performance_tracking @docutils.doc_apply( doc_binop_template.format( operation="Modulo", @@ -5615,7 +5614,7 @@ def rmod(self, other, axis, level=None, fill_value=None): # noqa: D102 return self._binaryop(other, "__rmod__", fill_value) - @_cudf_nvtx_annotate + @_performance_tracking @docutils.doc_apply( doc_binop_template.format( operation="Exponential", @@ -5656,7 +5655,7 @@ def pow(self, other, axis, level=None, fill_value=None): # noqa: D102 return self._binaryop(other, "__pow__", fill_value) - @_cudf_nvtx_annotate + @_performance_tracking @docutils.doc_apply( doc_binop_template.format( operation="Exponential", @@ -5697,7 +5696,7 @@ def rpow(self, other, axis, level=None, fill_value=None): # noqa: D102 return self._binaryop(other, "__rpow__", fill_value) - @_cudf_nvtx_annotate + @_performance_tracking @docutils.doc_apply( doc_binop_template.format( operation="Integer division", @@ -5738,7 +5737,7 @@ def floordiv(self, other, axis, level=None, fill_value=None): # noqa: D102 return self._binaryop(other, "__floordiv__", fill_value) - @_cudf_nvtx_annotate + @_performance_tracking @docutils.doc_apply( doc_binop_template.format( operation="Integer division", @@ -5779,7 +5778,7 @@ def rfloordiv(self, other, axis, level=None, fill_value=None): # noqa: D102 return self._binaryop(other, "__rfloordiv__", fill_value) - @_cudf_nvtx_annotate + @_performance_tracking @docutils.doc_apply( doc_binop_template.format( operation="Floating division", @@ -5824,7 +5823,7 @@ def truediv(self, other, axis, level=None, fill_value=None): # 
noqa: D102 div = truediv divide = truediv - @_cudf_nvtx_annotate + @_performance_tracking @docutils.doc_apply( doc_binop_template.format( operation="Floating division", @@ -5868,7 +5867,7 @@ def rtruediv(self, other, axis, level=None, fill_value=None): # noqa: D102 # Alias for rtruediv rdiv = rtruediv - @_cudf_nvtx_annotate + @_performance_tracking @docutils.doc_apply( doc_binop_template.format( operation="Equal to", @@ -5908,7 +5907,7 @@ def eq(self, other, axis="columns", level=None, fill_value=None): # noqa: D102 other=other, op="__eq__", fill_value=fill_value, can_reindex=True ) - @_cudf_nvtx_annotate + @_performance_tracking @docutils.doc_apply( doc_binop_template.format( operation="Not equal to", @@ -5948,7 +5947,7 @@ def ne(self, other, axis="columns", level=None, fill_value=None): # noqa: D102 other=other, op="__ne__", fill_value=fill_value, can_reindex=True ) - @_cudf_nvtx_annotate + @_performance_tracking @docutils.doc_apply( doc_binop_template.format( operation="Less than", @@ -5988,7 +5987,7 @@ def lt(self, other, axis="columns", level=None, fill_value=None): # noqa: D102 other=other, op="__lt__", fill_value=fill_value, can_reindex=True ) - @_cudf_nvtx_annotate + @_performance_tracking @docutils.doc_apply( doc_binop_template.format( operation="Less than or equal to", @@ -6028,7 +6027,7 @@ def le(self, other, axis="columns", level=None, fill_value=None): # noqa: D102 other=other, op="__le__", fill_value=fill_value, can_reindex=True ) - @_cudf_nvtx_annotate + @_performance_tracking @docutils.doc_apply( doc_binop_template.format( operation="Greater than", @@ -6068,7 +6067,7 @@ def gt(self, other, axis="columns", level=None, fill_value=None): # noqa: D102 other=other, op="__gt__", fill_value=fill_value, can_reindex=True ) - @_cudf_nvtx_annotate + @_performance_tracking @docutils.doc_apply( doc_binop_template.format( operation="Greater than or equal to", @@ -6123,7 +6122,7 @@ def _preprocess_subset(self, subset): raise KeyError(f"columns {diff} do not exist") return subset - @_cudf_nvtx_annotate + @_performance_tracking def rank( self, axis=0, @@ -6291,7 +6290,7 @@ def _check_duplicate_level_names(specified, level_names): ) -@_cudf_nvtx_annotate +@_performance_tracking def _get_replacement_values_for_columns( to_replace: Any, value: Any, columns_dtype_map: dict[Any, Any] ) -> tuple[dict[Any, bool], dict[Any, Any], dict[Any, Any]]: @@ -6458,7 +6457,7 @@ def _is_series(obj): return isinstance(obj, Frame) and obj.ndim == 1 and obj.index is not None -@_cudf_nvtx_annotate +@_performance_tracking def _drop_rows_by_labels( obj: DataFrameOrSeries, labels: ColumnLike | abc.Iterable | str, diff --git a/python/cudf/cudf/core/multiindex.py b/python/cudf/cudf/core/multiindex.py index a01242d957d..9cbe863142b 100644 --- a/python/cudf/cudf/core/multiindex.py +++ b/python/cudf/cudf/core/multiindex.py @@ -23,15 +23,17 @@ from cudf.api.types import is_integer, is_list_like, is_object_dtype from cudf.core import column from cudf.core._base_index import _return_get_indexer_result +from cudf.core.column_accessor import ColumnAccessor from cudf.core.frame import Frame from cudf.core.index import ( BaseIndex, _get_indexer_basic, _lexsorted_equal_range, + ensure_index, ) from cudf.core.join._join_helpers import _match_join_keys from cudf.utils.dtypes import is_column_like -from cudf.utils.nvtx_annotation import _cudf_nvtx_annotate +from cudf.utils.performance_tracking import _performance_tracking from cudf.utils.utils import NotIterable, _external_only_api, _is_same_name if TYPE_CHECKING: @@ -125,7 +127,7 
@@ class MultiIndex(Frame, BaseIndex, NotIterable): ) """ - @_cudf_nvtx_annotate + @_performance_tracking def __init__( self, levels=None, @@ -172,7 +174,7 @@ def __init__( "codes and is inconsistent!" ) - levels = [cudf.Index(level) for level in levels] + levels = [ensure_index(level) for level in levels] if len(levels) != len(codes._data): raise ValueError( @@ -210,12 +212,12 @@ def __init__( self.names = names @property # type: ignore - @_cudf_nvtx_annotate + @_performance_tracking def names(self): return self._names @names.setter # type: ignore - @_cudf_nvtx_annotate + @_performance_tracking def names(self, value): if value is None: value = [None] * self.nlevels @@ -241,13 +243,13 @@ def names(self, value): ) self._names = pd.core.indexes.frozen.FrozenList(value) - @_cudf_nvtx_annotate + @_performance_tracking def to_series(self, index=None, name=None): raise NotImplementedError( "MultiIndex.to_series isn't implemented yet." ) - @_cudf_nvtx_annotate + @_performance_tracking def astype(self, dtype, copy: bool = True): if not is_object_dtype(dtype): raise TypeError( @@ -256,7 +258,7 @@ def astype(self, dtype, copy: bool = True): ) return self - @_cudf_nvtx_annotate + @_performance_tracking def rename(self, names, inplace=False): """ Alter MultiIndex level names @@ -303,7 +305,7 @@ def rename(self, names, inplace=False): """ return self.set_names(names, level=None, inplace=inplace) - @_cudf_nvtx_annotate + @_performance_tracking def set_names(self, names, level=None, inplace=False): names_is_list_like = is_list_like(names) level_is_list_like = is_list_like(level) @@ -341,7 +343,7 @@ def set_names(self, names, level=None, inplace=False): return self._set_names(names=names, inplace=inplace) @classmethod - @_cudf_nvtx_annotate + @_performance_tracking def _from_data( cls, data: MutableMapping, @@ -353,16 +355,16 @@ def _from_data( return obj @property # type: ignore - @_cudf_nvtx_annotate + @_performance_tracking def name(self): return self._name @name.setter # type: ignore - @_cudf_nvtx_annotate + @_performance_tracking def name(self, value): self._name = value - @_cudf_nvtx_annotate + @_performance_tracking def copy( self, names=None, @@ -431,7 +433,7 @@ def copy( return mi - @_cudf_nvtx_annotate + @_performance_tracking def __repr__(self): max_seq_items = pd.get_option("display.max_seq_items") or len(self) @@ -446,45 +448,26 @@ def __repr__(self): ) preprocess = self.take(indices) else: - preprocess = self.copy(deep=False) - - if any(col.has_nulls() for col in preprocess._data.columns): - preprocess_df = preprocess.to_frame(index=False) - for name, col in preprocess._data.items(): - if isinstance( - col, - ( - column.datetime.DatetimeColumn, - column.timedelta.TimeDeltaColumn, - ), - ): - preprocess_df[name] = col.astype("str").fillna( - str(cudf.NaT) - ) + preprocess = self - tuples_list = list( - zip( - *list( - map(lambda val: pd.NA if val is None else val, col) - for col in preprocess_df.to_arrow() - .to_pydict() - .values() - ) - ) - ) + arrays = [] + for name, col in zip(self.names, preprocess._columns): + try: + pd_idx = col.to_pandas(nullable=True) + except NotImplementedError: + pd_idx = col.to_pandas(nullable=False) + pd_idx.name = name + arrays.append(pd_idx) - preprocess = preprocess.to_pandas(nullable=True) - preprocess.values[:] = tuples_list - else: - preprocess = preprocess.to_pandas(nullable=True) + preprocess_pd = pd.MultiIndex.from_arrays(arrays) - output = repr(preprocess) + output = repr(preprocess_pd) output_prefix = self.__class__.__name__ + "(" output = 
output.lstrip(output_prefix) lines = output.split("\n") if len(lines) > 1: - if "length=" in lines[-1] and len(self) != len(preprocess): + if "length=" in lines[-1] and len(self) != len(preprocess_pd): last_line = lines[-1] length_index = last_line.index("length=") last_line = last_line[:length_index] + f"length={len(self)})" @@ -502,7 +485,7 @@ def _codes_frame(self): @property # type: ignore @_external_only_api("Use ._codes_frame instead") - @_cudf_nvtx_annotate + @_performance_tracking def codes(self): """ Returns the codes of the underlying MultiIndex. @@ -528,13 +511,13 @@ def get_slice_bound(self, label, side, kind=None): raise NotImplementedError() @property # type: ignore - @_cudf_nvtx_annotate + @_performance_tracking def nlevels(self): """Integer number of levels in this MultiIndex.""" return self._num_columns @property # type: ignore - @_cudf_nvtx_annotate + @_performance_tracking def levels(self): """ Returns list of levels in the MultiIndex @@ -566,12 +549,12 @@ def levels(self): return self._levels @property # type: ignore - @_cudf_nvtx_annotate + @_performance_tracking def ndim(self) -> int: """Dimension of the data. For MultiIndex ndim is always 2.""" return 2 - @_cudf_nvtx_annotate + @_performance_tracking def _get_level_label(self, level): """Get name of the level. @@ -588,7 +571,7 @@ def _get_level_label(self, level): else: return self._data.names[level] - @_cudf_nvtx_annotate + @_performance_tracking def isin(self, values, level=None): """Return a boolean array where the index values are in values. @@ -687,7 +670,7 @@ def where(self, cond, other=None, inplace=False): ".where is not supported for MultiIndex operations" ) - @_cudf_nvtx_annotate + @_performance_tracking def _compute_levels_and_codes(self): levels = [] @@ -701,7 +684,7 @@ def _compute_levels_and_codes(self): self._levels = levels self._codes = cudf.DataFrame._from_data(codes) - @_cudf_nvtx_annotate + @_performance_tracking def _compute_validity_mask(self, index, row_tuple, max_length): """Computes the valid set of indices of values in the lookup""" lookup = cudf.DataFrame() @@ -749,7 +732,7 @@ def _compute_validity_mask(self, index, row_tuple, max_length): raise KeyError(row) return result - @_cudf_nvtx_annotate + @_performance_tracking def _get_valid_indices_by_tuple(self, index, row_tuple, max_length): # Instructions for Slicing # if tuple, get first and last elements of tuple @@ -779,7 +762,7 @@ def _get_valid_indices_by_tuple(self, index, row_tuple, max_length): return row_tuple return self._compute_validity_mask(index, row_tuple, max_length) - @_cudf_nvtx_annotate + @_performance_tracking def _index_and_downcast(self, result, index, index_key): if isinstance(index_key, (numbers.Number, slice)): index_key = [index_key] @@ -847,7 +830,7 @@ def _index_and_downcast(self, result, index, index_key): result.index = index return result - @_cudf_nvtx_annotate + @_performance_tracking def _get_row_major( self, df: DataFrameOrSeries, @@ -874,7 +857,7 @@ def _get_row_major( final = self._index_and_downcast(result, result.index, row_tuple) return final - @_cudf_nvtx_annotate + @_performance_tracking def _validate_indexer( self, indexer: numbers.Number @@ -902,7 +885,7 @@ def _validate_indexer( for i in indexer: self._validate_indexer(i) - @_cudf_nvtx_annotate + @_performance_tracking def __eq__(self, other): if isinstance(other, MultiIndex): return np.array( @@ -916,12 +899,12 @@ def __eq__(self, other): return NotImplemented @property # type: ignore - @_cudf_nvtx_annotate + @_performance_tracking def size(self): # 
The size of a MultiIndex is only dependent on the number of rows. return self._num_rows - @_cudf_nvtx_annotate + @_performance_tracking def take(self, indices): if isinstance(indices, cudf.Series) and indices.has_nulls: raise ValueError("Column must have no nulls.") @@ -929,7 +912,7 @@ def take(self, indices): obj.names = self.names return obj - @_cudf_nvtx_annotate + @_performance_tracking def serialize(self): header, frames = super().serialize() # Overwrite the names in _data with the true names. @@ -937,7 +920,7 @@ def serialize(self): return header, frames @classmethod - @_cudf_nvtx_annotate + @_performance_tracking def deserialize(cls, header, frames): # Spoof the column names to construct the frame, then set manually. column_names = pickle.loads(header["column_names"]) @@ -945,7 +928,7 @@ def deserialize(cls, header, frames): obj = super().deserialize(header, frames) return obj._set_names(column_names) - @_cudf_nvtx_annotate + @_performance_tracking def __getitem__(self, index): flatten = isinstance(index, int) @@ -972,7 +955,7 @@ def __getitem__(self, index): result._levels = self._levels return result - @_cudf_nvtx_annotate + @_performance_tracking def to_frame(self, index=True, name=no_default, allow_duplicates=False): """ Create a DataFrame with the levels of the MultiIndex as columns. @@ -1022,44 +1005,34 @@ def to_frame(self, index=True, name=no_default, allow_duplicates=False): a c a c b d b d """ - # TODO: Currently this function makes a shallow copy, which is - # incorrect. We want to make a deep copy, otherwise further - # modifications of the resulting DataFrame will affect the MultiIndex. if name is no_default: column_names = [ level if name is None else name for level, name in enumerate(self.names) ] + elif not is_list_like(name): + raise TypeError( + "'name' must be a list / sequence of column names." + ) + elif len(name) != len(self.levels): + raise ValueError( + "'name' should have the same length as " + "number of levels on index." + ) else: - if not is_list_like(name): - raise TypeError( - "'name' must be a list / sequence of column names." - ) - if len(name) != len(self.levels): - raise ValueError( - "'name' should have the same length as " - "number of levels on index." - ) column_names = name - all_none_names = None - if not ( - all_none_names := all(x is None for x in column_names) - ) and len(column_names) != len(set(column_names)): + if len(column_names) != len(set(column_names)): raise ValueError("Duplicate column names are not allowed") - df = cudf.DataFrame._from_data( - data=self._data, - columns=column_names - if name is not no_default and not all_none_names - else None, + ca = ColumnAccessor( + dict(zip(column_names, (col.copy() for col in self._columns))), + verify=False, + ) + return cudf.DataFrame._from_data( + data=ca, index=self if index else None ) - if index: - df = df.set_index(self) - - return df - - @_cudf_nvtx_annotate + @_performance_tracking def get_level_values(self, level): """ Return the values at the requested level @@ -1115,7 +1088,7 @@ def _is_interval(self): return False @classmethod - @_cudf_nvtx_annotate + @_performance_tracking def _concat(cls, objs): source_data = [o.to_frame(index=False) for o in objs] @@ -1135,7 +1108,7 @@ def _concat(cls, objs): return cudf.MultiIndex.from_frame(source_data, names=names) @classmethod - @_cudf_nvtx_annotate + @_performance_tracking def from_tuples(cls, tuples, names=None): """ Convert list of tuples to MultiIndex. 
@@ -1173,12 +1146,12 @@ def from_tuples(cls, tuples, names=None): pdi = pd.MultiIndex.from_tuples(tuples, names=names) return cls.from_pandas(pdi) - @_cudf_nvtx_annotate + @_performance_tracking def to_numpy(self): return self.values_host @property # type: ignore - @_cudf_nvtx_annotate + @_performance_tracking def values_host(self): """ Return a numpy representation of the MultiIndex. @@ -1206,7 +1179,7 @@ def values_host(self): return self.to_pandas().values @property # type: ignore - @_cudf_nvtx_annotate + @_performance_tracking def values(self): """ Return a CuPy representation of the MultiIndex. @@ -1242,8 +1215,8 @@ def values(self): return self.to_frame(index=False).values @classmethod - @_cudf_nvtx_annotate - def from_frame(cls, df, names=None): + @_performance_tracking + def from_frame(cls, df: pd.DataFrame | cudf.DataFrame, names=None): """ Make a MultiIndex from a DataFrame. @@ -1317,7 +1290,7 @@ def from_frame(cls, df, names=None): return obj @classmethod - @_cudf_nvtx_annotate + @_performance_tracking def from_product(cls, arrays, names=None): """ Make a MultiIndex from the cartesian product of multiple iterables. @@ -1359,7 +1332,7 @@ def from_product(cls, arrays, names=None): return cls.from_pandas(pdi) @classmethod - @_cudf_nvtx_annotate + @_performance_tracking def from_arrays( cls, arrays, @@ -1418,7 +1391,7 @@ def from_arrays( codes=codes, levels=levels, sortorder=sortorder, names=names ) - @_cudf_nvtx_annotate + @_performance_tracking def _poplevels(self, level): """ Remove and return the specified levels from self. @@ -1469,7 +1442,7 @@ def _poplevels(self, level): return popped - @_cudf_nvtx_annotate + @_performance_tracking def swaplevel(self, i=-2, j=-1): """ Swap level i with level j. @@ -1520,7 +1493,7 @@ def swaplevel(self, i=-2, j=-1): midx = midx.set_names(self.names) return midx - @_cudf_nvtx_annotate + @_performance_tracking def droplevel(self, level=-1): """ Removes the specified levels from the MultiIndex. 
@@ -1583,7 +1556,7 @@ def droplevel(self, level=-1): else: return mi - @_cudf_nvtx_annotate + @_performance_tracking def to_pandas( self, *, nullable: bool = False, arrow_type: bool = False ) -> pd.MultiIndex: @@ -1600,7 +1573,7 @@ def to_pandas( ) @classmethod - @_cudf_nvtx_annotate + @_performance_tracking def from_pandas(cls, multiindex: pd.MultiIndex, nan_as_null=no_default): """ Convert from a Pandas MultiIndex @@ -1635,7 +1608,7 @@ def from_pandas(cls, multiindex: pd.MultiIndex, nan_as_null=no_default): ) @cached_property # type: ignore - @_cudf_nvtx_annotate + @_performance_tracking def is_unique(self): return len(self) == len(self.unique()) @@ -1643,7 +1616,7 @@ def is_unique(self): def dtype(self): return np.dtype("O") - @_cudf_nvtx_annotate + @_performance_tracking def _is_sorted(self, ascending=None, null_position=None) -> bool: """ Returns a boolean indicating whether the data of the MultiIndex are sorted @@ -1689,7 +1662,7 @@ def _is_sorted(self, ascending=None, null_position=None) -> bool: ) @cached_property # type: ignore - @_cudf_nvtx_annotate + @_performance_tracking def is_monotonic_increasing(self) -> bool: """ Return if the index is monotonic increasing @@ -1698,7 +1671,7 @@ def is_monotonic_increasing(self) -> bool: return self._is_sorted(ascending=None, null_position=None) @cached_property # type: ignore - @_cudf_nvtx_annotate + @_performance_tracking def is_monotonic_decreasing(self) -> bool: """ Return if the index is monotonic decreasing @@ -1708,7 +1681,7 @@ def is_monotonic_decreasing(self) -> bool: ascending=[False] * len(self.levels), null_position=None ) - @_cudf_nvtx_annotate + @_performance_tracking def fillna(self, value): """ Fill null values with the specified value. @@ -1749,11 +1722,11 @@ def fillna(self, value): return super().fillna(value=value) - @_cudf_nvtx_annotate + @_performance_tracking def unique(self): return self.drop_duplicates(keep="first") - @_cudf_nvtx_annotate + @_performance_tracking def nunique(self, dropna: bool = True) -> int: mi = self.dropna(how="all") if dropna else self return len(mi.unique()) @@ -1768,7 +1741,7 @@ def _clean_nulls_from_index(self): index_df._clean_nulls_from_dataframe(index_df), names=self.names ) - @_cudf_nvtx_annotate + @_performance_tracking def memory_usage(self, deep=False): usage = sum(col.memory_usage for col in self._data.columns) if self.levels: @@ -1779,13 +1752,13 @@ def memory_usage(self, deep=False): usage += col.memory_usage return usage - @_cudf_nvtx_annotate + @_performance_tracking def difference(self, other, sort=None): if hasattr(other, "to_pandas"): other = other.to_pandas() return cudf.from_pandas(self.to_pandas().difference(other, sort)) - @_cudf_nvtx_annotate + @_performance_tracking def append(self, other): """ Append a collection of MultiIndex objects together @@ -1848,7 +1821,7 @@ def append(self, other): return MultiIndex._concat(to_concat) - @_cudf_nvtx_annotate + @_performance_tracking def __array_function__(self, func, types, args, kwargs): cudf_df_module = MultiIndex @@ -1895,7 +1868,7 @@ def _level_index_from_level(self, level): ) from None return level - @_cudf_nvtx_annotate + @_performance_tracking def get_indexer(self, target, method=None, limit=None, tolerance=None): if tolerance is not None: raise NotImplementedError( @@ -1954,7 +1927,7 @@ def get_indexer(self, target, method=None, limit=None, tolerance=None): return _return_get_indexer_result(result_series.to_cupy()) - @_cudf_nvtx_annotate + @_performance_tracking def get_loc(self, key): is_sorted = ( 
self.is_monotonic_increasing or self.is_monotonic_decreasing @@ -2028,7 +2001,7 @@ def _maybe_match_names(self, other): for self_name, other_name in zip(self.names, other.names) ] - @_cudf_nvtx_annotate + @_performance_tracking def union(self, other, sort=None): if not isinstance(other, MultiIndex): msg = "other must be a MultiIndex or a list of tuples" @@ -2052,7 +2025,7 @@ def union(self, other, sort=None): return self._union(other, sort=sort) - @_cudf_nvtx_annotate + @_performance_tracking def _union(self, other, sort=None): # TODO: When to_frame is refactored to return a # deep copy in future, we should push most of the common @@ -2078,7 +2051,7 @@ def _union(self, other, sort=None): return midx.sort_values() return midx - @_cudf_nvtx_annotate + @_performance_tracking def _intersection(self, other, sort=None): if self.names != other.names: deep = True @@ -2101,14 +2074,14 @@ def _intersection(self, other, sort=None): return midx.sort_values() return midx - @_cudf_nvtx_annotate + @_performance_tracking def _copy_type_metadata(self: Self, other: Self) -> Self: res = super()._copy_type_metadata(other) if isinstance(other, MultiIndex): res._names = other._names return res - @_cudf_nvtx_annotate + @_performance_tracking def _split_columns_by_levels( self, levels: tuple, *, in_levels: bool ) -> Generator[tuple[Any, column.ColumnBase], None, None]: @@ -2127,7 +2100,7 @@ def _split_columns_by_levels( elif not in_levels and i not in level_indices: yield name, col - @_cudf_nvtx_annotate + @_performance_tracking def _new_index_for_reset_index( self, levels: tuple | None, name ) -> None | BaseIndex: diff --git a/python/cudf/cudf/core/series.py b/python/cudf/cudf/core/series.py index ea25d482578..97b6bbec2d4 100644 --- a/python/cudf/cudf/core/series.py +++ b/python/cudf/cudf/core/series.py @@ -48,7 +48,7 @@ from cudf.core.column.struct import StructMethods from cudf.core.column_accessor import ColumnAccessor from cudf.core.groupby.groupby import SeriesGroupBy, groupby_doc_template -from cudf.core.index import BaseIndex, DatetimeIndex, RangeIndex, as_index +from cudf.core.index import BaseIndex, DatetimeIndex, RangeIndex, ensure_index from cudf.core.indexed_frame import ( IndexedFrame, _FrameIndexer, @@ -68,7 +68,7 @@ is_mixed_with_object_dtype, to_cudf_compatible_scalar, ) -from cudf.utils.nvtx_annotation import _cudf_nvtx_annotate +from cudf.utils.performance_tracking import _performance_tracking if TYPE_CHECKING: from cudf._typing import ( @@ -179,7 +179,7 @@ class _SeriesIlocIndexer(_FrameIndexer): _frame: cudf.Series - @_cudf_nvtx_annotate + @_performance_tracking def __getitem__(self, arg): indexing_spec = indexing_utils.parse_row_iloc_indexer( indexing_utils.destructure_series_iloc_indexer(arg, self._frame), @@ -187,7 +187,7 @@ def __getitem__(self, arg): ) return self._frame._getitem_preprocessed(indexing_spec) - @_cudf_nvtx_annotate + @_performance_tracking def __setitem__(self, key, value): if isinstance(key, tuple): key = list(key) @@ -274,7 +274,7 @@ class _SeriesLocIndexer(_FrameIndexer): Label-based selection """ - @_cudf_nvtx_annotate + @_performance_tracking def __getitem__(self, arg: Any) -> ScalarLike | DataFrameOrSeries: if isinstance(arg, pd.MultiIndex): arg = cudf.from_pandas(arg) @@ -301,7 +301,7 @@ def __getitem__(self, arg: Any) -> ScalarLike | DataFrameOrSeries: return self._frame.iloc[arg] - @_cudf_nvtx_annotate + @_performance_tracking def __setitem__(self, key, value): try: key = self._loc_to_iloc(key) @@ -476,7 +476,7 @@ def _constructor_expanddim(self): return 
cudf.DataFrame @classmethod - @_cudf_nvtx_annotate + @_performance_tracking def from_categorical(cls, categorical, codes=None): """Creates from a pandas.Categorical @@ -517,7 +517,7 @@ def from_categorical(cls, categorical, codes=None): return Series(data=col) @classmethod - @_cudf_nvtx_annotate + @_performance_tracking def from_masked_array(cls, data, mask, null_count=None): """Create a Series with null-mask. This is equivalent to: @@ -566,7 +566,7 @@ def from_masked_array(cls, data, mask, null_count=None): col = as_column(data).set_mask(mask) return cls(data=col) - @_cudf_nvtx_annotate + @_performance_tracking def __init__( self, data=None, @@ -588,10 +588,8 @@ def __init__( data = data.copy(deep=True) name_from_data = data.name column = as_column(data, nan_as_null=nan_as_null, dtype=dtype) - if isinstance(data, pd.Series): - index_from_data = cudf.Index(data.index) - elif isinstance(data, Series): - index_from_data = data.index + if isinstance(data, (pd.Series, Series)): + index_from_data = ensure_index(data.index) elif isinstance(data, ColumnAccessor): raise TypeError( "Use cudf.Series._from_data for constructing a Series from " @@ -642,7 +640,7 @@ def __init__( name = name_from_data if index is not None: - index = cudf.Index(index) + index = ensure_index(index) if index_from_data is not None: first_index = index_from_data @@ -663,7 +661,7 @@ def __init__( self._check_data_index_length_match() @classmethod - @_cudf_nvtx_annotate + @_performance_tracking def _from_data( cls, data: MutableMapping, @@ -675,18 +673,18 @@ def _from_data( out.name = name return out - @_cudf_nvtx_annotate + @_performance_tracking def _from_data_like_self(self, data: MutableMapping): out = super()._from_data_like_self(data) out.name = self.name return out - @_cudf_nvtx_annotate + @_performance_tracking def __contains__(self, item): return item in self.index @classmethod - @_cudf_nvtx_annotate + @_performance_tracking def from_pandas(cls, s: pd.Series, nan_as_null=no_default): """ Convert from a Pandas Series. @@ -735,7 +733,7 @@ def from_pandas(cls, s: pd.Series, nan_as_null=no_default): return result @property # type: ignore - @_cudf_nvtx_annotate + @_performance_tracking def is_unique(self): """Return boolean if values in the object are unique. @@ -746,7 +744,7 @@ def is_unique(self): return self._column.is_unique @property # type: ignore - @_cudf_nvtx_annotate + @_performance_tracking def dt(self): """ Accessor object for datetime-like properties of the Series values. @@ -788,7 +786,7 @@ def dt(self): ) @property # type: ignore - @_cudf_nvtx_annotate + @_performance_tracking def hasnans(self): """ Return True if there are any NaNs or nulls. @@ -829,7 +827,7 @@ def hasnans(self): """ return self._column.has_nulls(include_nan=True) - @_cudf_nvtx_annotate + @_performance_tracking def serialize(self): header, frames = super().serialize() @@ -842,7 +840,7 @@ def serialize(self): return header, frames @classmethod - @_cudf_nvtx_annotate + @_performance_tracking def deserialize(cls, header, frames): index_nframes = header["index_frame_count"] obj = super().deserialize( @@ -855,7 +853,7 @@ def deserialize(cls, header, frames): return obj - @_cudf_nvtx_annotate + @_performance_tracking def drop( self, labels=None, @@ -884,7 +882,7 @@ def tolist(self): # noqa: D102 to_list = tolist - @_cudf_nvtx_annotate + @_performance_tracking def to_dict(self, into: type[dict] = dict) -> dict: """ Convert Series to {label -> value} dict or dict-like object. 
@@ -923,7 +921,7 @@ def to_dict(self, into: type[dict] = dict) -> dict: """ return self.to_pandas().to_dict(into=into) - @_cudf_nvtx_annotate + @_performance_tracking def reindex(self, *args, **kwargs): """ Conform Series to new index. @@ -996,7 +994,7 @@ def reindex(self, *args, **kwargs): series.name = self.name return series - @_cudf_nvtx_annotate + @_performance_tracking @docutils.doc_apply( doc_reset_index_template.format( klass="Series", @@ -1081,7 +1079,7 @@ def reset_index( inplace=inplace, ) - @_cudf_nvtx_annotate + @_performance_tracking def to_frame(self, name=None): """Convert Series into a DataFrame @@ -1124,13 +1122,13 @@ def to_frame(self, name=None): return cudf.DataFrame({col: self._column}, index=self.index) - @_cudf_nvtx_annotate + @_performance_tracking def memory_usage(self, index=True, deep=False): return self._column.memory_usage + ( self.index.memory_usage() if index else 0 ) - @_cudf_nvtx_annotate + @_performance_tracking def __array_function__(self, func, types, args, kwargs): if "out" in kwargs or not all(issubclass(t, Series) for t in types): return NotImplemented @@ -1191,7 +1189,7 @@ def __array_function__(self, func, types, args, kwargs): return NotImplemented - @_cudf_nvtx_annotate + @_performance_tracking def map(self, arg, na_action=None) -> "Series": """ Map values of Series according to input correspondence. @@ -1333,7 +1331,7 @@ def _getitem_preprocessed( return self._empty_like(keep_index=True) assert_never(spec) - @_cudf_nvtx_annotate + @_performance_tracking def __getitem__(self, arg): if isinstance(arg, slice): return self.iloc[arg] @@ -1344,7 +1342,7 @@ def __getitem__(self, arg): items = SingleColumnFrame.__iter__ - @_cudf_nvtx_annotate + @_performance_tracking def __setitem__(self, key, value): if isinstance(key, slice): self.iloc[key] = value @@ -1495,36 +1493,36 @@ def _make_operands_and_index_for_binop( @copy_docstring(CategoricalAccessor) # type: ignore @property - @_cudf_nvtx_annotate + @_performance_tracking def cat(self): return CategoricalAccessor(parent=self) @copy_docstring(StringMethods) # type: ignore @property - @_cudf_nvtx_annotate + @_performance_tracking def str(self): return StringMethods(parent=self) @copy_docstring(ListMethods) # type: ignore @property - @_cudf_nvtx_annotate + @_performance_tracking def list(self): return ListMethods(parent=self) @copy_docstring(StructMethods) # type: ignore @property - @_cudf_nvtx_annotate + @_performance_tracking def struct(self): return StructMethods(parent=self) @property # type: ignore - @_cudf_nvtx_annotate + @_performance_tracking def dtype(self): """The dtype of the Series.""" return self._column.dtype @classmethod - @_cudf_nvtx_annotate + @_performance_tracking def _concat(cls, objs, axis=0, index=True): # Concatenate index if not provided if index is True: @@ -1590,25 +1588,25 @@ def _concat(cls, objs, axis=0, index=True): return cls(data=col, index=index, name=name) @property # type: ignore - @_cudf_nvtx_annotate + @_performance_tracking def valid_count(self): """Number of non-null values""" return len(self) - self._column.null_count @property # type: ignore - @_cudf_nvtx_annotate + @_performance_tracking def null_count(self): """Number of null values""" return self._column.null_count @property # type: ignore - @_cudf_nvtx_annotate + @_performance_tracking def nullable(self): """A boolean indicating whether a null-mask is needed""" return self._column.nullable @property # type: ignore - @_cudf_nvtx_annotate + @_performance_tracking def has_nulls(self): """ Indicator whether Series 
contains null values. @@ -1637,7 +1635,7 @@ def has_nulls(self): """ return self._column.has_nulls() - @_cudf_nvtx_annotate + @_performance_tracking def dropna(self, axis=0, inplace=False, how=None): """ Return a Series with null values removed. @@ -1717,7 +1715,7 @@ def dropna(self, axis=0, inplace=False, how=None): return self._mimic_inplace(result, inplace=inplace) - @_cudf_nvtx_annotate + @_performance_tracking def drop_duplicates(self, keep="first", inplace=False, ignore_index=False): """ Return Series with duplicate values removed. @@ -1791,7 +1789,7 @@ def drop_duplicates(self, keep="first", inplace=False, ignore_index=False): return self._mimic_inplace(result, inplace=inplace) - @_cudf_nvtx_annotate + @_performance_tracking def fillna( self, value=None, method=None, axis=None, inplace=False, limit=None ): @@ -1896,7 +1894,7 @@ def between(self, left, right, inclusive="both") -> Series: ) return self._from_data({self.name: lmask & rmask}, self.index) - @_cudf_nvtx_annotate + @_performance_tracking def all(self, axis=0, bool_only=None, skipna=True, **kwargs): if bool_only not in (None, True): raise NotImplementedError( @@ -1904,7 +1902,7 @@ def all(self, axis=0, bool_only=None, skipna=True, **kwargs): ) return super().all(axis, skipna, **kwargs) - @_cudf_nvtx_annotate + @_performance_tracking def any(self, axis=0, bool_only=None, skipna=True, **kwargs): if bool_only not in (None, True): raise NotImplementedError( @@ -1912,7 +1910,7 @@ def any(self, axis=0, bool_only=None, skipna=True, **kwargs): ) return super().any(axis, skipna, **kwargs) - @_cudf_nvtx_annotate + @_performance_tracking def to_pandas( self, *, @@ -2004,7 +2002,7 @@ def to_pandas( ) @property # type: ignore - @_cudf_nvtx_annotate + @_performance_tracking def data(self): """The gpu buffer for the data @@ -2029,12 +2027,12 @@ def data(self): return self._column.data @property # type: ignore - @_cudf_nvtx_annotate + @_performance_tracking def nullmask(self): """The gpu buffer for the null-mask""" return cudf.Series(self._column.nullmask) - @_cudf_nvtx_annotate + @_performance_tracking def astype( self, dtype, @@ -2051,13 +2049,13 @@ def astype( dtype = {self.name: dtype} return super().astype(dtype, copy, errors) - @_cudf_nvtx_annotate + @_performance_tracking def sort_index(self, axis=0, *args, **kwargs): if axis not in (0, "index"): raise ValueError("Only axis=0 is valid for Series.") return super().sort_index(axis=axis, *args, **kwargs) - @_cudf_nvtx_annotate + @_performance_tracking def sort_values( self, axis=0, @@ -2112,7 +2110,7 @@ def sort_values( ignore_index=ignore_index, ) - @_cudf_nvtx_annotate + @_performance_tracking def nlargest(self, n=5, keep="first"): """Returns a new Series of the *n* largest element. @@ -2175,7 +2173,7 @@ def nlargest(self, n=5, keep="first"): """ return self._n_largest_or_smallest(True, n, [self.name], keep) - @_cudf_nvtx_annotate + @_performance_tracking def nsmallest(self, n=5, keep="first"): """ Returns a new Series of the *n* smallest element. 
@@ -2251,7 +2249,7 @@ def nsmallest(self, n=5, keep="first"): """ return self._n_largest_or_smallest(False, n, [self.name], keep) - @_cudf_nvtx_annotate + @_performance_tracking def argsort( self, axis=0, @@ -2274,7 +2272,7 @@ def argsort( obj.name = self.name return obj - @_cudf_nvtx_annotate + @_performance_tracking def replace(self, to_replace=None, value=no_default, *args, **kwargs): if is_dict_like(to_replace) and value not in {None, no_default}: raise ValueError( @@ -2284,7 +2282,7 @@ def replace(self, to_replace=None, value=no_default, *args, **kwargs): return super().replace(to_replace, value, *args, **kwargs) - @_cudf_nvtx_annotate + @_performance_tracking def update(self, other): """ Modify Series in place using values from passed Series. @@ -2390,7 +2388,7 @@ def update(self, other): self.mask(mask, other, inplace=True) # UDF related - @_cudf_nvtx_annotate + @_performance_tracking def apply(self, func, convert_dtype=True, args=(), **kwargs): """ Apply a scalar function to the values of a Series. @@ -2535,7 +2533,7 @@ def apply(self, func, convert_dtype=True, args=(), **kwargs): # # Stats # - @_cudf_nvtx_annotate + @_performance_tracking def count(self): """ Return number of non-NA/null observations in the Series @@ -2559,7 +2557,7 @@ def count(self): """ return self.valid_count - @_cudf_nvtx_annotate + @_performance_tracking def mode(self, dropna=True): """ Return the mode(s) of the dataset. @@ -2630,7 +2628,7 @@ def mode(self, dropna=True): {self.name: val_counts.index.sort_values()}, name=self.name ) - @_cudf_nvtx_annotate + @_performance_tracking def round(self, decimals=0, how="half_even"): if not is_integer(decimals): raise ValueError( @@ -2639,7 +2637,7 @@ def round(self, decimals=0, how="half_even"): decimals = int(decimals) return super().round(decimals, how) - @_cudf_nvtx_annotate + @_performance_tracking def cov(self, other, min_periods=None): """ Compute covariance with Series, excluding missing values. @@ -2690,7 +2688,7 @@ def cov(self, other, min_periods=None): f"{other.dtype}" ) - @_cudf_nvtx_annotate + @_performance_tracking def transpose(self): """Return the transpose, which is by definition self.""" @@ -2698,7 +2696,7 @@ def transpose(self): T = property(transpose, doc=transpose.__doc__) - @_cudf_nvtx_annotate + @_performance_tracking def duplicated(self, keep="first"): """ Indicate duplicate Series values. @@ -2778,7 +2776,7 @@ def duplicated(self, keep="first"): """ return super().duplicated(keep=keep) - @_cudf_nvtx_annotate + @_performance_tracking def corr(self, other, method="pearson", min_periods=None): """Calculates the sample correlation between two Series, excluding missing values. @@ -2830,7 +2828,7 @@ def corr(self, other, method="pearson", min_periods=None): f"cannot perform corr with types {self.dtype}, {other.dtype}" ) - @_cudf_nvtx_annotate + @_performance_tracking def autocorr(self, lag=1): """Compute the lag-N autocorrelation. This method computes the Pearson correlation between the Series and its shifted self. @@ -2856,7 +2854,7 @@ def autocorr(self, lag=1): """ return self.corr(self.shift(lag)) - @_cudf_nvtx_annotate + @_performance_tracking def isin(self, values): """Check whether values are contained in Series. @@ -2926,7 +2924,7 @@ def isin(self, values): {self.name: self._column.isin(values)}, index=self.index ) - @_cudf_nvtx_annotate + @_performance_tracking def unique(self): """ Returns unique values of this Series. 
@@ -2961,7 +2959,7 @@ def unique(self): return res.values return Series(res, name=self.name) - @_cudf_nvtx_annotate + @_performance_tracking def value_counts( self, normalize=False, @@ -3116,7 +3114,7 @@ def value_counts( res.name = result_name return res - @_cudf_nvtx_annotate + @_performance_tracking def quantile( self, q=0.5, interpolation="linear", exact=True, quant_index=True ): @@ -3191,11 +3189,11 @@ def quantile( return Series._from_data( data={self.name: result}, - index=as_index(np_array_q) if quant_index else None, + index=cudf.Index(np_array_q) if quant_index else None, ) @docutils.doc_describe() - @_cudf_nvtx_annotate + @_performance_tracking def describe( self, percentiles=None, @@ -3240,7 +3238,7 @@ def describe( name=self.name, ) - @_cudf_nvtx_annotate + @_performance_tracking def digitize(self, bins, right=False): """Return the indices of the bins to which each value belongs. @@ -3276,7 +3274,7 @@ def digitize(self, bins, right=False): cudf.core.column.numerical.digitize(self._column, bins, right) ) - @_cudf_nvtx_annotate + @_performance_tracking def diff(self, periods=1): """First discrete difference of element. @@ -3347,7 +3345,7 @@ def diff(self, periods=1): return self - self.shift(periods=periods) - @_cudf_nvtx_annotate + @_performance_tracking @docutils.doc_apply( groupby_doc_template.format( ret=textwrap.dedent( @@ -3385,7 +3383,7 @@ def groupby( dropna, ) - @_cudf_nvtx_annotate + @_performance_tracking def rename(self, index=None, copy=True): """ Alter Series name @@ -3431,7 +3429,7 @@ def rename(self, index=None, copy=True): out_data = self._data.copy(deep=copy) return Series._from_data(out_data, self.index, name=index) - @_cudf_nvtx_annotate + @_performance_tracking def add_prefix(self, prefix): return Series._from_data( # TODO: Change to deep=False when copy-on-write is default @@ -3439,7 +3437,7 @@ def add_prefix(self, prefix): index=prefix + self.index.astype(str), ) - @_cudf_nvtx_annotate + @_performance_tracking def add_suffix(self, suffix): return Series._from_data( # TODO: Change to deep=False when copy-on-write is default @@ -3447,7 +3445,7 @@ def add_suffix(self, suffix): index=self.index.astype(str) + suffix, ) - @_cudf_nvtx_annotate + @_performance_tracking def keys(self): """ Return alias for index. @@ -3491,7 +3489,7 @@ def keys(self): """ return self.index - @_cudf_nvtx_annotate + @_performance_tracking def explode(self, ignore_index=False): """ Transform each element of a list-like to a row, replicating index @@ -3528,7 +3526,7 @@ def explode(self, ignore_index=False): """ return super()._explode(self.name, ignore_index) - @_cudf_nvtx_annotate + @_performance_tracking def pct_change( self, periods=1, fill_method=no_default, limit=no_default, freq=None ): @@ -3602,7 +3600,7 @@ def pct_change( change = diff / data.shift(periods=periods, freq=freq) return change - @_cudf_nvtx_annotate + @_performance_tracking def where(self, cond, other=None, inplace=False): result_col = super().where(cond, other, inplace) return self._mimic_inplace( @@ -3736,7 +3734,7 @@ class DatetimeProperties(BaseDatelikeProperties): """ @property # type: ignore - @_cudf_nvtx_annotate + @_performance_tracking def year(self) -> Series: """ The year of the datetime. @@ -3761,7 +3759,7 @@ def year(self) -> Series: return self._get_dt_field("year") @property # type: ignore - @_cudf_nvtx_annotate + @_performance_tracking def month(self) -> Series: """ The month as January=1, December=12. 
@@ -3786,7 +3784,7 @@ def month(self) -> Series: return self._get_dt_field("month") @property # type: ignore - @_cudf_nvtx_annotate + @_performance_tracking def day(self) -> Series: """ The day of the datetime. @@ -3811,7 +3809,7 @@ def day(self) -> Series: return self._get_dt_field("day") @property # type: ignore - @_cudf_nvtx_annotate + @_performance_tracking def hour(self) -> Series: """ The hours of the datetime. @@ -3836,7 +3834,7 @@ def hour(self) -> Series: return self._get_dt_field("hour") @property # type: ignore - @_cudf_nvtx_annotate + @_performance_tracking def minute(self) -> Series: """ The minutes of the datetime. @@ -3861,7 +3859,7 @@ def minute(self) -> Series: return self._get_dt_field("minute") @property # type: ignore - @_cudf_nvtx_annotate + @_performance_tracking def second(self) -> Series: """ The seconds of the datetime. @@ -3886,7 +3884,7 @@ def second(self) -> Series: return self._get_dt_field("second") @property # type: ignore - @_cudf_nvtx_annotate + @_performance_tracking def microsecond(self) -> Series: """ The microseconds of the datetime. @@ -3918,7 +3916,7 @@ def microsecond(self) -> Series: return self._return_result_like_self(micro + extra) @property # type: ignore - @_cudf_nvtx_annotate + @_performance_tracking def nanosecond(self) -> Series: """ The nanoseconds of the datetime. @@ -3943,7 +3941,7 @@ def nanosecond(self) -> Series: return self._get_dt_field("nanosecond") @property # type: ignore - @_cudf_nvtx_annotate + @_performance_tracking def weekday(self) -> Series: """ The day of the week with Monday=0, Sunday=6. @@ -3980,7 +3978,7 @@ def weekday(self) -> Series: return self._get_dt_field("weekday") @property # type: ignore - @_cudf_nvtx_annotate + @_performance_tracking def dayofweek(self) -> Series: """ The day of the week with Monday=0, Sunday=6. @@ -4017,7 +4015,7 @@ def dayofweek(self) -> Series: return self._get_dt_field("weekday") @property # type: ignore - @_cudf_nvtx_annotate + @_performance_tracking def dayofyear(self) -> Series: """ The day of the year, from 1-365 in non-leap years and @@ -4055,7 +4053,7 @@ def dayofyear(self) -> Series: return self._get_dt_field("day_of_year") @property # type: ignore - @_cudf_nvtx_annotate + @_performance_tracking def day_of_year(self) -> Series: """ The day of the year, from 1-365 in non-leap years and @@ -4093,7 +4091,7 @@ def day_of_year(self) -> Series: return self._get_dt_field("day_of_year") @property # type: ignore - @_cudf_nvtx_annotate + @_performance_tracking def is_leap_year(self) -> Series: """ Boolean indicator if the date belongs to a leap year. @@ -4148,7 +4146,7 @@ def is_leap_year(self) -> Series: return self._return_result_like_self(res) @property # type: ignore - @_cudf_nvtx_annotate + @_performance_tracking def quarter(self) -> Series: """ Integer indicator for which quarter of the year the date belongs in. @@ -4177,7 +4175,7 @@ def quarter(self) -> Series: ) return self._return_result_like_self(res) - @_cudf_nvtx_annotate + @_performance_tracking def day_name(self, locale: str | None = None) -> Series: """ Return the day names. Currently supports English locale only. @@ -4213,7 +4211,7 @@ def day_name(self, locale: str | None = None) -> Series: self.series._column.get_day_names(locale) ) - @_cudf_nvtx_annotate + @_performance_tracking def month_name(self, locale: str | None = None) -> Series: """ Return the month names. Currently supports English locale only. 
@@ -4243,7 +4241,7 @@ def month_name(self, locale: str | None = None) -> Series: self.series._column.get_month_names(locale) ) - @_cudf_nvtx_annotate + @_performance_tracking def isocalendar(self) -> cudf.DataFrame: """ Returns a DataFrame with the year, week, and day @@ -4291,7 +4289,7 @@ def isocalendar(self) -> cudf.DataFrame: ) @property # type: ignore - @_cudf_nvtx_annotate + @_performance_tracking def is_month_start(self) -> Series: """ Booleans indicating if dates are the first day of the month. @@ -4299,7 +4297,7 @@ def is_month_start(self) -> Series: return (self.day == 1).fillna(False) @property # type: ignore - @_cudf_nvtx_annotate + @_performance_tracking def days_in_month(self) -> Series: """ Get the total number of days in the month that the date falls on. @@ -4348,7 +4346,7 @@ def days_in_month(self) -> Series: ) @property # type: ignore - @_cudf_nvtx_annotate + @_performance_tracking def is_month_end(self) -> Series: """ Boolean indicator if the date is the last day of the month. @@ -4391,7 +4389,7 @@ def is_month_end(self) -> Series: return (self.day == last_day.dt.day).fillna(False) @property # type: ignore - @_cudf_nvtx_annotate + @_performance_tracking def is_quarter_start(self) -> Series: """ Boolean indicator if the date is the first day of a quarter. @@ -4436,7 +4434,7 @@ def is_quarter_start(self) -> Series: return self._return_result_like_self(result) @property # type: ignore - @_cudf_nvtx_annotate + @_performance_tracking def is_quarter_end(self) -> Series: """ Boolean indicator if the date is the last day of a quarter. @@ -4483,7 +4481,7 @@ def is_quarter_end(self) -> Series: return self._return_result_like_self(result) @property # type: ignore - @_cudf_nvtx_annotate + @_performance_tracking def is_year_start(self) -> Series: """ Boolean indicator if the date is the first day of the year. @@ -4514,7 +4512,7 @@ def is_year_start(self) -> Series: return self._return_result_like_self(outcol.fillna(False)) @property # type: ignore - @_cudf_nvtx_annotate + @_performance_tracking def is_year_end(self) -> Series: """ Boolean indicator if the date is the last day of the year. @@ -4547,13 +4545,13 @@ def is_year_end(self) -> Series: result = cudf._lib.copying.copy_if_else(leap, non_leap, leap_dates) return self._return_result_like_self(result.fillna(False)) - @_cudf_nvtx_annotate + @_performance_tracking def _get_dt_field(self, field: str) -> Series: return self._return_result_like_self( self.series._column.get_dt_field(field) ) - @_cudf_nvtx_annotate + @_performance_tracking def ceil(self, freq: str) -> Series: """ Perform ceil operation on the data to the specified freq. @@ -4586,7 +4584,7 @@ def ceil(self, freq: str) -> Series: """ return self._return_result_like_self(self.series._column.ceil(freq)) - @_cudf_nvtx_annotate + @_performance_tracking def floor(self, freq: str) -> Series: """ Perform floor operation on the data to the specified freq. @@ -4619,7 +4617,7 @@ def floor(self, freq: str) -> Series: """ return self._return_result_like_self(self.series._column.floor(freq)) - @_cudf_nvtx_annotate + @_performance_tracking def round(self, freq: str) -> Series: """ Perform round operation on the data to the specified freq. @@ -4655,7 +4653,7 @@ def round(self, freq: str) -> Series: """ return self._return_result_like_self(self.series._column.round(freq)) - @_cudf_nvtx_annotate + @_performance_tracking def strftime(self, date_format: str, *args, **kwargs) -> Series: """ Convert to Series using specified ``date_format``. 
@@ -4832,7 +4830,7 @@ class TimedeltaProperties(BaseDatelikeProperties): """ @property # type: ignore - @_cudf_nvtx_annotate + @_performance_tracking def days(self) -> Series: """ Number of days. @@ -4864,7 +4862,7 @@ def days(self) -> Series: return self._get_td_field("days") @property # type: ignore - @_cudf_nvtx_annotate + @_performance_tracking def seconds(self) -> Series: """ Number of seconds (>= 0 and less than 1 day). @@ -4903,7 +4901,7 @@ def seconds(self) -> Series: return self._get_td_field("seconds") @property # type: ignore - @_cudf_nvtx_annotate + @_performance_tracking def microseconds(self) -> Series: """ Number of microseconds (>= 0 and less than 1 second). @@ -4935,7 +4933,7 @@ def microseconds(self) -> Series: return self._get_td_field("microseconds") @property # type: ignore - @_cudf_nvtx_annotate + @_performance_tracking def nanoseconds(self) -> Series: """ Return the number of nanoseconds (n), where 0 <= n < 1 microsecond. @@ -4967,7 +4965,7 @@ def nanoseconds(self) -> Series: return self._get_td_field("nanoseconds") @property # type: ignore - @_cudf_nvtx_annotate + @_performance_tracking def components(self) -> cudf.DataFrame: """ Return a Dataframe of the components of the Timedeltas. @@ -4999,14 +4997,14 @@ def components(self) -> cudf.DataFrame: ca, index=self.series.index ) - @_cudf_nvtx_annotate + @_performance_tracking def _get_td_field(self, field: str) -> Series: return self._return_result_like_self( getattr(self.series._column, field) ) -@_cudf_nvtx_annotate +@_performance_tracking def _align_indices(series_list, how="outer", allow_non_unique=False): """ Internal util to align the indices of a list of Series objects @@ -5069,7 +5067,7 @@ def _align_indices(series_list, how="outer", allow_non_unique=False): @acquire_spill_lock() -@_cudf_nvtx_annotate +@_performance_tracking def isclose(a, b, rtol=1e-05, atol=1e-08, equal_nan=False): r"""Returns a boolean array where two arrays are equal within a tolerance. 
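Note on the recurring change above: nearly every hunk in frame.py, multiindex.py, and series.py swaps the `_cudf_nvtx_annotate` decorator for `_performance_tracking` from `cudf.utils.performance_tracking`. The sketch below only illustrates the general shape such a decorator could take; it is not the actual cuDF implementation, and the `nvtx` usage and the `_TRACKING_ENABLED` flag are assumptions made for the example.

```python
# Illustrative sketch only -- NOT the real cudf.utils.performance_tracking module.
import functools

try:
    import nvtx  # assumed optional dependency used for range annotations
except ImportError:
    nvtx = None

_TRACKING_ENABLED = True  # hypothetical switch; cuDF gates this behind its own option


def _performance_tracking(func, domain="cudf_python"):
    """Wrap ``func`` so profilers can attribute time spent inside it."""

    @functools.wraps(func)
    def wrapper(*args, **kwargs):
        if _TRACKING_ENABLED and nvtx is not None:
            # Open an NVTX range named after the wrapped function for this call.
            with nvtx.annotate(message=func.__qualname__, domain=domain):
                return func(*args, **kwargs)
        return func(*args, **kwargs)

    return wrapper


@_performance_tracking
def head(df, n=5):
    # Decorated exactly the way the methods in the hunks above are decorated.
    return df.iloc[:n]
```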
diff --git a/python/cudf/cudf/core/single_column_frame.py b/python/cudf/cudf/core/single_column_frame.py index 23a2c828a04..f9555aee6a2 100644 --- a/python/cudf/cudf/core/single_column_frame.py +++ b/python/cudf/cudf/core/single_column_frame.py @@ -18,7 +18,7 @@ ) from cudf.core.column import ColumnBase, as_column from cudf.core.frame import Frame -from cudf.utils.nvtx_annotation import _cudf_nvtx_annotate +from cudf.utils.performance_tracking import _performance_tracking from cudf.utils.utils import NotIterable if TYPE_CHECKING: @@ -41,7 +41,7 @@ class SingleColumnFrame(Frame, NotIterable): "index": 0, } - @_cudf_nvtx_annotate + @_performance_tracking def _reduce( self, op, @@ -62,7 +62,7 @@ def _reduce( except AttributeError: raise TypeError(f"cannot perform {op} with type {self.dtype}") - @_cudf_nvtx_annotate + @_performance_tracking def _scan(self, op, axis=None, *args, **kwargs): if axis not in (None, 0): raise NotImplementedError("axis parameter is not implemented yet") @@ -70,24 +70,24 @@ def _scan(self, op, axis=None, *args, **kwargs): return super()._scan(op, axis=axis, *args, **kwargs) @property # type: ignore - @_cudf_nvtx_annotate + @_performance_tracking def name(self): """Get the name of this object.""" return next(iter(self._column_names)) @name.setter # type: ignore - @_cudf_nvtx_annotate + @_performance_tracking def name(self, value): self._data[value] = self._data.pop(self.name) @property # type: ignore - @_cudf_nvtx_annotate + @_performance_tracking def ndim(self) -> int: # noqa: D401 """Number of dimensions of the underlying data, by definition 1.""" return 1 @property # type: ignore - @_cudf_nvtx_annotate + @_performance_tracking def shape(self) -> tuple[int]: """Get a tuple representing the dimensionality of the Index.""" return (len(self),) @@ -99,27 +99,27 @@ def __bool__(self): ) @property # type: ignore - @_cudf_nvtx_annotate + @_performance_tracking def _num_columns(self) -> int: return 1 @property # type: ignore - @_cudf_nvtx_annotate + @_performance_tracking def _column(self) -> ColumnBase: return next(iter(self._columns)) @property # type: ignore - @_cudf_nvtx_annotate + @_performance_tracking def values(self) -> cupy.ndarray: # noqa: D102 return self._column.values @property # type: ignore - @_cudf_nvtx_annotate + @_performance_tracking def values_host(self) -> numpy.ndarray: # noqa: D102 return self._column.values_host @classmethod - @_cudf_nvtx_annotate + @_performance_tracking def from_arrow(cls, array) -> Self: """Create from PyArrow Array/ChunkedArray. @@ -150,7 +150,7 @@ def from_arrow(cls, array) -> Self: """ return cls(ColumnBase.from_arrow(array)) - @_cudf_nvtx_annotate + @_performance_tracking def to_arrow(self) -> pa.Array: """ Convert to a PyArrow Array. @@ -182,7 +182,7 @@ def to_arrow(self) -> pa.Array: return self._column.to_arrow() @property # type: ignore - @_cudf_nvtx_annotate + @_performance_tracking def is_unique(self) -> bool: """Return boolean if values in the object are unique. @@ -193,7 +193,7 @@ def is_unique(self) -> bool: return self._column.is_unique @property # type: ignore - @_cudf_nvtx_annotate + @_performance_tracking def is_monotonic_increasing(self) -> bool: """Return boolean if values in the object are monotonically increasing. @@ -204,7 +204,7 @@ def is_monotonic_increasing(self) -> bool: return self._column.is_monotonic_increasing @property # type: ignore - @_cudf_nvtx_annotate + @_performance_tracking def is_monotonic_decreasing(self) -> bool: """Return boolean if values in the object are monotonically decreasing. 
@@ -215,7 +215,7 @@ def is_monotonic_decreasing(self) -> bool: return self._column.is_monotonic_decreasing @property # type: ignore - @_cudf_nvtx_annotate + @_performance_tracking def __cuda_array_interface__(self): # While the parent column class has a `__cuda_array_interface__` method # defined, it is not implemented for all column types. When it is not @@ -229,7 +229,7 @@ def __cuda_array_interface__(self): "'__cuda_array_interface__'" ) - @_cudf_nvtx_annotate + @_performance_tracking def factorize( self, sort: bool = False, use_na_sentinel: bool = True ) -> tuple[cupy.ndarray, cudf.Index]: @@ -268,7 +268,7 @@ def factorize( use_na_sentinel=use_na_sentinel, ) - @_cudf_nvtx_annotate + @_performance_tracking def _make_operands_for_binop( self, other: Any, @@ -323,7 +323,7 @@ def _make_operands_for_binop( return {result_name: (self._column, other, reflect, fill_value)} - @_cudf_nvtx_annotate + @_performance_tracking def nunique(self, dropna: bool = True) -> int: """ Return count of unique values for the column. @@ -369,7 +369,7 @@ def _get_elements_from_column(self, arg) -> ScalarLike | ColumnBase: return self._column.apply_boolean_mask(arg) raise NotImplementedError(f"Unknown indexer {type(arg)}") - @_cudf_nvtx_annotate + @_performance_tracking def where(self, cond, other=None, inplace=False): from cudf.core._internals.where import ( _check_and_cast_columns_with_other, diff --git a/python/cudf/cudf/core/udf/groupby_utils.py b/python/cudf/cudf/core/udf/groupby_utils.py index 06d9296ca0f..265b87350ae 100644 --- a/python/cudf/cudf/core/udf/groupby_utils.py +++ b/python/cudf/cudf/core/udf/groupby_utils.py @@ -30,7 +30,7 @@ _supported_dtypes_from_frame, ) from cudf.utils._numba import _CUDFNumbaConfig -from cudf.utils.nvtx_annotation import _cudf_nvtx_annotate +from cudf.utils.performance_tracking import _performance_tracking def _get_frame_groupby_type(dtype, index_dtype): @@ -126,7 +126,7 @@ def _get_groupby_apply_kernel(frame, func, args): return kernel, return_type -@_cudf_nvtx_annotate +@_performance_tracking def jit_groupby_apply(offsets, grouped_values, function, *args): """ Main entrypoint for JIT Groupby.apply via Numba. diff --git a/python/cudf/cudf/core/udf/utils.py b/python/cudf/cudf/core/udf/utils.py index f1704e4ea78..d616761cb3b 100644 --- a/python/cudf/cudf/core/udf/utils.py +++ b/python/cudf/cudf/core/udf/utils.py @@ -38,7 +38,7 @@ STRING_TYPES, TIMEDELTA_TYPES, ) -from cudf.utils.nvtx_annotation import _cudf_nvtx_annotate +from cudf.utils.performance_tracking import _performance_tracking from cudf.utils.utils import initfunc # Maximum size of a string column is 2 GiB @@ -71,7 +71,7 @@ def _ptx_file(): ) -@_cudf_nvtx_annotate +@_performance_tracking def _get_udf_return_type(argty, func: Callable, args=()): """ Get the return type of a masked UDF for a given set of argument dtypes. 
It @@ -236,7 +236,7 @@ def _generate_cache_key(frame, func: Callable, args, suffix="__APPLY_UDF"): ) -@_cudf_nvtx_annotate +@_performance_tracking def _compile_or_get( frame, func, args, kernel_getter=None, suffix="__APPLY_UDF" ): diff --git a/python/cudf/cudf/io/csv.py b/python/cudf/cudf/io/csv.py index f07764e2ce4..e909d96309e 100644 --- a/python/cudf/cudf/io/csv.py +++ b/python/cudf/cudf/io/csv.py @@ -12,10 +12,10 @@ from cudf.api.types import is_scalar from cudf.utils import ioutils from cudf.utils.dtypes import _maybe_convert_to_default_type -from cudf.utils.nvtx_annotation import _cudf_nvtx_annotate +from cudf.utils.performance_tracking import _performance_tracking -@_cudf_nvtx_annotate +@_performance_tracking @ioutils.doc_read_csv() def read_csv( filepath_or_buffer, @@ -151,7 +151,7 @@ def read_csv( return df -@_cudf_nvtx_annotate +@_performance_tracking @ioutils.doc_to_csv() def to_csv( df, diff --git a/python/cudf/cudf/io/parquet.py b/python/cudf/cudf/io/parquet.py index 2a838ca7417..7733e770d99 100644 --- a/python/cudf/cudf/io/parquet.py +++ b/python/cudf/cudf/io/parquet.py @@ -22,7 +22,7 @@ from cudf.api.types import is_list_like from cudf.core.column import as_column, build_categorical_column, column_empty from cudf.utils import ioutils -from cudf.utils.nvtx_annotation import _cudf_nvtx_annotate +from cudf.utils.performance_tracking import _performance_tracking BYTE_SIZES = { "kb": 1000, @@ -50,7 +50,7 @@ } -@_cudf_nvtx_annotate +@_performance_tracking def _write_parquet( df, paths, @@ -130,7 +130,7 @@ def _write_parquet( # Logic chosen to match: https://arrow.apache.org/ # docs/_modules/pyarrow/parquet.html#write_to_dataset -@_cudf_nvtx_annotate +@_performance_tracking def write_to_dataset( df, root_path, @@ -318,7 +318,7 @@ def write_to_dataset( @ioutils.doc_read_parquet_metadata() -@_cudf_nvtx_annotate +@_performance_tracking def read_parquet_metadata(filepath_or_buffer): """{docstring}""" # Multiple sources are passed as a list. 
If a single source is passed, @@ -360,7 +360,7 @@ def read_parquet_metadata(filepath_or_buffer): return libparquet.read_parquet_metadata(filepaths_or_buffers) -@_cudf_nvtx_annotate +@_performance_tracking def _process_dataset( paths, fs, @@ -515,7 +515,7 @@ def _process_dataset( @ioutils.doc_read_parquet() -@_cudf_nvtx_annotate +@_performance_tracking def read_parquet( filepath_or_buffer, engine="cudf", @@ -785,7 +785,7 @@ def _handle_is(column: cudf.Series, value, *, negate) -> cudf.Series: return df -@_cudf_nvtx_annotate +@_performance_tracking def _parquet_to_frame( paths_or_buffers, *args, @@ -885,7 +885,7 @@ def _parquet_to_frame( return dfs[0] -@_cudf_nvtx_annotate +@_performance_tracking def _read_parquet( filepaths_or_buffers, engine, @@ -941,7 +941,7 @@ def _read_parquet( @ioutils.doc_to_parquet() -@_cudf_nvtx_annotate +@_performance_tracking def to_parquet( df, path, @@ -1107,7 +1107,7 @@ def _get_estimated_file_size(df): return file_size -@_cudf_nvtx_annotate +@_performance_tracking def _get_partitioned( df, root_path, @@ -1145,7 +1145,7 @@ def _get_partitioned( return full_paths, metadata_file_paths, grouped_df, part_offsets, filename -@_cudf_nvtx_annotate +@_performance_tracking def _get_groups_and_offsets( df, partition_cols, @@ -1305,7 +1305,7 @@ class ParquetDatasetWriter: """ - @_cudf_nvtx_annotate + @_performance_tracking def __init__( self, path, @@ -1355,7 +1355,7 @@ def __init__( self._file_sizes: dict[str, int] = {} - @_cudf_nvtx_annotate + @_performance_tracking def write_table(self, df): """ Write a dataframe to the file/dataset @@ -1486,7 +1486,7 @@ def write_table(self, df): self.path_cw_map.update({k: new_cw_idx for k in new_paths}) self._chunked_writers[-1][0].write_table(grouped_df, part_info) - @_cudf_nvtx_annotate + @_performance_tracking def close(self, return_metadata=False): """ Close all open files and optionally return footer metadata as a binary diff --git a/python/cudf/cudf/io/text.py b/python/cudf/cudf/io/text.py index 0e19972f6e0..4329480bb2c 100644 --- a/python/cudf/cudf/io/text.py +++ b/python/cudf/cudf/io/text.py @@ -1,14 +1,14 @@ -# Copyright (c) 2018-2023, NVIDIA CORPORATION. +# Copyright (c) 2018-2024, NVIDIA CORPORATION. from io import BytesIO, StringIO import cudf from cudf._lib import text as libtext from cudf.utils import ioutils -from cudf.utils.nvtx_annotation import _cudf_nvtx_annotate +from cudf.utils.performance_tracking import _performance_tracking -@_cudf_nvtx_annotate +@_performance_tracking @ioutils.doc_read_text() def read_text( filepath_or_buffer, diff --git a/python/cudf/cudf/options.py b/python/cudf/cudf/options.py index fb5a963f008..1f539e7f266 100644 --- a/python/cudf/cudf/options.py +++ b/python/cudf/cudf/options.py @@ -311,6 +311,20 @@ def _integer_and_none_validator(val): _make_contains_validator([False, True]), ) +_register_option( + "memory_profiling", + _env_get_bool("CUDF_MEMORY_PROFILING", False), + textwrap.dedent( + """ + If set to `False`, disables memory profiling. + If set to `True`, enables memory profiling. + Read more at: :ref:`memory-profiling-user-doc` + \tValid values are True or False. Default is False. 
+ """ + ), + _make_contains_validator([False, True]), +) + class option_context(ContextDecorator): """ diff --git a/python/cudf/cudf/pandas/_wrappers/numpy.py b/python/cudf/cudf/pandas/_wrappers/numpy.py index c445be46f58..3b012169676 100644 --- a/python/cudf/cudf/pandas/_wrappers/numpy.py +++ b/python/cudf/cudf/pandas/_wrappers/numpy.py @@ -129,6 +129,19 @@ def wrap_ndarray(cls, arr: cupy.ndarray | numpy.ndarray, constructor): }, ) + +flatiter = make_final_proxy_type( + "flatiter", + cupy.flatiter, + numpy.flatiter, + fast_to_slow=lambda fast: cupy.asnumpy(fast.base).flat, + slow_to_fast=lambda slow: cupy.asarray(slow).flat, + additional_attributes={ + "__array__": array_method, + }, +) + + # Mapping flags between slow and fast types _ndarray_flags = make_intermediate_proxy_type( "_ndarray_flags", diff --git a/python/cudf/cudf/pandas/_wrappers/pandas.py b/python/cudf/cudf/pandas/_wrappers/pandas.py index 0ba432d6d0e..a64bf7772fe 100644 --- a/python/cudf/cudf/pandas/_wrappers/pandas.py +++ b/python/cudf/cudf/pandas/_wrappers/pandas.py @@ -522,6 +522,22 @@ def Index__new__(cls, *args, **kwargs): }, ) +ArrowStringArrayNumpySemantics = make_final_proxy_type( + "ArrowStringArrayNumpySemantics", + _Unusable, + pd.core.arrays.string_arrow.ArrowStringArrayNumpySemantics, + fast_to_slow=_Unusable(), + slow_to_fast=_Unusable(), +) + +ArrowStringArray = make_final_proxy_type( + "ArrowStringArray", + _Unusable, + pd.core.arrays.string_arrow.ArrowStringArray, + fast_to_slow=_Unusable(), + slow_to_fast=_Unusable(), +) + StringDtype = make_final_proxy_type( "StringDtype", _Unusable, diff --git a/python/cudf/cudf/pandas/scripts/run-pandas-tests.sh b/python/cudf/cudf/pandas/scripts/run-pandas-tests.sh index cd9f90d50fe..a66f63c09b3 100755 --- a/python/cudf/cudf/pandas/scripts/run-pandas-tests.sh +++ b/python/cudf/cudf/pandas/scripts/run-pandas-tests.sh @@ -133,7 +133,8 @@ and not test_s3_roundtrip" TEST_THAT_CRASH_PYTEST_WORKERS="not test_bitmasks_pyarrow \ and not test_large_string_pyarrow \ and not test_interchange_from_corrected_buffer_dtypes \ -and not test_eof_states" +and not test_eof_states \ +and not test_array_tz" # TODO: Remove "not db" once a postgres & mysql container is set up on the CI PANDAS_CI="1" timeout 30m python -m pytest -p cudf.pandas \ diff --git a/python/cudf/cudf/pylibcudf_tests/common/utils.py b/python/cudf/cudf/pylibcudf_tests/common/utils.py index 58d5548bd90..26e716981fc 100644 --- a/python/cudf/cudf/pylibcudf_tests/common/utils.py +++ b/python/cudf/cudf/pylibcudf_tests/common/utils.py @@ -1,6 +1,9 @@ # Copyright (c) 2024, NVIDIA CORPORATION. 
from __future__ import annotations +import io +import os + import pyarrow as pa import pytest @@ -108,6 +111,8 @@ def _make_fields_nullable(typ): print(lhs) print(rhs) + print(lhs.type) + print(rhs.type) assert lhs.equals(rhs) @@ -171,21 +176,16 @@ def is_signed_integer(plc_dtype: plc.DataType): ) -def is_unsigned_integer(plc_dtype: plc.DataType): - return plc_dtype.id() in ( - plc.TypeId.UINT8, - plc.TypeId.UINT16, - plc.TypeId.UINT32, - plc.TypeId.UINT64, - ) - - def is_integer(plc_dtype: plc.DataType): return plc_dtype.id() in ( plc.TypeId.INT8, plc.TypeId.INT16, plc.TypeId.INT32, plc.TypeId.INT64, + plc.TypeId.UINT8, + plc.TypeId.UINT16, + plc.TypeId.UINT32, + plc.TypeId.UINT64, ) @@ -212,28 +212,45 @@ def is_fixed_width(plc_dtype: plc.DataType): ) -def nesting(typ) -> tuple[int, int]: +def nesting_level(typ) -> tuple[int, int]: """Return list and struct nesting of a pyarrow type.""" if isinstance(typ, pa.ListType): - list_, struct = nesting(typ.value_type) + list_, struct = nesting_level(typ.value_type) return list_ + 1, struct elif isinstance(typ, pa.StructType): - lists, structs = map(max, zip(*(nesting(t.type) for t in typ))) + lists, structs = map(max, zip(*(nesting_level(t.type) for t in typ))) return lists, structs + 1 else: return 0, 0 def is_nested_struct(typ): - return nesting(typ)[1] > 1 + return nesting_level(typ)[1] > 1 def is_nested_list(typ): - return nesting(typ)[0] > 1 + return nesting_level(typ)[0] > 1 + + +def sink_to_str(sink): + """ + Takes a sink (e.g. StringIO/BytesIO, filepath, etc.) + and reads in the contents into a string (str not bytes) + for comparison + """ + if isinstance(sink, (str, os.PathLike)): + with open(sink, "r") as f: + str_result = f.read() + elif isinstance(sink, io.BytesIO): + sink.seek(0) + str_result = sink.read().decode() + else: + sink.seek(0) + str_result = sink.read() + return str_result -# TODO: enable uint64, some failing tests -NUMERIC_PA_TYPES = [pa.int64(), pa.float64()] # pa.uint64()] +NUMERIC_PA_TYPES = [pa.int64(), pa.float64(), pa.uint64()] STRING_PA_TYPES = [pa.string()] BOOL_PA_TYPES = [pa.bool_()] LIST_PA_TYPES = [ @@ -271,11 +288,6 @@ def is_nested_list(typ): + DEFAULT_PA_STRUCT_TESTING_TYPES ) -ALL_PA_TYPES = ( - DEFAULT_PA_TYPES + LIST_PA_TYPES[1:] + DEFAULT_PA_STRUCT_TESTING_TYPES[1:] -) - - # Map pylibcudf compression types to pandas ones # Not all compression types map cleanly, read the comments to learn more! # If a compression type is unsupported, it maps to False. 
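The new `sink_to_str` helper gives the write-path tests one way to turn any sink (a filepath, a `StringIO`, or a `BytesIO`) back into a `str` for comparison against pandas output. A short usage sketch; the import mirrors how the io tests below pull it from this shared utils module, and the JSON payload is made up:

import io

from utils import sink_to_str

text_sink = io.StringIO()
text_sink.write('[{"a":1}]')
assert sink_to_str(text_sink) == '[{"a":1}]'

byte_sink = io.BytesIO()
byte_sink.write(b'[{"a":1}]')
assert sink_to_str(byte_sink) == '[{"a":1}]'   # BytesIO contents are decoded to str

# A filepath (str or os.PathLike) is simply opened and read:
# sink_to_str("out.json")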
@@ -298,3 +310,4 @@ def is_nested_list(typ): CompressionType.SNAPPY: "snappy", CompressionType.BROTLI: "brotli", } +ALL_PA_TYPES = DEFAULT_PA_TYPES diff --git a/python/cudf/cudf/pylibcudf_tests/conftest.py b/python/cudf/cudf/pylibcudf_tests/conftest.py index ed457f3d671..39832eb4bba 100644 --- a/python/cudf/cudf/pylibcudf_tests/conftest.py +++ b/python/cudf/cudf/pylibcudf_tests/conftest.py @@ -37,6 +37,8 @@ def numeric_pa_type(request): return request.param +# TODO: Consider adding another fixture/adapting this +# fixture to consider nullability @pytest.fixture(scope="session", params=[0, 100]) def table_data(request): """ @@ -50,64 +52,47 @@ def table_data(request): """ nrows = request.param - table_dict = dict() + table_dict = {} # Colnames in the format expected by # plc.io.TableWithMetadata colnames = [] + np.random.seed(42) + for typ in ALL_PA_TYPES: rand_vals = np.random.randint(0, nrows, nrows) child_colnames = [] - if isinstance(typ, pa.ListType): - - def _generate_list_data(typ): - child_colnames = [] - if isinstance(typ, pa.ListType): - # recurse to get vals - rand_arrs, grandchild_colnames = _generate_list_data( - typ.value_type - ) - pa_array = pa.array( - [list(row_vals) for row_vals in zip(rand_arrs)], - type=typ, - ) - child_colnames.append(("", grandchild_colnames)) - else: - # typ is scalar type - pa_array = pa.array(rand_vals).cast(typ) - child_colnames.append(("", [])) - return pa_array, child_colnames - - rand_arr, child_colnames = _generate_list_data(typ) - elif isinstance(typ, pa.StructType): - - def _generate_struct_data(typ): - child_colnames = [] - if isinstance(typ, pa.StructType): - # recurse to get vals - rand_arrs = [] - for i in range(typ.num_fields): - rand_arr, grandchild_colnames = _generate_struct_data( - typ.field(i).type - ) - rand_arrs.append(rand_arr) - child_colnames.append( - (typ.field(i).name, grandchild_colnames) - ) - - pa_array = pa.StructArray.from_arrays( - [rand_arr for rand_arr in rand_arrs], - names=[ - typ.field(i).name for i in range(typ.num_fields) - ], - ) - else: - # typ is scalar type - pa_array = pa.array(rand_vals).cast(typ) - return pa_array, child_colnames - - rand_arr, child_colnames = _generate_struct_data(typ) + def _generate_nested_data(typ): + child_colnames = [] + + # recurse to get vals for children + rand_arrs = [] + for i in range(typ.num_fields): + rand_arr, grandchild_colnames = _generate_nested_data( + typ.field(i).type + ) + rand_arrs.append(rand_arr) + child_colnames.append((typ.field(i).name, grandchild_colnames)) + + if isinstance(typ, pa.StructType): + pa_array = pa.StructArray.from_arrays( + [rand_arr for rand_arr in rand_arrs], + names=[typ.field(i).name for i in range(typ.num_fields)], + ) + elif isinstance(typ, pa.ListType): + pa_array = pa.array( + [list(row_vals) for row_vals in zip(rand_arrs[0])], + type=typ, + ) + child_colnames.append(("", grandchild_colnames)) + else: + # typ is scalar type + pa_array = pa.array(rand_vals).cast(typ) + return pa_array, child_colnames + + if isinstance(typ, (pa.ListType, pa.StructType)): + rand_arr, child_colnames = _generate_nested_data(typ) else: rand_arr = pa.array(rand_vals).cast(typ) @@ -122,21 +107,18 @@ def _generate_struct_data(typ): @pytest.fixture( - params=["a.txt", pathlib.Path("a.txt"), io.BytesIO(), io.StringIO()], + params=["a.txt", pathlib.Path("a.txt"), io.BytesIO, io.StringIO], ) def source_or_sink(request, tmp_path): fp_or_buf = request.param if isinstance(fp_or_buf, str): - fp_or_buf = f"{tmp_path}/{fp_or_buf}" + return f"{tmp_path}/{fp_or_buf}" elif 
isinstance(fp_or_buf, os.PathLike): - fp_or_buf = tmp_path.joinpath(fp_or_buf) - - yield fp_or_buf - # Cleanup after ourselves - # since the BytesIO and StringIO objects get cached by pytest - if isinstance(fp_or_buf, io.IOBase): - fp_or_buf.seek(0) - fp_or_buf.truncate(0) + return tmp_path.joinpath(fp_or_buf) + elif issubclass(fp_or_buf, io.IOBase): + # Must construct io.StringIO/io.BytesIO inside + # fixture, or we'll end up re-using it + return fp_or_buf() @pytest.fixture(params=[opt for opt in plc.io.types.CompressionType]) diff --git a/python/cudf/cudf/pylibcudf_tests/test_avro.py b/python/cudf/cudf/pylibcudf_tests/io/test_avro.py similarity index 100% rename from python/cudf/cudf/pylibcudf_tests/test_avro.py rename to python/cudf/cudf/pylibcudf_tests/io/test_avro.py diff --git a/python/cudf/cudf/pylibcudf_tests/test_json.py b/python/cudf/cudf/pylibcudf_tests/io/test_json.py similarity index 67% rename from python/cudf/cudf/pylibcudf_tests/test_json.py rename to python/cudf/cudf/pylibcudf_tests/io/test_json.py index fdcf8a5a490..ccc4551a6e4 100644 --- a/python/cudf/cudf/pylibcudf_tests/test_json.py +++ b/python/cudf/cudf/pylibcudf_tests/io/test_json.py @@ -4,7 +4,11 @@ import pandas as pd import pyarrow as pa import pytest -from utils import COMPRESSION_TYPE_TO_PANDAS, assert_table_and_meta_eq +from utils import ( + COMPRESSION_TYPE_TO_PANDAS, + assert_table_and_meta_eq, + sink_to_str, +) import cudf._lib.pylibcudf as plc from cudf._lib.pylibcudf.io.types import CompressionType @@ -37,6 +41,114 @@ def write_json_bytes(source, json_bytes): source.seek(0) +@pytest.mark.parametrize("rows_per_chunk", [8, 100]) +@pytest.mark.parametrize("lines", [True, False]) +def test_write_json_basic(table_data, source_or_sink, lines, rows_per_chunk): + plc_table_w_meta, pa_table = table_data + sink = source_or_sink + + plc.io.json.write_json( + plc.io.SinkInfo([sink]), + plc_table_w_meta, + lines=lines, + rows_per_chunk=rows_per_chunk, + ) + + exp = pa_table.to_pandas() + + # Convert everything to string to make + # comparisons easier + str_result = sink_to_str(sink) + + pd_result = exp.to_json(orient="records", lines=lines) + + assert str_result == pd_result + + +@pytest.mark.parametrize("include_nulls", [True, False]) +@pytest.mark.parametrize("na_rep", ["null", "awef", ""]) +def test_write_json_nulls(na_rep, include_nulls): + names = ["a", "b"] + pa_tbl = pa.Table.from_arrays( + [pa.array([1.0, 2.0, None]), pa.array([True, None, False])], + names=names, + ) + plc_tbl = plc.interop.from_arrow(pa_tbl) + plc_tbl_w_meta = plc.io.types.TableWithMetadata( + plc_tbl, column_names=[(name, []) for name in names] + ) + + sink = io.StringIO() + + plc.io.json.write_json( + plc.io.SinkInfo([sink]), + plc_tbl_w_meta, + na_rep=na_rep, + include_nulls=include_nulls, + ) + + exp = pa_tbl.to_pandas() + + # Convert everything to string to make + # comparisons easier + str_result = sink_to_str(sink) + pd_result = exp.to_json(orient="records") + + if not include_nulls: + # No equivalent in pandas, so we just + # sanity check by making sure na_rep + # doesn't appear in the output + + # don't quote null + for name in names: + assert f'{{"{name}":{na_rep}}}' not in str_result + return + + # pandas doesn't suppport na_rep + # let's just manually do str.replace + pd_result = pd_result.replace("null", na_rep) + + assert str_result == pd_result + + +@pytest.mark.parametrize("true_value", ["True", "correct"]) +@pytest.mark.parametrize("false_value", ["False", "wrong"]) +def test_write_json_bool_opts(true_value, false_value): + 
names = ["a"] + pa_tbl = pa.Table.from_arrays([pa.array([True, None, False])], names=names) + plc_tbl = plc.interop.from_arrow(pa_tbl) + plc_tbl_w_meta = plc.io.types.TableWithMetadata( + plc_tbl, column_names=[(name, []) for name in names] + ) + + sink = io.StringIO() + + plc.io.json.write_json( + plc.io.SinkInfo([sink]), + plc_tbl_w_meta, + include_nulls=True, + na_rep="null", + true_value=true_value, + false_value=false_value, + ) + + exp = pa_tbl.to_pandas() + + # Convert everything to string to make + # comparisons easier + str_result = sink_to_str(sink) + pd_result = exp.to_json(orient="records") + + # pandas doesn't suppport na_rep + # let's just manually do str.replace + if true_value != "true": + pd_result = pd_result.replace("true", true_value) + if false_value != "false": + pd_result = pd_result.replace("false", false_value) + + assert str_result == pd_result + + @pytest.mark.parametrize("lines", [True, False]) def test_read_json_basic( table_data, source_or_sink, lines, compression_type, request @@ -85,11 +197,28 @@ def test_read_json_basic( lines=lines, ) - # orient=records is lossy - # and doesn't preserve column names when there's zero rows in the table + # Adjustments to correct for the fact orient=records is lossy + # and doesn't + # 1) preserve colnames when zero rows in table + # 2) preserve struct nullability + # 3) differentiate int64/uint64 if len(pa_table) == 0: pa_table = pa.table([]) + new_fields = [] + for i in range(len(pa_table.schema)): + curr_field = pa_table.schema.field(i) + if curr_field.type == pa.uint64(): + try: + curr_field = curr_field.with_type(pa.int64()) + except OverflowError: + # There will be no confusion, values are too large + # for int64 anyways + pass + new_fields.append(curr_field) + + pa_table = pa_table.cast(pa.schema(new_fields)) + # Convert non-nullable struct fields to nullable fields # since nullable=False cannot roundtrip through orient='records' # JSON format diff --git a/python/cudf/cudf/pylibcudf_tests/test_source_info.py b/python/cudf/cudf/pylibcudf_tests/io/test_source_sink_info.py similarity index 72% rename from python/cudf/cudf/pylibcudf_tests/test_source_info.py rename to python/cudf/cudf/pylibcudf_tests/io/test_source_sink_info.py index 019321b7259..287dd8f21c8 100644 --- a/python/cudf/cudf/pylibcudf_tests/test_source_info.py +++ b/python/cudf/cudf/pylibcudf_tests/io/test_source_sink_info.py @@ -9,6 +9,21 @@ from cudf._lib.pylibcudf.io.datasource import NativeFileDatasource +@pytest.fixture(params=[plc.io.SourceInfo, plc.io.SinkInfo]) +def io_class(request): + return request.param + + +def _skip_invalid_sinks(io_class, sink): + """ + Skip invalid sinks for SinkInfo + """ + if io_class is plc.io.SinkInfo and isinstance( + sink, (bytes, NativeFileDatasource) + ): + pytest.skip(f"{sink} is not a valid input for SinkInfo") + + @pytest.mark.parametrize( "source", [ @@ -18,16 +33,15 @@ NativeFileDatasource(pa.PythonFile(io.BytesIO(), mode="r")), ], ) -def test_source_info_ctor(source, tmp_path): +def test_source_info_ctor(io_class, source, tmp_path): if isinstance(source, str): file = tmp_path / source file.write_bytes("hello world".encode("utf-8")) source = str(file) - plc.io.SourceInfo([source]) + _skip_invalid_sinks(io_class, source) - # TODO: test contents of source_info buffer is correct - # once buffers are exposed on python side + io_class([source]) @pytest.mark.parametrize( @@ -42,7 +56,7 @@ def test_source_info_ctor(source, tmp_path): ], ], ) -def test_source_info_ctor_multiple(sources, tmp_path): +def 
test_source_info_ctor_multiple(io_class, sources, tmp_path): for i in range(len(sources)): source = sources[i] if isinstance(source, str): @@ -50,10 +64,9 @@ def test_source_info_ctor_multiple(sources, tmp_path): file.write_bytes("hello world".encode("utf-8")) sources[i] = str(file) - plc.io.SourceInfo(sources) + _skip_invalid_sinks(io_class, source) - # TODO: test contents of source_info buffer is correct - # once buffers are exposed on python side + io_class(sources) @pytest.mark.parametrize( @@ -73,7 +86,7 @@ def test_source_info_ctor_multiple(sources, tmp_path): ], ], ) -def test_source_info_ctor_mixing_invalid(sources, tmp_path): +def test_source_info_ctor_mixing_invalid(io_class, sources, tmp_path): # Unlike the previous test # don't create files so that they are missing for i in range(len(sources)): @@ -82,8 +95,9 @@ def test_source_info_ctor_mixing_invalid(sources, tmp_path): file = tmp_path / source file.write_bytes("hello world".encode("utf-8")) sources[i] = str(file) + _skip_invalid_sinks(io_class, source) with pytest.raises(ValueError): - plc.io.SourceInfo(sources) + io_class(sources) def test_source_info_invalid(): diff --git a/python/cudf/cudf/pylibcudf_tests/test_lists.py b/python/cudf/cudf/pylibcudf_tests/test_lists.py index b21af8ea11c..c781126e388 100644 --- a/python/cudf/cudf/pylibcudf_tests/test_lists.py +++ b/python/cudf/cudf/pylibcudf_tests/test_lists.py @@ -7,15 +7,28 @@ from cudf._lib import pylibcudf as plc -def test_concatenate_rows(): - test_data = [[[0, 1], [2], [5], [6, 7]], [[8], [9], [], [13, 14, 15]]] +@pytest.fixture +def test_data(): + return [[[[0, 1], [2], [5], [6, 7]], [[8], [9], [], [13, 14, 15]]]] - arrow_tbl = pa.Table.from_arrays(test_data, names=["a", "b"]) + +@pytest.fixture +def scalar(): + return pa.scalar(1) + + +@pytest.fixture +def column(): + return pa.array([3, 2, 5, 6]), pa.array([-1, 0, 0, 0], type=pa.int32()) + + +def test_concatenate_rows(test_data): + arrow_tbl = pa.Table.from_arrays(test_data[0], names=["a", "b"]) plc_tbl = plc.interop.from_arrow(arrow_tbl) res = plc.lists.concatenate_rows(plc_tbl) - expect = pa.array([pair[0] + pair[1] for pair in zip(*test_data)]) + expect = pa.array([pair[0] + pair[1] for pair in zip(*test_data[0])]) assert_column_eq(expect, res) @@ -44,3 +57,80 @@ def test_concatenate_list_elements(test_data, dropna, expected): expect = pa.array(expected) assert_column_eq(expect, res) + + +def test_contains_scalar(test_data, scalar): + list_column = test_data[0][0] + arr = pa.array(list_column) + + plc_column = plc.interop.from_arrow(arr) + plc_scalar = plc.interop.from_arrow(scalar) + res = plc.lists.contains(plc_column, plc_scalar) + + expect = pa.array([True, False, False, False]) + + assert_column_eq(expect, res) + + +def test_contains_list_column(test_data): + list_column1 = test_data[0][0] + list_column2 = [1, 3, 5, 1] + arr1 = pa.array(list_column1) + arr2 = pa.array(list_column2) + + plc_column1 = plc.interop.from_arrow(arr1) + plc_column2 = plc.interop.from_arrow(arr2) + res = plc.lists.contains(plc_column1, plc_column2) + + expect = pa.array([True, False, True, False]) + + assert_column_eq(expect, res) + + +@pytest.mark.parametrize( + "list_column, expected", + [ + ( + [[1, None], [1, 3, 4], [5, None]], + [True, False, True], + ), + ( + [[1, None], None, [5]], + [True, None, False], + ), + ], +) +def test_contains_nulls(list_column, expected): + arr = pa.array(list_column) + plc_column = plc.interop.from_arrow(arr) + res = plc.lists.contains_nulls(plc_column) + + expect = pa.array(expected) + + 
assert_column_eq(expect, res) + + +def test_index_of_scalar(test_data, scalar): + list_column = test_data[0][0] + arr = pa.array(list_column) + + plc_column = plc.interop.from_arrow(arr) + plc_scalar = plc.interop.from_arrow(scalar) + res = plc.lists.index_of(plc_column, plc_scalar, True) + + expect = pa.array([1, -1, -1, -1], type=pa.int32()) + + assert_column_eq(expect, res) + + +def test_index_of_list_column(test_data, column): + list_column = test_data[0][0] + arr1 = pa.array(list_column) + arr2, expect = column + plc_column1 = plc.interop.from_arrow(arr1) + plc_column2 = plc.interop.from_arrow(arr2) + res = plc.lists.index_of(plc_column1, plc_column2, True) + + expect = pa.array(column[1], type=pa.int32()) + + assert_column_eq(expect, res) diff --git a/python/cudf/cudf/tests/test_dataframe.py b/python/cudf/cudf/tests/test_dataframe.py index fc7fd87d4c5..f40106a30f4 100644 --- a/python/cudf/cudf/tests/test_dataframe.py +++ b/python/cudf/cudf/tests/test_dataframe.py @@ -11078,3 +11078,27 @@ def test_dataframe_loc_int_float(dtype1, dtype2): expected = pdf.loc[pidx] assert_eq(actual, expected, check_index_type=True, check_dtype=True) + + +@pytest.mark.parametrize( + "data", + [ + cudf.DataFrame(range(2)), + None, + [cudf.Series(range(2))], + [[0], [1]], + {1: range(2)}, + cupy.arange(2), + ], +) +def test_init_with_index_no_shallow_copy(data): + idx = cudf.RangeIndex(2) + df = cudf.DataFrame(data, index=idx) + assert df.index is idx + + +def test_from_records_with_index_no_shallow_copy(): + idx = cudf.RangeIndex(2) + data = np.array([(1.0, 2), (3.0, 4)], dtype=[("x", " 0 + + out = StringIO() + print_memory_report(file=out) + assert "DataFrame.merge" in out.getvalue() diff --git a/python/cudf/cudf/tests/test_repr.py b/python/cudf/cudf/tests/test_repr.py index 8f65bd26bd1..193d64a9e7f 100644 --- a/python/cudf/cudf/tests/test_repr.py +++ b/python/cudf/cudf/tests/test_repr.py @@ -1210,7 +1210,7 @@ def test_multiindex_repr(pmi, max_seq_items): .index, textwrap.dedent( """ - MultiIndex([('abc', 'NaT', 0.345), + MultiIndex([('abc', NaT, 0.345), ( , '0 days 00:00:00.000000001', ), ('xyz', '0 days 00:00:00.000000002', 100.0), ( , '0 days 00:00:00.000000003', 10.0)], @@ -1252,10 +1252,10 @@ def test_multiindex_repr(pmi, max_seq_items): .index, textwrap.dedent( """ - MultiIndex([('NaT', ), - ('NaT', ), - ('NaT', ), - ('NaT', )], + MultiIndex([(NaT, ), + (NaT, ), + (NaT, ), + (NaT, )], names=['b', 'a']) """ ), diff --git a/python/cudf/cudf/utils/nvtx_annotation.py b/python/cudf/cudf/utils/nvtx_annotation.py deleted file mode 100644 index a4404e51232..00000000000 --- a/python/cudf/cudf/utils/nvtx_annotation.py +++ /dev/null @@ -1,30 +0,0 @@ -# Copyright (c) 2023, NVIDIA CORPORATION. 
- -import hashlib -from functools import partial - -from nvtx import annotate - -_NVTX_COLORS = ["green", "blue", "purple", "rapids"] - - -def _get_color_for_nvtx(name): - m = hashlib.sha256() - m.update(name.encode()) - hash_value = int(m.hexdigest(), 16) - idx = hash_value % len(_NVTX_COLORS) - return _NVTX_COLORS[idx] - - -def _cudf_nvtx_annotate(func, domain="cudf_python"): - """Decorator for applying nvtx annotations to methods in cudf.""" - return annotate( - message=func.__qualname__, - color=_get_color_for_nvtx(func.__qualname__), - domain=domain, - )(func) - - -_dask_cudf_nvtx_annotate = partial( - _cudf_nvtx_annotate, domain="dask_cudf_python" -) diff --git a/python/cudf/cudf/utils/performance_tracking.py b/python/cudf/cudf/utils/performance_tracking.py new file mode 100644 index 00000000000..30c891d0d5a --- /dev/null +++ b/python/cudf/cudf/utils/performance_tracking.py @@ -0,0 +1,82 @@ +# Copyright (c) 2024, NVIDIA CORPORATION. + +from __future__ import annotations + +import contextlib +import functools +import hashlib +import sys + +import nvtx + +import rmm.statistics + +from cudf.options import get_option + +_NVTX_COLORS = ["green", "blue", "purple", "rapids"] + + +def _get_color_for_nvtx(name): + m = hashlib.sha256() + m.update(name.encode()) + hash_value = int(m.hexdigest(), 16) + idx = hash_value % len(_NVTX_COLORS) + return _NVTX_COLORS[idx] + + +def _performance_tracking(func, domain="cudf_python"): + """Decorator for applying performance tracking (if enabled).""" + + @functools.wraps(func) + def wrapper(*args, **kwargs): + with contextlib.ExitStack() as stack: + if get_option("memory_profiling"): + # NB: the user still needs to call `rmm.statistics.enable_statistics()` + # to enable memory profiling. + stack.enter_context( + rmm.statistics.profiler( + name=rmm.statistics._get_descriptive_name_of_object( + func + ) + ) + ) + if nvtx.enabled(): + stack.enter_context( + nvtx.annotate( + message=func.__qualname__, + color=_get_color_for_nvtx(func.__qualname__), + domain=domain, + ) + ) + return func(*args, **kwargs) + + return wrapper + + +_dask_cudf_performance_tracking = functools.partial( + _performance_tracking, domain="dask_cudf_python" +) + + +def get_memory_records() -> ( + dict[str, rmm.statistics.ProfilerRecords.MemoryRecord] +): + """Get the memory records from the memory profiling + + Returns + ------- + Dict that maps function names to memory records. Empty if + memory profiling is disabled + """ + return rmm.statistics.default_profiler_records.records + + +def print_memory_report(file=sys.stdout) -> None: + """Pretty print the result of the memory profiling + + Parameters + ---------- + file + The output stream + """ + print(rmm.statistics.default_profiler_records.report(), file=file) diff --git a/python/cudf/cudf/utils/utils.py b/python/cudf/cudf/utils/utils.py index 2e4dfc4bb14..7347ec7866a 100644 --- a/python/cudf/cudf/utils/utils.py +++ b/python/cudf/cudf/utils/utils.py @@ -159,8 +159,9 @@ def _external_only_api(func, alternative=""): @functools.wraps(func) def wrapper(*args, **kwargs): # Check the immediately preceding frame to see if it's in cudf. - frame, lineno = next(traceback.walk_stack(None)) - fn = frame.f_code.co_filename + pre_frame = traceback.extract_stack(limit=2)[0] + fn = pre_frame.filename + lineno = pre_frame.lineno if _cudf_root in fn and _tests_root not in fn: raise RuntimeError( f"External-only API called in {fn} at line {lineno}. 
" diff --git a/python/cudf/cudf_pandas_tests/test_cudf_pandas.py b/python/cudf/cudf_pandas_tests/test_cudf_pandas.py index eed5037cbea..f51ce103677 100644 --- a/python/cudf/cudf_pandas_tests/test_cudf_pandas.py +++ b/python/cudf/cudf_pandas_tests/test_cudf_pandas.py @@ -1533,3 +1533,36 @@ def test_is_proxy_object(): assert is_proxy_object(np_arr_proxy) assert is_proxy_object(s1) assert not is_proxy_object(s2) + + +def test_numpy_cupy_flatiter(series): + cp = pytest.importorskip("cupy") + + _, s = series + arr = s.values + + assert type(arr.flat._fsproxy_fast) == cp.flatiter + assert type(arr.flat._fsproxy_slow) == np.flatiter + + +def test_arrow_string_arrays(): + cu_s = xpd.Series(["a", "b", "c"]) + pd_s = pd.Series(["a", "b", "c"]) + + cu_arr = xpd.arrays.ArrowStringArray._from_sequence( + cu_s, dtype=xpd.StringDtype("pyarrow") + ) + pd_arr = pd.arrays.ArrowStringArray._from_sequence( + pd_s, dtype=pd.StringDtype("pyarrow") + ) + + tm.assert_equal(cu_arr, pd_arr) + + cu_arr = xpd.core.arrays.string_arrow.ArrowStringArray._from_sequence( + cu_s, dtype=xpd.StringDtype("pyarrow_numpy") + ) + pd_arr = pd.core.arrays.string_arrow.ArrowStringArray._from_sequence( + pd_s, dtype=pd.StringDtype("pyarrow_numpy") + ) + + tm.assert_equal(cu_arr, pd_arr) diff --git a/python/cudf/udf_cpp/strings/src/strings/udf/udf_apis.cu b/python/cudf/udf_cpp/strings/src/strings/udf/udf_apis.cu index 941e61e6787..b924995cf4b 100644 --- a/python/cudf/udf_cpp/strings/src/strings/udf/udf_apis.cu +++ b/python/cudf/udf_cpp/strings/src/strings/udf/udf_apis.cu @@ -15,10 +15,10 @@ */ #include -#include #include #include #include +#include #include #include @@ -57,7 +57,7 @@ std::unique_ptr to_string_view_array(cudf::column_view const rmm::cuda_stream_view stream) { return std::make_unique( - std::move(cudf::strings::detail::create_string_vector_from_column( + std::move(cudf::strings::create_string_vector_from_column( cudf::strings_column_view(input), stream, rmm::mr::get_current_device_resource()) .release())); } diff --git a/python/cudf_polars/cudf_polars/dsl/expr.py b/python/cudf_polars/cudf_polars/dsl/expr.py index 871134665af..16cfd9b9749 100644 --- a/python/cudf_polars/cudf_polars/dsl/expr.py +++ b/python/cudf_polars/cudf_polars/dsl/expr.py @@ -27,11 +27,12 @@ import cudf._lib.pylibcudf as plc from cudf_polars.containers import Column, NamedColumn -from cudf_polars.utils import sorting +from cudf_polars.utils import dtypes, sorting if TYPE_CHECKING: from collections.abc import Mapping, Sequence + import polars.polars as plrs import polars.type_aliases as pl_types from cudf_polars.containers import DataFrame @@ -51,6 +52,7 @@ "GroupedRollingWindow", "Cast", "Agg", + "Ternary", "BinOp", ] @@ -368,6 +370,29 @@ def do_evaluate( return Column(plc.Column.from_scalar(plc.interop.from_arrow(self.value), 1)) +class LiteralColumn(Expr): + __slots__ = ("value",) + _non_child = ("dtype", "value") + value: pa.Array[Any, Any] + children: tuple[()] + + def __init__(self, dtype: plc.DataType, value: plrs.PySeries) -> None: + super().__init__(dtype) + data = value.to_arrow() + self.value = data.cast(dtypes.downcast_arrow_lists(data.type)) + + def do_evaluate( + self, + df: DataFrame, + *, + context: ExecutionContext = ExecutionContext.FRAME, + mapping: Mapping[Expr, Column] | None = None, + ) -> Column: + """Evaluate this expression given a dataframe for context.""" + # datatype of pyarrow array is correct by construction. 
+ return Column(plc.interop.from_arrow(self.value)) + + class Col(Expr): __slots__ = ("name",) _non_child = ("dtype", "name") @@ -443,12 +468,12 @@ def __init__( ): # With ignore_nulls == False, polars uses Kleene logic raise NotImplementedError(f"Kleene logic for {self.name}") - if self.name in ( - pl_expr.BooleanFunction.IsFinite, - pl_expr.BooleanFunction.IsInfinite, - pl_expr.BooleanFunction.IsIn, + if self.name == pl_expr.BooleanFunction.IsIn and not all( + c.dtype == self.children[0].dtype for c in self.children ): - raise NotImplementedError(f"{self.name}") + # TODO: If polars IR doesn't put the casts in, we need to + # mimic the supertype promotion rules. + raise NotImplementedError("IsIn doesn't support supertype casting") @staticmethod def _distinct( @@ -506,6 +531,33 @@ def do_evaluate( mapping: Mapping[Expr, Column] | None = None, ) -> Column: """Evaluate this expression given a dataframe for context.""" + if self.name in ( + pl_expr.BooleanFunction.IsFinite, + pl_expr.BooleanFunction.IsInfinite, + ): + # Avoid evaluating the child if the dtype tells us it's unnecessary. + (child,) = self.children + is_finite = self.name == pl_expr.BooleanFunction.IsFinite + if child.dtype.id() not in (plc.TypeId.FLOAT32, plc.TypeId.FLOAT64): + value = plc.interop.from_arrow( + pa.scalar(value=is_finite, type=plc.interop.to_arrow(self.dtype)) + ) + return Column(plc.Column.from_scalar(value, df.num_rows)) + needles = child.evaluate(df, context=context, mapping=mapping) + to_search = [-float("inf"), float("inf")] + if is_finite: + # NaN is neither finite not infinite + to_search.append(float("nan")) + haystack = plc.interop.from_arrow( + pa.array( + to_search, + type=plc.interop.to_arrow(needles.obj.type()), + ) + ) + result = plc.search.contains(haystack, needles.obj) + if is_finite: + result = plc.unary.unary_operation(result, plc.unary.UnaryOperator.NOT) + return Column(result) columns = [ child.evaluate(df, context=context, mapping=mapping) for child in self.children @@ -612,31 +664,13 @@ def do_evaluate( (c.obj for c in columns), ) ) - elif self.name == pl_expr.BooleanFunction.IsBetween: - column, lo, hi = columns - (closed,) = self.options - lop, rop = self._BETWEEN_OPS[closed] - lo_obj = ( - lo.obj_scalar - if lo.is_scalar and lo.obj.size() != column.obj.size() - else lo.obj - ) - hi_obj = ( - hi.obj_scalar - if hi.is_scalar and hi.obj.size() != column.obj.size() - else hi.obj - ) + elif self.name == pl_expr.BooleanFunction.IsIn: + needles, haystack = columns + return Column(plc.search.contains(haystack.obj, needles.obj)) + elif self.name == pl_expr.BooleanFunction.Not: + (column,) = columns return Column( - plc.binaryop.binary_operation( - plc.binaryop.binary_operation( - column.obj, lo_obj, lop, output_type=self.dtype - ), - plc.binaryop.binary_operation( - column.obj, hi_obj, rop, output_type=self.dtype - ), - plc.binaryop.BinaryOperator.LOGICAL_AND, - self.dtype, - ) + plc.unary.unary_operation(column.obj, plc.unary.UnaryOperator.NOT) ) else: raise NotImplementedError( @@ -1103,6 +1137,34 @@ def do_evaluate( return self.op(child.evaluate(df, context=context, mapping=mapping)) +class Ternary(Expr): + __slots__ = ("children",) + _non_child = ("dtype",) + children: tuple[Expr, Expr, Expr] + + def __init__( + self, dtype: plc.DataType, when: Expr, then: Expr, otherwise: Expr + ) -> None: + super().__init__(dtype) + self.children = (when, then, otherwise) + + def do_evaluate( + self, + df: DataFrame, + *, + context: ExecutionContext = ExecutionContext.FRAME, + mapping: Mapping[Expr, 
Column] | None = None, + ) -> Column: + """Evaluate this expression given a dataframe for context.""" + when, then, otherwise = ( + child.evaluate(df, context=context, mapping=mapping) + for child in self.children + ) + then_obj = then.obj_scalar if then.is_scalar else then.obj + otherwise_obj = otherwise.obj_scalar if otherwise.is_scalar else otherwise.obj + return Column(plc.copying.copy_if_else(then_obj, otherwise_obj, when.obj)) + + class BinOp(Expr): __slots__ = ("op", "children") _non_child = ("dtype", "op") @@ -1118,6 +1180,12 @@ def __init__( super().__init__(dtype) self.op = op self.children = (left, right) + if ( + op in (plc.binaryop.BinaryOperator.ADD, plc.binaryop.BinaryOperator.SUB) + and ({left.dtype.id(), right.dtype.id()}.issubset(dtypes.TIMELIKE_TYPES)) + and not dtypes.have_compatible_resolution(left.dtype.id(), right.dtype.id()) + ): + raise NotImplementedError("Casting rules for timelike types") _MAPPING: ClassVar[dict[pl_expr.Operator, plc.binaryop.BinaryOperator]] = { pl_expr.Operator.Eq: plc.binaryop.BinaryOperator.EQUAL, diff --git a/python/cudf_polars/cudf_polars/dsl/ir.py b/python/cudf_polars/cudf_polars/dsl/ir.py index 3f5f3c74050..abe26b14a90 100644 --- a/python/cudf_polars/cudf_polars/dsl/ir.py +++ b/python/cudf_polars/cudf_polars/dsl/ir.py @@ -29,7 +29,7 @@ import cudf_polars.dsl.expr as expr from cudf_polars.containers import DataFrame, NamedColumn -from cudf_polars.utils import sorting +from cudf_polars.utils import dtypes, sorting if TYPE_CHECKING: from collections.abc import MutableMapping @@ -130,6 +130,11 @@ class IR: schema: Schema """Mapping from column names to their data types.""" + def __post_init__(self): + """Validate preconditions.""" + if any(dtype.id() == plc.TypeId.EMPTY for dtype in self.schema.values()): + raise NotImplementedError("Cannot make empty columns.") + def evaluate(self, *, cache: MutableMapping[int, DataFrame]) -> DataFrame: """ Evaluate the node and return a dataframe. @@ -292,15 +297,10 @@ def evaluate(self, *, cache: MutableMapping[int, DataFrame]) -> DataFrame: table = pdf.to_arrow() schema = table.schema for i, field in enumerate(schema): - # TODO: Nested types - if field.type == pa.large_string(): - # TODO: goes away when libcudf supports large strings - schema = schema.set(i, pa.field(field.name, pa.string())) - elif isinstance(field.type, pa.LargeListType): - # TODO: goes away when libcudf supports large lists - schema = schema.set( - i, pa.field(field.name, pa.list_(field.type.field(0))) - ) + schema = schema.set( + i, pa.field(field.name, dtypes.downcast_arrow_lists(field.type)) + ) + # No-op if the schema is unchanged. 
table = table.cast(schema) df = DataFrame.from_table( plc.interop.from_arrow(table), list(self.schema.keys()) diff --git a/python/cudf_polars/cudf_polars/dsl/translate.py b/python/cudf_polars/cudf_polars/dsl/translate.py index 5d289885f47..f4bf07ae1e0 100644 --- a/python/cudf_polars/cudf_polars/dsl/translate.py +++ b/python/cudf_polars/cudf_polars/dsl/translate.py @@ -12,6 +12,7 @@ import pyarrow as pa from typing_extensions import assert_never +import polars.polars as plrs from polars.polars import _expr_nodes as pl_expr, _ir_nodes as pl_ir import cudf._lib.pylibcudf as plc @@ -342,6 +343,16 @@ def _(node: pl_expr.Function, visitor: NodeTraverser, dtype: plc.DataType) -> ex *(translate_expr(visitor, n=n) for n in node.input), ) elif isinstance(name, pl_expr.BooleanFunction): + if name == pl_expr.BooleanFunction.IsBetween: + column, lo, hi = (translate_expr(visitor, n=n) for n in node.input) + (closed,) = options + lop, rop = expr.BooleanFunction._BETWEEN_OPS[closed] + return expr.BinOp( + dtype, + plc.binaryop.BinaryOperator.LOGICAL_AND, + expr.BinOp(dtype, lop, column, lo), + expr.BinOp(dtype, rop, column, hi), + ) return expr.BooleanFunction( dtype, name, @@ -373,6 +384,8 @@ def _(node: pl_expr.Window, visitor: NodeTraverser, dtype: plc.DataType) -> expr @_translate_expr.register def _(node: pl_expr.Literal, visitor: NodeTraverser, dtype: plc.DataType) -> expr.Expr: + if isinstance(node.value, plrs.PySeries): + return expr.LiteralColumn(dtype, node.value) value = pa.scalar(node.value, type=plc.interop.to_arrow(dtype)) return expr.Literal(dtype, value) @@ -436,6 +449,16 @@ def _(node: pl_expr.Agg, visitor: NodeTraverser, dtype: plc.DataType) -> expr.Ex ) +@_translate_expr.register +def _(node: pl_expr.Ternary, visitor: NodeTraverser, dtype: plc.DataType) -> expr.Expr: + return expr.Ternary( + dtype, + translate_expr(visitor, n=node.predicate), + translate_expr(visitor, n=node.truthy), + translate_expr(visitor, n=node.falsy), + ) + + @_translate_expr.register def _( node: pl_expr.BinaryExpr, visitor: NodeTraverser, dtype: plc.DataType diff --git a/python/cudf_polars/cudf_polars/utils/dtypes.py b/python/cudf_polars/cudf_polars/utils/dtypes.py index 3d4a643e1fc..507acb5d33a 100644 --- a/python/cudf_polars/cudf_polars/utils/dtypes.py +++ b/python/cudf_polars/cudf_polars/utils/dtypes.py @@ -7,13 +7,92 @@ from functools import cache +import pyarrow as pa from typing_extensions import assert_never import polars as pl import cudf._lib.pylibcudf as plc -__all__ = ["from_polars"] +__all__ = ["from_polars", "downcast_arrow_lists", "have_compatible_resolution"] + + +TIMELIKE_TYPES: frozenset[plc.TypeId] = frozenset( + [ + plc.TypeId.TIMESTAMP_MILLISECONDS, + plc.TypeId.TIMESTAMP_MICROSECONDS, + plc.TypeId.TIMESTAMP_NANOSECONDS, + plc.TypeId.TIMESTAMP_DAYS, + plc.TypeId.DURATION_MILLISECONDS, + plc.TypeId.DURATION_MICROSECONDS, + plc.TypeId.DURATION_NANOSECONDS, + ] +) + + +def have_compatible_resolution(lid: plc.TypeId, rid: plc.TypeId): + """ + Do two datetime typeids have matching resolution for a binop. + + Parameters + ---------- + lid + Left type id + rid + Right type id + + Returns + ------- + True if resolutions are compatible, False otherwise. + + Notes + ----- + Polars has different casting rules for combining + datetimes/durations than libcudf, and while we don't encode the + casting rules fully, just reject things we can't handle. + + Precondition for correctness: both lid and rid are timelike. 
+ """ + if lid == rid: + return True + # Timestamps are smaller than durations in the libcudf enum. + lid, rid = sorted([lid, rid]) + if lid == plc.TypeId.TIMESTAMP_MILLISECONDS: + return rid == plc.TypeId.DURATION_MILLISECONDS + elif lid == plc.TypeId.TIMESTAMP_MICROSECONDS: + return rid == plc.TypeId.DURATION_MICROSECONDS + elif lid == plc.TypeId.TIMESTAMP_NANOSECONDS: + return rid == plc.TypeId.DURATION_NANOSECONDS + return False + + +def downcast_arrow_lists(typ: pa.DataType) -> pa.DataType: + """ + Sanitize an arrow datatype from polars. + + Parameters + ---------- + typ + Arrow type to sanitize + + Returns + ------- + Sanitized arrow type + + Notes + ----- + As well as arrow ``ListType``s, polars can produce + ``LargeListType``s and ``FixedSizeListType``s, these are not + currently handled by libcudf, so we attempt to cast them all into + normal ``ListType``s on the arrow side before consuming the arrow + data. + """ + if isinstance(typ, pa.LargeListType): + return pa.list_(downcast_arrow_lists(typ.value_type)) + # We don't have to worry about diving into struct types for now + # since those are always NotImplemented before we get here. + assert not isinstance(typ, pa.StructType) + return typ @cache diff --git a/python/cudf_polars/tests/expressions/test_booleanfunction.py b/python/cudf_polars/tests/expressions/test_booleanfunction.py index a52fba26528..97421008669 100644 --- a/python/cudf_polars/tests/expressions/test_booleanfunction.py +++ b/python/cudf_polars/tests/expressions/test_booleanfunction.py @@ -6,7 +6,10 @@ import polars as pl -from cudf_polars.testing.asserts import assert_gpu_result_equal +from cudf_polars.testing.asserts import ( + assert_gpu_result_equal, + assert_ir_translation_raises, +) @pytest.fixture(params=[False, True], ids=["no_nulls", "nulls"]) @@ -67,23 +70,26 @@ def test_boolean_function_unary(request, expr, has_nans, has_nulls): df = pl.LazyFrame({"a": pl.Series(values, dtype=pl.Float32())}) - q = df.select(expr(pl.col("a"))) + q = df.select(expr(pl.col("a")), expr(pl.col("a")).not_().alias("b")) assert_gpu_result_equal(q) -@pytest.mark.xfail(reason="Evaluation handlers not yet implemented") @pytest.mark.parametrize( "expr", [ pl.col("a").is_finite(), pl.col("a").is_infinite(), - pl.col("a").is_in(pl.col("b")), + [pl.col("a").is_infinite(), pl.col("b").is_finite()], ], ) -def test_unsupported_boolean_function(expr): +def test_boolean_finite(expr): df = pl.LazyFrame( - {"a": pl.Series([1, float("nan"), 2, 4], dtype=pl.Float64()), "b": [1, 2, 3, 4]} + { + "a": pl.Series([1, float("nan"), 2, float("inf")], dtype=pl.Float64()), + "b": [1, 2, 3, 4], + "c": pl.Series([1, 2, 3, 4], dtype=pl.Float64()), + } ) q = df.select(expr) @@ -133,3 +139,33 @@ def test_boolean_horizontal(request, expr, has_nulls, wide): q = ldf.select(expr) assert_gpu_result_equal(q) + + +@pytest.mark.parametrize( + "expr", + [ + pl.col("a").is_in(pl.col("b")), + pl.col("a").is_in(pl.col("c")), + pl.col("c").is_in(pl.col("d")), + ], +) +def test_boolean_is_in(expr): + ldf = pl.LazyFrame( + { + "a": pl.Series([1, 2, 3], dtype=pl.Int64()), + "b": pl.Series([3, 4, 2], dtype=pl.Int64()), + "c": pl.Series([1, None, 3], dtype=pl.Int64()), + "d": pl.Series([10, None, 11], dtype=pl.Int64()), + } + ) + + q = ldf.select(expr) + + assert_gpu_result_equal(q) + + +def test_boolean_is_in_raises_unsupported(): + ldf = pl.LazyFrame({"a": pl.Series([1, 2, 3], dtype=pl.Int64)}) + q = ldf.select(pl.col("a").is_in(pl.lit(1, dtype=pl.Int32()))) + + assert_ir_translation_raises(q, NotImplementedError) diff --git 
a/python/cudf_polars/tests/expressions/test_literal.py b/python/cudf_polars/tests/expressions/test_literal.py new file mode 100644 index 00000000000..55e688428bd --- /dev/null +++ b/python/cudf_polars/tests/expressions/test_literal.py @@ -0,0 +1,96 @@ +# SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. +# SPDX-License-Identifier: Apache-2.0 +from __future__ import annotations + +import pytest + +import polars as pl + +from cudf_polars.testing.asserts import ( + assert_gpu_result_equal, + assert_ir_translation_raises, +) +from cudf_polars.utils import dtypes + + +@pytest.fixture( + params=[ + None, + pl.Int8(), + pl.Int16(), + pl.Int32(), + pl.Int64(), + pl.UInt8(), + pl.UInt16(), + pl.UInt32(), + pl.UInt64(), + ] +) +def integer(request): + return pl.lit(10, dtype=request.param) + + +@pytest.fixture(params=[None, pl.Float32(), pl.Float64()]) +def float(request): + return pl.lit(1.0, dtype=request.param) + + +def test_numeric_literal(integer, float): + df = pl.LazyFrame({}) + + q = df.select(integer=integer, float_=float, sum_=integer + float) + + assert_gpu_result_equal(q) + + +@pytest.fixture( + params=[pl.Date(), pl.Datetime("ms"), pl.Datetime("us"), pl.Datetime("ns")] +) +def timestamp(request): + return pl.lit(10_000, dtype=request.param) + + +@pytest.fixture(params=[pl.Duration("ms"), pl.Duration("us"), pl.Duration("ns")]) +def timedelta(request): + return pl.lit(9_000, dtype=request.param) + + +def test_timelike_literal(timestamp, timedelta): + df = pl.LazyFrame({}) + + q = df.select( + time=timestamp, + delta=timedelta, + adjusted=timestamp + timedelta, + two_delta=timedelta + timedelta, + ) + schema = q.collect_schema() + time_type = schema["time"] + delta_type = schema["delta"] + if dtypes.have_compatible_resolution( + dtypes.from_polars(time_type).id(), dtypes.from_polars(delta_type).id() + ): + assert_gpu_result_equal(q) + else: + assert_ir_translation_raises(q, NotImplementedError) + + +def test_select_literal_series(): + df = pl.LazyFrame({}) + + q = df.select( + a=pl.Series(["a", "b", "c"], dtype=pl.String()), + b=pl.Series([[1, 2], [3], None], dtype=pl.List(pl.UInt16())), + c=pl.Series([[[1]], [], [[1, 2, 3, 4]]], dtype=pl.List(pl.List(pl.Float32()))), + ) + + assert_gpu_result_equal(q) + + +@pytest.mark.parametrize("expr", [pl.lit(None), pl.lit(10, dtype=pl.Decimal())]) +def test_unsupported_literal_raises(expr): + df = pl.LazyFrame({}) + + q = df.select(expr) + + assert_ir_translation_raises(q, NotImplementedError) diff --git a/python/cudf_polars/tests/expressions/test_when_then.py b/python/cudf_polars/tests/expressions/test_when_then.py new file mode 100644 index 00000000000..cf1c0fe7fce --- /dev/null +++ b/python/cudf_polars/tests/expressions/test_when_then.py @@ -0,0 +1,27 @@ +# SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. 
+# SPDX-License-Identifier: Apache-2.0 +from __future__ import annotations + +import pytest + +import polars as pl + +from cudf_polars.testing.asserts import assert_gpu_result_equal + + +@pytest.mark.parametrize("then_scalar", [False, True]) +@pytest.mark.parametrize("otherwise_scalar", [False, True]) +@pytest.mark.parametrize("expr", [pl.col("c"), pl.col("c").is_not_null()]) +def test_when_then(then_scalar, otherwise_scalar, expr): + ldf = pl.LazyFrame( + { + "a": [1, 2, 3, 4, 5, 6, 7], + "b": [10, 13, 11, 15, 16, 11, 10], + "c": [None, True, False, False, True, True, False], + } + ) + + then = pl.lit(10) if then_scalar else pl.col("a") + otherwise = pl.lit(-2) if otherwise_scalar else pl.col("b") + q = ldf.select(pl.when(expr).then(then).otherwise(otherwise)) + assert_gpu_result_equal(q) diff --git a/python/cudf_polars/tests/test_dataframescan.py b/python/cudf_polars/tests/test_dataframescan.py index 1ffe06ac562..b5c0fb7be9f 100644 --- a/python/cudf_polars/tests/test_dataframescan.py +++ b/python/cudf_polars/tests/test_dataframescan.py @@ -41,3 +41,22 @@ def test_scan_drop_nulls(subset, predicate_pushdown): assert_gpu_result_equal( q, collect_kwargs={"predicate_pushdown": predicate_pushdown} ) + + +def test_can_convert_lists(): + df = pl.LazyFrame( + { + "a": pl.Series([[1, 2], [3]], dtype=pl.List(pl.Int8())), + "b": pl.Series([[1], [2]], dtype=pl.List(pl.UInt16())), + "c": pl.Series( + [ + [["1", "2", "3"], ["4", "567"]], + [["8", "9"], []], + ], + dtype=pl.List(pl.List(pl.String())), + ), + "d": pl.Series([[[1, 2]], []], dtype=pl.List(pl.List(pl.UInt16()))), + } + ) + + assert_gpu_result_equal(df) diff --git a/python/dask_cudf/dask_cudf/backends.py b/python/dask_cudf/dask_cudf/backends.py index d250589e389..1f55a59ea55 100644 --- a/python/dask_cudf/dask_cudf/backends.py +++ b/python/dask_cudf/dask_cudf/backends.py @@ -43,7 +43,7 @@ import cudf from cudf.api.types import is_string_dtype -from cudf.utils.nvtx_annotation import _dask_cudf_nvtx_annotate +from cudf.utils.performance_tracking import _dask_cudf_performance_tracking from .core import DataFrame, Index, Series @@ -53,7 +53,7 @@ @meta_nonempty.register(cudf.BaseIndex) -@_dask_cudf_nvtx_annotate +@_dask_cudf_performance_tracking def _nonempty_index(idx): if isinstance(idx, cudf.core.index.RangeIndex): return cudf.core.index.RangeIndex(2, name=idx.name) @@ -100,7 +100,7 @@ def _nest_list_data(data, leaf_type): return data -@_dask_cudf_nvtx_annotate +@_dask_cudf_performance_tracking def _get_non_empty_data(s): if isinstance(s, cudf.core.column.CategoricalColumn): categories = ( @@ -147,7 +147,7 @@ def _get_non_empty_data(s): @meta_nonempty.register(cudf.Series) -@_dask_cudf_nvtx_annotate +@_dask_cudf_performance_tracking def _nonempty_series(s, idx=None): if idx is None: idx = _nonempty_index(s.index) @@ -157,7 +157,7 @@ def _nonempty_series(s, idx=None): @meta_nonempty.register(cudf.DataFrame) -@_dask_cudf_nvtx_annotate +@_dask_cudf_performance_tracking def meta_nonempty_cudf(x): idx = meta_nonempty(x.index) columns_with_dtype = dict() @@ -182,18 +182,18 @@ def meta_nonempty_cudf(x): @make_meta_dispatch.register((cudf.Series, cudf.DataFrame)) -@_dask_cudf_nvtx_annotate +@_dask_cudf_performance_tracking def make_meta_cudf(x, index=None): return x.head(0) @make_meta_dispatch.register(cudf.BaseIndex) -@_dask_cudf_nvtx_annotate +@_dask_cudf_performance_tracking def make_meta_cudf_index(x, index=None): return x[:0] -@_dask_cudf_nvtx_annotate +@_dask_cudf_performance_tracking def _empty_series(name, dtype, index=None): if 
isinstance(dtype, str) and dtype == "category": return cudf.Series( @@ -203,7 +203,7 @@ def _empty_series(name, dtype, index=None): @make_meta_obj.register(object) -@_dask_cudf_nvtx_annotate +@_dask_cudf_performance_tracking def make_meta_object_cudf(x, index=None): """Create an empty cudf object containing the desired metadata. @@ -274,7 +274,7 @@ def make_meta_object_cudf(x, index=None): @concat_dispatch.register((cudf.DataFrame, cudf.Series, cudf.BaseIndex)) -@_dask_cudf_nvtx_annotate +@_dask_cudf_performance_tracking def concat_cudf( dfs, axis=0, @@ -299,13 +299,13 @@ def concat_cudf( @categorical_dtype_dispatch.register( (cudf.DataFrame, cudf.Series, cudf.BaseIndex) ) -@_dask_cudf_nvtx_annotate +@_dask_cudf_performance_tracking def categorical_dtype_cudf(categories=None, ordered=False): return cudf.CategoricalDtype(categories=categories, ordered=ordered) @tolist_dispatch.register((cudf.Series, cudf.BaseIndex)) -@_dask_cudf_nvtx_annotate +@_dask_cudf_performance_tracking def tolist_cudf(obj): return obj.to_pandas().tolist() @@ -313,7 +313,7 @@ def tolist_cudf(obj): @is_categorical_dtype_dispatch.register( (cudf.Series, cudf.BaseIndex, cudf.CategoricalDtype, Series) ) -@_dask_cudf_nvtx_annotate +@_dask_cudf_performance_tracking def is_categorical_dtype_cudf(obj): return cudf.api.types._is_categorical_dtype(obj) @@ -324,7 +324,7 @@ def get_grouper_cudf(obj): @percentile_lookup.register((cudf.Series, cp.ndarray, cudf.BaseIndex)) -@_dask_cudf_nvtx_annotate +@_dask_cudf_performance_tracking def percentile_cudf(a, q, interpolation="linear"): # Cudf dispatch to the equivalent of `np.percentile`: # https://numpy.org/doc/stable/reference/generated/numpy.percentile.html @@ -400,7 +400,7 @@ def _table_to_cudf(obj, table, self_destruct=None, **kwargs): @union_categoricals_dispatch.register((cudf.Series, cudf.BaseIndex)) -@_dask_cudf_nvtx_annotate +@_dask_cudf_performance_tracking def union_categoricals_cudf( to_union, sort_categories=False, ignore_order=False ): @@ -410,7 +410,7 @@ def union_categoricals_cudf( @hash_object_dispatch.register((cudf.DataFrame, cudf.Series)) -@_dask_cudf_nvtx_annotate +@_dask_cudf_performance_tracking def hash_object_cudf(frame, index=True): if index: frame = frame.reset_index() @@ -418,7 +418,7 @@ def hash_object_cudf(frame, index=True): @hash_object_dispatch.register(cudf.BaseIndex) -@_dask_cudf_nvtx_annotate +@_dask_cudf_performance_tracking def hash_object_cudf_index(ind, index=None): if isinstance(ind, cudf.MultiIndex): return ind.to_frame(index=False).hash_values() @@ -428,7 +428,7 @@ def hash_object_cudf_index(ind, index=None): @group_split_dispatch.register((cudf.Series, cudf.DataFrame)) -@_dask_cudf_nvtx_annotate +@_dask_cudf_performance_tracking def group_split_cudf(df, c, k, ignore_index=False): return dict( zip( @@ -443,7 +443,7 @@ def group_split_cudf(df, c, k, ignore_index=False): @sizeof_dispatch.register(cudf.DataFrame) -@_dask_cudf_nvtx_annotate +@_dask_cudf_performance_tracking def sizeof_cudf_dataframe(df): return int( sum(col.memory_usage for col in df._data.columns) @@ -452,7 +452,7 @@ def sizeof_cudf_dataframe(df): @sizeof_dispatch.register((cudf.Series, cudf.BaseIndex)) -@_dask_cudf_nvtx_annotate +@_dask_cudf_performance_tracking def sizeof_cudf_series_index(obj): return obj.memory_usage() diff --git a/python/dask_cudf/dask_cudf/core.py b/python/dask_cudf/dask_cudf/core.py index 3bd455a3a57..aab56e3a1b0 100644 --- a/python/dask_cudf/dask_cudf/core.py +++ b/python/dask_cudf/dask_cudf/core.py @@ -22,7 +22,7 @@ import cudf from cudf import _lib as 
libcudf -from cudf.utils.nvtx_annotation import _dask_cudf_nvtx_annotate +from cudf.utils.performance_tracking import _dask_cudf_performance_tracking from dask_cudf import sorting from dask_cudf.accessors import ListMethods, StructMethods @@ -53,7 +53,7 @@ def __repr__(self): s = "" return s % (type(self).__name__, len(self.dask), self.npartitions) - @_dask_cudf_nvtx_annotate + @_dask_cudf_performance_tracking def to_dask_dataframe(self, **kwargs): """Create a dask.dataframe object from a dask_cudf object @@ -92,7 +92,7 @@ class DataFrame(_Frame, dd.core.DataFrame): _partition_type = cudf.DataFrame - @_dask_cudf_nvtx_annotate + @_dask_cudf_performance_tracking def _assign_column(self, k, v): def assigner(df, k, v): out = df.copy() @@ -102,7 +102,7 @@ def assigner(df, k, v): meta = assigner(self._meta, k, dask_make_meta(v)) return self.map_partitions(assigner, k, v, meta=meta) - @_dask_cudf_nvtx_annotate + @_dask_cudf_performance_tracking def apply_rows(self, func, incols, outcols, kwargs=None, cache_key=None): import uuid @@ -123,7 +123,7 @@ def do_apply_rows(df, func, incols, outcols, kwargs): ) @_deprecate_shuffle_kwarg - @_dask_cudf_nvtx_annotate + @_dask_cudf_performance_tracking def merge(self, other, shuffle_method=None, **kwargs): on = kwargs.pop("on", None) if isinstance(on, tuple): @@ -136,7 +136,7 @@ def merge(self, other, shuffle_method=None, **kwargs): ) @_deprecate_shuffle_kwarg - @_dask_cudf_nvtx_annotate + @_dask_cudf_performance_tracking def join(self, other, shuffle_method=None, **kwargs): # CuDF doesn't support "right" join yet how = kwargs.pop("how", "left") @@ -155,7 +155,7 @@ def join(self, other, shuffle_method=None, **kwargs): ) @_deprecate_shuffle_kwarg - @_dask_cudf_nvtx_annotate + @_dask_cudf_performance_tracking def set_index( self, other, @@ -237,7 +237,7 @@ def set_index( ) @_deprecate_shuffle_kwarg - @_dask_cudf_nvtx_annotate + @_dask_cudf_performance_tracking def sort_values( self, by, @@ -275,14 +275,14 @@ def sort_values( return df.reset_index(drop=True) return df - @_dask_cudf_nvtx_annotate + @_dask_cudf_performance_tracking def to_parquet(self, path, *args, **kwargs): """Calls dask.dataframe.io.to_parquet with CudfEngine backend""" from dask_cudf.io import to_parquet return to_parquet(self, path, *args, **kwargs) - @_dask_cudf_nvtx_annotate + @_dask_cudf_performance_tracking def to_orc(self, path, **kwargs): """Calls dask_cudf.io.to_orc""" from dask_cudf.io import to_orc @@ -290,7 +290,7 @@ def to_orc(self, path, **kwargs): return to_orc(self, path, **kwargs) @derived_from(pd.DataFrame) - @_dask_cudf_nvtx_annotate + @_dask_cudf_performance_tracking def var( self, axis=None, @@ -324,28 +324,28 @@ def var( return _parallel_var(self, meta, skipna, split_every, out) @_deprecate_shuffle_kwarg - @_dask_cudf_nvtx_annotate + @_dask_cudf_performance_tracking def shuffle(self, *args, shuffle_method=None, **kwargs): """Wraps dask.dataframe DataFrame.shuffle method""" return super().shuffle( *args, shuffle_method=_get_shuffle_method(shuffle_method), **kwargs ) - @_dask_cudf_nvtx_annotate + @_dask_cudf_performance_tracking def groupby(self, by=None, **kwargs): from .groupby import CudfDataFrameGroupBy return CudfDataFrameGroupBy(self, by=by, **kwargs) -@_dask_cudf_nvtx_annotate +@_dask_cudf_performance_tracking def sum_of_squares(x): x = x.astype("f8")._column outcol = libcudf.reduce.reduce("sum_of_squares", x) return cudf.Series(outcol) -@_dask_cudf_nvtx_annotate +@_dask_cudf_performance_tracking def var_aggregate(x2, x, n, ddof): try: with 
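Most of this file's changes are the same mechanical substitution: `@_dask_cudf_nvtx_annotate` becomes `@_dask_cudf_performance_tracking`, now imported from `cudf.utils.performance_tracking`. As a rough, hypothetical sketch of what a decorator in this family can do (not the actual cudf implementation, which may add option gating and other behavior), a function can be wrapped in an NVTX range so each call shows up in an Nsight Systems timeline:

import functools

import nvtx  # the `nvtx` package provides nvtx.annotate

def _track_performance(func):
    """Hypothetical stand-in for a performance-tracking decorator."""
    @functools.wraps(func)
    def wrapper(*args, **kwargs):
        # Each call is recorded as a named range in an illustrative "dask_cudf_demo" domain.
        with nvtx.annotate(message=func.__qualname__, domain="dask_cudf_demo"):
            return func(*args, **kwargs)
    return wrapper

@_track_performance
def merge_frames(left, right):
    return left.merge(right)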
warnings.catch_warnings(record=True): @@ -358,12 +358,12 @@ def var_aggregate(x2, x, n, ddof): return np.float64(np.nan) -@_dask_cudf_nvtx_annotate +@_dask_cudf_performance_tracking def nlargest_agg(x, **kwargs): return cudf.concat(x).nlargest(**kwargs) -@_dask_cudf_nvtx_annotate +@_dask_cudf_performance_tracking def nsmallest_agg(x, **kwargs): return cudf.concat(x).nsmallest(**kwargs) @@ -371,7 +371,7 @@ def nsmallest_agg(x, **kwargs): class Series(_Frame, dd.core.Series): _partition_type = cudf.Series - @_dask_cudf_nvtx_annotate + @_dask_cudf_performance_tracking def count(self, split_every=False): return reduction( [self], @@ -381,14 +381,14 @@ def count(self, split_every=False): meta="i8", ) - @_dask_cudf_nvtx_annotate + @_dask_cudf_performance_tracking def mean(self, split_every=False): sum = self.sum(split_every=split_every) n = self.count(split_every=split_every) return sum / n @derived_from(pd.DataFrame) - @_dask_cudf_nvtx_annotate + @_dask_cudf_performance_tracking def var( self, axis=None, @@ -417,19 +417,19 @@ def var( else: return _parallel_var(self, meta, skipna, split_every, out) - @_dask_cudf_nvtx_annotate + @_dask_cudf_performance_tracking def groupby(self, *args, **kwargs): from .groupby import CudfSeriesGroupBy return CudfSeriesGroupBy(self, *args, **kwargs) @property # type: ignore - @_dask_cudf_nvtx_annotate + @_dask_cudf_performance_tracking def list(self): return ListMethods(self) @property # type: ignore - @_dask_cudf_nvtx_annotate + @_dask_cudf_performance_tracking def struct(self): return StructMethods(self) @@ -438,7 +438,7 @@ class Index(Series, dd.core.Index): _partition_type = cudf.Index # type: ignore -@_dask_cudf_nvtx_annotate +@_dask_cudf_performance_tracking def _naive_var(ddf, meta, skipna, ddof, split_every, out): num = ddf._get_numeric_data() x = 1.0 * num.sum(skipna=skipna, split_every=split_every) @@ -453,7 +453,7 @@ def _naive_var(ddf, meta, skipna, ddof, split_every, out): return handle_out(out, result) -@_dask_cudf_nvtx_annotate +@_dask_cudf_performance_tracking def _parallel_var(ddf, meta, skipna, split_every, out): def _local_var(x, skipna): if skipna: @@ -520,7 +520,7 @@ def _finalize_var(vals): return handle_out(out, result) -@_dask_cudf_nvtx_annotate +@_dask_cudf_performance_tracking def _extract_meta(x): """ Extract internal cache data (``_meta``) from dask_cudf objects @@ -536,7 +536,7 @@ def _extract_meta(x): return x -@_dask_cudf_nvtx_annotate +@_dask_cudf_performance_tracking def _emulate(func, *args, **kwargs): """ Apply a function using args / kwargs. If arguments contain dd.DataFrame / @@ -546,7 +546,7 @@ def _emulate(func, *args, **kwargs): return func(*_extract_meta(args), **_extract_meta(kwargs)) -@_dask_cudf_nvtx_annotate +@_dask_cudf_performance_tracking def align_partitions(args): """Align partitions between dask_cudf objects. 
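`sum_of_squares`, `var_aggregate`, and `_parallel_var` above compute a distributed variance from per-partition counts, sums, and sums of squares, combining them with the identity Var(x) = (sum(x^2) - sum(x)^2 / n) / (n - ddof). A small NumPy check of that identity, illustrative only; the dask_cudf code reduces each partition on the GPU and then aggregates the partial results.

import numpy as np

def var_from_partials(x2, x, n, ddof=1):
    # Variance recovered from a sum of squares, a sum, and a count.
    return (x2 - (x * x) / n) / (n - ddof)

rng = np.random.default_rng(0)
data = rng.normal(loc=3.0, scale=2.0, size=10_000)

# Reduce each "partition" separately, then combine the partials.
parts = np.array_split(data, 4)
x2 = sum(float((p ** 2).sum()) for p in parts)
x = sum(float(p.sum()) for p in parts)
n = sum(len(p) for p in parts)

assert np.isclose(var_from_partials(x2, x, n), data.var(ddof=1))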
@@ -563,7 +563,7 @@ def align_partitions(args): return args -@_dask_cudf_nvtx_annotate +@_dask_cudf_performance_tracking def reduction( args, chunk=None, @@ -702,7 +702,7 @@ def reduction( return dd.core.new_dd_object(graph, b, meta, (None, None)) -@_dask_cudf_nvtx_annotate +@_dask_cudf_performance_tracking def from_cudf(data, npartitions=None, chunksize=None, sort=True, name=None): from dask_cudf import QUERY_PLANNING_ON @@ -746,7 +746,7 @@ def from_cudf(data, npartitions=None, chunksize=None, sort=True, name=None): ) -@_dask_cudf_nvtx_annotate +@_dask_cudf_performance_tracking def from_dask_dataframe(df): """ Convert a Dask :class:`dask.dataframe.DataFrame` to a Dask-cuDF diff --git a/python/dask_cudf/dask_cudf/groupby.py b/python/dask_cudf/dask_cudf/groupby.py index 2e72461b43d..bbbcde17b51 100644 --- a/python/dask_cudf/dask_cudf/groupby.py +++ b/python/dask_cudf/dask_cudf/groupby.py @@ -16,7 +16,7 @@ import cudf from cudf.core.groupby.groupby import _deprecate_collect -from cudf.utils.nvtx_annotation import _dask_cudf_nvtx_annotate +from cudf.utils.performance_tracking import _dask_cudf_performance_tracking from dask_cudf.sorting import _deprecate_shuffle_kwarg @@ -56,13 +56,13 @@ def wrapper(*args, **kwargs): class CudfDataFrameGroupBy(DataFrameGroupBy): - @_dask_cudf_nvtx_annotate + @_dask_cudf_performance_tracking def __init__(self, *args, sort=None, **kwargs): self.sep = kwargs.pop("sep", "___") self.as_index = kwargs.pop("as_index", True) super().__init__(*args, sort=sort, **kwargs) - @_dask_cudf_nvtx_annotate + @_dask_cudf_performance_tracking def __getitem__(self, key): if isinstance(key, list): g = CudfDataFrameGroupBy( @@ -84,7 +84,7 @@ def __getitem__(self, key): g._meta = g._meta[key] return g - @_dask_cudf_nvtx_annotate + @_dask_cudf_performance_tracking def _make_groupby_method_aggs(self, agg_name): """Create aggs dictionary for aggregation methods""" @@ -92,7 +92,7 @@ def _make_groupby_method_aggs(self, agg_name): return {c: agg_name for c in self.obj.columns if c not in self.by} return {c: agg_name for c in self.obj.columns if c != self.by} - @_dask_cudf_nvtx_annotate + @_dask_cudf_performance_tracking @_check_groupby_optimized def count(self, split_every=None, split_out=1): return _make_groupby_agg_call( @@ -102,7 +102,7 @@ def count(self, split_every=None, split_out=1): split_out, ) - @_dask_cudf_nvtx_annotate + @_dask_cudf_performance_tracking @_check_groupby_optimized def mean(self, split_every=None, split_out=1): return _make_groupby_agg_call( @@ -112,7 +112,7 @@ def mean(self, split_every=None, split_out=1): split_out, ) - @_dask_cudf_nvtx_annotate + @_dask_cudf_performance_tracking @_check_groupby_optimized def std(self, split_every=None, split_out=1): return _make_groupby_agg_call( @@ -122,7 +122,7 @@ def std(self, split_every=None, split_out=1): split_out, ) - @_dask_cudf_nvtx_annotate + @_dask_cudf_performance_tracking @_check_groupby_optimized def var(self, split_every=None, split_out=1): return _make_groupby_agg_call( @@ -132,7 +132,7 @@ def var(self, split_every=None, split_out=1): split_out, ) - @_dask_cudf_nvtx_annotate + @_dask_cudf_performance_tracking @_check_groupby_optimized def sum(self, split_every=None, split_out=1): return _make_groupby_agg_call( @@ -142,7 +142,7 @@ def sum(self, split_every=None, split_out=1): split_out, ) - @_dask_cudf_nvtx_annotate + @_dask_cudf_performance_tracking @_check_groupby_optimized def min(self, split_every=None, split_out=1): return _make_groupby_agg_call( @@ -152,7 +152,7 @@ def min(self, split_every=None, 
split_out=1): split_out, ) - @_dask_cudf_nvtx_annotate + @_dask_cudf_performance_tracking @_check_groupby_optimized def max(self, split_every=None, split_out=1): return _make_groupby_agg_call( @@ -162,7 +162,7 @@ def max(self, split_every=None, split_out=1): split_out, ) - @_dask_cudf_nvtx_annotate + @_dask_cudf_performance_tracking @_check_groupby_optimized def collect(self, split_every=None, split_out=1): _deprecate_collect() @@ -173,7 +173,7 @@ def collect(self, split_every=None, split_out=1): split_out, ) - @_dask_cudf_nvtx_annotate + @_dask_cudf_performance_tracking @_check_groupby_optimized def first(self, split_every=None, split_out=1): return _make_groupby_agg_call( @@ -183,7 +183,7 @@ def first(self, split_every=None, split_out=1): split_out, ) - @_dask_cudf_nvtx_annotate + @_dask_cudf_performance_tracking @_check_groupby_optimized def last(self, split_every=None, split_out=1): return _make_groupby_agg_call( @@ -194,7 +194,7 @@ def last(self, split_every=None, split_out=1): ) @_deprecate_shuffle_kwarg - @_dask_cudf_nvtx_annotate + @_dask_cudf_performance_tracking def aggregate( self, arg, split_every=None, split_out=1, shuffle_method=None ): @@ -231,13 +231,13 @@ def aggregate( class CudfSeriesGroupBy(SeriesGroupBy): - @_dask_cudf_nvtx_annotate + @_dask_cudf_performance_tracking def __init__(self, *args, sort=None, **kwargs): self.sep = kwargs.pop("sep", "___") self.as_index = kwargs.pop("as_index", True) super().__init__(*args, sort=sort, **kwargs) - @_dask_cudf_nvtx_annotate + @_dask_cudf_performance_tracking @_check_groupby_optimized def count(self, split_every=None, split_out=1): return _make_groupby_agg_call( @@ -247,7 +247,7 @@ def count(self, split_every=None, split_out=1): split_out, )[self._slice] - @_dask_cudf_nvtx_annotate + @_dask_cudf_performance_tracking @_check_groupby_optimized def mean(self, split_every=None, split_out=1): return _make_groupby_agg_call( @@ -257,7 +257,7 @@ def mean(self, split_every=None, split_out=1): split_out, )[self._slice] - @_dask_cudf_nvtx_annotate + @_dask_cudf_performance_tracking @_check_groupby_optimized def std(self, split_every=None, split_out=1): return _make_groupby_agg_call( @@ -267,7 +267,7 @@ def std(self, split_every=None, split_out=1): split_out, )[self._slice] - @_dask_cudf_nvtx_annotate + @_dask_cudf_performance_tracking @_check_groupby_optimized def var(self, split_every=None, split_out=1): return _make_groupby_agg_call( @@ -277,7 +277,7 @@ def var(self, split_every=None, split_out=1): split_out, )[self._slice] - @_dask_cudf_nvtx_annotate + @_dask_cudf_performance_tracking @_check_groupby_optimized def sum(self, split_every=None, split_out=1): return _make_groupby_agg_call( @@ -287,7 +287,7 @@ def sum(self, split_every=None, split_out=1): split_out, )[self._slice] - @_dask_cudf_nvtx_annotate + @_dask_cudf_performance_tracking @_check_groupby_optimized def min(self, split_every=None, split_out=1): return _make_groupby_agg_call( @@ -297,7 +297,7 @@ def min(self, split_every=None, split_out=1): split_out, )[self._slice] - @_dask_cudf_nvtx_annotate + @_dask_cudf_performance_tracking @_check_groupby_optimized def max(self, split_every=None, split_out=1): return _make_groupby_agg_call( @@ -307,7 +307,7 @@ def max(self, split_every=None, split_out=1): split_out, )[self._slice] - @_dask_cudf_nvtx_annotate + @_dask_cudf_performance_tracking @_check_groupby_optimized def collect(self, split_every=None, split_out=1): _deprecate_collect() @@ -318,7 +318,7 @@ def collect(self, split_every=None, split_out=1): split_out, )[self._slice] - 
@_dask_cudf_nvtx_annotate + @_dask_cudf_performance_tracking @_check_groupby_optimized def first(self, split_every=None, split_out=1): return _make_groupby_agg_call( @@ -328,7 +328,7 @@ def first(self, split_every=None, split_out=1): split_out, )[self._slice] - @_dask_cudf_nvtx_annotate + @_dask_cudf_performance_tracking @_check_groupby_optimized def last(self, split_every=None, split_out=1): return _make_groupby_agg_call( @@ -339,7 +339,7 @@ def last(self, split_every=None, split_out=1): )[self._slice] @_deprecate_shuffle_kwarg - @_dask_cudf_nvtx_annotate + @_dask_cudf_performance_tracking def aggregate( self, arg, split_every=None, split_out=1, shuffle_method=None ): @@ -429,7 +429,7 @@ def _shuffle_aggregate( return result -@_dask_cudf_nvtx_annotate +@_dask_cudf_performance_tracking def groupby_agg( ddf, gb_cols, @@ -641,7 +641,7 @@ def groupby_agg( ) -@_dask_cudf_nvtx_annotate +@_dask_cudf_performance_tracking def _make_groupby_agg_call( gb, aggs, split_every, split_out, shuffle_method=None ): @@ -663,7 +663,7 @@ def _make_groupby_agg_call( ) -@_dask_cudf_nvtx_annotate +@_dask_cudf_performance_tracking def _redirect_aggs(arg): """Redirect aggregations to their corresponding name in cuDF""" redirects = { @@ -690,7 +690,7 @@ def _redirect_aggs(arg): return redirects.get(arg, arg) -@_dask_cudf_nvtx_annotate +@_dask_cudf_performance_tracking def _aggs_optimized(arg, supported: set): """Check that aggregations in `arg` are a subset of `supported`""" if isinstance(arg, (list, dict)): @@ -712,7 +712,7 @@ def _aggs_optimized(arg, supported: set): return False -@_dask_cudf_nvtx_annotate +@_dask_cudf_performance_tracking def _groupby_optimized(gb): """Check that groupby input can use dask-cudf optimized codepath""" return isinstance(gb.obj, DaskDataFrame) and ( @@ -730,7 +730,7 @@ def _make_name(col_name, sep="_"): return sep.join(name for name in col_name if name != "") -@_dask_cudf_nvtx_annotate +@_dask_cudf_performance_tracking def _groupby_partition_agg(df, gb_cols, aggs, columns, dropna, sort, sep): """Initial partition-level aggregation task. @@ -768,7 +768,7 @@ def _groupby_partition_agg(df, gb_cols, aggs, columns, dropna, sort, sep): return gb[sorted(output_columns)] -@_dask_cudf_nvtx_annotate +@_dask_cudf_performance_tracking def _tree_node_agg(df, gb_cols, dropna, sort, sep): """Node in groupby-aggregation reduction tree. 
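`groupby_agg` and the helpers decorated above implement a tree reduction: `_groupby_partition_agg` reduces each partition locally (keeping decomposable partials such as sums and counts), `_tree_node_agg` combines partials up a reduction tree, and `_finalize_gb_agg` turns the combined partials into the requested aggregations. A toy single-process sketch of the same idea for a groupby mean, with illustrative function names rather than the dask_cudf implementation:

import pandas as pd

def partition_agg(df):
    # Local, per-partition reduction: keep sums and counts, not means.
    return df.groupby("key")["val"].agg(["sum", "count"])

def tree_node_agg(partials):
    # Combine partial results; sums and counts add across partitions.
    return pd.concat(partials).groupby(level=0).sum()

def finalize(combined):
    # Only now can the mean be formed from the combined partials.
    return combined["sum"] / combined["count"]

parts = [
    pd.DataFrame({"key": ["a", "b", "a"], "val": [1.0, 2.0, 3.0]}),
    pd.DataFrame({"key": ["b", "b", "a"], "val": [4.0, 5.0, 6.0]}),
]
print(finalize(tree_node_agg([partition_agg(p) for p in parts])))
# a -> (1 + 3 + 6) / 3, b -> (2 + 4 + 5) / 3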
@@ -807,7 +807,7 @@ def _tree_node_agg(df, gb_cols, dropna, sort, sep): return gb[sorted(output_columns)] -@_dask_cudf_nvtx_annotate +@_dask_cudf_performance_tracking def _var_agg(df, col, count_name, sum_name, pow2_sum_name, ddof=1): """Calculate variance (given count, sum, and sum-squared columns).""" @@ -829,7 +829,7 @@ def _var_agg(df, col, count_name, sum_name, pow2_sum_name, ddof=1): return var -@_dask_cudf_nvtx_annotate +@_dask_cudf_performance_tracking def _finalize_gb_agg( gb_in, gb_cols, diff --git a/python/dask_cudf/dask_cudf/sorting.py b/python/dask_cudf/dask_cudf/sorting.py index f3774e20d32..a2ba4d1878e 100644 --- a/python/dask_cudf/dask_cudf/sorting.py +++ b/python/dask_cudf/dask_cudf/sorting.py @@ -18,7 +18,7 @@ import cudf from cudf.api.types import _is_categorical_dtype -from cudf.utils.nvtx_annotation import _dask_cudf_nvtx_annotate +from cudf.utils.performance_tracking import _dask_cudf_performance_tracking _SHUFFLE_SUPPORT = ("tasks", "p2p") # "disk" not supported @@ -48,14 +48,14 @@ def wrapper(*args, **kwargs): return wrapper -@_dask_cudf_nvtx_annotate +@_dask_cudf_performance_tracking def set_index_post(df, index_name, drop, column_dtype): df2 = df.set_index(index_name, drop=drop) df2.columns = df2.columns.astype(column_dtype) return df2 -@_dask_cudf_nvtx_annotate +@_dask_cudf_performance_tracking def _set_partitions_pre(s, divisions, ascending=True, na_position="last"): if ascending: partitions = divisions.searchsorted(s, side="right") - 1 @@ -72,7 +72,7 @@ def _set_partitions_pre(s, divisions, ascending=True, na_position="last"): return partitions -@_dask_cudf_nvtx_annotate +@_dask_cudf_performance_tracking def _quantile(a, q): n = len(a) if not len(a): @@ -83,7 +83,7 @@ def _quantile(a, q): ) -@_dask_cudf_nvtx_annotate +@_dask_cudf_performance_tracking def merge_quantiles(finalq, qs, vals): """Combine several quantile calculations of different data. [NOTE: Same logic as dask.array merge_percentiles] @@ -146,7 +146,7 @@ def _append_counts(val, count): return rv.reset_index(drop=True) -@_dask_cudf_nvtx_annotate +@_dask_cudf_performance_tracking def _approximate_quantile(df, q): """Approximate quantiles of DataFrame or Series. [NOTE: Same logic as dask.dataframe Series quantile] @@ -220,7 +220,7 @@ def set_quantile_index(df): return df -@_dask_cudf_nvtx_annotate +@_dask_cudf_performance_tracking def quantile_divisions(df, by, npartitions): qn = np.linspace(0.0, 1.0, npartitions + 1).tolist() divisions = _approximate_quantile(df[by], qn).compute() @@ -257,7 +257,7 @@ def quantile_divisions(df, by, npartitions): @_deprecate_shuffle_kwarg -@_dask_cudf_nvtx_annotate +@_dask_cudf_performance_tracking def sort_values( df, by,
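The sorting path touched above (`quantile_divisions`, `_set_partitions_pre`, `sort_values`) follows a familiar pattern: estimate partition boundaries from (approximate) quantiles of the sort column, then route each row to an output partition with `searchsorted` against those divisions. A compact NumPy/pandas illustration of the routing step; the real code additionally handles nulls, descending order, and categorical keys.

import numpy as np
import pandas as pd

s = pd.Series([17, 3, 42, 8, 25, 1, 30, 12])
npartitions = 3

# Divisions are (approximate) quantiles of the sort key.
divisions = np.quantile(s, np.linspace(0.0, 1.0, npartitions + 1))

# Each value's partition is the division interval it falls into.
partitions = np.searchsorted(divisions, s, side="right") - 1
# The maximum value would land one past the end; clamp it back.
partitions = np.clip(partitions, 0, npartitions - 1)

print(pd.DataFrame({"value": s, "partition": partitions}))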