From 7246a9e36c59baa3050272800e559d3b74b2ba0a Mon Sep 17 00:00:00 2001
From: David Wendt <45795991+davidwendt@users.noreply.github.com>
Date: Tue, 9 May 2023 16:59:08 -0400
Subject: [PATCH 01/16] Performance improvement for libcudf upper/lower conversion for long strings (#13142)

Improves performance of the `cudf::strings::to_lower()`, `cudf::strings::to_upper()`, and `cudf::strings::swapcase()` APIs for longer strings. The current implementation works well for short strings, so the new implementation switches to a long-string algorithm when the average number of bytes per string is 64 or greater. The long-string path is similar but computes the output size of each string with a warp-per-string function. In addition, long strings are checked for containing only ASCII bytes, in which case a faster kernel can be run.

Reference https://github.com/rapidsai/cudf/issues/13048

Authors:
  - David Wendt (https://github.com/davidwendt)

Approvers:
  - Mark Harris (https://github.com/harrism)
  - Karthikeyan (https://github.com/karthikeyann)

URL: https://github.com/rapidsai/cudf/pull/13142
---
 cpp/benchmarks/CMakeLists.txt    |   5 +-
 cpp/benchmarks/string/case.cpp   |  70 +++++---
 cpp/src/strings/case.cu          | 267 +++++++++++++++++++++++--------
 cpp/tests/strings/case_tests.cpp |  58 +++++++
 4 files changed, 307 insertions(+), 93 deletions(-)

diff --git a/cpp/benchmarks/CMakeLists.txt b/cpp/benchmarks/CMakeLists.txt
index 565a396d913..90225b8537a 100644
--- a/cpp/benchmarks/CMakeLists.txt
+++ b/cpp/benchmarks/CMakeLists.txt
@@ -280,7 +280,6 @@ ConfigureNVBench(TEXT_NVBENCH text/minhash.cpp)
 # * strings benchmark -------------------------------------------------------------------
 ConfigureBench(
   STRINGS_BENCH
-  string/case.cpp
   string/combine.cpp
   string/contains.cpp
   string/convert_datetime.cpp
@@ -301,7 +300,9 @@
   string/url_decode.cu
 )
 
-ConfigureNVBench(STRINGS_NVBENCH string/like.cpp string/reverse.cpp string/lengths.cpp)
+ConfigureNVBench(
+  STRINGS_NVBENCH string/like.cpp string/reverse.cpp string/lengths.cpp string/case.cpp
+)
 
 # ##################################################################################################
 # * json benchmark -------------------------------------------------------------------
diff --git a/cpp/benchmarks/string/case.cpp b/cpp/benchmarks/string/case.cpp
index 6152ea741a3..0cdd5fbac32 100644
--- a/cpp/benchmarks/string/case.cpp
+++ b/cpp/benchmarks/string/case.cpp
@@ -15,36 +15,64 @@
  */
 
 #include
-#include
-#include
+#include
 
 #include
 
 #include
 
-class StringCase : public cudf::benchmark {};
+#include
 
-static void BM_case(benchmark::State& state)
+void bench_case(nvbench::state& state)
 {
-  cudf::size_type const n_rows{(cudf::size_type)state.range(0)};
-  auto const column = create_random_column(cudf::type_id::STRING, row_count{n_rows});
-  cudf::strings_column_view input(column->view());
+  auto const n_rows    = static_cast<cudf::size_type>(state.get_int64("num_rows"));
+  auto const max_width = static_cast<cudf::size_type>(state.get_int64("width"));
+  auto const encoding  = state.get_string("encoding");
 
-  for (auto _ : state) {
-    cuda_event_timer raii(state, true, cudf::get_default_stream());
-    cudf::strings::to_lower(input);
+  if (static_cast<std::size_t>(n_rows) * static_cast<std::size_t>(max_width) >=
+      static_cast<std::size_t>(std::numeric_limits<cudf::size_type>::max())) {
+    state.skip("Skip benchmarks greater than size_type limit");
   }
 
-  state.SetBytesProcessed(state.iterations() * input.chars_size());
-}
+  data_profile const profile = data_profile_builder().distribution(
+    cudf::type_id::STRING, distribution_id::NORMAL, 0, max_width);
+  auto const column = create_random_column(cudf::type_id::STRING, row_count{n_rows}, profile);
+
+  auto col_view = column->view();
+
+  cudf::column::contents ascii_contents;
+  if (encoding == "ascii") {
+    data_profile ascii_profile = data_profile_builder().no_validity().distribution(
+      cudf::type_id::INT8, distribution_id::UNIFORM, 32, 126);  // nice ASCII range
+    auto input = cudf::strings_column_view(col_view);
+    auto ascii_column =
+      create_random_column(cudf::type_id::INT8, row_count{input.chars_size()}, ascii_profile);
+    auto ascii_data = ascii_column->view();
 
-#define SORT_BENCHMARK_DEFINE(name)          \
-  BENCHMARK_DEFINE_F(StringCase, name)       \
-  (::benchmark::State & st) { BM_case(st); } \
-  BENCHMARK_REGISTER_F(StringCase, name)     \
-    ->RangeMultiplier(8)                     \
-    ->Ranges({{1 << 12, 1 << 24}})           \
-    ->UseManualTime()                        \
-    ->Unit(benchmark::kMillisecond);
+    col_view = cudf::column_view(col_view.type(),
+                                 col_view.size(),
+                                 nullptr,
+                                 col_view.null_mask(),
+                                 col_view.null_count(),
+                                 0,
+                                 {input.offsets(), ascii_data});
+
+    ascii_contents = ascii_column->release();
+  }
+  auto input = cudf::strings_column_view(col_view);
+
+  state.set_cuda_stream(nvbench::make_cuda_stream_view(cudf::get_default_stream().value()));
+
+  state.add_element_count(input.chars_size(), "chars_size");
+  state.add_global_memory_reads<nvbench::int8_t>(input.chars_size());
+  state.add_global_memory_writes<nvbench::int8_t>(input.chars_size());
+
+  state.exec(nvbench::exec_tag::sync,
+             [&](nvbench::launch& launch) { auto result = cudf::strings::to_lower(input); });
+}
 
-SORT_BENCHMARK_DEFINE(to_lower)
+NVBENCH_BENCH(bench_case)
+  .set_name("strings_case")
+  .add_int64_axis("width", {32, 64, 128, 256, 512, 1024, 2048})
+  .add_int64_axis("num_rows", {4096, 32768, 262144, 2097152, 16777216})
+  .add_string_axis("encoding", {"ascii", "utf8"});
diff --git a/cpp/src/strings/case.cu b/cpp/src/strings/case.cu
index 02c4532bb79..0997983c95e 100644
--- a/cpp/src/strings/case.cu
+++ b/cpp/src/strings/case.cu
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2019-2022, NVIDIA CORPORATION.
+ * Copyright (c) 2019-2023, NVIDIA CORPORATION.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
@@ -17,9 +17,10 @@
 #include
 #include
 #include
-#include
 #include
 #include
+#include
+#include
 #include
 #include
 #include
@@ -38,31 +39,33 @@ namespace detail {
 namespace {
 
 /**
- * @brief Per string logic for case conversion functions.
+ * @brief Threshold to decide on using string or warp parallel functions.
 *
+ * If the average byte length of a string in a column exceeds this value then
+ * the warp-parallel function is used to compute the output sizes.
+ * Otherwise, a regular string-parallel function is used.
+ *
+ * This value was found using the strings_lengths benchmark results.
 */
-struct upper_lower_fn {
-  const column_device_view d_column;
-  character_flags_table_type case_flag;  // flag to check with on each character
-  const character_flags_table_type* d_flags;
-  const character_cases_table_type* d_case_table;
-  const special_case_mapping* d_special_case_mapping;
-  int32_t* d_offsets{};
-  char* d_chars{};
+constexpr size_type AVG_CHAR_BYTES_THRESHOLD = 64;
 
-  __device__ special_case_mapping get_special_case_mapping(uint32_t code_point)
-  {
-    return d_special_case_mapping[get_special_case_hash_index(code_point)];
-  }
+/**
+ * @brief Utility functions for converting characters to upper or lower case
+ */
+struct convert_char_fn {
+  character_flags_table_type case_flag;
+  character_flags_table_type const* d_flags;
+  character_cases_table_type const* d_case_table;
+  special_case_mapping const* d_special_case_mapping;
 
-  // compute-size / copy the bytes representing the special case mapping for this codepoint
-  __device__ int32_t handle_special_case_bytes(uint32_t code_point,
-                                               char* d_buffer,
-                                               detail::character_flags_table_type flag)
+  // compute size or copy the bytes representing the special case mapping for this codepoint
+  __device__ size_type handle_special_case_bytes(uint32_t code_point,
+                                                 detail::character_flags_table_type flag,
+                                                 char* d_buffer = nullptr) const
   {
-    special_case_mapping m = get_special_case_mapping(code_point);
-    size_type bytes        = 0;
+    special_case_mapping m = d_special_case_mapping[get_special_case_hash_index(code_point)];
+    size_type bytes        = 0;
     auto const count  = IS_LOWER(flag) ? m.num_upper_chars : m.num_lower_chars;
     auto const* chars = IS_LOWER(flag) ? m.upper : m.lower;
     for (uint16_t idx = 0; idx < count; idx++) {
@@ -73,76 +76,200 @@ struct upper_lower_fn {
     return bytes;
   }
 
-  __device__ void operator()(size_type idx)
+  // this is called for converting any UTF-8 characters
+  __device__ size_type process_character(char_utf8 chr, char* d_buffer = nullptr) const
+  {
+    auto const code_point = detail::utf8_to_codepoint(chr);
+
+    detail::character_flags_table_type flag = code_point <= 0x00'FFFF ? d_flags[code_point] : 0;
+
+    // we apply special mapping in two cases:
+    // - uncased characters with the special mapping flag: always
+    // - cased characters with the special mapping flag: when matching the input case_flag
+    if (IS_SPECIAL(flag) && ((flag & case_flag) || !IS_UPPER_OR_LOWER(flag))) {
+      return handle_special_case_bytes(code_point, case_flag, d_buffer);
+    }
+
+    char_utf8 const new_char =
+      (flag & case_flag) ? detail::codepoint_to_utf8(d_case_table[code_point]) : chr;
+    return (d_buffer) ? detail::from_char_utf8(new_char, d_buffer)
+                      : detail::bytes_in_char_utf8(new_char);
+  }
+
+  // special function for converting ASCII-only characters
+  __device__ char process_ascii(char chr)
   {
-    if (d_column.is_null(idx)) {
+    return (case_flag & d_flags[chr]) ? static_cast<char>(d_case_table[chr]) : chr;
+  }
+};
+
+/**
+ * @brief Per string logic for case conversion functions
+ *
+ * This can be used in calls to make_strings_children.
+ */
+struct upper_lower_fn {
+  convert_char_fn converter;
+  column_device_view d_strings;
+  size_type* d_offsets{};
+  char* d_chars{};
+
+  __device__ void operator()(size_type idx) const
+  {
+    if (d_strings.is_null(idx)) {
      if (!d_chars) d_offsets[idx] = 0;
      return;
    }
-    auto const d_str = d_column.template element<string_view>(idx);
-    int32_t bytes    = 0;
+    auto const d_str = d_strings.element<string_view>(idx);
+    size_type bytes  = 0;
 
    char* d_buffer = d_chars ? d_chars + d_offsets[idx] : nullptr;
    for (auto itr = d_str.begin(); itr != d_str.end(); ++itr) {
-      uint32_t code_point = detail::utf8_to_codepoint(*itr);
-
-      detail::character_flags_table_type flag = code_point <= 0x00'FFFF ? d_flags[code_point] : 0;
-
-      // we apply special mapping in two cases:
-      // - uncased characters with the special mapping flag, always
-      // - cased characters with the special mapping flag, when matching the input case_flag
-      //
-      if (IS_SPECIAL(flag) && ((flag & case_flag) || !IS_UPPER_OR_LOWER(flag))) {
-        auto const new_bytes = handle_special_case_bytes(code_point, d_buffer, case_flag);
-        bytes += new_bytes;
-        if (d_buffer) d_buffer += new_bytes;
+      auto const size = converter.process_character(*itr, d_buffer);
+      if (d_buffer) {
+        d_buffer += size;
       } else {
-        char_utf8 new_char =
-          (flag & case_flag) ? detail::codepoint_to_utf8(d_case_table[code_point]) : *itr;
-        if (!d_buffer)
-          bytes += detail::bytes_in_char_utf8(new_char);
-        else
-          d_buffer += detail::from_char_utf8(new_char, d_buffer);
+        bytes += size;
       }
     }
-    if (!d_buffer) d_offsets[idx] = bytes;
+    if (!d_buffer) { d_offsets[idx] = bytes; }
  }
};

+/**
+ * @brief Count output bytes in warp-parallel threads
+ *
+ * This executes as one warp per string and just computes the output sizes.
+ */
+struct count_bytes_fn {
+  convert_char_fn converter;
+  column_device_view d_strings;
+  size_type* d_offsets;
+
+  __device__ void operator()(size_type idx) const
+  {
+    auto const str_idx  = idx / cudf::detail::warp_size;
+    auto const lane_idx = idx % cudf::detail::warp_size;
+
+    // initialize the output for the atomicAdd
+    if (lane_idx == 0) { d_offsets[str_idx] = 0; }
+    __syncwarp();
+
+    if (d_strings.is_null(str_idx)) { return; }
+    auto const d_str   = d_strings.element<string_view>(str_idx);
+    auto const str_ptr = d_str.data();
+
+    size_type size = 0;
+    for (auto i = lane_idx; i < d_str.size_bytes(); i += cudf::detail::warp_size) {
+      auto const chr = str_ptr[i];
+      if (is_utf8_continuation_char(chr)) { continue; }
+      char_utf8 u8 = 0;
+      to_char_utf8(str_ptr + i, u8);
+      size += converter.process_character(u8);
+    }
+    // this is ever so slightly faster than using the cub::warp_reduce
+    if (size > 0) atomicAdd(d_offsets + str_idx, size);
+  }
+};
+
+/**
+ * @brief Special functor for processing ASCII-only data
+ */
+struct ascii_converter_fn {
+  convert_char_fn converter;
+  __device__ char operator()(char chr) { return converter.process_ascii(chr); }
+};
+
 /**
  * @brief Utility method for converting upper and lower case characters
- * in a strings column.
+ * in a strings column
  *
- * @param strings Strings to convert.
+ * @param input Strings to convert
  * @param case_flag The character type to convert (upper, lower, or both)
- * @param stream CUDA stream used for device memory operations and kernel launches.
- * @param mr Device memory resource used to allocate the returned column's device memory.
- * @return New strings column with characters converted.
+ * @param stream CUDA stream used for device memory operations and kernel launches
+ * @param mr Device memory resource used to allocate the returned column's device memory
+ * @return New strings column with characters converted
 */
-std::unique_ptr<column> convert_case(strings_column_view const& strings,
+std::unique_ptr<column> convert_case(strings_column_view const& input,
                                     character_flags_table_type case_flag,
                                     rmm::cuda_stream_view stream,
                                     rmm::mr::device_memory_resource* mr)
{
-  if (strings.is_empty()) return make_empty_column(type_id::STRING);
-
-  auto strings_column = column_device_view::create(strings.parent(), stream);
-  auto d_column       = *strings_column;
-
-  // build functor with lookup tables used for case conversion
-  upper_lower_fn functor{d_column,
-                         case_flag,
-                         get_character_flags_table(),
-                         get_character_cases_table(),
-                         get_special_case_mapping_table()};
-
-  // this utility calls the functor to build the offsets and chars columns
-  auto children = cudf::strings::detail::make_strings_children(functor, strings.size(), stream, mr);
-
-  return make_strings_column(strings.size(),
-                             std::move(children.first),
-                             std::move(children.second),
-                             strings.null_count(),
-                             cudf::detail::copy_bitmask(strings.parent(), stream, mr));
+  if (input.size() == input.null_count()) {
+    return std::make_unique<column>(input.parent(), stream, mr);
+  }
+
+  auto const d_strings = column_device_view::create(input.parent(), stream);
+  auto const d_flags   = get_character_flags_table();
+  auto const d_cases   = get_character_cases_table();
+  auto const d_special = get_special_case_mapping_table();
+
+  convert_char_fn ccfn{case_flag, d_flags, d_cases, d_special};
+  upper_lower_fn converter{ccfn, *d_strings};
+
+  // For smaller strings, use the regular string-parallel algorithm
+  if ((input.chars_size() / (input.size() - input.null_count())) < AVG_CHAR_BYTES_THRESHOLD) {
+    auto [offsets, chars] =
+      cudf::strings::detail::make_strings_children(converter, input.size(), stream, mr);
+    return make_strings_column(input.size(),
+                               std::move(offsets),
+                               std::move(chars),
+                               input.null_count(),
+                               cudf::detail::copy_bitmask(input.parent(), stream, mr));
+  }
+
+  // Check if the input contains any multi-byte characters.
+  // This check incurs a ~20% performance hit for smaller strings and so we only use it
+  // after the threshold check above. The check has very little impact for larger strings
+  // but results in a large performance gain when the input contains only single-byte characters.
+  // The count_if is faster than any_of or all_of: https://github.com/NVIDIA/thrust/issues/1016
+  bool const multi_byte_chars =
+    thrust::count_if(
+      rmm::exec_policy(stream), input.chars_begin(), input.chars_end(), [] __device__(auto chr) {
+        return is_utf8_continuation_char(chr);
+      }) > 0;
+  if (!multi_byte_chars) {
+    // optimization for ASCII-only case: copy the input column and replace each character in place
+    auto result = std::make_unique<column>(input.parent(), stream, mr);
+    auto d_chars =
+      result->mutable_view().child(strings_column_view::chars_column_index).data<char>();
+    auto const chars_size = strings_column_view(result->view()).chars_size();
+    thrust::transform(
+      rmm::exec_policy(stream), d_chars, d_chars + chars_size, d_chars, ascii_converter_fn{ccfn});
+    result->set_null_count(input.null_count());
+    return result;
+  }
+
+  // This will use a warp-parallel algorithm to compute the output sizes for each string
+  // and then uses the normal string parallel functor to build the output.
+  auto offsets = make_numeric_column(
+    data_type{type_to_id<size_type>()}, input.size() + 1, mask_state::UNALLOCATED, stream, mr);
+  auto d_offsets = offsets->mutable_view().data<size_type>();
+
+  // first pass, compute output sizes
+  // note: tried to use segmented-reduce approach instead here and it was consistently slower
+  count_bytes_fn counter{ccfn, *d_strings, d_offsets};
+  auto const count_itr = thrust::make_counting_iterator(0);
+  thrust::for_each_n(
+    rmm::exec_policy(stream), count_itr, input.size() * cudf::detail::warp_size, counter);
+
+  // convert sizes to offsets
+  auto const bytes =
+    cudf::detail::sizes_to_offsets(d_offsets, d_offsets + input.size() + 1, d_offsets, stream);
+  CUDF_EXPECTS(bytes <= static_cast<int64_t>(std::numeric_limits<size_type>::max()),
+               "Size of output exceeds column size limit",
+               std::overflow_error);
+
+  auto chars = create_chars_child_column(static_cast<size_type>(bytes), stream, mr);
+  // second pass, write output
+  converter.d_offsets = d_offsets;
+  converter.d_chars   = chars->mutable_view().data<char>();
+  thrust::for_each_n(rmm::exec_policy(stream), count_itr, input.size(), converter);
+
+  return make_strings_column(input.size(),
+                             std::move(offsets),
+                             std::move(chars),
+                             input.null_count(),
+                             cudf::detail::copy_bitmask(input.parent(), stream, mr));
}

}  // namespace
diff --git a/cpp/tests/strings/case_tests.cpp b/cpp/tests/strings/case_tests.cpp
index 3852930dafe..31637a6ab9a 100644
--- a/cpp/tests/strings/case_tests.cpp
+++ b/cpp/tests/strings/case_tests.cpp
@@ -202,6 +202,64 @@ TEST_F(StringsCaseTest, MultiCharLower)
  CUDF_TEST_EXPECT_COLUMNS_EQUAL(*results, expected);
}

+TEST_F(StringsCaseTest, Ascii)
+{
+  // triggering the ascii code path requires some long-ish strings
+  cudf::test::strings_column_wrapper input{
+    "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz1234567890!@#$%^&*()_+=- ",
+    "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz1234567890!@#$%^&*()_+=- ",
+    "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz1234567890!@#$%^&*()_+=- ",
+    "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz1234567890!@#$%^&*()_+=-"};
+  auto view     = cudf::strings_column_view(input);
+  auto expected = cudf::test::strings_column_wrapper{
+    "abcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyz1234567890!@#$%^&*()_+=- ",
+    "abcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyz1234567890!@#$%^&*()_+=- ",
+    "abcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyz1234567890!@#$%^&*()_+=- ",
+    "abcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyz1234567890!@#$%^&*()_+=-"};
+  auto results = cudf::strings::to_lower(view);
+  CUDF_TEST_EXPECT_COLUMNS_EQUAL(*results, expected);
+
+  expected = cudf::test::strings_column_wrapper{
+    "ABCDEFGHIJKLMNOPQRSTUVWXYZABCDEFGHIJKLMNOPQRSTUVWXYZ1234567890!@#$%^&*()_+=- ",
+    "ABCDEFGHIJKLMNOPQRSTUVWXYZABCDEFGHIJKLMNOPQRSTUVWXYZ1234567890!@#$%^&*()_+=- ",
+    "ABCDEFGHIJKLMNOPQRSTUVWXYZABCDEFGHIJKLMNOPQRSTUVWXYZ1234567890!@#$%^&*()_+=- ",
+    "ABCDEFGHIJKLMNOPQRSTUVWXYZABCDEFGHIJKLMNOPQRSTUVWXYZ1234567890!@#$%^&*()_+=-"};
+  results = cudf::strings::to_upper(view);
+  CUDF_TEST_EXPECT_COLUMNS_EQUAL(*results, expected);
+
+  results = cudf::strings::to_upper(cudf::strings_column_view(cudf::slice(input, {1, 3}).front()));
+  CUDF_TEST_EXPECT_COLUMNS_EQUAL(*results, cudf::slice(expected, {1, 3}).front());
+}
+
+TEST_F(StringsCaseTest, LongStrings)
+{
+  // average string length >= AVG_CHAR_BYTES_THRESHOLD as defined in case.cu
+  cudf::test::strings_column_wrapper input{
+    "ABCDÉFGHIJKLMNOPQRSTUVWXYZabcdéfghijklmnopqrstuvwxyz1234567890!@#$%^&*()_+=- ",
+    "ABCDÉFGHIJKLMNOPQRSTUVWXYZabcdéfghijklmnopqrstuvwxyz1234567890!@#$%^&*()_+=- ",
+    "ABCDÉFGHIJKLMNOPQRSTUVWXYZabcdéfghijklmnopqrstuvwxyz1234567890!@#$%^&*()_+=- ",
+    "ABCDÉFGHIJKLMNOPQRSTUVWXYZabcdéfghijklmnopqrstuvwxyz1234567890!@#$%^&*()_+=-"};
+  auto view     = cudf::strings_column_view(input);
+  auto expected = cudf::test::strings_column_wrapper{
+    "abcdéfghijklmnopqrstuvwxyzabcdéfghijklmnopqrstuvwxyz1234567890!@#$%^&*()_+=- ",
+    "abcdéfghijklmnopqrstuvwxyzabcdéfghijklmnopqrstuvwxyz1234567890!@#$%^&*()_+=- ",
+    "abcdéfghijklmnopqrstuvwxyzabcdéfghijklmnopqrstuvwxyz1234567890!@#$%^&*()_+=- ",
+    "abcdéfghijklmnopqrstuvwxyzabcdéfghijklmnopqrstuvwxyz1234567890!@#$%^&*()_+=-"};
+  auto results = cudf::strings::to_lower(view);
+  CUDF_TEST_EXPECT_COLUMNS_EQUAL(*results, expected);
+
+  expected = cudf::test::strings_column_wrapper{
+    "ABCDÉFGHIJKLMNOPQRSTUVWXYZABCDÉFGHIJKLMNOPQRSTUVWXYZ1234567890!@#$%^&*()_+=- ",
+    "ABCDÉFGHIJKLMNOPQRSTUVWXYZABCDÉFGHIJKLMNOPQRSTUVWXYZ1234567890!@#$%^&*()_+=- ",
+    "ABCDÉFGHIJKLMNOPQRSTUVWXYZABCDÉFGHIJKLMNOPQRSTUVWXYZ1234567890!@#$%^&*()_+=- ",
+    "ABCDÉFGHIJKLMNOPQRSTUVWXYZABCDÉFGHIJKLMNOPQRSTUVWXYZ1234567890!@#$%^&*()_+=-"};
+  results = cudf::strings::to_upper(view);
+  CUDF_TEST_EXPECT_COLUMNS_EQUAL(*results, expected);
+
+  results = cudf::strings::to_upper(cudf::strings_column_view(cudf::slice(input, {1, 3}).front()));
+  CUDF_TEST_EXPECT_COLUMNS_EQUAL(*results, cudf::slice(expected, {1, 3}).front());
+}
+
 TEST_F(StringsCaseTest, EmptyStringsColumn)
 {
  cudf::column_view zero_size_strings_column(
From a1c2eecb9f333bd88616b9e8ef1917230d1b17b8 Mon Sep 17 00:00:00 2001
From: Bradley Dice
Date: Tue, 9 May 2023 18:03:50 -0500
Subject: [PATCH 02/16] Fix gtest pinning to 1.13.0. (#13319)

Fixes gtest pinning. The `.*` is not needed here. This aligns with similar changes I'm making in other RAPIDS repos to bump their gtest pinnings.

Authors:
  - Bradley Dice (https://github.com/bdice)

Approvers:
  - Ray Douglass (https://github.com/raydouglass)

URL: https://github.com/rapidsai/cudf/pull/13319
---
 conda/environments/all_cuda-118_arch-x86_64.yaml | 4 ++--
 conda/recipes/libcudf/meta.yaml                  | 2 ++
 dependencies.yaml                                | 4 ++--
 3 files changed, 6 insertions(+), 4 deletions(-)

diff --git a/conda/environments/all_cuda-118_arch-x86_64.yaml b/conda/environments/all_cuda-118_arch-x86_64.yaml
index 5b8a0ea53ba..4031f1aa1c3 100644
--- a/conda/environments/all_cuda-118_arch-x86_64.yaml
+++ b/conda/environments/all_cuda-118_arch-x86_64.yaml
@@ -31,8 +31,8 @@ dependencies:
 - fmt>=9.1.0,<10
 - fsspec>=0.6.0
 - gcc_linux-64=11.*
-- gmock>=1.13.0.*
-- gtest>=1.13.0.*
+- gmock>=1.13.0
+- gtest>=1.13.0
 - hypothesis
 - ipython
 - libarrow==11.0.0.*
diff --git a/conda/recipes/libcudf/meta.yaml b/conda/recipes/libcudf/meta.yaml
index a236b62c5fd..275b8f9332f 100644
--- a/conda/recipes/libcudf/meta.yaml
+++ b/conda/recipes/libcudf/meta.yaml
@@ -396,6 +396,8 @@ outputs:
            - {{ pin_subpackage('libcudf', exact=True) }}
            - {{ pin_subpackage('libcudf_kafka', exact=True) }}
            - cudatoolkit {{ cuda_spec }}
+            - gtest {{ gtest_version }}
+            - gmock {{ gtest_version }}
            - libcurand {{ libcurand_run_version }}
        about:
          home: https://rapids.ai/
diff --git a/dependencies.yaml b/dependencies.yaml
index 00ec63d31ef..70d7f8c1ec8 100644
--- a/dependencies.yaml
+++ b/dependencies.yaml
@@ -213,8 +213,8 @@ dependencies:
      - output_types: conda
        packages:
          - fmt>=9.1.0,<10
-          - &gtest gtest>=1.13.0.*
-          - &gmock gmock>=1.13.0.*
+          - &gtest gtest>=1.13.0
+          - &gmock gmock>=1.13.0
          # Hard pin the patch version used during the build. This must be kept
          # in sync with the version pinned in get_arrow.cmake.
          - libarrow==11.0.0.*
From 3d814bdaef8d50d8573ccbba8a9c04476db6f433 Mon Sep 17 00:00:00 2001
From: Vukasin Milovanovic
Date: Tue, 9 May 2023 23:54:02 -0700
Subject: [PATCH 03/16] Fix the row index stream order in ORC reader (#13242)

Fixes #11890
Use a fixed order when reading row index streams. The order is `PRESENT`, `DATA`, `SECONDARY`/`LENGTH` (maps to `DATA2`). If any of these is absent in the column, the relative order is maintained. Thus, the order is a sub-array of the one above.
Also simplified some logic related to stream order, as we do not need to pass it from the host. Instead, we only pass a bitmap to denote which streams are present.
Updated the xfail test, as it now passes :)

Authors:
  - Vukasin Milovanovic (https://github.com/vuule)

Approvers:
  - Keith Kraus (https://github.com/kkraus14)
  - Yunsong Wang (https://github.com/PointKernel)
  - Bradley Dice (https://github.com/bdice)
  - Vyas Ramasubramani (https://github.com/vyasr)

URL: https://github.com/rapidsai/cudf/pull/13242
---
 cpp/src/io/orc/reader_impl.cu      | 42 ++++++++++++------------------
 cpp/src/io/orc/stripe_init.cu      | 39 ++++++++++++++++++++++-----
 python/cudf/cudf/tests/test_orc.py | 12 ++++-----
 3 files changed, 54 insertions(+), 39 deletions(-)

diff --git a/cpp/src/io/orc/reader_impl.cu b/cpp/src/io/orc/reader_impl.cu
index fbe44eff5ad..1561737da48 100644
--- a/cpp/src/io/orc/reader_impl.cu
+++ b/cpp/src/io/orc/reader_impl.cu
@@ -102,27 +102,18 @@ constexpr type_id to_type_id(const orc::SchemaType& schema,
  return type_id::EMPTY;
}

-constexpr std::pair<gpu::StreamIndexType, uint32_t> get_index_type_and_pos(
-  const orc::StreamKind kind, uint32_t skip_count, bool non_child)
+gpu::StreamIndexType get_stream_index_type(orc::StreamKind kind)
{
  switch (kind) {
-    case orc::DATA:
-      skip_count += 1;
-      skip_count |= (skip_count & 0xff) << 8;
-      return std::pair(gpu::CI_DATA, skip_count);
+    case orc::DATA: return gpu::CI_DATA;
    case orc::LENGTH:
-    case orc::SECONDARY:
-      skip_count += 1;
-      skip_count |= (skip_count & 0xff) << 16;
-      return std::pair(gpu::CI_DATA2, skip_count);
-    case orc::DICTIONARY_DATA: return std::pair(gpu::CI_DICTIONARY, skip_count);
-    case orc::PRESENT:
-      skip_count += (non_child ? 1 : 0);
-      return std::pair(gpu::CI_PRESENT, skip_count);
-    case orc::ROW_INDEX: return std::pair(gpu::CI_INDEX, skip_count);
+    case orc::SECONDARY: return gpu::CI_DATA2;
+    case orc::DICTIONARY_DATA: return gpu::CI_DICTIONARY;
+    case orc::PRESENT: return gpu::CI_PRESENT;
+    case orc::ROW_INDEX: return gpu::CI_INDEX;
    default:
      // Skip this stream as it's not strictly required
-      return std::pair(gpu::CI_NUM_STREAMS, 0);
+      return gpu::CI_NUM_STREAMS;
  }
}

@@ -213,16 +204,15 @@ size_t gather_stream_info(const size_t stripe_index,
    }
    if (col != -1) {
      if (src_offset >= stripeinfo->indexLength || use_index) {
-        // NOTE: skip_count field is temporarily used to track index ordering
-        auto& chunk = chunks[stripe_index][col];
-        const auto idx =
-          get_index_type_and_pos(stream.kind, chunk.skip_count, col == orc2gdf[column_id]);
-        if (idx.first < gpu::CI_NUM_STREAMS) {
-          chunk.strm_id[idx.first]  = stream_info.size();
-          chunk.strm_len[idx.first] = stream.length;
-          chunk.skip_count          = idx.second;
-
-          if (idx.first == gpu::CI_DICTIONARY) {
+        auto& chunk           = chunks[stripe_index][col];
+        auto const index_type = get_stream_index_type(stream.kind);
+        if (index_type < gpu::CI_NUM_STREAMS) {
+          chunk.strm_id[index_type]  = stream_info.size();
+          chunk.strm_len[index_type] = stream.length;
+          // NOTE: skip_count field is temporarily used to track the presence of index streams
+          chunk.skip_count |= 1 << index_type;
+
+          if (index_type == gpu::CI_DICTIONARY) {
            chunk.dictionary_start = *num_dictionary_entries;
            chunk.dict_len         = stripefooter->columns[column_id].dictionarySize;
            *num_dictionary_entries += stripefooter->columns[column_id].dictionarySize;
diff --git a/cpp/src/io/orc/stripe_init.cu b/cpp/src/io/orc/stripe_init.cu
index 11813677b95..6c0f0767b73 100644
--- a/cpp/src/io/orc/stripe_init.cu
+++ b/cpp/src/io/orc/stripe_init.cu
@@ -21,6 +21,8 @@
 #include
 
 #include
+#include <thrust/copy.h>
+#include <thrust/execution_policy.h>
 
 namespace cudf {
 namespace io {
@@ -226,6 +228,30 @@ enum row_entry_state_e {
  STORE_INDEX2,
};

+/**
+ * @brief Calculates the order of index streams based on the index types present in the column.
+ *
+ * @param index_types_bitmap The bitmap of index types showing which index streams are present
+ *
+ * @return The order of index streams
+ */
+static auto __device__ index_order_from_index_types(uint32_t index_types_bitmap)
+{
+  constexpr std::array full_order = {CI_PRESENT, CI_DATA, CI_DATA2};
+
+  std::array<StreamIndexType, full_order.size()> partial_order;
+  thrust::copy_if(thrust::seq,
+                  full_order.cbegin(),
+                  full_order.cend(),
+                  partial_order.begin(),
+                  [index_types_bitmap] __device__(auto index_type) {
+                    // Check if the index type is present
+                    return index_types_bitmap & (1 << index_type);
+                  });
+
+  return partial_order;
+}
+
 /**
  * @brief Decode a single row group index entry
  *
@@ -239,11 +265,14 @@ static uint32_t __device__ ProtobufParseRowIndexEntry(rowindex_state_s* s,
                                                      uint8_t const* const start,
                                                      uint8_t const* const end)
{
  constexpr uint32_t pb_rowindexentry_id = ProtofType::FIXEDLEN + 8;
+  auto const stream_order                = index_order_from_index_types(s->chunk.skip_count);

  const uint8_t* cur      = start;
  row_entry_state_e state = NOT_FOUND;
-  uint32_t length = 0, strm_idx_id = s->chunk.skip_count >> 8, idx_id = 1, ci_id = CI_PRESENT,
-           pos_end = 0;
+  uint32_t length  = 0;
+  uint32_t idx_id  = 0;
+  uint32_t pos_end = 0;
+  uint32_t ci_id   = CI_NUM_STREAMS;
  while (cur < end) {
    uint32_t v = 0;
    for (uint32_t l = 0; l <= 28; l += 7) {
@@ -283,10 +312,8 @@ static uint32_t __device__ ProtobufParseRowIndexEntry(rowindex_state_s* s,
      }
      break;
    case STORE_INDEX0:
-      ci_id = (idx_id == (strm_idx_id & 0xff)) ?
CI_DATA - : (idx_id == ((strm_idx_id >> 8) & 0xff)) ? CI_DATA2 - : CI_PRESENT; - idx_id++; + // Start of a new entry; determine the stream index types + ci_id = stream_order[idx_id++]; if (s->is_compressed) { if (ci_id < CI_PRESENT) s->row_index_entry[0][ci_id] = v; if (cur >= start + pos_end) return length; diff --git a/python/cudf/cudf/tests/test_orc.py b/python/cudf/cudf/tests/test_orc.py index 33095761fde..7fcad5df9f1 100644 --- a/python/cudf/cudf/tests/test_orc.py +++ b/python/cudf/cudf/tests/test_orc.py @@ -1,4 +1,4 @@ -# Copyright (c) 2019-2022, NVIDIA CORPORATION. +# Copyright (c) 2019-2023, NVIDIA CORPORATION. import datetime import decimal @@ -1896,12 +1896,10 @@ def test_reader_empty_stripe(datadir, fname): assert_eq(expected, got) -@pytest.mark.xfail( - reason="https://github.com/rapidsai/cudf/issues/11890", raises=RuntimeError -) -def test_reader_unsupported_offsets(): - # needs enough data for more than one row group - expected = cudf.DataFrame({"str": ["*"] * 10001}, dtype="string") +# needs enough data for multiple row groups +@pytest.mark.parametrize("data", [["*"] * 10001, ["**", None] * 5001]) +def test_reader_row_index_order(data): + expected = cudf.DataFrame({"str": data}, dtype="string") buffer = BytesIO() expected.to_pandas().to_orc(buffer) From 6f3f50721e585b55bd263cc36926eb2c99c5f811 Mon Sep 17 00:00:00 2001 From: Ashwin Srinath <3190405+shwina@users.noreply.github.com> Date: Wed, 10 May 2023 15:19:07 -0400 Subject: [PATCH 04/16] Support the case=False argument to str.contains (#13290) Closes https://github.com/rapidsai/cudf/issues/13253 Authors: - Ashwin Srinath (https://github.com/shwina) Approvers: - Matthew Roeschke (https://github.com/mroeschke) URL: https://github.com/rapidsai/cudf/pull/13290 --- python/cudf/cudf/core/column/string.py | 35 +++++++++++++++----------- python/cudf/cudf/tests/test_string.py | 11 ++++++++ 2 files changed, 32 insertions(+), 14 deletions(-) diff --git a/python/cudf/cudf/core/column/string.py b/python/cudf/cudf/core/column/string.py index fefa7beb562..1a09fc0b985 100644 --- a/python/cudf/cudf/core/column/string.py +++ b/python/cudf/cudf/core/column/string.py @@ -744,7 +744,7 @@ def contains( 4 False dtype: bool - The ``pat`` may also be a list of strings in which case + The ``pat`` may also be a sequence of strings in which case the individual strings are searched in corresponding rows. 
        >>> s2 = cudf.Series(['house', 'dog', 'and', '', ''])
        >>> s1.str.contains(s2)
        0    False
        1     True
        2     True
        3     True
        4     <NA>
        dtype: bool
        """  # noqa W605
-        if case is not True:
-            raise NotImplementedError("`case` parameter is not yet supported")
         if na is not np.nan:
             raise NotImplementedError("`na` parameter is not yet supported")
         if regex and isinstance(pat, re.Pattern):
@@ -767,22 +765,31 @@ def contains(
             raise NotImplementedError(
                 "unsupported value for `flags` parameter"
             )
+        if regex and not case:
+            raise NotImplementedError(
+                "`case=False` only supported when `regex=False`"
+            )
 
-        if pat is None:
-            result_col = column.column_empty(
-                len(self._column), dtype="bool", masked=True
-            )
-        elif is_scalar(pat):
+        if is_scalar(pat):
             if regex:
                 result_col = libstrings.contains_re(self._column, pat, flags)
             else:
-                result_col = libstrings.contains(
-                    self._column, cudf.Scalar(pat, "str")
-                )
+                if case is False:
+                    input_column = libstrings.to_lower(self._column)
+                    pat = cudf.Scalar(pat.lower(), dtype="str")  # type: ignore
+                else:
+                    input_column = self._column
+                    pat = cudf.Scalar(pat, dtype="str")  # type: ignore
+                result_col = libstrings.contains(input_column, pat)
         else:
-            result_col = libstrings.contains_multiple(
-                self._column, column.as_column(pat, dtype="str")
-            )
+            # TODO: we silently ignore the `regex=` flag here
+            if case is False:
+                input_column = libstrings.to_lower(self._column)
+                pat = libstrings.to_lower(column.as_column(pat, dtype="str"))
+            else:
+                input_column = self._column
+                pat = column.as_column(pat, dtype="str")
+            result_col = libstrings.contains_multiple(input_column, pat)
         return self._return_or_inplace(result_col)
 
     def like(self, pat: str, esc: str = None) -> SeriesOrIndex:
diff --git a/python/cudf/cudf/tests/test_string.py b/python/cudf/cudf/tests/test_string.py
index c866e064366..12e832ba23b 100644
--- a/python/cudf/cudf/tests/test_string.py
+++ b/python/cudf/cudf/tests/test_string.py
@@ -820,6 +820,17 @@ def test_string_contains(ps_gs, pat, regex, flags, flags_raise, na, na_raise):
     assert_eq(expect, got)
 
 
+def test_string_contains_case(ps_gs):
+    ps, gs = ps_gs
+    with pytest.raises(NotImplementedError):
+        gs.str.contains("A", case=False)
+    expected = ps.str.contains("A", regex=False, case=False)
+    got = gs.str.contains("A", regex=False, case=False)
+    assert_eq(expected, got)
+    got = gs.str.contains("a", regex=False, case=False)
+    assert_eq(expected, got)
+
+
 @pytest.mark.parametrize(
     "pat,esc,expect",
     [
From e4e65a9769bd6873d001b4e4ebfd4a0e51374881 Mon Sep 17 00:00:00 2001
From: Raza Jafri
Date: Thu, 11 May 2023 07:35:07 -0700
Subject: [PATCH 05/16] Clean up buffers in case of AssertionError (#13262)

Authors:
  - Raza Jafri (https://github.com/razajafri)

Approvers:
  - Alessandro Bellina (https://github.com/abellina)
  - Robert (Bobby) Evans (https://github.com/revans2)

URL: https://github.com/rapidsai/cudf/pull/13262
---
 .../java/ai/rapids/cudf/BitVectorHelper.java  |  4 +-
 .../java/ai/rapids/cudf/ColumnVector.java     |  7 ++-
 .../main/java/ai/rapids/cudf/ColumnView.java  | 59 ++++++++++++++++---
 java/src/main/java/ai/rapids/cudf/Table.java  | 38 ++++++------
 .../java/ai/rapids/cudf/ColumnVectorTest.java | 50 +++++++++++++++-
 5 files changed, 126 insertions(+), 32 deletions(-)

diff --git a/java/src/main/java/ai/rapids/cudf/BitVectorHelper.java b/java/src/main/java/ai/rapids/cudf/BitVectorHelper.java
index cdb7e9e4418..fa96e833b90 100644
--- a/java/src/main/java/ai/rapids/cudf/BitVectorHelper.java
+++ b/java/src/main/java/ai/rapids/cudf/BitVectorHelper.java
@@ -1,6 +1,6 @@
 /*
 *
- * Copyright (c)
2019, NVIDIA CORPORATION.
+ * Copyright (c) 2019-2023, NVIDIA CORPORATION.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
@@ -71,7 +71,7 @@ private static void shiftSrcLeftAndWriteToDst(HostMemoryBuffer src, HostMemoryBu
  /**
   * This method returns the length in bytes needed to represent X number of rows
   * e.g. getValidityLengthInBytes(5) => 1 byte
-  *      getLengthInBytes(7) => 1 byte
+  *      getValidityLengthInBytes(7) => 1 byte
   *      getValidityLengthInBytes(14) => 2 bytes
   */
  static long getValidityLengthInBytes(long rows) {
diff --git a/java/src/main/java/ai/rapids/cudf/ColumnVector.java b/java/src/main/java/ai/rapids/cudf/ColumnVector.java
index 4d43ffcb457..fecb13e1921 100644
--- a/java/src/main/java/ai/rapids/cudf/ColumnVector.java
+++ b/java/src/main/java/ai/rapids/cudf/ColumnVector.java
@@ -119,7 +119,10 @@ public ColumnVector(DType type, long rows, Optional<Long> nullCount,
    incRefCountInternal(true);
  }

-  private static OffHeapState makeOffHeap(DType type, long rows, Optional<Long> nullCount,
+  /**
+   * This method is internal and exposed purely for testing purposes
+   */
+  static OffHeapState makeOffHeap(DType type, long rows, Optional<Long> nullCount,
                                          DeviceMemoryBuffer dataBuffer, DeviceMemoryBuffer validityBuffer,
                                          DeviceMemoryBuffer offsetBuffer, List<MemoryBuffer> toClose, long[] childHandles) {
    long viewHandle = initViewHandle(type, (int)rows, nullCount.orElse(UNKNOWN_NULL_COUNT).intValue(),
@@ -141,7 +144,7 @@ private static OffHeapState makeOffHeap(DType type, long rows, Optional<Long> nullCount,
   * @param offsetBuffer a host buffer required for strings and string categories. The column
   *                     vector takes ownership of the buffer. Do not use the buffer after calling
   *                     this.
-   * @param toClose List of buffers to track adn close once done, usually in case of children
+   * @param toClose List of buffers to track and close once done, usually in case of children
   * @param childHandles array of longs for child column view handles.
   */
  public ColumnVector(DType type, long rows, Optional<Long> nullCount,
diff --git a/java/src/main/java/ai/rapids/cudf/ColumnView.java b/java/src/main/java/ai/rapids/cudf/ColumnView.java
index 67ad9166fe0..8b59ea68972 100644
--- a/java/src/main/java/ai/rapids/cudf/ColumnView.java
+++ b/java/src/main/java/ai/rapids/cudf/ColumnView.java
@@ -43,8 +43,10 @@ public class ColumnView implements AutoCloseable, BinaryOperable {
  protected final ColumnVector.OffHeapState offHeap;

  /**
-   * Constructs a Column View given a native view address
+   * Constructs a Column View given a native view address. This asserts that if the ColumnView is
+   * of nested-type it doesn't contain non-empty nulls
   * @param address the view handle
+   * @throws AssertionError if the address points to a nested-type view with non-empty nulls
   */
  ColumnView(long address) {
    this.viewHandle = address;
@@ -52,15 +54,24 @@ public class ColumnView implements AutoCloseable, BinaryOperable {
    this.rows = ColumnView.getNativeRowCount(viewHandle);
    this.nullCount = ColumnView.getNativeNullCount(viewHandle);
    this.offHeap = null;
-    AssertEmptyNulls.assertNullsAreEmpty(this);
+    try {
+      AssertEmptyNulls.assertNullsAreEmpty(this);
+    } catch (AssertionError ae) {
+      // offHeap state is null, so there is nothing to clean in offHeap
+      // delete ColumnView to avoid memory leak
+      deleteColumnView(viewHandle);
+      viewHandle = 0;
+      throw ae;
+    }
  }

  /**
   * Intended to be called from ColumnVector when it is being constructed.
Because state creates a * cudf::column_view instance and will close it in all cases, we don't want to have to double - * close it. + * close it. This asserts that if the offHeapState is of nested-type it doesn't contain non-empty nulls * @param state the state this view is based off of. + * @throws AssertionError if offHeapState points to a nested-type view with non-empty nulls */ protected ColumnView(ColumnVector.OffHeapState state) { offHeap = state; @@ -68,7 +79,14 @@ protected ColumnView(ColumnVector.OffHeapState state) { type = DType.fromNative(ColumnView.getNativeTypeId(viewHandle), ColumnView.getNativeTypeScale(viewHandle)); rows = ColumnView.getNativeRowCount(viewHandle); nullCount = ColumnView.getNativeNullCount(viewHandle); - AssertEmptyNulls.assertNullsAreEmpty(this); + try { + AssertEmptyNulls.assertNullsAreEmpty(this); + } catch (AssertionError ae) { + // cleanup offHeap + offHeap.clean(false); + viewHandle = 0; + throw ae; + } } /** @@ -649,8 +667,14 @@ public final ColumnVector ifElse(Scalar trueValue, Scalar falseValue) { public final ColumnVector[] slice(int... indices) { long[] nativeHandles = slice(this.getNativeView(), indices); ColumnVector[] columnVectors = new ColumnVector[nativeHandles.length]; - for (int i = 0; i < nativeHandles.length; i++) { - columnVectors[i] = new ColumnVector(nativeHandles[i]); + try { + for (int i = 0; i < nativeHandles.length; i++) { + columnVectors[i] = new ColumnVector(nativeHandles[i]); + nativeHandles[i] = 0; + } + } catch (Throwable t) { + cleanupColumnViews(nativeHandles, columnVectors); + throw t; } return columnVectors; } @@ -788,12 +812,31 @@ public final ColumnVector[] split(int... indices) { public ColumnView[] splitAsViews(int... indices) { long[] nativeHandles = split(this.getNativeView(), indices); ColumnView[] columnViews = new ColumnView[nativeHandles.length]; - for (int i = 0; i < nativeHandles.length; i++) { - columnViews[i] = new ColumnView(nativeHandles[i]); + try { + for (int i = 0; i < nativeHandles.length; i++) { + columnViews[i] = new ColumnView(nativeHandles[i]); + nativeHandles[i] = 0; + } + } catch (Throwable t) { + cleanupColumnViews(nativeHandles, columnViews); + throw t; } return columnViews; } + static void cleanupColumnViews(long[] nativeHandles, ColumnView[] columnViews) { + for (ColumnView columnView: columnViews) { + if (columnView != null) { + columnView.close(); + } + } + for (long nativeHandle: nativeHandles) { + if (nativeHandle != 0) { + deleteColumnView(nativeHandle); + } + } + } + /** * Create a new vector of "normalized" values, where: * 1. 
All representations of NaN (and -NaN) are replaced with the normalized NaN value
diff --git a/java/src/main/java/ai/rapids/cudf/Table.java b/java/src/main/java/ai/rapids/cudf/Table.java
index 9abc2dbcd7c..93cb1acfae4 100644
--- a/java/src/main/java/ai/rapids/cudf/Table.java
+++ b/java/src/main/java/ai/rapids/cudf/Table.java
@@ -87,6 +87,7 @@ public Table(long[] cudfColumns) {
    try {
      for (int i = 0; i < cudfColumns.length; i++) {
        this.columns[i] = new ColumnVector(cudfColumns[i]);
+        cudfColumns[i] = 0;
      }
      long[] views = new long[columns.length];
      for (int i = 0; i < columns.length; i++) {
@@ -95,13 +96,7 @@
      nativeHandle = createCudfTableView(views);
      this.rows = columns[0].getRowCount();
    } catch (Throwable t) {
-      for (int i = 0; i < cudfColumns.length; i++) {
-        if (this.columns[i] != null) {
-          this.columns[i].close();
-        } else {
-          ColumnVector.deleteCudfColumn(cudfColumns[i]);
-        }
-      }
+      ColumnView.cleanupColumnViews(cudfColumns, this.columns);
      throw t;
    }
  }
@@ -3396,8 +3391,14 @@ public static GatherMap mixedLeftAntiJoinGatherMap(Table leftKeys, Table rightKe
  public ColumnVector[] convertToRows() {
    long[] ptrs = convertToRows(nativeHandle);
    ColumnVector[] ret = new ColumnVector[ptrs.length];
-    for (int i = 0; i < ptrs.length; i++) {
-      ret[i] = new ColumnVector(ptrs[i]);
+    try {
+      for (int i = 0; i < ptrs.length; i++) {
+        ret[i] = new ColumnVector(ptrs[i]);
+        ptrs[i] = 0;
+      }
+    } catch (Throwable t) {
+      ColumnView.cleanupColumnViews(ptrs, ret);
+      throw t;
    }
    return ret;
  }
@@ -3479,8 +3480,14 @@ public ColumnVector[] convertToRows() {
  public ColumnVector[] convertToRowsFixedWidthOptimized() {
    long[] ptrs = convertToRowsFixedWidthOptimized(nativeHandle);
    ColumnVector[] ret = new ColumnVector[ptrs.length];
-    for (int i = 0; i < ptrs.length; i++) {
-      ret[i] = new ColumnVector(ptrs[i]);
+    try {
+      for (int i = 0; i < ptrs.length; i++) {
+        ret[i] = new ColumnVector(ptrs[i]);
+        ptrs[i] = 0;
+      }
+    } catch (Throwable t) {
+      ColumnView.cleanupColumnViews(ptrs, ret);
+      throw t;
    }
    return ret;
  }
@@ -3552,14 +3559,7 @@ public static Table fromPackedTable(ByteBuffer metadata, DeviceMemoryBuffer data
      }
      result = new Table(columns);
    } catch (Throwable t) {
-      for (int i = 0; i < columns.length; i++) {
-        if (columns[i] != null) {
-          columns[i].close();
-        }
-        if (columnViewAddresses[i] != 0) {
-          ColumnView.deleteColumnView(columnViewAddresses[i]);
-        }
-      }
+      ColumnView.cleanupColumnViews(columnViewAddresses, columns);
      throw t;
    }

diff --git a/java/src/test/java/ai/rapids/cudf/ColumnVectorTest.java b/java/src/test/java/ai/rapids/cudf/ColumnVectorTest.java
index 6e9498acdac..59b4c9f9f67 100644
--- a/java/src/test/java/ai/rapids/cudf/ColumnVectorTest.java
+++ b/java/src/test/java/ai/rapids/cudf/ColumnVectorTest.java
@@ -32,6 +32,7 @@
 import java.util.Arrays;
 import java.util.Collections;
 import java.util.List;
+import java.util.Optional;
 import java.util.concurrent.atomic.AtomicInteger;
 import java.util.function.Supplier;
 import java.util.stream.Collectors;
@@ -6677,6 +6678,54 @@ void testApplyBooleanMaskFromListOfStructure() {
  }
}

+  @Test
+  void testColumnViewWithNonEmptyNullsIsCleared() {
+    List<Integer> list0 = Arrays.asList(1, 2, 3);
+    List<Integer> list1 = Arrays.asList(4, 5, null);
+    List<Integer> list2 = Arrays.asList(7, 8, 9);
+    List<Integer> list3 = null;
+    try (ColumnVector input = ColumnVectorTest.makeListsColumn(DType.INT32, list0, list1, list2, list3);
+         BaseDeviceMemoryBuffer baseValidityBuffer = input.getDeviceBufferFor(BufferType.VALIDITY);
+         BaseDeviceMemoryBuffer baseOffsetBuffer = input.getDeviceBufferFor(BufferType.OFFSET);
+         HostMemoryBuffer newValidity = HostMemoryBuffer.allocate(BitVectorHelper.getValidityAllocationSizeInBytes(4))) {
+
+      newValidity.copyFromDeviceBuffer(baseValidityBuffer);
+      // we are setting list1 with 3 elements to null. This will result in a non-empty null in the
+      // ColumnView at index 1
+      BitVectorHelper.setNullAt(newValidity, 1);
+      // validityBuffer will be closed by offHeapState later
+      DeviceMemoryBuffer validityBuffer = DeviceMemoryBuffer.allocate(BitVectorHelper.getValidityAllocationSizeInBytes(4));
+      try {
+        // offsetBuffer will be closed by offHeapState later
+        DeviceMemoryBuffer offsetBuffer = DeviceMemoryBuffer.allocate(baseOffsetBuffer.getLength());
+        try {
+          validityBuffer.copyFromHostBuffer(newValidity);
+          offsetBuffer.copyFromMemoryBuffer(0, baseOffsetBuffer, 0,
+              baseOffsetBuffer.length, Cuda.DEFAULT_STREAM);
+
+          // The new offHeapState will have 2 nulls, one null at index 3 from the original ColumnVector,
+          // the other at index 1 which is non-empty
+          ColumnVector.OffHeapState offHeapState = ColumnVector.makeOffHeap(input.type, input.rows, Optional.of(2L),
+              null, validityBuffer, offsetBuffer,
+              null, Arrays.stream(input.getChildColumnViews()).mapToLong((c) -> c.viewHandle).toArray());
+          try {
+            new ColumnView(offHeapState);
+          } catch (AssertionError ae) {
+            assert offHeapState.isClean();
+          }
+        } catch (Exception e) {
+          if (!offsetBuffer.closed) {
+            offsetBuffer.close();
+          }
+        }
+      } catch (Exception e) {
+        if (!validityBuffer.closed) {
+          validityBuffer.close();
+        }
+      }
+    }
+  }
+
  @Test
  public void testEventHandlerIsCalledForEachClose() {
    final AtomicInteger onClosedWasCalled = new AtomicInteger(0);
@@ -6700,5 +6749,4 @@ public void testEventHandlerIsNotCalledIfNotSet() {
    }
    assertEquals(0, onClosedWasCalled.get());
  }
-
}
From 8a5a1f46641f15dabd1b17b8eb63cdb4cb3a325c Mon Sep 17 00:00:00 2001
From: Yunsong Wang
Date: Thu, 11 May 2023 10:10:19 -0700
Subject: [PATCH 06/16] Clean up `distinct_count` benchmark (#13321)

This PR moves the `distinct_count` benchmark to the `stream_compaction` folder. It also adds peak memory usage information and targets the public API for performance evaluation.
Authors:
  - Yunsong Wang (https://github.com/PointKernel)

Approvers:
  - Nghia Truong (https://github.com/ttnghia)
  - Karthikeyan (https://github.com/karthikeyann)

URL: https://github.com/rapidsai/cudf/pull/13321
---
 cpp/benchmarks/CMakeLists.txt |  7 +++----
 .../distinct_count.cpp        | 17 ++++++++++-------
 2 files changed, 13 insertions(+), 11 deletions(-)
 rename cpp/benchmarks/{reduction => stream_compaction}/distinct_count.cpp (75%)

diff --git a/cpp/benchmarks/CMakeLists.txt b/cpp/benchmarks/CMakeLists.txt
index 90225b8537a..dcc70a4b6d9 100644
--- a/cpp/benchmarks/CMakeLists.txt
+++ b/cpp/benchmarks/CMakeLists.txt
@@ -153,8 +153,8 @@ ConfigureBench(APPLY_BOOLEAN_MASK_BENCH stream_compaction/apply_boolean_mask.cpp
 # ##################################################################################################
 # * stream_compaction benchmark -------------------------------------------------------------------
 ConfigureNVBench(
-  STREAM_COMPACTION_NVBENCH stream_compaction/distinct.cpp stream_compaction/unique.cpp
-  stream_compaction/unique_count.cpp
+  STREAM_COMPACTION_NVBENCH stream_compaction/distinct.cpp stream_compaction/distinct_count.cpp
+  stream_compaction/unique.cpp stream_compaction/unique_count.cpp
 )
 
@@ -195,8 +195,7 @@ ConfigureBench(
   reduction/reduce.cpp reduction/scan.cpp
 )
 ConfigureNVBench(
-  REDUCTION_NVBENCH reduction/distinct_count.cpp reduction/rank.cpp reduction/scan_structs.cpp
-  reduction/segmented_reduce.cpp
+  REDUCTION_NVBENCH reduction/rank.cpp reduction/scan_structs.cpp reduction/segmented_reduce.cpp
 )
 
diff --git a/cpp/benchmarks/reduction/distinct_count.cpp b/cpp/benchmarks/stream_compaction/distinct_count.cpp
similarity index 75%
rename from cpp/benchmarks/reduction/distinct_count.cpp
rename to cpp/benchmarks/stream_compaction/distinct_count.cpp
index d2218c270a8..2b2c901b90f 100644
--- a/cpp/benchmarks/reduction/distinct_count.cpp
+++ b/cpp/benchmarks/stream_compaction/distinct_count.cpp
@@ -15,14 +15,14 @@
  */
 
 #include
-#include
+#include
 
-#include
+#include
 
 #include
 
 template <typename Type>
-static void bench_reduction_distinct_count(nvbench::state& state, nvbench::type_list<Type>)
+static void bench_distinct_count(nvbench::state& state, nvbench::type_list<Type>)
 {
   auto const dtype = cudf::type_to_id<Type>();
   auto const size  = static_cast<cudf::size_type>(state.get_int64("num_rows"));
@@ -40,16 +40,19 @@ static void bench_reduction_distinct_count(nvbench::state& state, nvbench::type_list
   auto const& data_column = data_table->get_column(0);
   auto const input_table  = cudf::table_view{{data_column, data_column, data_column}};
 
+  auto mem_stats_logger = cudf::memory_stats_logger();  // init stats logger
+  state.set_cuda_stream(nvbench::make_cuda_stream_view(cudf::get_default_stream().value()));
   state.exec(nvbench::exec_tag::sync, [&](nvbench::launch& launch) {
-    rmm::cuda_stream_view stream{launch.get_stream()};
-    cudf::detail::distinct_count(input_table, cudf::null_equality::EQUAL, stream);
+    cudf::distinct_count(input_table, cudf::null_equality::EQUAL);
   });
+  state.add_buffer_size(
+    mem_stats_logger.peak_memory_usage(), "peak_memory_usage", "peak_memory_usage");
 }
 
 using data_type = nvbench::type_list;
 
-NVBENCH_BENCH_TYPES(bench_reduction_distinct_count, NVBENCH_TYPE_AXES(data_type))
-  .set_name("reduction_distinct_count")
+NVBENCH_BENCH_TYPES(bench_distinct_count, NVBENCH_TYPE_AXES(data_type))
+ .set_name("distinct_count") .add_int64_axis("num_rows", { 10000, // 10k From deaba97bb38cd0f49d383cf94b6720e832ac1d97 Mon Sep 17 00:00:00 2001 From: Vyas Ramasubramani Date: Thu, 11 May 2023 11:02:32 -0700 Subject: [PATCH 07/16] Fix approach to detecting assignment for gte/lte operators (#13285) Resolves #13185 Authors: - Vyas Ramasubramani (https://github.com/vyasr) Approvers: - Bradley Dice (https://github.com/bdice) URL: https://github.com/rapidsai/cudf/pull/13285 --- python/cudf/cudf/core/dataframe.py | 7 ++++--- python/cudf/cudf/tests/test_dataframe.py | 3 ++- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/python/cudf/cudf/core/dataframe.py b/python/cudf/cudf/core/dataframe.py index 8c8f0119b3f..2371dfc51cd 100644 --- a/python/cudf/cudf/core/dataframe.py +++ b/python/cudf/cudf/core/dataframe.py @@ -7139,13 +7139,14 @@ def eval(self, expr: str, inplace: bool = False, **kwargs): "Keyword arguments other than `inplace` are not supported" ) - # Have to use a regex match to avoid capturing "==" - includes_assignment = re.search("[^=]=[^=]", expr) is not None + # Have to use a regex match to avoid capturing ==, >=, or <= + equals_sign_regex = "[^=><]=[^=]" + includes_assignment = re.search(equals_sign_regex, expr) is not None # Check if there were multiple statements. Filter out empty lines. statements = tuple(filter(None, expr.strip().split("\n"))) if len(statements) > 1 and any( - re.search("[^=]=[^=]", st) is None for st in statements + re.search(equals_sign_regex, st) is None for st in statements ): raise ValueError( "Multi-line expressions are only valid if all expressions " diff --git a/python/cudf/cudf/tests/test_dataframe.py b/python/cudf/cudf/tests/test_dataframe.py index b3c8468c119..918bd995ed1 100644 --- a/python/cudf/cudf/tests/test_dataframe.py +++ b/python/cudf/cudf/tests/test_dataframe.py @@ -9794,6 +9794,7 @@ def df_eval(request): ("a / b", float), ("a * b", int), ("a > b", int), + ("a >= b", int), ("a > b > c", int), ("a > b < c", int), ("a & b", int), @@ -9835,7 +9836,7 @@ def test_dataframe_eval(df_eval, expr, dtype): assert_eq(expect, got, check_names=False) # Test inplace - if re.search("[^=]=[^=]", expr) is not None: + if re.search("[^=><]=[^=]", expr) is not None: pdf_eval = df_eval.to_pandas() pdf_eval.eval(expr, inplace=True) df_eval.eval(expr, inplace=True) From 98122d3ea0728e715cf12df426946b21a61b511e Mon Sep 17 00:00:00 2001 From: Robert Maynard Date: Thu, 11 May 2023 18:35:20 -0400 Subject: [PATCH 08/16] Support gcc 12 as the C++ compiler (#13316) Corrects compilation issues found by gcc 12 and the new warnings it provides. 
Three major classes of issues were found:
- incorrect lambda captures of the wrong container for the iterators being used
- incorrect implicit conversions
- warnings generated by using std::transform and std::initializer_lists

Authors:
  - Robert Maynard (https://github.com/robertmaynard)

Approvers:
  - Bradley Dice (https://github.com/bdice)
  - David Wendt (https://github.com/davidwendt)
  - Nghia Truong (https://github.com/ttnghia)
  - Mark Harris (https://github.com/harrism)
  - Vyas Ramasubramani (https://github.com/vyasr)

URL: https://github.com/rapidsai/cudf/pull/13316
---
 cpp/cmake/thirdparty/patches/nvbench_override.json |  5 +++++
 cpp/include/cudf_test/iterator_utilities.hpp       |  8 +++++---
 cpp/include/cudf_test/type_lists.hpp               | 14 ++++++++------
 cpp/src/io/json/json_column.cu                     |  6 +++---
 cpp/src/io/json/nested_json_gpu.cu                 |  7 ++++---
 cpp/tests/io/json_test.cpp                         |  2 +-
 cpp/tests/scalar/scalar_test.cpp                   |  4 ++--
 7 files changed, 28 insertions(+), 18 deletions(-)

diff --git a/cpp/cmake/thirdparty/patches/nvbench_override.json b/cpp/cmake/thirdparty/patches/nvbench_override.json
index 7be868081b6..d5df222ae37 100644
--- a/cpp/cmake/thirdparty/patches/nvbench_override.json
+++ b/cpp/cmake/thirdparty/patches/nvbench_override.json
@@ -12,6 +12,11 @@
      "file" : "nvbench/use_existing_fmt.diff",
      "issue" : "Fix add support for using an existing fmt [https://github.com/NVIDIA/nvbench/pull/125]",
      "fixed_in" : ""
+    },
+    {
+      "file" : "nvbench/public_fmt_dep_in_conda.diff",
+      "issue" : "Propagate fmt requirement in conda envs [https://github.com/NVIDIA/nvbench/pull/127]",
+      "fixed_in" : ""
    }
  ]
}
diff --git a/cpp/include/cudf_test/iterator_utilities.hpp b/cpp/include/cudf_test/iterator_utilities.hpp
index c2c6b3ae83d..10f6e77d889 100644
--- a/cpp/include/cudf_test/iterator_utilities.hpp
+++ b/cpp/include/cudf_test/iterator_utilities.hpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2021-2022, NVIDIA CORPORATION.
+ * Copyright (c) 2021-2023, NVIDIA CORPORATION.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
@@ -121,6 +121,9 @@ template <typename T>
  * The returned iterator yields `false` (to mark `null`) at the indices corresponding to the
  * pointers having `nullptr` values and `true` for the remaining indices.
  *
+ * @note The input vector is referenced by the transform iterator, so the
+ * lifespan must be just as long as the iterator.
+ *
  * @tparam T the data type
  * @param ptrs The data pointers for which the validity iterator is computed
  * @return auto Validity iterator
@@ -128,8 +131,7 @@ template <typename T>
 template <typename T>
 [[maybe_unused]] static auto nulls_from_nullptrs(std::vector<T const*> const& ptrs)
 {
-  // The vector `indices` is copied into the lambda as it can be destroyed at the caller site.
-  return thrust::make_transform_iterator(ptrs.begin(), [ptrs](auto ptr) { return ptr != nullptr; });
+  return thrust::make_transform_iterator(ptrs.begin(), [](auto ptr) { return ptr != nullptr; });
 }
 
 }  // namespace iterators
diff --git a/cpp/include/cudf_test/type_lists.hpp b/cpp/include/cudf_test/type_lists.hpp
index 6ea4311c8cb..2404cf0d134 100644
--- a/cpp/include/cudf_test/type_lists.hpp
+++ b/cpp/include/cudf_test/type_lists.hpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2019-2022, NVIDIA CORPORATION.
+ * Copyright (c) 2019-2023, NVIDIA CORPORATION.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
@@ -84,11 +84,13 @@ std::enable_if_t<cudf::is_fixed_width<TypeParam>() && !cudf::is_timestamp_t<TypeParam>::value,
                 thrust::host_vector<TypeParam>>
 make_type_param_vector(std::initializer_list<T> const& init_list)
 {
-  thrust::host_vector<TypeParam> vec(init_list.size());
-  std::transform(std::cbegin(init_list), std::cend(init_list), std::begin(vec), [](auto const& e) {
-    if constexpr (std::is_unsigned_v<TypeParam>) { return static_cast<TypeParam>(std::abs(e)); }
-    return static_cast<TypeParam>(e);
-  });
+  std::vector<T> input{init_list};
+  std::vector<TypeParam> vec(init_list.size());
+  std::transform(
+    std::cbegin(input), std::cend(input), std::begin(vec), [](auto const& e) -> TypeParam {
+      if constexpr (std::is_unsigned_v<TypeParam>) { return static_cast<TypeParam>(std::abs(e)); }
+      return static_cast<TypeParam>(e);
+    });
   return vec;
 }
 
diff --git a/cpp/src/io/json/json_column.cu b/cpp/src/io/json/json_column.cu
index 65c93105304..0fdcd33ada4 100644
--- a/cpp/src/io/json/json_column.cu
+++ b/cpp/src/io/json/json_column.cu
@@ -813,10 +813,10 @@ std::pair<std::unique_ptr<column>, std::vector<column_name_info>> device_json_column_to_cudf_column(
 
      // For string columns return ["offsets", "char"] schema
      if (target_type.id() == type_id::STRING) {
-        return {std::move(col), {{"offsets"}, {"chars"}}};
+        return {std::move(col), std::vector<column_name_info>{{"offsets"}, {"chars"}}};
      }
      // Non-string leaf-columns (e.g., numeric) do not have child columns in the schema
-      return {std::move(col), {}};
+      return {std::move(col), std::vector<column_name_info>{}};
    }
    case json_col_t::StructColumn: {
      std::vector<std::unique_ptr<column>> child_columns;
@@ -860,7 +860,7 @@
                           data_type{type_id::INT8},
                           0,
                           rmm::device_buffer{0, stream, mr}),
-                         {}}
+                         std::vector<column_name_info>{}}
                     : device_json_column_to_cudf_column(
                         json_col.child_columns.begin()->second,
                         d_input,
diff --git a/cpp/src/io/json/nested_json_gpu.cu b/cpp/src/io/json/nested_json_gpu.cu
index c29bf2f8866..d8ca0411910 100644
--- a/cpp/src/io/json/nested_json_gpu.cu
+++ b/cpp/src/io/json/nested_json_gpu.cu
@@ -1678,11 +1678,11 @@ std::pair<std::unique_ptr<column>, std::vector<column_name_info>> json_column_to_cudf_column(
 
      // For string columns return ["offsets", "char"] schema
      if (target_type.id() == type_id::STRING) {
-        return {std::move(col), {{"offsets"}, {"chars"}}};
+        return {std::move(col), std::vector<column_name_info>{{"offsets"}, {"chars"}}};
      }
      // Non-string leaf-columns (e.g., numeric) do not have child columns in the schema
      else {
-        return {std::move(col), {}};
+        return {std::move(col), std::vector<column_name_info>{}};
      }
      break;
    }
@@ -1724,7 +1724,8 @@
      auto [child_column, names] =
        json_col.child_columns.empty()
-          ? std::pair<std::unique_ptr<column>,
std::pair, - std::vector>{std::make_unique(), {}} + std::vector>{std::make_unique(), + std::vector{}} : json_column_to_cudf_column(json_col.child_columns.begin()->second, d_input, options, diff --git a/cpp/tests/io/json_test.cpp b/cpp/tests/io/json_test.cpp index 9bf16ecca6b..cd067cce928 100644 --- a/cpp/tests/io/json_test.cpp +++ b/cpp/tests/io/json_test.cpp @@ -1568,7 +1568,7 @@ TEST_P(JsonReaderParamTest, JsonDtypeParsing) auto make_validity = [](std::vector const& validity) { return cudf::detail::make_counting_transform_iterator( - 0, [=](auto i) -> bool { return static_cast(validity[i]); }); + 0, [&](auto i) -> bool { return static_cast(validity[i]); }); }; constexpr int int_ignore{}; diff --git a/cpp/tests/scalar/scalar_test.cpp b/cpp/tests/scalar/scalar_test.cpp index 9c70fc364c7..d2f2b5d6a2e 100644 --- a/cpp/tests/scalar/scalar_test.cpp +++ b/cpp/tests/scalar/scalar_test.cpp @@ -33,7 +33,7 @@ TYPED_TEST_SUITE(TypedScalarTestWithoutFixedPoint, cudf::test::FixedWidthTypesWi TYPED_TEST(TypedScalarTest, DefaultValidity) { using Type = cudf::device_storage_type_t; - Type value = cudf::test::make_type_param_scalar(7); + Type value = static_cast(cudf::test::make_type_param_scalar(7)); cudf::scalar_type_t s(value); EXPECT_TRUE(s.is_valid()); @@ -71,7 +71,7 @@ TYPED_TEST(TypedScalarTestWithoutFixedPoint, SetNull) TYPED_TEST(TypedScalarTest, CopyConstructor) { using Type = cudf::device_storage_type_t; - Type value = cudf::test::make_type_param_scalar(8); + Type value = static_cast(cudf::test::make_type_param_scalar(8)); cudf::scalar_type_t s(value); auto s2 = s; From 6c4ff21692507fcf2f801f1986d53d09c1617537 Mon Sep 17 00:00:00 2001 From: Lawrence Mitchell Date: Fri, 12 May 2023 11:54:37 +0100 Subject: [PATCH 09/16] Update mypy to 1.3 (#13340) Bring us up to date with the current mypy release. Big diff, mostly due to the no-implicit-optional rule that mypy now enforces (see commit message of 040f5773c37d89cdbc0baf57fa70a4b3dcab4fc9 for details). That commit is automated so probably best to review commit-by-commit for the substantive changes. 
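For reference, a minimal sketch of what the no-implicit-optional rule changes (the `clamp` helper below is hypothetical, not part of this diff):

```python
from typing import Optional

# mypy >= 1.0 rejects the older spelling: a default of None no longer
# implicitly turns `hi: int` into `hi: Optional[int]`.
#   def clamp(n: int, hi: int = None) -> int: ...

# The explicit form required throughout this change:
def clamp(n: int, hi: Optional[int] = None) -> int:
    # Treat a missing bound as "no upper bound".
    return n if hi is None else min(n, hi)
```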
- Closes #13336 Authors: - Lawrence Mitchell (https://github.com/wence-) Approvers: - Vyas Ramasubramani (https://github.com/vyasr) URL: https://github.com/rapidsai/cudf/pull/13340 --- .pre-commit-config.yaml | 2 +- python/cudf/cudf/_lib/column.pyi | 4 +- python/cudf/cudf/core/buffer/buffer.py | 4 +- python/cudf/cudf/core/buffer/spill_manager.py | 6 +- .../cudf/cudf/core/buffer/spillable_buffer.py | 8 +- python/cudf/cudf/core/buffer/utils.py | 4 +- python/cudf/cudf/core/column/categorical.py | 21 +++-- python/cudf/cudf/core/column/column.py | 69 +++++++++-------- python/cudf/cudf/core/column/datetime.py | 34 +++++---- python/cudf/cudf/core/column/decimal.py | 7 +- python/cudf/cudf/core/column/interval.py | 8 +- python/cudf/cudf/core/column/lists.py | 3 +- python/cudf/cudf/core/column/numerical.py | 20 ++--- .../cudf/cudf/core/column/numerical_base.py | 31 +++++--- python/cudf/cudf/core/column/string.py | 76 +++++++++++-------- python/cudf/cudf/core/column/struct.py | 5 +- python/cudf/cudf/core/column/timedelta.py | 21 ++--- python/cudf/cudf/core/column_accessor.py | 2 +- python/cudf/cudf/core/dataframe.py | 4 +- python/cudf/cudf/core/groupby/groupby.py | 7 +- python/cudf/cudf/core/index.py | 1 - python/cudf/cudf/core/multiindex.py | 3 - python/cudf/cudf/core/single_column_frame.py | 1 - 23 files changed, 205 insertions(+), 136 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 08cc6ec4a14..0ac54113278 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -44,7 +44,7 @@ repos: hooks: - id: cython-lint - repo: https://github.com/pre-commit/mirrors-mypy - rev: 'v0.971' + rev: 'v1.3.0' hooks: - id: mypy additional_dependencies: [types-cachetools] diff --git a/python/cudf/cudf/_lib/column.pyi b/python/cudf/cudf/_lib/column.pyi index 013cba3ae03..bd53801a972 100644 --- a/python/cudf/cudf/_lib/column.pyi +++ b/python/cudf/cudf/_lib/column.pyi @@ -29,8 +29,8 @@ class Column: size: int, dtype: Dtype, mask: Optional[Buffer] = None, - offset: int = None, - null_count: int = None, + offset: Optional[int] = None, + null_count: Optional[int] = None, children: Tuple[ColumnBase, ...] = (), ) -> None: ... @property diff --git a/python/cudf/cudf/core/buffer/buffer.py b/python/cudf/cudf/core/buffer/buffer.py index 2262730d8a1..abf1ec47e3d 100644 --- a/python/cudf/cudf/core/buffer/buffer.py +++ b/python/cudf/cudf/core/buffer/buffer.py @@ -5,7 +5,7 @@ import math import pickle from types import SimpleNamespace -from typing import Any, Dict, Mapping, Sequence, Tuple, Type, TypeVar +from typing import Any, Dict, Mapping, Optional, Sequence, Tuple, Type, TypeVar import numpy @@ -42,7 +42,7 @@ def host_memory_allocation(nbytes: int) -> memoryview: def cuda_array_interface_wrapper( ptr: int, size: int, - owner: object = None, + owner: Optional[object] = None, readonly=False, typestr="|u1", version=0, diff --git a/python/cudf/cudf/core/buffer/spill_manager.py b/python/cudf/cudf/core/buffer/spill_manager.py index d2a87af3869..7f8399ba522 100644 --- a/python/cudf/cudf/core/buffer/spill_manager.py +++ b/python/cudf/cudf/core/buffer/spill_manager.py @@ -1,4 +1,4 @@ -# Copyright (c) 2022, NVIDIA CORPORATION. +# Copyright (c) 2022-2023, NVIDIA CORPORATION. 
from __future__ import annotations @@ -225,7 +225,7 @@ def __init__( self, *, spill_on_demand: bool = False, - device_memory_limit: int = None, + device_memory_limit: Optional[int] = None, statistic_level: int = 0, ) -> None: self._lock = threading.Lock() @@ -358,7 +358,7 @@ def spill_device_memory(self, nbytes: int) -> int: buf.lock.release() return spilled - def spill_to_device_limit(self, device_limit: int = None) -> int: + def spill_to_device_limit(self, device_limit: Optional[int] = None) -> int: """Try to spill device memory until device limit Notice, by default this is a no-op. diff --git a/python/cudf/cudf/core/buffer/spillable_buffer.py b/python/cudf/cudf/core/buffer/spillable_buffer.py index c71841a5a26..169b52b828e 100644 --- a/python/cudf/cudf/core/buffer/spillable_buffer.py +++ b/python/cudf/cudf/core/buffer/spillable_buffer.py @@ -448,7 +448,9 @@ def __cuda_array_interface__(self) -> dict: "version": 0, } - def memoryview(self, *, offset: int = 0, size: int = None) -> memoryview: + def memoryview( + self, *, offset: int = 0, size: Optional[int] = None + ) -> memoryview: size = self._size if size is None else size with self.lock: if self.spillable: @@ -573,7 +575,9 @@ def deserialize(cls, header: dict, frames: list): # copied. return SpillableBuffer.deserialize(header, frames) - def memoryview(self, *, offset: int = 0, size: int = None) -> memoryview: + def memoryview( + self, *, offset: int = 0, size: Optional[int] = None + ) -> memoryview: size = self._size if size is None else size return self._base.memoryview(offset=self._offset + offset, size=size) diff --git a/python/cudf/cudf/core/buffer/utils.py b/python/cudf/cudf/core/buffer/utils.py index 2fe332a12fe..85e4762641e 100644 --- a/python/cudf/cudf/core/buffer/utils.py +++ b/python/cudf/cudf/core/buffer/utils.py @@ -16,8 +16,8 @@ def as_buffer( data: Union[int, Any], *, - size: int = None, - owner: object = None, + size: Optional[int] = None, + owner: Optional[object] = None, exposed: bool = False, ) -> Buffer: """Factory function to wrap `data` in a Buffer object. diff --git a/python/cudf/cudf/core/column/categorical.py b/python/cudf/cudf/core/column/categorical.py index 52f7c0b957f..c6d7f779884 100644 --- a/python/cudf/cudf/core/column/categorical.py +++ b/python/cudf/cudf/core/column/categorical.py @@ -710,10 +710,10 @@ class CategoricalColumn(column.ColumnBase): def __init__( self, dtype: CategoricalDtype, - mask: Buffer = None, - size: int = None, + mask: Optional[Buffer] = None, + size: Optional[int] = None, offset: int = 0, - null_count: int = None, + null_count: Optional[int] = None, children: Tuple["column.ColumnBase", ...] = (), ): @@ -889,7 +889,7 @@ def _fill( return result def slice( - self, start: int, stop: int, stride: int = None + self, start: int, stop: int, stride: Optional[int] = None ) -> "column.ColumnBase": codes = self.codes.slice(start, stop, stride) return cudf.core.column.build_categorical_column( @@ -962,7 +962,9 @@ def __cuda_array_interface__(self) -> Mapping[str, Any]: " if you need this functionality." 
) - def to_pandas(self, index: pd.Index = None, **kwargs) -> pd.Series: + def to_pandas( + self, index: Optional[pd.Index] = None, **kwargs + ) -> pd.Series: if self.categories.dtype.kind == "f": new_mask = bools_to_mask(self.notnull()) col = column.build_categorical_column( @@ -1219,7 +1221,10 @@ def notnull(self) -> ColumnBase: return result def fillna( - self, fill_value: Any = None, method: Any = None, dtype: Dtype = None + self, + fill_value: Any = None, + method: Any = None, + dtype: Optional[Dtype] = None, ) -> CategoricalColumn: """ Fill null values with *fill_value* @@ -1237,7 +1242,7 @@ def fillna( try: fill_value = self._encode(fill_value) fill_value = self.codes.dtype.type(fill_value) - except (ValueError) as err: + except ValueError as err: err_msg = "fill value must be in categories" raise ValueError(err_msg) from err else: @@ -1641,7 +1646,7 @@ def _create_empty_categorical_column( def pandas_categorical_as_column( - categorical: ColumnLike, codes: ColumnLike = None + categorical: ColumnLike, codes: Optional[ColumnLike] = None ) -> CategoricalColumn: """Creates a CategoricalColumn from a pandas.Categorical diff --git a/python/cudf/cudf/core/column/column.py b/python/cudf/cudf/core/column/column.py index 042a1060fae..6557001f884 100644 --- a/python/cudf/cudf/core/column/column.py +++ b/python/cudf/cudf/core/column/column.py @@ -201,7 +201,9 @@ def __repr__(self): f"dtype: {self.dtype}" ) - def to_pandas(self, index: pd.Index = None, **kwargs) -> "pd.Series": + def to_pandas( + self, index: Optional[pd.Index] = None, **kwargs + ) -> "pd.Series": """Convert object to pandas type. The default implementation falls back to PyArrow for the conversion. @@ -548,7 +550,9 @@ def element_indexing(self, index: int): return libcudf.copying.get_element(self, idx).value - def slice(self, start: int, stop: int, stride: int = None) -> ColumnBase: + def slice( + self, start: int, stop: int, stride: Optional[int] = None + ) -> ColumnBase: stride = 1 if stride is None else stride if start < 0: start = start + len(self) @@ -699,8 +703,8 @@ def _check_scatter_key_length( def fillna( self: T, value: Any = None, - method: str = None, - dtype: Dtype = None, + method: Optional[str] = None, + dtype: Optional[Dtype] = None, ) -> T: """Fill null values with ``value``. @@ -1097,7 +1101,6 @@ def apply_boolean_mask(self, mask) -> ColumnBase: def argsort( self, ascending: bool = True, na_position: str = "last" ) -> "cudf.core.column.NumericalColumn": - return self.as_frame()._get_sorted_inds( ascending=ascending, na_position=na_position ) @@ -1244,14 +1247,19 @@ def normalize_binop_value( ) -> Union[ColumnBase, ScalarLike]: raise NotImplementedError - def _minmax(self, skipna: bool = None): + def _minmax(self, skipna: Optional[bool] = None): result_col = self._process_for_reduction(skipna=skipna) if isinstance(result_col, ColumnBase): return libcudf.reduce.minmax(result_col) return result_col def _reduce( - self, op: str, skipna: bool = None, min_count: int = 0, *args, **kwargs + self, + op: str, + skipna: Optional[bool] = None, + min_count: int = 0, + *args, + **kwargs, ) -> ScalarLike: """Compute {op} of column values. 
@@ -1273,7 +1281,7 @@ def contains_na_entries(self) -> bool: return self.null_count != 0 def _process_for_reduction( - self, skipna: bool = None, min_count: int = 0 + self, skipna: Optional[bool] = None, min_count: int = 0 ) -> Union[ColumnBase, ScalarLike]: skipna = True if skipna is None else skipna @@ -1314,8 +1322,8 @@ def _with_type_metadata(self: ColumnBase, dtype: Dtype) -> ColumnBase: def _label_encoding( self, cats: ColumnBase, - dtype: Dtype = None, - na_sentinel: ScalarLike = None, + dtype: Optional[Dtype] = None, + na_sentinel: Optional[ScalarLike] = None, ): """ Convert each value in `self` into an integer code, with `cats` @@ -1389,9 +1397,9 @@ def _return_sentinel_column(): def column_empty_like( column: ColumnBase, - dtype: Dtype = None, + dtype: Optional[Dtype] = None, masked: bool = False, - newsize: int = None, + newsize: Optional[int] = None, ) -> ColumnBase: """Allocate a new column like the given *column*""" if dtype is None: @@ -1494,10 +1502,10 @@ def build_column( data: Union[Buffer, None], dtype: Dtype, *, - size: int = None, - mask: Buffer = None, + size: Optional[int] = None, + mask: Optional[Buffer] = None, offset: int = 0, - null_count: int = None, + null_count: Optional[int] = None, children: Tuple[ColumnBase, ...] = (), ) -> ColumnBase: """ @@ -1666,10 +1674,10 @@ def build_column( def build_categorical_column( categories: ColumnBase, codes: ColumnBase, - mask: Buffer = None, - size: int = None, + mask: Optional[Buffer] = None, + size: Optional[int] = None, offset: int = 0, - null_count: int = None, + null_count: Optional[int] = None, ordered: bool = False, ) -> "cudf.core.column.CategoricalColumn": """ @@ -1757,10 +1765,10 @@ def build_interval_column( def build_list_column( indices: ColumnBase, elements: ColumnBase, - mask: Buffer = None, - size: int = None, + mask: Optional[Buffer] = None, + size: Optional[int] = None, offset: int = 0, - null_count: int = None, + null_count: Optional[int] = None, ) -> "cudf.core.column.ListColumn": """ Build a ListColumn @@ -1803,10 +1811,10 @@ def build_struct_column( names: Sequence[str], children: Tuple[ColumnBase, ...], dtype: Optional[Dtype] = None, - mask: Buffer = None, - size: int = None, + mask: Optional[Buffer] = None, + size: Optional[int] = None, offset: int = 0, - null_count: int = None, + null_count: Optional[int] = None, ) -> "cudf.core.column.StructColumn": """ Build a StructColumn @@ -1863,9 +1871,9 @@ def _make_copy_replacing_NaT_with_null(column): def as_column( arbitrary: Any, - nan_as_null: bool = None, - dtype: Dtype = None, - length: int = None, + nan_as_null: Optional[bool] = None, + dtype: Optional[Dtype] = None, + length: Optional[int] = None, ): """Create a Column from an arbitrary object @@ -2106,7 +2114,6 @@ def as_column( data = build_column(data=buffer, mask=mask, dtype=arbitrary.dtype) elif arb_dtype.kind == "m": - time_unit = get_time_unit(arbitrary) cast_dtype = time_unit in ("D", "W", "M", "Y") @@ -2466,7 +2473,7 @@ def deserialize_columns(headers: List[dict], frames: List) -> List[ColumnBase]: def arange( start: Union[int, float], - stop: Union[int, float] = None, + stop: Optional[Union[int, float]] = None, step: Union[int, float] = 1, dtype=None, ) -> cudf.core.column.NumericalColumn: @@ -2524,7 +2531,9 @@ def arange( ) -def full(size: int, fill_value: ScalarLike, dtype: Dtype = None) -> ColumnBase: +def full( + size: int, fill_value: ScalarLike, dtype: Optional[Dtype] = None +) -> ColumnBase: """ Returns a column of given size and dtype, filled with a given value. 
diff --git a/python/cudf/cudf/core/column/datetime.py b/python/cudf/cudf/core/column/datetime.py index 4c65a631adc..c0a2a6ac546 100644 --- a/python/cudf/cudf/core/column/datetime.py +++ b/python/cudf/cudf/core/column/datetime.py @@ -6,7 +6,7 @@ import locale import re from locale import nl_langinfo -from typing import Any, Mapping, Sequence, cast +from typing import Any, Mapping, Optional, Sequence, cast import numpy as np import pandas as pd @@ -125,10 +125,10 @@ def __init__( self, data: Buffer, dtype: DtypeObj, - mask: Buffer = None, - size: int = None, # TODO: make non-optional + mask: Optional[Buffer] = None, + size: Optional[int] = None, # TODO: make non-optional offset: int = 0, - null_count: int = None, + null_count: Optional[int] = None, ): dtype = cudf.dtype(dtype) @@ -202,7 +202,10 @@ def day_of_year(self) -> ColumnBase: return self.get_dt_field("day_of_year") def to_pandas( - self, index: pd.Index = None, nullable: bool = False, **kwargs + self, + index: Optional[pd.Index] = None, + nullable: bool = False, + **kwargs, ) -> "cudf.Series": # Workaround until following issue is fixed: # https://issues.apache.org/jira/browse/ARROW-9772 @@ -363,7 +366,7 @@ def mean( def std( self, - skipna: bool = None, + skipna: Optional[bool] = None, min_count: int = 0, dtype: Dtype = np.float64, ddof: int = 1, @@ -375,7 +378,7 @@ def std( * _unit_to_nanoseconds_conversion[self.time_unit], ) - def median(self, skipna: bool = None) -> pd.Timestamp: + def median(self, skipna: Optional[bool] = None) -> pd.Timestamp: return pd.Timestamp( self.as_numerical.median(skipna=skipna), unit=self.time_unit ) @@ -451,7 +454,10 @@ def _binaryop(self, other: ColumnBinaryOperand, op: str) -> ColumnBase: return libcudf.binaryop.binaryop(lhs, rhs, op, out_dtype) def fillna( - self, fill_value: Any = None, method: str = None, dtype: Dtype = None + self, + fill_value: Any = None, + method: Optional[str] = None, + dtype: Optional[Dtype] = None, ) -> DatetimeColumn: if fill_value is not None: if cudf.utils.utils._isnat(fill_value): @@ -495,7 +501,6 @@ def isin(self, values: Sequence) -> ColumnBase: def can_cast_safely(self, to_dtype: Dtype) -> bool: if np.issubdtype(to_dtype, np.datetime64): - to_res, _ = np.datetime_data(to_dtype) self_res, _ = np.datetime_data(self.dtype) @@ -542,10 +547,10 @@ def __init__( self, data: Buffer, dtype: pd.DatetimeTZDtype, - mask: Buffer = None, - size: int = None, + mask: Optional[Buffer] = None, + size: Optional[int] = None, offset: int = 0, - null_count: int = None, + null_count: Optional[int] = None, ): super().__init__( data=data, @@ -558,7 +563,10 @@ def __init__( self._dtype = dtype def to_pandas( - self, index: pd.Index = None, nullable: bool = False, **kwargs + self, + index: Optional[pd.Index] = None, + nullable: bool = False, + **kwargs, ) -> "cudf.Series": return self._local_time.to_pandas().dt.tz_localize( self.dtype.tz, ambiguous="NaT", nonexistent="NaT" diff --git a/python/cudf/cudf/core/column/decimal.py b/python/cudf/cudf/core/column/decimal.py index 9fc7663ffca..420637c1924 100644 --- a/python/cudf/cudf/core/column/decimal.py +++ b/python/cudf/cudf/core/column/decimal.py @@ -2,7 +2,7 @@ import warnings from decimal import Decimal -from typing import Any, Sequence, Union, cast +from typing import Any, Optional, Sequence, Union, cast import cupy as cp import numpy as np @@ -103,7 +103,10 @@ def _binaryop(self, other: ColumnBinaryOperand, op: str): return result def fillna( - self, value: Any = None, method: str = None, dtype: Dtype = None + self, + value: Any = None, + 
method: Optional[str] = None, + dtype: Optional[Dtype] = None, ): """Fill null values with ``value``. diff --git a/python/cudf/cudf/core/column/interval.py b/python/cudf/cudf/core/column/interval.py index 657403a6082..1b9caa42ecf 100644 --- a/python/cudf/cudf/core/column/interval.py +++ b/python/cudf/cudf/core/column/interval.py @@ -1,4 +1,6 @@ -# Copyright (c) 2018-2022, NVIDIA CORPORATION. +# Copyright (c) 2018-2023, NVIDIA CORPORATION. +from typing import Optional + import pandas as pd import pyarrow as pa @@ -124,7 +126,9 @@ def as_interval_column(self, dtype, **kwargs): else: raise ValueError("dtype must be IntervalDtype") - def to_pandas(self, index: pd.Index = None, **kwargs) -> "pd.Series": + def to_pandas( + self, index: Optional[pd.Index] = None, **kwargs + ) -> "pd.Series": # Note: This does not handle null values in the interval column. # However, this exact sequence (calling __from_arrow__ on the output of # self.to_arrow) is currently the best known way to convert interval diff --git a/python/cudf/cudf/core/column/lists.py b/python/cudf/cudf/core/column/lists.py index 4eea64c00d3..0bb9f70f851 100644 --- a/python/cudf/cudf/core/column/lists.py +++ b/python/cudf/cudf/core/column/lists.py @@ -5,6 +5,7 @@ import numpy as np import pyarrow as pa +from typing_extensions import Self import cudf from cudf._lib.copying import segmented_gather @@ -261,7 +262,7 @@ def as_string_column( # Call libcudf to format the list column return format_list_column(lc, separators) - def _transform_leaves(self, func, *args, **kwargs): + def _transform_leaves(self, func, *args, **kwargs) -> Self: # return a new list column with the same nested structure # as ``self``, but with the leaf column transformed # by applying ``func`` to it diff --git a/python/cudf/cudf/core/column/numerical.py b/python/cudf/cudf/core/column/numerical.py index 87e73d212ef..840858c4bdb 100644 --- a/python/cudf/cudf/core/column/numerical.py +++ b/python/cudf/cudf/core/column/numerical.py @@ -81,10 +81,10 @@ def __init__( self, data: Buffer, dtype: DtypeObj, - mask: Buffer = None, - size: int = None, # TODO: make this non-optional + mask: Optional[Buffer] = None, + size: Optional[int] = None, # TODO: make this non-optional offset: int = 0, - null_count: int = None, + null_count: Optional[int] = None, ): dtype = cudf.dtype(dtype) @@ -428,11 +428,11 @@ def _process_values_for_isin( return lhs, rhs - def _can_return_nan(self, skipna: bool = None) -> bool: + def _can_return_nan(self, skipna: Optional[bool] = None) -> bool: return not skipna and self.has_nulls(include_nan=True) def _process_for_reduction( - self, skipna: bool = None, min_count: int = 0 + self, skipna: Optional[bool] = None, min_count: int = 0 ) -> Union[NumericalColumn, ScalarLike]: skipna = True if skipna is None else skipna @@ -516,8 +516,8 @@ def find_and_replace( def fillna( self, fill_value: Any = None, - method: str = None, - dtype: Dtype = None, + method: Optional[str] = None, + dtype: Optional[Dtype] = None, fill_nan: bool = True, ) -> NumericalColumn: """ @@ -684,7 +684,6 @@ def can_cast_safely(self, to_dtype: DtypeObj) -> bool: ): return True else: - filled = self.fillna(0) return ( cudf.Series(filled).astype(to_dtype).astype(filled.dtype) @@ -720,7 +719,10 @@ def _with_type_metadata(self: ColumnBase, dtype: Dtype) -> ColumnBase: return self def to_pandas( - self, index: pd.Index = None, nullable: bool = False, **kwargs + self, + index: Optional[pd.Index] = None, + nullable: bool = False, + **kwargs, ) -> "pd.Series": if nullable and self.dtype in 
np_dtypes_to_pandas_dtypes: pandas_nullable_dtype = np_dtypes_to_pandas_dtypes[self.dtype] diff --git a/python/cudf/cudf/core/column/numerical_base.py b/python/cudf/cudf/core/column/numerical_base.py index bb7711a3ead..08c2f7cc7b1 100644 --- a/python/cudf/cudf/core/column/numerical_base.py +++ b/python/cudf/cudf/core/column/numerical_base.py @@ -1,9 +1,9 @@ -# Copyright (c) 2018-2022, NVIDIA CORPORATION. +# Copyright (c) 2018-2023, NVIDIA CORPORATION. """Define an interface for columns that can perform numerical operations.""" from __future__ import annotations -from typing import cast +from typing import Optional, cast import numpy as np @@ -40,10 +40,10 @@ class NumericalBaseColumn(ColumnBase, Scannable): "cummax", } - def _can_return_nan(self, skipna: bool = None) -> bool: + def _can_return_nan(self, skipna: Optional[bool] = None) -> bool: return not skipna and self.has_nulls() - def kurtosis(self, skipna: bool = None) -> float: + def kurtosis(self, skipna: Optional[bool] = None) -> float: skipna = True if skipna is None else skipna if len(self) == 0 or self._can_return_nan(skipna=skipna): @@ -68,7 +68,7 @@ def kurtosis(self, skipna: bool = None) -> float: kurt = term_one_section_one * term_one_section_two - 3 * term_two return kurt - def skew(self, skipna: bool = None) -> ScalarLike: + def skew(self, skipna: Optional[bool] = None) -> ScalarLike: skipna = True if skipna is None else skipna if len(self) == 0 or self._can_return_nan(skipna=skipna): @@ -122,26 +122,39 @@ def quantile( ) return result - def mean(self, skipna: bool = None, min_count: int = 0, dtype=np.float64): + def mean( + self, + skipna: Optional[bool] = None, + min_count: int = 0, + dtype=np.float64, + ): return self._reduce( "mean", skipna=skipna, min_count=min_count, dtype=dtype ) def var( - self, skipna: bool = None, min_count: int = 0, dtype=np.float64, ddof=1 + self, + skipna: Optional[bool] = None, + min_count: int = 0, + dtype=np.float64, + ddof=1, ): return self._reduce( "var", skipna=skipna, min_count=min_count, dtype=dtype, ddof=ddof ) def std( - self, skipna: bool = None, min_count: int = 0, dtype=np.float64, ddof=1 + self, + skipna: Optional[bool] = None, + min_count: int = 0, + dtype=np.float64, + ddof=1, ): return self._reduce( "std", skipna=skipna, min_count=min_count, dtype=dtype, ddof=ddof ) - def median(self, skipna: bool = None) -> NumericalBaseColumn: + def median(self, skipna: Optional[bool] = None) -> NumericalBaseColumn: skipna = True if skipna is None else skipna if self._can_return_nan(skipna=skipna): diff --git a/python/cudf/cudf/core/column/string.py b/python/cudf/cudf/core/column/string.py index 1a09fc0b985..8e83d0c72b6 100644 --- a/python/cudf/cudf/core/column/string.py +++ b/python/cudf/cudf/core/column/string.py @@ -259,12 +259,14 @@ def byte_count(self) -> SeriesOrIndex: ) @overload - def cat(self, sep: str = None, na_rep: str = None) -> str: + def cat( + self, sep: Optional[str] = None, na_rep: Optional[str] = None + ) -> str: ... @overload def cat( - self, others, sep: str = None, na_rep: str = None + self, others, sep: Optional[str] = None, na_rep: Optional[str] = None ) -> Union[SeriesOrIndex, "cudf.core.column.string.StringColumn"]: ... @@ -792,7 +794,7 @@ def contains( result_col = libstrings.contains_multiple(input_column, pat) return self._return_or_inplace(result_col) - def like(self, pat: str, esc: str = None) -> SeriesOrIndex: + def like(self, pat: str, esc: Optional[str] = None) -> SeriesOrIndex: """ Test if a like pattern matches a string of a Series or Index. 
@@ -1072,7 +1074,10 @@ def replace_with_backrefs(self, pat: str, repl: str) -> SeriesOrIndex: ) def slice( - self, start: int = None, stop: int = None, step: int = None + self, + start: Optional[int] = None, + stop: Optional[int] = None, + step: Optional[int] = None, ) -> SeriesOrIndex: """ Slice substrings from each element in the Series or Index. @@ -2047,7 +2052,7 @@ def istitle(self) -> SeriesOrIndex: return self._return_or_inplace(libstrings.is_title(self._column)) def filter_alphanum( - self, repl: str = None, keep: bool = True + self, repl: Optional[str] = None, keep: bool = True ) -> SeriesOrIndex: """ Remove non-alphanumeric characters from strings in this column. @@ -2133,7 +2138,10 @@ def slice_from( ) def slice_replace( - self, start: int = None, stop: int = None, repl: str = None + self, + start: Optional[int] = None, + stop: Optional[int] = None, + repl: Optional[str] = None, ) -> SeriesOrIndex: """ Replace the specified section of each string with a new string. @@ -2221,7 +2229,9 @@ def slice_replace( ), ) - def insert(self, start: int = 0, repl: str = None) -> SeriesOrIndex: + def insert( + self, start: int = 0, repl: Optional[str] = None + ) -> SeriesOrIndex: """ Insert the specified string into each string in the specified position. @@ -2401,10 +2411,10 @@ def get_json_object( def split( self, - pat: str = None, + pat: Optional[str] = None, n: int = -1, expand: bool = False, - regex: bool = None, + regex: Optional[bool] = None, ) -> SeriesOrIndex: """ Split strings around given separator/delimiter. @@ -2569,10 +2579,10 @@ def split( def rsplit( self, - pat: str = None, + pat: Optional[str] = None, n: int = -1, expand: bool = False, - regex: bool = None, + regex: Optional[bool] = None, ) -> SeriesOrIndex: """ Split strings around given separator/delimiter. @@ -3221,7 +3231,7 @@ def rjust(self, width: int, fillchar: str = " ") -> SeriesOrIndex: libstrings.rjust(self._column, width, fillchar) ) - def strip(self, to_strip: str = None) -> SeriesOrIndex: + def strip(self, to_strip: Optional[str] = None) -> SeriesOrIndex: r""" Remove leading and trailing characters. @@ -3280,7 +3290,7 @@ def strip(self, to_strip: str = None) -> SeriesOrIndex: libstrings.strip(self._column, cudf.Scalar(to_strip, "str")) ) - def lstrip(self, to_strip: str = None) -> SeriesOrIndex: + def lstrip(self, to_strip: Optional[str] = None) -> SeriesOrIndex: r""" Remove leading and trailing characters. @@ -3327,7 +3337,7 @@ def lstrip(self, to_strip: str = None) -> SeriesOrIndex: libstrings.lstrip(self._column, cudf.Scalar(to_strip, "str")) ) - def rstrip(self, to_strip: str = None) -> SeriesOrIndex: + def rstrip(self, to_strip: Optional[str] = None) -> SeriesOrIndex: r""" Remove leading and trailing characters. @@ -3980,7 +3990,9 @@ def removeprefix(self, prefix: str) -> SeriesOrIndex: ) return self._return_or_inplace(result) - def find(self, sub: str, start: int = 0, end: int = None) -> SeriesOrIndex: + def find( + self, sub: str, start: int = 0, end: Optional[int] = None + ) -> SeriesOrIndex: """ Return lowest indexes in each strings in the Series/Index where the substring is fully contained between ``[start:end]``. 
@@ -4036,7 +4048,7 @@ def find(self, sub: str, start: int = 0, end: int = None) -> SeriesOrIndex: return self._return_or_inplace(result_col) def rfind( - self, sub: str, start: int = 0, end: int = None + self, sub: str, start: int = 0, end: Optional[int] = None ) -> SeriesOrIndex: """ Return highest indexes in each strings in the Series/Index @@ -4097,7 +4109,7 @@ def rfind( return self._return_or_inplace(result_col) def index( - self, sub: str, start: int = 0, end: int = None + self, sub: str, start: int = 0, end: Optional[int] = None ) -> SeriesOrIndex: """ Return lowest indexes in each strings where the substring @@ -4159,7 +4171,7 @@ def index( return result def rindex( - self, sub: str, start: int = 0, end: int = None + self, sub: str, start: int = 0, end: Optional[int] = None ) -> SeriesOrIndex: """ Return highest indexes in each strings where the substring @@ -4426,7 +4438,7 @@ def translate(self, table: dict) -> SeriesOrIndex: ) def filter_characters( - self, table: dict, keep: bool = True, repl: str = None + self, table: dict, keep: bool = True, repl: Optional[str] = None ) -> SeriesOrIndex: """ Remove characters from each string using the character ranges @@ -4877,7 +4889,7 @@ def ngrams_tokenize( ) def replace_tokens( - self, targets, replacements, delimiter: str = None + self, targets, replacements, delimiter: Optional[str] = None ) -> SeriesOrIndex: """ The targets tokens are searched for within each string in the series @@ -4962,8 +4974,8 @@ def replace_tokens( def filter_tokens( self, min_token_length: int, - replacement: str = None, - delimiter: str = None, + replacement: Optional[str] = None, + delimiter: Optional[str] = None, ) -> SeriesOrIndex: """ Remove tokens from within each string in the series that are @@ -5351,10 +5363,10 @@ class StringColumn(column.ColumnBase): def __init__( self, - mask: Buffer = None, - size: int = None, # TODO: make non-optional + mask: Optional[Buffer] = None, + size: Optional[int] = None, # TODO: make non-optional offset: int = 0, - null_count: int = None, + null_count: Optional[int] = None, children: Tuple["column.ColumnBase", ...] 
= (), ): dtype = cudf.api.types.dtype("object") @@ -5484,8 +5496,8 @@ def to_arrow(self) -> pa.Array: def sum( self, - skipna: bool = None, - dtype: Dtype = None, + skipna: Optional[bool] = None, + dtype: Optional[Dtype] = None, min_count: int = 0, ): result_col = self._process_for_reduction( @@ -5616,7 +5628,10 @@ def values(self) -> cupy.ndarray: raise TypeError("String Arrays is not yet implemented in cudf") def to_pandas( - self, index: pd.Index = None, nullable: bool = False, **kwargs + self, + index: Optional[pd.Index] = None, + nullable: bool = False, + **kwargs, ) -> "pd.Series": if nullable: pandas_array = pd.StringDtype().__from_arrow__(self.to_arrow()) @@ -5686,8 +5701,8 @@ def find_and_replace( def fillna( self, fill_value: Any = None, - method: str = None, - dtype: Dtype = None, + method: Optional[str] = None, + dtype: Optional[Dtype] = None, ) -> StringColumn: if fill_value is not None: if not is_scalar(fill_value): @@ -5835,7 +5850,6 @@ def view(self, dtype) -> "cudf.core.column.ColumnBase": def _get_cols_list(parent_obj, others): - parent_index = ( parent_obj.index if isinstance(parent_obj, cudf.Series) else parent_obj ) diff --git a/python/cudf/cudf/core/column/struct.py b/python/cudf/cudf/core/column/struct.py index 6838d711641..6306bd1f32d 100644 --- a/python/cudf/cudf/core/column/struct.py +++ b/python/cudf/cudf/core/column/struct.py @@ -2,6 +2,7 @@ from __future__ import annotations from functools import cached_property +from typing import Optional import pandas as pd import pyarrow as pa @@ -57,7 +58,9 @@ def to_arrow(self): pa_type, len(self), buffers, children=children ) - def to_pandas(self, index: pd.Index = None, **kwargs) -> "pd.Series": + def to_pandas( + self, index: Optional[pd.Index] = None, **kwargs + ) -> "pd.Series": # We cannot go via Arrow's `to_pandas` because of the following issue: # https://issues.apache.org/jira/browse/ARROW-12680 diff --git a/python/cudf/cudf/core/column/timedelta.py b/python/cudf/cudf/core/column/timedelta.py index e7979fa4d27..e1d913742ec 100644 --- a/python/cudf/cudf/core/column/timedelta.py +++ b/python/cudf/cudf/core/column/timedelta.py @@ -3,7 +3,7 @@ from __future__ import annotations import datetime -from typing import Any, Sequence, cast +from typing import Any, Optional, Sequence, cast import numpy as np import pandas as pd @@ -80,10 +80,10 @@ def __init__( self, data: Buffer, dtype: Dtype, - size: int = None, # TODO: make non-optional - mask: Buffer = None, + size: Optional[int] = None, # TODO: make non-optional + mask: Optional[Buffer] = None, offset: int = 0, - null_count: int = None, + null_count: Optional[int] = None, ): dtype = cudf.dtype(dtype) @@ -251,7 +251,10 @@ def time_unit(self) -> str: return self._time_unit def fillna( - self, fill_value: Any = None, method: str = None, dtype: Dtype = None + self, + fill_value: Any = None, + method: Optional[str] = None, + dtype: Optional[Dtype] = None, ) -> TimeDeltaColumn: if fill_value is not None: if cudf.utils.utils._isnat(fill_value): @@ -313,7 +316,7 @@ def mean(self, skipna=None, dtype: Dtype = np.float64) -> pd.Timedelta: unit=self.time_unit, ) - def median(self, skipna: bool = None) -> pd.Timedelta: + def median(self, skipna: Optional[bool] = None) -> pd.Timedelta: return pd.Timedelta( self.as_numerical.median(skipna=skipna), unit=self.time_unit ) @@ -340,9 +343,9 @@ def quantile( def sum( self, - skipna: bool = None, + skipna: Optional[bool] = None, min_count: int = 0, - dtype: Dtype = None, + dtype: Optional[Dtype] = None, ) -> pd.Timedelta: return 
pd.Timedelta( # Since sum isn't overridden in Numerical[Base]Column, mypy only @@ -356,7 +359,7 @@ def sum( def std( self, - skipna: bool = None, + skipna: Optional[bool] = None, min_count: int = 0, dtype: Dtype = np.float64, ddof: int = 1, diff --git a/python/cudf/cudf/core/column_accessor.py b/python/cudf/cudf/core/column_accessor.py index 707eda3f5e6..832d5acf2de 100644 --- a/python/cudf/cudf/core/column_accessor.py +++ b/python/cudf/cudf/core/column_accessor.py @@ -101,7 +101,7 @@ class ColumnAccessor(abc.MutableMapping): def __init__( self, - data: Union[abc.MutableMapping, ColumnAccessor] = None, + data: Union[abc.MutableMapping, ColumnAccessor, None] = None, multiindex: bool = False, level_names=None, ): diff --git a/python/cudf/cudf/core/dataframe.py b/python/cudf/cudf/core/dataframe.py index 2371dfc51cd..0fb82b8583c 100644 --- a/python/cudf/cudf/core/dataframe.py +++ b/python/cudf/cudf/core/dataframe.py @@ -1988,8 +1988,8 @@ def from_dict( cls, data: dict, orient: str = "columns", - dtype: Dtype = None, - columns: list = None, + dtype: Optional[Dtype] = None, + columns: Optional[list] = None, ) -> DataFrame: """ Construct DataFrame from dict of array-like or dicts. diff --git a/python/cudf/cudf/core/groupby/groupby.py b/python/cudf/cudf/core/groupby/groupby.py index 0b6ea1fb646..f955c5dfbd6 100644 --- a/python/cudf/cudf/core/groupby/groupby.py +++ b/python/cudf/cudf/core/groupby/groupby.py @@ -1095,8 +1095,13 @@ def _normalize_aggs( columns = values._columns aggs_per_column = (aggs,) * len(columns) + # is_list_like performs type narrowing but type-checkers don't + # know it. One could add a TypeGuard annotation to + # is_list_like (see PEP647), but that is less useful than it + # seems because unlike the builtin narrowings it only performs + # narrowing in the positive case. 
normalized_aggs = [ - list(agg) if is_list_like(agg) else [agg] + list(agg) if is_list_like(agg) else [agg] # type: ignore for agg in aggs_per_column ] return column_names, columns, normalized_aggs diff --git a/python/cudf/cudf/core/index.py b/python/cudf/cudf/core/index.py index 281290e1788..0ec06f8d81f 100644 --- a/python/cudf/cudf/core/index.py +++ b/python/cudf/cudf/core/index.py @@ -2601,7 +2601,6 @@ def __init__( copy=False, name=None, ): - if freq is not None: raise NotImplementedError("freq is not yet supported") diff --git a/python/cudf/cudf/core/multiindex.py b/python/cudf/cudf/core/multiindex.py index 4a9bc89fa34..edabdb34435 100644 --- a/python/cudf/cudf/core/multiindex.py +++ b/python/cudf/cudf/core/multiindex.py @@ -101,7 +101,6 @@ def __init__( name=None, **kwargs, ): - if sortorder is not None: raise NotImplementedError("sortorder is not yet supported") if name is not None: @@ -811,7 +810,6 @@ def _get_valid_indices_by_tuple(self, index, row_tuple, max_length): @_cudf_nvtx_annotate def _index_and_downcast(self, result, index, index_key): - if isinstance(index_key, (numbers.Number, slice)): index_key = [index_key] if ( @@ -1069,7 +1067,6 @@ def _is_interval(self): @classmethod @_cudf_nvtx_annotate def _concat(cls, objs): - source_data = [o.to_frame(index=False) for o in objs] # TODO: Verify if this is really necessary or if we can rely on diff --git a/python/cudf/cudf/core/single_column_frame.py b/python/cudf/cudf/core/single_column_frame.py index c4128621148..037ac9c378e 100644 --- a/python/cudf/cudf/core/single_column_frame.py +++ b/python/cudf/cudf/core/single_column_frame.py @@ -140,7 +140,6 @@ def to_numpy( return super().to_numpy(dtype, copy, na_value).flatten() def tolist(self): # noqa: D102 - raise TypeError( "cuDF does not support conversion to host memory " "via the `tolist()` method. Consider using "

From 7575e8da54499990b51535a0f975acd02a493144 Mon Sep 17 00:00:00 2001
From: Nghia Truong <7416935+ttnghia@users.noreply.github.com>
Date: Fri, 12 May 2023 15:37:56 -0700
Subject: [PATCH 10/16] Fix `contiguous_split` performance (#13342)

This fixes a performance issue in `contiguous_split` caused by `pack_metadata` not being implemented in an efficient way. In particular, the output bytes are copied from the internal buffer to the output buffer byte-by-byte, through `std::back_inserter`:

```
std::copy(metadata_begin,
          metadata_begin + (metadata.size() * sizeof(detail::serialized_column)),
          std::back_inserter(metadata_bytes));
```

This was probably optimized by the compiler at some point, but recent refactors made changes to the code that likely prevent such optimization.
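For intuition, here is a standalone sketch (not the cudf code; `serialized_column_stub` is a stand-in for `detail::serialized_column`) contrasting the byte-by-byte copy with the single pre-sized `memcpy` this PR switches to:

```cpp
#include <algorithm>
#include <cstdint>
#include <cstring>
#include <iterator>
#include <vector>

struct serialized_column_stub {
  int type_id; int size; int null_count; int64_t data_offset; int64_t null_mask_offset;
};

// Before: every byte goes through push_back, so the vector re-checks
// (and occasionally grows) its capacity once per byte.
std::vector<uint8_t> pack_bytewise(std::vector<serialized_column_stub> const& meta)
{
  std::vector<uint8_t> out;
  auto const* begin = reinterpret_cast<uint8_t const*>(meta.data());
  std::copy(begin, begin + meta.size() * sizeof(serialized_column_stub),
            std::back_inserter(out));
  return out;
}

// After: one allocation followed by one bulk copy.
std::vector<uint8_t> pack_bulk(std::vector<serialized_column_stub> const& meta)
{
  std::vector<uint8_t> out(meta.size() * sizeof(serialized_column_stub));
  std::memcpy(out.data(), meta.data(), out.size());
  return out;
}
```

Whether the compiler can turn the first form into a block copy depends on it proving the per-byte capacity checks away, which is exactly the kind of optimization a refactor can silently break; the second form does not rely on it.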
### Benchmark

Latest cudf commit:
```
----------------------------------------------------------------------------------------------------------------------------------------------------
ContiguousSplit/6Gb512ColsNoValidity/6442450944/512/256/0/iterations:8/manual_time 46.1 ms 46.1 ms 8 bytes_per_second=260.086G/s
ContiguousSplit/6Gb512ColsValidity/6442450944/512/256/1/iterations:8/manual_time 48.1 ms 48.0 ms 8 bytes_per_second=257.527G/s
ContiguousSplit/6Gb10ColsNoValidity/6442450944/10/256/0/iterations:8/manual_time 27.4 ms 27.4 ms 8 bytes_per_second=438.188G/s
ContiguousSplit/6Gb10ColsValidity/6442450944/10/256/1/iterations:8/manual_time 28.5 ms 28.5 ms 8 bytes_per_second=434.381G/s
ContiguousSplit/4Gb512ColsNoValidity/4294967296/512/256/0/iterations:8/manual_time 34.5 ms 34.5 ms 8 bytes_per_second=231.825G/s
ContiguousSplit/4Gb512ColsValidity/4294967296/512/256/1/iterations:8/manual_time 37.4 ms 37.4 ms 8 bytes_per_second=220.521G/s
ContiguousSplit/4Gb10ColsNoValidity/4294967296/10/256/0/iterations:8/manual_time 18.9 ms 18.9 ms 8 bytes_per_second=422.259G/s
ContiguousSplit/4Gb10ColsValidity/4294967296/10/256/1/iterations:8/manual_time 19.4 ms 19.4 ms 8 bytes_per_second=424.595G/s
ContiguousSplit/4Gb4ColsNoSplits/1073741824/4/0/1/iterations:8/manual_time 4.35 ms 4.35 ms 8 bytes_per_second=474.47G/s
ContiguousSplit/4Gb4ColsValidityNoSplits/1073741824/4/0/1/iterations:8/manual_time 4.35 ms 4.36 ms 8 bytes_per_second=473.665G/s
ContiguousSplit/1Gb512ColsNoValidity/1073741824/512/256/0/iterations:8/manual_time 22.2 ms 22.2 ms 8 bytes_per_second=90.1502G/s
ContiguousSplit/1Gb512ColsValidity/1073741824/512/256/1/iterations:8/manual_time 25.1 ms 25.1 ms 8 bytes_per_second=82.1379G/s
ContiguousSplit/1Gb10ColsNoValidity/1073741824/10/256/0/iterations:8/manual_time 5.08 ms 5.08 ms 8 bytes_per_second=393.98G/s
ContiguousSplit/1Gb10ColsValidity/1073741824/10/256/1/iterations:8/manual_time 5.28 ms 5.28 ms 8 bytes_per_second=390.85G/s
ContiguousSplit/1Gb1ColNoSplits/1073741824/1/0/1/iterations:8/manual_time 4.34 ms 4.35 ms 8 bytes_per_second=474.715G/s
ContiguousSplit/1Gb1ColValidityNoSplits/1073741824/1/0/1/iterations:8/manual_time 4.47 ms 4.47 ms 8 bytes_per_second=461.788G/s
ContiguousSplitStrings/4Gb512ColsNoValidity/4294967296/512/256/0/iterations:8/manual_time 98.1 ms 98.0 ms 8 bytes_per_second=81.6345G/s
ContiguousSplitStrings/4Gb512ColsValidity/4294967296/512/256/1/iterations:8/manual_time 89.5 ms 89.5 ms 8 bytes_per_second=90.843G/s
ContiguousSplitStrings/4Gb10ColsNoValidity/4294967296/10/256/0/iterations:8/manual_time 28.9 ms 29.9 ms 8 bytes_per_second=290.261G/s
ContiguousSplitStrings/4Gb10ColsValidity/4294967296/10/256/1/iterations:8/manual_time 20.4 ms 20.4 ms 8 bytes_per_second=417.033G/s
ContiguousSplitStrings/4Gb4ColsNoSplits/1073741824/4/0/0/iterations:8/manual_time 6.70 ms 7.32 ms 8 bytes_per_second=335.9G/s
ContiguousSplitStrings/4Gb4ColsValidityNoSplits/1073741824/4/0/1/iterations:8/manual_time 4.35 ms 4.36 ms 8 bytes_per_second=524.386G/s
ContiguousSplitStrings/1Gb512ColsNoValidity/1073741824/512/256/0/iterations:8/manual_time 77.8 ms 77.8 ms 8 bytes_per_second=25.7184G/s
ContiguousSplitStrings/1Gb512ColsValidity/1073741824/512/256/1/iterations:8/manual_time 79.2 ms 79.1 ms 8 bytes_per_second=25.6833G/s
ContiguousSplitStrings/1Gb10ColsNoValidity/1073741824/10/256/0/iterations:8/manual_time 8.57 ms 8.81 ms 8 bytes_per_second=245.062G/s
ContiguousSplitStrings/1Gb10ColsValidity/1073741824/10/256/1/iterations:8/manual_time 7.83 ms 6.15 ms 8 bytes_per_second=272.089G/s
ContiguousSplitStrings/1Gb1ColNoSplits/1073741824/1/0/0/iterations:8/manual_time 6.66 ms 9.17 ms 8 bytes_per_second=450.551G/s
ContiguousSplitStrings/1Gb1ColValidityNoSplits/1073741824/1/0/1/iterations:8/manual_time 4.41 ms 4.41 ms 8 bytes_per_second=687.88G/s
```

With this fix:
```
----------------------------------------------------------------------------------------------------------------------------------------------------
ContiguousSplit/6Gb512ColsNoValidity/6442450944/512/256/0/iterations:8/manual_time 38.5 ms 38.4 ms 8 bytes_per_second=311.981G/s
ContiguousSplit/6Gb512ColsValidity/6442450944/512/256/1/iterations:8/manual_time 42.8 ms 42.7 ms 8 bytes_per_second=289.289G/s
ContiguousSplit/6Gb10ColsNoValidity/6442450944/10/256/0/iterations:8/manual_time 27.6 ms 27.5 ms 8 bytes_per_second=435.365G/s
ContiguousSplit/6Gb10ColsValidity/6442450944/10/256/1/iterations:8/manual_time 28.4 ms 28.3 ms 8 bytes_per_second=436.145G/s
ContiguousSplit/4Gb512ColsNoValidity/4294967296/512/256/0/iterations:8/manual_time 27.2 ms 27.2 ms 8 bytes_per_second=293.677G/s
ContiguousSplit/4Gb512ColsValidity/4294967296/512/256/1/iterations:8/manual_time 29.9 ms 29.9 ms 8 bytes_per_second=276.137G/s
ContiguousSplit/4Gb10ColsNoValidity/4294967296/10/256/0/iterations:8/manual_time 19.0 ms 19.0 ms 8 bytes_per_second=421.185G/s
ContiguousSplit/4Gb10ColsValidity/4294967296/10/256/1/iterations:8/manual_time 19.1 ms 19.1 ms 8 bytes_per_second=431.306G/s
ContiguousSplit/4Gb4ColsNoSplits/1073741824/4/0/1/iterations:8/manual_time 4.35 ms 4.35 ms 8 bytes_per_second=474.311G/s
ContiguousSplit/4Gb4ColsValidityNoSplits/1073741824/4/0/1/iterations:8/manual_time 4.34 ms 4.35 ms 8 bytes_per_second=475.281G/s
ContiguousSplit/1Gb512ColsNoValidity/1073741824/512/256/0/iterations:8/manual_time 14.6 ms 14.6 ms 8 bytes_per_second=137.131G/s
ContiguousSplit/1Gb512ColsValidity/1073741824/512/256/1/iterations:8/manual_time 17.2 ms 17.2 ms 8 bytes_per_second=119.946G/s
ContiguousSplit/1Gb10ColsNoValidity/1073741824/10/256/0/iterations:8/manual_time 4.89 ms 4.89 ms 8 bytes_per_second=409.281G/s
ContiguousSplit/1Gb10ColsValidity/1073741824/10/256/1/iterations:8/manual_time 5.09 ms 5.10 ms 8 bytes_per_second=404.981G/s
ContiguousSplit/1Gb1ColNoSplits/1073741824/1/0/1/iterations:8/manual_time 4.40 ms 4.41 ms 8 bytes_per_second=469.011G/s
ContiguousSplit/1Gb1ColValidityNoSplits/1073741824/1/0/1/iterations:8/manual_time 4.40 ms 4.41 ms 8 bytes_per_second=468.577G/s
ContiguousSplitStrings/4Gb512ColsNoValidity/4294967296/512/256/0/iterations:8/manual_time 76.0 ms 75.9 ms 8 bytes_per_second=105.396G/s
ContiguousSplitStrings/4Gb512ColsValidity/4294967296/512/256/1/iterations:8/manual_time 70.6 ms 70.5 ms 8 bytes_per_second=115.205G/s
ContiguousSplitStrings/4Gb10ColsNoValidity/4294967296/10/256/0/iterations:8/manual_time 28.6 ms 29.6 ms 8 bytes_per_second=293.253G/s
ContiguousSplitStrings/4Gb10ColsValidity/4294967296/10/256/1/iterations:8/manual_time 19.0 ms 19.0 ms 8 bytes_per_second=448.676G/s
ContiguousSplitStrings/4Gb4ColsNoSplits/1073741824/4/0/0/iterations:8/manual_time 6.69 ms 7.32 ms 8 bytes_per_second=336.342G/s
ContiguousSplitStrings/4Gb4ColsValidityNoSplits/1073741824/4/0/1/iterations:8/manual_time 4.40 ms 4.39 ms 8 bytes_per_second=518.755G/s
ContiguousSplitStrings/1Gb512ColsNoValidity/1073741824/512/256/0/iterations:8/manual_time 55.4 ms 55.4 ms 8 bytes_per_second=36.1167G/s
ContiguousSplitStrings/1Gb512ColsValidity/1073741824/512/256/1/iterations:8/manual_time 57.0 ms 56.9 ms 8 bytes_per_second=35.6588G/s
ContiguousSplitStrings/1Gb10ColsNoValidity/1073741824/10/256/0/iterations:8/manual_time 8.48 ms 8.73 ms 8 bytes_per_second=247.664G/s
ContiguousSplitStrings/1Gb10ColsValidity/1073741824/10/256/1/iterations:8/manual_time 5.99 ms 6.00 ms 8 bytes_per_second=355.742G/s
ContiguousSplitStrings/1Gb1ColNoSplits/1073741824/1/0/0/iterations:8/manual_time 6.69 ms 9.30 ms 8 bytes_per_second=448.359G/s
ContiguousSplitStrings/1Gb1ColValidityNoSplits/1073741824/1/0/1/iterations:8/manual_time 4.33 ms 4.33 ms 8 bytes_per_second=700.639G/s
```

Authors:
  - Nghia Truong (https://github.com/ttnghia)

Approvers:
  - Alessandro Bellina (https://github.com/abellina)
  - David Wendt (https://github.com/davidwendt)

URL: https://github.com/rapidsai/cudf/pull/13342
--- cpp/include/cudf/detail/contiguous_split.hpp | 16 +++++- cpp/src/copying/contiguous_split.cu | 20 +++++--- cpp/src/copying/pack.cpp | 54 +++++++++++--------- 3 files changed, 59 insertions(+), 31 deletions(-) diff --git a/cpp/include/cudf/detail/contiguous_split.hpp b/cpp/include/cudf/detail/contiguous_split.hpp index 4c6d19739cf..d9a35470b7d 100644 --- a/cpp/include/cudf/detail/contiguous_split.hpp +++ b/cpp/include/cudf/detail/contiguous_split.hpp @@ -67,7 +67,7 @@ class metadata_builder { * @brief Destructor that will be implemented as default, required because metadata_builder_impl * is incomplete at this stage. */ - ~metadata_builder() = default; + ~metadata_builder(); /** * @brief Add a column to this metadata builder. @@ -105,9 +105,23 @@ class metadata_builder { */ std::vector build() const; + /** + * @brief Clear the internal buffer containing all added metadata. + */ + void clear(); + private: std::unique_ptr impl; }; +/** + * @copydoc pack_metadata + * @param builder The reusable builder object to create packed column metadata. + */ +std::vector pack_metadata(table_view const& table, + uint8_t const* contiguous_buffer, + size_t buffer_size, + metadata_builder& builder); + } // namespace detail } // namespace cudf diff --git a/cpp/src/copying/contiguous_split.cu b/cpp/src/copying/contiguous_split.cu index 4c3b4eddb8d..e7ac424001c 100644 --- a/cpp/src/copying/contiguous_split.cu +++ b/cpp/src/copying/contiguous_split.cu @@ -1251,6 +1251,8 @@ std::vector contiguous_split(cudf::table_view const& input, std::vector cols; cols.reserve(num_root_columns); auto cur_dst_buf_info = h_dst_buf_info; + cudf::detail::metadata_builder meta_builder(num_root_columns); + for (std::size_t idx = 0; idx < num_partitions; idx++) { // traverse the buffers and build the columns. cur_dst_buf_info = build_output_columns( @@ -1258,14 +1260,18 @@ std::vector contiguous_split(cudf::table_view const& input, // pack the columns cudf::table_view t{cols}; - result.push_back(packed_table{ t, packed_columns{ std::make_unique>(cudf::pack_metadata( t, reinterpret_cast(out_buffers[idx].data()), out_buffers[idx].size())), std::make_unique(std::move(out_buffers[idx]))}}); - cols.clear(); + + cudf::packed_columns packed_cols{ + std::make_unique>( + cudf::detail::pack_metadata(t, + reinterpret_cast(out_buffers[idx].data()), + out_buffers[idx].size(), + meta_builder)), + std::make_unique(std::move(out_buffers[idx]))}; + meta_builder.clear(); + + result.emplace_back(packed_table{std::move(t), std::move(packed_cols)}); } return result; } diff --git a/cpp/src/copying/pack.cpp b/cpp/src/copying/pack.cpp index bac9aac1886..e4de4a43b68 100644 --- a/cpp/src/copying/pack.cpp +++ b/cpp/src/copying/pack.cpp @@ -35,6 +35,8 @@ namespace { * and unpack.
*/ struct serialized_column { + serialized_column() = default; + serialized_column(data_type _type, size_type _size, size_type _null_count, @@ -150,24 +152,22 @@ packed_columns pack(cudf::table_view const& input, return contig_split_result.empty() ? packed_columns{} : std::move(contig_split_result[0].data); } -template -std::vector pack_metadata(ColumnIter begin, - ColumnIter end, +std::vector pack_metadata(table_view const& table, uint8_t const* contiguous_buffer, - size_t buffer_size) + size_t buffer_size, + metadata_builder& builder) { - auto mb = metadata_builder(std::distance(begin, end)); - - std::for_each(begin, end, [&mb, &contiguous_buffer, &buffer_size](column_view const& col) { - build_column_metadata(mb, col, contiguous_buffer, buffer_size); - }); + std::for_each( + table.begin(), table.end(), [&builder, contiguous_buffer, buffer_size](column_view const& col) { + build_column_metadata(builder, col, contiguous_buffer, buffer_size); + }); - return mb.build(); + return builder.build(); } class metadata_builder_impl { public: - metadata_builder_impl() = default; + metadata_builder_impl(size_type const num_root_columns) { metadata.reserve(num_root_columns); } void add_column_info_to_meta(data_type const col_type, size_type const col_size, @@ -182,14 +182,16 @@ class metadata_builder_impl { std::vector build() const { - // convert to anonymous bytes - std::vector metadata_bytes; - auto const metadata_begin = reinterpret_cast(metadata.data()); - std::copy(metadata_begin, - metadata_begin + (metadata.size() * sizeof(detail::serialized_column)), - std::back_inserter(metadata_bytes)); - - return metadata_bytes; + auto output = std::vector(metadata.size() * sizeof(detail::serialized_column)); + std::memcpy(output.data(), metadata.data(), output.size()); + return output; + } + + void clear() + { + // Clear all, except the first metadata entry storing the number of top level columns that + // was added upon object construction. + metadata.resize(1); } private: @@ -228,13 +230,16 @@ table_view unpack(uint8_t const* metadata, uint8_t const* gpu_data) } metadata_builder::metadata_builder(size_type const num_root_columns) - : impl(std::make_unique()) + : impl(std::make_unique(num_root_columns + + 1 /*one more extra metadata entry as below*/)) { // first metadata entry is a stub indicating how many total (top level) columns // there are impl->add_column_info_to_meta(data_type{type_id::EMPTY}, num_root_columns, 0, -1, -1, 0); } +metadata_builder::~metadata_builder() = default; + void metadata_builder::add_column_info_to_meta(data_type const col_type, size_type const col_size, size_type const col_null_count, @@ -248,6 +253,8 @@ void metadata_builder::add_column_info_to_meta(data_type const col_type, std::vector metadata_builder::build() const { return impl->build(); } +void metadata_builder::clear() { return impl->clear(); } + } // namespace detail /** @@ -267,9 +274,10 @@ std::vector pack_metadata(table_view const& table, size_t buffer_size) { CUDF_FUNC_RANGE(); - return table.is_empty() - ? 
std::vector{} : detail::pack_metadata(table.begin(), table.end(), contiguous_buffer, buffer_size); + if (table.is_empty()) { return std::vector{}; } + + auto builder = cudf::detail::metadata_builder(table.num_columns()); + return detail::pack_metadata(table, contiguous_buffer, buffer_size, builder); } /**

From 03f08f5be0c71683e2f889f9e00f866256314ed2 Mon Sep 17 00:00:00 2001
From: Karthikeyan <6488848+karthikeyann@users.noreply.github.com>
Date: Sat, 13 May 2023 13:18:45 +0530
Subject: [PATCH 11/16] Fix 64bit shift bug in avro reader (#13276)

Replace the 8-bit uint with a 64-bit one so that shifts by up to 64 bits happen correctly (`cudf::io::avro::container::get_encoded()`).

Authors:
  - Karthikeyan (https://github.com/karthikeyann)

Approvers:
  - Nghia Truong (https://github.com/ttnghia)
  - Mark Harris (https://github.com/harrism)
  - Vukasin Milovanovic (https://github.com/vuule)

URL: https://github.com/rapidsai/cudf/pull/13276
--- cpp/src/io/avro/avro.cpp | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/cpp/src/io/avro/avro.cpp b/cpp/src/io/avro/avro.cpp index 6312fbf93a9..3c8524588af 100644 --- a/cpp/src/io/avro/avro.cpp +++ b/cpp/src/io/avro/avro.cpp @@ -27,8 +27,9 @@ template <> uint64_t container::get_encoded() { uint64_t val = 0; - for (uint64_t len = 0; len < 64; len += 7) { - auto const byte = get_raw(); + for (auto len = 0; len < 64; len += 7) { + // 64-bit int since shift left is upto 64. + uint64_t const byte = get_raw(); val |= (byte & 0x7f) << len; if (byte < 0x80) break; }

From 40e4fbe14a4d3b2f42563c06bd978b1948b7923a Mon Sep 17 00:00:00 2001
From: Lawrence Mitchell
Date: Mon, 15 May 2023 09:31:21 +0100
Subject: [PATCH 12/16] Expunge most uses of `TypeVar(bound="Foo")` (#13346)

With #13336, we can use the PEP 673 `Self` type.

Authors:
  - Lawrence Mitchell (https://github.com/wence-)

Approvers:
  - Bradley Dice (https://github.com/bdice)

URL: https://github.com/rapidsai/cudf/pull/13346
--- python/cudf/cudf/_lib/column.pyi | 9 ++-- python/cudf/cudf/core/_base_index.py | 10 ++-- python/cudf/cudf/core/buffer/buffer.py | 11 ++--- python/cudf/cudf/core/buffer/cow_buffer.py | 12 ++--- .../cudf/cudf/core/buffer/spillable_buffer.py | 21 ++------ python/cudf/cudf/core/column/categorical.py | 29 ++++++----- python/cudf/cudf/core/column/column.py | 48 +++++++++---------- python/cudf/cudf/core/column/interval.py | 3 +- python/cudf/cudf/core/column/numerical.py | 2 +- python/cudf/cudf/core/column/string.py | 2 +- python/cudf/cudf/core/column/struct.py | 2 +- python/cudf/cudf/core/dataframe.py | 7 +-- python/cudf/cudf/core/frame.py | 15 +++--- python/cudf/cudf/core/index.py | 5 +- python/cudf/cudf/core/indexed_frame.py | 21 ++++---- python/cudf/cudf/core/single_column_frame.py | 4 +- python/cudf/cudf/core/tools/datetimes.py | 14 +++--- 17 files changed, 93 insertions(+), 122 deletions(-) diff --git a/python/cudf/cudf/_lib/column.pyi b/python/cudf/cudf/_lib/column.pyi index bd53801a972..93edef311db 100644 --- a/python/cudf/cudf/_lib/column.pyi +++ b/python/cudf/cudf/_lib/column.pyi @@ -4,12 +4,12 @@ from __future__ import annotations from typing import Dict, Optional, Tuple, TypeVar +from typing_extensions import Self + from cudf._typing import Dtype, DtypeObj, ScalarLike from cudf.core.buffer import Buffer from cudf.core.column import ColumnBase -T = TypeVar("T") - class Column: _data: Optional[Buffer] _mask: Optional[Buffer] @@ -56,7 +56,7 @@ class Column: @property def mask_ptr(self) -> int: ...
def set_base_mask(self, value: Optional[Buffer]) -> None: ... - def set_mask(self: T, value: Optional[Buffer]) -> T: ... + def set_mask(self, value: Optional[Buffer]) -> Self: ... @property def null_count(self) -> int: ... @property @@ -68,7 +68,8 @@ class Column: def set_base_children(self, value: Tuple[ColumnBase, ...]) -> None: ... def _mimic_inplace( self, other_col: ColumnBase, inplace=False - ) -> Optional[ColumnBase]: ... + ) -> Optional[Self]: ... + # TODO: The val parameter should be Scalar, not ScalarLike @staticmethod def from_scalar(val: ScalarLike, size: int) -> ColumnBase: ... diff --git a/python/cudf/cudf/core/_base_index.py b/python/cudf/cudf/core/_base_index.py index 68c52a8b9e8..a2e3bc44f3a 100644 --- a/python/cudf/cudf/core/_base_index.py +++ b/python/cudf/cudf/core/_base_index.py @@ -5,9 +5,10 @@ import pickle import warnings from functools import cached_property -from typing import Any, Set, TypeVar +from typing import Any, Set import pandas as pd +from typing_extensions import Self import cudf from cudf._lib.copying import _gather_map_is_valid, gather @@ -62,8 +63,6 @@ Float64Index([1.0, 2.0, 3.0], dtype='float64') """ -BaseIndexT = TypeVar("BaseIndexT", bound="BaseIndex") - class BaseIndex(Serializable): """Base class for all cudf Index types.""" @@ -101,8 +100,8 @@ def __contains__(self, item): return item in self._values def _copy_type_metadata( - self: BaseIndexT, other: BaseIndexT, *, override_dtypes=None - ) -> BaseIndexT: + self, other: Self, *, override_dtypes=None + ) -> Self: raise NotImplementedError def get_level_values(self, level): @@ -1451,7 +1450,6 @@ def get_slice_bound(self, label, side, kind=None): raise NotImplementedError def __array_function__(self, func, types, args, kwargs): - # check if the function is implemented for the current type cudf_index_module = type(self) for submodule in func.__module__.split(".")[1:]: diff --git a/python/cudf/cudf/core/buffer/buffer.py b/python/cudf/cudf/core/buffer/buffer.py index abf1ec47e3d..97f3b16bec8 100644 --- a/python/cudf/cudf/core/buffer/buffer.py +++ b/python/cudf/cudf/core/buffer/buffer.py @@ -5,9 +5,10 @@ import math import pickle from types import SimpleNamespace -from typing import Any, Dict, Mapping, Optional, Sequence, Tuple, Type, TypeVar +from typing import Any, Dict, Mapping, Optional, Sequence, Tuple import numpy +from typing_extensions import Self import rmm @@ -15,8 +16,6 @@ from cudf.core.abc import Serializable from cudf.utils.string import format_bytes -T = TypeVar("T", bound="Buffer") - def host_memory_allocation(nbytes: int) -> memoryview: """Allocate host memory using NumPy @@ -108,7 +107,7 @@ def __init__(self): ) @classmethod - def _from_device_memory(cls: Type[T], data: Any) -> T: + def _from_device_memory(cls, data: Any) -> Self: """Create a Buffer from an object exposing `__cuda_array_interface__`. No data is being copied. @@ -139,7 +138,7 @@ def _from_device_memory(cls: Type[T], data: Any) -> T: return ret @classmethod - def _from_host_memory(cls: Type[T], data: Any) -> T: + def _from_host_memory(cls, data: Any) -> Self: """Create a Buffer from a buffer or array like object Data must implement `__array_interface__`, the buffer protocol, and/or @@ -310,7 +309,7 @@ def serialize(self) -> Tuple[dict, list]: return header, frames @classmethod - def deserialize(cls: Type[T], header: dict, frames: list) -> T: + def deserialize(cls, header: dict, frames: list) -> Self: """Create an Buffer from a serialized representation. 
Parameters diff --git a/python/cudf/cudf/core/buffer/cow_buffer.py b/python/cudf/cudf/core/buffer/cow_buffer.py index 16a1e3942e7..6243916b91b 100644 --- a/python/cudf/cudf/core/buffer/cow_buffer.py +++ b/python/cudf/cudf/core/buffer/cow_buffer.py @@ -4,15 +4,15 @@ import weakref from collections import defaultdict -from typing import Any, DefaultDict, Tuple, Type, TypeVar +from typing import Any, DefaultDict, Tuple from weakref import WeakSet +from typing_extensions import Self + import rmm from cudf.core.buffer.buffer import Buffer -T = TypeVar("T", bound="CopyOnWriteBuffer") - def _keys_cleanup(ptr): weak_set_values = CopyOnWriteBuffer._instances[ptr] @@ -55,9 +55,7 @@ def _finalize_init(self): weakref.finalize(self, _keys_cleanup, self._ptr) @classmethod - def _from_device_memory( - cls: Type[T], data: Any, *, exposed: bool = False - ) -> T: + def _from_device_memory(cls, data: Any, *, exposed: bool = False) -> Self: """Create a Buffer from an object exposing `__cuda_array_interface__`. No data is being copied. @@ -82,7 +80,7 @@ def _from_device_memory( return ret @classmethod - def _from_host_memory(cls: Type[T], data: Any) -> T: + def _from_host_memory(cls, data: Any) -> Self: ret = super()._from_host_memory(data) ret._finalize_init() return ret diff --git a/python/cudf/cudf/core/buffer/spillable_buffer.py b/python/cudf/cudf/core/buffer/spillable_buffer.py index 169b52b828e..c1bc49c7a9e 100644 --- a/python/cudf/cudf/core/buffer/spillable_buffer.py +++ b/python/cudf/cudf/core/buffer/spillable_buffer.py @@ -7,18 +7,10 @@ import time import weakref from threading import RLock -from typing import ( - TYPE_CHECKING, - Any, - Dict, - List, - Optional, - Tuple, - Type, - TypeVar, -) +from typing import TYPE_CHECKING, Any, Dict, List, Optional, Tuple import numpy +from typing_extensions import Self import rmm @@ -34,9 +26,6 @@ from cudf.core.buffer.spill_manager import SpillManager -T = TypeVar("T", bound="SpillableBuffer") - - def get_spillable_owner(data) -> Optional[SpillableBuffer]: """Get the spillable owner of `data`, if any exist @@ -212,9 +201,7 @@ def _finalize_init(self, ptr_desc: Dict[str, Any], exposed: bool) -> None: self._manager.add(self) @classmethod - def _from_device_memory( - cls: Type[T], data: Any, *, exposed: bool = False - ) -> T: + def _from_device_memory(cls, data: Any, *, exposed: bool = False) -> Self: """Create a spillabe buffer from device memory. No data is being copied. @@ -236,7 +223,7 @@ def _from_device_memory( return ret @classmethod - def _from_host_memory(cls: Type[T], data: Any) -> T: + def _from_host_memory(cls, data: Any) -> Self: """Create a spillabe buffer from host memory. Data must implement `__array_interface__`, the buffer protocol, and/or diff --git a/python/cudf/cudf/core/column/categorical.py b/python/cudf/cudf/core/column/categorical.py index c6d7f779884..39332807139 100644 --- a/python/cudf/cudf/core/column/categorical.py +++ b/python/cudf/cudf/core/column/categorical.py @@ -11,6 +11,7 @@ import pandas as pd import pyarrow as pa from numba import cuda +from typing_extensions import Self import cudf from cudf import _lib as libcudf @@ -716,7 +717,6 @@ def __init__( null_count: Optional[int] = None, children: Tuple["column.ColumnBase", ...] 
= (), ): - if size is None: for child in children: assert child.offset == 0 @@ -874,7 +874,7 @@ def _fill( begin: int, end: int, inplace: bool = False, - ) -> "column.ColumnBase": + ) -> Self: if end <= begin or begin >= self.size: return self if inplace else self.copy() @@ -890,17 +890,20 @@ def _fill( def slice( self, start: int, stop: int, stride: Optional[int] = None - ) -> "column.ColumnBase": + ) -> Self: codes = self.codes.slice(start, stop, stride) - return cudf.core.column.build_categorical_column( - categories=self.categories, - codes=cudf.core.column.build_column( - codes.base_data, dtype=codes.dtype + return cast( + Self, + cudf.core.column.build_categorical_column( + categories=self.categories, + codes=cudf.core.column.build_column( + codes.base_data, dtype=codes.dtype + ), + mask=codes.base_mask, + ordered=self.ordered, + size=codes.size, + offset=codes.offset, ), - mask=codes.base_mask, - ordered=self.ordered, - size=codes.size, - offset=codes.offset, ) def _binaryop(self, other: ColumnBinaryOperand, op: str) -> ColumnBase: @@ -1356,7 +1359,7 @@ def _get_decategorized_column(self) -> ColumnBase: out = out.set_mask(self.mask) return out - def copy(self, deep: bool = True) -> CategoricalColumn: + def copy(self, deep: bool = True) -> Self: result_col = super().copy(deep=deep) if deep: result_col.categories = libcudf.copying.copy_column( @@ -1370,7 +1373,7 @@ def memory_usage(self) -> int: def _mimic_inplace( self, other_col: ColumnBase, inplace: bool = False - ) -> Optional[ColumnBase]: + ) -> Optional[Self]: out = super()._mimic_inplace(other_col, inplace=inplace) if inplace and isinstance(other_col, CategoricalColumn): self._codes = other_col._codes diff --git a/python/cudf/cudf/core/column/column.py b/python/cudf/cudf/core/column/column.py index 6557001f884..607bf83ff6c 100644 --- a/python/cudf/cudf/core/column/column.py +++ b/python/cudf/cudf/core/column/column.py @@ -2,6 +2,7 @@ from __future__ import annotations +import builtins import pickle import warnings from functools import cached_property @@ -15,7 +16,6 @@ Optional, Sequence, Tuple, - TypeVar, Union, cast, ) @@ -25,6 +25,7 @@ import pandas as pd import pyarrow as pa from numba import cuda +from typing_extensions import Self import rmm @@ -97,11 +98,6 @@ else: from pandas.core.arrays._arrow_utils import ArrowIntervalType -T = TypeVar("T", bound="ColumnBase") -# TODO: This workaround allows type hints for `slice`, since `slice` is a -# method in ColumnBase. -Slice = TypeVar("Slice", bound=slice) - class ColumnBase(Column, Serializable, BinaryOperand, Reducible): _VALID_REDUCTIONS = { @@ -203,7 +199,7 @@ def __repr__(self): def to_pandas( self, index: Optional[pd.Index] = None, **kwargs - ) -> "pd.Series": + ) -> pd.Series: """Convert object to pandas type. The default implementation falls back to PyArrow for the conversion. 
@@ -246,11 +242,11 @@ def values(self) -> "cupy.ndarray": return cupy.asarray(self.data_array_view(mode="write")) def find_and_replace( - self: T, + self, to_replace: ColumnLike, replacement: ColumnLike, all_nan: bool = False, - ) -> T: + ) -> Self: raise NotImplementedError def clip(self, lo: ScalarLike, hi: ScalarLike) -> ColumnBase: @@ -399,7 +395,7 @@ def _fill( begin: int, end: int, inplace: bool = False, - ) -> Optional[ColumnBase]: + ) -> Optional[Self]: if end <= begin or begin >= self.size: return self if inplace else self.copy() @@ -439,15 +435,15 @@ def nullmask(self) -> Buffer: raise ValueError("Column has no null mask") return self.mask_array_view(mode="read") - def force_deep_copy(self: T) -> T: + def force_deep_copy(self) -> Self: """ A method to create deep copy irrespective of whether `copy-on-write` is enabled. """ result = libcudf.copying.copy_column(self) - return cast(T, result._with_type_metadata(self.dtype)) + return result._with_type_metadata(self.dtype) - def copy(self: T, deep: bool = True) -> T: + def copy(self, deep: bool = True) -> Self: """ Makes a copy of the Column. @@ -469,7 +465,7 @@ def copy(self: T, deep: bool = True) -> T: return self.force_deep_copy() else: return cast( - T, + Self, build_column( data=self.base_data if self.base_data is None @@ -611,8 +607,10 @@ def _wrap_binop_normalization(self, other): return self.normalize_binop_value(other) def _scatter_by_slice( - self, key: Slice, value: Union[cudf.core.scalar.Scalar, ColumnBase] - ) -> Optional[ColumnBase]: + self, + key: builtins.slice, + value: Union[cudf.core.scalar.Scalar, ColumnBase], + ) -> Optional[Self]: """If this function returns None, it's either a no-op (slice is empty), or the inplace replacement is already performed (fill-in-place). """ @@ -650,7 +648,7 @@ def _scatter_by_column( self, key: cudf.core.column.NumericalColumn, value: Union[cudf.core.scalar.Scalar, ColumnBase], - ) -> ColumnBase: + ) -> Self: if is_bool_dtype(key.dtype): # `key` is boolean mask if len(key) != len(self): @@ -701,11 +699,11 @@ def _check_scatter_key_length( raise ValueError(msg) def fillna( - self: T, + self, value: Any = None, method: Optional[str] = None, dtype: Optional[Dtype] = None, - ) -> T: + ) -> Self: """Fill null values with ``value``. Returns a copy with null filled. @@ -773,8 +771,8 @@ def quantile( raise TypeError(f"cannot perform quantile with type {self.dtype}") def take( - self: T, indices: ColumnBase, nullify: bool = False, check_bounds=True - ) -> T: + self, indices: ColumnBase, nullify: bool = False, check_bounds=True + ) -> Self: """Return Column by taking values from the corresponding *indices*. Skip bounds checking if check_bounds is False. @@ -782,7 +780,7 @@ def take( """ # Handle zero size if indices.size == 0: - return cast(T, column_empty_like(self, newsize=0)) + return cast(Self, column_empty_like(self, newsize=0)) # TODO: For performance, the check and conversion of gather map should # be done by the caller. This check will be removed in future release. 
@@ -1411,8 +1409,10 @@ def column_empty_like( and is_categorical_dtype(column.dtype) and dtype == column.dtype ): - column = cast("cudf.core.column.CategoricalColumn", column) - codes = column_empty_like(column.codes, masked=masked, newsize=newsize) + catcolumn = cast("cudf.core.column.CategoricalColumn", column) + codes = column_empty_like( + catcolumn.codes, masked=masked, newsize=newsize + ) return build_column( data=None, dtype=dtype, diff --git a/python/cudf/cudf/core/column/interval.py b/python/cudf/cudf/core/column/interval.py index 1b9caa42ecf..38384d09126 100644 --- a/python/cudf/cudf/core/column/interval.py +++ b/python/cudf/cudf/core/column/interval.py @@ -21,7 +21,6 @@ def __init__( children=(), closed="right", ): - super().__init__( data=None, dtype=dtype, @@ -128,7 +127,7 @@ def as_interval_column(self, dtype, **kwargs): def to_pandas( self, index: Optional[pd.Index] = None, **kwargs - ) -> "pd.Series": + ) -> pd.Series: # Note: This does not handle null values in the interval column. # However, this exact sequence (calling __from_arrow__ on the output of # self.to_arrow) is currently the best known way to convert interval diff --git a/python/cudf/cudf/core/column/numerical.py b/python/cudf/cudf/core/column/numerical.py index 840858c4bdb..39798320f88 100644 --- a/python/cudf/cudf/core/column/numerical.py +++ b/python/cudf/cudf/core/column/numerical.py @@ -723,7 +723,7 @@ def to_pandas( index: Optional[pd.Index] = None, nullable: bool = False, **kwargs, - ) -> "pd.Series": + ) -> pd.Series: if nullable and self.dtype in np_dtypes_to_pandas_dtypes: pandas_nullable_dtype = np_dtypes_to_pandas_dtypes[self.dtype] arrow_array = self.to_arrow() diff --git a/python/cudf/cudf/core/column/string.py b/python/cudf/cudf/core/column/string.py index 8e83d0c72b6..a3163f1cebe 100644 --- a/python/cudf/cudf/core/column/string.py +++ b/python/cudf/cudf/core/column/string.py @@ -5632,7 +5632,7 @@ def to_pandas( index: Optional[pd.Index] = None, nullable: bool = False, **kwargs, - ) -> "pd.Series": + ) -> pd.Series: if nullable: pandas_array = pd.StringDtype().__from_arrow__(self.to_arrow()) pd_series = pd.Series(pandas_array, copy=False) diff --git a/python/cudf/cudf/core/column/struct.py b/python/cudf/cudf/core/column/struct.py index 6306bd1f32d..8558faa7f24 100644 --- a/python/cudf/cudf/core/column/struct.py +++ b/python/cudf/cudf/core/column/struct.py @@ -60,7 +60,7 @@ def to_arrow(self): def to_pandas( self, index: Optional[pd.Index] = None, **kwargs - ) -> "pd.Series": + ) -> pd.Series: # We cannot go via Arrow's `to_pandas` because of the following issue: # https://issues.apache.org/jira/browse/ARROW-12680 diff --git a/python/cudf/cudf/core/dataframe.py b/python/cudf/cudf/core/dataframe.py index 0fb82b8583c..2aa370ac8e5 100644 --- a/python/cudf/cudf/core/dataframe.py +++ b/python/cudf/cudf/core/dataframe.py @@ -22,7 +22,6 @@ Optional, Set, Tuple, - TypeVar, Union, ) @@ -37,6 +36,7 @@ from pandas.core.dtypes.common import is_float, is_integer from pandas.io.formats import console from pandas.io.formats.printing import pprint_thing +from typing_extensions import Self import cudf import cudf.core.common @@ -105,9 +105,6 @@ _external_only_api, ) -T = TypeVar("T", bound="DataFrame") - - _cupy_nan_methods_map = { "min": "nanmin", "max": "nanmax", @@ -1306,7 +1303,7 @@ def __delitem__(self, name): self._drop_column(name) @_cudf_nvtx_annotate - def _slice(self: T, arg: slice) -> T: + def _slice(self, arg: slice) -> Self: """ _slice : slice the frame as per the arg diff --git 
a/python/cudf/cudf/core/frame.py b/python/cudf/cudf/core/frame.py index 230a5054a00..26b1ee971da 100644 --- a/python/cudf/cudf/core/frame.py +++ b/python/cudf/cudf/core/frame.py @@ -16,7 +16,6 @@ MutableMapping, Optional, Tuple, - TypeVar, Union, ) @@ -24,6 +23,7 @@ import numpy as np import pandas as pd import pyarrow as pa +from typing_extensions import Self import cudf from cudf import _lib as libcudf @@ -45,8 +45,6 @@ from cudf.utils.dtypes import find_common_type from cudf.utils.utils import _array_ufunc, _cudf_nvtx_annotate -T = TypeVar("T", bound="Frame") - # TODO: It looks like Frame is missing a declaration of `copy`, need to add class Frame(BinaryOperand, Scannable): @@ -146,8 +144,8 @@ def _from_columns_like_self( return frame._copy_type_metadata(self, override_dtypes=override_dtypes) def _mimic_inplace( - self: T, result: T, inplace: bool = False - ) -> Optional[Frame]: + self, result: Self, inplace: bool = False + ) -> Optional[Self]: if inplace: for col in self._data: if col in result._data: @@ -1069,7 +1067,6 @@ def from_arrow(cls, data): # Handle dict arrays cudf_category_frame = {} if len(dict_indices): - dict_indices_table = pa.table(dict_indices) data = data.drop(dict_indices_table.column_names) indices_columns = libcudf.interop.from_arrow(dict_indices_table) @@ -1191,11 +1188,11 @@ def _positions_from_column_names(self, column_names): ] def _copy_type_metadata( - self: T, - other: T, + self, + other: Self, *, override_dtypes: Optional[abc.Iterable[Optional[Dtype]]] = None, - ) -> T: + ) -> Self: """ Copy type metadata from each column of `other` to the corresponding column of `self`. diff --git a/python/cudf/cudf/core/index.py b/python/cudf/cudf/core/index.py index 0ec06f8d81f..f5dc8298bd2 100644 --- a/python/cudf/cudf/core/index.py +++ b/python/cudf/cudf/core/index.py @@ -15,7 +15,6 @@ Optional, Tuple, Type, - TypeVar, Union, ) @@ -65,8 +64,6 @@ ) from cudf.utils.utils import _cudf_nvtx_annotate, search_range -T = TypeVar("T", bound="Frame") - def _lexsorted_equal_range( idx: Union[GenericIndex, cudf.MultiIndex], @@ -1048,7 +1045,7 @@ def _from_data( def _binaryop( self, - other: T, + other: Frame, op: str, fill_value: Any = None, *args, diff --git a/python/cudf/cudf/core/indexed_frame.py b/python/cudf/cudf/core/indexed_frame.py index 95931af038c..3c4d8d84c34 100644 --- a/python/cudf/cudf/core/indexed_frame.py +++ b/python/cudf/cudf/core/indexed_frame.py @@ -27,6 +27,7 @@ import cupy as cp import numpy as np import pandas as pd +from typing_extensions import Self import cudf import cudf._lib as libcudf @@ -224,8 +225,6 @@ def __init__(self, frame): _LocIndexerClass = TypeVar("_LocIndexerClass", bound="_FrameIndexer") _IlocIndexerClass = TypeVar("_IlocIndexerClass", bound="_FrameIndexer") -T = TypeVar("T", bound="IndexedFrame") - class IndexedFrame(Frame): """A frame containing an index. @@ -357,8 +356,8 @@ def _from_columns_like_self( ) def _mimic_inplace( - self: T, result: T, inplace: bool = False - ) -> Optional[Frame]: + self, result: Self, inplace: bool = False + ) -> Optional[Self]: if inplace: self._index = result._index return super()._mimic_inplace(result, inplace) @@ -441,7 +440,7 @@ def _scan(self, op, axis=None, skipna=True): results[name] = getattr(result_col, op)() return self._from_data(results, self._index) - def _check_data_index_length_match(self: T) -> None: + def _check_data_index_length_match(self) -> None: # Validate that the number of rows in the data matches the index if the # data is not empty. This is a helper for the constructor. 
if self._data.nrows > 0 and self._data.nrows != len(self._index): @@ -450,7 +449,7 @@ def _check_data_index_length_match(self: T) -> None: f"match length of index ({len(self._index)})" ) - def copy(self: T, deep: bool = True) -> T: + def copy(self, deep: bool = True) -> Self: """Make a copy of this object's indices and data. When ``deep=True`` (default), a new object will be created with a @@ -918,12 +917,12 @@ def clip(self, lower=None, upper=None, inplace=False, axis=1): return self._mimic_inplace(output, inplace=inplace) def _copy_type_metadata( - self: T, - other: T, + self, + other: Self, include_index: bool = True, *, override_dtypes: Optional[abc.Iterable[Optional[Dtype]]] = None, - ) -> T: + ) -> Self: """ Copy type metadata from each column of `other` to the corresponding column of `self`. @@ -2322,12 +2321,12 @@ def _n_largest_or_smallest(self, largest, n, columns, keep): raise ValueError('keep must be either "first", "last"') def _align_to_index( - self: T, + self, index: ColumnLike, how: str = "outer", sort: bool = True, allow_non_unique: bool = False, - ) -> T: + ) -> Self: index = cudf.core.index.as_index(index) if self.index.equals(index): diff --git a/python/cudf/cudf/core/single_column_frame.py b/python/cudf/cudf/core/single_column_frame.py index 037ac9c378e..5f07c57fc80 100644 --- a/python/cudf/cudf/core/single_column_frame.py +++ b/python/cudf/cudf/core/single_column_frame.py @@ -4,7 +4,7 @@ from __future__ import annotations import warnings -from typing import Any, Dict, Optional, Tuple, TypeVar, Union +from typing import Any, Dict, Optional, Tuple, Union import cupy import numpy as np @@ -20,8 +20,6 @@ from cudf.core.frame import Frame from cudf.utils.utils import NotIterable, _cudf_nvtx_annotate -T = TypeVar("T", bound="Frame") - class SingleColumnFrame(Frame, NotIterable): """A one-dimensional frame. diff --git a/python/cudf/cudf/core/tools/datetimes.py b/python/cudf/cudf/core/tools/datetimes.py index 92ef49e92d9..0ee9f511061 100644 --- a/python/cudf/cudf/core/tools/datetimes.py +++ b/python/cudf/cudf/core/tools/datetimes.py @@ -1,15 +1,16 @@ -# Copyright (c) 2019-2022, NVIDIA CORPORATION. +# Copyright (c) 2019-2023, NVIDIA CORPORATION. import math import re import warnings -from typing import Sequence, Type, TypeVar, Union +from typing import Sequence, Union import cupy as cp import numpy as np import pandas as pd import pandas.tseries.offsets as pd_offset from pandas.core.tools.datetimes import _unit_map +from typing_extensions import Self import cudf from cudf import _lib as libcudf @@ -373,9 +374,6 @@ def get_units(value): return value -_T = TypeVar("_T", bound="DateOffset") - - class DateOffset: """ An object used for binary ops where calendrical arithmetic @@ -647,7 +645,7 @@ def __repr__(self): return repr_str @classmethod - def _from_freqstr(cls: Type[_T], freqstr: str) -> _T: + def _from_freqstr(cls, freqstr: str) -> Self: """ Parse a string and return a DateOffset object expects strings of the form 3D, 25W, 10ms, 42ns, etc. 
@@ -669,9 +667,9 @@ def _from_freqstr(cls: Type[_T], freqstr: str) -> _T: @classmethod def _from_pandas_ticks_or_weeks( - cls: Type[_T], + cls, tick: Union[pd.tseries.offsets.Tick, pd.tseries.offsets.Week], - ) -> _T: + ) -> Self: return cls(**{cls._TICK_OR_WEEK_TO_UNITS[type(tick)]: tick.n}) def _maybe_as_fast_pandas_offset(self): From 79c01169b4b0bc5b002b8b9b4c57d9e0f6e748e5 Mon Sep 17 00:00:00 2001 From: "Richard (Rick) Zamora" Date: Mon, 15 May 2023 03:45:51 -0500 Subject: [PATCH 13/16] Move `meta` calculation in `dask_cudf.read_parquet` (#13327) Implements `ArrowDatasetEngine._create_dd_meta` so that a cudf-backed `meta` object is created before the full "partitioning plan" is generated. This is important in the new dask expression project (dask-expr), where we need accurate metadata **before** the full partitioning plan is generated. Authors: - Richard (Rick) Zamora (https://github.com/rjzamora) Approvers: - Lawrence Mitchell (https://github.com/wence-) URL: https://github.com/rapidsai/cudf/pull/13327 --- python/dask_cudf/dask_cudf/io/parquet.py | 33 +++++++++++++----------- 1 file changed, 18 insertions(+), 15 deletions(-) diff --git a/python/dask_cudf/dask_cudf/io/parquet.py b/python/dask_cudf/dask_cudf/io/parquet.py index f19c373150d..67fc7215fc7 100644 --- a/python/dask_cudf/dask_cudf/io/parquet.py +++ b/python/dask_cudf/dask_cudf/io/parquet.py @@ -30,28 +30,31 @@ class CudfEngine(ArrowDatasetEngine): - @staticmethod - def read_metadata(*args, **kwargs): - meta, stats, parts, index = ArrowDatasetEngine.read_metadata( - *args, **kwargs + @classmethod + def _create_dd_meta(cls, dataset_info, **kwargs): + # Start with pandas-version of meta + meta_pd = super()._create_dd_meta(dataset_info, **kwargs) + + # Convert to cudf + meta_cudf = cudf.from_pandas(meta_pd) + + # Re-set "object" dtypes to align with pa schema + kwargs = dataset_info.get("kwargs", {}) + set_object_dtypes_from_pa_schema( + meta_cudf, + kwargs.get("schema", None), ) - new_meta = cudf.from_pandas(meta) - if parts: - # Re-set "object" dtypes align with pa schema - set_object_dtypes_from_pa_schema( - new_meta, - parts[0].get("common_kwargs", {}).get("schema", None), - ) # If `strings_to_categorical==True`, convert objects to int32 strings_to_cats = kwargs.get("strings_to_categorical", False) - for col in new_meta._data.names: + for col in meta_cudf._data.names: if ( - isinstance(new_meta._data[col], cudf.core.column.StringColumn) + isinstance(meta_cudf._data[col], cudf.core.column.StringColumn) and strings_to_cats ): - new_meta._data[col] = new_meta._data[col].astype("int32") - return (new_meta, stats, parts, index) + meta_cudf._data[col] = meta_cudf._data[col].astype("int32") + + return meta_cudf @classmethod def multi_support(cls): From 2825d5abc87d57d565aee5178353dd0a2809ee45 Mon Sep 17 00:00:00 2001 From: David Wendt <45795991+davidwendt@users.noreply.github.com> Date: Mon, 15 May 2023 07:56:33 -0400 Subject: [PATCH 14/16] Performance improvement in cudf::strings::all_characters_of_type (#13259) Improves performance for `cudf::strings::all_characters_of_type()` API which covers many cudf `is_X` functions. The solution improves performance for all string lengths as measured by the new benchmark included in this PR. Additionally, the code was cleaned up to help with maintenance and clarity. 
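For readers unfamiliar with the API being tuned here, a minimal caller-side
sketch (the include paths and the `ALPHANUM` flag are assumptions based on the
public cudf headers, not part of this patch):

    // Sketch: flag rows whose characters are all alphanumeric.
    #include <cudf/strings/char_types/char_types.hpp>
    #include <cudf/strings/strings_column_view.hpp>

    std::unique_ptr<cudf::column> rows_are_alnum(cudf::strings_column_view const& input)
    {
      // Returns a BOOL8 column: true where every character matches ALPHANUM.
      return cudf::strings::all_characters_of_type(
        input, cudf::strings::string_character_types::ALPHANUM);
    }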
Reference #13048 Authors: - David Wendt (https://github.com/davidwendt) - Karthikeyan (https://github.com/karthikeyann) Approvers: - Nghia Truong (https://github.com/ttnghia) - Karthikeyan (https://github.com/karthikeyann) URL: https://github.com/rapidsai/cudf/pull/13259 --- cpp/benchmarks/CMakeLists.txt | 3 +- cpp/benchmarks/string/char_types.cpp | 66 +++++++++++++++++ cpp/src/strings/char_types/char_types.cu | 93 +++++++++++++++--------- 3 files changed, 127 insertions(+), 35 deletions(-) create mode 100644 cpp/benchmarks/string/char_types.cpp diff --git a/cpp/benchmarks/CMakeLists.txt b/cpp/benchmarks/CMakeLists.txt index dcc70a4b6d9..c3db3370c62 100644 --- a/cpp/benchmarks/CMakeLists.txt +++ b/cpp/benchmarks/CMakeLists.txt @@ -300,7 +300,8 @@ ConfigureBench( ) ConfigureNVBench( - STRINGS_NVBENCH string/like.cpp string/reverse.cpp string/lengths.cpp string/case.cpp + STRINGS_NVBENCH string/case.cpp string/char_types.cpp string/lengths.cpp string/like.cpp + string/reverse.cpp ) # ################################################################################################## diff --git a/cpp/benchmarks/string/char_types.cpp b/cpp/benchmarks/string/char_types.cpp new file mode 100644 index 00000000000..8e9e595fcef --- /dev/null +++ b/cpp/benchmarks/string/char_types.cpp @@ -0,0 +1,66 @@ +/* + * Copyright (c) 2023, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include + +#include +#include +#include + +#include + +static void bench_char_types(nvbench::state& state) +{ + auto const num_rows = static_cast(state.get_int64("num_rows")); + auto const row_width = static_cast(state.get_int64("row_width")); + auto const api_type = state.get_string("api"); + + if (static_cast(num_rows) * static_cast(row_width) >= + static_cast(std::numeric_limits::max())) { + state.skip("Skip benchmarks greater than size_type limit"); + } + + data_profile const table_profile = data_profile_builder().distribution( + cudf::type_id::STRING, distribution_id::NORMAL, 0, row_width); + auto const table = + create_random_table({cudf::type_id::STRING}, row_count{num_rows}, table_profile); + cudf::strings_column_view input(table->view().column(0)); + auto input_types = cudf::strings::string_character_types::SPACE; + + state.set_cuda_stream(nvbench::make_cuda_stream_view(cudf::get_default_stream().value())); + // gather some throughput statistics as well + auto chars_size = input.chars_size(); + state.add_global_memory_reads(chars_size); // all bytes are read; + if (api_type == "all") { + state.add_global_memory_writes(num_rows); // output is a bool8 per row + } else { + state.add_global_memory_writes(chars_size); + } + + state.exec(nvbench::exec_tag::sync, [&](nvbench::launch& launch) { + if (api_type == "all") { + auto result = cudf::strings::all_characters_of_type(input, input_types); + } else { + auto result = cudf::strings::filter_characters_of_type(input, input_types); + } + }); +} + +NVBENCH_BENCH(bench_char_types) + .set_name("char_types") + .add_int64_axis("row_width", {32, 64, 128, 256, 512, 1024, 2048, 4096}) + .add_int64_axis("num_rows", {4096, 32768, 262144, 2097152, 16777216}) + .add_string_axis("api", {"all", "filter"}); diff --git a/cpp/src/strings/char_types/char_types.cu b/cpp/src/strings/char_types/char_types.cu index a403061ba0e..b87fb80fcc2 100644 --- a/cpp/src/strings/char_types/char_types.cu +++ b/cpp/src/strings/char_types/char_types.cu @@ -31,59 +31,84 @@ #include #include -#include #include namespace cudf { namespace strings { namespace detail { -// -std::unique_ptr all_characters_of_type(strings_column_view const& strings, +namespace { + +/** + * @brief Returns true for each string where all characters match the given types. + * + * Only the characters that match to `verify_types` are checked. + * Returns false if no characters are checked or one character does not match `types`. + * Returns true if at least one character is checked and all checked characters match `types`. 
+ */ +struct char_types_fn { + column_device_view const d_column; + character_flags_table_type const* d_flags; + string_character_types const types; + string_character_types const verify_types; + + __device__ bool operator()(size_type idx) const + { + if (d_column.is_null(idx)) { return false; } + auto const d_str = d_column.element(idx); + auto const end = d_str.data() + d_str.size_bytes(); + + bool type_matched = !d_str.empty(); // require at least one character; + size_type check_count = 0; // count checked characters + for (auto itr = d_str.data(); type_matched && (itr < end); ++itr) { + uint8_t const chr = static_cast(*itr); + if (is_utf8_continuation_char(chr)) { continue; } + auto u8 = static_cast(chr); // holds UTF8 value + // using max(int8) here since max(char)=255 on ARM systems + if (u8 > std::numeric_limits::max()) { to_char_utf8(itr, u8); } + + // lookup flags in table by codepoint + auto const code_point = utf8_to_codepoint(u8); + auto const flag = code_point <= 0x00'FFFF ? d_flags[code_point] : 0; + + if ((verify_types & flag) || // should flag be verified; + (flag == 0 && verify_types == ALL_TYPES)) // special edge case + { + type_matched = (types & flag) > 0; + ++check_count; + } + } + + return type_matched && (check_count > 0); + } +}; +} // namespace + +std::unique_ptr all_characters_of_type(strings_column_view const& input, string_character_types types, string_character_types verify_types, rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) { - auto strings_count = strings.size(); - auto strings_column = column_device_view::create(strings.parent(), stream); - auto d_column = *strings_column; + auto d_strings = column_device_view::create(input.parent(), stream); // create output column - auto results = make_numeric_column(data_type{type_id::BOOL8}, - strings_count, - cudf::detail::copy_bitmask(strings.parent(), stream, mr), - strings.null_count(), + auto results = make_numeric_column(data_type{type_id::BOOL8}, + input.size(), + cudf::detail::copy_bitmask(input.parent(), stream, mr), + input.null_count(), stream, mr); - auto results_view = results->mutable_view(); - auto d_results = results_view.data(); // get the static character types table auto d_flags = detail::get_character_flags_table(); + // set the output values by checking the character types for each string thrust::transform(rmm::exec_policy(stream), thrust::make_counting_iterator(0), - thrust::make_counting_iterator(strings_count), - d_results, - [d_column, d_flags, types, verify_types, d_results] __device__(size_type idx) { - if (d_column.is_null(idx)) return false; - auto d_str = d_column.element(idx); - bool check = !d_str.empty(); // require at least one character - size_type check_count = 0; - for (auto itr = d_str.begin(); check && (itr != d_str.end()); ++itr) { - auto code_point = detail::utf8_to_codepoint(*itr); - // lookup flags in table by code-point - auto flag = code_point <= 0x00'FFFF ? 
d_flags[code_point] : 0;
-                           if ((verify_types & flag) ||  // should flag be verified
-                               (flag == 0 && verify_types == ALL_TYPES))  // special edge case
-                           {
-                             check = (types & flag) > 0;
-                             ++check_count;
-                           }
-                         }
-                         return check && (check_count > 0);
-                       });
-  //
-  results->set_null_count(strings.null_count());
+                    thrust::make_counting_iterator<size_type>(input.size()),
+                    results->mutable_view().data<bool>(),
+                    char_types_fn{*d_strings, d_flags, types, verify_types});
+
+  results->set_null_count(input.null_count());
   return results;
 }

From b16f9c42044047fb402f57c29180cea72bec4701 Mon Sep 17 00:00:00 2001
From: David Wendt <45795991+davidwendt@users.noreply.github.com>
Date: Mon, 15 May 2023 07:58:29 -0400
Subject: [PATCH 15/16] Throw error if UNINITIALIZED is passed to cudf::state_null_count (#13292)

Changes `cudf::state_null_count` to throw a `std::invalid_argument` error
when `mask_state::UNINITIALIZED` is passed. This change is part of removing
`UNKNOWN_NULL_COUNT` from cudf/libcudf.

Reference: https://github.com/rapidsai/cudf/issues/11968

Authors:
  - David Wendt (https://github.com/davidwendt)

Approvers:
  - Nghia Truong (https://github.com/ttnghia)
  - Yunsong Wang (https://github.com/PointKernel)
  - Bradley Dice (https://github.com/bdice)

URL: https://github.com/rapidsai/cudf/pull/13292
---
 cpp/include/cudf/copying.hpp        |  6 ++++
 cpp/include/cudf/null_mask.hpp      |  2 ++
 cpp/src/bitmask/null_mask.cu        |  5 ++-
 cpp/src/column/column_factories.cpp | 54 ++++++++++++++-------------
 cpp/src/copying/copy.cpp            |  2 +-
 cpp/tests/bitmask/bitmask_tests.cpp |  1 +
 6 files changed, 41 insertions(+), 29 deletions(-)

diff --git a/cpp/include/cudf/copying.hpp b/cpp/include/cudf/copying.hpp
index 921ef5f65f1..cdb8a1c8a09 100644
--- a/cpp/include/cudf/copying.hpp
+++ b/cpp/include/cudf/copying.hpp
@@ -228,6 +228,9 @@ std::unique_ptr<column> empty_like(scalar const& input);
  *
  * Supports only fixed-width types.
  *
+ * If the `mask_alloc` allocates a validity mask, that mask is also uninitialized
+ * and the validity bits and the null count should be set by the caller.
+ *
  * @param[in] input Immutable view of input column to emulate
  * @param[in] mask_alloc Optional, Policy for allocating null mask. Defaults to RETAIN
  * @param[in] mr Device memory resource used to allocate the returned column's device memory
@@ -244,6 +247,9 @@ std::unique_ptr<column> allocate_like(
  *
  * Supports only fixed-width types.
  *
+ * If the `mask_alloc` allocates a validity mask, that mask is also uninitialized
+ * and the validity bits and the null count should be set by the caller.
+ *
  * @param[in] input Immutable view of input column to emulate
  * @param[in] size The desired number of elements that the new column should have capacity for
  * @param[in] mask_alloc Optional, Policy for allocating null mask. Defaults to RETAIN
diff --git a/cpp/include/cudf/null_mask.hpp b/cpp/include/cudf/null_mask.hpp
index 360006c1eea..e8bc97e95b3 100644
--- a/cpp/include/cudf/null_mask.hpp
+++ b/cpp/include/cudf/null_mask.hpp
@@ -36,6 +36,8 @@ namespace cudf {
  * @brief Returns the null count for a null mask of the specified `state`
  * representing `size` elements.
* + * @throw std::invalid_argument if state is UNINITIALIZED + * * @param state The state of the null mask * @param size The number of elements represented by the mask * @return The count of null elements diff --git a/cpp/src/bitmask/null_mask.cu b/cpp/src/bitmask/null_mask.cu index b98a2196748..4c22988900b 100644 --- a/cpp/src/bitmask/null_mask.cu +++ b/cpp/src/bitmask/null_mask.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2022, NVIDIA CORPORATION. + * Copyright (c) 2019-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -50,10 +50,9 @@ size_type state_null_count(mask_state state, size_type size) { switch (state) { case mask_state::UNALLOCATED: return 0; - case mask_state::UNINITIALIZED: return UNKNOWN_NULL_COUNT; case mask_state::ALL_NULL: return size; case mask_state::ALL_VALID: return 0; - default: CUDF_FAIL("Invalid null mask state."); + default: CUDF_FAIL("Invalid null mask state.", std::invalid_argument); } } diff --git a/cpp/src/column/column_factories.cpp b/cpp/src/column/column_factories.cpp index 5f455e26e52..e147b12ad99 100644 --- a/cpp/src/column/column_factories.cpp +++ b/cpp/src/column/column_factories.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2022, NVIDIA CORPORATION. + * Copyright (c) 2019-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -81,12 +81,13 @@ std::unique_ptr make_numeric_column(data_type type, CUDF_EXPECTS(is_numeric(type), "Invalid, non-numeric type."); CUDF_EXPECTS(size >= 0, "Column size cannot be negative."); - return std::make_unique(type, - size, - rmm::device_buffer{size * cudf::size_of(type), stream, mr}, - detail::create_null_mask(size, state, stream, mr), - state_null_count(state, size), - std::vector>{}); + return std::make_unique( + type, + size, + rmm::device_buffer{size * cudf::size_of(type), stream, mr}, + detail::create_null_mask(size, state, stream, mr), + state == mask_state::UNINITIALIZED ? 0 : state_null_count(state, size), + std::vector>{}); } // Allocate storage for a specified number of numeric elements @@ -100,12 +101,13 @@ std::unique_ptr make_fixed_point_column(data_type type, CUDF_EXPECTS(is_fixed_point(type), "Invalid, non-fixed_point type."); CUDF_EXPECTS(size >= 0, "Column size cannot be negative."); - return std::make_unique(type, - size, - rmm::device_buffer{size * cudf::size_of(type), stream, mr}, - detail::create_null_mask(size, state, stream, mr), - state_null_count(state, size), - std::vector>{}); + return std::make_unique( + type, + size, + rmm::device_buffer{size * cudf::size_of(type), stream, mr}, + detail::create_null_mask(size, state, stream, mr), + state == mask_state::UNINITIALIZED ? 
0 : state_null_count(state, size), + std::vector>{}); } // Allocate storage for a specified number of timestamp elements @@ -119,12 +121,13 @@ std::unique_ptr make_timestamp_column(data_type type, CUDF_EXPECTS(is_timestamp(type), "Invalid, non-timestamp type."); CUDF_EXPECTS(size >= 0, "Column size cannot be negative."); - return std::make_unique(type, - size, - rmm::device_buffer{size * cudf::size_of(type), stream, mr}, - detail::create_null_mask(size, state, stream, mr), - state_null_count(state, size), - std::vector>{}); + return std::make_unique( + type, + size, + rmm::device_buffer{size * cudf::size_of(type), stream, mr}, + detail::create_null_mask(size, state, stream, mr), + state == mask_state::UNINITIALIZED ? 0 : state_null_count(state, size), + std::vector>{}); } // Allocate storage for a specified number of duration elements @@ -138,12 +141,13 @@ std::unique_ptr make_duration_column(data_type type, CUDF_EXPECTS(is_duration(type), "Invalid, non-duration type."); CUDF_EXPECTS(size >= 0, "Column size cannot be negative."); - return std::make_unique(type, - size, - rmm::device_buffer{size * cudf::size_of(type), stream, mr}, - detail::create_null_mask(size, state, stream, mr), - state_null_count(state, size), - std::vector>{}); + return std::make_unique( + type, + size, + rmm::device_buffer{size * cudf::size_of(type), stream, mr}, + detail::create_null_mask(size, state, stream, mr), + state == mask_state::UNINITIALIZED ? 0 : state_null_count(state, size), + std::vector>{}); } // Allocate storage for a specified number of fixed width elements diff --git a/cpp/src/copying/copy.cpp b/cpp/src/copying/copy.cpp index 2bcfeae20c4..9d4b02ffb4f 100644 --- a/cpp/src/copying/copy.cpp +++ b/cpp/src/copying/copy.cpp @@ -129,7 +129,7 @@ std::unique_ptr allocate_like(column_view const& input, size, rmm::device_buffer(size * size_of(input.type()), stream, mr), detail::create_null_mask(size, allocate_mask, stream, mr), - state_null_count(allocate_mask, input.size())); + 0); } } // namespace detail diff --git a/cpp/tests/bitmask/bitmask_tests.cpp b/cpp/tests/bitmask/bitmask_tests.cpp index a4bbb213930..4693fc8e342 100644 --- a/cpp/tests/bitmask/bitmask_tests.cpp +++ b/cpp/tests/bitmask/bitmask_tests.cpp @@ -37,6 +37,7 @@ TEST_F(BitmaskUtilitiesTest, StateNullCount) EXPECT_EQ(0, cudf::state_null_count(cudf::mask_state::UNALLOCATED, 42)); EXPECT_EQ(42, cudf::state_null_count(cudf::mask_state::ALL_NULL, 42)); EXPECT_EQ(0, cudf::state_null_count(cudf::mask_state::ALL_VALID, 42)); + EXPECT_THROW(cudf::state_null_count(cudf::mask_state::UNINITIALIZED, 42), std::invalid_argument); } TEST_F(BitmaskUtilitiesTest, BitmaskAllocationSize) From 4fe3e38861a334e72ceb3e97e065fbee24f693f0 Mon Sep 17 00:00:00 2001 From: David Wendt <45795991+davidwendt@users.noreply.github.com> Date: Mon, 15 May 2023 08:01:44 -0400 Subject: [PATCH 16/16] Fix some libcudf functions to set the null count on returning columns (#13331) Fixes a few more places where the null count was not being set in a libcudf function before returning a column. A couple of places were setting the null count value into the mutable-view object. So the value was calculated correctly but not set into the actual column object. 
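To make the pattern concrete, a hypothetical sketch of the fix (the helper
name is invented for illustration; the `cudf::detail::null_count` call mirrors
the one used in this patch):

    // Recompute the count from the final null mask and store it on the
    // owning column; setting it via a view leaves the column's count stale.
    void finalize_null_count(cudf::column& out, rmm::cuda_stream_view stream)
    {
      auto view = out.mutable_view();
      out.set_null_count(
        cudf::detail::null_count(view.null_mask(), 0, out.size(), stream));
    }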
Reference: https://github.com/rapidsai/cudf/issues/11968 Authors: - David Wendt (https://github.com/davidwendt) Approvers: - Vyas Ramasubramani (https://github.com/vyasr) - Nghia Truong (https://github.com/ttnghia) URL: https://github.com/rapidsai/cudf/pull/13331 --- cpp/src/binaryop/binaryop.cpp | 3 +++ cpp/src/copying/copy_range.cu | 3 ++- cpp/src/filling/fill.cu | 1 + cpp/tests/copying/purge_nonempty_nulls_tests.cpp | 6 ++++++ 4 files changed, 12 insertions(+), 1 deletion(-) diff --git a/cpp/src/binaryop/binaryop.cpp b/cpp/src/binaryop/binaryop.cpp index daec9b5b199..8482769b569 100644 --- a/cpp/src/binaryop/binaryop.cpp +++ b/cpp/src/binaryop/binaryop.cpp @@ -217,6 +217,8 @@ std::unique_ptr binary_operation(LhsType const& lhs, auto out_view = out->mutable_view(); cudf::binops::compiled::binary_operation(out_view, lhs, rhs, op, stream); + // TODO: consider having the binary_operation count nulls instead + out->set_null_count(cudf::detail::null_count(out_view.null_mask(), 0, out->size(), stream)); return out; } } // namespace compiled @@ -373,6 +375,7 @@ std::unique_ptr binary_operation(column_view const& lhs, auto out_view = out->mutable_view(); binops::jit::binary_operation(out_view, lhs, rhs, ptx, stream); + out->set_null_count(cudf::detail::null_count(out_view.null_mask(), 0, out->size(), stream)); return out; } } // namespace detail diff --git a/cpp/src/copying/copy_range.cu b/cpp/src/copying/copy_range.cu index 9a506c866bc..64599716765 100644 --- a/cpp/src/copying/copy_range.cu +++ b/cpp/src/copying/copy_range.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2022, NVIDIA CORPORATION. + * Copyright (c) 2019-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -108,6 +108,7 @@ struct out_of_place_copy_range_dispatch { if (source_end != source_begin) { // otherwise no-op auto ret_view = p_ret->mutable_view(); in_place_copy_range(source, ret_view, source_begin, source_end, target_begin, stream); + p_ret->set_null_count(ret_view.null_count()); } return p_ret; diff --git a/cpp/src/filling/fill.cu b/cpp/src/filling/fill.cu index a747cc195ae..342392c773e 100644 --- a/cpp/src/filling/fill.cu +++ b/cpp/src/filling/fill.cu @@ -122,6 +122,7 @@ struct out_of_place_fill_range_dispatch { auto ret_view = p_ret->mutable_view(); using DeviceType = cudf::device_storage_type_t; in_place_fill(ret_view, begin, end, value, stream); + p_ret->set_null_count(ret_view.null_count()); } return p_ret; diff --git a/cpp/tests/copying/purge_nonempty_nulls_tests.cpp b/cpp/tests/copying/purge_nonempty_nulls_tests.cpp index ea23a1c1069..a196f2ec980 100644 --- a/cpp/tests/copying/purge_nonempty_nulls_tests.cpp +++ b/cpp/tests/copying/purge_nonempty_nulls_tests.cpp @@ -75,6 +75,7 @@ TEST_F(PurgeNonEmptyNullsTest, SingleLevelList) // Set nullmask, post construction. cudf::detail::set_null_mask( input->mutable_view().null_mask(), 2, 3, false, cudf::get_default_stream()); + input->set_null_count(1); EXPECT_TRUE(cudf::may_have_nonempty_nulls(*input)); EXPECT_TRUE(cudf::has_nonempty_nulls(*input)); @@ -158,6 +159,7 @@ TEST_F(PurgeNonEmptyNullsTest, TwoLevelList) // Set nullmask, post construction. 
cudf::detail::set_null_mask( input->mutable_view().null_mask(), 3, 4, false, cudf::get_default_stream()); + input->set_null_count(1); EXPECT_TRUE(cudf::may_have_nonempty_nulls(*input)); EXPECT_TRUE(cudf::has_nonempty_nulls(*input)); @@ -213,6 +215,7 @@ TEST_F(PurgeNonEmptyNullsTest, ThreeLevelList) // Set nullmask, post construction. cudf::detail::set_null_mask( input->mutable_view().null_mask(), 3, 4, false, cudf::get_default_stream()); + input->set_null_count(1); EXPECT_TRUE(cudf::may_have_nonempty_nulls(*input)); EXPECT_TRUE(cudf::has_nonempty_nulls(*input)); @@ -267,6 +270,7 @@ TEST_F(PurgeNonEmptyNullsTest, ListOfStrings) // Set nullmask, post construction. cudf::detail::set_null_mask( input->mutable_view().null_mask(), 2, 3, false, cudf::get_default_stream()); + input->set_null_count(1); EXPECT_TRUE(cudf::may_have_nonempty_nulls(*input)); EXPECT_TRUE(cudf::has_nonempty_nulls(*input)); @@ -332,6 +336,7 @@ TEST_F(PurgeNonEmptyNullsTest, UnsanitizedListOfUnsanitizedStrings) // Set strings nullmask, post construction. cudf::set_null_mask(strings->mutable_view().null_mask(), 7, 8, false); + strings->set_null_count(1); EXPECT_TRUE(cudf::may_have_nonempty_nulls(*strings)); EXPECT_TRUE(cudf::has_nonempty_nulls(*strings)); @@ -358,6 +363,7 @@ TEST_F(PurgeNonEmptyNullsTest, UnsanitizedListOfUnsanitizedStrings) // Set lists nullmask, post construction. cudf::detail::set_null_mask( lists->mutable_view().null_mask(), 2, 3, false, cudf::get_default_stream()); + lists->set_null_count(1); EXPECT_TRUE(cudf::may_have_nonempty_nulls(*lists)); EXPECT_TRUE(cudf::has_nonempty_nulls(*lists));