From 8f5e64ddcba788ddcc715fda7f2bf852166b7ee6 Mon Sep 17 00:00:00 2001 From: Mark Harris <783069+harrism@users.noreply.github.com> Date: Wed, 17 Jan 2024 16:35:47 +1100 Subject: [PATCH 1/6] Provide explicit pool size and avoid RMM detail APIs (#14741) This PR fixes up cuDF to avoid usage that will soon be deprecated in RMM. Depends on https://github.com/rapidsai/rmm/pull/1417 Fixes #14658 Authors: - Mark Harris (https://github.com/harrism) - Yunsong Wang (https://github.com/PointKernel) - Nghia Truong (https://github.com/ttnghia) Approvers: - Nghia Truong (https://github.com/ttnghia) - Yunsong Wang (https://github.com/PointKernel) URL: https://github.com/rapidsai/cudf/pull/14741 --- cpp/benchmarks/fixture/benchmark_fixture.hpp | 6 ++++-- cpp/benchmarks/fixture/nvbench_fixture.hpp | 9 ++++++--- cpp/examples/basic/src/process_csv.cpp | 5 +++-- cpp/examples/nested_types/deduplication.cpp | 8 ++++++-- cpp/examples/strings/common.hpp | 6 ++++-- cpp/include/cudf_test/testing_main.hpp | 7 ++++--- 6 files changed, 27 insertions(+), 14 deletions(-) diff --git a/cpp/benchmarks/fixture/benchmark_fixture.hpp b/cpp/benchmarks/fixture/benchmark_fixture.hpp index bc6c2e52da8..36370560727 100644 --- a/cpp/benchmarks/fixture/benchmark_fixture.hpp +++ b/cpp/benchmarks/fixture/benchmark_fixture.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2023, NVIDIA CORPORATION. + * Copyright (c) 2019-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -17,6 +17,7 @@ #pragma once #include +#include #include #include #include @@ -33,7 +34,8 @@ inline auto make_pool_instance() { static rmm::mr::cuda_memory_resource cuda_mr; static auto pool_mr = - std::make_shared>(&cuda_mr); + std::make_shared>( + &cuda_mr, rmm::percent_of_free_device_memory(50)); return pool_mr; } } // namespace diff --git a/cpp/benchmarks/fixture/nvbench_fixture.hpp b/cpp/benchmarks/fixture/nvbench_fixture.hpp index e08f9101522..701ed67e666 100644 --- a/cpp/benchmarks/fixture/nvbench_fixture.hpp +++ b/cpp/benchmarks/fixture/nvbench_fixture.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021-2023, NVIDIA CORPORATION. + * Copyright (c) 2021-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -17,6 +17,7 @@ #include +#include #include #include #include @@ -42,7 +43,8 @@ struct nvbench_base_fixture { inline auto make_pool() { - return rmm::mr::make_owning_wrapper(make_cuda()); + return rmm::mr::make_owning_wrapper( + make_cuda(), rmm::percent_of_free_device_memory(50)); } inline auto make_async() { return std::make_shared(); } @@ -56,7 +58,8 @@ struct nvbench_base_fixture { inline auto make_managed_pool() { - return rmm::mr::make_owning_wrapper(make_managed()); + return rmm::mr::make_owning_wrapper( + make_managed(), rmm::percent_of_free_device_memory(50)); } inline std::shared_ptr create_memory_resource( diff --git a/cpp/examples/basic/src/process_csv.cpp b/cpp/examples/basic/src/process_csv.cpp index edd14d9ee5f..0d2b6b099ac 100644 --- a/cpp/examples/basic/src/process_csv.cpp +++ b/cpp/examples/basic/src/process_csv.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021-2022, NVIDIA CORPORATION. + * Copyright (c) 2021-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
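// Editorial aside, not part of the patch: every hunk in this patch follows the same
// pattern — construct the RMM pool with an explicit initial size via
// rmm::percent_of_free_device_memory(50) instead of relying on the implicit
// half-of-free-memory default that RMM is deprecating. A minimal self-contained
// sketch of that pattern follows; the exact header paths and the standalone main()
// are assumptions for illustration only.
#include <rmm/cuda_device.hpp>
#include <rmm/mr/device/cuda_memory_resource.hpp>
#include <rmm/mr/device/per_device_resource.hpp>
#include <rmm/mr/device/pool_memory_resource.hpp>

int main()
{
  rmm::mr::cuda_memory_resource cuda_mr;
  // Start the pool at 50% of the currently free device memory.
  rmm::mr::pool_memory_resource<rmm::mr::cuda_memory_resource> pool_mr{
    &cuda_mr, rmm::percent_of_free_device_memory(50)};
  rmm::mr::set_current_device_resource(&pool_mr);  // used for all device allocations
  // ... libcudf work goes here; pool_mr must outlive every allocation made from it.
  return 0;
}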
@@ -19,6 +19,7 @@ #include #include +#include #include #include #include @@ -82,7 +83,7 @@ int main(int argc, char** argv) // Construct a memory pool using the CUDA memory resource // Using a memory pool for device memory allocations is important for good performance in libcudf. // The pool defaults to allocating half of the available GPU memory. - rmm::mr::pool_memory_resource mr{&cuda_mr}; + rmm::mr::pool_memory_resource mr{&cuda_mr, rmm::percent_of_free_device_memory(50)}; // Set the pool resource to be used by default for all device memory allocations // Note: It is the user's responsibility to ensure the `mr` object stays alive for the duration of diff --git a/cpp/examples/nested_types/deduplication.cpp b/cpp/examples/nested_types/deduplication.cpp index 5969985cc72..c7c54592b70 100644 --- a/cpp/examples/nested_types/deduplication.cpp +++ b/cpp/examples/nested_types/deduplication.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2023, NVIDIA CORPORATION. + * Copyright (c) 2023-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -25,6 +25,7 @@ #include #include +#include #include #include #include @@ -57,7 +58,10 @@ std::shared_ptr create_memory_resource(bool pool) { auto cuda_mr = std::make_shared(); - if (pool) { return rmm::mr::make_owning_wrapper(cuda_mr); } + if (pool) { + return rmm::mr::make_owning_wrapper( + cuda_mr, rmm::percent_of_free_device_memory(50)); + } return cuda_mr; } diff --git a/cpp/examples/strings/common.hpp b/cpp/examples/strings/common.hpp index 2fd9daf9339..0dbe6fe2b7b 100644 --- a/cpp/examples/strings/common.hpp +++ b/cpp/examples/strings/common.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022-2023, NVIDIA CORPORATION. + * Copyright (c) 2022-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -22,6 +22,7 @@ #include #include +#include #include #include #include @@ -60,7 +61,8 @@ auto make_cuda_mr() { return std::make_shared(); */ auto make_pool_mr() { - return rmm::mr::make_owning_wrapper(make_cuda_mr()); + return rmm::mr::make_owning_wrapper( + make_cuda_mr(), rmm::percent_of_free_device_memory(50)); } /** diff --git a/cpp/include/cudf_test/testing_main.hpp b/cpp/include/cudf_test/testing_main.hpp index 12dbb4c7851..88e3088d794 100644 --- a/cpp/include/cudf_test/testing_main.hpp +++ b/cpp/include/cudf_test/testing_main.hpp @@ -21,6 +21,7 @@ #include +#include #include #include #include @@ -43,9 +44,9 @@ inline auto make_managed() { return std::make_shared(make_cuda(), min_alloc); } From c7acdaa231fb0ffe7751611590f9b85ba7508d4d Mon Sep 17 00:00:00 2001 From: Karthikeyan <6488848+karthikeyann@users.noreply.github.com> Date: Wed, 17 Jan 2024 17:08:51 +0530 Subject: [PATCH 2/6] Move chars column to parent data buffer in strings column (#14202) Eliminates chars column and moves chars data to parent string column's _data buffer. 
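As a rough illustration (not part of this PR) of what the new layout means for a caller, using only the accessors named in the diffs below — `chars_size(stream)`, `offsets()`, and reading characters from the parent column's data via `parent().head()`. The header paths and the helper name `inspect_strings` are illustrative assumptions:

    #include <cudf/column/column_view.hpp>
    #include <cudf/strings/strings_column_view.hpp>
    #include <cudf/utilities/default_stream.hpp>

    void inspect_strings(cudf::column_view const& col)
    {
      cudf::strings_column_view scv(col);
      auto const stream = cudf::get_default_stream();
      // Total number of character bytes now requires a stream argument.
      auto const num_bytes = scv.chars_size(stream);
      // The characters live in the parent column's data buffer, not in a chars child.
      auto const* chars = static_cast<char const*>(scv.parent().head());
      // The offsets child column is unchanged and still delimits each row's slice.
      auto const offsets = scv.offsets();
      (void)num_bytes; (void)chars; (void)offsets;
    }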
Summary of changes - chars child column is removed, chars buffer is added to parent column - Adds stream to `chars_size()`, `chars_end()` in `strings_column_view` and their invocations - Remove `chars_column_index`, and deprecate `chars()` from `strings_column_view` - Replace `chars_col.begin()` with `static_cast(parent.head())` - Adds string column factory which accepts `rmm::device_buffer` instead of chars column - Deprecate string column factory which accepts chars column - IO changes - contiguous split (From @nvdbaranec ), to_arrow, parquet writer. - Fix binary ops, column_view, interleave columns, byte cast, strings APIs, text APIs - Fix tests, benchmarks (mostly adding `stream` parameter to chars_size) - Java fixes (From @andygrove) - Python changes - .data special case for string column - get size from offsets column for rmm.DeviceBuffer in column - special condition for string slice - Pickle file update for string column - a few unit tests updates Preparing for https://github.com/rapidsai/cudf/issues/13733 Authors: - Karthikeyan (https://github.com/karthikeyann) Approvers: - Jason Lowe (https://github.com/jlowe) - David Wendt (https://github.com/davidwendt) - Nghia Truong (https://github.com/ttnghia) - Lawrence Mitchell (https://github.com/wence-) - Matthew Roeschke (https://github.com/mroeschke) - Ashwin Srinath (https://github.com/shwina) URL: https://github.com/rapidsai/cudf/pull/14202 --- cpp/benchmarks/hashing/hash.cpp | 4 +- cpp/benchmarks/json/json.cu | 4 +- cpp/benchmarks/string/case.cpp | 18 ++--- cpp/benchmarks/string/char_types.cpp | 4 +- cpp/benchmarks/string/combine.cpp | 5 +- cpp/benchmarks/string/contains.cpp | 4 +- cpp/benchmarks/string/convert_datetime.cpp | 5 +- cpp/benchmarks/string/convert_fixed_point.cpp | 10 ++- cpp/benchmarks/string/convert_numerics.cpp | 10 ++- cpp/benchmarks/string/copy.cu | 7 +- cpp/benchmarks/string/count.cpp | 4 +- cpp/benchmarks/string/extract.cpp | 4 +- cpp/benchmarks/string/factory.cu | 4 +- cpp/benchmarks/string/filter.cpp | 4 +- cpp/benchmarks/string/find.cpp | 4 +- cpp/benchmarks/string/gather.cpp | 5 +- cpp/benchmarks/string/join_strings.cpp | 4 +- cpp/benchmarks/string/lengths.cpp | 4 +- cpp/benchmarks/string/like.cpp | 4 +- cpp/benchmarks/string/repeat_strings.cpp | 8 +- cpp/benchmarks/string/replace.cpp | 4 +- cpp/benchmarks/string/replace_re.cpp | 4 +- cpp/benchmarks/string/reverse.cpp | 4 +- cpp/benchmarks/string/slice.cpp | 4 +- cpp/benchmarks/string/split.cpp | 4 +- cpp/benchmarks/string/split_re.cpp | 4 +- cpp/benchmarks/string/translate.cpp | 4 +- cpp/benchmarks/string/url_decode.cu | 6 +- cpp/benchmarks/text/edit_distance.cpp | 5 +- cpp/benchmarks/text/hash_ngrams.cpp | 4 +- cpp/benchmarks/text/jaccard.cpp | 8 +- cpp/benchmarks/text/minhash.cpp | 4 +- cpp/benchmarks/text/ngrams.cpp | 4 +- cpp/benchmarks/text/normalize.cpp | 4 +- cpp/benchmarks/text/replace.cpp | 4 +- cpp/benchmarks/text/tokenize.cpp | 4 +- cpp/benchmarks/text/vocab.cpp | 13 +-- .../developer_guide/DEVELOPER_GUIDE.md | 10 +-- cpp/doxygen/developer_guide/strings.png | Bin 41562 -> 50931 bytes cpp/examples/strings/custom_prealloc.cu | 4 +- .../cudf/column/column_device_view.cuh | 4 +- cpp/include/cudf/column/column_factories.hpp | 26 +++++- cpp/include/cudf/io/types.hpp | 7 +- .../cudf/strings/strings_column_view.hpp | 20 +++-- cpp/include/cudf_test/column_utilities.hpp | 8 +- cpp/src/binaryop/compiled/binary_ops.cu | 6 +- cpp/src/column/column_view.cpp | 6 +- 
cpp/src/copying/contiguous_split.cu | 76 ++++++++++++------ cpp/src/interop/to_arrow.cu | 33 +++++--- cpp/src/io/csv/writer_impl.cu | 6 +- cpp/src/io/json/json_column.cu | 4 +- cpp/src/io/json/write_json.cu | 16 ++-- cpp/src/io/parquet/writer_impl.cu | 13 ++- cpp/src/io/utilities/column_buffer.cpp | 10 +-- cpp/src/lists/interleave_columns.cu | 5 +- cpp/src/reshape/byte_cast.cu | 7 +- cpp/src/strings/attributes.cu | 5 +- cpp/src/strings/case.cu | 21 ++--- cpp/src/strings/combine/join.cu | 12 ++- cpp/src/strings/copying/concatenate.cu | 6 +- cpp/src/strings/copying/copying.cu | 15 ++-- cpp/src/strings/copying/shift.cu | 12 ++- cpp/src/strings/replace/multi.cu | 17 ++-- cpp/src/strings/replace/replace.cu | 8 +- cpp/src/strings/reverse.cu | 4 +- cpp/src/strings/search/find.cu | 4 +- cpp/src/strings/split/split.cuh | 7 +- cpp/src/strings/strings_column_factories.cu | 40 ++++++--- cpp/src/strings/strings_column_view.cpp | 23 +++--- cpp/src/strings/wrap.cu | 12 ++- cpp/src/text/bpe/byte_pair_encoding.cu | 10 +-- cpp/src/text/generate_ngrams.cu | 6 +- cpp/src/text/normalize.cu | 4 +- cpp/src/text/subword/subword_tokenize.cu | 4 +- cpp/src/text/tokenize.cu | 5 +- cpp/src/text/vocabulary_tokenize.cu | 17 ++-- cpp/src/transform/row_conversion.cu | 8 +- cpp/tests/io/json_type_cast_test.cu | 8 +- cpp/tests/strings/array_tests.cpp | 13 ++- cpp/tests/strings/factories_test.cu | 18 +++-- .../utilities_tests/column_wrapper_tests.cpp | 2 +- .../ai/rapids/cudf/JCudfSerialization.java | 2 +- java/src/main/native/src/ColumnViewJni.cpp | 20 ++--- java/src/main/native/src/TableJni.cpp | 8 +- python/cudf/cudf/_lib/column.pyx | 36 ++++++++- python/cudf/cudf/core/column/column.py | 11 +-- python/cudf/cudf/core/column/string.py | 55 +++++++------ python/cudf/cudf/core/df_protocol.py | 6 +- .../stringColumnWithRangeIndex_cudf_0.16.pkl | Bin 1709 -> 0 bytes .../stringColumnWithRangeIndex_cudf_23.12.pkl | Bin 0 -> 1394 bytes python/cudf/cudf/tests/test_df_protocol.py | 5 +- python/cudf/cudf/tests/test_serialize.py | 6 +- python/cudf/cudf/tests/test_testing.py | 6 +- 93 files changed, 519 insertions(+), 378 deletions(-) delete mode 100644 python/cudf/cudf/tests/data/pkl/stringColumnWithRangeIndex_cudf_0.16.pkl create mode 100644 python/cudf/cudf/tests/data/pkl/stringColumnWithRangeIndex_cudf_23.12.pkl diff --git a/cpp/benchmarks/hashing/hash.cpp b/cpp/benchmarks/hashing/hash.cpp index e679b4b62d2..4930fc59ac3 100644 --- a/cpp/benchmarks/hashing/hash.cpp +++ b/cpp/benchmarks/hashing/hash.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021-2023, NVIDIA CORPORATION. + * Copyright (c) 2021-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -43,7 +43,7 @@ static void bench_hash(nvbench::state& state) // collect statistics cudf::strings_column_view input(data->get_column(1).view()); - auto const chars_size = input.chars_size(); + auto const chars_size = input.chars_size(stream); // add memory read from string column state.add_global_memory_reads(chars_size); // add memory read from int64_t column diff --git a/cpp/benchmarks/json/json.cu b/cpp/benchmarks/json/json.cu index 5dc30aebe38..c74701445f8 100644 --- a/cpp/benchmarks/json/json.cu +++ b/cpp/benchmarks/json/json.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021-2023, NVIDIA CORPORATION. + * Copyright (c) 2021-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -190,7 +190,7 @@ void BM_case(benchmark::State& state, std::string query_arg) int desired_bytes = state.range(1); auto input = build_json_string_column(desired_bytes, num_rows); cudf::strings_column_view scv(input->view()); - size_t num_chars = scv.chars().size(); + size_t num_chars = scv.chars_size(cudf::get_default_stream()); std::string json_path(query_arg); diff --git a/cpp/benchmarks/string/case.cpp b/cpp/benchmarks/string/case.cpp index 385bb7630f8..639a3dc1181 100644 --- a/cpp/benchmarks/string/case.cpp +++ b/cpp/benchmarks/string/case.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021-2023, NVIDIA CORPORATION. + * Copyright (c) 2021-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -43,18 +43,18 @@ void bench_case(nvbench::state& state) if (encoding == "ascii") { data_profile ascii_profile = data_profile_builder().no_validity().distribution( cudf::type_id::INT8, distribution_id::UNIFORM, 32, 126); // nice ASCII range - auto input = cudf::strings_column_view(col_view); - auto ascii_column = - create_random_column(cudf::type_id::INT8, row_count{input.chars_size()}, ascii_profile); + auto input = cudf::strings_column_view(col_view); + auto ascii_column = create_random_column( + cudf::type_id::INT8, row_count{input.chars_size(cudf::get_default_stream())}, ascii_profile); auto ascii_data = ascii_column->view(); col_view = cudf::column_view(col_view.type(), col_view.size(), - nullptr, + ascii_data.data(), col_view.null_mask(), col_view.null_count(), 0, - {input.offsets(), ascii_data}); + {input.offsets()}); ascii_contents = ascii_column->release(); } @@ -62,9 +62,9 @@ void bench_case(nvbench::state& state) state.set_cuda_stream(nvbench::make_cuda_stream_view(cudf::get_default_stream().value())); - state.add_element_count(input.chars_size(), "chars_size"); - state.add_global_memory_reads(input.chars_size()); - state.add_global_memory_writes(input.chars_size()); + state.add_element_count(input.chars_size(cudf::get_default_stream()), "chars_size"); + state.add_global_memory_reads(input.chars_size(cudf::get_default_stream())); + state.add_global_memory_writes(input.chars_size(cudf::get_default_stream())); state.exec(nvbench::exec_tag::sync, [&](nvbench::launch& launch) { auto result = cudf::strings::to_lower(input); }); diff --git a/cpp/benchmarks/string/char_types.cpp b/cpp/benchmarks/string/char_types.cpp index 59e6245fd41..eec9a5f54d7 100644 --- a/cpp/benchmarks/string/char_types.cpp +++ b/cpp/benchmarks/string/char_types.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2023, NVIDIA CORPORATION. + * Copyright (c) 2023-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -42,7 +42,7 @@ static void bench_char_types(nvbench::state& state) state.set_cuda_stream(nvbench::make_cuda_stream_view(cudf::get_default_stream().value())); // gather some throughput statistics as well - auto chars_size = input.chars_size(); + auto chars_size = input.chars_size(cudf::get_default_stream()); state.add_global_memory_reads(chars_size); // all bytes are read; if (api_type == "all") { state.add_global_memory_writes(num_rows); // output is a bool8 per row diff --git a/cpp/benchmarks/string/combine.cpp b/cpp/benchmarks/string/combine.cpp index 4ed54a38a48..7acfb1ffb0d 100644 --- a/cpp/benchmarks/string/combine.cpp +++ b/cpp/benchmarks/string/combine.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021-2023, NVIDIA CORPORATION. + * Copyright (c) 2021-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -44,7 +44,8 @@ static void BM_combine(benchmark::State& state) cudf::strings::concatenate(table->view(), separator); } - state.SetBytesProcessed(state.iterations() * (input1.chars_size() + input2.chars_size())); + state.SetBytesProcessed(state.iterations() * (input1.chars_size(cudf::get_default_stream()) + + input2.chars_size(cudf::get_default_stream()))); } static void generate_bench_args(benchmark::internal::Benchmark* b) diff --git a/cpp/benchmarks/string/contains.cpp b/cpp/benchmarks/string/contains.cpp index af45d5d8fee..6d839c1de64 100644 --- a/cpp/benchmarks/string/contains.cpp +++ b/cpp/benchmarks/string/contains.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021-2023, NVIDIA CORPORATION. + * Copyright (c) 2021-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -100,7 +100,7 @@ static void bench_contains(nvbench::state& state) auto pattern = patterns[pattern_index]; auto program = cudf::strings::regex_program::create(pattern); - auto chars_size = input.chars_size(); + auto chars_size = input.chars_size(cudf::get_default_stream()); state.add_element_count(chars_size, "chars_size"); state.add_global_memory_reads(chars_size); state.add_global_memory_writes(input.size()); diff --git a/cpp/benchmarks/string/convert_datetime.cpp b/cpp/benchmarks/string/convert_datetime.cpp index 5f332a3e1a0..5deca3664b7 100644 --- a/cpp/benchmarks/string/convert_datetime.cpp +++ b/cpp/benchmarks/string/convert_datetime.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021-2023, NVIDIA CORPORATION. + * Copyright (c) 2021-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -48,7 +48,8 @@ void BM_convert_datetime(benchmark::State& state, direction dir) cudf::strings::from_timestamps(input, "%Y-%m-%d %H:%M:%S"); } - auto const bytes = dir == direction::to ? source_string.chars_size() : n_rows * sizeof(TypeParam); + auto const bytes = dir == direction::to ? source_string.chars_size(cudf::get_default_stream()) + : n_rows * sizeof(TypeParam); state.SetBytesProcessed(state.iterations() * bytes); } diff --git a/cpp/benchmarks/string/convert_fixed_point.cpp b/cpp/benchmarks/string/convert_fixed_point.cpp index 0cc98ee146c..e5bd794e405 100644 --- a/cpp/benchmarks/string/convert_fixed_point.cpp +++ b/cpp/benchmarks/string/convert_fixed_point.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021-2023, NVIDIA CORPORATION. + * Copyright (c) 2021-2024, NVIDIA CORPORATION. 
* * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -49,8 +49,9 @@ void convert_to_fixed_point(benchmark::State& state) } // bytes_processed = bytes_input + bytes_output - state.SetBytesProcessed(state.iterations() * - (strings_view.chars_size() + rows * cudf::size_of(dtype))); + state.SetBytesProcessed( + state.iterations() * + (strings_view.chars_size(cudf::get_default_stream()) + rows * cudf::size_of(dtype))); } class StringsFromFixedPoint : public cudf::benchmark {}; @@ -74,7 +75,8 @@ void convert_from_fixed_point(benchmark::State& state) // bytes_processed = bytes_input + bytes_output state.SetBytesProcessed( state.iterations() * - (cudf::strings_column_view(results->view()).chars_size() + rows * cudf::size_of(dtype))); + (cudf::strings_column_view(results->view()).chars_size(cudf::get_default_stream()) + + rows * cudf::size_of(dtype))); } #define CONVERT_TO_FIXED_POINT_BMD(name, fixed_point_type) \ diff --git a/cpp/benchmarks/string/convert_numerics.cpp b/cpp/benchmarks/string/convert_numerics.cpp index cce5d0f6a4d..8f875c5c80f 100644 --- a/cpp/benchmarks/string/convert_numerics.cpp +++ b/cpp/benchmarks/string/convert_numerics.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021-2023, NVIDIA CORPORATION. + * Copyright (c) 2021-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -63,8 +63,9 @@ void convert_to_number(benchmark::State& state) } // bytes_processed = bytes_input + bytes_output - state.SetBytesProcessed(state.iterations() * - (strings_view.chars_size() + rows * sizeof(NumericType))); + state.SetBytesProcessed( + state.iterations() * + (strings_view.chars_size(cudf::get_default_stream()) + rows * sizeof(NumericType))); } class StringsFromNumeric : public cudf::benchmark {}; @@ -90,7 +91,8 @@ void convert_from_number(benchmark::State& state) // bytes_processed = bytes_input + bytes_output state.SetBytesProcessed( state.iterations() * - (cudf::strings_column_view(results->view()).chars_size() + rows * sizeof(NumericType))); + (cudf::strings_column_view(results->view()).chars_size(cudf::get_default_stream()) + + rows * sizeof(NumericType))); } #define CONVERT_TO_NUMERICS_BD(name, type) \ diff --git a/cpp/benchmarks/string/copy.cu b/cpp/benchmarks/string/copy.cu index 27438f80f92..6b2f6c3a0a7 100644 --- a/cpp/benchmarks/string/copy.cu +++ b/cpp/benchmarks/string/copy.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021-2023, NVIDIA CORPORATION. + * Copyright (c) 2021-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -64,8 +64,9 @@ static void BM_copy(benchmark::State& state, copy_type ct) } } - state.SetBytesProcessed(state.iterations() * - cudf::strings_column_view(source->view().column(0)).chars_size()); + state.SetBytesProcessed( + state.iterations() * + cudf::strings_column_view(source->view().column(0)).chars_size(cudf::get_default_stream())); } static void generate_bench_args(benchmark::internal::Benchmark* b) diff --git a/cpp/benchmarks/string/count.cpp b/cpp/benchmarks/string/count.cpp index 08406462632..a656010dca5 100644 --- a/cpp/benchmarks/string/count.cpp +++ b/cpp/benchmarks/string/count.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021-2023, NVIDIA CORPORATION. + * Copyright (c) 2021-2024, NVIDIA CORPORATION. 
* * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -47,7 +47,7 @@ static void bench_count(nvbench::state& state) state.set_cuda_stream(nvbench::make_cuda_stream_view(cudf::get_default_stream().value())); // gather some throughput statistics as well - auto chars_size = input.chars_size(); + auto chars_size = input.chars_size(cudf::get_default_stream()); state.add_element_count(chars_size, "chars_size"); state.add_global_memory_reads(chars_size); state.add_global_memory_writes(input.size()); diff --git a/cpp/benchmarks/string/extract.cpp b/cpp/benchmarks/string/extract.cpp index 135dadabbe4..af4fedb5799 100644 --- a/cpp/benchmarks/string/extract.cpp +++ b/cpp/benchmarks/string/extract.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021-2023, NVIDIA CORPORATION. + * Copyright (c) 2021-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -67,7 +67,7 @@ static void bench_extract(nvbench::state& state) state.set_cuda_stream(nvbench::make_cuda_stream_view(cudf::get_default_stream().value())); // gather some throughput statistics as well - auto chars_size = strings_view.chars_size(); + auto chars_size = strings_view.chars_size(cudf::get_default_stream()); state.add_element_count(chars_size, "chars_size"); // number of bytes; state.add_global_memory_reads(chars_size); // all bytes are read; state.add_global_memory_writes(chars_size); // all bytes are written diff --git a/cpp/benchmarks/string/factory.cu b/cpp/benchmarks/string/factory.cu index c73bcb0b0ad..c4e74c4d97e 100644 --- a/cpp/benchmarks/string/factory.cu +++ b/cpp/benchmarks/string/factory.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021-2023, NVIDIA CORPORATION. + * Copyright (c) 2021-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -67,7 +67,7 @@ static void BM_factory(benchmark::State& state) } cudf::strings_column_view input(column->view()); - state.SetBytesProcessed(state.iterations() * input.chars_size()); + state.SetBytesProcessed(state.iterations() * input.chars_size(cudf::get_default_stream())); } static void generate_bench_args(benchmark::internal::Benchmark* b) diff --git a/cpp/benchmarks/string/filter.cpp b/cpp/benchmarks/string/filter.cpp index b935fc4a11f..613834b1f3e 100644 --- a/cpp/benchmarks/string/filter.cpp +++ b/cpp/benchmarks/string/filter.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021-2023, NVIDIA CORPORATION. + * Copyright (c) 2021-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -57,7 +57,7 @@ static void BM_filter_chars(benchmark::State& state, FilterAPI api) } } - state.SetBytesProcessed(state.iterations() * input.chars_size()); + state.SetBytesProcessed(state.iterations() * input.chars_size(cudf::get_default_stream())); } static void generate_bench_args(benchmark::internal::Benchmark* b) diff --git a/cpp/benchmarks/string/find.cpp b/cpp/benchmarks/string/find.cpp index 5f2e6946b8b..e866092f3a3 100644 --- a/cpp/benchmarks/string/find.cpp +++ b/cpp/benchmarks/string/find.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021-2023, NVIDIA CORPORATION. + * Copyright (c) 2021-2024, NVIDIA CORPORATION. 
* * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -56,7 +56,7 @@ static void BM_find_scalar(benchmark::State& state, FindAPI find_api) } } - state.SetBytesProcessed(state.iterations() * input.chars_size()); + state.SetBytesProcessed(state.iterations() * input.chars_size(cudf::get_default_stream())); } static void generate_bench_args(benchmark::internal::Benchmark* b) diff --git a/cpp/benchmarks/string/gather.cpp b/cpp/benchmarks/string/gather.cpp index 530b09b7d6a..5b1c679be7d 100644 --- a/cpp/benchmarks/string/gather.cpp +++ b/cpp/benchmarks/string/gather.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2023, NVIDIA CORPORATION. + * Copyright (c) 2023-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -43,7 +43,8 @@ static void bench_gather(nvbench::state& state) create_random_table({cudf::type_id::INT32}, row_count{num_rows}, map_profile); state.set_cuda_stream(nvbench::make_cuda_stream_view(cudf::get_default_stream().value())); - auto chars_size = cudf::strings_column_view(input_table->view().column(0)).chars_size(); + auto chars_size = + cudf::strings_column_view(input_table->view().column(0)).chars_size(cudf::get_default_stream()); state.add_global_memory_reads(chars_size); // all bytes are read; state.add_global_memory_writes(chars_size); diff --git a/cpp/benchmarks/string/join_strings.cpp b/cpp/benchmarks/string/join_strings.cpp index a122c0022a9..6dcf731ad3c 100644 --- a/cpp/benchmarks/string/join_strings.cpp +++ b/cpp/benchmarks/string/join_strings.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2023, NVIDIA CORPORATION. + * Copyright (c) 2023-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -40,7 +40,7 @@ static void bench_join(nvbench::state& state) state.set_cuda_stream(nvbench::make_cuda_stream_view(cudf::get_default_stream().value())); // gather some throughput statistics as well - auto const chars_size = input.chars_size(); + auto const chars_size = input.chars_size(cudf::get_default_stream()); state.add_element_count(chars_size, "chars_size"); // number of bytes; state.add_global_memory_reads(chars_size); // all bytes are read; state.add_global_memory_writes(chars_size); // all bytes are written diff --git a/cpp/benchmarks/string/lengths.cpp b/cpp/benchmarks/string/lengths.cpp index 36c4bf64a00..a19060ead3b 100644 --- a/cpp/benchmarks/string/lengths.cpp +++ b/cpp/benchmarks/string/lengths.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2023, NVIDIA CORPORATION. + * Copyright (c) 2023-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -40,7 +40,7 @@ static void bench_lengths(nvbench::state& state) state.set_cuda_stream(nvbench::make_cuda_stream_view(cudf::get_default_stream().value())); // gather some throughput statistics as well - auto chars_size = input.chars_size(); + auto chars_size = input.chars_size(cudf::get_default_stream()); state.add_global_memory_reads(chars_size); // all bytes are read; state.add_global_memory_writes(num_rows); // output is an integer per row diff --git a/cpp/benchmarks/string/like.cpp b/cpp/benchmarks/string/like.cpp index 6ac832471a5..99cef640dc3 100644 --- a/cpp/benchmarks/string/like.cpp +++ b/cpp/benchmarks/string/like.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022-2023, NVIDIA CORPORATION. + * Copyright (c) 2022-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -99,7 +99,7 @@ static void bench_like(nvbench::state& state) state.set_cuda_stream(nvbench::make_cuda_stream_view(cudf::get_default_stream().value())); // gather some throughput statistics as well - auto chars_size = input.chars_size(); + auto chars_size = input.chars_size(cudf::get_default_stream()); state.add_element_count(chars_size, "chars_size"); // number of bytes; state.add_global_memory_reads(chars_size); // all bytes are read; state.add_global_memory_writes(n_rows); // writes are BOOL8 diff --git a/cpp/benchmarks/string/repeat_strings.cpp b/cpp/benchmarks/string/repeat_strings.cpp index 92645524efb..f1d1516f248 100644 --- a/cpp/benchmarks/string/repeat_strings.cpp +++ b/cpp/benchmarks/string/repeat_strings.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021-2023, NVIDIA CORPORATION. + * Copyright (c) 2021-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -59,7 +59,7 @@ static void BM_repeat_strings_scalar_times(benchmark::State& state) cudf::strings::repeat_strings(strings_col, default_repeat_times); } - state.SetBytesProcessed(state.iterations() * strings_col.chars_size()); + state.SetBytesProcessed(state.iterations() * strings_col.chars_size(cudf::get_default_stream())); } static void BM_repeat_strings_column_times(benchmark::State& state) @@ -75,8 +75,8 @@ static void BM_repeat_strings_column_times(benchmark::State& state) cudf::strings::repeat_strings(strings_col, repeat_times_col); } - state.SetBytesProcessed(state.iterations() * - (strings_col.chars_size() + repeat_times_col.size() * sizeof(int32_t))); + state.SetBytesProcessed(state.iterations() * (strings_col.chars_size(cudf::get_default_stream()) + + repeat_times_col.size() * sizeof(int32_t))); } static void generate_bench_args(benchmark::internal::Benchmark* b) diff --git a/cpp/benchmarks/string/replace.cpp b/cpp/benchmarks/string/replace.cpp index 5ddf09f5cec..c8f26142193 100644 --- a/cpp/benchmarks/string/replace.cpp +++ b/cpp/benchmarks/string/replace.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021-2023, NVIDIA CORPORATION. + * Copyright (c) 2021-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -58,7 +58,7 @@ static void BM_replace(benchmark::State& state, replace_type rt) } } - state.SetBytesProcessed(state.iterations() * input.chars_size()); + state.SetBytesProcessed(state.iterations() * input.chars_size(cudf::get_default_stream())); } static void generate_bench_args(benchmark::internal::Benchmark* b) diff --git a/cpp/benchmarks/string/replace_re.cpp b/cpp/benchmarks/string/replace_re.cpp index b8efd76ab41..4dcf1314f83 100644 --- a/cpp/benchmarks/string/replace_re.cpp +++ b/cpp/benchmarks/string/replace_re.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021-2023, NVIDIA CORPORATION. + * Copyright (c) 2021-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -42,7 +42,7 @@ static void bench_replace(nvbench::state& state) auto program = cudf::strings::regex_program::create("(\\d+)"); - auto chars_size = input.chars_size(); + auto chars_size = input.chars_size(cudf::get_default_stream()); state.add_element_count(chars_size, "chars_size"); state.add_global_memory_reads(chars_size); state.add_global_memory_writes(chars_size); diff --git a/cpp/benchmarks/string/reverse.cpp b/cpp/benchmarks/string/reverse.cpp index 31cd4639115..a2676609a40 100644 --- a/cpp/benchmarks/string/reverse.cpp +++ b/cpp/benchmarks/string/reverse.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022-2023, NVIDIA CORPORATION. + * Copyright (c) 2022-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -40,7 +40,7 @@ static void bench_reverse(nvbench::state& state) state.set_cuda_stream(nvbench::make_cuda_stream_view(cudf::get_default_stream().value())); // gather some throughput statistics as well - auto chars_size = input.chars_size(); + auto chars_size = input.chars_size(cudf::get_default_stream()); state.add_element_count(chars_size, "chars_size"); // number of bytes; state.add_global_memory_reads(chars_size); // all bytes are read; state.add_global_memory_writes(chars_size); // all bytes are written diff --git a/cpp/benchmarks/string/slice.cpp b/cpp/benchmarks/string/slice.cpp index 6c1d7d98d3a..0f973a7c8b5 100644 --- a/cpp/benchmarks/string/slice.cpp +++ b/cpp/benchmarks/string/slice.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021-2023, NVIDIA CORPORATION. + * Copyright (c) 2021-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -58,7 +58,7 @@ static void BM_slice(benchmark::State& state, slice_type rt) } } - state.SetBytesProcessed(state.iterations() * input.chars_size()); + state.SetBytesProcessed(state.iterations() * input.chars_size(cudf::get_default_stream())); } static void generate_bench_args(benchmark::internal::Benchmark* b) diff --git a/cpp/benchmarks/string/split.cpp b/cpp/benchmarks/string/split.cpp index eb724fabfd1..9ef58daf0fc 100644 --- a/cpp/benchmarks/string/split.cpp +++ b/cpp/benchmarks/string/split.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021-2023, NVIDIA CORPORATION. + * Copyright (c) 2021-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -44,7 +44,7 @@ static void bench_split(nvbench::state& state) state.set_cuda_stream(nvbench::make_cuda_stream_view(cudf::get_default_stream().value())); // gather some throughput statistics as well - auto chars_size = input.chars_size(); + auto chars_size = input.chars_size(cudf::get_default_stream()); state.add_element_count(chars_size, "chars_size"); // number of bytes; state.add_global_memory_reads(chars_size); // all bytes are read; state.add_global_memory_writes(chars_size); // all bytes are written diff --git a/cpp/benchmarks/string/split_re.cpp b/cpp/benchmarks/string/split_re.cpp index 67aa6f0e008..1fdb6e67109 100644 --- a/cpp/benchmarks/string/split_re.cpp +++ b/cpp/benchmarks/string/split_re.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2023, NVIDIA CORPORATION. + * Copyright (c) 2023-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -44,7 +44,7 @@ static void bench_split(nvbench::state& state) state.set_cuda_stream(nvbench::make_cuda_stream_view(cudf::get_default_stream().value())); // gather some throughput statistics as well - auto chars_size = input.chars_size(); + auto chars_size = input.chars_size(cudf::get_default_stream()); state.add_element_count(chars_size, "chars_size"); // number of bytes; state.add_global_memory_reads(chars_size); // all bytes are read; state.add_global_memory_writes(chars_size); // all bytes are written diff --git a/cpp/benchmarks/string/translate.cpp b/cpp/benchmarks/string/translate.cpp index 00ca7459964..dc3c8c71488 100644 --- a/cpp/benchmarks/string/translate.cpp +++ b/cpp/benchmarks/string/translate.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021-2023, NVIDIA CORPORATION. + * Copyright (c) 2021-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -56,7 +56,7 @@ static void BM_translate(benchmark::State& state, int entry_count) cudf::strings::translate(input, entries); } - state.SetBytesProcessed(state.iterations() * input.chars_size()); + state.SetBytesProcessed(state.iterations() * input.chars_size(cudf::get_default_stream())); } static void generate_bench_args(benchmark::internal::Benchmark* b) diff --git a/cpp/benchmarks/string/url_decode.cu b/cpp/benchmarks/string/url_decode.cu index 9ede89bee43..b3aeb69e5ea 100644 --- a/cpp/benchmarks/string/url_decode.cu +++ b/cpp/benchmarks/string/url_decode.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021-2023, NVIDIA CORPORATION. + * Copyright (c) 2021-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -67,7 +67,7 @@ auto generate_column(cudf::size_type num_rows, cudf::size_type chars_per_row, do auto col_1a = cudf::test::strings_column_wrapper(strings.begin(), strings.end()); auto table_a = cudf::repeat(cudf::table_view{{col_1a}}, num_rows); auto result_col = std::move(table_a->release()[0]); // string column with num_rows aaa... 
- auto chars_col = result_col->child(cudf::strings_column_view::chars_column_index).mutable_view(); + auto chars_data = static_cast(result_col->mutable_view().head()); auto offset_col = result_col->child(cudf::strings_column_view::offsets_column_index).view(); auto engine = thrust::default_random_engine{}; @@ -75,7 +75,7 @@ auto generate_column(cudf::size_type num_rows, cudf::size_type chars_per_row, do thrust::make_zip_iterator(offset_col.begin(), offset_col.begin() + 1), num_rows, - url_string_generator{chars_col.begin(), esc_seq_chance, engine}); + url_string_generator{chars_data, esc_seq_chance, engine}); return result_col; } diff --git a/cpp/benchmarks/text/edit_distance.cpp b/cpp/benchmarks/text/edit_distance.cpp index 8a8bd9ae586..0a1ea52c415 100644 --- a/cpp/benchmarks/text/edit_distance.cpp +++ b/cpp/benchmarks/text/edit_distance.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2023, NVIDIA CORPORATION. + * Copyright (c) 2023-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -43,7 +43,8 @@ static void bench_edit_distance(nvbench::state& state) state.set_cuda_stream(nvbench::make_cuda_stream_view(cudf::get_default_stream().value())); - auto chars_size = input1.chars_size() + input2.chars_size(); + auto chars_size = + input1.chars_size(cudf::get_default_stream()) + input2.chars_size(cudf::get_default_stream()); state.add_global_memory_reads(chars_size); // output are integers (one per row) state.add_global_memory_writes(num_rows); diff --git a/cpp/benchmarks/text/hash_ngrams.cpp b/cpp/benchmarks/text/hash_ngrams.cpp index 5bbd2fc6819..3df0c61fc31 100644 --- a/cpp/benchmarks/text/hash_ngrams.cpp +++ b/cpp/benchmarks/text/hash_ngrams.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2023, NVIDIA CORPORATION. + * Copyright (c) 2023-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -43,7 +43,7 @@ static void bench_hash_ngrams(nvbench::state& state) state.set_cuda_stream(nvbench::make_cuda_stream_view(cudf::get_default_stream().value())); - auto chars_size = input.chars_size(); + auto chars_size = input.chars_size(cudf::get_default_stream()); state.add_global_memory_reads(chars_size); // output are hashes: approximate total number of hashes state.add_global_memory_writes(num_rows * ngrams); diff --git a/cpp/benchmarks/text/jaccard.cpp b/cpp/benchmarks/text/jaccard.cpp index 70470b829bd..60251c96096 100644 --- a/cpp/benchmarks/text/jaccard.cpp +++ b/cpp/benchmarks/text/jaccard.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2023, NVIDIA CORPORATION. + * Copyright (c) 2023-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -17,6 +17,7 @@ #include #include +#include #include @@ -44,9 +45,10 @@ static void bench_jaccard(nvbench::state& state) cudf::strings_column_view input1(input_table->view().column(0)); cudf::strings_column_view input2(input_table->view().column(1)); - state.set_cuda_stream(nvbench::make_cuda_stream_view(cudf::get_default_stream().value())); + auto stream = cudf::get_default_stream(); + state.set_cuda_stream(nvbench::make_cuda_stream_view(stream.value())); - auto chars_size = input1.chars_size() + input2.chars_size(); + auto chars_size = input1.chars_size(stream) + input2.chars_size(stream); state.add_global_memory_reads(chars_size); state.add_global_memory_writes(num_rows); diff --git a/cpp/benchmarks/text/minhash.cpp b/cpp/benchmarks/text/minhash.cpp index 1b60caa24de..d10d0d307d7 100644 --- a/cpp/benchmarks/text/minhash.cpp +++ b/cpp/benchmarks/text/minhash.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2023, NVIDIA CORPORATION. + * Copyright (c) 2023-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -52,7 +52,7 @@ static void bench_minhash(nvbench::state& state) state.set_cuda_stream(nvbench::make_cuda_stream_view(cudf::get_default_stream().value())); - auto chars_size = input.chars_size(); + auto chars_size = input.chars_size(cudf::get_default_stream()); state.add_global_memory_reads(chars_size); state.add_global_memory_writes(num_rows); // output are hashes diff --git a/cpp/benchmarks/text/ngrams.cpp b/cpp/benchmarks/text/ngrams.cpp index f3fd5cc5729..8e48f8e9a05 100644 --- a/cpp/benchmarks/text/ngrams.cpp +++ b/cpp/benchmarks/text/ngrams.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021-2023, NVIDIA CORPORATION. + * Copyright (c) 2021-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -46,7 +46,7 @@ static void BM_ngrams(benchmark::State& state, ngrams_type nt) } } - state.SetBytesProcessed(state.iterations() * input.chars_size()); + state.SetBytesProcessed(state.iterations() * input.chars_size(cudf::get_default_stream())); } static void generate_bench_args(benchmark::internal::Benchmark* b) diff --git a/cpp/benchmarks/text/normalize.cpp b/cpp/benchmarks/text/normalize.cpp index 6878fa4f8b6..71bccd80d39 100644 --- a/cpp/benchmarks/text/normalize.cpp +++ b/cpp/benchmarks/text/normalize.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021-2023, NVIDIA CORPORATION. + * Copyright (c) 2021-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -43,7 +43,7 @@ static void bench_normalize(nvbench::state& state) state.set_cuda_stream(nvbench::make_cuda_stream_view(cudf::get_default_stream().value())); - auto chars_size = input.chars_size(); + auto chars_size = input.chars_size(cudf::get_default_stream()); state.add_global_memory_reads(chars_size); state.add_global_memory_writes(chars_size); diff --git a/cpp/benchmarks/text/replace.cpp b/cpp/benchmarks/text/replace.cpp index 257f62aa728..767ebab3eee 100644 --- a/cpp/benchmarks/text/replace.cpp +++ b/cpp/benchmarks/text/replace.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021-2023, NVIDIA CORPORATION. + * Copyright (c) 2021-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -59,7 +59,7 @@ static void bench_replace(nvbench::state& state) state.set_cuda_stream(nvbench::make_cuda_stream_view(cudf::get_default_stream().value())); - auto chars_size = view.chars_size(); + auto chars_size = view.chars_size(cudf::get_default_stream()); state.add_global_memory_reads(chars_size); state.add_global_memory_writes(chars_size); diff --git a/cpp/benchmarks/text/tokenize.cpp b/cpp/benchmarks/text/tokenize.cpp index b556a84c541..2151b28d637 100644 --- a/cpp/benchmarks/text/tokenize.cpp +++ b/cpp/benchmarks/text/tokenize.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021-2023, NVIDIA CORPORATION. + * Copyright (c) 2021-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -46,7 +46,7 @@ static void bench_tokenize(nvbench::state& state) state.set_cuda_stream(nvbench::make_cuda_stream_view(cudf::get_default_stream().value())); - auto chars_size = input.chars_size(); + auto chars_size = input.chars_size(cudf::get_default_stream()); state.add_global_memory_reads(chars_size); state.add_global_memory_writes(chars_size); diff --git a/cpp/benchmarks/text/vocab.cpp b/cpp/benchmarks/text/vocab.cpp index 80942e2697d..770519294ad 100644 --- a/cpp/benchmarks/text/vocab.cpp +++ b/cpp/benchmarks/text/vocab.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2023, NVIDIA CORPORATION. + * Copyright (c) 2023-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -31,6 +31,7 @@ static void bench_vocab_tokenize(nvbench::state& state) { + auto const stream = cudf::get_default_stream(); auto const num_rows = static_cast(state.get_int64("num_rows")); auto const row_width = static_cast(state.get_int64("row_width")); @@ -63,16 +64,16 @@ static void bench_vocab_tokenize(nvbench::state& state) }(); auto const vocab = nvtext::load_vocabulary(cudf::strings_column_view(vocab_col->view())); - auto token_count = [input] { + auto token_count = [input, stream] { auto const counts = nvtext::count_tokens(input); auto const agg = cudf::make_sum_aggregation(); auto const count = cudf::reduce(counts->view(), *agg, counts->type()); - return static_cast*>(count.get()) - ->value(cudf::get_default_stream()); + return static_cast*>(count.get())->value(stream); }(); - state.set_cuda_stream(nvbench::make_cuda_stream_view(cudf::get_default_stream().value())); - auto chars_size = input.chars_size() + cudf::strings_column_view(vocab_col->view()).chars_size(); + state.set_cuda_stream(nvbench::make_cuda_stream_view(stream.value())); + auto chars_size = + input.chars_size(stream) + cudf::strings_column_view(vocab_col->view()).chars_size(stream); state.add_global_memory_reads(chars_size); state.add_global_memory_writes(token_count); diff --git a/cpp/doxygen/developer_guide/DEVELOPER_GUIDE.md b/cpp/doxygen/developer_guide/DEVELOPER_GUIDE.md index fc2f72de33c..c38151d7518 100644 --- a/cpp/doxygen/developer_guide/DEVELOPER_GUIDE.md +++ b/cpp/doxygen/developer_guide/DEVELOPER_GUIDE.md @@ -1197,17 +1197,15 @@ This is related to [Arrow's "Variable-Size List" memory layout](https://arrow.ap ## Strings columns -Strings are represented in much the same way as lists, except that the data child column is always -a non-nullable column of `INT8` data. The parent column's type is `STRING` and contains no data, +Strings are represented as a column with a data device buffer and a child offsets column. 
+The parent column's type is `STRING` and its data holds all the characters across all the strings packed together but its size represents the number of strings in the column, and its null mask represents the validity of each string. To summarize, the strings column children are: 1. A non-nullable column of [`size_type`](#cudfsize_type) elements that indicates the offset to the beginning of each - string in a dense column of all characters. -2. A non-nullable column of `INT8` elements of all the characters across all the strings packed - together. + string in a dense data buffer of all characters. -With this representation, `characters[offsets[i]]` is the first character of string `i`, and the +With this representation, `data[offsets[i]]` is the first character of string `i`, and the size of string `i` is given by `offsets[i+1] - offsets[i]`. The following image shows an example of this compound column representation of strings. diff --git a/cpp/doxygen/developer_guide/strings.png b/cpp/doxygen/developer_guide/strings.png index 85ffef283b6609e3244e0cf1ef89121b418c75eb..1d18ea8a407e31c41d02083ed0d955959ff082bc 100644 GIT binary patch literal 50931
[base85-encoded binary image data omitted: updated diagram of the strings column layout]
zlRk;JkcV&n3ku;@a|7Ljb4}*TrSi2IGqKBPogN|4^N8jvPVI@yoi1;rv&YNVuFH3e zzVeTR6Qsm8z28zRF50e;GcZf=CW!2-vfH*EwGrt~TM%$zHE|`@Dik;I&Zl`~$vo z_y~aX_!N_R@#}-5>nla?;jto{ET%=iwmXW2f5RBu$gXtvtR|=+Ys*nI?=n8{noWyK z_~awjcjJV%N|ZjZSXXR@50}or{A;T{o)CJHIVX16Hs8Y2)*#I<@|#Cf1zka34V_D} z63@myrX00KMU|*$$HN+h)dK zKSPmrotWKhLh?G1_mB}H)`O9Bt)^nzsxdFnV9*FrG(2#rpV+nR)q7bewL{+4pj633 z`oPS;G^47xtF%_;x-nwl0d%OSZ4x-qmkIpb4N?|PLYi21_KErSg{kZB+Hyjg^1l z4%`FZ(JX*(;AEL_AU!RCLX)t4w-GC|b+h1I__d`G`Q>J7J1G*QatOye2C_>jkAtS-4u}Q@7ly@ zy6BG)G~2(Mnbn(qNKgO?C1Yo)AX?E@Y=Q?v`_`akiJEGRFxg%1Tzz-9`$i#QZWJa4 zqr)tQ{g?VZnboPByXFgGi6!Y9GxeocRjjl7Q5XJ}1R5>e*Stp?D+1a`D~ujEtul zPY!KDH53FU&zMs7YZ)u3e(?Dn8kxY+<0fx#IoBOim@AaM84R4dsiG z>s;EOW(2+D6%^vcCbOhM7p~w4_BUc>QO8jOcUe^sLsB;Wy41k=DT7T6e=wKha*Dfe z9q-H|^_Q~c2JW=Az)_#%)bn~^&dO;IcV@h|VR{|7VK98Da|2FSu_s41bueZ642)TZ z!V!6e6h2wE%SycjD{x`2!KaOc3!^@pg3`X}9_sKiv&cXAT#?Q{j}&*w+rVlnazG9m zToJC>Nwxi~AwF6}2;+aM^gY_OS?kt6DLp+-<8OH_YM6vX1lNvdaVI~G-KMVh=w5m4F}bo zpERmPwW)!6u_rP|58^--6`BoX28`DP)=YcTJmFA`mpBLpf-D~(=nWLb+%~l9oie;X z#dyPL_<)LJnCceTdAJ#KLrA|@oHv}sHj_Vnt=hp-gc$kQtZ{y$D(~pA67{e2g`gSx zM>pN^%6TTcM93}6%$}+SpJV#>Fp&4{Wp%Nn z6@woQa2zp4|2*@%XSrX5Yo3;~YXA0z2@YXfj_s#s+250LO0%MmbEFw)bu$FQ6YhOG zN!^JHOWrVY(=a>es>_kQUhtEsE4ojHAYr%J!@u<8lEVa|ga1 z%TA1hI=^0f&Jwu=PRG=-7+O!CuLO0)l34sKofR0T?{X$cg1~#0s`~72iBI4O(aCe^ z+2G2r>h&GPj7E}|e-Hh)sFRB;7oNtm+;k(^kNp?^<;H4Df4-65iPWVWOdwE(en0yG zJ2(Oc+5yrEK7OKz1>oIVMhjHF=_G*- z2sqX1D2I0WU2nI(=I8hz(xJ{Il+p!Pm@7JK*o_n?r>s%8Ed;1$y>0#@w*UKSg99AK zCKqL`YbpYTe9bA4^t+PC<~P#ON&92r_JjA=Gbpntro8V&woJWzXv!?3&;HioLoOc9e(6x1K0ic5}06efI~V$Wb1jO2{lT1pt~7?b&~=N zvZzmc0pEc%n5uK0fD#JoTGrYxM|U_GwB{iG^81He8q^8JOFcLO3M-~zIv_*q%Li`p z+w8gptyzBZBan#5&h+-}P~&TkY}*Gd(^&DJrX3ChH%Dq&q95bS_ja-b$X2qr#{1rQ zmhv^SSKL2}*!CS`5gWU_rC`ygu>9)_oB>s6c^^UZjONl9y z?|Kkbv?3U9I(9ktJ*s-U=R1C}eDf?PPE%^^kdYiRamxKgA3<{3D59v4lc7pncG z`VodiSe4NIq`FtLW;^`UBD3o+vU2WPI&$7}Ry2#zDCzxxqH)y3ti51E*q*qk?D}p) zkx*~8250)mrwT?2L_Ey&qmI?ZTw2;PP#HI--&E+DT<4nT{33w;CvU=uH0KDlE-|fTB;^)OvgIho{5fWNpkHxFFJiPcX8plp|sh_a?Zk%vVIK8wq8>^jdp9-+P5`%b1AG9aamIF zGa$BvR)7LlVN+~_ff2M-oLYJQgp+CnoqFjf+fQtl?)J``^8OC@Z6GOQ^Y*@S6jvs%+h=VpmX4H}I(hrr>j z|0JE=iYeWMTZ8qif&Esl{u+_a{`RjIBZrlGuJ|{~D zC6%$fyzAC2Cb`e_=|&AUlKi|+oi}aHcS)3ZWN1`2P-7EOT7F^e=Ac{m8r6l(uP98Z zk(}GYY065Hz8)+d*an*6C&K`o>g}T1M!HEYo*P?3+3qrsLRrVPd<$Z|kME%Ru6%K1 z{b<;sNaJ@Q57Ni%jExt!UMmx62(v90_@#90g2$Kl$_u5Tt~GJj-YwEAb<;edeh?8e zr8aQUY&}O+?tm1}5rOg7lMm@X@h>aLuexfQxVD9}s>I!LrW1GdH#XYUyUnb;bac|m zHI#LcEAzuB*E0BFXj6GVY1=rW{|b$XQJk(aWY#@qpNe0F?|QNl+_%p{&{UW8!nZP0 zc1}4rbyXRsj<2Es-q)rlpFg?Ol~xg{)I9NS9SPkwQLC07{SmbRQ9U*6mf_qLIUg8} z&bfVNmrv_XbRohT_0N~(UhNaHU47Ht-?Jur4K)V^e|jcjAQu30Bkz8p^sB7zpDWg( zbMmBbpYBz(cwSi3vIhK2l?ojNv?<3oixl{;``NpKl~cnN$1;jhqK-@OS3%ZO+e_ELmTp( zUS!Yk)+M?~mNGq48Rt6N{w}ofj}CqO`q58ZQ}IEZFE>>o-MTmQ$t7tZ|9I=k_xI9? 
z$ika4+8$FH_mVYdO0J0IH9l-qUH`7ayE&KLUpIcEIZ?W}z4U!b>$@d-ulKpDC3B~b ztDPb8w39sIwoOT|*-~ZWEM6~nd$z2;xm_)FpIgv%xAD2>{wW8@zuKN%=87$AQ?MFz zZBJ;bW@xV1M?ke@kk3m~C{s5nkn&k9K>T|D5_Ubk(rIzp-r@?XgYTk8zdM+_kcxiL zEw!}lI1-rFObKr$&G~ZBn9#?1{yj0}0xjvUAQt68NT@z-jZEG-L6fI4k*U^%=5Ln-{;NvvAtC$1 ze2yqj2{eWo*W$qNU&;EVcQz+nv~#4?Vb^z$_OgPx0M%-nw!{ zqur^|U)%T;$+Yty5&3#oGRbwaOP@Pd|E1y3+k{=4&Fxc=$quC}E<5WPtPzj?ocL1Uy(tA(+1=yothJqPEWE^~;v#y5JL>Tfyc7L*@xXYm_V zrj=^x>(c&F`TW`IvI$zja`Cfr>P%=`U>sWJDx~KxVyznU{;=_h5*X+(Jz!KJdY3XyKckci^e$C6!Ezzg2Xu24Uioc0#B8%FPu$cY!{hSVua_!%NdMSF zI!!Cy3ME>h^2xHJdmZBDGX9T+Q8FWj(wD&$;J~sx+3ud_f5B=vHNi-RLmb%`HM z@@#3n8W~(T_kaUF-nPKezoyB@CB2_z3{f*n>xSg1pp0{AHi}Wzsu^NCZW6Pv1>Bs% zXwz0hwj0#%mkl#GQsXeHuHtw5+Y#u4s(vfbb~QV7D`Q+>_T95Ss!ZRLUi6b0Y;`H%y9 zubhB}f@Sc))9+qBY5bjVgV0#3RuB5CnmlIw1t}%@hAHuV);udH-$Ti@?usIvh@{-; zt?i%2sdE$d)wjCEl#;u{rv58PxGoofg1%{d!yE)MAFtm4>MPi;4V$VoqF!*-ynl$I z7Mr0-;ROo|!F1z7o)b1McLX5_S25B%@rI|JAKFvK5w}=`deW|3K+@cD>Hb@Ug@?|1 zoHK>3RLuGAed~ARn|JD->+}izCP}eV`#PVkoAmt@BxE1qS$XvDR^5L@xBrPv`H*^< zfrUf9_oA}Z(~ObLbsH8x7c1~2j9+{;DNCDrLe-#RU#BDgz&d+*x%Cb&ve&PV%6s)d z%<)@yMcVP)PQPzzQYQysgbnRaYd`DwtsIohzMR&5X>IoP0+O6E6K|it9hvIU8o3jVIBrJo-wOk8p=*?pHH6|f#o$q<$Q?*I2o*iZ{NOw5o&!lXU#Ff`prD5-7{lfy;_php<3&H`SQZZfeD!c zgQwg0vX|=LLbOd?b831z`7(X$+BG+quE>6KAN+7N@Yh%9K^F98hUgyP{&M)EG4O&Koh`IX1`wv;&_U+)F?2p-7>N_MuI`Z<> z+5N>q&Tra>_!Q=E%HNN?TTmaHJNSF=-m4mBzaDPjrz#FSP3|gg@V~C6&TY3&^Hz%P zUb!p!H}&4%6x_a&B6r@2W%Kyt<6V&o8T0(5Y4b86&Sh)3INf+D zDoi*29Y2$#eagtd>B_@q>`_ijtLdWy!qZO!1AjW59p|Sv-qB(gq{19fum{%r3F{b9mOP-zMOfO$+5XgusqvTM96PPMP`+@1jxPJ+4d2CpJf4sjwEatb3ENH>5lPZfJ}bZ4k9}eBS@(;kF5YI( zE@sm77h7wiMM6h=k?Di&+ZDzi4^#%u%b0I^S9tML&2QqxrNn6oy3aR-ji1C>A&CR~ zZ+J}Xym&AH@v41n_PMr=vf!o~d5nIRl=`I$Nggm``1k6EHB*VM^wvoSd!Tfa^wkN> zKYR8@w+1*v+e8OYKr^+$BckYCW{D>&KK7bJs5C z)blQhTJfVQ!G^4g_N||H|B=7t=HMCMMkapVLLt}oas9J~GPwXW3qgg!#Y{J z?TO}7&mh?iU4ro)R@dwUYbzu&$4htXN=r3f+uk*LSU!?JhE_LZJIS!(eq=KZ|28-C zvx31ok=AFoD~Qy%kCkhb$RFoCd>}kPkp1#k@02YEB|?o>%vmHQwL9yizyEp}7m&nn z6Qay}#3MA4_e2LNg7_)2Y`(pl-`u*+rg6_Vn%Z)6vA{LrmO?s0XO&oz1?)T|J~>sV zn#X7#FN)TvmDl;w{++1(c0r!bQEf0f6Tj}mJQY5lzilzOIrWy$m&v7?`kV4SK7n1@ zN9{wr4!?-!d*Irg7V(4Xwp$=Rd6>KB`}gnBK9OouQ( z-k)}T?!(!hn)Q*xcE;$3^2*9PuJ-f9mvH6#@d09ieBz_4YlTlZII!W(5QmFzF}z?A z*&&4f0Y#n13&+K`4m(c|Hp5}OjfjGpt5bjQ<0y(4sH45aXPvf>iE*tWsmn#+g`;x8 zQr^>)px9LYVV%G3o5>1?fY}4DTb9qR+%IAYd#&1YZkuQ4wbT7SghX7@o@iJc8@BT8 zV4g75%Rk2%aZY|?%Wnf`-TKE1OxH@t$HwomMrF?-_OzIA!!X{z@VyAMx}R&HGM z32s=JKbhjWs89NIRWAeOh@}SBi_3m6gHMh5XYDGWku(aobBB+7_B0^fth3=Dm z>K?vk!RMzN1yeT^U+$&*;_AG?w5@<|;Z1n^F;zBCnFi|;j>3;pDIb`x`?Q1`vkVL8 z(`TPoYS%hl|8iE%Y_85cQR{)H@ALDkNv3u;j>pZsyDJj4qlN9+jL9q3g@Klb)rNW5 zy^nq>bhgwm#J<=ao5@mjBF$;|<%#NzZHkdej~>~vH7@nY-efNb8nBj3h_*dlU7_2W z{ZXaa&@tEki%kmQri|*j8$)1K&NkYbpD8a2uV_a_HKoQVWNZ;zp0s=GE>%wIPCM6O zpEM{e)tP@jSC5cOtbc|I-jVMncl}RKwVXTmGE+FZ>vi;@N#A(!1*N+$7st*YDEZ%h zD|EBQD*1YyaI~XG%`Ym(;_}9u{ys_RR;hO$XLQ(nd{L)CHF6z{fe{wQtJX3M8o|OW zC5*)l-QoY+zX~pA9pN%$k~`00QL{Vtg_aQmNx-1k)hd;=Zd-${&d9U>?TJobq+L=m ze075Bm~l?slk;H_)E)FVTpdCje5EJdE+;S%Zm${}?{se>d)hRO)mJ7Fn`0e6Q`edi zY)OAt1Dp(^q1t496*|grPs$%jG8_(6CU^h72j$PAYpwSX4Z8hp0Tg-%LGCAI(voBl z24w3~ew@ASb^#%(`uB5C{v3HNhmG9-_iIunY*aakw^3(r`}OXXjWHDPp!*X;bU5asXOYya~!uTM^p?Fv|x7_Pc+`_Bq&O!!07 zgN(0?f77N--m$0ZEHsVsDVsxHa0e{*dg@)ze*X^+SeT&9jJjRcuwE+fxzV@Pj&Q)c0}pw>cV-IVA?|} z`(Q|4FmFWl|G2dDck&gu>>*G_W6-e;VB~FgcS40@NA?c{c&I`EdzM;sLT|*M28mt8 zd}?QY-=+E2Cl%cxB_$;&IM^5~?R|t{3SlHt4dn2CAdrIm`y+M-Ez&z166t$}@$-4Q zk7&CR63^-L6~EA3q4gZ?JZ#+|J^UVrx9zvN`6y(kDzj3_n)`gw=IG3-C@MO*5)nkN z{%oDz*0SDze&>K)^HRCnY64{e{Y67~`u8&G84$GA~y`8_%&P 
z6Y5{@9?DFCkNiitw-<^|O7ix{oEpJH9sHu>qE3C~n^AjiF|nlJ{SELq$0XQJwA#AV+Ix(HvYfh5m3A_Zj*IWw_A+E|;6P zZzr#yB-X>VOQL51VRXcB$^+FICQgYE2dN8PYLa*qKeyPsQvF9tskd%Xq$K9) zT2JROo~;W#gOF~Xl@d|X))gca%doAB9+$p@+ZRB#DrM`$@+p?v% zzaJg@LPvfEKU$8*t2F{PL}RJiGFZrOU&Poft2g{+RT?7y^Mpscr-n#67@3jM=}D+I zTdTI=APWw=$~#YCLu9U^7i(UMemw~z4Z_TiTuTIOW?!L!4*X;nb}lZF_5(3gw`ZVq2hWRWuC0~wE8*5(n0(!g^-(3~0(oOYh2w1x-Y4N>08UKL79bzDSNq7NKR z*|X>lm5>OZt#f>3nsb`A+i)$7@tUV}|1>aXzQy@(rQxW#mX;=1VMH_2%(JVbz^_*3 z%5Pq|%$6OI7eChCBvc+JC3Wg_w6_~)ecDp>7PRtyPn82?!z}aYbC3p~0p2ep>)(l! z8$=f)F^l$+Jv2#5=J%dxdqoZ1yvy+o3LyX|C z^%?y{n5dwj07^^((DayC5p$yvSl^5siN6Z)?F@vzV@zVx>k zC5w9aaQpKubNhp?BIwLB=6CdSAQ9^~Y)IAqjEn&?&Gm&o2={h=md%f!Z9ghEgbfpL z{rYbvx-E(q`Zrlk3DyxtLVqb$Z<>9I4N=Q5WlNZftl;{w*1qwG?=5Z z0s1{S_9OmX4OkZ=lmdN^`Pon-xrwhF>(Gg2Yc1J(v7+@ z9ywXW&!M3hc`r8s0f7q@Ok!kiMc;=$0`*f|Bo*MMy6?Sev$s%`&_f*16Y+Y{p*$=^ zJxc5=^wUkHH5yP7=WnmBuC~GwOs3aCAlOtNjO>V+I^~laPu@&O;K{IR|6qh2eOWbI zN`pIpDttgSQJXX1e7&)`v<$g<%(&t~w89?foO!PNDb5w3m1#@QN5Sq4&$CcQ(!PEB z0B2kIdG3TGSww3&x&ZguVLPwP?`)mJs`K%kh5}x_hp1rL{al+mtng(Y zNcpeoeuM%XXhJ;N+Sy_nF7XMCGQ$#H*%wFI)mt?F(kPcJ%EC_*~Py^~2 z`@A)mPfhDzPR4nZWqM6Evbck4gB*3`!L)lgwQ4n`Md@2CH~#8IbNiJmSI~kQz^z45 z`0clU2yNTer}{S}o3Flr7KCGR&&WvTaRs!8*!sLhD{8;ztAK!joQyw0Y=Jj!Of_=C zt|cZWItnU6M6y`Vm`LK+Ih?Zk;kj|NW-$RHdT=q+cTb@Amk{%56@5mw+Nt7oN1mrvZ|h-u#BBOpG7)`7eT*lC2!+*(5j*! zz5Su~gr%PO{cEzXs7w4^My#ageH}8{H8hPa)*L(6;Tk&?ZWCyFz|;6J;Pq zVWG6#MEs5;bV}}o1KK3um^(8}v=`%xg|+TzzIy8YzD?fZ3Mvv|ur$RX}}gay_6|RY|;CfReV%C>v`s)CnANbkBz>twfF!G1n+xyRHKFwXp4Hft}a zI>=KH1x7D`llM;+6SL3-XpS-vawiUWPWI7zB}L3>T;8Ic7e+V{A0;5hHKdbgtHq$4 zX^Td2|LN+VS?Omsc)2<|hZ!raZk6|R0g}OVNIIhdW>-i$HzDAlj0_gz=J=YnnhS!D zwGN7k-fsPHHz1eEP_ImP)`se~0tXZZ!$sf#8a8|B=!87J$On>JG3hR$lE9`EqbJGB zdpN5LR?;VA%~z`9Aus!2Jwzq)p z`q{gq?@qW{e>~mhcb$ieSH6`U8q*7Fm1o)K(36+9 z*OW;5@u=&n=?lkbUhdxU?n-UTco}Dvkd+Ib*2~8r3av|* zvL9Pd>I7px&NeRXb$TnN=#4*g`X>iRRWREgjk;hye%f6O5B(*6M_gn(J-t?(vxr5+ z=yrv9g}01DKeY*@O0K`AzmwQqIk}Kt)cWS&w93b{DBo8sORn;l4gH9uO$U6E(e0Uh zXpcFRtvrLAL@AkMw)2@MT@xw4AS>v|J2o`*ImcRccE?`b$FFCG+ZVD(dmvWKmb}d6 ze$wE|m4l^xhZn9|Q5~O}RwzAuxLBa?fe5+4u`~2Mt+@|cJ=c^|jbIdodD!OI1ZOD5 znfss}B+VB%wiDBOQn+Su<0W()8feR@pGn!VCU5)3ne5NOyaE5cds(XLbQPm}Kh z*f2fC{7~dGLgtyvfqmymr$90|D4xV?#omcQStoNi^mDvy?GD&9Ju1EQgPJP}J%~#X zBk;bM&EO_-KPrm7oA>LM5EMlPw_8Bx*vWZj=uUNhiOI%$>c{z0NT zz8bv%iWEbOWtYUot3 z>NnHpK?c(FtnMdGZH4`@7i7e$f~5xN>D((7jD1 z)Esj6FYKQjLJFn)r4WyV$#&C5z94(86bHZ-WMZs1@do2-I&AuwjAL);1qvMRo%VrEWE zMUHVKPDFP_zq-(+N*vHVdh|6?C{sqwu-%OqOtC}y7Xt~(5{aJmGF`n?V&+9v3XJt9 z%|&l$IUT0CoaKwYK?6&isjTgp*Umpc?92+@Gm5Af?By|08oy-)g z-ylHEkw*JpkzqWcf^c8u8R+7{w{BS%mQqBFF0^i`GPw{fb>8gH8@W&RE)DLvkvNDN zuLvt_J9B*}8V>u<`A>NpUo>e^+o}<_qHG@^#X69pVgK5^y&uhSMhh!c9^?)?q69pc4rm!gP#lA0#p3a zn!T2mmcGTDrCu)8l{Onq$T_j+JF3Dh>P-*`X2rI?e*HSzv{z`;Wi6F8*_6X(zrDCQ z^eUC;sgkWh&kS_dWCJwJy}Dv->679bxo(a4)aj|0a&mGFk}*`G!otCgVjnPos#q^* zIY4>!w}Hx+8o-G86~%$1vsHqH&IiPDJ(wZakde72BU;g~;!&^IQ>5**bacWz_I6(g z6+0d=yI85(a02tVqrwVAJb@mQA=@ZH$()k z73b@Y-n}+^{+@p5x|XRp$<&*6T}3%$!j_s-P-OK(RbW}YyXpZl(w5^P@n@^zM#FO| zm%}ixOGuz#74zM1?v(dEetvG-767cSn_${?0DI1YJ%SpXRo*7O{3K(fZ|c!eg3LKjciUOSd}QQIL%NF;kvRfC2A61F6N^qh3A4P~u5Rd%I1zAK%P1juA0G41uMSJp_tkWVtw z)5~1?PI!gHVC6&^@<%Q*Ap#%&Tp;2#81j2U*felG38AgEfK$Qa*EJ46#G7$R+~dc@K&Ke8PLKE}hqxJ#&QZnbW}ga`27L_=#`y$+x>yH&$vb_)14m zZ{>XadXnSYj9c5P-Xat0pl7J9s|yOO>^f5(C1!D&#n-_29;J7l;b1X@bRDya)Lgcn>+cJkeYjA=;dM zQyzY-bK~C8dZnd8_^q7flhBlaqDh0rPxieQUdNZNqbvN<41}{Ed-h+uJ^r=yjhj}< z9|&zb4IluRYGlZli}2bHVY!s?>pmH1{9ez%@c8TQnTPUTQtbzLdC5ae4|^wIm@vU$ z_V;g+CerSlpXUqnd6H;_c^cnA<>K1VFQU3tpl)RCQH#tMd}=%XmFkI%v)D2I=<5p? 
zvykOfK+G~mg?7J09U&;E~p*KvIdlFWWCO3QfFlT5e6`TrfxW#;J@0RRLpg)aG> zYIFH-rK3bBLz>A8jQU=x$T)CeG zI}vqlO2&FwPMMCbc7})-C%#`!lO-t$IQxf%$bJ*}4D3ek+AJIU1CJON2znigY zl1IX$qS}GZ&Sw!p&HuSQjmh!jjNHdomgg)hfX7$G11F&%P-xG9l)~mqUP z6THl23dtRG`66b*ta(28?+S7eSi!ATY7m>d?6i#_Mj8PN3tqm%vF|aU*Q~6p$<93+ zHf*R6A?wL^r!Gd`k zllzkHv)*}b7hWSxkQdS<$@kGK@A$1O8tYH!PGPm^BtzKEc9kV!i zphK_S9z<`ex>JPH5-UM65@seLR;H*p18AJ5sAv>mWS#*6eblzo4^gR-Z~#tA^_K6;Dr$cfCQmp;|TW2GqaW5J+P@e-;pUkhL8#=z&s`UYr(cXR30t zJAQ1Z2<0`kzEX`qD-7yG$$ksC8=0jS$Tk%f6{6}4jOfKT5<0_u=t{`&k;_0zEuf^) zJ4BU9V=YW%=(-a-HKWA#SXOx3*zBS#5y=7P&o)T&>EWx?MK`wUv2dz3+XdGec+(Ij zObAnV9+)i4L|q zj|QjIlZ5c?MR!WB$sQcE)7Ean9^82iM~hXbrvwZRh-0AGuLlc)?uA_alK}n(JON%z z!(*EBAFfxj7T{EBJ{YW`@6AP+sGM6z+J%gImK%6}Rl!X>xjo)a%S9`ZlJX~`Rl}gk zomQ>d?$gnK{^CU%;+8r;{Eh5}Hed(na(3F}kmN~kT^l^spm%_;Yzi=7;i5>M1(n4Scx6^J6Lr>Y^`VfymbD`O1C zh0GKpMuFd2kr0##FHYuBCk3)^5sI>GVqKA}`|G z-MrU+=x~|OQd@Jg?t2S}YukN%(g>U%dgv_d>~CD@bapOwd>p)4t0tlMlJyAjB9wuK zBM%k}{c(O6M+#u^Ek4eH=H_OEAm(r)WGWiWCwd6xFrcB!K3L}c011IKW8iFz z1GSDM=lbTGH*1ec$Ufe;xtozN3O`4~;fx#@%U@Jd;(6uD2P;>UC6F!A)6=6~+fzzS z92S-YV*`M!45Hk#g%1#}PFPUs(Q*!au)9%vNMbT^V#6JOJJF*%`8e6}ld2&-55gLy zVmlrQq`2^MSn*=Oreb1ZT~_GECwN^SU1q-HX7}#MP9DzH4}$CN4NL}v9Ylj zijoL#KfE?U4j0+Ke-b7LF6+}nDblZ&H}+N!2|^`H{iu>b3Nte^b`Pa{8UN&(zrTb) zD;`eGo^nxM+Mt^^DO1i-%fR$qUF;deuWsiu5G=;Xzk2zyNP}_$nJG8Z3&9o^*(>DY z;xhgF_wU(RsFHDav!XdY;IH>d$7a!vla&NTE!YsN+iQmM*D0(C4h9M)t5BhU5inni zn;Ra%V-vS-p=$HG6~_$dCwKtpu1G*LsnMhWA!B3_9rLy#7)w|*AJjvKv1Eh_ z<%4IlV2HW)WoKpBy5A=GmgJ!XpD^E!yd5auBDTtqIJlaurJvt>(A z-lNqzCx>d|2;4O5`7J&TzrS|N%LFDmlHKmNxq)l1{?o!|3%nK=PlsJDq!jwZdKmWvuKa=a9sBnqYKV~coPNZN!W$DKW1lQRmOlyM#m5c| zv=rhuGA_#(j$0|Gh0x=cV4f~84vvV&6~4`;ZeZ*DU0hL7Ph@dbz9{S z&Q1O8*OVbGZN;(H*479CUoC#{L!2N%sv={L*K>cX@Hv^Rw&`4o9gd*p5K{WNkdP4V zTiIlkv?!BRVVWqOQWwA@e-;2rX_NtAH78e+@sa=W>8;(^t5z`e3PsP{tGbAyQ7X9d z<0Wl6iVkNW$Jf8_<6v{rf^qE|kl11iFq-ng#W3JC*IZ3_9+zJjBK2>&*Yx}~&!bIc51ZB<`YTmqY zFBupZdd$402bHv-@x-`(eQ(SIIAFaSIJJ{ZYzIhH#B48Gv`jwZ6)CdFw(mIh?tPBK z_4i~Udc6VYO}D4SYfh{kR)whQFeYLp9oTM(AhhrOqQD7Ecv!#E*C&fI_Q1dr)2^~k zMe=L14Sm3TzKZJ8IBufnQOW^=;DP}2OUwuy^4}FAC*xWaWyOxSzi^nmzo>#&B&sTN z@L=|rLOwpej^ja&F9rd3C3^7SgFBDm7;6go zp**JAhG~|5PxkVlKvB_`tep@)qv+JPu6ws`yhsv@0iuP98@!m+r$6>;8_ET-s`sB- zl)jiO*&XcXeeWJMHFe(w^l+YLHx%xFd%qV3--(APbp@t|&3t>COv}fQA6r`^Ufcyp z=K6wz5QP0 z7;LR>$tG-Yvs@SkDDwzNdZsrXJa~Y5!@~kpGE1>wxSdT>Ih54uhxviBUUG6eEiY7w z)CDNIEP|lS-Pq_(baI!zA4aH)kNGABF_6*tPLJKo2w$^}lww}ok5p+$UIi@XuoBHv zl{LSxkaJ!qNNV50J1IOYnZHAqn3|b6V=e0E=U1^O2YW9!9{<&Qx-n!N4cc6<(Fy3S zGXr+0UJ{yh!IE%r3Wp9oG*mfy^mzQC1*9NOh42fM$SWkv$>0Q)vSwuE0uqw?nF;l; zG%DSS4$B}`^wlF;xx*=`(Y&5lvJ~bVhyntV+SWWdWz2hOg{=dN19AE?GHr7nJR6ah zmqfHd3n@|s#rfJoem5}G+VZfs>phcbIv}cQqK`9|!=Ye;sDQc1k(o?we200dI(VVZNy>mJoh0sgFvt&1wQ92p&)y06p{dEYW^=>RxeVMtJekfzXtEvvS<-rS4-iZUF z@U}Rl{P>Q50JusqGm^!#+=7DjQ(J|XF-j`)+P!=C{s=EqjUf00pEL6a;JxX0%j+KT z!RkF4Jp)Vwo9Rc4va&LE12B<1C>LU&))b87U-w9S$xR}pFffkon99BFjfMOa`N_6C zgi`gM`qU;zkFF_w8B!O0U;w7FZ>jn%tZTo~vbT>UN@? 
zafyq4+no>8DD}RN0h9QZFp8vd>U+=lYwRcAM-qpYS^+F&W@=KBb?7D*-eZY(G*^f) zYFY+~i{HAMnl9o|DB%t9NDQlq_w5+mgIA{zxAK(X=}eoT9Vk4kwSlxtOzZ+a{p5k_ z$$_R-#H;CZb8}}U!4mmynH0N&>bA%9;HDaJx7e+CHRf)bDiyn=bp6fv#`20Cfs=7jKRRjS+Q;R~p_2b?M3;u2++r)Ub2G>~*uI&C6} z+A`l|3Zbk1qAgx=qyb}jCd*yJXgXj#7NpJ_JwMgBh^ip@B2BieqGBaH0wfLZ`-q;w zxV9gN`3WMKVV1QjQp}=0&>i%l$Ln58Yib|QI$0<-{1NzeR_kr#SNn!*5l9R+Ao9$) zm5GMpt!!+yb8S|bOzgOW6104A3E`n{d|yUoqU7i}g1AmX`&8WqV?n*-b<2JjOagoD zHPJ)c&V(ECwq`#kW`PSK=FO$DL{bZFk5tE$j-5@0u)MVhThBP)JC49;S7QF4RLSVMuEsGTk?rn@LUGoMLR?35sH(3(0{|)+l^+f;>}DG z@I~2Cr#5VZLM31k7V4){Fa+S2Jpjd{QN6H#{q6&2-~T~`1Crh#!a#|kGrD)qtr0qh zI$vz0^Kjdqh1;gEh(*V!9yGuTBI0RMu+K%TDzL!lf`=s=G>uVnbW#kMsA*Ai;_aU3 zEVY*HRiJr}}^#5oDj-H$ZwDZ+2k1r<-ZGDAElU z+v#_!=q$EqoPtcUEnB`n1!Mq)F z?Eh4q35DN++0^?8>-_n}`HY7@coCh!kTWnd|BRvz2_GCBq(t`+as}-W8!NJD7eztD zya|0L&*n`2Qb*(H#CIeALjZAgbN#NELW}6Ha2ZH!To6g%Wv9+_Za;KoBMZwkQfaRJ z0@ZN%=u7unm^{;XvK=&~v{2t#)V%HD57UT$K#Q~&lBEEPcnB2me9K<0qwAH(MZ{?K`C zBZsQ0>Y+5s&#OidF^#Ah7_jh*W$niYavF1TcOQp21%d|d#ND-&SebahP0exm9NX5d z9srEN2Z1_u1`hm*6DI)u^e&>}=8PXGDJgY@P5k?^dIC7?2+4JOLYpqow*&-${rAuJ zC_oUHb`s@97;W6_RVz+F0>2orwu=|XL59bk6NUpGrWjeG?K(mCXg|wiK@HtUk4=vK z2eFlSd39YoHD%Fe|D946S)5V*7vO$GJ^m5(9hYu0sNrOW3H_0NB55bTMy~mRK z`}-sESVF7uB`t*e$*&Eu*%12gK~jmt{Qmv>|8UG|Ik=qTJ>^gM5;$QWfh@YayDyBe z(eBAQi`*M4W=v1H?>Xc?BrhZ_g*}gzci?kV5>=>a!>|CZ#D9-$XE#^bkxRkP;cSL2 zEJxOc=-A+{I~7sIfYCaKy}rW)yN-YJ!Kp{J>Zf}fe(kA+@CFi`c08~KjD3J%*M74E zoB|a^Fv_k61OWUHCE;wr$nG`G-6hSqjDQfgbmPU!%~s(A0;Dm;2C0DH5wZX#4)O0` z(@}z9vVm|bT{gu9jed~^pf){@00Quivlpxk{<=s5Um~)|+}s=h&v6jWif9E z3F8-x2?HHX!xZ=UxtNKXJj70L;9K6=5|8o$Spy%{VFd27rW;>ABslp6r7&5J@EbU0 zAb51=3EQ6WaZ5BbA8lu0P=--Q0PxD4mfUB8q~IzwZN=(}*P$bTa`iF{SgTO22*o-8 z#Qe=fsCeLJK;5ogyH>o2p$7CVF4PF`A;tXxdxcW2*s|r7D-E#|NZAV@oIrOF`}BOl zyM5X_vnHDYb;Bxwt9OwQ@4$JYEFOSF1a54wvT>g|#YsdvC0`FOx|GL2?iaPJIE1hW z@#_yRT*N>uVof&s#t>Q!z)E=2kgKJD)t4oQ*t~m?W7`-558hi!P3|A<%o9N5i?FPh z@_U~|fo%eOXjbCX=l0*BeB_9|fi!xV6crV-e&HO#*dZcrgCmDm;6jZBklqXx;Q+MJ z7<>DY_FjCP-RT=h_zvLfegLWqKvx~`5x%6*1pq~5KxHR8pMkt~N&d3>j>8NT!4`Z9 z($pGkl_N7$nJ_{~R(?8ADR?d~79iJqSi1l0dvkbQ$gk6`Swm7=8!}l_Q_~731Y{V= zDnXWgDn|zXl!4Fzr^@?CecsUNE#1rhxuSRl$hR#$v*Bq&XQ(#dME&u9Kjm{=L@=BcwP zq!Pt0eZC)z-n@Up3OKUYQj4gQ6<{=ZeDRXH;jdrZNa{TAdC&9zqc&=%E z2jbS9JA1$vg2h;%lgut8R4%qr@=2RIWh;Ahj(X=w{R2B#Lj3@A=1|D&c>ch3ixqGN zI}wiJC>MJYY7rE{sK#Ynl{|#1!7|!VThpIPR&^K>NH|YJR;O>6M$TohYEBK+1P^TQ z@TCK&Rsf$vI7OBR@7wNkzR=${l{&{n!p=RFZ@0>(ELhD3<%ZEXuUyIey(q$sp0A$myAX-@T? 
za&>%Sq=9{t`>%%Xi{7j4d0(ea7{+)?GzG8kE)Vv;G*lfLt(5X)%}<7+7O|1#0}K7r zJ9SpubvIb5bbWCuFI=729Co*;dHuca1TOZo_v;A6Uef3g0OqNoGhzu;?o8X__v&k) z`Yt2>3bJI;R=UM625=wz>Txi{2qe(rsR_vDzckIXMMH#5mIic)z!`HJg5R62%k5FQpQ zjI>l3NM76eu&9!JO1XQ7PvOYf0a;RTCRtK*uOrd$^6)4wF3#C| zvT$XZd;eYxYy{E)KXN$T;OS--me=hFCDMYSq>M-eR_}FtOkG}fY%lYZRFla!qs`4Z zn+&FQNCi`~YMM5^Qv7;(T4DFQ)hqPrmL56HrE{ygT8`$70U|>x5m5<$cF*-a*hju2 zKlo>Glz(yZPg1L>mWAJoYZYY+-g}o5d^h4`tS2RHNbYn?Qj|Nuwq?xvy_>&cs@ z*>zGT6?zKfTSvBL;(()g(qp-Sc=QU9iz3CRvH{*QMB&UAleJLUsAxOxTlzVOTD>Y_ z)@*pSNM0jL#`hv~>paiu!_6^|t^eeB<)!&9=J1t>jUKQjVrS1%&Gu)!U8}d<*~+pd zU6uP%a?H)asLViG@y42#LZuniu@k*RcOE@-JhH)a30CS&B?rMPgabR;`SH)$5oEEM z>Je0#a2UgF6UH<##~i!Xx>Z}f74h{wmfp!S;?uK2U2NMN%~^qI4#B4$4?g&P`Nn>o zkIU_;MZ0S^Q;nSeWNrB?JgUoY`l^J%mhExB2ld#hvi=mRk5%D(BnnCV1ZcTgYQIq|3&~dZ^$N^IY=Ahu@VxDVmcn zlPrIq=QOGJod0>Kdm~Tf{P8{KWM>X9z4j3jCXbfLbQza1Wv(^8Sb{DYycnf@1pkBD zOi0*+s|T8Ch)W0t^wEp>@5NTYOc6>PS@83)eZ6s+Zhg?j55KV)juk|0+TnfD`_GVO zW7@CZUsorXHp|hIwk`B=j4e?Uy?>VHx)C=FIAGZfjzv&W;$=A1UWdjX_8m-Y8QQ}gDIOh}~X%G{W2HycmopFh%{T>h+>S16rM08(6a6r1%ekn_JrMHr-%yLa!R zG1dE-jLsORPgM|gU6^~dcWmHRC4QfpPG;##GvSS}08gI8Hi7PUdxdy0K5KIB>&C8t z3{_y~a%ecvT=M((%dM&Q@1AXOw$$alpWrUKbiihJ)GZU2h5kQ+|#<&i{;0-x+;uLsg;j+>*vraks}NL1Vqo z(VHJMNKdFgGrm73uJLPEa>rsjk`;9+zaj^M(1i{7;f>+hT8yvhnH{@6li5f7{x-GqeA` zi6Gu2(~a=mGBa4|-rlVu|AR|H=e-A~Tkn~pYUbvm-L{!?`>gWao_*}1Avy#yMpv<8 zM2|+O?%zhHB1X2%Vv!%Yr5_6YK`;`egufRp(C@#`xRyi^OV^hbVY}jl@7Wy0_w5iE z!EYZP#S6Wn;vlLNtX2X2pG&RZxs|z!AZF>8{7?dy!xLRX=^$IXV-*Fo&rc`S0%^pN z;k~3R$EOqjH@DWeX^1}tNYdzub8F8+Wiwf0_Dv4>A8~o1D?@9WNa4 zMaBQ|jIq>IjI^{h;itdIXP<9pCJ2)3-=~qHrRHE@2o|b&;YIqA1=n8X^7oY;8P(;9Gcl&rt*6Rzbcf>*ty%_!jAk0yXMDwV{eQ6kKcI5u=e}v^0*%= z9RM(Txjr@BrpoNM*(S~vtea^x z7}f2X_&&{+Cr+-sP?JWTqbJ-#&!pb5Z%I^_`Q!PoJutv6>gFXuWwl zKt6l>e_jyVu?ToYek2_21k=#jEJ9`HMGebe``5|){fIUBC9eN=s`#Lhb<5dJ=Ug|b zjQl9&FO-;M&HcHM-;r6|=(qCBMDlXo*4)XN+KB3tHDT8Sgl~2WFK?ER<2bc#o8t0! 
zGsE+*Dqr7hu?p1r$(4A{p(36Ygy5M?`}^nf^%jc{t)2RMTu45W-q0*)+~0PK$mkP4 zGr3f=lNEWJ`j=*uo2<=>iRlOMdOpiFWIyb`I>-(Pd1_ZpIy`1 zQRj+ougbRGwO9}{B93g5V1tI0Q9wt}0lZ%_1ZroLtWab9+YA0-bq zSE-C_GCLlwU|^qoM&>tnpI-U3Ph-3*^V6E7fvA=g_L9N(nskwO^Ip?FaXm4A*@ZUS zws(u~sD}dk(#P>&pI4qM9Q&?h%sV$r%Qy;jhj;Or_-@F&@t?Dhd5MscC7*fA%Up;# zdI+^S{G+K&s-1cc-VS^jqxH(2VXH+=+jyXFdh&$4oO8+ye_(ig<;}~t0$1V&@RvLG z-Z8Ql|Ga#*qH@!j54-odHy<_p$kQS-C2R8CzdyMId6r3au)9E%o#})4L^b0m2a;v z+>h@_=ZkY4z2d@bqyG94nV_ywpvuJ+ras^XpBG%_-Zbr^3uj0^BIBMoufE&YIe~^% z`X)5U?40OkS{b8?a~6iuJnG%6wP5pma<`=C+x}E@dnNnC2s!%d{*VaoaBV4BX$vVvwpOw9ki~%f?;XJK2O6|2q|Hl@x2o9x3POhCWCa9t-=>3xRTt@UY9x> zs7FG>xO^B$TXb1$XsLUIjz@5KWVL;S$?U5CpDWxG$_pZLY&A>77IDeXFJ5)KC&pl^ zGD6oc5jMYLq)yX7_q=O_2HSa^nn{L{Ge6VgwQPaG#4j#P)5F>2ps6krpU{+!($~Z} z*D48xI>Xt$U1e8hy)~ysngtAsTA+^3QcsCD8wKhMDjHz&T4a$eaDf1) zwp7KB6$VW{4M{2G2Q>mdM`u^&;iYL5!j}7>Xf)H;a1yI-)o9IlMevbTOS_)-?taw( z&TnnzahqziuF*kQ*IuDcGrtlip%AS#CbwlIrIB=kJE-7=2C5sEk+Zu(J9enq(C{iS zj|=U4p4MeXJMGviq;Xn_OP*(w?Bm8Qb_VQ1HBaIL9$m*p303Knj^l|VJJ-FsQwA@Z zt=`R%>GLHr9I0l+Y3;`~fz+_mFI@fljkPVlL>j*Kk**NaJmn^{o^045zWGl=_!xs| zs0INn$(I}p^D6R8YRe?D|9s%;NGtG7nkAiDIGR{F+gR2MyHj=%&YZ`78>e8M8bz`Wv|xr@nXD(on` z@FvE6%ak^BYtb{AFPLhkC-djLa6GNLv@xKE7& zf)fth3ScTg#~)f+fzXgxp=Pixg{rws7V6w{q@*?-_o46ckaAYC7?@a4wmLw`50DXn zQjHDq(4+4NER+}0Mp%2l@knhQIq1}viagdCa4~($KoKYI%3!q3@hv%v)TJV|JA83B zKrPWKYMn4|Nb#^~ksu<->?Y&69Yg-u!W5yz>|oVk+e2TTB3m3QvG}%!F;1MpG@1!J zTEVgDOK-Juq;G-}LBD6MU8yE6H-E((lC^7xfAcddc698zAm+Zb%6^q9#3&k&c4T^- zDpexFU`Ea@)_V=^`Cm1;v+}!VCm_)6xdle1l;YolMH}*WD8-f2+=1Vrgeudw)bg$3w^jwrSa`xsM4q>NS^7!UYvq~T)5P8(;#RV&aFYVNv_n-=ue!`<_Pmu; ze5q53XY)uxB9u5o_f z8o66vA%@4zNef~LMosv7lW>eZ&TVn*Ncscy=xZANNe0|WR6B$Ln$zLO2)%}9k>SiZLG|F~6cbR5?Vk?1 zW87d?@%iHuth9||a|Lr|D|tZh6*%j*UXLu*^aUHI=V8Q}xBI#k@yjAY_KwpF+#Hph zZvPIS#w2&>hNT1MN!-m8sx(u*#>XhlKHBaIBC!{L$Iot0ge*v#`8z*+U;|OKV?QId z*sO63Cd&en41#gMhM=inX1~)TK5_M6f!qRUbQdMvYyhToR~_{gGde%JE)Oc znD5Xfyc$lD`!Y0cUZ^?Eepk6ipQE~BFTLj4n(qgxULSgWQep^Dp-oxC+96Wo#Mb+V zztZo17J3hdQl}D@JXO%`MiPIF5OYhH(0+aiZ0){j9g3a~q~@l0UtXV{5)c#y*QI-B znx_1PJ#JZOk?HC@N6_e~_$x`S2}8qssx~`n(IOMSiwJ2ysQFt=kN!^<+-*WM36!XT zw`F#pdc{G)3%nV3H_8*7`iDH=!p7E{n6{>@EuMsrERtA27eyo8jQKK57OVuI9vb() z8{yTZ)ci)Vs}86~!~`n`2v^FzimCg~Q-$Ppr^xGxB95Xk3Ng2<@4iRe#mQJ)ksC*H z%0B8qrIs5zMsVmHrW$y&A7$7DUj`?vpKi}Co%KD#aTeNPYh+#*}1)b-zXl47J#N;Q5| zV8CM~1jryGFR9nQ2lNP-`Ff}*H6aN!sCJ~J!dtQ+C@U!{zV8Gw6G{F#i$c;b@j~Rr zwYZlG(IoyX6daM9;l2_EmMtnhzTITpe7m;29rM*`RxfBZdF?UmVP%4r4i{7+ex;?W zP}pm!?dyF>P?DZXQa^ia$c;%0H@^A4W0QB5GP|M6Y9{sAPsI(HxZ0kBpex3WFzJIh zd42HG>61_|&eocv7%yo98$6TrIehmUkaX&pUC#8T5)VQmuwQx;1FF z)};7PaU(vOa|OvoPwDLv(xZ@Me3(1nc+1gSMZ&)mGI>9(od zDu@`JDshBD=8HP?WSZO?q+{LUdqDxI!YOzDgquXMe33b#0U`JY+uCgMCv&TpKizuT zN4N~1u2P#PW)6>KkByj1DbM#^y=ZU7tW2OK)K0y$V*h#Q#_z3cI@tH4N+z!>1h8IT zMx{{(G`cE+RCIzrkeoWG#!pOGEx96~k)jog$mZL;0b%;v z?ik@R-6jqkao|>O3nj_i#bE|Lv$E1e0up>4U-+SMJyc&+Dzy3+>6f-9cEhY4m9=$I z#$PESagU1Mf>iKpW30PeX>&0~G|*2*54OZ@uYqR|3r_0N4P4?;XJeSKN{4K*udXOksWP0@=T4TErkIEuAPI9I1P`^g?6=C zH*P7TREumx_WaH6SOJ`WBQvE8ul<4-n;nJyq4=xSPb%NjrcmVqb$_H@qqo(*c-1cb zo$?i>i$uqDxAmSn(IlWa3`Lu`e97qpkxuXN<*}QkB%A-SX3p(S3e%sbtjL-c?r0*cW61gsa!Fqu4Rc6)8f}DY>n-e{CeU+_AA92S zseXz;(yQqnNhf*h2V2C|Vu3@?R#$mVc|VQ|BGakud=T$4)frNHI&LoNEE8Muw0N-x zAYmDJrYlfDnBVUYz<9l*jA(cxN2Iq)ZZpRE)$cMDmM-2O97eIb(q^4$2m_{H&*7LF6sVsU5RsaeIEnti{WuYK|B`B>frEkx#f` zwJ)u#;p30asZ+9a@N3BBPJbi0A$dyTLwhD7Y|K?`Gwo2uq=!aJlr_r=vVH z2#FG42CNWqiQ~`NsO|=$Nnqd~fX9OheXDZylS*KsIT@~5T?8*a9BvaPy*%t$-Vy$z z(PyoBU?2vAFN+!~fbs&}y)K!sk`lqcWC``LQETh#`v8mkZ!IaFKv+*mu&|%1&J=zt 
z^0rjsvj?-eV8Y$$C*A!hVx?4(ui6u56CHNfWMw`#!09ZR@lU4^ljw%A&uX`<=8&QPK8H%ei`a;SLM#1jz5=Triq7XX9NT zP&XBPFy3=6tVm7m>%kVF$cQagNONM!cf~_oo^h_xUz0>L$R5~66BM?R_wOuRGV?O1 zPq|4~$ywXOXmGmx*Ta~uzxTFy3BgeDzrk-yr5Jj( zQ$U%ZZoCaE@;lh#ZIPZo{~|InZrT356Y(Z%?K%uLhJIsIQ^hr?5+h{J87|Y9clLX} zk>EurCbLNqwiiZxOSzzPZGNFwKO5 zuarnE_3!Kuz?Bf#**MK?uAbfN{0*gtSMLYz*HKjLOFU**aIJG!aU&@PJe3#djOQFN zV1rs`{T27BJZC3*FRMrC&5T7rD&6hP?NZ9%dOCyWa6*v9vMa zX76>oEPEAI-mw6oL}KFc<9BbWEf@^;3uzOQ*c?SP?lWkO{&bk4>QH0xV$$C)rYX@^ zOTQi#qZ7{yH|T);BJ-wcuiauZr#WhB-AFZ_MU~2Vd?58hf9OBZR{S-X<25B|*BI;D z)hfSkn01#+n0=x4R(GfZDsNL?A+Mx6Rc8N$hx+%0d8a`qkt~lSu5pf`v6m&{@2_3t z9p8Hg&VmJT0wlO})e#$hy{i)?bgKyk-y=(^jTqLRO^(a&HpKVdpy4;IQ~9*w<)&fv zl&R(UKS12Oq!xuff*Rx0bb!bX^U2nS#K%O}G` zBT>t#{=Y7@JmS{Po5ES6UgS=H*GJ4~^@oV3fK03W1DS5_;jfoKpF#+!c!}!=Ra+^Z z`U`q@`@ao(|38TO|M$Nhtp%aZJP;G1X%~RZ^!m4cLTL~f2R)1iu;ODJu%>)~!0>j2 z3w9~Xqs0?wVXd5?V9E)Lza+odrArpkHb9XAc;}8l>YG*|oh`Jp24~71A0KbJ<^Q5pQ>K|w*AV=#=VzP6Toc_2$w97Yh%u5;((<>OzT z#q#Jb0Y#l7r_OE_Y zGIS_JR|$Lok)kFCw5~-wHy)tT>!94x@leYkwa9#6wN9T<0G2OMXR7Dl-!Gk}Lk z5~n)j{tSfB6Cm^HP|?sRvhw7EbWYjBENJL0tB_R0Am*l~ z+_si<70shW(5~t>3w>_}LEGqw`EjMwV*SLPn7S>usi1(D>2ka3NCXX^!Ed0kxQY9P zJ${VzOu6A*Y(ktd0iwx8(HP+A1DmWXF80HyZw>FaAZfohJ+Iv;lR6m%xUGA+3XIWTyTu+EmnLT^7&Rucs2TI~A`ADMK zqO6)(X49&M*q*Ym53!jkGtp`y)`dwGisy6gnjQDW8<@$HtW}-Ld5c{=I!Iev8~8yv z`Aj=q@KZbM`CiEh!Bgq02LWaQvJ>vZxpw6f?3|#8TBTnP22NZQRl&y0$`(Px;$djLb#Kh5%?+P6(b9@e zPG+R0X4jMmI^5Ms2k{YV1aC`aJs;>HCV4-u7h0{zFD@Q#1BMs)F^b8^G!_?YEIYi@JC#_M{^xU< zUxcL~(D=BE7x4u7TLMST1|&Qio98`}flK?jHXvlSUQp+S<>LPSa>-96e~K4C{E%u} znhD8x+F=3?&RYFu87E~>1ifLQ4}bm2B=+E8+TL^&RMVqZbr?jUI(2)f7*N) zSK0%!)43%jAfLmCDSh)8PzRMj^|PVD!BMU zvW)%ncu^!!C0xecJ^3YcR|&L7*Q2a2(?&j6vezY;k0gM))$ zhzEZ3pf5O?gFEy*AG3km?Ts&6=2mB!=s7clzbaT^&8W z{Bht1tNQEd#c#0D(9nRY7x2h~tr2r3Mr0zM{tQAKSet@eWjqhT(~6&2F`i8>hSXDd$BQi=?EaeikOCWhZcqs-4|sYB-0V z&F1-ng0his;>dJK8YZ~E11_uERIV)#F+Zh(jg7N&@%I`Kcfi_5UGgAxN*@Lnox@Jv z5(5lbtj?4h2l`6_l}pW!r*MCkzn=$i{5&ynRMe8QW+EG#xv7b1nN8UENbVMb)1IG? zPwhN@iS#<_RC9MOHw*th;o;@A&Gfh^@j!lgsT8P(d2*V{r(!2zPZ~cw4DcM%kcO#m z`^d?ceEQ39Yv+9bbuQfKjxTBZU3Knpax~U%Zf=7)JZQ<2?h+kc-Mn&5&*!h}4nbyQ za6i!pM7&wNl9Gn2HH0VhSO-VPKfoj?9cIpDe8@uR>FBVVH)CkoxK&v{D{I8scJo(% z>tB%~TaX4yVtIf!A%1?AvBE8T4zwvI!0@7ioAhYpI^&%J!_J2l*~FgazhFYwv`?~Q zB}`KI>1Dp9Km~fZI$6f!k8YL6NQ6Zimz{_=*JS*uM4!1NyG%#m3NL9B)40_w23Q-Y z5){61jcckN_fwTklW=`IYaB1{e=G5QGWs5tw(rkx?w@2p%o3lFxUT>V<&kI2tpB!0{yP0l1;HyI5MKW1^lh_$LA0 z3_!t@sH?ksunzQ4c~wryFz93Wj5TzOc4lHU^Xa|5dyb%}tLu%LFJ7$ zCKqKFE2}9elldncTzb6)Dygyg1qC1pWHZH^iUoj6jLzQf2O`$Nr=HPOM5X!p`Jg{A zBdhub!+ivBf3W!CzT9&`#w8@oJGD0{^7GI$GCEXWW5Tj48)E{`B+Ez>lC$tN0*C4I z4JmVb`#dAPnfL810|%2$|8YPsLPJZBG3VcC`}+FI%cC>7D{C4Kmsjp{ql*mQH?KGL zDDgX#Y;z9Be{v3cQVk3dAS^Fkyg$5+;IxUTW9nJDhZqoxh%XVC-;AGbbSJ|UTy-Fs z1ScZNw&q-5>x28?XgW+uR1UfV2xX;lGR#SPva>X#YFs)V9~Z}goH_H>^p>pqT!j1M zdcujuJjjN~L2^^+5Le!4jko3UJmWHneQ5IMPp$tra6GS!Z-RCAr%YaI7isA3u1{CB z^OHkkBcr7it(+Wydn7#;UVX0tX$7bUR6ut(p!@uG?C7Y0Jp)jgffB!h7Z=6Hx`DEw zJ_~~}i;T6qw-~#_TFHyPe(hJMttV)ffF=(NS6f?Kcn(v;!orr?XHBTtu3sNoFKXgq zkpEeIcY5VZBIq7Kh&rF1AG^A=ksy<~OwjSg)&<9Bm+t+|^_lcH%sUs52~$2< zi2@g1Z5J*oHx|AtLjv9LF68(8@rKhn?}mI1tcWVa(WptCdJXSO=)9VHdjhv_@9TN! 
z%za9CdHDq*==X7*0S?wahM8P?kmhc;Xeul3LVopA*a*QS<5wXzzF0Y8rriSi4R!`u zA)&r6U*Ty0WrF1D2tN;xmTAwg_C7DSn*0NnS&q4HM+~Mg-e*8uEC@MuT;3SZm8k4>Gzh;IyX~AG7;Ix?3r{%-@_4 z_@SZFeYM-~23(qs;vuV}rOn3VK}%lJ@dRYAsn(ooiG4NStVL73dES=)gq;xB?@$SJ z_x6sL+r1n%*3;Vts^HhW?1~B<^MSsbUzdJ@SO%2sF?PKj9ea@QrI;R;*3|ewh2-G9 z?)X0V3C!y}P~RzXTG_HdixQo%!t~RrcL&{poc#gm z#u$C%6Xvau3Z{#%XHebgCJx35mCujVA+cHrL`M8@)~WWl(?jo-`2;p@CCs#kuP61!N^nalv-L^+H^)> zgChnV@KPj<&Hc+<^lP6t=*?;{`iZ`BuT9^&*6#;BW5Le-UKD~g&|G-`VZw7eLD*%ZT kUbp`LhyS0tU>{EqC+tIm4Fbo%0oYy*6{nq*|!!#Z#(NeQeLm&`ZQ6KhlS51pR}%y`9LVRa~2xG(Qzn^?M(?D!39 zAACR3t@ev>mJpK@ijR-slEjej{8hH%woAft##Pq_Cmrt3cRGF^i43;77IzBz^`mLL zB2@aOo75M}rnc1%%LXeGvHI0A;$@+F&(?a)D_4PMh z)7*M@q!uMwW)|Pi9HfRQffYiKg|;6~|M>zrb@GE;=;RYZLUZzY+Wh46E`;pl$Jy4C zPvqazt{_QHe*5pIK}av29Qu3OKlk~M)Bd^7-_!oN&)>)WbK3v@asR`r{^N1~)2jaC zasSh*{^N1~!>az{?f!oufW7@c3qixw#>U3M!NJnfvetRZo_=Y4o$b;kLj!}cnVG)z zl)j!GyXwZq#=N||U8pg`pPT&}-QIp?Y-}ul%irH0{CsYF@Z-miSrs}OnzDN&CrWge zJvA+jj+Rz5{#sX#P>-jJ3yOLb&V3=%p{~CETO!3DeLHyO>FJ5bt;Y`ND=8@*O4rub zcC@z}85#ZlUauV-Q&v@V!BiRjwfg=J)>G%^bc3dRiCI5-@fQ&Ur6Rvx5dL6r2F z(E8@)-hWP%c;@atG&FQUO;BE5zN@Q?T8x#Ib!TU%QEVHJz7LUi*-=%~A zi)r;3nw}0gz)be__OhoL8yZseIJvrRKK{J5^>l4=>c@`^qKB84S3p3(vu9d{zkg3P z9vWV1KX&)=ITSKmACgVW!=UYTie(Ww85_OTnz1`{j;4z$;P$L!Pnwg z2syn9k8`u{T}a>_n3}3tOs_2?az+p-vV1!o`g(E zNda$t8>UjUM#7hBdd#Z|fhs$~KSxDHo#`$tDuRm8g=$%~?}3e~oFP5gK8>OaRk0I? zQ&Urh0yW&K-z7$8W;oDB9$XeqPW+Y-ZMw_AL?p#UMTf`7my=quHMp?>v#}%()7`DD z+nGde-8#qiPQiJqfidOB^0FN;5%Ceb3#3eEVZEbDt^$P_>0vB2(B_(4+g7lBWJkQYN-n_)ZLainwCdMRHyS25| zA5S0!(77R=K?}U2bF!Y8s1mw>^CG=FKcQ zro(`X)=Udn{pj8is;H<4(=w6u=})k_J0%H&0sU3?p-Yf8maub;7Cu#`#Sv%{24PB_ zX4hEAtPTGHgB)TGMBC#U8;^6{0a|J9&A91drZ9>M7IepHmk9C%R7d~{4q=U}#AIbTOn-j+7B-zWa4;ClI_LJsZD{mS2g`{tMn*@Q?Lctc&NqBmWi5StjOrbL zWgNP$ECnUY1eA(ak#`Ssl`CUdM9s+&+0;BrlzNdI4qv2~J zK|w(t9IcL>zzJqziptBM0xv`w%)z!UU&lc6M>r|**!|fL^Yil>kfqgCXo_n?W8*3e z>Ez;4TV0)_gj!gz=6CS&+OuGNUm&feCR+tV3@#qdXdI}1HwVPTD4 z8(;hkQp&UD&)mH|Smc~9^+-n(iLeAM@FEwzSO3aHqCb9(7w-T&(QQa3(bnVX?~SZ>+3jc(1-D#L6== zIy&x4a*`zF-vVKV`80j`)UWRG@$sH_;wW`U1dN?Oi&fltU(EyrXHJUxH*d~)n*O=F zeP3s%O5)aFkPLmqbXT^wp1V$K1C{wD$`3I)Sclu=Gcy}2E1lbk?d|Oa1w1(8zZVTJ zFBkmOP*+zUST_?3Kiu4Ob^R6TPHhK*UVyf?h+=U`$*>Znl3oh9w}ymAoSr}=&CM`= zImOYbDK>sH^!dvVEdBk_EiJNk(D(+bzm{GKf;E(^ySJD35w}=mWaM|Wuh-+pkAcNX z+A1)qF#iqCUVZ!m0;R>pYinyJj?SXztejdujcrlw?d?rYPRf_7Sh4bpP$(+?eEaqi zV~SYE_I3p|f7e|c$k{>qx+`cYsg+AjAw;30LjNbEeIN(vjMwk)Jcs6`VAnJtzMHco zIe!8$ zOi5rta2YC1VPmzr?|o%-{14H-&GO5jt$+JV0X!3P8g zkmaD!=ue+Mfy;vBUR)#nqoV(n(f@xh#U2aw$4}0{PJjwh(!+=X2K2MiTo*k~ahOKmiL3OKpSM81<0q@(0z z_WLXPEs}TcjDdgs{hLuPe%Gx!QU2==C^T*YxUWuVdGmGx>S)~!3WZ|e&?k={|A>Ge zSsX%>^3LxMNHAJD`FEH-pJ$w;E^k=~kETF1AFhnH9Ph^KZ_h=EPnX434WA*Ti0}QL zI+OZ5tXV5#8}szaA#`55uy)#KRrTr0?n@)5by*{H?M#3aA%R-cDS&uPSWHJ8&v6j< zJ7Bn51Y(fvevn|x(RSlTGpiKgolEnj;w1T&g`Sm_6%d~fq4`vWmz>lGGei%eg%8yX ztzP8j=Gu!HmD)kHb9RTc)eWE0)6isRpB8EGwb< zeAVfQc(Y+eIFEiebk0Wez(Hov(zQMxRwEii5Y?{=vink1nbV{t!Ugr!=en_4X2oe2 z0p1ImPCQy%Z`|k@UxFq;Io4M&aJMhbQa*~M0!NTzd@i{YzhTm4+>5yXH2=yG&)bTNM>O9Qo1oDr2dZ#Y+)w;WVVL?j;D z>EV302xFO(F~Pme2y}_him3U8Wdb3G(kWGRkSbgXZEL%ytA4V{t*yxDm&zXgv=l38PoWC*+h4r%lTvLNj2$$2v^E>K*s zpx@Qy1nP1+(i-|FC>0%vQf*Ll{8{+W7r-ayM)`d+Ooj+^2mk$U02xiV~SqRnZE~P$v%73VWyRf%b*{Ap8 zXbJ4EveedPyhFg)@J5W^uj@Q0lf~`lzv~zI7V$Er82xAYzojbag6!{#?=2~M zsFF%icDTUo=yY-;V@L!#Do21UIl5FVUEO~L;8K-pAuN%i#un#v)GM_?PH&t6hhFuO z(pDC~V;TyvPS;Fu5IT=X9sFQ6Lcd`5=hR=TDrX5E$q>Lglqx_Yc}2dj*JY5F(^+P~ z-3(xD3(v5Oqy`BGbe7_uUIthZso1ZoHVk&p?AG`B3miJoK~d&pGKbV8#d`Zu)t+>m zSO+>XgEhsn1{(9>;_XsPNS9e%5|z2w{Ovwz%P4$5>iKE&j*bpEZCUjFimEE?Fnt6a 
zEwc=F%igqaL`1~l?(Qg@_>sB!^}wwK2RMF0e=gp=^0jqEQ8Veo8#X|y0Dj~TK z?C|wVp^;Rb)Sq33PZNyPX>Hf}59Ov>5Hq;+HA3B5RL$HntEs(6&&AfG4@vG-7<&bJ zpX1D_fC9nbXz9bq=GenHSn-I-OzP6!muT-c&O(FYSHb~Wt7V7VU4k9QM(X&O@G1JW z!+@OhHqAGOS9@JZs~$(m$I@8{qOq~5D>c`jERFh5n;$AntX8!YnCmw2{)#^b5AvvL zj;*<{?t%$a8;{N??5HW`r6_4-b!j>$Bb+@1U-rWpZN0FoV@&33i2GQL9F0Mj_=~~(gP&1=#$9FYfv{qR-7}>#gc`} z)*zf)YrK3|y;gjmJ}o!A`8XK3VJ1 z(TrYVR#$4D6#+g$J{=?WKHV+6IrbSOyi-fQ=vbT4VKg)-&HE=+j|BSEfN#*$9k7US zMoZU5C3sj4ll)p~k%+Fyr&Xy;!K_iUzsiL_n~Azny*YHe`FQDl;W~El!g5p!0vi>6 zX?%4ylBAnURdV8U+VW|$i&VeKB$o<;^JKX8OP(*hwUO(1?i<0sb>f=#FR;|#BCz_f z;W*etbYc`@Wcy@^ep;-Ob~n27<8ed%BhYmQa$wR3k5SVt@O%_f@8HgdPsTs*rroAnju$JbJ8;{>AwPY2Vs2)OMdS5q_ zhTCIMl~((ii)lIzph+2b^HFgDoP@p=mAOZs5?iH;VKCG8sC1u|0^2)0pxBYX#;Te5$qz1!4Ew2p zWnJO8jSGh_?K9Z7dM8Ghl5m`BwT-F1T0vN^K6Vu&^4YT+1kjn1UkAFRl7$Y1Bp&;7 zFS85z^Lu>ITO>rpif7UEIEdS`0z1gN;|5C2b#Iup z5zNKAh=s#d7yeu{$d9NguzOR}5xqo4MLv_Sr@6_Rn!~1@3d!9FOwAebJ2J?2dF$k( zW{p?2RZx?yTJfkJnD(Bd?J_z{Zrd2GMt9uDhs7}GRQTZ6Yu7#4et5j!3%`jTRqox0 zMe~}Wd0zl{;JqqgpjNc0U7BmQ#x6u}ZdB?7o4?D+)T2gRExROB9;=10qA@oLZ&qvD zNTOq_Ayr;P`YoOD7xyTu5a)GSy2H`^48yp&m}D=kw$>M-DgTu|WcWF$4I^oCfZnpA zOHA=7#QL2X{ya%>x1Wea^&JVxBu=Da-e}v`7$3vZLK_jn36qg2q~so zyZ2@}Z|Agm`2gk5fzeL7H6G@-f|pta25W1EAcqeJ_AcB~t>GFvv<@_{?WQ8H7b zvt++v8&FvkjMQNB)nZgb7r78oYf%V!Fn**WtF+iaUbZ)bfWI4Td1P|Ts}E6?b=-Hq_WevLkO1@;AqvkH&M_GGRQn#b3ZXz{ods zMoJS4g}Xa$rJOcbhL^5Ha;2%8)nMKQzNi13^z6-Buorz#P3m-gzCk?SVv<#8&6VRg z=eY&n_lIRH<-O`1Z&duXX_&-FZ4T2rj#G!(WgH5V@GTGBWhYHe?WwJ&rKLshjuBARI|5M4Ig=$iB5oR7iAM4k58gE=Xw#~F8OigOr}5b6|3qw zi>FU9r?kbUndf~@2{C^C!fw25x}hat=)lFtGH}{lv`0z!!E*6D^IJ=)bcR+fZ$Y?J zM0r(}j=f-r>Z!6W8iZCUB(X|3U|F<)@y#~R(Je90CGFng7GGqRe2F*0eDczYC^8jP zYz@O+_?4PMd=HoN-1fh7ks7ocU(tE~vYpS~-hLouwazs)^lqIWM4R?k?RCly62$JX zepc!nic7b=?ZvgMQX9xl$VPvE{};Nh<-Oj6sciDHhWMQEU&d-mH%P}Q=u9%oNvd~> z;^TWNXdI0@NQ!MgT_Wk@A>}o^L~@fK$t|c6Yxq6Pxx` z2NC}W%@C0gqM}^>0&1RQw{I=2kUjHR4$`Pc!j}_$1(_rM;v^|v$|tD94xV6Y&H~`k zOMQd4b?FXXO-=12SuNSS7Un;OgjbBP>voWIHzvaX%mJtp53(bTu#T4|sC##?{$;4} ziQk54*$*LIKnNuTF2M^NKGA25aj~>g^HC>=6C_j{8ykChHY-qo$h>7DFdQ8R-`)BY zmBt$u_K@gHm!HdPTBS*3K#I3KKEPUV(#rp?GU z0EyTsK6TA&$b{C7l@YI)EY~$y(j>%Mrr9F>i~ISdR6iLdx8{eR9ei7mf7vq@$?1$-oH*<*GtCy8^&pjo^~R80V; z=m~k*m~c6n?*KQ#uo-TakQC2pe+|jgmnSQGeK~5vuEabput;`1XX$O05si5pm;7gT z4+4+EM4QFp8sTPU=&Cfi5D0a)wP#4jzU&n`;D@?yz)s00rdjKsZ#@e6n(a9agzCUx z($}GDTK9NyTxwZ@*G#jdwA5@8f&*&)nJ#g@gW$m0+SH(CpLsyX)Sx)nF&)W9vaz(p z-9`2RIG2DCF1lBVLA&6bm)y%9M}mc=_~ zTMHA*M9S&Ri?V<@bwE*FVw{n+eAAr~Js|o^@5ZDBB?`jye>EzInQ$+Smg;oSf z_eMMB=Bq~qo$aBkC7*K?*MCSL0&6;u{k_GgFxk$^XC&QST(u+{05hbBzNrpXnCw43 zo40i6U{m>yCd-9}&M!7+K1l|7a5JJt#b0-o2WhwZ%a6E4d4A~*L*d5I>s@2NK~$@W zGNdR#)bu=CX=Ob3`D)w)3xKR(w#G7%lUw7nB}Vi)X5-fx5!l$^l!rE~|fUdWB7%Q>b!T4kUGk)7 zbyzYai#N48f&oE5v9x9shl?X}4I=agC8nZP`Ufx5V_(j4j1g z*}Y=%cC&t54Qi{1dty<3CQxI}z-`CW2!Tcro0$rAtkckUUK-;0ZV(H-XI@Z;nE#+9UUXmSj1x9r1K&^o zwNPqc#y}(kU?KL_!v2=Zh3GYVMa2$J@eFM^fT+h5P~JlsOzniRk>;Ta*RDh}9q?dx z6%{ABK_#(y?{;L4o@Q{`b5P}BlFqus)*)d)uWRu-76fjt#Zk>HdZA4wv(Bk8%;E(K zr6YBpG6naC>o+j+wgWGIkK3g0_TC0?U;EUfLVt@!>9egBHbNZTVk*0K7=@MS>QilR*8FiBd%5fiq2O6ef44kDipqpM!s)_t^2TCi z1#@rkWa#)_dS>u_NYNG9TFs18R%#sMGd`;&pw;3+U*LaGpzhBrDIK)ep(>g5TKFE8 z+MxaRh;9(MrYo{~D-X&r<xCQ@50{)X6Rcb;RLdTO1;N|1%d> zMNw*ONC8s0+vJb;+~h51;lQCa&Kb7pWw!5bV=~pZ)%Ybiby4o?DK(y5nMy|fZoBtq zwy^ivJcoo-o?9+a8-$Oc^#xZhp8uY*w=p9NV*KH|vl);bG{i?*a5N6g8| zi7femLJ(F@K(-6Zx@yW~gkW>>5D%v1^!RG@+OGZ*buz*~Hc{8z3d<7`5+c`u1Kkh4 zJ`(LucinI|<4j+SMi{a`^FM&-VU#AA)*Ff~lv@PleUo;BHnkxWPEFGfWiKX9z`C1> z*>IAbr~XAkX*drW^XY1F-_V7X9m}@eCVr`OF561!n$tPktZkMXFGFU&UtFi6xiIf# 
zFpNbyO8ok?lO1UpowA_cy6qX0zDL#&GIw4-C4H;7pWr(R9MfwuW&J+XdoPy9#Yab* z0h`L`>8H0qyze^LyTvuX^R~|~WZ-TADAmC!V{UX_MZ9LSiMBV=@`g!DZZAI3!9E+I zh;2cQ?fMDZC?3cQ7M^uRSweO)TmU8y%O>eTCGL3^E$v#xyGPc@n+2aTs_R{QVm4qI zeeYma*fF~iG>x(wmtAj7b6x8SGJdTN>VxVsyE=|G+=y$6(;`pKX5OmaH;3BhX%~# z_#?$W*<+~4%jN1_PA95K<19Y=9tlqU z`zJ=7^4xPHM$D%(Uz5CS`-u?8a@~1#@rJUwxjAuw?Bddoo_OFOPoxF6K}vS^eCH$G zuVDZ4Cj|Pg|1@zrXu=Mt#?uSj*L^oN)O)Orvp+m0;QOR^9>SzBSF(nObz5d&o+k6u zGQ$hPp!|9Q_&aIKy`3h#VYvIlA)$56B;9_?s_??^t@EIh@Rdt(X?9Lv>WpC1>&2%N z8fIeVmI;>Y9`xqmk9EE48=$W5`}FBk&<*qO5Y{|=I`ymH zzhAyZ13H04Cml7=2efl=&^CSan-rO%!F}P>sZ)ULz%K25*Y1QYCu(VB1w6w%sDWdJ z$dM_P&CS!()4*6dmw`Y@icfKS6)Xr`pmtP*`=+Pa6-mH4=X*i77SzPR$;%a_5D!Z`yMmIED4LUY-+V9N5V*Jq zE}wy{2nJQx)(%#afE9PW0nI_sg~-afUUL~Tl>YYZ+uy&PAC;dl80yPfT4n|YDjFN# z5p@Z3E*r#N@gzCW6G~4l`$TfLp{=bAgVF3fxlN-F5IhjAXnA3{ar(h-NPi1)ZwB@x zlnm((-T~~5G{=d68ZAJ7wJ;VmeH|>ES`JD+y7;Y^67YE&Q4Fn+D~-FmyI?Iw60>9w zjdYUY+FCF0222qK^$|e5I4@ninEdWt)@9J>Ei2lZZ3`+bEoDLTzXN~E;k`gzQc_|< zenLGcZ3ithAeo$`?T(pgA~SKJ?bBQd!q@XXY(D)s^txIBV&>56OQ@=VFQhVuBhAl7 zC{|WNrXRI@o0ug#H7G(!oas#`QdmZi&hG3`XW&Q{`enjHi)cm$5l1r zxK+HikuiQ(yxDVRZi79SmgB z*#m#^S-4x!uU_f-LVB}!X}pCYXMEd*sx|e!TFDWjxg0Uzx46{q>X3Eivqv7J$$GQ; zUK`xI^`$+9v&UeL=0Q@Vg+`ns|$@`6%UUH_*@T>FMF(l5QnC zQQ}g-I(YQ(p^oiBxB+oawlTwCdlnv08!^{HtYiqjg|0h9M3ZwJ!)qFMf6DUGcq_pO zo6rq9!yBqAQ~J}8nmwh}8Wa3(Ej`m3%Jc2&9-V$5Nw+{Cg=~^PdGIYyf5-u#H?`EX zEBc=3djp7jyk4xx?J&|X3+1INRbO0gMMSy~AOpx#hZd-#xR;1&59-Z7>Tt-GlLPA;M#M*R+Cnt^Txt=Sonyt=YSRW zck>TFxs>#iBOph1NE<`IwFMq$D=0w7kirv_ldEgL!exMN=$Qg^oZx%mA@^{0S?f+; zOZi1Y-0mvyp?9Cl`0baWQ|A~@PMggN@A99a&2?a~((*bu_ zeLpdRuCc(}{{DV@6zkuG$dYuo3z-muk~Nv+_mDFCNw<}GRyi0?Ve%EwplL{79xhUM>$s-N6c-1K%K^!#ZKeUmBO7VzN_=gE`1*wTAh2;N{@gJ5KIGXQI0?`2kfa>F@EnNu$r58%Lj)VeS;jp)|x>`5FcL)%o1|$e~A#)smUfCeQ9zeMbdma#J z!Bv1RyBbtPVDr@Qg)VXRN=$YzRGU3zza22NO| zmBm_oDuVNECvH`Ta2z+%%5K#^+~GK^4Q7-X^sdXMC)sV zj))azR%e2DE)Nu2f0BxvZMDCGbaHYkDK0+S8Ym_%j_R9NAT{tf_8{h$*nLtu?wo^I z8%?kVEa)?6cl2;L+a9HL#N{kEH-VJgW}=J8QBKQ|w*~(-88t$y`KLXZ zEpL?)WsFG91H16tf(nGdUrD7P>pT_Wb&^>{UwoDQ~d1WTqq$TOI zOgO^=wq%KI^lAryp&{;3m;GTo%d6#G?n5mN&EzeS`B+i|A+@*J5L}!raY|&~B9Cv` zjZiXL8Su|TSk9Q+djb;U+B&%qslg?pKNin{`W54clNUVhMv3|L?a2fXHjcJK1kOS&;7iU0IMb-jJG*JmQr7qe@?f}rk%$kmLEPX6P96j5 z3oaaphIk0mk%`e;Y+sJun%mUj*0uDLI0}5OO7#XHU(t$|S5!Q6bv-+2G9m&g%tK(kbcL$Do-}4wgB#F;S(F zOO<0xn7jwP57jJhW8#m;<4BX8l&F5|8`UoL^9@s(%Mze7RI(VU<_n>}T%rT^5~Nx- z@2H`%Edku&Hz*P~F8cYE+x}FYi84^WH{W5yF8HNUDwk3yry%|4VB^T~fQtvYi$1!; zIk7k(nMYn03mc_JC-1ubwLsl0oGz2Slhbam1fcSQl0*Dj7`Ea8yltS(tuiKd{YQ!^7q~RD<;N3 z&x`9bY0)WA^Z&y1H-W9nj(Xt|eu#z6Pj(oR^Hn(La0UJ1K;gmzOm&?CQ;&Pk8de z<*r4;5s6_v3NAyTx`~5+RmJ`DjVo)KOhQ$K6Az7O{keT_?H3eXYOOlJw%{bunw*wz zJI$&{_3u={t1Az_+;ZB=b@R#6Gk10E4@`um9um6}$%fIxlmp9JC%#jm~Hl)1qZ? z>_X~K^`)G%OXLvbZhy{~ z@ymc3CUENJ? 
zks3TAGti2YoG<}2*(#VmkGx7YsFKGq;2m_v$qcy3bZ#RHmN|^{Frb;GBn{tzFinYo zV3|>FmMJcBD%7#wV^weS(s=WS(oz8q-kbPi!yX7uW@Zn>xx=oQ(ET%C{_9Cjlx2YH z&3YRYa^y)q9XcPFpSlDZl|2qlPNpU%RZnk%=?M_o%K*WsB#3tO)}1?bm6eu_r81YL zt8CX_7;(lt1_(BTV*Ft-J~O5;HV$H~{+LeYm4s!})jv$bD@ap+2yoY`^yVWakH&et zD-*?aE6IiUKMLG@d5Rp>0s(AqxIHrnlJ%s?yALk2v-j57ei2f0beo=-pcZ(5Sp~G$ z+1XY(BUgpjP<0|4jhG@)gm)GJ&6DMfRnH1%on(}apArEBqhiS~k zdH~TCr|3W^>B{8%wG_ywSzA^0159t2uEv?oa>hlW< zzC9vJv9f=fEG{p%2h{5nr8dm3;zoTiM&y%ud)l~EEhav8g*vb~rkEdu;yhU$uk#>zGR7IEjzQOF9p5CQK%oS#cL zdeOF-)ygh=cf*$jS@dYt<&=me(B-3^1)-^y#4&Xv#0)Y(qnyNQZfl< ze?YyV8m@Hz{*m5W^G{P8J=q}z0cM7V~4MHXj<(HVP|K@G4o3mV5>f+WQ0FLRI6X;G&;_A z=rI{LpEuz@B{U;)c%|kkC~r9FQ#DW3dZ??%zy)@c=(w@Dh-uJ9Zam7rn=?EZKfw)4 z46(LVR>r59W)68`p5ShT*fM{f4XnaAbsgGB1EM)WcK=mbAw^mG;g^nkT|TV5Ug)~9 ze&Qbc%=w!jwWkiG{!5W1;e%2X0`xlT{nXxJLiC0C>x(CY9$5w!qn=pYERDankBjT% zYCyr~f^~&K6; zO5V3{KO5a2=r|7=4_!Wh?bW;-H0n~s)Qa8c2*7PQnO~if^%1=n{Esu$Bdz9PRJ`#1 z{{8!4o(FKl&jbguRJZ-8=SZEoB`PLnJ!r6_<6>fB5;-lGdlO|=#pO?dqIYz18XtTM z`1i%*g<@!?lJQgNO~p4W%rN#BOm{%tD#tBj)QKQ3(=%k7mYzO(=&(?uhrP zK5XRY#jW#F`w#SAkE}MuhKJv>veIF}L}fm7P^AhsUeC-RttxTsE6`4^ZOW#KZ>}o$ zI|n9qY9##hAHrWRSfYG|v}bj$zWn{F2sq()ZHL6^xd`*l+p^f7b6pmSGuyRhi1&Ao zdj*-(wFYfFU$mrqJn!%>2 zw~$dWF~$M_Z#@Q16NeM~i%TIU9gv0kB`mfo^9-0}EPOPO1y=Kixc)jT>wAfDEr_#x zrws+{?d&jpe`hZNt^pNMD?<$gg82FK=ik3e1I*y?T8K4bVr=Xpa3mZYT*#C6AVW6L znuAaS`-lhWO2TKy{5?=%tf>+6B`oy3OM>?wA2$o;qfqKQqx7Ks2DA!f#kdBFe^BQt z7;swqQ4hiy3*j{IhPZQMDOW)zaKHn~jBlm9esiCX_o@NxN-vnBg+*qz1Q=mhHZ?*6 zt}a-gA_sbSY|Pu$)%6>N9rTHz2BuB+FCbhQ2Xl)Psi5N0miM9XYL0Bdy^%D)K`+eJ zkJFbG7Z=ADgONp0iXE<^<#+e>ot(btR}P{>ffay{d10X;Az%OpOxUrq4#NF2j7a#0 zB=6~t1CG7Eev;mOhlJ3mqW+xB&8VW-OB@_66OmJ3Hc=D|81n8$O&l5#L6vf>Q>XSv7l%C|Qc_ZrV`Fu9CvWvFcj2#8r&gweuuyz3{W>Z^x$4~7kdNN- z!7~nN@3T!3pT!d^;EwV_1y7sG%Faec-}6BqDu0qq(p&2kFEwhQZcV#CG=Xb=Tz#?C zQ6B2paqQvZve8C)lZ%VWTzO$(A#hPi$C=ScW7FP1kr@=RfTpZSAi?yBeMtBvk4h@W zsx^hW7zTe=S0O1WhZZn40^2VGNsrnLke83?y8)ZmMb&_tD z$A+iwo~cQTQ)%tw8hWXFtpdLhgvuiqGBh;gnYg2uU&dqxip*G*q`=qn<*VIdBVfE} zQ?c=8qJP997t%@z5&?#8tl*c{$-n{vf(rq{nyda9d1k}f&g^xN2&LH@n>?5#u2XQD2ibGi&hA;#jz`(~0NYJ#1bK?Il` z0GYCXM;RC2GKxM{G5t{y^CB2Bv$hsl9Amrg?O(w?X{xIM^5?fYnNOs!2x-QD$Q>EScDli@$OJZeqzfS?DLElao;Nj%)ItXkiN@+HP@)Rer7T5q;NLr_Kw%~*;rj2+=F`a{s47Z zm6l6E@OFyj)q>BAN7N1&-pBsNU5!^jWB~QeMUsd7vW;Xa#2EsTU1 zU%j}aMVib~T@!C!2Zqk7O`Lx6Nn}SJmEQJe%As&FtcLrG_gh!Uw2_Uqx8HYp89bev zd$Z+k6&meC`*fYYbHFJ4XDw&L<%l*;cP>$yLc-uvx`z_{dHn9HnoD$z|e3j zP=(su=&?UX^)S|SFhuVXZyAO9D^QE~A9_~pQDrlMeuaC*-*}DXeMVww{fU#|(hLJ$ z+mK`>w3BT<)6`9MrNDULN0t4VYe0`qn|rXfvUARm<=%|<2t6ZK8J|_G>*OCLjWmSc zw4qmI+%UG{!VJ5g5i=axuQ`=(G;sWLVj^Y2)yTOWW>+3{L$&}lQlE0%7UNnD$sH+4 z-Il#`r$4k>uOR%|*j0W`+6pWjME1wPfMxab&+*L&o% zlsI!PFB6x1VX7jgx8r>o)ALQ?h?HZD|8;--#7#F4I1?^j9%!fyW=H3kHf)(}l4#e! z!c*E}KCIh*iH$|AMFFm2xN!#L@q1{ZeX3X!v_@cw}hzIMl z=BNifvh#^$03WL?L$TatfPGu?q~#|wwK>iK6nNd))+Aa=o2;xpnHMUjUN{OrC8P?# zGT}HpD)@u{70JGAXwf+6^^7N0M`+e8Rm_Njxmhb?36?Yb0hBGw%0aGU$x-DoK2R4( zrB>8*WV}0la*x`VWZ0|p)URcfONuQuZQ~O(343zd_Qy3*Dr#TLWEU35jx{xxK3Fs?y?l8-8hZLmpmz6C-hs_g6gY>SKQc zZbhkOC?3kvsuMDz2l94$(P)3~u%&8JU#*&$U=7?gtykMc%9x9Z6$vG9E7u1!3@o(Q zc#~HurDSBPX)doZ<)blXmoH!DvwS-?^35B))k$VxD-KnKR5~CpwbcB(9%1q_C635L zC$jSsUqZpV@|6^U$$O{-er$doj7>XAT*X~8CgJ|K@Z7ELj){Qy3CvKqz{Ho{-{-V2oK9P7F(_IJP2;%!F9MCa z$~i{uQf;Trn`LK|@BbN51kT(IQb}j+IhYic!+u#$D#ydBvRdGkSZ^B3p1y&tXWYZJ z4n9%j!Cb67**u_w$B+ialP~)D_}G)Zf64w#Hx;s=JZzBx``|K|T_Sz^wqb0Ubhe?~-_8!wzfy@j_WuDkp zoK?^fHdfLE3M)J!IJ+sa%q-k{>=u`*l{kaS(u-bLAmWk>+2r(ejoRfXj!x?dXds&? 
zN*Xz?=7lNW7i4Jt-29~tL-#M1PqI?)Sy@_oz(U@4v+N>M!=1U3qW-vOz6a##$S1Tt zOAn}0D@4?ST${v$agJ% z@6;NrVZGz6H$Aod>(`BBPArDc@Ibph)5*3aj$fqHZ!^dsoRy7@c3mIS!3oWo%*qe< zyrPpao)R{x^YM{y5TCmr&tBgUWL}y3RRW!_9UnJDn{KQxFNZ}- zE?wtbLWqhx-E9JCg6JiAdb?f9TX64(6phk3x?0lx71(+{&HP&|tipgB7MRidZi8INlC-uGO3yWA!ieKQ&Se)OGap`@6_xeZ0wb>U|9A!7W zPOZK6T64`g#+YL-2xT1l;wnj0@k+F@ zu448b-Xat~xD_r&WZHg_YfxTaZ`(NE0UjUbkH_yGGUf(;+nrO)?sY3&;xV%x7{yJB z7x(&HyUA*D#)KHPD*cseu0uR7MkMYr6EuZYhV9Eg|3<^tIbpvad&|FU+l0kJkAN-m z_iv+#`zdY(swxHk)YM<<{LS~qzLq{tIcgfiNx{b21YPm}uz2Z>YhDkNwU76C{#V(; zzdvQ=mc?S$w48^}d345F5f6PsU^<$o$r%Cc3;=$g) zTl=c+flQ4Wa<0DW&G+z4*2ZhQ;Cuq$!u6|9Tr0F3d58q{MLw~ky?y`WWG6M+6{)Ub zWc1s_&m03@`TW2fT3l07-V0s3I>565-`uK16Gb@qKAAHNexFl`I0S{t=RY@bhW=XJl^|Jbg`37iVRLK}AgT2NQPy>SDu?xHN{3kTf{C{M8N z03^TjdSH0?3e}A#f+C}Q*@1e=6 zmwv+!*g6$m3^6uV9S&)O-1b1#U2S!Becmg8Alm|rdUA3CWv+kQqN1eq(S^^iZeAGx z~}OY@BMrD~J#!VldAuC|I8z?(d(E$2o9Fl5btcf@0wz z503@i$nbD>@h|XA0Ph-SD2kCASebP|P#RbwGbw-$N~i|?6d_MpS(*JsXCoL~%YHA}OHBSqf0uTUJg|(Ox0obs(o*o;^+Ln6*Rv{p&-0j}m*(t28^)NB< zrNV@j)e#6d2@Wpq_eF$u5_}YRGN4mIo8o)n=UZEnjg`l7X{x*{3~lLSl5w z{HIo8gv!=pUs_hS=nq1f&mM3|8ENTJ+K*jMp7KMdr&W^QzJLGDGIFY11IzD|`;{Zb zVjlo2oM1ly3_nu21zP%7KDL3Y4Wys&Wh0g9P?)HY!;eP^l7ko2!QmnJ(Zr?jUtOA# z#r|ZaLk5V&C`ysaz$BaQd~759z$YNEzP6^=rk!qf3_uf>vxP-KlH}D=s)>tiX%LVBOPn+Um3vCL+YiZ3lqaC8EC;nrDiO|7QXRXyoOD-yCPP;@07P?f zytesydEfbv+MOWH8ypP(%w%xH4G@$93mL#HnZ1OKMI_*~a^)QaU?R|>cPUM-u5&0F z*f!W_uq#haPWX_E|rs!fp!J}=NW_h9^VoTeft*aX1EJrnw<0lfkkyHA3$%u zNn4`7&{S9_!os+U7&PrV0E~gU57bI9qc9*`@Y%D3wxY)sAbc}7Ujc<6Sa7@{fp-HC zXCfyJ#c7!!yP%+S4B{US89o3KJh0S(iH4jWiWAt+gtA0dwzdN?1Xu6ywj^v|aRDo* zpiiH23ktY0pAU6*bpdoOD=Vu=cohW@gK@A*L65ky4zn-FaUz*Hy4gJegOLP3Klp|> z`tZ9DRhe~+^RxlA`wn#1KP@6CQxK)u@(j{4GGLp5Y8hMK-d_HwSZNO&&B#bbUdkmS z?mxuTBBEanMhIhrWltz|gNm9Owz7p(S7)a@`M)tNz_$AXvcr`oxUaAS6QZ>VczAdS z8^G#j^)|qnNE}gFh*6zAJx2!zS##4vLt<|SOF*wOJ*{i46oB?`q?CY50c;M|7tC5~qqywtcm>3NSQ3dpDySq!b{cjSr+ktN1uNnr)6?pnIH>09z z-~}kl9zYWP%FgZ&z!Ffip1wt^O!#l~-@GdW?s);rL4;#WT3TA3bA2^UZjoMhio$?RDGgDAKafV(qd=39-g& ztE;M@Mk{bYDDkPc^udid)PX)6+vkNh#0CU}0v~@}wHAhv-U`8NJ=u zuK9i78QXwPIU+syWnvWQQLYu4#bve{^b7A!>HCwG2NL`6n|y;I)D-v0jN z6CZj&Ex_fk4$ie9Y1lBQ5$BjA{?5k$Y9=KmLDQGb){`tp(P5;ssp(R-s_4*Ly~_C5 z*s6vsjDM4JIAsn-X(5i{rP*KPy+XghufXonuD=NS0bmx7u@&YcqgTrVSPMD{joid+)sFMoG0xVRM2E3+nPCw9#HnA3%?2o|V zJR2V={NTe{I_RKh6Ki6Xgd%opFq#rZ53a<{O=fc~BX?Byjj}Hw{DM%Y{n_3p&*#-d z^3?H1{C&n3#3HjR=uF3ABJn}8CuA=O5cy7sfj_kc1wDAZ*~!W+qXb+kO4(@G)H6og zLh|PZ1DT4--5+%V*3b(`!eo)EH)wCC^!G74Vm+;g6T5%-u@D#z?g-TkHdfI1)MCFG z93R)T>IyY}{rVA|ch)YJ%r*&LZF?a2<&nAhT3c8!ol~aarYR{RlsGKx7zaRN;iuY+ z9v#5^@cUF~hARXl`n-F=1lQoey}r$u^t5%)6Y`@K`V=@&62z1W1FOphYl$EXi)7hzXJIdFI zP-HExuP?AlF{*G$|8Rq5ZLTH1ka6R>o8YOj^LtgGVs$5{riQAqeBbRx==lXqf88KT zGrum_G3I2;+Ug%SBHoLmrmFha)f^A~-AhRkH_9R7=}q~)!Fj8jVq`GmxcaQi%AuOc z)0sVnrNvlX9Z)L_s7hSFGOY11b~b`tmO~=g6m4%*&KWI7T$|T)M$>$Fhr@GOG*j;` z5{LU)-dCue!ANfx=ClV0K1oTh#YwP$)(Nq3qov@?F!aL37Grnbk}O3c;yLCMR28$ahpR+JO^^8MO< z>U#A@@BttT{LXvr%^br=u^jt4?yE$xXFV=C_}xL61*oZXRYnZbR^yFLjZ2znf9dJ6FQCM-R4! 
zha>UqLI^<*q}s>DazTZ6c(B9&;OvVZI0En_E;6%nFfyFlzt8FcHAxgfPa8O@T_rL* z`8#4XTUpYJL~(fR5rft`+|Nq*aWGZpvh{109I~~I3bM0IH&<5vlgVwDK9I@uPt`jM z*wmtFPwqRzEeX%&w)yfD6bs{W?~Ym^WzG5g;*mJD`q6eO^Br!&2FI^#{L2unKrc{R zE6=dVnx0V&vDaa#F#Ub{MB+!P7zRZPNLBPOJM9yB z^9|6;f!GtkqV{`L_ zgI}t{USI$A;CF-7ul2&5oQ4IjXYfifH;8DOjiC0@{S%i_d*5%UjyMs_Q!~=jl{@^l zz8~6U?q`5Gn}7s3FT#1gG1Lz(b?1|2%@01n5}GEN@yi~uFu%x-J0LzhuF9bAfFQST zk>PwVA^MZxzyph}{cvo?#pIOoU{VEeXedYsYFiC{QK^o=zEOFrW5fYYtrp*&3)etY z;cVtTv8>R~sKW5HoXID0OM|jcaL}*fM8TbYyv05&T6Ho(EbV$pG9OwK(ob+QtxTCb zXJj}IQ1gl9A}s08dIRPKLy1&&LNii7{(j#mKE*f-<7lk(+S<%ec!PL0FA4opT_V(`apJ>KfShQ z0XXm|qUN_f@lrx>Z_J+2KgolPR_H&5H1oz|!U(P%i)lY{t6##YlXZyYh4;S** z=vv8OgLLV*2r?0!G<}j^EawtTgV+Bkd@~E2k`V?IOrhX)AtIvut$TZM#Zp-Mpz+de zV#3X43_CZqTmao>woCX-X-rR%J>(wKcb3tfZz{yb>N^ z$QcW$KEDp$va_*~6`z`Qt;GgM3hchU_X)bNDw5CfhLB~eypw?qPMzU!tGkz9`?OUDDr61T9vF)R{`NUr-s z8&G|CYHCvbX@&2P77QRePnEUb zvLhpjRib|IS|mY@2?vC5#TIa+?Ekq^V@IY2X(nmIodBgURbDJ?L*VZtg4c>>an6-2 z^G5xVu)3nvn8es_U1N*#O}JFgM=UYE7>ZDn5>BtdBMTi&p%pt!o4~pz&Y-%nz^|V7 z-wph49$J;mtB&0AErfWE4<9&v`?;a2ruNjRVqL91V&r;G1oky{H>||f_4T;uX!*>Y zoac2Fvzhqdnq&BJnRkdS+Xi@aZm+Xv!dgwO^0l*37rB8x*%6Re=_Rg+5B$0R`hgq) zvm!VSUuW*skCM%qDxr@pF7RXe)_c7q8QHmQK>6dr-9fnl134vSB3+lb}WN3YaQ#*X;HE!4M0L}y`ygZ+% zO^5$bq?SzoLU3ySBZ=)DF){JGFo0&ni*wYRvvr>y==32XcRz6XC1G5JxFLq=-wzfK z4Tmg*HTM70{fMA{dgYGn96 z$VtmT>QAd`5wl$>vQ@Rd>9sr~?C&tU*LH&stk#6x3Tk_{tLrS!hsU2jz2KGf(SGiq zNU3-SeaWp`G#>Hb7L+gAk#xVF-$=ozSgM%hIag;oH41knrJ@2*`(=HwE`#O+Ha358 zAx3j8&x*l&HO%KgmvxmAn^XVS9tsVANXlG?Xr^PlmzFRm|E*Na6IZ+lIP61<}a-eDXcH ziAjj0Uf?l4PmL^4Dt3?-uD0UInJ*$3>|4A2F%sd8%(?Tm>x;fW23_Ke9D|DnF;ww$ zeYJ}eg{|GZj9)ySiJMza4sZ#8A8{8F}mf< z3-LLG&-V>KKqQztW$-{-gk-)ELlf2CJ37h`IA8Nww^gaqr3v8M*LaH*>>9aHuFYIz z_0JY@PDZWCPd&T6>+|+dkW7rPr1ghYx8$n!-?a|{16Di}0|URy4cDqC32dW(COOAq zZuGCed%i@2=56QfY>HP6HGjrKD3{Sy9YE@H%vH$y@S(b}#M{;1iy|M!7Kz*Uz=9R$ z#*M1qA-H>IEwC`pg>QDezO`#R_XklI?~vG^Q0nFJY$lDQ>@7uSZ(kpWpkVI7)|dUq zv0rkekN;?ME?4=!HslnY1MW0q^S@iE;qk zJz`dzsIJRmKTU4sfAf0$!JgZ4&$cyo@L`a2htFULo|*#GYaQqvvP!oRi^Ww{7F~+c z7yvMk7Lw;7JJJAW%R)^;_u$}QDoKpADfqQ+@r5?Gw+A>qrF$J=#bY_~Ojvb&Vd0nG z?T%Z}M7i#UfkHtAI!|vpm;{T<@(du^iRtGG_whJYiJ%%$r&y;wz3br{=kn}6gMU`z zIzTwnBAfCplv@FB9lzhF&sorkN+G*QZazkeQUV}^bg#^A7;D1LzK_ z)vY$9Ay`z0A}gLj5lgBMST1~7PBZj$%zkVG&~4Xf-vz0C7tTY_afRSX{hMawAO~PQ zP&w8pDWQoDjgF0h*ZQEXgR5&yKl}m?sp$C~2W~E|0Wc%p8lPVsLU6YNpdqB^>E)I9 zf4_t3c0noVc;uv=CH!D4;W(l??2jI0&REIG;W7UBH~dT|I1bP6yNtaKbvxRk6UnAP zSop)308&5qyiZTh060>Ew+=m&W6=E@x&Y??b#--c9We2LeanjxzBc*7FCf4b)48y) z0JzH99x*_1+z%hFR6~5fbpzp@lSB^iSU@xM=2m7mispwSiwy(;c!OL~u@?YB0Kg#m z0dNl}iOIu*XKEsGT@Y*in6=&n?jkeI@rD5L0r)aE>YW3~_mz~wF5qrwj1J|}yoRf4 zL*dLpEA-t#*D-@Q26DX2#7rbuSLeVAA(_3q1j!u15Q&`-qQ^jFJ*i=dA7*mN4pBtU-kBRMpVPh+4gp z<6s&xOdp~ugqwsfZnz3v5-OR1^L4O)O-#&?My$nK@i6O^R99;)c|uhWLZ^k5zuf;} zAR36Ce%C4e0e{=|MMqaP{|Hhku=Eepbk2iC2%@}lvkOwln*rVuH@B;=8#1!9`|dv1;7#-kP~vMuw`y%z*x2H>``N4kc6V}$ zxKEF+Erd4=<21z^g6)nb229VDY0cgrsQGV0%dy(fdw@BIbu6C7lNG{pfjiZ2O6m9J zaeeX*tq0(#%Dy0;{N|3xXCJdFgISi&aD>|XB%kIqiKa+3PMpSr5Y|XVwzQnwOqCNh zx+)`_Igkp%`mnf^PduBRQ>!bbR`2!{4VFn*hYdb3F=3fE;`j=wEr-IuSo|h=QPDY} zBq!wb`{eJ%_NC`$;_*o0#Ene9;a4IE!~*VFwvgr$ABx3;Go}-yr#h5tdhX}lZXyn} zg@0$ZxAtf;ivi=~;~VSio6DB8*Q*+yTg64r^1p&A3od+o?}t_I$mg5DP?UPjV{3UJ zhkD7k;8_(lP^JNdJgjinaEg>&Wq_{jpr5(OtL9I&0BzadZW4@wB;j55D($XRX^2kT(Ii$4iqo&-$jObk59bFNSgng77Xi*ibl#Z?EX-=>dXc z*Jn(zIE9wNwLgAewySJVKv!(Gc>Qf@%MPxXbH{HFd(LQ-~wH<|(+T<6L0F)4Z^d3HP`=OZt#pKFq_yRkH4>m5I-A9D5b z(j&?O76l?iI9CZeA|DS>1c-ZSCPFMMEP}hOvh8T!7Z^&|$X;4;HRxEK{ zs*{3$XO*c}Co-<%3iU@`C(o87W#3#&yg2zKvavuWY!$bipi-tX3s~!e4iy*Oop&TF 
z+rNWFwuW21p}*_qZN#^mupt4WmAWBb@Mb6{`-^X8g{HovO7C!Fk4M3`J6Ks1>}Q8G ze>-9XHfjuy;9DcZTGLcLb%bPuRYTq3X4mEZ%(%#3vmX;;{h0@vljPPZUiGgl{&v!o zTiAwq^%hR+mM+&UJuv90zctmUK6zYCxv&~w_e=yLPms~ zx}U`5Yz^)|wx(hHfIx6JL6lkh>hz~Kgr8j>?ie7A5fk-;tvEv%7pXE&=wcowg{on` z=ysS9_&e*hJ*c%SSijfwnAOyC|A*~GeQiI^C~UF(a`zOX<8ZAM>y4bW5NFa*2deDL z0?WSew}a&IK?|y8_lZVKopn!nr;J-g#|^w&sNFtcE+W4fGqUE*HDmfabMo=ML^3t8 zJd2@(3M{dn(d%w)p_j~Y3)jL9;k(kIH|3#v=+~?{)$UWhL2c#2LLNzTR7@inf4+a912PC zUPl}1hZ1W=MFLAD@;uEfGT%HDo}@+CbLMwQ(Uezar`8Q|=<6N(r|q3F`liRP>vfr! zuC;D@>zw7Z6-HKj39VVBc1y)toW`mmQT;ax8b)0ZnN+Y0rNa8XUyUwMzBIYx2~)jf z>|AfxlQ(GTpsIIvm`>A{_ja4hP03~v!py~ih=m=prXMBk+`)P8sIJtsN}ttl=D0gQ zUQE=aN8_H1t>UktFuP?kTBwP+)K zTjLtTVX3+wQDKjgo|vdV{6%`SeKl`h@fNW-;5}_RTO;gM`0b$nPlirbIP@)nRC408 z`{2mGu@eeGAvS;?CJoPck%X*1WIA2G#Y+3jE{d{X{9M0gGRve0;#!Nx}=0M)rF zrLKBcWvN&X^SPC!J5B)xB9oi&gy@n(pRpItsc`~6>^UPn(hpXC;kF^6FYPQ-OCtwK zGJNY+RA;=$IIiKSt0(-f%fF@^tT1I>g@}xd{O}(_9u3I(80qwMpWoGHEyQ2X`;?kt zo>Vu^=jWdOHj}`Z`)&|QYFQb$u3GgBeaXd(&Wj2mng^7t0?FR1!M8|DTU*dn7xZ?^ z*v>zf_0Q8ZCiv!*(!{u^!mW=$M3@3k1fU;${VYv6+7CXY$ReA!Q;LKzJG}ZbskPuZ z7%BA@B4>No{LNR~n9*~X%u2dxzr3LrT|<)jJ|(eOAg6xFj~T;?M6M+qnUihkLo=*sVsWg?Gv@n~z z@R}Cg>aJU}^r-Nm@7tY79%S?VOXr$v=w)6t1_RfO8W4#7Q15ok7LoJ4k6l;{_laMx zS8h`=HB%>SRB-c%x& zyvJj^_vNp|jh@&hd&|Gi`OhtDUp?^B6^PQ0fw93{!0ma%*KRZR^mEz=dnE{-JTjql z*d;RUNiiCsyQ{vbefM^FFG{3-uG+r*9EBN}=$t%KmZ*1TRlMJm#eH>>qHsq~&Xeki z%ZX%(K8x(!x2DIfqvJXZ=Iyj${Anp=IThHcS)NH)?$Ur)jR4o- zBewJN<0!xLK0RT3VVBD{gPNz(h7fz$YGT9=D(G&Od+o59alSQOF6o&$?eO$Hs)uX-EDw?W7TE~P9|4nrV7~#&1lIh&|DyW^ z0Y&`p&-&rC2*lNdBYs2Sgg~TTC1s-A?E3%D+nbT-&?Eo9{~9#(D*T^s`D70@(bbRX zQ?l#dY|7Ey8wJ8%5#Z2Y-a5>>11ID*Zr+wnmb!aPIHV{XDw#^e%o!QrE0Oq79k@S9 z*_Ge>`Yu|uxZ;p5eh4orEG*P-kjI7B=LJzX=%^QrP}@KmA<7MX@(a3~1{n)<`Qi-e8yNiXHfUfFzj_b6$^q>&omT_WJU zP#O44Qp2t${=-`(vF;4?4k_d|eICrzuC9!KF7vjfL^c?z*1qKXDY)*J{B#>VcRobCqS)?k2j+S%9-ZK0IE9?rFlJr3(qnO#0? 
z>l5j=f`<;3fkfq9>oY_(*YXPL{^U0ew(F%oF=MieND7#EuABUX?wi-ojj}P8Yx0&L(&pB2(xb{d)Z3j_W^^ecSC=-S z*_Qg@bWecHnpQa?YNnU&_3upzkwbNx&%)}azrQW@_ zI+Cjm%N-=x^_@wx9Gxo{sc2qD_ueHiFQB#cK60P`sho3nHBpXJQon&A3Wx^hL4P7r|bnc=AGkQPGE`fSWI z&QC>#oy+F1GDc7oeHoY1wElG!7f~Bk6zrC4pC0PhAAb+*&i8)4rk67k8JQpHdMUOk zSj3+%6qSfL^mEZ0wCv#3=O^vBsq|QdUsBW7m`}zf)J!5)R_|_7anQgW=ETBCtvJWy zenJ(gFJ2WV&$gCAwk3J=o$FRr8B^LnsFf!z zn0}^MZiJ@9-Vg;Mn+Jd7Go(2eU7!`P@g2=93m|=hms$jPi0aL z)=D5S0lAs&OdS>b)sq>3&;|%+^JsN@yT`N`^eZ;+ff6{$4+^^BA@3zlJ<9_E5$Le* zH0FGx=GNN{RN;OWKexW>W0-(Q2GX$slk5!_D#C~n>X88xA^gRAyqdV9sJ2X9tvV!E zb>{dhqUNcKUvkTJ>(&FS56|gI*@;4?anF`r+`nnBzIk(E@OY71KRtYv9{c+EAi*`^ zE#Yv7;%5WOu`dX4)F-zs-4~NdIP&j^G-Z>bSl$BL1+^offlaOw03b?^dRtpr!GqQ$ zZAxSywF@`2wCtGq_{$IsDuh=CpH;F_^t z>uhnFx5|5*t``)vgB6M;AAl-b3>JzM@UGCUzF$-s!MA&yD~n!TgUGC^~}azOyNU^9&9}(I_rYHp|rurn$F&X z^1e4Uv|)w$H>FXnQGZGIQEAqppGcA%s&}5Fd`*<%MWm@0FL2R-tkCM1dfRsNzOHGy zs3R%RghbZs{M9DUUeu06CF`cAYH3p5`rS|K<`|ybWzGg4naW}=N`GR~*Jn3z(=VWLC{iR6_3dTNnY!pdvNc}h(z(3jflEojcFlM$ug5b7`i zj4!3AF1qPzK~?;{ooxJ7Z>1h8$V2UhTR%%nB~%q=&ZsT0HCg0`aAz<08Q`cB1z8&y zz>T}3i2W5=?XWC+tOK)&dr#?FbV5RzOKDDPK{SYww1;}B%mYgYxhUb>f{7QY zq8kzTW2@__C20Z(D(TVpwSNv9x%!*CS$+I_UY2*KR@&9iDPJag9@yE@f7_vKiIpA6&-!XKrf-|H1yQa$|$=?Kqv#xTWgnrO)*vLrgn+Ki4K<&ma%PP1G-#-(VBu+ zrB*nzJhw~H>xgFd@OB@lq5vA}ZzL&i!}H_h~0XSG1i~Pf(rYdUP=&V|zRQ(XnUs3>ptby^79{MRJ`ee~4<#4MG<0 ztMMv!$oR#~i&obN>CajCR9j=&>alcrYv8FH|IDCr)~?R73AINV7&hz{0Y9)Mg61Y16qbIqRk4B0knsbBqK>SOE}%oB81bT>ckGHR&zUBQcgPFnO7kTu)Q(a2$t#KE|)5<;j z1a^a>FhbnuxR-5rKh}*G-I>0SF5H_z3z2=SrFo}f5C0;!iN6KEo7oo2a6#quUAOQ3 z^>jZ}Ynr1e)tkj}zpyKgGzkDz`uik*ypwjL+B>K6tVYAr}p`vA_S2o$3`&9K}a~6}Jkc zNzv1TG14>ibai?9@^(uOfPmu7ku#1L&~G4&wlWe^kE>xILZ|OYJRS&T zY0ys0P=4m(8^ ze4;|Nl;>O}iPqD%LhVqUd|z5M49@K$BJ)TRzV&SQD>vx9{AfJ)fCbyGPEp;2f5*;c zboZ+YyS~l+w^FlrohvX{`YF!3e1~I+g$37_KMEAopgM!t|K_4qVxXmnu~E8xH;Kh04FbD;__p3uXKyDXGF$mW54_9pOvCew}_I znCVAkmSQ@Rt8`C`qvN6egH*}w^Ecif1ClK?gU=qS1a0`|W+IENDT@pkgZ#TK8yqnt zZU2V&?CO~AHWGyo;znC)9b~Av?moqHuu`n7XsaiTW7gx`us|p(MHz*?Vr9WqFE@#i z>uvV-t#sK=;9h2|@vrB%!M~xwwmu#@c`V_#d|mFRoxOcBtMyI$#dPoN@q3gu|Jh9T zz*M)DX#`7tr~Mp;8!dBikCmCP*O$vbUJXahcjnLA$&llWo4mo5IdUyB)=nQ$EZVAb zcGsMB=MDv?P{Usd$-PY}>QxM8qn8Soj zN&R|9U%%MjkSvyvPI|D-Smfb%D zJ(kkM<7=~Qz)ycJ-EnyjyJj-bA)zWQ&vroOZldI!dzXrn4u|_`!$esEKc1T}-i_qdAPWKdgHmLye-nT-7)yR&x_BdWq#;Pg| z%}MvWQ5d=}{cvBvFtc?3q40!OBHdT;q7p=CorXofP*=&_{Z*3-$UGwgcT zr2lx1{3MG;Cov$`%zd{=I>JuIqaVueV96ts%tJ~_%FymznPdVX-pI&kyb;vYQ)GoB z^~_P)1}|;x!gMP>83+DXcN}5ckW0o{=pTw3ZLFxGuA+;6ixsAl*OM{AjALw&Buj*^ z#7E*0O9Aa2+;l9Ibl~Tr4_{w09lQ)$oS34tth(bw)QKdH*3yGTZ0Gd;EHCtqnkgeQ zCf?VpMOls2;c7#HV)=CaQuM+gwHG#pq<^M;M5#D#&c;IH`;Y5wD`XW{4W_XR5P6sM? 
zVUOt*`5TQJA}`k7Fv|N+N!5}T`mp}q(zQN$J>&bJ;!czu-`dqWVyde7Om17Lx#Fy^f1A9hZ7>g%NA$@aBt9{&6J0kaPduefkYd-D6N)q4R%%1#c1jQ8 zU(?mh-z}#{#kJ6}ifHRA`C@6yc2Y)l`N!zL_gYjUN$1G_PKmBshYXa~#{2KHOl2QD zWBNWbal|{YbF&CrIaRXJ{!I8w87KP8$#^<3PN&hv?WO8Z=}KR5)AO|hqmf7({?}gJ z&)vR${Oq_}t^JengWd0Td^D>CoE%IcF>afNP`ATQfe539$%B4bGJ#Qrn{L+=u7>9 zv}OcvCLA|dtPjuFtbd}`cQAW|dH<|p{|K}klT7H~(63eG(8sLh7)Do>U4Ad(YDY(1 z5F=mz-9o$q|2I9AgvrH6V_&!3rI`0aT-WNk(n(4c;1Ma|;Wp^2)Lp__(0wCA*G^TXxDe3YCK#fHL;7?w+pby?VO;0Y@j&M<${p6EAi z(Ri_;u8LRqMRSR^qlAWw!elFB^z?JrAmt-QmzTQx^NtB(k-4vU$aVP%U#vvQ-hY^` zI_|B});f`T@UYAfH@eM2JL6#wGUpwlATMu>s-mhYR=;7Awgzot^pkuDN?Ma(?2zQ< zjgFTEjOfH;Pt+99O;1SX+FpkiPwlE2SSP2ys(*`%ae&u+NZ`p}xKPlui{F?==s8|Z z7wjW2YTcmq?l0ZtTAl`%6-nOPQY=*Gun$enCN+VYrhD0#@L$)^zvE99x{PpJTm|m2 zjd(Xrq2`v0ZuICdJbAsKHMjOJ)hwj6o@Oe3zJ+9gdeOA=E3!CUW+np~M?i?fl?&dX z9`F?8Floygpr6Y>g}}ByVznXESlCc^GH94zXrDeXd#OZN*ql+(a-g{_xl6b?{5Cyv zExR(?Cf2V>NN?Q?BSpZv{h|JNMx@Vuy*77E^aKHRs^YfnXLR-t3%fS+=?Z+=?U-Iw zggB8AO&7(%!n;>4S``>HoG&%mQ1-JYN7T2tAnaF_p?lB$2lObMxJI-BwKm2NOaW#2 zshz}XY%FWSEWO@B2Uhu#b4!bI2`9Mds(mS;{G+W8nt(DXzW}b@^J`SE`jYS9{o9n0 zDO=^e8gfx?6cicwKWmkry>;n~?bh8Td)T=Ax%&ptPk) zf=l!5{ZW2}z4Q0hS+m>8pLvSMyc;oQo!9-z&y%g4=|pP(#?w@ezAtb*4cK|#8gM$K z#jT^aOiwY$Z6AD^_YN5-$VL@$@Tn!brTjrJYn_RL?rYuMMQKLJbEHYRZyQ!@WL9Y%l+_B4`M`GdiYKFEqLfMHI zCXJ2hczAf$X*|983uDqZHrwLTyExr=;*v903Rk5b>TliJVDr33vXLrBjJ-fMueoCF z-&XpR_aqK&K(3a9M0!(i$ouyjY8D?Nfi;!_B=bUFj}7O8p(iXOefREkqbf#Yd5ioL z(?dt{h&=nk)_HCGh~f~vI%wwXHtb54=M+aFhBQMuvks<| zRo@9FN8D1VX?5$6dkUBfRAVM143W~wU`Filw*i$tQ5N!ijFEYji5|4sOw7F z?xqs_dXbTBH1NZ`+$LPMLQDGHy!O{JM+}-RJjS(}++rybDgSJ|y41ya{cNL#yFDEj zpDlA(3*UC~|F9azjAI(sDX8ef+QOe0WULCBTUYspbck!ZmGCk3Ci+s(mU-}x61RzK z9P%`7ch6oCKZ}p`a-lPZFS;tmTL5pqAI8HPVJtV?+Cw@yIq67n(f&wc59A{7;DSz@ z6E{%ha`NF+Jo=byaxzhF%t@BlXVVp}!iCL2gdWMetUmN-t6cl03NKXFLeT zlTO3qUdE9%thQcD$vrKf!w2YIM6Xn;qaO|ylPo6oKFiRlJ?#2X&1`T6Ax)P31XZZW zX_+^1+jhy|J4%`<=ls6$t1%s>1CNbmdsV^ibH9Pl?TH8!dSnB1gd0{{Hi31bwx*^< zeQvGtg~zW~&P)+`301v$?GbuqK= zl>hD_%bXapDAhuX#TUpT(nEhl*Eq z>}UM?3bwgV;Hc|wGi~(&USfM)-xf2M+OHSm2LWG8``NI3#nlMkBw612FVD9%tX^Ys z9_x;QzjN%IUDIi<+uBFnnz^a(-_LadD|41?#4Ph32;M?R2f$U4kD7|A?fP}p`|FPc z1#tw@c8qv?Xap_z*kAK+X_ZVYZ09{jH#L$aoWfe|Wk_AJu0+nIZdN>&A2++>vBGny z@*QtcT6c0l3hV1zFsU%#6Ao;)M{3MXdy6TK2PhUyI4`T3a z8RKDy#*m()&(>{gEA^j~euqyPRNq~l#8oUU?}(3|IsyB7s+elqOHng#cN8M4m*ahmsZU4q_S|&-N%?6TyL4ET3e$f1Lt6WTqxLv z<~D?-ms|XRPdwQN)f!cIS52flPE{fIy7g;?2MT*du{_L26B0h(68{xd$!DHc3Q z3Y3toTf}I9pR?(r(?25 zKqRyyShCh@cC%-pC>dK~jo~o&x9MNz3~>c#jsrq&iqG;tX|o4}3ec9Zvw98X=4yVS zlb-5t9m<{yvoIld#n`%?#B^~DuJ{FY_53-`Kdb&+zL{$#v@9(3DPNY~T+t4sh0Jtm zkpi;D#s*nqX+9fXIHdaUZi`oBr~3IOm1#mF>qDn&+PY7DIko={k?@KQ%S!tH@LHmh z8dqG|e;C<6Kz0aVlR?85MPSr?$o?Pp3m9h04k^zCU+8(4Oe-xVLU+zP?u)AbG(kP9 z`G;2IwBU*$bA(>lD@NKInL-JS;XeWpikkl)h(#Df_+5~jWulwV?$eg>^YM`JbSTxT1AC4k@!p8Bk(Hv>Yh`T2Q}PU~Mnb1K*ryiuDl=8S=M zSt>cIhzwZ^Xhtwoz{W^vRM*r5!vIN!S66)Mi|b;q15d61dXrf_nkMJ_H@Avi`yy@d zpuHV_Sjn9Np5qZ4%W06U2loole#^NRsf7L512-`L?Cgsi8 zPjFlv7$BH#1ZCrTGCTMJZhEIZjq+sA-rqkwL`fDwr{45B146CYyG=?v4Qn0RTbp8l zW~pcYjaE_8jOlY`rCYLiaz0Eq_P#+&@#?u4!!K8%vwT!zr74Z#C8O9kD++S*g;F$U z1!nX(0PUMps9_P8yu)V70|Lscm_m+Tk?>an2L`$tDw|4~y??vGe<9q9m{ zp%?`N&Vx2yPQ0Z$b3?-?mL;C{_KFkcQk(6E8nm15+A_zJ-bc~5ebhPqk(rst4(55v zCVg~p4h_aaKREg7H}r}!)W-4gqp&sx)nnboY;yKntAXz5Fa-j;?zul?|1cF!k8h;@ zw}d6l0@@XpI>vcWP7N8YTHD(4PA~U|2}Ad>N+^`V{Wl`=@~6N$VT1qbskku@9rQpF zn@zsj4^9(nv`86@t9zj+nj|u-l3$1nv@F}lRll=Fk?C-J$&`yxqQRl_;K6N?;m@>W z`GLFPha*pDvf6vwhwBxYymAI!uwfQ?hwCOIm|xI~cK{;?$V#Kx8b#qF>wcb)?brvP zzOLlPsyWm-pqRO_?Xc>f#GLl?C`<`BA?Z`lSx-uS?0`Fb`2!w}RDI265_tuI`5~Fd 
diff --git a/cpp/examples/strings/custom_prealloc.cu b/cpp/examples/strings/custom_prealloc.cu
index 0af4c47e947..5088ebd6168 100644
--- a/cpp/examples/strings/custom_prealloc.cu
+++ b/cpp/examples/strings/custom_prealloc.cu
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2022-2023, NVIDIA CORPORATION.
+ * Copyright (c) 2022-2024, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -101,7 +101,7 @@ std::unique_ptr<cudf::column> redact_strings(cudf::column_view const& names,
   auto const offsets = scv.offsets_begin();
 
   // create working memory to hold the output of each string
-  auto working_memory = rmm::device_uvector<char>(scv.chars_size(), stream);
+  auto working_memory = rmm::device_uvector<char>(scv.chars_size(stream), stream);
   // create a vector for the output strings' pointers
   auto str_ptrs = rmm::device_uvector<char*>(names.size(), stream);
 
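Not part of the patch: a minimal host-side sketch of the stream-aware chars_size() call that the example above switches to. The helper name make_scratch is made up for illustration.

#include <cudf/strings/strings_column_view.hpp>
#include <rmm/cuda_stream_view.hpp>
#include <rmm/device_uvector.hpp>

// Size a scratch buffer from the strings column's total character bytes.
rmm::device_uvector<char> make_scratch(cudf::strings_column_view const& scv,
                                       rmm::cuda_stream_view stream)
{
  // With the chars child column removed by this patch, the byte count is computed from
  // the offsets, so chars_size() now needs the stream it should do that work on.
  return rmm::device_uvector<char>(scv.chars_size(stream), stream);
}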
* * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -101,7 +101,7 @@ std::unique_ptr redact_strings(cudf::column_view const& names, auto const offsets = scv.offsets_begin(); // create working memory to hold the output of each string - auto working_memory = rmm::device_uvector(scv.chars_size(), stream); + auto working_memory = rmm::device_uvector(scv.chars_size(stream), stream); // create a vector for the output strings' pointers auto str_ptrs = rmm::device_uvector(names.size(), stream); diff --git a/cpp/include/cudf/column/column_device_view.cuh b/cpp/include/cudf/column/column_device_view.cuh index daee443a5f3..19722d127cb 100644 --- a/cpp/include/cudf/column/column_device_view.cuh +++ b/cpp/include/cudf/column/column_device_view.cuh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2023, NVIDIA CORPORATION. + * Copyright (c) 2019-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -445,7 +445,7 @@ class alignas(16) column_device_view : public detail::column_device_view_base { __device__ T element(size_type element_index) const noexcept { size_type index = element_index + offset(); // account for this view's _offset - char const* d_strings = d_children[strings_column_view::chars_column_index].data(); + char const* d_strings = static_cast(_data); auto const offsets = d_children[strings_column_view::offsets_column_index]; auto const itr = cudf::detail::input_offsetalator(offsets.head(), offsets.type()); auto const offset = itr[index]; diff --git a/cpp/include/cudf/column/column_factories.hpp b/cpp/include/cudf/column/column_factories.hpp index ce5772dcf3c..a6167d983c5 100644 --- a/cpp/include/cudf/column/column_factories.hpp +++ b/cpp/include/cudf/column/column_factories.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2023, NVIDIA CORPORATION. + * Copyright (c) 2019-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -462,9 +462,31 @@ std::unique_ptr make_strings_column( * nulls is used for interpreting this bitmask. * @return Constructed strings column */ +[[deprecated]] std::unique_ptr make_strings_column(size_type num_strings, + std::unique_ptr offsets_column, + std::unique_ptr chars_column, + size_type null_count, + rmm::device_buffer&& null_mask); +/** + * @brief Construct a STRING type column given offsets column, chars columns, and null mask and null + * count. + * + * The columns and mask are moved into the resulting strings column. + * + * @param num_strings The number of strings the column represents. + * @param offsets_column The column of offset values for this column. The number of elements is + * one more than the total number of strings so the `offset[last] - offset[0]` is the total number + * of bytes in the strings vector. + * @param chars_buffer The buffer of char bytes for all the strings for this column. Individual + * strings are identified by the offsets and the nullmask. + * @param null_count The number of null string entries. + * @param null_mask The bits specifying the null strings in device memory. Arrow format for + * nulls is used for interpreting this bitmask. 
+ * @return Constructed strings column + */ std::unique_ptr make_strings_column(size_type num_strings, std::unique_ptr offsets_column, - std::unique_ptr chars_column, + rmm::device_buffer&& chars_buffer, size_type null_count, rmm::device_buffer&& null_mask); diff --git a/cpp/include/cudf/io/types.hpp b/cpp/include/cudf/io/types.hpp index 47a48f2175b..3208a81cd63 100644 --- a/cpp/include/cudf/io/types.hpp +++ b/cpp/include/cudf/io/types.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2023, NVIDIA CORPORATION. + * Copyright (c) 2019-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -693,6 +693,11 @@ class column_in_metadata { column_in_metadata& set_output_as_binary(bool binary) noexcept { _output_as_binary = binary; + if (_output_as_binary and children.size() == 1) { + children.emplace_back(); + } else if (!_output_as_binary and children.size() == 2) { + children.pop_back(); + } return *this; } diff --git a/cpp/include/cudf/strings/strings_column_view.hpp b/cpp/include/cudf/strings/strings_column_view.hpp index e27d32fceb9..36054f7c229 100644 --- a/cpp/include/cudf/strings/strings_column_view.hpp +++ b/cpp/include/cudf/strings/strings_column_view.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2023, NVIDIA CORPORATION. + * Copyright (c) 2019-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -16,6 +16,7 @@ #pragma once #include +#include /** * @file @@ -58,7 +59,6 @@ class strings_column_view : private column_view { strings_column_view& operator=(strings_column_view&&) = default; static constexpr size_type offsets_column_index{0}; ///< Child index of the offsets column - static constexpr size_type chars_column_index{1}; ///< Child index of the characters column using column_view::has_nulls; using column_view::is_empty; @@ -106,10 +106,12 @@ class strings_column_view : private column_view { /** * @brief Returns the internal column of chars * - * @throw cudf::logic_error if this is an empty column + * @throw cudf::logic error if this is an empty column + * @param stream CUDA stream used for device memory operations and kernel launches * @return The chars column */ - [[nodiscard]] column_view chars() const; + [[deprecated]] [[nodiscard]] column_view chars( + rmm::cuda_stream_view stream = cudf::get_default_stream()) const; /** * @brief Returns the number of bytes in the chars child column. @@ -117,9 +119,10 @@ class strings_column_view : private column_view { * This accounts for empty columns but does not reflect a sliced parent column * view (i.e.: non-zero offset or reduced row count). * + * @param stream CUDA stream used for device memory operations and kernel launches * @return Number of bytes in the chars child column */ - [[nodiscard]] size_type chars_size() const noexcept; + [[nodiscard]] size_type chars_size(rmm::cuda_stream_view stream) const noexcept; /** * @brief Return an iterator for the chars child column. @@ -128,11 +131,11 @@ class strings_column_view : private column_view { * The offsets child must be used to properly address the char bytes. * * For example, to access the first character of string `i` (accounting for - * a sliced column offset) use: `chars_begin()[offsets_begin()[i]]`. + * a sliced column offset) use: `chars_begin(stream)[offsets_begin()[i]]`. * * @return Iterator pointing to the first char byte. 
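// --- Illustrative sketch (not part of the patch) --------------------------------------
// A minimal example of how calling code might use the new buffer-based
// make_strings_column overload declared above together with the stream-aware
// chars_size()/chars_begin() accessors. The column contents and the helper name are
// made up for illustration; it assumes the cudf_test column wrappers (already used by
// the tests later in this patch) are available.
#include <cudf/column/column_factories.hpp>
#include <cudf/strings/strings_column_view.hpp>
#include <cudf/utilities/default_stream.hpp>
#include <cudf_test/column_wrapper.hpp>
#include <rmm/device_buffer.hpp>

#include <memory>
#include <string>

std::unique_ptr<cudf::column> make_hello_world_column()
{
  auto stream = cudf::get_default_stream();

  // Two strings, "hello" and "world": offsets are [0, 5, 10] and the chars are a single
  // contiguous buffer owned directly by the parent column (no chars child column).
  auto offsets = cudf::test::fixed_width_column_wrapper<cudf::size_type>({0, 5, 10}).release();
  std::string const h_chars = "helloworld";
  rmm::device_buffer chars{h_chars.data(), h_chars.size(), stream};  // host-to-device copy

  auto column =
    cudf::make_strings_column(2, std::move(offsets), std::move(chars), 0, rmm::device_buffer{});

  // The chars accessors now take a stream because the size is read from the offsets column.
  cudf::strings_column_view scv(column->view());
  auto const bytes  = scv.chars_size(stream);   // == 10
  auto const d_data = scv.chars_begin(stream);  // device pointer to "helloworld"
  (void)bytes;
  (void)d_data;
  return column;
}
// ---------------------------------------------------------------------------------------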
*/ - [[nodiscard]] chars_iterator chars_begin() const; + [[nodiscard]] chars_iterator chars_begin(rmm::cuda_stream_view) const; /** * @brief Return an end iterator for the offsets child column. @@ -140,9 +143,10 @@ class strings_column_view : private column_view { * This does not apply the offset of the parent. * The offsets child must be used to properly address the char bytes. * + * @param stream CUDA stream used for device memory operations and kernel launches * @return Iterator pointing 1 past the last char byte. */ - [[nodiscard]] chars_iterator chars_end() const; + [[nodiscard]] chars_iterator chars_end(rmm::cuda_stream_view stream) const; }; //! Strings column APIs. diff --git a/cpp/include/cudf_test/column_utilities.hpp b/cpp/include/cudf_test/column_utilities.hpp index 6231f8207f9..49d5098f823 100644 --- a/cpp/include/cudf_test/column_utilities.hpp +++ b/cpp/include/cudf_test/column_utilities.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2023, NVIDIA CORPORATION. + * Copyright (c) 2019-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -226,15 +226,15 @@ template <> inline std::pair, std::vector> to_host(column_view c) { thrust::host_vector host_data(c.size()); + auto stream = cudf::get_default_stream(); if (c.size() > c.null_count()) { auto const scv = strings_column_view(c); auto const h_chars = cudf::detail::make_std_vector_sync( - cudf::device_span(scv.chars().data(), scv.chars().size()), - cudf::get_default_stream()); + cudf::device_span(scv.chars_begin(stream), scv.chars_size(stream)), stream); auto const h_offsets = cudf::detail::make_std_vector_sync( cudf::device_span(scv.offsets().data() + scv.offset(), scv.size() + 1), - cudf::get_default_stream()); + stream); // build std::string vector from chars and offsets std::transform( diff --git a/cpp/src/binaryop/compiled/binary_ops.cu b/cpp/src/binaryop/compiled/binary_ops.cu index 464c15dac9d..73ba15e39f3 100644 --- a/cpp/src/binaryop/compiled/binary_ops.cu +++ b/cpp/src/binaryop/compiled/binary_ops.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2023, NVIDIA CORPORATION. + * Copyright (c) 2019-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -86,11 +86,11 @@ scalar_as_column_view::return_type scalar_as_column_view::operator()(s.validity_data()), static_cast(!s.is_valid(stream)), 0, - {offsets_column->view(), chars_column_v}); + {offsets_column->view()}); return std::pair{col_v, std::move(offsets_column)}; } diff --git a/cpp/src/column/column_view.cpp b/cpp/src/column/column_view.cpp index 75722ede9d2..4d16298c605 100644 --- a/cpp/src/column/column_view.cpp +++ b/cpp/src/column/column_view.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2023, NVIDIA CORPORATION. + * Copyright (c) 2019-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -51,7 +51,9 @@ column_view_base::column_view_base(data_type type, CUDF_EXPECTS(nullptr == data, "EMPTY column should have no data."); CUDF_EXPECTS(nullptr == null_mask, "EMPTY column should have no null mask."); } else if (is_compound(type)) { - CUDF_EXPECTS(nullptr == data, "Compound (parent) columns cannot have data"); + if (type.id() != type_id::STRING) { + CUDF_EXPECTS(nullptr == data, "Compound (parent) columns cannot have data"); + } } else if (size > 0) { CUDF_EXPECTS(nullptr != data, "Null data pointer."); } diff --git a/cpp/src/copying/contiguous_split.cu b/cpp/src/copying/contiguous_split.cu index dd4af236ecf..54d0aa10353 100644 --- a/cpp/src/copying/contiguous_split.cu +++ b/cpp/src/copying/contiguous_split.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2023, NVIDIA CORPORATION. + * Copyright (c) 2019-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -502,23 +502,34 @@ std::pair buf_info_functor::operator() 0; + + // string columns contain the underlying chars data. + *current = src_buf_info(type_id::STRING, + nullptr, + offset_stack_pos, + // if I have an offsets child, it's index will be my parent_offset_index + has_offsets_child ? ((current + 1) - head) : parent_offset_index, + false, + col.offset()); + + // if I have offsets, I need to include that in the stack size + offset_stack_pos += has_offsets_child ? offset_depth + 1 : offset_depth; current++; - offset_stack_pos += offset_depth; - // string columns don't necessarily have children - if (col.num_children() > 0) { - CUDF_EXPECTS(col.num_children() == 2, "Encountered malformed string column"); + if (has_offsets_child) { + CUDF_EXPECTS(col.num_children() == 1, "Encountered malformed string column"); strings_column_view scv(col); // info for the offsets buffer @@ -539,15 +550,6 @@ std::pair buf_info_functor::operator() build_output_column_metadata( }(); // size/data pointer for the column - auto const col_size = static_cast(current_info->num_elements); - int64_t const data_offset = src.num_children() > 0 || col_size == 0 || src.head() == nullptr - ? -1 - : static_cast(current_info->dst_offset); + auto const col_size = [&]() { + // if I am a string column, I need to use the number of rows from my child offset column. the + // number of rows in my dst_buf_info struct will be equal to the number of chars, which is + // incorrect. this is a quirk of how cudf stores strings. + if (src.type().id() == type_id::STRING) { + // if I have no children (no offsets), then I must have a row count of 0 + if (src.num_children() == 0) { return 0; } + + // otherwise my actual number of rows will be the num_rows field of the next dst_buf_info + // struct (our child offsets column) + return (current_info + 1)->num_rows; + } + + // otherwise the number of rows is the number of elements + return static_cast(current_info->num_elements); + }(); + int64_t const data_offset = + col_size == 0 || src.head() == nullptr ? 
-1 : static_cast(current_info->dst_offset); mb.add_column_info_to_meta( src.type(), col_size, null_count, data_offset, bitmask_offset, src.num_children()); @@ -902,11 +918,19 @@ struct dst_valid_count_output_iterator { */ struct size_of_helper { template - constexpr std::enable_if_t(), int> __device__ operator()() const + constexpr std::enable_if_t() && !std::is_same_v, int> + __device__ operator()() const { return 0; } + template + constexpr std::enable_if_t() && std::is_same_v, int> + __device__ operator()() const + { + return sizeof(cudf::device_storage_type_t); + } + template constexpr std::enable_if_t(), int> __device__ operator()() const noexcept { @@ -1236,7 +1260,7 @@ std::unique_ptr compute_splits( } // final element indices and row count - int const out_element_index = src_info.is_validity ? row_start / 32 : row_start; + int const src_element_index = src_info.is_validity ? row_start / 32 : row_start; int const num_rows = row_end - row_start; // if I am an offsets column, all my values need to be shifted int const value_shift = src_info.offsets == nullptr ? 0 : src_info.offsets[row_start]; @@ -1259,7 +1283,7 @@ std::unique_ptr compute_splits( num_elements, element_size, num_rows, - out_element_index, + src_element_index, 0, value_shift, bit_shift, diff --git a/cpp/src/interop/to_arrow.cu b/cpp/src/interop/to_arrow.cu index 3a9fe50d25b..04ca1250ed5 100644 --- a/cpp/src/interop/to_arrow.cu +++ b/cpp/src/interop/to_arrow.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020-2023, NVIDIA CORPORATION. + * Copyright (c) 2020-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -25,6 +25,8 @@ #include #include #include +#include +#include #include #include #include @@ -49,16 +51,16 @@ namespace { * @brief Create arrow data buffer from given cudf column */ template -std::shared_ptr fetch_data_buffer(column_view input_view, +std::shared_ptr fetch_data_buffer(device_span input, arrow::MemoryPool* ar_mr, rmm::cuda_stream_view stream) { - int64_t const data_size_in_bytes = sizeof(T) * input_view.size(); + int64_t const data_size_in_bytes = sizeof(T) * input.size(); auto data_buffer = allocate_arrow_buffer(data_size_in_bytes, ar_mr); CUDF_CUDA_TRY(cudaMemcpyAsync(data_buffer->mutable_data(), - input_view.data(), + input.data(), data_size_in_bytes, cudaMemcpyDefault, stream.value())); @@ -136,11 +138,13 @@ struct dispatch_to_arrow { arrow::MemoryPool* ar_mr, rmm::cuda_stream_view stream) { - return to_arrow_array(id, - static_cast(input_view.size()), - fetch_data_buffer(input_view, ar_mr, stream), - fetch_mask_buffer(input_view, ar_mr, stream), - static_cast(input_view.null_count())); + return to_arrow_array( + id, + static_cast(input_view.size()), + fetch_data_buffer( + device_span(input_view.data(), input_view.size()), ar_mr, stream), + fetch_mask_buffer(input_view, ar_mr, stream), + static_cast(input_view.null_count())); } }; @@ -280,7 +284,7 @@ std::shared_ptr dispatch_to_arrow::operator()( { std::unique_ptr tmp_column = ((input.offset() != 0) or - ((input.num_children() == 2) and (input.child(0).size() - 1 != input.size()))) + ((input.num_children() == 1) and (input.child(0).size() - 1 != input.size()))) ? 
std::make_unique(input, stream) : nullptr; @@ -295,8 +299,13 @@ std::shared_ptr dispatch_to_arrow::operator()( return std::make_shared( 0, std::move(tmp_offset_buffer), std::move(tmp_data_buffer)); } - auto offset_buffer = child_arrays[0]->data()->buffers[1]; - auto data_buffer = child_arrays[1]->data()->buffers[1]; + auto offset_buffer = child_arrays[strings_column_view::offsets_column_index]->data()->buffers[1]; + auto const sview = strings_column_view{input_view}; + auto data_buffer = fetch_data_buffer( + device_span{sview.chars_begin(stream), + static_cast(sview.chars_size(stream))}, + ar_mr, + stream); return std::make_shared(static_cast(input_view.size()), offset_buffer, data_buffer, diff --git a/cpp/src/io/csv/writer_impl.cu b/cpp/src/io/csv/writer_impl.cu index aad761acdba..995d8d942c9 100644 --- a/cpp/src/io/csv/writer_impl.cu +++ b/cpp/src/io/csv/writer_impl.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020-2023, NVIDIA CORPORATION. + * Copyright (c) 2020-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -377,8 +377,8 @@ void write_chunked(data_sink* out_sink, rmm::mr::get_current_device_resource()); strings_column_view strings_column{p_str_col_w_nl->view()}; - auto total_num_bytes = strings_column.chars_size(); - char const* ptr_all_bytes = strings_column.chars_begin(); + auto total_num_bytes = strings_column.chars_size(stream); + char const* ptr_all_bytes = strings_column.chars_begin(stream); if (out_sink->is_device_write_preferred(total_num_bytes)) { // Direct write from device memory diff --git a/cpp/src/io/json/json_column.cu b/cpp/src/io/json/json_column.cu index 056cce18a52..f1296daca26 100644 --- a/cpp/src/io/json/json_column.cu +++ b/cpp/src/io/json/json_column.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022-2023, NVIDIA CORPORATION. + * Copyright (c) 2022-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -363,7 +363,7 @@ std::vector copy_strings_to_host(device_span input, if (col.is_empty()) return std::vector{}; auto const scv = cudf::strings_column_view(col); auto const h_chars = cudf::detail::make_std_vector_sync( - cudf::device_span(scv.chars().data(), scv.chars().size()), stream); + cudf::device_span(scv.chars_begin(stream), scv.chars_size(stream)), stream); auto const h_offsets = cudf::detail::make_std_vector_sync( cudf::device_span(scv.offsets().data() + scv.offset(), scv.size() + 1), diff --git a/cpp/src/io/json/write_json.cu b/cpp/src/io/json/write_json.cu index b2017ee513f..c35f15049bd 100644 --- a/cpp/src/io/json/write_json.cu +++ b/cpp/src/io/json/write_json.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2023, NVIDIA CORPORATION. + * Copyright (c) 2023-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
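// --- Illustrative sketch (not part of the patch) --------------------------------------
// The host-copy pattern used in the changes above (column_utilities, to_arrow, the CSV
// writer, and copy_strings_to_host) reduces to the following: read the byte count and
// the device pointer through the stream-aware accessors and copy the bytes directly.
// The helper name is hypothetical; plain CUDA runtime calls are used here instead of
// the cudf::detail utilities.
#include <cudf/strings/strings_column_view.hpp>
#include <rmm/cuda_stream_view.hpp>

#include <cuda_runtime.h>

#include <cstddef>
#include <string>

std::string chars_to_host(cudf::strings_column_view const& scv, rmm::cuda_stream_view stream)
{
  std::string host_bytes(static_cast<std::size_t>(scv.chars_size(stream)), '\0');
  cudaMemcpyAsync(host_bytes.data(),
                  scv.chars_begin(stream),
                  host_bytes.size(),
                  cudaMemcpyDeviceToHost,
                  stream.value());
  cudaStreamSynchronize(stream.value());
  return host_bytes;
}
// ---------------------------------------------------------------------------------------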
@@ -347,10 +347,13 @@ std::unique_ptr struct_to_strings(table_view const& strings_columns, d_strview_offsets + row_string_offsets.size(), old_offsets.begin(), row_string_offsets.begin()); + auto chars_data = joined_col->release().data; + auto const chars_size = chars_data->size(); return make_strings_column( strings_columns.num_rows(), std::make_unique(std::move(row_string_offsets), rmm::device_buffer{}, 0), - std::move(joined_col->release().children[strings_column_view::chars_column_index]), + std::make_unique( + data_type{type_id::INT8}, chars_size, std::move(*chars_data), rmm::device_buffer{}, 0), 0, {}); } @@ -469,10 +472,13 @@ std::unique_ptr join_list_of_strings(lists_column_view const& lists_stri d_strview_offsets.end(), old_offsets.begin(), row_string_offsets.begin()); + auto chars_data = joined_col->release().data; + auto const chars_size = chars_data->size(); return make_strings_column( num_lists, std::make_unique(std::move(row_string_offsets), rmm::device_buffer{}, 0), - std::move(joined_col->release().children[strings_column_view::chars_column_index]), + std::make_unique( + data_type{type_id::INT8}, chars_size, std::move(*chars_data), rmm::device_buffer{}, 0), lists_strings.null_count(), cudf::detail::copy_bitmask(lists_strings.parent(), stream, mr)); } @@ -812,8 +818,8 @@ void write_chunked(data_sink* out_sink, CUDF_FUNC_RANGE(); CUDF_EXPECTS(str_column_view.size() > 0, "Unexpected empty strings column."); - auto const total_num_bytes = str_column_view.chars_size() - skip_last_chars; - char const* ptr_all_bytes = str_column_view.chars_begin(); + auto const total_num_bytes = str_column_view.chars_size(stream) - skip_last_chars; + char const* ptr_all_bytes = str_column_view.chars_begin(stream); if (out_sink->is_device_write_preferred(total_num_bytes)) { // Direct write from device memory diff --git a/cpp/src/io/parquet/writer_impl.cu b/cpp/src/io/parquet/writer_impl.cu index 279a814a4e1..90f52c0ee70 100644 --- a/cpp/src/io/parquet/writer_impl.cu +++ b/cpp/src/io/parquet/writer_impl.cu @@ -609,10 +609,10 @@ std::vector construct_schema_tree( // column that isn't a single-depth list the code will throw. 
if (col_meta.is_enabled_output_as_binary() && is_last_list_child(col)) { CUDF_EXPECTS(col_meta.num_children() == 2 or col_meta.num_children() == 0, - "Binary column's corresponding metadata should have zero or two children!"); + "Binary column's corresponding metadata should have zero or two children"); if (col_meta.num_children() > 0) { CUDF_EXPECTS(col->children[lists_column_view::child_column_index]->children.empty(), - "Binary column must not be nested!"); + "Binary column must not be nested"); } schema_tree_node col_schema{}; @@ -734,8 +734,13 @@ std::vector construct_schema_tree( } else { // if leaf, add current if (col->type().id() == type_id::STRING) { - CUDF_EXPECTS(col_meta.num_children() == 2 or col_meta.num_children() == 0, - "String column's corresponding metadata should have zero or two children"); + if (col_meta.is_enabled_output_as_binary()) { + CUDF_EXPECTS(col_meta.num_children() == 2 or col_meta.num_children() == 0, + "Binary column's corresponding metadata should have zero or two children"); + } else { + CUDF_EXPECTS(col_meta.num_children() == 1 or col_meta.num_children() == 0, + "String column's corresponding metadata should have zero or one children"); + } } else { CUDF_EXPECTS(col_meta.num_children() == 0, "Leaf column's corresponding metadata cannot have children"); diff --git a/cpp/src/io/utilities/column_buffer.cpp b/cpp/src/io/utilities/column_buffer.cpp index 1cbd5929525..88617510394 100644 --- a/cpp/src/io/utilities/column_buffer.cpp +++ b/cpp/src/io/utilities/column_buffer.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021-2023, NVIDIA CORPORATION. + * Copyright (c) 2021-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -161,7 +161,6 @@ std::unique_ptr make_column(column_buffer_base& buffer, if (schema.value_or(reader_column_schema{}).is_enabled_convert_binary_to_strings()) { if (schema_info != nullptr) { schema_info->children.push_back(column_name_info{"offsets"}); - schema_info->children.push_back(column_name_info{"chars"}); } // make_strings_column allocates new memory, it does not simply move @@ -177,12 +176,11 @@ std::unique_ptr make_column(column_buffer_base& buffer, auto col_content = string_col->release(); // convert to uint8 column, strings are currently stored as int8 - auto contents = - col_content.children[strings_column_view::chars_column_index].release()->release(); - auto data = contents.data.release(); + auto data = col_content.data.release(); + auto char_size = data->size(); auto uint8_col = std::make_unique( - data_type{type_id::UINT8}, data->size(), std::move(*data), rmm::device_buffer{}, 0); + data_type{type_id::UINT8}, char_size, std::move(*data), rmm::device_buffer{}, 0); if (schema_info != nullptr) { schema_info->children.push_back(column_name_info{"offsets"}); diff --git a/cpp/src/lists/interleave_columns.cu b/cpp/src/lists/interleave_columns.cu index cbc99fcdb83..7b37e2dc8f6 100644 --- a/cpp/src/lists/interleave_columns.cu +++ b/cpp/src/lists/interleave_columns.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021-2023, NVIDIA CORPORATION. + * Copyright (c) 2021-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
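// --- Illustrative sketch (not part of the patch) --------------------------------------
// Caller-side view of the metadata expectations enforced above: marking a string column
// with set_output_as_binary(true) (which, after this change, also adjusts the number of
// child metadata entries to match the binary layout) before writing Parquet. The column
// index, name, and file path are made up for illustration.
#include <cudf/io/parquet.hpp>
#include <cudf/io/types.hpp>
#include <cudf/table/table_view.hpp>

#include <string>

void write_first_column_as_binary(cudf::table_view const& table, std::string const& path)
{
  cudf::io::table_input_metadata metadata(table);
  metadata.column_metadata[0].set_name("payload").set_output_as_binary(true);

  auto options = cudf::io::parquet_writer_options::builder(cudf::io::sink_info{path}, table)
                   .metadata(metadata)
                   .build();
  cudf::io::write_parquet(options);
}
// ---------------------------------------------------------------------------------------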
@@ -193,8 +193,7 @@ struct compute_string_sizes_and_interleave_lists_fn { auto const start_byte = str_offsets[start_str_idx]; auto const end_byte = str_offsets[end_str_idx]; if (start_byte < end_byte) { - auto const input_ptr = - str_col.child(strings_column_view::chars_column_index).template data() + start_byte; + auto const input_ptr = str_col.template head() + start_byte; auto const output_ptr = d_chars + d_offsets[write_idx]; thrust::copy(thrust::seq, input_ptr, input_ptr + end_byte - start_byte, output_ptr); } diff --git a/cpp/src/reshape/byte_cast.cu b/cpp/src/reshape/byte_cast.cu index 5f89b6d9b3b..6ed28e693fd 100644 --- a/cpp/src/reshape/byte_cast.cu +++ b/cpp/src/reshape/byte_cast.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020-2023, NVIDIA CORPORATION. + * Copyright (c) 2020-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -135,10 +135,9 @@ struct byte_list_conversion_fn(input, stream, mr)->release(); - auto chars_contents = col_content.children[strings_column_view::chars_column_index]->release(); - auto const num_chars = chars_contents.data->size(); + auto const num_chars = col_content.data->size(); auto uint8_col = std::make_unique( - output_type, num_chars, std::move(*(chars_contents.data)), rmm::device_buffer{}, 0); + output_type, num_chars, std::move(*(col_content.data)), rmm::device_buffer{}, 0); auto result = make_lists_column( input.size(), diff --git a/cpp/src/strings/attributes.cu b/cpp/src/strings/attributes.cu index de51356845c..00e49f9d97e 100644 --- a/cpp/src/strings/attributes.cu +++ b/cpp/src/strings/attributes.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2023, NVIDIA CORPORATION. + * Copyright (c) 2019-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -170,7 +170,8 @@ std::unique_ptr count_characters(strings_column_view const& input, rmm::mr::device_memory_resource* mr) { if ((input.size() == input.null_count()) || - ((input.chars_size() / (input.size() - input.null_count())) < AVG_CHAR_BYTES_THRESHOLD)) { + ((input.chars_size(stream) / (input.size() - input.null_count())) < + AVG_CHAR_BYTES_THRESHOLD)) { auto ufn = cuda::proclaim_return_type( [] __device__(string_view const& d_str) { return d_str.length(); }); return counts_fn(input, ufn, stream, mr); diff --git a/cpp/src/strings/case.cu b/cpp/src/strings/case.cu index 8f4c2ee574a..c2e8033b42d 100644 --- a/cpp/src/strings/case.cu +++ b/cpp/src/strings/case.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2023, NVIDIA CORPORATION. + * Copyright (c) 2019-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -33,6 +33,7 @@ #include #include +#include namespace cudf { namespace strings { @@ -211,7 +212,7 @@ std::unique_ptr convert_case(strings_column_view const& input, upper_lower_fn converter{ccfn, *d_strings}; // For smaller strings, use the regular string-parallel algorithm - if ((input.chars_size() / (input.size() - input.null_count())) < AVG_CHAR_BYTES_THRESHOLD) { + if ((input.chars_size(stream) / (input.size() - input.null_count())) < AVG_CHAR_BYTES_THRESHOLD) { auto [offsets, chars] = cudf::strings::detail::make_strings_children(converter, input.size(), stream, mr); return make_strings_column(input.size(), @@ -227,16 +228,16 @@ std::unique_ptr convert_case(strings_column_view const& input, // but results in a large performance gain when the input contains only single-byte characters. // The count_if is faster than any_of or all_of: https://github.com/NVIDIA/thrust/issues/1016 bool const multi_byte_chars = - thrust::count_if( - rmm::exec_policy(stream), input.chars_begin(), input.chars_end(), [] __device__(auto chr) { - return is_utf8_continuation_char(chr); - }) > 0; + thrust::count_if(rmm::exec_policy(stream), + input.chars_begin(stream), + input.chars_end(stream), + cuda::proclaim_return_type( + [] __device__(auto chr) { return is_utf8_continuation_char(chr); })) > 0; if (!multi_byte_chars) { // optimization for ASCII-only case: copy the input column and inplace replace each character - auto result = std::make_unique(input.parent(), stream, mr); - auto d_chars = - result->mutable_view().child(strings_column_view::chars_column_index).data(); - auto const chars_size = strings_column_view(result->view()).chars_size(); + auto result = std::make_unique(input.parent(), stream, mr); + auto d_chars = result->mutable_view().head(); + auto const chars_size = strings_column_view(result->view()).chars_size(stream); thrust::transform( rmm::exec_policy(stream), d_chars, d_chars + chars_size, d_chars, ascii_converter_fn{ccfn}); result->set_null_count(input.null_count()); diff --git a/cpp/src/strings/combine/join.cu b/cpp/src/strings/combine/join.cu index 9ab527feaf8..48304759f7a 100644 --- a/cpp/src/strings/combine/join.cu +++ b/cpp/src/strings/combine/join.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2023, NVIDIA CORPORATION. + * Copyright (c) 2019-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -145,15 +145,19 @@ std::unique_ptr join_strings(strings_column_view const& input, auto chars_column = [&] { // build the strings column and commandeer the chars column if ((input.size() == input.null_count()) || - ((input.chars_size() / (input.size() - input.null_count())) <= AVG_CHAR_BYTES_THRESHOLD)) { + ((input.chars_size(stream) / (input.size() - input.null_count())) <= + AVG_CHAR_BYTES_THRESHOLD)) { return std::get<1>( make_strings_children(join_fn{*d_strings, d_separator, d_narep}, input.size(), stream, mr)); } // dynamically feeds index pairs to build the output auto indices = cudf::detail::make_counting_transform_iterator( 0, join_gather_fn{*d_strings, d_separator, d_narep}); - auto joined_col = make_strings_column(indices, indices + (input.size() * 2), stream, mr); - return std::move(joined_col->release().children.back()); + auto joined_col = make_strings_column(indices, indices + (input.size() * 2), stream, mr); + auto chars_data = joined_col->release().data; + auto const chars_size = chars_data->size(); + return std::make_unique( + data_type{type_id::INT8}, chars_size, std::move(*chars_data), rmm::device_buffer{}, 0); }(); // build the offsets: single string output has offsets [0,chars-size] diff --git a/cpp/src/strings/copying/concatenate.cu b/cpp/src/strings/copying/concatenate.cu index 027466ef13c..2d9b06183e2 100644 --- a/cpp/src/strings/copying/concatenate.cu +++ b/cpp/src/strings/copying/concatenate.cu @@ -192,8 +192,7 @@ __global__ void fused_concatenate_string_chars_kernel(column_device_view const* auto const input_offsets_data = cudf::detail::input_offsetalator(offsets_child.head(), offsets_child.type()); - constexpr auto chars_child = strings_column_view::chars_column_index; - auto const* input_chars_data = input_view.child(chars_child).data(); + auto const* input_chars_data = input_view.head(); auto const first_char = input_offsets_data[input_view.offset()]; output_data[output_index] = input_chars_data[offset_index + first_char]; @@ -287,12 +286,11 @@ std::unique_ptr concatenate(host_span columns, continue; // empty column may not have children size_type column_offset = column->offset(); column_view offsets_child = column->child(strings_column_view::offsets_column_index); - column_view chars_child = column->child(strings_column_view::chars_column_index); auto const bytes_offset = get_offset_value(offsets_child, column_offset, stream); auto const bytes_end = get_offset_value(offsets_child, column_size + column_offset, stream); // copy the chars column data - auto d_chars = chars_child.data() + bytes_offset; + auto d_chars = column->head() + bytes_offset; auto const bytes = bytes_end - bytes_offset; CUDF_CUDA_TRY( diff --git a/cpp/src/strings/copying/copying.cu b/cpp/src/strings/copying/copying.cu index 2295a80ff5b..4f37d3864ac 100644 --- a/cpp/src/strings/copying/copying.cu +++ b/cpp/src/strings/copying/copying.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2023, NVIDIA CORPORATION. + * Copyright (c) 2019-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
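// --- Illustrative sketch (not part of the patch) --------------------------------------
// A user-level call that exercises the join_strings code path modified above; the inputs
// and separator are made up for illustration, and the cudf_test wrapper is assumed to be
// available, as in the tests later in this patch.
#include <cudf/scalar/scalar.hpp>
#include <cudf/strings/combine.hpp>
#include <cudf/strings/strings_column_view.hpp>
#include <cudf_test/column_wrapper.hpp>

#include <memory>

std::unique_ptr<cudf::column> join_example()
{
  auto const input = cudf::test::strings_column_wrapper({"aa", "bb", "cc"});
  // Result is a single-row strings column holding "aa-bb-cc"
  return cudf::strings::join_strings(
    cudf::strings_column_view{input}, cudf::string_scalar{"-"}, cudf::string_scalar{""});
}
// ---------------------------------------------------------------------------------------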
@@ -22,6 +22,7 @@ #include #include +#include #include #include @@ -65,12 +66,10 @@ std::unique_ptr copy_slice(strings_column_view const& strings, } // slice the chars child column - auto const data_size = - cudf::detail::get_value(offsets_column->view(), strings_count, stream); - auto chars_column = std::make_unique( - cudf::detail::slice(strings.chars(), {chars_offset, chars_offset + data_size}, stream).front(), - stream, - mr); + auto const data_size = static_cast( + cudf::detail::get_value(offsets_column->view(), strings_count, stream)); + auto chars_buffer = + rmm::device_buffer{strings.chars_begin(stream) + chars_offset, data_size, stream, mr}; // slice the null mask auto null_mask = cudf::detail::copy_bitmask( @@ -81,7 +80,7 @@ std::unique_ptr copy_slice(strings_column_view const& strings, return make_strings_column(strings_count, std::move(offsets_column), - std::move(chars_column), + std::move(chars_buffer), null_count, std::move(null_mask)); } diff --git a/cpp/src/strings/copying/shift.cu b/cpp/src/strings/copying/shift.cu index b54c433c23d..3b798a87d54 100644 --- a/cpp/src/strings/copying/shift.cu +++ b/cpp/src/strings/copying/shift.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021-2023, NVIDIA CORPORATION. + * Copyright (c) 2021-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -69,8 +69,7 @@ struct shift_chars_fn { auto const first_index = offset + d_column.child(strings_column_view::offsets_column_index) .element(d_column.offset() + d_column.size()); - return d_column.child(strings_column_view::chars_column_index) - .element(idx + first_index); + return d_column.head()[idx + first_index]; } else { auto const char_index = idx - last_index; return d_filler.data()[char_index % d_filler.size_bytes()]; @@ -79,10 +78,9 @@ struct shift_chars_fn { if (idx < offset) { return d_filler.data()[idx % d_filler.size_bytes()]; } else { - return d_column.child(strings_column_view::chars_column_index) - .element(idx - offset + - d_column.child(strings_column_view::offsets_column_index) - .element(d_column.offset())); + return d_column.head()[idx - offset + + d_column.child(strings_column_view::offsets_column_index) + .element(d_column.offset())]; } } } diff --git a/cpp/src/strings/replace/multi.cu b/cpp/src/strings/replace/multi.cu index 28736c2ca15..a0f9d1136f3 100644 --- a/cpp/src/strings/replace/multi.cu +++ b/cpp/src/strings/replace/multi.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2023, NVIDIA CORPORATION. + * Copyright (c) 2019-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -74,10 +74,7 @@ using target_pair = thrust::pair; * @brief Helper functions for performing character-parallel replace */ struct replace_multi_parallel_fn { - __device__ char const* get_base_ptr() const - { - return d_strings.child(strings_column_view::chars_column_index).data(); - } + __device__ char const* get_base_ptr() const { return d_strings.head(); } __device__ size_type const* get_offsets_ptr() const { @@ -378,6 +375,11 @@ std::unique_ptr replace_character_parallel(strings_column_view const& in // use this utility to gather the string parts into a contiguous chars column auto chars = make_strings_column(indices.begin(), indices.end(), stream, mr); + // TODO ideally we can pass this chars_data as rmm buffer to make_strings_column + auto chars_data = chars->release().data; + auto const chars_size = chars_data->size(); + auto chars_col = std::make_unique( + data_type{type_id::INT8}, chars_size, std::move(*chars_data), rmm::device_buffer{}, 0); // create offsets from the sizes offsets = @@ -386,7 +388,7 @@ std::unique_ptr replace_character_parallel(strings_column_view const& in // build the strings columns from the chars and offsets return make_strings_column(strings_count, std::move(offsets), - std::move(chars->release().children.back()), + std::move(chars_col), input.null_count(), cudf::detail::copy_bitmask(input.parent(), stream, mr)); } @@ -483,7 +485,8 @@ std::unique_ptr replace(strings_column_view const& input, CUDF_EXPECTS(repls.size() == targets.size(), "Sizes for targets and repls must match"); return (input.size() == input.null_count() || - ((input.chars_size() / (input.size() - input.null_count())) < AVG_CHAR_BYTES_THRESHOLD)) + ((input.chars_size(stream) / (input.size() - input.null_count())) < + AVG_CHAR_BYTES_THRESHOLD)) ? replace_string_parallel(input, targets, repls, stream, mr) : replace_character_parallel(input, targets, repls, stream, mr); } diff --git a/cpp/src/strings/replace/replace.cu b/cpp/src/strings/replace/replace.cu index aa955d3086e..8c4bd4490b9 100644 --- a/cpp/src/strings/replace/replace.cu +++ b/cpp/src/strings/replace/replace.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2023, NVIDIA CORPORATION. + * Copyright (c) 2019-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -415,7 +415,7 @@ std::unique_ptr replace_char_parallel(strings_column_view const& strings auto const strings_count = strings.size(); auto const offset_count = strings_count + 1; auto const d_offsets = strings.offsets_begin(); - auto const d_in_chars = strings.chars_begin(); + auto const d_in_chars = strings.chars_begin(stream); auto const chars_bytes = chars_end - chars_start; auto const target_size = d_target.size_bytes(); @@ -574,7 +574,7 @@ std::unique_ptr replace(strings_column_view con ? 0 : cudf::detail::get_value(strings.offsets(), strings.offset(), stream); size_type const chars_end = (offset_count == strings.offsets().size()) - ? strings.chars_size() + ? strings.chars_size(stream) : cudf::detail::get_value( strings.offsets(), strings.offset() + strings_count, stream); size_type const chars_bytes = chars_end - chars_start; @@ -612,7 +612,7 @@ std::unique_ptr replace( : cudf::detail::get_value( strings.offsets(), strings.offset(), stream); size_type chars_end = (offset_count == strings.offsets().size()) - ? strings.chars_size() + ? 
strings.chars_size(stream) : cudf::detail::get_value( strings.offsets(), strings.offset() + strings_count, stream); return replace_char_parallel( diff --git a/cpp/src/strings/reverse.cu b/cpp/src/strings/reverse.cu index 2855bdbb827..aecb029f25f 100644 --- a/cpp/src/strings/reverse.cu +++ b/cpp/src/strings/reverse.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022-2023, NVIDIA CORPORATION. + * Copyright (c) 2022-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -65,7 +65,7 @@ std::unique_ptr reverse(strings_column_view const& input, auto result = std::make_unique(input.parent(), stream, mr); auto const d_offsets = result->view().child(strings_column_view::offsets_column_index).data(); - auto d_chars = result->mutable_view().child(strings_column_view::chars_column_index).data(); + auto d_chars = result->mutable_view().head(); auto const d_column = column_device_view::create(input.parent(), stream); thrust::for_each_n(rmm::exec_policy(stream), diff --git a/cpp/src/strings/search/find.cu b/cpp/src/strings/search/find.cu index d35f512e0f7..4ba1359c469 100644 --- a/cpp/src/strings/search/find.cu +++ b/cpp/src/strings/search/find.cu @@ -186,7 +186,7 @@ void find_utility(strings_column_view const& input, { auto d_strings = column_device_view::create(input.parent(), stream); auto d_results = output.mutable_view().data(); - if ((input.chars_size() / (input.size() - input.null_count())) > AVG_CHAR_BYTES_THRESHOLD) { + if ((input.chars_size(stream) / (input.size() - input.null_count())) > AVG_CHAR_BYTES_THRESHOLD) { // warp-per-string runs faster for longer strings (but not shorter ones) constexpr int block_size = 256; cudf::detail::grid_1d grid{input.size() * cudf::detail::warp_size, block_size}; @@ -538,7 +538,7 @@ std::unique_ptr contains(strings_column_view const& input, { // use warp parallel when the average string width is greater than the threshold if ((input.null_count() < input.size()) && - ((input.chars_size() / input.size()) > AVG_CHAR_BYTES_THRESHOLD)) { + ((input.chars_size(stream) / input.size()) > AVG_CHAR_BYTES_THRESHOLD)) { return contains_warp_parallel(input, target, stream, mr); } diff --git a/cpp/src/strings/split/split.cuh b/cpp/src/strings/split/split.cuh index dc0b04af388..c5fb44fc3dd 100644 --- a/cpp/src/strings/split/split.cuh +++ b/cpp/src/strings/split/split.cuh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2023, NVIDIA CORPORATION. + * Copyright (c) 2023-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -49,10 +49,7 @@ namespace cudf::strings::detail { */ template struct base_split_tokenizer { - __device__ char const* get_base_ptr() const - { - return d_strings.child(strings_column_view::chars_column_index).data(); - } + __device__ char const* get_base_ptr() const { return d_strings.head(); } __device__ string_view const get_string(size_type idx) const { diff --git a/cpp/src/strings/strings_column_factories.cu b/cpp/src/strings/strings_column_factories.cu index 0b55e18b00a..5ba4d8d3132 100644 --- a/cpp/src/strings/strings_column_factories.cu +++ b/cpp/src/strings/strings_column_factories.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2023, NVIDIA CORPORATION. + * Copyright (c) 2019-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -131,10 +131,33 @@ std::unique_ptr make_strings_column(size_type num_strings, std::vector> children; children.emplace_back(std::move(offsets_column)); - children.emplace_back(std::move(chars_column)); return std::make_unique(data_type{type_id::STRING}, num_strings, - rmm::device_buffer{}, + std::move(*(chars_column->release().data.release())), + std::move(null_mask), + null_count, + std::move(children)); +} + +std::unique_ptr make_strings_column(size_type num_strings, + std::unique_ptr offsets_column, + rmm::device_buffer&& chars_buffer, + size_type null_count, + rmm::device_buffer&& null_mask) +{ + CUDF_FUNC_RANGE(); + + if (null_count > 0) { CUDF_EXPECTS(null_mask.size() > 0, "Column with nulls must be nullable."); } + CUDF_EXPECTS(num_strings == offsets_column->size() - 1, + "Invalid offsets column size for strings column."); + CUDF_EXPECTS(offsets_column->null_count() == 0, "Offsets column should not contain nulls"); + + std::vector> children; + children.emplace_back(std::move(offsets_column)); + + return std::make_unique(data_type{type_id::STRING}, + num_strings, + std::move(chars_buffer), std::move(null_mask), null_count, std::move(children)); @@ -151,7 +174,6 @@ std::unique_ptr make_strings_column(size_type num_strings, if (num_strings == 0) { return make_empty_column(type_id::STRING); } auto const offsets_size = static_cast(offsets.size()); - auto const chars_size = static_cast(chars.size()); if (null_count > 0) CUDF_EXPECTS(null_mask.size() > 0, "Column with nulls must be nullable."); @@ -164,21 +186,13 @@ std::unique_ptr make_strings_column(size_type num_strings, rmm::device_buffer(), 0); - auto chars_column = std::make_unique( // - data_type{type_id::INT8}, - chars_size, - chars.release(), - rmm::device_buffer(), - 0); - auto children = std::vector>(); children.emplace_back(std::move(offsets_column)); - children.emplace_back(std::move(chars_column)); return std::make_unique(data_type{type_id::STRING}, num_strings, - rmm::device_buffer{}, + chars.release(), std::move(null_mask), null_count, std::move(children)); diff --git a/cpp/src/strings/strings_column_view.cpp b/cpp/src/strings/strings_column_view.cpp index 4b206666d4b..27a8c6fb17f 100644 --- a/cpp/src/strings/strings_column_view.cpp +++ b/cpp/src/strings/strings_column_view.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2023, NVIDIA CORPORATION. + * Copyright (c) 2019-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -14,9 +14,12 @@ * limitations under the License. 
*/ +#include #include #include +#include + namespace cudf { // strings_column_view::strings_column_view(column_view strings_column) : column_view(strings_column) @@ -42,26 +45,28 @@ strings_column_view::offset_iterator strings_column_view::offsets_end() const return offsets_begin() + size() + 1; } -column_view strings_column_view::chars() const +column_view strings_column_view::chars(rmm::cuda_stream_view stream) const { CUDF_EXPECTS(num_children() > 0, "strings column has no children"); - return child(chars_column_index); + return column_view( + data_type{type_id::INT8}, chars_size(stream), chars_begin(stream), nullptr, 0, 0); } -size_type strings_column_view::chars_size() const noexcept +size_type strings_column_view::chars_size(rmm::cuda_stream_view stream) const noexcept { if (size() == 0) return 0; - return chars().size(); + return detail::get_value(offsets(), offsets().size() - 1, stream); } -strings_column_view::chars_iterator strings_column_view::chars_begin() const +strings_column_view::chars_iterator strings_column_view::chars_begin(rmm::cuda_stream_view) const { - return chars().begin(); + return head(); } -strings_column_view::chars_iterator strings_column_view::chars_end() const +strings_column_view::chars_iterator strings_column_view::chars_end( + rmm::cuda_stream_view stream) const { - return chars_begin() + chars_size(); + return chars_begin(stream) + chars_size(stream); } } // namespace cudf diff --git a/cpp/src/strings/wrap.cu b/cpp/src/strings/wrap.cu index aa87a663964..19f1ac55bb0 100644 --- a/cpp/src/strings/wrap.cu +++ b/cpp/src/strings/wrap.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020-2023, NVIDIA CORPORATION. + * Copyright (c) 2020-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -26,6 +26,7 @@ #include #include +#include #include #include @@ -111,8 +112,11 @@ std::unique_ptr wrap(strings_column_view const& strings, auto offsets_column = std::make_unique(strings.offsets(), stream, mr); // makes a copy auto d_new_offsets = offsets_column->view().template data(); - auto chars_column = std::make_unique(strings.chars(), stream, mr); // makes a copy - auto d_chars = chars_column->mutable_view().data(); + auto chars_buffer = rmm::device_buffer{strings.chars_begin(stream), + static_cast(strings.chars_size(stream)), + stream, + mr}; // makes a copy + auto d_chars = static_cast(chars_buffer.data()); device_execute_functor d_execute_fctr{d_column, d_new_offsets, d_chars, width}; @@ -123,7 +127,7 @@ std::unique_ptr wrap(strings_column_view const& strings, return make_strings_column(strings_count, std::move(offsets_column), - std::move(chars_column), + std::move(chars_buffer), null_count, std::move(null_mask)); } diff --git a/cpp/src/text/bpe/byte_pair_encoding.cu b/cpp/src/text/bpe/byte_pair_encoding.cu index 2d53faf548e..a697df913d3 100644 --- a/cpp/src/text/bpe/byte_pair_encoding.cu +++ b/cpp/src/text/bpe/byte_pair_encoding.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022-2023, NVIDIA CORPORATION. + * Copyright (c) 2022-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
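// --- Illustrative sketch (not part of the patch) --------------------------------------
// The invariant the new chars_size() implementation above relies on: for an un-sliced
// strings column the total byte count equals the last offsets entry. Example values are
// made up; the cudf_test wrapper is assumed to be available.
#include <cudf/strings/strings_column_view.hpp>
#include <cudf/utilities/default_stream.hpp>
#include <cudf_test/column_wrapper.hpp>

#include <cassert>

void check_chars_size()
{
  auto const col = cudf::test::strings_column_wrapper({"ab", "", "cde"}).release();
  cudf::strings_column_view scv(col->view());
  // offsets are [0, 2, 2, 5]; chars_size() reads the trailing 5 from device memory,
  // which is why it now takes a stream
  assert(scv.chars_size(cudf::get_default_stream()) == 5);
}
// ---------------------------------------------------------------------------------------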
@@ -342,7 +342,7 @@ std::unique_ptr byte_pair_encoding(cudf::strings_column_view const rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) { - if (input.is_empty() || input.chars_size() == 0) { + if (input.is_empty() || input.chars_size(stream) == 0) { return cudf::make_empty_column(cudf::type_id::STRING); } @@ -356,11 +356,11 @@ std::unique_ptr byte_pair_encoding(cudf::strings_column_view const : cudf::detail::get_value( input.offsets(), input.offset(), stream); auto const last_offset = (input.offset() == 0 && input.size() == input.offsets().size() - 1) - ? input.chars().size() + ? input.chars_size(stream) : cudf::detail::get_value( input.offsets(), input.size() + input.offset(), stream); auto const chars_size = last_offset - first_offset; - auto const d_input_chars = input.chars().data() + first_offset; + auto const d_input_chars = input.chars_begin(stream) + first_offset; auto const offset_data_type = cudf::data_type{cudf::type_to_id()}; auto offsets = cudf::make_numeric_column( @@ -406,7 +406,7 @@ std::unique_ptr byte_pair_encoding(cudf::strings_column_view const cudf::column_view(cudf::device_span(tmp_offsets)); auto const tmp_size = offsets_total - 1; auto const tmp_input = cudf::column_view( - input.parent().type(), tmp_size, nullptr, nullptr, 0, 0, {col_offsets, input.chars()}); + input.parent().type(), tmp_size, input.chars_begin(stream), nullptr, 0, 0, {col_offsets}); auto const d_tmp_strings = cudf::column_device_view::create(tmp_input, stream); // launch the byte-pair-encoding kernel on the temp column diff --git a/cpp/src/text/generate_ngrams.cu b/cpp/src/text/generate_ngrams.cu index 31e2405ce88..1d3e98a25ad 100644 --- a/cpp/src/text/generate_ngrams.cu +++ b/cpp/src/text/generate_ngrams.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020-2023, NVIDIA CORPORATION. + * Copyright (c) 2020-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -128,11 +128,11 @@ std::unique_ptr generate_ngrams(cudf::strings_column_view const& s // create a temporary column view from the non-empty offsets and chars column views cudf::column_view strings_view(cudf::data_type{cudf::type_id::STRING}, strings_count, - nullptr, + strings.chars_begin(stream), nullptr, 0, 0, - {non_empty_offsets_column->view(), strings.chars()}); + {non_empty_offsets_column->view()}); strings_column = cudf::column_device_view::create(strings_view, stream); d_strings = *strings_column; diff --git a/cpp/src/text/normalize.cu b/cpp/src/text/normalize.cu index 0fc1d221b15..5a0977d410f 100644 --- a/cpp/src/text/normalize.cu +++ b/cpp/src/text/normalize.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020-2023, NVIDIA CORPORATION. + * Copyright (c) 2020-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -210,7 +210,7 @@ std::unique_ptr normalize_characters(cudf::strings_column_view con auto const offsets = strings.offsets(); auto const d_offsets = offsets.data() + strings.offset(); auto const offset = cudf::detail::get_value(offsets, strings.offset(), stream); - auto const d_chars = strings.chars().data() + offset; + auto const d_chars = strings.chars_begin(stream) + offset; return normalizer.normalize(d_chars, d_offsets, strings.size(), stream); }(); diff --git a/cpp/src/text/subword/subword_tokenize.cu b/cpp/src/text/subword/subword_tokenize.cu index 1a3084a257f..a35d69e2145 100644 --- a/cpp/src/text/subword/subword_tokenize.cu +++ b/cpp/src/text/subword/subword_tokenize.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020-2023, NVIDIA CORPORATION. + * Copyright (c) 2020-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -186,7 +186,7 @@ tokenizer_result subword_tokenize(cudf::strings_column_view const& strings, auto const offsets = strings.offsets(); auto const d_offsets = offsets.data() + strings.offset(); auto const offset = cudf::detail::get_value(offsets, strings.offset(), stream); - auto const d_chars = strings.chars().data() + offset; + auto const d_chars = strings.chars_begin(stream) + offset; // Create tokenizer wordpiece_tokenizer tokenizer( diff --git a/cpp/src/text/tokenize.cu b/cpp/src/text/tokenize.cu index 87f6a61a533..c43b9dda56c 100644 --- a/cpp/src/text/tokenize.cu +++ b/cpp/src/text/tokenize.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020-2023, NVIDIA CORPORATION. + * Copyright (c) 2020-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -182,7 +182,8 @@ std::unique_ptr character_tokenize(cudf::strings_column_view const auto chars_bytes = cudf::detail::get_value( offsets, strings_column.offset() + strings_count, stream) - offset; - auto d_chars = strings_column.chars().data(); // unsigned is necessary for checking bits + auto d_chars = + strings_column.parent().data(); // unsigned is necessary for checking bits d_chars += offset; // To minimize memory, count the number of characters so we can diff --git a/cpp/src/text/vocabulary_tokenize.cu b/cpp/src/text/vocabulary_tokenize.cu index 511f1995374..91f4c304590 100644 --- a/cpp/src/text/vocabulary_tokenize.cu +++ b/cpp/src/text/vocabulary_tokenize.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2023, NVIDIA CORPORATION. + * Copyright (c) 2023-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -240,10 +240,8 @@ __global__ void token_counts_fn(cudf::column_device_view const d_strings, auto const offsets = d_strings.child(cudf::strings_column_view::offsets_column_index).data(); - auto const offset = offsets[str_idx + d_strings.offset()] - offsets[d_strings.offset()]; - auto const chars_begin = - d_strings.child(cudf::strings_column_view::chars_column_index).data() + - offsets[d_strings.offset()]; + auto const offset = offsets[str_idx + d_strings.offset()] - offsets[d_strings.offset()]; + auto const chars_begin = d_strings.data() + offsets[d_strings.offset()]; auto const begin = d_str.data(); auto const end = begin + d_str.size_bytes(); @@ -372,7 +370,7 @@ std::unique_ptr tokenize_with_vocabulary(cudf::strings_column_view auto map_ref = vocabulary._impl->get_map_ref(); auto const zero_itr = thrust::make_counting_iterator(0); - if ((input.chars_size() / (input.size() - input.null_count())) < AVG_CHAR_BYTES_THRESHOLD) { + if ((input.chars_size(stream) / (input.size() - input.null_count())) < AVG_CHAR_BYTES_THRESHOLD) { auto const sizes_itr = cudf::detail::make_counting_transform_iterator(0, strings_tokenizer{*d_strings, d_delimiter}); auto [token_offsets, total_count] = @@ -401,11 +399,11 @@ std::unique_ptr tokenize_with_vocabulary(cudf::strings_column_view : cudf::detail::get_value( input.offsets(), input.offset(), stream); auto const last_offset = (input.offset() == 0 && input.size() == input.offsets().size() - 1) - ? input.chars().size() + ? input.chars_size(stream) : cudf::detail::get_value( input.offsets(), input.size() + input.offset(), stream); auto const chars_size = last_offset - first_offset; - auto const d_input_chars = input.chars().data() + first_offset; + auto const d_input_chars = input.chars_begin(stream) + first_offset; rmm::device_uvector d_token_counts(input.size(), stream); rmm::device_uvector d_marks(chars_size, stream); @@ -436,9 +434,8 @@ std::unique_ptr tokenize_with_vocabulary(cudf::strings_column_view auto tmp_offsets = std::make_unique(std::move(d_tmp_offsets), rmm::device_buffer{}, 0); - auto tmp_chars = cudf::column_view(input.chars().type(), chars_size, d_input_chars, nullptr, 0); auto const tmp_input = cudf::column_view( - input.parent().type(), total_count, nullptr, nullptr, 0, 0, {tmp_offsets->view(), tmp_chars}); + input.parent().type(), total_count, d_input_chars, nullptr, 0, 0, {tmp_offsets->view()}); auto const d_tmp_strings = cudf::column_device_view::create(tmp_input, stream); diff --git a/cpp/src/transform/row_conversion.cu b/cpp/src/transform/row_conversion.cu index a1c5827e5da..b797e495480 100644 --- a/cpp/src/transform/row_conversion.cu +++ b/cpp/src/transform/row_conversion.cu @@ -1994,11 +1994,9 @@ std::vector> convert_to_rows( CUDF_EXPECTS(!variable_width_table.is_empty(), "No variable-width columns when expected!"); CUDF_EXPECTS(variable_width_offsets.has_value(), "No variable width offset data!"); - auto const variable_data_begin = - thrust::make_transform_iterator(variable_width_table.begin(), [](auto const& c) { - strings_column_view const scv{c}; - return is_compound(c.type()) ? scv.chars().template data() : nullptr; - }); + auto const variable_data_begin = thrust::make_transform_iterator( + variable_width_table.begin(), + [](auto const& c) { return is_compound(c.type()) ? 
c.template data() : nullptr; }); std::vector variable_width_input_data( variable_data_begin, variable_data_begin + variable_width_table.num_columns()); diff --git a/cpp/tests/io/json_type_cast_test.cu b/cpp/tests/io/json_type_cast_test.cu index 036b9170250..6923b7be42d 100644 --- a/cpp/tests/io/json_type_cast_test.cu +++ b/cpp/tests/io/json_type_cast_test.cu @@ -95,7 +95,7 @@ TEST_F(JSONTypeCastTest, String) std::get<0>(cudf::test::detail::make_null_mask(null_mask_it, null_mask_it + column.size())); auto str_col = cudf::io::json::detail::parse_data( - column.chars().data(), + column.chars_begin(stream), thrust::make_zip_iterator(thrust::make_tuple(column.offsets_begin(), svs_length.begin())), column.size(), type, @@ -128,7 +128,7 @@ TEST_F(JSONTypeCastTest, Int) std::get<0>(cudf::test::detail::make_null_mask(null_mask_it, null_mask_it + column.size())); auto col = cudf::io::json::detail::parse_data( - column.chars().data(), + column.chars_begin(stream), thrust::make_zip_iterator(thrust::make_tuple(column.offsets_begin(), svs_length.begin())), column.size(), type, @@ -168,7 +168,7 @@ TEST_F(JSONTypeCastTest, StringEscapes) std::get<0>(cudf::test::detail::make_null_mask(null_mask_it, null_mask_it + column.size())); auto col = cudf::io::json::detail::parse_data( - column.chars().data(), + column.chars_begin(stream), thrust::make_zip_iterator(thrust::make_tuple(column.offsets_begin(), svs_length.begin())), column.size(), type, @@ -237,7 +237,7 @@ TEST_F(JSONTypeCastTest, ErrorNulls) std::get<0>(cudf::test::detail::make_null_mask(null_mask_it, null_mask_it + column.size())); auto str_col = cudf::io::json::detail::parse_data( - column.chars().data(), + column.chars_begin(stream), thrust::make_zip_iterator(thrust::make_tuple(column.offsets_begin(), svs_length.begin())), column.size(), type, diff --git a/cpp/tests/strings/array_tests.cpp b/cpp/tests/strings/array_tests.cpp index d1e0dfb1ff1..c6cc8e078bb 100644 --- a/cpp/tests/strings/array_tests.cpp +++ b/cpp/tests/strings/array_tests.cpp @@ -25,6 +25,7 @@ #include #include #include +#include #include #include @@ -154,9 +155,14 @@ TEST_F(StringsColumnTest, GatherTooBig) std::vector h_chars(3000000); cudf::test::fixed_width_column_wrapper chars(h_chars.begin(), h_chars.end()); cudf::test::fixed_width_column_wrapper offsets({0, 3000000}); - auto input = cudf::column_view( - cudf::data_type{cudf::type_id::STRING}, 1, nullptr, nullptr, 0, 0, {offsets, chars}); - auto map = thrust::constant_iterator(0); + auto input = cudf::column_view(cudf::data_type{cudf::type_id::STRING}, + 1, + cudf::column_view(chars).begin(), + nullptr, + 0, + 0, + {offsets}); + auto map = thrust::constant_iterator(0); cudf::test::fixed_width_column_wrapper gather_map(map, map + 1000); EXPECT_THROW(cudf::gather(cudf::table_view{{input}}, gather_map), std::overflow_error); } @@ -220,7 +226,6 @@ TEST_F(StringsColumnTest, OffsetsBeginEnd) scv = cudf::strings_column_view(cudf::slice(input, {1, 5}).front()); EXPECT_EQ(std::distance(scv.offsets_begin(), scv.offsets_end()), static_cast(scv.size() + 1)); - EXPECT_EQ(std::distance(scv.chars_begin(), scv.chars_end()), 16L); } CUDF_TEST_PROGRAM_MAIN() diff --git a/cpp/tests/strings/factories_test.cu b/cpp/tests/strings/factories_test.cu index 1066738df72..5381ad63bc3 100644 --- a/cpp/tests/strings/factories_test.cu +++ b/cpp/tests/strings/factories_test.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2023, NVIDIA CORPORATION. + * Copyright (c) 2019-2024, NVIDIA CORPORATION. 
* * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -87,16 +87,18 @@ TEST_F(StringsFactoriesTest, CreateColumnFromPair) EXPECT_TRUE(column->nullable()); EXPECT_TRUE(column->has_nulls()); } - EXPECT_EQ(2, column->num_children()); + EXPECT_EQ(1, column->num_children()); + EXPECT_NE(nullptr, column->view().head()); cudf::strings_column_view strings_view(column->view()); EXPECT_EQ(strings_view.size(), count); EXPECT_EQ(strings_view.offsets().size(), count + 1); - EXPECT_EQ(strings_view.chars().size(), memsize); + EXPECT_EQ(strings_view.chars_size(cudf::get_default_stream()), memsize); // check string data auto h_chars_data = cudf::detail::make_std_vector_sync( - cudf::device_span(strings_view.chars().data(), strings_view.chars().size()), + cudf::device_span(strings_view.chars_begin(cudf::get_default_stream()), + strings_view.chars_size(cudf::get_default_stream())), cudf::get_default_stream()); auto h_offsets_data = cudf::detail::make_std_vector_sync( cudf::device_span( @@ -159,16 +161,18 @@ TEST_F(StringsFactoriesTest, CreateColumnFromOffsets) count, std::move(d_offsets), std::move(d_buffer), null_count, d_nulls.release()); EXPECT_EQ(column->type(), cudf::data_type{cudf::type_id::STRING}); EXPECT_EQ(column->null_count(), null_count); - EXPECT_EQ(2, column->num_children()); + EXPECT_EQ(1, column->num_children()); + EXPECT_NE(nullptr, column->view().head()); cudf::strings_column_view strings_view(column->view()); EXPECT_EQ(strings_view.size(), count); EXPECT_EQ(strings_view.offsets().size(), count + 1); - EXPECT_EQ(strings_view.chars().size(), memsize); + EXPECT_EQ(strings_view.chars_size(cudf::get_default_stream()), memsize); // check string data auto h_chars_data = cudf::detail::make_std_vector_sync( - cudf::device_span(strings_view.chars().data(), strings_view.chars().size()), + cudf::device_span(strings_view.chars_begin(cudf::get_default_stream()), + strings_view.chars_size(cudf::get_default_stream())), cudf::get_default_stream()); auto h_offsets_data = cudf::detail::make_std_vector_sync( cudf::device_span( diff --git a/cpp/tests/utilities_tests/column_wrapper_tests.cpp b/cpp/tests/utilities_tests/column_wrapper_tests.cpp index da17e33e11a..479c6687e75 100644 --- a/cpp/tests/utilities_tests/column_wrapper_tests.cpp +++ b/cpp/tests/utilities_tests/column_wrapper_tests.cpp @@ -255,7 +255,7 @@ TYPED_TEST(StringsColumnWrapperTest, NullablePairListConstructorAllNull) EXPECT_EQ(view.size(), count); EXPECT_EQ(view.offsets().size(), count + 1); // all null entries results in no data allocated to chars - EXPECT_EQ(nullptr, view.chars().head()); + EXPECT_EQ(nullptr, view.parent().head()); EXPECT_NE(nullptr, view.offsets().head()); EXPECT_TRUE(view.has_nulls()); EXPECT_EQ(view.null_count(), 5); diff --git a/java/src/main/java/ai/rapids/cudf/JCudfSerialization.java b/java/src/main/java/ai/rapids/cudf/JCudfSerialization.java index 7deb5bae541..666a8864003 100644 --- a/java/src/main/java/ai/rapids/cudf/JCudfSerialization.java +++ b/java/src/main/java/ai/rapids/cudf/JCudfSerialization.java @@ -1742,7 +1742,7 @@ private static long buildColumnView(SerializedColumnHeader column, } DType dtype = column.getType(); long bufferAddress = combinedBuffer.getAddress(); - long dataAddress = dtype.isNestedType() ? 0 : bufferAddress + offsetsInfo.data; + long dataAddress = offsetsInfo.dataLen == 0 ? 0 : bufferAddress + offsetsInfo.data; long validityAddress = needsValidityBuffer(column.getNullCount()) ? 
bufferAddress + offsetsInfo.validity : 0; long offsetsAddress = dtype.hasOffsets() ? bufferAddress + offsetsInfo.offsets : 0; diff --git a/java/src/main/native/src/ColumnViewJni.cpp b/java/src/main/native/src/ColumnViewJni.cpp index 675996df71c..47dc802cd49 100644 --- a/java/src/main/native/src/ColumnViewJni.cpp +++ b/java/src/main/native/src/ColumnViewJni.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2023, NVIDIA CORPORATION. + * Copyright (c) 2019-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -111,6 +111,9 @@ std::size_t calc_device_memory_size(cudf::column_view const &view, bool const pa auto dtype = view.type(); if (cudf::is_fixed_width(dtype)) { total += pad_size(cudf::size_of(dtype) * view.size(), pad_for_cpu); + } else if (dtype.id() == cudf::type_id::STRING) { + auto scv = cudf::strings_column_view(view); + total += pad_size(scv.chars_size(cudf::get_default_stream()), pad_for_cpu); } return std::accumulate(view.child_begin(), view.child_end(), total, @@ -1974,18 +1977,11 @@ JNIEXPORT jlong JNICALL Java_ai_rapids_cudf_ColumnView_makeCudfColumnView( new cudf::column_view(cudf::data_type{cudf::type_id::STRING}, 0, nullptr, nullptr, 0)); } else { JNI_NULL_CHECK(env, j_offset, "offset is null", 0); - // This must be kept in sync with how string columns are created - // offsets are always the first child - // data is the second child - cudf::size_type *offsets = reinterpret_cast(j_offset); cudf::column_view offsets_column(cudf::data_type{cudf::type_id::INT32}, size + 1, offsets, nullptr, 0); - cudf::column_view data_column(cudf::data_type{cudf::type_id::INT8}, j_data_size, data, - nullptr, 0); return ptr_as_jlong(new cudf::column_view(cudf::data_type{cudf::type_id::STRING}, size, - nullptr, valid, j_null_count, 0, - {offsets_column, data_column})); + data, valid, j_null_count, 0, {offsets_column})); } } else if (n_type == cudf::type_id::LIST) { JNI_NULL_CHECK(env, j_children, "children of a list are null", 0); @@ -2082,8 +2078,7 @@ JNIEXPORT jlong JNICALL Java_ai_rapids_cudf_ColumnView_getNativeDataAddress(JNIE if (column->type().id() == cudf::type_id::STRING) { if (column->size() > 0) { cudf::strings_column_view view = cudf::strings_column_view(*column); - cudf::column_view data_view = view.chars(); - result = reinterpret_cast(data_view.data()); + result = reinterpret_cast(view.chars_begin(cudf::get_default_stream())); } } else if (column->type().id() != cudf::type_id::LIST && column->type().id() != cudf::type_id::STRUCT) { @@ -2104,8 +2099,7 @@ JNIEXPORT jlong JNICALL Java_ai_rapids_cudf_ColumnView_getNativeDataLength(JNIEn if (column->type().id() == cudf::type_id::STRING) { if (column->size() > 0) { cudf::strings_column_view view = cudf::strings_column_view(*column); - cudf::column_view data_view = view.chars(); - result = data_view.size(); + result = view.chars_size(cudf::get_default_stream()); } } else if (column->type().id() != cudf::type_id::LIST && column->type().id() != cudf::type_id::STRUCT) { diff --git a/java/src/main/native/src/TableJni.cpp b/java/src/main/native/src/TableJni.cpp index d7d0279174d..295574858da 100644 --- a/java/src/main/native/src/TableJni.cpp +++ b/java/src/main/native/src/TableJni.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2023, NVIDIA CORPORATION. + * Copyright (c) 2019-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -905,12 +905,12 @@ cudf::column_view remove_validity_from_col(cudf::column_view column_view) { children.push_back(remove_validity_from_col(*it)); } if (!column_view.nullable() || column_view.null_count() != 0) { - return cudf::column_view(column_view.type(), column_view.size(), nullptr, + return cudf::column_view(column_view.type(), column_view.size(), column_view.head(), column_view.null_mask(), column_view.null_count(), column_view.offset(), children); } else { - return cudf::column_view(column_view.type(), column_view.size(), nullptr, nullptr, 0, - column_view.offset(), children); + return cudf::column_view(column_view.type(), column_view.size(), column_view.head(), nullptr, + 0, column_view.offset(), children); } } } diff --git a/python/cudf/cudf/_lib/column.pyx b/python/cudf/cudf/_lib/column.pyx index 0edf9f8aa95..acd0ba519dd 100644 --- a/python/cudf/cudf/_lib/column.pyx +++ b/python/cudf/cudf/_lib/column.pyx @@ -1,4 +1,4 @@ -# Copyright (c) 2020-2023, NVIDIA CORPORATION. +# Copyright (c) 2020-2024, NVIDIA CORPORATION. from typing import Literal @@ -39,6 +39,7 @@ from cudf._lib.types cimport ( from cudf._lib.null_mask import bitmask_allocation_size_bytes from cudf._lib.types import dtype_from_pylibcudf_column +cimport cudf._lib.cpp.copying as cpp_copying cimport cudf._lib.cpp.types as libcudf_types cimport cudf._lib.cpp.unary as libcudf_unary from cudf._lib.cpp.column.column cimport column, column_contents @@ -52,6 +53,19 @@ from cudf._lib.cpp.scalar.scalar cimport scalar from cudf._lib.scalar cimport DeviceScalar +cdef get_element(column_view col_view, size_type index): + + cdef unique_ptr[scalar] c_output + with nogil: + c_output = move( + cpp_copying.get_element(col_view, index) + ) + + return DeviceScalar.from_unique_ptr( + move(c_output), dtype=dtype_from_column_view(col_view) + ) + + cdef class Column: """ A Column stores columnar data in device memory. 
@@ -652,11 +666,29 @@ cdef class Column: mask_owner = mask_owner.base_mask base_size = owner.base_size base_nbytes = base_size * dtype_itemsize + # special case for string column + is_string_column = (cv.type().id() == libcudf_types.type_id.STRING) + if is_string_column: + # get the size from offset child column (device to host copy) + offsets_column_index = 0 + offset_child_column = cv.child(offsets_column_index) + if offset_child_column.size() == 0: + base_nbytes = 0 + else: + chars_size = get_element( + offset_child_column, offset_child_column.size()-1).value + base_nbytes = chars_size + if data_ptr: if data_owner is None: + buffer_size = ( + base_nbytes + if is_string_column + else ((size + offset) * dtype_itemsize) + ) data = as_buffer( rmm.DeviceBuffer(ptr=data_ptr, - size=(size+offset) * dtype_itemsize) + size=buffer_size) ) elif ( column_owner and diff --git a/python/cudf/cudf/core/column/column.py b/python/cudf/cudf/core/column/column.py index c13ec33c51c..705862c502a 100644 --- a/python/cudf/cudf/core/column/column.py +++ b/python/cudf/cudf/core/column/column.py @@ -1468,17 +1468,9 @@ def column_empty( ), ) elif dtype.kind in "OU" and not isinstance(dtype, DecimalDtype): - data = None + data = as_buffer(rmm.DeviceBuffer(size=0)) children = ( full(row_count + 1, 0, dtype=libcudf.types.size_type_dtype), - build_column( - data=as_buffer( - rmm.DeviceBuffer( - size=row_count * cudf.dtype("int8").itemsize - ) - ), - dtype="int8", - ), ) else: data = as_buffer(rmm.DeviceBuffer(size=row_count * dtype.itemsize)) @@ -1583,6 +1575,7 @@ def build_column( ) elif dtype.type in (np.object_, np.str_): return cudf.core.column.StringColumn( + data=data, mask=mask, size=size, offset=offset, diff --git a/python/cudf/cudf/core/column/string.py b/python/cudf/cudf/core/column/string.py index 84333fc205a..c47088caebc 100644 --- a/python/cudf/cudf/core/column/string.py +++ b/python/cudf/cudf/core/column/string.py @@ -5465,6 +5465,7 @@ class StringColumn(column.ColumnBase): def __init__( self, + data: Optional[Buffer] = None, mask: Optional[Buffer] = None, size: Optional[int] = None, # TODO: make non-optional offset: int = 0, @@ -5491,11 +5492,10 @@ def __init__( # all nulls-column: offsets = column.full(size + 1, 0, dtype=size_type_dtype) - chars = cudf.core.column.column_empty(0, dtype="int8") - children = (offsets, chars) + children = (offsets,) super().__init__( - data=None, + data=data, size=size, dtype=dtype, mask=mask, @@ -5516,7 +5516,7 @@ def copy(self, deep: bool = True): def start_offset(self) -> int: if self._start_offset is None: if ( - len(self.base_children) == 2 + len(self.base_children) == 1 and self.offset < self.base_children[0].size ): self._start_offset = int( @@ -5531,7 +5531,7 @@ def start_offset(self) -> int: def end_offset(self) -> int: if self._end_offset is None: if ( - len(self.base_children) == 2 + len(self.base_children) == 1 and (self.offset + self.size) < self.base_children[0].size ): self._end_offset = int( @@ -5547,16 +5547,14 @@ def end_offset(self) -> int: @cached_property def memory_usage(self) -> int: n = 0 - if len(self.base_children) == 2: + if self.data is not None: + n += self.data.size + if len(self.base_children) == 1: child0_size = (self.size + 1) * self.base_children[ 0 ].dtype.itemsize - child1_size = ( - self.end_offset - self.start_offset - ) * self.base_children[1].dtype.itemsize - - n += child0_size + child1_size + n += child0_size if self.nullable: n += cudf._lib.null_mask.bitmask_allocation_size_bytes(self.size) return n @@ -5568,6 +5566,24 @@ def 
base_size(self) -> int: else: return self.base_children[0].size - 1 + # override for string column + @property + def data(self): + if self.base_data is None: + return None + if self._data is None: + if ( + self.offset == 0 + and len(self.base_children) > 0 + and self.size == self.base_children[0].size - 1 + ): + self._data = self.base_data + else: + self._data = self.base_data[ + self.start_offset : self.end_offset + ] + return self._data + def data_array_view( self, *, mode="write" ) -> cuda.devicearray.DeviceNDArray: @@ -5614,14 +5630,6 @@ def sum( else: return result_col - def set_base_data(self, value): - if value is not None: - raise RuntimeError( - "StringColumns do not use data attribute of Column, use " - "`set_base_children` instead" - ) - super().set_base_data(value) - def __contains__(self, item: ScalarLike) -> bool: if is_scalar(item): return True in libcudf.search.contains( @@ -5938,15 +5946,12 @@ def view(self, dtype) -> "cudf.core.column.ColumnBase": str_end_byte_offset = self.base_children[0].element_indexing( self.offset + self.size ) - char_dtype_size = self.base_children[1].dtype.itemsize - n_bytes_to_view = ( - str_end_byte_offset - str_byte_offset - ) * char_dtype_size + n_bytes_to_view = str_end_byte_offset - str_byte_offset to_view = column.build_column( - self.base_children[1].data, - dtype=self.base_children[1].dtype, + self.base_data, + dtype=cudf.api.types.dtype("int8"), offset=str_byte_offset, size=n_bytes_to_view, ) diff --git a/python/cudf/cudf/core/df_protocol.py b/python/cudf/cudf/core/df_protocol.py index 6e1c5f6fd00..c97d6dcdd2d 100644 --- a/python/cudf/cudf/core/df_protocol.py +++ b/python/cudf/cudf/core/df_protocol.py @@ -1,4 +1,4 @@ -# Copyright (c) 2021-2023, NVIDIA CORPORATION. +# Copyright (c) 2021-2024, NVIDIA CORPORATION. 
import enum from collections import abc @@ -482,7 +482,9 @@ def _get_data_buffer( dtype = self._dtype_from_cudfdtype(col_data.dtype) elif self.dtype[0] == _DtypeKind.STRING: - col_data = self._col.children[1] + col_data = build_column( + data=self._col.data, dtype=np.dtype("int8") + ) dtype = self._dtype_from_cudfdtype(col_data.dtype) else: diff --git a/python/cudf/cudf/tests/data/pkl/stringColumnWithRangeIndex_cudf_0.16.pkl b/python/cudf/cudf/tests/data/pkl/stringColumnWithRangeIndex_cudf_0.16.pkl deleted file mode 100644 index 97c745c1dd0c214b12defd0baf07b763d7f5296c..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 1709 zcmcgt-%k@k5Wd2d3SI)%8vGGLL@|xAjSv%j&`3CAqLA_Rvc zkk{SBYgXNe)2c@VX>!u0ASYj<{Yu;DGsB`-)IAzg?vud(M&SU~VGqi3TI_7m%E`V5 z_Tzo?cri_BTqJlEq~%6I(hln(rjEfbVuV5#AxEGC=u=32;&ZaD;F(9+nb`rP^i(w# z+z(rrX+s;f&WGqRaNbi8z+S@$D_Mkmn6$TcPJToR#@;@KUiT1Tmz{B$+l5-K_U0Ep z`ono4pn<_|>R=T=uBZ}*dFe^BP2$aS%72K-=dQ822X^-ew(grj&2q)tRz;F!vwXec&S$AfL|o3$!jLtNswTV)Hx7($%5YVtCm~IDW)hRg^m6O}t z7nC#ts+4*N2|CLgdZeU-ww>T0CY%!-F;<<^lZI9UIh#F&&`Vv8ki8Drxdx=;C#Tw& zy<hGlb8@cygRsaA1 diff --git a/python/cudf/cudf/tests/data/pkl/stringColumnWithRangeIndex_cudf_23.12.pkl b/python/cudf/cudf/tests/data/pkl/stringColumnWithRangeIndex_cudf_23.12.pkl new file mode 100644 index 0000000000000000000000000000000000000000..1ec077d10f77f4b3b3a8cc1bbfee559707b60dfc GIT binary patch literal 1394 zcmbtU-)qxQ6mGZFx*6!K9aA4gr4KeZ5$40j>LQ{BD$Hjox5;g9SelgkgE>*?OI-{1 zZD)V4o_n)w-Ry8`Aj!$iz27RrL8m8S8T?_E*h)^vM$^<5Wd#d>!agEn4JoBtLvvENwKjjI@<8*+YK3T)J z{*XNaS0Y0J{5?e2DiA8E8jg<6f1(6SA78<2dW^K2!LlxYgHEUO;U`}95$sCnA=rHj z)`~^TgfOqmOgl#2Yr@||=T8ggbK&y3FumRxbeu-?i%pMfO%&D?rCq5ANjQpJ(vFbX z2m;3#6b&Q0DO>QJvD5M3CR{HS(qgU)`l^LVcvK{zc9DcUZoRrs(gA&MNtn7uUL~dP z=2R|KN=aAq`Xrd(=5#uxx|+~*AeqT{GjM};(4czdkjIjeUP4VajzQr+y>9y=pB6)f z(}ZwNuut4Br(u?2o2gKmsZumhHI4EuC#c>8{BjTS9x4a!1lSI%o8D5J^Xb3ZTPFQ8 z-(@kQNs=9AJc$68*f!fWnCx|d*v5}{GrwI7k2AIY`n4Fnk)t;Z+!Ef#i+gsP6V%K^ F?-%(Z>0$r? literal 0 HcmV?d00001 diff --git a/python/cudf/cudf/tests/test_df_protocol.py b/python/cudf/cudf/tests/test_df_protocol.py index d6134c7bb01..bffbade14d8 100644 --- a/python/cudf/cudf/tests/test_df_protocol.py +++ b/python/cudf/cudf/tests/test_df_protocol.py @@ -1,4 +1,4 @@ -# Copyright (c) 2021-2023, NVIDIA CORPORATION. +# Copyright (c) 2021-2024, NVIDIA CORPORATION. from typing import Any, Tuple @@ -112,7 +112,8 @@ def assert_column_equal(col: _CuDFColumn, cudfcol): assert col.get_buffers()["offsets"] is None elif col.dtype[0] == _DtypeKind.STRING: - assert_buffer_equal(col.get_buffers()["data"], cudfcol.children[1]) + chars_col = build_column(data=cudfcol.data, dtype="int8") + assert_buffer_equal(col.get_buffers()["data"], chars_col) assert_buffer_equal(col.get_buffers()["offsets"], cudfcol.children[0]) else: diff --git a/python/cudf/cudf/tests/test_serialize.py b/python/cudf/cudf/tests/test_serialize.py index cac170cce55..87efe6bbbcc 100644 --- a/python/cudf/cudf/tests/test_serialize.py +++ b/python/cudf/cudf/tests/test_serialize.py @@ -1,5 +1,6 @@ # Copyright (c) 2018-2024, NVIDIA CORPORATION. 
+import itertools import pickle import msgpack @@ -115,6 +116,7 @@ ] ), ], + ids=itertools.count(), ) @pytest.mark.parametrize("to_host", [True, False]) def test_serialize(df, to_host): @@ -368,8 +370,8 @@ def test_serialize_string_check_buffer_sizes(): assert expect == got -def test_deserialize_cudf_0_16(datadir): - fname = datadir / "pkl" / "stringColumnWithRangeIndex_cudf_0.16.pkl" +def test_deserialize_cudf_23_12(datadir): + fname = datadir / "pkl" / "stringColumnWithRangeIndex_cudf_23.12.pkl" expected = cudf.DataFrame({"a": ["hi", "hello", "world", None]}) with open(fname, "rb") as f: diff --git a/python/cudf/cudf/tests/test_testing.py b/python/cudf/cudf/tests/test_testing.py index e6658040663..3024c8e2e7b 100644 --- a/python/cudf/cudf/tests/test_testing.py +++ b/python/cudf/cudf/tests/test_testing.py @@ -1,4 +1,4 @@ -# Copyright (c) 2020-2022, NVIDIA CORPORATION. +# Copyright (c) 2020-2024, NVIDIA CORPORATION. import numpy as np import pandas as pd @@ -431,8 +431,8 @@ def test_assert_column_memory_basic_same(arrow_arrays): data = cudf.core.column.ColumnBase.from_arrow(arrow_arrays) buf = cudf.core.buffer.as_buffer(data.base_data) - left = cudf.core.column.build_column(buf, dtype=np.int32) - right = cudf.core.column.build_column(buf, dtype=np.int32) + left = cudf.core.column.build_column(buf, dtype=np.int8) + right = cudf.core.column.build_column(buf, dtype=np.int8) assert_column_memory_eq(left, right) with pytest.raises(AssertionError): From 6abef4a4746f1f9917711f372726023efdc21e85 Mon Sep 17 00:00:00 2001 From: Robert Maynard Date: Wed, 17 Jan 2024 10:12:21 -0500 Subject: [PATCH 3/6] Ensure that all CUDA kernels in cudf have hidden visibility. (#14726) To correct potential issues when using a static cuda runtime, we mark all kernels with internal linkage via the `static` keyword or `hidden` visibility. Note: This doesn't fix dependencies, but focuses just on the CUDA kernels in cudf directly. 
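For illustration only (not part of this patch set): the sketch below shows the pattern this change standardizes on, built around the `CUDF_KERNEL` macro that the diff adds to `cudf/types.hpp`. Under nvcc the macro expands to `__global__ static`, so each kernel symbol gets internal linkage and cannot collide with an identically named kernel in another translation unit or when linking against a static CUDA runtime. The kernel name, launch configuration, and `main` here are hypothetical examples, not cudf code.

```cpp
#include <cstdio>

// Same definition the patch adds to cpp/include/cudf/types.hpp:
// under nvcc (__CUDACC__ defined) the kernel gets internal linkage via `static`.
#ifdef __CUDACC__
#define CUDF_KERNEL __global__ static
#else
#define CUDF_KERNEL static
#endif

// Before this change a kernel would be written `__global__ void scale_kernel(...)`,
// giving the symbol external linkage; with the macro it stays local to this
// translation unit. (scale_kernel is a made-up example, not a cudf kernel.)
CUDF_KERNEL void scale_kernel(float* data, float factor, int n)
{
  int const idx = blockIdx.x * blockDim.x + threadIdx.x;
  if (idx < n) { data[idx] *= factor; }
}

int main()
{
  int const n = 256;
  float* d_data = nullptr;
  cudaMalloc(&d_data, n * sizeof(float));
  cudaMemset(d_data, 0, n * sizeof(float));

  // Launch exactly as before; internal linkage changes nothing at the call site.
  scale_kernel<<<1, n>>>(d_data, 2.0f, n);
  cudaDeviceSynchronize();

  cudaFree(d_data);
  std::printf("done\n");
  return 0;
}
```

Using a macro rather than writing `static __global__` at every declaration keeps the change mechanical across the files touched here, and the host-only `#else` branch still lets non-CUDA compilers parse headers that declare kernels.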
Authors: - Robert Maynard (https://github.com/robertmaynard) - Nghia Truong (https://github.com/ttnghia) Approvers: - Bradley Dice (https://github.com/bdice) - Nghia Truong (https://github.com/ttnghia) URL: https://github.com/rapidsai/cudf/pull/14726 --- cpp/benchmarks/join/generate_input_tables.cuh | 30 ++-- .../type_dispatcher/type_dispatcher.cu | 8 +- cpp/examples/strings/custom_optimized.cu | 16 +- cpp/examples/strings/custom_prealloc.cu | 12 +- cpp/examples/strings/custom_with_malloc.cu | 14 +- cpp/include/cudf/detail/copy_if.cuh | 12 +- cpp/include/cudf/detail/copy_if_else.cuh | 4 +- cpp/include/cudf/detail/copy_range.cuh | 14 +- cpp/include/cudf/detail/null_mask.cuh | 24 +-- cpp/include/cudf/detail/utilities/cuda.cuh | 4 +- cpp/include/cudf/detail/valid_if.cuh | 18 +-- .../cudf/hashing/detail/helper_functions.cuh | 10 +- cpp/include/cudf/strings/detail/gather.cuh | 20 +-- cpp/include/cudf/types.hpp | 16 +- cpp/include/cudf_test/print_utilities.cuh | 4 +- cpp/src/binaryop/compiled/binary_ops.cuh | 4 +- cpp/src/binaryop/jit/kernel.cu | 28 ++-- cpp/src/bitmask/null_mask.cu | 30 ++-- cpp/src/copying/concatenate.cu | 24 +-- cpp/src/copying/contiguous_split.cu | 6 +- cpp/src/copying/scatter.cu | 8 +- cpp/src/io/avro/avro_gpu.cu | 4 +- cpp/src/io/comp/debrotli.cu | 4 +- cpp/src/io/comp/gpuinflate.cu | 6 +- cpp/src/io/comp/snap.cu | 4 +- cpp/src/io/comp/unsnap.cu | 4 +- cpp/src/io/csv/csv_gpu.cu | 8 +- cpp/src/io/fst/agent_dfa.cuh | 4 +- cpp/src/io/fst/dispatch_dfa.cuh | 4 +- cpp/src/io/json/legacy/json_gpu.cu | 30 ++-- cpp/src/io/orc/dict_enc.cu | 18 +-- cpp/src/io/orc/stats_enc.cu | 6 +- cpp/src/io/orc/stripe_data.cu | 6 +- cpp/src/io/orc/stripe_enc.cu | 16 +- cpp/src/io/orc/stripe_init.cu | 24 +-- cpp/src/io/orc/writer_impl.cu | 8 +- cpp/src/io/parquet/chunk_dict.cu | 10 +- cpp/src/io/parquet/decode_preprocess.cu | 4 +- cpp/src/io/parquet/page_data.cu | 4 +- cpp/src/io/parquet/page_delta_decode.cu | 8 +- cpp/src/io/parquet/page_enc.cu | 31 ++-- cpp/src/io/parquet/page_hdr.cu | 8 +- cpp/src/io/parquet/page_string_decode.cu | 12 +- cpp/src/io/statistics/column_statistics.cuh | 6 +- cpp/src/io/text/multibyte_split.cu | 8 +- cpp/src/io/utilities/data_casting.cu | 18 +-- cpp/src/io/utilities/parsing_utils.cu | 14 +- cpp/src/io/utilities/type_inference.cu | 12 +- cpp/src/join/conditional_join_kernels.cuh | 22 +-- cpp/src/join/mixed_join_kernel.cuh | 6 +- cpp/src/join/mixed_join_kernels.cuh | 3 +- cpp/src/join/mixed_join_kernels_semi.cu | 6 +- cpp/src/join/mixed_join_size_kernel.cuh | 31 ++-- cpp/src/join/mixed_join_size_kernels_semi.cu | 31 ++-- cpp/src/json/json_path.cu | 4 +- cpp/src/merge/merge.cu | 4 +- cpp/src/partitioning/partitioning.cu | 42 ++--- cpp/src/quantiles/tdigest/tdigest.cu | 16 +- .../quantiles/tdigest/tdigest_aggregation.cu | 20 +-- cpp/src/replace/nulls.cu | 22 +-- cpp/src/replace/replace.cu | 38 ++--- cpp/src/rolling/detail/rolling.cuh | 4 +- cpp/src/rolling/jit/kernel.cu | 20 +-- cpp/src/strings/attributes.cu | 4 +- cpp/src/strings/convert/convert_urls.cu | 12 +- cpp/src/strings/copying/concatenate.cu | 12 +- cpp/src/strings/regex/utilities.cuh | 12 +- cpp/src/strings/search/find.cu | 16 +- cpp/src/text/bpe/byte_pair_encoding.cu | 16 +- cpp/src/text/minhash.cu | 10 +- cpp/src/text/subword/data_normalizer.cu | 16 +- cpp/src/text/subword/subword_tokenize.cu | 2 +- cpp/src/text/subword/wordpiece_tokenizer.cu | 52 +++---- cpp/src/text/vocabulary_tokenize.cu | 8 +- cpp/src/transform/compute_column.cu | 4 +- 
cpp/src/transform/jit/kernel.cu | 4 +- cpp/src/transform/row_bit_count.cu | 10 +- cpp/src/transform/row_conversion.cu | 144 +++++++++--------- .../device_atomics/device_atomics_test.cu | 4 +- cpp/tests/error/error_handling_test.cu | 8 +- .../test_default_stream_identification.cu | 4 +- cpp/tests/scalar/scalar_device_view_test.cu | 18 +-- cpp/tests/streams/pool_test.cu | 4 +- cpp/tests/types/type_dispatcher_test.cu | 4 +- cpp/tests/utilities_tests/span_tests.cu | 6 +- 85 files changed, 627 insertions(+), 599 deletions(-) diff --git a/cpp/benchmarks/join/generate_input_tables.cuh b/cpp/benchmarks/join/generate_input_tables.cuh index 84e607a9f28..b14541564dd 100644 --- a/cpp/benchmarks/join/generate_input_tables.cuh +++ b/cpp/benchmarks/join/generate_input_tables.cuh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2023, NVIDIA CORPORATION. + * Copyright (c) 2019-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -31,7 +31,7 @@ #include -__global__ static void init_curand(curandState* state, int const nstates) +CUDF_KERNEL void init_curand(curandState* state, int const nstates) { int ithread = threadIdx.x + blockIdx.x * blockDim.x; @@ -39,11 +39,11 @@ __global__ static void init_curand(curandState* state, int const nstates) } template -__global__ static void init_build_tbl(key_type* const build_tbl, - size_type const build_tbl_size, - int const multiplicity, - curandState* state, - int const num_states) +CUDF_KERNEL void init_build_tbl(key_type* const build_tbl, + size_type const build_tbl_size, + int const multiplicity, + curandState* state, + int const num_states) { auto const start_idx = blockIdx.x * blockDim.x + threadIdx.x; auto const stride = blockDim.x * gridDim.x; @@ -61,14 +61,14 @@ __global__ static void init_build_tbl(key_type* const build_tbl, } template -__global__ void init_probe_tbl(key_type* const probe_tbl, - size_type const probe_tbl_size, - size_type const build_tbl_size, - key_type const rand_max, - double const selectivity, - int const multiplicity, - curandState* state, - int const num_states) +CUDF_KERNEL void init_probe_tbl(key_type* const probe_tbl, + size_type const probe_tbl_size, + size_type const build_tbl_size, + key_type const rand_max, + double const selectivity, + int const multiplicity, + curandState* state, + int const num_states) { auto const start_idx = blockIdx.x * blockDim.x + threadIdx.x; auto const stride = blockDim.x * gridDim.x; diff --git a/cpp/benchmarks/type_dispatcher/type_dispatcher.cu b/cpp/benchmarks/type_dispatcher/type_dispatcher.cu index 3f985cffb1f..161328ae088 100644 --- a/cpp/benchmarks/type_dispatcher/type_dispatcher.cu +++ b/cpp/benchmarks/type_dispatcher/type_dispatcher.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2023, NVIDIA CORPORATION. + * Copyright (c) 2019-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -58,7 +58,7 @@ constexpr int block_size = 256; // This is for NO_DISPATCHING template -__global__ void no_dispatching_kernel(T** A, cudf::size_type n_rows, cudf::size_type n_cols) +CUDF_KERNEL void no_dispatching_kernel(T** A, cudf::size_type n_rows, cudf::size_type n_cols) { using F = Functor; cudf::size_type index = blockIdx.x * blockDim.x + threadIdx.x; @@ -72,7 +72,7 @@ __global__ void no_dispatching_kernel(T** A, cudf::size_type n_rows, cudf::size_ // This is for HOST_DISPATCHING template -__global__ void host_dispatching_kernel(cudf::mutable_column_device_view source_column) +CUDF_KERNEL void host_dispatching_kernel(cudf::mutable_column_device_view source_column) { using F = Functor; T* A = source_column.data(); @@ -124,7 +124,7 @@ struct RowHandle { // This is for DEVICE_DISPATCHING template -__global__ void device_dispatching_kernel(cudf::mutable_table_device_view source) +CUDF_KERNEL void device_dispatching_kernel(cudf::mutable_table_device_view source) { cudf::size_type const n_rows = source.num_rows(); cudf::size_type index = threadIdx.x + blockIdx.x * blockDim.x; diff --git a/cpp/examples/strings/custom_optimized.cu b/cpp/examples/strings/custom_optimized.cu index 36521871ad8..522093bc647 100644 --- a/cpp/examples/strings/custom_optimized.cu +++ b/cpp/examples/strings/custom_optimized.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022-2023, NVIDIA CORPORATION. + * Copyright (c) 2022-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -37,9 +37,9 @@ * @param d_visibilities Column of visibilities * @param d_sizes Output sizes for each row */ -__global__ void sizes_kernel(cudf::column_device_view const d_names, - cudf::column_device_view const d_visibilities, - cudf::size_type* d_sizes) +__global__ static void sizes_kernel(cudf::column_device_view const d_names, + cudf::column_device_view const d_visibilities, + cudf::size_type* d_sizes) { // The row index is resolved from the CUDA thread/block objects auto index = threadIdx.x + blockIdx.x * blockDim.x; @@ -74,10 +74,10 @@ __global__ void sizes_kernel(cudf::column_device_view const d_names, * @param d_offsets Byte offset in `d_chars` for each row * @param d_chars Output memory for all rows */ -__global__ void redact_kernel(cudf::column_device_view const d_names, - cudf::column_device_view const d_visibilities, - cudf::size_type const* d_offsets, - char* d_chars) +__global__ static void redact_kernel(cudf::column_device_view const d_names, + cudf::column_device_view const d_visibilities, + cudf::size_type const* d_offsets, + char* d_chars) { // The row index is resolved from the CUDA thread/block objects auto index = threadIdx.x + blockIdx.x * blockDim.x; diff --git a/cpp/examples/strings/custom_prealloc.cu b/cpp/examples/strings/custom_prealloc.cu index 5088ebd6168..93194899fe1 100644 --- a/cpp/examples/strings/custom_prealloc.cu +++ b/cpp/examples/strings/custom_prealloc.cu @@ -37,12 +37,12 @@ * @param d_offsets Byte offset in `d_chars` for each row * @param d_output Output array of string_view objects */ -__global__ void redact_kernel(cudf::column_device_view const d_names, - cudf::column_device_view const d_visibilities, - cudf::string_view redaction, - char* working_memory, - cudf::size_type const* d_offsets, - cudf::string_view* d_output) +__global__ static void redact_kernel(cudf::column_device_view const d_names, + cudf::column_device_view const d_visibilities, + cudf::string_view redaction, + char* working_memory, + 
cudf::size_type const* d_offsets, + cudf::string_view* d_output) { // The row index is resolved from the CUDA thread/block objects auto index = threadIdx.x + blockIdx.x * blockDim.x; diff --git a/cpp/examples/strings/custom_with_malloc.cu b/cpp/examples/strings/custom_with_malloc.cu index 32f7bf7cbd0..e02fb52cd76 100644 --- a/cpp/examples/strings/custom_with_malloc.cu +++ b/cpp/examples/strings/custom_with_malloc.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022-2023, NVIDIA CORPORATION. + * Copyright (c) 2022-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -64,10 +64,10 @@ void set_malloc_heap_size(size_t heap_size = 1073741824) // 1GB * @param redaction Redacted string replacement * @param d_output Output array of string_view objects */ -__global__ void redact_kernel(cudf::column_device_view const d_names, - cudf::column_device_view const d_visibilities, - cudf::string_view redaction, - cudf::string_view* d_output) +__global__ static void redact_kernel(cudf::column_device_view const d_names, + cudf::column_device_view const d_visibilities, + cudf::string_view redaction, + cudf::string_view* d_output) { // The row index is resolved from the CUDA thread/block objects auto index = threadIdx.x + blockIdx.x * blockDim.x; @@ -107,7 +107,9 @@ __global__ void redact_kernel(cudf::column_device_view const d_names, * @param redaction Redacted string replacement (not to be freed) * @param d_output Output array of string_view objects to free */ -__global__ void free_kernel(cudf::string_view redaction, cudf::string_view* d_output, int count) +__global__ static void free_kernel(cudf::string_view redaction, + cudf::string_view* d_output, + int count) { auto index = threadIdx.x + blockIdx.x * blockDim.x; if (index >= count) return; diff --git a/cpp/include/cudf/detail/copy_if.cuh b/cpp/include/cudf/detail/copy_if.cuh index ebe7e052b6d..1d051ea32ff 100644 --- a/cpp/include/cudf/detail/copy_if.cuh +++ b/cpp/include/cudf/detail/copy_if.cuh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2023, NVIDIA CORPORATION. + * Copyright (c) 2019-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -52,10 +52,10 @@ namespace detail { // Compute the count of elements that pass the mask within each block template -__global__ void compute_block_counts(cudf::size_type* __restrict__ block_counts, - cudf::size_type size, - cudf::size_type per_thread, - Filter filter) +CUDF_KERNEL void compute_block_counts(cudf::size_type* __restrict__ block_counts, + cudf::size_type size, + cudf::size_type per_thread, + Filter filter) { int tid = threadIdx.x + per_thread * block_size * blockIdx.x; int count = 0; @@ -96,7 +96,7 @@ __device__ cudf::size_type block_scan_mask(bool mask_true, cudf::size_type& bloc // // Note: `filter` is not run on indices larger than the input column size template -__launch_bounds__(block_size) __global__ +__launch_bounds__(block_size) CUDF_KERNEL void scatter_kernel(cudf::mutable_column_device_view output_view, cudf::size_type* output_null_count, cudf::column_device_view input_view, diff --git a/cpp/include/cudf/detail/copy_if_else.cuh b/cpp/include/cudf/detail/copy_if_else.cuh index 04ad1f20196..6162fa5ecf1 100644 --- a/cpp/include/cudf/detail/copy_if_else.cuh +++ b/cpp/include/cudf/detail/copy_if_else.cuh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2023, NVIDIA CORPORATION. 
+ * Copyright (c) 2019-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -37,7 +37,7 @@ template -__launch_bounds__(block_size) __global__ +__launch_bounds__(block_size) CUDF_KERNEL void copy_if_else_kernel(LeftIter lhs, RightIter rhs, Filter filter, diff --git a/cpp/include/cudf/detail/copy_range.cuh b/cpp/include/cudf/detail/copy_range.cuh index 16e4e7a1297..4bfdaa94c53 100644 --- a/cpp/include/cudf/detail/copy_range.cuh +++ b/cpp/include/cudf/detail/copy_range.cuh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2023, NVIDIA CORPORATION. + * Copyright (c) 2019-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -40,12 +40,12 @@ template -__global__ void copy_range_kernel(SourceValueIterator source_value_begin, - SourceValidityIterator source_validity_begin, - cudf::mutable_column_device_view target, - cudf::size_type target_begin, - cudf::size_type target_end, - cudf::size_type* __restrict__ const null_count) +CUDF_KERNEL void copy_range_kernel(SourceValueIterator source_value_begin, + SourceValidityIterator source_validity_begin, + cudf::mutable_column_device_view target, + cudf::size_type target_begin, + cudf::size_type target_end, + cudf::size_type* __restrict__ const null_count) { using cudf::detail::warp_size; diff --git a/cpp/include/cudf/detail/null_mask.cuh b/cpp/include/cudf/detail/null_mask.cuh index ae05d4c6954..e57d85f2998 100644 --- a/cpp/include/cudf/detail/null_mask.cuh +++ b/cpp/include/cudf/detail/null_mask.cuh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021-2023, NVIDIA CORPORATION. + * Copyright (c) 2021-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -61,12 +61,12 @@ namespace detail { * @param count_ptr Pointer to counter of set bits */ template -__global__ void offset_bitmask_binop(Binop op, - device_span destination, - device_span source, - device_span source_begin_bits, - size_type source_size_bits, - size_type* count_ptr) +CUDF_KERNEL void offset_bitmask_binop(Binop op, + device_span destination, + device_span source, + device_span source_begin_bits, + size_type source_size_bits, + size_type* count_ptr) { auto const tid = threadIdx.x + blockIdx.x * blockDim.x; @@ -214,11 +214,11 @@ enum class count_bits_policy : bool { * in each range is updated. */ template -__global__ void subtract_set_bits_range_boundaries_kernel(bitmask_type const* bitmask, - size_type num_ranges, - OffsetIterator first_bit_indices, - OffsetIterator last_bit_indices, - OutputIterator null_counts) +CUDF_KERNEL void subtract_set_bits_range_boundaries_kernel(bitmask_type const* bitmask, + size_type num_ranges, + OffsetIterator first_bit_indices, + OffsetIterator last_bit_indices, + OutputIterator null_counts) { constexpr size_type const word_size_in_bits{detail::size_in_bits()}; diff --git a/cpp/include/cudf/detail/utilities/cuda.cuh b/cpp/include/cudf/detail/utilities/cuda.cuh index 264302df0e9..86c85ca8d06 100644 --- a/cpp/include/cudf/detail/utilities/cuda.cuh +++ b/cpp/include/cudf/detail/utilities/cuda.cuh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2023, NVIDIA CORPORATION. + * Copyright (c) 2019-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -211,7 +211,7 @@ __device__ inline T round_up_pow2(T number_to_round, T modulus) } template -__global__ void single_thread_kernel(F f) +CUDF_KERNEL void single_thread_kernel(F f) { f(); } diff --git a/cpp/include/cudf/detail/valid_if.cuh b/cpp/include/cudf/detail/valid_if.cuh index f3f95dad017..d0073177445 100644 --- a/cpp/include/cudf/detail/valid_if.cuh +++ b/cpp/include/cudf/detail/valid_if.cuh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2023, NVIDIA CORPORATION. + * Copyright (c) 2019-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -44,7 +44,7 @@ namespace detail { * @param[out] valid_count The count of set bits in the output bitmask */ template -__global__ void valid_if_kernel( +CUDF_KERNEL void valid_if_kernel( bitmask_type* output, InputIterator begin, size_type size, Predicate p, size_type* valid_count) { constexpr size_type leader_lane{0}; @@ -151,13 +151,13 @@ template -__global__ void valid_if_n_kernel(InputIterator1 begin1, - InputIterator2 begin2, - BinaryPredicate p, - bitmask_type* masks[], - size_type mask_count, - size_type mask_num_bits, - size_type* valid_counts) +CUDF_KERNEL void valid_if_n_kernel(InputIterator1 begin1, + InputIterator2 begin2, + BinaryPredicate p, + bitmask_type* masks[], + size_type mask_count, + size_type mask_num_bits, + size_type* valid_counts) { for (size_type mask_idx = 0; mask_idx < mask_count; mask_idx++) { auto const mask = masks[mask_idx]; diff --git a/cpp/include/cudf/hashing/detail/helper_functions.cuh b/cpp/include/cudf/hashing/detail/helper_functions.cuh index cd58ec5f57d..3489fdeccee 100644 --- a/cpp/include/cudf/hashing/detail/helper_functions.cuh +++ b/cpp/include/cudf/hashing/detail/helper_functions.cuh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2023, NVIDIA CORPORATION. + * Copyright (c) 2017-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -130,10 +130,10 @@ __forceinline__ __device__ void store_pair_vectorized(pair_type* __restrict__ co } template -__global__ void init_hashtbl(value_type* __restrict__ const hashtbl_values, - size_type const n, - key_type const key_val, - elem_type const elem_val) +CUDF_KERNEL void init_hashtbl(value_type* __restrict__ const hashtbl_values, + size_type const n, + key_type const key_val, + elem_type const elem_val) { size_type const idx = blockIdx.x * blockDim.x + threadIdx.x; if (idx < n) { diff --git a/cpp/include/cudf/strings/detail/gather.cuh b/cpp/include/cudf/strings/detail/gather.cuh index e681373e6e0..f7d2ebebe9a 100644 --- a/cpp/include/cudf/strings/detail/gather.cuh +++ b/cpp/include/cudf/strings/detail/gather.cuh @@ -78,11 +78,11 @@ __forceinline__ __device__ uint4 load_uint4(char const* ptr) * @param total_out_strings Number of output strings to be gathered. 
*/ template -__global__ void gather_chars_fn_string_parallel(StringIterator strings_begin, - char* out_chars, - cudf::detail::input_offsetalator const out_offsets, - MapIterator string_indices, - size_type total_out_strings) +CUDF_KERNEL void gather_chars_fn_string_parallel(StringIterator strings_begin, + char* out_chars, + cudf::detail::input_offsetalator const out_offsets, + MapIterator string_indices, + size_type total_out_strings) { constexpr size_t out_datatype_size = sizeof(uint4); constexpr size_t in_datatype_size = sizeof(uint); @@ -160,11 +160,11 @@ __global__ void gather_chars_fn_string_parallel(StringIterator strings_begin, * @param total_out_strings Number of output strings to be gathered. */ template -__global__ void gather_chars_fn_char_parallel(StringIterator strings_begin, - char* out_chars, - cudf::detail::input_offsetalator const out_offsets, - MapIterator string_indices, - size_type total_out_strings) +CUDF_KERNEL void gather_chars_fn_char_parallel(StringIterator strings_begin, + char* out_chars, + cudf::detail::input_offsetalator const out_offsets, + MapIterator string_indices, + size_type total_out_strings) { __shared__ int64_t out_offsets_threadblock[strings_per_threadblock + 1]; diff --git a/cpp/include/cudf/types.hpp b/cpp/include/cudf/types.hpp index addab160b6e..86750ea4ca8 100644 --- a/cpp/include/cudf/types.hpp +++ b/cpp/include/cudf/types.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2023, NVIDIA CORPORATION. + * Copyright (c) 2018-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -17,9 +17,23 @@ #pragma once #ifdef __CUDACC__ +/** + * @brief Indicates that the function or method is usable on host and device + */ #define CUDF_HOST_DEVICE __host__ __device__ +/** + * @brief Indicates that the function is a CUDA kernel + */ +#define CUDF_KERNEL __global__ static #else +/** + * @brief Indicates that the function or method is usable on host and device + */ #define CUDF_HOST_DEVICE +/** + * @brief Indicates that the function is a CUDA kernel + */ +#define CUDF_KERNEL static #endif #include diff --git a/cpp/include/cudf_test/print_utilities.cuh b/cpp/include/cudf_test/print_utilities.cuh index 37ffcd401fc..ae6c8cef029 100644 --- a/cpp/include/cudf_test/print_utilities.cuh +++ b/cpp/include/cudf_test/print_utilities.cuh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022, NVIDIA CORPORATION. + * Copyright (c) 2022-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -103,7 +103,7 @@ CUDF_HOST_DEVICE void print_values(int32_t width, char delimiter, T arg, Ts... a } template -__global__ void print_array_kernel(std::size_t count, int32_t width, char delimiter, Ts... args) +CUDF_KERNEL void print_array_kernel(std::size_t count, int32_t width, char delimiter, Ts... args) { if (threadIdx.x == 0 && blockIdx.x == 0) { for (std::size_t i = 0; i < count; i++) { diff --git a/cpp/src/binaryop/compiled/binary_ops.cuh b/cpp/src/binaryop/compiled/binary_ops.cuh index 9a50eb0d0ec..d605c877d3f 100644 --- a/cpp/src/binaryop/compiled/binary_ops.cuh +++ b/cpp/src/binaryop/compiled/binary_ops.cuh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021-2023, NVIDIA CORPORATION. + * Copyright (c) 2021-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -237,7 +237,7 @@ struct binary_op_double_device_dispatcher { * @param f Functor object to call for each element. */ template -__global__ void for_each_kernel(cudf::size_type size, Functor f) +CUDF_KERNEL void for_each_kernel(cudf::size_type size, Functor f) { int tid = threadIdx.x; int blkid = blockIdx.x; diff --git a/cpp/src/binaryop/jit/kernel.cu b/cpp/src/binaryop/jit/kernel.cu index c9cc61a4f34..39735a43474 100644 --- a/cpp/src/binaryop/jit/kernel.cu +++ b/cpp/src/binaryop/jit/kernel.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2021, NVIDIA CORPORATION. + * Copyright (c) 2019-2024, NVIDIA CORPORATION. * * Copyright 2018-2019 BlazingDB, Inc. * Copyright 2018 Christian Noboa Mardini @@ -43,10 +43,10 @@ struct UserDefinedOp { }; template -__global__ void kernel_v_v(cudf::size_type size, - TypeOut* out_data, - TypeLhs* lhs_data, - TypeRhs* rhs_data) +CUDF_KERNEL void kernel_v_v(cudf::size_type size, + TypeOut* out_data, + TypeLhs* lhs_data, + TypeRhs* rhs_data) { int tid = threadIdx.x; int blkid = blockIdx.x; @@ -62,15 +62,15 @@ __global__ void kernel_v_v(cudf::size_type size, } template -__global__ void kernel_v_v_with_validity(cudf::size_type size, - TypeOut* out_data, - TypeLhs* lhs_data, - TypeRhs* rhs_data, - cudf::bitmask_type* output_mask, - cudf::bitmask_type const* lhs_mask, - cudf::size_type lhs_offset, - cudf::bitmask_type const* rhs_mask, - cudf::size_type rhs_offset) +CUDF_KERNEL void kernel_v_v_with_validity(cudf::size_type size, + TypeOut* out_data, + TypeLhs* lhs_data, + TypeRhs* rhs_data, + cudf::bitmask_type* output_mask, + cudf::bitmask_type const* lhs_mask, + cudf::size_type lhs_offset, + cudf::bitmask_type const* rhs_mask, + cudf::size_type rhs_offset) { int tid = threadIdx.x; int blkid = blockIdx.x; diff --git a/cpp/src/bitmask/null_mask.cu b/cpp/src/bitmask/null_mask.cu index 1a1cbb17d15..bb320e4b81a 100644 --- a/cpp/src/bitmask/null_mask.cu +++ b/cpp/src/bitmask/null_mask.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2023, NVIDIA CORPORATION. + * Copyright (c) 2019-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -98,11 +98,11 @@ rmm::device_buffer create_null_mask(size_type size, } namespace { -__global__ void set_null_mask_kernel(bitmask_type* __restrict__ destination, - size_type begin_bit, - size_type end_bit, - bool valid, - size_type number_of_mask_words) +CUDF_KERNEL void set_null_mask_kernel(bitmask_type* __restrict__ destination, + size_type begin_bit, + size_type end_bit, + bool valid, + size_type number_of_mask_words) { auto x = destination + word_index(begin_bit); thread_index_type const last_word = word_index(end_bit) - word_index(begin_bit); @@ -190,11 +190,11 @@ namespace { * @param number_of_mask_words The number of `cudf::bitmask_type` words to copy */ // TODO: Also make binops test that uses offset in column_view -__global__ void copy_offset_bitmask(bitmask_type* __restrict__ destination, - bitmask_type const* __restrict__ source, - size_type source_begin_bit, - size_type source_end_bit, - size_type number_of_mask_words) +CUDF_KERNEL void copy_offset_bitmask(bitmask_type* __restrict__ destination, + bitmask_type const* __restrict__ source, + size_type source_begin_bit, + size_type source_end_bit, + size_type number_of_mask_words) { auto const stride = cudf::detail::grid_1d::grid_stride(); for (thread_index_type destination_word_index = grid_1d::global_thread_id(); @@ -260,10 +260,10 @@ namespace { * @param[out] global_count The number of non-zero bits in the specified range */ template -__global__ void count_set_bits_kernel(bitmask_type const* bitmask, - size_type first_bit_index, - size_type last_bit_index, - size_type* global_count) +CUDF_KERNEL void count_set_bits_kernel(bitmask_type const* bitmask, + size_type first_bit_index, + size_type last_bit_index, + size_type* global_count) { constexpr auto const word_size{detail::size_in_bits()}; diff --git a/cpp/src/copying/concatenate.cu b/cpp/src/copying/concatenate.cu index ddf39e21685..b1d850e0b27 100644 --- a/cpp/src/copying/concatenate.cu +++ b/cpp/src/copying/concatenate.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020-2023, NVIDIA CORPORATION. + * Copyright (c) 2020-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -111,12 +111,12 @@ auto create_device_views(host_span views, rmm::cuda_stream_vi * @param out_valid_count To hold the total number of valid bits set */ template -__global__ void concatenate_masks_kernel(column_device_view const* views, - size_t const* output_offsets, - size_type number_of_views, - bitmask_type* dest_mask, - size_type number_of_mask_bits, - size_type* out_valid_count) +CUDF_KERNEL void concatenate_masks_kernel(column_device_view const* views, + size_t const* output_offsets, + size_type number_of_views, + bitmask_type* dest_mask, + size_type number_of_mask_bits, + size_type* out_valid_count) { auto tidx = cudf::detail::grid_1d::global_thread_id(); auto const stride = cudf::detail::grid_1d::grid_stride(); @@ -187,11 +187,11 @@ size_type concatenate_masks(host_span views, namespace { template -__global__ void fused_concatenate_kernel(column_device_view const* input_views, - size_t const* input_offsets, - size_type num_input_views, - mutable_column_device_view output_view, - size_type* out_valid_count) +CUDF_KERNEL void fused_concatenate_kernel(column_device_view const* input_views, + size_t const* input_offsets, + size_type num_input_views, + mutable_column_device_view output_view, + size_type* out_valid_count) { auto const output_size = output_view.size(); auto* output_data = output_view.data(); diff --git a/cpp/src/copying/contiguous_split.cu b/cpp/src/copying/contiguous_split.cu index 54d0aa10353..d711f40605a 100644 --- a/cpp/src/copying/contiguous_split.cu +++ b/cpp/src/copying/contiguous_split.cu @@ -280,9 +280,9 @@ __device__ void copy_buffer(uint8_t* __restrict__ dst, * @param buf_info Information on the range of values to be copied for each destination buffer */ template -__global__ void copy_partitions(IndexToDstBuf index_to_buffer, - uint8_t const** src_bufs, - dst_buf_info* buf_info) +CUDF_KERNEL void copy_partitions(IndexToDstBuf index_to_buffer, + uint8_t const** src_bufs, + dst_buf_info* buf_info) { auto const buf_index = blockIdx.x; auto const src_buf_index = buf_info[buf_index].src_buf_index; diff --git a/cpp/src/copying/scatter.cu b/cpp/src/copying/scatter.cu index 8f326184012..517435503ee 100644 --- a/cpp/src/copying/scatter.cu +++ b/cpp/src/copying/scatter.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2023, NVIDIA CORPORATION. + * Copyright (c) 2019-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -50,9 +50,9 @@ namespace detail { namespace { template -__global__ void marking_bitmask_kernel(mutable_column_device_view destination, - MapIterator scatter_map, - size_type num_scatter_rows) +CUDF_KERNEL void marking_bitmask_kernel(mutable_column_device_view destination, + MapIterator scatter_map, + size_type num_scatter_rows) { auto row = cudf::detail::grid_1d::global_thread_id(); auto const stride = cudf::detail::grid_1d::grid_stride(); diff --git a/cpp/src/io/avro/avro_gpu.cu b/cpp/src/io/avro/avro_gpu.cu index 365f6d6875c..59177a68ee7 100644 --- a/cpp/src/io/avro/avro_gpu.cu +++ b/cpp/src/io/avro/avro_gpu.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2023, NVIDIA CORPORATION. + * Copyright (c) 2019-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -324,7 +324,7 @@ avro_decode_row(schemadesc_s const* schema, * @param[in] min_row_size Minimum size in bytes of a row */ // blockDim {32,num_warps,1} -__global__ void __launch_bounds__(num_warps * 32, 2) +CUDF_KERNEL void __launch_bounds__(num_warps * 32, 2) gpuDecodeAvroColumnData(device_span blocks, schemadesc_s* schema_g, device_span global_dictionary, diff --git a/cpp/src/io/comp/debrotli.cu b/cpp/src/io/comp/debrotli.cu index 8bafd054bdb..9c936fefd6c 100644 --- a/cpp/src/io/comp/debrotli.cu +++ b/cpp/src/io/comp/debrotli.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2023, NVIDIA CORPORATION. + * Copyright (c) 2018-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -1911,7 +1911,7 @@ static __device__ void ProcessCommands(debrotli_state_s* s, brotli_dictionary_s * @param scratch_size Size of scratch heap space (smaller sizes may result in serialization between * blocks) */ -__global__ void __launch_bounds__(block_size, 2) +CUDF_KERNEL void __launch_bounds__(block_size, 2) gpu_debrotli_kernel(device_span const> inputs, device_span const> outputs, device_span results, diff --git a/cpp/src/io/comp/gpuinflate.cu b/cpp/src/io/comp/gpuinflate.cu index 8993815e560..cd50545afbd 100644 --- a/cpp/src/io/comp/gpuinflate.cu +++ b/cpp/src/io/comp/gpuinflate.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2023, NVIDIA CORPORATION. + * Copyright (c) 2018-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -1024,7 +1024,7 @@ __device__ int parse_gzip_header(uint8_t const* src, size_t src_size) * @param parse_hdr If nonzero, indicates that the compressed bitstream includes a GZIP header */ template -__global__ void __launch_bounds__(block_size) +CUDF_KERNEL void __launch_bounds__(block_size) inflate_kernel(device_span const> inputs, device_span const> outputs, device_span results, @@ -1152,7 +1152,7 @@ __global__ void __launch_bounds__(block_size) * * @param inputs Source and destination information per block */ -__global__ void __launch_bounds__(1024) +CUDF_KERNEL void __launch_bounds__(1024) copy_uncompressed_kernel(device_span const> inputs, device_span const> outputs) { diff --git a/cpp/src/io/comp/snap.cu b/cpp/src/io/comp/snap.cu index 0428f4edaf2..a45e8b2083b 100644 --- a/cpp/src/io/comp/snap.cu +++ b/cpp/src/io/comp/snap.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2023, NVIDIA CORPORATION. + * Copyright (c) 2018-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -257,7 +257,7 @@ static __device__ uint32_t Match60(uint8_t const* src1, * @param[out] outputs Compression status per block * @param[in] count Number of blocks to compress */ -__global__ void __launch_bounds__(128) +CUDF_KERNEL void __launch_bounds__(128) snap_kernel(device_span const> inputs, device_span const> outputs, device_span results) diff --git a/cpp/src/io/comp/unsnap.cu b/cpp/src/io/comp/unsnap.cu index 504a2fe377c..46555a97e9c 100644 --- a/cpp/src/io/comp/unsnap.cu +++ b/cpp/src/io/comp/unsnap.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2023, NVIDIA CORPORATION. + * Copyright (c) 2018-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -628,7 +628,7 @@ __device__ void snappy_process_symbols(unsnap_state_s* s, int t, Storage& temp_s * @param[out] outputs Decompression status per block */ template -__global__ void __launch_bounds__(block_size) +CUDF_KERNEL void __launch_bounds__(block_size) unsnap_kernel(device_span const> inputs, device_span const> outputs, device_span results) diff --git a/cpp/src/io/csv/csv_gpu.cu b/cpp/src/io/csv/csv_gpu.cu index 248e17669bc..8252cccbdb9 100644 --- a/cpp/src/io/csv/csv_gpu.cu +++ b/cpp/src/io/csv/csv_gpu.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2023, NVIDIA CORPORATION. + * Copyright (c) 2019-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -168,7 +168,7 @@ __device__ __inline__ bool is_floatingpoint(long len, * @param row_offsets The start the CSV data of interest * @param d_column_data The count for each column data type */ -__global__ void __launch_bounds__(csvparse_block_dim) +CUDF_KERNEL void __launch_bounds__(csvparse_block_dim) data_type_detection(parse_options_view const opts, device_span csv_text, device_span const column_flags, @@ -305,7 +305,7 @@ __global__ void __launch_bounds__(csvparse_block_dim) * @param[out] valids The bitmaps indicating whether column fields are valid * @param[out] valid_counts The number of valid fields in each column */ -__global__ void __launch_bounds__(csvparse_block_dim) +CUDF_KERNEL void __launch_bounds__(csvparse_block_dim) convert_csv_to_cudf(cudf::io::parse_options_view options, device_span data, device_span column_flags, @@ -622,7 +622,7 @@ static inline __device__ rowctx32_t rowctx_inverse_merge_transform(uint64_t ctxt * @param escapechar Delimiter escape character * @param commentchar Comment line character (skip rows starting with this character) */ -__global__ void __launch_bounds__(rowofs_block_dim) +CUDF_KERNEL void __launch_bounds__(rowofs_block_dim) gather_row_offsets_gpu(uint64_t* row_ctx, device_span offsets_out, device_span const data, diff --git a/cpp/src/io/fst/agent_dfa.cuh b/cpp/src/io/fst/agent_dfa.cuh index f867a95a864..9bb087e788d 100644 --- a/cpp/src/io/fst/agent_dfa.cuh +++ b/cpp/src/io/fst/agent_dfa.cuh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022-2023, NVIDIA CORPORATION. + * Copyright (c) 2022-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -493,7 +493,7 @@ template -__launch_bounds__(int32_t(AgentDFAPolicy::BLOCK_THREADS)) __global__ +__launch_bounds__(int32_t(AgentDFAPolicy::BLOCK_THREADS)) CUDF_KERNEL void SimulateDFAKernel(DfaT dfa, SymbolItT d_chars, OffsetT const num_chars, diff --git a/cpp/src/io/fst/dispatch_dfa.cuh b/cpp/src/io/fst/dispatch_dfa.cuh index a5c1a4f4f5c..be63ec6539f 100644 --- a/cpp/src/io/fst/dispatch_dfa.cuh +++ b/cpp/src/io/fst/dispatch_dfa.cuh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022, NVIDIA CORPORATION. + * Copyright (c) 2022-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -67,7 +67,7 @@ struct DeviceFSMPolicy { * @return */ template -__global__ void initialization_pass_kernel(TileState items_state, uint32_t num_tiles) +CUDF_KERNEL void initialization_pass_kernel(TileState items_state, uint32_t num_tiles) { items_state.InitializeStatus(num_tiles); } diff --git a/cpp/src/io/json/legacy/json_gpu.cu b/cpp/src/io/json/legacy/json_gpu.cu index b358cc2071b..4d5293e12fd 100644 --- a/cpp/src/io/json/legacy/json_gpu.cu +++ b/cpp/src/io/json/legacy/json_gpu.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020-2023, NVIDIA CORPORATION. + * Copyright (c) 2020-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -245,14 +245,14 @@ __device__ std::pair get_row_data_range( * @param[out] valid_fields The bitmaps indicating whether column fields are valid * @param[out] num_valid_fields The numbers of valid fields in columns */ -__global__ void convert_data_to_columns_kernel(parse_options_view opts, - device_span const data, - device_span const row_offsets, - device_span const column_types, - col_map_type col_map, - device_span const output_columns, - device_span const valid_fields, - device_span const num_valid_fields) +CUDF_KERNEL void convert_data_to_columns_kernel(parse_options_view opts, + device_span const data, + device_span const row_offsets, + device_span const column_types, + col_map_type col_map, + device_span const output_columns, + device_span const valid_fields, + device_span const num_valid_fields) { auto const rec_id = grid_1d::global_thread_id(); if (rec_id >= row_offsets.size()) return; @@ -321,7 +321,7 @@ __global__ void convert_data_to_columns_kernel(parse_options_view opts, * @param[in] num_columns The number of columns of input data * @param[out] column_infos The count for each column data type */ -__global__ void detect_data_types_kernel( +CUDF_KERNEL void detect_data_types_kernel( parse_options_view const opts, device_span const data, device_span const row_offsets, @@ -481,11 +481,11 @@ __device__ key_value_range get_next_key_value_range(char const* begin, * @param[out] keys_cnt Number of keys found in the file * @param[out] keys_info optional, information (offset, length, hash) for each found key */ -__global__ void collect_keys_info_kernel(parse_options_view const options, - device_span const data, - device_span const row_offsets, - unsigned long long int* keys_cnt, - thrust::optional keys_info) +CUDF_KERNEL void collect_keys_info_kernel(parse_options_view const options, + device_span const data, + device_span const row_offsets, + unsigned long long int* keys_cnt, + thrust::optional keys_info) { auto const rec_id = grid_1d::global_thread_id(); if (rec_id >= row_offsets.size()) return; diff --git a/cpp/src/io/orc/dict_enc.cu b/cpp/src/io/orc/dict_enc.cu index 1d2262a1ccc..5971482f80c 100644 --- a/cpp/src/io/orc/dict_enc.cu +++ b/cpp/src/io/orc/dict_enc.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2023, NVIDIA CORPORATION. + * Copyright (c) 2019-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -27,10 +27,10 @@ namespace cudf::io::orc::gpu { /** * @brief Counts the number of characters in each rowgroup of each string column. 
*/ -__global__ void rowgroup_char_counts_kernel(device_2dspan char_counts, - device_span orc_columns, - device_2dspan rowgroup_bounds, - device_span str_col_indexes) +CUDF_KERNEL void rowgroup_char_counts_kernel(device_2dspan char_counts, + device_span orc_columns, + device_2dspan rowgroup_bounds, + device_span str_col_indexes) { // Index of the column in the `str_col_indexes` array auto const str_col_idx = blockIdx.y; @@ -75,7 +75,7 @@ void rowgroup_char_counts(device_2dspan counts, } template -__global__ void __launch_bounds__(block_size) +CUDF_KERNEL void __launch_bounds__(block_size) initialize_dictionary_hash_maps_kernel(device_span dictionaries) { auto const dict_map = dictionaries[blockIdx.x].map_slots; @@ -107,7 +107,7 @@ struct hash_functor { }; template -__global__ void __launch_bounds__(block_size) +CUDF_KERNEL void __launch_bounds__(block_size) populate_dictionary_hash_maps_kernel(device_2dspan dictionaries, device_span columns) { @@ -162,7 +162,7 @@ __global__ void __launch_bounds__(block_size) } template -__global__ void __launch_bounds__(block_size) +CUDF_KERNEL void __launch_bounds__(block_size) collect_map_entries_kernel(device_2dspan dictionaries) { auto const col_idx = blockIdx.x; @@ -196,7 +196,7 @@ __global__ void __launch_bounds__(block_size) } template -__global__ void __launch_bounds__(block_size) +CUDF_KERNEL void __launch_bounds__(block_size) get_dictionary_indices_kernel(device_2dspan dictionaries, device_span columns) { diff --git a/cpp/src/io/orc/stats_enc.cu b/cpp/src/io/orc/stats_enc.cu index 1afc0200bfa..31159ae0341 100644 --- a/cpp/src/io/orc/stats_enc.cu +++ b/cpp/src/io/orc/stats_enc.cu @@ -35,7 +35,7 @@ constexpr unsigned int init_threads_per_group = 32; constexpr unsigned int init_groups_per_block = 4; constexpr unsigned int init_threads_per_block = init_threads_per_group * init_groups_per_block; -__global__ void __launch_bounds__(init_threads_per_block) +CUDF_KERNEL void __launch_bounds__(init_threads_per_block) gpu_init_statistics_groups(statistics_group* groups, stats_column_desc const* cols, device_2dspan rowgroup_bounds) @@ -73,7 +73,7 @@ constexpr unsigned int pb_fldlen_common = pb_fld_hdrlen + (pb_fld_hdrlen + pb_fldlen_int64) + 2 * pb_fld_hdrlen; template -__global__ void __launch_bounds__(block_size, 1) +CUDF_KERNEL void __launch_bounds__(block_size, 1) gpu_init_statistics_buffersize(statistics_merge_group* groups, statistics_chunk const* chunks, uint32_t statistics_count) @@ -249,7 +249,7 @@ constexpr unsigned int encode_chunks_per_block = 4; constexpr unsigned int encode_threads_per_block = encode_threads_per_chunk * encode_chunks_per_block; -__global__ void __launch_bounds__(encode_threads_per_block) +CUDF_KERNEL void __launch_bounds__(encode_threads_per_block) gpu_encode_statistics(uint8_t* blob_bfr, statistics_merge_group* groups, statistics_chunk const* chunks, diff --git a/cpp/src/io/orc/stripe_data.cu b/cpp/src/io/orc/stripe_data.cu index 0b249bbdafe..14072d79172 100644 --- a/cpp/src/io/orc/stripe_data.cu +++ b/cpp/src/io/orc/stripe_data.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2023, NVIDIA CORPORATION. + * Copyright (c) 2019-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
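The ORC dictionary kernels above (initialize_dictionary_hash_maps_kernel and friends) dispatch one thread block per dictionary: blockIdx.x picks the work item and the block's threads cooperate on it with a block-strided loop. A generic sketch of that one-block-per-item shape; the struct, kernel name, and launch parameters below are placeholders, not cudf APIs:

  struct item_desc {
    int* data;
    int size;
  };

  template <int block_size>
  CUDF_KERNEL void __launch_bounds__(block_size)
    reset_items_kernel(item_desc* items)
  {
    auto& item = items[blockIdx.x];  // one thread block per item
    for (int i = threadIdx.x; i < item.size; i += block_size) {
      item.data[i] = 0;              // block-strided loop within the item
    }
  }

  // Launched with one block per item:
  // reset_items_kernel<256><<<num_items, 256>>>(d_items);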
@@ -1082,7 +1082,7 @@ static __device__ int Decode_Decimals(orc_bytestream_s* bs, */ // blockDim {block_size,1,1} template -__global__ void __launch_bounds__(block_size) +CUDF_KERNEL void __launch_bounds__(block_size) gpuDecodeNullsAndStringDictionaries(ColumnDesc* chunks, DictionaryEntry* global_dictionary, uint32_t num_columns, @@ -1358,7 +1358,7 @@ static const __device__ __constant__ uint32_t kTimestampNanoScale[8] = { */ // blockDim {block_size,1,1} template -__global__ void __launch_bounds__(block_size) +CUDF_KERNEL void __launch_bounds__(block_size) gpuDecodeOrcColumnData(ColumnDesc* chunks, DictionaryEntry* global_dictionary, table_device_view tz_table, diff --git a/cpp/src/io/orc/stripe_enc.cu b/cpp/src/io/orc/stripe_enc.cu index b99826e070e..b7dd0ea9ec3 100644 --- a/cpp/src/io/orc/stripe_enc.cu +++ b/cpp/src/io/orc/stripe_enc.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2023, NVIDIA CORPORATION. + * Copyright (c) 2019-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -723,7 +723,7 @@ static __device__ void encode_null_mask(orcenc_state_s* s, */ // blockDim {`encode_block_size`,1,1} template -__global__ void __launch_bounds__(block_size) +CUDF_KERNEL void __launch_bounds__(block_size) gpuEncodeOrcColumnData(device_2dspan chunks, device_2dspan streams) { @@ -1008,7 +1008,7 @@ __global__ void __launch_bounds__(block_size) */ // blockDim {512,1,1} template -__global__ void __launch_bounds__(block_size) +CUDF_KERNEL void __launch_bounds__(block_size) gpuEncodeStringDictionaries(stripe_dictionary const* stripes, device_span columns, device_2dspan chunks, @@ -1091,7 +1091,7 @@ __global__ void __launch_bounds__(block_size) * @param[in,out] streams List of encoder chunk streams [column][rowgroup] */ // blockDim {compact_streams_block_size,1,1} -__global__ void __launch_bounds__(compact_streams_block_size) +CUDF_KERNEL void __launch_bounds__(compact_streams_block_size) gpuCompactOrcDataStreams(device_2dspan strm_desc, device_2dspan streams) { @@ -1136,7 +1136,7 @@ __global__ void __launch_bounds__(compact_streams_block_size) * @param[in] comp_block_align Required alignment for compressed blocks */ // blockDim {256,1,1} -__global__ void __launch_bounds__(256) +CUDF_KERNEL void __launch_bounds__(256) gpuInitCompressionBlocks(device_2dspan strm_desc, device_2dspan streams, // const? device_span> inputs, @@ -1191,7 +1191,7 @@ __global__ void __launch_bounds__(256) * @param[in] max_comp_blk_size Max size of any block after compression */ // blockDim {1024,1,1} -__global__ void __launch_bounds__(1024) +CUDF_KERNEL void __launch_bounds__(1024) gpuCompactCompressedBlocks(device_2dspan strm_desc, device_span const> inputs, device_span const> outputs, @@ -1274,8 +1274,8 @@ struct decimal_column_element_sizes { // Converts sizes of individual decimal elements to offsets within each row group // Conversion is done in-place template -__global__ void decimal_sizes_to_offsets_kernel(device_2dspan rg_bounds, - device_span sizes) +CUDF_KERNEL void decimal_sizes_to_offsets_kernel(device_2dspan rg_bounds, + device_span sizes) { using block_scan = cub::BlockScan; __shared__ typename block_scan::TempStorage scan_storage; diff --git a/cpp/src/io/orc/stripe_init.cu b/cpp/src/io/orc/stripe_init.cu index b31a4a081d1..327b9557176 100644 --- a/cpp/src/io/orc/stripe_init.cu +++ b/cpp/src/io/orc/stripe_init.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2023, NVIDIA CORPORATION. 
+ * Copyright (c) 2019-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -41,7 +41,7 @@ struct compressed_stream_s { }; // blockDim {128,1,1} -__global__ void __launch_bounds__(128, 8) gpuParseCompressedStripeData( +CUDF_KERNEL void __launch_bounds__(128, 8) gpuParseCompressedStripeData( CompressedStreamInfo* strm_info, int32_t num_streams, uint32_t block_size, uint32_t log2maxcr) { __shared__ compressed_stream_s strm_g[4]; @@ -138,7 +138,7 @@ __global__ void __launch_bounds__(128, 8) gpuParseCompressedStripeData( } // blockDim {128,1,1} -__global__ void __launch_bounds__(128, 8) +CUDF_KERNEL void __launch_bounds__(128, 8) gpuPostDecompressionReassemble(CompressedStreamInfo* strm_info, int32_t num_streams) { __shared__ compressed_stream_s strm_g[4]; @@ -442,14 +442,14 @@ static __device__ void gpuMapRowIndexToUncompressed(rowindex_state_s* s, * value */ // blockDim {128,1,1} -__global__ void __launch_bounds__(128, 8) gpuParseRowGroupIndex(RowGroup* row_groups, - CompressedStreamInfo* strm_info, - ColumnDesc* chunks, - uint32_t num_columns, - uint32_t num_stripes, - uint32_t num_rowgroups, - uint32_t rowidx_stride, - bool use_base_stride) +CUDF_KERNEL void __launch_bounds__(128, 8) gpuParseRowGroupIndex(RowGroup* row_groups, + CompressedStreamInfo* strm_info, + ColumnDesc* chunks, + uint32_t num_columns, + uint32_t num_stripes, + uint32_t num_rowgroups, + uint32_t rowidx_stride, + bool use_base_stride) { __shared__ __align__(16) rowindex_state_s state_g; rowindex_state_s* const s = &state_g; @@ -513,7 +513,7 @@ __global__ void __launch_bounds__(128, 8) gpuParseRowGroupIndex(RowGroup* row_gr } template -__global__ void __launch_bounds__(block_size) +CUDF_KERNEL void __launch_bounds__(block_size) gpu_reduce_pushdown_masks(device_span orc_columns, device_2dspan rowgroup_bounds, device_2dspan set_counts) diff --git a/cpp/src/io/orc/writer_impl.cu b/cpp/src/io/orc/writer_impl.cu index cef4915e0c9..edc40391bfa 100644 --- a/cpp/src/io/orc/writer_impl.cu +++ b/cpp/src/io/orc/writer_impl.cu @@ -357,10 +357,10 @@ struct string_length_functor { statistics_merge_group const* stripe_stat_merge; }; -__global__ void copy_string_data(char* string_pool, - size_type* offsets, - statistics_chunk* chunks, - statistics_merge_group const* groups) +CUDF_KERNEL void copy_string_data(char* string_pool, + size_type* offsets, + statistics_chunk* chunks, + statistics_merge_group const* groups) { auto const idx = blockIdx.x / 2; if (groups[idx].stats_dtype == dtype_string) { diff --git a/cpp/src/io/parquet/chunk_dict.cu b/cpp/src/io/parquet/chunk_dict.cu index 53ff31ab0a7..a43c6d4cbb6 100644 --- a/cpp/src/io/parquet/chunk_dict.cu +++ b/cpp/src/io/parquet/chunk_dict.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021-2023, NVIDIA CORPORATION. + * Copyright (c) 2021-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -31,7 +31,7 @@ constexpr int DEFAULT_BLOCK_SIZE = 256; } template -__global__ void __launch_bounds__(block_size) +CUDF_KERNEL void __launch_bounds__(block_size) initialize_chunk_hash_maps_kernel(device_span chunks) { auto const chunk = chunks[blockIdx.x]; @@ -98,7 +98,7 @@ struct map_find_fn { }; template -__global__ void __launch_bounds__(block_size) +CUDF_KERNEL void __launch_bounds__(block_size) populate_chunk_hash_maps_kernel(cudf::detail::device_2dspan frags) { auto col_idx = blockIdx.y; @@ -189,7 +189,7 @@ __global__ void __launch_bounds__(block_size) } template -__global__ void __launch_bounds__(block_size) +CUDF_KERNEL void __launch_bounds__(block_size) collect_map_entries_kernel(device_span chunks) { auto& chunk = chunks[blockIdx.x]; @@ -223,7 +223,7 @@ __global__ void __launch_bounds__(block_size) } template -__global__ void __launch_bounds__(block_size) +CUDF_KERNEL void __launch_bounds__(block_size) get_dictionary_indices_kernel(cudf::detail::device_2dspan frags) { auto col_idx = blockIdx.y; diff --git a/cpp/src/io/parquet/decode_preprocess.cu b/cpp/src/io/parquet/decode_preprocess.cu index afe9a76a6d0..2d000600028 100644 --- a/cpp/src/io/parquet/decode_preprocess.cu +++ b/cpp/src/io/parquet/decode_preprocess.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2023, NVIDIA CORPORATION. + * Copyright (c) 2023-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -207,7 +207,7 @@ static __device__ void gpuUpdatePageSizes(page_state_s* s, * (PageInfo::str_bytes) as part of the pass */ template -__global__ void __launch_bounds__(preprocess_block_size) +CUDF_KERNEL void __launch_bounds__(preprocess_block_size) gpuComputePageSizes(PageInfo* pages, device_span chunks, size_t min_row, diff --git a/cpp/src/io/parquet/page_data.cu b/cpp/src/io/parquet/page_data.cu index d39edd70fcd..8d220e6fa96 100644 --- a/cpp/src/io/parquet/page_data.cu +++ b/cpp/src/io/parquet/page_data.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2023, NVIDIA CORPORATION. + * Copyright (c) 2018-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -421,7 +421,7 @@ static __device__ void gpuOutputGeneric( * @param error_code Error code to set if an error is encountered */ template -__global__ void __launch_bounds__(decode_block_size) +CUDF_KERNEL void __launch_bounds__(decode_block_size) gpuDecodePageData(PageInfo* pages, device_span chunks, size_t min_row, diff --git a/cpp/src/io/parquet/page_delta_decode.cu b/cpp/src/io/parquet/page_delta_decode.cu index 44ec0e1e027..d0557446f14 100644 --- a/cpp/src/io/parquet/page_delta_decode.cu +++ b/cpp/src/io/parquet/page_delta_decode.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2023, NVIDIA CORPORATION. + * Copyright (c) 2023-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -305,7 +305,7 @@ struct delta_byte_array_decoder { // with V2 page headers; see https://www.mail-archive.com/dev@parquet.apache.org/msg11826.html). // this kernel only needs 96 threads (3 warps)(for now). 
template -__global__ void __launch_bounds__(96) +CUDF_KERNEL void __launch_bounds__(96) gpuDecodeDeltaBinary(PageInfo* pages, device_span chunks, size_t min_row, @@ -430,7 +430,7 @@ __global__ void __launch_bounds__(96) // suffixes are not encoded in the header, we're going to have to first do a quick pass through them // to find the start/end of each structure. template -__global__ void __launch_bounds__(decode_block_size) +CUDF_KERNEL void __launch_bounds__(decode_block_size) gpuDecodeDeltaByteArray(PageInfo* pages, device_span chunks, size_t min_row, @@ -587,7 +587,7 @@ __global__ void __launch_bounds__(decode_block_size) // Decode page data that is DELTA_LENGTH_BYTE_ARRAY packed. This encoding consists of a // DELTA_BINARY_PACKED array of string lengths, followed by the string data. template -__global__ void __launch_bounds__(decode_block_size) +CUDF_KERNEL void __launch_bounds__(decode_block_size) gpuDecodeDeltaLengthByteArray(PageInfo* pages, device_span chunks, size_t min_row, diff --git a/cpp/src/io/parquet/page_enc.cu b/cpp/src/io/parquet/page_enc.cu index e16551024d1..12af5888d2f 100644 --- a/cpp/src/io/parquet/page_enc.cu +++ b/cpp/src/io/parquet/page_enc.cu @@ -385,7 +385,7 @@ __device__ uint8_t const* delta_encode(page_enc_state_s<0>* s, uint64_t* buffer, // blockDim {512,1,1} template -__global__ void __launch_bounds__(block_size) +CUDF_KERNEL void __launch_bounds__(block_size) gpuInitRowGroupFragments(device_2dspan frag, device_span col_desc, device_span partitions, @@ -422,7 +422,7 @@ __global__ void __launch_bounds__(block_size) // blockDim {512,1,1} template -__global__ void __launch_bounds__(block_size) +CUDF_KERNEL void __launch_bounds__(block_size) gpuCalculatePageFragments(device_span frag, device_span column_frag_sizes) { @@ -449,7 +449,7 @@ __global__ void __launch_bounds__(block_size) } // blockDim {128,1,1} -__global__ void __launch_bounds__(128) +CUDF_KERNEL void __launch_bounds__(128) gpuInitFragmentStats(device_span groups, device_span fragments) { @@ -510,7 +510,7 @@ __device__ size_t delta_data_len(Type physical_type, } // blockDim {128,1,1} -__global__ void __launch_bounds__(128) +CUDF_KERNEL void __launch_bounds__(128) gpuInitPages(device_2dspan chunks, device_span pages, device_span page_sizes, @@ -1244,9 +1244,10 @@ __device__ auto julian_days_with_time(int64_t v) // the level data is encoded. 
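The comment above describes DELTA_LENGTH_BYTE_ARRAY as a DELTA_BINARY_PACKED list of string lengths followed by the concatenated character data, so once the lengths are decoded the per-string offsets are just their exclusive scan. A small host-side illustration of that layout (standalone C++, not cudf code; the function name is made up):

  #include <cstddef>
  #include <numeric>
  #include <string>
  #include <vector>

  // Rebuild strings from decoded lengths plus the concatenated character data.
  std::vector<std::string> split_by_lengths(std::vector<int> const& lengths,
                                            std::string const& chars)
  {
    std::vector<int> offsets(lengths.size() + 1, 0);  // offsets[0] = 0
    std::partial_sum(lengths.begin(), lengths.end(), offsets.begin() + 1);

    std::vector<std::string> out;
    for (std::size_t i = 0; i < lengths.size(); ++i) {
      out.push_back(chars.substr(offsets[i], lengths[i]));
    }
    return out;
  }

The exclusive scan of the lengths gives exactly the offsets a strings column needs; the device kernels perform the same bookkeeping in parallel.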
// blockDim(128, 1, 1) template -__global__ void __launch_bounds__(block_size, 8) gpuEncodePageLevels(device_span pages, - bool write_v2_headers, - encode_kernel_mask kernel_mask) +CUDF_KERNEL void __launch_bounds__(block_size, 8) + gpuEncodePageLevels(device_span pages, + bool write_v2_headers, + encode_kernel_mask kernel_mask) { __shared__ __align__(8) rle_page_enc_state_s state_g; @@ -1504,7 +1505,7 @@ __device__ void finish_page_encode(state_buf* s, // PLAIN page data encoder // blockDim(128, 1, 1) template -__global__ void __launch_bounds__(block_size, 8) +CUDF_KERNEL void __launch_bounds__(block_size, 8) gpuEncodePages(device_span pages, device_span> comp_in, device_span> comp_out, @@ -1739,7 +1740,7 @@ __global__ void __launch_bounds__(block_size, 8) // DICTIONARY page data encoder // blockDim(128, 1, 1) template -__global__ void __launch_bounds__(block_size, 8) +CUDF_KERNEL void __launch_bounds__(block_size, 8) gpuEncodeDictPages(device_span pages, device_span> comp_in, device_span> comp_out, @@ -1871,7 +1872,7 @@ __global__ void __launch_bounds__(block_size, 8) // DELTA_BINARY_PACKED page data encoder // blockDim(128, 1, 1) template -__global__ void __launch_bounds__(block_size, 8) +CUDF_KERNEL void __launch_bounds__(block_size, 8) gpuEncodeDeltaBinaryPages(device_span pages, device_span> comp_in, device_span> comp_out, @@ -1975,7 +1976,7 @@ __global__ void __launch_bounds__(block_size, 8) // DELTA_LENGTH_BYTE_ARRAY page data encoder // blockDim(128, 1, 1) template -__global__ void __launch_bounds__(block_size, 8) +CUDF_KERNEL void __launch_bounds__(block_size, 8) gpuEncodeDeltaLengthByteArrayPages(device_span pages, device_span> comp_in, device_span> comp_out, @@ -2105,7 +2106,7 @@ constexpr int decide_compression_block_size = decide_compression_warps_in_block * cudf::detail::warp_size; // blockDim(decide_compression_block_size, 1, 1) -__global__ void __launch_bounds__(decide_compression_block_size) +CUDF_KERNEL void __launch_bounds__(decide_compression_block_size) gpuDecideCompression(device_span chunks) { __shared__ __align__(8) EncColumnChunk ck_g[decide_compression_warps_in_block]; @@ -2575,7 +2576,7 @@ __device__ uint8_t* EncodeStatistics(uint8_t* start, } // blockDim(128, 1, 1) -__global__ void __launch_bounds__(128) +CUDF_KERNEL void __launch_bounds__(128) gpuEncodePageHeaders(device_span pages, device_span comp_results, device_span page_stats, @@ -2670,7 +2671,7 @@ __global__ void __launch_bounds__(128) } // blockDim(1024, 1, 1) -__global__ void __launch_bounds__(1024) +CUDF_KERNEL void __launch_bounds__(1024) gpuGatherPages(device_span chunks, device_span pages) { __shared__ __align__(8) EncColumnChunk ck_g; @@ -2848,7 +2849,7 @@ struct mask_tform { } // namespace // blockDim(1, 1, 1) -__global__ void __launch_bounds__(1) +CUDF_KERNEL void __launch_bounds__(1) gpuEncodeColumnIndexes(device_span chunks, device_span column_stats, int32_t column_index_truncate_length) diff --git a/cpp/src/io/parquet/page_hdr.cu b/cpp/src/io/parquet/page_hdr.cu index cc3f584422d..4be4f45497d 100644 --- a/cpp/src/io/parquet/page_hdr.cu +++ b/cpp/src/io/parquet/page_hdr.cu @@ -348,9 +348,9 @@ struct gpuParsePageHeader { * @param[in] num_chunks Number of column chunks */ // blockDim {128,1,1} -__global__ void __launch_bounds__(128) gpuDecodePageHeaders(ColumnChunkDesc* chunks, - int32_t num_chunks, - kernel_error::pointer error_code) +CUDF_KERNEL void __launch_bounds__(128) gpuDecodePageHeaders(ColumnChunkDesc* chunks, + int32_t num_chunks, + kernel_error::pointer error_code) { using 
cudf::detail::warp_size; gpuParsePageHeader parse_page_header; @@ -480,7 +480,7 @@ __global__ void __launch_bounds__(128) gpuDecodePageHeaders(ColumnChunkDesc* chu * @param[in] num_chunks Number of column chunks */ // blockDim {128,1,1} -__global__ void __launch_bounds__(128) +CUDF_KERNEL void __launch_bounds__(128) gpuBuildStringDictionaryIndex(ColumnChunkDesc* chunks, int32_t num_chunks) { __shared__ ColumnChunkDesc chunk_g[4]; diff --git a/cpp/src/io/parquet/page_string_decode.cu b/cpp/src/io/parquet/page_string_decode.cu index d559f93f45b..37a8cabc182 100644 --- a/cpp/src/io/parquet/page_string_decode.cu +++ b/cpp/src/io/parquet/page_string_decode.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2023, NVIDIA CORPORATION. + * Copyright (c) 2018-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -584,7 +584,7 @@ __device__ thrust::pair totalDeltaByteArraySize(uint8_t const* d * @tparam level_t Type used to store decoded repetition and definition levels */ template -__global__ void __launch_bounds__(preprocess_block_size) gpuComputeStringPageBounds( +CUDF_KERNEL void __launch_bounds__(preprocess_block_size) gpuComputeStringPageBounds( PageInfo* pages, device_span chunks, size_t min_row, size_t num_rows) { __shared__ __align__(16) page_state_s state_g; @@ -653,7 +653,7 @@ __global__ void __launch_bounds__(preprocess_block_size) gpuComputeStringPageBou * @param min_rows crop all rows below min_row * @param num_rows Maximum number of rows to read */ -__global__ void __launch_bounds__(delta_preproc_block_size) gpuComputeDeltaPageStringSizes( +CUDF_KERNEL void __launch_bounds__(delta_preproc_block_size) gpuComputeDeltaPageStringSizes( PageInfo* pages, device_span chunks, size_t min_row, size_t num_rows) { __shared__ __align__(16) page_state_s state_g; @@ -725,7 +725,7 @@ __global__ void __launch_bounds__(delta_preproc_block_size) gpuComputeDeltaPageS * @param min_rows crop all rows below min_row * @param num_rows Maximum number of rows to read */ -__global__ void __launch_bounds__(delta_length_block_size) gpuComputeDeltaLengthPageStringSizes( +CUDF_KERNEL void __launch_bounds__(delta_length_block_size) gpuComputeDeltaLengthPageStringSizes( PageInfo* pages, device_span chunks, size_t min_row, size_t num_rows) { using cudf::detail::warp_size; @@ -820,7 +820,7 @@ __global__ void __launch_bounds__(delta_length_block_size) gpuComputeDeltaLength * @param min_rows crop all rows below min_row * @param num_rows Maximum number of rows to read */ -__global__ void __launch_bounds__(preprocess_block_size) gpuComputePageStringSizes( +CUDF_KERNEL void __launch_bounds__(preprocess_block_size) gpuComputePageStringSizes( PageInfo* pages, device_span chunks, size_t min_row, size_t num_rows) { __shared__ __align__(16) page_state_s state_g; @@ -912,7 +912,7 @@ __global__ void __launch_bounds__(preprocess_block_size) gpuComputePageStringSiz * @tparam level_t Type used to store decoded repetition and definition levels */ template -__global__ void __launch_bounds__(decode_block_size) +CUDF_KERNEL void __launch_bounds__(decode_block_size) gpuDecodeStringPageData(PageInfo* pages, device_span chunks, size_t min_row, diff --git a/cpp/src/io/statistics/column_statistics.cuh b/cpp/src/io/statistics/column_statistics.cuh index f71fb95949f..db0d56ac321 100644 --- a/cpp/src/io/statistics/column_statistics.cuh +++ b/cpp/src/io/statistics/column_statistics.cuh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021-2023, NVIDIA 
CORPORATION. + * Copyright (c) 2021-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -289,7 +289,7 @@ __device__ void cooperative_load(T& destination, T const* source = nullptr) * @tparam IO File format for which statistics calculation is being done */ template -__global__ void __launch_bounds__(block_size, 1) +CUDF_KERNEL void __launch_bounds__(block_size, 1) gpu_calculate_group_statistics(statistics_chunk* chunks, statistics_group const* groups, bool const int96_timestamps) @@ -368,7 +368,7 @@ void calculate_group_statistics(statistics_chunk* chunks, * @tparam IO File format for which statistics calculation is being done */ template -__global__ void __launch_bounds__(block_size, 1) +CUDF_KERNEL void __launch_bounds__(block_size, 1) gpu_merge_group_statistics(statistics_chunk* chunks_out, statistics_chunk const* chunks_in, statistics_merge_group const* groups) diff --git a/cpp/src/io/text/multibyte_split.cu b/cpp/src/io/text/multibyte_split.cu index 443ca0f5fe7..2194ee1aaa1 100644 --- a/cpp/src/io/text/multibyte_split.cu +++ b/cpp/src/io/text/multibyte_split.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021-2023, NVIDIA CORPORATION. + * Copyright (c) 2021-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -138,7 +138,7 @@ using byte_offset = int64_t; // it begins in. From there, each thread can then take deterministic action. In this case, the // deterministic action is counting and outputting delimiter offsets when a delimiter is found. -__global__ void multibyte_split_init_kernel( +CUDF_KERNEL void multibyte_split_init_kernel( cudf::size_type base_tile_idx, cudf::size_type num_tiles, cudf::io::text::detail::scan_tile_state_view tile_multistates, @@ -154,7 +154,7 @@ __global__ void multibyte_split_init_kernel( } } -__global__ __launch_bounds__(THREADS_PER_TILE) void multibyte_split_kernel( +CUDF_KERNEL __launch_bounds__(THREADS_PER_TILE) void multibyte_split_kernel( cudf::size_type base_tile_idx, byte_offset base_input_offset, output_offset base_output_offset, @@ -231,7 +231,7 @@ __global__ __launch_bounds__(THREADS_PER_TILE) void multibyte_split_kernel( } } -__global__ __launch_bounds__(THREADS_PER_TILE) void byte_split_kernel( +CUDF_KERNEL __launch_bounds__(THREADS_PER_TILE) void byte_split_kernel( cudf::size_type base_tile_idx, byte_offset base_input_offset, output_offset base_output_offset, diff --git a/cpp/src/io/utilities/data_casting.cu b/cpp/src/io/utilities/data_casting.cu index 9e5c5c76392..9545811a542 100644 --- a/cpp/src/io/utilities/data_casting.cu +++ b/cpp/src/io/utilities/data_casting.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022-2023, NVIDIA CORPORATION. + * Copyright (c) 2022-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -420,14 +420,14 @@ struct bitfield_block { * @param d_chars Character array to store the characters of strings */ template -__global__ void parse_fn_string_parallel(str_tuple_it str_tuples, - size_type total_out_strings, - size_type* str_counter, - bitmask_type* null_mask, - size_type* null_count_data, - cudf::io::parse_options_view const options, - size_type* d_offsets, - char* d_chars) +CUDF_KERNEL void parse_fn_string_parallel(str_tuple_it str_tuples, + size_type total_out_strings, + size_type* str_counter, + bitmask_type* null_mask, + size_type* null_count_data, + cudf::io::parse_options_view const options, + size_type* d_offsets, + char* d_chars) { constexpr auto BLOCK_SIZE = is_warp ? cudf::detail::warp_size : cudf::detail::warp_size * num_warps; diff --git a/cpp/src/io/utilities/parsing_utils.cu b/cpp/src/io/utilities/parsing_utils.cu index 06b86f33c85..d02ce99e6e5 100644 --- a/cpp/src/io/utilities/parsing_utils.cu +++ b/cpp/src/io/utilities/parsing_utils.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2023, NVIDIA CORPORATION. + * Copyright (c) 2019-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -87,12 +87,12 @@ __device__ __forceinline__ void setElement(void*, cudf::size_type, T const&, V c * @param[out] positions Array containing the output positions */ template -__global__ void count_and_set_positions(char const* data, - uint64_t size, - uint64_t offset, - char const key, - cudf::size_type* count, - T* positions) +CUDF_KERNEL void count_and_set_positions(char const* data, + uint64_t size, + uint64_t offset, + char const key, + cudf::size_type* count, + T* positions) { // thread IDs range per block, so also need the block id auto const tid = cudf::detail::grid_1d::global_thread_id(); diff --git a/cpp/src/io/utilities/type_inference.cu b/cpp/src/io/utilities/type_inference.cu index 79a5c8f1c4c..b446ad41946 100644 --- a/cpp/src/io/utilities/type_inference.cu +++ b/cpp/src/io/utilities/type_inference.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022-2023, NVIDIA CORPORATION. + * Copyright (c) 2022-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -112,11 +112,11 @@ __device__ __inline__ bool is_like_float(std::size_t len, * @param[out] column_info Histogram of column type counters */ template -__global__ void infer_column_type_kernel(OptionsView options, - device_span data, - ColumnStringIter offset_length_begin, - std::size_t size, - cudf::io::column_type_histogram* column_info) +CUDF_KERNEL void infer_column_type_kernel(OptionsView options, + device_span data, + ColumnStringIter offset_length_begin, + std::size_t size, + cudf::io::column_type_histogram* column_info) { auto thread_type_histogram = cudf::io::column_type_histogram{}; diff --git a/cpp/src/join/conditional_join_kernels.cuh b/cpp/src/join/conditional_join_kernels.cuh index f665aba698f..02ce27a36ba 100644 --- a/cpp/src/join/conditional_join_kernels.cuh +++ b/cpp/src/join/conditional_join_kernels.cuh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021-2023, NVIDIA CORPORATION. + * Copyright (c) 2021-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -48,7 +48,7 @@ namespace detail { * @param[out] output_size The resulting output size */ template -__global__ void compute_conditional_join_output_size( +CUDF_KERNEL void compute_conditional_join_output_size( table_device_view left_table, table_device_view right_table, join_kind join_type, @@ -138,15 +138,15 @@ __global__ void compute_conditional_join_output_size( * the kernel needs to internally loop over left rows. Otherwise, loop over right rows. */ template -__global__ void conditional_join(table_device_view left_table, - table_device_view right_table, - join_kind join_type, - cudf::size_type* join_output_l, - cudf::size_type* join_output_r, - cudf::size_type* current_idx, - cudf::ast::detail::expression_device_view device_expression_data, - cudf::size_type const max_size, - bool const swap_tables) +CUDF_KERNEL void conditional_join(table_device_view left_table, + table_device_view right_table, + join_kind join_type, + cudf::size_type* join_output_l, + cudf::size_type* join_output_r, + cudf::size_type* current_idx, + cudf::ast::detail::expression_device_view device_expression_data, + cudf::size_type const max_size, + bool const swap_tables) { constexpr int num_warps = block_size / detail::warp_size; __shared__ cudf::size_type current_idx_shared[num_warps]; diff --git a/cpp/src/join/mixed_join_kernel.cuh b/cpp/src/join/mixed_join_kernel.cuh index efe575e14de..22bbbff967a 100644 --- a/cpp/src/join/mixed_join_kernel.cuh +++ b/cpp/src/join/mixed_join_kernel.cuh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022-2023, NVIDIA CORPORATION. + * Copyright (c) 2022-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -36,8 +36,10 @@ namespace detail { namespace cg = cooperative_groups; +#pragma GCC diagnostic ignored "-Wattributes" + template -__launch_bounds__(block_size) __global__ +__attribute__((visibility("hidden"))) __launch_bounds__(block_size) __global__ void mixed_join(table_device_view left_table, table_device_view right_table, table_device_view probe, diff --git a/cpp/src/join/mixed_join_kernels.cuh b/cpp/src/join/mixed_join_kernels.cuh index 2cd4d0c3b38..1d36a246f02 100644 --- a/cpp/src/join/mixed_join_kernels.cuh +++ b/cpp/src/join/mixed_join_kernels.cuh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022-2023, NVIDIA CORPORATION. + * Copyright (c) 2022-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -57,6 +57,7 @@ namespace detail { * left/right tables to determine which is the build table and which is the * probe table has already happened on the host. */ + template __global__ void compute_mixed_join_output_size( table_device_view left_table, diff --git a/cpp/src/join/mixed_join_kernels_semi.cu b/cpp/src/join/mixed_join_kernels_semi.cu index e31e35ff788..bde75395371 100644 --- a/cpp/src/join/mixed_join_kernels_semi.cu +++ b/cpp/src/join/mixed_join_kernels_semi.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022-2023, NVIDIA CORPORATION. + * Copyright (c) 2022-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -31,8 +31,10 @@ namespace detail { namespace cg = cooperative_groups; +#pragma GCC diagnostic ignored "-Wattributes" + template -__launch_bounds__(block_size) __global__ +__attribute__((visibility("hidden"))) __launch_bounds__(block_size) __global__ void mixed_join_semi(table_device_view left_table, table_device_view right_table, table_device_view probe, diff --git a/cpp/src/join/mixed_join_size_kernel.cuh b/cpp/src/join/mixed_join_size_kernel.cuh index ef377dadc4b..3bd7bfd7c9a 100644 --- a/cpp/src/join/mixed_join_size_kernel.cuh +++ b/cpp/src/join/mixed_join_size_kernel.cuh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022-2023, NVIDIA CORPORATION. + * Copyright (c) 2022-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -33,20 +33,23 @@ namespace cudf { namespace detail { namespace cg = cooperative_groups; +#pragma GCC diagnostic ignored "-Wattributes" + template -__launch_bounds__(block_size) __global__ void compute_mixed_join_output_size( - table_device_view left_table, - table_device_view right_table, - table_device_view probe, - table_device_view build, - row_hash const hash_probe, - row_equality const equality_probe, - join_kind const join_type, - cudf::detail::mixed_multimap_type::device_view hash_table_view, - ast::detail::expression_device_view device_expression_data, - bool const swap_tables, - std::size_t* output_size, - cudf::device_span matches_per_row) +__attribute__((visibility("hidden"))) __launch_bounds__(block_size) __global__ + void compute_mixed_join_output_size( + table_device_view left_table, + table_device_view right_table, + table_device_view probe, + table_device_view build, + row_hash const hash_probe, + row_equality const equality_probe, + join_kind const join_type, + cudf::detail::mixed_multimap_type::device_view hash_table_view, + ast::detail::expression_device_view device_expression_data, + bool const swap_tables, + std::size_t* output_size, + cudf::device_span matches_per_row) { // The (required) extern storage of the shared memory array leads to // conflicting declarations between different templates. The easiest diff --git a/cpp/src/join/mixed_join_size_kernels_semi.cu b/cpp/src/join/mixed_join_size_kernels_semi.cu index fd7bf0234e9..31da6677aef 100644 --- a/cpp/src/join/mixed_join_size_kernels_semi.cu +++ b/cpp/src/join/mixed_join_size_kernels_semi.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022-2023, NVIDIA CORPORATION. + * Copyright (c) 2022-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
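The mixed-join hunks here spell the hidden-visibility form out by hand instead of using CUDF_KERNEL: __attribute__((visibility("hidden"))) is paired with __launch_bounds__ and __global__, and a -Wattributes pragma is added first, presumably to silence GCC's warning that a visibility attribute on a template may be ignored. A minimal reproduction of that shape with a hypothetical kernel (not cudf code):

  #pragma GCC diagnostic ignored "-Wattributes"

  template <int block_size>
  __attribute__((visibility("hidden"))) __launch_bounds__(block_size) __global__
    void hidden_copy_kernel(int const* in, int* out, int n)
  {
    auto const tid = static_cast<int>(blockIdx.x * blockDim.x + threadIdx.x);
    if (tid < n) { out[tid] = in[tid]; }
  }

Hidden visibility keeps these template instantiations out of the shared library's exported symbol table, which is presumably what CUDF_KERNEL arranges for the other kernels in this patch.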
@@ -31,20 +31,23 @@ namespace detail { namespace cg = cooperative_groups; +#pragma GCC diagnostic ignored "-Wattributes" + template -__launch_bounds__(block_size) __global__ void compute_mixed_join_output_size_semi( - table_device_view left_table, - table_device_view right_table, - table_device_view probe, - table_device_view build, - row_hash const hash_probe, - row_equality const equality_probe, - join_kind const join_type, - cudf::detail::semi_map_type::device_view hash_table_view, - ast::detail::expression_device_view device_expression_data, - bool const swap_tables, - std::size_t* output_size, - cudf::device_span matches_per_row) +__attribute__((visibility("hidden"))) __launch_bounds__(block_size) __global__ + void compute_mixed_join_output_size_semi( + table_device_view left_table, + table_device_view right_table, + table_device_view probe, + table_device_view build, + row_hash const hash_probe, + row_equality const equality_probe, + join_kind const join_type, + cudf::detail::semi_map_type::device_view hash_table_view, + ast::detail::expression_device_view device_expression_data, + bool const swap_tables, + std::size_t* output_size, + cudf::device_span matches_per_row) { // The (required) extern storage of the shared memory array leads to // conflicting declarations between different templates. The easiest diff --git a/cpp/src/json/json_path.cu b/cpp/src/json/json_path.cu index c01357c96ca..6794838c70f 100644 --- a/cpp/src/json/json_path.cu +++ b/cpp/src/json/json_path.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021-2023, NVIDIA CORPORATION. + * Copyright (c) 2021-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -900,7 +900,7 @@ __device__ thrust::pair get_json_object_single( * @param options Options controlling behavior */ template -__launch_bounds__(block_size) __global__ +__launch_bounds__(block_size) CUDF_KERNEL void get_json_object_kernel(column_device_view col, path_operator const* const commands, size_type* output_offsets, diff --git a/cpp/src/merge/merge.cu b/cpp/src/merge/merge.cu index 0d30230de28..073a2a6b97e 100644 --- a/cpp/src/merge/merge.cu +++ b/cpp/src/merge/merge.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020-2023, NVIDIA CORPORATION. + * Copyright (c) 2020-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -122,7 +122,7 @@ using index_type = detail::index_type; * to be copied to the output. Length must be equal to `num_destination_rows` */ template -__global__ void materialize_merged_bitmask_kernel( +CUDF_KERNEL void materialize_merged_bitmask_kernel( column_device_view left_dcol, column_device_view right_dcol, bitmask_type* out_validity, diff --git a/cpp/src/partitioning/partitioning.cu b/cpp/src/partitioning/partitioning.cu index 7b6676346c2..8d8f1a71672 100644 --- a/cpp/src/partitioning/partitioning.cu +++ b/cpp/src/partitioning/partitioning.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020-2023, NVIDIA CORPORATION. + * Copyright (c) 2020-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -122,14 +122,14 @@ class bitwise_partitioner { * @param[out] global_partition_sizes The number of rows in each partition. 
*/ template -__global__ void compute_row_partition_numbers(row_hasher_t the_hasher, - size_type const num_rows, - size_type const num_partitions, - partitioner_type const the_partitioner, - size_type* __restrict__ row_partition_numbers, - size_type* __restrict__ row_partition_offset, - size_type* __restrict__ block_partition_sizes, - size_type* __restrict__ global_partition_sizes) +CUDF_KERNEL void compute_row_partition_numbers(row_hasher_t the_hasher, + size_type const num_rows, + size_type const num_partitions, + partitioner_type const the_partitioner, + size_type* __restrict__ row_partition_numbers, + size_type* __restrict__ row_partition_offset, + size_type* __restrict__ block_partition_sizes, + size_type* __restrict__ global_partition_sizes) { // Accumulate histogram of the size of each partition in shared memory extern __shared__ size_type shared_partition_sizes[]; @@ -197,10 +197,10 @@ __global__ void compute_row_partition_numbers(row_hasher_t the_hasher, {block0 partition(num_partitions-1) offset, block1 partition(num_partitions -1) offset, ...} } */ -__global__ void compute_row_output_locations(size_type* __restrict__ row_partition_numbers, - size_type const num_rows, - size_type const num_partitions, - size_type* __restrict__ block_partition_offsets) +CUDF_KERNEL void compute_row_output_locations(size_type* __restrict__ row_partition_numbers, + size_type const num_rows, + size_type const num_partitions, + size_type* __restrict__ block_partition_offsets) { // Shared array that holds the offset of this blocks partitions in // global memory @@ -255,14 +255,14 @@ __global__ void compute_row_output_locations(size_type* __restrict__ row_partiti * @param[in] scanned_block_partition_sizes The scan of block_partition_sizes */ template -__global__ void copy_block_partitions(InputIter input_iter, - DataType* __restrict__ output_buf, - size_type const num_rows, - size_type const num_partitions, - size_type const* __restrict__ row_partition_numbers, - size_type const* __restrict__ row_partition_offset, - size_type const* __restrict__ block_partition_sizes, - size_type const* __restrict__ scanned_block_partition_sizes) +CUDF_KERNEL void copy_block_partitions(InputIter input_iter, + DataType* __restrict__ output_buf, + size_type const num_rows, + size_type const num_partitions, + size_type const* __restrict__ row_partition_numbers, + size_type const* __restrict__ row_partition_offset, + size_type const* __restrict__ block_partition_sizes, + size_type const* __restrict__ scanned_block_partition_sizes) { extern __shared__ char shared_memory[]; auto block_output = reinterpret_cast(shared_memory); diff --git a/cpp/src/quantiles/tdigest/tdigest.cu b/cpp/src/quantiles/tdigest/tdigest.cu index 4764ac4d87a..c8ac19e01cc 100644 --- a/cpp/src/quantiles/tdigest/tdigest.cu +++ b/cpp/src/quantiles/tdigest/tdigest.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021-2023, NVIDIA CORPORATION. + * Copyright (c) 2021-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -68,13 +68,13 @@ struct make_centroid { // kernel for computing percentiles on input tdigest (mean, weight) centroid data. 
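compute_row_partition_numbers above accumulates a per-block histogram of partition sizes in dynamically sized shared memory before contributing to the global counts. The general shape of that pattern, reduced to a standalone sketch (kernel name, types, and launch parameters are placeholders):

  CUDF_KERNEL void block_histogram(int const* bins, int n, int num_bins, unsigned int* global_counts)
  {
    extern __shared__ unsigned int shared_counts[];  // sized at launch: num_bins * sizeof(unsigned int)

    for (int b = threadIdx.x; b < num_bins; b += blockDim.x) { shared_counts[b] = 0; }
    __syncthreads();

    for (int i = static_cast<int>(blockIdx.x * blockDim.x + threadIdx.x); i < n;
         i += static_cast<int>(blockDim.x * gridDim.x)) {
      atomicAdd(&shared_counts[bins[i]], 1u);          // on-chip atomics while counting
    }
    __syncthreads();

    for (int b = threadIdx.x; b < num_bins; b += blockDim.x) {
      atomicAdd(&global_counts[b], shared_counts[b]);  // one global atomic per bin per block
    }
  }

  // block_histogram<<<grid, block, num_bins * sizeof(unsigned int)>>>(d_bins, n, num_bins, d_counts);

Staging the counts in shared memory keeps most atomics on-chip; only the final per-bin flush touches global memory, which is the point of the shared_partition_sizes buffer above.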
template -__global__ void compute_percentiles_kernel(device_span tdigest_offsets, - column_device_view percentiles, - CentroidIter centroids_, - double const* min_, - double const* max_, - double const* cumulative_weight_, - double* output) +CUDF_KERNEL void compute_percentiles_kernel(device_span tdigest_offsets, + column_device_view percentiles, + CentroidIter centroids_, + double const* min_, + double const* max_, + double const* cumulative_weight_, + double* output) { auto const tid = cudf::detail::grid_1d::global_thread_id(); diff --git a/cpp/src/quantiles/tdigest/tdigest_aggregation.cu b/cpp/src/quantiles/tdigest/tdigest_aggregation.cu index 450996a43d2..fc56d17d73b 100644 --- a/cpp/src/quantiles/tdigest/tdigest_aggregation.cu +++ b/cpp/src/quantiles/tdigest/tdigest_aggregation.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021-2023, NVIDIA CORPORATION. + * Copyright (c) 2021-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -370,15 +370,15 @@ std::unique_ptr to_tdigest_scalar(std::unique_ptr&& tdigest, */ template -__global__ void generate_cluster_limits_kernel(int delta, - size_type num_groups, - NearestWeightFunc nearest_weight, - GroupInfo group_info, - CumulativeWeight cumulative_weight, - double* group_cluster_wl, - size_type* group_num_clusters, - size_type const* group_cluster_offsets, - bool has_nulls) +CUDF_KERNEL void generate_cluster_limits_kernel(int delta, + size_type num_groups, + NearestWeightFunc nearest_weight, + GroupInfo group_info, + CumulativeWeight cumulative_weight, + double* group_cluster_wl, + size_type* group_num_clusters, + size_type const* group_cluster_offsets, + bool has_nulls) { int const tid = threadIdx.x + blockIdx.x * blockDim.x; diff --git a/cpp/src/replace/nulls.cu b/cpp/src/replace/nulls.cu index 2eb624d3f05..bd3e75e2e80 100644 --- a/cpp/src/replace/nulls.cu +++ b/cpp/src/replace/nulls.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020-2023, NVIDIA CORPORATION. + * Copyright (c) 2020-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -57,12 +57,12 @@ namespace { // anonymous static constexpr int BLOCK_SIZE = 256; template -__global__ void replace_nulls_strings(cudf::column_device_view input, - cudf::column_device_view replacement, - cudf::bitmask_type* output_valid, - cudf::size_type* offsets, - char* chars, - cudf::size_type* valid_counter) +CUDF_KERNEL void replace_nulls_strings(cudf::column_device_view input, + cudf::column_device_view replacement, + cudf::bitmask_type* output_valid, + cudf::size_type* offsets, + char* chars, + cudf::size_type* valid_counter) { cudf::size_type nrows = input.size(); auto i = cudf::detail::grid_1d::global_thread_id(); @@ -112,10 +112,10 @@ __global__ void replace_nulls_strings(cudf::column_device_view input, } template -__global__ void replace_nulls(cudf::column_device_view input, - cudf::column_device_view replacement, - cudf::mutable_column_device_view output, - cudf::size_type* output_valid_count) +CUDF_KERNEL void replace_nulls(cudf::column_device_view input, + cudf::column_device_view replacement, + cudf::mutable_column_device_view output, + cudf::size_type* output_valid_count) { cudf::size_type nrows = input.size(); auto i = cudf::detail::grid_1d::global_thread_id(); diff --git a/cpp/src/replace/replace.cu b/cpp/src/replace/replace.cu index 9341929de44..7cad2fb10d3 100644 --- a/cpp/src/replace/replace.cu +++ b/cpp/src/replace/replace.cu @@ -17,7 +17,7 @@ * limitations under the License. */ /* - * Copyright (c) 2019-2023, NVIDIA CORPORATION. + * Copyright (c) 2019-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -118,13 +118,13 @@ __device__ int get_new_string_value(cudf::size_type idx, * @param output_valid_count The output valid count */ template -__global__ void replace_strings_first_pass(cudf::column_device_view input, - cudf::column_device_view values_to_replace, - cudf::column_device_view replacement, - cudf::mutable_column_device_view offsets, - cudf::mutable_column_device_view indices, - cudf::bitmask_type* output_valid, - cudf::size_type* __restrict__ output_valid_count) +CUDF_KERNEL void replace_strings_first_pass(cudf::column_device_view input, + cudf::column_device_view values_to_replace, + cudf::column_device_view replacement, + cudf::mutable_column_device_view offsets, + cudf::mutable_column_device_view indices, + cudf::bitmask_type* output_valid, + cudf::size_type* __restrict__ output_valid_count) { cudf::size_type nrows = input.size(); auto tid = cudf::detail::grid_1d::global_thread_id(); @@ -184,11 +184,11 @@ __global__ void replace_strings_first_pass(cudf::column_device_view input, * @param indices Temporary column used to store the replacement indices. 
*/ template -__global__ void replace_strings_second_pass(cudf::column_device_view input, - cudf::column_device_view replacement, - cudf::mutable_column_device_view offsets, - cudf::mutable_column_device_view strings, - cudf::mutable_column_device_view indices) +CUDF_KERNEL void replace_strings_second_pass(cudf::column_device_view input, + cudf::column_device_view replacement, + cudf::mutable_column_device_view offsets, + cudf::mutable_column_device_view strings, + cudf::mutable_column_device_view indices) { cudf::size_type nrows = input.size(); auto tid = cudf::detail::grid_1d::global_thread_id(); @@ -245,12 +245,12 @@ __global__ void replace_strings_second_pass(cudf::column_device_view input, * @param[in] replacement_valid Valid mask associated with d_replacement_values */ template -__global__ void replace_kernel(cudf::column_device_view input, - cudf::mutable_column_device_view output, - cudf::size_type* __restrict__ output_valid_count, - cudf::size_type nrows, - cudf::column_device_view values_to_replace, - cudf::column_device_view replacement) +CUDF_KERNEL void replace_kernel(cudf::column_device_view input, + cudf::mutable_column_device_view output, + cudf::size_type* __restrict__ output_valid_count, + cudf::size_type nrows, + cudf::column_device_view values_to_replace, + cudf::column_device_view replacement) { T* __restrict__ output_data = output.data(); diff --git a/cpp/src/rolling/detail/rolling.cuh b/cpp/src/rolling/detail/rolling.cuh index 0648ef3d30f..20845a97c7e 100644 --- a/cpp/src/rolling/detail/rolling.cuh +++ b/cpp/src/rolling/detail/rolling.cuh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020-2023, NVIDIA CORPORATION. + * Copyright (c) 2020-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -1022,7 +1022,7 @@ template -__launch_bounds__(block_size) __global__ +__launch_bounds__(block_size) CUDF_KERNEL void gpu_rolling(column_device_view input, column_device_view default_outputs, mutable_column_device_view output, diff --git a/cpp/src/rolling/jit/kernel.cu b/cpp/src/rolling/jit/kernel.cu index 06b224c39ad..2c753965c1c 100644 --- a/cpp/src/rolling/jit/kernel.cu +++ b/cpp/src/rolling/jit/kernel.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2023, NVIDIA CORPORATION. + * Copyright (c) 2019-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -41,15 +41,15 @@ template -__global__ void gpu_rolling_new(cudf::size_type nrows, - InType const* const __restrict__ in_col, - cudf::bitmask_type const* const __restrict__ in_col_valid, - OutType* __restrict__ out_col, - cudf::bitmask_type* __restrict__ out_col_valid, - cudf::size_type* __restrict__ output_valid_count, - PrecedingWindowType preceding_window_begin, - FollowingWindowType following_window_begin, - cudf::size_type min_periods) +CUDF_KERNEL void gpu_rolling_new(cudf::size_type nrows, + InType const* const __restrict__ in_col, + cudf::bitmask_type const* const __restrict__ in_col_valid, + OutType* __restrict__ out_col, + cudf::bitmask_type* __restrict__ out_col_valid, + cudf::size_type* __restrict__ output_valid_count, + PrecedingWindowType preceding_window_begin, + FollowingWindowType following_window_begin, + cudf::size_type min_periods) { cudf::thread_index_type i = blockIdx.x * blockDim.x + threadIdx.x; cudf::thread_index_type const stride = blockDim.x * gridDim.x; diff --git a/cpp/src/strings/attributes.cu b/cpp/src/strings/attributes.cu index 00e49f9d97e..2856c077fb2 100644 --- a/cpp/src/strings/attributes.cu +++ b/cpp/src/strings/attributes.cu @@ -110,8 +110,8 @@ std::unique_ptr counts_fn(strings_column_view const& strings, * @param d_strings Column with strings to count * @param d_lengths Results of the counts per string */ -__global__ void count_characters_parallel_fn(column_device_view const d_strings, - size_type* d_lengths) +CUDF_KERNEL void count_characters_parallel_fn(column_device_view const d_strings, + size_type* d_lengths) { auto const idx = cudf::detail::grid_1d::global_thread_id(); using warp_reduce = cub::WarpReduce; diff --git a/cpp/src/strings/convert/convert_urls.cu b/cpp/src/strings/convert/convert_urls.cu index 511acc38d75..b16eb318b39 100644 --- a/cpp/src/strings/convert/convert_urls.cu +++ b/cpp/src/strings/convert/convert_urls.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2023, NVIDIA CORPORATION. + * Copyright (c) 2019-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -197,8 +197,8 @@ __forceinline__ __device__ char escaped_sequence_to_byte(char const* const ptr) * @param[out] out_counts Number of characters in each decode URL. */ template -__global__ void url_decode_char_counter(column_device_view const in_strings, - size_type* const out_counts) +CUDF_KERNEL void url_decode_char_counter(column_device_view const in_strings, + size_type* const out_counts) { constexpr int halo_size = 2; __shared__ char temporary_buffer[num_warps_per_threadblock][char_block_size + halo_size]; @@ -280,9 +280,9 @@ __global__ void url_decode_char_counter(column_device_view const in_strings, * @param[in] out_offsets Offset value of each string associated with `out_chars`. 
*/ template -__global__ void url_decode_char_replacer(column_device_view const in_strings, - char* const out_chars, - size_type const* const out_offsets) +CUDF_KERNEL void url_decode_char_replacer(column_device_view const in_strings, + char* const out_chars, + size_type const* const out_offsets) { constexpr int halo_size = 2; __shared__ char temporary_buffer[num_warps_per_threadblock][char_block_size + halo_size * 2]; diff --git a/cpp/src/strings/copying/concatenate.cu b/cpp/src/strings/copying/concatenate.cu index 2d9b06183e2..8cabd0dc75f 100644 --- a/cpp/src/strings/copying/concatenate.cu +++ b/cpp/src/strings/copying/concatenate.cu @@ -112,7 +112,7 @@ auto create_strings_device_views(host_span views, rmm::cuda_s } template -__global__ void fused_concatenate_string_offset_kernel( +CUDF_KERNEL void fused_concatenate_string_offset_kernel( column_device_view const* input_views, size_t const* input_offsets, size_t const* partition_offsets, @@ -171,11 +171,11 @@ __global__ void fused_concatenate_string_offset_kernel( } } -__global__ void fused_concatenate_string_chars_kernel(column_device_view const* input_views, - size_t const* partition_offsets, - size_type const num_input_views, - size_type const output_size, - char* output_data) +CUDF_KERNEL void fused_concatenate_string_chars_kernel(column_device_view const* input_views, + size_t const* partition_offsets, + size_type const num_input_views, + size_type const output_size, + char* output_data) { cudf::thread_index_type output_index = threadIdx.x + blockIdx.x * blockDim.x; diff --git a/cpp/src/strings/regex/utilities.cuh b/cpp/src/strings/regex/utilities.cuh index 23b53062bf3..bc8f5d68a4b 100644 --- a/cpp/src/strings/regex/utilities.cuh +++ b/cpp/src/strings/regex/utilities.cuh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022-2023, NVIDIA CORPORATION. + * Copyright (c) 2022-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -37,7 +37,7 @@ namespace detail { constexpr auto regex_launch_kernel_block_size = 256; template -__global__ void for_each_kernel(ForEachFunction fn, reprog_device const d_prog, size_type size) +CUDF_KERNEL void for_each_kernel(ForEachFunction fn, reprog_device const d_prog, size_type size) { extern __shared__ u_char shmem[]; if (threadIdx.x == 0) { d_prog.store(shmem); } @@ -71,10 +71,10 @@ void launch_for_each_kernel(ForEachFunction fn, } template -__global__ void transform_kernel(TransformFunction fn, - reprog_device const d_prog, - OutputType* d_output, - size_type size) +CUDF_KERNEL void transform_kernel(TransformFunction fn, + reprog_device const d_prog, + OutputType* d_output, + size_type size) { extern __shared__ u_char shmem[]; if (threadIdx.x == 0) { d_prog.store(shmem); } diff --git a/cpp/src/strings/search/find.cu b/cpp/src/strings/search/find.cu index 4ba1359c469..78343d58626 100644 --- a/cpp/src/strings/search/find.cu +++ b/cpp/src/strings/search/find.cu @@ -115,11 +115,11 @@ struct empty_target_fn { * @brief String per warp function for find/rfind */ template -__global__ void finder_warp_parallel_fn(column_device_view const d_strings, - TargetIterator const d_targets, - size_type const start, - size_type const stop, - size_type* d_results) +CUDF_KERNEL void finder_warp_parallel_fn(column_device_view const d_strings, + TargetIterator const d_targets, + size_type const start, + size_type const stop, + size_type* d_results) { size_type const idx = static_cast(threadIdx.x + blockIdx.x * blockDim.x); @@ -346,9 +346,9 @@ namespace { * @param d_target String to search for in each row of `d_strings` * @param d_results Indicates which rows contain `d_target` */ -__global__ void contains_warp_parallel_fn(column_device_view const d_strings, - string_view const d_target, - bool* d_results) +CUDF_KERNEL void contains_warp_parallel_fn(column_device_view const d_strings, + string_view const d_target, + bool* d_results) { size_type const idx = static_cast(threadIdx.x + blockIdx.x * blockDim.x); using warp_reduce = cub::WarpReduce; diff --git a/cpp/src/text/bpe/byte_pair_encoding.cu b/cpp/src/text/bpe/byte_pair_encoding.cu index a697df913d3..1f125636208 100644 --- a/cpp/src/text/bpe/byte_pair_encoding.cu +++ b/cpp/src/text/bpe/byte_pair_encoding.cu @@ -122,11 +122,11 @@ struct bpe_unpairable_offsets_fn { * @param d_rerank_data Working memory to hold locations where reranking is required */ template -__global__ void bpe_parallel_fn(cudf::column_device_view const d_strings, - MapRefType const d_map, - int8_t* d_spaces_data, // working memory - cudf::size_type* d_ranks_data, // more working memory - int8_t* d_rerank_data // and one more working memory +CUDF_KERNEL void bpe_parallel_fn(cudf::column_device_view const d_strings, + MapRefType const d_map, + int8_t* d_spaces_data, // working memory + cudf::size_type* d_ranks_data, // more working memory + int8_t* d_rerank_data // and one more working memory ) { // string per block @@ -291,9 +291,9 @@ __global__ void bpe_parallel_fn(cudf::column_device_view const d_strings, * @param d_spaces_data Output the location where separator will be inserted * @param d_sizes Output sizes of each row */ -__global__ void bpe_finalize(cudf::column_device_view const d_strings, - int8_t* d_spaces_data, // where separators are inserted - cudf::size_type* d_sizes // output sizes of encoded strings +CUDF_KERNEL void bpe_finalize(cudf::column_device_view const d_strings, + int8_t* d_spaces_data, // where separators are inserted + cudf::size_type* d_sizes // output 
sizes of encoded strings ) { // string per block diff --git a/cpp/src/text/minhash.cu b/cpp/src/text/minhash.cu index 4e0a538ffe9..dcb59166cec 100644 --- a/cpp/src/text/minhash.cu +++ b/cpp/src/text/minhash.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2023, NVIDIA CORPORATION. + * Copyright (c) 2023-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -62,10 +62,10 @@ template < typename HashFunction, typename hash_value_type = std:: conditional_t, uint32_t, uint64_t>> -__global__ void minhash_kernel(cudf::column_device_view const d_strings, - cudf::device_span seeds, - cudf::size_type width, - hash_value_type* d_hashes) +CUDF_KERNEL void minhash_kernel(cudf::column_device_view const d_strings, + cudf::device_span seeds, + cudf::size_type width, + hash_value_type* d_hashes) { auto const idx = static_cast(threadIdx.x + blockIdx.x * blockDim.x); if (idx >= (static_cast(d_strings.size()) * diff --git a/cpp/src/text/subword/data_normalizer.cu b/cpp/src/text/subword/data_normalizer.cu index 34eb95bea5c..c83bc2e318f 100644 --- a/cpp/src/text/subword/data_normalizer.cu +++ b/cpp/src/text/subword/data_normalizer.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020-2023, NVIDIA CORPORATION. + * Copyright (c) 2020-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -204,13 +204,13 @@ extract_code_points_from_utf8(unsigned char const* strings, * @param[out] code_points The resulting code point values from normalization. * @param[out] chars_per_thread Output number of code point values per string. */ -__global__ void kernel_data_normalizer(unsigned char const* strings, - size_t const total_bytes, - uint32_t const* cp_metadata, - uint64_t const* aux_table, - bool const do_lower_case, - uint32_t* code_points, - uint32_t* chars_per_thread) +CUDF_KERNEL void kernel_data_normalizer(unsigned char const* strings, + size_t const total_bytes, + uint32_t const* cp_metadata, + uint64_t const* aux_table, + bool const do_lower_case, + uint32_t* code_points, + uint32_t* chars_per_thread) { constexpr uint32_t init_val = (1 << FILTER_BIT); uint32_t replacement_code_points[MAX_NEW_CHARS] = {init_val, init_val, init_val}; diff --git a/cpp/src/text/subword/subword_tokenize.cu b/cpp/src/text/subword/subword_tokenize.cu index a35d69e2145..c9592e5cc48 100644 --- a/cpp/src/text/subword/subword_tokenize.cu +++ b/cpp/src/text/subword/subword_tokenize.cu @@ -56,7 +56,7 @@ namespace { * @param[out] attn_mask Identifies valid token id entries * @param[out] metadata Additional data per row */ -__global__ void kernel_compute_tensor_metadata( +CUDF_KERNEL void kernel_compute_tensor_metadata( // input uint32_t const* token_ids, cudf::size_type const* offsets, diff --git a/cpp/src/text/subword/wordpiece_tokenizer.cu b/cpp/src/text/subword/wordpiece_tokenizer.cu index 3b912017320..d2804af5f8b 100644 --- a/cpp/src/text/subword/wordpiece_tokenizer.cu +++ b/cpp/src/text/subword/wordpiece_tokenizer.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020-2023, NVIDIA CORPORATION. + * Copyright (c) 2020-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -75,12 +75,12 @@ namespace { * @param[out] tokens_per_word An array of size `num_code_points` which hold the number of * tokens. This kernel just sets all the values to 0. 
*/ -__global__ void init_data_and_mark_word_start_and_ends(uint32_t const* code_points, - uint32_t* start_word_indices, - uint32_t* end_word_indices, - size_t num_code_points, - uint32_t* token_ids, - uint8_t* tokens_per_word) +CUDF_KERNEL void init_data_and_mark_word_start_and_ends(uint32_t const* code_points, + uint32_t* start_word_indices, + uint32_t* end_word_indices, + size_t num_code_points, + uint32_t* token_ids, + uint8_t* tokens_per_word) { cudf::thread_index_type char_for_thread = static_cast(blockDim.x) * static_cast(blockIdx.x) + @@ -131,11 +131,11 @@ __global__ void init_data_and_mark_word_start_and_ends(uint32_t const* code_poin * written to indicate this. * @param num_strings The total number of strings to be processed. */ -__global__ void mark_string_start_and_ends(uint32_t const* code_points, - cudf::size_type const* strings_offsets, - uint32_t* start_word_indices, - uint32_t* end_word_indices, - uint32_t num_strings) +CUDF_KERNEL void mark_string_start_and_ends(uint32_t const* code_points, + cudf::size_type const* strings_offsets, + uint32_t* start_word_indices, + uint32_t* end_word_indices, + uint32_t num_strings) { cudf::thread_index_type idx = static_cast(blockDim.x) * static_cast(blockIdx.x) + @@ -319,20 +319,20 @@ struct mark_special_tokens { * @param outer_hash_b_param: The b parameter for the outer hash * @param num_outer_bins: The number of bins for the outer hash */ -__global__ void kernel_wordpiece_tokenizer(uint32_t const* code_points, - uint64_t const* hash_table, - uint64_t const* bin_coefficients, - uint16_t const* bin_offsets, - uint16_t unk_token_id, - uint32_t outer_hash_a_param, - uint32_t outer_hash_b_param, - uint16_t num_outer_bins, - uint32_t const* word_starts, - uint32_t const* word_ends, - uint32_t max_word_length, - uint32_t total_words, - uint32_t* token_ids, - uint8_t* tokens_per_word) +CUDF_KERNEL void kernel_wordpiece_tokenizer(uint32_t const* code_points, + uint64_t const* hash_table, + uint64_t const* bin_coefficients, + uint16_t const* bin_offsets, + uint16_t unk_token_id, + uint32_t outer_hash_a_param, + uint32_t outer_hash_b_param, + uint16_t num_outer_bins, + uint32_t const* word_starts, + uint32_t const* word_ends, + uint32_t max_word_length, + uint32_t total_words, + uint32_t* token_ids, + uint8_t* tokens_per_word) { cudf::thread_index_type word_to_tokenize = static_cast(blockDim.x) * static_cast(blockIdx.x) + diff --git a/cpp/src/text/vocabulary_tokenize.cu b/cpp/src/text/vocabulary_tokenize.cu index 91f4c304590..a9e8d4d9a24 100644 --- a/cpp/src/text/vocabulary_tokenize.cu +++ b/cpp/src/text/vocabulary_tokenize.cu @@ -214,10 +214,10 @@ struct mark_delimiters_fn { } }; -__global__ void token_counts_fn(cudf::column_device_view const d_strings, - cudf::string_view const d_delimiter, - cudf::size_type* d_counts, - int8_t* d_results) +CUDF_KERNEL void token_counts_fn(cudf::column_device_view const d_strings, + cudf::string_view const d_delimiter, + cudf::size_type* d_counts, + int8_t* d_results) { // string per warp auto const idx = static_cast(threadIdx.x + blockIdx.x * blockDim.x); diff --git a/cpp/src/transform/compute_column.cu b/cpp/src/transform/compute_column.cu index 224dd93b048..eaf47adec10 100644 --- a/cpp/src/transform/compute_column.cu +++ b/cpp/src/transform/compute_column.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020-2023, NVIDIA CORPORATION. + * Copyright (c) 2020-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -54,7 +54,7 @@ namespace detail { * @param output_column The destination for the results of evaluating the expression. */ template -__launch_bounds__(max_block_size) __global__ +__launch_bounds__(max_block_size) CUDF_KERNEL void compute_column_kernel(table_device_view const table, ast::detail::expression_device_view device_expression_data, mutable_column_device_view output_column) diff --git a/cpp/src/transform/jit/kernel.cu b/cpp/src/transform/jit/kernel.cu index 0170cc50c6f..1e913ecb5bb 100644 --- a/cpp/src/transform/jit/kernel.cu +++ b/cpp/src/transform/jit/kernel.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2023, NVIDIA CORPORATION. + * Copyright (c) 2019-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -35,7 +35,7 @@ namespace transformation { namespace jit { template -__global__ void kernel(cudf::size_type size, TypeOut* out_data, TypeIn* in_data) +CUDF_KERNEL void kernel(cudf::size_type size, TypeOut* out_data, TypeIn* in_data) { // cannot use global_thread_id utility due to a JIT build issue by including // the `cudf/detail/utilities/cuda.cuh` header diff --git a/cpp/src/transform/row_bit_count.cu b/cpp/src/transform/row_bit_count.cu index b151b44565d..a91dc8fbbc6 100644 --- a/cpp/src/transform/row_bit_count.cu +++ b/cpp/src/transform/row_bit_count.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021-2023, NVIDIA CORPORATION. + * Copyright (c) 2021-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -398,10 +398,10 @@ __device__ size_type row_size_functor::operator()(column_device_vie * @param output Output span of size (# rows) where per-row bit sizes are stored * @param max_branch_depth Maximum depth of the span stack needed per-thread */ -__global__ void compute_row_sizes(device_span cols, - device_span info, - device_span output, - size_type max_branch_depth) +CUDF_KERNEL void compute_row_sizes(device_span cols, + device_span info, + device_span output, + size_type max_branch_depth) { extern __shared__ row_span thread_branch_stacks[]; int const tid = threadIdx.x + blockIdx.x * blockDim.x; diff --git a/cpp/src/transform/row_conversion.cu b/cpp/src/transform/row_conversion.cu index b797e495480..ef12fbeae52 100644 --- a/cpp/src/transform/row_conversion.cu +++ b/cpp/src/transform/row_conversion.cu @@ -314,14 +314,14 @@ struct fixed_width_row_offset_functor { * @param output_nm array of pointers to the output null masks * @param input_data pointing to the incoming row data */ -__global__ void copy_from_rows_fixed_width_optimized(const size_type num_rows, - const size_type num_columns, - const size_type row_size, - const size_type* input_offset_in_row, - const size_type* num_bytes, - int8_t** output_data, - bitmask_type** output_nm, - const int8_t* input_data) +CUDF_KERNEL void copy_from_rows_fixed_width_optimized(const size_type num_rows, + const size_type num_columns, + const size_type row_size, + const size_type* input_offset_in_row, + const size_type* num_bytes, + int8_t** output_data, + bitmask_type** output_nm, + const int8_t* input_data) { // We are going to copy the data in two passes. // The first pass copies a chunk of data into shared memory. 
@@ -433,15 +433,15 @@ __global__ void copy_from_rows_fixed_width_optimized(const size_type num_rows, } } -__global__ void copy_to_rows_fixed_width_optimized(const size_type start_row, - const size_type num_rows, - const size_type num_columns, - const size_type row_size, - const size_type* output_offset_in_row, - const size_type* num_bytes, - const int8_t** input_data, - const bitmask_type** input_nm, - int8_t* output_data) +CUDF_KERNEL void copy_to_rows_fixed_width_optimized(const size_type start_row, + const size_type num_rows, + const size_type num_columns, + const size_type row_size, + const size_type* output_offset_in_row, + const size_type* num_bytes, + const int8_t** input_data, + const bitmask_type** input_nm, + int8_t* output_data) { // We are going to copy the data in two passes. // The first pass copies a chunk of data into shared memory. @@ -588,16 +588,16 @@ __global__ void copy_to_rows_fixed_width_optimized(const size_type start_row, * */ template -__global__ void copy_to_rows(const size_type num_rows, - const size_type num_columns, - const size_type shmem_used_per_tile, - device_span tile_infos, - const int8_t** input_data, - const size_type* col_sizes, - const size_type* col_offsets, - RowOffsetFunctor row_offsets, - size_type const* batch_row_boundaries, - int8_t** output_data) +CUDF_KERNEL void copy_to_rows(const size_type num_rows, + const size_type num_columns, + const size_type shmem_used_per_tile, + device_span tile_infos, + const int8_t** input_data, + const size_type* col_sizes, + const size_type* col_offsets, + RowOffsetFunctor row_offsets, + size_type const* batch_row_boundaries, + int8_t** output_data) { // We are going to copy the data in two passes. // The first pass copies a chunk of data into shared memory. @@ -731,15 +731,15 @@ __global__ void copy_to_rows(const size_type num_rows, * */ template -__global__ void copy_validity_to_rows(const size_type num_rows, - const size_type num_columns, - const size_type shmem_used_per_tile, - RowOffsetFunctor row_offsets, - size_type const* batch_row_boundaries, - int8_t** output_data, - const size_type validity_offset, - device_span tile_infos, - const bitmask_type** input_nm) +CUDF_KERNEL void copy_validity_to_rows(const size_type num_rows, + const size_type num_columns, + const size_type shmem_used_per_tile, + RowOffsetFunctor row_offsets, + size_type const* batch_row_boundaries, + int8_t** output_data, + const size_type validity_offset, + device_span tile_infos, + const bitmask_type** input_nm) { extern __shared__ int8_t shared_data[]; @@ -851,15 +851,15 @@ __global__ void copy_validity_to_rows(const size_type num_rows, * */ template -__global__ void copy_strings_to_rows(size_type const num_rows, - size_type const num_variable_columns, - int8_t const** variable_input_data, - size_type const* variable_col_output_offsets, - size_type const** variable_col_offsets, - size_type fixed_width_row_size, - RowOffsetFunctor row_offsets, - size_type const batch_row_offset, - int8_t* output_data) +CUDF_KERNEL void copy_strings_to_rows(size_type const num_rows, + size_type const num_variable_columns, + int8_t const** variable_input_data, + size_type const* variable_col_output_offsets, + size_type const** variable_col_offsets, + size_type fixed_width_row_size, + RowOffsetFunctor row_offsets, + size_type const batch_row_offset, + int8_t* output_data) { // Each block will take a group of rows controlled by NUM_STRING_ROWS_PER_BLOCK_TO_ROWS. Each warp // will copy a row at a time. 
The base thread will first go through column data and fill out @@ -920,16 +920,16 @@ __global__ void copy_strings_to_rows(size_type const num_rows, * */ template -__global__ void copy_from_rows(const size_type num_rows, - const size_type num_columns, - const size_type shmem_used_per_tile, - RowOffsetFunctor row_offsets, - size_type const* batch_row_boundaries, - int8_t** output_data, - const size_type* col_sizes, - const size_type* col_offsets, - device_span tile_infos, - const int8_t* input_data) +CUDF_KERNEL void copy_from_rows(const size_type num_rows, + const size_type num_columns, + const size_type shmem_used_per_tile, + RowOffsetFunctor row_offsets, + size_type const* batch_row_boundaries, + int8_t** output_data, + const size_type* col_sizes, + const size_type* col_offsets, + device_span tile_infos, + const int8_t* input_data) { // We are going to copy the data in two passes. // The first pass copies a chunk of data into shared memory. @@ -1042,15 +1042,15 @@ __global__ void copy_from_rows(const size_type num_rows, * */ template -__global__ void copy_validity_from_rows(const size_type num_rows, - const size_type num_columns, - const size_type shmem_used_per_tile, - RowOffsetFunctor row_offsets, - size_type const* batch_row_boundaries, - bitmask_type** output_nm, - const size_type validity_offset, - device_span tile_infos, - const int8_t* input_data) +CUDF_KERNEL void copy_validity_from_rows(const size_type num_rows, + const size_type num_columns, + const size_type shmem_used_per_tile, + RowOffsetFunctor row_offsets, + size_type const* batch_row_boundaries, + bitmask_type** output_nm, + const size_type validity_offset, + device_span tile_infos, + const int8_t* input_data) { extern __shared__ int8_t shared[]; @@ -1175,14 +1175,14 @@ __global__ void copy_validity_from_rows(const size_type num_rows, * @param num_string_columns number of string columns in the table */ template -__global__ void copy_strings_from_rows(RowOffsetFunctor row_offsets, - int32_t** string_row_offsets, - int32_t** string_lengths, - size_type** string_column_offsets, - char** string_col_data, - int8_t const* row_data, - size_type const num_rows, - size_type const num_string_columns) +CUDF_KERNEL void copy_strings_from_rows(RowOffsetFunctor row_offsets, + int32_t** string_row_offsets, + int32_t** string_lengths, + size_type** string_column_offsets, + char** string_col_data, + int8_t const* row_data, + size_type const num_rows, + size_type const num_string_columns) { // Each warp takes a tile, which is a single column and up to ROWS_PER_BLOCK rows. A tile will not // wrap around the bottom of the table. The warp will copy the strings for each row in the tile. 
diff --git a/cpp/tests/device_atomics/device_atomics_test.cu b/cpp/tests/device_atomics/device_atomics_test.cu index f0c69ea6bfb..6e90d4462df 100644 --- a/cpp/tests/device_atomics/device_atomics_test.cu +++ b/cpp/tests/device_atomics/device_atomics_test.cu @@ -31,7 +31,7 @@ #include template -__global__ void gpu_atomic_test(T* result, T* data, size_t size) +CUDF_KERNEL void gpu_atomic_test(T* result, T* data, size_t size) { size_t id = blockIdx.x * blockDim.x + threadIdx.x; size_t step = blockDim.x * gridDim.x; @@ -79,7 +79,7 @@ __device__ T atomic_op(T* addr, T const& value, BinaryOp op) } template -__global__ void gpu_atomicCAS_test(T* result, T* data, size_t size) +CUDF_KERNEL void gpu_atomicCAS_test(T* result, T* data, size_t size) { size_t id = blockIdx.x * blockDim.x + threadIdx.x; size_t step = blockDim.x * gridDim.x; diff --git a/cpp/tests/error/error_handling_test.cu b/cpp/tests/error/error_handling_test.cu index 6bb1afda2a8..5cb2d729f3d 100644 --- a/cpp/tests/error/error_handling_test.cu +++ b/cpp/tests/error/error_handling_test.cu @@ -40,7 +40,7 @@ TEST(StreamCheck, success) { EXPECT_NO_THROW(CUDF_CHECK_CUDA(0)); } namespace { // Some silly kernel that will cause an error -void __global__ test_kernel(int* data) { data[threadIdx.x] = threadIdx.x; } +CUDF_KERNEL void test_kernel(int* data) { data[threadIdx.x] = threadIdx.x; } } // namespace // In a release build and without explicit synchronization, CUDF_CHECK_CUDA may @@ -70,7 +70,7 @@ TEST(StreamCheck, CatchFailedKernel) EXPECT_THROW(CUDF_CHECK_CUDA(stream.value()), cudf::cuda_error); } -__global__ void kernel() { asm("trap;"); } +CUDF_KERNEL void kernel() { asm("trap;"); } TEST(DeathTest, CudaFatalError) { @@ -88,9 +88,9 @@ TEST(DeathTest, CudaFatalError) #ifndef NDEBUG -__global__ void assert_false_kernel() { cudf_assert(false && "this kernel should die"); } +CUDF_KERNEL void assert_false_kernel() { cudf_assert(false && "this kernel should die"); } -__global__ void assert_true_kernel() { cudf_assert(true && "this kernel should live"); } +CUDF_KERNEL void assert_true_kernel() { cudf_assert(true && "this kernel should live"); } TEST(DebugAssertDeathTest, cudf_assert_false) { diff --git a/cpp/tests/identify_stream_usage/test_default_stream_identification.cu b/cpp/tests/identify_stream_usage/test_default_stream_identification.cu index 28bb47af40d..268c7b37c81 100644 --- a/cpp/tests/identify_stream_usage/test_default_stream_identification.cu +++ b/cpp/tests/identify_stream_usage/test_default_stream_identification.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022-2023, NVIDIA CORPORATION. + * Copyright (c) 2022-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -16,7 +16,7 @@ #include -__global__ void kernel() { printf("The kernel ran!\n"); } +__global__ static void kernel() { printf("The kernel ran!\n"); } void test_cudaLaunchKernel() { diff --git a/cpp/tests/scalar/scalar_device_view_test.cu b/cpp/tests/scalar/scalar_device_view_test.cu index 8d0e54f024f..8444716bccd 100644 --- a/cpp/tests/scalar/scalar_device_view_test.cu +++ b/cpp/tests/scalar/scalar_device_view_test.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2023, NVIDIA CORPORATION. + * Copyright (c) 2019-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -35,14 +35,14 @@ struct TypedScalarDeviceViewTest : public cudf::test::BaseFixture {}; TYPED_TEST_SUITE(TypedScalarDeviceViewTest, cudf::test::FixedWidthTypesWithoutFixedPoint); template -__global__ void test_set_value(ScalarDeviceViewType s, ScalarDeviceViewType s1) +CUDF_KERNEL void test_set_value(ScalarDeviceViewType s, ScalarDeviceViewType s1) { s1.set_value(s.value()); s1.set_valid(true); } template -__global__ void test_value(ScalarDeviceViewType s, ScalarDeviceViewType s1, bool* result) +CUDF_KERNEL void test_value(ScalarDeviceViewType s, ScalarDeviceViewType s1, bool* result) { *result = (s.value() == s1.value()); } @@ -73,7 +73,7 @@ TYPED_TEST(TypedScalarDeviceViewTest, Value) } template -__global__ void test_null(ScalarDeviceViewType s, bool* result) +CUDF_KERNEL void test_null(ScalarDeviceViewType s, bool* result) { *result = s.is_valid(); } @@ -92,7 +92,7 @@ TYPED_TEST(TypedScalarDeviceViewTest, ConstructNull) } template -__global__ void test_setnull(ScalarDeviceViewType s) +CUDF_KERNEL void test_setnull(ScalarDeviceViewType s) { s.set_valid(false); } @@ -113,10 +113,10 @@ TYPED_TEST(TypedScalarDeviceViewTest, SetNull) struct StringScalarDeviceViewTest : public cudf::test::BaseFixture {}; -__global__ void test_string_value(cudf::string_scalar_device_view s, - char const* value, - cudf::size_type size, - bool* result) +CUDF_KERNEL void test_string_value(cudf::string_scalar_device_view s, + char const* value, + cudf::size_type size, + bool* result) { *result = (s.value() == cudf::string_view(value, size)); } diff --git a/cpp/tests/streams/pool_test.cu b/cpp/tests/streams/pool_test.cu index 0f92e1c0c2b..52debe24fe8 100644 --- a/cpp/tests/streams/pool_test.cu +++ b/cpp/tests/streams/pool_test.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2023, NVIDIA CORPORATION. + * Copyright (c) 2023-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -22,7 +22,7 @@ class StreamPoolTest : public cudf::test::BaseFixture {}; -__global__ void do_nothing_kernel() {} +CUDF_KERNEL void do_nothing_kernel() {} TEST_F(StreamPoolTest, ForkStreams) { diff --git a/cpp/tests/types/type_dispatcher_test.cu b/cpp/tests/types/type_dispatcher_test.cu index d7df8f03ec1..0b26330d323 100644 --- a/cpp/tests/types/type_dispatcher_test.cu +++ b/cpp/tests/types/type_dispatcher_test.cu @@ -59,7 +59,7 @@ struct verify_dispatched_type { } }; -__global__ void dispatch_test_kernel(cudf::type_id id, bool* d_result) +CUDF_KERNEL void dispatch_test_kernel(cudf::type_id id, bool* d_result) { if (0 == threadIdx.x + blockIdx.x * blockDim.x) *d_result = cudf::type_dispatcher(cudf::data_type{id}, verify_dispatched_type{}, id); @@ -119,7 +119,7 @@ struct verify_double_dispatched_type { } }; -__global__ void double_dispatch_test_kernel(cudf::type_id id1, cudf::type_id id2, bool* d_result) +CUDF_KERNEL void double_dispatch_test_kernel(cudf::type_id id1, cudf::type_id id2, bool* d_result) { if (0 == threadIdx.x + blockIdx.x * blockDim.x) *d_result = cudf::double_type_dispatcher( diff --git a/cpp/tests/utilities_tests/span_tests.cu b/cpp/tests/utilities_tests/span_tests.cu index 870528d306c..2075c67a18a 100644 --- a/cpp/tests/utilities_tests/span_tests.cu +++ b/cpp/tests/utilities_tests/span_tests.cu @@ -247,7 +247,7 @@ TEST(SpanTest, CanConstructFromDeviceContainers) (void)device_span(d_uvector_c); } -__global__ void simple_device_kernel(device_span result) { result[0] = true; } +CUDF_KERNEL void simple_device_kernel(device_span result) { result[0] = true; } TEST(SpanTest, CanUseDeviceSpan) { @@ -277,7 +277,7 @@ TEST(MdSpanTest, CanDetermineEmptiness) EXPECT_TRUE(device_2dspan{no_columns_vector}.is_empty()); } -__global__ void readwrite_kernel(device_2dspan result) +CUDF_KERNEL void readwrite_kernel(device_2dspan result) { if (result[5][6] == 5) { result[5][6] *= 6; @@ -436,7 +436,7 @@ TEST(HostDeviceSpanTest, CanSendToDevice) EXPECT_EQ(std::string(d_message), hello_world_message); } -__global__ void simple_device_char_kernel(device_span result) +CUDF_KERNEL void simple_device_char_kernel(device_span result) { char const* str = "world hello"; for (int offset = 0; offset < result.size(); ++offset) { From 56a7b95050cb0e85637e2c5b47fe99f22fcaf5db Mon Sep 17 00:00:00 2001 From: Vyas Ramasubramani Date: Wed, 17 Jan 2024 11:12:09 -0500 Subject: [PATCH 4/6] Fix ptx file discovery in editable installs (#14767) The behavior of editable installs changed when we transitioned to scikit-build-core, and it affects where the ptx files created during the build can be discovered. Editable installs no longer place built files directly alongside source files. Instead, Python's import machinery is leveraged to add built files to the search path. Since ptx files are not Python files, the loader logic isn't relevant, but we now need to ensure that we always search for the ptx files alongside built artifacts (namely Cython compiled modules) rather than Python source files. I'm guessing that nobody has encountered this yet due to preexisting build artifacts in their source directories. 
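To make the new lookup concrete, here is a minimal sketch of the discovery rule described above: resolve the PTX files relative to a compiled extension module (a built artifact) rather than relative to a Python source file. It mirrors the directory layout used in the diff below; the standalone helper name is hypothetical and only for illustration.

```
# Sketch only: locate shim PTX files next to built artifacts.
# `strings_udf` is a compiled (Cython) extension, so its __file__ points at
# the built/installed location even in an editable install, unlike a .py file.
import glob
import os

from cudf._lib import strings_udf


def find_shim_ptx_files():
    # hypothetical helper; the actual change routes this through _get_ptx_file()
    udf_dir = os.path.join(
        os.path.dirname(strings_udf.__file__), "..", "core", "udf"
    )
    return sorted(glob.glob(os.path.join(udf_dir, "shim_*.ptx")))
```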
Authors: - Vyas Ramasubramani (https://github.com/vyasr) Approvers: - Bradley Dice (https://github.com/bdice) URL: https://github.com/rapidsai/cudf/pull/14767 --- python/cudf/cudf/core/udf/utils.py | 16 ++++++++-------- python/cudf/cudf/utils/_numba.py | 24 +++++++++++++++++++----- python/cudf/udf_cpp/CMakeLists.txt | 29 ++--------------------------- 3 files changed, 29 insertions(+), 40 deletions(-) diff --git a/python/cudf/cudf/core/udf/utils.py b/python/cudf/cudf/core/udf/utils.py index bfe5f5007fe..bd57db6b620 100644 --- a/python/cudf/cudf/core/udf/utils.py +++ b/python/cudf/cudf/core/udf/utils.py @@ -1,4 +1,4 @@ -# Copyright (c) 2020-2023, NVIDIA CORPORATION. +# Copyright (c) 2020-2024, NVIDIA CORPORATION. import os from typing import Any, Callable, Dict @@ -17,10 +17,7 @@ import rmm -from cudf._lib.strings_udf import ( - column_from_udf_string_array, - column_to_string_view_array, -) +from cudf._lib import strings_udf from cudf.api.types import is_scalar from cudf.core.column.column import as_column from cudf.core.dtypes import dtype @@ -63,7 +60,10 @@ precompiled: cachetools.LRUCache = cachetools.LRUCache(maxsize=32) launch_arg_getters: Dict[Any, Any] = {} -_PTX_FILE = _get_ptx_file(os.path.dirname(__file__), "shim_") +_PTX_FILE = _get_ptx_file( + os.path.join(os.path.dirname(strings_udf.__file__), "..", "core", "udf"), + "shim_", +) @_cudf_nvtx_annotate @@ -319,7 +319,7 @@ def _return_arr_from_dtype(dtype, size): def _post_process_output_col(col, retty): if retty == _cudf_str_dtype: - return column_from_udf_string_array(col) + return strings_udf.column_from_udf_string_array(col) return as_column(col, retty) @@ -361,7 +361,7 @@ def set_malloc_heap_size(size=None): def column_to_string_view_array_init_heap(col): # lazily allocate heap only when a string needs to be returned - return column_to_string_view_array(col) + return strings_udf.column_to_string_view_array(col) class UDFError(RuntimeError): diff --git a/python/cudf/cudf/utils/_numba.py b/python/cudf/cudf/utils/_numba.py index fc45f60cdaf..7781c14e559 100644 --- a/python/cudf/cudf/utils/_numba.py +++ b/python/cudf/cudf/utils/_numba.py @@ -1,9 +1,10 @@ -# Copyright (c) 2023, NVIDIA CORPORATION. +# Copyright (c) 2023-2024, NVIDIA CORPORATION. import glob import os import sys import warnings +from functools import lru_cache from numba import config as numba_config @@ -20,9 +21,20 @@ def patch_numba_linker_pynvjitlink(): ) -CC_60_PTX_FILE = os.path.join( - os.path.dirname(__file__), "../core/udf/shim_60.ptx" -) +# Use an lru_cache with a single value to allow a delayed import of +# strings_udf. 
This is the easiest way to break an otherwise circular import +# loop of _lib.*->cudautils->_numba->_lib.strings_udf +@lru_cache +def _get_cc_60_ptx_file(): + from cudf._lib import strings_udf + + return os.path.join( + os.path.dirname(strings_udf.__file__), + "..", + "core", + "udf", + "shim_60.ptx", + ) def _get_best_ptx_file(archs, max_compute_capability): @@ -119,7 +131,9 @@ def _setup_numba(): versions = safe_get_versions() if versions != NO_DRIVER: driver_version, runtime_version = versions - ptx_toolkit_version = _get_cuda_version_from_ptx_file(CC_60_PTX_FILE) + ptx_toolkit_version = _get_cuda_version_from_ptx_file( + _get_cc_60_ptx_file() + ) # MVC is required whenever any PTX is newer than the driver # This could be the shipped PTX file or the PTX emitted by diff --git a/python/cudf/udf_cpp/CMakeLists.txt b/python/cudf/udf_cpp/CMakeLists.txt index 7d6dc84b322..57b52559f00 100644 --- a/python/cudf/udf_cpp/CMakeLists.txt +++ b/python/cudf/udf_cpp/CMakeLists.txt @@ -1,5 +1,5 @@ # ============================================================================= -# Copyright (c) 2022-2023, NVIDIA CORPORATION. +# Copyright (c) 2022-2024, NVIDIA CORPORATION. # # Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except # in compliance with the License. You may obtain a copy of the License at @@ -55,30 +55,6 @@ target_compile_options( target_link_libraries(cudf_strings_udf PUBLIC cudf::cudf) install(TARGETS cudf_strings_udf DESTINATION ./cudf/_lib/) -# This function will copy the generated PTX file from its generator-specific location in the build -# tree into a specified location in the build tree from which we can install it. -function(copy_ptx_to_location target destination new_name) - set(cmake_generated_file - "${CMAKE_CURRENT_BINARY_DIR}/cmake/cp_${target}_$>_ptx.cmake" - ) - file( - GENERATE - OUTPUT "${cmake_generated_file}" - CONTENT - " -set(ptx_path \"$\") -file(MAKE_DIRECTORY \"${destination}\") -file(COPY_FILE \${ptx_path} \"${destination}/${new_name}\")" - ) - - add_custom_target( - ${target}_cp_ptx ALL - COMMAND ${CMAKE_COMMAND} -P "${cmake_generated_file}" - DEPENDS $ - COMMENT "Copying PTX files to '${destination}'" - ) -endfunction() - # Create the shim library for each architecture. set(SHIM_CUDA_FLAGS --expt-relaxed-constexpr -rdc=true) @@ -104,10 +80,9 @@ foreach(arch IN LISTS CMAKE_CUDA_ARCHITECTURES) target_compile_options(${tgt} PRIVATE "$<$:${SHIM_CUDA_FLAGS}>") target_link_libraries(${tgt} PUBLIC cudf::cudf) - copy_ptx_to_location(${tgt} "${CMAKE_CURRENT_BINARY_DIR}/../udf" ${tgt}.ptx) install( FILES $ - DESTINATION ./cudf/core/udf/ + DESTINATION cudf/core/udf/ RENAME ${tgt}.ptx ) endforeach() From 1bff50850c44695f3e85f4c2e90891a5ab385e11 Mon Sep 17 00:00:00 2001 From: Shruti Shivakumar Date: Wed, 17 Jan 2024 10:25:29 -0800 Subject: [PATCH 5/6] Expose streams in ORC reader and writer APIs (#14350) This PR contributes to https://github.com/rapidsai/cudf/issues/13744. 
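For illustration, the new overloads can take a caller-provided stream as in the following minimal sketch, which mirrors the stream gtest added in this PR (the helper name and the locally owned `rmm::cuda_stream` are assumptions for the example):

```
// Sketch only: write and read ORC on an explicit, non-default stream.
#include <cudf/io/orc.hpp>
#include <cudf/table/table_view.hpp>

#include <rmm/cuda_stream.hpp>

#include <string>

void orc_roundtrip_on_stream(cudf::table_view const& tbl, std::string const& path)
{
  rmm::cuda_stream stream;  // caller-owned stream passed to the I/O APIs

  cudf::io::orc_writer_options out_opts =
    cudf::io::orc_writer_options::builder(cudf::io::sink_info{path}, tbl);
  cudf::io::write_orc(out_opts, stream.view());

  cudf::io::orc_reader_options in_opts =
    cudf::io::orc_reader_options::builder(cudf::io::source_info{{path}});
  auto result = cudf::io::read_orc(in_opts, stream.view());

  stream.synchronize();  // ensure device work issued on this stream has completed
}
```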
-Added stream parameters to public APIs ``` cudf::io::read_orc cudf::io::write_orc cudf::io::read_orc_metadata cudf::io::read_parsed_orc_statistics ``` -Added stream gtests Authors: - Shruti Shivakumar (https://github.com/shrshi) - Nghia Truong (https://github.com/ttnghia) Approvers: - Vukasin Milovanovic (https://github.com/vuule) - Nghia Truong (https://github.com/ttnghia) URL: https://github.com/rapidsai/cudf/pull/14350 --- cpp/include/cudf/io/orc.hpp | 10 +- cpp/include/cudf/io/orc_metadata.hpp | 14 ++- cpp/src/io/functions.cpp | 31 +++--- cpp/tests/CMakeLists.txt | 1 + cpp/tests/streams/io/orc_test.cpp | 137 +++++++++++++++++++++++++++ 5 files changed, 172 insertions(+), 21 deletions(-) create mode 100644 cpp/tests/streams/io/orc_test.cpp diff --git a/cpp/include/cudf/io/orc.hpp b/cpp/include/cudf/io/orc.hpp index 3ef356bed1b..a3f76817f8a 100644 --- a/cpp/include/cudf/io/orc.hpp +++ b/cpp/include/cudf/io/orc.hpp @@ -393,6 +393,7 @@ class orc_reader_options_builder { * @endcode * * @param options Settings for controlling reading behavior + * @param stream CUDA stream used for device memory operations and kernel launches * @param mr Device memory resource used to allocate device memory of the table in the returned * table_with_metadata. * @@ -400,6 +401,7 @@ class orc_reader_options_builder { */ table_with_metadata read_orc( orc_reader_options const& options, + rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); /** @} */ // end of group @@ -864,8 +866,10 @@ class orc_writer_options_builder { * @endcode * * @param options Settings for controlling reading behavior + * @param stream CUDA stream used for device memory operations and kernel launches */ -void write_orc(orc_writer_options const& options); +void write_orc(orc_writer_options const& options, + rmm::cuda_stream_view stream = cudf::get_default_stream()); /** * @brief Builds settings to use for `write_orc_chunked()`. @@ -1287,8 +1291,10 @@ class orc_chunked_writer { * @brief Constructor with chunked writer options * * @param[in] options options used to write table + * @param[in] stream CUDA stream used for device memory operations and kernel launches */ - orc_chunked_writer(chunked_orc_writer_options const& options); + orc_chunked_writer(chunked_orc_writer_options const& options, + rmm::cuda_stream_view stream = cudf::get_default_stream()); /** * @brief Writes table to output. diff --git a/cpp/include/cudf/io/orc_metadata.hpp b/cpp/include/cudf/io/orc_metadata.hpp index 25e0c130dff..19d44263d1b 100644 --- a/cpp/include/cudf/io/orc_metadata.hpp +++ b/cpp/include/cudf/io/orc_metadata.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2023, NVIDIA CORPORATION. + * Copyright (c) 2019-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -63,10 +63,12 @@ struct raw_orc_statistics { * @endcode * * @param src_info Dataset source + * @param stream CUDA stream used for device memory operations and kernel launches * * @return Column names and encoded ORC statistics */ -raw_orc_statistics read_raw_orc_statistics(source_info const& src_info); +raw_orc_statistics read_raw_orc_statistics( + source_info const& src_info, rmm::cuda_stream_view stream = cudf::get_default_stream()); /** * @brief Monostate type alias for the statistics variant. 
@@ -207,10 +209,12 @@ struct parsed_orc_statistics { * @ingroup io_readers * * @param src_info Dataset source + * @param stream CUDA stream used for device memory operations and kernel launches * * @return Column names and decoded ORC statistics */ -parsed_orc_statistics read_parsed_orc_statistics(source_info const& src_info); +parsed_orc_statistics read_parsed_orc_statistics( + source_info const& src_info, rmm::cuda_stream_view stream = cudf::get_default_stream()); /** * @brief Schema of an ORC column, including the nested columns. @@ -368,10 +372,12 @@ class orc_metadata { * @ingroup io_readers * * @param src_info Dataset source + * @param stream CUDA stream used for device memory operations and kernel launches * * @return orc_metadata with ORC schema, number of rows and number of stripes. */ -orc_metadata read_orc_metadata(source_info const& src_info); +orc_metadata read_orc_metadata(source_info const& src_info, + rmm::cuda_stream_view stream = cudf::get_default_stream()); /** @} */ // end of group } // namespace io diff --git a/cpp/src/io/functions.cpp b/cpp/src/io/functions.cpp index e5489963618..42f2fd02d52 100644 --- a/cpp/src/io/functions.cpp +++ b/cpp/src/io/functions.cpp @@ -269,9 +269,9 @@ void write_csv(csv_writer_options const& options, mr); } -raw_orc_statistics read_raw_orc_statistics(source_info const& src_info) +raw_orc_statistics read_raw_orc_statistics(source_info const& src_info, + rmm::cuda_stream_view stream) { - auto stream = cudf::get_default_stream(); // Get source to read statistics from std::unique_ptr source; if (src_info.type() == io_type::FILEPATH) { @@ -342,9 +342,10 @@ column_statistics::column_statistics(orc::column_statistics&& cs) } } -parsed_orc_statistics read_parsed_orc_statistics(source_info const& src_info) +parsed_orc_statistics read_parsed_orc_statistics(source_info const& src_info, + rmm::cuda_stream_view stream) { - auto const raw_stats = read_raw_orc_statistics(src_info); + auto const raw_stats = read_raw_orc_statistics(src_info, stream); parsed_orc_statistics result; result.column_names = raw_stats.column_names; @@ -395,12 +396,12 @@ orc_column_schema make_orc_column_schema(host_span orc_sc } }; // namespace -orc_metadata read_orc_metadata(source_info const& src_info) +orc_metadata read_orc_metadata(source_info const& src_info, rmm::cuda_stream_view stream) { auto sources = make_datasources(src_info); CUDF_EXPECTS(sources.size() == 1, "Only a single source is currently supported."); - auto const footer = orc::metadata(sources.front().get(), cudf::detail::default_stream_value).ff; + auto const footer = orc::metadata(sources.front().get(), stream).ff; return {{make_orc_column_schema(footer.types, 0, "")}, static_cast(footer.numberOfRows), @@ -410,21 +411,21 @@ orc_metadata read_orc_metadata(source_info const& src_info) /** * @copydoc cudf::io::read_orc */ -table_with_metadata read_orc(orc_reader_options const& options, rmm::mr::device_memory_resource* mr) +table_with_metadata read_orc(orc_reader_options const& options, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr) { CUDF_FUNC_RANGE(); auto datasources = make_datasources(options.get_source()); - auto reader = std::make_unique( - std::move(datasources), options, cudf::get_default_stream(), mr); - + auto reader = std::make_unique(std::move(datasources), options, stream, mr); return reader->read(options); } /** * @copydoc cudf::io::write_orc */ -void write_orc(orc_writer_options const& options) +void write_orc(orc_writer_options const& options, rmm::cuda_stream_view stream) { 
namespace io_detail = cudf::io::detail; @@ -434,8 +435,7 @@ void write_orc(orc_writer_options const& options) CUDF_EXPECTS(sinks.size() == 1, "Multiple sinks not supported for ORC writing"); auto writer = std::make_unique( - std::move(sinks[0]), options, io_detail::single_write_mode::YES, cudf::get_default_stream()); - + std::move(sinks[0]), options, io_detail::single_write_mode::YES, stream); try { writer->write(options.get_table()); } catch (...) { @@ -451,7 +451,8 @@ void write_orc(orc_writer_options const& options) /** * @copydoc cudf::io::orc_chunked_writer::orc_chunked_writer */ -orc_chunked_writer::orc_chunked_writer(chunked_orc_writer_options const& options) +orc_chunked_writer::orc_chunked_writer(chunked_orc_writer_options const& options, + rmm::cuda_stream_view stream) { namespace io_detail = cudf::io::detail; @@ -459,7 +460,7 @@ orc_chunked_writer::orc_chunked_writer(chunked_orc_writer_options const& options CUDF_EXPECTS(sinks.size() == 1, "Multiple sinks not supported for ORC writing"); writer = std::make_unique( - std::move(sinks[0]), options, io_detail::single_write_mode::NO, cudf::get_default_stream()); + std::move(sinks[0]), options, io_detail::single_write_mode::NO, stream); } /** diff --git a/cpp/tests/CMakeLists.txt b/cpp/tests/CMakeLists.txt index f7b805b68f5..b385c63e9cd 100644 --- a/cpp/tests/CMakeLists.txt +++ b/cpp/tests/CMakeLists.txt @@ -655,6 +655,7 @@ ConfigureTest(STREAM_INTEROP_TEST streams/interop_test.cpp STREAM_MODE testing) ConfigureTest(STREAM_JSONIO_TEST streams/io/json_test.cpp STREAM_MODE testing) ConfigureTest(STREAM_LISTS_TEST streams/lists_test.cpp STREAM_MODE testing) ConfigureTest(STREAM_NULL_MASK_TEST streams/null_mask_test.cpp STREAM_MODE testing) +ConfigureTest(STREAM_ORCIO_TEST streams/io/orc_test.cpp STREAM_MODE testing) ConfigureTest(STREAM_PARQUETIO_TEST streams/io/parquet_test.cpp STREAM_MODE testing) ConfigureTest(STREAM_POOL_TEST streams/pool_test.cu STREAM_MODE testing) ConfigureTest(STREAM_REPLACE_TEST streams/replace_test.cpp STREAM_MODE testing) diff --git a/cpp/tests/streams/io/orc_test.cpp b/cpp/tests/streams/io/orc_test.cpp new file mode 100644 index 00000000000..929c3697b3b --- /dev/null +++ b/cpp/tests/streams/io/orc_test.cpp @@ -0,0 +1,137 @@ +/* + * Copyright (c) 2023-2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include + +#include +#include +#include +#include +#include + +auto const temp_env = static_cast( + ::testing::AddGlobalTestEnvironment(new cudf::test::TempDirTestEnvironment)); + +class ORCTest : public cudf::test::BaseFixture {}; + +template +std::vector> make_uniqueptrs_vector(UniqPtrs&&... 
uniqptrs) +{ + std::vector> ptrsvec; + (ptrsvec.push_back(std::forward(uniqptrs)), ...); + return ptrsvec; +} + +cudf::table construct_table() +{ + constexpr auto num_rows = 10; + + auto const zeros_iterator = thrust::make_constant_iterator(0); + auto const ones_iterator = thrust::make_constant_iterator(1); + + cudf::test::fixed_width_column_wrapper col0(zeros_iterator, zeros_iterator + num_rows); + cudf::test::fixed_width_column_wrapper col1(zeros_iterator, zeros_iterator + num_rows); + cudf::test::fixed_width_column_wrapper col2(zeros_iterator, zeros_iterator + num_rows); + cudf::test::fixed_width_column_wrapper col3(zeros_iterator, zeros_iterator + num_rows); + cudf::test::fixed_width_column_wrapper col4(zeros_iterator, zeros_iterator + num_rows); + cudf::test::fixed_width_column_wrapper col5(zeros_iterator, zeros_iterator + num_rows); + + cudf::test::fixed_width_column_wrapper col6 = [&ones_iterator, num_rows] { + auto col6_data = cudf::detail::make_counting_transform_iterator(0, [&](auto i) { + return numeric::decimal128{ones_iterator[i], numeric::scale_type{12}}; + }); + return cudf::test::fixed_width_column_wrapper(col6_data, + col6_data + num_rows); + }(); + + cudf::test::fixed_width_column_wrapper col7 = [&ones_iterator, num_rows] { + auto col7_data = cudf::detail::make_counting_transform_iterator(0, [&](auto i) { + return numeric::decimal128{ones_iterator[i], numeric::scale_type{-12}}; + }); + return cudf::test::fixed_width_column_wrapper(col7_data, + col7_data + num_rows); + }(); + + cudf::test::lists_column_wrapper col8 = [] { + auto col8_mask = + cudf::detail::make_counting_transform_iterator(0, [](auto i) { return (i % 2); }); + return cudf::test::lists_column_wrapper( + {{1, 1}, {1, 1, 1}, {}, {1}, {1, 1, 1, 1}, {1, 1, 1, 1, 1}, {}, {1, -1}, {}, {-1, -1}}, + col8_mask); + }(); + + cudf::test::structs_column_wrapper col9 = [&ones_iterator] { + auto child_col_mask = + cudf::detail::make_counting_transform_iterator(0, [](auto i) { return (i % 2); }); + cudf::test::fixed_width_column_wrapper child_col( + ones_iterator, ones_iterator + num_rows, child_col_mask); + return cudf::test::structs_column_wrapper{child_col}; + }(); + + cudf::test::strings_column_wrapper col10 = [] { + std::vector col10_data(num_rows, "rapids"); + return cudf::test::strings_column_wrapper(col10_data.begin(), col10_data.end()); + }(); + + auto colsptr = make_uniqueptrs_vector(col0.release(), + col1.release(), + col2.release(), + col3.release(), + col4.release(), + col5.release(), + col6.release(), + col7.release(), + col8.release(), + col9.release(), + col10.release()); + return cudf::table(std::move(colsptr)); +} + +TEST_F(ORCTest, ORCWriter) +{ + auto tab = construct_table(); + auto filepath = temp_env->get_temp_filepath("OrcMultiColumn.orc"); + cudf::io::orc_writer_options out_opts = + cudf::io::orc_writer_options::builder(cudf::io::sink_info{filepath}, tab); + cudf::io::write_orc(out_opts, cudf::test::get_default_stream()); +} + +TEST_F(ORCTest, ORCReader) +{ + auto tab = construct_table(); + auto filepath = temp_env->get_temp_filepath("OrcMultiColumn.orc"); + cudf::io::orc_writer_options out_opts = + cudf::io::orc_writer_options::builder(cudf::io::sink_info{filepath}, tab); + cudf::io::write_orc(out_opts, cudf::test::get_default_stream()); + + cudf::io::orc_reader_options read_opts = + cudf::io::orc_reader_options::builder(cudf::io::source_info{{filepath}}); + auto result = cudf::io::read_orc(read_opts, cudf::test::get_default_stream()); + + auto meta = 
read_orc_metadata(cudf::io::source_info{filepath}); + auto const stats = cudf::io::read_parsed_orc_statistics(cudf::io::source_info{filepath}); +} From c81198789be183e7e1eb288eb98dd16f65b57e44 Mon Sep 17 00:00:00 2001 From: brandon-b-miller <53796099+brandon-b-miller@users.noreply.github.com> Date: Wed, 17 Jan 2024 13:17:35 -0600 Subject: [PATCH 6/6] Defer PTX file load to runtime (#13690) This PR fixes an issue where cuDF fails to import on machines with no NVIDIA GPU present. cc @shwina Authors: - https://github.com/brandon-b-miller - Vyas Ramasubramani (https://github.com/vyasr) Approvers: - Bradley Dice (https://github.com/bdice) - Ashwin Srinath (https://github.com/shwina) - Vyas Ramasubramani (https://github.com/vyasr) URL: https://github.com/rapidsai/cudf/pull/13690 --- python/cudf/cudf/core/udf/utils.py | 16 +++++++++++----- python/cudf/cudf/tests/test_no_device.py | 16 ++++++++++++++++ python/cudf/cudf/tests/test_string_udfs.py | 6 ++++-- 3 files changed, 31 insertions(+), 7 deletions(-) create mode 100644 python/cudf/cudf/tests/test_no_device.py diff --git a/python/cudf/cudf/core/udf/utils.py b/python/cudf/cudf/core/udf/utils.py index bd57db6b620..12baf1ea6d1 100644 --- a/python/cudf/cudf/core/udf/utils.py +++ b/python/cudf/cudf/core/udf/utils.py @@ -1,5 +1,6 @@ # Copyright (c) 2020-2024, NVIDIA CORPORATION. +import functools import os from typing import Any, Callable, Dict @@ -60,10 +61,15 @@ precompiled: cachetools.LRUCache = cachetools.LRUCache(maxsize=32) launch_arg_getters: Dict[Any, Any] = {} -_PTX_FILE = _get_ptx_file( - os.path.join(os.path.dirname(strings_udf.__file__), "..", "core", "udf"), - "shim_", -) + +@functools.cache +def _ptx_file(): + return _get_ptx_file( + os.path.join( + os.path.dirname(strings_udf.__file__), "..", "core", "udf" + ), + "shim_", + ) @_cudf_nvtx_annotate @@ -286,7 +292,7 @@ def _get_kernel(kernel_string, globals_, sig, func): exec(kernel_string, globals_) _kernel = globals_["_kernel"] kernel = cuda.jit( - sig, link=[_PTX_FILE], extensions=[str_view_arg_handler] + sig, link=[_ptx_file()], extensions=[str_view_arg_handler] )(_kernel) return kernel diff --git a/python/cudf/cudf/tests/test_no_device.py b/python/cudf/cudf/tests/test_no_device.py new file mode 100644 index 00000000000..722762b2d0c --- /dev/null +++ b/python/cudf/cudf/tests/test_no_device.py @@ -0,0 +1,16 @@ +# Copyright (c) 2024, NVIDIA CORPORATION. +import os +import subprocess + + +def test_cudf_import_no_device(): + env = os.environ.copy() + env["CUDA_VISIBLE_DEVICES"] = "-1" + output = subprocess.run( + ["python", "-c", "import cudf"], + env=env, + capture_output=True, + text=True, + cwd="/", + ) + assert output.returncode == 0 diff --git a/python/cudf/cudf/tests/test_string_udfs.py b/python/cudf/cudf/tests/test_string_udfs.py index 88c73ccf964..5dbb86fe27d 100644 --- a/python/cudf/cudf/tests/test_string_udfs.py +++ b/python/cudf/cudf/tests/test_string_udfs.py @@ -1,4 +1,4 @@ -# Copyright (c) 2022-2023, NVIDIA CORPORATION. +# Copyright (c) 2022-2024, NVIDIA CORPORATION. import numba import numpy as np @@ -20,10 +20,12 @@ string_view, udf_string, ) -from cudf.core.udf.utils import _PTX_FILE, _get_extensionty_size +from cudf.core.udf.utils import _get_extensionty_size, _ptx_file from cudf.testing._utils import assert_eq, sv_to_udf_str from cudf.utils._numba import _CUDFNumbaConfig +_PTX_FILE = _ptx_file() + def get_kernels(func, dtype, size): """