From bac5812bceac2a0ab431e0705812a365ecb02bef Mon Sep 17 00:00:00 2001 From: davidwendt Date: Mon, 21 Jun 2021 16:19:16 -0400 Subject: [PATCH 1/2] Remove strings_count parameter from create_chars_child_column --- cpp/include/cudf/strings/detail/copy_if_else.cuh | 2 +- cpp/include/cudf/strings/detail/copy_range.cuh | 3 +-- cpp/include/cudf/strings/detail/gather.cuh | 2 +- cpp/include/cudf/strings/detail/merge.cuh | 2 +- .../cudf/strings/detail/strings_column_factories.cuh | 9 ++++----- cpp/include/cudf/strings/detail/utilities.cuh | 5 ++--- cpp/include/cudf/strings/detail/utilities.hpp | 3 +-- cpp/src/hash/md5_hash.cu | 7 +++---- cpp/src/io/csv/durations.cu | 7 +++---- cpp/src/replace/clamp.cu | 3 +-- cpp/src/replace/nulls.cu | 2 +- cpp/src/replace/replace.cu | 2 +- cpp/src/reshape/interleave_columns.cu | 2 +- cpp/src/strings/combine/join.cu | 2 +- cpp/src/strings/convert/convert_booleans.cu | 2 +- cpp/src/strings/convert/convert_datetime.cu | 2 +- cpp/src/strings/convert/convert_durations.cu | 2 +- cpp/src/strings/convert/convert_fixed_point.cu | 2 +- cpp/src/strings/convert/convert_floats.cu | 2 +- cpp/src/strings/convert/convert_integers.cu | 2 +- cpp/src/strings/convert/convert_ipv4.cu | 2 +- cpp/src/strings/convert/convert_urls.cu | 5 ++--- cpp/src/strings/filling/fill.cu | 2 +- cpp/src/strings/padding.cu | 4 ++-- cpp/src/strings/repeat_strings.cu | 2 +- cpp/src/strings/replace/replace.cu | 6 +++--- cpp/src/strings/utilities.cu | 3 +-- cpp/src/text/detokenize.cu | 5 ++--- cpp/src/text/generate_ngrams.cu | 3 +-- cpp/src/text/ngrams_tokenize.cu | 2 +- 30 files changed, 43 insertions(+), 54 deletions(-) diff --git a/cpp/include/cudf/strings/detail/copy_if_else.cuh b/cpp/include/cudf/strings/detail/copy_if_else.cuh index 6d9bd9af8da..1f0f09125e6 100644 --- a/cpp/include/cudf/strings/detail/copy_if_else.cuh +++ b/cpp/include/cudf/strings/detail/copy_if_else.cuh @@ -93,7 +93,7 @@ std::unique_ptr copy_if_else( // build chars column auto const bytes = cudf::detail::get_value(offsets_column->view(), strings_count, stream); - auto chars_column = create_chars_child_column(strings_count, bytes, stream, mr); + auto chars_column = create_chars_child_column(bytes, stream, mr); auto d_chars = chars_column->mutable_view().template data(); // fill in chars thrust::for_each_n( diff --git a/cpp/include/cudf/strings/detail/copy_range.cuh b/cpp/include/cudf/strings/detail/copy_range.cuh index c0fa74c4662..adf9d3c6525 100644 --- a/cpp/include/cudf/strings/detail/copy_range.cuh +++ b/cpp/include/cudf/strings/detail/copy_range.cuh @@ -183,8 +183,7 @@ std::unique_ptr copy_range( thrust::device_pointer_cast(p_offsets_column->view().template data()); auto const chars_bytes = cudf::detail::get_value(p_offsets_column->view(), target.size(), stream); - auto p_chars_column = - strings::detail::create_chars_child_column(target.size(), chars_bytes, stream, mr); + auto p_chars_column = strings::detail::create_chars_child_column(chars_bytes, stream, mr); // copy to the chars column diff --git a/cpp/include/cudf/strings/detail/gather.cuh b/cpp/include/cudf/strings/detail/gather.cuh index 9215f1f5a0f..718c1737890 100644 --- a/cpp/include/cudf/strings/detail/gather.cuh +++ b/cpp/include/cudf/strings/detail/gather.cuh @@ -229,7 +229,7 @@ std::unique_ptr gather_chars(StringIterator strings_begin, auto const output_count = std::distance(map_begin, map_end); if (output_count == 0) return make_empty_column(data_type{type_id::INT8}); - auto chars_column = create_chars_child_column(output_count, chars_bytes, stream, mr); + auto chars_column = create_chars_child_column(chars_bytes, stream, mr); auto const d_chars = chars_column->mutable_view().template data(); constexpr int warps_per_threadblock = 4; diff --git a/cpp/include/cudf/strings/detail/merge.cuh b/cpp/include/cudf/strings/detail/merge.cuh index cd9790b1545..e5ed03f34d6 100644 --- a/cpp/include/cudf/strings/detail/merge.cuh +++ b/cpp/include/cudf/strings/detail/merge.cuh @@ -83,7 +83,7 @@ std::unique_ptr merge(strings_column_view const& lhs, // create the chars column auto const bytes = cudf::detail::get_value(offsets_column->view(), strings_count, stream); - auto chars_column = strings::detail::create_chars_child_column(strings_count, bytes, stream, mr); + auto chars_column = strings::detail::create_chars_child_column(bytes, stream, mr); // merge the strings auto d_chars = chars_column->mutable_view().template data(); thrust::for_each_n(rmm::exec_policy(stream), diff --git a/cpp/include/cudf/strings/detail/strings_column_factories.cuh b/cpp/include/cudf/strings/detail/strings_column_factories.cuh index 7a6006a8292..1831034231e 100644 --- a/cpp/include/cudf/strings/detail/strings_column_factories.cuh +++ b/cpp/include/cudf/strings/detail/strings_column_factories.cuh @@ -111,10 +111,9 @@ std::unique_ptr make_strings_column(IndexPairIterator begin, mr); } else { // this approach is 2-3x faster for a large number of smaller string lengths - auto chars_column = - strings::detail::create_chars_child_column(strings_count, bytes, stream, mr); - auto d_chars = chars_column->mutable_view().template data(); - auto copy_chars = [d_chars] __device__(auto item) { + auto chars_column = create_chars_child_column(bytes, stream, mr); + auto d_chars = chars_column->mutable_view().template data(); + auto copy_chars = [d_chars] __device__(auto item) { string_index_pair const str = thrust::get<0>(item); size_type const offset = thrust::get<1>(item); if (str.first != nullptr) memcpy(d_chars + offset, str.first, str.second); @@ -182,7 +181,7 @@ std::unique_ptr make_strings_column(CharIterator chars_begin, [] __device__(auto offset) { return static_cast(offset); }); // build chars column - auto chars_column = strings::detail::create_chars_child_column(strings_count, bytes, stream, mr); + auto chars_column = strings::detail::create_chars_child_column(bytes, stream, mr); auto chars_view = chars_column->mutable_view(); thrust::copy(rmm::exec_policy(stream), chars_begin, chars_end, chars_view.data()); diff --git a/cpp/include/cudf/strings/detail/utilities.cuh b/cpp/include/cudf/strings/detail/utilities.cuh index 68ebb5dbe19..8758a28885f 100644 --- a/cpp/include/cudf/strings/detail/utilities.cuh +++ b/cpp/include/cudf/strings/detail/utilities.cuh @@ -166,8 +166,7 @@ auto make_strings_children( // Now build the chars column auto const bytes = cudf::detail::get_value(offsets_view, strings_count, stream); - std::unique_ptr chars_column = - create_chars_child_column(strings_count, bytes, stream, mr); + std::unique_ptr chars_column = create_chars_child_column(bytes, stream, mr); // Execute the function fn again to fill the chars column. // Note that if the output chars column has zero size, the function fn should not be called to @@ -261,7 +260,7 @@ make_strings_children_with_null_mask( // Now build the chars column auto const bytes = cudf::detail::get_value(offsets_view, strings_count, stream); - auto chars_column = create_chars_child_column(strings_count, bytes, stream, mr); + auto chars_column = create_chars_child_column(bytes, stream, mr); // Execute the function fn again to fill the chars column. // Note that if the output chars column has zero size, the function fn should not be called to diff --git a/cpp/include/cudf/strings/detail/utilities.hpp b/cpp/include/cudf/strings/detail/utilities.hpp index 0cee185068e..6424841ba86 100644 --- a/cpp/include/cudf/strings/detail/utilities.hpp +++ b/cpp/include/cudf/strings/detail/utilities.hpp @@ -27,16 +27,15 @@ namespace strings { namespace detail { /** * @brief Create a chars column to be a child of a strings column. + * * This will return the properly sized column to be filled in by the caller. * - * @param strings_count Number of strings in the column. * @param bytes Number of bytes for the chars column. * @param stream CUDA stream used for device memory operations and kernel launches. * @param mr Device memory resource used to allocate the returned column's device memory. * @return The chars child column for a strings column. */ std::unique_ptr create_chars_child_column( - size_type strings_count, size_type bytes, rmm::cuda_stream_view stream = rmm::cuda_stream_default, rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); diff --git a/cpp/src/hash/md5_hash.cu b/cpp/src/hash/md5_hash.cu index 692c3ade6c6..80348ae7b51 100644 --- a/cpp/src/hash/md5_hash.cu +++ b/cpp/src/hash/md5_hash.cu @@ -64,10 +64,9 @@ std::unique_ptr md5_hash(table_view const& input, auto offsets_column = cudf::strings::detail::make_offsets_child_column(begin, begin + input.num_rows(), stream, mr); - auto chars_column = - strings::detail::create_chars_child_column(input.num_rows(), input.num_rows() * 32, stream, mr); - auto chars_view = chars_column->mutable_view(); - auto d_chars = chars_view.data(); + auto chars_column = strings::detail::create_chars_child_column(input.num_rows() * 32, stream, mr); + auto chars_view = chars_column->mutable_view(); + auto d_chars = chars_view.data(); rmm::device_buffer null_mask{0, stream, mr}; diff --git a/cpp/src/io/csv/durations.cu b/cpp/src/io/csv/durations.cu index 821b87c52e4..e14e3b45a2c 100644 --- a/cpp/src/io/csv/durations.cu +++ b/cpp/src/io/csv/durations.cu @@ -190,10 +190,9 @@ struct dispatch_from_durations_fn { // build chars column auto const chars_bytes = cudf::detail::get_value(offsets_column->view(), strings_count, stream); - auto chars_column = - strings::detail::create_chars_child_column(strings_count, chars_bytes, stream, mr); - auto chars_view = chars_column->mutable_view(); - auto d_chars = chars_view.template data(); + auto chars_column = strings::detail::create_chars_child_column(chars_bytes, stream, mr); + auto chars_view = chars_column->mutable_view(); + auto d_chars = chars_view.template data(); thrust::for_each_n(rmm::exec_policy(stream), thrust::make_counting_iterator(0), diff --git a/cpp/src/replace/clamp.cu b/cpp/src/replace/clamp.cu index e476d813e65..f65f8148b64 100644 --- a/cpp/src/replace/clamp.cu +++ b/cpp/src/replace/clamp.cu @@ -67,8 +67,7 @@ std::pair, std::unique_ptr> form_offsets_and_cha // build chars column auto const bytes = cudf::detail::get_value(offsets_column->view(), strings_count, stream); - auto chars_column = - cudf::strings::detail::create_chars_child_column(strings_count, bytes, stream, mr); + auto chars_column = cudf::strings::detail::create_chars_child_column(bytes, stream, mr); return std::make_pair(std::move(offsets_column), std::move(chars_column)); } diff --git a/cpp/src/replace/nulls.cu b/cpp/src/replace/nulls.cu index 1cd2f326f44..9341d2f4097 100644 --- a/cpp/src/replace/nulls.cu +++ b/cpp/src/replace/nulls.cu @@ -254,7 +254,7 @@ std::unique_ptr replace_nulls_column_kernel_forwarder::operator()< // Allocate chars array and output null mask std::unique_ptr output_chars = - cudf::strings::detail::create_chars_child_column(input.size(), bytes, stream, mr); + cudf::strings::detail::create_chars_child_column(bytes, stream, mr); auto output_chars_view = output_chars->mutable_view(); diff --git a/cpp/src/replace/replace.cu b/cpp/src/replace/replace.cu index 016eb20a5dc..290e25a8181 100644 --- a/cpp/src/replace/replace.cu +++ b/cpp/src/replace/replace.cu @@ -419,7 +419,7 @@ std::unique_ptr replace_kernel_forwarder::operator() output_chars = - cudf::strings::detail::create_chars_child_column(input_col.size(), bytes, stream, mr); + cudf::strings::detail::create_chars_child_column(bytes, stream, mr); auto output_chars_view = output_chars->mutable_view(); auto device_chars = cudf::mutable_column_device_view::create(output_chars_view); diff --git a/cpp/src/reshape/interleave_columns.cu b/cpp/src/reshape/interleave_columns.cu index 328959732a0..7c9965ce6b9 100644 --- a/cpp/src/reshape/interleave_columns.cu +++ b/cpp/src/reshape/interleave_columns.cu @@ -104,7 +104,7 @@ struct interleave_columns_functor { // Create the chars column auto const bytes = cudf::detail::get_value(offsets_column->view(), num_strings, stream); - auto chars_column = strings::detail::create_chars_child_column(num_strings, bytes, stream, mr); + auto chars_column = strings::detail::create_chars_child_column(bytes, stream, mr); // Fill the chars column auto d_results_chars = chars_column->mutable_view().data(); thrust::for_each_n( diff --git a/cpp/src/strings/combine/join.cu b/cpp/src/strings/combine/join.cu index 97a4ebf9be4..5a69ac7b3d5 100644 --- a/cpp/src/strings/combine/join.cu +++ b/cpp/src/strings/combine/join.cu @@ -97,7 +97,7 @@ std::unique_ptr join_strings(strings_column_view const& strings, auto null_mask = null_count ? cudf::detail::create_null_mask(1, cudf::mask_state::ALL_NULL, stream, mr) : rmm::device_buffer{0, stream, mr}; - auto chars_column = detail::create_chars_child_column(strings_count, bytes, stream, mr); + auto chars_column = create_chars_child_column(bytes, stream, mr); auto d_chars = chars_column->mutable_view().data(); thrust::for_each_n( rmm::exec_policy(stream), diff --git a/cpp/src/strings/convert/convert_booleans.cu b/cpp/src/strings/convert/convert_booleans.cu index c69bb39bdae..8823b7cf416 100644 --- a/cpp/src/strings/convert/convert_booleans.cu +++ b/cpp/src/strings/convert/convert_booleans.cu @@ -125,7 +125,7 @@ std::unique_ptr from_booleans(column_view const& booleans, // build chars column auto const bytes = cudf::detail::get_value(offsets_column->view(), strings_count, stream); - auto chars_column = create_chars_child_column(strings_count, bytes, stream, mr); + auto chars_column = create_chars_child_column(bytes, stream, mr); auto d_chars = chars_column->mutable_view().data(); thrust::for_each_n(rmm::exec_policy(stream), thrust::make_counting_iterator(0), diff --git a/cpp/src/strings/convert/convert_datetime.cu b/cpp/src/strings/convert/convert_datetime.cu index cc918305349..0ec13b3648b 100644 --- a/cpp/src/strings/convert/convert_datetime.cu +++ b/cpp/src/strings/convert/convert_datetime.cu @@ -958,7 +958,7 @@ std::unique_ptr from_timestamps(column_view const& timestamps, // build chars column auto const bytes = cudf::detail::get_value(offsets_column->view(), strings_count, stream); - auto chars_column = create_chars_child_column(strings_count, bytes, stream, mr); + auto chars_column = create_chars_child_column(bytes, stream, mr); auto d_chars = chars_column->mutable_view().template data(); // fill in chars column with timestamps // dispatcher is called to handle the different timestamp types diff --git a/cpp/src/strings/convert/convert_durations.cu b/cpp/src/strings/convert/convert_durations.cu index 82039ad7692..7e6769a869b 100644 --- a/cpp/src/strings/convert/convert_durations.cu +++ b/cpp/src/strings/convert/convert_durations.cu @@ -427,7 +427,7 @@ struct dispatch_from_durations_fn { // build chars column auto const chars_bytes = cudf::detail::get_value(offsets_column->view(), strings_count, stream); - auto chars_column = detail::create_chars_child_column(strings_count, chars_bytes, stream, mr); + auto chars_column = detail::create_chars_child_column(chars_bytes, stream, mr); auto d_chars = chars_column->mutable_view().template data(); thrust::for_each_n(rmm::exec_policy(stream), diff --git a/cpp/src/strings/convert/convert_fixed_point.cu b/cpp/src/strings/convert/convert_fixed_point.cu index 3b2616ebc4f..5e3ce0d31c3 100644 --- a/cpp/src/strings/convert/convert_fixed_point.cu +++ b/cpp/src/strings/convert/convert_fixed_point.cu @@ -396,7 +396,7 @@ struct dispatch_from_fixed_point_fn { // build chars column auto const bytes = cudf::detail::get_value(offsets_column->view(), input.size(), stream); - auto chars_column = detail::create_chars_child_column(input.size(), bytes, stream, mr); + auto chars_column = detail::create_chars_child_column(bytes, stream, mr); auto d_chars = chars_column->mutable_view().template data(); thrust::for_each_n(rmm::exec_policy(stream), thrust::make_counting_iterator(0), diff --git a/cpp/src/strings/convert/convert_floats.cu b/cpp/src/strings/convert/convert_floats.cu index 05142e7e5f2..d4d6974cef5 100644 --- a/cpp/src/strings/convert/convert_floats.cu +++ b/cpp/src/strings/convert/convert_floats.cu @@ -492,7 +492,7 @@ struct dispatch_from_floats_fn { // build chars column auto const bytes = cudf::detail::get_value(offsets_view, strings_count, stream); - auto chars_column = detail::create_chars_child_column(strings_count, bytes, stream, mr); + auto chars_column = detail::create_chars_child_column(bytes, stream, mr); auto chars_view = chars_column->mutable_view(); auto d_chars = chars_view.template data(); thrust::for_each_n(rmm::exec_policy(stream), diff --git a/cpp/src/strings/convert/convert_integers.cu b/cpp/src/strings/convert/convert_integers.cu index 01da56d7254..ce06743de6c 100644 --- a/cpp/src/strings/convert/convert_integers.cu +++ b/cpp/src/strings/convert/convert_integers.cu @@ -350,7 +350,7 @@ struct dispatch_from_integers_fn { // build chars column auto const bytes = cudf::detail::get_value(offsets_view, strings_count, stream); - auto chars_column = detail::create_chars_child_column(strings_count, bytes, stream, mr); + auto chars_column = detail::create_chars_child_column(bytes, stream, mr); auto chars_view = chars_column->mutable_view(); auto d_chars = chars_view.template data(); thrust::for_each_n(rmm::exec_policy(stream), diff --git a/cpp/src/strings/convert/convert_ipv4.cu b/cpp/src/strings/convert/convert_ipv4.cu index 2984069ea23..d7b79547f29 100644 --- a/cpp/src/strings/convert/convert_ipv4.cu +++ b/cpp/src/strings/convert/convert_ipv4.cu @@ -192,7 +192,7 @@ std::unique_ptr integers_to_ipv4( // build chars column auto const bytes = cudf::detail::get_value(offsets_column->view(), strings_count, stream); - auto chars_column = create_chars_child_column(strings_count, bytes, stream, mr); + auto chars_column = create_chars_child_column(bytes, stream, mr); auto d_chars = chars_column->mutable_view().data(); thrust::for_each_n(rmm::exec_policy(stream), thrust::make_counting_iterator(0), diff --git a/cpp/src/strings/convert/convert_urls.cu b/cpp/src/strings/convert/convert_urls.cu index ce145e8a413..33647c7b22f 100644 --- a/cpp/src/strings/convert/convert_urls.cu +++ b/cpp/src/strings/convert/convert_urls.cu @@ -142,7 +142,7 @@ std::unique_ptr url_encode( auto const bytes = cudf::detail::get_value(offsets_column->view(), strings_count, stream); // build chars column - auto chars_column = create_chars_child_column(strings_count, bytes, stream, mr); + auto chars_column = create_chars_child_column(bytes, stream, mr); auto d_chars = chars_column->mutable_view().data(); thrust::for_each_n(rmm::exec_policy(stream), thrust::make_counting_iterator(0), @@ -392,8 +392,7 @@ std::unique_ptr url_decode( // create the chars column auto chars_column = - create_chars_child_column(strings_count, - chars_bytes - (esc_count * 2), // replacing 3 bytes with 1 + create_chars_child_column(chars_bytes - (esc_count * 2), // replacing 3 bytes with 1 stream, mr); auto d_out_chars = chars_column->mutable_view().data(); diff --git a/cpp/src/strings/filling/fill.cu b/cpp/src/strings/filling/fill.cu index 25e9f7a2412..7ee64e75ae8 100644 --- a/cpp/src/strings/filling/fill.cu +++ b/cpp/src/strings/filling/fill.cu @@ -80,7 +80,7 @@ std::unique_ptr fill( // create the chars column auto const bytes = cudf::detail::get_value(offsets_column->view(), strings_count, stream); - auto chars_column = strings::detail::create_chars_child_column(strings_count, bytes, stream, mr); + auto chars_column = create_chars_child_column(bytes, stream, mr); // fill the chars column auto d_chars = chars_column->mutable_view().data(); thrust::for_each_n( diff --git a/cpp/src/strings/padding.cu b/cpp/src/strings/padding.cu index bce2ee52c1c..253bf846993 100644 --- a/cpp/src/strings/padding.cu +++ b/cpp/src/strings/padding.cu @@ -82,7 +82,7 @@ std::unique_ptr pad( // build chars column auto const bytes = cudf::detail::get_value(offsets_column->view(), strings_count, stream); - auto chars_column = strings::detail::create_chars_child_column(strings_count, bytes, stream, mr); + auto chars_column = strings::detail::create_chars_child_column(bytes, stream, mr); auto d_chars = chars_column->mutable_view().data(); if (side == pad_side::LEFT) { @@ -170,7 +170,7 @@ std::unique_ptr zfill( // build chars column auto const bytes = cudf::detail::get_value(offsets_column->view(), strings_count, stream); - auto chars_column = strings::detail::create_chars_child_column(strings_count, bytes, stream, mr); + auto chars_column = strings::detail::create_chars_child_column(bytes, stream, mr); auto d_chars = chars_column->mutable_view().data(); thrust::for_each_n(rmm::exec_policy(stream), diff --git a/cpp/src/strings/repeat_strings.cu b/cpp/src/strings/repeat_strings.cu index 79ea94e8a06..dd91fe0e49d 100644 --- a/cpp/src/strings/repeat_strings.cu +++ b/cpp/src/strings/repeat_strings.cu @@ -73,7 +73,7 @@ auto generate_empty_output(strings_column_view const& input, rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) { - auto chars_column = create_chars_child_column(strings_count, 0, stream, mr); + auto chars_column = create_chars_child_column(0, stream, mr); auto offsets_column = make_numeric_column( data_type{type_to_id()}, strings_count + 1, mask_state::UNALLOCATED, stream, mr); diff --git a/cpp/src/strings/replace/replace.cu b/cpp/src/strings/replace/replace.cu index 02e861433a9..4185e6db685 100644 --- a/cpp/src/strings/replace/replace.cu +++ b/cpp/src/strings/replace/replace.cu @@ -477,8 +477,8 @@ std::unique_ptr replace_char_parallel(strings_column_view const& strings offsets_update_fn); // build the characters column - auto chars_column = create_chars_child_column( - strings_count, chars_bytes + (delta_per_target * target_count), stream, mr); + auto chars_column = + create_chars_child_column(chars_bytes + (delta_per_target * target_count), stream, mr); auto d_out_chars = chars_column->mutable_view().data(); thrust::for_each_n( rmm::exec_policy(stream), @@ -819,7 +819,7 @@ std::unique_ptr replace_nulls(strings_column_view const& strings, // build chars column auto const bytes = cudf::detail::get_value(offsets_column->view(), strings_count, stream); - auto chars_column = strings::detail::create_chars_child_column(strings_count, bytes, stream, mr); + auto chars_column = create_chars_child_column(bytes, stream, mr); auto d_chars = chars_column->mutable_view().data(); thrust::for_each_n(rmm::exec_policy(stream), thrust::make_counting_iterator(0), diff --git a/cpp/src/strings/utilities.cu b/cpp/src/strings/utilities.cu index 3326bcab82f..f16589e8146 100644 --- a/cpp/src/strings/utilities.cu +++ b/cpp/src/strings/utilities.cu @@ -100,8 +100,7 @@ std::unique_ptr child_chars_from_string_vector(cudf::device_span create_chars_child_column(cudf::size_type strings_count, - cudf::size_type total_bytes, +std::unique_ptr create_chars_child_column(cudf::size_type total_bytes, rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) { diff --git a/cpp/src/text/detokenize.cu b/cpp/src/text/detokenize.cu index 4be081bcf93..8402f4acf1b 100644 --- a/cpp/src/text/detokenize.cu +++ b/cpp/src/text/detokenize.cu @@ -174,9 +174,8 @@ std::unique_ptr detokenize(cudf::strings_column_view const& string // build the chars column - append each source token to the appropriate output row cudf::size_type const total_bytes = cudf::detail::get_value(offsets_column->view(), output_count, stream); - auto chars_column = - cudf::strings::detail::create_chars_child_column(output_count, total_bytes, stream, mr); - auto d_chars = chars_column->mutable_view().data(); + auto chars_column = cudf::strings::detail::create_chars_child_column(total_bytes, stream, mr); + auto d_chars = chars_column->mutable_view().data(); thrust::for_each_n( rmm::exec_policy(stream), thrust::make_counting_iterator(0), diff --git a/cpp/src/text/generate_ngrams.cu b/cpp/src/text/generate_ngrams.cu index 71ef0bac4f0..cab5a54a57d 100644 --- a/cpp/src/text/generate_ngrams.cu +++ b/cpp/src/text/generate_ngrams.cu @@ -243,8 +243,7 @@ std::unique_ptr generate_character_ngrams(cudf::strings_column_vie // build the chars column auto const chars_bytes = cudf::detail::get_value(offsets_column->view(), total_ngrams, stream); - auto chars_column = - cudf::strings::detail::create_chars_child_column(total_ngrams, chars_bytes, stream, mr); + auto chars_column = cudf::strings::detail::create_chars_child_column(chars_bytes, stream, mr); generator.d_chars = chars_column->mutable_view().data(); // output chars thrust::for_each_n(rmm::exec_policy(stream), thrust::make_counting_iterator(0), diff --git a/cpp/src/text/ngrams_tokenize.cu b/cpp/src/text/ngrams_tokenize.cu index 36136ef89fa..3b0945f05b9 100644 --- a/cpp/src/text/ngrams_tokenize.cu +++ b/cpp/src/text/ngrams_tokenize.cu @@ -220,7 +220,7 @@ std::unique_ptr ngrams_tokenize( // build chars column auto chars_column = - cudf::strings::detail::create_chars_child_column(strings_count, output_chars_size, stream, mr); + cudf::strings::detail::create_chars_child_column(output_chars_size, stream, mr); auto d_chars = chars_column->mutable_view().data(); // Generate the ngrams into the chars column data buffer. // The ngram_builder_fn functor also fills the d_ngram_sizes vector with the From e12a715e52c7853d373cd7c5f75d404c0e9370aa Mon Sep 17 00:00:00 2001 From: davidwendt Date: Mon, 21 Jun 2021 19:23:36 -0400 Subject: [PATCH 2/2] add utility call to json_path and concatenate --- cpp/src/strings/copying/concatenate.cu | 6 +++--- cpp/src/strings/json/json_path.cu | 4 ++-- cpp/src/strings/utilities.cu | 3 +-- 3 files changed, 6 insertions(+), 7 deletions(-) diff --git a/cpp/src/strings/copying/concatenate.cu b/cpp/src/strings/copying/concatenate.cu index 7d06d773519..866ff1adbc6 100644 --- a/cpp/src/strings/copying/concatenate.cu +++ b/cpp/src/strings/copying/concatenate.cu @@ -22,6 +22,7 @@ #include #include #include +#include #include #include @@ -226,9 +227,8 @@ std::unique_ptr concatenate(host_span columns, std::any_of(columns.begin(), columns.end(), [](auto const& col) { return col.has_nulls(); }); // create chars column - auto chars_column = - make_numeric_column(data_type{type_id::INT8}, total_bytes, mask_state::UNALLOCATED, stream, mr); - auto d_new_chars = chars_column->mutable_view().data(); + auto chars_column = create_chars_child_column(total_bytes, stream, mr); + auto d_new_chars = chars_column->mutable_view().data(); chars_column->set_null_count(0); // create offsets column diff --git a/cpp/src/strings/json/json_path.cu b/cpp/src/strings/json/json_path.cu index 8de9915a668..dfdd3226844 100644 --- a/cpp/src/strings/json/json_path.cu +++ b/cpp/src/strings/json/json_path.cu @@ -21,6 +21,7 @@ #include #include #include +#include #include #include #include @@ -964,8 +965,7 @@ std::unique_ptr get_json_object(cudf::strings_column_view const& c cudf::detail::get_value(offsets_view, col.size(), stream); // allocate output string column - auto chars = cudf::make_fixed_width_column( - data_type{type_id::INT8}, output_size, mask_state::UNALLOCATED, stream, mr); + auto chars = create_chars_child_column(output_size, stream, mr); // potential optimization : if we know that all outputs are valid, we could skip creating // the validity mask altogether diff --git a/cpp/src/strings/utilities.cu b/cpp/src/strings/utilities.cu index f16589e8146..cfe51824540 100644 --- a/cpp/src/strings/utilities.cu +++ b/cpp/src/strings/utilities.cu @@ -84,8 +84,7 @@ std::unique_ptr child_chars_from_string_vector(cudf::device_span(); // create column - auto chars_column = - make_numeric_column(data_type{type_id::INT8}, bytes, mask_state::UNALLOCATED, stream, mr); + auto chars_column = create_chars_child_column(bytes, stream, mr); // get it's view auto d_chars = chars_column->mutable_view().data(); thrust::for_each_n(rmm::exec_policy(stream),