Skip to content

Commit

Permalink
Remove strings_count parameter from cudf::strings::detail::create_cha…
Browse files Browse the repository at this point in the history
…rs_child_column (#8576)

Closes #8559 

The `strings_count` parameter was necessary in a previous version of the `cudf::strings::detail::create_chars_child_column` but has become obsolete. This PR removes the parameter and fixes up the calls to the utility.

Authors:
  - David Wendt (https://github.com/davidwendt)

Approvers:
  - Mark Harris (https://github.com/harrism)
  - Karthikeyan (https://github.com/karthikeyann)
  - Nghia Truong (https://github.com/ttnghia)

URL: #8576
  • Loading branch information
davidwendt authored Jun 22, 2021
1 parent 681c40e commit 15cd690
Show file tree
Hide file tree
Showing 32 changed files with 49 additions and 61 deletions.
2 changes: 1 addition & 1 deletion cpp/include/cudf/strings/detail/copy_if_else.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -93,7 +93,7 @@ std::unique_ptr<cudf::column> copy_if_else(
// build chars column
auto const bytes =
cudf::detail::get_value<int32_t>(offsets_column->view(), strings_count, stream);
auto chars_column = create_chars_child_column(strings_count, bytes, stream, mr);
auto chars_column = create_chars_child_column(bytes, stream, mr);
auto d_chars = chars_column->mutable_view().template data<char>();
// fill in chars
thrust::for_each_n(
Expand Down
3 changes: 1 addition & 2 deletions cpp/include/cudf/strings/detail/copy_range.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -183,8 +183,7 @@ std::unique_ptr<column> copy_range(
thrust::device_pointer_cast(p_offsets_column->view().template data<size_type>());
auto const chars_bytes =
cudf::detail::get_value<int32_t>(p_offsets_column->view(), target.size(), stream);
auto p_chars_column =
strings::detail::create_chars_child_column(target.size(), chars_bytes, stream, mr);
auto p_chars_column = strings::detail::create_chars_child_column(chars_bytes, stream, mr);

// copy to the chars column

Expand Down
2 changes: 1 addition & 1 deletion cpp/include/cudf/strings/detail/gather.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -229,7 +229,7 @@ std::unique_ptr<cudf::column> gather_chars(StringIterator strings_begin,
auto const output_count = std::distance(map_begin, map_end);
if (output_count == 0) return make_empty_column(data_type{type_id::INT8});

auto chars_column = create_chars_child_column(output_count, chars_bytes, stream, mr);
auto chars_column = create_chars_child_column(chars_bytes, stream, mr);
auto const d_chars = chars_column->mutable_view().template data<char>();

constexpr int warps_per_threadblock = 4;
Expand Down
2 changes: 1 addition & 1 deletion cpp/include/cudf/strings/detail/merge.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -83,7 +83,7 @@ std::unique_ptr<column> merge(strings_column_view const& lhs,
// create the chars column
auto const bytes =
cudf::detail::get_value<int32_t>(offsets_column->view(), strings_count, stream);
auto chars_column = strings::detail::create_chars_child_column(strings_count, bytes, stream, mr);
auto chars_column = strings::detail::create_chars_child_column(bytes, stream, mr);
// merge the strings
auto d_chars = chars_column->mutable_view().template data<char>();
thrust::for_each_n(rmm::exec_policy(stream),
Expand Down
9 changes: 4 additions & 5 deletions cpp/include/cudf/strings/detail/strings_column_factories.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -111,10 +111,9 @@ std::unique_ptr<column> make_strings_column(IndexPairIterator begin,
mr);
} else {
// this approach is 2-3x faster for a large number of smaller string lengths
auto chars_column =
strings::detail::create_chars_child_column(strings_count, bytes, stream, mr);
auto d_chars = chars_column->mutable_view().template data<char>();
auto copy_chars = [d_chars] __device__(auto item) {
auto chars_column = create_chars_child_column(bytes, stream, mr);
auto d_chars = chars_column->mutable_view().template data<char>();
auto copy_chars = [d_chars] __device__(auto item) {
string_index_pair const str = thrust::get<0>(item);
size_type const offset = thrust::get<1>(item);
if (str.first != nullptr) memcpy(d_chars + offset, str.first, str.second);
Expand Down Expand Up @@ -182,7 +181,7 @@ std::unique_ptr<column> make_strings_column(CharIterator chars_begin,
[] __device__(auto offset) { return static_cast<int32_t>(offset); });

// build chars column
auto chars_column = strings::detail::create_chars_child_column(strings_count, bytes, stream, mr);
auto chars_column = strings::detail::create_chars_child_column(bytes, stream, mr);
auto chars_view = chars_column->mutable_view();
thrust::copy(rmm::exec_policy(stream), chars_begin, chars_end, chars_view.data<char>());

Expand Down
5 changes: 2 additions & 3 deletions cpp/include/cudf/strings/detail/utilities.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -166,8 +166,7 @@ auto make_strings_children(

// Now build the chars column
auto const bytes = cudf::detail::get_value<int32_t>(offsets_view, strings_count, stream);
std::unique_ptr<column> chars_column =
create_chars_child_column(strings_count, bytes, stream, mr);
std::unique_ptr<column> chars_column = create_chars_child_column(bytes, stream, mr);

// Execute the function fn again to fill the chars column.
// Note that if the output chars column has zero size, the function fn should not be called to
Expand Down Expand Up @@ -261,7 +260,7 @@ make_strings_children_with_null_mask(

// Now build the chars column
auto const bytes = cudf::detail::get_value<int32_t>(offsets_view, strings_count, stream);
auto chars_column = create_chars_child_column(strings_count, bytes, stream, mr);
auto chars_column = create_chars_child_column(bytes, stream, mr);

// Execute the function fn again to fill the chars column.
// Note that if the output chars column has zero size, the function fn should not be called to
Expand Down
3 changes: 1 addition & 2 deletions cpp/include/cudf/strings/detail/utilities.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -27,16 +27,15 @@ namespace strings {
namespace detail {
/**
* @brief Create a chars column to be a child of a strings column.
*
* This will return the properly sized column to be filled in by the caller.
*
* @param strings_count Number of strings in the column.
* @param bytes Number of bytes for the chars column.
* @param stream CUDA stream used for device memory operations and kernel launches.
* @param mr Device memory resource used to allocate the returned column's device memory.
* @return The chars child column for a strings column.
*/
std::unique_ptr<column> create_chars_child_column(
size_type strings_count,
size_type bytes,
rmm::cuda_stream_view stream = rmm::cuda_stream_default,
rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
Expand Down
7 changes: 3 additions & 4 deletions cpp/src/hash/md5_hash.cu
Original file line number Diff line number Diff line change
Expand Up @@ -64,10 +64,9 @@ std::unique_ptr<column> md5_hash(table_view const& input,
auto offsets_column =
cudf::strings::detail::make_offsets_child_column(begin, begin + input.num_rows(), stream, mr);

auto chars_column =
strings::detail::create_chars_child_column(input.num_rows(), input.num_rows() * 32, stream, mr);
auto chars_view = chars_column->mutable_view();
auto d_chars = chars_view.data<char>();
auto chars_column = strings::detail::create_chars_child_column(input.num_rows() * 32, stream, mr);
auto chars_view = chars_column->mutable_view();
auto d_chars = chars_view.data<char>();

rmm::device_buffer null_mask{0, stream, mr};

Expand Down
7 changes: 3 additions & 4 deletions cpp/src/io/csv/durations.cu
Original file line number Diff line number Diff line change
Expand Up @@ -190,10 +190,9 @@ struct dispatch_from_durations_fn {
// build chars column
auto const chars_bytes =
cudf::detail::get_value<int32_t>(offsets_column->view(), strings_count, stream);
auto chars_column =
strings::detail::create_chars_child_column(strings_count, chars_bytes, stream, mr);
auto chars_view = chars_column->mutable_view();
auto d_chars = chars_view.template data<char>();
auto chars_column = strings::detail::create_chars_child_column(chars_bytes, stream, mr);
auto chars_view = chars_column->mutable_view();
auto d_chars = chars_view.template data<char>();

thrust::for_each_n(rmm::exec_policy(stream),
thrust::make_counting_iterator<size_type>(0),
Expand Down
3 changes: 1 addition & 2 deletions cpp/src/replace/clamp.cu
Original file line number Diff line number Diff line change
Expand Up @@ -67,8 +67,7 @@ std::pair<std::unique_ptr<column>, std::unique_ptr<column>> form_offsets_and_cha
// build chars column
auto const bytes =
cudf::detail::get_value<int32_t>(offsets_column->view(), strings_count, stream);
auto chars_column =
cudf::strings::detail::create_chars_child_column(strings_count, bytes, stream, mr);
auto chars_column = cudf::strings::detail::create_chars_child_column(bytes, stream, mr);

return std::make_pair(std::move(offsets_column), std::move(chars_column));
}
Expand Down
2 changes: 1 addition & 1 deletion cpp/src/replace/nulls.cu
Original file line number Diff line number Diff line change
Expand Up @@ -254,7 +254,7 @@ std::unique_ptr<cudf::column> replace_nulls_column_kernel_forwarder::operator()<

// Allocate chars array and output null mask
std::unique_ptr<cudf::column> output_chars =
cudf::strings::detail::create_chars_child_column(input.size(), bytes, stream, mr);
cudf::strings::detail::create_chars_child_column(bytes, stream, mr);

auto output_chars_view = output_chars->mutable_view();

Expand Down
2 changes: 1 addition & 1 deletion cpp/src/replace/replace.cu
Original file line number Diff line number Diff line change
Expand Up @@ -419,7 +419,7 @@ std::unique_ptr<cudf::column> replace_kernel_forwarder::operator()<cudf::string_
// Allocate chars array and output null mask
cudf::size_type null_count = input_col.size() - valid_counter.value(stream);
std::unique_ptr<cudf::column> output_chars =
cudf::strings::detail::create_chars_child_column(input_col.size(), bytes, stream, mr);
cudf::strings::detail::create_chars_child_column(bytes, stream, mr);

auto output_chars_view = output_chars->mutable_view();
auto device_chars = cudf::mutable_column_device_view::create(output_chars_view);
Expand Down
2 changes: 1 addition & 1 deletion cpp/src/reshape/interleave_columns.cu
Original file line number Diff line number Diff line change
Expand Up @@ -104,7 +104,7 @@ struct interleave_columns_functor {
// Create the chars column
auto const bytes =
cudf::detail::get_value<int32_t>(offsets_column->view(), num_strings, stream);
auto chars_column = strings::detail::create_chars_child_column(num_strings, bytes, stream, mr);
auto chars_column = strings::detail::create_chars_child_column(bytes, stream, mr);
// Fill the chars column
auto d_results_chars = chars_column->mutable_view().data<char>();
thrust::for_each_n(
Expand Down
2 changes: 1 addition & 1 deletion cpp/src/strings/combine/join.cu
Original file line number Diff line number Diff line change
Expand Up @@ -97,7 +97,7 @@ std::unique_ptr<column> join_strings(strings_column_view const& strings,
auto null_mask = null_count
? cudf::detail::create_null_mask(1, cudf::mask_state::ALL_NULL, stream, mr)
: rmm::device_buffer{0, stream, mr};
auto chars_column = detail::create_chars_child_column(strings_count, bytes, stream, mr);
auto chars_column = create_chars_child_column(bytes, stream, mr);
auto d_chars = chars_column->mutable_view().data<char>();
thrust::for_each_n(
rmm::exec_policy(stream),
Expand Down
2 changes: 1 addition & 1 deletion cpp/src/strings/convert/convert_booleans.cu
Original file line number Diff line number Diff line change
Expand Up @@ -125,7 +125,7 @@ std::unique_ptr<column> from_booleans(column_view const& booleans,
// build chars column
auto const bytes =
cudf::detail::get_value<int32_t>(offsets_column->view(), strings_count, stream);
auto chars_column = create_chars_child_column(strings_count, bytes, stream, mr);
auto chars_column = create_chars_child_column(bytes, stream, mr);
auto d_chars = chars_column->mutable_view().data<char>();
thrust::for_each_n(rmm::exec_policy(stream),
thrust::make_counting_iterator<size_type>(0),
Expand Down
2 changes: 1 addition & 1 deletion cpp/src/strings/convert/convert_datetime.cu
Original file line number Diff line number Diff line change
Expand Up @@ -958,7 +958,7 @@ std::unique_ptr<column> from_timestamps(column_view const& timestamps,
// build chars column
auto const bytes =
cudf::detail::get_value<int32_t>(offsets_column->view(), strings_count, stream);
auto chars_column = create_chars_child_column(strings_count, bytes, stream, mr);
auto chars_column = create_chars_child_column(bytes, stream, mr);
auto d_chars = chars_column->mutable_view().template data<char>();
// fill in chars column with timestamps
// dispatcher is called to handle the different timestamp types
Expand Down
2 changes: 1 addition & 1 deletion cpp/src/strings/convert/convert_durations.cu
Original file line number Diff line number Diff line change
Expand Up @@ -427,7 +427,7 @@ struct dispatch_from_durations_fn {
// build chars column
auto const chars_bytes =
cudf::detail::get_value<int32_t>(offsets_column->view(), strings_count, stream);
auto chars_column = detail::create_chars_child_column(strings_count, chars_bytes, stream, mr);
auto chars_column = detail::create_chars_child_column(chars_bytes, stream, mr);
auto d_chars = chars_column->mutable_view().template data<char>();

thrust::for_each_n(rmm::exec_policy(stream),
Expand Down
2 changes: 1 addition & 1 deletion cpp/src/strings/convert/convert_fixed_point.cu
Original file line number Diff line number Diff line change
Expand Up @@ -396,7 +396,7 @@ struct dispatch_from_fixed_point_fn {
// build chars column
auto const bytes =
cudf::detail::get_value<int32_t>(offsets_column->view(), input.size(), stream);
auto chars_column = detail::create_chars_child_column(input.size(), bytes, stream, mr);
auto chars_column = detail::create_chars_child_column(bytes, stream, mr);
auto d_chars = chars_column->mutable_view().template data<char>();
thrust::for_each_n(rmm::exec_policy(stream),
thrust::make_counting_iterator<size_type>(0),
Expand Down
2 changes: 1 addition & 1 deletion cpp/src/strings/convert/convert_floats.cu
Original file line number Diff line number Diff line change
Expand Up @@ -492,7 +492,7 @@ struct dispatch_from_floats_fn {

// build chars column
auto const bytes = cudf::detail::get_value<int32_t>(offsets_view, strings_count, stream);
auto chars_column = detail::create_chars_child_column(strings_count, bytes, stream, mr);
auto chars_column = detail::create_chars_child_column(bytes, stream, mr);
auto chars_view = chars_column->mutable_view();
auto d_chars = chars_view.template data<char>();
thrust::for_each_n(rmm::exec_policy(stream),
Expand Down
2 changes: 1 addition & 1 deletion cpp/src/strings/convert/convert_integers.cu
Original file line number Diff line number Diff line change
Expand Up @@ -350,7 +350,7 @@ struct dispatch_from_integers_fn {

// build chars column
auto const bytes = cudf::detail::get_value<int32_t>(offsets_view, strings_count, stream);
auto chars_column = detail::create_chars_child_column(strings_count, bytes, stream, mr);
auto chars_column = detail::create_chars_child_column(bytes, stream, mr);
auto chars_view = chars_column->mutable_view();
auto d_chars = chars_view.template data<char>();
thrust::for_each_n(rmm::exec_policy(stream),
Expand Down
2 changes: 1 addition & 1 deletion cpp/src/strings/convert/convert_ipv4.cu
Original file line number Diff line number Diff line change
Expand Up @@ -192,7 +192,7 @@ std::unique_ptr<column> integers_to_ipv4(
// build chars column
auto const bytes =
cudf::detail::get_value<int32_t>(offsets_column->view(), strings_count, stream);
auto chars_column = create_chars_child_column(strings_count, bytes, stream, mr);
auto chars_column = create_chars_child_column(bytes, stream, mr);
auto d_chars = chars_column->mutable_view().data<char>();
thrust::for_each_n(rmm::exec_policy(stream),
thrust::make_counting_iterator<size_type>(0),
Expand Down
5 changes: 2 additions & 3 deletions cpp/src/strings/convert/convert_urls.cu
Original file line number Diff line number Diff line change
Expand Up @@ -142,7 +142,7 @@ std::unique_ptr<column> url_encode(
auto const bytes =
cudf::detail::get_value<int32_t>(offsets_column->view(), strings_count, stream);
// build chars column
auto chars_column = create_chars_child_column(strings_count, bytes, stream, mr);
auto chars_column = create_chars_child_column(bytes, stream, mr);
auto d_chars = chars_column->mutable_view().data<char>();
thrust::for_each_n(rmm::exec_policy(stream),
thrust::make_counting_iterator<size_type>(0),
Expand Down Expand Up @@ -392,8 +392,7 @@ std::unique_ptr<column> url_decode(

// create the chars column
auto chars_column =
create_chars_child_column(strings_count,
chars_bytes - (esc_count * 2), // replacing 3 bytes with 1
create_chars_child_column(chars_bytes - (esc_count * 2), // replacing 3 bytes with 1
stream,
mr);
auto d_out_chars = chars_column->mutable_view().data<char>();
Expand Down
6 changes: 3 additions & 3 deletions cpp/src/strings/copying/concatenate.cu
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
#include <cudf/detail/utilities/cuda.cuh>
#include <cudf/detail/utilities/vector_factories.hpp>
#include <cudf/strings/detail/concatenate.hpp>
#include <cudf/strings/detail/utilities.hpp>
#include <cudf/strings/strings_column_view.hpp>
#include <cudf/table/table_device_view.cuh>

Expand Down Expand Up @@ -226,9 +227,8 @@ std::unique_ptr<column> concatenate(host_span<column_view const> columns,
std::any_of(columns.begin(), columns.end(), [](auto const& col) { return col.has_nulls(); });

// create chars column
auto chars_column =
make_numeric_column(data_type{type_id::INT8}, total_bytes, mask_state::UNALLOCATED, stream, mr);
auto d_new_chars = chars_column->mutable_view().data<char>();
auto chars_column = create_chars_child_column(total_bytes, stream, mr);
auto d_new_chars = chars_column->mutable_view().data<char>();
chars_column->set_null_count(0);

// create offsets column
Expand Down
2 changes: 1 addition & 1 deletion cpp/src/strings/filling/fill.cu
Original file line number Diff line number Diff line change
Expand Up @@ -80,7 +80,7 @@ std::unique_ptr<column> fill(
// create the chars column
auto const bytes =
cudf::detail::get_value<int32_t>(offsets_column->view(), strings_count, stream);
auto chars_column = strings::detail::create_chars_child_column(strings_count, bytes, stream, mr);
auto chars_column = create_chars_child_column(bytes, stream, mr);
// fill the chars column
auto d_chars = chars_column->mutable_view().data<char>();
thrust::for_each_n(
Expand Down
4 changes: 2 additions & 2 deletions cpp/src/strings/json/json_path.cu
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
#include <cudf/detail/utilities/cuda.cuh>
#include <cudf/detail/utilities/vector_factories.hpp>
#include <cudf/scalar/scalar.hpp>
#include <cudf/strings/detail/utilities.hpp>
#include <cudf/strings/json.hpp>
#include <cudf/strings/string_view.cuh>
#include <cudf/strings/strings_column_view.hpp>
Expand Down Expand Up @@ -964,8 +965,7 @@ std::unique_ptr<cudf::column> get_json_object(cudf::strings_column_view const& c
cudf::detail::get_value<offset_type>(offsets_view, col.size(), stream);

// allocate output string column
auto chars = cudf::make_fixed_width_column(
data_type{type_id::INT8}, output_size, mask_state::UNALLOCATED, stream, mr);
auto chars = create_chars_child_column(output_size, stream, mr);

// potential optimization : if we know that all outputs are valid, we could skip creating
// the validity mask altogether
Expand Down
4 changes: 2 additions & 2 deletions cpp/src/strings/padding.cu
Original file line number Diff line number Diff line change
Expand Up @@ -82,7 +82,7 @@ std::unique_ptr<column> pad(
// build chars column
auto const bytes =
cudf::detail::get_value<int32_t>(offsets_column->view(), strings_count, stream);
auto chars_column = strings::detail::create_chars_child_column(strings_count, bytes, stream, mr);
auto chars_column = strings::detail::create_chars_child_column(bytes, stream, mr);
auto d_chars = chars_column->mutable_view().data<char>();

if (side == pad_side::LEFT) {
Expand Down Expand Up @@ -170,7 +170,7 @@ std::unique_ptr<column> zfill(
// build chars column
auto const bytes =
cudf::detail::get_value<int32_t>(offsets_column->view(), strings_count, stream);
auto chars_column = strings::detail::create_chars_child_column(strings_count, bytes, stream, mr);
auto chars_column = strings::detail::create_chars_child_column(bytes, stream, mr);
auto d_chars = chars_column->mutable_view().data<char>();

thrust::for_each_n(rmm::exec_policy(stream),
Expand Down
2 changes: 1 addition & 1 deletion cpp/src/strings/repeat_strings.cu
Original file line number Diff line number Diff line change
Expand Up @@ -73,7 +73,7 @@ auto generate_empty_output(strings_column_view const& input,
rmm::cuda_stream_view stream,
rmm::mr::device_memory_resource* mr)
{
auto chars_column = create_chars_child_column(strings_count, 0, stream, mr);
auto chars_column = create_chars_child_column(0, stream, mr);

auto offsets_column = make_numeric_column(
data_type{type_to_id<offset_type>()}, strings_count + 1, mask_state::UNALLOCATED, stream, mr);
Expand Down
6 changes: 3 additions & 3 deletions cpp/src/strings/replace/replace.cu
Original file line number Diff line number Diff line change
Expand Up @@ -477,8 +477,8 @@ std::unique_ptr<column> replace_char_parallel(strings_column_view const& strings
offsets_update_fn);

// build the characters column
auto chars_column = create_chars_child_column(
strings_count, chars_bytes + (delta_per_target * target_count), stream, mr);
auto chars_column =
create_chars_child_column(chars_bytes + (delta_per_target * target_count), stream, mr);
auto d_out_chars = chars_column->mutable_view().data<char>();
thrust::for_each_n(
rmm::exec_policy(stream),
Expand Down Expand Up @@ -819,7 +819,7 @@ std::unique_ptr<column> replace_nulls(strings_column_view const& strings,
// build chars column
auto const bytes =
cudf::detail::get_value<int32_t>(offsets_column->view(), strings_count, stream);
auto chars_column = strings::detail::create_chars_child_column(strings_count, bytes, stream, mr);
auto chars_column = create_chars_child_column(bytes, stream, mr);
auto d_chars = chars_column->mutable_view().data<char>();
thrust::for_each_n(rmm::exec_policy(stream),
thrust::make_counting_iterator<size_type>(0),
Expand Down
6 changes: 2 additions & 4 deletions cpp/src/strings/utilities.cu
Original file line number Diff line number Diff line change
Expand Up @@ -84,8 +84,7 @@ std::unique_ptr<cudf::column> child_chars_from_string_vector(cudf::device_span<s
auto const d_offsets = offsets.data<int32_t>();

// create column
auto chars_column =
make_numeric_column(data_type{type_id::INT8}, bytes, mask_state::UNALLOCATED, stream, mr);
auto chars_column = create_chars_child_column(bytes, stream, mr);
// get it's view
auto d_chars = chars_column->mutable_view().data<char>();
thrust::for_each_n(rmm::exec_policy(stream),
Expand All @@ -100,8 +99,7 @@ std::unique_ptr<cudf::column> child_chars_from_string_vector(cudf::device_span<s
}

//
std::unique_ptr<column> create_chars_child_column(cudf::size_type strings_count,
cudf::size_type total_bytes,
std::unique_ptr<column> create_chars_child_column(cudf::size_type total_bytes,
rmm::cuda_stream_view stream,
rmm::mr::device_memory_resource* mr)
{
Expand Down
Loading

0 comments on commit 15cd690

Please sign in to comment.