Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Remove strings_count parameter from cudf::strings::detail::create_chars_child_column #8576

Merged
merged 2 commits into from
Jun 22, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion cpp/include/cudf/strings/detail/copy_if_else.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -93,7 +93,7 @@ std::unique_ptr<cudf::column> copy_if_else(
// build chars column
auto const bytes =
cudf::detail::get_value<int32_t>(offsets_column->view(), strings_count, stream);
auto chars_column = create_chars_child_column(strings_count, bytes, stream, mr);
auto chars_column = create_chars_child_column(bytes, stream, mr);
auto d_chars = chars_column->mutable_view().template data<char>();
// fill in chars
thrust::for_each_n(
Expand Down
3 changes: 1 addition & 2 deletions cpp/include/cudf/strings/detail/copy_range.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -183,8 +183,7 @@ std::unique_ptr<column> copy_range(
thrust::device_pointer_cast(p_offsets_column->view().template data<size_type>());
auto const chars_bytes =
cudf::detail::get_value<int32_t>(p_offsets_column->view(), target.size(), stream);
auto p_chars_column =
strings::detail::create_chars_child_column(target.size(), chars_bytes, stream, mr);
auto p_chars_column = strings::detail::create_chars_child_column(chars_bytes, stream, mr);

// copy to the chars column

Expand Down
2 changes: 1 addition & 1 deletion cpp/include/cudf/strings/detail/gather.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -229,7 +229,7 @@ std::unique_ptr<cudf::column> gather_chars(StringIterator strings_begin,
auto const output_count = std::distance(map_begin, map_end);
if (output_count == 0) return make_empty_column(data_type{type_id::INT8});

auto chars_column = create_chars_child_column(output_count, chars_bytes, stream, mr);
auto chars_column = create_chars_child_column(chars_bytes, stream, mr);
auto const d_chars = chars_column->mutable_view().template data<char>();

constexpr int warps_per_threadblock = 4;
Expand Down
2 changes: 1 addition & 1 deletion cpp/include/cudf/strings/detail/merge.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -83,7 +83,7 @@ std::unique_ptr<column> merge(strings_column_view const& lhs,
// create the chars column
auto const bytes =
cudf::detail::get_value<int32_t>(offsets_column->view(), strings_count, stream);
auto chars_column = strings::detail::create_chars_child_column(strings_count, bytes, stream, mr);
auto chars_column = strings::detail::create_chars_child_column(bytes, stream, mr);
// merge the strings
auto d_chars = chars_column->mutable_view().template data<char>();
thrust::for_each_n(rmm::exec_policy(stream),
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -111,10 +111,9 @@ std::unique_ptr<column> make_strings_column(IndexPairIterator begin,
mr);
} else {
// this approach is 2-3x faster for a large number of smaller string lengths
auto chars_column =
strings::detail::create_chars_child_column(strings_count, bytes, stream, mr);
auto d_chars = chars_column->mutable_view().template data<char>();
auto copy_chars = [d_chars] __device__(auto item) {
auto chars_column = create_chars_child_column(bytes, stream, mr);
auto d_chars = chars_column->mutable_view().template data<char>();
auto copy_chars = [d_chars] __device__(auto item) {
string_index_pair const str = thrust::get<0>(item);
size_type const offset = thrust::get<1>(item);
if (str.first != nullptr) memcpy(d_chars + offset, str.first, str.second);
Expand Down Expand Up @@ -182,7 +181,7 @@ std::unique_ptr<column> make_strings_column(CharIterator chars_begin,
[] __device__(auto offset) { return static_cast<int32_t>(offset); });

// build chars column
auto chars_column = strings::detail::create_chars_child_column(strings_count, bytes, stream, mr);
auto chars_column = strings::detail::create_chars_child_column(bytes, stream, mr);
auto chars_view = chars_column->mutable_view();
thrust::copy(rmm::exec_policy(stream), chars_begin, chars_end, chars_view.data<char>());

Expand Down
5 changes: 2 additions & 3 deletions cpp/include/cudf/strings/detail/utilities.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -166,8 +166,7 @@ auto make_strings_children(

// Now build the chars column
auto const bytes = cudf::detail::get_value<int32_t>(offsets_view, strings_count, stream);
std::unique_ptr<column> chars_column =
create_chars_child_column(strings_count, bytes, stream, mr);
std::unique_ptr<column> chars_column = create_chars_child_column(bytes, stream, mr);

// Execute the function fn again to fill the chars column.
// Note that if the output chars column has zero size, the function fn should not be called to
Expand Down Expand Up @@ -261,7 +260,7 @@ make_strings_children_with_null_mask(

// Now build the chars column
auto const bytes = cudf::detail::get_value<int32_t>(offsets_view, strings_count, stream);
auto chars_column = create_chars_child_column(strings_count, bytes, stream, mr);
auto chars_column = create_chars_child_column(bytes, stream, mr);

// Execute the function fn again to fill the chars column.
// Note that if the output chars column has zero size, the function fn should not be called to
Expand Down
3 changes: 1 addition & 2 deletions cpp/include/cudf/strings/detail/utilities.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -27,16 +27,15 @@ namespace strings {
namespace detail {
/**
* @brief Create a chars column to be a child of a strings column.
*
* This will return the properly sized column to be filled in by the caller.
*
* @param strings_count Number of strings in the column.
* @param bytes Number of bytes for the chars column.
* @param stream CUDA stream used for device memory operations and kernel launches.
* @param mr Device memory resource used to allocate the returned column's device memory.
* @return The chars child column for a strings column.
*/
std::unique_ptr<column> create_chars_child_column(
size_type strings_count,
size_type bytes,
rmm::cuda_stream_view stream = rmm::cuda_stream_default,
rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
Expand Down
7 changes: 3 additions & 4 deletions cpp/src/hash/md5_hash.cu
Original file line number Diff line number Diff line change
Expand Up @@ -64,10 +64,9 @@ std::unique_ptr<column> md5_hash(table_view const& input,
auto offsets_column =
cudf::strings::detail::make_offsets_child_column(begin, begin + input.num_rows(), stream, mr);

auto chars_column =
strings::detail::create_chars_child_column(input.num_rows(), input.num_rows() * 32, stream, mr);
auto chars_view = chars_column->mutable_view();
auto d_chars = chars_view.data<char>();
auto chars_column = strings::detail::create_chars_child_column(input.num_rows() * 32, stream, mr);
auto chars_view = chars_column->mutable_view();
auto d_chars = chars_view.data<char>();

rmm::device_buffer null_mask{0, stream, mr};

Expand Down
7 changes: 3 additions & 4 deletions cpp/src/io/csv/durations.cu
Original file line number Diff line number Diff line change
Expand Up @@ -190,10 +190,9 @@ struct dispatch_from_durations_fn {
// build chars column
auto const chars_bytes =
cudf::detail::get_value<int32_t>(offsets_column->view(), strings_count, stream);
auto chars_column =
strings::detail::create_chars_child_column(strings_count, chars_bytes, stream, mr);
auto chars_view = chars_column->mutable_view();
auto d_chars = chars_view.template data<char>();
auto chars_column = strings::detail::create_chars_child_column(chars_bytes, stream, mr);
auto chars_view = chars_column->mutable_view();
auto d_chars = chars_view.template data<char>();

thrust::for_each_n(rmm::exec_policy(stream),
thrust::make_counting_iterator<size_type>(0),
Expand Down
3 changes: 1 addition & 2 deletions cpp/src/replace/clamp.cu
Original file line number Diff line number Diff line change
Expand Up @@ -67,8 +67,7 @@ std::pair<std::unique_ptr<column>, std::unique_ptr<column>> form_offsets_and_cha
// build chars column
auto const bytes =
cudf::detail::get_value<int32_t>(offsets_column->view(), strings_count, stream);
auto chars_column =
cudf::strings::detail::create_chars_child_column(strings_count, bytes, stream, mr);
auto chars_column = cudf::strings::detail::create_chars_child_column(bytes, stream, mr);

return std::make_pair(std::move(offsets_column), std::move(chars_column));
}
Expand Down
2 changes: 1 addition & 1 deletion cpp/src/replace/nulls.cu
Original file line number Diff line number Diff line change
Expand Up @@ -254,7 +254,7 @@ std::unique_ptr<cudf::column> replace_nulls_column_kernel_forwarder::operator()<

// Allocate chars array and output null mask
std::unique_ptr<cudf::column> output_chars =
cudf::strings::detail::create_chars_child_column(input.size(), bytes, stream, mr);
cudf::strings::detail::create_chars_child_column(bytes, stream, mr);

auto output_chars_view = output_chars->mutable_view();

Expand Down
2 changes: 1 addition & 1 deletion cpp/src/replace/replace.cu
Original file line number Diff line number Diff line change
Expand Up @@ -419,7 +419,7 @@ std::unique_ptr<cudf::column> replace_kernel_forwarder::operator()<cudf::string_
// Allocate chars array and output null mask
cudf::size_type null_count = input_col.size() - valid_counter.value(stream);
std::unique_ptr<cudf::column> output_chars =
cudf::strings::detail::create_chars_child_column(input_col.size(), bytes, stream, mr);
cudf::strings::detail::create_chars_child_column(bytes, stream, mr);

auto output_chars_view = output_chars->mutable_view();
auto device_chars = cudf::mutable_column_device_view::create(output_chars_view);
Expand Down
2 changes: 1 addition & 1 deletion cpp/src/reshape/interleave_columns.cu
Original file line number Diff line number Diff line change
Expand Up @@ -104,7 +104,7 @@ struct interleave_columns_functor {
// Create the chars column
auto const bytes =
cudf::detail::get_value<int32_t>(offsets_column->view(), num_strings, stream);
auto chars_column = strings::detail::create_chars_child_column(num_strings, bytes, stream, mr);
auto chars_column = strings::detail::create_chars_child_column(bytes, stream, mr);
// Fill the chars column
auto d_results_chars = chars_column->mutable_view().data<char>();
thrust::for_each_n(
Expand Down
2 changes: 1 addition & 1 deletion cpp/src/strings/combine/join.cu
Original file line number Diff line number Diff line change
Expand Up @@ -97,7 +97,7 @@ std::unique_ptr<column> join_strings(strings_column_view const& strings,
auto null_mask = null_count
? cudf::detail::create_null_mask(1, cudf::mask_state::ALL_NULL, stream, mr)
: rmm::device_buffer{0, stream, mr};
auto chars_column = detail::create_chars_child_column(strings_count, bytes, stream, mr);
auto chars_column = create_chars_child_column(bytes, stream, mr);
auto d_chars = chars_column->mutable_view().data<char>();
thrust::for_each_n(
rmm::exec_policy(stream),
Expand Down
2 changes: 1 addition & 1 deletion cpp/src/strings/convert/convert_booleans.cu
Original file line number Diff line number Diff line change
Expand Up @@ -125,7 +125,7 @@ std::unique_ptr<column> from_booleans(column_view const& booleans,
// build chars column
auto const bytes =
cudf::detail::get_value<int32_t>(offsets_column->view(), strings_count, stream);
auto chars_column = create_chars_child_column(strings_count, bytes, stream, mr);
auto chars_column = create_chars_child_column(bytes, stream, mr);
auto d_chars = chars_column->mutable_view().data<char>();
thrust::for_each_n(rmm::exec_policy(stream),
thrust::make_counting_iterator<size_type>(0),
Expand Down
2 changes: 1 addition & 1 deletion cpp/src/strings/convert/convert_datetime.cu
Original file line number Diff line number Diff line change
Expand Up @@ -958,7 +958,7 @@ std::unique_ptr<column> from_timestamps(column_view const& timestamps,
// build chars column
auto const bytes =
cudf::detail::get_value<int32_t>(offsets_column->view(), strings_count, stream);
auto chars_column = create_chars_child_column(strings_count, bytes, stream, mr);
auto chars_column = create_chars_child_column(bytes, stream, mr);
auto d_chars = chars_column->mutable_view().template data<char>();
// fill in chars column with timestamps
// dispatcher is called to handle the different timestamp types
Expand Down
2 changes: 1 addition & 1 deletion cpp/src/strings/convert/convert_durations.cu
Original file line number Diff line number Diff line change
Expand Up @@ -427,7 +427,7 @@ struct dispatch_from_durations_fn {
// build chars column
auto const chars_bytes =
cudf::detail::get_value<int32_t>(offsets_column->view(), strings_count, stream);
auto chars_column = detail::create_chars_child_column(strings_count, chars_bytes, stream, mr);
auto chars_column = detail::create_chars_child_column(chars_bytes, stream, mr);
auto d_chars = chars_column->mutable_view().template data<char>();

thrust::for_each_n(rmm::exec_policy(stream),
Expand Down
2 changes: 1 addition & 1 deletion cpp/src/strings/convert/convert_fixed_point.cu
Original file line number Diff line number Diff line change
Expand Up @@ -396,7 +396,7 @@ struct dispatch_from_fixed_point_fn {
// build chars column
auto const bytes =
cudf::detail::get_value<int32_t>(offsets_column->view(), input.size(), stream);
auto chars_column = detail::create_chars_child_column(input.size(), bytes, stream, mr);
auto chars_column = detail::create_chars_child_column(bytes, stream, mr);
auto d_chars = chars_column->mutable_view().template data<char>();
thrust::for_each_n(rmm::exec_policy(stream),
thrust::make_counting_iterator<size_type>(0),
Expand Down
2 changes: 1 addition & 1 deletion cpp/src/strings/convert/convert_floats.cu
Original file line number Diff line number Diff line change
Expand Up @@ -492,7 +492,7 @@ struct dispatch_from_floats_fn {

// build chars column
auto const bytes = cudf::detail::get_value<int32_t>(offsets_view, strings_count, stream);
auto chars_column = detail::create_chars_child_column(strings_count, bytes, stream, mr);
auto chars_column = detail::create_chars_child_column(bytes, stream, mr);
auto chars_view = chars_column->mutable_view();
auto d_chars = chars_view.template data<char>();
thrust::for_each_n(rmm::exec_policy(stream),
Expand Down
2 changes: 1 addition & 1 deletion cpp/src/strings/convert/convert_integers.cu
Original file line number Diff line number Diff line change
Expand Up @@ -350,7 +350,7 @@ struct dispatch_from_integers_fn {

// build chars column
auto const bytes = cudf::detail::get_value<int32_t>(offsets_view, strings_count, stream);
auto chars_column = detail::create_chars_child_column(strings_count, bytes, stream, mr);
auto chars_column = detail::create_chars_child_column(bytes, stream, mr);
auto chars_view = chars_column->mutable_view();
auto d_chars = chars_view.template data<char>();
thrust::for_each_n(rmm::exec_policy(stream),
Expand Down
2 changes: 1 addition & 1 deletion cpp/src/strings/convert/convert_ipv4.cu
Original file line number Diff line number Diff line change
Expand Up @@ -192,7 +192,7 @@ std::unique_ptr<column> integers_to_ipv4(
// build chars column
auto const bytes =
cudf::detail::get_value<int32_t>(offsets_column->view(), strings_count, stream);
auto chars_column = create_chars_child_column(strings_count, bytes, stream, mr);
auto chars_column = create_chars_child_column(bytes, stream, mr);
auto d_chars = chars_column->mutable_view().data<char>();
thrust::for_each_n(rmm::exec_policy(stream),
thrust::make_counting_iterator<size_type>(0),
Expand Down
5 changes: 2 additions & 3 deletions cpp/src/strings/convert/convert_urls.cu
Original file line number Diff line number Diff line change
Expand Up @@ -142,7 +142,7 @@ std::unique_ptr<column> url_encode(
auto const bytes =
cudf::detail::get_value<int32_t>(offsets_column->view(), strings_count, stream);
// build chars column
auto chars_column = create_chars_child_column(strings_count, bytes, stream, mr);
auto chars_column = create_chars_child_column(bytes, stream, mr);
auto d_chars = chars_column->mutable_view().data<char>();
thrust::for_each_n(rmm::exec_policy(stream),
thrust::make_counting_iterator<size_type>(0),
Expand Down Expand Up @@ -392,8 +392,7 @@ std::unique_ptr<column> url_decode(

// create the chars column
auto chars_column =
create_chars_child_column(strings_count,
chars_bytes - (esc_count * 2), // replacing 3 bytes with 1
create_chars_child_column(chars_bytes - (esc_count * 2), // replacing 3 bytes with 1
stream,
mr);
auto d_out_chars = chars_column->mutable_view().data<char>();
Expand Down
6 changes: 3 additions & 3 deletions cpp/src/strings/copying/concatenate.cu
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
#include <cudf/detail/utilities/cuda.cuh>
#include <cudf/detail/utilities/vector_factories.hpp>
#include <cudf/strings/detail/concatenate.hpp>
#include <cudf/strings/detail/utilities.hpp>
#include <cudf/strings/strings_column_view.hpp>
#include <cudf/table/table_device_view.cuh>

Expand Down Expand Up @@ -226,9 +227,8 @@ std::unique_ptr<column> concatenate(host_span<column_view const> columns,
std::any_of(columns.begin(), columns.end(), [](auto const& col) { return col.has_nulls(); });

// create chars column
auto chars_column =
make_numeric_column(data_type{type_id::INT8}, total_bytes, mask_state::UNALLOCATED, stream, mr);
auto d_new_chars = chars_column->mutable_view().data<char>();
auto chars_column = create_chars_child_column(total_bytes, stream, mr);
auto d_new_chars = chars_column->mutable_view().data<char>();
chars_column->set_null_count(0);

// create offsets column
Expand Down
2 changes: 1 addition & 1 deletion cpp/src/strings/filling/fill.cu
Original file line number Diff line number Diff line change
Expand Up @@ -80,7 +80,7 @@ std::unique_ptr<column> fill(
// create the chars column
auto const bytes =
cudf::detail::get_value<int32_t>(offsets_column->view(), strings_count, stream);
auto chars_column = strings::detail::create_chars_child_column(strings_count, bytes, stream, mr);
auto chars_column = create_chars_child_column(bytes, stream, mr);
// fill the chars column
auto d_chars = chars_column->mutable_view().data<char>();
thrust::for_each_n(
Expand Down
4 changes: 2 additions & 2 deletions cpp/src/strings/json/json_path.cu
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
#include <cudf/detail/utilities/cuda.cuh>
#include <cudf/detail/utilities/vector_factories.hpp>
#include <cudf/scalar/scalar.hpp>
#include <cudf/strings/detail/utilities.hpp>
#include <cudf/strings/json.hpp>
#include <cudf/strings/string_view.cuh>
#include <cudf/strings/strings_column_view.hpp>
Expand Down Expand Up @@ -964,8 +965,7 @@ std::unique_ptr<cudf::column> get_json_object(cudf::strings_column_view const& c
cudf::detail::get_value<offset_type>(offsets_view, col.size(), stream);

// allocate output string column
auto chars = cudf::make_fixed_width_column(
data_type{type_id::INT8}, output_size, mask_state::UNALLOCATED, stream, mr);
auto chars = create_chars_child_column(output_size, stream, mr);

// potential optimization : if we know that all outputs are valid, we could skip creating
// the validity mask altogether
Expand Down
4 changes: 2 additions & 2 deletions cpp/src/strings/padding.cu
Original file line number Diff line number Diff line change
Expand Up @@ -82,7 +82,7 @@ std::unique_ptr<column> pad(
// build chars column
auto const bytes =
cudf::detail::get_value<int32_t>(offsets_column->view(), strings_count, stream);
auto chars_column = strings::detail::create_chars_child_column(strings_count, bytes, stream, mr);
auto chars_column = strings::detail::create_chars_child_column(bytes, stream, mr);
auto d_chars = chars_column->mutable_view().data<char>();

if (side == pad_side::LEFT) {
Expand Down Expand Up @@ -170,7 +170,7 @@ std::unique_ptr<column> zfill(
// build chars column
auto const bytes =
cudf::detail::get_value<int32_t>(offsets_column->view(), strings_count, stream);
auto chars_column = strings::detail::create_chars_child_column(strings_count, bytes, stream, mr);
auto chars_column = strings::detail::create_chars_child_column(bytes, stream, mr);
auto d_chars = chars_column->mutable_view().data<char>();

thrust::for_each_n(rmm::exec_policy(stream),
Expand Down
2 changes: 1 addition & 1 deletion cpp/src/strings/repeat_strings.cu
Original file line number Diff line number Diff line change
Expand Up @@ -73,7 +73,7 @@ auto generate_empty_output(strings_column_view const& input,
rmm::cuda_stream_view stream,
rmm::mr::device_memory_resource* mr)
{
auto chars_column = create_chars_child_column(strings_count, 0, stream, mr);
auto chars_column = create_chars_child_column(0, stream, mr);

auto offsets_column = make_numeric_column(
data_type{type_to_id<offset_type>()}, strings_count + 1, mask_state::UNALLOCATED, stream, mr);
Expand Down
6 changes: 3 additions & 3 deletions cpp/src/strings/replace/replace.cu
Original file line number Diff line number Diff line change
Expand Up @@ -477,8 +477,8 @@ std::unique_ptr<column> replace_char_parallel(strings_column_view const& strings
offsets_update_fn);

// build the characters column
auto chars_column = create_chars_child_column(
strings_count, chars_bytes + (delta_per_target * target_count), stream, mr);
auto chars_column =
create_chars_child_column(chars_bytes + (delta_per_target * target_count), stream, mr);
auto d_out_chars = chars_column->mutable_view().data<char>();
thrust::for_each_n(
rmm::exec_policy(stream),
Expand Down Expand Up @@ -819,7 +819,7 @@ std::unique_ptr<column> replace_nulls(strings_column_view const& strings,
// build chars column
auto const bytes =
cudf::detail::get_value<int32_t>(offsets_column->view(), strings_count, stream);
auto chars_column = strings::detail::create_chars_child_column(strings_count, bytes, stream, mr);
auto chars_column = create_chars_child_column(bytes, stream, mr);
auto d_chars = chars_column->mutable_view().data<char>();
thrust::for_each_n(rmm::exec_policy(stream),
thrust::make_counting_iterator<size_type>(0),
Expand Down
6 changes: 2 additions & 4 deletions cpp/src/strings/utilities.cu
Original file line number Diff line number Diff line change
Expand Up @@ -84,8 +84,7 @@ std::unique_ptr<cudf::column> child_chars_from_string_vector(cudf::device_span<s
auto const d_offsets = offsets.data<int32_t>();

// create column
auto chars_column =
make_numeric_column(data_type{type_id::INT8}, bytes, mask_state::UNALLOCATED, stream, mr);
auto chars_column = create_chars_child_column(bytes, stream, mr);
// get it's view
auto d_chars = chars_column->mutable_view().data<char>();
thrust::for_each_n(rmm::exec_policy(stream),
Expand All @@ -100,8 +99,7 @@ std::unique_ptr<cudf::column> child_chars_from_string_vector(cudf::device_span<s
}

//
std::unique_ptr<column> create_chars_child_column(cudf::size_type strings_count,
cudf::size_type total_bytes,
std::unique_ptr<column> create_chars_child_column(cudf::size_type total_bytes,
rmm::cuda_stream_view stream,
rmm::mr::device_memory_resource* mr)
{
Expand Down
Loading