Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix calls to deprecated strings factory API #14771

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions cpp/benchmarks/common/generate_input.cu
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2020-2023, NVIDIA CORPORATION.
* Copyright (c) 2020-2024, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -540,7 +540,7 @@ std::unique_ptr<cudf::column> create_random_utf8_string_column(data_profile cons
return cudf::make_strings_column(
num_rows,
std::make_unique<cudf::column>(std::move(offsets), rmm::device_buffer{}, 0),
std::make_unique<cudf::column>(std::move(chars), rmm::device_buffer{}, 0),
chars.release(),
null_count,
profile.get_null_probability().has_value() ? std::move(result_bitmask) : rmm::device_buffer{});
}
Expand Down
4 changes: 2 additions & 2 deletions cpp/benchmarks/json/json.cu
Original file line number Diff line number Diff line change
Expand Up @@ -177,10 +177,10 @@ auto build_json_string_column(int desired_bytes, int num_rows)
auto d_store_order = cudf::column_device_view::create(float_2bool_columns->get_column(2));
json_benchmark_row_builder jb{
desired_bytes, num_rows, {*d_books, *d_bicycles}, *d_book_pct, *d_misc_order, *d_store_order};
auto children = cudf::strings::detail::make_strings_children(
auto [offsets, chars] = cudf::strings::detail::make_strings_children(
jb, num_rows, cudf::get_default_stream(), rmm::mr::get_current_device_resource());
return cudf::make_strings_column(
num_rows, std::move(children.first), std::move(children.second), 0, {});
num_rows, std::move(offsets), std::move(chars->release().data.release()[0]), 0, {});
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The repeating pattern chars->release().data.release()[0] throughout this PR is a bit awkward. This relates to my comment about refactoring make_strings_children to return a buffer.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Hopefully the awkwardness is only temporary once the new utility is created.

}

void BM_case(benchmark::State& state, std::string query_arg)
Expand Down
4 changes: 2 additions & 2 deletions cpp/include/cudf/strings/detail/copy_if_else.cuh
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2019-2023, NVIDIA CORPORATION.
* Copyright (c) 2019-2024, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -109,7 +109,7 @@ std::unique_ptr<cudf::column> copy_if_else(StringIterLeft lhs_begin,

return make_strings_column(strings_count,
std::move(offsets_column),
std::move(chars_column),
std::move(chars_column->release().data.release()[0]),
null_count,
std::move(null_mask));
}
Expand Down
4 changes: 2 additions & 2 deletions cpp/include/cudf/strings/detail/copy_range.cuh
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2019-2023, NVIDIA CORPORATION.
* Copyright (c) 2019-2024, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -205,7 +205,7 @@ std::unique_ptr<column> copy_range(SourceValueIterator source_value_begin,

return make_strings_column(target.size(),
std::move(p_offsets_column),
std::move(p_chars_column),
std::move(p_chars_column->release().data.release()[0]),
null_count,
std::move(null_mask));
}
Expand Down
2 changes: 1 addition & 1 deletion cpp/include/cudf/strings/detail/gather.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -321,7 +321,7 @@ std::unique_ptr<cudf::column> gather(strings_column_view const& strings,

return make_strings_column(output_count,
std::move(out_offsets_column),
std::move(out_chars_column),
std::move(out_chars_column->release().data.release()[0]),
0, // caller sets these
rmm::device_buffer{});
}
Expand Down
14 changes: 5 additions & 9 deletions cpp/include/cudf/strings/detail/merge.cuh
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2019-2023, NVIDIA CORPORATION.
* Copyright (c) 2019-2024, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -89,9 +89,8 @@ std::unique_ptr<column> merge(strings_column_view const& lhs,
auto d_offsets = offsets_column->view().template data<int32_t>();

// create the chars column
auto chars_column = strings::detail::create_chars_child_column(bytes, stream, mr);
// merge the strings
auto d_chars = chars_column->mutable_view().template data<char>();
rmm::device_uvector<char> chars(bytes, stream, mr);
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why do we prefer a device_uvector instead of a raw buffer here, given that we want to hold character buffers at the end? (This pattern reoccurs.)

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

On further thought, maybe it's just nicer to have the typed container than a raw buffer. It plays well with our sync/async vector factories and the .release() is fairly easy to deal with when you're ready for a buffer. I'm okay with keeping this, just interested in hearing your thoughts.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Typing aids in handling the remaining code that would have needed casting. Additionally, the inline call to release() doesn't necessitate a call to std::move.

auto d_chars = chars.data();
thrust::for_each_n(rmm::exec_policy(stream),
thrust::make_counting_iterator<size_type>(0),
strings_count,
Expand All @@ -103,11 +102,8 @@ std::unique_ptr<column> merge(strings_column_view const& lhs,
memcpy(d_chars + d_offsets[idx], d_str.data(), d_str.size_bytes());
});

return make_strings_column(strings_count,
std::move(offsets_column),
std::move(chars_column),
null_count,
std::move(null_mask));
return make_strings_column(
strings_count, std::move(offsets_column), chars.release(), null_count, std::move(null_mask));
}

} // namespace detail
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -137,7 +137,7 @@ std::unique_ptr<column> make_strings_column(IndexPairIterator begin,

return make_strings_column(strings_count,
std::move(offsets_column),
std::move(chars_column),
std::move(chars_column->release().data.release()[0]),
null_count,
std::move(null_mask));
}
Expand Down Expand Up @@ -187,13 +187,12 @@ std::unique_ptr<column> make_strings_column(CharIterator chars_begin,
[] __device__(auto offset) { return static_cast<int32_t>(offset); }));

// build chars column
auto chars_column = strings::detail::create_chars_child_column(bytes, stream, mr);
auto chars_view = chars_column->mutable_view();
thrust::copy(rmm::exec_policy(stream), chars_begin, chars_end, chars_view.data<char>());
rmm::device_uvector<char> chars_data(bytes, stream, mr);
thrust::copy(rmm::exec_policy(stream), chars_begin, chars_end, chars_data.begin());

return make_strings_column(strings_count,
std::move(offsets_column),
std::move(chars_column),
chars_data.release(),
null_count,
std::move(null_mask));
}
Expand Down
32 changes: 17 additions & 15 deletions cpp/include/cudf_test/column_wrapper.hpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2019-2023, NVIDIA CORPORATION.
* Copyright (c) 2019-2024, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -757,20 +757,21 @@ class strings_column_wrapper : public detail::column_wrapper {
strings_column_wrapper(StringsIterator begin, StringsIterator end) : column_wrapper{}
{
size_type num_strings = std::distance(begin, end);
if (num_strings == 0) {
wrapped = cudf::make_empty_column(cudf::type_id::STRING);
return;
}
auto all_valid = thrust::make_constant_iterator(true);
auto [chars, offsets] = detail::make_chars_and_offsets(begin, end, all_valid);
auto d_chars = std::make_unique<cudf::column>(
cudf::detail::make_device_uvector_sync(
chars, cudf::test::get_default_stream(), rmm::mr::get_current_device_resource()),
rmm::device_buffer{},
0);
auto d_chars = cudf::detail::make_device_uvector_async(
chars, cudf::test::get_default_stream(), rmm::mr::get_current_device_resource());
auto d_offsets = std::make_unique<cudf::column>(
cudf::detail::make_device_uvector_sync(
offsets, cudf::test::get_default_stream(), rmm::mr::get_current_device_resource()),
rmm::device_buffer{},
0);
wrapped =
cudf::make_strings_column(num_strings, std::move(d_offsets), std::move(d_chars), 0, {});
cudf::make_strings_column(num_strings, std::move(d_offsets), d_chars.release(), 0, {});
}

/**
Expand Down Expand Up @@ -805,23 +806,24 @@ class strings_column_wrapper : public detail::column_wrapper {
strings_column_wrapper(StringsIterator begin, StringsIterator end, ValidityIterator v)
: column_wrapper{}
{
size_type num_strings = std::distance(begin, end);
size_type num_strings = std::distance(begin, end);
if (num_strings == 0) {
wrapped = cudf::make_empty_column(cudf::type_id::STRING);
return;
}
auto [chars, offsets] = detail::make_chars_and_offsets(begin, end, v);
auto [null_mask, null_count] = detail::make_null_mask_vector(v, v + num_strings);
auto d_chars = std::make_unique<cudf::column>(
cudf::detail::make_device_uvector_sync(
chars, cudf::test::get_default_stream(), rmm::mr::get_current_device_resource()),
rmm::device_buffer{},
0);
auto d_chars = cudf::detail::make_device_uvector_async(
chars, cudf::test::get_default_stream(), rmm::mr::get_current_device_resource());
auto d_offsets = std::make_unique<cudf::column>(
cudf::detail::make_device_uvector_sync(
cudf::detail::make_device_uvector_async(
offsets, cudf::test::get_default_stream(), rmm::mr::get_current_device_resource()),
rmm::device_buffer{},
0);
auto d_bitmask = cudf::detail::make_device_uvector_sync(
null_mask, cudf::test::get_default_stream(), rmm::mr::get_current_device_resource());
wrapped = cudf::make_strings_column(
num_strings, std::move(d_offsets), std::move(d_chars), null_count, d_bitmask.release());
num_strings, std::move(d_offsets), d_chars.release(), null_count, d_bitmask.release());
}

/**
Expand Down
10 changes: 4 additions & 6 deletions cpp/src/hash/md5_hash.cu
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2019-2023, NVIDIA CORPORATION.
* Copyright (c) 2019-2024, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -333,9 +333,8 @@ std::unique_ptr<column> md5(table_view const& input,
auto [offsets_column, bytes] =
cudf::detail::make_offsets_child_column(begin, begin + input.num_rows(), stream, mr);

auto chars_column = strings::detail::create_chars_child_column(bytes, stream, mr);
auto chars_view = chars_column->mutable_view();
auto d_chars = chars_view.data<char>();
rmm::device_uvector<char> chars(bytes, stream, mr);
auto d_chars = chars.data();

auto const device_input = table_device_view::create(input, stream);

Expand Down Expand Up @@ -366,8 +365,7 @@ std::unique_ptr<column> md5(table_view const& input,
}
});

return make_strings_column(
input.num_rows(), std::move(offsets_column), std::move(chars_column), 0, {});
return make_strings_column(input.num_rows(), std::move(offsets_column), chars.release(), 0, {});
}

} // namespace detail
Expand Down
4 changes: 2 additions & 2 deletions cpp/src/interop/from_arrow.cu
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2020-2023, NVIDIA CORPORATION.
* Copyright (c) 2020-2024, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -290,7 +290,7 @@ std::unique_ptr<column> dispatch_to_cudf_column::operator()<cudf::string_view>(
auto const num_rows = offsets_column->size() - 1;
auto out_col = make_strings_column(num_rows,
std::move(offsets_column),
std::move(chars_column),
std::move(chars_column->release().data.release()[0]),
array.null_count(),
std::move(*get_mask_buffer(array, stream, mr)));

Expand Down
4 changes: 2 additions & 2 deletions cpp/src/io/csv/durations.cu
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2020-2023, NVIDIA CORPORATION.
* Copyright (c) 2020-2024, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -202,7 +202,7 @@ struct dispatch_from_durations_fn {
//
return make_strings_column(strings_count,
std::move(offsets_column),
std::move(chars_column),
std::move(chars_column->release().data.release()[0]),
durations.null_count(),
std::move(null_mask));
}
Expand Down
7 changes: 4 additions & 3 deletions cpp/src/io/csv/writer_impl.cu
Original file line number Diff line number Diff line change
Expand Up @@ -181,11 +181,12 @@ struct column_to_strings_fn {

auto d_column = column_device_view::create(column_v, stream_);
escape_strings_fn fn{*d_column, delimiter.value(stream_)};
auto children = cudf::strings::detail::make_strings_children(fn, column_v.size(), stream_, mr_);
auto [offsets_column, chars_column] =
cudf::strings::detail::make_strings_children(fn, column_v.size(), stream_, mr_);

return make_strings_column(column_v.size(),
std::move(children.first),
std::move(children.second),
std::move(offsets_column),
std::move(chars_column->release().data.release()[0]),
column_v.null_count(),
cudf::detail::copy_bitmask(column_v, stream_, mr_));
}
Expand Down
34 changes: 16 additions & 18 deletions cpp/src/io/json/legacy/reader_impl.cu
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2020-2023, NVIDIA CORPORATION.
* Copyright (c) 2020-2024, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -530,29 +530,27 @@ table_with_metadata convert_data_to_table(parse_options_view const& parse_opts,
auto repl_chars = std::vector<char>{'"', '\\', '\t', '\r', '\b'};
auto repl_offsets = std::vector<size_type>{0, 1, 2, 3, 4, 5};

auto target = make_strings_column(
static_cast<size_type>(target_offsets.size() - 1),
std::make_unique<cudf::column>(
cudf::detail::make_device_uvector_async(
target_offsets, stream, rmm::mr::get_current_device_resource()),
rmm::device_buffer{},
0),
std::make_unique<cudf::column>(cudf::detail::make_device_uvector_async(
target_chars, stream, rmm::mr::get_current_device_resource()),
rmm::device_buffer{},
0),
0,
{});
auto target =
make_strings_column(static_cast<size_type>(target_offsets.size() - 1),
std::make_unique<cudf::column>(
cudf::detail::make_device_uvector_async(
target_offsets, stream, rmm::mr::get_current_device_resource()),
rmm::device_buffer{},
0),
cudf::detail::make_device_uvector_async(
target_chars, stream, rmm::mr::get_current_device_resource())
.release(),
0,
{});
auto repl = make_strings_column(
static_cast<size_type>(repl_offsets.size() - 1),
std::make_unique<cudf::column>(cudf::detail::make_device_uvector_async(
repl_offsets, stream, rmm::mr::get_current_device_resource()),
rmm::device_buffer{},
0),
std::make_unique<cudf::column>(cudf::detail::make_device_uvector_async(
repl_chars, stream, rmm::mr::get_current_device_resource()),
rmm::device_buffer{},
0),
cudf::detail::make_device_uvector_async(
repl_chars, stream, rmm::mr::get_current_device_resource())
.release(),
0,
{});

Expand Down
24 changes: 8 additions & 16 deletions cpp/src/io/json/write_json.cu
Original file line number Diff line number Diff line change
Expand Up @@ -170,12 +170,12 @@ struct escape_strings_fn {
rmm::cuda_stream_view stream,
rmm::mr::device_memory_resource* mr)
{
auto children =
auto [offsets_column, chars_column] =
cudf::strings::detail::make_strings_children(*this, column_v.size(), stream, mr);

return make_strings_column(column_v.size(),
std::move(children.first),
std::move(children.second),
std::move(offsets_column),
std::move(chars_column->release().data.release()[0]),
column_v.null_count(),
cudf::detail::copy_bitmask(column_v, stream, mr));
}
Expand Down Expand Up @@ -347,13 +347,11 @@ std::unique_ptr<column> struct_to_strings(table_view const& strings_columns,
d_strview_offsets + row_string_offsets.size(),
old_offsets.begin<size_type>(),
row_string_offsets.begin());
auto chars_data = joined_col->release().data;
auto const chars_size = chars_data->size();
auto chars_data = joined_col->release().data;
return make_strings_column(
strings_columns.num_rows(),
std::make_unique<cudf::column>(std::move(row_string_offsets), rmm::device_buffer{}, 0),
std::make_unique<cudf::column>(
data_type{type_id::INT8}, chars_size, std::move(*chars_data), rmm::device_buffer{}, 0),
std::move(chars_data.release()[0]),
0,
{});
}
Expand Down Expand Up @@ -472,13 +470,11 @@ std::unique_ptr<column> join_list_of_strings(lists_column_view const& lists_stri
d_strview_offsets.end(),
old_offsets.begin<size_type>(),
row_string_offsets.begin());
auto chars_data = joined_col->release().data;
auto const chars_size = chars_data->size();
auto chars_data = joined_col->release().data;
return make_strings_column(
num_lists,
std::make_unique<cudf::column>(std::move(row_string_offsets), rmm::device_buffer{}, 0),
std::make_unique<cudf::column>(
data_type{type_id::INT8}, chars_size, std::move(*chars_data), rmm::device_buffer{}, 0),
std::move(chars_data.release()[0]),
lists_strings.null_count(),
cudf::detail::copy_bitmask(lists_strings.parent(), stream, mr));
}
Expand Down Expand Up @@ -780,11 +776,7 @@ std::unique_ptr<column> make_strings_column_from_host(host_span<std::string cons
rmm::device_buffer{},
0);
return cudf::make_strings_column(
host_strings.size(),
std::move(d_offsets),
std::make_unique<cudf::column>(std::move(d_chars), rmm::device_buffer{}, 0),
0,
{});
host_strings.size(), std::move(d_offsets), d_chars.release(), 0, {});
}

std::unique_ptr<column> make_column_names_column(host_span<column_name_info const> column_names,
Expand Down
Loading
Loading