Skip to content

Commit

Permalink
Merge branch 'branch-24.02' into chars-size-threshold
Browse files Browse the repository at this point in the history
  • Loading branch information
davidwendt committed Jan 19, 2024
2 parents 8af2fb1 + 8fa2945 commit 114ad99
Show file tree
Hide file tree
Showing 66 changed files with 311 additions and 363 deletions.
4 changes: 2 additions & 2 deletions ci/build_docs.sh
Original file line number Diff line number Diff line change
Expand Up @@ -40,8 +40,8 @@ popd

rapids-logger "Build Python docs"
pushd docs/cudf
make dirhtml O="-j 4"
make text O="-j 4"
make dirhtml
make text
mkdir -p "${RAPIDS_DOCS_DIR}/cudf/"{html,txt}
mv build/dirhtml/* "${RAPIDS_DOCS_DIR}/cudf/html"
mv build/text/* "${RAPIDS_DOCS_DIR}/cudf/txt"
Expand Down
4 changes: 2 additions & 2 deletions cpp/benchmarks/common/generate_input.cu
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2020-2023, NVIDIA CORPORATION.
* Copyright (c) 2020-2024, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -540,7 +540,7 @@ std::unique_ptr<cudf::column> create_random_utf8_string_column(data_profile cons
return cudf::make_strings_column(
num_rows,
std::make_unique<cudf::column>(std::move(offsets), rmm::device_buffer{}, 0),
std::make_unique<cudf::column>(std::move(chars), rmm::device_buffer{}, 0),
chars.release(),
null_count,
profile.get_null_probability().has_value() ? std::move(result_bitmask) : rmm::device_buffer{});
}
Expand Down
4 changes: 2 additions & 2 deletions cpp/benchmarks/json/json.cu
Original file line number Diff line number Diff line change
Expand Up @@ -177,10 +177,10 @@ auto build_json_string_column(int desired_bytes, int num_rows)
auto d_store_order = cudf::column_device_view::create(float_2bool_columns->get_column(2));
json_benchmark_row_builder jb{
desired_bytes, num_rows, {*d_books, *d_bicycles}, *d_book_pct, *d_misc_order, *d_store_order};
auto children = cudf::strings::detail::make_strings_children(
auto [offsets, chars] = cudf::strings::detail::make_strings_children(
jb, num_rows, cudf::get_default_stream(), rmm::mr::get_current_device_resource());
return cudf::make_strings_column(
num_rows, std::move(children.first), std::move(children.second), 0, {});
num_rows, std::move(offsets), std::move(chars->release().data.release()[0]), 0, {});
}

void BM_case(benchmark::State& state, std::string query_arg)
Expand Down
4 changes: 2 additions & 2 deletions cpp/include/cudf/strings/detail/copy_if_else.cuh
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2019-2023, NVIDIA CORPORATION.
* Copyright (c) 2019-2024, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -109,7 +109,7 @@ std::unique_ptr<cudf::column> copy_if_else(StringIterLeft lhs_begin,

return make_strings_column(strings_count,
std::move(offsets_column),
std::move(chars_column),
std::move(chars_column->release().data.release()[0]),
null_count,
std::move(null_mask));
}
Expand Down
4 changes: 2 additions & 2 deletions cpp/include/cudf/strings/detail/copy_range.cuh
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2019-2023, NVIDIA CORPORATION.
* Copyright (c) 2019-2024, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -205,7 +205,7 @@ std::unique_ptr<column> copy_range(SourceValueIterator source_value_begin,

return make_strings_column(target.size(),
std::move(p_offsets_column),
std::move(p_chars_column),
std::move(p_chars_column->release().data.release()[0]),
null_count,
std::move(null_mask));
}
Expand Down
2 changes: 1 addition & 1 deletion cpp/include/cudf/strings/detail/gather.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -321,7 +321,7 @@ std::unique_ptr<cudf::column> gather(strings_column_view const& strings,
return make_strings_column(output_count,
std::move(out_offsets_column),
std::move(out_chars_column),
std::move(out_chars_column->release().data.release()[0]),
0, // caller sets these
rmm::device_buffer{});
}
Expand Down
14 changes: 5 additions & 9 deletions cpp/include/cudf/strings/detail/merge.cuh
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2019-2023, NVIDIA CORPORATION.
* Copyright (c) 2019-2024, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -89,9 +89,8 @@ std::unique_ptr<column> merge(strings_column_view const& lhs,
auto d_offsets = offsets_column->view().template data<int32_t>();

// create the chars column
auto chars_column = strings::detail::create_chars_child_column(bytes, stream, mr);
// merge the strings
auto d_chars = chars_column->mutable_view().template data<char>();
rmm::device_uvector<char> chars(bytes, stream, mr);
auto d_chars = chars.data();
thrust::for_each_n(rmm::exec_policy(stream),
thrust::make_counting_iterator<size_type>(0),
strings_count,
Expand All @@ -103,11 +102,8 @@ std::unique_ptr<column> merge(strings_column_view const& lhs,
memcpy(d_chars + d_offsets[idx], d_str.data(), d_str.size_bytes());
});

return make_strings_column(strings_count,
std::move(offsets_column),
std::move(chars_column),
null_count,
std::move(null_mask));
return make_strings_column(
strings_count, std::move(offsets_column), chars.release(), null_count, std::move(null_mask));
}

} // namespace detail
Expand Down
9 changes: 4 additions & 5 deletions cpp/include/cudf/strings/detail/strings_column_factories.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -137,7 +137,7 @@ std::unique_ptr<column> make_strings_column(IndexPairIterator begin,

return make_strings_column(strings_count,
std::move(offsets_column),
std::move(chars_column),
std::move(chars_column->release().data.release()[0]),
null_count,
std::move(null_mask));
}
Expand Down Expand Up @@ -187,13 +187,12 @@ std::unique_ptr<column> make_strings_column(CharIterator chars_begin,
[] __device__(auto offset) { return static_cast<int32_t>(offset); }));

// build chars column
auto chars_column = strings::detail::create_chars_child_column(bytes, stream, mr);
auto chars_view = chars_column->mutable_view();
thrust::copy(rmm::exec_policy(stream), chars_begin, chars_end, chars_view.data<char>());
rmm::device_uvector<char> chars_data(bytes, stream, mr);
thrust::copy(rmm::exec_policy(stream), chars_begin, chars_end, chars_data.begin());

return make_strings_column(strings_count,
std::move(offsets_column),
std::move(chars_column),
chars_data.release(),
null_count,
std::move(null_mask));
}
Expand Down
32 changes: 17 additions & 15 deletions cpp/include/cudf_test/column_wrapper.hpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2019-2023, NVIDIA CORPORATION.
* Copyright (c) 2019-2024, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -757,20 +757,21 @@ class strings_column_wrapper : public detail::column_wrapper {
strings_column_wrapper(StringsIterator begin, StringsIterator end) : column_wrapper{}
{
size_type num_strings = std::distance(begin, end);
if (num_strings == 0) {
wrapped = cudf::make_empty_column(cudf::type_id::STRING);
return;
}
auto all_valid = thrust::make_constant_iterator(true);
auto [chars, offsets] = detail::make_chars_and_offsets(begin, end, all_valid);
auto d_chars = std::make_unique<cudf::column>(
cudf::detail::make_device_uvector_sync(
chars, cudf::test::get_default_stream(), rmm::mr::get_current_device_resource()),
rmm::device_buffer{},
0);
auto d_chars = cudf::detail::make_device_uvector_async(
chars, cudf::test::get_default_stream(), rmm::mr::get_current_device_resource());
auto d_offsets = std::make_unique<cudf::column>(
cudf::detail::make_device_uvector_sync(
offsets, cudf::test::get_default_stream(), rmm::mr::get_current_device_resource()),
rmm::device_buffer{},
0);
wrapped =
cudf::make_strings_column(num_strings, std::move(d_offsets), std::move(d_chars), 0, {});
cudf::make_strings_column(num_strings, std::move(d_offsets), d_chars.release(), 0, {});
}

/**
Expand Down Expand Up @@ -805,23 +806,24 @@ class strings_column_wrapper : public detail::column_wrapper {
strings_column_wrapper(StringsIterator begin, StringsIterator end, ValidityIterator v)
: column_wrapper{}
{
size_type num_strings = std::distance(begin, end);
size_type num_strings = std::distance(begin, end);
if (num_strings == 0) {
wrapped = cudf::make_empty_column(cudf::type_id::STRING);
return;
}
auto [chars, offsets] = detail::make_chars_and_offsets(begin, end, v);
auto [null_mask, null_count] = detail::make_null_mask_vector(v, v + num_strings);
auto d_chars = std::make_unique<cudf::column>(
cudf::detail::make_device_uvector_sync(
chars, cudf::test::get_default_stream(), rmm::mr::get_current_device_resource()),
rmm::device_buffer{},
0);
auto d_chars = cudf::detail::make_device_uvector_async(
chars, cudf::test::get_default_stream(), rmm::mr::get_current_device_resource());
auto d_offsets = std::make_unique<cudf::column>(
cudf::detail::make_device_uvector_sync(
cudf::detail::make_device_uvector_async(
offsets, cudf::test::get_default_stream(), rmm::mr::get_current_device_resource()),
rmm::device_buffer{},
0);
auto d_bitmask = cudf::detail::make_device_uvector_sync(
null_mask, cudf::test::get_default_stream(), rmm::mr::get_current_device_resource());
wrapped = cudf::make_strings_column(
num_strings, std::move(d_offsets), std::move(d_chars), null_count, d_bitmask.release());
num_strings, std::move(d_offsets), d_chars.release(), null_count, d_bitmask.release());
}

/**
Expand Down
10 changes: 4 additions & 6 deletions cpp/src/hash/md5_hash.cu
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2019-2023, NVIDIA CORPORATION.
* Copyright (c) 2019-2024, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -333,9 +333,8 @@ std::unique_ptr<column> md5(table_view const& input,
auto [offsets_column, bytes] =
cudf::detail::make_offsets_child_column(begin, begin + input.num_rows(), stream, mr);

auto chars_column = strings::detail::create_chars_child_column(bytes, stream, mr);
auto chars_view = chars_column->mutable_view();
auto d_chars = chars_view.data<char>();
rmm::device_uvector<char> chars(bytes, stream, mr);
auto d_chars = chars.data();

auto const device_input = table_device_view::create(input, stream);

Expand Down Expand Up @@ -366,8 +365,7 @@ std::unique_ptr<column> md5(table_view const& input,
}
});

return make_strings_column(
input.num_rows(), std::move(offsets_column), std::move(chars_column), 0, {});
return make_strings_column(input.num_rows(), std::move(offsets_column), chars.release(), 0, {});
}

} // namespace detail
Expand Down
4 changes: 2 additions & 2 deletions cpp/src/interop/from_arrow.cu
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2020-2023, NVIDIA CORPORATION.
* Copyright (c) 2020-2024, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -290,7 +290,7 @@ std::unique_ptr<column> dispatch_to_cudf_column::operator()<cudf::string_view>(
auto const num_rows = offsets_column->size() - 1;
auto out_col = make_strings_column(num_rows,
std::move(offsets_column),
std::move(chars_column),
std::move(chars_column->release().data.release()[0]),
array.null_count(),
std::move(*get_mask_buffer(array, stream, mr)));

Expand Down
4 changes: 2 additions & 2 deletions cpp/src/io/csv/durations.cu
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2020-2023, NVIDIA CORPORATION.
* Copyright (c) 2020-2024, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -202,7 +202,7 @@ struct dispatch_from_durations_fn {
//
return make_strings_column(strings_count,
std::move(offsets_column),
std::move(chars_column),
std::move(chars_column->release().data.release()[0]),
durations.null_count(),
std::move(null_mask));
}
Expand Down
7 changes: 4 additions & 3 deletions cpp/src/io/csv/writer_impl.cu
Original file line number Diff line number Diff line change
Expand Up @@ -181,11 +181,12 @@ struct column_to_strings_fn {

auto d_column = column_device_view::create(column_v, stream_);
escape_strings_fn fn{*d_column, delimiter.value(stream_)};
auto children = cudf::strings::detail::make_strings_children(fn, column_v.size(), stream_, mr_);
auto [offsets_column, chars_column] =
cudf::strings::detail::make_strings_children(fn, column_v.size(), stream_, mr_);

return make_strings_column(column_v.size(),
std::move(children.first),
std::move(children.second),
std::move(offsets_column),
std::move(chars_column->release().data.release()[0]),
column_v.null_count(),
cudf::detail::copy_bitmask(column_v, stream_, mr_));
}
Expand Down
34 changes: 16 additions & 18 deletions cpp/src/io/json/legacy/reader_impl.cu
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2020-2023, NVIDIA CORPORATION.
* Copyright (c) 2020-2024, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -530,29 +530,27 @@ table_with_metadata convert_data_to_table(parse_options_view const& parse_opts,
auto repl_chars = std::vector<char>{'"', '\\', '\t', '\r', '\b'};
auto repl_offsets = std::vector<size_type>{0, 1, 2, 3, 4, 5};

auto target = make_strings_column(
static_cast<size_type>(target_offsets.size() - 1),
std::make_unique<cudf::column>(
cudf::detail::make_device_uvector_async(
target_offsets, stream, rmm::mr::get_current_device_resource()),
rmm::device_buffer{},
0),
std::make_unique<cudf::column>(cudf::detail::make_device_uvector_async(
target_chars, stream, rmm::mr::get_current_device_resource()),
rmm::device_buffer{},
0),
0,
{});
auto target =
make_strings_column(static_cast<size_type>(target_offsets.size() - 1),
std::make_unique<cudf::column>(
cudf::detail::make_device_uvector_async(
target_offsets, stream, rmm::mr::get_current_device_resource()),
rmm::device_buffer{},
0),
cudf::detail::make_device_uvector_async(
target_chars, stream, rmm::mr::get_current_device_resource())
.release(),
0,
{});
auto repl = make_strings_column(
static_cast<size_type>(repl_offsets.size() - 1),
std::make_unique<cudf::column>(cudf::detail::make_device_uvector_async(
repl_offsets, stream, rmm::mr::get_current_device_resource()),
rmm::device_buffer{},
0),
std::make_unique<cudf::column>(cudf::detail::make_device_uvector_async(
repl_chars, stream, rmm::mr::get_current_device_resource()),
rmm::device_buffer{},
0),
cudf::detail::make_device_uvector_async(
repl_chars, stream, rmm::mr::get_current_device_resource())
.release(),
0,
{});

Expand Down
24 changes: 8 additions & 16 deletions cpp/src/io/json/write_json.cu
Original file line number Diff line number Diff line change
Expand Up @@ -170,12 +170,12 @@ struct escape_strings_fn {
rmm::cuda_stream_view stream,
rmm::mr::device_memory_resource* mr)
{
auto children =
auto [offsets_column, chars_column] =
cudf::strings::detail::make_strings_children(*this, column_v.size(), stream, mr);

return make_strings_column(column_v.size(),
std::move(children.first),
std::move(children.second),
std::move(offsets_column),
std::move(chars_column->release().data.release()[0]),
column_v.null_count(),
cudf::detail::copy_bitmask(column_v, stream, mr));
}
Expand Down Expand Up @@ -347,13 +347,11 @@ std::unique_ptr<column> struct_to_strings(table_view const& strings_columns,
d_strview_offsets + row_string_offsets.size(),
old_offsets.begin<size_type>(),
row_string_offsets.begin());
auto chars_data = joined_col->release().data;
auto const chars_size = chars_data->size();
auto chars_data = joined_col->release().data;
return make_strings_column(
strings_columns.num_rows(),
std::make_unique<cudf::column>(std::move(row_string_offsets), rmm::device_buffer{}, 0),
std::make_unique<cudf::column>(
data_type{type_id::INT8}, chars_size, std::move(*chars_data), rmm::device_buffer{}, 0),
std::move(chars_data.release()[0]),
0,
{});
}
Expand Down Expand Up @@ -472,13 +470,11 @@ std::unique_ptr<column> join_list_of_strings(lists_column_view const& lists_stri
d_strview_offsets.end(),
old_offsets.begin<size_type>(),
row_string_offsets.begin());
auto chars_data = joined_col->release().data;
auto const chars_size = chars_data->size();
auto chars_data = joined_col->release().data;
return make_strings_column(
num_lists,
std::make_unique<cudf::column>(std::move(row_string_offsets), rmm::device_buffer{}, 0),
std::make_unique<cudf::column>(
data_type{type_id::INT8}, chars_size, std::move(*chars_data), rmm::device_buffer{}, 0),
std::move(chars_data.release()[0]),
lists_strings.null_count(),
cudf::detail::copy_bitmask(lists_strings.parent(), stream, mr));
}
Expand Down Expand Up @@ -780,11 +776,7 @@ std::unique_ptr<column> make_strings_column_from_host(host_span<std::string cons
rmm::device_buffer{},
0);
return cudf::make_strings_column(
host_strings.size(),
std::move(d_offsets),
std::make_unique<cudf::column>(std::move(d_chars), rmm::device_buffer{}, 0),
0,
{});
host_strings.size(), std::move(d_offsets), d_chars.release(), 0, {});
}

std::unique_ptr<column> make_column_names_column(host_span<column_name_info const> column_names,
Expand Down
Loading

0 comments on commit 114ad99

Please sign in to comment.