Skip to content

Commit

Permalink
Add device_memory_resource parameter to create_string_vector_from_col…
Browse files Browse the repository at this point in the history
…umn (rapidsai#10673)

Adds the `rmm::mr::device_memory_resource` parameter to the `cudf::strings::detail::create_string_vector_from_column` function. This function will be called by a future API in a later PR, and the resulting memory object will be returned to the user.

Also found and removed a few related functions that are no longer necessary, and updated the callers accordingly, simplifying the logic there.

Authors:
  - David Wendt (https://github.com/davidwendt)

Approvers:
  - Nghia Truong (https://github.com/ttnghia)
  - Bradley Dice (https://github.com/bdice)

URL: rapidsai#10673
  • Loading branch information
davidwendt authored Apr 19, 2022
1 parent ba1173d commit 08cd428
Show file tree
Hide file tree
Showing 5 changed files with 33 additions and 156 deletions.
21 changes: 6 additions & 15 deletions cpp/include/cudf/strings/detail/scatter.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -15,14 +15,13 @@
*/
#pragma once

#include <cudf/column/column.hpp>
#include <cudf/column/column_device_view.cuh>
#include <cudf/detail/null_mask.hpp>
#include <cudf/strings/detail/utilities.cuh>
#include <cudf/column/column_factories.hpp>
#include <cudf/strings/detail/utilities.hpp>
#include <cudf/strings/strings_column_view.hpp>
#include <cudf/utilities/span.hpp>

#include <rmm/cuda_stream_view.hpp>
#include <rmm/device_uvector.hpp>
#include <rmm/exec_policy.hpp>

#include <thrust/scatter.h>
Expand Down Expand Up @@ -71,17 +70,9 @@ std::unique_ptr<column> scatter(
// do the scatter
thrust::scatter(rmm::exec_policy(stream), begin, end, scatter_map, target_vector.begin());

// build offsets column
auto offsets_column = child_offsets_from_string_vector(target_vector, stream, mr);
// build chars column
auto chars_column =
child_chars_from_string_vector(target_vector, offsets_column->view(), stream, mr);

return make_strings_column(target.size(),
std::move(offsets_column),
std::move(chars_column),
UNKNOWN_NULL_COUNT,
cudf::detail::copy_bitmask(target.parent(), stream, mr));
// build the output column
auto sv_span = cudf::device_span<string_view const>(target_vector);
return make_strings_column(sv_span, string_view{nullptr, 0}, stream, mr);
}

} // namespace detail
Expand Down
22 changes: 0 additions & 22 deletions cpp/include/cudf/strings/detail/utilities.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -71,28 +71,6 @@ std::unique_ptr<column> make_offsets_child_column(
return offsets_column;
}

/**
* @brief Creates an offsets column from a string_view iterator, and size.
*
* Each string_view in the sequence is mapped to its size in bytes, and the
* resulting sequence of sizes is passed to make_offsets_child_column to
* produce the returned column.
*
* @tparam Iter Iterator type that returns string_view instances
* @param strings_begin Iterator to the beginning of the string_view sequence
* @param num_strings The number of string_view instances in the sequence
* @param stream CUDA stream used for device memory operations and kernel launches.
* @param mr Device memory resource used to allocate the returned column's device memory.
* @return Child offsets column
*/
template <typename Iter>
std::unique_ptr<cudf::column> child_offsets_from_string_iterator(
Iter strings_begin,
cudf::size_type num_strings,
rmm::cuda_stream_view stream,
rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource())
{
// Device lambda that yields the byte length of a single string_view.
auto transformer = [] __device__(string_view v) { return v.size_bytes(); };
// Wrap the input iterator so that dereferencing it produces byte sizes
// instead of string_view instances.
auto begin = thrust::make_transform_iterator(strings_begin, transformer);
// The range [begin, begin + num_strings) covers one size per input string.
return make_offsets_child_column(begin, begin + num_strings, stream, mr);
}

/**
* @brief Copies input string data into a buffer and increments the pointer by the number of bytes
* copied.
Expand Down
31 changes: 3 additions & 28 deletions cpp/include/cudf/strings/detail/utilities.hpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2019-2021, NVIDIA CORPORATION.
* Copyright (c) 2019-2022, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -45,36 +45,11 @@ std::unique_ptr<column> create_chars_child_column(
*
* @param strings Strings column instance.
* @param stream CUDA stream used for device memory operations and kernel launches.
* @param mr Device memory resource used to allocate the returned vector's device memory.
* @return Device vector of string_views
*/
rmm::device_uvector<string_view> create_string_vector_from_column(
cudf::strings_column_view const strings, rmm::cuda_stream_view stream = rmm::cuda_stream_default);

/**
* @brief Creates an offsets column from a string_view vector.
*
* @param strings Strings input data
* @param stream CUDA stream used for device memory operations and kernel launches.
* @param mr Device memory resource used to allocate the returned column's device memory.
* @return Child offsets column
*/
std::unique_ptr<cudf::column> child_offsets_from_string_vector(
cudf::device_span<string_view> strings,
rmm::cuda_stream_view stream = rmm::cuda_stream_default,
rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());

/**
* @brief Creates a chars column from a string_view vector.
*
* @param strings Strings input data
* @param d_offsets Offsets vector for placing strings into column's memory.
* @param stream CUDA stream used for device memory operations and kernel launches.
* @param mr Device memory resource used to allocate the returned column's device memory.
* @return Child chars column
*/
std::unique_ptr<cudf::column> child_chars_from_string_vector(
cudf::device_span<string_view> strings,
column_view const& offsets,
cudf::strings_column_view const strings,
rmm::cuda_stream_view stream = rmm::cuda_stream_default,
rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());

Expand Down
40 changes: 8 additions & 32 deletions cpp/src/lists/copying/scatter_helper.cu
Original file line number Diff line number Diff line change
Expand Up @@ -21,8 +21,7 @@
#include <cudf/lists/detail/copying.hpp>
#include <cudf/lists/detail/scatter_helper.cuh>
#include <cudf/strings/detail/utilities.cuh>
#include <cudf/strings/detail/utilities.hpp>
#include <cudf/strings/strings_column_view.hpp>
#include <cudf/utilities/span.hpp>

#include <thrust/binary_search.h>
#include <thrust/distance.h>
Expand Down Expand Up @@ -253,39 +252,16 @@ struct list_child_constructor {
auto lists_column = actual_list_row.get_column();
auto lists_offsets_ptr = lists_column.offsets().template data<offset_type>();
auto child_strings_column = lists_column.child();
auto string_offsets_ptr =
child_strings_column.child(cudf::strings_column_view::offsets_column_index)
.template data<offset_type>();
auto string_chars_ptr =
child_strings_column.child(cudf::strings_column_view::chars_column_index)
.template data<char>();

auto strings_offset = lists_offsets_ptr[row_index] + intra_index;
auto char_offset = string_offsets_ptr[strings_offset];
auto char_ptr = string_chars_ptr + char_offset;
auto string_size =
string_offsets_ptr[strings_offset + 1] - string_offsets_ptr[strings_offset];
return string_view{char_ptr, string_size};
auto strings_offset = lists_offsets_ptr[row_index] + intra_index;

return child_strings_column.is_null(strings_offset)
? string_view{nullptr, 0}
: child_strings_column.template element<string_view>(strings_offset);
});

// string_views should now have been populated with source and target references.

auto string_offsets = cudf::strings::detail::child_offsets_from_string_iterator(
string_views.begin(), string_views.size(), stream, mr);

auto string_chars = cudf::strings::detail::child_chars_from_string_vector(
string_views, string_offsets->view(), stream, mr);
auto child_null_mask =
source_lists_column_view.child().nullable() || target_lists_column_view.child().nullable()
? construct_child_nullmask(
list_vector, list_offsets, source_lists, target_lists, num_child_rows, stream, mr)
: std::make_pair(rmm::device_buffer{}, 0);

return cudf::make_strings_column(num_child_rows,
std::move(string_offsets),
std::move(string_chars),
child_null_mask.second, // Null count.
std::move(child_null_mask.first));
auto sv_span = cudf::device_span<string_view const>(string_views);
return cudf::make_strings_column(sv_span, string_view{nullptr, 0}, stream, mr);
}

/**
Expand Down
75 changes: 16 additions & 59 deletions cpp/src/strings/utilities.cu
Original file line number Diff line number Diff line change
Expand Up @@ -20,20 +20,15 @@

#include <cudf/column/column_device_view.cuh>
#include <cudf/column/column_factories.hpp>
#include <cudf/detail/get_value.cuh>
#include <cudf/strings/detail/utilities.cuh>
#include <cudf/utilities/error.hpp>

#include <rmm/cuda_stream_view.hpp>
#include <rmm/device_uvector.hpp>
#include <rmm/exec_policy.hpp>

#include <thrust/for_each.h>
#include <thrust/iterator/counting_iterator.h>
#include <thrust/transform_reduce.h>
#include <thrust/transform_scan.h>

#include <cstring>
#include <thrust/transform.h>

namespace cudf {
namespace strings {
Expand All @@ -42,65 +37,27 @@ namespace detail {
/**
* @copydoc create_string_vector_from_column
*/
// Builds a device vector holding one string_view per row of `strings`.
// Null rows are stored as string_view(nullptr, 0); all other rows hold the
// string_view returned by the column's element accessor.
rmm::device_uvector<string_view> create_string_vector_from_column(cudf::strings_column_view strings,
rmm::cuda_stream_view stream)
{
// Create a column_device_view of the parent column; the dereferenced copy
// is what gets captured by value in the device lambda below.
auto strings_column = column_device_view::create(strings.parent(), stream);
auto d_column = *strings_column;

// Output vector sized to the number of rows in the input column.
rmm::device_uvector<string_view> strings_vector(strings.size(), stream);
string_view* d_strings = strings_vector.data();
// Visit every row index in [0, strings.size()) and fill the matching slot.
thrust::for_each_n(rmm::exec_policy(stream),
thrust::make_counting_iterator<size_type>(0),
strings.size(),
[d_column, d_strings] __device__(size_type idx) {
// Null rows get a string_view with a null pointer and zero length.
if (d_column.is_null(idx))
d_strings[idx] = string_view(nullptr, 0);
else
d_strings[idx] = d_column.element<string_view>(idx);
});
return strings_vector;
}

/**
* @copydoc child_offsets_from_string_vector
*/
std::unique_ptr<cudf::column> child_offsets_from_string_vector(
cudf::device_span<string_view> strings,
rmm::device_uvector<string_view> create_string_vector_from_column(
cudf::strings_column_view const input,
rmm::cuda_stream_view stream,
rmm::mr::device_memory_resource* mr)
{
return child_offsets_from_string_iterator(strings.begin(), strings.size(), stream, mr);
}
auto d_strings = column_device_view::create(input.parent(), stream);

/**
* @copydoc child_chars_from_string_vector
*/
std::unique_ptr<cudf::column> child_chars_from_string_vector(cudf::device_span<string_view> strings,
column_view const& offsets,
rmm::cuda_stream_view stream,
rmm::mr::device_memory_resource* mr)
{
auto const d_strings = strings.data();
auto const bytes = cudf::detail::get_value<int32_t>(offsets, strings.size(), stream);
auto const d_offsets = offsets.data<int32_t>();

// create column
auto chars_column = create_chars_child_column(bytes, stream, mr);
// get it's view
auto d_chars = chars_column->mutable_view().data<char>();
thrust::for_each_n(rmm::exec_policy(stream),
thrust::make_counting_iterator<size_type>(0),
strings.size(),
[d_strings, d_offsets, d_chars] __device__(size_type idx) {
string_view const d_str = d_strings[idx];
memcpy(d_chars + d_offsets[idx], d_str.data(), d_str.size_bytes());
});

return chars_column;
auto strings_vector = rmm::device_uvector<string_view>(input.size(), stream, mr);

thrust::transform(
rmm::exec_policy(stream),
thrust::make_counting_iterator<size_type>(0),
thrust::make_counting_iterator<size_type>(input.size()),
strings_vector.begin(),
[d_strings = *d_strings] __device__(size_type idx) {
return d_strings.is_null(idx) ? string_view{nullptr, 0} : d_strings.element<string_view>(idx);
});

return strings_vector;
}

//
std::unique_ptr<column> create_chars_child_column(cudf::size_type total_bytes,
rmm::cuda_stream_view stream,
rmm::mr::device_memory_resource* mr)
Expand Down

0 comments on commit 08cd428

Please sign in to comment.