Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Remove make strings children with null mask #8830

11 changes: 11 additions & 0 deletions cpp/include/cudf/detail/reshape.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -34,5 +34,16 @@ std::unique_ptr<table> tile(
size_type count,
rmm::cuda_stream_view = rmm::cuda_stream_default,
rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());

/**
* @copydoc cudf::interleave_columns
*
* @param stream CUDA stream used for device memory operations and kernel launches
*/
std::unique_ptr<column> interleave_columns(
table_view const& input,
rmm::cuda_stream_view = rmm::cuda_stream_default,
rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());

} // namespace detail
} // namespace cudf
76 changes: 0 additions & 76 deletions cpp/include/cudf/strings/detail/utilities.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,6 @@
#include <cudf/column/column.hpp>
#include <cudf/column/column_factories.hpp>
#include <cudf/detail/get_value.cuh>
#include <cudf/detail/valid_if.cuh>
#include <cudf/strings/detail/utilities.hpp>
#include <cudf/strings/string_view.cuh>

Expand Down Expand Up @@ -205,81 +204,6 @@ auto make_strings_children(
return make_strings_children(size_and_exec_fn, strings_count, strings_count, stream, mr);
}

/**
* @brief Creates child offsets, chars columns and null mask, null count of a strings column by
* applying the template function that can be used for computing the output size of each string as
* well as create the output.
*
* @tparam SizeAndExecuteFunction Function must accept an index and return a size.
* It must have members `d_offsets`, `d_chars`, and `d_validities` which are set to memory
* containing the offsets column, chars column and string validities during write.
*
* @param size_and_exec_fn This is called twice. Once for the output size of each string, which is
* written into the `d_offsets` array. After that, `d_chars` is set and this
* is called again to fill in the chars memory. The `d_validities` array may
* be modified to set the value `0` for the corresponding rows that contain
* null string elements.
* @param exec_size Range for executing the function `size_and_exec_fn`.
* @param strings_count Number of strings.
* @param mr Device memory resource used to allocate the returned columns' device memory.
* @param stream CUDA stream used for device memory operations and kernel launches.
* @return offsets child column, chars child column, null_mask, and null_count for a strings column.
*/
template <typename SizeAndExecuteFunction>
std::tuple<std::unique_ptr<column>, std::unique_ptr<column>, rmm::device_buffer, size_type>
make_strings_children_with_null_mask(
SizeAndExecuteFunction size_and_exec_fn,
size_type exec_size,
size_type strings_count,
rmm::cuda_stream_view stream = rmm::cuda_stream_default,
rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource())
{
auto offsets_column = make_numeric_column(
data_type{type_id::INT32}, strings_count + 1, mask_state::UNALLOCATED, stream, mr);
auto offsets_view = offsets_column->mutable_view();
auto d_offsets = offsets_view.template data<int32_t>();
size_and_exec_fn.d_offsets = d_offsets;

auto validities = rmm::device_uvector<int8_t>(strings_count, stream);
size_and_exec_fn.d_validities = validities.begin();

// This is called twice: once for offsets and validities, and once for chars
auto for_each_fn = [exec_size, stream](SizeAndExecuteFunction& size_and_exec_fn) {
thrust::for_each_n(rmm::exec_policy(stream),
thrust::make_counting_iterator<size_type>(0),
exec_size,
size_and_exec_fn);
};

// Compute the string sizes (storing in `d_offsets`) and string validities
for_each_fn(size_and_exec_fn);

// Compute the offsets from string sizes
thrust::exclusive_scan(
rmm::exec_policy(stream), d_offsets, d_offsets + strings_count + 1, d_offsets);

// Now build the chars column
auto const bytes = cudf::detail::get_value<int32_t>(offsets_view, strings_count, stream);
auto chars_column = create_chars_child_column(bytes, stream, mr);

// Execute the function fn again to fill the chars column.
// Note that if the output chars column has zero size, the function fn should not be called to
// avoid accidentally overwriting the offsets.
if (bytes > 0) {
size_and_exec_fn.d_chars = chars_column->mutable_view().template data<char>();
for_each_fn(size_and_exec_fn);
}

// Finally compute null mask and null count from the validities array
auto [null_mask, null_count] = cudf::detail::valid_if(
validities.begin(), validities.end(), thrust::identity<int8_t>{}, stream, mr);

return std::make_tuple(std::move(offsets_column),
std::move(chars_column),
null_count > 0 ? std::move(null_mask) : rmm::device_buffer{},
null_count);
}

// This template is a thin wrapper around per-context singleton objects.
// It maintains a single object for each CUDA context.
template <typename TableType>
Expand Down
25 changes: 10 additions & 15 deletions cpp/src/lists/interleave_columns.cu
Original file line number Diff line number Diff line change
Expand Up @@ -172,29 +172,24 @@ struct interleave_list_entries_fn {
rmm::mr::device_memory_resource* mr) const noexcept
{
auto const table_dv_ptr = table_device_view::create(input);
auto const comp_fn = compute_string_sizes_and_interleave_lists_fn{
auto comp_fn = compute_string_sizes_and_interleave_lists_fn{
*table_dv_ptr, output_list_offsets.template begin<offset_type>(), data_has_null_mask};

if (data_has_null_mask) {
auto [offsets_column, chars_column, null_mask, null_count] =
cudf::strings::detail::make_strings_children_with_null_mask(
comp_fn, num_output_lists, num_output_entries, stream, mr);
return make_strings_column(num_output_entries,
std::move(offsets_column),
std::move(chars_column),
null_count,
std::move(null_mask),
stream,
mr);
}
auto validities =
rmm::device_uvector<int8_t>(data_has_null_mask ? num_output_entries : 0, stream);
comp_fn.d_validities = validities.data();
robertmaynard marked this conversation as resolved.
Show resolved Hide resolved

auto [offsets_column, chars_column] = cudf::strings::detail::make_strings_children(
comp_fn, num_output_lists, num_output_entries, stream, mr);

auto [null_mask, null_count] = cudf::detail::valid_if(
validities.begin(), validities.end(), thrust::identity<int8_t>{}, stream, mr);

return make_strings_column(num_output_entries,
std::move(offsets_column),
std::move(chars_column),
0,
rmm::device_buffer{},
null_count,
std::move(null_mask),
stream,
mr);
}
Expand Down
21 changes: 13 additions & 8 deletions cpp/src/reshape/interleave_columns.cu
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
#include <cudf/copying.hpp>
#include <cudf/detail/gather.cuh>
#include <cudf/detail/nvtx/ranges.hpp>
#include <cudf/detail/reshape.hpp>
#include <cudf/lists/detail/interleave_columns.hpp>
#include <cudf/strings/detail/utilities.cuh>
#include <cudf/table/table_device_view.cuh>
Expand Down Expand Up @@ -184,12 +185,11 @@ struct interleave_columns_functor {
};

} // anonymous namespace
} // namespace detail

std::unique_ptr<column> interleave_columns(table_view const& input,
rmm::cuda_stream_view stream,
rmm::mr::device_memory_resource* mr)
{
CUDF_FUNC_RANGE();
CUDF_EXPECTS(input.num_columns() > 0, "input must have at least one column to determine dtype.");

auto const dtype = input.column(0).type();
Expand All @@ -202,12 +202,17 @@ std::unique_ptr<column> interleave_columns(table_view const& input,
auto const output_needs_mask = std::any_of(
std::cbegin(input), std::cend(input), [](auto const& col) { return col.nullable(); });

return type_dispatcher<dispatch_storage_type>(dtype,
detail::interleave_columns_functor{},
input,
output_needs_mask,
rmm::cuda_stream_default,
mr);
return type_dispatcher<dispatch_storage_type>(
dtype, detail::interleave_columns_functor{}, input, output_needs_mask, stream, mr);
}

} // namespace detail

std::unique_ptr<column> interleave_columns(table_view const& input,
rmm::mr::device_memory_resource* mr)
{
CUDF_FUNC_RANGE();
return detail::interleave_columns(input, rmm::cuda_stream_default, mr);
}

} // namespace cudf
73 changes: 52 additions & 21 deletions cpp/src/strings/combine/join_list_elements.cu
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
#include <cudf/column/column_factories.hpp>
#include <cudf/detail/get_value.cuh>
#include <cudf/detail/nvtx/ranges.hpp>
#include <cudf/detail/valid_if.cuh>
#include <cudf/lists/lists_column_view.hpp>
#include <cudf/scalar/scalar_device_view.cuh>
#include <cudf/strings/combine.hpp>
Expand Down Expand Up @@ -60,9 +61,6 @@ struct compute_size_and_concatenate_fn {
// If d_chars != nullptr: only concatenate strings.
char* d_chars{nullptr};

// We need to set `1` or `0` for the validities of the output strings.
int8_t* d_validities{nullptr};

__device__ bool output_is_null(size_type const idx,
size_type const start_idx,
size_type const end_idx) const noexcept
Expand All @@ -73,33 +71,31 @@ struct compute_size_and_concatenate_fn {

__device__ void operator()(size_type const idx) const noexcept
{
// If this is the second pass, and the row `idx` is known to be a null string
if (d_chars && !d_validities[idx]) { return; }
// If this is the second pass, and the row `idx` is known to be a null or empty string
if (d_chars && (d_offsets[idx] == d_offsets[idx + 1])) { return; }

// Indices of the strings within the list row
auto const start_idx = list_offsets[idx];
auto const end_idx = list_offsets[idx + 1];

if (!d_chars && output_is_null(idx, start_idx, end_idx)) {
d_offsets[idx] = 0;
d_validities[idx] = false;
d_offsets[idx] = 0;
return;
}

auto const separator = func.separator(idx);
auto size_bytes = size_type{0};
char* output_ptr = d_chars ? d_chars + d_offsets[idx] : nullptr;
bool has_valid_element = false;
bool write_separator = false;
auto size_bytes = size_type{0};
bool has_valid_element = false;

for (size_type str_idx = start_idx; str_idx < end_idx; ++str_idx) {
bool null_element = strings_dv.is_null(str_idx);
has_valid_element = has_valid_element || !null_element;

if (!d_chars && (null_element && !string_narep_dv.is_valid())) {
d_offsets[idx] = 0;
d_validities[idx] = false;
return; // early termination: the entire list of strings will result in a null string
size_bytes = 0;
break;
}

if (write_separator && (separate_nulls == separator_on_nulls::YES || !null_element)) {
Expand All @@ -119,11 +115,7 @@ struct compute_size_and_concatenate_fn {

// If there are all null elements, the output should be the same as having an empty list input:
// a null or an empty string
if (!d_chars) {
d_offsets[idx] = has_valid_element ? size_bytes : 0;
d_validities[idx] =
has_valid_element || empty_list_policy == output_if_empty_list::EMPTY_STRING;
}
if (!d_chars) { d_offsets[idx] = has_valid_element ? size_bytes : 0; }
}
};

Expand All @@ -144,6 +136,33 @@ struct scalar_separator_fn {
__device__ string_view separator(size_type const) const noexcept { return d_separator.value(); }
};

template <typename CompFn>
struct validities_fn {
CompFn comp_fn;

validities_fn(CompFn comp_fn) : comp_fn(comp_fn) {}

__device__ bool operator()(size_type idx)
{
auto const start_idx = comp_fn.list_offsets[idx];
auto const end_idx = comp_fn.list_offsets[idx + 1];
bool valid_output = !comp_fn.output_is_null(idx, start_idx, end_idx);
if (valid_output) {
bool check_elements = false;
for (size_type str_idx = start_idx; str_idx < end_idx; ++str_idx) {
bool const valid_element = comp_fn.strings_dv.is_valid(str_idx);
check_elements = check_elements || valid_element;
// if an element is null and narep is invalid, the output row is null
if (!valid_element && !comp_fn.string_narep_dv.is_valid()) { return false; }
}
// handle empty-list-as-null output policy setting
valid_output =
check_elements || comp_fn.empty_list_policy == output_if_empty_list::EMPTY_STRING;
}
return valid_output;
}
};

} // namespace

std::unique_ptr<column> join_list_elements(lists_column_view const& lists_strings_column,
Expand Down Expand Up @@ -180,8 +199,14 @@ std::unique_ptr<column> join_list_elements(lists_column_view const& lists_string
string_narep_dv,
separate_nulls,
empty_list_policy};
auto [offsets_column, chars_column, null_mask, null_count] =
make_strings_children_with_null_mask(comp_fn, num_rows, num_rows, stream, mr);

auto [offsets_column, chars_column] = make_strings_children(comp_fn, num_rows, stream, mr);
auto [null_mask, null_count] =
cudf::detail::valid_if(thrust::counting_iterator<size_type>(0),
thrust::counting_iterator<size_type>(num_rows),
validities_fn{comp_fn},
stream,
mr);

return make_strings_column(num_rows,
std::move(offsets_column),
Expand Down Expand Up @@ -254,8 +279,14 @@ std::unique_ptr<column> join_list_elements(lists_column_view const& lists_string
string_narep_dv,
separate_nulls,
empty_list_policy};
auto [offsets_column, chars_column, null_mask, null_count] =
make_strings_children_with_null_mask(comp_fn, num_rows, num_rows, stream, mr);

auto [offsets_column, chars_column] = make_strings_children(comp_fn, num_rows, stream, mr);
auto [null_mask, null_count] =
cudf::detail::valid_if(thrust::counting_iterator<size_type>(0),
thrust::counting_iterator<size_type>(num_rows),
validities_fn{comp_fn},
stream,
mr);

return make_strings_column(num_rows,
std::move(offsets_column),
Expand Down
4 changes: 2 additions & 2 deletions cpp/src/transpose/transpose.cu
Original file line number Diff line number Diff line change
Expand Up @@ -16,9 +16,9 @@
#include <cudf/copying.hpp>
#include <cudf/detail/copy.hpp>
#include <cudf/detail/nvtx/ranges.hpp>
#include <cudf/detail/reshape.hpp>
#include <cudf/detail/transpose.hpp>
#include <cudf/detail/utilities/cuda.cuh>
#include <cudf/reshape.hpp>
#include <cudf/table/table_device_view.cuh>
#include <cudf/transpose.hpp>
#include <cudf/utilities/traits.hpp>
Expand All @@ -44,7 +44,7 @@ std::pair<std::unique_ptr<column>, table_view> transpose(table_view const& input
input.begin(), input.end(), [dtype](auto const& col) { return dtype == col.type(); }),
"Column type mismatch");

auto output_column = cudf::interleave_columns(input, mr);
auto output_column = cudf::detail::interleave_columns(input, stream, mr);
auto one_iter = thrust::make_counting_iterator<size_type>(1);
auto splits_iter = thrust::make_transform_iterator(
one_iter, [width = input.num_columns()](size_type idx) { return idx * width; });
Expand Down
4 changes: 2 additions & 2 deletions cpp/tests/strings/combine/join_list_elements_tests.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -90,11 +90,11 @@ TEST_F(StringsListsConcatenateTest, ZeroSizeStringsInput)
auto const expected = STR_COL{"", "", "", ""};

auto results = cudf::strings::join_list_elements(string_lv);
CUDF_TEST_EXPECT_COLUMNS_EQUAL(*results, expected, verbosity);
CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(*results, expected, verbosity);

auto const separators = STR_COL{"", "", "", ""}.release();
results = cudf::strings::join_list_elements(string_lv, separators->view());
CUDF_TEST_EXPECT_COLUMNS_EQUAL(*results, expected, verbosity);
CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(*results, expected, verbosity);
}

// Empty list results in null
Expand Down