Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Handle nested string columns with no children in contiguous_split. #6864

Merged
merged 15 commits into from
Jan 12, 2021
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -170,6 +170,7 @@
- PR #6854 Fix the parameter order of writeParquetBufferBegin
- PR #6855 Fix `.str.replace_with_backrefs` docs examples
- PR #6853 Fix contiguous split of null string columns
- PR #6864 Handle contiguous_split corner case for nested string columns with no children

nvdbaranec marked this conversation as resolved.
Show resolved Hide resolved

# cuDF 0.16.0 (21 Oct 2020)
Expand Down
45 changes: 25 additions & 20 deletions cpp/src/copying/contiguous_split.cu
Original file line number Diff line number Diff line change
Expand Up @@ -419,38 +419,43 @@ std::pair<src_buf_info*, size_type> buf_info_functor::operator()<cudf::string_vi
int parent_offset_index,
int offset_depth)
{
strings_column_view scv(col);

if (col.nullable()) {
std::tie(current, offset_stack_pos) =
add_null_buffer(col, current, offset_stack_pos, parent_offset_index, offset_depth);
}

auto offset_col = current;
// string columns can have no children and still be valid
if (col.num_children() > 0) {
strings_column_view scv(col);

// info for the offsets buffer
*current = src_buf_info(type_id::INT32,
scv.offsets().begin<cudf::id_to_type<type_id::INT32>>(),
offset_stack_pos,
parent_offset_index,
false,
col.offset());
auto offset_col = current;

// info for the offsets buffer
*current = src_buf_info(type_id::INT32,
scv.offsets().begin<cudf::id_to_type<type_id::INT32>>(),
offset_stack_pos,
parent_offset_index,
false,
col.offset());

// prevent appending buf_info for non-exist chars buffer
if (scv.chars_size() > 0) {
current++;
offset_stack_pos += offset_depth;

// since we are crossing an offset boundary, our offset_depth and parent_offset_index go up.
offset_depth++;
parent_offset_index = offset_col - head;

// info for the chars buffer
*current = src_buf_info(
type_id::INT8, nullptr, offset_stack_pos, parent_offset_index, false, col.offset());
// prevent appending buf_info for non-exist chars buffer
if (scv.chars_size() > 0) {
// since we are crossing an offset boundary, our offset_depth and parent_offset_index go up.
offset_depth++;
parent_offset_index = offset_col - head;

// info for the chars buffer
*current = src_buf_info(
type_id::INT8, nullptr, offset_stack_pos, parent_offset_index, false, col.offset());
current++;
offset_stack_pos += offset_depth;
}
}

return {current + 1, offset_stack_pos + offset_depth};
return {current, offset_stack_pos};
}

template <>
Expand Down
16 changes: 16 additions & 0 deletions cpp/tests/copying/split_tests.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@

#include <cudf/column/column_factories.hpp>
#include <cudf/copying.hpp>
#include <cudf/strings/detail/utilities.hpp>
#include <cudf/utilities/type_dispatcher.hpp>

#include <cudf_test/base_fixture.hpp>
Expand Down Expand Up @@ -1276,6 +1277,21 @@ TEST_F(ContiguousSplitTableCornerCases, PreSplitTable)
}
}

TEST_F(ContiguousSplitTableCornerCases, NestedEmptyStrings)
{
{
auto empty_string = cudf::strings::detail::make_empty_strings_column();
auto offsets = cudf::test::fixed_width_column_wrapper<int>({0, 1});
auto list = cudf::make_lists_column(
1, offsets.release(), std::move(empty_string), 0, rmm::device_buffer{0});

cudf::table_view src_table({static_cast<cudf::column_view>(*list)});

std::vector<cudf::size_type> splits({0});
EXPECT_NO_THROW(contiguous_split(src_table, splits));
}
}

struct ContiguousSplitNestedTypesTest : public cudf::test::BaseFixture {
};

Expand Down