Skip to content

Commit

Permalink
Fix an issue in cudf::row_bit_count involving structs and lists at mu…
Browse files Browse the repository at this point in the history
…ltiple levels. (#11779)

`row_bit_count` keeps track of a stack of "branches" which represent a span of rows to be included in the computed size.  As you traverse through a hierarchy of lists, that span of rows is maintained as a stack.  The code that was handling jumping out from the bottom of a stack to a new column was making the faulty assumption that the jump was only 1 level up.

Authors:
  - https://github.com/nvdbaranec

Approvers:
  - Nghia Truong (https://github.com/ttnghia)
  - Mike Wilson (https://github.com/hyperbolic2346)
  - Alessandro Bellina (https://github.com/abellina)

URL: #11779
  • Loading branch information
nvdbaranec authored Sep 27, 2022
1 parent 1003e33 commit 5a416a0
Show file tree
Hide file tree
Showing 2 changed files with 44 additions and 4 deletions.
10 changes: 6 additions & 4 deletions cpp/src/transform/row_bit_count.cu
Original file line number Diff line number Diff line change
Expand Up @@ -409,7 +409,8 @@ __global__ void compute_row_sizes(device_span<column_device_view const> cols,
auto const num_rows = output.size();
if (tid >= num_rows) { return; }

// branch stack. points to the last list prior to branching.
// my_branch_stack points to the last span prior to branching. a branch occurs only
// when we are inside of a list contained within a struct column.
row_span* my_branch_stack = thread_branch_stacks + (threadIdx.x * max_branch_depth);
size_type branch_depth{0};

Expand All @@ -424,11 +425,12 @@ __global__ void compute_row_sizes(device_span<column_device_view const> cols,
for (size_type idx = 0; idx < cols.size(); idx++) {
column_device_view const& col = cols[idx];

// if we've returned from a branch
// if we've returned from a branch, pop to the proper span
if (info[idx].branch_depth_start < last_branch_depth) {
cur_span = my_branch_stack[--branch_depth];
branch_depth = info[idx].branch_depth_start;
cur_span = my_branch_stack[branch_depth];
}
// if we're entering a new branch.
// if we're entering a new branch, push the current span
// NOTE: this case can happen (a pop and a push by the same column)
// when we have a struct<list, list>
if (info[idx].branch_depth_end > info[idx].branch_depth_start) {
Expand Down
38 changes: 38 additions & 0 deletions cpp/tests/transform/row_bit_count_test.cu
Original file line number Diff line number Diff line change
Expand Up @@ -618,9 +618,47 @@ TEST_F(RowBitCount, Table)
thrust::make_counting_iterator(0) + t.num_rows(),
mcv.begin<size_type>(),
sum_functor{cv0.data<size_type>(), cv1.data<size_type>(), cv2.data<size_type>()});

CUDF_TEST_EXPECT_COLUMNS_EQUAL(*expected, *result);
}

TEST_F(RowBitCount, DepthJump)
{
// jump more than 1 branch depth.

using T = int;

// struct<list<struct<list<int>>, int>
// the jump occurs from depth 2 (the leafmost int column)
// to depth 0 (the topmost int column)
cudf::test::fixed_width_column_wrapper<T> ____c0{1, 2, 3, 5, 5, 6, 7, 8};
cudf::test::fixed_width_column_wrapper<cudf::offset_type> ___offsets{0, 2, 4, 6, 8};
auto ___c0 = cudf::make_lists_column(4, ___offsets.release(), ____c0.release(), 0, {});
std::vector<std::unique_ptr<cudf::column>> __children;
__children.push_back(std::move(___c0));
cudf::test::structs_column_wrapper __c0(std::move(__children));
cudf::test::fixed_width_column_wrapper<cudf::offset_type> _offsets{0, 3, 4};
auto _c0 = cudf::make_lists_column(2, _offsets.release(), __c0.release(), 0, {});
cudf::test::fixed_width_column_wrapper<int> _c1{3, 4};
std::vector<std::unique_ptr<cudf::column>> children;
children.push_back(std::move(_c0));
children.push_back(_c1.release());
cudf::test::structs_column_wrapper c0(std::move(children));

table_view t({c0});
auto result = cudf::row_bit_count(t);

// expected size = (num rows at level 1 + num_rows at level 2) + (# values the leaf int column) +
// 1 (value in topmost int column)
constexpr size_type offset_size = sizeof(cudf::offset_type) * CHAR_BIT;
constexpr size_type type_size = sizeof(T) * CHAR_BIT;
cudf::test::fixed_width_column_wrapper<size_type> expected{
((1 + 3) * offset_size) + (6 * type_size) + (1 * type_size),
((1 + 1) * offset_size) + (2 * type_size) + (1 * type_size)};

CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, *result);
}

TEST_F(RowBitCount, SlicedColumnsFixedWidth)
{
auto const slice_size = 7;
Expand Down

0 comments on commit 5a416a0

Please sign in to comment.