Skip to content

Commit

Permalink
Fix branch_stack calculation in row_bit_count():
Browse files Browse the repository at this point in the history
For input with a number of rows exceeding  `max_block_size`, `row_bit_count()` currently
reaches past the bounds of its shared-memory allocation, causing illegal memory access
errors like in (cudf/issues/8938)[rapidsai#8938].

This commit corrects the calculation of the branch stack's base address, and adds a
test for this case.
  • Loading branch information
mythrocks committed Aug 19, 2021
1 parent abe57f8 commit 5330c3d
Show file tree
Hide file tree
Showing 2 changed files with 66 additions and 1 deletion.
2 changes: 1 addition & 1 deletion cpp/src/transform/row_bit_count.cu
Original file line number Diff line number Diff line change
Expand Up @@ -408,7 +408,7 @@ __global__ void compute_row_sizes(device_span<column_device_view const> cols,
if (tid >= num_rows) { return; }

// branch stack. points to the last list prior to branching.
row_span* my_branch_stack = thread_branch_stacks + (tid * max_branch_depth);
row_span* my_branch_stack = thread_branch_stacks + (threadIdx.x * max_branch_depth);
size_type branch_depth{0};

// current row span - always starts at 1 row.
Expand Down
65 changes: 65 additions & 0 deletions cpp/tests/transform/row_bit_count_test.cu
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,9 @@
*/

#include <cudf/column/column.hpp>
#include <cudf/column/column_factories.hpp>
#include <cudf/column/column_view.hpp>
#include <cudf/io/parquet.hpp>
#include <cudf/transform.hpp>
#include <cudf/types.hpp>
#include <cudf_test/base_fixture.hpp>
Expand All @@ -25,6 +27,9 @@

#include <rmm/exec_policy.hpp>

#include <thrust/fill.h>
#include <thrust/tabulate.h>

using namespace cudf;

template <typename T>
Expand Down Expand Up @@ -192,6 +197,66 @@ TEST_F(RowBitCount, StringsWithNulls)
CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, *result);
}

namespace {

/**
* @brief __device__ functor to multiply input by 2, defined out of line because __device__ lambdas
* cannot be defined in a TEST_F().
*/
struct times_2 {
int32_t __device__ operator()(int32_t i) const { return i * 2; }
};

} // namespace

TEST_F(RowBitCount, StructsWithLists_RowsExceedingASingleBlock)
{
// Tests that `row_bit_count()` can handle struct<list<int32_t>> with more
// than max_block_size (256) rows.
// With a large number of rows, computation spills to multiple thread-blocks,
// thus exercising the branch-stack comptutation.
// The contents of the input column aren't as pertinent to this test as the
// column size. For what it's worth, it looks as follows:
// [ struct({0,1}), struct({2,3}), struct({4,5}), ... ]

using namespace cudf;
auto constexpr num_rows = 256 * 2; // Exceeding a block size.

// List child column = {0, 1, 2, 3, 4, ..., 2*num_rows};
auto ints = make_numeric_column(data_type{type_id::INT32}, num_rows * 2);
auto ints_view = ints->mutable_view();
thrust::tabulate(thrust::device,
ints_view.begin<int32_t>(),
ints_view.end<int32_t>(),
thrust::identity<int32_t>());

// List offsets = {0, 2, 4, 6, 8, ..., num_rows*2};
auto list_offsets = make_numeric_column(data_type{type_id::INT32}, num_rows + 1);
auto list_offsets_view = list_offsets->mutable_view();
thrust::tabulate(thrust::device,
list_offsets_view.begin<offset_type>(),
list_offsets_view.end<offset_type>(),
times_2{});

// List<int32_t> = {{0,1}, {2,3}, {4,5}, ..., {2*(num_rows-1), 2*num_rows-1}};
auto lists_column = make_lists_column(num_rows, std::move(list_offsets), std::move(ints), 0, {});

// Struct<List<int32_t>.
auto struct_members = std::vector<std::unique_ptr<column>>{};
struct_members.emplace_back(std::move(lists_column));
auto structs_column = make_structs_column(num_rows, std::move(struct_members), 0, {});

// Compute row_bit_count, and compare.
auto row_bit_counts = row_bit_count(table_view{{structs_column->view()}});
auto expected_row_bit_counts = make_numeric_column(data_type{type_id::INT32}, num_rows);
thrust::fill_n(thrust::device,
expected_row_bit_counts->mutable_view().begin<int32_t>(),
num_rows,
CHAR_BIT * (2 * sizeof(int32_t) + sizeof(offset_type)));

CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(row_bit_counts->view(), expected_row_bit_counts->view());
}

std::pair<std::unique_ptr<column>, std::unique_ptr<column>> build_struct_column()
{
std::vector<bool> struct_validity{0, 1, 1, 1, 1, 0};
Expand Down

0 comments on commit 5330c3d

Please sign in to comment.