Skip to content

Commit

Permalink
Fix memory read/write error in concatenate_lists_ignore_null (#8978)
Browse files Browse the repository at this point in the history
Reference #8883 

Running `cuda-memcheck` on `LISTS_TEST` found a read error in the `cudf::lists::detail::concatenate_lists_ignore_null()` utility. This function is using `thrust::transform` to build the output offsets values and executes from 0 to `num_rows+1`. The device lambda included logic to update a temporary `uint8` vector with validity value (0 or 1) for each row. Unfortunately, this required reading offset values at `idx` and `idx+1` which would fail when `idx==num_rows` since `idx+1` would be out-of-bounds for the input offsets in this case. Also, the `validities[idx]` statement would fail on write since `idx==num_rows` is past the end of this vector as well. Finally, the temporary `validities` vector was passed to `cudf::detail::valid_if` utility to turn it into a bitmask.

Since 2 kernels are used to create the output lists column, the temporary `validities` vector is not required since the `valid_if` utility can take a device predicate to build the bitmask instead. The code logic for determine validity was therefore moved from the `transform` call to the `valid_if` predicate instead. This keeps the same number of kernels without the need for the temporary buffer and fixes the out-of-bounds memory access.

Authors:
  - David Wendt (https://github.com/davidwendt)

Approvers:
  - Karthikeyan (https://github.com/karthikeyann)
  - Mark Harris (https://github.com/harrism)

URL: #8978
  • Loading branch information
davidwendt authored Aug 11, 2021
1 parent 34523c7 commit 2aaa57f
Showing 1 changed file with 18 additions and 24 deletions.
42 changes: 18 additions & 24 deletions cpp/src/lists/combine/concatenate_list_elements.cu
Original file line number Diff line number Diff line change
Expand Up @@ -51,13 +51,9 @@ std::unique_ptr<column> concatenate_lists_ignore_null(column_view const& input,
auto out_offsets = make_numeric_column(
data_type{type_id::INT32}, num_rows + 1, mask_state::UNALLOCATED, stream, mr);

// The array of int8_t stores validities for the output list elements.
auto validities = rmm::device_uvector<int8_t>(build_null_mask ? num_rows : 0, stream);

auto const d_out_offsets = out_offsets->mutable_view().template begin<offset_type>();
auto const d_row_offsets = lists_column_view(input).offsets_begin();
auto const d_list_offsets = lists_column_view(lists_column_view(input).child()).offsets_begin();
auto const lists_dv_ptr = column_device_view::create(lists_column_view(input).child());

// Concatenating the lists at the same row by converting the entry offsets from the child column
// into row offsets of the root column. Those entry offsets are subtracted by the first entry
Expand All @@ -67,22 +63,7 @@ std::unique_ptr<column> concatenate_lists_ignore_null(column_view const& input,
iter,
iter + num_rows + 1,
d_out_offsets,
[d_row_offsets,
d_list_offsets,
lists_dv = *lists_dv_ptr,
d_validities = validities.begin(),
build_null_mask,
iter] __device__(auto const idx) {
if (build_null_mask) {
// The output row will be null only if all lists on the input row are null.
auto const is_valid = thrust::any_of(thrust::seq,
iter + d_row_offsets[idx],
iter + d_row_offsets[idx + 1],
[&] __device__(auto const list_idx) {
return lists_dv.is_valid(list_idx);
});
d_validities[idx] = static_cast<int8_t>(is_valid);
}
[d_row_offsets, d_list_offsets] __device__(auto const idx) {
auto const start_offset = d_list_offsets[d_row_offsets[0]];
return d_list_offsets[d_row_offsets[idx]] - start_offset;
});
Expand All @@ -92,10 +73,23 @@ std::unique_ptr<column> concatenate_lists_ignore_null(column_view const& input,
lists_column_view(lists_column_view(input).get_sliced_child(stream)).get_sliced_child(stream));

auto [null_mask, null_count] = [&] {
return build_null_mask
? cudf::detail::valid_if(
validities.begin(), validities.end(), thrust::identity<int8_t>{}, stream, mr)
: std::make_pair(cudf::detail::copy_bitmask(input, stream, mr), input.null_count());
if (!build_null_mask)
return std::make_pair(cudf::detail::copy_bitmask(input, stream, mr), input.null_count());

// The output row will be null only if all lists on the input row are null.
auto const lists_dv_ptr = column_device_view::create(lists_column_view(input).child(), stream);
return cudf::detail::valid_if(
iter,
iter + num_rows,
[d_row_offsets, lists_dv = *lists_dv_ptr, iter] __device__(auto const idx) {
return thrust::any_of(
thrust::seq,
iter + d_row_offsets[idx],
iter + d_row_offsets[idx + 1],
[&] __device__(auto const list_idx) { return lists_dv.is_valid(list_idx); });
},
stream,
mr);
}();

return make_lists_column(num_rows,
Expand Down

0 comments on commit 2aaa57f

Please sign in to comment.