Skip to content

Commit

Permalink
bitmask_or implementation with bitmask refactor
Browse files Browse the repository at this point in the history
  • Loading branch information
rwlee committed Feb 18, 2021
1 parent 4cd5f8d commit d3382e0
Show file tree
Hide file tree
Showing 7 changed files with 199 additions and 38 deletions.
2 changes: 1 addition & 1 deletion cpp/include/cudf/column/column_device_view.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -805,7 +805,7 @@ namespace detail {
* @brief Convenience function to get offset word from a bitmask
*
* @see copy_offset_bitmask
* @see offset_bitmask_and
* @see offset_bitmask_binop
*/
__device__ inline bitmask_type get_mask_offset_word(bitmask_type const* __restrict__ source,
size_type destination_word_index,
Expand Down
50 changes: 50 additions & 0 deletions cpp/include/cudf/detail/null_mask.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -87,6 +87,21 @@ rmm::device_buffer copy_bitmask(
rmm::cuda_stream_view stream,
rmm::mr::device_memory_resource *mr = rmm::mr::get_current_device_resource());

/**
* @copydoc bitmask_binop(Binop op, std::vector<bitmask_type const*>, std::vector<size_type> const&,
* size_type, rmm::mr::device_memory_resource *)
*
* @param stream CUDA stream used for device memory operations and kernel launches
*/
template <typename Binop>
rmm::device_buffer bitmask_binop(
Binop op,
std::vector<bitmask_type const *> const &masks,
std::vector<size_type> const &begin_bits,
size_type mask_size,
rmm::cuda_stream_view stream,
rmm::mr::device_memory_resource *mr = rmm::mr::get_current_device_resource());

/**
* @copydoc bitmask_and(std::vector<bitmask_type const*>, std::vector<size_type> const&, size_type,
* rmm::mr::device_memory_resource *)
Expand All @@ -100,6 +115,18 @@ rmm::device_buffer bitmask_and(
rmm::cuda_stream_view stream,
rmm::mr::device_memory_resource *mr = rmm::mr::get_current_device_resource());

/**
* @copydoc cudf::bitmask_binop
*
* @param[in] stream CUDA stream used for device memory operations and kernel launches.
*/
template <typename Binop>
rmm::device_buffer bitmask_binop(
Binop op,
table_view const &view,
rmm::cuda_stream_view stream,
rmm::mr::device_memory_resource *mr = rmm::mr::get_current_device_resource());

/**
* @copydoc cudf::bitmask_and
*
Expand All @@ -110,6 +137,29 @@ rmm::device_buffer bitmask_and(
rmm::cuda_stream_view stream,
rmm::mr::device_memory_resource *mr = rmm::mr::get_current_device_resource());

/**
* @brief Performs a merger of the specified bitmasks using the binary operator
* provided, and writes in place to destination
*
* @param op The binary operator used to combine the bitmasks
* @param dest_mask Destination to which the AND result is written
* @param masks The list of data pointers of the bitmasks to be ANDed
* @param begin_bits The bit offsets from which each mask is to be ANDed
* @param mask_size The number of bits to be ANDed in each mask
* @param stream CUDA stream used for device memory operations and kernel launches
* @param mr Device memory resource used to allocate the returned device_buffer
* @return rmm::device_buffer Output bitmask
*/
template <typename Binop>
void inplace_bitmask_binop(
Binop op,
bitmask_type *dest_mask,
std::vector<bitmask_type const *> const &masks,
std::vector<size_type> const &begin_bits,
size_type mask_size,
rmm::cuda_stream_view stream,
rmm::mr::device_memory_resource *mr = rmm::mr::get_current_device_resource());

/**
* @brief Performs a bitwise AND of the specified bitmasks,
* and writes in place to destination
Expand Down
14 changes: 14 additions & 0 deletions cpp/include/cudf/null_mask.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -220,5 +220,19 @@ rmm::device_buffer bitmask_and(
table_view const& view,
rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());

/**
* @brief Returns a bitwise OR of the bitmasks of columns of a table
*
* If any of the columns isn't nullable, it is considered all valid.
* If no column in the table is nullable, an empty bitmask is returned.
*
* @param view The table of columns
* @param mr Device memory resource used to allocate the returned device_buffer
* @return rmm::device_buffer Output bitmask
*/
rmm::device_buffer bitmask_or(
table_view const& view,
rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());

/** @} */ // end of group
} // namespace cudf
136 changes: 104 additions & 32 deletions cpp/src/bitmask/null_mask.cu
Original file line number Diff line number Diff line change
Expand Up @@ -316,8 +316,9 @@ __global__ void copy_offset_bitmask(bitmask_type *__restrict__ destination,
}

/**
* @brief Computes the bitwise AND of an array of bitmasks
* @brief Computes the merger of an array of bitmasks using a binary operator
*
* @param op The binary operator used to combine the bitmasks
* @param destination The bitmask to write result into
* @param source Array of source mask pointers. All masks must be of same size
* @param begin_bit Array of offsets into corresponding @p source masks.
Expand All @@ -326,20 +327,25 @@ __global__ void copy_offset_bitmask(bitmask_type *__restrict__ destination,
* @param source_size Number of bits in each mask in @p source
* @param number_of_mask_words The number of words of type bitmask_type to copy
*/
__global__ void offset_bitmask_and(bitmask_type *__restrict__ destination,
bitmask_type const *const *__restrict__ source,
size_type const *__restrict__ begin_bit,
size_type num_sources,
size_type source_size,
size_type number_of_mask_words)
template <typename Binop>
__global__ void offset_bitmask_binop(Binop op,
bitmask_type *__restrict__ destination,
bitmask_type const *const *__restrict__ source,
size_type const *__restrict__ begin_bit,
size_type num_sources,
size_type source_size,
size_type number_of_mask_words)
{
for (size_type destination_word_index = threadIdx.x + blockIdx.x * blockDim.x;
destination_word_index < number_of_mask_words;
destination_word_index += blockDim.x * gridDim.x) {
bitmask_type destination_word = ~bitmask_type{0}; // All bits 1
for (size_type i = 0; i < num_sources; i++) {
destination_word &= detail::get_mask_offset_word(
source[i], destination_word_index, begin_bit[i], begin_bit[i] + source_size);
bitmask_type destination_word = detail::get_mask_offset_word(
source[0], destination_word_index, begin_bit[0], begin_bit[0] + source_size);
for (size_type i = 1; i < num_sources; i++) {
destination_word =
op(destination_word,
detail::get_mask_offset_word(
source[i], destination_word_index, begin_bit[i], begin_bit[i] + source_size));
}

destination[destination_word_index] = destination_word;
Expand Down Expand Up @@ -420,13 +426,14 @@ rmm::device_buffer copy_bitmask(column_view const &view,
return null_mask;
}

// Inplace Bitwise AND of the masks
void inplace_bitmask_and(bitmask_type *dest_mask,
std::vector<bitmask_type const *> const &masks,
std::vector<size_type> const &begin_bits,
size_type mask_size,
rmm::cuda_stream_view stream,
rmm::mr::device_memory_resource *mr)
template <typename Binop>
void inplace_bitmask_binop(Binop op,
bitmask_type *dest_mask,
std::vector<bitmask_type const *> const &masks,
std::vector<size_type> const &begin_bits,
size_type mask_size,
rmm::cuda_stream_view stream,
rmm::mr::device_memory_resource *mr)
{
CUDF_EXPECTS(std::all_of(begin_bits.begin(), begin_bits.end(), [](auto b) { return b >= 0; }),
"Invalid range.");
Expand All @@ -440,7 +447,8 @@ void inplace_bitmask_and(bitmask_type *dest_mask,
rmm::device_vector<size_type> d_begin_bits(begin_bits);

cudf::detail::grid_1d config(number_of_mask_words, 256);
offset_bitmask_and<<<config.num_blocks, config.num_threads_per_block, 0, stream.value()>>>(
offset_bitmask_binop<<<config.num_blocks, config.num_threads_per_block, 0, stream.value()>>>(
op,
dest_mask,
d_masks.data().get(),
d_begin_bits.data().get(),
Expand All @@ -451,23 +459,58 @@ void inplace_bitmask_and(bitmask_type *dest_mask,
CHECK_CUDA(stream.value());
}

// Bitwise AND of the masks
rmm::device_buffer bitmask_and(std::vector<bitmask_type const *> const &masks,
std::vector<size_type> const &begin_bits,
size_type mask_size,
rmm::cuda_stream_view stream,
rmm::mr::device_memory_resource *mr)
// Inplace Bitwise AND of the masks
void inplace_bitmask_and(bitmask_type *dest_mask,
std::vector<bitmask_type const *> const &masks,
std::vector<size_type> const &begin_bits,
size_type mask_size,
rmm::cuda_stream_view stream,
rmm::mr::device_memory_resource *mr)
{
inplace_bitmask_binop(
[] __device__(bitmask_type left, bitmask_type right) { return left & right; },
dest_mask,
masks,
begin_bits,
mask_size,
stream,
mr);
}

template <typename Binop>
rmm::device_buffer bitmask_binop(Binop op,
std::vector<bitmask_type const *> const &masks,
std::vector<size_type> const &begin_bits,
size_type mask_size,
rmm::cuda_stream_view stream,
rmm::mr::device_memory_resource *mr)
{
rmm::device_buffer dest_mask{};
auto num_bytes = bitmask_allocation_size_bytes(mask_size);

dest_mask = rmm::device_buffer{num_bytes, stream, mr};
inplace_bitmask_and(
static_cast<bitmask_type *>(dest_mask.data()), masks, begin_bits, mask_size, stream, mr);
inplace_bitmask_binop(
op, static_cast<bitmask_type *>(dest_mask.data()), masks, begin_bits, mask_size, stream, mr);

return dest_mask;
}

// Bitwise AND of the masks
rmm::device_buffer bitmask_and(std::vector<bitmask_type const *> const &masks,
std::vector<size_type> const &begin_bits,
size_type mask_size,
rmm::cuda_stream_view stream,
rmm::mr::device_memory_resource *mr)
{
return bitmask_binop(
[] __device__(bitmask_type left, bitmask_type right) { return left & right; },
masks,
begin_bits,
mask_size,
stream,
mr);
}

cudf::size_type count_set_bits(bitmask_type const *bitmask,
size_type start,
size_type stop,
Expand Down Expand Up @@ -631,10 +674,12 @@ std::vector<size_type> segmented_count_unset_bits(bitmask_type const *bitmask,
return ret;
}

// Returns the bitwise AND of the null masks of all columns in the table view
rmm::device_buffer bitmask_and(table_view const &view,
rmm::cuda_stream_view stream,
rmm::mr::device_memory_resource *mr)
// Returns the merged null masks of all columns in the table view
template <typename Binop>
rmm::device_buffer bitmask_binop(Binop op,
table_view const &view,
rmm::cuda_stream_view stream,
rmm::mr::device_memory_resource *mr)
{
CUDF_FUNC_RANGE();
rmm::device_buffer null_mask{0, stream, mr};
Expand All @@ -650,12 +695,34 @@ rmm::device_buffer bitmask_and(table_view const &view,
}

if (masks.size() > 0) {
return cudf::detail::bitmask_and(masks, offsets, view.num_rows(), stream, mr);
return cudf::detail::bitmask_binop(op, masks, offsets, view.num_rows(), stream, mr);
}

return null_mask;
}

// Returns the bitwise AND of the null masks of all columns in the table view
rmm::device_buffer bitmask_and(table_view const &view,
rmm::cuda_stream_view stream,
rmm::mr::device_memory_resource *mr)
{
return bitmask_binop(
[] __device__(bitmask_type left, bitmask_type right) { return left & right; },
view,
stream,
mr);
}

rmm::device_buffer bitmask_or(table_view const &view,
rmm::cuda_stream_view stream,
rmm::mr::device_memory_resource *mr)
{
return bitmask_binop(
[] __device__(bitmask_type left, bitmask_type right) { return left | right; },
view,
stream,
mr);
}
} // namespace detail

// Count non-zero bits in the specified range
Expand Down Expand Up @@ -708,4 +775,9 @@ rmm::device_buffer bitmask_and(table_view const &view, rmm::mr::device_memory_re
return detail::bitmask_and(view, rmm::cuda_stream_default, mr);
}

rmm::device_buffer bitmask_or(table_view const &view, rmm::mr::device_memory_resource *mr)
{
return detail::bitmask_or(view, rmm::cuda_stream_default, mr);
}

} // namespace cudf
2 changes: 1 addition & 1 deletion java/src/main/java/ai/rapids/cudf/ColumnView.java
Original file line number Diff line number Diff line change
Expand Up @@ -520,7 +520,7 @@ public final ColumnVector normalizeNANsAndZeros() {
* @return the new ColumnVector with merged null mask.
*/
public final ColumnVector mergeAndSetValidity(BinaryOp mergeOp, ColumnView... columns) {
assert mergeOp == BinaryOp.BITWISE_AND : "Only BITWISE_AND supported right now";
assert mergeOp == BinaryOp.BITWISE_AND || mergeOp == BinaryOp.BITWISE_OR : "Only BITWISE_AND and BITWISE_OR supported right now";
long[] columnViews = new long[columns.length];
long size = getRowCount();

Expand Down
12 changes: 9 additions & 3 deletions java/src/main/native/src/ColumnViewJni.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1305,10 +1305,16 @@ JNIEXPORT jlong JNICALL Java_ai_rapids_cudf_ColumnView_bitwiseMergeAndSetValidit
cudf::table_view *input_table = new cudf::table_view(column_views);

cudf::binary_operator op = static_cast<cudf::binary_operator>(bin_op);
if(op == cudf::binary_operator::BITWISE_AND) {
copy->set_null_mask(cudf::bitmask_and(*input_table));
switch(op) {
case cudf::binary_operator::BITWISE_AND:
copy->set_null_mask(cudf::bitmask_and(*input_table));
break;
case cudf::binary_operator::BITWISE_OR:
copy->set_null_mask(cudf::bitmask_or(*input_table));
break;
default:
JNI_THROW_NEW(env, "java/lang/IllegalArgumentException", "Unsupported merge operation", 0);
}

return reinterpret_cast<jlong>(copy.release());
}
CATCH_STD(env, 0);
Expand Down
21 changes: 20 additions & 1 deletion java/src/test/java/ai/rapids/cudf/ColumnVectorTest.java
Original file line number Diff line number Diff line change
Expand Up @@ -571,7 +571,7 @@ void testSpark32BitMurmur3HashMixed() {
}

@Test
void testNullReconfigureNulls() {
void testAndNullReconfigureNulls() {
try (ColumnVector v0 = ColumnVector.fromBoxedInts(0, 100, null, null, Integer.MIN_VALUE, null);
ColumnVector v1 = ColumnVector.fromBoxedInts(0, 100, 1, 2, Integer.MIN_VALUE, null);
ColumnVector intResult = v1.mergeAndSetValidity(BinaryOp.BITWISE_AND, v0);
Expand All @@ -585,6 +585,25 @@ void testNullReconfigureNulls() {
}
}

@Test
void testOrNullReconfigureNulls() {
try (ColumnVector v0 = ColumnVector.fromBoxedInts(0, 100, null, null, Integer.MIN_VALUE, null);
ColumnVector v1 = ColumnVector.fromBoxedInts(0, 100, 1, 2, Integer.MIN_VALUE, null);
ColumnVector intResultV0 = v1.mergeAndSetValidity(BinaryOp.BITWISE_OR, v0);
ColumnVector intResultV0V1 = v1.mergeAndSetValidity(BinaryOp.BITWISE_OR, v0, v1);
ColumnVector intResultMulti = v1.mergeAndSetValidity(BinaryOp.BITWISE_OR, v0, v0, v1, v1, v0, v1, v0);
ColumnVector v2 = ColumnVector.fromStrings("0", "100", "1", "2", "MIN_VALUE", "3");
ColumnVector stringResult = v2.mergeAndSetValidity(BinaryOp.BITWISE_OR, v0, v1);
ColumnVector stringExpected = ColumnVector.fromStrings("0", "100", "1", "2", "MIN_VALUE", null);
ColumnVector noMaskResult = v2.mergeAndSetValidity(BinaryOp.BITWISE_OR)) {
assertColumnsAreEqual(v0, intResultV0);
assertColumnsAreEqual(v1, intResultV0V1);
assertColumnsAreEqual(v1, intResultMulti);
assertColumnsAreEqual(stringExpected, stringResult);
assertColumnsAreEqual(v2, noMaskResult);
}
}

@Test
void isNotNullTestEmptyColumn() {
try (ColumnVector v = ColumnVector.fromBoxedInts();
Expand Down

0 comments on commit d3382e0

Please sign in to comment.