Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

bitmask_or implementation with bitmask refactor #7406

Merged
merged 10 commits into from
Mar 8, 2021
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion cpp/include/cudf/column/column_device_view.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -805,7 +805,7 @@ namespace detail {
* @brief Convenience function to get offset word from a bitmask
*
* @see copy_offset_bitmask
* @see offset_bitmask_and
* @see offset_bitmask_binop
*/
__device__ inline bitmask_type get_mask_offset_word(bitmask_type const* __restrict__ source,
size_type destination_word_index,
Expand Down
50 changes: 50 additions & 0 deletions cpp/include/cudf/detail/null_mask.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -87,6 +87,21 @@ rmm::device_buffer copy_bitmask(
rmm::cuda_stream_view stream,
rmm::mr::device_memory_resource *mr = rmm::mr::get_current_device_resource());

/**
* @copydoc bitmask_binop(Binop op, std::vector<bitmask_type const*>, std::vector<size_type> const&,
* size_type, rmm::mr::device_memory_resource *)
*
* @param stream CUDA stream used for device memory operations and kernel launches
*/
template <typename Binop>
rwlee marked this conversation as resolved.
Show resolved Hide resolved
rmm::device_buffer bitmask_binop(
Binop op,
std::vector<bitmask_type const *> const &masks,
std::vector<size_type> const &begin_bits,
size_type mask_size,
rmm::cuda_stream_view stream,
rmm::mr::device_memory_resource *mr = rmm::mr::get_current_device_resource());

/**
* @copydoc bitmask_and(std::vector<bitmask_type const*>, std::vector<size_type> const&, size_type,
* rmm::mr::device_memory_resource *)
Expand All @@ -100,6 +115,18 @@ rmm::device_buffer bitmask_and(
rmm::cuda_stream_view stream,
rmm::mr::device_memory_resource *mr = rmm::mr::get_current_device_resource());

/**
* @copydoc cudf::bitmask_binop
*
* @param[in] stream CUDA stream used for device memory operations and kernel launches.
*/
template <typename Binop>
rmm::device_buffer bitmask_binop(
Binop op,
table_view const &view,
rmm::cuda_stream_view stream,
rmm::mr::device_memory_resource *mr = rmm::mr::get_current_device_resource());

/**
* @copydoc cudf::bitmask_and
*
Expand All @@ -110,6 +137,29 @@ rmm::device_buffer bitmask_and(
rmm::cuda_stream_view stream,
rmm::mr::device_memory_resource *mr = rmm::mr::get_current_device_resource());

/**
* @brief Performs a merger of the specified bitmasks using the binary operator
* provided, and writes in place to destination
*
* @param op The binary operator used to combine the bitmasks
* @param dest_mask Destination to which the AND result is written
* @param masks The list of data pointers of the bitmasks to be ANDed
* @param begin_bits The bit offsets from which each mask is to be ANDed
* @param mask_size The number of bits to be ANDed in each mask
* @param stream CUDA stream used for device memory operations and kernel launches
* @param mr Device memory resource used to allocate the returned device_buffer
* @return rmm::device_buffer Output bitmask
*/
template <typename Binop>
void inplace_bitmask_binop(
Binop op,
bitmask_type *dest_mask,
std::vector<bitmask_type const *> const &masks,
std::vector<size_type> const &begin_bits,
size_type mask_size,
rmm::cuda_stream_view stream,
rmm::mr::device_memory_resource *mr = rmm::mr::get_current_device_resource());

/**
* @brief Performs a bitwise AND of the specified bitmasks,
* and writes in place to destination
Expand Down
14 changes: 14 additions & 0 deletions cpp/include/cudf/null_mask.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -220,5 +220,19 @@ rmm::device_buffer bitmask_and(
table_view const& view,
rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());

/**
* @brief Returns a bitwise OR of the bitmasks of columns of a table
*
* If any of the columns isn't nullable, it is considered all valid.
* If no column in the table is nullable, an empty bitmask is returned.
*
* @param view The table of columns
* @param mr Device memory resource used to allocate the returned device_buffer
* @return rmm::device_buffer Output bitmask
*/
rmm::device_buffer bitmask_or(
table_view const& view,
rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());

/** @} */ // end of group
} // namespace cudf
136 changes: 104 additions & 32 deletions cpp/src/bitmask/null_mask.cu
Original file line number Diff line number Diff line change
Expand Up @@ -316,8 +316,9 @@ __global__ void copy_offset_bitmask(bitmask_type *__restrict__ destination,
}

/**
* @brief Computes the bitwise AND of an array of bitmasks
* @brief Computes the merger of an array of bitmasks using a binary operator
*
* @param op The binary operator used to combine the bitmasks
* @param destination The bitmask to write result into
* @param source Array of source mask pointers. All masks must be of same size
* @param begin_bit Array of offsets into corresponding @p source masks.
Expand All @@ -326,20 +327,25 @@ __global__ void copy_offset_bitmask(bitmask_type *__restrict__ destination,
* @param source_size Number of bits in each mask in @p source
* @param number_of_mask_words The number of words of type bitmask_type to copy
*/
__global__ void offset_bitmask_and(bitmask_type *__restrict__ destination,
bitmask_type const *const *__restrict__ source,
size_type const *__restrict__ begin_bit,
size_type num_sources,
size_type source_size,
size_type number_of_mask_words)
template <typename Binop>
__global__ void offset_bitmask_binop(Binop op,
bitmask_type *__restrict__ destination,
bitmask_type const *const *__restrict__ source,
size_type const *__restrict__ begin_bit,
size_type num_sources,
size_type source_size,
size_type number_of_mask_words)
{
for (size_type destination_word_index = threadIdx.x + blockIdx.x * blockDim.x;
destination_word_index < number_of_mask_words;
destination_word_index += blockDim.x * gridDim.x) {
bitmask_type destination_word = ~bitmask_type{0}; // All bits 1
for (size_type i = 0; i < num_sources; i++) {
destination_word &= detail::get_mask_offset_word(
source[i], destination_word_index, begin_bit[i], begin_bit[i] + source_size);
bitmask_type destination_word = detail::get_mask_offset_word(
source[0], destination_word_index, begin_bit[0], begin_bit[0] + source_size);
for (size_type i = 1; i < num_sources; i++) {
destination_word =
op(destination_word,
detail::get_mask_offset_word(
source[i], destination_word_index, begin_bit[i], begin_bit[i] + source_size));
rwlee marked this conversation as resolved.
Show resolved Hide resolved
}

destination[destination_word_index] = destination_word;
Expand Down Expand Up @@ -420,13 +426,14 @@ rmm::device_buffer copy_bitmask(column_view const &view,
return null_mask;
}

// Inplace Bitwise AND of the masks
void inplace_bitmask_and(bitmask_type *dest_mask,
std::vector<bitmask_type const *> const &masks,
std::vector<size_type> const &begin_bits,
size_type mask_size,
rmm::cuda_stream_view stream,
rmm::mr::device_memory_resource *mr)
template <typename Binop>
void inplace_bitmask_binop(Binop op,
bitmask_type *dest_mask,
std::vector<bitmask_type const *> const &masks,
std::vector<size_type> const &begin_bits,
size_type mask_size,
rmm::cuda_stream_view stream,
rmm::mr::device_memory_resource *mr)
{
CUDF_EXPECTS(std::all_of(begin_bits.begin(), begin_bits.end(), [](auto b) { return b >= 0; }),
"Invalid range.");
Expand All @@ -440,7 +447,8 @@ void inplace_bitmask_and(bitmask_type *dest_mask,
rmm::device_vector<size_type> d_begin_bits(begin_bits);

cudf::detail::grid_1d config(number_of_mask_words, 256);
offset_bitmask_and<<<config.num_blocks, config.num_threads_per_block, 0, stream.value()>>>(
offset_bitmask_binop<<<config.num_blocks, config.num_threads_per_block, 0, stream.value()>>>(
op,
dest_mask,
d_masks.data().get(),
d_begin_bits.data().get(),
Expand All @@ -451,23 +459,58 @@ void inplace_bitmask_and(bitmask_type *dest_mask,
CHECK_CUDA(stream.value());
}

// Bitwise AND of the masks
rmm::device_buffer bitmask_and(std::vector<bitmask_type const *> const &masks,
std::vector<size_type> const &begin_bits,
size_type mask_size,
rmm::cuda_stream_view stream,
rmm::mr::device_memory_resource *mr)
// Inplace Bitwise AND of the masks
void inplace_bitmask_and(bitmask_type *dest_mask,
std::vector<bitmask_type const *> const &masks,
std::vector<size_type> const &begin_bits,
size_type mask_size,
rmm::cuda_stream_view stream,
rmm::mr::device_memory_resource *mr)
{
inplace_bitmask_binop(
[] __device__(bitmask_type left, bitmask_type right) { return left & right; },
dest_mask,
masks,
begin_bits,
mask_size,
stream,
mr);
rwlee marked this conversation as resolved.
Show resolved Hide resolved
}

template <typename Binop>
rmm::device_buffer bitmask_binop(Binop op,
std::vector<bitmask_type const *> const &masks,
std::vector<size_type> const &begin_bits,
size_type mask_size,
rmm::cuda_stream_view stream,
rmm::mr::device_memory_resource *mr)
{
rmm::device_buffer dest_mask{};
auto num_bytes = bitmask_allocation_size_bytes(mask_size);

dest_mask = rmm::device_buffer{num_bytes, stream, mr};
inplace_bitmask_and(
static_cast<bitmask_type *>(dest_mask.data()), masks, begin_bits, mask_size, stream, mr);
inplace_bitmask_binop(
op, static_cast<bitmask_type *>(dest_mask.data()), masks, begin_bits, mask_size, stream, mr);

return dest_mask;
}

// Bitwise AND of the masks
rmm::device_buffer bitmask_and(std::vector<bitmask_type const *> const &masks,
std::vector<size_type> const &begin_bits,
size_type mask_size,
rmm::cuda_stream_view stream,
rmm::mr::device_memory_resource *mr)
{
return bitmask_binop(
[] __device__(bitmask_type left, bitmask_type right) { return left & right; },
masks,
begin_bits,
mask_size,
stream,
mr);
}

cudf::size_type count_set_bits(bitmask_type const *bitmask,
size_type start,
size_type stop,
Expand Down Expand Up @@ -631,10 +674,12 @@ std::vector<size_type> segmented_count_unset_bits(bitmask_type const *bitmask,
return ret;
}

// Returns the bitwise AND of the null masks of all columns in the table view
rmm::device_buffer bitmask_and(table_view const &view,
rmm::cuda_stream_view stream,
rmm::mr::device_memory_resource *mr)
// Returns the merged null masks of all columns in the table view
template <typename Binop>
rmm::device_buffer bitmask_binop(Binop op,
table_view const &view,
rmm::cuda_stream_view stream,
rmm::mr::device_memory_resource *mr)
{
CUDF_FUNC_RANGE();
rmm::device_buffer null_mask{0, stream, mr};
Expand All @@ -650,12 +695,34 @@ rmm::device_buffer bitmask_and(table_view const &view,
}

if (masks.size() > 0) {
return cudf::detail::bitmask_and(masks, offsets, view.num_rows(), stream, mr);
return cudf::detail::bitmask_binop(op, masks, offsets, view.num_rows(), stream, mr);
}

return null_mask;
}

// Returns the bitwise AND of the null masks of all columns in the table view
rmm::device_buffer bitmask_and(table_view const &view,
rmm::cuda_stream_view stream,
rmm::mr::device_memory_resource *mr)
{
return bitmask_binop(
[] __device__(bitmask_type left, bitmask_type right) { return left & right; },
view,
stream,
mr);
}

rmm::device_buffer bitmask_or(table_view const &view,
rmm::cuda_stream_view stream,
rmm::mr::device_memory_resource *mr)
{
return bitmask_binop(
[] __device__(bitmask_type left, bitmask_type right) { return left | right; },
view,
stream,
mr);
}
} // namespace detail

// Count non-zero bits in the specified range
Expand Down Expand Up @@ -708,4 +775,9 @@ rmm::device_buffer bitmask_and(table_view const &view, rmm::mr::device_memory_re
return detail::bitmask_and(view, rmm::cuda_stream_default, mr);
}

rmm::device_buffer bitmask_or(table_view const &view, rmm::mr::device_memory_resource *mr)
{
return detail::bitmask_or(view, rmm::cuda_stream_default, mr);
}

} // namespace cudf
2 changes: 1 addition & 1 deletion java/src/main/java/ai/rapids/cudf/ColumnView.java
Original file line number Diff line number Diff line change
Expand Up @@ -520,7 +520,7 @@ public final ColumnVector normalizeNANsAndZeros() {
* @return the new ColumnVector with merged null mask.
*/
public final ColumnVector mergeAndSetValidity(BinaryOp mergeOp, ColumnView... columns) {
assert mergeOp == BinaryOp.BITWISE_AND : "Only BITWISE_AND supported right now";
assert mergeOp == BinaryOp.BITWISE_AND || mergeOp == BinaryOp.BITWISE_OR : "Only BITWISE_AND and BITWISE_OR supported right now";
long[] columnViews = new long[columns.length];
long size = getRowCount();

Expand Down
12 changes: 9 additions & 3 deletions java/src/main/native/src/ColumnViewJni.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1305,10 +1305,16 @@ JNIEXPORT jlong JNICALL Java_ai_rapids_cudf_ColumnView_bitwiseMergeAndSetValidit
cudf::table_view *input_table = new cudf::table_view(column_views);

cudf::binary_operator op = static_cast<cudf::binary_operator>(bin_op);
if(op == cudf::binary_operator::BITWISE_AND) {
copy->set_null_mask(cudf::bitmask_and(*input_table));
switch(op) {
case cudf::binary_operator::BITWISE_AND:
copy->set_null_mask(cudf::bitmask_and(*input_table));
break;
case cudf::binary_operator::BITWISE_OR:
copy->set_null_mask(cudf::bitmask_or(*input_table));
break;
default:
JNI_THROW_NEW(env, "java/lang/IllegalArgumentException", "Unsupported merge operation", 0);
rwlee marked this conversation as resolved.
Show resolved Hide resolved
}

return reinterpret_cast<jlong>(copy.release());
}
CATCH_STD(env, 0);
Expand Down
21 changes: 20 additions & 1 deletion java/src/test/java/ai/rapids/cudf/ColumnVectorTest.java
Original file line number Diff line number Diff line change
Expand Up @@ -571,7 +571,7 @@ void testSpark32BitMurmur3HashMixed() {
}

@Test
void testNullReconfigureNulls() {
void testAndNullReconfigureNulls() {
try (ColumnVector v0 = ColumnVector.fromBoxedInts(0, 100, null, null, Integer.MIN_VALUE, null);
ColumnVector v1 = ColumnVector.fromBoxedInts(0, 100, 1, 2, Integer.MIN_VALUE, null);
ColumnVector intResult = v1.mergeAndSetValidity(BinaryOp.BITWISE_AND, v0);
Expand All @@ -585,6 +585,25 @@ void testNullReconfigureNulls() {
}
}

@Test
void testOrNullReconfigureNulls() {
try (ColumnVector v0 = ColumnVector.fromBoxedInts(0, 100, null, null, Integer.MIN_VALUE, null);
ColumnVector v1 = ColumnVector.fromBoxedInts(0, 100, 1, 2, Integer.MIN_VALUE, null);
ColumnVector intResultV0 = v1.mergeAndSetValidity(BinaryOp.BITWISE_OR, v0);
ColumnVector intResultV0V1 = v1.mergeAndSetValidity(BinaryOp.BITWISE_OR, v0, v1);
ColumnVector intResultMulti = v1.mergeAndSetValidity(BinaryOp.BITWISE_OR, v0, v0, v1, v1, v0, v1, v0);
ColumnVector v2 = ColumnVector.fromStrings("0", "100", "1", "2", "MIN_VALUE", "3");
ColumnVector stringResult = v2.mergeAndSetValidity(BinaryOp.BITWISE_OR, v0, v1);
ColumnVector stringExpected = ColumnVector.fromStrings("0", "100", "1", "2", "MIN_VALUE", null);
ColumnVector noMaskResult = v2.mergeAndSetValidity(BinaryOp.BITWISE_OR)) {
assertColumnsAreEqual(v0, intResultV0);
assertColumnsAreEqual(v1, intResultV0V1);
assertColumnsAreEqual(v1, intResultMulti);
assertColumnsAreEqual(stringExpected, stringResult);
assertColumnsAreEqual(v2, noMaskResult);
}
}

@Test
void isNotNullTestEmptyColumn() {
try (ColumnVector v = ColumnVector.fromBoxedInts();
Expand Down